Added JIT FPU, removed Softfloat option (too slow to be useful) - Thanks TomB!

This commit is contained in:
Dimitris Panokostas 2018-01-30 01:08:23 +01:00
parent f24301e8dd
commit a8815b211e
55 changed files with 3579 additions and 10940 deletions

View file

@ -254,11 +254,6 @@ OBJS = \
src/filesys.o \
src/flashrom.o \
src/fpp.o \
src/fpp_native.o \
src/fpp_softfloat.o \
src/softfloat/softfloat.o \
src/softfloat/softfloat_decimal.o \
src/softfloat/softfloat_fpsp.o \
src/fsdb.o \
src/fsdb_unix.o \
src/fsusage.o \

View file

@ -192,10 +192,6 @@
<ClInclude Include="..\..\src\osdep\picasso96.h" />
<ClInclude Include="..\..\src\osdep\sysconfig.h" />
<ClInclude Include="..\..\src\osdep\target.h" />
<ClInclude Include="..\..\src\softfloat\softfloat-macros.h" />
<ClInclude Include="..\..\src\softfloat\softfloat-specialize.h" />
<ClInclude Include="..\..\src\softfloat\softfloat.h" />
<ClInclude Include="..\..\src\softfloat\softfloat_fpsp_tables.h" />
<ClInclude Include="..\..\src\sounddep\sound.h" />
<ClInclude Include="..\..\src\threaddep\thread.h" />
</ItemGroup>
@ -295,7 +291,6 @@
<ClCompile Include="..\..\src\flashrom.cpp" />
<ClCompile Include="..\..\src\fpp.cpp" />
<ClCompile Include="..\..\src\fpp_native.cpp" />
<ClCompile Include="..\..\src\fpp_softfloat.cpp" />
<ClCompile Include="..\..\src\fsdb.cpp" />
<ClCompile Include="..\..\src\fsdb_unix.cpp" />
<ClCompile Include="..\..\src\fsusage.cpp" />
@ -372,9 +367,6 @@
<ClCompile Include="..\..\src\rtc.cpp" />
<ClCompile Include="..\..\src\savestate.cpp" />
<ClCompile Include="..\..\src\scsi.cpp" />
<ClCompile Include="..\..\src\softfloat\softfloat.cpp" />
<ClCompile Include="..\..\src\softfloat\softfloat_decimal.cpp" />
<ClCompile Include="..\..\src\softfloat\softfloat_fpsp.cpp" />
<ClCompile Include="..\..\src\sounddep\sound.cpp" />
<ClCompile Include="..\..\src\statusline.cpp" />
<ClCompile Include="..\..\src\traps.cpp" />

View file

@ -54,9 +54,6 @@
<Filter Include="Source files\osdep\gui">
<UniqueIdentifier>{d946fd2c-30b2-45d3-9999-ccc3749160b7}</UniqueIdentifier>
</Filter>
<Filter Include="Source files\softfloat">
<UniqueIdentifier>{628a02d1-51f3-4021-81e5-6103ddf96904}</UniqueIdentifier>
</Filter>
<Filter Include="Source files\sounddep">
<UniqueIdentifier>{49dfa14b-d5bf-4aa3-a660-12f97ae62bdb}</UniqueIdentifier>
</Filter>
@ -188,9 +185,6 @@
<ClCompile Include="..\..\src\fpp_native.cpp">
<Filter>Source files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\fpp_softfloat.cpp">
<Filter>Source files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\fsdb.cpp">
<Filter>Source files</Filter>
</ClCompile>
@ -658,18 +652,6 @@
<ClInclude Include="..\..\src\osdep\gui\UaeRadioButton.hpp">
<Filter>Source files\osdep\gui</Filter>
</ClInclude>
<ClInclude Include="..\..\src\softfloat\softfloat.h">
<Filter>Source files\softfloat</Filter>
</ClInclude>
<ClInclude Include="..\..\src\softfloat\softfloat_fpsp_tables.h">
<Filter>Source files\softfloat</Filter>
</ClInclude>
<ClInclude Include="..\..\src\softfloat\softfloat-macros.h">
<Filter>Source files\softfloat</Filter>
</ClInclude>
<ClInclude Include="..\..\src\softfloat\softfloat-specialize.h">
<Filter>Source files\softfloat</Filter>
</ClInclude>
<ClInclude Include="..\..\src\sounddep\sound.h">
<Filter>Source files\sounddep</Filter>
</ClInclude>
@ -1007,15 +989,6 @@
<ClCompile Include="..\..\src\osdep\gui\UaeRadioButton.cpp">
<Filter>Source files\osdep\gui</Filter>
</ClCompile>
<ClCompile Include="..\..\src\softfloat\softfloat.cpp">
<Filter>Source files\softfloat</Filter>
</ClCompile>
<ClCompile Include="..\..\src\softfloat\softfloat_decimal.cpp">
<Filter>Source files\softfloat</Filter>
</ClCompile>
<ClCompile Include="..\..\src\softfloat\softfloat_fpsp.cpp">
<Filter>Source files\softfloat</Filter>
</ClCompile>
<ClCompile Include="..\..\src\sounddep\sound.cpp">
<Filter>Source files\sounddep</Filter>
</ClCompile>

View file

@ -58,7 +58,6 @@
<ProjectFile>Amiberry.vcxproj</ProjectFile>
<RemoteBuildEnvironment>
<Records />
<EnvironmentSetupFiles />
</RemoteBuildEnvironment>
<ParallelJobCount>1</ParallelJobCount>
<SuppressDirectoryChangeMessages>true</SuppressDirectoryChangeMessages>

View file

@ -11,6 +11,7 @@
<Vfork>false</Vfork>
<Syscalls />
</CatchpointConfiguration>
<LiveWatches />
<MaskInterruptsWhileStepping>false</MaskInterruptsWhileStepping>
<MemoryWindowPreferences />
</DebugPreferences>

View file

@ -35,6 +35,7 @@
</FileMasks>
<TransferNewFilesOnly>true</TransferNewFilesOnly>
<IncludeSubdirectories>true</IncludeSubdirectories>
<SelectedDirectories />
<DeleteDisappearedFiles>false</DeleteDisappearedFiles>
<ApplyGlobalExclusionList>true</ApplyGlobalExclusionList>
</MainSourceTransferCommand>
@ -57,7 +58,6 @@
<ProjectFile>Amiberry.vcxproj</ProjectFile>
<RemoteBuildEnvironment>
<Records />
<EnvironmentSetupFiles />
</RemoteBuildEnvironment>
<ParallelJobCount>1</ParallelJobCount>
<SuppressDirectoryChangeMessages>true</SuppressDirectoryChangeMessages>

View file

@ -67,7 +67,7 @@
<Link>
<AdditionalLinkerInputs>;%(Link.AdditionalLinkerInputs)</AdditionalLinkerInputs>
<LibrarySearchDirectories>=/usr/local/lib;../../src/guisan/lib;%(Link.LibrarySearchDirectories)</LibrarySearchDirectories>
<AdditionalLibraryNames>SDL2;pthread;z;png;rt;xml2;FLAC;mpg123;dl;mpeg2convert;mpeg2;SDL2_image;SDL2_ttf;guisan;m;%(Link.AdditionalLibraryNames)</AdditionalLibraryNames>
<AdditionalLibraryNames>SDL2;pthread;z;png;rt;xml2;FLAC;mpg123;dl;mpeg2convert;mpeg2;SDL2_image;SDL2_ttf;guisan;%(Link.AdditionalLibraryNames)</AdditionalLibraryNames>
<LinkerScript />
</Link>
</ItemDefinitionGroup>
@ -90,7 +90,7 @@
<ClCompile>
<CPPLanguageStandard>GNUPP14</CPPLanguageStandard>
<AdditionalIncludeDirectories>=/usr/local/include/SDL2;=/usr/include/libxml2;../../src;../../src/osdep;../../src/threaddep;../../src/include;../../src/guisan/include;../../src/archivers;%(ClCompile.AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>NDEBUG=1;RELEASE=1;ARMV6T2;USE_ARMNEON;_REENTRANT;AMIBERRY;CPU_arm;ARMV6_ASSEMBLY;USE_SDL2;%(ClCompile.PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions>NDEBUG=1;RELEASE=1;ARMV6T2;USE_ARMNEON;_REENTRANT;AMIBERRY;CPU_arm;ARMV6_ASSEMBLY;USE_SDL2;USE_RENDER_THREAD;%(ClCompile.PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalOptions>-march=armv8-a -mfpu=neon-fp-armv8 -mfloat-abi=hard %(AdditionalOptions)</AdditionalOptions>
</ClCompile>
<Link>
@ -207,8 +207,6 @@
<ClCompile Include="..\..\src\filesys.cpp" />
<ClCompile Include="..\..\src\flashrom.cpp" />
<ClCompile Include="..\..\src\fpp.cpp" />
<ClCompile Include="..\..\src\fpp_native.cpp" />
<ClCompile Include="..\..\src\fpp_softfloat.cpp" />
<ClCompile Include="..\..\src\fsdb.cpp" />
<ClCompile Include="..\..\src\fsdb_unix.cpp" />
<ClCompile Include="..\..\src\fsusage.cpp" />
@ -284,9 +282,6 @@
<ClCompile Include="..\..\src\rtc.cpp" />
<ClCompile Include="..\..\src\savestate.cpp" />
<ClCompile Include="..\..\src\scsi.cpp" />
<ClCompile Include="..\..\src\softfloat\softfloat.cpp" />
<ClCompile Include="..\..\src\softfloat\softfloat_decimal.cpp" />
<ClCompile Include="..\..\src\softfloat\softfloat_fpsp.cpp" />
<ClCompile Include="..\..\src\sounddep\sound.cpp" />
<ClCompile Include="..\..\src\statusline.cpp" />
<ClCompile Include="..\..\src\traps.cpp" />
@ -434,10 +429,6 @@
<ClInclude Include="..\..\src\osdep\picasso96.h" />
<ClInclude Include="..\..\src\osdep\sysconfig.h" />
<ClInclude Include="..\..\src\osdep\target.h" />
<ClInclude Include="..\..\src\softfloat\softfloat-macros.h" />
<ClInclude Include="..\..\src\softfloat\softfloat-specialize.h" />
<ClInclude Include="..\..\src\softfloat\softfloat.h" />
<ClInclude Include="..\..\src\softfloat\softfloat_fpsp_tables.h" />
<ClInclude Include="..\..\src\sounddep\sound.h" />
<ClInclude Include="..\..\src\threaddep\thread.h" />
</ItemGroup>

View file

@ -18,9 +18,6 @@
<Filter Include="Source files\sounddep">
<UniqueIdentifier>{406f7c18-2b0e-4564-8646-fdaef3089f65}</UniqueIdentifier>
</Filter>
<Filter Include="Source files\softfloat">
<UniqueIdentifier>{6e21b349-366f-4684-bb77-ead2ccf9c8f4}</UniqueIdentifier>
</Filter>
<Filter Include="Source files\osdep">
<UniqueIdentifier>{29512242-0e9f-4bfa-b302-f46f792e55cd}</UniqueIdentifier>
</Filter>
@ -196,12 +193,6 @@
<ClCompile Include="..\..\src\fpp.cpp">
<Filter>Source files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\fpp_native.cpp">
<Filter>Source files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\fpp_softfloat.cpp">
<Filter>Source files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\fsdb.cpp">
<Filter>Source files</Filter>
</ClCompile>
@ -283,15 +274,6 @@
<ClCompile Include="..\..\src\sounddep\sound.cpp">
<Filter>Source files\sounddep</Filter>
</ClCompile>
<ClCompile Include="..\..\src\softfloat\softfloat.cpp">
<Filter>Source files\softfloat</Filter>
</ClCompile>
<ClCompile Include="..\..\src\softfloat\softfloat_decimal.cpp">
<Filter>Source files\softfloat</Filter>
</ClCompile>
<ClCompile Include="..\..\src\softfloat\softfloat_fpsp.cpp">
<Filter>Source files\softfloat</Filter>
</ClCompile>
<ClCompile Include="..\..\src\osdep\amiberry.cpp">
<Filter>Source files\osdep</Filter>
</ClCompile>
@ -633,18 +615,6 @@
<ClInclude Include="..\..\src\sounddep\sound.h">
<Filter>Source files\sounddep</Filter>
</ClInclude>
<ClInclude Include="..\..\src\softfloat\softfloat.h">
<Filter>Source files\softfloat</Filter>
</ClInclude>
<ClInclude Include="..\..\src\softfloat\softfloat_fpsp_tables.h">
<Filter>Source files\softfloat</Filter>
</ClInclude>
<ClInclude Include="..\..\src\softfloat\softfloat-macros.h">
<Filter>Source files\softfloat</Filter>
</ClInclude>
<ClInclude Include="..\..\src\softfloat\softfloat-specialize.h">
<Filter>Source files\softfloat</Filter>
</ClInclude>
<ClInclude Include="..\..\src\osdep\amiberry_gfx.h">
<Filter>Source files\osdep</Filter>
</ClInclude>

View file

@ -60,7 +60,6 @@
<Value>C:\SysGCC\raspberry\bin;%PATH%</Value>
</Record>
</Records>
<EnvironmentSetupFiles />
</RemoteBuildEnvironment>
<ParallelJobCount>1</ParallelJobCount>
<SuppressDirectoryChangeMessages>true</SuppressDirectoryChangeMessages>

View file

@ -60,7 +60,6 @@
<Value>C:\SysGCC\raspberry\bin;%PATH%</Value>
</Record>
</Records>
<EnvironmentSetupFiles />
</RemoteBuildEnvironment>
<ParallelJobCount>1</ParallelJobCount>
<SuppressDirectoryChangeMessages>true</SuppressDirectoryChangeMessages>

View file

@ -35,26 +35,26 @@
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|VisualGDB'">
<ClCompile>
<CPPLanguageStandard>GNUPP14</CPPLanguageStandard>
<AdditionalIncludeDirectories>C:\SysGCC\raspberry\lib\gcc\arm-linux-gnueabihf\4.9\include;C:\SysGCC\raspberry\arm-linux-gnueabihf\include\c++\4.9;C:\SysGCC\raspberry\arm-linux-gnueabihf\include;C:\SysGCC\raspberry\arm-linux-gnueabihf\sysroot\usr\include;../../src/include;../../src;../../src/osdep;%(ClCompile.AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>C:\SysGCC\raspberry\lib\gcc\arm-linux-gnueabihf\4.9\include;C:\SysGCC\raspberry\arm-linux-gnueabihf\include\c++\4.9;C:\SysGCC\raspberry\arm-linux-gnueabihf\include;C:\SysGCC\raspberry\arm-linux-gnueabihf\sysroot\usr\include;../../src/include;../../src;../../src/osdep;=/usr/local/include/SDL2;%(ClCompile.AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>DEBUG=1;%(ClCompile.PreprocessorDefinitions)</PreprocessorDefinitions>
</ClCompile>
<Link>
<AdditionalLinkerInputs>;%(Link.AdditionalLinkerInputs)</AdditionalLinkerInputs>
<LibrarySearchDirectories>;%(Link.LibrarySearchDirectories)</LibrarySearchDirectories>
<AdditionalLibraryNames>;%(Link.AdditionalLibraryNames)</AdditionalLibraryNames>
<LibrarySearchDirectories>=/usr/local/lib;%(Link.LibrarySearchDirectories)</LibrarySearchDirectories>
<AdditionalLibraryNames>SDL2;%(Link.AdditionalLibraryNames)</AdditionalLibraryNames>
<LinkerScript />
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|VisualGDB'">
<ClCompile>
<CPPLanguageStandard>GNUPP14</CPPLanguageStandard>
<AdditionalIncludeDirectories>C:\SysGCC\raspberry\lib\gcc\arm-linux-gnueabihf\4.9\include;C:\SysGCC\raspberry\arm-linux-gnueabihf\include\c++\4.9;C:\SysGCC\raspberry\arm-linux-gnueabihf\include;C:\SysGCC\raspberry\arm-linux-gnueabihf\sysroot\usr\include;../../src/include;../../src;../../src/osdep;%(ClCompile.AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>C:\SysGCC\raspberry\lib\gcc\arm-linux-gnueabihf\4.9\include;C:\SysGCC\raspberry\arm-linux-gnueabihf\include\c++\4.9;C:\SysGCC\raspberry\arm-linux-gnueabihf\include;C:\SysGCC\raspberry\arm-linux-gnueabihf\sysroot\usr\include;../../src/include;../../src;../../src/osdep;=/usr/local/include/SDL2;%(ClCompile.AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>NDEBUG=1;RELEASE=1;%(ClCompile.PreprocessorDefinitions)</PreprocessorDefinitions>
</ClCompile>
<Link>
<AdditionalLinkerInputs>;%(Link.AdditionalLinkerInputs)</AdditionalLinkerInputs>
<LibrarySearchDirectories>;%(Link.LibrarySearchDirectories)</LibrarySearchDirectories>
<AdditionalLibraryNames>;%(Link.AdditionalLibraryNames)</AdditionalLibraryNames>
<LibrarySearchDirectories>=/usr/local/lib;%(Link.LibrarySearchDirectories)</LibrarySearchDirectories>
<AdditionalLibraryNames>SDL2;%(Link.AdditionalLibraryNames)</AdditionalLibraryNames>
<LinkerScript />
</Link>
</ItemDefinitionGroup>

View file

@ -197,13 +197,17 @@ static const TCHAR* obsolete[] = {
_T("avoid_vid"), _T("avoid_dga"), _T("z3chipmem_size"), _T("state_replay_buffer"), _T("state_replay"),
_T("z3realmapping"), _T("force_0x10000000_z3"),
_T("fpu_arithmetic_exceptions"),
_T("gfx_filter_vert_zoom"),_T("gfx_filter_horiz_zoom"),
_T("gfx_filter_vert_zoom_mult"), _T("gfx_filter_horiz_zoom_mult"),
_T("gfx_filter_vert_offset"), _T("gfx_filter_horiz_offset"),
_T("pcibridge_rom_file"),
_T("pcibridge_rom_options"),
_T("cpuboard_ext_rom_file"),
_T("uaeboard_mode"),
_T("comp_oldsegv"),
_T("comp_midopt"),
_T("comp_lowopt"),
@ -1433,8 +1437,10 @@ void cfgfile_save_options(struct zfile* f, struct uae_prefs* p, int type)
cfgfile_dwrite_bool(f, _T("fpu_no_unimplemented"), p->fpu_no_unimplemented);
cfgfile_write_bool(f, _T("fpu_strict"), p->fpu_strict);
cfgfile_dwrite_bool(f, _T("fpu_softfloat"), p->fpu_softfloat);
#ifdef USE_JIT_FPU
cfgfile_write_bool(f, _T("compfpu"), p->compfpu);
#endif
cfgfile_write(f, _T("cachesize"), _T("%d"), p->cachesize);
cfg_write(_T("; "), f);
@ -3566,11 +3572,14 @@ static int cfgfile_parse_hardware(struct uae_prefs* p, const TCHAR* option, TCHA
|| cfgfile_yesno(option, value, _T("ksmirror_a8"), &p->cs_ksmirror_a8)
|| cfgfile_yesno(option, value, _T("cia_todbug"), &p->cs_ciatodbug)
|| cfgfile_yesno(option, value, _T("z3_autoconfig"), &p->cs_z3autoconfig)
|| cfgfile_yesno(option, value, _T("ntsc"), &p->ntscmode)
|| cfgfile_yesno(option, value, _T("cpu_compatible"), &p->cpu_compatible)
|| cfgfile_yesno(option, value, _T("cpu_24bit_addressing"), &p->address_space_24)
|| cfgfile_yesno(option, value, _T("fpu_strict"), &p->fpu_strict)
|| cfgfile_yesno(option, value, _T("fpu_softfloat"), &p->fpu_softfloat)
#ifdef USE_JIT_FPU
|| cfgfile_yesno(option, value, _T("compfpu"), &p->compfpu)
#endif
|| cfgfile_yesno(option, value, _T("floppy_write_protect"), &p->floppy_read_only)
|| cfgfile_yesno(option, value, _T("harddrive_write_protect"), &p->harddrive_read_only))
return 1;
@ -5165,6 +5174,11 @@ void default_prefs(struct uae_prefs* p, bool reset, int type)
p->sound_filter_type = 0;
p->sound_volume_cd = 20;
#ifdef USE_JIT_FPU
p->compfpu = 1;
#else
p->compfpu = 0;
#endif
p->cachesize = 0;
p->gfx_framerate = 1;
@ -5223,7 +5237,6 @@ void default_prefs(struct uae_prefs* p, bool reset, int type)
p->cpu_model = 68000;
p->fpu_no_unimplemented = false;
p->fpu_strict = false;
p->fpu_softfloat = false;
p->m68k_speed = 0;
p->cpu_compatible = false;
p->address_space_24 = true;

View file

@ -313,7 +313,6 @@ struct color_change *curr_color_changes = 0;
struct decision line_decisions[2 * (MAXVPOS + 2) + 1];
struct draw_info curr_drawinfo[2 * (MAXVPOS + 2) + 1];
#define COLOR_TABLE_SIZE (MAXVPOS + 2) * 2
struct color_entry curr_color_tables[COLOR_TABLE_SIZE];
static int next_sprite_entry = 0;

View file

@ -10,9 +10,9 @@
#define __USE_ISOC9X /* We might be able to pick up a NaN */
#include <math.h>
#include <cmath>
#include <float.h>
#include <fenv.h>
#include <cfenv>
#include "sysconfig.h"
#include "sysdeps.h"
@ -27,92 +27,9 @@
#include "savestate.h"
#include "cpu_prefetch.h"
#include "softfloat/softfloat.h"
void fpsr_set_exception(uae_u32 exception);
// FPU backend hooks.
// Each global below is declared with one of the FPP_* types, which are
// declared outside this view (presumably function-pointer typedefs - TODO
// confirm against the FPU header). The rest of this file invokes them with
// ordinary call syntax, e.g. fpp_is_zero(src) or fpp_to_exten(fpd, a, b, c).
// NOTE(review): the group labels below are inferred from the identifier
// names only; verify against whichever backend assigns these hooks.

// Debug printing.
FPP_PRINT fpp_print;
// Value classification predicates (used as conditions on an fpdata pointer).
FPP_IS fpp_is_snan;
FPP_IS fpp_unset_snan;
FPP_IS fpp_is_nan;
FPP_IS fpp_is_infinity;
FPP_IS fpp_is_zero;
FPP_IS fpp_is_neg;
FPP_IS fpp_is_denormal;
FPP_IS fpp_is_unnormal;
// Backend status and mode handling.
FPP_GET_STATUS fpp_get_status;
FPP_CLEAR_STATUS fpp_clear_status;
FPP_SET_MODE fpp_set_mode;
// Conversions between fpdata and other representations.
FPP_FROM_NATIVE fpp_from_native;
FPP_TO_NATIVE fpp_to_native;
FPP_TO_INT fpp_to_int;
FPP_FROM_INT fpp_from_int;
FPP_PACK fpp_to_pack;
FPP_PACK fpp_from_pack;
FPP_TO_SINGLE fpp_to_single;
FPP_FROM_SINGLE fpp_from_single;
FPP_TO_DOUBLE fpp_to_double;
FPP_FROM_DOUBLE fpp_from_double;
FPP_TO_EXTEN fpp_to_exten;
FPP_FROM_EXTEN fpp_from_exten;
FPP_TO_EXTEN fpp_to_exten_fmovem;
FPP_FROM_EXTEN fpp_from_exten_fmovem;
// Normalization and access to backend-internal intermediate results.
FPP_A fpp_normalize;
FPP_DENORMALIZE fpp_denormalize;
FPP_A fpp_get_internal_overflow;
FPP_A fpp_get_internal_underflow;
FPP_A fpp_get_internal_round_all;
FPP_A fpp_get_internal_round;
FPP_A fpp_get_internal_round_exten;
FPP_A fpp_get_internal;
FPP_GET32 fpp_get_internal_grs;
// Rounding helpers.
FPP_A fpp_round_single;
FPP_A fpp_round_double;
FPP_A fpp_round32;
FPP_A fpp_round64;
// Arithmetic and transcendental operations.
FPP_AB fpp_int;
FPP_AB fpp_sinh;
FPP_AB fpp_intrz;
FPP_ABP fpp_sqrt;
FPP_AB fpp_lognp1;
FPP_AB fpp_etoxm1;
FPP_AB fpp_tanh;
FPP_AB fpp_atan;
FPP_AB fpp_atanh;
FPP_AB fpp_sin;
FPP_AB fpp_asin;
FPP_AB fpp_tan;
FPP_AB fpp_etox;
FPP_AB fpp_twotox;
FPP_AB fpp_tentox;
FPP_AB fpp_logn;
FPP_AB fpp_log10;
FPP_AB fpp_log2;
FPP_ABP fpp_abs;
FPP_AB fpp_cosh;
FPP_ABP fpp_neg;
FPP_AB fpp_acos;
FPP_AB fpp_cos;
FPP_AB fpp_getexp;
FPP_AB fpp_getman;
FPP_ABP fpp_div;
FPP_ABQS fpp_mod;
FPP_ABP fpp_add;
FPP_ABP fpp_mul;
FPP_ABQS fpp_rem;
FPP_AB fpp_scale;
FPP_ABP fpp_sub;
FPP_AB fpp_sgldiv;
FPP_AB fpp_sglmul;
FPP_AB fpp_cmp;
FPP_AB fpp_tst;
FPP_ABP fpp_move;
#include "fpp_native.cpp"
#define DEBUG_FPP 0
#define EXCEPTION_FPP 0
@ -313,10 +230,6 @@ static uae_u32 get_ftag(fpdata *src, int size)
{
if (fpp_is_zero(src)) {
return 1; // ZERO
} else if (fpp_is_unnormal(src) || fpp_is_denormal(src)) {
if (size == 1 || size == 5)
return 5; // Single/double DENORMAL
return 4; // Extended DENORMAL or UNNORMAL
} else if (fpp_is_nan(src)) {
return 3; // NAN
} else if (fpp_is_infinity(src)) {
@ -332,16 +245,6 @@ STATIC_INLINE bool fp_is_dyadic(uae_u16 extra)
static bool fp_exception_pending(bool pre)
{
// first check for pending arithmetic exceptions
if (currprefs.fpu_softfloat) {
if (regs.fp_exp_pend) {
regs.fpu_exp_pre = pre;
Exception(regs.fp_exp_pend);
if (currprefs.fpu_model != 68882)
regs.fp_exp_pend = 0;
return true;
}
}
// no arithmetic exceptions pending, check for unimplemented datatype
if (regs.fp_unimp_pend) {
regs.fpu_exp_pre = pre;
@ -381,136 +284,11 @@ static uae_u32 fpsr_get_vector(uae_u32 exception)
return 0;
}
// Decide whether an FPU arithmetic exception is pending and, if so, record
// it and prepare the data later used for the FSAVE stack frame.
//
// Does nothing unless currprefs.fpu_softfloat is set. Otherwise it combines
// the FPSR exception status bits with the FPCR enable bits (plus, on
// 68040-and-later CPU models with an FPU, OVFL/UNFL and the caller's mask),
// stores the resulting vector in regs.fp_exp_pend, saves the opcode and
// effective address in regs, and fills the global fsave_data.
//
// mask   - additional FPSR bits OR-ed into the always-checked set when
//          cpu_model >= 68040 and an FPU model is configured
// src    - source operand; written into the frame, and on the 68040
//          opclass-3 path its SNAN state is cleared via fpp_unset_snan()
// opcode - instruction opcode, stored in regs.fp_opword
// extra  - extension word; opclass = bits 15-13, size = bits 12-10,
//          reg = bits 9-7 as extracted below
// ea     - effective address, stored in regs.fp_ea
static void fpsr_check_arithmetic_exception(uae_u32 mask, fpdata *src, uae_u32 opcode, uae_u16 extra, uae_u32 ea)
{
if (!currprefs.fpu_softfloat)
return;
// NOTE(review): nonmaskable is assigned below but never read in this function.
bool nonmaskable;
uae_u32 exception;
// Any exception status bit and matching exception enable bits set?
exception = regs.fpsr & regs.fpcr & 0xff00;
// Add 68040/68060 nonmaskable exceptions
if (currprefs.cpu_model >= 68040 && currprefs.fpu_model)
exception |= regs.fpsr & (FPSR_OVFL | FPSR_UNFL | mask);
if (exception) {
regs.fp_exp_pend = fpsr_get_vector(exception);
nonmaskable = (regs.fp_exp_pend != fpsr_get_vector(regs.fpsr & regs.fpcr));
// NOTE(review): fpu_softfloat was already tested at function entry, so this
// branch looks unreachable unless fpsr_get_vector() changes currprefs.
if (!currprefs.fpu_softfloat) {
// log message and exit
regs.fp_exp_pend = 0;
return;
}
regs.fp_opword = opcode;
regs.fp_ea = ea;
// data for FSAVE stack frame
fpdata eo;
uae_u32 opclass = (extra >> 13) & 7;
reset_fsave_data();
if (currprefs.fpu_model == 68881 || currprefs.fpu_model == 68882) {
// fsave data for 68881 and 68882
if (opclass == 3) { // 011
fsave_data.ccr = ((uae_u32)extra << 16) | extra;
} else { // 000 or 010
fsave_data.ccr = ((uae_u32)(opcode | 0x0080) << 16) | extra;
}
// The exceptional operand (eo) comes from the source operand or from a
// backend-internal overflow/underflow value, depending on the vector.
if (regs.fp_exp_pend == 54 || regs.fp_exp_pend == 52 || regs.fp_exp_pend == 50) { // SNAN, OPERR, DZ
fpp_from_exten_fmovem(src, &fsave_data.eo[0], &fsave_data.eo[1], &fsave_data.eo[2]);
if (regs.fp_exp_pend == 52 && opclass == 3) { // OPERR from move to integer or packed
fsave_data.eo[0] &= 0x4fff0000;
fsave_data.eo[1] = fsave_data.eo[2] = 0;
}
} else if (regs.fp_exp_pend == 53) { // OVFL
fpp_get_internal_overflow(&eo);
fpp_from_exten_fmovem(&eo, &fsave_data.eo[0], &fsave_data.eo[1], &fsave_data.eo[2]);
} else if (regs.fp_exp_pend == 51) { // UNFL
fpp_get_internal_underflow(&eo);
fpp_from_exten_fmovem(&eo, &fsave_data.eo[0], &fsave_data.eo[1], &fsave_data.eo[2]);
} // else INEX1, INEX2: do nothing
} else {
// fsave data for 68040
regs.fpu_exp_state = 1; // 68040 UNIMP frame
uae_u32 reg = (extra >> 7) & 7;
int size = (extra >> 10) & 7;
fsave_data.fpiarcu = regs.fpiar;
if (regs.fp_exp_pend == 54) { // SNAN (undocumented)
fsave_data.wbte15 = 1;
fsave_data.grs = 7;
} else {
fsave_data.grs = 1;
}
if (opclass == 3) { // OPCLASS 011
fsave_data.cmdreg1b = extra;
fsave_data.e1 = 1;
fsave_data.t = 1;
fsave_data.wbte15 = (regs.fp_exp_pend == 51 || regs.fp_exp_pend == 54) ? 1 : 0; // UNFL, SNAN
// The SNAN state of src is cleared before it is stored in the frame.
if (fpp_is_snan(src)) {
fpp_unset_snan(src);
}
fpp_from_exten_fmovem(src, &fsave_data.et[0], &fsave_data.et[1], &fsave_data.et[2]);
fsave_data.stag = get_ftag(src, -1);
} else { // OPCLASS 000 and 010
fsave_data.cmdreg1b = extra;
fsave_data.e1 = 1;
fsave_data.wbte15 = (regs.fp_exp_pend == 54) ? 1 : 0; // SNAN (undocumented)
if (regs.fp_exp_pend == 51 || regs.fp_exp_pend == 53 || regs.fp_exp_pend == 49) { // UNFL, OVFL, INEX
if ((extra & 0x30) == 0x20 || (extra & 0x3f) == 0x04) { // FADD, FSUB, FMUL, FDIV, FSQRT
regs.fpu_exp_state = 2; // 68040 BUSY frame
fsave_data.e3 = 1;
fsave_data.e1 = 0;
// cmdreg3b is a bit-rearranged copy of the extension word.
fsave_data.cmdreg3b = (extra & 0x3C3) | ((extra & 0x038)>>1) | ((extra & 0x004)<<3);
if (regs.fp_exp_pend == 51) { // UNFL
fpp_get_internal(&eo);
} else { // OVFL, INEX
fpp_get_internal_round(&eo);
}
fsave_data.grs = fpp_get_internal_grs();
fpp_from_exten_fmovem(&eo, &fsave_data.wbt[0], &fsave_data.wbt[1], &fsave_data.wbt[2]);
fsave_data.wbte15 = (regs.fp_exp_pend == 51) ? 1 : 0; // UNFL
// src and dst is stored (undocumented)
fpp_from_exten_fmovem(src, &fsave_data.et[0], &fsave_data.et[1], &fsave_data.et[2]);
fsave_data.stag = get_ftag(src, (opclass == 0) ? -1 : size);
// Only dyadic operations also record the destination register.
if (fp_is_dyadic(extra)) {
fpp_from_exten_fmovem(&regs.fp[reg], &fsave_data.fpt[0], &fsave_data.fpt[1], &fsave_data.fpt[2]);
fsave_data.dtag = get_ftag(&regs.fp[reg], -1);
}
} else { // FMOVE to register, FABS, FNEG
fpp_get_internal_round_exten(&eo);
fsave_data.grs = fpp_get_internal_grs();
fpp_from_exten_fmovem(&eo, &fsave_data.fpt[0], &fsave_data.fpt[1], &fsave_data.fpt[2]);
fpp_get_internal_round_all(&eo); // weird
fpp_from_exten_fmovem(&eo, &fsave_data.et[0], &fsave_data.et[1], &fsave_data.et[2]); // undocumented
fsave_data.stag = get_ftag(src, (opclass == 0) ? -1 : size);
}
} else { // SNAN, OPERR, DZ
fpp_from_exten_fmovem(src, &fsave_data.et[0], &fsave_data.et[1], &fsave_data.et[2]);
fsave_data.stag = get_ftag(src, (opclass == 0) ? -1 : size);
if (fp_is_dyadic(extra)) {
fpp_from_exten_fmovem(&regs.fp[reg], &fsave_data.fpt[0], &fsave_data.fpt[1], &fsave_data.fpt[2]);
fsave_data.dtag = get_ftag(&regs.fp[reg], -1);
}
}
}
}
}
}
static void fpsr_set_result(fpdata *result)
{
#ifdef JIT
regs.fp_result = *result;
#endif
// condition code byte
regs.fpsr &= 0x00fffff8; // clear cc
if (fpp_is_nan (result)) {
@ -527,18 +305,10 @@ static void fpsr_clear_status(void)
{
// clear exception status byte only
regs.fpsr &= 0x0fff00f8;
// clear external status
fpp_clear_status();
}
static uae_u32 fpsr_make_status(void)
{
uae_u32 exception;
// get external status
fpp_get_status(&regs.fpsr);
// update accrued exception byte
if (regs.fpsr & (FPSR_BSUN | FPSR_SNAN | FPSR_OPERR))
regs.fpsr |= FPSR_AE_IOP; // IOP = BSUN || SNAN || OPERR
@ -551,15 +321,7 @@ static uae_u32 fpsr_make_status(void)
if (regs.fpsr & (FPSR_OVFL | FPSR_INEX2 | FPSR_INEX1))
regs.fpsr |= FPSR_AE_INEX; // INEX = INEX1 || INEX2 || OVFL
if (!currprefs.fpu_softfloat)
return 0;
// return exceptions that interrupt calculation
exception = regs.fpsr & regs.fpcr & (FPSR_SNAN | FPSR_OPERR | FPSR_DZ);
if (currprefs.cpu_model >= 68040 && currprefs.fpu_model)
exception |= regs.fpsr & (FPSR_OVFL | FPSR_UNFL);
return exception;
return 0;
}
static int fpsr_set_bsun(void)
@ -567,15 +329,6 @@ static int fpsr_set_bsun(void)
regs.fpsr |= FPSR_BSUN;
regs.fpsr |= FPSR_AE_IOP;
if (regs.fpcr & FPSR_BSUN) {
// logging only so far
write_log (_T("FPU exception: BSUN! (FPSR: %08x, FPCR: %04x)\n"), regs.fpsr, regs.fpcr);
if (currprefs.fpu_softfloat) {
regs.fp_exp_pend = fpsr_get_vector(FPSR_BSUN);
fp_exception_pending(true);
return 1;
}
}
return 0;
}
@ -591,8 +344,22 @@ static void fpsr_get_quotient(uae_u64 *quot, uae_u8 *sign)
*sign = (regs.fpsr & FPSR_QUOT_SIGN) ? 1 : 0;
}
uae_u32 fpp_get_fpsr (void)
// Return the current FPSR value.
// In JIT builds with a non-zero cache size and compfpu enabled, the
// condition-code bits are first recomputed from regs.fp_result, so the
// value returned reflects the most recently recorded result.
static uae_u32 fpp_get_fpsr (void)
{
#ifdef JIT
	const bool jit_fpu_active = currprefs.cachesize && currprefs.compfpu;

	if (jit_fpu_active) {
		uae_u32 cc = 0;

		regs.fpsr &= 0x00fffff8; // clear cc

		// NAN, zero and infinity are mutually exclusive; first match wins.
		if (fpp_is_nan (&regs.fp_result))
			cc = FPSR_CC_NAN;
		else if (fpp_is_zero(&regs.fp_result))
			cc = FPSR_CC_Z;
		else if (fpp_is_infinity (&regs.fp_result))
			cc = FPSR_CC_I;

		// The sign flag is independent of the class chosen above.
		if (fpp_is_neg(&regs.fp_result))
			cc |= FPSR_CC_N;

		regs.fpsr |= cc;
	}
#endif
	return regs.fpsr;
}
@ -619,9 +386,23 @@ static void fpset (fpdata *fpd, uae_s32 val)
// Store a new FPSR value.
// In JIT builds with a non-zero cache size and compfpu enabled,
// regs.fp_result is also replaced by a stand-in value chosen from the
// written bits (see the comment in fpp_cond):
//   bit 0x01000000 set -> NaN
//   bit 0x04000000 set -> 0
//   bit 0x08000000 set -> -1
//   none of them       -> 1
// NOTE(review): these masks presumably correspond to the FPSR_CC_NAN,
// FPSR_CC_Z and FPSR_CC_N condition-code bits - confirm against their
// definitions.
static void fpp_set_fpsr (uae_u32 val)
{
	regs.fpsr = val;
#ifdef JIT
	if (!currprefs.cachesize || !currprefs.compfpu)
		return;

	if (val & 0x01000000) {
		fpnan(&regs.fp_result);
		return;
	}

	// Same priority as the flag tests: zero first, then negative, else positive.
	fpset(&regs.fp_result, (val & 0x04000000) ? 0 : ((val & 0x08000000) ? -1 : 1));
#endif
}
bool fpu_get_constant(fpdata *fpd, int cr)
static bool fpu_get_constant(fpdata *fpd, int cr)
{
uae_u32 f[3] = { 0, 0, 0 };
int entry = 0;
@ -739,16 +520,16 @@ bool fpu_get_constant(fpdata *fpd, int cr)
}
}
}
fpp_to_exten_fmovem(fpd, f[0], f[1], f[2]);
fpp_to_exten(fpd, f[0], f[1], f[2]);
if (prec == 1)
fpp_round32(fpd);
if (prec >= 2)
fpp_round64(fpd);
if (f1_adjust) {
fpp_from_exten_fmovem(fpd, &f[0], &f[1], &f[2]);
fpp_from_exten(fpd, &f[0], &f[1], &f[2]);
f[1] += f1_adjust * 0x80;
fpp_to_exten_fmovem(fpd, f[0], f[1], f[2]);
fpp_to_exten(fpd, f[0], f[1], f[2]);
}
fpsr_set_result(fpd);
@ -767,7 +548,7 @@ bool fpu_get_constant(fpdata *fpd, int cr)
f[2] += fpp_cr[entry].rndoff[mode];
}
fpp_to_exten_fmovem(fpd, f[0], f[1], f[2]);
fpp_to_exten(fpd, f[0], f[1], f[2]);
if (prec == 1)
fpp_round32(fpd);
@ -795,10 +576,10 @@ static void fp_unimp_instruction(uae_u16 opcode, uae_u16 extra, uae_u32 ea, uaec
reset_fsave_data();
fsave_data.cmdreg3b = (extra & 0x3C3) | ((extra & 0x038) >> 1) | ((extra & 0x004) << 3);
fsave_data.cmdreg1b = extra;
fpp_from_exten_fmovem(src, &fsave_data.et[0], &fsave_data.et[1], &fsave_data.et[2]);
fpp_from_exten(src, &fsave_data.et[0], &fsave_data.et[1], &fsave_data.et[2]);
fsave_data.stag = get_ftag(src, size);
if (reg >= 0) {
fpp_from_exten_fmovem(&regs.fp[reg], &fsave_data.fpt[0], &fsave_data.fpt[1], &fsave_data.fpt[2]);
fpp_from_exten(&regs.fp[reg], &fsave_data.fpt[0], &fsave_data.fpt[1], &fsave_data.fpt[2]);
fsave_data.dtag = get_ftag(&regs.fp[reg], -1);
}
}
@ -838,9 +619,9 @@ static void fp_unimp_datatype(uae_u16 opcode, uae_u16 extra, uae_u32 ea, uaecptr
}
if (opclass == 3) { // OPCLASS 011
fsave_data.t = 1;
fpp_from_exten_fmovem(src, &fsave_data.et[0], &fsave_data.et[1], &fsave_data.et[2]);
fpp_from_exten(src, &fsave_data.et[0], &fsave_data.et[1], &fsave_data.et[2]);
fsave_data.stag = get_ftag(src, -1);
fpp_from_exten_fmovem(src, &fsave_data.fpt[0], &fsave_data.fpt[1], &fsave_data.fpt[2]); // undocumented
fpp_from_exten(src, &fsave_data.fpt[0], &fsave_data.fpt[1], &fsave_data.fpt[2]); // undocumented
fsave_data.dtag = get_ftag(src, -1); // undocumented
} else { // OPCLASS 000 and 010
if (packed) {
@ -850,13 +631,13 @@ static void fp_unimp_datatype(uae_u16 opcode, uae_u16 extra, uae_u32 ea, uaecptr
fsave_data.et[2] = packed[2];
fsave_data.stag = 7; // undocumented
} else {
fpp_from_exten_fmovem(src, &fsave_data.et[0], &fsave_data.et[1], &fsave_data.et[2]);
fpp_from_exten(src, &fsave_data.et[0], &fsave_data.et[1], &fsave_data.et[2]);
fsave_data.stag = get_ftag(src, (opclass == 0) ? -1 : size);
if (fsave_data.stag == 5) {
fsave_data.et[0] = (size == 1) ? 0x3f800000 : 0x3c000000; // exponent for denormalized single and double
}
if (fp_is_dyadic(extra)) {
fpp_from_exten_fmovem(&regs.fp[reg], &fsave_data.fpt[0], &fsave_data.fpt[1], &fsave_data.fpt[2]);
fpp_from_exten(&regs.fp[reg], &fsave_data.fpt[0], &fsave_data.fpt[1], &fsave_data.fpt[2]);
fsave_data.dtag = get_ftag(&regs.fp[reg], -1);
}
}
@ -1026,9 +807,7 @@ static bool fault_if_unimplemented_6888x (uae_u16 opcode, uae_u16 extra, uaecptr
// Thin wrapper around fault_if_no_fpu(): forwards all four arguments and
// returns its result unchanged (true when that check reported a fault).
// The previous body tested the result, returned true/false explicitly and
// then carried a second, unreachable return; a single return is equivalent.
static bool fault_if_no_fpu_u (uae_u16 opcode, uae_u16 extra, uaecptr ea, uaecptr oldpc)
{
	return fault_if_no_fpu (opcode, extra, ea, oldpc);
}
static bool fault_if_no_6888x (uae_u16 opcode, uae_u16 extra, uaecptr oldpc)
@ -1073,44 +852,6 @@ static void fpu_null (void)
fpnan (&regs.fp[i]);
}
// Handle an unnormal or denormal source operand (softfloat mode only).
// 68040/060 does not support denormals: when cpu_model >= 68040, an FPU is
// configured and fpu_no_unimplemented is set, only a zero-valued operand is
// normalized in place; any other operand raises the unimplemented-datatype
// path and true is returned. In every other configuration the operand is
// simply normalized. Returns false whenever no fault was raised.
static bool normalize_or_fault_if_no_denormal_support(uae_u16 opcode, uae_u16 extra, uaecptr ea, uaecptr oldpc, fpdata *src)
{
	if (!currprefs.fpu_softfloat)
		return false;

	// Nothing to do for operands that are neither unnormal nor denormal.
	if (!fpp_is_unnormal(src) && !fpp_is_denormal(src))
		return false;

	const bool no_hw_denormals =
		currprefs.cpu_model >= 68040 && currprefs.fpu_model && currprefs.fpu_no_unimplemented;

	// 68040/060 can only fix unnormal zeros; everything else traps.
	if (no_hw_denormals && !fpp_is_zero(src)) {
		fp_unimp_datatype(opcode, extra, ea, oldpc, src, NULL);
		return true;
	}

	fpp_normalize(src);
	return false;
}
// Destination-operand counterpart of the denormal check (softfloat mode only).
// dst is the value that is inspected and normalized; src is only forwarded
// to fp_unimp_datatype() when the unimplemented-datatype path is taken.
// Returns true when that path was taken, false otherwise.
static bool normalize_or_fault_if_no_denormal_support_dst(uae_u16 opcode, uae_u16 extra, uaecptr ea, uaecptr oldpc, fpdata *dst, fpdata *src)
{
	if (!currprefs.fpu_softfloat)
		return false;

	// Nothing to do for destinations that are neither unnormal nor denormal.
	if (!fpp_is_unnormal(dst) && !fpp_is_denormal(dst))
		return false;

	const bool no_hw_denormals =
		currprefs.cpu_model >= 68040 && currprefs.fpu_model && currprefs.fpu_no_unimplemented;

	// 68040/060 can only fix unnormal zeros; everything else traps.
	if (no_hw_denormals && !fpp_is_zero(dst)) {
		fp_unimp_datatype(opcode, extra, ea, oldpc, src, NULL);
		return true;
	}

	fpp_normalize(dst);
	return false;
}
// 68040/060 does not support packed decimal format
static bool fault_if_no_packed_support(uae_u16 opcode, uae_u16 extra, uaecptr ea, uaecptr oldpc, fpdata *src, uae_u32 *packed)
{
@ -1119,20 +860,6 @@ static bool fault_if_no_packed_support(uae_u16 opcode, uae_u16 extra, uaecptr ea
return true;
}
return false;
}
// Check for SNAN/OPERR after a move to integer format on a 68040.
// Applies only when cpu_model is exactly 68040, an FPU is configured and
// fpu_softfloat is set. The FPSR is refreshed first; if SNAN or OPERR is
// then set, the arithmetic-exception check runs with those two bits as the
// mask, the pending exception is processed as a post-instruction exception
// and true is returned. Returns false in every other case.
static bool fault_if_68040_integer_nonmaskable(uae_u16 opcode, uae_u16 extra, uaecptr ea, uaecptr oldpc, fpdata *src)
{
	const bool applies =
		currprefs.cpu_model == 68040 && currprefs.fpu_model && currprefs.fpu_softfloat;
	if (!applies)
		return false;

	fpsr_make_status();
	if (!(regs.fpsr & (FPSR_SNAN | FPSR_OPERR)))
		return false;

	fpsr_check_arithmetic_exception(FPSR_SNAN | FPSR_OPERR, src, opcode, extra, ea);
	fp_exception_pending(false); // post
	return true;
}
static int get_fp_value (uae_u32 opcode, uae_u16 extra, fpdata *src, uaecptr oldpc, uae_u32 *adp)
@ -1148,7 +875,6 @@ static int get_fp_value (uae_u32 opcode, uae_u16 extra, fpdata *src, uaecptr old
if (fault_if_no_fpu (opcode, extra, 0, oldpc))
return -1;
*src = regs.fp[(extra >> 10) & 7];
normalize_or_fault_if_no_denormal_support(opcode, extra, 0, oldpc, src);
return 1;
}
mode = (opcode >> 3) & 7;
@ -1172,7 +898,6 @@ static int get_fp_value (uae_u32 opcode, uae_u16 extra, fpdata *src, uaecptr old
break;
case 1:
fpp_to_single (src, m68k_dreg (regs, reg));
normalize_or_fault_if_no_denormal_support(opcode, extra, 0, oldpc, src);
break;
default:
return 0;
@ -1257,7 +982,6 @@ static int get_fp_value (uae_u32 opcode, uae_u16 extra, fpdata *src, uaecptr old
break;
case 1:
fpp_to_single (src, (doext ? exts[0] : x_cp_get_long (ad)));
normalize_or_fault_if_no_denormal_support(opcode, extra, adold, oldpc, src);
break;
case 2:
{
@ -1268,7 +992,6 @@ static int get_fp_value (uae_u32 opcode, uae_u16 extra, fpdata *src, uaecptr old
ad += 4;
wrd3 = (doext ? exts[2] : x_cp_get_long (ad));
fpp_to_exten (src, wrd1, wrd2, wrd3);
normalize_or_fault_if_no_denormal_support(opcode, extra, adold, oldpc, src);
}
break;
case 3:
@ -1282,7 +1005,6 @@ static int get_fp_value (uae_u32 opcode, uae_u16 extra, fpdata *src, uaecptr old
if (fault_if_no_packed_support (opcode, extra, adold, oldpc, NULL, wrd))
return 1;
fpp_to_pack (src, wrd, 0);
fpp_normalize(src);
return 1;
}
break;
@ -1296,7 +1018,6 @@ static int get_fp_value (uae_u32 opcode, uae_u16 extra, fpdata *src, uaecptr old
ad += 4;
wrd2 = (doext ? exts[1] : x_cp_get_long (ad));
fpp_to_double (src, wrd1, wrd2);
normalize_or_fault_if_no_denormal_support(opcode, extra, adold, oldpc, src);
}
break;
case 6:
@ -1331,31 +1052,17 @@ static int put_fp_value (fpdata *value, uae_u32 opcode, uae_u16 extra, uaecptr o
switch (size)
{
case 6:
if (normalize_or_fault_if_no_denormal_support(opcode, extra, 0, oldpc, value))
return 1;
m68k_dreg (regs, reg) = (uae_u32)(((fpp_to_int (value, 0) & 0xff)
| (m68k_dreg (regs, reg) & ~0xff)));
if (fault_if_68040_integer_nonmaskable(opcode, extra, ad, oldpc, value))
return -1;
break;
case 4:
if (normalize_or_fault_if_no_denormal_support(opcode, extra, 0, oldpc, value))
return 1;
m68k_dreg (regs, reg) = (uae_u32)(((fpp_to_int (value, 1) & 0xffff)
| (m68k_dreg (regs, reg) & ~0xffff)));
if (fault_if_68040_integer_nonmaskable(opcode, extra, ad, oldpc, value))
return -1;
break;
case 0:
if (normalize_or_fault_if_no_denormal_support(opcode, extra, 0, oldpc, value))
return 1;
m68k_dreg (regs, reg) = (uae_u32)fpp_to_int (value, 2);
if (fault_if_68040_integer_nonmaskable(opcode, extra, ad, oldpc, value))
return -1;
break;
case 1:
if (normalize_or_fault_if_no_denormal_support(opcode, extra, 0, oldpc, value))
return 1;
m68k_dreg (regs, reg) = fpp_from_single (value);
break;
default:
@ -1410,21 +1117,13 @@ static int put_fp_value (fpdata *value, uae_u32 opcode, uae_u16 extra, uaecptr o
switch (size)
{
case 0:
if (normalize_or_fault_if_no_denormal_support(opcode, extra, ad, oldpc, value))
return 1;
x_cp_put_long(ad, (uae_u32)fpp_to_int(value, 2));
if (fault_if_68040_integer_nonmaskable(opcode, extra, ad, oldpc, value))
return -1;
break;
case 1:
if (normalize_or_fault_if_no_denormal_support(opcode, extra, ad, oldpc, value))
return 1;
x_cp_put_long(ad, fpp_from_single(value));
break;
case 2:
{
if (normalize_or_fault_if_no_denormal_support(opcode, extra, ad, oldpc, value))
return 1;
uae_u32 wrd1, wrd2, wrd3;
fpp_from_exten(value, &wrd1, &wrd2, &wrd3);
x_cp_put_long (ad, wrd1);
@ -1445,7 +1144,6 @@ static int put_fp_value (fpdata *value, uae_u32 opcode, uae_u16 extra, uaecptr o
kfactor &= 127;
if (kfactor & 64)
kfactor |= ~63;
fpp_normalize(value);
fpp_from_pack(value, wrd, kfactor);
x_cp_put_long (ad, wrd[0]);
ad += 4;
@ -1455,16 +1153,10 @@ static int put_fp_value (fpdata *value, uae_u32 opcode, uae_u16 extra, uaecptr o
}
break;
case 4:
if (normalize_or_fault_if_no_denormal_support(opcode, extra, ad, oldpc, value))
return 1;
x_cp_put_word(ad, (uae_s16)fpp_to_int(value, 1));
if (fault_if_68040_integer_nonmaskable(opcode, extra, ad, oldpc, value))
return -1;
break;
case 5:
{
if (normalize_or_fault_if_no_denormal_support(opcode, extra, ad, oldpc, value))
return 1;
uae_u32 wrd1, wrd2;
fpp_from_double(value, &wrd1, &wrd2);
x_cp_put_long (ad, wrd1);
@ -1473,11 +1165,7 @@ static int put_fp_value (fpdata *value, uae_u32 opcode, uae_u16 extra, uaecptr o
}
break;
case 6:
if (normalize_or_fault_if_no_denormal_support(opcode, extra, ad, oldpc, value))
return 1;
x_cp_put_byte(ad, (uae_s8)fpp_to_int(value, 0));
if (fault_if_68040_integer_nonmaskable(opcode, extra, ad, oldpc, value))
return -1;
break;
default:
return 0;
@ -1539,9 +1227,19 @@ int fpp_cond (int condition)
{
int NotANumber, N, Z;
NotANumber = (regs.fpsr & FPSR_CC_NAN) != 0;
N = (regs.fpsr & FPSR_CC_N) != 0;
Z = (regs.fpsr & FPSR_CC_Z) != 0;
#ifdef JIT
if (currprefs.cachesize && currprefs.compfpu) {
// JIT reads and writes regs.fp_result
NotANumber = fpp_is_nan(&regs.fp_result);
N = fpp_is_neg(&regs.fp_result);
Z = fpp_is_zero(&regs.fp_result);
} else
#endif
{
NotANumber = (regs.fpsr & FPSR_CC_NAN) != 0;
N = (regs.fpsr & FPSR_CC_N) != 0;
Z = (regs.fpsr & FPSR_CC_Z) != 0;
}
if ((condition & 0x10) && NotANumber) {
if (fpsr_set_bsun())
@ -1994,14 +1692,12 @@ retry:
if (cusavepc == 0xFE) {
if (opclass == 0 || opclass == 2) {
fpp_to_exten_fmovem(&dst, fsave_data.fpt[0], fsave_data.fpt[1], fsave_data.fpt[2]);
fpp_denormalize(&dst, fpte15);
fpp_to_exten_fmovem(&src, fsave_data.et[0], fsave_data.et[1], fsave_data.et[2]);
fpp_denormalize(&src, et15);
fpp_to_exten(&dst, fsave_data.fpt[0], fsave_data.fpt[1], fsave_data.fpt[2]);
fpp_to_exten(&src, fsave_data.et[0], fsave_data.et[1], fsave_data.et[2]);
#if EXCEPTION_FPP
uae_u32 tmpsrc[3], tmpdst[3];
fpp_from_exten_fmovem(&src, &tmpsrc[0], &tmpsrc[1], &tmpsrc[2]);
fpp_from_exten_fmovem(&dst, &tmpdst[0], &tmpdst[1], &tmpdst[2]);
fpp_from_exten(&src, &tmpsrc[0], &tmpsrc[1], &tmpsrc[2]);
fpp_from_exten(&dst, &tmpdst[0], &tmpdst[1], &tmpdst[2]);
write_log (_T("FRESTORE src = %08X %08X %08X, dst = %08X %08X %08X, extra = %04X\n"),
tmpsrc[0], tmpsrc[1], tmpsrc[2], tmpdst[0], tmpdst[1], tmpdst[2], cmdreg1b);
#endif
@ -2011,8 +1707,6 @@ retry:
if (v)
regs.fp[(cmdreg1b>>7)&7] = dst;
fpsr_check_arithmetic_exception(0, &src, regs.fp_opword, cmdreg1b, regs.fp_ea);
} else {
write_log (_T("FRESTORE resume of opclass %d instruction not supported %08x\n"), opclass, ad_orig);
}
@ -2117,7 +1811,7 @@ static uaecptr fmovem2mem (uaecptr ad, uae_u32 list, int incr, int regdir)
else
reg = r;
if (list & 0x80) {
fpp_from_exten_fmovem(&regs.fp[reg], &wrd1, &wrd2, &wrd3);
fpp_from_exten(&regs.fp[reg], &wrd1, &wrd2, &wrd3);
if (incr < 0)
ad -= 3 * 4;
x_put_long(ad + 0, wrd1);
@ -2164,7 +1858,7 @@ static bool fp_arithmetic(fpdata *src, fpdata *dst, int extra)
switch (extra & 0x7f)
{
case 0x00: /* FMOVE */
fpp_move(dst, src, 0);
fpp_move(dst, src, fpu_prec);
break;
case 0x40: /* FSMOVE */
fpp_move(dst, src, 32);
@ -2182,7 +1876,7 @@ static bool fp_arithmetic(fpdata *src, fpdata *dst, int extra)
fpp_intrz(dst, src);
break;
case 0x04: /* FSQRT */
fpp_sqrt(dst, src, 0);
fpp_sqrt(dst, src, fpu_prec);
break;
case 0x41: /* FSSQRT */
fpp_sqrt(dst, src, 32);
@ -2233,7 +1927,7 @@ static bool fp_arithmetic(fpdata *src, fpdata *dst, int extra)
fpp_log2(dst, src);
break;
case 0x18: /* FABS */
fpp_abs(dst, src, 0);
fpp_abs(dst, src, fpu_prec);
break;
case 0x58: /* FSABS */
fpp_abs(dst, src, 32);
@ -2245,7 +1939,7 @@ static bool fp_arithmetic(fpdata *src, fpdata *dst, int extra)
fpp_cosh(dst, src);
break;
case 0x1a: /* FNEG */
fpp_neg(dst, src, 0);
fpp_neg(dst, src, fpu_prec);
break;
case 0x5a: /* FSNEG */
fpp_neg(dst, src, 32);
@ -2266,7 +1960,7 @@ static bool fp_arithmetic(fpdata *src, fpdata *dst, int extra)
fpp_getman(dst, src);
break;
case 0x20: /* FDIV */
fpp_div(dst, src, 0);
fpp_div(dst, src, fpu_prec);
break;
case 0x60: /* FSDIV */
fpp_div(dst, src, 32);
@ -2280,7 +1974,7 @@ static bool fp_arithmetic(fpdata *src, fpdata *dst, int extra)
fpsr_set_quotient(q, s);
break;
case 0x22: /* FADD */
fpp_add(dst, src, 0);
fpp_add(dst, src, fpu_prec);
break;
case 0x62: /* FSADD */
fpp_add(dst, src, 32);
@ -2289,7 +1983,7 @@ static bool fp_arithmetic(fpdata *src, fpdata *dst, int extra)
fpp_add(dst, src, 64);
break;
case 0x23: /* FMUL */
fpp_mul(dst, src, 0);
fpp_mul(dst, src, fpu_prec);
break;
case 0x63: /* FSMUL */
fpp_mul(dst, src, 32);
@ -2312,7 +2006,7 @@ static bool fp_arithmetic(fpdata *src, fpdata *dst, int extra)
fpp_sglmul(dst, src);
break;
case 0x28: /* FSUB */
fpp_sub(dst, src, 0);
fpp_sub(dst, src, fpu_prec);
break;
case 0x68: /* FSSUB */
fpp_sub(dst, src, 32);
@ -2390,7 +2084,6 @@ static void fpuop_arithmetic2 (uae_u32 opcode, uae_u16 extra)
return;
}
fpsr_make_status();
fpsr_check_arithmetic_exception(0, &src, opcode, extra, ad);
fp_exception_pending(false); // post/mid instruction
return;
@ -2598,7 +2291,6 @@ static void fpuop_arithmetic2 (uae_u32 opcode, uae_u16 extra)
fpsr_clear_status();
fpu_get_constant(&regs.fp[reg], extra & 0x7f);
fpsr_make_status();
fpsr_check_arithmetic_exception(0, &src, opcode, extra, ad);
return;
}
@ -2620,9 +2312,6 @@ static void fpuop_arithmetic2 (uae_u32 opcode, uae_u16 extra)
dst = regs.fp[reg];
if (fp_is_dyadic(extra))
normalize_or_fault_if_no_denormal_support_dst(opcode, extra, ad, pc, &dst, &src);
// check for 680x0 unimplemented instruction
if (fault_if_unimplemented_680x0 (opcode, extra, ad, pc, &src, reg))
return;
@ -2635,8 +2324,6 @@ static void fpuop_arithmetic2 (uae_u32 opcode, uae_u16 extra)
v = fp_arithmetic(&src, &dst, extra);
fpsr_check_arithmetic_exception(0, &src, opcode, extra, ad);
if (v)
regs.fp[reg] = dst;
@ -2654,35 +2341,8 @@ void fpuop_arithmetic (uae_u32 opcode, uae_u16 extra)
fpuop_arithmetic2 (opcode, extra);
}
void fpu_modechange(void)
{
uae_u32 temp_ext[8][3];
if (currprefs.fpu_softfloat == changed_prefs.fpu_softfloat)
return;
currprefs.fpu_softfloat = changed_prefs.fpu_softfloat;
for (int i = 0; i < 8; i++) {
fpp_from_exten_fmovem(&regs.fp[i], &temp_ext[i][0], &temp_ext[i][1], &temp_ext[i][2]);
}
if (currprefs.fpu_softfloat) {
fp_init_softfloat();
} else {
fp_init_native();
}
for (int i = 0; i < 8; i++) {
fpp_to_exten_fmovem(&regs.fp[i], temp_ext[i][0], temp_ext[i][1], temp_ext[i][2]);
}
}
void fpu_reset (void)
{
if (currprefs.fpu_softfloat) {
fp_init_softfloat();
} else {
fp_init_native();
}
#if defined(CPU_i386) || defined(CPU_x86_64)
init_fpucw_x87();
#endif
@ -2709,7 +2369,7 @@ uae_u8 *restore_fpu (uae_u8 *src)
w1 = restore_u16 () << 16;
w2 = restore_u32 ();
w3 = restore_u32 ();
fpp_to_exten_fmovem(&regs.fp[i], w1, w2, w3);
fpp_to_exten(&regs.fp[i], w1, w2, w3);
}
regs.fpcr = restore_u32 ();
regs.fpsr = restore_u32 ();
@ -2776,7 +2436,7 @@ uae_u8 *save_fpu (int *len, uae_u8 *dstptr)
save_u32 (currprefs.fpu_model);
save_u32 (0x80000000 | 0x20000000);
for (i = 0; i < 8; i++) {
fpp_from_exten_fmovem(&regs.fp[i], &w1, &w2, &w3);
fpp_from_exten(&regs.fp[i], &w1, &w2, &w3);
save_u16 (w1 >> 16);
save_u32 (w2);
save_u32 (w3);

File diff suppressed because it is too large Load diff

View file

@ -1,780 +0,0 @@
/*
* UAE - The Un*x Amiga Emulator
*
* MC68881/68882/68040/68060 FPU emulation
* Softfloat version
*
* Andreas Grabher and Toni Wilen
*
*/
#define __USE_ISOC9X /* We might be able to pick up a NaN */
#define SOFTFLOAT_FAST_INT64
#include <math.h>
#include <float.h>
#include <fenv.h>
#include "sysconfig.h"
#include "sysdeps.h"
#include "options.h"
#include "memory.h"
#include "newcpu.h"
#include "fpp.h"
#include "newcpu.h"
#include "softfloat/softfloat-macros.h"
#include "softfloat/softfloat-specialize.h"
#define FPCR_ROUNDING_MODE 0x00000030
#define FPCR_ROUND_NEAR 0x00000000
#define FPCR_ROUND_ZERO 0x00000010
#define FPCR_ROUND_MINF 0x00000020
#define FPCR_ROUND_PINF 0x00000030
#define FPCR_ROUNDING_PRECISION 0x000000c0
#define FPCR_PRECISION_SINGLE 0x00000040
#define FPCR_PRECISION_DOUBLE 0x00000080
#define FPCR_PRECISION_EXTENDED 0x00000000
static struct float_status fs;
/* Functions for setting host/library modes and getting status */
// Applies the rounding precision and rounding mode fields of an FPCR value
// to the file-level softfloat status `fs`. Tininess detection is always set
// to "before rounding".
static void fp_set_mode(uae_u32 mode_control)
{
	const uae_u32 precision = mode_control & FPCR_ROUNDING_PRECISION;
	const uae_u32 rounding = mode_control & FPCR_ROUNDING_MODE;

	set_float_detect_tininess(float_tininess_before_rounding, &fs);

	if (precision == FPCR_PRECISION_SINGLE) {
		set_floatx80_rounding_precision(32, &fs);
	} else if (precision == FPCR_PRECISION_EXTENDED) {
		set_floatx80_rounding_precision(80, &fs);
	} else {
		// FPCR_PRECISION_DOUBLE, and the remaining bit pattern, select double
		set_floatx80_rounding_precision(64, &fs);
	}

	if (rounding == FPCR_ROUND_NEAR) {
		// to nearest
		set_float_rounding_mode(float_round_nearest_even, &fs);
	} else if (rounding == FPCR_ROUND_ZERO) {
		// to zero
		set_float_rounding_mode(float_round_to_zero, &fs);
	} else if (rounding == FPCR_ROUND_MINF) {
		// to minus infinity
		set_float_rounding_mode(float_round_down, &fs);
	} else if (rounding == FPCR_ROUND_PINF) {
		// to plus infinity
		set_float_rounding_mode(float_round_up, &fs);
	}
}
// ORs the FPSR exception bits that correspond to the softfloat exception
// flags currently raised in `fs` into *status. Bits already set in *status
// are left untouched.
static void fp_get_status(uae_u32 *status)
{
	static const struct {
		int softfloat_flag;
		uae_u32 fpsr_bit;
	} flag_map[] = {
		{ float_flag_signaling, FPSR_SNAN },
		{ float_flag_invalid, FPSR_OPERR },
		{ float_flag_divbyzero, FPSR_DZ },
		{ float_flag_overflow, FPSR_OVFL },
		{ float_flag_underflow, FPSR_UNFL },
		{ float_flag_inexact, FPSR_INEX2 },
		{ float_flag_decimal, FPSR_INEX1 },
	};
	const int raised = fs.float_exception_flags;
	uae_u32 bits = 0;

	for (unsigned int k = 0; k < sizeof flag_map / sizeof flag_map[0]; k++) {
		if (raised & flag_map[k].softfloat_flag)
			bits |= flag_map[k].fpsr_bit;
	}
	*status |= bits;
}
// Clears all accumulated softfloat exception flags in the file-level status `fs`.
STATIC_INLINE void fp_clear_status(void)
{
fs.float_exception_flags = 0;
}
// Formats an extended-precision value as text.
//
// mode < 0 : raw hex dump "HHHH-XXXXXXXX-XXXXXXXX" (high word, mantissa halves).
// mode == 0: full decimal representation.
// mode > 0 : decimal representation truncated to at most `mode` characters.
//
// Returns a pointer to a function-local static buffer, so the result is
// overwritten by the next call.
static const TCHAR *fp_printx80(floatx80 *fx, int mode)
{
static TCHAR fsout[32];
flag n, u, d;
if (mode < 0) {
_stprintf(fsout, _T("%04X-%08X-%08X"), fx->high, (uae_u32)(fx->low >> 32), (uae_u32)fx->low);
return fsout;
}
n = floatx80_is_negative(*fx);
u = floatx80_is_unnormal(*fx);
d = floatx80_is_denormal(*fx);
if (floatx80_is_infinity(*fx)) {
_stprintf(fsout, _T("%c%s"), n ? '-' : '+', _T("inf"));
} else if (floatx80_is_signaling_nan(*fx)) {
_stprintf(fsout, _T("%c%s"), n ? '-' : '+', _T("snan"));
} else if (floatx80_is_nan(*fx)) {
_stprintf(fsout, _T("%c%s"), n ? '-' : '+', _T("nan"));
} else {
// Convert to a 17-digit decimal. The conversion uses the shared status `fs`,
// so the exception flags are saved, cleared for the call and restored after.
int32_t len = 17;
int8_t save_exception_flags = fs.float_exception_flags;
fs.float_exception_flags = 0;
floatx80 x = floatx80_to_floatdecimal(*fx, &len, &fs);
// Output: sign, 1 integer digit, 16 fraction digits, exponent sign and 4 exponent
// digits, then "D" (denormal) or "U" (unnormal), then "~" if the conversion was inexact.
_stprintf(fsout, _T("%c%01lld.%016llde%c%04d%s%s"), n ? '-' : '+',
x.low / LIT64(10000000000000000), x.low % LIT64(10000000000000000),
(x.high & 0x4000) ? '-' : '+', x.high & 0x3FFF, d ? _T("D") : u ? _T("U") : _T(""),
(fs.float_exception_flags & float_flag_inexact) ? _T("~") : _T(""));
fs.float_exception_flags = save_exception_flags;
}
// Truncate only when a positive length shorter than the text was requested.
if (mode == 0 || mode > _tcslen(fsout))
return fsout;
fsout[mode] = 0;
return fsout;
}
// fpdata front end for fp_printx80(); the returned string lives in that
// function's static buffer.
static const TCHAR *fp_print(fpdata *fpd, int mode)
{
return fp_printx80(&fpd->fpx, mode);
}
/* Functions for detecting float type */
// True when the value is a signaling NaN.
static bool fp_is_snan(fpdata *fpd)
{
return floatx80_is_signaling_nan(fpd->fpx) != 0;
}
// Sets mantissa bit 62 of the value (presumably the quiet-NaN bit, given the
// function name). Always returns false.
static bool fp_unset_snan(fpdata *fpd)
{
fpd->fpx.low |= LIT64(0x4000000000000000);
return 0;
}
// True when the value is any kind of NaN.
static bool fp_is_nan (fpdata *fpd)
{
return floatx80_is_any_nan(fpd->fpx) != 0;
}
// True when the value is an infinity.
static bool fp_is_infinity (fpdata *fpd)
{
return floatx80_is_infinity(fpd->fpx) != 0;
}
// True when the value is zero.
static bool fp_is_zero(fpdata *fpd)
{
return floatx80_is_zero(fpd->fpx) != 0;
}
// True when the value is negative.
static bool fp_is_neg(fpdata *fpd)
{
return floatx80_is_negative(fpd->fpx) != 0;
}
// True when the value is a denormal.
static bool fp_is_denormal(fpdata *fpd)
{
return floatx80_is_denormal(fpd->fpx) != 0;
}
// True when the value is an unnormal.
static bool fp_is_unnormal(fpdata *fpd)
{
return floatx80_is_unnormal(fpd->fpx) != 0;
}
// Loads a single-precision operand, given as its raw 32-bit word, into fpd.
static void to_single(fpdata *fpd, uae_u32 wrd1)
{
float32 f = wrd1;
fpd->fpx = float32_to_floatx80_allowunnormal(f, &fs);
}
// Converts fpd to single precision (using the status in `fs`) and returns
// the resulting 32-bit word.
static uae_u32 from_single(fpdata *fpd)
{
float32 f = floatx80_to_float32(fpd->fpx, &fs);
return f;
}
// Loads a double-precision operand into fpd; wrd1 is the upper and wrd2 the
// lower 32 bits of the 64-bit word.
static void to_double(fpdata *fpd, uae_u32 wrd1, uae_u32 wrd2)
{
float64 f = ((float64)wrd1 << 32) | wrd2;
fpd->fpx = float64_to_floatx80_allowunnormal(f, &fs);
}
// Converts fpd to double precision (using the status in `fs`); *wrd1 receives
// the upper and *wrd2 the lower 32 bits.
static void from_double(fpdata *fpd, uae_u32 *wrd1, uae_u32 *wrd2)
{
float64 f = floatx80_to_float64(fpd->fpx, &fs);
*wrd1 = f >> 32;
*wrd2 = (uae_u32)f;
}
// Loads an extended-precision operand without any conversion: the upper 16
// bits of wrd1 become the sign/exponent word, wrd2:wrd3 the 64-bit mantissa.
// The lower 16 bits of wrd1 are ignored.
static void to_exten(fpdata *fpd, uae_u32 wrd1, uae_u32 wrd2, uae_u32 wrd3)
{
fpd->fpx.high = (uae_u16)(wrd1 >> 16);
fpd->fpx.low = ((uae_u64)wrd2 << 32) | wrd3;
}
// Stores fpd as the three 32-bit words of an extended-precision operand.
// The value is first passed through floatx80_to_floatx80() with the shared
// status `fs`. *wrd1 receives the sign/exponent word in its upper 16 bits
// (lower 16 bits zero), *wrd2 and *wrd3 the upper and lower mantissa halves.
static void from_exten(fpdata *fpd, uae_u32 *wrd1, uae_u32 *wrd2, uae_u32 *wrd3)
{
	floatx80 f = floatx80_to_floatx80(fpd->fpx, &fs);

	// Widen before shifting: a narrower f.high would be promoted to signed
	// int, and a set sign bit (0x8000) would then be shifted into the int's
	// sign bit.
	*wrd1 = (uae_u32)f.high << 16;
	*wrd2 = (uae_u32)(f.low >> 32);
	*wrd3 = (uae_u32)f.low;
}
// Raw extended-precision load for the FMOVEM/state paths: the upper 16 bits
// of wrd1 become the sign/exponent word and wrd2:wrd3 the 64-bit mantissa,
// with no conversion applied.
static void to_exten_fmovem(fpdata *fpd, uae_u32 wrd1, uae_u32 wrd2, uae_u32 wrd3)
{
fpd->fpx.high = (uae_u16)(wrd1 >> 16);
fpd->fpx.low = ((uae_u64)wrd2 << 32) | wrd3;
}
// Raw extended-precision store: copies the register contents bit for bit,
// without passing the value through softfloat first. *wrd1 receives the
// sign/exponent word in its upper 16 bits (lower 16 bits zero), *wrd2 and
// *wrd3 the upper and lower mantissa halves.
static void from_exten_fmovem(fpdata *fpd, uae_u32 *wrd1, uae_u32 *wrd2, uae_u32 *wrd3)
{
	// Widen before shifting: a narrower fpx.high would be promoted to signed
	// int, and a set sign bit (0x8000) would then be shifted into the int's
	// sign bit.
	*wrd1 = (uae_u32)fpd->fpx.high << 16;
	*wrd2 = (uae_u32)(fpd->fpx.low >> 32);
	*wrd3 = (uae_u32)fpd->fpx.low;
}
// Converts src to an integer of the requested width using the status in `fs`.
// size: 0 = 8-bit, 1 = 16-bit, 2 = 32-bit; any other value yields 0.
static uae_s64 to_int(fpdata *src, int size)
{
	if (size == 0)
		return floatx80_to_int8(src->fpx, &fs);
	if (size == 1)
		return floatx80_to_int16(src->fpx, &fs);
	if (size == 2)
		return floatx80_to_int32(src->fpx, &fs);
	return 0;
}
// Stores the signed 32-bit integer src in fpd as an extended-precision value.
static void from_int(fpdata *fpd, uae_s32 src)
{
fpd->fpx = int32_to_floatx80(src);
}
/* Functions for returning exception state data */
// Each getter below copies one of softfloat's saved intermediate results
// into fpd. What exactly each value represents is defined by the softfloat
// getFloatInternal*() functions, which are not visible here.

// Fetches the getFloatInternalOverflow() value.
static void fp_get_internal_overflow(fpdata *fpd)
{
fpd->fpx = getFloatInternalOverflow();
}
// Fetches the getFloatInternalUnderflow() value.
static void fp_get_internal_underflow(fpdata *fpd)
{
fpd->fpx = getFloatInternalUnderflow();
}
// Fetches the getFloatInternalRoundedAll() value.
static void fp_get_internal_round_all(fpdata *fpd)
{
fpd->fpx = getFloatInternalRoundedAll();
}
// Fetches the getFloatInternalRoundedSome() value.
static void fp_get_internal_round(fpdata *fpd)
{
fpd->fpx = getFloatInternalRoundedSome();
}
// Fetches the getFloatInternalFloatx80() value.
static void fp_get_internal_round_exten(fpdata *fpd)
{
fpd->fpx = getFloatInternalFloatx80();
}
// Fetches the getFloatInternalUnrounded() value.
static void fp_get_internal(fpdata *fpd)
{
fpd->fpx = getFloatInternalUnrounded();
}
// Returns getFloatInternalGRS() truncated to 32 bits (presumably the
// guard/round/sticky bits - confirm against softfloat).
static uae_u32 fp_get_internal_grs(void)
{
return (uae_u32)getFloatInternalGRS();
}
/* Function for denormalizing */
// Replaces fpd with floatx80_denormalize(fpd, esign); the meaning of esign
// is defined by that softfloat function.
static void fp_denormalize(fpdata *fpd, int esign)
{
fpd->fpx = floatx80_denormalize(fpd->fpx, esign);
}
/* Functions for rounding (all operate in place and use the status in `fs`) */
// round to float with extended precision exponent
static void fp_round32(fpdata *fpd)
{
fpd->fpx = floatx80_round32(fpd->fpx, &fs);
}
// round to double with extended precision exponent
static void fp_round64(fpdata *fpd)
{
fpd->fpx = floatx80_round64(fpd->fpx, &fs);
}
// round to float
static void fp_round_single(fpdata *fpd)
{
fpd->fpx = floatx80_round_to_float32(fpd->fpx, &fs);
}
// round to double
static void fp_round_double(fpdata *fpd)
{
fpd->fpx = floatx80_round_to_float64(fpd->fpx, &fs);
}
/* Arithmetic functions */
// Thin wrappers around the softfloat primitives. In every wrapper `a` is the
// destination and `b` the source; all use the shared status `fs`.

// a = b rounded to an integer (floatx80_round_to_int).
static void fp_int(fpdata *a, fpdata *b)
{
a->fpx = floatx80_round_to_int(b->fpx, &fs);
}
// a = b rounded to an integer toward zero.
static void fp_intrz(fpdata *a, fpdata *b)
{
a->fpx = floatx80_round_to_int_toward_zero(b->fpx, &fs);
}
// a = floatx80_getexp(b).
static void fp_getexp(fpdata *a, fpdata *b)
{
a->fpx = floatx80_getexp(b->fpx, &fs);
}
// a = floatx80_getman(b).
static void fp_getman(fpdata *a, fpdata *b)
{
a->fpx = floatx80_getman(b->fpx, &fs);
}
// a = floatx80_mod(a, b); q and s are filled in by softfloat (presumably
// quotient and its sign - confirm against softfloat).
static void fp_mod(fpdata *a, fpdata *b, uae_u64 *q, uae_u8 *s)
{
a->fpx = floatx80_mod(a->fpx, b->fpx, q, s, &fs);
}
// a = floatx80_sgldiv(a, b).
static void fp_sgldiv(fpdata *a, fpdata *b)
{
a->fpx = floatx80_sgldiv(a->fpx, b->fpx, &fs);
}
// a = floatx80_sglmul(a, b).
static void fp_sglmul(fpdata *a, fpdata *b)
{
a->fpx = floatx80_sglmul(a->fpx, b->fpx, &fs);
}
// a = floatx80_rem(a, b); q and s are filled in by softfloat (presumably
// quotient and its sign - confirm against softfloat).
static void fp_rem(fpdata *a, fpdata *b, uae_u64 *q, uae_u8 *s)
{
a->fpx = floatx80_rem(a->fpx, b->fpx, q, s, &fs);
}
// a = floatx80_scale(a, b).
static void fp_scale(fpdata *a, fpdata *b)
{
a->fpx = floatx80_scale(a->fpx, b->fpx, &fs);
}
// a = floatx80_cmp(a, b); note that the comparison result overwrites a.
static void fp_cmp(fpdata *a, fpdata *b)
{
a->fpx = floatx80_cmp(a->fpx, b->fpx, &fs);
}
// a = floatx80_tst(b); the previous contents of a are not read.
static void fp_tst(fpdata *a, fpdata *b)
{
a->fpx = floatx80_tst(b->fpx, &fs);
}
// SETPREC / RESETPREC temporarily override the rounding precision in `fs`.
// Both expect an int `prec` in scope; prec <= 0 leaves the precision alone.
// SETPREC declares the local `oldprec`, so it can appear only once per scope
// and must come before RESETPREC.
#define SETPREC \
uint8_t oldprec = fs.floatx80_rounding_precision; \
if (prec > 0) \
set_floatx80_rounding_precision(prec, &fs);
#define RESETPREC \
if (prec > 0) \
set_floatx80_rounding_precision(oldprec, &fs);
/* Functions with fixed precision */
// In each function below, prec > 0 forces that rounding precision for the
// duration of the operation and then restores the previous one; prec <= 0
// uses whatever precision is currently set in `fs`.

// a = b (floatx80_move).
static void fp_move(fpdata *a, fpdata *b, int prec)
{
SETPREC
a->fpx = floatx80_move(b->fpx, &fs);
RESETPREC
}
// a = |b|.
static void fp_abs(fpdata *a, fpdata *b, int prec)
{
SETPREC
a->fpx = floatx80_abs(b->fpx, &fs);
RESETPREC
}
// a = -b.
static void fp_neg(fpdata *a, fpdata *b, int prec)
{
SETPREC
a->fpx = floatx80_neg(b->fpx, &fs);
RESETPREC
}
// a = a + b.
static void fp_add(fpdata *a, fpdata *b, int prec)
{
SETPREC
a->fpx = floatx80_add(a->fpx, b->fpx, &fs);
RESETPREC
}
// a = a - b.
static void fp_sub(fpdata *a, fpdata *b, int prec)
{
SETPREC
a->fpx = floatx80_sub(a->fpx, b->fpx, &fs);
RESETPREC
}
// a = a * b.
static void fp_mul(fpdata *a, fpdata *b, int prec)
{
SETPREC
a->fpx = floatx80_mul(a->fpx, b->fpx, &fs);
RESETPREC
}
// a = a / b.
static void fp_div(fpdata *a, fpdata *b, int prec)
{
SETPREC
a->fpx = floatx80_div(a->fpx, b->fpx, &fs);
RESETPREC
}
// a = sqrt(b).
static void fp_sqrt(fpdata *a, fpdata *b, int prec)
{
SETPREC
a->fpx = floatx80_sqrt(b->fpx, &fs);
RESETPREC
}
// Transcendental functions: each stores the softfloat result for source b in
// destination a, using the shared status `fs`.

// a = floatx80_sinh(b).
static void fp_sinh(fpdata *a, fpdata *b)
{
a->fpx = floatx80_sinh(b->fpx, &fs);
}
// a = floatx80_lognp1(b).
static void fp_lognp1(fpdata *a, fpdata *b)
{
a->fpx = floatx80_lognp1(b->fpx, &fs);
}
// a = floatx80_etoxm1(b).
static void fp_etoxm1(fpdata *a, fpdata *b)
{
a->fpx = floatx80_etoxm1(b->fpx, &fs);
}
// a = floatx80_tanh(b).
static void fp_tanh(fpdata *a, fpdata *b)
{
a->fpx = floatx80_tanh(b->fpx, &fs);
}
// a = floatx80_atan(b).
static void fp_atan(fpdata *a, fpdata *b)
{
a->fpx = floatx80_atan(b->fpx, &fs);
}
// a = floatx80_asin(b).
static void fp_asin(fpdata *a, fpdata *b)
{
a->fpx = floatx80_asin(b->fpx, &fs);
}
// a = floatx80_atanh(b).
static void fp_atanh(fpdata *a, fpdata *b)
{
a->fpx = floatx80_atanh(b->fpx, &fs);
}
// a = floatx80_sin(b).
static void fp_sin(fpdata *a, fpdata *b)
{
a->fpx = floatx80_sin(b->fpx, &fs);
}
// a = floatx80_tan(b).
static void fp_tan(fpdata *a, fpdata *b)
{
a->fpx = floatx80_tan(b->fpx, &fs);
}
// a = floatx80_etox(b).
static void fp_etox(fpdata *a, fpdata *b)
{
a->fpx = floatx80_etox(b->fpx, &fs);
}
// a = floatx80_twotox(b).
static void fp_twotox(fpdata *a, fpdata *b)
{
a->fpx = floatx80_twotox(b->fpx, &fs);
}
// a = floatx80_tentox(b).
static void fp_tentox(fpdata *a, fpdata *b)
{
a->fpx = floatx80_tentox(b->fpx, &fs);
}
// a = floatx80_logn(b).
static void fp_logn(fpdata *a, fpdata *b)
{
a->fpx = floatx80_logn(b->fpx, &fs);
}
// a = floatx80_log10(b).
static void fp_log10(fpdata *a, fpdata *b)
{
a->fpx = floatx80_log10(b->fpx, &fs);
}
// a = floatx80_log2(b).
static void fp_log2(fpdata *a, fpdata *b)
{
a->fpx = floatx80_log2(b->fpx, &fs);
}
// a = floatx80_cosh(b).
static void fp_cosh(fpdata *a, fpdata *b)
{
a->fpx = floatx80_cosh(b->fpx, &fs);
}
// a = floatx80_acos(b).
static void fp_acos(fpdata *a, fpdata *b)
{
a->fpx = floatx80_acos(b->fpx, &fs);
}
// a = floatx80_cos(b).
static void fp_cos(fpdata *a, fpdata *b)
{
a->fpx = floatx80_cos(b->fpx, &fs);
}
/* Functions for converting between float formats */
static const fptype twoto32 = 4294967296.0;
// Converts the extended-precision value in fpd to the host type fptype and
// stores it in *fp. Zeros keep their sign; every NaN becomes the host's
// sqrt(-1) result, so the original NaN bits are not carried over.
static void to_native(fptype *fp, fpdata *fpd)
{
int expon;
fptype frac;
expon = fpd->fpx.high & 0x7fff;
if (fp_is_zero(fpd)) {
*fp = fp_is_neg(fpd) ? -0.0 : +0.0;
return;
}
if (fp_is_nan(fpd)) {
*fp = sqrt(-1);
return;
}
if (fp_is_infinity(fpd)) {
// -inf is produced as log(0.0), +inf as 1.0 / 0.0 through a variable.
double zero = 0.0;
*fp = fp_is_neg(fpd) ? log(0.0) : (1.0 / zero);
return;
}
// Scale the 64-bit mantissa by 2^-63 (twoto32 * 2147483648.0 == 2^63),
// then apply the sign and the unbiased exponent (bias 16383).
frac = (fptype)fpd->fpx.low / (fptype)(twoto32 * 2147483648.0);
if (fp_is_neg(fpd))
frac = -frac;
*fp = ldexp (frac, expon - 16383);
}
// Converts the host value fp to extended precision and stores it in fpd.
// The sign is taken from signbit(fp). NaN becomes exponent 0x7fff with an
// all-ones mantissa, infinity exponent 0x7fff with a zero mantissa, and zero
// a zero mantissa with a zero exponent.
static void from_native(fptype fp, fpdata *fpd)
{
int expon;
fptype frac;
if (signbit(fp))
fpd->fpx.high = 0x8000;
else
fpd->fpx.high = 0x0000;
if (isnan(fp)) {
fpd->fpx.high |= 0x7fff;
fpd->fpx.low = LIT64(0xffffffffffffffff);
return;
}
if (isinf(fp)) {
fpd->fpx.high |= 0x7fff;
fpd->fpx.low = LIT64(0x0000000000000000);
return;
}
if (fp == 0.0) {
fpd->fpx.low = LIT64(0x0000000000000000);
return;
}
if (fp < 0.0)
fp = -fp;
// frexp() returns frac in [0.5, 1); add half a unit of the 64th fraction
// bit so the truncating conversion below rounds to nearest.
frac = frexp (fp, &expon);
frac += 0.5 / (twoto32 * twoto32);
if (frac >= 1.0) {
frac /= 2.0;
expon++;
}
// The -1 accounts for frexp()'s [0.5, 1) range versus an explicit integer bit.
fpd->fpx.high |= (expon + 16383 - 1) & 0x7fff;
fpd->fpx.low = (uint64_t)(frac * (fptype)(twoto32 * twoto32));
// Shift the mantissa up until its top bit is set.
// NOTE(review): the stop test compares the whole high word, sign bit
// included, so for a negative value it would not stop at exponent 0 -
// confirm whether this loop can run at all for such inputs.
while (!(fpd->fpx.low & LIT64( 0x8000000000000000))) {
if (fpd->fpx.high == 0) {
break;
}
fpd->fpx.low <<= 1;
fpd->fpx.high--;
}
}
// Replaces a with floatx80_normalize(a), in place.
static void fp_normalize(fpdata *a)
{
a->fpx = floatx80_normalize(a->fpx);
}
// Converts a packed-decimal operand (three 32-bit words in wrd) to extended
// precision and stores it in fp. `dummy` is unused.
//
// Fields read from wrd[0]: bit 31 = mantissa sign, bit 30 = exponent sign,
// bits 16-27 = three BCD exponent digits, bits 0-3 = integer digit.
// wrd[1]:wrd[2] hold sixteen BCD fraction digits.
//
// The caller's words are only read. The zero-mantissa path used to mask
// wrd[0] in place, which modified the caller's buffer as a side effect.
static void fp_to_pack(fpdata *fp, uae_u32 *wrd, int dummy)
{
	floatx80 f;
	int i;
	uae_s32 dec_exp; // not named "exp", which would shadow exp() from <math.h>
	uae_s64 mant;
	uae_u32 pack_exp, pack_int, pack_se, pack_sm;
	uae_u64 pack_frac;

	if (((wrd[0] >> 16) & 0x7fff) == 0x7fff) {
		// infinity has extended exponent and all 0 packed fraction
		// nans are copied bit by bit
		fpp_to_exten(fp, wrd[0], wrd[1], wrd[2]);
		return;
	}
	if (!(wrd[0] & 0xf) && !wrd[1] && !wrd[2]) {
		// the exponent is irrelevant when the mantissa is zero: keep only the sign
		fpp_to_exten(fp, wrd[0] & 0x80000000, wrd[1], wrd[2]);
		return;
	}

	pack_exp = (wrd[0] >> 16) & 0xFFF;            // packed exponent
	pack_int = wrd[0] & 0xF;                      // packed integer part
	pack_frac = ((uae_u64)wrd[1] << 32) | wrd[2]; // packed fraction
	pack_se = (wrd[0] >> 30) & 1;                 // sign of packed exponent
	pack_sm = (wrd[0] >> 31) & 1;                 // sign of packed significand

	// Decode the three BCD exponent digits, most significant first.
	dec_exp = 0;
	for (i = 0; i < 3; i++) {
		dec_exp *= 10;
		dec_exp += (pack_exp >> (8 - i * 4)) & 0xF;
	}
	if (pack_se) {
		dec_exp = -dec_exp;
	}

	// The mantissa is assembled below as a 17-digit integer, so the decimal
	// point moves 16 places; the exponent is then stored as magnitude + sign.
	dec_exp -= 16;
	if (dec_exp < 0) {
		dec_exp = -dec_exp;
		pack_se = 1;
	}

	// Integer digit followed by the sixteen BCD fraction digits.
	mant = pack_int;
	for (i = 0; i < 16; i++) {
		mant *= 10;
		mant += (pack_frac >> (60 - i * 4)) & 0xF;
	}

	// Intermediate "float decimal": 14-bit exponent magnitude, bit 14 =
	// exponent sign, bit 15 = mantissa sign, low = integer mantissa.
	f.high = dec_exp & 0x3FFF;
	f.high |= pack_se ? 0x4000 : 0;
	f.high |= pack_sm ? 0x8000 : 0;
	f.low = mant;

	fp->fpx = floatdecimal_to_floatx80(f, &fs);
}
// Converts fp to packed-decimal format and stores the three result words in
// wrd. kfactor is handed to floatx80_to_floatdecimal(), which writes the
// number of produced mantissa digits back into it.
//
// Result layout in wrd[0]: bit 31 = mantissa sign, bit 30 = exponent sign,
// bits 16-27 = three low BCD exponent digits, bits 12-15 = fourth (highest)
// exponent digit, bits 0-3 = integer digit. wrd[1]:wrd[2] = BCD fraction.
static void fp_from_pack(fpdata *fp, uae_u32 *wrd, int kfactor)
{
	floatx80 f = floatx80_to_floatdecimal(fp->fpx, &kfactor, &fs);
	uae_u32 pack_exp, pack_exp4, pack_int, pack_se, pack_sm;
	uae_u64 pack_frac;
	uae_u32 exponent;
	uae_u64 significand;
	uae_s32 len;
	uae_u64 digit;

	if ((f.high & 0x7FFF) == 0x7FFF) {
		// Maximum exponent: the value is passed through unchanged.
		// Widen before shifting: a narrower f.high would be promoted to
		// signed int, and a set sign bit (0x8000) would then be shifted into
		// the int's sign bit.
		wrd[0] = (uae_u32)f.high << 16;
		wrd[1] = (uae_u32)(f.low >> 32);
		wrd[2] = (uae_u32)f.low;
		return;
	}

	exponent = f.high & 0x3FFF;
	significand = f.low;

	// Peel decimal digits off the significand, least significant first. The
	// last digit extracted is the integer digit; the others fill the fraction
	// nibbles so that the first fraction digit ends up in the top nibble.
	pack_int = 0;
	pack_frac = 0;
	len = kfactor; // SoftFloat saved len to kfactor variable
	while (len > 0) {
		len--;
		digit = significand % 10;
		significand /= 10;
		if (len == 0) {
			pack_int = (uae_u32)digit;
		} else {
			pack_frac |= digit << (64 - len * 4);
		}
	}

	// Four decimal exponent digits: the highest goes to pack_exp4, the lower
	// three are packed as BCD into pack_exp.
	pack_exp = 0;
	pack_exp4 = 0;
	len = 4;
	while (len > 0) {
		len--;
		digit = exponent % 10;
		exponent /= 10;
		if (len == 0) {
			pack_exp4 = (uae_u32)digit;
		} else {
			pack_exp |= (uae_u32)(digit << (12 - len * 4));
		}
	}

	pack_se = f.high & 0x4000;
	pack_sm = f.high & 0x8000;

	wrd[0] = pack_exp << 16;
	wrd[0] |= pack_exp4 << 12;
	wrd[0] |= pack_int;
	wrd[0] |= pack_se ? 0x40000000 : 0;
	wrd[0] |= pack_sm ? 0x80000000 : 0;

	wrd[1] = (uae_u32)(pack_frac >> 32);
	wrd[2] = (uae_u32)(pack_frac & 0xffffffff);
}
// Selects the softfloat backend by pointing every fpp_* function pointer at
// the implementation defined in this file.
void fp_init_softfloat(void)
{
// NOTE(review): fsx is a local that is configured here and then never read
// or copied; the file-level status `fs` used by all operations is not
// touched by this function. Confirm whether `fs` was meant instead.
float_status fsx = { 0 };
set_floatx80_rounding_precision(80, &fsx);
set_float_rounding_mode(float_round_to_zero, &fsx);
// printing and classification
fpp_print = fp_print;
fpp_is_snan = fp_is_snan;
fpp_unset_snan = fp_unset_snan;
fpp_is_nan = fp_is_nan;
fpp_is_infinity = fp_is_infinity;
fpp_is_zero = fp_is_zero;
fpp_is_neg = fp_is_neg;
fpp_is_denormal = fp_is_denormal;
fpp_is_unnormal = fp_is_unnormal;
// status and mode
fpp_get_status = fp_get_status;
fpp_clear_status = fp_clear_status;
fpp_set_mode = fp_set_mode;
// format conversions
fpp_from_native = from_native;
fpp_to_native = to_native;
fpp_to_int = to_int;
fpp_from_int = from_int;
fpp_to_pack = fp_to_pack;
fpp_from_pack = fp_from_pack;
fpp_to_single = to_single;
fpp_from_single = from_single;
fpp_to_double = to_double;
fpp_from_double = from_double;
fpp_to_exten = to_exten;
fpp_from_exten = from_exten;
fpp_to_exten_fmovem = to_exten_fmovem;
fpp_from_exten_fmovem = from_exten_fmovem;
// rounding and (de)normalization
fpp_round_single = fp_round_single;
fpp_round_double = fp_round_double;
fpp_round32 = fp_round32;
fpp_round64 = fp_round64;
fpp_normalize = fp_normalize;
fpp_denormalize = fp_denormalize;
// access to softfloat's saved intermediate results
fpp_get_internal_overflow = fp_get_internal_overflow;
fpp_get_internal_underflow = fp_get_internal_underflow;
fpp_get_internal_round_all = fp_get_internal_round_all;
fpp_get_internal_round = fp_get_internal_round;
fpp_get_internal_round_exten = fp_get_internal_round_exten;
fpp_get_internal = fp_get_internal;
fpp_get_internal_grs = fp_get_internal_grs;
// arithmetic and transcendental operations
fpp_int = fp_int;
fpp_sinh = fp_sinh;
fpp_intrz = fp_intrz;
fpp_sqrt = fp_sqrt;
fpp_lognp1 = fp_lognp1;
fpp_etoxm1 = fp_etoxm1;
fpp_tanh = fp_tanh;
fpp_atan = fp_atan;
fpp_atanh = fp_atanh;
fpp_sin = fp_sin;
fpp_asin = fp_asin;
fpp_tan = fp_tan;
fpp_etox = fp_etox;
fpp_twotox = fp_twotox;
fpp_tentox = fp_tentox;
fpp_logn = fp_logn;
fpp_log10 = fp_log10;
fpp_log2 = fp_log2;
fpp_abs = fp_abs;
fpp_cosh = fp_cosh;
fpp_neg = fp_neg;
fpp_acos = fp_acos;
fpp_cos = fp_cos;
fpp_getexp = fp_getexp;
fpp_getman = fp_getman;
fpp_div = fp_div;
fpp_mod = fp_mod;
fpp_add = fp_add;
fpp_mul = fp_mul;
fpp_rem = fp_rem;
fpp_scale = fp_scale;
fpp_sub = fp_sub;
fpp_sgldiv = fp_sgldiv;
fpp_sglmul = fp_sglmul;
fpp_cmp = fp_cmp;
fpp_tst = fp_tst;
fpp_move = fp_move;
}

View file

@ -4,14 +4,14 @@
#define AKIKO_BASE 0xb80000
#define AKIKO_BASE_END 0xb80100 /* ?? */
extern void akiko_reset(void);
extern int akiko_init(void);
extern void akiko_free(void);
extern void akiko_reset (void);
extern int akiko_init (void);
extern void akiko_free (void);
extern void AKIKO_hsync_handler(void);
extern void akiko_mute(int);
extern void AKIKO_hsync_handler (void);
extern void akiko_mute (int);
extern bool akiko_ntscmode(void);
extern void rethink_akiko(void);
extern void rethink_akiko (void);
#endif /* UAE_AKIKO_H */

View file

@ -94,44 +94,38 @@ struct color_entry {
};
/* convert 24 bit AGA Amiga RGB to native color */
// Disabled because it only works for 16-bit modes (wrong colors on AGA modes if running 32-bit)
//#ifdef ARMV6T2
//STATIC_INLINE uae_u32 CONVERT_RGB(uae_u32 c)
//{
// uae_u32 ret;
// __asm__ (
// "ubfx r1, %[c], #19, #5 \n\t"
// "ubfx r2, %[c], #10, #6 \n\t"
// "ubfx %[v], %[c], #3, #5 \n\t"
// "orr %[v], %[v], r1, lsl #11 \n\t"
// "orr %[v], %[v], r2, lsl #5 \n\t"
// "pkhbt %[v], %[v], %[v], lsl #16 \n\t"
// : [v] "=r" (ret) : [c] "r" (c) : "r1", "r2" );
// return ret;
//}
//STATIC_INLINE uae_u16 CONVERT_RGB_16(uae_u32 c)
//{
// uae_u16 ret;
// __asm__ (
// "ubfx r1, %[c], #19, #5 \n\t"
// "ubfx r2, %[c], #10, #6 \n\t"
// "ubfx %[v], %[c], #3, #5 \n\t"
// "orr %[v], %[v], r1, lsl #11 \n\t"
// "orr %[v], %[v], r2, lsl #5 \n\t"
// : [v] "=r" (ret) : [c] "r" (c) : "r1", "r2" );
// return ret;
//}
//#else
#ifdef WORDS_BIGENDIAN
# define CONVERT_RGB(c) \
( xbluecolors[((uae_u8*)(&c))[3]] | xgreencolors[((uae_u8*)(&c))[2]] | xredcolors[((uae_u8*)(&c))[1]] )
#ifdef ARMV6T2
STATIC_INLINE uae_u32 CONVERT_RGB(uae_u32 c)
{
uae_u32 ret;
__asm__ (
"ubfx r1, %[c], #19, #5 \n\t"
"ubfx r2, %[c], #10, #6 \n\t"
"ubfx %[v], %[c], #3, #5 \n\t"
"orr %[v], %[v], r1, lsl #11 \n\t"
"orr %[v], %[v], r2, lsl #5 \n\t"
"pkhbt %[v], %[v], %[v], lsl #16 \n\t"
: [v] "=r" (ret) : [c] "r" (c) : "r1", "r2" );
return ret;
}
STATIC_INLINE uae_u16 CONVERT_RGB_16(uae_u32 c)
{
uae_u16 ret;
__asm__ (
"ubfx r1, %[c], #19, #5 \n\t"
"ubfx r2, %[c], #10, #6 \n\t"
"ubfx %[v], %[c], #3, #5 \n\t"
"orr %[v], %[v], r1, lsl #11 \n\t"
"orr %[v], %[v], r2, lsl #5 \n\t"
: [v] "=r" (ret) : [c] "r" (c) : "r1", "r2" );
return ret;
}
#else
# define CONVERT_RGB(c) \
( xbluecolors[((uae_u8*)(&c))[0]] | xgreencolors[((uae_u8*)(&c))[1]] | xredcolors[((uae_u8*)(&c))[2]] )
#define CONVERT_RGB(c) \
( xbluecolors[((uae_u8*)(&c))[0]] | xgreencolors[((uae_u8*)(&c))[1]] | xredcolors[((uae_u8*)(&c))[2]] )
#define CONVERT_RGB_16(c) \
( xbluecolors[((uae_u8*)(&c))[0]] | xgreencolors[((uae_u8*)(&c))[1]] | xredcolors[((uae_u8*)(&c))[2]] )
#endif
//#endif
STATIC_INLINE xcolnr getxcolor (int c)
{
@ -213,8 +207,9 @@ extern uae_u16 spixels[MAX_SPR_PIXELS * 2];
/* Way too much... */
#define MAX_REG_CHANGE ((MAXVPOS + 1) * MAXHPOS)
#define COLOR_TABLE_SIZE (MAXVPOS + 2) * 2
extern struct color_entry curr_color_tables[(MAXVPOS + 2) * 2];
extern struct color_entry curr_color_tables[COLOR_TABLE_SIZE];
extern struct sprite_entry *curr_sprite_entries;
extern struct color_change *curr_color_changes;
@ -229,19 +224,14 @@ struct decision {
int diwfirstword, diwlastword;
int ctable;
uae_u16 bplcon0, bplcon2;
#ifdef AGA
uae_u16 bplcon3, bplcon4;
#endif
uae_u8 nr_planes;
uae_u8 bplres;
bool ehb_seen;
bool ham_seen;
bool ham_at_start;
#ifdef AGA
uae_u16 bplcon0, bplcon2;
uae_u16 bplcon3, bplcon4;
uae_u8 nr_planes;
uae_u8 bplres;
bool ham_seen;
bool ham_at_start;
bool bordersprite_seen;
bool xor_seen;
#endif
};
/* Anything related to changes in hw registers during the DDF for one

View file

@ -32,21 +32,36 @@
/* Native integer code conditions */
enum {
NATIVE_CC_EQ = 0,
NATIVE_CC_NE = 1,
NATIVE_CC_CS = 2,
NATIVE_CC_CC = 3,
NATIVE_CC_MI = 4,
NATIVE_CC_PL = 5,
NATIVE_CC_VS = 6,
NATIVE_CC_VC = 7,
NATIVE_CC_HI = 8,
NATIVE_CC_LS = 9,
NATIVE_CC_GE = 10,
NATIVE_CC_LT = 11,
NATIVE_CC_GT = 12,
NATIVE_CC_LE = 13,
NATIVE_CC_AL = 14
NATIVE_CC_EQ = 0,
NATIVE_CC_NE = 1,
NATIVE_CC_CS = 2,
NATIVE_CC_CC = 3,
NATIVE_CC_MI = 4,
NATIVE_CC_PL = 5,
NATIVE_CC_VS = 6,
NATIVE_CC_VC = 7,
NATIVE_CC_HI = 8,
NATIVE_CC_LS = 9,
NATIVE_CC_GE = 10,
NATIVE_CC_LT = 11,
NATIVE_CC_GT = 12,
NATIVE_CC_LE = 13,
NATIVE_CC_AL = 14,
// For FBcc, we need some pseudo condition codes
NATIVE_CC_F_OGT = 16 + 2,
NATIVE_CC_F_OGE = 16 + 3,
NATIVE_CC_F_OLT = 16 + 4,
NATIVE_CC_F_OLE = 16 + 5,
NATIVE_CC_F_OGL = 16 + 6,
NATIVE_CC_F_OR = 16 + 7,
NATIVE_CC_F_UN = 16 + 8,
NATIVE_CC_F_UEQ = 16 + 9,
NATIVE_CC_F_UGT = 16 + 10,
NATIVE_CC_F_UGE = 16 + 11,
NATIVE_CC_F_ULT = 16 + 12,
NATIVE_CC_F_ULE = 16 + 13
};
#endif /* NATIVE_FLAGS_ARM_H */

View file

@ -7,6 +7,7 @@
/* E = MAX & F # 0 -> NotANumber */
/* E = biased by 127 (single) ,1023 (double) ,16383 (extended) */
#pragma once
#define FPSR_BSUN 0x00008000
#define FPSR_SNAN 0x00004000
#define FPSR_OPERR 0x00002000
@ -16,129 +17,8 @@
#define FPSR_INEX2 0x00000200
#define FPSR_INEX1 0x00000100
extern void fp_init_native(void);
extern void fp_init_softfloat(void);
extern void fpsr_set_exception(uae_u32 exception);
extern void fpu_modechange(void);
#if defined(CPU_i386) || defined(CPU_x86_64)
extern void init_fpucw_x87(void);
#endif
typedef void (*FPP_ABQS)(fpdata*, fpdata*, uae_u64*, uae_u8*);
typedef void (*FPP_AB)(fpdata*, fpdata*);
typedef void (*FPP_ABP)(fpdata*, fpdata*, int);
typedef void (*FPP_A)(fpdata*);
typedef bool (*FPP_IS)(fpdata*);
typedef void (*FPP_SET_MODE)(uae_u32);
typedef void (*FPP_GET_STATUS)(uae_u32*);
typedef void (*FPP_CLEAR_STATUS)(void);
typedef void (*FPP_FROM_NATIVE)(fptype, fpdata*);
typedef void (*FPP_TO_NATIVE)(fptype*, fpdata*);
typedef void (*FPP_FROM_INT)(fpdata*,uae_s32);
typedef uae_s64 (*FPP_TO_INT)(fpdata*, int);
typedef void (*FPP_TO_SINGLE)(fpdata*, uae_u32);
typedef uae_u32 (*FPP_FROM_SINGLE)(fpdata*);
typedef void (*FPP_TO_DOUBLE)(fpdata*, uae_u32, uae_u32);
typedef void (*FPP_FROM_DOUBLE)(fpdata*, uae_u32*, uae_u32*);
typedef void (*FPP_TO_EXTEN)(fpdata*, uae_u32, uae_u32, uae_u32);
typedef void (*FPP_FROM_EXTEN)(fpdata*, uae_u32*, uae_u32*, uae_u32*);
typedef void (*FPP_PACK)(fpdata*, uae_u32*, int);
typedef const TCHAR* (*FPP_PRINT)(fpdata*,int);
typedef uae_u32 (*FPP_GET32)(void);
typedef void (*FPP_DENORMALIZE)(fpdata*,int);
extern FPP_PRINT fpp_print;
extern FPP_IS fpp_is_snan;
extern FPP_IS fpp_unset_snan;
extern FPP_IS fpp_is_nan;
extern FPP_IS fpp_is_infinity;
extern FPP_IS fpp_is_zero;
extern FPP_IS fpp_is_neg;
extern FPP_IS fpp_is_denormal;
extern FPP_IS fpp_is_unnormal;
extern FPP_GET_STATUS fpp_get_status;
extern FPP_CLEAR_STATUS fpp_clear_status;
extern FPP_SET_MODE fpp_set_mode;
extern FPP_FROM_NATIVE fpp_from_native;
extern FPP_TO_NATIVE fpp_to_native;
extern FPP_TO_INT fpp_to_int;
extern FPP_FROM_INT fpp_from_int;
extern FPP_PACK fpp_to_pack;
extern FPP_PACK fpp_from_pack;
extern FPP_TO_SINGLE fpp_to_single;
extern FPP_FROM_SINGLE fpp_from_single;
extern FPP_TO_DOUBLE fpp_to_double;
extern FPP_FROM_DOUBLE fpp_from_double;
extern FPP_TO_EXTEN fpp_to_exten;
extern FPP_FROM_EXTEN fpp_from_exten;
extern FPP_TO_EXTEN fpp_to_exten_fmovem;
extern FPP_FROM_EXTEN fpp_from_exten_fmovem;
extern FPP_A fpp_round_single;
extern FPP_A fpp_round_double;
extern FPP_A fpp_round32;
extern FPP_A fpp_round64;
extern FPP_A fpp_normalize;
extern FPP_DENORMALIZE fpp_denormalize;
extern FPP_A fpp_get_internal_overflow;
extern FPP_A fpp_get_internal_underflow;
extern FPP_A fpp_get_internal_round_all;
extern FPP_A fpp_get_internal_round;
extern FPP_A fpp_get_internal_round_exten;
extern FPP_A fpp_get_internal;
extern FPP_GET32 fpp_get_internal_grs;
extern FPP_AB fpp_int;
extern FPP_AB fpp_sinh;
extern FPP_AB fpp_intrz;
extern FPP_ABP fpp_sqrt;
extern FPP_AB fpp_lognp1;
extern FPP_AB fpp_etoxm1;
extern FPP_AB fpp_tanh;
extern FPP_AB fpp_atan;
extern FPP_AB fpp_atanh;
extern FPP_AB fpp_sin;
extern FPP_AB fpp_asin;
extern FPP_AB fpp_tan;
extern FPP_AB fpp_etox;
extern FPP_AB fpp_twotox;
extern FPP_AB fpp_tentox;
extern FPP_AB fpp_logn;
extern FPP_AB fpp_log10;
extern FPP_AB fpp_log2;
extern FPP_ABP fpp_abs;
extern FPP_AB fpp_cosh;
extern FPP_ABP fpp_neg;
extern FPP_AB fpp_acos;
extern FPP_AB fpp_cos;
extern FPP_AB fpp_getexp;
extern FPP_AB fpp_getman;
extern FPP_ABP fpp_div;
extern FPP_ABQS fpp_mod;
extern FPP_ABP fpp_add;
extern FPP_ABP fpp_mul;
extern FPP_ABQS fpp_rem;
extern FPP_AB fpp_scale;
extern FPP_ABP fpp_sub;
extern FPP_AB fpp_sgldiv;
extern FPP_AB fpp_sglmul;
extern FPP_AB fpp_cmp;
extern FPP_AB fpp_tst;
extern FPP_ABP fpp_move;
extern void fpsr_set_exception(uae_u32 exception);

View file

@ -18,7 +18,7 @@ void uae_PutMsg(uaecptr port, uaecptr msg);
void uae_Signal(uaecptr task, uae_u32 mask);
void uae_NotificationHack(uaecptr, uaecptr);
#endif
int native2amiga_isfree (void);
int native2amiga_isfree(void);
void uae_nativesem_wait(void);
void uae_nativesem_post(void);

View file

@ -12,7 +12,6 @@
#include "uae/types.h"
#include "readcpu.h"
#include "machdep/m68k.h"
#include <softfloat/softfloat.h>
extern const int areg_byteinc[];
extern const int imm8_table[];
@ -68,7 +67,6 @@ typedef double fptype;
typedef struct
{
floatx80 fpx;
fptype fp;
} fpdata;
@ -101,6 +99,9 @@ struct regstruct
#ifdef FPUEMU
fpdata fp[8];
#ifdef JIT
fpdata fp_result;
#endif
uae_u32 fpcr,fpsr, fpiar;
uae_u32 fpu_state;
uae_u32 fpu_exp_state;
@ -341,9 +342,7 @@ extern void fpuop_trapcc(uae_u32, uaecptr, uae_u16);
extern void fpuop_bcc(uae_u32, uaecptr, uae_u32);
extern void fpuop_save(uae_u32);
extern void fpuop_restore(uae_u32);
extern uae_u32 fpp_get_fpsr (void);
extern void fpu_reset (void);
extern bool fpu_get_constant(fpdata *fp, int cr);
extern int fpp_cond(int condition);
extern void exception3_read(uae_u32 opcode, uaecptr addr);

View file

@ -310,9 +310,9 @@ struct uae_prefs {
int sound_volume_paula;
int sound_volume_cd;
bool compfpu;
int cachesize;
bool fpu_strict;
bool fpu_softfloat;
int gfx_framerate;
struct wh gfx_size;

View file

@ -408,17 +408,18 @@ extern void gui_message(const TCHAR *, ...);
*/
#ifdef ARMV6_ASSEMBLY
STATIC_INLINE uae_u32 do_byteswap_32(uae_u32 v) {
__asm__(
"rev %0, %0"
: "=r" (v) : "0" (v)); return v;
__asm__ (
"rev %0, %0"
: "=r" (v) : "0" (v) ); return v;
}
STATIC_INLINE uae_u32 do_byteswap_16(uae_u32 v) {
__asm__(
"revsh %0, %0\n\t"
"uxth %0, %0"
: "=r" (v) : "0" (v)); return v;
__asm__ (
"revsh %0, %0\n\t"
"uxth %0, %0"
: "=r" (v) : "0" (v) ); return v;
}
#define bswap_16(x) do_byteswap_16(x)
#define bswap_32(x) do_byteswap_32(x)

View file

@ -333,6 +333,24 @@ LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, RR4 s))
}
LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, RR4 s))
/*
 * Emit code that loads the 32-bit value stored at absolute address s into
 * ARM register d. Addresses inside the global regs structure are read
 * relative to R_REGSTRUCT; any other address is first materialised in
 * REG_WORK1 (clobbered).
 */
LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
{
	const uae_u32 regs_base = (uae_u32) &regs;

	if(s < regs_base || s >= regs_base + sizeof(struct regstruct)) {
		// Outside regs: build the address in REG_WORK1, then load through it
#ifdef ARMV6T2
		MOVW_ri16(REG_WORK1, s);
		MOVT_ri16(REG_WORK1, s >> 16);
#else
		uae_s32 pool_offs = data_long_offs(s);
		LDR_rRI(REG_WORK1, RPC_INDEX, pool_offs);
#endif
		LDR_rR(d, REG_WORK1);
	} else {
		// Inside regs: a single load relative to the regs base register
		uae_s32 reg_offs = s - regs_base;
		LDR_rRI(d, R_REGSTRUCT, reg_offs);
	}
}
LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, RR2 s))
{
PKHBT_rrr(d, s, d);
@ -465,11 +483,6 @@ STATIC_INLINE void raw_emit_nop_filler(int nbytes)
while(nbytes--) { NOP(); }
}
STATIC_INLINE void raw_emit_nop(void)
{
NOP();
}
//
// Arm instructions
//
@ -611,7 +624,7 @@ LOWFUNC(NONE,NONE,3,compemu_raw_lea_l_brr,(W4 d, RR4 s, IMM offset))
}
LENDFUNC(NONE,NONE,3,compemu_raw_lea_l_brr,(W4 d, RR4 s, IMM offset))
LOWFUNC(NONE,WRITE,2,compemu_raw_mov_b_mr,(IMM d, RR1 s))
LOWFUNC(NONE,WRITE,2,compemu_raw_mov_b_mr,(MEMW d, RR1 s))
{
if(d >= (uae_u32) &regs && d < ((uae_u32) &regs) + sizeof(struct regstruct)) {
uae_s32 idx = d - (uae_u32) & regs;
@ -627,7 +640,7 @@ LOWFUNC(NONE,WRITE,2,compemu_raw_mov_b_mr,(IMM d, RR1 s))
STRB_rR(s, REG_WORK1);
}
}
LENDFUNC(NONE,WRITE,2,compemu_raw_mov_b_mr,(IMM d, RR1 s))
LENDFUNC(NONE,WRITE,2,compemu_raw_mov_b_mr,(MEMW d, RR1 s))
LOWFUNC(NONE,WRITE,2,compemu_raw_mov_l_mi,(MEMW d, IMM s))
{
@ -664,7 +677,7 @@ LOWFUNC(NONE,WRITE,2,compemu_raw_mov_l_mi,(MEMW d, IMM s))
}
LENDFUNC(NONE,WRITE,2,compemu_raw_mov_l_mi,(MEMW d, IMM s))
LOWFUNC(NONE,WRITE,2,compemu_raw_mov_l_mr,(IMM d, RR4 s))
LOWFUNC(NONE,WRITE,2,compemu_raw_mov_l_mr,(MEMW d, RR4 s))
{
if(d >= (uae_u32) &regs && d < ((uae_u32) &regs) + sizeof(struct regstruct)) {
uae_s32 idx = d - (uae_u32) & regs;
@ -680,7 +693,7 @@ LOWFUNC(NONE,WRITE,2,compemu_raw_mov_l_mr,(IMM d, RR4 s))
STR_rR(s, REG_WORK1);
}
}
LENDFUNC(NONE,WRITE,2,compemu_raw_mov_l_mr,(IMM d, RR4 s))
LENDFUNC(NONE,WRITE,2,compemu_raw_mov_l_mr,(MEMW d, RR4 s))
LOWFUNC(NONE,NONE,2,compemu_raw_mov_l_ri,(W4 d, IMM s))
{
@ -831,26 +844,101 @@ STATIC_INLINE void compemu_raw_call_r(RR4 r)
/*
 * Emit the instruction part of a conditional long jump for condition cc.
 * Every variant ends with a load of the program counter
 * (LDR_rRI(RPC_INDEX, RPC_INDEX, -4)); the conditional branches emitted in
 * front of it either fall into that load ("jump") or skip over it
 * ("no jump"). The jump target itself is not emitted here - the caller
 * emits it right after this sequence.
 * Besides the plain integer conditions, the NATIVE_CC_F_* pseudo conditions
 * are handled; their comments describe them in terms of NaN ("not valid")
 * and ordered comparison results.
 */
STATIC_INLINE void compemu_raw_jcc_l_oponly(int cc)
{
switch (cc) {
case 9: // LS
BEQ_i(0); // beq <dojmp>
BCC_i(1); // bcc <jp>
case NATIVE_CC_HI: // HI
BEQ_i(2); // beq no jump
BCS_i(1); // bcs no jump
// jump
LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
// no jump
break;
//<dojmp>:
LDR_rRI(RPC_INDEX, RPC_INDEX, -4); // ldr pc, [pc] ; <value>
break;
case NATIVE_CC_LS: // LS
BEQ_i(0); // beq jump
BCC_i(1); // bcc no jump
// jump
LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
// no jump
break;
case 8: // HI
BEQ_i(2); // beq <jp>
BCS_i(1); // bcs <jp>
case NATIVE_CC_F_OGT: // Jump if valid and greater than
BVS_i(2); // do not jump if NaN
BLE_i(1); // do not jump if less or equal
LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
break;
//<dojmp>:
LDR_rRI(RPC_INDEX, RPC_INDEX, -4); // ldr pc, [pc] ; <value>
break;
case NATIVE_CC_F_OGE: // Jump if valid and greater or equal
BVS_i(2); // do not jump if NaN
BCC_i(1); // do not jump if carry cleared
LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
break;
case NATIVE_CC_F_OLT: // Jump if valid and less than
BVS_i(2); // do not jump if NaN
BCS_i(1); // do not jump if carry set
LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
break;
case NATIVE_CC_F_OLE: // Jump if valid and less or equal
BVS_i(2); // do not jump if NaN
BGT_i(1); // do not jump if greater than
LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
break;
case NATIVE_CC_F_OGL: // Jump if valid and greater or less
BVS_i(2); // do not jump if NaN
BEQ_i(1); // do not jump if equal
LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
break;
default:
CC_B_i(cc^1, 1);
LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
break;
case NATIVE_CC_F_OR: // Jump if valid
BVS_i(1); // do not jump if NaN
LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
break;
case NATIVE_CC_F_UN: // Jump if NaN
BVC_i(1); // do not jump if valid
LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
break;
case NATIVE_CC_F_UEQ: // Jump if NaN or equal
BVS_i(0); // jump if NaN
BNE_i(1); // do not jump if greater or less
// jump
LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
break;
case NATIVE_CC_F_UGT: // Jump if NaN or greater than
BVS_i(0); // jump if NaN
BLS_i(1); // do not jump if lower or same
// jump
LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
break;
case NATIVE_CC_F_UGE: // Jump if NaN or greater or equal
BVS_i(0); // jump if NaN
BMI_i(1); // do not jump if lower
// jump
LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
break;
case NATIVE_CC_F_ULT: // Jump if NaN or less than
BVS_i(0); // jump if NaN
BGE_i(1); // do not jump if greater or equal
// jump
LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
break;
case NATIVE_CC_F_ULE: // Jump if NaN or less or equal
BVS_i(0); // jump if NaN
BGT_i(1); // do not jump if greater
// jump
LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
break;
default:
CC_B_i(cc^1, 1);
LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
break;
}
// emit of target will be done by caller
}
@ -889,11 +977,6 @@ STATIC_INLINE void compemu_raw_jmp_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
emit_long(base);
}
STATIC_INLINE void compemu_raw_jmp_r(RR4 r)
{
BX_r(r);
}
STATIC_INLINE void compemu_raw_jnz(uae_u32 t)
{
#ifdef ARMV6T2
@ -1009,3 +1092,317 @@ LOWFUNC(NONE,NONE,2,compemu_raw_endblock_pc_isconst,(IMM cycles, IMM v))
// <target emitted by caller>
}
LENDFUNC(NONE,NONE,2,compemu_raw_endblock_pc_isconst,(IMM cycles, IMM v))
/*************************************************************************
* FPU stuff *
*************************************************************************/
// Emit a double-precision register copy: d = s.
LOWFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
{
VMOV64_rr(d, s);
}
LENDFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
/*
 * Emit a store of double register s to the absolute address mem.
 * If mem lies word-aligned within the first 1020 bytes of the global regs
 * structure, a single store relative to R_REGSTRUCT is emitted; otherwise
 * the address is built in REG_WORK1 (clobbered) first.
 */
LOWFUNC(NONE,WRITE,2,compemu_raw_fmov_mr_drop,(MEMW mem, FR s))
{
	const uae_u32 regs_base = (uae_u32) &regs;
	const uae_u32 offs = mem - regs_base;

	if(mem < regs_base || mem >= regs_base + 1020 || (offs & 0x3) != 0) {
		// Not reachable relative to regs: use the full 32-bit address
		MOVW_ri16(REG_WORK1, mem);
		MOVT_ri16(REG_WORK1, mem >> 16);
		VSTR64(s, REG_WORK1, 0);
	} else {
		VSTR64(s, R_REGSTRUCT, offs);
	}
}
LENDFUNC(NONE,WRITE,2,compemu_raw_fmov_mr_drop,(MEMW mem, FR s))
/*
 * Emit a load of the double at absolute address mem into register d.
 * If mem lies word-aligned within the first 1020 bytes of the global regs
 * structure, a single load relative to R_REGSTRUCT is emitted; otherwise
 * the address is built in REG_WORK1 (clobbered) first.
 */
LOWFUNC(NONE,READ,2,compemu_raw_fmov_rm,(FW d, MEMR mem))
{
	if(mem >= (uae_u32) &regs && mem < (uae_u32) &regs + 1020 && ((mem - (uae_u32) &regs) & 0x3) == 0) {
		VLDR64(d, R_REGSTRUCT, (mem - (uae_u32) &regs));
	} else {
		MOVW_ri16(REG_WORK1, mem);
		MOVT_ri16(REG_WORK1, mem >> 16);
		VLDR64(d, REG_WORK1, 0);
	}
}
// Closing signature now matches the LOWFUNC line (memory operand is read: MEMR)
LENDFUNC(NONE,READ,2,compemu_raw_fmov_rm,(FW d, MEMR mem))
// Emit: d = (double) 32-bit integer held in ARM register s.
// The integer is moved into the scratch FP register first (clobbered).
LOWFUNC(NONE,NONE,2,raw_fmov_l_rr,(FW d, RR4 s))
{
VMOVi_from_ARM(SCRATCH_F64_1, s);
VCVT_64_from_i(d, SCRATCH_F32_1);
}
LENDFUNC(NONE,NONE,2,raw_fmov_l_rr,(FW d, RR4 s))
// Emit: interpret ARM register s as a single-precision bit pattern and
// widen it to double in d. Clobbers SCRATCH_F32_1.
LOWFUNC(NONE,NONE,2,raw_fmov_s_rr,(FW d, RR4 s))
{
VMOV32_from_ARM(SCRATCH_F32_1, s);
VCVT_32_to_64(d, SCRATCH_F32_1);
}
LENDFUNC(NONE,NONE,2,raw_fmov_s_rr,(FW d, RR4 s))
// Emit: d = (double) sign-extended 16-bit value of ARM register s.
// Clobbers REG_WORK1 and the scratch FP register.
LOWFUNC(NONE,NONE,2,raw_fmov_w_rr,(FW d, RR2 s))
{
SIGN_EXTEND_16_REG_2_REG(REG_WORK1, s);
VMOVi_from_ARM(SCRATCH_F64_1, REG_WORK1);
VCVT_64_from_i(d, SCRATCH_F32_1);
}
LENDFUNC(NONE,NONE,2,raw_fmov_w_rr,(FW d, RR2 s))
// Emit: d = (double) sign-extended 8-bit value of ARM register s.
// Clobbers REG_WORK1 and the scratch FP register.
LOWFUNC(NONE,NONE,2,raw_fmov_b_rr,(FW d, RR1 s))
{
SIGN_EXTEND_8_REG_2_REG(REG_WORK1, s);
VMOVi_from_ARM(SCRATCH_F64_1, REG_WORK1);
VCVT_64_from_i(d, SCRATCH_F32_1);
}
LENDFUNC(NONE,NONE,2,raw_fmov_b_rr,(FW d, RR1 s))
// Emit: build double register d from two ARM registers
// (s1 = low 32 bits, s2 = high 32 bits).
// The declared argument count is 3, matching the parameter list
// (and the sibling raw_fmov_to_d_rrr).
LOWFUNC(NONE,NONE,3,raw_fmov_d_rrr,(FW d, RR4 s1, RR4 s2))
{
	VMOV64_from_ARM(d, s1, s2);
}
LENDFUNC(NONE,NONE,3,raw_fmov_d_rrr,(FW d, RR4 s1, RR4 s2))
// Emit: convert double s to a 32-bit integer in ARM register d.
// VCVTR is used, i.e. the conversion honours the current FPSCR rounding mode.
// Clobbers the scratch FP register.
LOWFUNC(NONE,NONE,2,raw_fmov_to_l_rr,(W4 d, FR s))
{
VCVTR_64_to_i(SCRATCH_F32_1, s);
VMOVi_to_ARM(d, SCRATCH_F64_1);
}
LENDFUNC(NONE,NONE,2,raw_fmov_to_l_rr,(W4 d, FR s))
// Emit: narrow double s to single precision and place its bit pattern in
// ARM register d. Clobbers SCRATCH_F32_1.
LOWFUNC(NONE,NONE,2,raw_fmov_to_s_rr,(W4 d, FR s))
{
VCVT_64_to_32(SCRATCH_F32_1, s);
VMOV32_to_ARM(d, SCRATCH_F32_1);
}
LENDFUNC(NONE,NONE,2,raw_fmov_to_s_rr,(W4 d, FR s))
// Emit: convert double s to an integer (current rounding mode), saturate it
// to the signed 16-bit range and insert it into bits 15..0 of d; the upper
// half of d is left untouched by the bit-field insert.
// Clobbers REG_WORK1 and the scratch FP register.
LOWFUNC(NONE,NONE,2,raw_fmov_to_w_rr,(W4 d, FR s))
{
VCVTR_64_to_i(SCRATCH_F32_1, s);
VMOVi_to_ARM(REG_WORK1, SCRATCH_F64_1);
SSAT_rir(REG_WORK1, 15, REG_WORK1);
BFI_rrii(d, REG_WORK1, 0, 15);
}
LENDFUNC(NONE,NONE,2,raw_fmov_to_w_rr,(W4 d, FR s))
// Emit: convert double s to an integer (current rounding mode), saturate it
// to the signed 8-bit range and insert it into bits 7..0 of d; the remaining
// bits of d are left untouched by the bit-field insert.
// Clobbers REG_WORK1 and the scratch FP register.
LOWFUNC(NONE,NONE,2,raw_fmov_to_b_rr,(W4 d, FR s))
{
VCVTR_64_to_i(SCRATCH_F32_1, s);
VMOVi_to_ARM(REG_WORK1, SCRATCH_F64_1);
SSAT_rir(REG_WORK1, 7, REG_WORK1);
BFI_rrii(d, REG_WORK1, 0, 7);
}
LENDFUNC(NONE,NONE,2,raw_fmov_to_b_rr,(W4 d, FR s))
// Emit: r = 0.0. The constant is produced by loading 1.0 and subtracting
// the register from itself.
LOWFUNC(NONE,NONE,1,raw_fmov_d_ri_0,(FW r))
{
VMOV64_i(r, 0x7, 0x0); // load imm #1 into reg
VSUB64(r, r, r); // 1.0 - 1.0 = 0.0
}
LENDFUNC(NONE,NONE,1,raw_fmov_d_ri_0,(FW r))
// Emit: r = 1.0 (immediate encoding 0x70).
LOWFUNC(NONE,NONE,1,raw_fmov_d_ri_1,(FW r))
{
VMOV64_i(r, 0x7, 0x0); // load imm #1 into reg
}
LENDFUNC(NONE,NONE,1,raw_fmov_d_ri_1,(FW r))
// Emit: r = 10.0 (immediate encoding 0x24).
LOWFUNC(NONE,NONE,1,raw_fmov_d_ri_10,(FW r))
{
VMOV64_i(r, 0x2, 0x4); // load imm #10 into reg
}
LENDFUNC(NONE,NONE,1,raw_fmov_d_ri_10,(FW r))
// Emit: r = 100.0, built as 10.0 * 10.0 because 100.0 is not available as
// an immediate here.
LOWFUNC(NONE,NONE,1,raw_fmov_d_ri_100,(FW r))
{
	VMOV64_i(r, 0x2, 0x4); // load imm #10 into reg
	VMUL64(r, r, r);       // 10.0 * 10.0 = 100.0
}
// Closing name corrected: it previously named raw_fmov_d_ri_10
LENDFUNC(NONE,NONE,1,raw_fmov_d_ri_100,(FW r))
// Emit: load the double stored at absolute address m into r.
// The address is built in REG_WORK1 (clobbered).
LOWFUNC(NONE,READ,2,raw_fmov_d_rm,(FW r, MEMR m))
{
MOVW_ri16(REG_WORK1, m);
MOVT_ri16(REG_WORK1, m >> 16);
VLDR64(r, REG_WORK1, 0);
}
LENDFUNC(NONE,READ,2,raw_fmov_d_rm,(FW r, MEMR m))
// Emit: load the single-precision value stored at absolute address m and
// widen it to double in r. Clobbers REG_WORK1 and SCRATCH_F32_1.
LOWFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
{
MOVW_ri16(REG_WORK1, m);
MOVT_ri16(REG_WORK1, m >> 16);
VLDR32(SCRATCH_F32_1, REG_WORK1, 0);
VCVT_32_to_64(r, SCRATCH_F32_1);
}
LENDFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
// Emit: split double register s into two ARM registers
// (d1 = low 32 bits, d2 = high 32 bits).
LOWFUNC(NONE,NONE,3,raw_fmov_to_d_rrr,(W4 d1, W4 d2, FR s))
{
VMOV64_to_ARM(d1, d2, s);
}
LENDFUNC(NONE,NONE,3,raw_fmov_to_d_rrr,(W4 d1, W4 d2, FR s))
// Emit: d = sqrt(s) in double precision.
LOWFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
{
VSQRT64(d, s);
}
LENDFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
// Emit: d = |s| in double precision.
LOWFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
{
VABS64(d, s);
}
LENDFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
// Emit: d = -s in double precision.
LOWFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
{
VNEG64(d, s);
}
LENDFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
// Emit: d = d / s in double precision.
LOWFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
{
VDIV64(d, d, s);
}
LENDFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
// Emit: d = d + s in double precision.
LOWFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
{
VADD64(d, d, s);
}
LENDFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
// Emit: d = d * s in double precision.
LOWFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
{
VMUL64(d, d, s);
}
LENDFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
// Emit: d = d - s in double precision.
LOWFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
{
VSUB64(d, d, s);
}
LENDFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
// Emit: d = s rounded to an integral value using the current FPSCR rounding
// mode. Implemented as a round trip through a 32-bit integer.
// NOTE(review): values outside the 32-bit integer range (and NaN/infinity)
// cannot survive that round trip - confirm callers never rely on them.
LOWFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
{
VCVTR_64_to_i(SCRATCH_F32_1, s);
VCVT_64_from_i(d, SCRATCH_F32_1);
}
LENDFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
// Emit: d = s rounded toward zero to an integral value. Implemented as a
// round trip through a 32-bit integer (non-"R" VCVT variant).
// NOTE(review): values outside the 32-bit integer range (and NaN/infinity)
// cannot survive that round trip - confirm callers never rely on them.
LOWFUNC(NONE,NONE,2,raw_frndintz_rr,(FW d, FR s))
{
VCVT_64_to_i(SCRATCH_F32_1, s);
VCVT_64_from_i(d, SCRATCH_F32_1);
}
LENDFUNC(NONE,NONE,2,raw_frndintz_rr,(FW d, FR s))
// Emit: d = d - trunc(d / s) * s (remainder with the quotient rounded
// toward zero). Clobbers SCRATCH_F64_1 and SCRATCH_F64_2.
// NOTE(review): the quotient passes through a 32-bit integer, so quotients
// outside that range are not represented exactly.
LOWFUNC(NONE,NONE,2,raw_fmod_rr,(FRW d, FR s))
{
VDIV64(SCRATCH_F64_2, d, s); // q = d / s
VCVT_64_to_i(SCRATCH_F32_1, SCRATCH_F64_2); // q -> integer, toward zero
VCVT_64_from_i(SCRATCH_F64_2, SCRATCH_F32_1); // back to double
VMUL64(SCRATCH_F64_1, SCRATCH_F64_2, s); // q * s
VSUB64(d, d, SCRATCH_F64_1); // d - q * s
}
LENDFUNC(NONE,NONE,2,raw_fmod_rr,(FRW d, FR s))
// Emit: single-precision divide. Both operands are narrowed to single,
// divided, and the quotient is widened back to double in d.
// Clobbers SCRATCH_F32_1 and SCRATCH_F32_2.
LOWFUNC(NONE,NONE,2,raw_fsgldiv_rr,(FRW d, FR s))
{
VCVT_64_to_32(SCRATCH_F32_1, d);
VCVT_64_to_32(SCRATCH_F32_2, s);
VDIV32(SCRATCH_F32_1, SCRATCH_F32_1, SCRATCH_F32_2);
VCVT_32_to_64(d, SCRATCH_F32_1);
}
LENDFUNC(NONE,NONE,2,raw_fsgldiv_rr,(FRW d, FR s))
// Emit: reduce r to single precision in place (double -> single -> double).
// Clobbers SCRATCH_F32_1.
LOWFUNC(NONE,NONE,1,raw_fcuts_r,(FRW r))
{
VCVT_64_to_32(SCRATCH_F32_1, r);
VCVT_32_to_64(r, SCRATCH_F32_1);
}
LENDFUNC(NONE,NONE,1,raw_fcuts_r,(FRW r))
/*
 * Emit: d = d - round(d / s) * s, with the quotient rounded while the
 * rounding-mode field of the FP status/control register (bits 23:22) is
 * temporarily cleared. The original rounding mode is restored afterwards;
 * all other status bits keep whatever the arithmetic left behind.
 * Clobbers REG_WORK1, REG_WORK2, SCRATCH_F64_1 and SCRATCH_F64_2.
 * NOTE(review): the quotient passes through a 32-bit integer, so quotients
 * outside that range are not represented exactly.
 */
LOWFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
{
	// Save the control register and clear the rounding-mode bits
	VMRS(REG_WORK1);
	BIC_rri(REG_WORK2, REG_WORK1, 0x00c00000);
	VMSR(REG_WORK2);

	VDIV64(SCRATCH_F64_2, d, s);                     // q = d / s
	VCVTR_64_to_i(SCRATCH_F32_1, SCRATCH_F64_2);     // q -> integer (rounding mode just set)
	VCVT_64_from_i(SCRATCH_F64_2, SCRATCH_F32_1);    // back to double
	VMUL64(SCRATCH_F64_1, SCRATCH_F64_2, s);         // q * s
	VSUB64(d, d, SCRATCH_F64_1);                     // d - q * s

	// Put the saved rounding mode back into the current control register
	VMRS(REG_WORK2);
	UBFX_rrii(REG_WORK1, REG_WORK1, 22, 2);          // saved mode -> bits 1..0 (assumes UBFX_rrii takes lsb, width)
	// BFI_rrii takes (lsb, msb): the 2-bit field at bit 22 ends at bit 23.
	// Passing 2 here would request msb < lsb, which is not a valid encoding.
	BFI_rrii(REG_WORK2, REG_WORK1, 22, 23);
	VMSR(REG_WORK2);
}
LENDFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
// Emit: single-precision multiply. Both operands are narrowed to single,
// multiplied, and the product is widened back to double in d.
// Clobbers SCRATCH_F32_1 and SCRATCH_F32_2.
LOWFUNC(NONE,NONE,2,raw_fsglmul_rr,(FRW d, FR s))
{
VCVT_64_to_32(SCRATCH_F32_1, d);
VCVT_64_to_32(SCRATCH_F32_2, s);
VMUL32(SCRATCH_F32_1, SCRATCH_F32_1, SCRATCH_F32_2);
VCVT_32_to_64(d, SCRATCH_F32_1);
}
LENDFUNC(NONE,NONE,2,raw_fsglmul_rr,(FRW d, FR s))
// Emit: d = s rounded to single precision (double -> single -> double).
// Clobbers SCRATCH_F32_1.
LOWFUNC(NONE,NONE,2,raw_fmovs_rr,(FW d, FR s))
{
VCVT_64_to_32(SCRATCH_F32_1, s);
VCVT_32_to_64(d, SCRATCH_F32_1);
}
LENDFUNC(NONE,NONE,2,raw_fmovs_rr,(FW d, FR s))
// Emit a call to the C function func: s is copied to FP register 0, func is
// called through REG_WORK1 and FP register 0 is copied to d afterwards.
// The link register is saved around the call.
// NOTE(review): this presumably relies on a hard-float calling convention
// (first double argument and result in D0) and on the caller having taken
// care of registers the callee may clobber - confirm.
LOWFUNC(NONE,NONE,3,raw_ffunc_rr,(double (*func)(double), FW d, FR s))
{
VMOV64_rr(0, s);
MOVW_ri16(REG_WORK1, (uae_u32)func);
MOVT_ri16(REG_WORK1, ((uae_u32)func) >> 16);
PUSH(RLR_INDEX);
BLX_r(REG_WORK1);
POP(RLR_INDEX);
VMOV64_rr(d, 0);
}
LENDFUNC(NONE,NONE,3,raw_ffunc_rr,(double (*func)(double), FW d, FR s))
// Emit a call to pow(base, s) and store the result in d. The base is 2.0
// when x == 2 and 10.0 for every other value of x. The base is placed in
// FP register 0, the exponent in FP register 1, and the result is taken
// from FP register 0. The link register is saved around the call.
// NOTE(review): presumably relies on a hard-float calling convention - confirm.
LOWFUNC(NONE,NONE,3,raw_fpowx_rr,(uae_u32 x, FW d, FR s))
{
double (*func)(double,double) = pow;
if(x == 2) {
VMOV64_i(0, 0x0, 0x0); // load imm #2 into first reg
} else {
VMOV64_i(0, 0x2, 0x4); // load imm #10 into first reg
}
VMOV64_rr(1, s);
MOVW_ri16(REG_WORK1, (uae_u32)func);
MOVT_ri16(REG_WORK1, ((uae_u32)func) >> 16);
PUSH(RLR_INDEX);
BLX_r(REG_WORK1);
POP(RLR_INDEX);
VMOV64_rr(d, 0);
}
LENDFUNC(NONE,NONE,3,raw_fpowx_rr,(uae_u32 x, FW d, FR s))
// Emit: compare FP register r with zero and copy the resulting floating
// point condition flags into the ARM integer flags.
STATIC_INLINE void raw_fflags_into_flags(int r)
{
VCMP64_0(r);
VMRS(15); // special case: move flags from FPSCR to APSR_nzcv
}

View file

@ -1319,6 +1319,7 @@ enum {
// ARMv6T2
#ifdef ARMV6T2
// BFI: insert the low bits of Rn into Rd[msb:lsb].
// Note: the last argument is the position of the most significant bit of
// the field (encoded in bits 20:16), NOT the field width.
#define CC_BFI_rrii(cc,Rd,Rn,lsb,msb) _W(((cc) << 28) | (0x3e << 21) | ((msb) << 16) | (Rd << 12) | ((lsb) << 7) | (0x1 << 4) | (Rn))
#define BFI_rrii(Rd,Rn,lsb,msb) CC_BFI_rrii(NATIVE_CC_AL,Rd,Rn,lsb,msb)
@ -1333,10 +1334,138 @@ enum {
#define CC_MOVT_ri16(cc,Rd,i) _W(((cc) << 28) | (0x34 << 20) | (((i >> 12) & 0xf) << 16) | (Rd << 12) | (i & 0x0fff))
#define MOVT_ri16(Rd,i) CC_MOVT_ri16(NATIVE_CC_AL,Rd,i)
// SSAT: signed saturate Rn into Rd. The value i is placed unmodified into
// the instruction's saturation field (bits 20:16); per the ARM encoding that
// field holds "bit width - 1", so i = 15 saturates to 16 bits, i = 7 to 8 bits.
#define CC_SSAT_rir(cc,Rd,i,Rn) _W(((cc) << 28) | (0x6a << 20) | (i << 16) | (Rd << 12) | (0x1 << 4) | (Rn))
#define SSAT_rir(Rd,i,Rn) CC_SSAT_rir(NATIVE_CC_AL,Rd,i,Rn)
#endif
// Floatingpoint
// Offset field of VLDR/VSTR: bit 23 selects add (1) or subtract (0), the low
// bits hold the byte offset divided by 4. All macro arguments are
// parenthesized so that expressions such as "a - b" expand correctly.
#define FADR_ADD(offs) ((1 << 23) | ((offs) >> 2))
#define FADR_SUB(offs) ((0 << 23) | ((offs) >> 2))
#define FIMM8(offs) ((offs) >= 0 ? FADR_ADD(offs) : FADR_SUB(-(offs)))
// Register number -> instruction field helpers.
// D registers: 4 low bits in the main field, bit 4 in the extension bit.
// S registers: bit 0 in the extension bit, bits 4..1 in the main field.
#define MAKE_Dd(Dd) ((((Dd) & 0x10) << 18) | (((Dd) & 0x0f) << 12))
#define MAKE_Dm(Dm) ((((Dm) & 0x10) << 1) | (((Dm) & 0x0f) << 0))
#define MAKE_Dn(Dn) ((((Dn) & 0x10) << 3) | (((Dn) & 0x0f) << 16))
#define MAKE_Sd(Sd) ((((Sd) & 0x01) << 22) | (((Sd) & 0x1e) << 11))
#define MAKE_Sm(Sm) ((((Sm) & 0x01) << 5) | (((Sm) & 0x1e) >> 1))
#define MAKE_Sn(Sn) ((((Sn) & 0x01) << 7) | (((Sn) & 0x1e) << 15))
// VLDR / VSTR: load/store one double (64) or single (32) precision register
// from/to [Rn + offs]. offs is a byte offset and must be a multiple of 4.
#define CC_VLDR64(cc,Dd,Rn,offs) _W(((cc) << 28) | (0xd << 24) | (0x1 << 20) | ((Rn) << 16) | (0xb << 8) | FIMM8(offs) | MAKE_Dd(Dd))
#define VLDR64(Dd,Rn,offs) CC_VLDR64(NATIVE_CC_AL,Dd,Rn,offs)
#define CC_VLDR32(cc,Sd,Rn,offs) _W(((cc) << 28) | (0xd << 24) | (0x1 << 20) | ((Rn) << 16) | (0xa << 8) | FIMM8(offs) | MAKE_Sd(Sd))
#define VLDR32(Sd,Rn,offs) CC_VLDR32(NATIVE_CC_AL,Sd,Rn,offs)
#define CC_VSTR64(cc,Dd,Rn,offs) _W(((cc) << 28) | (0xd << 24) | (0x0 << 20) | ((Rn) << 16) | (0xb << 8) | FIMM8(offs) | MAKE_Dd(Dd))
#define VSTR64(Dd,Rn,offs) CC_VSTR64(NATIVE_CC_AL,Dd,Rn,offs)
// The single-precision store takes an S register, so it must be encoded with
// MAKE_Sd (like VLDR32); encoding it as a D register selects the wrong register.
#define CC_VSTR32(cc,Sd,Rn,offs) _W(((cc) << 28) | (0xd << 24) | (0x0 << 20) | ((Rn) << 16) | (0xa << 8) | FIMM8(offs) | MAKE_Sd(Sd))
#define VSTR32(Sd,Rn,offs) CC_VSTR32(NATIVE_CC_AL,Sd,Rn,offs)
// VMOV register-to-register copies (double / single precision)
#define CC_VMOV64_rr(cc,Dd,Dm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0xb << 8) | (0x4 << 4) | MAKE_Dd(Dd) | MAKE_Dm(Dm))
#define VMOV64_rr(Dd,Dm) CC_VMOV64_rr(NATIVE_CC_AL,Dd,Dm)
#define CC_VMOV32_rr(cc,Sd,Sm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0xa << 8) | (0x4 << 4) | MAKE_Sd(Sd) | MAKE_Sm(Sm))
#define VMOV32_rr(Sd,Sm) CC_VMOV32_rr(NATIVE_CC_AL,Sd,Sm)
// Transfers between one ARM core register and one single-precision register
#define CC_VMOV32_to_ARM(cc,Rt,Sn) _W(((cc) << 28) | (0xe << 24) | (0x1 << 20) | (Rt << 12) | (0xa << 8) | (0x1 << 4) | MAKE_Sn(Sn))
#define VMOV32_to_ARM(Rt,Sn) CC_VMOV32_to_ARM(NATIVE_CC_AL,Rt,Sn)
#define CC_VMOV32_from_ARM(cc,Sn,Rt) _W(((cc) << 28) | (0xe << 24) | (0x0 << 20) | (Rt << 12) | (0xa << 8) | (0x1 << 4) | MAKE_Sn(Sn))
#define VMOV32_from_ARM(Sn,Rt) CC_VMOV32_from_ARM(NATIVE_CC_AL,Sn,Rt)
// Transfers between one ARM core register and the low 32-bit word of a
// double-precision register (used for integer <-> FP conversions)
#define CC_VMOVi_from_ARM(cc,Dn,Rt) _W(((cc) << 28) | (0xe << 24) | (0x0 << 20) | (Rt << 12) | (0xb << 8) | (0x1 << 4) | MAKE_Dn(Dn))
#define VMOVi_from_ARM(Dn,Rt) CC_VMOVi_from_ARM(NATIVE_CC_AL,Dn,Rt)
#define CC_VMOVi_to_ARM(cc,Rt,Dn) _W(((cc) << 28) | (0xe << 24) | (0x1 << 20) | (Rt << 12) | (0xb << 8) | (0x1 << 4) | MAKE_Dn(Dn))
#define VMOVi_to_ARM(Rt,Dn) CC_VMOVi_to_ARM(NATIVE_CC_AL,Rt,Dn)
// Transfers between two ARM core registers and one double-precision register
// (Rt = low word, Rt2 = high word)
#define CC_VMOV64_to_ARM(cc,Rt,Rt2,Dm) _W(((cc) << 28) | (0xc << 24) | (0x5 << 20) | (Rt2 << 16) | (Rt << 12) | (0xb << 8) | (0x1 << 4) | MAKE_Dm(Dm))
#define VMOV64_to_ARM(Rt,Rt2,Dm) CC_VMOV64_to_ARM(NATIVE_CC_AL,Rt,Rt2,Dm)
#define CC_VMOV64_from_ARM(cc,Dm,Rt,Rt2) _W(((cc) << 28) | (0xc << 24) | (0x4 << 20) | (Rt2 << 16) | (Rt << 12) | (0xb << 8) | (0x1 << 4) | MAKE_Dm(Dm))
#define VMOV64_from_ARM(Dm,Rt,Rt2) CC_VMOV64_from_ARM(NATIVE_CC_AL,Dm,Rt,Rt2)
// VCVT between double and single precision
#define CC_VCVT_64_to_32(cc,Sd,Dm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x7 << 16) | (0xb << 8) | (0xc << 4) | MAKE_Sd(Sd) | MAKE_Dm(Dm))
#define VCVT_64_to_32(Sd,Dm) CC_VCVT_64_to_32(NATIVE_CC_AL,Sd,Dm)
#define CC_VCVT_32_to_64(cc,Dd,Sm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x7 << 16) | (0xa << 8) | (0xc << 4) | MAKE_Dd(Dd) | MAKE_Sm(Sm))
#define VCVT_32_to_64(Dd,Sm) CC_VCVT_32_to_64(NATIVE_CC_AL,Dd,Sm)
// Floating point -> integer. The "VCVTR" forms use the current rounding mode,
// the plain "VCVT" forms round toward zero. The integer result is written to
// a single-precision register.
#define CC_VCVTR_64_to_i(cc,Sd,Dm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0xd << 16) | (0xb << 8) | (0x4 << 4) | MAKE_Sd(Sd) | MAKE_Dm(Dm))
#define VCVTR_64_to_i(Sd,Dm) CC_VCVTR_64_to_i(NATIVE_CC_AL,Sd,Dm)
#define CC_VCVTR_32_to_i(cc,Sd,Sm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0xd << 16) | (0xa << 8) | (0x4 << 4) | MAKE_Sd(Sd) | MAKE_Sm(Sm))
#define VCVTR_32_to_i(Sd,Sm) CC_VCVTR_32_to_i(NATIVE_CC_AL,Sd,Sm)
#define CC_VCVT_64_to_i(cc,Sd,Dm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0xd << 16) | (0xb << 8) | (0xc << 4) | MAKE_Sd(Sd) | MAKE_Dm(Dm))
#define VCVT_64_to_i(Sd,Dm) CC_VCVT_64_to_i(NATIVE_CC_AL,Sd,Dm)
#define CC_VCVT_32_to_i(cc,Sd,Sm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0xd << 16) | (0xa << 8) | (0xc << 4) | MAKE_Sd(Sd) | MAKE_Sm(Sm))
#define VCVT_32_to_i(Sd,Sm) CC_VCVT_32_to_i(NATIVE_CC_AL,Sd,Sm)
// Integer (held in a single-precision register) -> floating point
#define CC_VCVT_64_from_i(cc,Dd,Sm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x8 << 16) | (0xb << 8) | (0xc << 4) | MAKE_Dd(Dd) | MAKE_Sm(Sm))
#define VCVT_64_from_i(Dd,Sm) CC_VCVT_64_from_i(NATIVE_CC_AL,Dd,Sm)
#define CC_VCVT_32_from_i(cc,Sd,Sm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x8 << 16) | (0xa << 8) | (0xc << 4) | MAKE_Sd(Sd) | MAKE_Sm(Sm))
// Forward the macro's own parameter Sd (the previous expansion referred to
// an undefined name Dd and could not compile when used).
#define VCVT_32_from_i(Sd,Sm) CC_VCVT_32_from_i(NATIVE_CC_AL,Sd,Sm)
// Register copies (same encodings as VMOV64_rr / VMOV32_rr)
#define CC_VMOV_rr64(cc,Dd,Dm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0xb << 8) | (0x4 << 4) | MAKE_Dd(Dd) | MAKE_Dm(Dm))
#define VMOV_rr64(Dd,Dm) CC_VMOV_rr64(NATIVE_CC_AL,Dd,Dm)
#define CC_VMOV_rr32(cc,Sd,Sm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0xa << 8) | (0x4 << 4) | MAKE_Sd(Sd) | MAKE_Sm(Sm))
#define VMOV_rr32(Sd,Sm) CC_VMOV_rr32(NATIVE_CC_AL,Sd,Sm)
// Three-operand arithmetic: d = n <op> m (64 = double, 32 = single precision)
#define CC_VADD64(cc,Dd,Dn,Dm) _W(((cc) << 28) | (0xe << 24) | (0x3 << 20) | (0xb << 8) | (0x0 << 4) | MAKE_Dd(Dd) | MAKE_Dn(Dn) | MAKE_Dm(Dm))
#define VADD64(Dd,Dn,Dm) CC_VADD64(NATIVE_CC_AL,Dd,Dn,Dm)
#define CC_VADD32(cc,Sd,Sn,Sm) _W(((cc) << 28) | (0xe << 24) | (0x3 << 20) | (0xa << 8) | (0x0 << 4) | MAKE_Sd(Sd) | MAKE_Sn(Sn) | MAKE_Sm(Sm))
#define VADD32(Sd,Sn,Sm) CC_VADD32(NATIVE_CC_AL,Sd,Sn,Sm)
#define CC_VSUB64(cc,Dd,Dn,Dm) _W(((cc) << 28) | (0xe << 24) | (0x3 << 20) | (0xb << 8) | (0x4 << 4) | MAKE_Dd(Dd) | MAKE_Dn(Dn) | MAKE_Dm(Dm))
#define VSUB64(Dd,Dn,Dm) CC_VSUB64(NATIVE_CC_AL,Dd,Dn,Dm)
#define CC_VSUB32(cc,Sd,Sn,Sm) _W(((cc) << 28) | (0xe << 24) | (0x3 << 20) | (0xa << 8) | (0x4 << 4) | MAKE_Sd(Sd) | MAKE_Sn(Sn) | MAKE_Sm(Sm))
#define VSUB32(Sd,Sn,Sm) CC_VSUB32(NATIVE_CC_AL,Sd,Sn,Sm)
#define CC_VMUL64(cc,Dd,Dn,Dm) _W(((cc) << 28) | (0xe << 24) | (0x2 << 20) | (0xb << 8) | (0x0 << 4) | MAKE_Dd(Dd) | MAKE_Dn(Dn) | MAKE_Dm(Dm))
#define VMUL64(Dd,Dn,Dm) CC_VMUL64(NATIVE_CC_AL,Dd,Dn,Dm)
#define CC_VMUL32(cc,Sd,Sn,Sm) _W(((cc) << 28) | (0xe << 24) | (0x2 << 20) | (0xa << 8) | (0x0 << 4) | MAKE_Sd(Sd) | MAKE_Sn(Sn) | MAKE_Sm(Sm))
#define VMUL32(Sd,Sn,Sm) CC_VMUL32(NATIVE_CC_AL,Sd,Sn,Sm)
#define CC_VDIV64(cc,Dd,Dn,Dm) _W(((cc) << 28) | (0xe << 24) | (0x8 << 20) | (0xb << 8) | (0x0 << 4) | MAKE_Dd(Dd) | MAKE_Dn(Dn) | MAKE_Dm(Dm))
#define VDIV64(Dd,Dn,Dm) CC_VDIV64(NATIVE_CC_AL,Dd,Dn,Dm)
#define CC_VDIV32(cc,Sd,Sn,Sm) _W(((cc) << 28) | (0xe << 24) | (0x8 << 20) | (0xa << 8) | (0x0 << 4) | MAKE_Sd(Sd) | MAKE_Sn(Sn) | MAKE_Sm(Sm))
#define VDIV32(Sd,Sn,Sm) CC_VDIV32(NATIVE_CC_AL,Sd,Sn,Sm)
// Two-operand operations: d = op(m)
#define CC_VABS64(cc,Dd,Dm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0xb << 8) | (0xc << 4) | MAKE_Dd(Dd) | MAKE_Dm(Dm))
#define VABS64(Dd,Dm) CC_VABS64(NATIVE_CC_AL,Dd,Dm)
#define CC_VABS32(cc,Sd,Sm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0xa << 8) | (0xc << 4) | MAKE_Sd(Sd) | MAKE_Sm(Sm))
#define VABS32(Sd,Sm) CC_VABS32(NATIVE_CC_AL,Sd,Sm)
#define CC_VNEG64(cc,Dd,Dm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x1 << 16) | (0xb << 8) | (0x4 << 4) | MAKE_Dd(Dd) | MAKE_Dm(Dm))
#define VNEG64(Dd,Dm) CC_VNEG64(NATIVE_CC_AL,Dd,Dm)
#define CC_VNEG32(cc,Sd,Sm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x1 << 16) | (0xa << 8) | (0x4 << 4) | MAKE_Sd(Sd) | MAKE_Sm(Sm))
#define VNEG32(Sd,Sm) CC_VNEG32(NATIVE_CC_AL,Sd,Sm)
#define CC_VSQRT64(cc,Dd,Dm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x1 << 16) | (0xb << 8) | (0xc << 4) | MAKE_Dd(Dd) | MAKE_Dm(Dm))
#define VSQRT64(Dd,Dm) CC_VSQRT64(NATIVE_CC_AL,Dd,Dm)
#define CC_VSQRT32(cc,Sd,Sm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x1 << 16) | (0xa << 8) | (0xc << 4) | MAKE_Sd(Sd) | MAKE_Sm(Sm))
#define VSQRT32(Sd,Sm) CC_VSQRT32(NATIVE_CC_AL,Sd,Sm)
// Compares: set the floating point condition flags (read them with VMRS)
#define CC_VCMP64(cc,Dd,Dm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x4 << 16) | (0xb << 8) | (0xc << 4) | MAKE_Dd(Dd) | MAKE_Dm(Dm))
#define VCMP64(Dd,Dm) CC_VCMP64(NATIVE_CC_AL,Dd,Dm)
#define CC_VCMP32(cc,Sd,Sm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x4 << 16) | (0xa << 8) | (0xc << 4) | MAKE_Sd(Sd) | MAKE_Sm(Sm))
#define VCMP32(Sd,Sm) CC_VCMP32(NATIVE_CC_AL,Sd,Sm)
// Compare with zero
#define CC_VCMP64_0(cc,Dd) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x5 << 16) | (0xb << 8) | (0xc << 4) | MAKE_Dd(Dd))
#define VCMP64_0(Dd) CC_VCMP64_0(NATIVE_CC_AL,Dd)
// VTST64 produces exactly the same encoding as VCMP64_0
#define CC_VTST64(cc,Dd) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x5 << 16) | (0xb << 8) | (0xc << 4) | MAKE_Dd(Dd))
#define VTST64(Dd) CC_VTST64(NATIVE_CC_AL,Dd)
#define CC_VTST32(cc,Sd) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x5 << 16) | (0xa << 8) | (0xc << 4) | MAKE_Sd(Sd))
#define VTST32(Sd) CC_VTST32(NATIVE_CC_AL,Sd)
// Read / write the FP status and control register via ARM register Rt
#define CC_VMRS(cc,Rt) _W(((cc) << 28) | (0xe << 24) | (0xf << 20) | (0x1 << 16) | (Rt << 12) | (0xa << 8) | (0x1 << 4))
#define VMRS(Rt) CC_VMRS(NATIVE_CC_AL,Rt)
#define CC_VMSR(cc,Rt) _W(((cc) << 28) | (0xe << 24) | (0xe << 20) | (0x1 << 16) | (Rt << 12) | (0xa << 8) | (0x1 << 4))
#define VMSR(Rt) CC_VMSR(NATIVE_CC_AL,Rt)
// Load an 8-bit encoded FP immediate, passed as two nibbles (imm4H:imm4L)
#define CC_VMOV64_i(cc,Dd,imm4H,imm4L) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (imm4H << 16) | (0xb << 8) | (imm4L) | MAKE_Dd(Dd))
#define VMOV64_i(Dd,imm4H,imm4L) CC_VMOV64_i(NATIVE_CC_AL,Dd,imm4H,imm4L)
// Floatingpoint used by non FPU JIT
#define CC_VMOV_sr(cc,Sd,Rn) _W(((cc) << 28) | (0x70 << 21) | (0 << 20) | (Sd << 16) | (Rn << 12) | (0x0a << 8) | (0x10))
#define VMOV_sr(Sd,Rn) CC_VMOV_sr(NATIVE_CC_AL,Sd,Rn)
@ -1352,4 +1481,5 @@ enum {
#define CC_VDIV_ddd(cc,Dd,Dn,Dm) _W(((cc) << 28) | (0x1d << 23) | (0x0 << 20) | (Dn << 16) | (Dd << 12) | (0xb << 8) | (0x0 << 4) | (Dm))
#define VDIV_ddd(Dd,Dn,Dm) CC_VDIV_ddd(NATIVE_CC_AL,Dd,Dn,Dm)
#endif /* ARM_RTASM_H */

File diff suppressed because it is too large Load diff

View file

@ -90,7 +90,7 @@ typedef union {
#define BYTES_PER_INST 10240 /* paranoid ;-) */
#if defined(CPU_arm)
#define LONGEST_68K_INST 256 /* The number of bytes the longest possible
#define LONGEST_68K_INST 128 /* The number of bytes the longest possible
68k instruction takes */
#else
#define LONGEST_68K_INST 16 /* The number of bytes the longest possible
@ -127,7 +127,8 @@ typedef union {
#else
#define N_REGS 8 /* really only 7, but they are numbered 0,1,2,3,5,6,7 */
#endif
#define N_FREGS 6 /* That leaves us two positions on the stack to play with */
#define N_FREGS 16 // We use 16 regs: 0 - FP_RESULT, 1-3 - SCRATCH, 4-7 - ???, 8-15 - Amiga regs FP0-FP7
/* Functions exposed to newcpu, or to what was moved from newcpu.c to
* compemu_support.c */
@ -151,11 +152,21 @@ extern int check_for_cache_miss(void);
#define scaled_cycles(x) (currprefs.m68k_speed<0?(((x)/SCALE)?(((x)/SCALE<MAXCYCLES?((x)/SCALE):MAXCYCLES)):1):(x))
/* JIT FPU compilation */
extern void comp_fpp_opp (uae_u32 opcode, uae_u16 extra);
extern void comp_fbcc_opp (uae_u32 opcode);
extern void comp_fscc_opp (uae_u32 opcode, uae_u16 extra);
void comp_fdbcc_opp (uae_u32 opcode, uae_u16 extra);
void comp_ftrapcc_opp (uae_u32 opcode, uaecptr oldpc);
void comp_fsave_opp (uae_u32 opcode);
void comp_frestore_opp (uae_u32 opcode);
extern uae_u32 needed_flags;
extern uae_u8* comp_pc_p;
extern void* pushall_call_handler;
#define VREGS 32
#define VFREGS 16
#define INMEM 1
#define CLEAN 2
@ -173,6 +184,13 @@ typedef struct {
uae_u8 dirtysize;
} reg_status;
/* State record for one virtual FP register (counterpart of reg_status).
   NOTE(review): field meanings below are inferred from the integer
   reg_status record - confirm against the register allocator. */
typedef struct {
uae_u32* mem; /* presumably the memory home of the register */
uae_u8 status;
uae_s8 realreg; /* gb-- realreg can hold -1 */
uae_u8 needflush;
} freg_status;
typedef struct {
uae_u8 use_flags;
uae_u8 set_flags;
@ -209,6 +227,13 @@ STATIC_INLINE int end_block(uae_u16 opcode)
#define FS2 10
#define FS3 11
/* Scratch FP registers for the code generator. The F64 values are
   double-precision register numbers, the F32 values single-precision
   register numbers. Each F32 number is twice the matching F64 number, so
   SCRATCH_F32_n names the low half of SCRATCH_F64_n (on ARM VFP, S(2n)
   overlays the low word of D(n)); the emitters rely on this aliasing. */
#define SCRATCH_F64_1 1
#define SCRATCH_F64_2 2
#define SCRATCH_F64_3 3
#define SCRATCH_F32_1 2
#define SCRATCH_F32_2 4
#define SCRATCH_F32_3 6
typedef struct {
uae_u32 touched;
uae_s8 holds[VREGS];
@ -216,6 +241,11 @@ typedef struct {
uae_u8 locked;
} n_status;
/* State record for one native FP register (FP counterpart of n_status).
   NOTE(review): presumably "holds" is the virtual FP register currently
   mapped here and "nholds" the number of mappings - confirm. */
typedef struct {
uae_s8 holds;
uae_u8 nholds;
} fn_status;
/* For flag handling */
#define NADA 1
#define TRASH 2
@ -233,6 +263,9 @@ typedef struct {
uae_u32 flags_on_stack;
uae_u32 flags_in_flags;
uae_u32 flags_are_important;
/* FPU part */
freg_status fate[VFREGS];
fn_status fat[N_FREGS];
} bigstate;
typedef struct {
@ -276,9 +309,9 @@ extern int touchcnt;
#include "compemu_midfunc_arm2.h"
#endif
//#if defined(CPU_i386) || defined(CPU_x86_64)
//#include "compemu_midfunc_x86.h"
//#endif
#if defined(CPU_i386) || defined(CPU_x86_64)
#include "compemu_midfunc_x86.h"
#endif
#undef DECLARE_MIDFUNC
@ -297,7 +330,7 @@ extern void writelong_clobber(int address, int source, int tmp);
extern void get_n_addr(int address, int dest, int tmp);
extern void get_n_addr_jmp(int address, int dest, int tmp);
extern void calc_disp_ea_020(int base, uae_u32 dp, int target, int tmp);
#define SYNC_PC_OFFSET 100
#define SYNC_PC_OFFSET 124
extern void sync_m68k_pc(void);
extern uae_u32 get_const(int r);
extern int is_const(int r);
@ -374,13 +407,9 @@ void execute_normal(void);
void exec_nostats(void);
void do_nothing(void);
void comp_fdbcc_opp (uae_u32 opcode, uae_u16 extra);
void comp_fscc_opp (uae_u32 opcode, uae_u16 extra);
void comp_ftrapcc_opp (uae_u32 opcode, uaecptr oldpc);
void comp_fbcc_opp (uae_u32 opcode);
void comp_fsave_opp (uae_u32 opcode);
void comp_frestore_opp (uae_u32 opcode);
void comp_fpp_opp (uae_u32 opcode, uae_u16 extra);
/* ARAnyM uses fpu_register name, used in scratch_t */
/* FIXME: check that no ARAnyM code assumes different floating point type */
typedef fptype fpu_register;
void jit_abort(const TCHAR *format,...);

View file

@ -8,7 +8,7 @@
* Modified 2005 Peter Keunecke
*/
#include <math.h>
#include <cmath>
#include "sysconfig.h"
#include "sysdeps.h"
@ -18,41 +18,794 @@
#include "custom.h"
#include "newcpu.h"
#include "compemu.h"
#include "flags_arm.h"
#if defined(JIT)
extern void fpp_to_exten(fpdata *fpd, uae_u32 wrd1, uae_u32 wrd2, uae_u32 wrd3);
static const int sz1[8] = { 4, 4, 12, 12, 2, 8, 1, 0 };
static const int sz2[8] = { 4, 4, 12, 12, 2, 8, 2, 0 };
/*
 * Emit code that fetches the <EA> source operand of an FPU instruction into
 * the FPU register treg.
 *
 * opcode  first instruction word; bits 5-3 select the addressing mode and
 *         bits 2-0 the register
 * extra   extension word; bits 12-10 select the source format
 * treg    FPU register that receives the operand
 *
 * Returns the floating point precision required for the loaded value
 * (0 = extended, 1 = single, 2 = double) or -1 for failure, in which case
 * the caller has to give up compiling the instruction.
 */
STATIC_INLINE int comp_fp_get (uae_u32 opcode, uae_u16 extra, int treg)
{
	int reg = opcode & 7;
	int mode = (opcode >> 3) & 7;
	int size = (extra >> 10) & 7;

	if ((size == 2 && (mode != 7 || reg != 4)) || size == 3 || size == 7) /* 2 = long double, 3 = packed decimal, 7 is not defined */
		return -1;

	switch (mode) {
	case 0: /* Dn */
		switch (size) {
		case 0: /* Long */
			fmov_l_rr (treg, reg);
			return 2;
		case 1: /* Single */
			fmov_s_rr (treg, reg);
			return 1;
		case 4: /* Word */
			fmov_w_rr (treg, reg);
			return 1;
		case 6: /* Byte */
			fmov_b_rr (treg, reg);
			return 1;
		default:
			return -1;
		}
	case 1: /* An, invalid mode */
		return -1;
	case 2: /* (An) */
		mov_l_rr (S1, reg + 8);
		break;
	case 3: /* (An)+ */
		mov_l_rr (S1, reg + 8);
		/* A7 uses the sz2 table, whose byte entry is 2 instead of 1 */
		arm_ADD_l_ri8(reg + 8, (reg == 7 ? sz2[size] : sz1[size]));
		break;
	case 4: /* -(An) */
		arm_SUB_l_ri8(reg + 8, (reg == 7 ? sz2[size] : sz1[size]));
		mov_l_rr (S1, reg + 8);
		break;
	case 5: /* (d16,An) */
		{
			uae_u32 off = (uae_s32) (uae_s16) comp_get_iword ((m68k_pc_offset += 2) - 2);
			mov_l_rr (S1, reg + 8);
			lea_l_brr (S1, S1, off);
			break;
		}
	case 6: /* (d8,An,Xn) or (bd,An,Xn) or ([bd,An,Xn],od) or ([bd,An],Xn,od) */
		{
			uae_u32 dp = comp_get_iword ((m68k_pc_offset += 2) - 2);
			calc_disp_ea_020 (reg + 8, dp, S1, S2);
			break;
		}
	case 7:
		switch (reg) {
		case 0: /* (xxx).W */
			{
				uae_u32 off = (uae_s32) (uae_s16) comp_get_iword ((m68k_pc_offset += 2) - 2);
				mov_l_ri (S1, off);
				break;
			}
		case 1: /* (xxx).L */
			{
				uae_u32 off = comp_get_ilong ((m68k_pc_offset += 4) - 4);
				mov_l_ri (S1, off);
				break;
			}
		case 2: /* (d16,PC) */
			{
				uae_u32 address = start_pc + ((uae_char*) comp_pc_p - (uae_char*) start_pc_p) +
					m68k_pc_offset;
				uae_s32 PC16off = (uae_s32) (uae_s16) comp_get_iword ((m68k_pc_offset += 2) - 2);
				mov_l_ri (S1, address + PC16off);
				break;
			}
		case 3: /* (d8,PC,Xn) or (bd,PC,Xn) or ([bd,PC,Xn],od) or ([bd,PC],Xn,od) */
			return -1; /* rarely used, fallback to non-JIT */
		case 4: /* # < data >; Constants should be converted just once by the JIT */
			m68k_pc_offset += sz2[size];
			switch (size) {
			case 0:
				{
					uae_s32 li = comp_get_ilong(m68k_pc_offset - 4);
					float si = (float)li;
					/* Check exactness in double, where both values are
					 * represented exactly.  Converting si back to int is
					 * undefined when rounding pushed it out of the int
					 * range (li close to INT_MAX) and could wrongly report
					 * such a value as exactly representable. */
					if ((double)si == (double)li) {
						//write_log ("converted immediate LONG constant to SINGLE\n");
						fmov_s_ri(treg, *(uae_u32 *)&si);
						return 1;
					}
					//write_log ("immediate LONG constant\n");
					fmov_l_ri(treg, *(uae_u32 *)&li);
					return 2;
				}
			case 1:
				//write_log (_T("immediate SINGLE constant\n"));
				fmov_s_ri(treg, comp_get_ilong(m68k_pc_offset - 4));
				return 1;
			case 2:
				{
					//write_log (_T("immediate LONG DOUBLE constant\n"));
					uae_u32 wrd1, wrd2, wrd3;
					fpdata tmp;
					wrd3 = comp_get_ilong(m68k_pc_offset - 4);
					wrd2 = comp_get_ilong(m68k_pc_offset - 8);
					wrd1 = comp_get_iword(m68k_pc_offset - 12) << 16;
					fpp_to_exten(&tmp, wrd1, wrd2, wrd3);
					mov_l_ri(S1, ((uae_u32*)&tmp)[0]);
					mov_l_ri(S2, ((uae_u32*)&tmp)[1]);
					fmov_d_rrr (treg, S1, S2);
					return 0;
				}
			case 4:
				{
					float si = (float)(uae_s16)comp_get_iword(m68k_pc_offset-2);
					//write_log (_T("converted immediate WORD constant %f to SINGLE\n"), si);
					fmov_s_ri(treg, *(uae_u32 *)&si);
					return 1;
				}
			case 5:
				{
					//write_log (_T("immediate DOUBLE constant\n"));
					mov_l_ri(S1, comp_get_ilong(m68k_pc_offset - 4));
					mov_l_ri(S2, comp_get_ilong(m68k_pc_offset - 8));
					fmov_d_rrr (treg, S1, S2);
					return 2;
				}
			case 6:
				{
					float si = (float)(uae_s8)comp_get_ibyte(m68k_pc_offset - 2);
					//write_log (_T("converted immediate BYTE constant to SINGLE\n"));
					fmov_s_ri(treg, *(uae_u32 *)&si);
					return 1;
				}
			default: /* never reached */
				return -1;
			}
		default: /* never reached */
			return -1;
		}
	}

	/* Memory operand: S1 holds the effective address at this point */
	switch (size) {
	case 0: /* Long */
		readlong (S1, S2, S3);
		fmov_l_rr (treg, S2);
		return 2;
	case 1: /* Single */
		readlong (S1, S2, S3);
		fmov_s_rr (treg, S2);
		return 1;
	case 4: /* Word */
		readword (S1, S2, S3);
		fmov_w_rr (treg, S2);
		return 1;
	case 5: /* Double */
		/* first longword goes to S2, the one at address + 4 to S4 */
		readlong (S1, S2, S3);
		add_l_ri (S1, 4);
		readlong (S1, S4, S3);
		fmov_d_rrr (treg, S4, S2);
		return 2;
	case 6: /* Byte */
		readbyte (S1, S2, S3);
		fmov_b_rr (treg, S2);
		return 1;
	default:
		return -1;
	}
	return -1;
}
/*
 * Emit code for FMOVE FPx,<EA>: stores FPU register sreg (bits 9-7 of extra)
 * to the destination selected by opcode (mode in bits 5-3, register in
 * bits 2-0) using the format in bits 12-10 of extra.
 *
 * return of -1 means failure, >=0 means OK
 */
STATIC_INLINE int comp_fp_put (uae_u32 opcode, uae_u16 extra)
{
int reg = opcode & 7;
int sreg = (extra >> 7) & 7;
int mode = (opcode >> 3) & 7;
int size = (extra >> 10) & 7;
if (size == 2 || size == 3 || size == 7) /* 2 = long double, 3 = packed decimal, 7 is not defined */
return -1;
/* First step: either store straight into Dn, or leave the destination address in S1 */
switch (mode) {
case 0: /* Dn */
switch (size) {
case 0: /* FMOVE.L FPx, Dn */
fmov_to_l_rr(reg, sreg);
return 0;
case 1: /* FMOVE.S FPx, Dn */
fmov_to_s_rr(reg, sreg);
return 0;
case 4: /* FMOVE.W FPx, Dn */
fmov_to_w_rr(reg, sreg);
return 0;
case 6: /* FMOVE.B FPx, Dn */
fmov_to_b_rr(reg, sreg);
return 0;
default:
return -1;
}
case 1: /* An, invalid mode */
return -1;
case 2: /* (An) */
mov_l_rr (S1, reg + 8);
break;
case 3: /* (An)+ */
mov_l_rr (S1, reg + 8);
/* A7 uses the sz2 table, whose byte entry is 2 instead of 1 */
arm_ADD_l_ri8(reg + 8, (reg == 7 ? sz2[size] : sz1[size]));
break;
case 4: /* -(An) */
arm_SUB_l_ri8(reg + 8, (reg == 7 ? sz2[size] : sz1[size]));
mov_l_rr (S1, reg + 8);
break;
case 5: /* (d16,An) */
{
uae_u32 off = (uae_s32) (uae_s16) comp_get_iword ((m68k_pc_offset += 2) - 2);
mov_l_rr (S1, reg + 8);
add_l_ri (S1, off);
break;
}
case 6: /* (d8,An,Xn) or (bd,An,Xn) or ([bd,An,Xn],od) or ([bd,An],Xn,od) */
{
uae_u32 dp = comp_get_iword ((m68k_pc_offset += 2) - 2);
calc_disp_ea_020 (reg + 8, dp, S1, S2);
break;
}
case 7:
switch (reg) {
case 0: /* (xxx).W */
{
uae_u32 off = (uae_s32) (uae_s16) comp_get_iword ((m68k_pc_offset += 2) - 2);
mov_l_ri (S1, off);
break;
}
case 1: /* (xxx).L */
{
uae_u32 off = comp_get_ilong ((m68k_pc_offset += 4) - 4);
mov_l_ri (S1, off);
break;
}
default: /* All other modes are not allowed for FPx to <EA> */
write_log (_T ("JIT FMOVE FPx,<EA> Mode is not allowed %04x %04x\n"), opcode, extra);
return -1;
}
}
/* Second step: convert the FPU register into integer register(s) and write them to the address in S1 */
switch (size) {
case 0: /* Long */
fmov_to_l_rr(S2, sreg);
writelong_clobber (S1, S2, S3);
return 0;
case 1: /* Single */
fmov_to_s_rr(S2, sreg);
writelong_clobber (S1, S2, S3);
return 0;
case 4: /* Word */
fmov_to_w_rr(S2, sreg);
writeword (S1, S2, S3);
return 0;
case 5: /* Double */
/* S3 is written to the address in S1, S2 to address + 4 */
fmov_to_d_rrr(S2, S3, sreg);
writelong_clobber (S1, S3, S4);
add_l_ri (S1, 4);
writelong_clobber (S1, S2, S4);
return 0;
case 6: /* Byte */
fmov_to_b_rr(S2, sreg);
writebyte (S1, S2, S3);
return 0;
default:
return -1;
}
return -1;
}
/*
 * Emit code that leaves the effective address selected by opcode
 * (mode in bits 5-3, register in bits 2-0) in S1.
 *
 * Handles (An), (An)+, -(An), (d16,An), (xxx).W and (xxx).L; the address
 * register itself is not modified for the increment/decrement modes.
 *
 * Returns S1 on success, -1 for every other addressing mode.
 */
STATIC_INLINE int comp_fp_adr (uae_u32 opcode)
{
	const int ea_mode = (opcode >> 3) & 7;
	const int ea_reg = opcode & 7;

	if (ea_mode >= 2 && ea_mode <= 4) {
		mov_l_rr (S1, 8 + ea_reg);
		return S1;
	}
	if (ea_mode == 5) {
		const uae_s32 disp = (uae_s32) (uae_s16) comp_get_iword ((m68k_pc_offset += 2) - 2);
		mov_l_rr (S1, 8 + ea_reg);
		add_l_ri (S1, disp);
		return S1;
	}
	if (ea_mode == 7 && ea_reg == 0) {
		const uae_s32 addr = (uae_s32) (uae_s16) comp_get_iword ((m68k_pc_offset += 2) - 2);
		mov_l_ri (S1, addr);
		return S1;
	}
	if (ea_mode == 7 && ea_reg == 1) {
		const uae_s32 addr = comp_get_ilong ((m68k_pc_offset += 4) - 4);
		mov_l_ri (S1, addr);
		return S1;
	}
	return -1;
}
/* FDBcc has no JIT implementation: report it and fail the compilation. */
void comp_fdbcc_opp (uae_u32 opcode, uae_u16 extra)
{
	printf("comp_fdbcc_opp not yet implemented\n");
	FAIL (1);
}
/*
 * FScc has no JIT implementation: report it and fail the compilation.
 * The result is the same whether or not the JIT FPU is enabled.
 */
void comp_fscc_opp (uae_u32 opcode, uae_u16 extra)
{
	printf("comp_fscc_opp not yet implemented\n");
	//printf("comp_fscc_opp() called (0x%04x, 0x%04x)\n", opcode, extra);
	FAIL (1);
}
/* FTRAPcc has no JIT implementation: report it and fail the compilation. */
void comp_ftrapcc_opp (uae_u32 opcode, uaecptr oldpc)
{
	printf("comp_ftrapcc_opp not yet implemented\n");
	FAIL (1);
}
/*
 * Compile FBcc (branch on floating point condition).
 *
 * opcode  instruction word; bit 6 selects a 32-bit displacement (otherwise
 *         a sign-extended 16-bit one), the low bits hold the condition
 *
 * Fails the compilation when the JIT FPU is disabled or the condition code
 * lies outside 0x00-0x1f.  Condition 0 (never) emits nothing; all others
 * turn the FPU result into native flags and register a branch between the
 * fall-through address and the branch target.
 */
void comp_fbcc_opp (uae_u32 opcode)
{
	uae_u32 start_68k_offset = m68k_pc_offset;
	uae_u32 off, v1, v2;
	int cc;

	if (!currprefs.compfpu) {
		FAIL (1);
		return;
	}

	if (opcode & 0x20) { /* only cc from 00 to 1f are defined */
		FAIL (1);
		return;
	}
	if (!(opcode & 0x40)) {
		off = (uae_s32) (uae_s16) comp_get_iword ((m68k_pc_offset += 2) - 2);
	}
	else {
		off = comp_get_ilong ((m68k_pc_offset += 4) - 4);
	}

	/* according to fpp.c, the 0x10 bit is ignored
	   (it handles exception handling, which we don't
	   do, anyway ;-) */
	cc = opcode & 0x0f;
	if(cc == 0)
		return; /* jump never */

	/* Note, "off" will sometimes be (unsigned) "negative", so the following
	 * uintptr can be > 0xffffffff, but the result will be correct due to
	 * wraparound when truncated to 32 bit in the call to mov_l_ri. */
	mov_l_ri(S1, (uintptr)
		(comp_pc_p + off - (m68k_pc_offset - start_68k_offset)));
	mov_l_ri(PC_P, (uintptr) comp_pc_p);

	/* Now they are both constant. Might as well fold in m68k_pc_offset */
	add_l_ri (S1, m68k_pc_offset);
	add_l_ri (PC_P, m68k_pc_offset);
	m68k_pc_offset = 0;

	v1 = get_const (PC_P); /* address of the following instruction */
	v2 = get_const (S1);   /* branch target */
	fflags_into_flags ();

	switch (cc) {
	case 1: register_branch (v1, v2, NATIVE_CC_EQ); break;
	case 2: register_branch (v1, v2, NATIVE_CC_F_OGT); break;
	case 3: register_branch (v1, v2, NATIVE_CC_F_OGE); break;
	case 4: register_branch (v1, v2, NATIVE_CC_F_OLT); break;
	case 5: register_branch (v1, v2, NATIVE_CC_F_OLE); break;
	case 6: register_branch (v1, v2, NATIVE_CC_F_OGL); break;
	case 7: register_branch (v1, v2, NATIVE_CC_F_OR); break;
	case 8: register_branch (v1, v2, NATIVE_CC_F_UN); break;
	case 9: register_branch (v1, v2, NATIVE_CC_F_UEQ); break;
	case 10: register_branch (v1, v2, NATIVE_CC_F_UGT); break;
	case 11: register_branch (v1, v2, NATIVE_CC_F_UGE); break;
	case 12: register_branch (v1, v2, NATIVE_CC_F_ULT); break;
	case 13: register_branch (v1, v2, NATIVE_CC_F_ULE); break;
	case 14: register_branch (v1, v2, NATIVE_CC_NE); break;
	/* branch always: both successors are the target */
	case 15: register_branch (v2, v2, NATIVE_CC_AL); break;
	}
}
/* FSAVE has no JIT implementation: report it and fail the compilation. */
void comp_fsave_opp (uae_u32 opcode)
{
	printf("comp_fsave_opp not yet implemented\n");
	FAIL (1);
}
/* FRESTORE has no JIT implementation: report it and fail the compilation. */
void comp_frestore_opp (uae_u32 opcode)
{
	printf("comp_frestore_opp not yet implemented\n");
	FAIL (1);
}
/*
 * Bit patterns of double precision constants, stored as two 32-bit words
 * with the less significant word first (this matches the in-memory layout
 * of a double only on a little-endian host -- TODO confirm for new targets).
 * The FMOVECR handling in comp_fpp_opp loads them with fmov_d_rm.
 */
static uae_u32 dhex_pi[] ={0x54442D18, 0x400921FB}; /* pi */
static uae_u32 dhex_exp_1[] ={0x8B145769, 0x4005BF0A}; /* e */
static uae_u32 dhex_l2_e[] ={0x652B82FE, 0x3FF71547}; /* log2(e) */
static uae_u32 dhex_ln_2[] ={0xFEFA39EF, 0x3FE62E42}; /* ln(2) */
static uae_u32 dhex_ln_10[] ={0xBBB55516, 0x40026BB1}; /* ln(10) */
static uae_u32 dhex_l10_2[] ={0x509F79FF, 0x3FD34413}; /* log10(2) */
static uae_u32 dhex_l10_e[] ={0x1526E50E, 0x3FDBCB7B}; /* log10(e) */
static uae_u32 dhex_1e16[] ={0x37E08000, 0x4341C379}; /* 1e16 */
static uae_u32 dhex_1e32[] ={0xB5056E17, 0x4693B8B5}; /* 1e32 */
static uae_u32 dhex_1e64[] ={0xE93FF9F5, 0x4D384F03}; /* 1e64 */
static uae_u32 dhex_1e128[] ={0xF9301D32, 0x5A827748}; /* 1e128 */
static uae_u32 dhex_1e256[] ={0x7F73BF3C, 0x75154FDD}; /* 1e256 */
static uae_u32 dhex_inf[] ={0x00000000, 0x7ff00000}; /* +infinity */
static uae_u32 dhex_nan[] ={0xffffffff, 0x7fffffff}; /* NaN with all mantissa bits set */
/* defined outside this file; loaded for FMOVECR offset 0x36 */
extern double fp_1e8;
/*
 * Compile a general FPU instruction.
 *
 * opcode  first instruction word (effective address field in the low bits)
 * extra   extension word: bits 15-13 select the instruction class, bits 9-7
 *         the destination FPU register and bits 6-0 the operation
 *
 * Handled classes: FMOVE FPx,<EA> (3), FMOVE.L to/from FPCR and FPIAR (4/5),
 * <EA> to FPx including FMOVECR (2) and FPx to FPx (0).  Everything else,
 * including all FPSR accesses, fails the compilation via FAIL (1).
 * Arithmetic results are copied to FP_RESULT so that a later condition
 * test can use them.
 */
void comp_fpp_opp (uae_u32 opcode, uae_u16 extra)
{
	int sreg, prec = 0;
	int dreg = (extra >> 7) & 7;
	int source = (extra >> 13) & 7;
	int opmode = extra & 0x7f;

	if (!currprefs.compfpu) {
		FAIL (1);
		return;
	}
	switch (source) {
	case 3: /* FMOVE FPx, <EA> */
		if (comp_fp_put (opcode, extra) < 0)
			FAIL (1);
		return;
	case 4: /* FMOVE.L <EA>, ControlReg */
		if (!(opcode & 0x30)) { /* Dn or An */
			if (extra & 0x1000) { /* FPCR */
				mov_l_mr (uae_p32(&regs.fpcr), opcode & 15);
				return;
			}
			if (extra & 0x0800) { /* FPSR */
				FAIL (1);
				return;
				// set_fpsr(m68k_dreg (regs, opcode & 15));
			}
			if (extra & 0x0400) { /* FPIAR */
				mov_l_mr (uae_p32(&regs.fpiar), opcode & 15); return;
			}
		}
		else if ((opcode & 0x3f) == 0x3c) { /* immediate operand */
			if (extra & 0x1000) { /* FPCR */
				uae_u32 val = comp_get_ilong ((m68k_pc_offset += 4) - 4);
				mov_l_mi (uae_p32(&regs.fpcr), val);
				return;
			}
			if (extra & 0x0800) { /* FPSR */
				FAIL (1);
				return;
			}
			if (extra & 0x0400) { /* FPIAR */
				uae_u32 val = comp_get_ilong ((m68k_pc_offset += 4) - 4);
				mov_l_mi (uae_p32(&regs.fpiar), val);
				return;
			}
		}
		FAIL (1);
		return;
	case 5: /* FMOVE.L ControlReg, <EA> */
		if (!(opcode & 0x30)) { /* Dn or An */
			if (extra & 0x1000) { /* FPCR */
				mov_l_rm (opcode & 15, uae_p32(&regs.fpcr)); return;
			}
			if (extra & 0x0800) { /* FPSR */
				FAIL (1);
				return;
			}
			if (extra & 0x0400) { /* FPIAR */
				mov_l_rm (opcode & 15, uae_p32(&regs.fpiar)); return;
			}
		}
		FAIL (1);
		return;
	case 6:
	case 7:
		FAIL (1);
		return;
	case 2: /* from <EA> to FPx */
		dont_care_fflags ();
		if ((extra & 0xfc00) == 0x5c00) { /* FMOVECR */
			//write_log (_T("JIT FMOVECR %x\n"), opmode);
			switch (opmode) {
			case 0x00:
				fmov_d_rm (dreg, uae_p32(&dhex_pi));
				break;
			case 0x0b:
				fmov_d_rm (dreg, uae_p32(&dhex_l10_2));
				break;
			case 0x0c:
				fmov_d_rm (dreg, uae_p32(&dhex_exp_1));
				break;
			case 0x0d:
				fmov_d_rm (dreg, uae_p32(&dhex_l2_e));
				break;
			case 0x0e:
				fmov_d_rm (dreg, uae_p32(&dhex_l10_e));
				break;
			case 0x0f:
				fmov_d_ri_0 (dreg);
				break;
			case 0x30:
				fmov_d_rm (dreg, uae_p32(&dhex_ln_2));
				break;
			case 0x31:
				fmov_d_rm (dreg, uae_p32(&dhex_ln_10));
				break;
			case 0x32:
				fmov_d_ri_1 (dreg);
				break;
			case 0x33:
				fmov_d_ri_10 (dreg);
				break;
			case 0x34:
				fmov_d_ri_100 (dreg);
				break;
			case 0x35:
				fmov_l_ri (dreg, 10000);
				break;
			case 0x36:
				fmov_rm (dreg, uae_p32(&fp_1e8));
				break;
			case 0x37:
				fmov_d_rm (dreg, uae_p32(&dhex_1e16));
				break;
			case 0x38:
				fmov_d_rm (dreg, uae_p32(&dhex_1e32));
				break;
			case 0x39:
				fmov_d_rm (dreg, uae_p32(&dhex_1e64));
				break;
			case 0x3a:
				fmov_d_rm (dreg, uae_p32(&dhex_1e128));
				break;
			case 0x3b:
				fmov_d_rm (dreg, uae_p32(&dhex_1e256));
				break;
			default:
				FAIL (1);
				return;
			}
			fmov_rr (FP_RESULT, dreg);
			return;
		}
		if (opmode & 0x20) /* two operands, so we need a scratch reg */
			sreg = FS1;
		else /* one operand only, thus we can load the argument into dreg */
			sreg = dreg;
		if ((prec = comp_fp_get (opcode, extra, sreg)) < 0) {
			FAIL (1);
			return;
		}
		if (!opmode) { /* FMOVE <EA>,FPx */
			fmov_rr (FP_RESULT, dreg);
			return;
		}
		/* no break here for <EA> to dreg */
	case 0: /* directly from sreg to dreg */
		if (!source) { /* no <EA> */
			dont_care_fflags ();
			sreg = (extra >> 10) & 7;
		}
		switch (opmode) {
		case 0x00: /* FMOVE */
			fmov_rr (dreg, sreg);
			break;
		case 0x01: /* FINT */
			frndint_rr (dreg, sreg);
			break;
		case 0x02: /* FSINH */
			ffunc_rr (sinh, dreg, sreg);
			break;
		case 0x03: /* FINTRZ */
			frndintz_rr (dreg, sreg);
			break;
		case 0x04: /* FSQRT */
			fsqrt_rr (dreg, sreg);
			break;
		case 0x06: /* FLOGNP1 */
			ffunc_rr (log1p, dreg, sreg);
			break;
		case 0x08: /* FETOXM1 */
			ffunc_rr (expm1, dreg, sreg);
			break;
		case 0x09: /* FTANH */
			ffunc_rr (tanh, dreg, sreg);
			break;
		case 0x0a: /* FATAN */
			ffunc_rr (atan, dreg, sreg);
			break;
		case 0x0c: /* FASIN */
			ffunc_rr (asin, dreg, sreg);
			break;
		case 0x0d: /* FATANH */
			ffunc_rr (atanh, dreg, sreg);
			break;
		case 0x0e: /* FSIN */
			ffunc_rr (sin, dreg, sreg);
			break;
		case 0x0f: /* FTAN */
			ffunc_rr (tan, dreg, sreg);
			break;
		case 0x10: /* FETOX */
			ffunc_rr (exp, dreg, sreg);
			break;
		case 0x11: /* FTWOTOX */
			fpowx_rr (2, dreg, sreg);
			break;
		case 0x12: /* FTENTOX */
			fpowx_rr (10, dreg, sreg);
			break;
		case 0x14: /* FLOGN */
			ffunc_rr (log, dreg, sreg);
			break;
		case 0x15: /* FLOG10 */
			ffunc_rr (log10, dreg, sreg);
			break;
		case 0x16: /* FLOG2 */
			ffunc_rr (log2, dreg, sreg);
			break;
		case 0x18: /* FABS */
			fabs_rr (dreg, sreg);
			break;
		case 0x19: /* FCOSH */
			ffunc_rr (cosh, dreg, sreg);
			break;
		case 0x1a: /* FNEG */
			fneg_rr (dreg, sreg);
			break;
		case 0x1c: /* FACOS */
			ffunc_rr (acos, dreg, sreg);
			break;
		case 0x1d: /* FCOS */
			ffunc_rr (cos, dreg, sreg);
			break;
		case 0x20: /* FDIV */
			fdiv_rr (dreg, sreg);
			break;
		case 0x21: /* FMOD */
			fmod_rr (dreg, sreg);
			break;
		case 0x22: /* FADD */
			fadd_rr (dreg, sreg);
			break;
		case 0x23: /* FMUL */
			fmul_rr (dreg, sreg);
			break;
		case 0x24: /* FSGLDIV */
			fsgldiv_rr (dreg, sreg);
			break;
		case 0x60: /* FSDIV */
			fdiv_rr (dreg, sreg);
			if (!currprefs.fpu_strict) /* faster, but less strict rounding */
				break;
			fcuts_r (dreg);
			break;
		case 0x25: /* FREM */
			frem1_rr (dreg, sreg);
			break;
		case 0x27: /* FSGLMUL */
			fsglmul_rr (dreg, sreg);
			break;
		case 0x63: /* FSMUL */
			fmul_rr (dreg, sreg);
			if (!currprefs.fpu_strict) /* faster, but less strict rounding */
				break;
			fcuts_r (dreg);
			break;
		case 0x28: /* FSUB */
			fsub_rr (dreg, sreg);
			break;
		case 0x30: /* FSINCOS */
		case 0x31:
		case 0x32:
		case 0x33:
		case 0x34:
		case 0x35:
		case 0x36:
		case 0x37:
			/* low three bits of extra name the cosine register; when it
			   is also the sine register only the sine is produced */
			if (dreg == (extra & 7))
				ffunc_rr (sin, dreg, sreg);
			else
				fsincos_rr (dreg, extra & 7, sreg);
			break;
		case 0x38: /* FCMP */
			/* only FP_RESULT is written, dreg stays untouched */
			fmov_rr (FP_RESULT, dreg);
			fsub_rr (FP_RESULT, sreg);
			return;
		case 0x3a: /* FTST */
			fmov_rr (FP_RESULT, sreg);
			return;
		case 0x40: /* FSMOVE */
			if (prec == 1 || !currprefs.fpu_strict) {
				if (sreg != dreg) /* no <EA> */
					fmov_rr (dreg, sreg);
			}
			else {
				fmovs_rr (dreg, sreg);
			}
			break;
		case 0x44: /* FDMOVE */
			if (sreg != dreg) /* no <EA> */
				fmov_rr (dreg, sreg);
			break;
		case 0x41: /* FSSQRT */
			fsqrt_rr (dreg, sreg);
			if (!currprefs.fpu_strict) /* faster, but less strict rounding */
				break;
			fcuts_r (dreg);
			break;
		case 0x45: /* FDSQRT */
			fsqrt_rr (dreg, sreg);
			break;
		case 0x58: /* FSABS */
			fabs_rr (dreg, sreg);
			if (prec != 1 && currprefs.fpu_strict)
				fcuts_r (dreg);
			break;
		case 0x5a: /* FSNEG */
			fneg_rr (dreg, sreg);
			if (prec != 1 && currprefs.fpu_strict)
				fcuts_r (dreg);
			break;
		case 0x5c: /* FDABS */
			fabs_rr (dreg, sreg);
			break;
		case 0x5e: /* FDNEG */
			fneg_rr (dreg, sreg);
			break;
		case 0x62: /* FSADD */
			fadd_rr (dreg, sreg);
			if (!currprefs.fpu_strict) /* faster, but less strict rounding */
				break;
			fcuts_r (dreg);
			break;
		case 0x64: /* FDDIV */
			fdiv_rr (dreg, sreg);
			break;
		case 0x66: /* FDADD */
			fadd_rr (dreg, sreg);
			break;
		case 0x67: /* FDMUL */
			fmul_rr (dreg, sreg);
			break;
		case 0x68: /* FSSUB */
			fsub_rr (dreg, sreg);
			if (!currprefs.fpu_strict) /* faster, but less strict rounding */
				break;
			fcuts_r (dreg);
			break;
		case 0x6c: /* FDSUB */
			fsub_rr (dreg, sreg);
			break;
		default:
			FAIL (1);
			return;
		}
		fmov_rr (FP_RESULT, dreg);
		return;
	default:
		write_log (_T ("Unsupported JIT-FPU instruction: 0x%04x %04x\n"), opcode, extra);
		FAIL (1);
		return;
	}
}
#endif

View file

@ -224,9 +224,6 @@ MIDFUNC(2,mov_l_rr,(W4 d, RR4 s))
live.nat[s].holds[live.nat[s].nholds] = d;
live.nat[s].nholds++;
#if defined(DEBUG) && DEBUG > 1
jit_log("Added %d to nreg %d(%d), now holds %d regs", d, s, live.state[d].realind, live.nat[s].nholds);
#endif
unlock2(s);
}
MENDFUNC(2,mov_l_rr,(W4 d, RR4 s))
@ -244,6 +241,14 @@ MIDFUNC(2,mov_l_mr,(IMM d, RR4 s))
}
MENDFUNC(2,mov_l_mr,(IMM d, RR4 s))
/* Maps d for a 4-byte write and emits raw_mov_l_rm with the constant s as source. */
MIDFUNC(2,mov_l_rm,(W4 d, IMM s))
{
	const int dst = writereg(d, 4);

	raw_mov_l_rm(dst, s);
	unlock2(dst);
}
MENDFUNC(2,mov_l_rm,(W4 d, IMM s))
MIDFUNC(2,mov_l_ri,(W4 d, IMM s))
{
set_const(d, s);
@ -480,3 +485,435 @@ STATIC_INLINE void write_jmp_target(uae_u32* jmpaddr, cpuop_func* a) {
/* Appends the 32-bit jump target a to the emitted code via emit_long. */
STATIC_INLINE void emit_jmp_target(uae_u32 a)
{
	emit_long(a);
}
/*************************************************************************
* FPU stuff *
*************************************************************************/
/* Drops FPU register r: releases its native register (if any) and marks the value undefined. */
MIDFUNC(1,f_forget_about,(FW r))
{
	if (f_isinreg(r)) {
		f_disassociate(r);
	}
	live.fate[r].status = UNDEF;
}
MENDFUNC(1,f_forget_about,(FW r))
/* Releases the native register of FP_RESULT through f_disassociate. */
MIDFUNC(0,dont_care_fflags,(void))
{
f_disassociate(FP_RESULT);
}
MENDFUNC(0,dont_care_fflags,(void))
/* Copies FPU register s to d; a move onto itself emits nothing. */
MIDFUNC(2,fmov_rr,(FW d, FR s))
{
	if (d != s) {
		const int src = f_readreg(s);
		const int dst = f_writereg(d);

		raw_fmov_rr(dst, src);
		f_unlock(src);
		f_unlock(dst);
	}
}
MENDFUNC(2,fmov_rr,(FW d, FR s))
/* Maps integer register s (4-byte read) and FPU register d (write), then emits raw_fmov_l_rr. */
MIDFUNC(2,fmov_l_rr,(FW d, RR4 s))
{
	const int src = readreg(s, 4);
	const int dst = f_writereg(d);

	raw_fmov_l_rr(dst, src);
	f_unlock(dst);
	unlock2(src);
}
MENDFUNC(2,fmov_l_rr,(FW d, RR4 s))
/* Maps integer register s (4-byte read) and FPU register d (write), then emits raw_fmov_s_rr. */
MIDFUNC(2,fmov_s_rr,(FW d, RR4 s))
{
	const int src = readreg(s, 4);
	const int dst = f_writereg(d);

	raw_fmov_s_rr(dst, src);
	f_unlock(dst);
	unlock2(src);
}
MENDFUNC(2,fmov_s_rr,(FW d, RR4 s))
/* Maps integer register s (2-byte read) and FPU register d (write), then emits raw_fmov_w_rr. */
MIDFUNC(2,fmov_w_rr,(FW d, RR2 s))
{
	const int src = readreg(s, 2);
	const int dst = f_writereg(d);

	raw_fmov_w_rr(dst, src);
	f_unlock(dst);
	unlock2(src);
}
MENDFUNC(2,fmov_w_rr,(FW d, RR2 s))
/* Maps integer register s (1-byte read) and FPU register d (write), then emits raw_fmov_b_rr. */
MIDFUNC(2,fmov_b_rr,(FW d, RR1 s))
{
	const int src = readreg(s, 1);
	const int dst = f_writereg(d);

	raw_fmov_b_rr(dst, src);
	f_unlock(dst);
	unlock2(src);
}
MENDFUNC(2,fmov_b_rr,(FW d, RR1 s))
/* Maps integer registers s1 and s2 (4-byte reads) and FPU register d (write), then emits raw_fmov_d_rrr. */
MIDFUNC(3,fmov_d_rrr,(FW d, RR4 s1, RR4 s2))
{
	const int first = readreg(s1, 4);
	const int second = readreg(s2, 4);
	const int dst = f_writereg(d);

	raw_fmov_d_rrr(dst, first, second);
	f_unlock(dst);
	unlock2(second);
	unlock2(first);
}
MENDFUNC(3,fmov_d_rrr,(FW d, RR4 s1, RR4 s2))
/*
 * Loads the integer constant i into FPU register d.  0, 1, 10 and 100 are
 * delegated to the dedicated fmov_d_ri_* functions; any other value is
 * placed in REG_WORK1 and moved over with raw_fmov_l_rr.
 */
MIDFUNC(2,fmov_l_ri,(FW d, IMM i))
{
	if (i == 0) {
		fmov_d_ri_0(d);
	} else if (i == 1) {
		fmov_d_ri_1(d);
	} else if (i == 10) {
		fmov_d_ri_10(d);
	} else if (i == 100) {
		fmov_d_ri_100(d);
	} else {
		const int dst = f_writereg(d);

		compemu_raw_mov_l_ri(REG_WORK1, i);
		raw_fmov_l_rr(dst, REG_WORK1);
		f_unlock(dst);
	}
}
MENDFUNC(2,fmov_l_ri,(FW d, IMM i))
/* Places the constant i in REG_WORK1 and moves it into FPU register d with raw_fmov_s_rr. */
MIDFUNC(2,fmov_s_ri,(FW d, IMM i))
{
	const int dst = f_writereg(d);

	compemu_raw_mov_l_ri(REG_WORK1, i);
	raw_fmov_s_rr(dst, REG_WORK1);
	f_unlock(dst);
}
MENDFUNC(2,fmov_s_ri,(FW d, IMM i))
/* Maps FPU register s (read) and integer register d (4-byte write), then emits raw_fmov_to_l_rr. */
MIDFUNC(2,fmov_to_l_rr,(W4 d, FR s))
{
	const int src = f_readreg(s);
	const int dst = writereg(d, 4);

	raw_fmov_to_l_rr(dst, src);
	unlock2(dst);
	f_unlock(src);
}
MENDFUNC(2,fmov_to_l_rr,(W4 d, FR s))
/* Maps FPU register s (read) and integer register d (4-byte write), then emits raw_fmov_to_s_rr. */
MIDFUNC(2,fmov_to_s_rr,(W4 d, FR s))
{
	const int src = f_readreg(s);
	const int dst = writereg(d, 4);

	raw_fmov_to_s_rr(dst, src);
	unlock2(dst);
	f_unlock(src);
}
MENDFUNC(2,fmov_to_s_rr,(W4 d, FR s))
/* Maps FPU register s (read) and integer register d via rmw(d, 2, 4), then emits raw_fmov_to_w_rr. */
MIDFUNC(2,fmov_to_w_rr,(W4 d, FR s))
{
	const int src = f_readreg(s);
	const int dst = rmw(d, 2, 4);

	raw_fmov_to_w_rr(dst, src);
	unlock2(dst);
	f_unlock(src);
}
MENDFUNC(2,fmov_to_w_rr,(W4 d, FR s))
/* Maps FPU register s (read) and integer register d via rmw(d, 1, 4), then emits raw_fmov_to_b_rr. */
MIDFUNC(2,fmov_to_b_rr,(W4 d, FR s))
{
	const int src = f_readreg(s);
	const int dst = rmw(d, 1, 4);

	raw_fmov_to_b_rr(dst, src);
	unlock2(dst);
	f_unlock(src);
}
MENDFUNC(2,fmov_to_b_rr,(W4 d, FR s))
/* Maps FPU register r for writing and emits raw_fmov_d_ri_0 (used for the constant 0). */
MIDFUNC(1,fmov_d_ri_0,(FW r))
{
	const int dst = f_writereg(r);

	raw_fmov_d_ri_0(dst);
	f_unlock(dst);
}
MENDFUNC(1,fmov_d_ri_0,(FW r))
/* Maps FPU register r for writing and emits raw_fmov_d_ri_1 (used for the constant 1). */
MIDFUNC(1,fmov_d_ri_1,(FW r))
{
	const int dst = f_writereg(r);

	raw_fmov_d_ri_1(dst);
	f_unlock(dst);
}
MENDFUNC(1,fmov_d_ri_1,(FW r))
/* Maps FPU register r for writing and emits raw_fmov_d_ri_10 (used for the constant 10). */
MIDFUNC(1,fmov_d_ri_10,(FW r))
{
	const int dst = f_writereg(r);

	raw_fmov_d_ri_10(dst);
	f_unlock(dst);
}
MENDFUNC(1,fmov_d_ri_10,(FW r))
/* Maps FPU register r for writing and emits raw_fmov_d_ri_100 (used for the constant 100). */
MIDFUNC(1,fmov_d_ri_100,(FW r))
{
	const int dst = f_writereg(r);

	raw_fmov_d_ri_100(dst);
	f_unlock(dst);
}
MENDFUNC(1,fmov_d_ri_100,(FW r))
/* Maps FPU register r for writing and emits raw_fmov_d_rm with memory address m as source. */
MIDFUNC(2,fmov_d_rm,(FW r, MEMR m))
{
	const int dst = f_writereg(r);

	raw_fmov_d_rm(dst, m);
	f_unlock(dst);
}
MENDFUNC(2,fmov_d_rm,(FW r, MEMR m))
/* Maps FPU register r for writing and emits raw_fmovs_rm with memory address m as source. */
MIDFUNC(2,fmovs_rm,(FW r, MEMR m))
{
	const int dst = f_writereg(r);

	raw_fmovs_rm(dst, m);
	f_unlock(dst);
}
MENDFUNC(2,fmovs_rm,(FW r, MEMR m))
/*
 * Maps FPU register r for writing and loads it from memory address m.
 * Emits raw_fmov_d_rm, i.e. the same code as fmov_d_rm.
 */
MIDFUNC(2,fmov_rm,(FW r, MEMR m))
{
	const int dst = f_writereg(r);

	raw_fmov_d_rm(dst, m);
	f_unlock(dst);
}
MENDFUNC(2,fmov_rm,(FW r, MEMR m))
/* Maps FPU register s (read) and integer registers d1, d2 (4-byte writes), then emits raw_fmov_to_d_rrr. */
MIDFUNC(3,fmov_to_d_rrr,(W4 d1, W4 d2, FR s))
{
	const int src = f_readreg(s);
	const int first = writereg(d1, 4);
	const int second = writereg(d2, 4);

	raw_fmov_to_d_rrr(first, second, src);
	unlock2(second);
	unlock2(first);
	f_unlock(src);
}
MENDFUNC(3,fmov_to_d_rrr,(W4 d1, W4 d2, FR s))
/* Maps s for reading and d for writing, then emits raw_fsqrt_rr(d, s). */
MIDFUNC(2,fsqrt_rr,(FW d, FR s))
{
	const int src = f_readreg(s);
	const int dst = f_writereg(d);

	raw_fsqrt_rr(dst, src);
	f_unlock(src);
	f_unlock(dst);
}
MENDFUNC(2,fsqrt_rr,(FW d, FR s))
/* Maps s for reading and d for writing, then emits raw_fabs_rr(d, s). */
MIDFUNC(2,fabs_rr,(FW d, FR s))
{
	const int src = f_readreg(s);
	const int dst = f_writereg(d);

	raw_fabs_rr(dst, src);
	f_unlock(src);
	f_unlock(dst);
}
MENDFUNC(2,fabs_rr,(FW d, FR s))
/* Maps s for reading and d for writing, then emits raw_fneg_rr(d, s). */
MIDFUNC(2,fneg_rr,(FW d, FR s))
{
	const int src = f_readreg(s);
	const int dst = f_writereg(d);

	raw_fneg_rr(dst, src);
	f_unlock(src);
	f_unlock(dst);
}
MENDFUNC(2,fneg_rr,(FW d, FR s))
/* Maps s for reading and d for read-modify-write, then emits raw_fdiv_rr(d, s). */
MIDFUNC(2,fdiv_rr,(FRW d, FR s))
{
	const int src = f_readreg(s);
	const int dst = f_rmw(d);

	raw_fdiv_rr(dst, src);
	f_unlock(src);
	f_unlock(dst);
}
MENDFUNC(2,fdiv_rr,(FRW d, FR s))
/* Maps s for reading and d for read-modify-write, then emits raw_fadd_rr(d, s). */
MIDFUNC(2,fadd_rr,(FRW d, FR s))
{
	const int src = f_readreg(s);
	const int dst = f_rmw(d);

	raw_fadd_rr(dst, src);
	f_unlock(src);
	f_unlock(dst);
}
MENDFUNC(2,fadd_rr,(FRW d, FR s))
/* Maps s for reading and d for read-modify-write, then emits raw_fmul_rr(d, s). */
MIDFUNC(2,fmul_rr,(FRW d, FR s))
{
	const int src = f_readreg(s);
	const int dst = f_rmw(d);

	raw_fmul_rr(dst, src);
	f_unlock(src);
	f_unlock(dst);
}
MENDFUNC(2,fmul_rr,(FRW d, FR s))
/* Maps s for reading and d for read-modify-write, then emits raw_fsub_rr(d, s). */
MIDFUNC(2,fsub_rr,(FRW d, FR s))
{
	const int src = f_readreg(s);
	const int dst = f_rmw(d);

	raw_fsub_rr(dst, src);
	f_unlock(src);
	f_unlock(dst);
}
MENDFUNC(2,fsub_rr,(FRW d, FR s))
/* Maps s for reading and d for writing, then emits raw_frndint_rr(d, s). */
MIDFUNC(2,frndint_rr,(FW d, FR s))
{
	const int src = f_readreg(s);
	const int dst = f_writereg(d);

	raw_frndint_rr(dst, src);
	f_unlock(src);
	f_unlock(dst);
}
MENDFUNC(2,frndint_rr,(FW d, FR s))
/* Maps s for reading and d for writing, then emits raw_frndintz_rr(d, s). */
MIDFUNC(2,frndintz_rr,(FW d, FR s))
{
	const int src = f_readreg(s);
	const int dst = f_writereg(d);

	raw_frndintz_rr(dst, src);
	f_unlock(src);
	f_unlock(dst);
}
MENDFUNC(2,frndintz_rr,(FW d, FR s))
/* Maps s for reading and d for read-modify-write, then emits raw_fmod_rr(d, s). */
MIDFUNC(2,fmod_rr,(FRW d, FR s))
{
	const int src = f_readreg(s);
	const int dst = f_rmw(d);

	raw_fmod_rr(dst, src);
	f_unlock(src);
	f_unlock(dst);
}
MENDFUNC(2,fmod_rr,(FRW d, FR s))
/* Maps s for reading and d for read-modify-write, then emits raw_fsgldiv_rr(d, s). */
MIDFUNC(2,fsgldiv_rr,(FRW d, FR s))
{
	const int src = f_readreg(s);
	const int dst = f_rmw(d);

	raw_fsgldiv_rr(dst, src);
	f_unlock(src);
	f_unlock(dst);
}
MENDFUNC(2,fsgldiv_rr,(FRW d, FR s))
/* Maps r for read-modify-write and emits raw_fcuts_r on it. */
MIDFUNC(1,fcuts_r,(FRW r))
{
	const int reg = f_rmw(r);

	raw_fcuts_r(reg);
	f_unlock(reg);
}
MENDFUNC(1,fcuts_r,(FRW r))
/* Maps s for reading and d for read-modify-write, then emits raw_frem1_rr(d, s). */
MIDFUNC(2,frem1_rr,(FRW d, FR s))
{
	const int src = f_readreg(s);
	const int dst = f_rmw(d);

	raw_frem1_rr(dst, src);
	f_unlock(src);
	f_unlock(dst);
}
MENDFUNC(2,frem1_rr,(FRW d, FR s))
/* Maps s for reading and d for read-modify-write, then emits raw_fsglmul_rr(d, s). */
MIDFUNC(2,fsglmul_rr,(FRW d, FR s))
{
	const int src = f_readreg(s);
	const int dst = f_rmw(d);

	raw_fsglmul_rr(dst, src);
	f_unlock(src);
	f_unlock(dst);
}
MENDFUNC(2,fsglmul_rr,(FRW d, FR s))
/* Maps s for reading and d for writing, then emits raw_fmovs_rr(d, s). */
MIDFUNC(2,fmovs_rr,(FW d, FR s))
{
	const int src = f_readreg(s);
	const int dst = f_writereg(d);

	raw_fmovs_rr(dst, src);
	f_unlock(src);
	f_unlock(dst);
}
MENDFUNC(2,fmovs_rr,(FW d, FR s))
/*
 * Emits d = func(s) through raw_ffunc_rr.  Flags are clobbered and both
 * prepare_for_call steps run before the FPU registers are mapped.
 */
MIDFUNC(3,ffunc_rr,(double (*func)(double), FW d, FR s))
{
	clobber_flags();
	prepare_for_call_1();
	prepare_for_call_2();

	const int src = f_readreg(s);
	const int dst = f_writereg(d);

	raw_ffunc_rr(func, dst, src);
	f_unlock(src);
	f_unlock(dst);
}
MENDFUNC(3,ffunc_rr,(double (*func)(double), FW d, FR s))
/*
 * Emits d = sin(s) and c = cos(s) as two raw_ffunc_rr calls.
 *
 * d  FPU register receiving the sine
 * c  FPU register receiving the cosine
 * s  FPU register holding the argument
 *
 * The second call still reads s, so the first call must not overwrite it:
 * when the cosine register is the source register the sine is computed
 * first, otherwise the cosine goes first (which also covers d == s).
 * d == c is expected to be handled by the caller.
 * NOTE(review): relies on raw_ffunc_rr leaving s intact when its destination
 * is a different register, as the previous fixed cos/sin order already did.
 */
MIDFUNC(3,fsincos_rr,(FW d, FW c, FR s))
{
	clobber_flags();
	prepare_for_call_1();
	prepare_for_call_2();

	s = f_readreg(s);  /* s for source */
	d = f_writereg(d); /* d for sine */
	c = f_writereg(c); /* c for cosine */

	if (c == s) {
		/* cosine overwrites the source, so it has to come last */
		raw_ffunc_rr(sin, d, s);
		raw_ffunc_rr(cos, c, s);
	} else {
		raw_ffunc_rr(cos, c, s);
		raw_ffunc_rr(sin, d, s);
	}

	f_unlock(s);
	f_unlock(d);
	f_unlock(c);
}
MENDFUNC(3,fsincos_rr,(FW d, FW c, FR s))
/*
 * Emits raw_fpowx_rr(x, d, s); the callers in this file pass 2 or 10 as x.
 * Flags are clobbered and both prepare_for_call steps run first.
 */
MIDFUNC(3,fpowx_rr,(uae_u32 x, FW d, FR s))
{
	clobber_flags();
	prepare_for_call_1();
	prepare_for_call_2();

	const int src = f_readreg(s);
	const int dst = f_writereg(d);

	raw_fpowx_rr(x, dst, src);
	f_unlock(src);
	f_unlock(dst);
}
MENDFUNC(3,fpowx_rr,(uae_u32 x, FW d, FR s))
/*
 * Discards the current native flags and has fflags_into_flags_internal
 * rebuild them.
 * NOTE(review): declared with an argument count of 1 and an empty parameter
 * list although it takes no arguments; dont_care_fflags uses 0 and (void) --
 * confirm the count is not used by the MIDFUNC macros before aligning.
 */
MIDFUNC(1,fflags_into_flags,())
{
clobber_flags();
fflags_into_flags_internal();
}
MENDFUNC(1,fflags_into_flags,())

View file

@ -50,6 +50,7 @@ DECLARE_MIDFUNC(lea_l_brr_indexed(W4 d, RR4 s, RR4 index, IMM factor, IMM offset
DECLARE_MIDFUNC(lea_l_rr_indexed(W4 d, RR4 s, RR4 index, IMM factor));
DECLARE_MIDFUNC(mov_l_rr(W4 d, RR4 s));
DECLARE_MIDFUNC(mov_l_mr(IMM d, RR4 s));
DECLARE_MIDFUNC(mov_l_rm(W4 d, IMM s));
DECLARE_MIDFUNC(mov_b_rm(W1 d, IMM s));
DECLARE_MIDFUNC(mov_l_ri(W4 d, IMM s));
DECLARE_MIDFUNC(mov_w_ri(W2 d, IMM s));
@ -66,3 +67,44 @@ DECLARE_MIDFUNC(make_flags_live(void));
DECLARE_MIDFUNC(forget_about(W4 r));
DECLARE_MIDFUNC(f_forget_about(FW r));
DECLARE_MIDFUNC(dont_care_fflags(void));
DECLARE_MIDFUNC(fmov_rr(FW d, FR s));
DECLARE_MIDFUNC(fmov_l_rr(FW d, RR4 s));
DECLARE_MIDFUNC(fmov_s_rr(FW d, RR4 s));
DECLARE_MIDFUNC(fmov_w_rr(FW d, RR2 s));
DECLARE_MIDFUNC(fmov_b_rr(FW d, RR1 s));
DECLARE_MIDFUNC(fmov_d_rrr(FW d, RR4 s1, RR4 s2));
DECLARE_MIDFUNC(fmov_l_ri(FW d, IMM i));
DECLARE_MIDFUNC(fmov_s_ri(FW d, IMM i));
DECLARE_MIDFUNC(fmov_to_l_rr(W4 d, FR s));
DECLARE_MIDFUNC(fmov_to_s_rr(W4 d, FR s));
DECLARE_MIDFUNC(fmov_to_w_rr(W4 d, FR s));
DECLARE_MIDFUNC(fmov_to_b_rr(W4 d, FR s));
DECLARE_MIDFUNC(fmov_d_ri_0(FW d));
DECLARE_MIDFUNC(fmov_d_ri_1(FW d));
DECLARE_MIDFUNC(fmov_d_ri_10(FW d));
DECLARE_MIDFUNC(fmov_d_ri_100(FW d));
DECLARE_MIDFUNC(fmov_d_rm(FW r, MEMR m));
DECLARE_MIDFUNC(fmovs_rm(FW r, MEMR m));
DECLARE_MIDFUNC(fmov_rm(FW r, MEMR m));
DECLARE_MIDFUNC(fmov_to_d_rrr(W4 d1, W4 d2, FR s));
DECLARE_MIDFUNC(fsqrt_rr(FW d, FR s));
DECLARE_MIDFUNC(fabs_rr(FW d, FR s));
DECLARE_MIDFUNC(fneg_rr(FW d, FR s));
DECLARE_MIDFUNC(fdiv_rr(FRW d, FR s));
DECLARE_MIDFUNC(fadd_rr(FRW d, FR s));
DECLARE_MIDFUNC(fmul_rr(FRW d, FR s));
DECLARE_MIDFUNC(fsub_rr(FRW d, FR s));
DECLARE_MIDFUNC(frndint_rr(FW d, FR s));
DECLARE_MIDFUNC(frndintz_rr(FW d, FR s));
DECLARE_MIDFUNC(fmod_rr(FRW d, FR s));
DECLARE_MIDFUNC(fsgldiv_rr(FRW d, FR s));
DECLARE_MIDFUNC(fcuts_r(FRW r));
DECLARE_MIDFUNC(frem1_rr(FRW d, FR s));
DECLARE_MIDFUNC(fsglmul_rr(FRW d, FR s));
DECLARE_MIDFUNC(fmovs_rr(FW d, FR s));
DECLARE_MIDFUNC(ffunc_rr(double (*func)(double), FW d, FR s));
DECLARE_MIDFUNC(fsincos_rr(FW d, FW c, FR s));
DECLARE_MIDFUNC(fpowx_rr(uae_u32 x, FW d, FR s));
DECLARE_MIDFUNC(fflags_into_flags());

View file

@ -32,6 +32,8 @@
#define writemem_special writemem
#define readmem_special readmem
#include <math.h>
#include "sysconfig.h"
#include "sysdeps.h"
@ -108,7 +110,11 @@ const int follow_const_jumps = 0;
static uae_u32 cache_size = 0; // Size of total cache allocated for compiled blocks
static uae_u32 current_cache_size = 0; // Cache grows upwards: how much has been consumed already
#ifdef USE_JIT_FPU
#define avoid_fpu (!currprefs.compfpu)
#else
#define avoid_fpu (true)
#endif
static const int align_loops = 0; // Align the start of loops
static const int align_jumps = 0; // Align the start of jumps
static int optcount[10] = {
@ -646,13 +652,15 @@ bool check_prefs_changed_comp(bool checkonly)
{
bool changed = 0;
if (currprefs.fpu_strict != changed_prefs.fpu_strict ||
if (currprefs.compfpu != changed_prefs.compfpu ||
currprefs.fpu_strict != changed_prefs.fpu_strict ||
currprefs.cachesize != changed_prefs.cachesize)
changed = 1;
if (checkonly)
return changed;
currprefs.compfpu = changed_prefs.compfpu;
currprefs.fpu_strict = changed_prefs.fpu_strict;
if (currprefs.cachesize != changed_prefs.cachesize) {
@ -955,6 +963,7 @@ static void evict(int r)
if (live.nat[rr].nholds != live.state[r].realind) { /* Was not last */
int topreg = live.nat[rr].holds[live.nat[rr].nholds];
int thisind = live.state[r].realind;
live.nat[rr].holds[thisind] = topreg;
live.state[topreg].realind = thisind;
}
@ -1343,6 +1352,142 @@ static int rmw(int r, int wsize, int rsize)
return rmw_general(r, wsize, rsize);
}
/********************************************************************
* FPU register status handling. EMIT TIME! *
********************************************************************/
/* Writes virtual FPU register r back to its memory slot if it is DIRTY and marks it INMEM. */
STATIC_INLINE void f_tomem_drop(int r)
{
	if (live.fate[r].status != DIRTY)
		return;

	compemu_raw_fmov_mr_drop((uintptr)live.fate[r].mem, live.fate[r].realreg);
	live.fate[r].status = INMEM;
}
/* Returns non-zero when virtual FPU register r is CLEAN or DIRTY, i.e. lives in a native register. */
STATIC_INLINE int f_isinreg(int r)
{
	const int state = live.fate[r].status;

	return state == CLEAN || state == DIRTY;
}
/*
 * Removes virtual FPU register r from its native register: a DIRTY value
 * is written back first, then the native register is marked free and r
 * becomes INMEM without a native register.  Does nothing if r is not in a
 * register.
 */
STATIC_INLINE void f_evict(int r)
{
	if (f_isinreg(r)) {
		const int nreg = live.fate[r].realreg;

		f_tomem_drop(r);
		live.fat[nreg].nholds = 0;
		live.fate[r].status = INMEM;
		live.fate[r].realreg = -1;
	}
}
/* Evicts the virtual FPU register recorded as held by native FPU register r. */
STATIC_INLINE void f_free_nreg(int r)
{
	f_evict(live.fat[r].holds);
}
/*
 * Use with care!  Marks r as CLEAN when it sits in a native register,
 * so a DIRTY value will no longer be written back.
 */
STATIC_INLINE void f_isclean(int r)
{
	if (f_isinreg(r)) {
		live.fate[r].status = CLEAN;
	}
}
/*
 * Releases the native register of virtual FPU register r without storing
 * its value: marking it clean first keeps f_evict from writing it back.
 */
STATIC_INLINE void f_disassociate(int r)
{
f_isclean(r);
f_evict(r);
}
/*
 * Binds virtual FPU register r to its native register and returns the
 * native register number.
 *
 * The mapping is fixed: virtual 0-7 (the Amiga FP registers) use native
 * 8-15, virtual 8 and up (FP_RESULT, FS1, FS2, FS3) use native r - 8.
 * With willclobber == 0 a value that is INMEM is loaded and becomes CLEAN;
 * with willclobber != 0 nothing is loaded and r becomes DIRTY.
 */
static int f_alloc_reg(int r, int willclobber)
{
	const int nreg = (r < 8) ? r + 8 : r - 8;

	if (willclobber) {
		live.fate[r].status = DIRTY;
	} else if (live.fate[r].status == INMEM) {
		compemu_raw_fmov_rm(nreg, (uintptr)live.fate[r].mem);
		live.fate[r].status = CLEAN;
	}

	live.fate[r].realreg = nreg;
	live.fat[nreg].holds = r;
	live.fat[nreg].nholds = 1;
	return nreg;
}
/* Intentionally empty: callers pair it with f_readreg/f_writereg/f_rmw, but there is nothing to release here. */
STATIC_INLINE void f_unlock(int r)
{
}
/*
 * Returns the native register holding virtual FPU register r for reading,
 * loading the value through f_alloc_reg when it is not in a register yet.
 */
STATIC_INLINE int f_readreg(int r)
{
	int nreg = f_isinreg(r) ? live.fate[r].realreg : -1;

	/* not in a register: the value has to be brought in from memory */
	if (nreg < 0)
		nreg = f_alloc_reg(r, 0);
	return nreg;
}
/*
 * Return the native register for virtual FPU register r for a write that
 * replaces the whole value.  The old contents are never loaded; the
 * register is always left DIRTY.
 */
STATIC_INLINE int f_writereg(int r)
{
	int native = f_isinreg(r) ? live.fate[r].realreg : -1;

	if (native < 0)
		native = f_alloc_reg(r, 1);

	live.fate[r].status = DIRTY;
	return native;
}
/*
 * Return the native register for a read-modify-write access to virtual FPU
 * register r.  A value not yet in a register is allocated without the
 * clobber flag (so an INMEM value is loaded); the register is then marked
 * DIRTY.
 */
STATIC_INLINE int f_rmw(int r)
{
	const int native = f_isinreg(r) ? live.fate[r].realreg
	                                : f_alloc_reg(r, 0);

	live.fate[r].status = DIRTY;
	return native;
}
/*
 * Pass the native register holding FP_RESULT to raw_fflags_into_flags(),
 * then call live_flags().
 */
static void fflags_into_flags_internal(void)
{
	const int fpres = f_readreg(FP_RESULT);

	raw_fflags_into_flags(fpres);
	f_unlock(fpres);
	live_flags();
}
#if defined(CPU_arm)
@ -1379,6 +1524,7 @@ void sync_m68k_pc(void)
struct scratch_t {
uae_u32 regs[VREGS];
fpu_register fregs[VFREGS];
};
static scratch_t scratch;
@ -1479,6 +1625,12 @@ void init_comp(void)
set_status(i, UNDEF);
}
for (i=0;i<VFREGS;i++) {
live.fate[i].status = UNDEF;
live.fate[i].realreg = -1;
live.fate[i].needflush = NF_SCRATCH;
}
for (i=0; i<VREGS; i++) {
if (i < 16) { /* First 16 registers map to 68k registers */
live.state[i].mem = &regs.regs[i];
@ -1502,6 +1654,22 @@ void init_comp(void)
set_status(NEXT_HANDLER, UNDEF);
for (i = 0; i < VFREGS; i++) {
if (i < 8) { /* First 8 registers map to 68k FPU registers */
live.fate[i].mem = (uae_u32*)(&regs.fp[i].fp);
live.fate[i].needflush = NF_TOMEM;
live.fate[i].status = INMEM;
}
else if (i == FP_RESULT) {
live.fate[i].mem = (uae_u32*)(&regs.fp_result.fp);
live.fate[i].needflush = NF_TOMEM;
live.fate[i].status = INMEM;
}
else
live.fate[i].mem = (uae_u32*)(&scratch.fregs[i]);
}
for (i=0; i<N_REGS; i++) {
live.nat[i].touched = 0;
live.nat[i].nholds = 0;
@ -1512,6 +1680,10 @@ void init_comp(void)
}
}
for (i=0;i<N_FREGS;i++) {
live.fat[i].nholds = 0;
}
touchcnt = 1;
m68k_pc_offset = 0;
live.flags_in_flags = TRASH;
@ -1528,6 +1700,12 @@ void flush(int save_regs)
sync_m68k_pc(); /* mid level */
if (save_regs) {
for (i = 0; i < VFREGS; i++) {
if (live.fate[i].needflush == NF_SCRATCH ||
live.fate[i].status == CLEAN) {
f_disassociate(i);
}
}
for (i=0; i<=FLAGTMP; i++) {
switch(live.state[i].status) {
case INMEM:
@ -1548,6 +1726,11 @@ void flush(int save_regs)
break;
}
}
for (i = 0; i <= FP_RESULT; i++) {
if (live.fate[i].status == DIRTY) {
f_evict(i);
}
}
}
}
@ -1565,6 +1748,9 @@ void freescratch(void)
for (i = S1; i < VREGS; i++)
forget_about(i);
for (i = FS1; i <= FS3; i++) // only FS1-FS3
f_forget_about(i);
}
/********************************************************************
@ -1598,6 +1784,9 @@ static void flush_all(void)
tomem(i);
}
}
for (i = FP_RESULT; i <= FS3; i++) // only FP_RESULT and FS1-FS3, FP0-FP7 are call save
if (f_isinreg(i))
f_evict(i);
}
/* Make sure all registers that will get clobbered by a call are
@ -1619,6 +1808,10 @@ static void prepare_for_call_2(void)
free_nreg(i);
}
for (i = 0; i < 4; i++) // only FP_RESULT and FS1-FS3, FP0-FP7 are call save
if (live.fat[i].nholds > 0)
f_free_nreg(i);
live.flags_in_flags = TRASH; /* Note: We assume we already rescued the
flags at the very start of the call_r
functions! */
@ -2038,7 +2231,6 @@ STATIC_INLINE int block_check_checksum(blockinfo* bi)
means we have to move it into the needs-to-be-flushed list */
bi->handler_to_use = bi->handler;
set_dhtu(bi, bi->direct_handler);
bi->status = BI_CHECKING;
isgood = called_check_checksum(bi) != 0;
}
@ -2694,7 +2886,7 @@ void compile_block(cpu_history* pc_hist, int blocklen, int totcycles)
if (next_pc_p) { /* A branch was registered */
uintptr t1 = next_pc_p;
uintptr t2 = taken_pc_p;
int cc = branch_cc;
int cc = branch_cc; // this is native (ARM) condition code
uae_u32* branchadd;
uae_u32* tba;
@ -2707,7 +2899,10 @@ void compile_block(cpu_history* pc_hist, int blocklen, int totcycles)
the 68k branch is taken. */
t1 = taken_pc_p;
t2 = next_pc_p;
cc = branch_cc^1;
if(cc < NATIVE_CC_AL)
cc = branch_cc^1;
else if(cc > NATIVE_CC_AL)
cc = 0x10 | (branch_cc ^ 0xf);
}
tmp = live; /* ouch! This is big... */

View file

@ -1803,32 +1803,32 @@ extern const struct comptbl op_smalltbl_0_comp_ff[] = {
{ NULL, 0x00000001, 61488 }, /* MMUOP030 */
{ NULL, 0x00000001, 61496 }, /* MMUOP030 */
{ NULL, 0x00000001, 61497 }, /* MMUOP030 */
{ NULL, 0x00000022, 61952 }, /* FPP */
{ NULL, 0x00000022, 61960 }, /* FPP */
{ NULL, 0x00000022, 61968 }, /* FPP */
{ NULL, 0x00000022, 61976 }, /* FPP */
{ NULL, 0x00000022, 61984 }, /* FPP */
{ NULL, 0x00000022, 61992 }, /* FPP */
{ NULL, 0x00000022, 62000 }, /* FPP */
{ NULL, 0x00000022, 62008 }, /* FPP */
{ NULL, 0x00000022, 62009 }, /* FPP */
{ NULL, 0x00000022, 62010 }, /* FPP */
{ NULL, 0x00000022, 62011 }, /* FPP */
{ NULL, 0x00000022, 62012 }, /* FPP */
{ NULL, 0x00000006, 62016 }, /* FScc */
{ op_f200_0_comp_ff, 0x00000022, 61952 }, /* FPP */
{ op_f208_0_comp_ff, 0x00000022, 61960 }, /* FPP */
{ op_f210_0_comp_ff, 0x00000022, 61968 }, /* FPP */
{ op_f218_0_comp_ff, 0x00000022, 61976 }, /* FPP */
{ op_f220_0_comp_ff, 0x00000022, 61984 }, /* FPP */
{ op_f228_0_comp_ff, 0x00000022, 61992 }, /* FPP */
{ op_f230_0_comp_ff, 0x00000022, 62000 }, /* FPP */
{ op_f238_0_comp_ff, 0x00000022, 62008 }, /* FPP */
{ op_f239_0_comp_ff, 0x00000022, 62009 }, /* FPP */
{ op_f23a_0_comp_ff, 0x00000022, 62010 }, /* FPP */
{ op_f23b_0_comp_ff, 0x00000022, 62011 }, /* FPP */
{ op_f23c_0_comp_ff, 0x00000022, 62012 }, /* FPP */
{ op_f240_0_comp_ff, 0x00000006, 62016 }, /* FScc */
{ NULL, 0x00000021, 62024 }, /* FDBcc */
{ NULL, 0x00000006, 62032 }, /* FScc */
{ NULL, 0x00000006, 62040 }, /* FScc */
{ NULL, 0x00000006, 62048 }, /* FScc */
{ NULL, 0x00000006, 62056 }, /* FScc */
{ NULL, 0x00000006, 62064 }, /* FScc */
{ NULL, 0x00000006, 62072 }, /* FScc */
{ NULL, 0x00000006, 62073 }, /* FScc */
{ op_f250_0_comp_ff, 0x00000006, 62032 }, /* FScc */
{ op_f258_0_comp_ff, 0x00000006, 62040 }, /* FScc */
{ op_f260_0_comp_ff, 0x00000006, 62048 }, /* FScc */
{ op_f268_0_comp_ff, 0x00000006, 62056 }, /* FScc */
{ op_f270_0_comp_ff, 0x00000006, 62064 }, /* FScc */
{ op_f278_0_comp_ff, 0x00000006, 62072 }, /* FScc */
{ op_f279_0_comp_ff, 0x00000006, 62073 }, /* FScc */
{ NULL, 0x00000021, 62074 }, /* FTRAPcc */
{ NULL, 0x00000021, 62075 }, /* FTRAPcc */
{ NULL, 0x00000021, 62076 }, /* FTRAPcc */
{ NULL, 0x00000005, 62080 }, /* FBcc */
{ NULL, 0x00000005, 62144 }, /* FBcc */
{ op_f280_0_comp_ff, 0x00000005, 62080 }, /* FBcc */
{ op_f2c0_0_comp_ff, 0x00000005, 62144 }, /* FBcc */
{ NULL, 0x00000020, 62224 }, /* FSAVE */
{ NULL, 0x00000020, 62240 }, /* FSAVE */
{ NULL, 0x00000020, 62248 }, /* FSAVE */
@ -3675,32 +3675,32 @@ extern const struct comptbl op_smalltbl_0_comp_nf[] = {
{ NULL, 0x00000001, 61488 }, /* MMUOP030 */
{ NULL, 0x00000001, 61496 }, /* MMUOP030 */
{ NULL, 0x00000001, 61497 }, /* MMUOP030 */
{ NULL, 0x00000022, 61952 }, /* FPP */
{ NULL, 0x00000022, 61960 }, /* FPP */
{ NULL, 0x00000022, 61968 }, /* FPP */
{ NULL, 0x00000022, 61976 }, /* FPP */
{ NULL, 0x00000022, 61984 }, /* FPP */
{ NULL, 0x00000022, 61992 }, /* FPP */
{ NULL, 0x00000022, 62000 }, /* FPP */
{ NULL, 0x00000022, 62008 }, /* FPP */
{ NULL, 0x00000022, 62009 }, /* FPP */
{ NULL, 0x00000022, 62010 }, /* FPP */
{ NULL, 0x00000022, 62011 }, /* FPP */
{ NULL, 0x00000022, 62012 }, /* FPP */
{ NULL, 0x00000006, 62016 }, /* FScc */
{ op_f200_0_comp_nf, 0x00000022, 61952 }, /* FPP */
{ op_f208_0_comp_nf, 0x00000022, 61960 }, /* FPP */
{ op_f210_0_comp_nf, 0x00000022, 61968 }, /* FPP */
{ op_f218_0_comp_nf, 0x00000022, 61976 }, /* FPP */
{ op_f220_0_comp_nf, 0x00000022, 61984 }, /* FPP */
{ op_f228_0_comp_nf, 0x00000022, 61992 }, /* FPP */
{ op_f230_0_comp_nf, 0x00000022, 62000 }, /* FPP */
{ op_f238_0_comp_nf, 0x00000022, 62008 }, /* FPP */
{ op_f239_0_comp_nf, 0x00000022, 62009 }, /* FPP */
{ op_f23a_0_comp_nf, 0x00000022, 62010 }, /* FPP */
{ op_f23b_0_comp_nf, 0x00000022, 62011 }, /* FPP */
{ op_f23c_0_comp_nf, 0x00000022, 62012 }, /* FPP */
{ op_f240_0_comp_nf, 0x00000006, 62016 }, /* FScc */
{ NULL, 0x00000021, 62024 }, /* FDBcc */
{ NULL, 0x00000006, 62032 }, /* FScc */
{ NULL, 0x00000006, 62040 }, /* FScc */
{ NULL, 0x00000006, 62048 }, /* FScc */
{ NULL, 0x00000006, 62056 }, /* FScc */
{ NULL, 0x00000006, 62064 }, /* FScc */
{ NULL, 0x00000006, 62072 }, /* FScc */
{ NULL, 0x00000006, 62073 }, /* FScc */
{ op_f250_0_comp_nf, 0x00000006, 62032 }, /* FScc */
{ op_f258_0_comp_nf, 0x00000006, 62040 }, /* FScc */
{ op_f260_0_comp_nf, 0x00000006, 62048 }, /* FScc */
{ op_f268_0_comp_nf, 0x00000006, 62056 }, /* FScc */
{ op_f270_0_comp_nf, 0x00000006, 62064 }, /* FScc */
{ op_f278_0_comp_nf, 0x00000006, 62072 }, /* FScc */
{ op_f279_0_comp_nf, 0x00000006, 62073 }, /* FScc */
{ NULL, 0x00000021, 62074 }, /* FTRAPcc */
{ NULL, 0x00000021, 62075 }, /* FTRAPcc */
{ NULL, 0x00000021, 62076 }, /* FTRAPcc */
{ NULL, 0x00000005, 62080 }, /* FBcc */
{ NULL, 0x00000005, 62144 }, /* FBcc */
{ op_f280_0_comp_nf, 0x00000005, 62080 }, /* FBcc */
{ op_f2c0_0_comp_nf, 0x00000005, 62144 }, /* FBcc */
{ NULL, 0x00000020, 62224 }, /* FSAVE */
{ NULL, 0x00000020, 62240 }, /* FSAVE */
{ NULL, 0x00000020, 62248 }, /* FSAVE */

View file

@ -1446,6 +1446,28 @@ extern compop_func op_e7e8_0_comp_ff;
extern compop_func op_e7f0_0_comp_ff;
extern compop_func op_e7f8_0_comp_ff;
extern compop_func op_e7f9_0_comp_ff;
extern compop_func op_f200_0_comp_ff;
extern compop_func op_f208_0_comp_ff;
extern compop_func op_f210_0_comp_ff;
extern compop_func op_f218_0_comp_ff;
extern compop_func op_f220_0_comp_ff;
extern compop_func op_f228_0_comp_ff;
extern compop_func op_f230_0_comp_ff;
extern compop_func op_f238_0_comp_ff;
extern compop_func op_f239_0_comp_ff;
extern compop_func op_f23a_0_comp_ff;
extern compop_func op_f23b_0_comp_ff;
extern compop_func op_f23c_0_comp_ff;
extern compop_func op_f240_0_comp_ff;
extern compop_func op_f250_0_comp_ff;
extern compop_func op_f258_0_comp_ff;
extern compop_func op_f260_0_comp_ff;
extern compop_func op_f268_0_comp_ff;
extern compop_func op_f270_0_comp_ff;
extern compop_func op_f278_0_comp_ff;
extern compop_func op_f279_0_comp_ff;
extern compop_func op_f280_0_comp_ff;
extern compop_func op_f2c0_0_comp_ff;
extern compop_func op_f600_0_comp_ff;
extern compop_func op_f608_0_comp_ff;
extern compop_func op_f610_0_comp_ff;
@ -2893,6 +2915,28 @@ extern compop_func op_e7e8_0_comp_nf;
extern compop_func op_e7f0_0_comp_nf;
extern compop_func op_e7f8_0_comp_nf;
extern compop_func op_e7f9_0_comp_nf;
extern compop_func op_f200_0_comp_nf;
extern compop_func op_f208_0_comp_nf;
extern compop_func op_f210_0_comp_nf;
extern compop_func op_f218_0_comp_nf;
extern compop_func op_f220_0_comp_nf;
extern compop_func op_f228_0_comp_nf;
extern compop_func op_f230_0_comp_nf;
extern compop_func op_f238_0_comp_nf;
extern compop_func op_f239_0_comp_nf;
extern compop_func op_f23a_0_comp_nf;
extern compop_func op_f23b_0_comp_nf;
extern compop_func op_f23c_0_comp_nf;
extern compop_func op_f240_0_comp_nf;
extern compop_func op_f250_0_comp_nf;
extern compop_func op_f258_0_comp_nf;
extern compop_func op_f260_0_comp_nf;
extern compop_func op_f268_0_comp_nf;
extern compop_func op_f270_0_comp_nf;
extern compop_func op_f278_0_comp_nf;
extern compop_func op_f279_0_comp_nf;
extern compop_func op_f280_0_comp_nf;
extern compop_func op_f2c0_0_comp_nf;
extern compop_func op_f600_0_comp_nf;
extern compop_func op_f608_0_comp_nf;
extern compop_func op_f610_0_comp_nf;

View file

@ -1,52 +0,0 @@
/*
* compiler/flags_arm.h - Native flags definitions for ARM
*
* Copyright (c) 2013 Jens Heitmann of ARAnyM dev team (see AUTHORS)
*
* Inspired by Christian Bauer's Basilisk II
*
* Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
*
* Adaptation for Basilisk II and improvements, copyright 2000-2002
* Gwenole Beauchesne
*
* Basilisk II (C) 1997-2002 Christian Bauer
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef NATIVE_FLAGS_ARM_H
#define NATIVE_FLAGS_ARM_H
/* Native integer code conditions */
enum {
NATIVE_CC_EQ = 0,
NATIVE_CC_NE = 1,
NATIVE_CC_CS = 2,
NATIVE_CC_CC = 3,
NATIVE_CC_MI = 4,
NATIVE_CC_PL = 5,
NATIVE_CC_VS = 6,
NATIVE_CC_VC = 7,
NATIVE_CC_HI = 8,
NATIVE_CC_LS = 9,
NATIVE_CC_GE = 10,
NATIVE_CC_LT = 11,
NATIVE_CC_GT = 12,
NATIVE_CC_LE = 13,
NATIVE_CC_AL = 14
};
#endif /* NATIVE_FLAGS_ARM_H */

View file

@ -7,9 +7,6 @@
* Adaptation for ARAnyM/ARM, copyright 2001-2015
* Milan Jurik, Jens Heitmann
*
* Adaptation for Basilisk II and improvements, copyright 2000-2005
* Gwenole Beauchesne
*
* Basilisk II (C) 1997-2005 Christian Bauer
*
* This program is free software; you can redistribute it and/or modify
@ -121,13 +118,14 @@
#define DISABLE_I_ROXLW
#define DISABLE_I_ROXRW
//#define DISABLE_I_MULL
#define DISABLE_I_FPP
#define DISABLE_I_FBCC
#define DISABLE_I_FSCC
//#define DISABLE_I_FPP
//#define DISABLE_I_FBCC
//#define DISABLE_I_FSCC
//#define DISABLE_I_MOVE16
#define DISABLE_I_DIVU // DIVU works, but we have to think about exceptions. No big performance enhancement.
#define RETURN "return 0;"
#define BOOL_TYPE "int"
@ -1222,9 +1220,6 @@ static void gen_dbcc(uae_u32 opcode, struct instr *curi, char* ssize) {
comprintf("\tarm_ADD_l_ri(PC_P, m68k_pc_offset);\n");
comprintf("\tm68k_pc_offset=0;\n");
start_brace();
comprintf("\tint nsrc = scratchie++;\n");
if (curi->cc >= 2) {
comprintf("\tmake_flags_live();\n"); /* Load the flags */
}
@ -1262,7 +1257,6 @@ static void gen_dbcc(uae_u32 opcode, struct instr *curi, char* ssize) {
break;
default: abort();
}
genastore("src", curi->smode, "srcreg", curi->size, "src");
gen_update_next_handler();
}
@ -2071,7 +2065,6 @@ gen_opcode(unsigned long int opcode) {
case i_SBCD:
failure;
/* I don't think so! */
break;
case i_ADD:
@ -2097,7 +2090,6 @@ gen_opcode(unsigned long int opcode) {
case i_ABCD:
failure;
/* No BCD maths for me.... */
break;
case i_NEG:
@ -2116,7 +2108,6 @@ gen_opcode(unsigned long int opcode) {
case i_NBCD:
failure;
/* Nope! */
break;
case i_CLR:
@ -2362,7 +2353,8 @@ gen_opcode(unsigned long int opcode) {
isjump;
genamode(curi->smode, "srcreg", curi->size, "src", 0, 0);
start_brace();
comprintf("\tuae_u32 retadd=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+m68k_pc_offset;\n");
comprintf(
"\tuae_u32 retadd=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+m68k_pc_offset;\n");
comprintf("\tint ret=scratchie++;\n"
"\tmov_l_ri(ret,retadd);\n"
"\tsub_l_ri(15,4);\n"
@ -2391,10 +2383,12 @@ gen_opcode(unsigned long int opcode) {
#ifdef DISABLE_I_BSR
failure;
#endif
is_const_jump;
is_const_jump
;
genamode(curi->smode, "srcreg", curi->size, "src", 1, 0);
start_brace();
comprintf("\tuae_u32 retadd=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+m68k_pc_offset;\n");
comprintf(
"\tuae_u32 retadd=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+m68k_pc_offset;\n");
comprintf("\tint ret=scratchie++;\n"
"\tmov_l_ri(ret,retadd);\n"
"\tsub_l_ri(15,4);\n"
@ -2427,9 +2421,10 @@ gen_opcode(unsigned long int opcode) {
comprintf("\tv2 = get_const(src);\n");
comprintf("\tregister_branch(v1, v2, %d);\n", cond_codes[curi->cc]);
comprintf("\tmake_flags_live();\n"); /* Load the flags */
isjump;
isjump;
} else {
is_const_jump;
is_const_jump
;
}
switch (curi->cc) {
@ -3124,11 +3119,16 @@ generate_one_opcode(int rp, int noflags)
fprintf(stblfile, "{ NULL, 0x%08x, %ld }, /* %s */\n", flags, opcode, name);
com_discard();
} else {
const char *tbl = noflags ? "nf" : "ff";
printf ("/* %s */\n", outopcode (opcode));
fprintf(stblfile, "{ op_%lx_%d_comp_%s, 0x%08x, %ld }, /* %s */\n", opcode, postfix, tbl, flags, opcode, name);
fprintf(headerfile, "extern compop_func op_%lx_%d_comp_%s;\n", opcode, postfix, tbl);
printf("uae_u32 REGPARAM2 op_%lx_%d_comp_%s(uae_u32 opcode)\n{\n", opcode, postfix, tbl);
if (noflags) {
fprintf(stblfile, "{ op_%lx_%d_comp_nf, 0x%08x, %ld }, /* %s */\n", opcode, postfix, flags, opcode, name);
fprintf(headerfile, "extern compop_func op_%lx_%d_comp_nf;\n", opcode, postfix);
printf("uae_u32 REGPARAM2 op_%lx_%d_comp_nf(uae_u32 opcode)\n{\n", opcode, postfix);
} else {
fprintf(stblfile, "{ op_%lx_%d_comp_ff, 0x%08x, %ld }, /* %s */\n", opcode, postfix, flags, opcode, name);
fprintf(headerfile, "extern compop_func op_%lx_%d_comp_ff;\n", opcode, postfix);
printf("uae_u32 REGPARAM2 op_%lx_%d_comp_ff(uae_u32 opcode)\n{\n", opcode, postfix);
}
com_flush();
}
}

View file

@ -348,8 +348,8 @@ static void build_cpufunctbl (void)
write_log(_T("CPU=%d, FPU=%d%s, JIT%s=%d."),
currprefs.cpu_model,
currprefs.fpu_model, currprefs.fpu_model ? (currprefs.fpu_softfloat ? _T(" (softfloat)") : _T(" (host)")) : _T(""),
currprefs.cachesize ? _T("=CPU") : _T(""),
currprefs.fpu_model, currprefs.fpu_model ? _T(" (host)") : _T(""),
currprefs.cachesize ? (currprefs.compfpu ? _T("=CPU/FPU") : _T("=CPU")) : _T(""),
currprefs.cachesize);
regs.address_space_mask = 0xffffffff;
@ -428,8 +428,7 @@ static int check_prefs_changed_cpu2(void)
|| currprefs.cpu_model != changed_prefs.cpu_model
|| currprefs.fpu_model != changed_prefs.fpu_model
|| currprefs.fpu_no_unimplemented != changed_prefs.fpu_no_unimplemented
|| currprefs.cpu_compatible != changed_prefs.cpu_compatible
|| currprefs.fpu_softfloat != changed_prefs.fpu_softfloat) {
|| currprefs.cpu_compatible != changed_prefs.cpu_compatible) {
cpu_prefs_changed_flag |= 1;
}
if (changed
@ -1740,6 +1739,10 @@ bool is_hardreset(void)
return cpu_hardreset;
}
#ifdef USE_JIT_FPU
/*
 * Save area for the host VFP registers around m68k_go().
 * save_host_fp_regs()/restore_host_fp_regs() transfer d7-d15, which is
 * NINE 8-byte registers (72 bytes), so the buffer must hold 9 * 8 bytes;
 * the former 8 * 8 size was overrun by one register.
 * NOTE(review): VSTM/VLDM expect a word-aligned base address; a byte array
 * carries no such guarantee - confirm alignment on the target toolchain.
 */
static uae_u8 fp_buffer[9 * 8];
#endif
void m68k_go (int may_quit)
{
int hardboot = 1;
@ -1750,6 +1753,10 @@ void m68k_go (int may_quit)
abort ();
}
#ifdef USE_JIT_FPU
save_host_fp_regs(fp_buffer);
#endif
reset_frame_rate_hack ();
update_68k_cycles ();
@ -1808,7 +1815,6 @@ void m68k_go (int may_quit)
if (cpu_prefs_changed_flag & 1) {
uaecptr pc = m68k_getpc();
prefs_changed_cpu();
fpu_modechange();
build_cpufunctbl();
m68k_setpc_normal(pc);
fill_prefetch();
@ -1862,6 +1868,10 @@ void m68k_go (int may_quit)
regs.pc_p = NULL;
regs.pc_oldp = NULL;
#ifdef USE_JIT_FPU
restore_host_fp_regs(fp_buffer);
#endif
in_m68k_go--;
}
@ -1977,8 +1987,7 @@ uae_u8 *restore_cpu_extra (uae_u8 *src)
currprefs.m68k_speed = changed_prefs.m68k_speed = -1;
if (flags & 16)
currprefs.m68k_speed = changed_prefs.m68k_speed = (flags >> 24) * CYCLE_UNIT;
if (flags & 32)
currprefs.m68k_speed = changed_prefs.m68k_speed = -30;
return src;
}
@ -1997,7 +2006,6 @@ uae_u8 *save_cpu_extra (int *len, uae_u8 *dstptr)
flags |= currprefs.m68k_speed < 0 ? 4 : 0;
flags |= currprefs.cachesize > 0 ? 8 : 0;
flags |= currprefs.m68k_speed > 0 ? 16 : 0;
flags |= currprefs.m68k_speed < -25 ? 32 : 0;
if (currprefs.m68k_speed > 0)
flags |= (currprefs.m68k_speed / CYCLE_UNIT) << 24;
save_u32 (flags);

View file

@ -1011,32 +1011,38 @@ int handle_msgpump()
break;
case SDL_KEYDOWN:
// If the Enter GUI key was pressed, handle it
if (enter_gui_key && rEvent.key.keysym.sym == enter_gui_key && rEvent.key.repeat == 0)
#ifdef USE_SDL2
if (rEvent.key.repeat == 0)
{
inputdevice_add_inputcode(AKS_ENTERGUI, 1, nullptr);
break;
}
// If the Quit emulator key was pressed, handle it
if (quit_key && rEvent.key.keysym.sym == quit_key && rEvent.key.repeat == 0)
{
inputdevice_add_inputcode(AKS_QUIT, 1, nullptr);
break;
}
#endif
// If the Enter GUI key was pressed, handle it
if (enter_gui_key && rEvent.key.keysym.sym == enter_gui_key)
{
inputdevice_add_inputcode(AKS_ENTERGUI, 1, nullptr);
break;
}
if (action_replay_button && rEvent.key.keysym.sym == action_replay_button && rEvent.key.repeat == 0)
{
inputdevice_add_inputcode(AKS_FREEZEBUTTON, 1, nullptr);
break;
}
// If the Quit emulator key was pressed, handle it
if (quit_key && rEvent.key.keysym.sym == quit_key)
{
inputdevice_add_inputcode(AKS_QUIT, 1, nullptr);
break;
}
if (fullscreen_key && rEvent.key.keysym.sym == fullscreen_key && rEvent.key.repeat == 0)
{
inputdevice_add_inputcode(AKS_TOGGLEWINDOWEDFULLSCREEN, 1, nullptr);
break;
}
if (action_replay_button && rEvent.key.keysym.sym == action_replay_button)
{
inputdevice_add_inputcode(AKS_FREEZEBUTTON, 1, nullptr);
break;
}
if (fullscreen_key && rEvent.key.keysym.sym == fullscreen_key)
{
inputdevice_add_inputcode(AKS_TOGGLEWINDOWEDFULLSCREEN, 1, nullptr);
break;
}
#ifdef USE_SDL2
}
#endif
// If the reset combination was pressed, handle it
#ifdef USE_SDL1
// Strangely in FBCON left window is seen as left alt ??
@ -1063,34 +1069,35 @@ int handle_msgpump()
if (rEvent.key.keysym.scancode == 58 && rEvent.key.keysym.sym == SDLK_UNKNOWN)
rEvent.key.keysym.sym = SDLK_CAPSLOCK;
#endif
if (rEvent.key.keysym.sym == SDLK_CAPSLOCK && rEvent.key.repeat == 0)
{
// Treat CAPSLOCK as a toggle. If on, set off and vice/versa
ioctl(0, KDGKBLED, &kbd_flags);
ioctl(0, KDGETLED, &kbd_led_status);
if (kbd_flags & 07 & LED_CAP)
{
// On, so turn off
kbd_led_status &= ~LED_CAP;
kbd_flags &= ~LED_CAP;
inputdevice_do_keyboard(AK_CAPSLOCK, 0);
}
else
{
// Off, so turn on
kbd_led_status |= LED_CAP;
kbd_flags |= LED_CAP;
inputdevice_do_keyboard(AK_CAPSLOCK, 1);
}
ioctl(0, KDSETLED, kbd_led_status);
ioctl(0, KDSKBLED, kbd_flags);
break;
}
// Handle all other keys
#ifdef USE_SDL2
if (rEvent.key.repeat == 0)
{
#endif
if (rEvent.key.keysym.sym == SDLK_CAPSLOCK)
{
// Treat CAPSLOCK as a toggle. If on, set off and vice/versa
ioctl(0, KDGKBLED, &kbd_flags);
ioctl(0, KDGETLED, &kbd_led_status);
if (kbd_flags & 07 & LED_CAP)
{
// On, so turn off
kbd_led_status &= ~LED_CAP;
kbd_flags &= ~LED_CAP;
inputdevice_do_keyboard(AK_CAPSLOCK, 0);
}
else
{
// Off, so turn on
kbd_led_status |= LED_CAP;
kbd_flags |= LED_CAP;
inputdevice_do_keyboard(AK_CAPSLOCK, 1);
}
ioctl(0, KDSETLED, kbd_led_status);
ioctl(0, KDSKBLED, kbd_flags);
break;
}
// Handle all other keys
#ifdef USE_SDL1
if (keyboard_type == KEYCODE_UNK)
inputdevice_translatekeycode(0, rEvent.key.keysym.sym, 1);
@ -1098,12 +1105,14 @@ int handle_msgpump()
inputdevice_translatekeycode(0, rEvent.key.keysym.scancode, 1);
#elif USE_SDL2
inputdevice_translatekeycode(0, rEvent.key.keysym.scancode, 1);
#endif
}
#endif
break;
case SDL_KEYUP:
#ifdef USE_SDL2
if (rEvent.key.repeat == 0)
{
#endif
#ifdef USE_SDL1
if (keyboard_type == KEYCODE_UNK)
inputdevice_translatekeycode(0, rEvent.key.keysym.sym, 0);
@ -1111,8 +1120,8 @@ int handle_msgpump()
inputdevice_translatekeycode(0, rEvent.key.keysym.scancode, 0);
#elif USE_SDL2
inputdevice_translatekeycode(0, rEvent.key.keysym.scancode, 0);
#endif
}
#endif
break;
case SDL_MOUSEBUTTONDOWN:
@ -1148,14 +1157,14 @@ int handle_msgpump()
const auto x = rEvent.motion.xrel;
const auto y = rEvent.motion.yrel;
#if defined (ANDROIDSDL)
if(rEvent.motion.x == 0 && x > -4)
x = -4;
if(rEvent.motion.y == 0 && y > -4)
y = -4;
if(rEvent.motion.x == currprefs.gfx_size.width - 1 && x < 4)
x = 4;
if(rEvent.motion.y == currprefs.gfx_size.height - 1 && y < 4)
y = 4;
if (rEvent.motion.x == 0 && x > -4)
x = -4;
if (rEvent.motion.y == 0 && y > -4)
y = -4;
if (rEvent.motion.x == currprefs.gfx_size.width - 1 && x < 4)
x = 4;
if (rEvent.motion.y == currprefs.gfx_size.height - 1 && y < 4)
y = 4;
#endif //ANDROIDSDL
setmousestate(0, 0, x * mouseScale, 0);
setmousestate(0, 1, y * mouseScale, 0);

View file

@ -359,6 +359,7 @@ int graphics_setup(void)
void toggle_fullscreen()
{
#ifdef USE_SDL2
Uint32 FullscreenFlag = SDL_WINDOW_FULLSCREEN;
if (sdlWindow)
{
@ -366,6 +367,7 @@ void toggle_fullscreen()
SDL_SetWindowFullscreen(sdlWindow, is_fullscreen ? 0 : FullscreenFlag);
SDL_ShowCursor(is_fullscreen);
}
#endif
}
#ifdef USE_DISPMANX

View file

@ -131,6 +131,7 @@ static void parse_compatibility(struct uae_prefs* p, xmlNode* node)
{
p->cachesize = MAX_JIT_CACHE;
p->address_space_24 = false;
p->compfpu = true;
}
else if (strcmp(reinterpret_cast<const char *>(content), "flexible-cpu-cycles") == 0)
p->cpu_compatible = false;
@ -315,12 +316,18 @@ static void parse_peripheral(struct uae_prefs* p, xmlNode* node)
}
else if (strcmp(reinterpret_cast<const char *>(content), "jit") == 0)
{
const auto attr = xmlGetProp(curr_node, reinterpret_cast<const xmlChar *>("memory"));
auto attr = xmlGetProp(curr_node, reinterpret_cast<const xmlChar *>("memory"));
if (attr != nullptr)
{
p->cachesize = atoi(reinterpret_cast<const char *>(attr)) / 1024;
xmlFree(attr);
}
attr = xmlGetProp(curr_node, (const xmlChar *)_T("fpu"));
if (attr != NULL)
{
	/* JIT FPU stays at its current setting unless explicitly disabled. */
	if (strcmp((const char *)attr, "false") == 0)
		p->compfpu = false;
	/* xmlGetProp() returns an allocated copy that the caller must free. */
	xmlFree(attr);
}
}
xmlFree(content);
}

View file

@ -2,6 +2,8 @@
.arm
.global save_host_fp_regs
.global restore_host_fp_regs
.global copy_screen_8bit
.global copy_screen_16bit_swap_arm
.global copy_screen_32bit_to_16bit_arm
@ -10,6 +12,20 @@
.align 8
@----------------------------------------------------------------
@ save_host_fp_regs
@----------------------------------------------------------------
@ Stores VFP registers d7-d15 (9 registers, 72 bytes) to the buffer whose
@ address is in r0. r0 is advanced past the stored data (write-back form).
@ The buffer must therefore provide at least 72 bytes.
save_host_fp_regs:
vstmia r0!, {d7-d15}
bx lr
@----------------------------------------------------------------
@ restore_host_fp_regs
@----------------------------------------------------------------
@ Reloads VFP registers d7-d15 (9 registers, 72 bytes) from the buffer whose
@ address is in r0. r0 is advanced past the loaded data (write-back form).
@ Counterpart of save_host_fp_regs; expects the same 72-byte layout.
restore_host_fp_regs:
vldmia r0!, {d7-d15}
bx lr
@----------------------------------------------------------------
@ copy_screen_8bit

View file

@ -6,50 +6,6 @@
* Copyright 1995 - 1998 Bernd Schmidt
*/
/*
* Please note: Many things are configurable with command line parameters,
* and you can put anything you can pass on the command line into a
* configuration file ~/.uaerc. Please read the documentation for more
* information.
*
* NOTE NOTE NOTE
* Whenever you change something in this file, you have to "make clean"
* afterwards.
* Don't remove the '#' signs. If you want to enable something, move it out
* of the C comment block, if you want to disable something, move it inside
* the block.
*/
/*
* When USE_COMPILER is defined, a m68k->i386 instruction compiler will be
* used. This is experimental. It has only been tested on a Linux/i386 ELF
* machine, although it might work on other i386 Unices.
* This is supposed to speed up application programs. It will not work very
* well for hardware bangers like games and demos, in fact it will be much
* slower. It can also be slower for some applications and/or benchmarks.
* It needs a lot of tuning. Please let me know your results with this.
* The second define, RELY_ON_LOADSEG_DETECTION, decides how the compiler
* tries to detect self-modifying code. If it is not set, the first bytes
* of every compiled routine are used as checksum before executing the
* routine. If it is set, the UAE filesystem will perform some checks to
* detect whether an executable is being loaded. This is less reliable
* (it won't work if you don't use the harddisk emulation, so don't try to
* use floppies or even the RAM disk), but much faster.
*
* @@@ NOTE: This option is unfortunately broken in this version. Don't
* try to use it. @@@
*
#define USE_COMPILER
#define RELY_ON_LOADSEG_DETECTION
*/
/***************************************************************************
* Operating system/machine specific options
* Configure these for your CPU. The default settings should work on any
* machine, but may not give optimal performance everywhere.
* (These don't do very much yet, except HAVE_RDTSC
*/
/*
* [pismy] defines virtual keys
* Still hard-coded but can be easily changed by recompiling the project...

View file

@ -107,15 +107,15 @@ static NavigationMap navMap[] =
{ "68020", "CPU and FPU", "68882", "68010", "68030" },
{ "68030", "CPU and FPU", "CPU internal", "68020", "68040" },
{ "68040", "CPU and FPU", "FPUstrict", "68030", "CPU24Bit" },
{ "CPU24Bit", "CPU and FPU", "SoftFloat", "68040", "CPUComp" },
{ "CPUComp", "CPU and FPU", "SoftFloat", "CPU24Bit", "JIT" },
{ "JIT", "CPU and FPU", "SoftFloat", "CPUComp", "68000" },
{ "FPUnone", "68000", "7 Mhz", "SoftFloat", "68881" },
{ "CPU24Bit", "CPU and FPU", "FPUJIT", "68040", "CPUComp" },
{ "CPUComp", "CPU and FPU", "FPUJIT", "CPU24Bit", "JIT" },
{ "JIT", "CPU and FPU", "FPUJIT", "CPUComp", "68000" },
{ "FPUnone", "68000", "7 Mhz", "FPUJIT", "68881" },
{ "68881", "68010", "14 Mhz", "FPUnone", "68882" },
{ "68882", "68020", "25 Mhz", "68881", "CPU internal" },
{ "CPU internal", "68030", "Fastest", "68882", "FPUstrict" },
{ "FPUstrict", "68040", "Fastest", "CPU internal", "SoftFloat" },
{ "SoftFloat", "CPU24Bit", "Fastest", "FPUstrict", "FPUnone" },
{ "FPUstrict", "68040", "Fastest", "CPU internal", "FPUJIT" },
{ "FPUJIT", "CPU24Bit", "Fastest", "FPUstrict", "FPUnone" },
{ "7 Mhz", "FPUnone", "CPU and FPU", "Fastest", "14 Mhz" },
{ "14 Mhz", "68881", "CPU and FPU", "7 Mhz", "25 Mhz" },
{ "25 Mhz", "68882", "CPU and FPU", "14 Mhz", "Fastest" },

View file

@ -38,7 +38,7 @@ static gcn::UaeRadioButton* optFPU68881;
static gcn::UaeRadioButton* optFPU68882;
static gcn::UaeRadioButton* optFPUinternal;
static gcn::UaeCheckBox* chkFPUstrict;
static gcn::UaeCheckBox* chkSoftFloat;
static gcn::UaeCheckBox* chkFPUJIT;
static gcn::Window* grpCPUSpeed;
static gcn::UaeRadioButton* opt7Mhz;
static gcn::UaeRadioButton* opt14Mhz;
@ -186,14 +186,23 @@ class JITActionListener : public gcn::ActionListener
public:
void action(const gcn::ActionEvent& actionEvent) override
{
if (chkJIT->isSelected())
if (actionEvent.getSource() == chkJIT)
{
changed_prefs.cpu_compatible = false;
changed_prefs.cachesize = MAX_JIT_CACHE;
if (chkJIT->isSelected())
{
changed_prefs.cpu_compatible = 0;
changed_prefs.cachesize = MAX_JIT_CACHE;
changed_prefs.compfpu = true;
}
else
{
changed_prefs.cachesize = 0;
changed_prefs.compfpu = false;
}
}
else
else if (actionEvent.getSource() == chkFPUJIT)
{
changed_prefs.cachesize = 0;
changed_prefs.compfpu = chkFPUJIT->isSelected();
}
RefreshPanelCPU();
}
@ -209,10 +218,6 @@ public:
if (actionEvent.getSource() == chkFPUstrict) {
changed_prefs.fpu_strict = chkFPUstrict->isSelected();
}
else if (actionEvent.getSource() == chkSoftFloat) {
changed_prefs.fpu_softfloat = chkSoftFloat->isSelected();
}
RefreshPanelCPU();
}
@ -285,9 +290,9 @@ void InitPanelCPU(const struct _ConfigCategory& category)
chkFPUstrict->setId("FPUstrict");
chkFPUstrict->addActionListener(fpuActionListener);
chkSoftFloat = new gcn::UaeCheckBox("Softfloat FPU emul.", true);
chkSoftFloat->setId("SoftFloat");
chkSoftFloat->addActionListener(fpuActionListener);
chkFPUJIT = new gcn::UaeCheckBox("FPU JIT", true);
chkFPUJIT->setId("FPUJIT");
chkFPUJIT->addActionListener(jitActionListener);
grpFPU = new gcn::Window("FPU");
grpFPU->setPosition(DISTANCE_BORDER + grpCPU->getWidth() + DISTANCE_NEXT_X, DISTANCE_BORDER);
@ -296,7 +301,7 @@ void InitPanelCPU(const struct _ConfigCategory& category)
grpFPU->add(optFPU68882, 5, 70);
grpFPU->add(optFPUinternal, 5, 100);
grpFPU->add(chkFPUstrict, 5, 140);
grpFPU->add(chkSoftFloat, 5, 170);
grpFPU->add(chkFPUJIT, 5, 170);
grpFPU->setMovable(false);
grpFPU->setSize(185, 215);
grpFPU->setBaseColor(gui_baseCol);
@ -358,7 +363,7 @@ void ExitPanelCPU()
delete optFPU68882;
delete optFPUinternal;
delete chkFPUstrict;
delete chkSoftFloat;
delete chkFPUJIT;
delete grpFPU;
delete fpuButtonActionListener;
delete fpuActionListener;
@ -413,7 +418,8 @@ void RefreshPanelCPU()
optFPUinternal->setEnabled(changed_prefs.cpu_model == 68040);
chkFPUstrict->setSelected(changed_prefs.fpu_strict);
chkSoftFloat->setSelected(changed_prefs.fpu_softfloat);
chkFPUJIT->setSelected(changed_prefs.compfpu);
chkFPUJIT->setEnabled(changed_prefs.cachesize > 0);
if (changed_prefs.m68k_speed == M68K_SPEED_7MHZ_CYCLES)
opt7Mhz->setSelected(true);
@ -440,8 +446,6 @@ bool HelpPanelCPU(std::vector<std::string> &helptext)
helptext.emplace_back("");
helptext.emplace_back("The available FPU models depending on the selected CPU.");
helptext.emplace_back("The option \"More compatible\" activates more accurate rounding and compare of two floats.");
helptext.emplace_back("\"Softfloat FPU emul.\" aktivates the FPU emulation from QEMU. This is more accurate,");
helptext.emplace_back("but a bit slower.");
helptext.emplace_back("");
helptext.emplace_back("With \"CPU Speed\" you can choose the clock rate of the Amiga.");
helptext.emplace_back("Use 7MHz for A500 games or 14MHz for A1200 ones. Fastest uses more emulation time");

View file

@ -2,6 +2,8 @@
.arm
.global save_host_fp_regs
.global restore_host_fp_regs
.global copy_screen_8bit
.global copy_screen_16bit_swap
.global copy_screen_32bit_to_16bit_neon
@ -16,6 +18,20 @@
.align 8
@----------------------------------------------------------------
@ save_host_fp_regs
@
@ Stores the nine VFP double-precision registers d7-d15 (9 * 8 = 72
@ bytes) to consecutive memory starting at the address held in r0,
@ then returns to the caller through lr.
@ NOTE(review): r0 is presumably the `buf` argument of the C
@ prototype `void save_host_fp_regs(void* buf)`; confirm that every
@ caller passes a buffer of at least 72 bytes.
@----------------------------------------------------------------
save_host_fp_regs:
@ Store-multiple with writeback ("!"): r0 is advanced past the data.
vstmia r0!, {d7-d15}
@ Return to caller.
bx lr
@----------------------------------------------------------------
@ restore_host_fp_regs
@
@ Loads the nine VFP double-precision registers d7-d15 (9 * 8 = 72
@ bytes) from consecutive memory starting at the address held in r0,
@ then returns to the caller through lr.
@ NOTE(review): r0 is presumably the `buf` argument of the C
@ prototype `void restore_host_fp_regs(void* buf)`, expected to hold
@ data laid out by a matching d7-d15 store; confirm against callers.
@----------------------------------------------------------------
restore_host_fp_regs:
@ Load-multiple with writeback ("!"): r0 is advanced past the data.
vldmia r0!, {d7-d15}
@ Return to caller.
bx lr
@----------------------------------------------------------------
@ copy_screen_8bit

View file

@ -1230,7 +1230,6 @@ static uae_u32 REGPARAM2 picasso_SetSpriteColor (TrapContext *ctx)
return 0;
}
/*
SetSpriteImage:
Synopsis: SetSpriteImage(bi, RGBFormat);
@ -1942,7 +1941,7 @@ static void init_picasso_screen(void)
* This function is called whenever another ModeInfo has to be set. This
* function simply sets up the CRTC and TS registers to generate the
* timing used for that screen mode. You should not set the DAC, clocks
* or linear start adress. They will be set when appropriate by their
* or linear start address. They will be set when appropriate by their
* own functions.
*/
static uae_u32 REGPARAM2 picasso_SetGC(TrapContext *ctx)
@ -2103,6 +2102,7 @@ static uae_u32 REGPARAM2 picasso_InvertRect(TrapContext *ctx)
if (NOBLITTER)
return 0;
if (CopyRenderInfoStructureA2U(ctx, renderinfo, &ri)) {
P96TRACE((_T("InvertRect %dbpp 0x%lx\n"), Bpp, (long)mask));
@ -2451,6 +2451,7 @@ static uae_u32 REGPARAM2 picasso_BlitPattern(TrapContext *ctx)
if (NOBLITTER)
return 0;
if (CopyRenderInfoStructureA2U(ctx, rinf, &ri) && CopyPatternStructureA2U(ctx, pinf, &pattern)) {
if (!validatecoords(ctx, &ri, &X, &Y, &W, &H))
return 0;
@ -2497,6 +2498,7 @@ static uae_u32 REGPARAM2 picasso_BlitPattern(TrapContext *ctx)
unsigned long cols;
d = do_get_mem_word(((uae_u16 *)pattern.Memory) + prow);
if (xshift != 0)
d = (d << xshift) | (d >> (16 - xshift));
@ -3069,6 +3071,7 @@ static uae_u32 REGPARAM2 picasso_BlitPlanar2Direct(TrapContext *ctx)
if (NOBLITTER)
return 0;
if (minterm != 0x0C) {
write_log(_T("WARNING - BlitPlanar2Direct() has unhandled op-code 0x%x. Using fall-back routine.\n"), minterm);
return 0;

View file

@ -15,7 +15,7 @@
#define UAE_FILESYS_THREADS
#define AUTOCONFIG /* autoconfig support, fast ram, harddrives etc.. */
#define JIT /* JIT compiler support */
/* #define USE_JIT_FPU */
#define USE_JIT_FPU
/* #define NATMEM_OFFSET natmem_offset */
/* #define CATWEASEL */ /* Catweasel MK2/3 support */
/* #define AHI */ /* AHI sound emulation */

View file

@ -145,3 +145,14 @@ STATIC_INLINE void atomic_set(volatile uae_atomic *p, uae_u32 v)
{
__sync_lock_test_and_set(p, v);
}
/* Helpers for the JIT FPU; only declared when USE_JIT_FPU is defined. */
#ifdef USE_JIT_FPU
/* C linkage so that C++ callers reference the unmangled symbol names. */
#ifdef __cplusplus
extern "C" {
#endif
/* NOTE(review): presumably implemented by the assembly routine of the same
 * name, which stores VFP registers d7-d15 (72 bytes) to buf - confirm that
 * callers size buf accordingly. */
void save_host_fp_regs(void* buf);
/* NOTE(review): presumably the assembly counterpart that reloads d7-d15
 * from buf - confirm buf was filled by save_host_fp_regs. */
void restore_host_fp_regs(void* buf);
#ifdef __cplusplus
}
#endif
#endif

View file

@ -1,793 +0,0 @@
/*
* QEMU float support macros
*
* The code in this source file is derived from release 2a of the SoftFloat
* IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
* some later contributions) are provided under that license, as detailed below.
* It has subsequently been modified by contributors to the QEMU Project,
* so some portions are provided under:
* the SoftFloat-2a license
* the BSD license
* GPL-v2-or-later
*
* Any future contributions to this file after December 1st 2014 will be
* taken to be licensed under the Softfloat-2a license unless specifically
* indicated otherwise.
*/
/*
===============================================================================
This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
Arithmetic Package, Release 2a.
Written by John R. Hauser. This work was made possible in part by the
International Computer Science Institute, located at Suite 600, 1947 Center
Street, Berkeley, California 94704. Funding was partially provided by the
National Science Foundation under grant MIP-9311980. The original version
of this code was written as part of a project to build a fixed-point vector
processor in collaboration with the University of California at Berkeley,
overseen by Profs. Nelson Morgan and John Wawrzynek. More information
is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
arithmetic/SoftFloat.html'.
THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
Derivative works are acceptable, even for commercial purposes, so long as
(1) they include prominent notice that the work is derivative, and (2) they
include prominent notice akin to these four paragraphs for those parts of
this code that are retained.
===============================================================================
*/
/* BSD licensing:
* Copyright (c) 2006, Fabrice Bellard
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its contributors
* may be used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
/* Portions of this work are licensed under the terms of the GNU GPL,
* version 2 or later. See the COPYING file in the top-level directory.
*/
/*----------------------------------------------------------------------------
| SOFTFLOAT_GNUC_PREREQ(maj, min) evaluates to nonzero when the compiler
| reports itself as GNU C version `maj'.`min' or newer, and to 0 otherwise
| (including when the GNU C version macros are not defined at all).
*----------------------------------------------------------------------------*/
#if !defined(__GNUC__) || !defined(__GNUC_MINOR__)
# define SOFTFLOAT_GNUC_PREREQ(maj, min) 0
#else
# define SOFTFLOAT_GNUC_PREREQ(maj, min) \
         (((maj) << 16) + (min) <= (__GNUC__ << 16) + __GNUC_MINOR__)
#endif
/*----------------------------------------------------------------------------
| Right-shifts the 32-bit value `a' by `count' bits with "jamming": when any
| 1 bit is shifted out, bit 0 of the result is forced to 1 so the loss stays
| visible.  A `count' of 0 returns `a' unchanged; a `count' of 32 or more
| yields 1 if `a' is nonzero and 0 otherwise.  The result is written through
| `zPtr'.
*----------------------------------------------------------------------------*/
static inline void shift32RightJamming(uint32_t a, int count, uint32_t *zPtr)
{
    uint32_t sticky;

    if (count == 0) {
        *zPtr = a;
        return;
    }
    if (count >= 32) {
        /* Everything is shifted out; only the sticky bit survives. */
        *zPtr = (a != 0);
        return;
    }
    /* Nonzero exactly when one of the low `count' bits of `a' is set. */
    sticky = ((a << ((-count) & 31)) != 0);
    *zPtr = (a >> count) | sticky;
}
/*----------------------------------------------------------------------------
| Right-shifts the 64-bit value `a' by `count' bits with "jamming": when any
| 1 bit is shifted out, bit 0 of the result is forced to 1 so the loss stays
| visible.  A `count' of 0 returns `a' unchanged; a `count' of 64 or more
| yields 1 if `a' is nonzero and 0 otherwise.  The result is written through
| `zPtr'.
*----------------------------------------------------------------------------*/
static inline void shift64RightJamming(uint64_t a, int count, uint64_t *zPtr)
{
    uint64_t sticky;

    if (count == 0) {
        *zPtr = a;
        return;
    }
    if (count >= 64) {
        /* Everything is shifted out; only the sticky bit survives. */
        *zPtr = (a != 0);
        return;
    }
    /* Nonzero exactly when one of the low `count' bits of `a' is set. */
    sticky = ((a << ((-count) & 63)) != 0);
    *zPtr = (a >> count) | sticky;
}
/*----------------------------------------------------------------------------
| Treats `a0':`a1' as a 128-bit fixed-point value with the binary point
| between the two words and shifts it right by `count' bits.  The integer
| part (at most 64 nonzero bits) is written through `z0Ptr'.  The fraction is
| written through `z1Ptr' in a lossy form: its top bit is the last bit
| shifted out, and its remaining bits are nonzero if and only if any earlier
| shifted-out bit was nonzero.  `count' may be arbitrarily large; a `count'
| of 0 passes both words through untouched.
*----------------------------------------------------------------------------*/
static inline void
 shift64ExtraRightJamming(
     uint64_t a0, uint64_t a1, int count, uint64_t *z0Ptr, uint64_t *z1Ptr)
{
    const int8_t complement = ( - count ) & 63;
    const uint64_t lowSticky = ( a1 != 0 );
    uint64_t whole, frac;

    if ( count == 0 ) {
        frac = a1;
        whole = a0;
    }
    else if ( count < 64 ) {
        /* Bits leaving a0 land at the top of the fraction word. */
        frac = ( a0<<complement ) | lowSticky;
        whole = a0>>count;
    }
    else {
        whole = 0;
        if ( count == 64 ) {
            frac = a0 | lowSticky;
        }
        else {
            /* Shifted past both words: only a single sticky bit remains. */
            frac = ( ( a0 | a1 ) != 0 );
        }
    }
    *z1Ptr = frac;
    *z0Ptr = whole;
}
/*----------------------------------------------------------------------------
| Logical right shift of the 128-bit value `a0':`a1' by `count' bits.  Bits
| shifted out are discarded.  `count' may be arbitrarily large; 128 or more
| gives zero.  The high and low 64-bit halves of the result are written
| through `z0Ptr' and `z1Ptr' respectively.
*----------------------------------------------------------------------------*/
static inline void
 shift128Right(
     uint64_t a0, uint64_t a1, int count, uint64_t *z0Ptr, uint64_t *z1Ptr)
{
    const int8_t complement = ( - count ) & 63;
    uint64_t high = 0;
    uint64_t low;

    if ( count == 0 ) {
        low = a1;
        high = a0;
    }
    else if ( count < 64 ) {
        low = ( a0<<complement ) | ( a1>>count );
        high = a0>>count;
    }
    else if ( count < 128 ) {
        /* The low word is filled entirely from the old high word. */
        low = a0 >> ( count & 63 );
    }
    else {
        low = 0;
    }
    *z1Ptr = low;
    *z0Ptr = high;
}
/*----------------------------------------------------------------------------
| Right-shifts the 128-bit value `a0':`a1' by `count' bits with "jamming":
| if any 1 bit is shifted out, the least significant bit of the result is
| set.  `count' may be arbitrarily large; for 128 or more the result is 1
| when the input is nonzero and 0 otherwise.  The high and low 64-bit halves
| of the result are written through `z0Ptr' and `z1Ptr' respectively.
*----------------------------------------------------------------------------*/
static inline void
 shift128RightJamming(
     uint64_t a0, uint64_t a1, int count, uint64_t *z0Ptr, uint64_t *z1Ptr)
{
    const int8_t complement = ( - count ) & 63;
    uint64_t high = 0;
    uint64_t low;

    if ( count == 0 ) {
        low = a1;
        high = a0;
    }
    else if ( count < 64 ) {
        /* Sticky bit comes from the bits of a1 that fall off the end. */
        low = ( a0<<complement ) | ( a1>>count ) | ( ( a1<<complement ) != 0 );
        high = a0>>count;
    }
    else if ( count == 64 ) {
        low = a0 | ( a1 != 0 );
    }
    else if ( count < 128 ) {
        /* All of a1 and the low bits of a0 are lost and feed the sticky bit. */
        low = ( a0>>( count & 63 ) ) | ( ( ( a0<<complement ) | a1 ) != 0 );
    }
    else {
        low = ( ( a0 | a1 ) != 0 );
    }
    *z1Ptr = low;
    *z0Ptr = high;
}
/*----------------------------------------------------------------------------
| Treats `a0':`a1':`a2' as a 192-bit fixed-point value with the binary point
| between `a1' and `a2' and shifts it right by `count' bits.  The 128-bit
| integer part is written through `z0Ptr' (high) and `z1Ptr' (low).  The
| fraction is written through `z2Ptr' in a lossy form: its top bit is the
| last bit shifted out, and its remaining bits are nonzero if and only if
| any earlier shifted-out bit was nonzero.  `count' may be arbitrarily
| large; a `count' of 0 passes all three words through untouched.
*----------------------------------------------------------------------------*/
static inline void
 shift128ExtraRightJamming(
     uint64_t a0,
     uint64_t a1,
     uint64_t a2,
     int count,
     uint64_t *z0Ptr,
     uint64_t *z1Ptr,
     uint64_t *z2Ptr
 )
{
    const int8_t complement = ( - count ) & 63;
    uint64_t high = 0;
    uint64_t mid = 0;
    uint64_t extra;

    if ( count == 0 ) {
        /* No shift: nothing is lost, so no jamming is applied. */
        *z2Ptr = a2;
        *z1Ptr = a1;
        *z0Ptr = a0;
        return;
    }
    if ( count < 64 ) {
        extra = a1<<complement;
        mid = ( a0<<complement ) | ( a1>>count );
        high = a0>>count;
    }
    else if ( count == 64 ) {
        extra = a1;
        mid = a0;
    }
    else {
        /* All of a1 drops below the extra word; fold it into the sticky
           source. */
        a2 |= a1;
        if ( count < 128 ) {
            extra = a0<<complement;
            mid = a0>>( count & 63 );
        }
        else if ( count == 128 ) {
            extra = a0;
        }
        else {
            extra = ( a0 != 0 );
        }
    }
    extra |= ( a2 != 0 );
    *z2Ptr = extra;
    *z1Ptr = mid;
    *z0Ptr = high;
}
/*----------------------------------------------------------------------------
| Left-shifts the 128-bit value `a0':`a1' by `count' bits, discarding the
| bits shifted out at the top.  `count' must be less than 64.  The high and
| low 64-bit halves of the result are written through `z0Ptr' and `z1Ptr'
| respectively.
*----------------------------------------------------------------------------*/
static inline void
 shortShift128Left(
     uint64_t a0, uint64_t a1, int count, uint64_t *z0Ptr, uint64_t *z1Ptr)
{
    *z1Ptr = a1<<count;
    if ( count == 0 ) {
        *z0Ptr = a0;
    }
    else {
        /* Bits leaving the top of a1 enter the bottom of the high word. */
        *z0Ptr = ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );
    }
}
/*----------------------------------------------------------------------------
| Left-shifts the 192-bit value `a0':`a1':`a2' by `count' bits, discarding
| the bits shifted out at the top.  `count' must be less than 64.  The three
| 64-bit words of the result, most significant first, are written through
| `z0Ptr', `z1Ptr', and `z2Ptr'.
*----------------------------------------------------------------------------*/
static inline void
 shortShift192Left(
     uint64_t a0,
     uint64_t a1,
     uint64_t a2,
     int count,
     uint64_t *z0Ptr,
     uint64_t *z1Ptr,
     uint64_t *z2Ptr
 )
{
    uint64_t high = a0<<count;
    uint64_t mid = a1<<count;
    uint64_t low = a2<<count;

    if ( 0 < count ) {
        /* Carry the bits leaving each lower word into the word above. */
        const int8_t complement = ( ( - count ) & 63 );
        mid |= a2>>complement;
        high |= a1>>complement;
    }
    *z2Ptr = low;
    *z1Ptr = mid;
    *z0Ptr = high;
}
/*----------------------------------------------------------------------------
| Computes the 128-bit sum of `a0':`a1' and `b0':`b1' modulo 2^128 (a carry
| out of the top is dropped).  The high and low 64-bit halves of the sum are
| written through `z0Ptr' and `z1Ptr' respectively.
*----------------------------------------------------------------------------*/
static inline void
 add128(
     uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t *z0Ptr, uint64_t *z1Ptr )
{
    const uint64_t lowSum = a1 + b1;
    /* The low sum wrapped around exactly when it ended up below a1. */
    const uint64_t carry = ( lowSum < a1 );

    *z1Ptr = lowSum;
    *z0Ptr = a0 + b0 + carry;
}
/*----------------------------------------------------------------------------
| Computes the 192-bit sum of `a0':`a1':`a2' and `b0':`b1':`b2' modulo 2^192
| (a carry out of the top is dropped).  The three 64-bit words of the sum,
| most significant first, are written through `z0Ptr', `z1Ptr', and `z2Ptr'.
*----------------------------------------------------------------------------*/
static inline void
 add192(
     uint64_t a0,
     uint64_t a1,
     uint64_t a2,
     uint64_t b0,
     uint64_t b1,
     uint64_t b2,
     uint64_t *z0Ptr,
     uint64_t *z1Ptr,
     uint64_t *z2Ptr
 )
{
    const uint64_t low = a2 + b2;
    const uint8_t carryFromLow = ( low < a2 );
    uint64_t mid = a1 + b1;
    const uint8_t carryFromMid = ( mid < a1 );
    uint64_t high = a0 + b0;

    /* Propagate the low-word carry; this addition can itself wrap. */
    mid += carryFromLow;
    high += ( mid < carryFromLow );
    high += carryFromMid;
    *z2Ptr = low;
    *z1Ptr = mid;
    *z0Ptr = high;
}
/*----------------------------------------------------------------------------
| Computes the 128-bit difference `a0':`a1' minus `b0':`b1' modulo 2^128 (a
| borrow out of the top is dropped).  The high and low 64-bit halves of the
| difference are written through `z0Ptr' and `z1Ptr' respectively.
*----------------------------------------------------------------------------*/
static inline void
 sub128(
     uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t *z0Ptr, uint64_t *z1Ptr )
{
    /* The low-word subtraction borrows exactly when a1 is below b1. */
    const uint64_t borrow = ( a1 < b1 );

    *z1Ptr = a1 - b1;
    *z0Ptr = a0 - b0 - borrow;
}
/*----------------------------------------------------------------------------
| Computes the 192-bit difference `a0':`a1':`a2' minus `b0':`b1':`b2' modulo
| 2^192 (a borrow out of the top is dropped).  The three 64-bit words of the
| difference, most significant first, are written through `z0Ptr', `z1Ptr',
| and `z2Ptr'.
*----------------------------------------------------------------------------*/
static inline void
 sub192(
     uint64_t a0,
     uint64_t a1,
     uint64_t a2,
     uint64_t b0,
     uint64_t b1,
     uint64_t b2,
     uint64_t *z0Ptr,
     uint64_t *z1Ptr,
     uint64_t *z2Ptr
 )
{
    const uint64_t low = a2 - b2;
    const uint8_t borrowFromLow = ( a2 < b2 );
    uint64_t mid = a1 - b1;
    const uint8_t borrowFromMid = ( a1 < b1 );
    uint64_t high = a0 - b0;

    /* Taking the low-word borrow out of the middle word can itself borrow;
       that is detected before the middle word is adjusted. */
    high -= ( mid < borrowFromLow );
    mid -= borrowFromLow;
    high -= borrowFromMid;
    *z2Ptr = low;
    *z1Ptr = mid;
    *z0Ptr = high;
}
/*----------------------------------------------------------------------------
| Forms the full 128-bit product of the 64-bit values `a' and `b' from four
| 32x32-bit partial products.  The high and low 64-bit halves of the product
| are written through `z0Ptr' and `z1Ptr' respectively.
*----------------------------------------------------------------------------*/
static inline void mul64To128( uint64_t a, uint64_t b, uint64_t *z0Ptr, uint64_t *z1Ptr )
{
    const uint32_t aLo = a;
    const uint32_t aHi = a>>32;
    const uint32_t bLo = b;
    const uint32_t bHi = b>>32;
    uint64_t low = ( (uint64_t) aLo ) * bLo;
    uint64_t cross = ( (uint64_t) aLo ) * bHi;
    const uint64_t crossOther = ( (uint64_t) aHi ) * bLo;
    uint64_t high = ( (uint64_t) aHi ) * bHi;

    /* Sum the two cross terms; a wrap here is worth 2^32 in the high word. */
    cross += crossOther;
    high += ( ( (uint64_t) ( cross < crossOther ) )<<32 ) + ( cross>>32 );
    /* The low half of the cross sum lines up with the top of the low word. */
    cross <<= 32;
    low += cross;
    high += ( low < cross );
    *z1Ptr = low;
    *z0Ptr = high;
}
/*----------------------------------------------------------------------------
| Multiplies the 128-bit value `a0':`a1' by the 64-bit value `b', giving a
| 192-bit product.  The three 64-bit words of the product, most significant
| first, are written through `z0Ptr', `z1Ptr', and `z2Ptr'.
*----------------------------------------------------------------------------*/
static inline void
 mul128By64To192(
     uint64_t a0,
     uint64_t a1,
     uint64_t b,
     uint64_t *z0Ptr,
     uint64_t *z1Ptr,
     uint64_t *z2Ptr
 )
{
    uint64_t high, mid, low, highPartLow;

    /* Low partial product: a1 * b occupies the middle and low words. */
    mul64To128( a1, b, &mid, &low );
    /* High partial product: a0 * b occupies the high and middle words. */
    mul64To128( a0, b, &high, &highPartLow );
    /* Merge the overlapping middle words, carrying into the high word. */
    add128( high, highPartLow, 0, mid, &high, &mid );
    *z2Ptr = low;
    *z1Ptr = mid;
    *z0Ptr = high;
}
/*----------------------------------------------------------------------------
| Multiplies the 128-bit value `a0':`a1' by the 128-bit value `b0':`b1',
| giving a 256-bit product.  The four 64-bit words of the product, most
| significant first, are written through `z0Ptr', `z1Ptr', `z2Ptr', and
| `z3Ptr'.
*----------------------------------------------------------------------------*/
static inline void
 mul128To256(
     uint64_t a0,
     uint64_t a1,
     uint64_t b0,
     uint64_t b1,
     uint64_t *z0Ptr,
     uint64_t *z1Ptr,
     uint64_t *z2Ptr,
     uint64_t *z3Ptr
 )
{
    uint64_t w0, w1, w2, w3;
    uint64_t partHi, partLo;

    /* a1 * b1 fills the two least significant words. */
    mul64To128( a1, b1, &w2, &w3 );
    /* a1 * b0 is accumulated one word higher. */
    mul64To128( a1, b0, &w1, &partLo );
    add128( w1, partLo, 0, w2, &w1, &w2 );
    /* a0 * b0 is accumulated two words higher. */
    mul64To128( a0, b0, &w0, &partHi );
    add128( w0, partHi, 0, w1, &w0, &w1 );
    /* a0 * b1 shares the position of a1 * b0; fold it in with its carry. */
    mul64To128( a0, b1, &partHi, &partLo );
    add128( partHi, partLo, 0, w2, &partHi, &w2 );
    add128( w0, w1, 0, partHi, &w0, &w1 );
    *z3Ptr = w3;
    *z2Ptr = w2;
    *z1Ptr = w1;
    *z0Ptr = w0;
}
/*----------------------------------------------------------------------------
| Returns an approximation to the 64-bit integer quotient obtained by dividing
| `b' into the 128-bit value formed by concatenating `a0' and `a1'. The
| divisor `b' must be at least 2^63. If q is the exact quotient truncated
| toward zero, the approximation returned lies between q and q + 2 inclusive.
| If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
| unsigned integer is returned.
*----------------------------------------------------------------------------*/
static uint64_t estimateDiv128To64( uint64_t a0, uint64_t a1, uint64_t b )
{
uint64_t b0, b1;
uint64_t rem0, rem1, term0, term1;
uint64_t z;
/* The divisor does not exceed the high dividend word: saturate the result. */
if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF );
/* b0 is the upper 32 bits of the divisor. */
b0 = b>>32;
/* First guess for the upper 32 quotient bits from a0 / b0, clamped to
   0xFFFFFFFF when that division would need more than 32 bits. */
z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32;
/* rem0:rem1 = a0:a1 - b * z (128-bit arithmetic, modulo 2^128). */
mul64To128( b, z, &term0, &term1 );
sub128( a0, a1, term0, term1, &rem0, &rem1 );
/* While the remainder's top bit is set (negative when read as signed),
   lower the upper quotient half by one and add b * 2^32 back; b0:b1 is
   b shifted left by 32 bits, held as a 128-bit pair. */
while ( ( (int64_t) rem0 ) < 0 ) {
z -= LIT64( 0x100000000 );
b1 = b<<32;
add128( rem0, rem1, b0, b1, &rem0, &rem1 );
}
/* Shift the remainder left by 32 bits and estimate the lower 32 quotient
   bits in the same clamped fashion. */
rem0 = ( rem0<<32 ) | ( rem1>>32 );
z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0;
return z;
}
/*----------------------------------------------------------------------------
| Returns an approximation to the square root of the 32-bit significand given
| by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of
| `aExp' (the least significant bit) is 1, the integer returned approximates
| 2^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp'
| is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either
| case, the approximation returned lies strictly within +/-2 of the exact
| value.
*----------------------------------------------------------------------------*/
static uint32_t estimateSqrt32(int aExp, uint32_t a)
{
/* Correction tables for the initial estimate, indexed by bits 27..30 of `a';
   one table for odd `aExp', one for even. */
static const uint16_t sqrtOddAdjustments[] = {
0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
};
static const uint16_t sqrtEvenAdjustments[] = {
0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
};
int8_t index;
uint32_t z;
/* Four bits just below the most significant bit select the table entry. */
index = ( a>>27 ) & 15;
if ( aExp & 1 ) {
/* Odd exponent: table-corrected first estimate, then one refinement. */
z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ (int)index ];
z = ( ( a / z )<<14 ) + ( z<<15 );
/* `a' is halved before the final refinement step below. */
a >>= 1;
}
else {
/* Even exponent: table-corrected first estimate, then one refinement. */
z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ (int)index ];
z = a / z + z;
/* Clamp so that z<<15 cannot overflow 32 bits. */
z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
/* Early exit when the estimate does not exceed `a': returns `a' shifted
   right by one as a signed value. */
if ( z <= a ) return (uint32_t) ( ( (int32_t) a )>>1 );
}
/* Final refinement: (a * 2^31) / z + z / 2.
   NOTE(review): this looks like a Newton/Heron-style averaging step -
   confirm against the SoftFloat documentation. */
return ( (uint32_t) ( ( ( (uint64_t) a )<<31 ) / z ) ) + ( z>>1 );
}
/*----------------------------------------------------------------------------
| Counts the 0 bits above the most significant 1 bit of the 32-bit value
| `a'.  An input of zero yields 32.
*----------------------------------------------------------------------------*/
static inline int8_t countLeadingZeros32( uint32_t a )
{
#if SOFTFLOAT_GNUC_PREREQ(3, 4)
    /* The builtin is not called for zero; that case is answered directly. */
    return a ? __builtin_clz(a) : 32;
#else
    int8_t zeros = 0;
    uint32_t probe;

    /* Narrow the search to the top byte in two steps of 16 and 8 bits. */
    if ( a < 0x10000 ) {
        zeros += 16;
        a <<= 16;
    }
    if ( a < 0x1000000 ) {
        zeros += 8;
        a <<= 8;
    }
    /* Count the leading zero bits of the top byte (8 if the byte is 0). */
    for ( probe = 0x80000000u; probe >= 0x01000000u && !( a & probe ); probe >>= 1 ) {
        zeros++;
    }
    return zeros;
#endif
}
/*----------------------------------------------------------------------------
| Counts the 0 bits above the most significant 1 bit of the 64-bit value
| `a'.  An input of zero yields 64.
*----------------------------------------------------------------------------*/
static inline int8_t countLeadingZeros64( uint64_t a )
{
#if SOFTFLOAT_GNUC_PREREQ(3, 4)
    /* The builtin is not called for zero; that case is answered directly. */
    return a ? __builtin_clzll(a) : 64;
#else
    int8_t zeros = 0;

    if ( a >= ( (uint64_t) 1 )<<32 ) {
        /* A bit is set in the upper half: examine that half only. */
        a >>= 32;
    }
    else {
        /* Upper half is all zeros: count it and examine the lower half. */
        zeros += 32;
    }
    zeros += countLeadingZeros32( a );
    return zeros;
#endif
}
/*----------------------------------------------------------------------------
| Tests two 128-bit values for equality: returns 1 when `a0':`a1' equals
| `b0':`b1', and 0 otherwise.
*----------------------------------------------------------------------------*/
static inline flag eq128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
{
    if ( a0 != b0 ) {
        return 0;
    }
    return ( a1 == b1 );
}
/*----------------------------------------------------------------------------
| Unsigned 128-bit "less than or equal" comparison: returns 1 when
| `a0':`a1' is less than or equal to `b0':`b1', and 0 otherwise.
*----------------------------------------------------------------------------*/
static inline flag le128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
{
    /* The high words decide unless they are equal. */
    if ( a0 != b0 ) {
        return ( a0 < b0 );
    }
    return ( a1 <= b1 );
}
/*----------------------------------------------------------------------------
| Unsigned 128-bit "less than" comparison: returns 1 when `a0':`a1' is
| strictly less than `b0':`b1', and 0 otherwise.
*----------------------------------------------------------------------------*/
static inline flag lt128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
{
    /* The high words decide unless they are equal. */
    if ( a0 != b0 ) {
        return ( a0 < b0 );
    }
    return ( a1 < b1 );
}
/*----------------------------------------------------------------------------
| Tests two 128-bit values for inequality: returns 1 when `a0':`a1' differs
| from `b0':`b1', and 0 otherwise.
*----------------------------------------------------------------------------*/
static inline flag ne128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
{
    if ( a0 != b0 ) {
        return 1;
    }
    return ( a1 != b1 );
}

View file

@ -1,443 +0,0 @@
/*
* QEMU float support
*
* The code in this source file is derived from release 2a of the SoftFloat
* IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
* some later contributions) are provided under that license, as detailed below.
* It has subsequently been modified by contributors to the QEMU Project,
* so some portions are provided under:
* the SoftFloat-2a license
* the BSD license
* GPL-v2-or-later
*
* Any future contributions to this file after December 1st 2014 will be
* taken to be licensed under the Softfloat-2a license unless specifically
* indicated otherwise.
*/
/*
===============================================================================
This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
Arithmetic Package, Release 2a.
Written by John R. Hauser. This work was made possible in part by the
International Computer Science Institute, located at Suite 600, 1947 Center
Street, Berkeley, California 94704. Funding was partially provided by the
National Science Foundation under grant MIP-9311980. The original version
of this code was written as part of a project to build a fixed-point vector
processor in collaboration with the University of California at Berkeley,
overseen by Profs. Nelson Morgan and John Wawrzynek. More information
is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
arithmetic/SoftFloat.html'.
THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
Derivative works are acceptable, even for commercial purposes, so long as
(1) they include prominent notice that the work is derivative, and (2) they
include prominent notice akin to these four paragraphs for those parts of
this code that are retained.
===============================================================================
*/
/* BSD licensing:
* Copyright (c) 2006, Fabrice Bellard
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its contributors
* may be used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
/* Portions of this work are licensed under the terms of the GNU GPL,
* version 2 or later. See the COPYING file in the top-level directory.
*/
/*----------------------------------------------------------------------------
| Reports whether the extended double-precision value `a' is a NaN: returns
| nonzero when the low 15 bits of `a.high' are all ones and `a.low' has a
| nonzero bit below its most significant bit; otherwise returns 0.
*----------------------------------------------------------------------------*/
static inline flag floatx80_is_nan( floatx80 a )
{
    if ( ( a.high & 0x7FFF ) != 0x7FFF ) {
        return 0;
    }
    /* Shifting left by one discards the top bit of the low word. */
    return ( (uint64_t) ( a.low<<1 ) ) != 0;
}
/*----------------------------------------------------------------------------
| Builds the default extended double-precision NaN: exponent word 0x7FFF
| (sign clear) with every significand bit set.  `status' is accepted for
| interface compatibility but is not consulted.
*----------------------------------------------------------------------------*/
static inline floatx80 floatx80_default_nan(float_status *status)
{
    floatx80 nan;

    (void) status;
    nan.low = LIT64( 0xFFFFFFFFFFFFFFFF );
    nan.high = 0x7FFF;
    return nan;
}
/*----------------------------------------------------------------------------
| Records the exceptions given in `flags' by OR-ing them into
| `status->float_exception_flags'.  Previously recorded flags are kept.
| No trap is taken here; a trap mechanism could be hooked in at this point,
| but it could not substitute a result value.
*----------------------------------------------------------------------------*/
static inline void float_raise(uint8_t flags, float_status *status)
{
    status->float_exception_flags = status->float_exception_flags | flags;
}
/*----------------------------------------------------------------------------
| Internal canonical NaN format.
| Used as the intermediate representation when a NaN is converted between
| formats: the *ToCommonNaN helpers in this file fill it in and the
| commonNaNTo* helpers read it back.
*----------------------------------------------------------------------------*/
typedef struct {
/* Sign bit of the source NaN (0 or 1). */
flag sign;
/* `high' holds the NaN fraction shifted up to the most significant bits
 * (<<41 from float32, <<12 from float64, <<1 from floatx80); the converters
 * in this file always store 0 in `low'. */
uint64_t high, low;
} commonNaNT;
/*----------------------------------------------------------------------------
| Tests whether the single-precision value `a' is a NaN.  With the sign bit
| shifted out, any pattern above 0xFF000000 has an all-ones exponent and a
| non-zero fraction.  Returns 1 for a NaN and 0 otherwise.
*----------------------------------------------------------------------------*/
static inline flag float32_is_nan( float32 a )
{
    const uint32_t withoutSign = (uint32_t) ( a<<1 );

    return withoutSign > 0xFF000000;
}
/*----------------------------------------------------------------------------
| Tests whether the single-precision value `a' is a signaling NaN: bits
| 30..22 must read 0x1FE (exponent all ones, bit 22 clear) and at least one
| of the low 22 fraction bits must be set.  Returns 1 if so, 0 otherwise.
*----------------------------------------------------------------------------*/
static inline flag float32_is_signaling_nan( float32 a )
{
    if ( ( ( a>>22 ) & 0x1FF ) != 0x1FE ) {
        return 0;
    }
    return ( a & 0x003FFFFF ) != 0;
}
/*----------------------------------------------------------------------------
| Converts the single-precision NaN `a' to the canonical NaN format.  The
| sign is copied and the bit pattern is shifted left by 41 so the fraction
| ends up in the top bits of `high'.  If `a' is a signaling NaN,
| float_flag_signaling is raised in `status'.
*----------------------------------------------------------------------------*/
static inline commonNaNT float32ToCommonNaN( float32 a, float_status *status )
{
    commonNaNT nan;

    if ( float32_is_signaling_nan( a ) ) {
        float_raise( float_flag_signaling, status );
    }
    nan.high = ( (uint64_t) a )<<41;
    nan.low = 0;
    nan.sign = a>>31;
    return nan;
}
/*----------------------------------------------------------------------------
| Converts the canonical NaN `a' to single precision.  The result carries
| the sign of `a', the fixed bits 0x7FC00000 (exponent all ones plus bit 22)
| and the top bits of `a.high' as the remaining fraction.
*----------------------------------------------------------------------------*/
static inline float32 commonNaNToFloat32( commonNaNT a )
{
    const uint32_t signBit = ( (uint32_t) a.sign )<<31;
    /* a.high>>41 leaves at most 23 significant bits, so nothing is lost. */
    const uint32_t payload = (uint32_t) ( a.high>>41 );

    return signBit | 0x7FC00000 | payload;
}
/*----------------------------------------------------------------------------
| Selects the NaN result for a single-precision operation on `a' and `b',
| at least one of which is a NaN.  Bit 22 is set in both candidates before
| one is returned.  If either operand is a signaling NaN,
| float_flag_signaling is raised in `status'.
|   - `a' not a NaN                      -> `b'
|   - `a' signaling NaN and `b' a NaN    -> `b'
|   - otherwise                          -> `a'
*----------------------------------------------------------------------------*/
static inline float32 propagateFloat32NaN( float32 a, float32 b, float_status *status )
{
    const flag aNaN = float32_is_nan( a );
    const flag aSignaling = float32_is_signaling_nan( a );
    const flag bNaN = float32_is_nan( b );
    const flag bSignaling = float32_is_signaling_nan( b );

    if ( aSignaling | bSignaling ) {
        float_raise( float_flag_signaling, status );
    }
    a |= 0x00400000;
    b |= 0x00400000;
    if ( ! aNaN ) {
        return b;
    }
    return ( aSignaling & bNaN ) ? b : a;
}
/*----------------------------------------------------------------------------
| Tests whether the double-precision value `a' is a NaN.  With the sign bit
| shifted out, any pattern above 0xFFE0000000000000 has an all-ones exponent
| and a non-zero fraction.  Returns 1 for a NaN and 0 otherwise.
*----------------------------------------------------------------------------*/
static inline flag float64_is_nan( float64 a )
{
    const uint64_t withoutSign = (uint64_t) ( a<<1 );

    return withoutSign > LIT64( 0xFFE0000000000000 );
}
/*----------------------------------------------------------------------------
| Tests whether the double-precision value `a' is a signaling NaN: bits
| 62..51 must read 0xFFE (exponent all ones, bit 51 clear) and at least one
| of the low 51 fraction bits must be set.  Returns 1 if so, 0 otherwise.
*----------------------------------------------------------------------------*/
static inline flag float64_is_signaling_nan( float64 a )
{
    if ( ( ( a>>51 ) & 0xFFF ) != 0xFFE ) {
        return 0;
    }
    return ( a & LIT64( 0x0007FFFFFFFFFFFF ) ) != 0;
}
/*----------------------------------------------------------------------------
| Converts the double-precision NaN `a' to the canonical NaN format.  The
| sign is copied and the bit pattern is shifted left by 12 so the fraction
| ends up in the top bits of `high'.  If `a' is a signaling NaN,
| float_flag_signaling is raised in `status'.
|
| This previously raised float_flag_invalid, unlike float32ToCommonNaN,
| floatx80ToCommonNaN and the propagate*NaN helpers, which all report a
| signaling NaN through float_flag_signaling.  A double-precision signaling
| NaN was therefore reported under a different flag than every other one.
*----------------------------------------------------------------------------*/
static inline commonNaNT float64ToCommonNaN(float64 a, float_status *status)
{
    commonNaNT z;

    if (float64_is_signaling_nan(a)) {
        float_raise(float_flag_signaling, status);
    }
    z.sign = float64_val(a) >> 63;
    z.low = 0;
    z.high = float64_val(a) << 12;
    return z;
}
/*----------------------------------------------------------------------------
| Converts the canonical NaN `a' to double precision.  The result carries
| the sign of `a', the fixed bits 0x7FF8000000000000 (exponent all ones plus
| bit 51) and `a.high' shifted right by 12 as the remaining fraction.
| `status' is not consulted.
*----------------------------------------------------------------------------*/
static inline float64 commonNaNToFloat64(commonNaNT a, float_status *status)
{
    const uint64_t signBit = ( (uint64_t) a.sign )<<63;
    const uint64_t payload = a.high>>12;

    (void) status;
    return signBit | LIT64( 0x7FF8000000000000 ) | payload;
}
/*----------------------------------------------------------------------------
| Tests whether the extended double-precision value `a' is a signaling NaN:
| the exponent must be 0x7FFF, bit 62 of the significand must be clear, and
| at least one significand bit other than bit 63 must be set.  Returns 1 if
| so, 0 otherwise.
*----------------------------------------------------------------------------*/
static inline flag floatx80_is_signaling_nan( floatx80 a )
{
    if ( ( a.high & 0x7FFF ) != 0x7FFF ) {
        return 0;
    }
    if ( a.low & LIT64( 0x4000000000000000 ) ) {
        /* Bit 62 set: not signaling. */
        return 0;
    }
    /* Bit 63 is shifted out; something must remain below bit 62. */
    return ( (uint64_t) ( a.low<<1 ) ) != 0;
}
/*----------------------------------------------------------------------------
| Converts the extended double-precision NaN `a' to the canonical NaN
| format.  The sign is taken from bit 15 of `a.high' and the significand is
| shifted left by one (dropping bit 63) into `high'.  If `a' is a signaling
| NaN, float_flag_signaling is raised in `status'.
*----------------------------------------------------------------------------*/
static inline commonNaNT floatx80ToCommonNaN( floatx80 a, float_status *status )
{
    commonNaNT nan;

    if ( floatx80_is_signaling_nan( a ) ) {
        float_raise( float_flag_signaling, status );
    }
    nan.high = a.low<<1;
    nan.low = 0;
    nan.sign = a.high>>15;
    return nan;
}
/*----------------------------------------------------------------------------
| Converts the canonical NaN `a' to extended double precision.  The
| exponent is 0x7FFF with the sign of `a' in bit 15; the significand is
| `a.high' shifted right by one, combined with a fixed pattern:
| 0x4000000000000000 when SOFTFLOAT_68K is defined, 0xC000000000000000
| otherwise.  `status' is not consulted.
*----------------------------------------------------------------------------*/
static inline floatx80 commonNaNToFloatx80(commonNaNT a, float_status *status)
{
#ifdef SOFTFLOAT_68K
    const uint64_t fixedBits = LIT64( 0x4000000000000000 );
#else
    const uint64_t fixedBits = LIT64( 0xC000000000000000 );
#endif
    floatx80 result;

    (void) status;
    result.high = ( ( (int16_t) a.sign )<<15 ) | 0x7FFF;
    result.low = fixedBits | ( a.high>>1 );
    return result;
}
/*----------------------------------------------------------------------------
| Selects the NaN result for an extended double-precision operation on `a'
| and `b', at least one of which is a NaN.  If either operand is a signaling
| NaN, float_flag_signaling is raised in `status'.
|
| With SOFTFLOAT_68K: bit 62 is set in both candidates and `a' is returned
| if it is a NaN, otherwise `b'.
| Without SOFTFLOAT_68K: bits 63 and 62 are set in both candidates; `b' is
| returned if `a' is not a NaN, or if `a' is signaling and `b' is a NaN;
| otherwise `a' is returned.
*----------------------------------------------------------------------------*/
static inline floatx80 propagateFloatx80NaN( floatx80 a, floatx80 b, float_status *status )
{
#ifdef SOFTFLOAT_68K
    const uint64_t forcedBits = LIT64( 0x4000000000000000 );
#else
    const uint64_t forcedBits = LIT64( 0xC000000000000000 );
    const flag bNaN = floatx80_is_nan( b );
#endif
    const flag aNaN = floatx80_is_nan( a );
    const flag aSignaling = floatx80_is_signaling_nan( a );
    const flag bSignaling = floatx80_is_signaling_nan( b );

    if ( aSignaling | bSignaling ) {
        float_raise( float_flag_signaling, status );
    }
    a.low |= forcedBits;
    b.low |= forcedBits;
#ifdef SOFTFLOAT_68K
    return aNaN ? a : b;
#else
    if ( ! aNaN ) {
        return b;
    }
    return ( aSignaling & bNaN ) ? b : a;
#endif
}
#ifdef SOFTFLOAT_68K
/*----------------------------------------------------------------------------
| Single-operand NaN propagation.  Returns `a' with bit 62 of the
| significand set.  If `a' was a signaling NaN on entry,
| float_flag_signaling is raised in `status'.
*----------------------------------------------------------------------------*/
static inline floatx80 propagateFloatx80NaNOneArg(floatx80 a, float_status *status)
{
    /* Classify before bit 62 is forced on below. */
    const flag wasSignaling = floatx80_is_signaling_nan( a );

    if ( wasSignaling ) {
        float_raise( float_flag_signaling, status );
    }
    a.low |= LIT64( 0x4000000000000000 );
    return a;
}
#endif
// 28-12-2016: Added for Previous:
/*----------------------------------------------------------------------------
| Tests whether the extended double-precision value `a' is zero: the whole
| significand must be 0 and the exponent must be below 0x7FFF.  The sign is
| ignored.  Returns 1 if so, 0 otherwise.
*----------------------------------------------------------------------------*/
static inline flag floatx80_is_zero( floatx80 a )
{
    if ( a.low != 0 ) {
        return 0;
    }
    return ( a.high & 0x7FFF ) < 0x7FFF;
}
/*----------------------------------------------------------------------------
| Tests whether the extended double-precision value `a' is an infinity: the
| exponent must be 0x7FFF and all significand bits below bit 63 must be
| clear (bit 63 itself is ignored).  Returns 1 if so, 0 otherwise.
*----------------------------------------------------------------------------*/
static inline flag floatx80_is_infinity( floatx80 a )
{
    if ( ( a.high & 0x7FFF ) != 0x7FFF ) {
        return 0;
    }
    return ( (uint64_t) ( a.low<<1 ) ) == 0;
}
/*----------------------------------------------------------------------------
| Tests the sign of the extended double-precision value `a'.  Returns 1 if
| bit 15 of `a.high' is set, 0 otherwise; no other field is examined.
*----------------------------------------------------------------------------*/
static inline flag floatx80_is_negative( floatx80 a )
{
    return ( a.high>>15 ) & 1;
}
/*----------------------------------------------------------------------------
| Tests whether the extended double-precision value `a' is an unnormal: the
| exponent is neither 0 nor 0x7FFF and bit 63 of the significand is clear.
| Returns 1 if so, 0 otherwise.
*----------------------------------------------------------------------------*/
static inline flag floatx80_is_unnormal( floatx80 a )
{
    const uint16_t exponent = a.high & 0x7FFF;

    if ( exponent == 0 || exponent == 0x7FFF ) {
        return 0;
    }
    return ( a.low & LIT64( 0x8000000000000000 ) ) == 0;
}
/*----------------------------------------------------------------------------
| Tests whether the extended double-precision value `a' is a denormal: the
| exponent is 0, bit 63 of the significand is clear, and at least one lower
| significand bit is set.  Returns 1 if so, 0 otherwise.
*----------------------------------------------------------------------------*/
static inline flag floatx80_is_denormal( floatx80 a )
{
    if ( ( a.high & 0x7FFF ) != 0 ) {
        return 0;
    }
    if ( a.low & LIT64( 0x8000000000000000 ) ) {
        return 0;
    }
    return ( (uint64_t) ( a.low<<1 ) ) != 0;
}
/*----------------------------------------------------------------------------
| Tests whether the extended double-precision value `a' is normal: the
| exponent is below 0x7FFF and bit 63 of the significand is set.  An
| exponent of 0 with bit 63 set also satisfies this test.  Returns 1 if so,
| 0 otherwise.
*----------------------------------------------------------------------------*/
static inline flag floatx80_is_normal( floatx80 a )
{
    if ( ( a.high & 0x7FFF ) == 0x7FFF ) {
        return 0;
    }
    return ( a.low>>63 ) != 0;
}
// End of addition for Previous

File diff suppressed because it is too large Load diff

View file

@ -1,488 +0,0 @@
#define SOFTFLOAT_68K
/*
* QEMU float support
*
* The code in this source file is derived from release 2a of the SoftFloat
* IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
* some later contributions) are provided under that license, as detailed below.
* It has subsequently been modified by contributors to the QEMU Project,
* so some portions are provided under:
* the SoftFloat-2a license
* the BSD license
* GPL-v2-or-later
*
* Any future contributions to this file after December 1st 2014 will be
* taken to be licensed under the Softfloat-2a license unless specifically
* indicated otherwise.
*/
/*
===============================================================================
This C header file is part of the SoftFloat IEC/IEEE Floating-point
Arithmetic Package, Release 2a.
Written by John R. Hauser. This work was made possible in part by the
International Computer Science Institute, located at Suite 600, 1947 Center
Street, Berkeley, California 94704. Funding was partially provided by the
National Science Foundation under grant MIP-9311980. The original version
of this code was written as part of a project to build a fixed-point vector
processor in collaboration with the University of California at Berkeley,
overseen by Profs. Nelson Morgan and John Wawrzynek. More information
is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
arithmetic/SoftFloat.html'.
THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
Derivative works are acceptable, even for commercial purposes, so long as
(1) they include prominent notice that the work is derivative, and (2) they
include prominent notice akin to these four paragraphs for those parts of
this code that are retained.
===============================================================================
*/
/* BSD licensing:
* Copyright (c) 2006, Fabrice Bellard
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its contributors
* may be used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
/* Portions of this work are licensed under the terms of the GNU GPL,
* version 2 or later. See the COPYING file in the top-level directory.
*/
#ifndef SOFTFLOAT_H
#define SOFTFLOAT_H
#if defined(CONFIG_SOLARIS) && defined(CONFIG_NEEDS_LIBSUNMATH)
#include <sunmath.h>
#endif
/* This 'flag' type must be able to hold at least 0 and 1. It should
* probably be replaced with 'bool' but the uses would need to be audited
* to check that they weren't accidentally relying on it being a larger type.
*/
typedef uint8_t flag;
/* LIT64(a) pastes the ULL suffix onto the literal `a', giving an
 * unsigned long long constant. */
#define LIT64( a ) a##ULL
/*----------------------------------------------------------------------------
| Software IEC/IEEE floating-point ordering relations
| Anonymous enum, so the constants are plain int values.
| NOTE(review): no function declared in this header returns these values;
| presumably they are kept from the upstream QEMU header -- confirm whether
| they are still used.
*----------------------------------------------------------------------------*/
enum {
float_relation_less = -1,
float_relation_equal = 0,
float_relation_greater = 1,
float_relation_unordered = 2
};
/*----------------------------------------------------------------------------
| Software IEC/IEEE floating-point types.
*----------------------------------------------------------------------------*/
/* Use structures for soft-float types. This prevents accidentally mixing
them with native int/float types. A sufficiently clever compiler and
sane ABI should be able to see through these structs. However
x86/gcc 3.x seems to struggle a bit, so leave them disabled by default. */
/* Both configurations provide the same helper macros per type:
 *   floatN_val(x)    - yields the raw integer bit pattern of x
 *   make_floatN(x)   - builds a floatN from a raw integer bit pattern
 *   const_floatN(x)  - form usable as an initializer
 * NOTE(review): code that applies integer operators directly to float32 or
 * float64 values (e.g. `a<<1') only compiles with the plain typedefs below. */
//#define USE_SOFTFLOAT_STRUCT_TYPES
#ifdef USE_SOFTFLOAT_STRUCT_TYPES
typedef struct {
uint16_t v;
} float16;
#define float16_val(x) (((float16)(x)).v)
/* make_floatN relies on the statement-expression extension (__extension__). */
#define make_float16(x) __extension__ ({ float16 f16_val = {x}; f16_val; })
#define const_float16(x) { x }
typedef struct {
uint32_t v;
} float32;
/* The cast ensures an error if the wrong type is passed. */
#define float32_val(x) (((float32)(x)).v)
#define make_float32(x) __extension__ ({ float32 f32_val = {x}; f32_val; })
#define const_float32(x) { x }
typedef struct {
uint64_t v;
} float64;
#define float64_val(x) (((float64)(x)).v)
#define make_float64(x) __extension__ ({ float64 f64_val = {x}; f64_val; })
#define const_float64(x) { x }
#else
/* Default configuration: the types are bare integers and every helper macro
 * expands to its argument. */
typedef uint16_t float16;
typedef uint32_t float32;
typedef uint64_t float64;
#define float16_val(x) (x)
#define float32_val(x) (x)
#define float64_val(x) (x)
#define make_float16(x) (x)
#define make_float32(x) (x)
#define make_float64(x) (x)
#define const_float16(x) (x)
#define const_float32(x) (x)
#define const_float64(x) (x)
#endif
/* Extended double-precision value. */
typedef struct {
/* Bit 15 is the sign and bits 14..0 the exponent, as shown by the inline
 * helpers in this header that mask `high' with 0x7fff. */
uint16_t high;
/* 64-bit significand; floatx80_invalid_encoding() treats bit 63 as the
 * integer bit. */
uint64_t low;
} floatx80;
/* 128-bit value stored as two 64-bit halves.  The member order depends on
 * HOST_WORDS_BIGENDIAN: `high' comes first when it is defined, `low' first
 * otherwise. */
typedef struct {
#ifdef HOST_WORDS_BIGENDIAN
uint64_t high, low;
#else
uint64_t low, high;
#endif
} float128;
/*----------------------------------------------------------------------------
| Software IEC/IEEE floating-point underflow tininess-detection mode.
| Values for float_status.float_detect_tininess (see
| set_float_detect_tininess()).
*----------------------------------------------------------------------------*/
enum {
float_tininess_after_rounding = 0,
float_tininess_before_rounding = 1
};
/*----------------------------------------------------------------------------
| Software IEC/IEEE floating-point rounding mode.
| Values for float_status.float_rounding_mode (see
| set_float_rounding_mode()).
| NOTE(review): float_round_ties_away presumably comes from the upstream
| QEMU header; confirm that every rounding routine handles it explicitly.
*----------------------------------------------------------------------------*/
enum {
float_round_nearest_even = 0,
float_round_down = 1,
float_round_up = 2,
float_round_to_zero = 3,
float_round_ties_away = 4,
};
/*----------------------------------------------------------------------------
| Software IEC/IEEE floating-point exception flags.
| Each constant is a distinct single bit, and together they use all eight
| bits of the uint8_t float_status.float_exception_flags.
| NOTE(review): float_flag_signaling and float_flag_decimal look like
| additions for the 68K FPU emulation -- confirm how the caller maps them to
| status register bits.
*----------------------------------------------------------------------------*/
enum {
float_flag_invalid = 0x01,
float_flag_denormal = 0x02,
float_flag_divbyzero = 0x04,
float_flag_overflow = 0x08,
float_flag_underflow = 0x10,
float_flag_inexact = 0x20,
float_flag_signaling = 0x40,
float_flag_decimal = 0x80
};
/*----------------------------------------------------------------------------
| Variables for storing sign, exponent and significand of overflowed or
| underflowed extended double-precision floating-point value.
| Variables for storing sign, exponent and significand of internal extended
| double-precision floating-point value for external use.
|
| These are declarations only; the definitions are not in this header.
| NOTE(review): they are file-scope globals rather than members of
| float_status, so they are presumably shared by every user of the library
| -- confirm before using the library from more than one thread.
*----------------------------------------------------------------------------*/
extern flag floatx80_internal_sign;
extern int32_t floatx80_internal_exp;
extern uint64_t floatx80_internal_sig;
extern int32_t floatx80_internal_exp0;
extern uint64_t floatx80_internal_sig0;
extern uint64_t floatx80_internal_sig1;
extern int8_t floatx80_internal_precision;
extern int8_t floatx80_internal_mode;
/* Per-context floating-point state, passed by pointer to most routines
 * declared in this header.  The inline set_* / get_* accessors in this
 * header read and write the individual members. */
typedef struct float_status {
/* One of the float_tininess_* values. */
signed char float_detect_tininess;
/* One of the float_round_* values. */
signed char float_rounding_mode;
/* Bitwise OR of float_flag_* values. */
uint8_t float_exception_flags;
/* NOTE(review): valid values are not visible in this header -- confirm
 * against roundAndPackFloatx80(). */
signed char floatx80_rounding_precision;
/* should denormalised results go to zero and set the inexact flag? */
flag flush_to_zero;
/* should denormalised inputs go to zero and set the input_denormal flag? */
flag flush_inputs_to_zero;
flag default_nan_mode;
/* Has a setter (set_snan_bit_is_one) but no getter in this header. */
flag snan_bit_is_one;
} float_status;
/*----------------------------------------------------------------------------
| Function for getting sign, exponent and significand of extended
| double-precision floating-point intermediate result for external use.
|
| None of these take arguments or a float_status.
| NOTE(review): presumably they read the floatx80_internal_* globals
| declared in this header -- confirm against the definitions.
*----------------------------------------------------------------------------*/
floatx80 getFloatInternalOverflow( void );
floatx80 getFloatInternalUnderflow( void );
floatx80 getFloatInternalRoundedAll( void );
floatx80 getFloatInternalRoundedSome( void );
floatx80 getFloatInternalUnrounded( void );
floatx80 getFloatInternalFloatx80( void );
uint64_t getFloatInternalGRS( void );
/* Stores `val' as the tininess-detection mode in `status'. */
static inline void set_float_detect_tininess(int val, float_status *status)
{
    status->float_detect_tininess = (signed char) val;
}
/* Stores `val' as the rounding mode in `status'. */
static inline void set_float_rounding_mode(int val, float_status *status)
{
    status->float_rounding_mode = (signed char) val;
}
/* Overwrites (does not OR into) the exception flags held in `status'. */
static inline void set_float_exception_flags(int val, float_status *status)
{
    status->float_exception_flags = (uint8_t) val;
}
/* Stores `val' as the floatx80 rounding precision in `status'. */
static inline void set_floatx80_rounding_precision(int val, float_status *status)
{
    status->floatx80_rounding_precision = (signed char) val;
}
/* Sets the flush_to_zero switch in `status' to `val'. */
static inline void set_flush_to_zero(flag val, float_status *status)
{
    flag *const slot = &status->flush_to_zero;

    *slot = val;
}
/* Sets the flush_inputs_to_zero switch in `status' to `val'. */
static inline void set_flush_inputs_to_zero(flag val, float_status *status)
{
    flag *const slot = &status->flush_inputs_to_zero;

    *slot = val;
}
/* Sets the default_nan_mode switch in `status' to `val'. */
static inline void set_default_nan_mode(flag val, float_status *status)
{
    flag *const slot = &status->default_nan_mode;

    *slot = val;
}
/* Sets the snan_bit_is_one switch in `status' to `val'. */
static inline void set_snan_bit_is_one(flag val, float_status *status)
{
    flag *const slot = &status->snan_bit_is_one;

    *slot = val;
}
/* Returns the tininess-detection mode stored in `status'. */
static inline int get_float_detect_tininess(float_status *status)
{
    const int mode = status->float_detect_tininess;

    return mode;
}
/* Returns the rounding mode stored in `status'. */
static inline int get_float_rounding_mode(float_status *status)
{
    const int mode = status->float_rounding_mode;

    return mode;
}
/* Returns the accumulated exception flags stored in `status'; the flags are
 * left unchanged. */
static inline int get_float_exception_flags(float_status *status)
{
    const int flags = status->float_exception_flags;

    return flags;
}
/* Returns the floatx80 rounding precision stored in `status'. */
static inline int get_floatx80_rounding_precision(float_status *status)
{
    const int precision = status->floatx80_rounding_precision;

    return precision;
}
/* Returns the flush_to_zero switch stored in `status'. */
static inline flag get_flush_to_zero(float_status *status)
{
    const flag enabled = status->flush_to_zero;

    return enabled;
}
/* Returns the flush_inputs_to_zero switch stored in `status'. */
static inline flag get_flush_inputs_to_zero(float_status *status)
{
    const flag enabled = status->flush_inputs_to_zero;

    return enabled;
}
/* Returns the default_nan_mode switch stored in `status'. */
static inline flag get_default_nan_mode(float_status *status)
{
    const flag enabled = status->default_nan_mode;

    return enabled;
}
/*----------------------------------------------------------------------------
| Routine to raise any or all of the software IEC/IEEE floating-point
| exception flags.
| The prototype below is disabled.
| NOTE(review): presumably float_raise() is supplied as a static inline
| function by softfloat-specialize.h instead -- confirm that every user of
| float_raise() includes that header.
*----------------------------------------------------------------------------*/
//void float_raise(uint8_t flags, float_status *status);
/*----------------------------------------------------------------------------
| The pattern for a default generated single-precision NaN.
*----------------------------------------------------------------------------*/
#define float32_default_nan 0x7FFFFFFF
/*----------------------------------------------------------------------------
| The pattern for a default generated double-precision NaN.
*----------------------------------------------------------------------------*/
#define float64_default_nan LIT64( 0x7FFFFFFFFFFFFFFF )
/*----------------------------------------------------------------------------
| The pattern for a default generated extended double-precision NaN. The
| `high' and `low' values hold the most- and least-significant bits,
| respectively.
*----------------------------------------------------------------------------*/
#define floatx80_default_nan_high 0x7FFF
#define floatx80_default_nan_low LIT64( 0xFFFFFFFFFFFFFFFF )
/*----------------------------------------------------------------------------
| The pattern for a default generated extended double-precision infinity.
| NOTE(review): this low word is all zeros, whereas the floatx80_infinity
| constant in this header uses 0x8000000000000000 -- confirm which encoding
| each user expects.
*----------------------------------------------------------------------------*/
#define floatx80_default_infinity_low LIT64( 0x0000000000000000 )
/*----------------------------------------------------------------------------
| If `a' is denormal and we are in flush-to-zero mode then set the
| input-denormal exception and return zero. Otherwise just return the value.
*----------------------------------------------------------------------------*/
float64 float64_squash_input_denormal(float64 a, float_status *status);
/*----------------------------------------------------------------------------
| Options to indicate which negations to perform in float*_muladd()
| Using these differs from negating an input or output before calling
| the muladd function in that this means that a NaN doesn't have its
| sign bit inverted before it is propagated.
| We also support halving the result before rounding, as a special
| case to support the ARM fused-sqrt-step instruction FRSQRTS.
|
| Each option is a distinct bit, so options can be OR-ed together.
| NOTE(review): no float*_muladd() function is declared in this header;
| presumably these constants are left over from the upstream QEMU header --
| confirm whether they are still needed.
*----------------------------------------------------------------------------*/
enum {
float_muladd_negate_c = 1,
float_muladd_negate_product = 2,
float_muladd_negate_result = 4,
float_muladd_halve_result = 8,
};
/*----------------------------------------------------------------------------
| Software IEC/IEEE integer-to-floating-point conversion routines.
| These two take no float_status argument.
*----------------------------------------------------------------------------*/
floatx80 int32_to_floatx80(int32_t);
floatx80 int64_to_floatx80(int64_t);
/*----------------------------------------------------------------------------
| Software IEC/IEEE single-precision conversion routines.
*----------------------------------------------------------------------------*/
floatx80 float32_to_floatx80(float32, float_status *status);
floatx80 float32_to_floatx80_allowunnormal(float32, float_status *status);
/*----------------------------------------------------------------------------
| Software IEC/IEEE double-precision conversion routines.
*----------------------------------------------------------------------------*/
floatx80 float64_to_floatx80(float64, float_status *status);
floatx80 float64_to_floatx80_allowunnormal( float64 a, float_status *status );
/*----------------------------------------------------------------------------
| Software IEC/IEEE extended double-precision conversion routines.
| Prototypes inside #ifdef SOFTFLOAT_68K exist only in the 68K build.
*----------------------------------------------------------------------------*/
int32_t floatx80_to_int32(floatx80, float_status *status);
#ifdef SOFTFLOAT_68K
int16_t floatx80_to_int16(floatx80, float_status *status);
int8_t floatx80_to_int8(floatx80, float_status *status);
#endif
int32_t floatx80_to_int32_round_to_zero(floatx80, float_status *status);
int64_t floatx80_to_int64(floatx80, float_status *status);
float32 floatx80_to_float32(floatx80, float_status *status);
float64 floatx80_to_float64(floatx80, float_status *status);
#ifdef SOFTFLOAT_68K
floatx80 floatx80_to_floatx80( floatx80, float_status *status);
/* NOTE(review): the meaning of the int32_t* argument of
 * floatx80_to_floatdecimal is not visible here -- see its definition. */
floatx80 floatdecimal_to_floatx80(floatx80, float_status *status);
floatx80 floatx80_to_floatdecimal(floatx80, int32_t*, float_status *status);
#endif
/* Field accessors for a floatx80 value. */
uint64_t extractFloatx80Frac( floatx80 a );
int32_t extractFloatx80Exp( floatx80 a );
flag extractFloatx80Sign( floatx80 a );
/* Rounding helpers; each returns a floatx80. */
floatx80 floatx80_round_to_int_toward_zero( floatx80 a, float_status *status);
floatx80 floatx80_round_to_float32( floatx80, float_status *status );
floatx80 floatx80_round_to_float64( floatx80, float_status *status );
floatx80 floatx80_round32( floatx80, float_status *status);
floatx80 floatx80_round64( floatx80, float_status *status);
/* Comparisons; each returns a flag (0 or 1). */
flag floatx80_eq( floatx80, floatx80, float_status *status);
flag floatx80_le( floatx80, floatx80, float_status *status);
flag floatx80_lt( floatx80, floatx80, float_status *status);
#ifdef SOFTFLOAT_68K
/* Prototypes that exist only in the 68K build. */
// functions are in softfloat.cpp
floatx80 floatx80_move( floatx80 a, float_status *status );
floatx80 floatx80_abs( floatx80 a, float_status *status );
floatx80 floatx80_neg( floatx80 a, float_status *status );
floatx80 floatx80_getexp( floatx80 a, float_status *status );
floatx80 floatx80_getman( floatx80 a, float_status *status );
floatx80 floatx80_scale(floatx80 a, floatx80 b, float_status *status );
/* NOTE(review): `q' and `s' are output pointers, presumably the quotient
 * and its sign -- confirm against the definitions. */
floatx80 floatx80_rem( floatx80 a, floatx80 b, uint64_t *q, flag *s, float_status *status );
floatx80 floatx80_mod( floatx80 a, floatx80 b, uint64_t *q, flag *s, float_status *status );
floatx80 floatx80_sglmul( floatx80 a, floatx80 b, float_status *status );
floatx80 floatx80_sgldiv( floatx80 a, floatx80 b, float_status *status );
/* Unlike floatx80_eq/le/lt, cmp and tst return a floatx80 rather than a flag. */
floatx80 floatx80_cmp( floatx80 a, floatx80 b, float_status *status );
floatx80 floatx80_tst( floatx80 a, float_status *status );
// functions are in softfloat_fpsp.cpp
floatx80 floatx80_acos(floatx80 a, float_status *status);
floatx80 floatx80_asin(floatx80 a, float_status *status);
floatx80 floatx80_atan(floatx80 a, float_status *status);
floatx80 floatx80_atanh(floatx80 a, float_status *status);
floatx80 floatx80_cos(floatx80 a, float_status *status);
floatx80 floatx80_cosh(floatx80 a, float_status *status);
floatx80 floatx80_etox(floatx80 a, float_status *status);
floatx80 floatx80_etoxm1(floatx80 a, float_status *status);
floatx80 floatx80_log10(floatx80 a, float_status *status);
floatx80 floatx80_log2(floatx80 a, float_status *status);
floatx80 floatx80_logn(floatx80 a, float_status *status);
floatx80 floatx80_lognp1(floatx80 a, float_status *status);
floatx80 floatx80_sin(floatx80 a, float_status *status);
floatx80 floatx80_sinh(floatx80 a, float_status *status);
floatx80 floatx80_tan(floatx80 a, float_status *status);
floatx80 floatx80_tanh(floatx80 a, float_status *status);
floatx80 floatx80_tentox(floatx80 a, float_status *status);
floatx80 floatx80_twotox(floatx80 a, float_status *status);
#endif
// functions originally internal to softfloat.cpp
/* Exposed here so that other translation units can call them. */
void normalizeFloatx80Subnormal( uint64_t aSig, int32_t *zExpPtr, uint64_t *zSigPtr );
floatx80 packFloatx80( flag zSign, int32_t zExp, uint64_t zSig );
floatx80 roundAndPackFloatx80(int8_t roundingPrecision, flag zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1, float_status *status);
/*----------------------------------------------------------------------------
| Software IEC/IEEE extended double-precision operations.
*----------------------------------------------------------------------------*/
floatx80 floatx80_round_to_int(floatx80, float_status *status);
floatx80 floatx80_add(floatx80, floatx80, float_status *status);
floatx80 floatx80_sub(floatx80, floatx80, float_status *status);
floatx80 floatx80_mul(floatx80, floatx80, float_status *status);
floatx80 floatx80_div(floatx80, floatx80, float_status *status);
floatx80 floatx80_sqrt(floatx80, float_status *status);
/* These two take no float_status argument.
 * NOTE(review): the meaning of the flag argument of floatx80_denormalize is
 * not visible here -- see its definition. */
floatx80 floatx80_normalize(floatx80);
floatx80 floatx80_denormalize(floatx80, flag);
/* Returns 1 when the exponent field of `a' is 0 (the significand is not
 * examined), otherwise 0. */
static inline int floatx80_is_zero_or_denormal(floatx80 a)
{
    const int exponent = a.high & 0x7fff;

    return exponent == 0;
}
/* Returns 1 when `a' has an all-ones exponent and any significand bit below
 * bit 63 set, otherwise 0.  Signaling and non-signaling NaNs are not
 * distinguished. */
static inline int floatx80_is_any_nan(floatx80 a)
{
    if ((a.high & 0x7fff) != 0x7fff) {
        return 0;
    }
    return (a.low << 1) != 0;
}
/*----------------------------------------------------------------------------
| Reports whether `a' is an invalid floatx80 encoding as tested here: the
| integer bit (bit 63 of the significand) is clear while the exponent is
| neither 0 nor 0x7FFF.  An exponent of 0 is always accepted, and so is an
| exponent of 0x7FFF regardless of the integer bit.
*----------------------------------------------------------------------------*/
static inline bool floatx80_invalid_encoding(floatx80 a)
{
    const int exponent = a.high & 0x7FFF;

    if ((a.low & (1ULL << 63)) != 0) {
        return false;
    }
    return exponent != 0 && exponent != 0x7FFF;
}
/* Frequently used floatx80 constants, written as (high word, significand).
 * NOTE(review): make_floatx80 is not defined anywhere in this header;
 * presumably it is provided elsewhere or these macros are unused -- confirm
 * before relying on them. */
#define floatx80_zero make_floatx80(0x0000, 0x0000000000000000LL)
#define floatx80_one make_floatx80(0x3fff, 0x8000000000000000LL)
#define floatx80_ln2 make_floatx80(0x3ffe, 0xb17217f7d1cf79acLL)
#define floatx80_pi make_floatx80(0x4000, 0xc90fdaa22168c235LL)
#define floatx80_half make_floatx80(0x3ffe, 0x8000000000000000LL)
/* Integer bit set here, unlike floatx80_default_infinity_low, which is 0. */
#define floatx80_infinity make_floatx80(0x7fff, 0x8000000000000000LL)
#endif /* SOFTFLOAT_H */

View file

@ -1,461 +0,0 @@
/*============================================================================
This C source file is an extension to the SoftFloat IEC/IEEE Floating-point
Arithmetic Package, Release 2a.
=============================================================================*/
#include <stdint.h>
#include "sysconfig.h"
#include "sysdeps.h"
#define DECIMAL_LOG 0
#if DECIMAL_LOG
#define decimal_log write_log
#else
#define decimal_log(fmt, ...)
#endif
#include "softfloat.h"
#include "softfloat-macros.h"
#include "softfloat/softfloat-specialize.h"
/*----------------------------------------------------------------------------
| Methods for converting decimal floats to binary extended precision floats.
*----------------------------------------------------------------------------*/
/*
 * Rounds the 128-bit significand *aSig0:*aSig1 to its upper 64 bits, using
 * the rounding mode found in `status`.
 *
 *   aSign   sign of the value; only consulted for the directed modes.
 *   aExp    in/out exponent; incremented when rounding carries out of the
 *           upper word, and forced to 0 when no increment happens and the
 *           upper word is zero.
 *   aSig0   in/out upper 64 bits of the significand.
 *   aSig1   in: lower 64 bits (the part being rounded away); out: always 0.
 *
 * No exception flags are raised here.
 */
static void round128to64(flag aSign, int32_t *aExp, uint64_t *aSig0, uint64_t *aSig1, float_status *status)
{
    const flag nearestEven = (status->float_rounding_mode == float_round_nearest_even);
    int32_t exp = *aExp;
    uint64_t upper = *aSig0;
    const uint64_t lower = *aSig1;
    flag roundUp;

    if (nearestEven) {
        /* Round up when the discarded part is at least one half. */
        roundUp = ((int64_t) lower < 0);
    } else if (status->float_rounding_mode == float_round_to_zero) {
        roundUp = 0;
    } else if (aSign) {
        roundUp = (status->float_rounding_mode == float_round_down) && lower;
    } else {
        roundUp = (status->float_rounding_mode == float_round_up) && lower;
    }

    if (!roundUp) {
        if (upper == 0) {
            exp = 0;
        }
    } else {
        ++upper;
        if (upper == 0) {
            /* Carry out of the top bit: renormalize to 1.0 * 2^(exp+1). */
            ++exp;
            upper = LIT64(0x8000000000000000);
        } else if (nearestEven && (uint64_t) (lower << 1) == 0) {
            /* Exact tie in nearest-even mode: clear the LSB to land on even. */
            upper &= ~(uint64_t) 1;
        }
    }

    *aExp = exp;
    *aSig0 = upper;
    *aSig1 = 0;
}
/*
 * Multiplies the value (*aExp, *aSig0:*aSig1) by (bExp, bSig0:bSig1) in
 * place, rounding to 64 significand bits both before and after the multiply.
 *
 * The second operand is first rounded to 64 bits with round128to64() (as a
 * positive value).  The 256-bit product is then folded to 128 bits with a
 * sticky bit, shifted left by one if its top bit is clear, and the result is
 * rounded to 64 bits again.  Both roundings use the mode in `status`.
 */
static void mul128by128round(int32_t *aExp, uint64_t *aSig0, uint64_t *aSig1, int32_t bExp, uint64_t bSig0, uint64_t bSig1, float_status *status)
{
    int32_t exp = *aExp;
    const uint64_t a0 = *aSig0;
    const uint64_t a1 = *aSig1;
    uint64_t p0, p1, p2, p3;

    round128to64(0, &bExp, &bSig0, &bSig1, status);
    exp += bExp - 0x3FFE;

    mul128To256(a0, a1, bSig0, bSig1, &p0, &p1, &p2, &p3);
    /* Collapse the low 128 bits of the product into a sticky bit. */
    p1 |= (p2 | p3) != 0;

    if ((int64_t) p0 > 0) {
        /* Top bit clear (and product non-zero): shift up by one position. */
        shortShift128Left(p0, p1, 1, &p0, &p1);
        --exp;
    }

    *aExp = exp;
    *aSig0 = p0;
    *aSig1 = p1;
    round128to64(0, aExp, aSig0, aSig1, status);
}
/*
 * Multiplies the value (*aExp, *aSig0:*aSig1) by (bExp, bSig0:bSig1) in
 * place, keeping a 128-bit significand and performing no rounding.
 *
 * The exponents are added and re-biased by 0x3FFE.  The 256-bit product is
 * reduced to its upper 128 bits, with any non-zero lower bits recorded as a
 * sticky bit in the LSB.  If the top bit of the result is clear, it is
 * shifted left by one and the exponent decremented.
 */
static void mul128by128(int32_t *aExp, uint64_t *aSig0, uint64_t *aSig1, int32_t bExp, uint64_t bSig0, uint64_t bSig1)
{
    int32_t exp = *aExp;
    const uint64_t a0 = *aSig0;
    const uint64_t a1 = *aSig1;
    uint64_t p0, p1, p2, p3;

    exp += bExp - 0x3FFE;

    mul128To256(a0, a1, bSig0, bSig1, &p0, &p1, &p2, &p3);
    p1 |= (p2 | p3) != 0;

    if ((int64_t) p0 > 0) {
        shortShift128Left(p0, p1, 1, &p0, &p1);
        --exp;
    }

    *aExp = exp;
    *aSig0 = p0;
    *aSig1 = p1;
}
/*
 * Divides the value (*paExp, *paSig0:*paSig1) by (bExp, bSig0:bSig1) in place.
 * The quotient exponent is aExp - bExp + 0x3FFE (plus one if the dividend had
 * to be halved), and the quotient significand is returned as two 64-bit words.
 * No rounding is performed and no exception flags are raised; a non-zero
 * remainder may be folded into the LSB of the low word as a sticky bit.
 * NOTE(review): a zero divisor is not checked for here -- callers presumably
 * never pass one; confirm.
 */
static void div128by128(int32_t *paExp, uint64_t *paSig0, uint64_t *paSig1, int32_t bExp, uint64_t bSig0, uint64_t bSig1)
{
int32_t zExp, aExp;
uint64_t zSig0, zSig1, aSig0, aSig1;
uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
aExp = *paExp;
aSig0 = *paSig0;
aSig1 = *paSig1;
zExp = aExp - bExp + 0x3FFE;
/* If the divisor significand is <= the dividend significand, halve the
 * dividend and compensate in the exponent. */
if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
++zExp;
}
/* High quotient word: estimate, then step down while the remainder is
 * negative. */
zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
while ( (int64_t) rem0 < 0 ) {
--zSig0;
add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
}
/* Low quotient word: the estimate is only corrected (and the sticky bit only
 * computed) when its low 14 bits are <= 4.
 * NOTE(review): presumably the estimate's error bound makes the correction
 * unnecessary otherwise -- confirm against estimateDiv128To64. */
zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
if ( ( zSig1 & 0x3FFF ) <= 4 ) {
mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
while ( (int64_t) rem1 < 0 ) {
--zSig1;
add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
}
/* Any remainder left over makes the quotient inexact: record it as sticky. */
zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
}
*paExp = zExp;
*paSig0 = zSig0;
*paSig1 = zSig1;
}
/*
 * Computes 10^scale by binary exponentiation and stores it in
 * (*aExp, *aSig0:*aSig1).
 *
 *   mSign, eSign  sign flags supplied by the caller; together with the
 *                 current rounding mode they select a temporary rounding
 *                 mode for the intermediate products (see below).
 *   scale         the power of ten.  The loop shifts it right until it
 *                 reaches zero, so callers are expected to pass a
 *                 non-negative value.
 *
 * Temporary rounding mode:
 *   nearest-even  unchanged
 *   round-down    switched to round-up   when mSign != eSign
 *   round-up      switched to round-down when mSign != eSign
 *   to-zero       round-down when eSign == 0, otherwise round-up
 * The original rounding mode is restored before returning.
 */
static void tentoint128(flag mSign, flag eSign, int32_t *aExp, uint64_t *aSig0, uint64_t *aSig1, int32_t scale, float_status *status)
{
    const int8_t savedMode = status->float_rounding_mode;
    const int mode = status->float_rounding_mode;
    int32_t powExp;
    uint64_t powSig0, powSig1;

    if (mode == float_round_down) {
        if (mSign != eSign) {
            set_float_rounding_mode(float_round_up, status);
        }
    } else if (mode == float_round_up) {
        if (mSign != eSign) {
            set_float_rounding_mode(float_round_down, status);
        }
    } else if (mode == float_round_to_zero) {
        if (eSign == 0) {
            set_float_rounding_mode(float_round_down, status);
        } else {
            set_float_rounding_mode(float_round_up, status);
        }
    }

    /* Accumulator starts at 1.0. */
    *aExp = 0x3FFF;
    *aSig0 = LIT64(0x8000000000000000);
    *aSig1 = 0;

    /* Running power starts at 10.0 and is squared on every iteration. */
    powExp = 0x4002;
    powSig0 = LIT64(0xA000000000000000);
    powSig1 = 0;

    for (; scale; scale >>= 1) {
        if (scale & 1) {
            mul128by128round(aExp, aSig0, aSig1, powExp, powSig0, powSig1, status);
        }
        mul128by128(&powExp, &powSig0, &powSig1, powExp, powSig0, powSig1);
    }

    set_float_rounding_mode(savedMode, status);
}
/*
 * Returns 10^scale as an integer, computed by square-and-multiply in
 * unsigned 64-bit arithmetic (so overflow wraps silently).
 * `scale` is shifted right until it becomes zero; callers are expected to
 * pass a non-negative value.
 */
static int64_t tentointdec(int32_t scale)
{
    uint64_t result = 1;
    uint64_t power = 10;

    for (; scale != 0; scale >>= 1) {
        if ((scale & 1) != 0) {
            result *= power;
        }
        power *= power;
    }
    return result;
}
/*
 * Converts the value (zSign, zExp, zSig0:zSig1) to a 64-bit integer magnitude.
 *
 * The significand is shifted right (with jamming) by 0x403E - zExp so that
 * zSig0 holds the integer part and zSig1 the fraction, and the integer part
 * is then rounded according to the mode in `status`.  `zSign` only steers the
 * directed rounding modes; the returned value is not negated.
 * float_flag_inexact is raised when the fraction is non-zero.
 * No overflow check is performed.
 */
static int64_t float128toint64(flag zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1, float_status *status)
{
    int8_t mode;
    flag nearestEven, roundUp;

    shift128RightJamming(zSig0, zSig1, 0x403E - zExp, &zSig0, &zSig1);

    mode = status->float_rounding_mode;
    nearestEven = (mode == float_round_nearest_even);

    if (nearestEven) {
        roundUp = ((int64_t)zSig1 < 0);
    } else if (mode == float_round_to_zero) {
        roundUp = 0;
    } else if (zSign) {
        roundUp = (mode == float_round_down) && zSig1;
    } else {
        roundUp = (mode == float_round_up) && zSig1;
    }

    if (roundUp) {
        ++zSig0;
        if (nearestEven && (uint64_t)(zSig1 << 1) == 0) {
            /* Exact tie: force the result to be even. */
            zSig0 &= ~(uint64_t)1;
        }
    }

    if (zSig1) {
        float_raise(float_flag_inexact, status);
    }
    return (int64_t)zSig0;
}
/*
 * Estimates the decimal exponent of a value from its biased binary exponent
 * `aExp` and 64-bit significand `aSig`.
 * Returns 0 for a zero significand or for aExp == 0x3FFF, and -4932 for a
 * negative aExp.  Otherwise a fixed-point number is assembled from the
 * unbiased exponent and the significand's fraction bits, multiplied by a
 * constant, and rounded to the nearest integer (ties to even).
 * NOTE(review): the multiplier (0x3FFD, 0x9A209A84FBCFF798...) evaluates to
 * roughly 0.30103, i.e. presumably log10(2) -- confirm.
 */
static int32_t getDecimalExponent(int32_t aExp, uint64_t aSig)
{
flag zSign;
int32_t zExp, shiftCount;
uint64_t zSig0, zSig1;
if (aSig == 0 || aExp == 0x3FFF) {
return 0;
}
if (aExp < 0) {
return -4932;
}
/* Strip bit 63 (the integer bit) so only the fraction bits remain. */
aSig ^= LIT64(0x8000000000000000);
/* Work with the magnitude of the unbiased exponent; remember its sign. */
aExp -= 0x3FFF;
zSign = (aExp < 0);
aExp = zSign ? -aExp : aExp;
shiftCount = 31 - countLeadingZeros32(aExp);
zExp = 0x3FFF + shiftCount;
/* NOTE(review): aExp is non-zero here (0x3FFF returned early above), so
 * shiftCount looks like it can never be negative and this first branch
 * appears unreachable -- confirm against countLeadingZeros32. */
if (shiftCount < 0) {
shortShift128Left(aSig, 0, -shiftCount, &zSig0, &zSig1);
} else {
/* Align the fraction below the exponent magnitude, then combine them:
 * subtract for a negative exponent, add for a positive one. */
shift128Right(aSig, 0, shiftCount, &zSig0, &zSig1);
aSig = (uint64_t)aExp << (63 - shiftCount);
if (zSign) {
sub128(aSig, 0, zSig0, zSig1, &zSig0, &zSig1);
} else {
add128(aSig, 0, zSig0, zSig1, &zSig0, &zSig1);
}
}
/* Renormalize so the top bit of zSig0 is set. */
shiftCount = countLeadingZeros64(zSig0);
shortShift128Left(zSig0, zSig1, shiftCount, &zSig0, &zSig1);
zExp -= shiftCount;
mul128by128(&zExp, &zSig0, &zSig1, 0x3FFD, LIT64(0x9A209A84FBCFF798), LIT64(0x8F8959AC0B7C9178));
/* Shift so that zSig0 holds the integer part and zSig1 the fraction. */
shiftCount = 0x403E - zExp;
shift128RightJamming(zSig0, zSig1, shiftCount, &zSig0, &zSig1);
/* Round to nearest, ties to even. */
if ((int64_t)zSig1 < 0) {
++zSig0;
zSig0 &= ~(((int64_t)(zSig1<<1) == 0) & 1);
}
zExp = zSign ? -zSig0 : zSig0;
return zExp;
}
/*----------------------------------------------------------------------------
| Decimal to binary
*----------------------------------------------------------------------------*/
/*
 * Converts a decimal number, carried in the fields of a floatx80, to a binary
 * floatx80.
 *
 * Layout of the input `a` as read by this routine:
 *   sign bit         sign of the decimal mantissa (and of the result)
 *   exponent field   bit 14 = sign of the decimal exponent (1 = negative),
 *                    bits 0-13 = magnitude of the decimal exponent;
 *                    the value 0x7FFF is passed through unchanged
 *   significand      decimal mantissa as a plain 64-bit binary integer
 *
 * The result is mantissa * 10^(+/-exponent), rounded to 64 significand bits
 * using the rounding mode in `status`.  float_flag_decimal is raised when the
 * low 64 bits of the 128-bit intermediate result are non-zero.
 */
floatx80 floatdecimal_to_floatx80(floatx80 a, float_status *status)
{
    flag decSign, zSign, decExpSign;
    int32_t decExp, zExp, xExp, shiftCount;
    uint64_t decSig, zSig0, zSig1, xSig0, xSig1;

    decSign = extractFloatx80Sign(a);
    decExp = extractFloatx80Exp(a);
    decSig = extractFloatx80Frac(a);

    if (decExp == 0x7FFF) return a;
    if (decExp == 0 && decSig == 0) return a;

    /* A zero mantissa is zero whatever the decimal exponent says.  Returning
     * here also keeps the normalization below well defined: a zero mantissa
     * would otherwise be left-shifted by its full leading-zero count (the
     * whole 64-bit width), which is undefined behaviour in C. */
    if (decSig == 0) return packFloatx80(decSign, 0, 0);

    decExpSign = (decExp >> 14) & 1;
    decExp &= 0x3FFF;

    /* Normalize the integer mantissa into a floating-point significand. */
    shiftCount = countLeadingZeros64( decSig );
    zExp = 0x403E - shiftCount;
    zSig0 = decSig << shiftCount;
    zSig1 = 0;
    zSign = decSign;

    /* x = 10^|exponent|; divide by it for negative exponents, multiply
     * otherwise. */
    tentoint128(decSign, decExpSign, &xExp, &xSig0, &xSig1, decExp, status);
    if (decExpSign) {
        div128by128(&zExp, &zSig0, &zSig1, xExp, xSig0, xSig1);
    } else {
        mul128by128(&zExp, &zSig0, &zSig1, xExp, xSig0, xSig1);
    }

    if (zSig1) float_raise(float_flag_decimal, status);
    round128to64(zSign, &zExp, &zSig0, &zSig1, status);
    return packFloatx80( zSign, zExp, zSig0 );
}
/*----------------------------------------------------------------------------
| Binary to decimal
*----------------------------------------------------------------------------*/
/*
 * Converts a binary floatx80 to a decimal number carried in floatx80 fields.
 *
 * In:  a   value to convert
 *      *k  k-factor: when > 0 it is the requested number of digits (values
 *          above 17 are clamped to 17 and raise float_flag_invalid); when
 *          <= 0 the digit count is ilog + 1 - k, clamped to the range 1..17.
 * Out: *k  number of digits actually produced (len)
 *
 * Returned value layout:
 *   sign bit         sign of `a`
 *   exponent field   magnitude of the decimal exponent, with bit 0x4000 set
 *                    when the decimal exponent is negative
 *   significand      the decimal digits as a plain 64-bit binary integer
 *
 * NaNs go through propagateFloatx80NaNOneArg(); infinities are returned
 * unchanged; zeros come back as a signed zero.  In those early-return cases
 * *k is left untouched.  float_flag_invalid is also raised when the decimal
 * exponent magnitude exceeds 999.
 */
floatx80 floatx80_to_floatdecimal(floatx80 a, int32_t *k, float_status *status)
{
flag aSign, decSign;
int32_t aExp, decExp, zExp, xExp;
uint64_t aSig, decSig, decX, zSig0, zSig1, xSig0, xSig1;
flag ictr, lambda;
int32_t kfactor, ilog, iscale, len;
aSign = extractFloatx80Sign(a);
aExp = extractFloatx80Exp(a);
aSig = extractFloatx80Frac(a);
if (aExp == 0x7FFF) {
if ((uint64_t) (aSig<<1)) return propagateFloatx80NaNOneArg(a, status);
return a;
}
if (aExp == 0) {
if (aSig == 0) return packFloatx80(aSign, 0, 0);
normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
}
kfactor = *k;
/* First guess at the decimal exponent; it may be corrected by one below. */
ilog = getDecimalExponent(aExp, aSig);
/* ictr records whether that one allowed correction has been used. */
ictr = 0;
try_again:
decimal_log(_T("ILOG = %i\n"), ilog);
/* Work out how many decimal digits (len) to produce. */
if (kfactor > 0) {
if (kfactor > 17) {
kfactor = 17;
float_raise(float_flag_invalid, status);
}
len = kfactor;
} else {
len = ilog + 1 - kfactor;
if (len > 17) {
len = 17;
}
if (len < 1) {
len = 1;
}
if (kfactor > ilog) {
ilog = kfactor;
decimal_log(_T("ILOG is kfactor = %i\n"), ilog);
}
}
decimal_log(_T("LEN = %i\n"),len);
/* Scale factor is 10^|ilog + 1 - len|; lambda = 1 means multiply by it,
 * lambda = 0 means divide by it. */
lambda = 0;
iscale = ilog + 1 - len;
if (iscale < 0) {
lambda = 1;
iscale = -iscale;
}
decimal_log(_T("ISCALE = %i, LAMBDA = %i\n"),iscale, lambda);
tentoint128(lambda, 0, &xExp, &xSig0, &xSig1, iscale, status);
decimal_log(_T("AFTER tentoint128: zExp = %04x, zSig0 = %16llx, zSig1 = %16llx\n"), xExp, xSig0, xSig1);
zExp = aExp;
zSig0 = aSig;
zSig1 = 0;
if (lambda) {
mul128by128(&zExp, &zSig0, &zSig1, xExp, xSig0, xSig1);
} else {
div128by128(&zExp, &zSig0, &zSig1, xExp, xSig0, xSig1);
}
decimal_log(_T("BEFORE: zExp = %04x, zSig0 = %16llx, zSig1 = %16llx\n"),zExp,zSig0,zSig1);
/* Round the scaled value to an integer holding the decimal digits. */
decSig = float128toint64(aSign, zExp, zSig0, zSig1, status);
decimal_log(_T("AFTER: decSig = %llu\n"),decSig);
/* On the first pass only, check that the result has exactly len digits
 * (10^(len-1) <= decSig); if not, adjust ilog by one and redo the scaling. */
if (ictr == 0) {
decX = tentointdec(len - 1);
if (decSig < decX) { // z < x
ilog -= 1;
ictr = 1;
goto try_again;
}
decX *= 10;
/* NOTE(review): decSig == 10^len is not caught by this strict comparison
 * although it has len + 1 digits -- confirm whether that case can occur. */
if (decSig > decX) { // z > x
ilog += 1;
ictr = 1;
goto try_again;
}
}
decSign = aSign;
decExp = (ilog < 0) ? -ilog : ilog;
if (decExp > 999) {
float_raise(float_flag_invalid, status);
}
/* Bit 0x4000 of the exponent field marks a negative decimal exponent. */
if (ilog < 0) decExp |= 0x4000;
*k = len;
return packFloatx80(decSign, decExp, decSig);
}

File diff suppressed because it is too large Load diff

View file

@ -1,528 +0,0 @@
/* 128-entry table of extended-precision constants, each written as
 * {16-bit sign/exponent word, 64-bit significand}.  The values are positive
 * and increase monotonically through the table.
 * NOTE(review): presumably arctangent values consumed by an atan routine that
 * is not visible here -- confirm the indexing scheme against its user. */
static const floatx80 atan_tbl[128] = {
{0x3FFB, LIT64(0x83D152C5060B7A51)},
{0x3FFB, LIT64(0x8BC8544565498B8B)},
{0x3FFB, LIT64(0x93BE406017626B0D)},
{0x3FFB, LIT64(0x9BB3078D35AEC202)},
{0x3FFB, LIT64(0xA3A69A525DDCE7DE)},
{0x3FFB, LIT64(0xAB98E94362765619)},
{0x3FFB, LIT64(0xB389E502F9C59862)},
{0x3FFB, LIT64(0xBB797E436B09E6FB)},
{0x3FFB, LIT64(0xC367A5C739E5F446)},
{0x3FFB, LIT64(0xCB544C61CFF7D5C6)},
{0x3FFB, LIT64(0xD33F62F82488533E)},
{0x3FFB, LIT64(0xDB28DA8162404C77)},
{0x3FFB, LIT64(0xE310A4078AD34F18)},
{0x3FFB, LIT64(0xEAF6B0A8188EE1EB)},
{0x3FFB, LIT64(0xF2DAF1949DBE79D5)},
{0x3FFB, LIT64(0xFABD581361D47E3E)},
{0x3FFC, LIT64(0x8346AC210959ECC4)},
{0x3FFC, LIT64(0x8B232A08304282D8)},
{0x3FFC, LIT64(0x92FB70B8D29AE2F9)},
{0x3FFC, LIT64(0x9ACF476F5CCD1CB4)},
{0x3FFC, LIT64(0xA29E76304954F23F)},
{0x3FFC, LIT64(0xAA68C5D08AB85230)},
{0x3FFC, LIT64(0xB22DFFFD9D539F83)},
{0x3FFC, LIT64(0xB9EDEF453E900EA5)},
{0x3FFC, LIT64(0xC1A85F1CC75E3EA5)},
{0x3FFC, LIT64(0xC95D1BE828138DE6)},
{0x3FFC, LIT64(0xD10BF300840D2DE4)},
{0x3FFC, LIT64(0xD8B4B2BA6BC05E7A)},
{0x3FFC, LIT64(0xE0572A6BB42335F6)},
{0x3FFC, LIT64(0xE7F32A70EA9CAA8F)},
{0x3FFC, LIT64(0xEF88843264ECEFAA)},
{0x3FFC, LIT64(0xF7170A28ECC06666)},
{0x3FFD, LIT64(0x812FD288332DAD32)},
{0x3FFD, LIT64(0x88A8D1B1218E4D64)},
{0x3FFD, LIT64(0x9012AB3F23E4AEE8)},
{0x3FFD, LIT64(0x976CC3D411E7F1B9)},
{0x3FFD, LIT64(0x9EB689493889A227)},
{0x3FFD, LIT64(0xA5EF72C34487361B)},
{0x3FFD, LIT64(0xAD1700BAF07A7227)},
{0x3FFD, LIT64(0xB42CBCFAFD37EFB7)},
{0x3FFD, LIT64(0xBB303A940BA80F89)},
{0x3FFD, LIT64(0xC22115C6FCAEBBAF)},
{0x3FFD, LIT64(0xC8FEF3E686331221)},
{0x3FFD, LIT64(0xCFC98330B4000C70)},
{0x3FFD, LIT64(0xD6807AA1102C5BF9)},
{0x3FFD, LIT64(0xDD2399BC31252AA3)},
{0x3FFD, LIT64(0xE3B2A8556B8FC517)},
{0x3FFD, LIT64(0xEA2D764F64315989)},
{0x3FFD, LIT64(0xF3BF5BF8BAD1A21D)},
{0x3FFE, LIT64(0x801CE39E0D205C9A)},
{0x3FFE, LIT64(0x8630A2DADA1ED066)},
{0x3FFE, LIT64(0x8C1AD445F3E09B8C)},
{0x3FFE, LIT64(0x91DB8F1664F350E2)},
{0x3FFE, LIT64(0x97731420365E538C)},
{0x3FFE, LIT64(0x9CE1C8E6A0B8CDBA)},
{0x3FFE, LIT64(0xA22832DBCADAAE09)},
{0x3FFE, LIT64(0xA746F2DDB7602294)},
{0x3FFE, LIT64(0xAC3EC0FB997DD6A2)},
{0x3FFE, LIT64(0xB110688AEBDC6F6A)},
{0x3FFE, LIT64(0xB5BCC49059ECC4B0)},
{0x3FFE, LIT64(0xBA44BC7DD470782F)},
{0x3FFE, LIT64(0xBEA94144FD049AAC)},
{0x3FFE, LIT64(0xC2EB4ABB661628B6)},
{0x3FFE, LIT64(0xC70BD54CE602EE14)},
{0x3FFE, LIT64(0xCD000549ADEC7159)},
{0x3FFE, LIT64(0xD48457D2D8EA4EA3)},
{0x3FFE, LIT64(0xDB948DA712DECE3B)},
{0x3FFE, LIT64(0xE23855F969E8096A)},
{0x3FFE, LIT64(0xE8771129C4353259)},
{0x3FFE, LIT64(0xEE57C16E0D379C0D)},
{0x3FFE, LIT64(0xF3E10211A87C3779)},
{0x3FFE, LIT64(0xF919039D758B8D41)},
{0x3FFE, LIT64(0xFE058B8F64935FB3)},
{0x3FFF, LIT64(0x8155FB497B685D04)},
{0x3FFF, LIT64(0x83889E3549D108E1)},
{0x3FFF, LIT64(0x859CFA76511D724B)},
{0x3FFF, LIT64(0x87952ECFFF8131E7)},
{0x3FFF, LIT64(0x89732FD19557641B)},
{0x3FFF, LIT64(0x8B38CAD101932A35)},
{0x3FFF, LIT64(0x8CE7A8D8301EE6B5)},
{0x3FFF, LIT64(0x8F46A39E2EAE5281)},
{0x3FFF, LIT64(0x922DA7D791888487)},
{0x3FFF, LIT64(0x94D19FCBDEDF5241)},
{0x3FFF, LIT64(0x973AB94419D2A08B)},
{0x3FFF, LIT64(0x996FF00E08E10B96)},
{0x3FFF, LIT64(0x9B773F9512321DA7)},
{0x3FFF, LIT64(0x9D55CC320F935624)},
{0x3FFF, LIT64(0x9F100575006CC571)},
{0x3FFF, LIT64(0xA0A9C290D97CC06C)},
{0x3FFF, LIT64(0xA22659EBEBC0630A)},
{0x3FFF, LIT64(0xA388B4AFF6EF0EC9)},
{0x3FFF, LIT64(0xA4D35F1061D292C4)},
{0x3FFF, LIT64(0xA60895DCFBE3187E)},
{0x3FFF, LIT64(0xA72A51DC7367BEAC)},
{0x3FFF, LIT64(0xA83A51530956168F)},
{0x3FFF, LIT64(0xA93A20077539546E)},
{0x3FFF, LIT64(0xAA9E7245023B2605)},
{0x3FFF, LIT64(0xAC4C84BA6FE4D58F)},
{0x3FFF, LIT64(0xADCE4A4A606B9712)},
{0x3FFF, LIT64(0xAF2A2DCD8D263C9C)},
{0x3FFF, LIT64(0xB0656F81F22265C7)},
{0x3FFF, LIT64(0xB18465150F71496A)},
{0x3FFF, LIT64(0xB28AAA156F9ADA35)},
{0x3FFF, LIT64(0xB37B44FF3766B895)},
{0x3FFF, LIT64(0xB458C3DCE9630433)},
{0x3FFF, LIT64(0xB525529D562246BD)},
{0x3FFF, LIT64(0xB5E2CCA95F9D88CC)},
{0x3FFF, LIT64(0xB692CADA7ACA1ADA)},
{0x3FFF, LIT64(0xB736AEA7A6925838)},
{0x3FFF, LIT64(0xB7CFAB287E9F7B36)},
{0x3FFF, LIT64(0xB85ECC66CB219835)},
{0x3FFF, LIT64(0xB8E4FD5A20A593DA)},
{0x3FFF, LIT64(0xB99F41F64AFF9BB5)},
{0x3FFF, LIT64(0xBA7F1E17842BBE7B)},
{0x3FFF, LIT64(0xBB4712857637E17D)},
{0x3FFF, LIT64(0xBBFABE8A4788DF6F)},
{0x3FFF, LIT64(0xBC9D0FAD2B689D79)},
{0x3FFF, LIT64(0xBD306A39471ECD86)},
{0x3FFF, LIT64(0xBDB6C731856AF18A)},
{0x3FFF, LIT64(0xBE31CAC502E80D70)},
{0x3FFF, LIT64(0xBEA2D55CE33194E2)},
{0x3FFF, LIT64(0xBF0B10B7C03128F0)},
{0x3FFF, LIT64(0xBF6B7A18DACB778D)},
{0x3FFF, LIT64(0xBFC4EA4663FA18F6)},
{0x3FFF, LIT64(0xC0181BDE8B89A454)},
{0x3FFF, LIT64(0xC065B066CFBF6439)},
{0x3FFF, LIT64(0xC0AE345F56340AE6)},
{0x3FFF, LIT64(0xC0F222919CB9E6A7)}
};
/* 64-entry table of extended-precision constants, each written as
 * {16-bit sign/exponent word, 64-bit significand}.  Entry 0 is exactly 1.0
 * and every entry has exponent word 0x3FFF, so all values lie in [1, 2).
 * NOTE(review): presumably the leading part of 2^(j/64) for j = 0..63, paired
 * with the float32 entries of exp_tbl2 -- confirm against the exp routine. */
static const floatx80 exp_tbl[64] = {
{0x3FFF, LIT64(0x8000000000000000)},
{0x3FFF, LIT64(0x8164D1F3BC030774)},
{0x3FFF, LIT64(0x82CD8698AC2BA1D8)},
{0x3FFF, LIT64(0x843A28C3ACDE4048)},
{0x3FFF, LIT64(0x85AAC367CC487B14)},
{0x3FFF, LIT64(0x871F61969E8D1010)},
{0x3FFF, LIT64(0x88980E8092DA8528)},
{0x3FFF, LIT64(0x8A14D575496EFD9C)},
{0x3FFF, LIT64(0x8B95C1E3EA8BD6E8)},
{0x3FFF, LIT64(0x8D1ADF5B7E5BA9E4)},
{0x3FFF, LIT64(0x8EA4398B45CD53C0)},
{0x3FFF, LIT64(0x9031DC431466B1DC)},
{0x3FFF, LIT64(0x91C3D373AB11C338)},
{0x3FFF, LIT64(0x935A2B2F13E6E92C)},
{0x3FFF, LIT64(0x94F4EFA8FEF70960)},
{0x3FFF, LIT64(0x96942D3720185A00)},
{0x3FFF, LIT64(0x9837F0518DB8A970)},
{0x3FFF, LIT64(0x99E0459320B7FA64)},
{0x3FFF, LIT64(0x9B8D39B9D54E5538)},
{0x3FFF, LIT64(0x9D3ED9A72CFFB750)},
{0x3FFF, LIT64(0x9EF5326091A111AC)},
{0x3FFF, LIT64(0xA0B0510FB9714FC4)},
{0x3FFF, LIT64(0xA27043030C496818)},
{0x3FFF, LIT64(0xA43515AE09E680A0)},
{0x3FFF, LIT64(0xA5FED6A9B15138EC)},
{0x3FFF, LIT64(0xA7CD93B4E9653568)},
{0x3FFF, LIT64(0xA9A15AB4EA7C0EF8)},
{0x3FFF, LIT64(0xAB7A39B5A93ED338)},
{0x3FFF, LIT64(0xAD583EEA42A14AC8)},
{0x3FFF, LIT64(0xAF3B78AD690A4374)},
{0x3FFF, LIT64(0xB123F581D2AC2590)},
{0x3FFF, LIT64(0xB311C412A9112488)},
{0x3FFF, LIT64(0xB504F333F9DE6484)},
{0x3FFF, LIT64(0xB6FD91E328D17790)},
{0x3FFF, LIT64(0xB8FBAF4762FB9EE8)},
{0x3FFF, LIT64(0xBAFF5AB2133E45FC)},
{0x3FFF, LIT64(0xBD08A39F580C36C0)},
{0x3FFF, LIT64(0xBF1799B67A731084)},
{0x3FFF, LIT64(0xC12C4CCA66709458)},
{0x3FFF, LIT64(0xC346CCDA24976408)},
{0x3FFF, LIT64(0xC5672A115506DADC)},
{0x3FFF, LIT64(0xC78D74C8ABB9B15C)},
{0x3FFF, LIT64(0xC9B9BD866E2F27A4)},
{0x3FFF, LIT64(0xCBEC14FEF2727C5C)},
{0x3FFF, LIT64(0xCE248C151F8480E4)},
{0x3FFF, LIT64(0xD06333DAEF2B2594)},
{0x3FFF, LIT64(0xD2A81D91F12AE45C)},
{0x3FFF, LIT64(0xD4F35AABCFEDFA20)},
{0x3FFF, LIT64(0xD744FCCAD69D6AF4)},
{0x3FFF, LIT64(0xD99D15C278AFD7B4)},
{0x3FFF, LIT64(0xDBFBB797DAF23754)},
{0x3FFF, LIT64(0xDE60F4825E0E9124)},
{0x3FFF, LIT64(0xE0CCDEEC2A94E110)},
{0x3FFF, LIT64(0xE33F8972BE8A5A50)},
{0x3FFF, LIT64(0xE5B906E77C8348A8)},
{0x3FFF, LIT64(0xE8396A503C4BDC68)},
{0x3FFF, LIT64(0xEAC0C6E7DD243930)},
{0x3FFF, LIT64(0xED4F301ED9942B84)},
{0x3FFF, LIT64(0xEFE4B99BDCDAF5CC)},
{0x3FFF, LIT64(0xF281773C59FFB138)},
{0x3FFF, LIT64(0xF5257D152486CC2C)},
{0x3FFF, LIT64(0xF7D0DF730AD13BB8)},
{0x3FFF, LIT64(0xFA83B2DB722A033C)},
{0x3FFF, LIT64(0xFD3E0C0CF486C174)}
};
/* 64 float32 values given as raw 32-bit patterns, one per entry of exp_tbl.
 * NOTE(review): presumably small correction terms added to the matching
 * exp_tbl entry to extend its precision -- confirm against the exp routine. */
static const float32 exp_tbl2[64] = {
0x00000000, 0x9F841A9B, 0x9FC1D5B9, 0xA0728369,
0x1FC5C95C, 0x1EE85C9F, 0x9FA20729, 0xA07BF9AF,
0xA0020DCF, 0x205A63DA, 0x1EB70051, 0x1F6EB029,
0xA0781494, 0x9EB319B0, 0x2017457D, 0x1F11D537,
0x9FB952DD, 0x1FE43087, 0x1FA2A818, 0x1FDE494D,
0x20504890, 0xA073691C, 0x1F9B7A05, 0xA0797126,
0xA071A140, 0x204F62DA, 0x1F283C4A, 0x9F9A7FDC,
0xA05B3FAC, 0x1FDF2610, 0x9F705F90, 0x201F678A,
0x1F32FB13, 0x20038B30, 0x200DC3CC, 0x9F8B2AE6,
0xA02BBF70, 0xA00BF518, 0xA041DD41, 0x9FDF137B,
0x201F1568, 0x1FC13A2E, 0xA03F8F03, 0x1FF4907D,
0x9E6E53E4, 0x1FD6D45C, 0xA076EDB9, 0x9FA6DE21,
0x1EE69A2F, 0x207F439F, 0x201EC207, 0x9E8BE175,
0x20032C4B, 0x2004DFF5, 0x1E72F47A, 0x1F722F22,
0xA017E945, 0x1F401A5B, 0x9FB9A9E3, 0x20744C05,
0x1F773A19, 0x1FFE90D5, 0xA041ED22, 0x1F853F3A
};
/* 64-entry table of extended-precision constants in [1, 2), each written as
 * {16-bit sign/exponent word, 64-bit significand}.  Entry 0 is exactly 1.0.
 * The values match exp_tbl except in the lowest significand bits.
 * NOTE(review): presumably 2^(j/64) for j = 0..63 as used by the 2^x / 10^x
 * routines, paired with exp2_tbl2 -- confirm against their implementation. */
static const floatx80 exp2_tbl[64] = {
{0x3FFF, LIT64(0x8000000000000000)},
{0x3FFF, LIT64(0x8164D1F3BC030773)},
{0x3FFF, LIT64(0x82CD8698AC2BA1D7)},
{0x3FFF, LIT64(0x843A28C3ACDE4046)},
{0x3FFF, LIT64(0x85AAC367CC487B15)},
{0x3FFF, LIT64(0x871F61969E8D1010)},
{0x3FFF, LIT64(0x88980E8092DA8527)},
{0x3FFF, LIT64(0x8A14D575496EFD9A)},
{0x3FFF, LIT64(0x8B95C1E3EA8BD6E7)},
{0x3FFF, LIT64(0x8D1ADF5B7E5BA9E6)},
{0x3FFF, LIT64(0x8EA4398B45CD53C0)},
{0x3FFF, LIT64(0x9031DC431466B1DC)},
{0x3FFF, LIT64(0x91C3D373AB11C336)},
{0x3FFF, LIT64(0x935A2B2F13E6E92C)},
{0x3FFF, LIT64(0x94F4EFA8FEF70961)},
{0x3FFF, LIT64(0x96942D3720185A00)},
{0x3FFF, LIT64(0x9837F0518DB8A96F)},
{0x3FFF, LIT64(0x99E0459320B7FA65)},
{0x3FFF, LIT64(0x9B8D39B9D54E5539)},
{0x3FFF, LIT64(0x9D3ED9A72CFFB751)},
{0x3FFF, LIT64(0x9EF5326091A111AE)},
{0x3FFF, LIT64(0xA0B0510FB9714FC2)},
{0x3FFF, LIT64(0xA27043030C496819)},
{0x3FFF, LIT64(0xA43515AE09E6809E)},
{0x3FFF, LIT64(0xA5FED6A9B15138EA)},
{0x3FFF, LIT64(0xA7CD93B4E965356A)},
{0x3FFF, LIT64(0xA9A15AB4EA7C0EF8)},
{0x3FFF, LIT64(0xAB7A39B5A93ED337)},
{0x3FFF, LIT64(0xAD583EEA42A14AC6)},
{0x3FFF, LIT64(0xAF3B78AD690A4375)},
{0x3FFF, LIT64(0xB123F581D2AC2590)},
{0x3FFF, LIT64(0xB311C412A9112489)},
{0x3FFF, LIT64(0xB504F333F9DE6484)},
{0x3FFF, LIT64(0xB6FD91E328D17791)},
{0x3FFF, LIT64(0xB8FBAF4762FB9EE9)},
{0x3FFF, LIT64(0xBAFF5AB2133E45FB)},
{0x3FFF, LIT64(0xBD08A39F580C36BF)},
{0x3FFF, LIT64(0xBF1799B67A731083)},
{0x3FFF, LIT64(0xC12C4CCA66709456)},
{0x3FFF, LIT64(0xC346CCDA24976407)},
{0x3FFF, LIT64(0xC5672A115506DADD)},
{0x3FFF, LIT64(0xC78D74C8ABB9B15D)},
{0x3FFF, LIT64(0xC9B9BD866E2F27A3)},
{0x3FFF, LIT64(0xCBEC14FEF2727C5D)},
{0x3FFF, LIT64(0xCE248C151F8480E4)},
{0x3FFF, LIT64(0xD06333DAEF2B2595)},
{0x3FFF, LIT64(0xD2A81D91F12AE45A)},
{0x3FFF, LIT64(0xD4F35AABCFEDFA1F)},
{0x3FFF, LIT64(0xD744FCCAD69D6AF4)},
{0x3FFF, LIT64(0xD99D15C278AFD7B6)},
{0x3FFF, LIT64(0xDBFBB797DAF23755)},
{0x3FFF, LIT64(0xDE60F4825E0E9124)},
{0x3FFF, LIT64(0xE0CCDEEC2A94E111)},
{0x3FFF, LIT64(0xE33F8972BE8A5A51)},
{0x3FFF, LIT64(0xE5B906E77C8348A8)},
{0x3FFF, LIT64(0xE8396A503C4BDC68)},
{0x3FFF, LIT64(0xEAC0C6E7DD24392F)},
{0x3FFF, LIT64(0xED4F301ED9942B84)},
{0x3FFF, LIT64(0xEFE4B99BDCDAF5CB)},
{0x3FFF, LIT64(0xF281773C59FFB13A)},
{0x3FFF, LIT64(0xF5257D152486CC2C)},
{0x3FFF, LIT64(0xF7D0DF730AD13BB9)},
{0x3FFF, LIT64(0xFA83B2DB722A033A)},
{0x3FFF, LIT64(0xFD3E0C0CF486C175)}
};
/* 64 float32 values given as raw 32-bit patterns, one per entry of exp2_tbl.
 * NOTE(review): presumably companion correction data for exp2_tbl; how the
 * bits are interpreted is decided by the consuming routine, which is not
 * visible here -- confirm before relying on any particular meaning. */
static const float32 exp2_tbl2[64] = {
0x3F738000, 0x3FBEF7CA, 0x3FBDF8A9, 0x3FBCD7C9,
0xBFBDE8DA, 0x3FBDE85C, 0x3FBEBBF1, 0x3FBB80CA,
0xBFBA8373, 0xBFBE9670, 0x3FBDB700, 0x3FBEEEB0,
0x3FBBFD6D, 0xBFBDB319, 0x3FBDBA2B, 0x3FBE91D5,
0x3FBE8D5A, 0xBFBCDE7B, 0xBFBEBAAF, 0xBFBD86DA,
0xBFBEBEDD, 0x3FBCC96E, 0xBFBEC90B, 0x3FBBD1DB,
0x3FBCE5EB, 0xBFBEC274, 0x3FBEA83C, 0x3FBECB00,
0x3FBE9301, 0xBFBD8367, 0xBFBEF05F, 0x3FBDFB3C,
0x3FBEB2FB, 0x3FBAE2CB, 0x3FBCDC3C, 0x3FBEE9AA,
0xBFBEAEFD, 0xBFBCBF51, 0x3FBEF88A, 0x3FBD83B2,
0x3FBDF8AB, 0xBFBDFB17, 0xBFBEFE3C, 0xBFBBB6F8,
0xBFBCEE53, 0xBFBDA4AE, 0x3FBC9124, 0x3FBEB243,
0x3FBDE69A, 0xBFB8BC61, 0x3FBDF610, 0xBFBD8BE1,
0x3FBACB12, 0x3FBB9BFE, 0x3FBCF2F4, 0x3FBEF22F,
0xBFBDBF4A, 0x3FBEC01A, 0x3FBE8CAC, 0xBFBCBB3F,
0x3FBEF73A, 0xBFB8B795, 0x3FBEF84B, 0xBFBEF581
};
/* 128-entry table of extended-precision constants, each written as
 * {16-bit sign/exponent word, 64-bit significand}.  The entries form 64
 * consecutive pairs: the even-indexed values decrease through the table
 * while the odd-indexed values increase.
 * NOTE(review): presumably each pair is (1/F, log(F)) for a sequence of
 * breakpoints F, as used by a table-driven logarithm -- confirm against the
 * log routine, which is not visible here. */
static const floatx80 log_tbl[128] = {
{0x3FFE, LIT64(0xFE03F80FE03F80FE)},
{0x3FF7, LIT64(0xFF015358833C47E2)},
{0x3FFE, LIT64(0xFA232CF252138AC0)},
{0x3FF9, LIT64(0xBDC8D83EAD88D549)},
{0x3FFE, LIT64(0xF6603D980F6603DA)},
{0x3FFA, LIT64(0x9CF43DCFF5EAFD48)},
{0x3FFE, LIT64(0xF2B9D6480F2B9D65)},
{0x3FFA, LIT64(0xDA16EB88CB8DF614)},
{0x3FFE, LIT64(0xEF2EB71FC4345238)},
{0x3FFB, LIT64(0x8B29B7751BD70743)},
{0x3FFE, LIT64(0xEBBDB2A5C1619C8C)},
{0x3FFB, LIT64(0xA8D839F830C1FB49)},
{0x3FFE, LIT64(0xE865AC7B7603A197)},
{0x3FFB, LIT64(0xC61A2EB18CD907AD)},
{0x3FFE, LIT64(0xE525982AF70C880E)},
{0x3FFB, LIT64(0xE2F2A47ADE3A18AF)},
{0x3FFE, LIT64(0xE1FC780E1FC780E2)},
{0x3FFB, LIT64(0xFF64898EDF55D551)},
{0x3FFE, LIT64(0xDEE95C4CA037BA57)},
{0x3FFC, LIT64(0x8DB956A97B3D0148)},
{0x3FFE, LIT64(0xDBEB61EED19C5958)},
{0x3FFC, LIT64(0x9B8FE100F47BA1DE)},
{0x3FFE, LIT64(0xD901B2036406C80E)},
{0x3FFC, LIT64(0xA9372F1D0DA1BD17)},
{0x3FFE, LIT64(0xD62B80D62B80D62C)},
{0x3FFC, LIT64(0xB6B07F38CE90E46B)},
{0x3FFE, LIT64(0xD3680D3680D3680D)},
{0x3FFC, LIT64(0xC3FD032906488481)},
{0x3FFE, LIT64(0xD0B69FCBD2580D0B)},
{0x3FFC, LIT64(0xD11DE0FF15AB18CA)},
{0x3FFE, LIT64(0xCE168A7725080CE1)},
{0x3FFC, LIT64(0xDE1433A16C66B150)},
{0x3FFE, LIT64(0xCB8727C065C393E0)},
{0x3FFC, LIT64(0xEAE10B5A7DDC8ADD)},
{0x3FFE, LIT64(0xC907DA4E871146AD)},
{0x3FFC, LIT64(0xF7856E5EE2C9B291)},
{0x3FFE, LIT64(0xC6980C6980C6980C)},
{0x3FFD, LIT64(0x82012CA5A68206D7)},
{0x3FFE, LIT64(0xC4372F855D824CA6)},
{0x3FFD, LIT64(0x882C5FCD7256A8C5)},
{0x3FFE, LIT64(0xC1E4BBD595F6E947)},
{0x3FFD, LIT64(0x8E44C60B4CCFD7DE)},
{0x3FFE, LIT64(0xBFA02FE80BFA02FF)},
{0x3FFD, LIT64(0x944AD09EF4351AF6)},
{0x3FFE, LIT64(0xBD69104707661AA3)},
{0x3FFD, LIT64(0x9A3EECD4C3EAA6B2)},
{0x3FFE, LIT64(0xBB3EE721A54D880C)},
{0x3FFD, LIT64(0xA0218434353F1DE8)},
{0x3FFE, LIT64(0xB92143FA36F5E02E)},
{0x3FFD, LIT64(0xA5F2FCABBBC506DA)},
{0x3FFE, LIT64(0xB70FBB5A19BE3659)},
{0x3FFD, LIT64(0xABB3B8BA2AD362A5)},
{0x3FFE, LIT64(0xB509E68A9B94821F)},
{0x3FFD, LIT64(0xB1641795CE3CA97B)},
{0x3FFE, LIT64(0xB30F63528917C80B)},
{0x3FFD, LIT64(0xB70475515D0F1C61)},
{0x3FFE, LIT64(0xB11FD3B80B11FD3C)},
{0x3FFD, LIT64(0xBC952AFEEA3D13E1)},
{0x3FFE, LIT64(0xAF3ADDC680AF3ADE)},
{0x3FFD, LIT64(0xC2168ED0F458BA4A)},
{0x3FFE, LIT64(0xAD602B580AD602B6)},
{0x3FFD, LIT64(0xC788F439B3163BF1)},
{0x3FFE, LIT64(0xAB8F69E28359CD11)},
{0x3FFD, LIT64(0xCCECAC08BF04565D)},
{0x3FFE, LIT64(0xA9C84A47A07F5638)},
{0x3FFD, LIT64(0xD24204872DD85160)},
{0x3FFE, LIT64(0xA80A80A80A80A80B)},
{0x3FFD, LIT64(0xD78949923BC3588A)},
{0x3FFE, LIT64(0xA655C4392D7B73A8)},
{0x3FFD, LIT64(0xDCC2C4B49887DACC)},
{0x3FFE, LIT64(0xA4A9CF1D96833751)},
{0x3FFD, LIT64(0xE1EEBD3E6D6A6B9E)},
{0x3FFE, LIT64(0xA3065E3FAE7CD0E0)},
{0x3FFD, LIT64(0xE70D785C2F9F5BDC)},
{0x3FFE, LIT64(0xA16B312EA8FC377D)},
{0x3FFD, LIT64(0xEC1F392C5179F283)},
{0x3FFE, LIT64(0x9FD809FD809FD80A)},
{0x3FFD, LIT64(0xF12440D3E36130E6)},
{0x3FFE, LIT64(0x9E4CAD23DD5F3A20)},
{0x3FFD, LIT64(0xF61CCE92346600BB)},
{0x3FFE, LIT64(0x9CC8E160C3FB19B9)},
{0x3FFD, LIT64(0xFB091FD38145630A)},
{0x3FFE, LIT64(0x9B4C6F9EF03A3CAA)},
{0x3FFD, LIT64(0xFFE97042BFA4C2AD)},
{0x3FFE, LIT64(0x99D722DABDE58F06)},
{0x3FFE, LIT64(0x825EFCED49369330)},
{0x3FFE, LIT64(0x9868C809868C8098)},
{0x3FFE, LIT64(0x84C37A7AB9A905C9)},
{0x3FFE, LIT64(0x97012E025C04B809)},
{0x3FFE, LIT64(0x87224C2E8E645FB7)},
{0x3FFE, LIT64(0x95A02568095A0257)},
{0x3FFE, LIT64(0x897B8CAC9F7DE298)},
{0x3FFE, LIT64(0x9445809445809446)},
{0x3FFE, LIT64(0x8BCF55DEC4CD05FE)},
{0x3FFE, LIT64(0x92F113840497889C)},
{0x3FFE, LIT64(0x8E1DC0FB89E125E5)},
{0x3FFE, LIT64(0x91A2B3C4D5E6F809)},
{0x3FFE, LIT64(0x9066E68C955B6C9B)},
{0x3FFE, LIT64(0x905A38633E06C43B)},
{0x3FFE, LIT64(0x92AADE74C7BE59E0)},
{0x3FFE, LIT64(0x8F1779D9FDC3A219)},
{0x3FFE, LIT64(0x94E9BFF615845643)},
{0x3FFE, LIT64(0x8DDA520237694809)},
{0x3FFE, LIT64(0x9723A1B720134203)},
{0x3FFE, LIT64(0x8CA29C046514E023)},
{0x3FFE, LIT64(0x995899C890EB8990)},
{0x3FFE, LIT64(0x8B70344A139BC75A)},
{0x3FFE, LIT64(0x9B88BDAA3A3DAE2F)},
{0x3FFE, LIT64(0x8A42F8705669DB46)},
{0x3FFE, LIT64(0x9DB4224FFFE1157C)},
{0x3FFE, LIT64(0x891AC73AE9819B50)},
{0x3FFE, LIT64(0x9FDADC268B7A12DA)},
{0x3FFE, LIT64(0x87F78087F78087F8)},
{0x3FFE, LIT64(0xA1FCFF17CE733BD4)},
{0x3FFE, LIT64(0x86D905447A34ACC6)},
{0x3FFE, LIT64(0xA41A9E8F5446FB9F)},
{0x3FFE, LIT64(0x85BF37612CEE3C9B)},
{0x3FFE, LIT64(0xA633CD7E6771CD8B)},
{0x3FFE, LIT64(0x84A9F9C8084A9F9D)},
{0x3FFE, LIT64(0xA8489E600B435A5E)},
{0x3FFE, LIT64(0x839930523FBE3368)},
{0x3FFE, LIT64(0xAA59233CCCA4BD49)},
{0x3FFE, LIT64(0x828CBFBEB9A020A3)},
{0x3FFE, LIT64(0xAC656DAE6BCC4985)},
{0x3FFE, LIT64(0x81848DA8FAF0D277)},
{0x3FFE, LIT64(0xAE6D8EE360BB2468)},
{0x3FFE, LIT64(0x8080808080808081)},
{0x3FFE, LIT64(0xB07197A23C46C654)}
};
/* 65-entry table of multiples of pi/2 in extended precision, each written as
 * {16-bit sign/exponent word, 64-bit significand}.  The table is symmetric
 * around its middle: entry 32 is zero, entry 32 + n holds n * (pi/2) and
 * entry 32 - n holds the same magnitude with the sign bit set, so the range
 * covered is -32 * (pi/2) .. +32 * (pi/2).
 * NOTE(review): presumably used together with pi_tbl2 for argument reduction
 * in the trigonometric routines -- confirm against their implementation. */
static const floatx80 pi_tbl[65] = {
{0xC004, LIT64(0xC90FDAA22168C235)},
{0xC004, LIT64(0xC2C75BCD105D7C23)},
{0xC004, LIT64(0xBC7EDCF7FF523611)},
{0xC004, LIT64(0xB6365E22EE46F000)},
{0xC004, LIT64(0xAFEDDF4DDD3BA9EE)},
{0xC004, LIT64(0xA9A56078CC3063DD)},
{0xC004, LIT64(0xA35CE1A3BB251DCB)},
{0xC004, LIT64(0x9D1462CEAA19D7B9)},
{0xC004, LIT64(0x96CBE3F9990E91A8)},
{0xC004, LIT64(0x9083652488034B96)},
{0xC004, LIT64(0x8A3AE64F76F80584)},
{0xC004, LIT64(0x83F2677A65ECBF73)},
{0xC003, LIT64(0xFB53D14AA9C2F2C2)},
{0xC003, LIT64(0xEEC2D3A087AC669F)},
{0xC003, LIT64(0xE231D5F66595DA7B)},
{0xC003, LIT64(0xD5A0D84C437F4E58)},
{0xC003, LIT64(0xC90FDAA22168C235)},
{0xC003, LIT64(0xBC7EDCF7FF523611)},
{0xC003, LIT64(0xAFEDDF4DDD3BA9EE)},
{0xC003, LIT64(0xA35CE1A3BB251DCB)},
{0xC003, LIT64(0x96CBE3F9990E91A8)},
{0xC003, LIT64(0x8A3AE64F76F80584)},
{0xC002, LIT64(0xFB53D14AA9C2F2C2)},
{0xC002, LIT64(0xE231D5F66595DA7B)},
{0xC002, LIT64(0xC90FDAA22168C235)},
{0xC002, LIT64(0xAFEDDF4DDD3BA9EE)},
{0xC002, LIT64(0x96CBE3F9990E91A8)},
{0xC001, LIT64(0xFB53D14AA9C2F2C2)},
{0xC001, LIT64(0xC90FDAA22168C235)},
{0xC001, LIT64(0x96CBE3F9990E91A8)},
{0xC000, LIT64(0xC90FDAA22168C235)},
{0xBFFF, LIT64(0xC90FDAA22168C235)},
{0x0000, LIT64(0x0000000000000000)},
{0x3FFF, LIT64(0xC90FDAA22168C235)},
{0x4000, LIT64(0xC90FDAA22168C235)},
{0x4001, LIT64(0x96CBE3F9990E91A8)},
{0x4001, LIT64(0xC90FDAA22168C235)},
{0x4001, LIT64(0xFB53D14AA9C2F2C2)},
{0x4002, LIT64(0x96CBE3F9990E91A8)},
{0x4002, LIT64(0xAFEDDF4DDD3BA9EE)},
{0x4002, LIT64(0xC90FDAA22168C235)},
{0x4002, LIT64(0xE231D5F66595DA7B)},
{0x4002, LIT64(0xFB53D14AA9C2F2C2)},
{0x4003, LIT64(0x8A3AE64F76F80584)},
{0x4003, LIT64(0x96CBE3F9990E91A8)},
{0x4003, LIT64(0xA35CE1A3BB251DCB)},
{0x4003, LIT64(0xAFEDDF4DDD3BA9EE)},
{0x4003, LIT64(0xBC7EDCF7FF523611)},
{0x4003, LIT64(0xC90FDAA22168C235)},
{0x4003, LIT64(0xD5A0D84C437F4E58)},
{0x4003, LIT64(0xE231D5F66595DA7B)},
{0x4003, LIT64(0xEEC2D3A087AC669F)},
{0x4003, LIT64(0xFB53D14AA9C2F2C2)},
{0x4004, LIT64(0x83F2677A65ECBF73)},
{0x4004, LIT64(0x8A3AE64F76F80584)},
{0x4004, LIT64(0x9083652488034B96)},
{0x4004, LIT64(0x96CBE3F9990E91A8)},
{0x4004, LIT64(0x9D1462CEAA19D7B9)},
{0x4004, LIT64(0xA35CE1A3BB251DCB)},
{0x4004, LIT64(0xA9A56078CC3063DD)},
{0x4004, LIT64(0xAFEDDF4DDD3BA9EE)},
{0x4004, LIT64(0xB6365E22EE46F000)},
{0x4004, LIT64(0xBC7EDCF7FF523611)},
{0x4004, LIT64(0xC2C75BCD105D7C23)},
{0x4004, LIT64(0xC90FDAA22168C235)}
};
/* 65 float32 values given as raw 32-bit patterns, one per entry of pi_tbl.
 * The middle entry (index 32) is zero, matching the zero entry of pi_tbl.
 * NOTE(review): presumably the low-order correction for the matching pi_tbl
 * multiple of pi/2 -- confirm against the trigonometric routines. */
static const float32 pi_tbl2[65] = {
0x21800000, 0xA0D00000, 0xA1E80000, 0x21480000,
0xA1200000, 0x21FC0000, 0x21100000, 0xA1580000,
0x21E00000, 0x20B00000, 0xA1880000, 0x21C40000,
0x20000000, 0x21380000, 0xA1300000, 0x9FC00000,
0x21000000, 0xA1680000, 0xA0A00000, 0x20900000,
0x21600000, 0xA1080000, 0x1F800000, 0xA0B00000,
0x20800000, 0xA0200000, 0x20E00000, 0x1F000000,
0x20000000, 0x20600000, 0x1F800000, 0x1F000000,
0x00000000,
0x9F000000, 0x9F800000, 0xA0600000, 0xA0000000,
0x9F000000, 0xA0E00000, 0x20200000, 0xA0800000,
0x20B00000, 0x9F800000, 0x21080000, 0xA1600000,
0xA0900000, 0x20A00000, 0x21680000, 0xA1000000,
0x1FC00000, 0x21300000, 0xA1380000, 0xA0000000,
0xA1C40000, 0x21880000, 0xA0B00000, 0xA1E00000,
0x21580000, 0xA1100000, 0xA1FC0000, 0x21200000,
0xA1480000, 0x21E80000, 0x20D00000, 0xA1800000
};