Added JIT FPU, removed Softfloat option (too slow to be useful) - Thanks TomB!

2018-01-30 01:08:23 +01:00 · 2018-01-30 01:08:23 +01:00 · a8815b211e
commit a8815b211e
parent f24301e8dd
55 changed files with 3579 additions and 10940 deletions
--- a/5
+++ b/5
@ -254,11 +254,6 @@ OBJS =	\
 	src/filesys.o \
 	src/flashrom.o \
 	src/fpp.o \
-	src/fpp_native.o \
-	src/fpp_softfloat.o \
-	src/softfloat/softfloat.o \
-	src/softfloat/softfloat_decimal.o \
-	src/softfloat/softfloat_fpsp.o \
 	src/fsdb.o \
 	src/fsdb_unix.o \
 	src/fsusage.o \
--- a/VisualGDB/Amiberry-sdl1/Amiberry-sdl1.vcxproj
+++ b/VisualGDB/Amiberry-sdl1/Amiberry-sdl1.vcxproj
@ -192,10 +192,6 @@
    <ClInclude Include="..\..\src\osdep\picasso96.h" />
    <ClInclude Include="..\..\src\osdep\sysconfig.h" />
    <ClInclude Include="..\..\src\osdep\target.h" />
-    <ClInclude Include="..\..\src\softfloat\softfloat-macros.h" />
-    <ClInclude Include="..\..\src\softfloat\softfloat-specialize.h" />
-    <ClInclude Include="..\..\src\softfloat\softfloat.h" />
-    <ClInclude Include="..\..\src\softfloat\softfloat_fpsp_tables.h" />
    <ClInclude Include="..\..\src\sounddep\sound.h" />
    <ClInclude Include="..\..\src\threaddep\thread.h" />
  </ItemGroup>
@ -295,7 +291,6 @@
    <ClCompile Include="..\..\src\flashrom.cpp" />
    <ClCompile Include="..\..\src\fpp.cpp" />
    <ClCompile Include="..\..\src\fpp_native.cpp" />
-    <ClCompile Include="..\..\src\fpp_softfloat.cpp" />
    <ClCompile Include="..\..\src\fsdb.cpp" />
    <ClCompile Include="..\..\src\fsdb_unix.cpp" />
    <ClCompile Include="..\..\src\fsusage.cpp" />
@ -372,9 +367,6 @@
    <ClCompile Include="..\..\src\rtc.cpp" />
    <ClCompile Include="..\..\src\savestate.cpp" />
    <ClCompile Include="..\..\src\scsi.cpp" />
-    <ClCompile Include="..\..\src\softfloat\softfloat.cpp" />
-    <ClCompile Include="..\..\src\softfloat\softfloat_decimal.cpp" />
-    <ClCompile Include="..\..\src\softfloat\softfloat_fpsp.cpp" />
    <ClCompile Include="..\..\src\sounddep\sound.cpp" />
    <ClCompile Include="..\..\src\statusline.cpp" />
    <ClCompile Include="..\..\src\traps.cpp" />
--- a/VisualGDB/Amiberry-sdl1/Amiberry-sdl1.vcxproj.filters
+++ b/VisualGDB/Amiberry-sdl1/Amiberry-sdl1.vcxproj.filters
@ -54,9 +54,6 @@
    <Filter Include="Source files\osdep\gui">
      <UniqueIdentifier>{d946fd2c-30b2-45d3-9999-ccc3749160b7}</UniqueIdentifier>
    </Filter>
-    <Filter Include="Source files\softfloat">
-      <UniqueIdentifier>{628a02d1-51f3-4021-81e5-6103ddf96904}</UniqueIdentifier>
-    </Filter>
    <Filter Include="Source files\sounddep">
      <UniqueIdentifier>{49dfa14b-d5bf-4aa3-a660-12f97ae62bdb}</UniqueIdentifier>
    </Filter>
@ -188,9 +185,6 @@
    <ClCompile Include="..\..\src\fpp_native.cpp">
      <Filter>Source files</Filter>
    </ClCompile>
-    <ClCompile Include="..\..\src\fpp_softfloat.cpp">
-      <Filter>Source files</Filter>
-    </ClCompile>
    <ClCompile Include="..\..\src\fsdb.cpp">
      <Filter>Source files</Filter>
    </ClCompile>
@ -658,18 +652,6 @@
    <ClInclude Include="..\..\src\osdep\gui\UaeRadioButton.hpp">
      <Filter>Source files\osdep\gui</Filter>
    </ClInclude>
-    <ClInclude Include="..\..\src\softfloat\softfloat.h">
-      <Filter>Source files\softfloat</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\src\softfloat\softfloat_fpsp_tables.h">
-      <Filter>Source files\softfloat</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\src\softfloat\softfloat-macros.h">
-      <Filter>Source files\softfloat</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\src\softfloat\softfloat-specialize.h">
-      <Filter>Source files\softfloat</Filter>
-    </ClInclude>
    <ClInclude Include="..\..\src\sounddep\sound.h">
      <Filter>Source files\sounddep</Filter>
    </ClInclude>
@ -1007,15 +989,6 @@
    <ClCompile Include="..\..\src\osdep\gui\UaeRadioButton.cpp">
      <Filter>Source files\osdep\gui</Filter>
    </ClCompile>
-    <ClCompile Include="..\..\src\softfloat\softfloat.cpp">
-      <Filter>Source files\softfloat</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\src\softfloat\softfloat_decimal.cpp">
-      <Filter>Source files\softfloat</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\src\softfloat\softfloat_fpsp.cpp">
-      <Filter>Source files\softfloat</Filter>
-    </ClCompile>
    <ClCompile Include="..\..\src\sounddep\sound.cpp">
      <Filter>Source files\sounddep</Filter>
    </ClCompile>
--- a/VisualGDB/Amiberry/Amiberry-Debug-dispmanx.vgdbsettings
+++ b/VisualGDB/Amiberry/Amiberry-Debug-dispmanx.vgdbsettings
@ -58,7 +58,6 @@
    <ProjectFile>Amiberry.vcxproj</ProjectFile>
    <RemoteBuildEnvironment>
      <Records />
-      <EnvironmentSetupFiles />
    </RemoteBuildEnvironment>
    <ParallelJobCount>1</ParallelJobCount>
    <SuppressDirectoryChangeMessages>true</SuppressDirectoryChangeMessages>
--- a/VisualGDB/Amiberry/Amiberry-Debug.vgdbsettings.midwan.user
+++ b/VisualGDB/Amiberry/Amiberry-Debug.vgdbsettings.midwan.user
@ -11,6 +11,7 @@
      <Vfork>false</Vfork>
      <Syscalls />
    </CatchpointConfiguration>
+    <LiveWatches />
    <MaskInterruptsWhileStepping>false</MaskInterruptsWhileStepping>
    <MemoryWindowPreferences />
  </DebugPreferences>
--- a/VisualGDB/Amiberry/Amiberry-Release.vgdbsettings
+++ b/VisualGDB/Amiberry/Amiberry-Release.vgdbsettings
@ -35,6 +35,7 @@
      </FileMasks>
      <TransferNewFilesOnly>true</TransferNewFilesOnly>
      <IncludeSubdirectories>true</IncludeSubdirectories>
+      <SelectedDirectories />
      <DeleteDisappearedFiles>false</DeleteDisappearedFiles>
      <ApplyGlobalExclusionList>true</ApplyGlobalExclusionList>
    </MainSourceTransferCommand>
@ -57,7 +58,6 @@
    <ProjectFile>Amiberry.vcxproj</ProjectFile>
    <RemoteBuildEnvironment>
      <Records />
-      <EnvironmentSetupFiles />
    </RemoteBuildEnvironment>
    <ParallelJobCount>1</ParallelJobCount>
    <SuppressDirectoryChangeMessages>true</SuppressDirectoryChangeMessages>
--- a/VisualGDB/Amiberry/Amiberry.vcxproj
+++ b/VisualGDB/Amiberry/Amiberry.vcxproj
@ -67,7 +67,7 @@
    <Link>
      <AdditionalLinkerInputs>;%(Link.AdditionalLinkerInputs)</AdditionalLinkerInputs>
      <LibrarySearchDirectories>=/usr/local/lib;../../src/guisan/lib;%(Link.LibrarySearchDirectories)</LibrarySearchDirectories>
-      <AdditionalLibraryNames>SDL2;pthread;z;png;rt;xml2;FLAC;mpg123;dl;mpeg2convert;mpeg2;SDL2_image;SDL2_ttf;guisan;m;%(Link.AdditionalLibraryNames)</AdditionalLibraryNames>
+      <AdditionalLibraryNames>SDL2;pthread;z;png;rt;xml2;FLAC;mpg123;dl;mpeg2convert;mpeg2;SDL2_image;SDL2_ttf;guisan;%(Link.AdditionalLibraryNames)</AdditionalLibraryNames>
      <LinkerScript />
    </Link>
  </ItemDefinitionGroup>
@ -90,7 +90,7 @@
    <ClCompile>
      <CPPLanguageStandard>GNUPP14</CPPLanguageStandard>
      <AdditionalIncludeDirectories>=/usr/local/include/SDL2;=/usr/include/libxml2;../../src;../../src/osdep;../../src/threaddep;../../src/include;../../src/guisan/include;../../src/archivers;%(ClCompile.AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
-      <PreprocessorDefinitions>NDEBUG=1;RELEASE=1;ARMV6T2;USE_ARMNEON;_REENTRANT;AMIBERRY;CPU_arm;ARMV6_ASSEMBLY;USE_SDL2;%(ClCompile.PreprocessorDefinitions)</PreprocessorDefinitions>
+      <PreprocessorDefinitions>NDEBUG=1;RELEASE=1;ARMV6T2;USE_ARMNEON;_REENTRANT;AMIBERRY;CPU_arm;ARMV6_ASSEMBLY;USE_SDL2;USE_RENDER_THREAD;%(ClCompile.PreprocessorDefinitions)</PreprocessorDefinitions>
      <AdditionalOptions>-march=armv8-a -mfpu=neon-fp-armv8 -mfloat-abi=hard %(AdditionalOptions)</AdditionalOptions>
    </ClCompile>
    <Link>
@ -207,8 +207,6 @@
    <ClCompile Include="..\..\src\filesys.cpp" />
    <ClCompile Include="..\..\src\flashrom.cpp" />
    <ClCompile Include="..\..\src\fpp.cpp" />
-    <ClCompile Include="..\..\src\fpp_native.cpp" />
-    <ClCompile Include="..\..\src\fpp_softfloat.cpp" />
    <ClCompile Include="..\..\src\fsdb.cpp" />
    <ClCompile Include="..\..\src\fsdb_unix.cpp" />
    <ClCompile Include="..\..\src\fsusage.cpp" />
@ -284,9 +282,6 @@
    <ClCompile Include="..\..\src\rtc.cpp" />
    <ClCompile Include="..\..\src\savestate.cpp" />
    <ClCompile Include="..\..\src\scsi.cpp" />
-    <ClCompile Include="..\..\src\softfloat\softfloat.cpp" />
-    <ClCompile Include="..\..\src\softfloat\softfloat_decimal.cpp" />
-    <ClCompile Include="..\..\src\softfloat\softfloat_fpsp.cpp" />
    <ClCompile Include="..\..\src\sounddep\sound.cpp" />
    <ClCompile Include="..\..\src\statusline.cpp" />
    <ClCompile Include="..\..\src\traps.cpp" />
@ -434,10 +429,6 @@
    <ClInclude Include="..\..\src\osdep\picasso96.h" />
    <ClInclude Include="..\..\src\osdep\sysconfig.h" />
    <ClInclude Include="..\..\src\osdep\target.h" />
-    <ClInclude Include="..\..\src\softfloat\softfloat-macros.h" />
-    <ClInclude Include="..\..\src\softfloat\softfloat-specialize.h" />
-    <ClInclude Include="..\..\src\softfloat\softfloat.h" />
-    <ClInclude Include="..\..\src\softfloat\softfloat_fpsp_tables.h" />
    <ClInclude Include="..\..\src\sounddep\sound.h" />
    <ClInclude Include="..\..\src\threaddep\thread.h" />
  </ItemGroup>
--- a/VisualGDB/Amiberry/Amiberry.vcxproj.filters
+++ b/VisualGDB/Amiberry/Amiberry.vcxproj.filters
@ -18,9 +18,6 @@
    <Filter Include="Source files\sounddep">
      <UniqueIdentifier>{406f7c18-2b0e-4564-8646-fdaef3089f65}</UniqueIdentifier>
    </Filter>
-    <Filter Include="Source files\softfloat">
-      <UniqueIdentifier>{6e21b349-366f-4684-bb77-ead2ccf9c8f4}</UniqueIdentifier>
-    </Filter>
    <Filter Include="Source files\osdep">
      <UniqueIdentifier>{29512242-0e9f-4bfa-b302-f46f792e55cd}</UniqueIdentifier>
    </Filter>
@ -196,12 +193,6 @@
    <ClCompile Include="..\..\src\fpp.cpp">
      <Filter>Source files</Filter>
    </ClCompile>
-    <ClCompile Include="..\..\src\fpp_native.cpp">
-      <Filter>Source files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\src\fpp_softfloat.cpp">
-      <Filter>Source files</Filter>
-    </ClCompile>
    <ClCompile Include="..\..\src\fsdb.cpp">
      <Filter>Source files</Filter>
    </ClCompile>
@ -283,15 +274,6 @@
    <ClCompile Include="..\..\src\sounddep\sound.cpp">
      <Filter>Source files\sounddep</Filter>
    </ClCompile>
-    <ClCompile Include="..\..\src\softfloat\softfloat.cpp">
-      <Filter>Source files\softfloat</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\src\softfloat\softfloat_decimal.cpp">
-      <Filter>Source files\softfloat</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\src\softfloat\softfloat_fpsp.cpp">
-      <Filter>Source files\softfloat</Filter>
-    </ClCompile>
    <ClCompile Include="..\..\src\osdep\amiberry.cpp">
      <Filter>Source files\osdep</Filter>
    </ClCompile>
@ -633,18 +615,6 @@
    <ClInclude Include="..\..\src\sounddep\sound.h">
      <Filter>Source files\sounddep</Filter>
    </ClInclude>
-    <ClInclude Include="..\..\src\softfloat\softfloat.h">
-      <Filter>Source files\softfloat</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\src\softfloat\softfloat_fpsp_tables.h">
-      <Filter>Source files\softfloat</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\src\softfloat\softfloat-macros.h">
-      <Filter>Source files\softfloat</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\src\softfloat\softfloat-specialize.h">
-      <Filter>Source files\softfloat</Filter>
-    </ClInclude>
    <ClInclude Include="..\..\src\osdep\amiberry_gfx.h">
      <Filter>Source files\osdep</Filter>
    </ClInclude>
--- a/VisualGDB/genlinetoscr/genlinetoscr-Debug.vgdbsettings
+++ b/VisualGDB/genlinetoscr/genlinetoscr-Debug.vgdbsettings
@ -60,7 +60,6 @@
          <Value>C:\SysGCC\raspberry\bin;%PATH%</Value>
        </Record>
      </Records>
-      <EnvironmentSetupFiles />
    </RemoteBuildEnvironment>
    <ParallelJobCount>1</ParallelJobCount>
    <SuppressDirectoryChangeMessages>true</SuppressDirectoryChangeMessages>
--- a/VisualGDB/genlinetoscr/genlinetoscr-Release.vgdbsettings
+++ b/VisualGDB/genlinetoscr/genlinetoscr-Release.vgdbsettings
@ -60,7 +60,6 @@
          <Value>C:\SysGCC\raspberry\bin;%PATH%</Value>
        </Record>
      </Records>
-      <EnvironmentSetupFiles />
    </RemoteBuildEnvironment>
    <ParallelJobCount>1</ParallelJobCount>
    <SuppressDirectoryChangeMessages>true</SuppressDirectoryChangeMessages>
--- a/VisualGDB/genlinetoscr/genlinetoscr.vcxproj
+++ b/VisualGDB/genlinetoscr/genlinetoscr.vcxproj
@ -35,26 +35,26 @@
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|VisualGDB'">
    <ClCompile>
      <CPPLanguageStandard>GNUPP14</CPPLanguageStandard>
-      <AdditionalIncludeDirectories>C:\SysGCC\raspberry\lib\gcc\arm-linux-gnueabihf\4.9\include;C:\SysGCC\raspberry\arm-linux-gnueabihf\include\c++\4.9;C:\SysGCC\raspberry\arm-linux-gnueabihf\include;C:\SysGCC\raspberry\arm-linux-gnueabihf\sysroot\usr\include;../../src/include;../../src;../../src/osdep;%(ClCompile.AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>C:\SysGCC\raspberry\lib\gcc\arm-linux-gnueabihf\4.9\include;C:\SysGCC\raspberry\arm-linux-gnueabihf\include\c++\4.9;C:\SysGCC\raspberry\arm-linux-gnueabihf\include;C:\SysGCC\raspberry\arm-linux-gnueabihf\sysroot\usr\include;../../src/include;../../src;../../src/osdep;=/usr/local/include/SDL2;%(ClCompile.AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
      <PreprocessorDefinitions>DEBUG=1;%(ClCompile.PreprocessorDefinitions)</PreprocessorDefinitions>
    </ClCompile>
    <Link>
      <AdditionalLinkerInputs>;%(Link.AdditionalLinkerInputs)</AdditionalLinkerInputs>
-      <LibrarySearchDirectories>;%(Link.LibrarySearchDirectories)</LibrarySearchDirectories>
-      <AdditionalLibraryNames>;%(Link.AdditionalLibraryNames)</AdditionalLibraryNames>
+      <LibrarySearchDirectories>=/usr/local/lib;%(Link.LibrarySearchDirectories)</LibrarySearchDirectories>
+      <AdditionalLibraryNames>SDL2;%(Link.AdditionalLibraryNames)</AdditionalLibraryNames>
      <LinkerScript />
    </Link>
  </ItemDefinitionGroup>
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|VisualGDB'">
    <ClCompile>
      <CPPLanguageStandard>GNUPP14</CPPLanguageStandard>
-      <AdditionalIncludeDirectories>C:\SysGCC\raspberry\lib\gcc\arm-linux-gnueabihf\4.9\include;C:\SysGCC\raspberry\arm-linux-gnueabihf\include\c++\4.9;C:\SysGCC\raspberry\arm-linux-gnueabihf\include;C:\SysGCC\raspberry\arm-linux-gnueabihf\sysroot\usr\include;../../src/include;../../src;../../src/osdep;%(ClCompile.AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>C:\SysGCC\raspberry\lib\gcc\arm-linux-gnueabihf\4.9\include;C:\SysGCC\raspberry\arm-linux-gnueabihf\include\c++\4.9;C:\SysGCC\raspberry\arm-linux-gnueabihf\include;C:\SysGCC\raspberry\arm-linux-gnueabihf\sysroot\usr\include;../../src/include;../../src;../../src/osdep;=/usr/local/include/SDL2;%(ClCompile.AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
      <PreprocessorDefinitions>NDEBUG=1;RELEASE=1;%(ClCompile.PreprocessorDefinitions)</PreprocessorDefinitions>
    </ClCompile>
    <Link>
      <AdditionalLinkerInputs>;%(Link.AdditionalLinkerInputs)</AdditionalLinkerInputs>
-      <LibrarySearchDirectories>;%(Link.LibrarySearchDirectories)</LibrarySearchDirectories>
-      <AdditionalLibraryNames>;%(Link.AdditionalLibraryNames)</AdditionalLibraryNames>
+      <LibrarySearchDirectories>=/usr/local/lib;%(Link.LibrarySearchDirectories)</LibrarySearchDirectories>
+      <AdditionalLibraryNames>SDL2;%(Link.AdditionalLibraryNames)</AdditionalLibraryNames>
      <LinkerScript />
    </Link>
  </ItemDefinitionGroup>
--- a/src/cfgfile.cpp
+++ b/src/cfgfile.cpp
@ -197,13 +197,17 @@ static const TCHAR* obsolete[] = {
 	_T("avoid_vid"), _T("avoid_dga"), _T("z3chipmem_size"), _T("state_replay_buffer"), _T("state_replay"),
 	_T("z3realmapping"), _T("force_0x10000000_z3"),
 	_T("fpu_arithmetic_exceptions"),
+
 	_T("gfx_filter_vert_zoom"),_T("gfx_filter_horiz_zoom"),
 	_T("gfx_filter_vert_zoom_mult"), _T("gfx_filter_horiz_zoom_mult"),
 	_T("gfx_filter_vert_offset"), _T("gfx_filter_horiz_offset"),
+
 	_T("pcibridge_rom_file"),
 	_T("pcibridge_rom_options"),
+
 	_T("cpuboard_ext_rom_file"),
 	_T("uaeboard_mode"),
+
 	_T("comp_oldsegv"),
 	_T("comp_midopt"),
 	_T("comp_lowopt"),
@ -1433,8 +1437,10 @@ void cfgfile_save_options(struct zfile* f, struct uae_prefs* p, int type)

 	cfgfile_dwrite_bool(f, _T("fpu_no_unimplemented"), p->fpu_no_unimplemented);
 	cfgfile_write_bool(f, _T("fpu_strict"), p->fpu_strict);
-	cfgfile_dwrite_bool(f, _T("fpu_softfloat"), p->fpu_softfloat);

+#ifdef USE_JIT_FPU
+	cfgfile_write_bool(f, _T("compfpu"), p->compfpu);
+#endif
 	cfgfile_write(f, _T("cachesize"), _T("%d"), p->cachesize);

 	cfg_write(_T("; "), f);
@ -3566,11 +3572,14 @@ static int cfgfile_parse_hardware(struct uae_prefs* p, const TCHAR* option, TCHA
 		|| cfgfile_yesno(option, value, _T("ksmirror_a8"), &p->cs_ksmirror_a8)
 		|| cfgfile_yesno(option, value, _T("cia_todbug"), &p->cs_ciatodbug)
 		|| cfgfile_yesno(option, value, _T("z3_autoconfig"), &p->cs_z3autoconfig)
+
 		|| cfgfile_yesno(option, value, _T("ntsc"), &p->ntscmode)
 		|| cfgfile_yesno(option, value, _T("cpu_compatible"), &p->cpu_compatible)
 		|| cfgfile_yesno(option, value, _T("cpu_24bit_addressing"), &p->address_space_24)
 		|| cfgfile_yesno(option, value, _T("fpu_strict"), &p->fpu_strict)
-		|| cfgfile_yesno(option, value, _T("fpu_softfloat"), &p->fpu_softfloat)
+#ifdef USE_JIT_FPU
+		|| cfgfile_yesno(option, value, _T("compfpu"), &p->compfpu)
+#endif
 		|| cfgfile_yesno(option, value, _T("floppy_write_protect"), &p->floppy_read_only)
 		|| cfgfile_yesno(option, value, _T("harddrive_write_protect"), &p->harddrive_read_only))
 		return 1;
@ -5165,6 +5174,11 @@ void default_prefs(struct uae_prefs* p, bool reset, int type)
 	p->sound_filter_type = 0;
 	p->sound_volume_cd = 20;

+#ifdef USE_JIT_FPU
+	p->compfpu = 1;
+#else
+	p->compfpu = 0;
+#endif
 	p->cachesize = 0;

 	p->gfx_framerate = 1;
@ -5223,7 +5237,6 @@ void default_prefs(struct uae_prefs* p, bool reset, int type)
 	p->cpu_model = 68000;
 	p->fpu_no_unimplemented = false;
 	p->fpu_strict = false;
-	p->fpu_softfloat = false;
 	p->m68k_speed = 0;
 	p->cpu_compatible = false;
 	p->address_space_24 = true;
--- a/src/custom.cpp
+++ b/src/custom.cpp
@ -313,7 +313,6 @@ struct color_change *curr_color_changes = 0;

 struct decision line_decisions[2 * (MAXVPOS + 2) + 1];
 struct draw_info curr_drawinfo[2 * (MAXVPOS + 2) + 1];
-#define COLOR_TABLE_SIZE (MAXVPOS + 2) * 2
 struct color_entry curr_color_tables[COLOR_TABLE_SIZE];

 static int next_sprite_entry = 0;
--- a/src/fpp.cpp
+++ b/src/fpp.cpp
@ -10,9 +10,9 @@

 #define __USE_ISOC9X  /* We might be able to pick up a NaN */

-#include <math.h>
+#include <cmath>
 #include <float.h>
-#include <fenv.h>
+#include <cfenv>

 #include "sysconfig.h"
 #include "sysdeps.h"
@ -27,92 +27,9 @@
 #include "savestate.h"
 #include "cpu_prefetch.h"

-#include "softfloat/softfloat.h"
+void fpsr_set_exception(uae_u32 exception);

-FPP_PRINT fpp_print;
-
-FPP_IS fpp_is_snan;
-FPP_IS fpp_unset_snan;
-FPP_IS fpp_is_nan;
-FPP_IS fpp_is_infinity;
-FPP_IS fpp_is_zero;
-FPP_IS fpp_is_neg;
-FPP_IS fpp_is_denormal;
-FPP_IS fpp_is_unnormal;
-
-FPP_GET_STATUS fpp_get_status;
-FPP_CLEAR_STATUS fpp_clear_status;
-FPP_SET_MODE fpp_set_mode;
-
-FPP_FROM_NATIVE fpp_from_native;
-FPP_TO_NATIVE fpp_to_native;
-
-FPP_TO_INT fpp_to_int;
-FPP_FROM_INT fpp_from_int;
-
-FPP_PACK fpp_to_pack;
-FPP_PACK fpp_from_pack;
-
-FPP_TO_SINGLE fpp_to_single;
-FPP_FROM_SINGLE fpp_from_single;
-FPP_TO_DOUBLE fpp_to_double;
-FPP_FROM_DOUBLE fpp_from_double;
-FPP_TO_EXTEN fpp_to_exten;
-FPP_FROM_EXTEN fpp_from_exten;
-FPP_TO_EXTEN fpp_to_exten_fmovem;
-FPP_FROM_EXTEN fpp_from_exten_fmovem;
-
-FPP_A fpp_normalize;
-FPP_DENORMALIZE fpp_denormalize;
-FPP_A fpp_get_internal_overflow;
-FPP_A fpp_get_internal_underflow;
-FPP_A fpp_get_internal_round_all;
-FPP_A fpp_get_internal_round;
-FPP_A fpp_get_internal_round_exten;
-FPP_A fpp_get_internal;
-FPP_GET32 fpp_get_internal_grs;
-
-FPP_A fpp_round_single;
-FPP_A fpp_round_double;
-FPP_A fpp_round32;
-FPP_A fpp_round64;
-FPP_AB fpp_int;
-FPP_AB fpp_sinh;
-FPP_AB fpp_intrz;
-FPP_ABP fpp_sqrt;
-FPP_AB fpp_lognp1;
-FPP_AB fpp_etoxm1;
-FPP_AB fpp_tanh;
-FPP_AB fpp_atan;
-FPP_AB fpp_atanh;
-FPP_AB fpp_sin;
-FPP_AB fpp_asin;
-FPP_AB fpp_tan;
-FPP_AB fpp_etox;
-FPP_AB fpp_twotox;
-FPP_AB fpp_tentox;
-FPP_AB fpp_logn;
-FPP_AB fpp_log10;
-FPP_AB fpp_log2;
-FPP_ABP fpp_abs;
-FPP_AB fpp_cosh;
-FPP_ABP fpp_neg;
-FPP_AB fpp_acos;
-FPP_AB fpp_cos;
-FPP_AB fpp_getexp;
-FPP_AB fpp_getman;
-FPP_ABP fpp_div;
-FPP_ABQS fpp_mod;
-FPP_ABP fpp_add;
-FPP_ABP fpp_mul;
-FPP_ABQS fpp_rem;
-FPP_AB fpp_scale;
-FPP_ABP fpp_sub;
-FPP_AB fpp_sgldiv;
-FPP_AB fpp_sglmul;
-FPP_AB fpp_cmp;
-FPP_AB fpp_tst;
-FPP_ABP fpp_move;
+#include "fpp_native.cpp"

 #define DEBUG_FPP 0
 #define EXCEPTION_FPP 0
@ -313,10 +230,6 @@ static uae_u32 get_ftag(fpdata *src, int size)
 {
 	if (fpp_is_zero(src)) {
 		return 1; // ZERO
-	} else if (fpp_is_unnormal(src) || fpp_is_denormal(src)) {
-		if (size == 1 || size == 5)
-			return 5; // Single/double DENORMAL
-		return 4; // Extended DENORMAL or UNNORMAL
 	} else if  (fpp_is_nan(src)) {
 		return 3; // NAN
 	} else if (fpp_is_infinity(src)) {
@ -332,16 +245,6 @@ STATIC_INLINE bool fp_is_dyadic(uae_u16 extra)

 static bool fp_exception_pending(bool pre)
 {
-	// first check for pending arithmetic exceptions
-	if (currprefs.fpu_softfloat) {
-		if (regs.fp_exp_pend) {
-			regs.fpu_exp_pre = pre;
-			Exception(regs.fp_exp_pend);
-			if (currprefs.fpu_model != 68882)
-				regs.fp_exp_pend = 0;
-			return true;
-		}
-	}
 	// no arithmetic exceptions pending, check for unimplemented datatype
 	if (regs.fp_unimp_pend) {
 		regs.fpu_exp_pre = pre;
@ -381,136 +284,11 @@ static uae_u32 fpsr_get_vector(uae_u32 exception)
 	return 0;
 }

-static void fpsr_check_arithmetic_exception(uae_u32 mask, fpdata *src, uae_u32 opcode, uae_u16 extra, uae_u32 ea)
-{
-	if (!currprefs.fpu_softfloat)
-		return;
-
-	bool nonmaskable;
-	uae_u32 exception;
-	// Any exception status bit and matching exception enable bits set?
-	exception = regs.fpsr & regs.fpcr & 0xff00;
-	// Add 68040/68060 nonmaskable exceptions
-	if (currprefs.cpu_model >= 68040 && currprefs.fpu_model)
-		exception |= regs.fpsr & (FPSR_OVFL | FPSR_UNFL | mask);
-
-	if (exception) {
-		regs.fp_exp_pend = fpsr_get_vector(exception);
-		nonmaskable = (regs.fp_exp_pend != fpsr_get_vector(regs.fpsr & regs.fpcr));
-
-		if (!currprefs.fpu_softfloat) {
-			// log message and exit
-			regs.fp_exp_pend = 0;
-			return;
-		}
-
-		regs.fp_opword = opcode;
-		regs.fp_ea = ea;
-
-		// data for FSAVE stack frame
-		fpdata eo;
-		uae_u32 opclass = (extra >> 13) & 7;
-
-		reset_fsave_data();
-
-		if (currprefs.fpu_model == 68881 || currprefs.fpu_model == 68882) {
-			// fsave data for 68881 and 68882
-
-			if (opclass == 3) { // 011
-				fsave_data.ccr = ((uae_u32)extra << 16) | extra;
-			} else { // 000 or 010
-				fsave_data.ccr = ((uae_u32)(opcode | 0x0080) << 16) | extra;
-			}
-			if (regs.fp_exp_pend == 54 || regs.fp_exp_pend == 52 || regs.fp_exp_pend == 50) { // SNAN, OPERR, DZ
-				fpp_from_exten_fmovem(src, &fsave_data.eo[0], &fsave_data.eo[1], &fsave_data.eo[2]);
-				if (regs.fp_exp_pend == 52 && opclass == 3) { // OPERR from move to integer or packed
-					fsave_data.eo[0] &= 0x4fff0000;
-					fsave_data.eo[1] = fsave_data.eo[2] = 0;
-				}
-			} else if (regs.fp_exp_pend == 53) { // OVFL
-				fpp_get_internal_overflow(&eo);
-				fpp_from_exten_fmovem(&eo, &fsave_data.eo[0], &fsave_data.eo[1], &fsave_data.eo[2]);
-			} else if (regs.fp_exp_pend == 51) { // UNFL
-				fpp_get_internal_underflow(&eo);
-				fpp_from_exten_fmovem(&eo, &fsave_data.eo[0], &fsave_data.eo[1], &fsave_data.eo[2]);
-			} // else INEX1, INEX2: do nothing
-
-		} else {
-			// fsave data for 68040
-			regs.fpu_exp_state = 1; // 68040 UNIMP frame
-
-			uae_u32 reg = (extra >> 7) & 7;
-			int size = (extra >> 10) & 7;
-
-			fsave_data.fpiarcu = regs.fpiar;
-
-			if (regs.fp_exp_pend == 54) { // SNAN (undocumented)
-				fsave_data.wbte15 = 1;
-				fsave_data.grs = 7;
-			} else {
-				fsave_data.grs = 1;
-			}
-
-			if (opclass == 3) { // OPCLASS 011
-				fsave_data.cmdreg1b = extra;
-				fsave_data.e1 = 1;
-				fsave_data.t = 1;
-				fsave_data.wbte15 = (regs.fp_exp_pend == 51 || regs.fp_exp_pend == 54) ? 1 : 0; // UNFL, SNAN
-
-				if (fpp_is_snan(src)) {
-					fpp_unset_snan(src);
-				}
-				fpp_from_exten_fmovem(src, &fsave_data.et[0], &fsave_data.et[1], &fsave_data.et[2]);
-				fsave_data.stag = get_ftag(src, -1);
-			} else { // OPCLASS 000 and 010
-				fsave_data.cmdreg1b = extra;
-				fsave_data.e1 = 1;
-				fsave_data.wbte15 = (regs.fp_exp_pend == 54) ? 1 : 0; // SNAN (undocumented)
-
-				if (regs.fp_exp_pend == 51 || regs.fp_exp_pend == 53 || regs.fp_exp_pend == 49) { // UNFL, OVFL, INEX
-					if ((extra & 0x30) == 0x20 || (extra & 0x3f) == 0x04) { // FADD, FSUB, FMUL, FDIV, FSQRT
-						regs.fpu_exp_state = 2; // 68040 BUSY frame
-						fsave_data.e3 = 1;
-						fsave_data.e1 = 0;
-						fsave_data.cmdreg3b = (extra & 0x3C3) | ((extra & 0x038)>>1) | ((extra & 0x004)<<3);
-						if (regs.fp_exp_pend == 51) { // UNFL
-							fpp_get_internal(&eo);
-						} else { // OVFL, INEX
-							fpp_get_internal_round(&eo);
-						}
-						fsave_data.grs = fpp_get_internal_grs();
-						fpp_from_exten_fmovem(&eo, &fsave_data.wbt[0], &fsave_data.wbt[1], &fsave_data.wbt[2]);
-						fsave_data.wbte15 = (regs.fp_exp_pend == 51) ? 1 : 0; // UNFL
-						// src and dst is stored (undocumented)
-						fpp_from_exten_fmovem(src, &fsave_data.et[0], &fsave_data.et[1], &fsave_data.et[2]);
-						fsave_data.stag = get_ftag(src, (opclass == 0) ? -1 : size);
-						if (fp_is_dyadic(extra)) {
-							fpp_from_exten_fmovem(&regs.fp[reg], &fsave_data.fpt[0], &fsave_data.fpt[1], &fsave_data.fpt[2]);
-							fsave_data.dtag = get_ftag(&regs.fp[reg], -1);
-						}
-					} else { // FMOVE to register, FABS, FNEG
-						fpp_get_internal_round_exten(&eo);
-						fsave_data.grs = fpp_get_internal_grs();
-						fpp_from_exten_fmovem(&eo, &fsave_data.fpt[0], &fsave_data.fpt[1], &fsave_data.fpt[2]);
-						fpp_get_internal_round_all(&eo); // weird
-						fpp_from_exten_fmovem(&eo, &fsave_data.et[0], &fsave_data.et[1], &fsave_data.et[2]); // undocumented
-						fsave_data.stag = get_ftag(src, (opclass == 0) ? -1 : size);
-					}
-				} else { // SNAN, OPERR, DZ
-					fpp_from_exten_fmovem(src, &fsave_data.et[0], &fsave_data.et[1], &fsave_data.et[2]);
-					fsave_data.stag = get_ftag(src, (opclass == 0) ? -1 : size);
-					if (fp_is_dyadic(extra)) {
-						fpp_from_exten_fmovem(&regs.fp[reg], &fsave_data.fpt[0], &fsave_data.fpt[1], &fsave_data.fpt[2]);
-						fsave_data.dtag = get_ftag(&regs.fp[reg], -1);
-					}
-				}
-			}
-		}
-	}
-}
-
 static void fpsr_set_result(fpdata *result)
 {
+#ifdef JIT
+	regs.fp_result = *result;
+#endif
 	// condition code byte
 	regs.fpsr &= 0x00fffff8; // clear cc
 	if (fpp_is_nan (result)) {
@ -527,18 +305,10 @@ static void fpsr_clear_status(void)
 {
 	// clear exception status byte only
 	regs.fpsr &= 0x0fff00f8;
-	
-	// clear external status
-	fpp_clear_status();
 }

 static uae_u32 fpsr_make_status(void)
 {
-	uae_u32 exception;
-
-	// get external status
-	fpp_get_status(&regs.fpsr);
-	
 	// update accrued exception byte
 	if (regs.fpsr & (FPSR_BSUN | FPSR_SNAN | FPSR_OPERR))
 		regs.fpsr |= FPSR_AE_IOP;  // IOP = BSUN || SNAN || OPERR
@ -551,15 +321,7 @@ static uae_u32 fpsr_make_status(void)
 	if (regs.fpsr & (FPSR_OVFL | FPSR_INEX2 | FPSR_INEX1))
 		regs.fpsr |= FPSR_AE_INEX; // INEX = INEX1 || INEX2 || OVFL
 	
-	if (!currprefs.fpu_softfloat)
-		return 0;
-
-	// return exceptions that interrupt calculation
-	exception = regs.fpsr & regs.fpcr & (FPSR_SNAN | FPSR_OPERR | FPSR_DZ);
-	if (currprefs.cpu_model >= 68040 && currprefs.fpu_model)
-		exception |= regs.fpsr & (FPSR_OVFL | FPSR_UNFL);
-
-	return exception;
+	return 0;
 }

 static int fpsr_set_bsun(void)
@ -567,15 +329,6 @@ static int fpsr_set_bsun(void)
 	regs.fpsr |= FPSR_BSUN;
 	regs.fpsr |= FPSR_AE_IOP;
 	
-	if (regs.fpcr & FPSR_BSUN) {
-		// logging only so far
-		write_log (_T("FPU exception: BSUN! (FPSR: %08x, FPCR: %04x)\n"), regs.fpsr, regs.fpcr);
-		if (currprefs.fpu_softfloat) {
-			regs.fp_exp_pend = fpsr_get_vector(FPSR_BSUN);
-			fp_exception_pending(true);
-			return 1;
-		}
-	}
 	return 0;
 }

@ -591,8 +344,22 @@ static void fpsr_get_quotient(uae_u64 *quot, uae_u8 *sign)
 	*sign = (regs.fpsr & FPSR_QUOT_SIGN) ? 1 : 0;
 }

-uae_u32 fpp_get_fpsr (void)
+static uae_u32 fpp_get_fpsr (void)
 {
+#ifdef JIT
+	if (currprefs.cachesize && currprefs.compfpu) {
+		regs.fpsr &= 0x00fffff8; // clear cc
+		if (fpp_is_nan (&regs.fp_result)) {
+			regs.fpsr |= FPSR_CC_NAN;
+		} else if (fpp_is_zero(&regs.fp_result)) {
+			regs.fpsr |= FPSR_CC_Z;
+		} else if (fpp_is_infinity (&regs.fp_result)) {
+			regs.fpsr |= FPSR_CC_I;
+		}
+		if (fpp_is_neg(&regs.fp_result))
+			regs.fpsr |= FPSR_CC_N;
+	}
+#endif
 	return regs.fpsr;
 }

@ -619,9 +386,23 @@ static void fpset (fpdata *fpd, uae_s32 val)
 static void fpp_set_fpsr (uae_u32 val)
 {
 	regs.fpsr = val;
+
+#ifdef JIT
+	// check comment in fpp_cond
+	if (currprefs.cachesize && currprefs.compfpu) {
+		if (val & 0x01000000)
+			fpnan(&regs.fp_result);
+		else if (val & 0x04000000)
+			fpset(&regs.fp_result, 0);
+		else if (val & 0x08000000)
+			fpset(&regs.fp_result, -1);
+		else
+			fpset(&regs.fp_result, 1);
+	}
+#endif
 }

-bool fpu_get_constant(fpdata *fpd, int cr)
+static bool fpu_get_constant(fpdata *fpd, int cr)
 {
 	uae_u32 f[3] = { 0, 0, 0 };
 	int entry = 0;
@ -739,16 +520,16 @@ bool fpu_get_constant(fpdata *fpd, int cr)
 					}
 				}
 			}
-			fpp_to_exten_fmovem(fpd, f[0], f[1], f[2]);
+			fpp_to_exten(fpd, f[0], f[1], f[2]);
 			if (prec == 1)
 				fpp_round32(fpd);
 			if (prec >= 2)
 				fpp_round64(fpd);

 			if (f1_adjust) {
-				fpp_from_exten_fmovem(fpd, &f[0], &f[1], &f[2]);
+				fpp_from_exten(fpd, &f[0], &f[1], &f[2]);
 				f[1] += f1_adjust * 0x80;
-				fpp_to_exten_fmovem(fpd, f[0], f[1], f[2]);
+				fpp_to_exten(fpd, f[0], f[1], f[2]);
 			}

 			fpsr_set_result(fpd);
@ -767,7 +548,7 @@ bool fpu_get_constant(fpdata *fpd, int cr)
 		f[2] += fpp_cr[entry].rndoff[mode];
 	}

-	fpp_to_exten_fmovem(fpd, f[0], f[1], f[2]);
+	fpp_to_exten(fpd, f[0], f[1], f[2]);
 	
 	if (prec == 1)
 		fpp_round32(fpd);
@ -795,10 +576,10 @@ static void fp_unimp_instruction(uae_u16 opcode, uae_u16 extra, uae_u32 ea, uaec
 			reset_fsave_data();
 			fsave_data.cmdreg3b = (extra & 0x3C3) | ((extra & 0x038) >> 1) | ((extra & 0x004) << 3);
 			fsave_data.cmdreg1b = extra;
-			fpp_from_exten_fmovem(src, &fsave_data.et[0], &fsave_data.et[1], &fsave_data.et[2]);
+			fpp_from_exten(src, &fsave_data.et[0], &fsave_data.et[1], &fsave_data.et[2]);
 			fsave_data.stag = get_ftag(src, size);
 			if (reg >= 0) {
-				fpp_from_exten_fmovem(&regs.fp[reg], &fsave_data.fpt[0], &fsave_data.fpt[1], &fsave_data.fpt[2]);
+				fpp_from_exten(&regs.fp[reg], &fsave_data.fpt[0], &fsave_data.fpt[1], &fsave_data.fpt[2]);
 				fsave_data.dtag = get_ftag(&regs.fp[reg], -1);
 			}
 		}
@ -838,9 +619,9 @@ static void fp_unimp_datatype(uae_u16 opcode, uae_u16 extra, uae_u32 ea, uaecptr
 		}
 		if (opclass == 3) { // OPCLASS 011
 			fsave_data.t = 1;
-			fpp_from_exten_fmovem(src, &fsave_data.et[0], &fsave_data.et[1], &fsave_data.et[2]);
+			fpp_from_exten(src, &fsave_data.et[0], &fsave_data.et[1], &fsave_data.et[2]);
 			fsave_data.stag = get_ftag(src, -1);
-			fpp_from_exten_fmovem(src, &fsave_data.fpt[0], &fsave_data.fpt[1], &fsave_data.fpt[2]); // undocumented
+			fpp_from_exten(src, &fsave_data.fpt[0], &fsave_data.fpt[1], &fsave_data.fpt[2]); // undocumented
 			fsave_data.dtag = get_ftag(src, -1); // undocumented
 		} else { // OPCLASS 000 and 010
 			if (packed) {
@ -850,13 +631,13 @@ static void fp_unimp_datatype(uae_u16 opcode, uae_u16 extra, uae_u32 ea, uaecptr
 				fsave_data.et[2] = packed[2];
 				fsave_data.stag = 7; // undocumented
 			} else {
-				fpp_from_exten_fmovem(src, &fsave_data.et[0], &fsave_data.et[1], &fsave_data.et[2]);
+				fpp_from_exten(src, &fsave_data.et[0], &fsave_data.et[1], &fsave_data.et[2]);
 				fsave_data.stag = get_ftag(src, (opclass == 0) ? -1 : size);
 				if (fsave_data.stag == 5) {
 					fsave_data.et[0] = (size == 1) ? 0x3f800000 : 0x3c000000; // exponent for denormalized single and double
 				}
 				if (fp_is_dyadic(extra)) {
-					fpp_from_exten_fmovem(&regs.fp[reg], &fsave_data.fpt[0], &fsave_data.fpt[1], &fsave_data.fpt[2]);
+					fpp_from_exten(&regs.fp[reg], &fsave_data.fpt[0], &fsave_data.fpt[1], &fsave_data.fpt[2]);
 					fsave_data.dtag = get_ftag(&regs.fp[reg], -1);
 				}
 			}
@ -1026,9 +807,7 @@ static bool fault_if_unimplemented_6888x (uae_u16 opcode, uae_u16 extra, uaecptr

 static bool fault_if_no_fpu_u (uae_u16 opcode, uae_u16 extra, uaecptr ea, uaecptr oldpc)
 {
-	if (fault_if_no_fpu (opcode, extra, ea, oldpc))
-		return true;
-	return false;
+	return fault_if_no_fpu (opcode, extra, ea, oldpc);
 }

 static bool fault_if_no_6888x (uae_u16 opcode, uae_u16 extra, uaecptr oldpc)
@ -1073,44 +852,6 @@ static void fpu_null (void)
 		fpnan (&regs.fp[i]);
 }

-// 68040/060 does not support denormals
-static bool normalize_or_fault_if_no_denormal_support(uae_u16 opcode, uae_u16 extra, uaecptr ea, uaecptr oldpc, fpdata *src)
-{
-	if (!currprefs.fpu_softfloat)
-		return false;
-	if (fpp_is_unnormal(src) || fpp_is_denormal(src)) {
-		if (currprefs.cpu_model >= 68040 && currprefs.fpu_model && currprefs.fpu_no_unimplemented) {
-			if (fpp_is_zero(src)) {
-				fpp_normalize(src); // 68040/060 can only fix unnormal zeros
-			} else {
-			  fp_unimp_datatype(opcode, extra, ea, oldpc, src, NULL);
-			  return true;
-			}
-		} else {
-			fpp_normalize(src);
-		}
-	}
-	return false;
-}
-static bool normalize_or_fault_if_no_denormal_support_dst(uae_u16 opcode, uae_u16 extra, uaecptr ea, uaecptr oldpc, fpdata *dst, fpdata *src)
-{
-	if (!currprefs.fpu_softfloat)
-		return false;
-	if (fpp_is_unnormal(dst) || fpp_is_denormal(dst)) {
-		if (currprefs.cpu_model >= 68040 && currprefs.fpu_model && currprefs.fpu_no_unimplemented) {
-			if (fpp_is_zero(dst)) {
-				fpp_normalize(dst); // 68040/060 can only fix unnormal zeros
-			} else {
-			  fp_unimp_datatype(opcode, extra, ea, oldpc, src, NULL);
-			  return true;
-			}
-		} else {
-			fpp_normalize(dst);
-		}
-	}
-	return false;
-}
-
 // 68040/060 does not support packed decimal format
 static bool fault_if_no_packed_support(uae_u16 opcode, uae_u16 extra, uaecptr ea, uaecptr oldpc, fpdata *src, uae_u32 *packed)
 {
@ -1119,20 +860,6 @@ static bool fault_if_no_packed_support(uae_u16 opcode, uae_u16 extra, uaecptr ea
 		return true;
 	}
 	return false;
- }
-
-// 68040 does not support move to integer format
-static bool fault_if_68040_integer_nonmaskable(uae_u16 opcode, uae_u16 extra, uaecptr ea, uaecptr oldpc, fpdata *src)
-{
-	if (currprefs.cpu_model == 68040 && currprefs.fpu_model && currprefs.fpu_softfloat) {
-		fpsr_make_status();
-		if (regs.fpsr & (FPSR_SNAN | FPSR_OPERR)) {
-			fpsr_check_arithmetic_exception(FPSR_SNAN | FPSR_OPERR, src, opcode, extra, ea);
-			fp_exception_pending(false); // post
-			return true;
-		}
-	}
-	return false;
 }

 static int get_fp_value (uae_u32 opcode, uae_u16 extra, fpdata *src, uaecptr oldpc, uae_u32 *adp)
@ -1148,7 +875,6 @@ static int get_fp_value (uae_u32 opcode, uae_u16 extra, fpdata *src, uaecptr old
 		if (fault_if_no_fpu (opcode, extra, 0, oldpc))
 			return -1;
 		*src = regs.fp[(extra >> 10) & 7];
-		normalize_or_fault_if_no_denormal_support(opcode, extra, 0, oldpc, src);
 		return 1;
 	}
 	mode = (opcode >> 3) & 7;
@ -1172,7 +898,6 @@ static int get_fp_value (uae_u32 opcode, uae_u16 extra, fpdata *src, uaecptr old
 					break;
 				case 1:
 					fpp_to_single (src, m68k_dreg (regs, reg));
-					normalize_or_fault_if_no_denormal_support(opcode, extra, 0, oldpc, src);
 					break;
 				default:
 					return 0;
@ -1257,7 +982,6 @@ static int get_fp_value (uae_u32 opcode, uae_u16 extra, fpdata *src, uaecptr old
 			break;
 		case 1:
 			fpp_to_single (src, (doext ? exts[0] : x_cp_get_long (ad)));
-			normalize_or_fault_if_no_denormal_support(opcode, extra, adold, oldpc, src);
 			break;
 		case 2:
 			{
@ -1268,7 +992,6 @@ static int get_fp_value (uae_u32 opcode, uae_u16 extra, fpdata *src, uaecptr old
 				ad += 4;
 				wrd3 = (doext ? exts[2] : x_cp_get_long (ad));
 				fpp_to_exten (src, wrd1, wrd2, wrd3);
-				normalize_or_fault_if_no_denormal_support(opcode, extra, adold, oldpc, src);
 			}
 			break;
 		case 3:
@ -1282,7 +1005,6 @@ static int get_fp_value (uae_u32 opcode, uae_u16 extra, fpdata *src, uaecptr old
 				if (fault_if_no_packed_support (opcode, extra, adold, oldpc, NULL, wrd))
 					return 1;
 				fpp_to_pack (src, wrd, 0);
-				fpp_normalize(src);
 				return 1;
 			}
 			break;
@ -1296,7 +1018,6 @@ static int get_fp_value (uae_u32 opcode, uae_u16 extra, fpdata *src, uaecptr old
 				ad += 4;
 				wrd2 = (doext ? exts[1] : x_cp_get_long (ad));
 				fpp_to_double (src, wrd1, wrd2);
-				normalize_or_fault_if_no_denormal_support(opcode, extra, adold, oldpc, src);
 			}
 			break;
 		case 6:
@ -1331,31 +1052,17 @@ static int put_fp_value (fpdata *value, uae_u32 opcode, uae_u16 extra, uaecptr o
 			switch (size)
 			{
 				case 6:
-					if (normalize_or_fault_if_no_denormal_support(opcode, extra, 0, oldpc, value))
-						return 1;
 					m68k_dreg (regs, reg) = (uae_u32)(((fpp_to_int (value, 0) & 0xff)
 						| (m68k_dreg (regs, reg) & ~0xff)));
-					if (fault_if_68040_integer_nonmaskable(opcode, extra, ad, oldpc, value))
-						return -1;
 					break;
 				case 4:
-					if (normalize_or_fault_if_no_denormal_support(opcode, extra, 0, oldpc, value))
-						return 1;
 					m68k_dreg (regs, reg) = (uae_u32)(((fpp_to_int (value, 1) & 0xffff)
 						| (m68k_dreg (regs, reg) & ~0xffff)));
-					if (fault_if_68040_integer_nonmaskable(opcode, extra, ad, oldpc, value))
-						return -1;
 					break;
 				case 0:
-					if (normalize_or_fault_if_no_denormal_support(opcode, extra, 0, oldpc, value))
-						return 1;
 					m68k_dreg (regs, reg) = (uae_u32)fpp_to_int (value, 2);
-					if (fault_if_68040_integer_nonmaskable(opcode, extra, ad, oldpc, value))
-						return -1;
 					break;
 				case 1:
-					if (normalize_or_fault_if_no_denormal_support(opcode, extra, 0, oldpc, value))
-						return 1;
 					m68k_dreg (regs, reg) = fpp_from_single (value);
 					break;
 				default:
@ -1410,21 +1117,13 @@ static int put_fp_value (fpdata *value, uae_u32 opcode, uae_u16 extra, uaecptr o
 	switch (size)
 	{
 		case 0:
-			if (normalize_or_fault_if_no_denormal_support(opcode, extra, ad, oldpc, value))
-				return 1;
 			x_cp_put_long(ad, (uae_u32)fpp_to_int(value, 2));
-			if (fault_if_68040_integer_nonmaskable(opcode, extra, ad, oldpc, value))
-				return -1;
 			break;
 		case 1:
-			if (normalize_or_fault_if_no_denormal_support(opcode, extra, ad, oldpc, value))
-				return 1;
 			x_cp_put_long(ad, fpp_from_single(value));
 			break;
 		case 2:
 			{
-				if (normalize_or_fault_if_no_denormal_support(opcode, extra, ad, oldpc, value))
-					return 1;
 				uae_u32 wrd1, wrd2, wrd3;
 				fpp_from_exten(value, &wrd1, &wrd2, &wrd3);
 				x_cp_put_long (ad, wrd1);
@ -1445,7 +1144,6 @@ static int put_fp_value (fpdata *value, uae_u32 opcode, uae_u16 extra, uaecptr o
 				kfactor &= 127;
 				if (kfactor & 64)
 					kfactor |= ~63;
-				fpp_normalize(value);
 				fpp_from_pack(value, wrd, kfactor);
 				x_cp_put_long (ad, wrd[0]);
 				ad += 4;
@ -1455,16 +1153,10 @@ static int put_fp_value (fpdata *value, uae_u32 opcode, uae_u16 extra, uaecptr o
 			}
 			break;
 		case 4:
-			if (normalize_or_fault_if_no_denormal_support(opcode, extra, ad, oldpc, value))
-				return 1;
 			x_cp_put_word(ad, (uae_s16)fpp_to_int(value, 1));
-			if (fault_if_68040_integer_nonmaskable(opcode, extra, ad, oldpc, value))
-				return -1;
 			break;
 		case 5:
 			{
-				if (normalize_or_fault_if_no_denormal_support(opcode, extra, ad, oldpc, value))
-					return 1;
 				uae_u32 wrd1, wrd2;
 				fpp_from_double(value, &wrd1, &wrd2);
 				x_cp_put_long (ad, wrd1);
@ -1473,11 +1165,7 @@ static int put_fp_value (fpdata *value, uae_u32 opcode, uae_u16 extra, uaecptr o
 			}
 			break;
 		case 6:
-			if (normalize_or_fault_if_no_denormal_support(opcode, extra, ad, oldpc, value))
-				return 1;
 			x_cp_put_byte(ad, (uae_s8)fpp_to_int(value, 0));
-			if (fault_if_68040_integer_nonmaskable(opcode, extra, ad, oldpc, value))
-				return -1;
 			break;
 		default:
 			return 0;
@ -1539,9 +1227,19 @@ int fpp_cond (int condition)
 {
 	int NotANumber, N, Z;

-	NotANumber = (regs.fpsr & FPSR_CC_NAN) != 0;
-	N = (regs.fpsr & FPSR_CC_N) != 0;
-	Z = (regs.fpsr & FPSR_CC_Z) != 0;
+#ifdef JIT
+	if (currprefs.cachesize && currprefs.compfpu) {
+		// JIT reads and writes regs.fp_result
+		NotANumber = fpp_is_nan(&regs.fp_result);
+		N = fpp_is_neg(&regs.fp_result);
+		Z = fpp_is_zero(&regs.fp_result);
+	} else
+#endif
+	{
+	  NotANumber = (regs.fpsr & FPSR_CC_NAN) != 0;
+	  N = (regs.fpsr & FPSR_CC_N) != 0;
+	  Z = (regs.fpsr & FPSR_CC_Z) != 0;
+	}

 	if ((condition & 0x10) && NotANumber) {
 		if (fpsr_set_bsun())
@ -1994,14 +1692,12 @@ retry:
 				
 				if (cusavepc == 0xFE) {
 					if (opclass == 0 || opclass == 2) {
-						fpp_to_exten_fmovem(&dst, fsave_data.fpt[0], fsave_data.fpt[1], fsave_data.fpt[2]);
-						fpp_denormalize(&dst, fpte15);
-						fpp_to_exten_fmovem(&src, fsave_data.et[0], fsave_data.et[1], fsave_data.et[2]);
-						fpp_denormalize(&src, et15);
+						fpp_to_exten(&dst, fsave_data.fpt[0], fsave_data.fpt[1], fsave_data.fpt[2]);
+						fpp_to_exten(&src, fsave_data.et[0], fsave_data.et[1], fsave_data.et[2]);
 #if EXCEPTION_FPP
 						uae_u32 tmpsrc[3], tmpdst[3];
-						fpp_from_exten_fmovem(&src, &tmpsrc[0], &tmpsrc[1], &tmpsrc[2]);
-						fpp_from_exten_fmovem(&dst, &tmpdst[0], &tmpdst[1], &tmpdst[2]);
+						fpp_from_exten(&src, &tmpsrc[0], &tmpsrc[1], &tmpsrc[2]);
+						fpp_from_exten(&dst, &tmpdst[0], &tmpdst[1], &tmpdst[2]);
 						write_log (_T("FRESTORE src = %08X %08X %08X, dst = %08X %08X %08X, extra = %04X\n"),
 								   tmpsrc[0], tmpsrc[1], tmpsrc[2], tmpdst[0], tmpdst[1], tmpdst[2], cmdreg1b);
 #endif
@ -2011,8 +1707,6 @@ retry:
 						
 						if (v)
 							regs.fp[(cmdreg1b>>7)&7] = dst;
-						
-						fpsr_check_arithmetic_exception(0, &src, regs.fp_opword, cmdreg1b, regs.fp_ea);
 					} else {
 						write_log (_T("FRESTORE resume of opclass %d instruction not supported %08x\n"), opclass, ad_orig);
 					}
@ -2117,7 +1811,7 @@ static uaecptr fmovem2mem (uaecptr ad, uae_u32 list, int incr, int regdir)
 		else
 			reg = r;
 		if (list & 0x80) {
-			fpp_from_exten_fmovem(&regs.fp[reg], &wrd1, &wrd2, &wrd3);
+			fpp_from_exten(&regs.fp[reg], &wrd1, &wrd2, &wrd3);
 			if (incr < 0)
 				ad -= 3 * 4;
 			x_put_long(ad + 0, wrd1);
@ -2164,7 +1858,7 @@ static bool fp_arithmetic(fpdata *src, fpdata *dst, int extra)
 	switch (extra & 0x7f)
 	{
 		case 0x00: /* FMOVE */
-			fpp_move(dst, src, 0);
+			fpp_move(dst, src, fpu_prec);
 			break;
 		case 0x40: /* FSMOVE */
 			fpp_move(dst, src, 32);
@ -2182,7 +1876,7 @@ static bool fp_arithmetic(fpdata *src, fpdata *dst, int extra)
 			fpp_intrz(dst, src);
 			break;
 		case 0x04: /* FSQRT */
-			fpp_sqrt(dst, src, 0);
+			fpp_sqrt(dst, src, fpu_prec);
 			break;
 		case 0x41: /* FSSQRT */
 			fpp_sqrt(dst, src,  32);
@ -2233,7 +1927,7 @@ static bool fp_arithmetic(fpdata *src, fpdata *dst, int extra)
 			fpp_log2(dst, src);
 			break;
 		case 0x18: /* FABS */
-			fpp_abs(dst, src, 0);
+			fpp_abs(dst, src, fpu_prec);
 			break;
 		case 0x58: /* FSABS */
 			fpp_abs(dst, src, 32);
@ -2245,7 +1939,7 @@ static bool fp_arithmetic(fpdata *src, fpdata *dst, int extra)
 			fpp_cosh(dst, src);
 			break;
 		case 0x1a: /* FNEG */
-			fpp_neg(dst, src, 0);
+			fpp_neg(dst, src, fpu_prec);
 			break;
 		case 0x5a: /* FSNEG */
 			fpp_neg(dst, src, 32);
@ -2266,7 +1960,7 @@ static bool fp_arithmetic(fpdata *src, fpdata *dst, int extra)
 			fpp_getman(dst, src);
 			break;
 		case 0x20: /* FDIV */
-			fpp_div(dst, src, 0);
+			fpp_div(dst, src, fpu_prec);
 			break;
 		case 0x60: /* FSDIV */
 			fpp_div(dst, src, 32);
@ -2280,7 +1974,7 @@ static bool fp_arithmetic(fpdata *src, fpdata *dst, int extra)
 			fpsr_set_quotient(q, s);
 			break;
 		case 0x22: /* FADD */
-			fpp_add(dst, src, 0);
+			fpp_add(dst, src, fpu_prec);
 			break;
 		case 0x62: /* FSADD */
 			fpp_add(dst, src, 32);
@ -2289,7 +1983,7 @@ static bool fp_arithmetic(fpdata *src, fpdata *dst, int extra)
 			fpp_add(dst, src, 64);
 			break;
 		case 0x23: /* FMUL */
-			fpp_mul(dst, src, 0);
+			fpp_mul(dst, src, fpu_prec);
 			break;
 		case 0x63: /* FSMUL */
 			fpp_mul(dst, src, 32);
@ -2312,7 +2006,7 @@ static bool fp_arithmetic(fpdata *src, fpdata *dst, int extra)
 			fpp_sglmul(dst, src);
 			break;
 		case 0x28: /* FSUB */
-			fpp_sub(dst, src, 0);
+			fpp_sub(dst, src, fpu_prec);
 			break;
 		case 0x68: /* FSSUB */
 			fpp_sub(dst, src, 32);
@ -2390,7 +2084,6 @@ static void fpuop_arithmetic2 (uae_u32 opcode, uae_u16 extra)
 				return;
 			}
 			fpsr_make_status();
-			fpsr_check_arithmetic_exception(0, &src, opcode, extra, ad);
 			fp_exception_pending(false); // post/mid instruction
 			return;

@ -2598,7 +2291,6 @@ static void fpuop_arithmetic2 (uae_u32 opcode, uae_u16 extra)
 				fpsr_clear_status();
 				fpu_get_constant(&regs.fp[reg], extra & 0x7f);
 				fpsr_make_status();
-				fpsr_check_arithmetic_exception(0, &src, opcode, extra, ad);
 				return;
 			}

@ -2620,9 +2312,6 @@ static void fpuop_arithmetic2 (uae_u32 opcode, uae_u16 extra)

 			dst = regs.fp[reg];

-			if (fp_is_dyadic(extra))
-				normalize_or_fault_if_no_denormal_support_dst(opcode, extra, ad, pc, &dst, &src);
-
 			// check for 680x0 unimplemented instruction
 			if (fault_if_unimplemented_680x0 (opcode, extra, ad, pc, &src, reg))
 				return;
@ -2635,8 +2324,6 @@ static void fpuop_arithmetic2 (uae_u32 opcode, uae_u16 extra)

 			v = fp_arithmetic(&src, &dst, extra);

-			fpsr_check_arithmetic_exception(0, &src, opcode, extra, ad);
-
 			if (v)
 				regs.fp[reg] = dst;

@ -2654,35 +2341,8 @@ void fpuop_arithmetic (uae_u32 opcode, uae_u16 extra)
 	fpuop_arithmetic2 (opcode, extra);
 }

-void fpu_modechange(void)
-{
-	uae_u32 temp_ext[8][3];
-
-	if (currprefs.fpu_softfloat == changed_prefs.fpu_softfloat)
-		return;
-	currprefs.fpu_softfloat = changed_prefs.fpu_softfloat;
-
-	for (int i = 0; i < 8; i++) {
-		fpp_from_exten_fmovem(&regs.fp[i], &temp_ext[i][0], &temp_ext[i][1], &temp_ext[i][2]);
-	}
-	if (currprefs.fpu_softfloat) {
-		fp_init_softfloat();
-	} else {
-		fp_init_native();
-	}
-	for (int i = 0; i < 8; i++) {
-		fpp_to_exten_fmovem(&regs.fp[i], temp_ext[i][0], temp_ext[i][1], temp_ext[i][2]);
-	}
-}
-
 void fpu_reset (void)
 {
-	if (currprefs.fpu_softfloat) {
-		fp_init_softfloat();
-	} else {
-		fp_init_native();
-	}
-
 #if defined(CPU_i386) || defined(CPU_x86_64)
 	init_fpucw_x87();
 #endif
@ -2709,7 +2369,7 @@ uae_u8 *restore_fpu (uae_u8 *src)
 		w1 = restore_u16 () << 16;
 		w2 = restore_u32 ();
 		w3 = restore_u32 ();
-		fpp_to_exten_fmovem(&regs.fp[i], w1, w2, w3);
+		fpp_to_exten(&regs.fp[i], w1, w2, w3);
 	}
 	regs.fpcr = restore_u32 ();
 	regs.fpsr = restore_u32 ();
@ -2776,7 +2436,7 @@ uae_u8 *save_fpu (int *len, uae_u8 *dstptr)
 	save_u32 (currprefs.fpu_model);
 	save_u32 (0x80000000 | 0x20000000);
 	for (i = 0; i < 8; i++) {
-		fpp_from_exten_fmovem(&regs.fp[i], &w1, &w2, &w3);
+		fpp_from_exten(&regs.fp[i], &w1, &w2, &w3);
 		save_u16 (w1 >> 16);
 		save_u32 (w2);
 		save_u32 (w3);
--- a/src/fpp_native.cpp
+++ b/src/fpp_native.cpp
--- a/src/fpp_softfloat.cpp
+++ b/src/fpp_softfloat.cpp
@ -1,780 +0,0 @@
-/*
-* UAE - The Un*x Amiga Emulator
-*
-* MC68881/68882/68040/68060 FPU emulation
-* Softfloat version
-*
-* Andreas Grabher and Toni Wilen
-*
-*/
-#define __USE_ISOC9X  /* We might be able to pick up a NaN */
-
-#define SOFTFLOAT_FAST_INT64
-
-#include <math.h>
-#include <float.h>
-#include <fenv.h>
-
-#include "sysconfig.h"
-#include "sysdeps.h"
-
-#include "options.h"
-#include "memory.h"
-#include "newcpu.h"
-#include "fpp.h"
-#include "newcpu.h"
-
-#include "softfloat/softfloat-macros.h"
-#include "softfloat/softfloat-specialize.h"
-
-#define	FPCR_ROUNDING_MODE	0x00000030
-#define	FPCR_ROUND_NEAR		0x00000000
-#define	FPCR_ROUND_ZERO		0x00000010
-#define	FPCR_ROUND_MINF		0x00000020
-#define	FPCR_ROUND_PINF		0x00000030
-
-#define	FPCR_ROUNDING_PRECISION	0x000000c0
-#define	FPCR_PRECISION_SINGLE	0x00000040
-#define	FPCR_PRECISION_DOUBLE	0x00000080
-#define FPCR_PRECISION_EXTENDED	0x00000000
-
-static struct float_status fs;
-
-/* Functions for setting host/library modes and getting status */
-static void fp_set_mode(uae_u32 mode_control)
-{
-	set_float_detect_tininess(float_tininess_before_rounding, &fs);
-
-	switch(mode_control & FPCR_ROUNDING_PRECISION) {
-		case FPCR_PRECISION_SINGLE: // single
-			set_floatx80_rounding_precision(32, &fs);
-			break;
-		default: // double
-		case FPCR_PRECISION_DOUBLE: // double
-			set_floatx80_rounding_precision(64, &fs);
-			break;
-		case FPCR_PRECISION_EXTENDED: // extended
-			set_floatx80_rounding_precision(80, &fs);
-			break;
-	}
-	
-	switch(mode_control & FPCR_ROUNDING_MODE) {
-		case FPCR_ROUND_NEAR: // to neareset
-			set_float_rounding_mode(float_round_nearest_even, &fs);
-			break;
-		case FPCR_ROUND_ZERO: // to zero
-			set_float_rounding_mode(float_round_to_zero, &fs);
-			break;
-		case FPCR_ROUND_MINF: // to minus
-			set_float_rounding_mode(float_round_down, &fs);
-			break;
-		case FPCR_ROUND_PINF: // to plus
-			set_float_rounding_mode(float_round_up, &fs);
-			break;
-	}
-}
-
-static void fp_get_status(uae_u32 *status)
-{
-	if (fs.float_exception_flags & float_flag_signaling)
-		*status |= FPSR_SNAN;
-	if (fs.float_exception_flags & float_flag_invalid)
-		*status |= FPSR_OPERR;
-	if (fs.float_exception_flags & float_flag_divbyzero)
-		*status |= FPSR_DZ;
-	if (fs.float_exception_flags & float_flag_overflow)
-		*status |= FPSR_OVFL;
-	if (fs.float_exception_flags & float_flag_underflow)
-		*status |= FPSR_UNFL;
-	if (fs.float_exception_flags & float_flag_inexact)
-		*status |= FPSR_INEX2;
-	if (fs.float_exception_flags & float_flag_decimal)
-		*status |= FPSR_INEX1;
-}
-STATIC_INLINE void fp_clear_status(void)
-{
-	fs.float_exception_flags = 0;
-}
-
-
-static const TCHAR *fp_printx80(floatx80 *fx, int mode)
-{
-	static TCHAR fsout[32];
-	flag n, u, d;
-
-	if (mode < 0) {
-		_stprintf(fsout, _T("%04X-%08X-%08X"), fx->high, (uae_u32)(fx->low >> 32), (uae_u32)fx->low);
-		return fsout;
-	}
-
-	n = floatx80_is_negative(*fx);
-	u = floatx80_is_unnormal(*fx);
-	d = floatx80_is_denormal(*fx);
-	
-	if (floatx80_is_infinity(*fx)) {
-		_stprintf(fsout, _T("%c%s"), n ? '-' : '+', _T("inf"));
-	} else if (floatx80_is_signaling_nan(*fx)) {
-		_stprintf(fsout, _T("%c%s"), n ? '-' : '+', _T("snan"));
-	} else if (floatx80_is_nan(*fx)) {
-		_stprintf(fsout, _T("%c%s"), n ? '-' : '+', _T("nan"));
-	} else {
-		int32_t len = 17;
-		int8_t save_exception_flags = fs.float_exception_flags;
-		fs.float_exception_flags = 0;
-		floatx80 x = floatx80_to_floatdecimal(*fx, &len, &fs);
-		_stprintf(fsout, _T("%c%01lld.%016llde%c%04d%s%s"), n ? '-' : '+',
-				x.low / LIT64(10000000000000000), x.low % LIT64(10000000000000000),
-				(x.high & 0x4000) ? '-' : '+', x.high & 0x3FFF, d ? _T("D") : u ? _T("U") : _T(""),
-				(fs.float_exception_flags & float_flag_inexact) ? _T("~") : _T(""));
-		fs.float_exception_flags = save_exception_flags;
-	}
-
-	if (mode == 0 || mode > _tcslen(fsout))
-		return fsout;
-	fsout[mode] = 0;
-	return fsout;
-}
-
-static const TCHAR *fp_print(fpdata *fpd, int mode)
-{
-	return fp_printx80(&fpd->fpx, mode);
-}
-
-/* Functions for detecting float type */
-static bool fp_is_snan(fpdata *fpd)
-{
-	return floatx80_is_signaling_nan(fpd->fpx) != 0;
-}
-static bool fp_unset_snan(fpdata *fpd)
-{
-	fpd->fpx.low |= LIT64(0x4000000000000000);
-	return 0;
-}
-static bool fp_is_nan (fpdata *fpd)
-{
-	return floatx80_is_any_nan(fpd->fpx) != 0;
-}
-static bool fp_is_infinity (fpdata *fpd)
-{
-	return floatx80_is_infinity(fpd->fpx) != 0;
-}
-static bool fp_is_zero(fpdata *fpd)
-{
-	return floatx80_is_zero(fpd->fpx) != 0;
-}
-static bool fp_is_neg(fpdata *fpd)
-{
-	return floatx80_is_negative(fpd->fpx) != 0;
-}
-static bool fp_is_denormal(fpdata *fpd)
-{
-	return floatx80_is_denormal(fpd->fpx) != 0;
-}
-static bool fp_is_unnormal(fpdata *fpd)
-{
-	return floatx80_is_unnormal(fpd->fpx) != 0;
-}
-
-
-static void to_single(fpdata *fpd, uae_u32 wrd1)
-{
-	float32 f = wrd1;
-	fpd->fpx = float32_to_floatx80_allowunnormal(f, &fs);
-}
-static uae_u32 from_single(fpdata *fpd)
-{
-	float32 f = floatx80_to_float32(fpd->fpx, &fs);
-	return f;
-}
-
-static void to_double(fpdata *fpd, uae_u32 wrd1, uae_u32 wrd2)
-{
-	float64 f = ((float64)wrd1 << 32) | wrd2;
-	fpd->fpx = float64_to_floatx80_allowunnormal(f, &fs);
-}
-static void from_double(fpdata *fpd, uae_u32 *wrd1, uae_u32 *wrd2)
-{
-	float64 f = floatx80_to_float64(fpd->fpx, &fs);
-	*wrd1 = f >> 32;
-	*wrd2 = (uae_u32)f;
-}
-
-static void to_exten(fpdata *fpd, uae_u32 wrd1, uae_u32 wrd2, uae_u32 wrd3)
-{
-	fpd->fpx.high = (uae_u16)(wrd1 >> 16);
-	fpd->fpx.low = ((uae_u64)wrd2 << 32) | wrd3;
-}
-static void from_exten(fpdata *fpd, uae_u32 *wrd1, uae_u32 *wrd2, uae_u32 *wrd3)
-{
-	floatx80 f = floatx80_to_floatx80(fpd->fpx, &fs);
-	*wrd1 = (uae_u32)(f.high << 16);
-	*wrd2 = f.low >> 32;
-	*wrd3 = (uae_u32)f.low;
-}
-
-static void to_exten_fmovem(fpdata *fpd, uae_u32 wrd1, uae_u32 wrd2, uae_u32 wrd3)
-{
-	fpd->fpx.high = (uae_u16)(wrd1 >> 16);
-	fpd->fpx.low = ((uae_u64)wrd2 << 32) | wrd3;
-}
-static void from_exten_fmovem(fpdata *fpd, uae_u32 *wrd1, uae_u32 *wrd2, uae_u32 *wrd3)
- {
-	*wrd1 = (uae_u32)(fpd->fpx.high << 16);
-	*wrd2 = fpd->fpx.low >> 32;
-	*wrd3 = (uae_u32)fpd->fpx.low;
- }
-
-static uae_s64 to_int(fpdata *src, int size)
-{
-	switch (size) {
-		case 0: return floatx80_to_int8(src->fpx, &fs);
-		case 1: return floatx80_to_int16(src->fpx, &fs);
-		case 2: return floatx80_to_int32(src->fpx, &fs);
-		default: return 0;
-	 }
-}
-static void from_int(fpdata *fpd, uae_s32 src)
-{
-	fpd->fpx = int32_to_floatx80(src);
-}
-
-/* Functions for returning exception state data */
-static void fp_get_internal_overflow(fpdata *fpd)
-{
-	fpd->fpx = getFloatInternalOverflow();
-}
-static void fp_get_internal_underflow(fpdata *fpd)
- {
-	fpd->fpx = getFloatInternalUnderflow();
-}
-static void fp_get_internal_round_all(fpdata *fpd)
-{
-	fpd->fpx = getFloatInternalRoundedAll();
-}
-static void fp_get_internal_round(fpdata *fpd)
-{
-	fpd->fpx = getFloatInternalRoundedSome();
-}
-static void fp_get_internal_round_exten(fpdata *fpd)
-{
-	fpd->fpx = getFloatInternalFloatx80();
-}
-static void fp_get_internal(fpdata *fpd)
-{
-	fpd->fpx = getFloatInternalUnrounded();
-}
-static uae_u32 fp_get_internal_grs(void)
-{
-	return (uae_u32)getFloatInternalGRS();
-}
-/* Function for denormalizing */
-static void fp_denormalize(fpdata *fpd, int esign)
-{
-    fpd->fpx = floatx80_denormalize(fpd->fpx, esign);
-}
-
-/* Functions for rounding */
-
-// round to float with extended precision exponent
-static void fp_round32(fpdata *fpd)
-{
-	fpd->fpx = floatx80_round32(fpd->fpx, &fs);
-}
-
-// round to double with extended precision exponent
-static void fp_round64(fpdata *fpd)
-{
-	fpd->fpx = floatx80_round64(fpd->fpx, &fs);
-}
-
-// round to float
-static void fp_round_single(fpdata *fpd)
-{
-	fpd->fpx = floatx80_round_to_float32(fpd->fpx, &fs);
-}
-
-// round to double
-static void fp_round_double(fpdata *fpd)
-{
-	fpd->fpx = floatx80_round_to_float64(fpd->fpx, &fs);
-}
-
-/* Arithmetic functions */
-
-static void fp_int(fpdata *a, fpdata *b)
-{
-	a->fpx = floatx80_round_to_int(b->fpx, &fs);
-}
-
-static void fp_intrz(fpdata *a, fpdata *b)
-{
-	a->fpx = floatx80_round_to_int_toward_zero(b->fpx, &fs);
-}
-
-static void fp_getexp(fpdata *a, fpdata *b)
-{
-	a->fpx = floatx80_getexp(b->fpx, &fs);
-}
-static void fp_getman(fpdata *a, fpdata *b)
-{
-	a->fpx = floatx80_getman(b->fpx, &fs);
-}
-static void fp_mod(fpdata *a, fpdata *b, uae_u64 *q, uae_u8 *s)
-{
-	a->fpx = floatx80_mod(a->fpx, b->fpx, q, s, &fs);
-}
-static void fp_sgldiv(fpdata *a, fpdata *b)
-{
-	a->fpx = floatx80_sgldiv(a->fpx, b->fpx, &fs);
-}
-static void fp_sglmul(fpdata *a, fpdata *b)
-{
-	a->fpx = floatx80_sglmul(a->fpx, b->fpx, &fs);
-}
-static void fp_rem(fpdata *a, fpdata *b, uae_u64 *q, uae_u8 *s)
-{
-	a->fpx = floatx80_rem(a->fpx, b->fpx, q, s, &fs);
-}
-static void fp_scale(fpdata *a, fpdata *b)
-{
-	a->fpx = floatx80_scale(a->fpx, b->fpx, &fs);
-}
-static void fp_cmp(fpdata *a, fpdata *b)
-{
-	a->fpx = floatx80_cmp(a->fpx, b->fpx, &fs);
-}
-static void fp_tst(fpdata *a, fpdata *b)
-{
-	a->fpx = floatx80_tst(b->fpx, &fs);
-}
-
-#define SETPREC \
-	uint8_t oldprec = fs.floatx80_rounding_precision; \
-	if (prec > 0) \
-		set_floatx80_rounding_precision(prec, &fs);
-
-#define RESETPREC \
-	if (prec > 0) \
-		set_floatx80_rounding_precision(oldprec, &fs);
-
-
-/* Functions with fixed precision */
-static void fp_move(fpdata *a, fpdata *b, int prec)
-{
-	SETPREC
-	a->fpx = floatx80_move(b->fpx, &fs);
-	RESETPREC
-}
-static void fp_abs(fpdata *a, fpdata *b, int prec)
-{
-	SETPREC
-	a->fpx = floatx80_abs(b->fpx, &fs);
-	RESETPREC
-}
-static void fp_neg(fpdata *a, fpdata *b, int prec)
-{
-	SETPREC
-	a->fpx = floatx80_neg(b->fpx, &fs);
-	RESETPREC
-}
-static void fp_add(fpdata *a, fpdata *b, int prec)
-{
-	SETPREC
-	a->fpx = floatx80_add(a->fpx, b->fpx, &fs);
-	RESETPREC
-}
-static void fp_sub(fpdata *a, fpdata *b, int prec)
-{
-	SETPREC
-	a->fpx = floatx80_sub(a->fpx, b->fpx, &fs);
-	RESETPREC
-}
-static void fp_mul(fpdata *a, fpdata *b, int prec)
-{
-	SETPREC
-	a->fpx = floatx80_mul(a->fpx, b->fpx, &fs);
-	RESETPREC
-}
-static void fp_div(fpdata *a, fpdata *b, int prec)
-{
-	SETPREC
-	a->fpx = floatx80_div(a->fpx, b->fpx, &fs);
-	RESETPREC
-}
-static void fp_sqrt(fpdata *a, fpdata *b, int prec)
-{
-	SETPREC
-	a->fpx = floatx80_sqrt(b->fpx, &fs);
-	RESETPREC
-}
-
-
-static void fp_sinh(fpdata *a, fpdata *b)
-{
-    a->fpx = floatx80_sinh(b->fpx, &fs);
-}
-static void fp_lognp1(fpdata *a, fpdata *b)
-{
-    a->fpx = floatx80_lognp1(b->fpx, &fs);
-}
-static void fp_etoxm1(fpdata *a, fpdata *b)
-{
-    a->fpx = floatx80_etoxm1(b->fpx, &fs);
-}
-static void fp_tanh(fpdata *a, fpdata *b)
-{
-    a->fpx = floatx80_tanh(b->fpx, &fs);
-}
-static void fp_atan(fpdata *a, fpdata *b)
-{
-    a->fpx = floatx80_atan(b->fpx, &fs);
-}
-static void fp_asin(fpdata *a, fpdata *b)
-{
-    a->fpx = floatx80_asin(b->fpx, &fs);
-}
-static void fp_atanh(fpdata *a, fpdata *b)
-{
-    a->fpx = floatx80_atanh(b->fpx, &fs);
-}
-static void fp_sin(fpdata *a, fpdata *b)
-{
-    a->fpx = floatx80_sin(b->fpx, &fs);
-}
-static void fp_tan(fpdata *a, fpdata *b)
-{
-    a->fpx = floatx80_tan(b->fpx, &fs);
-}
-static void fp_etox(fpdata *a, fpdata *b)
-{
-    a->fpx = floatx80_etox(b->fpx, &fs);
-}
-static void fp_twotox(fpdata *a, fpdata *b)
-{
-    a->fpx = floatx80_twotox(b->fpx, &fs);
-}
-static void fp_tentox(fpdata *a, fpdata *b)
-{
-    a->fpx = floatx80_tentox(b->fpx, &fs);
-}
-static void fp_logn(fpdata *a, fpdata *b)
-{
-    a->fpx = floatx80_logn(b->fpx, &fs);
-}
-static void fp_log10(fpdata *a, fpdata *b)
-{
-    a->fpx = floatx80_log10(b->fpx, &fs);
-}
-static void fp_log2(fpdata *a, fpdata *b)
-{
-    a->fpx = floatx80_log2(b->fpx, &fs);
-}
-static void fp_cosh(fpdata *a, fpdata *b)
-{
-    a->fpx = floatx80_cosh(b->fpx, &fs);
-}
-static void fp_acos(fpdata *a, fpdata *b)
-{
-    a->fpx = floatx80_acos(b->fpx, &fs);
-}
-static void fp_cos(fpdata *a, fpdata *b)
-{
-    a->fpx = floatx80_cos(b->fpx, &fs);
-}
-
-/* Functions for converting between float formats */
-static const fptype twoto32 = 4294967296.0;
-
-static void to_native(fptype *fp, fpdata *fpd)
-{
-	int expon;
-	fptype frac;
-	
-	expon = fpd->fpx.high & 0x7fff;
-	
-	if (fp_is_zero(fpd)) {
-		*fp = fp_is_neg(fpd) ? -0.0 : +0.0;
-		return;
-	}
-	if (fp_is_nan(fpd)) {
-		*fp = sqrt(-1);
-		return;
-	}
-	if (fp_is_infinity(fpd)) {
-		double zero = 0.0;
-		*fp = fp_is_neg(fpd) ? log(0.0) : (1.0 / zero);
-		return;
-	}
-	
-	frac = (fptype)fpd->fpx.low / (fptype)(twoto32 * 2147483648.0);
-	if (fp_is_neg(fpd))
-		frac = -frac;
-	*fp = ldexp (frac, expon - 16383);
-}
-
-static void from_native(fptype fp, fpdata *fpd)
-{
-	int expon;
-	fptype frac;
-	
-	if (signbit(fp))
-		fpd->fpx.high = 0x8000;
-	else
-		fpd->fpx.high = 0x0000;
-	
-	if (isnan(fp)) {
-		fpd->fpx.high |= 0x7fff;
-		fpd->fpx.low = LIT64(0xffffffffffffffff);
-		return;
-	}
-	if (isinf(fp)) {
-		fpd->fpx.high |= 0x7fff;
-		fpd->fpx.low = LIT64(0x0000000000000000);
-		return;
-	}
-	if (fp == 0.0) {
-		fpd->fpx.low = LIT64(0x0000000000000000);
-		return;
-	}
-	if (fp < 0.0)
-		fp = -fp;
-	
-	 frac = frexp (fp, &expon);
-	frac += 0.5 / (twoto32 * twoto32);
-	if (frac >= 1.0) {
-		frac /= 2.0;
-		expon++;
-	}
-	fpd->fpx.high |= (expon + 16383 - 1) & 0x7fff;
-	fpd->fpx.low = (uint64_t)(frac * (fptype)(twoto32 * twoto32));
-	
-	while (!(fpd->fpx.low & LIT64( 0x8000000000000000))) {
-		if (fpd->fpx.high == 0) {
-			break;
-		}
-		fpd->fpx.low <<= 1;
-		fpd->fpx.high--;
-	}
-}
-
-static void fp_normalize(fpdata *a)
-{
-	a->fpx = floatx80_normalize(a->fpx);
-}
-
-static void fp_to_pack(fpdata *fp, uae_u32 *wrd, int dummy)
-{
-	floatx80 f;
-	int i;
-	uae_s32 exp;
-	uae_s64 mant;
-	uae_u32 pack_exp, pack_int, pack_se, pack_sm;
-	uae_u64 pack_frac;
-
-	if (((wrd[0] >> 16) & 0x7fff) == 0x7fff) {
-		// infinity has extended exponent and all 0 packed fraction
-		// nans are copies bit by bit
-		fpp_to_exten(fp, wrd[0], wrd[1], wrd[2]);
-		return;
-	}
-	if (!(wrd[0] & 0xf) && !wrd[1] && !wrd[2]) {
-		// exponent is not cared about, if mantissa is zero
-		wrd[0] &= 0x80000000;
-		fpp_to_exten(fp, wrd[0], wrd[1], wrd[2]);
-		return;
-	}
-
-	pack_exp = (wrd[0] >> 16) & 0xFFF;              // packed exponent
-	pack_int = wrd[0] & 0xF;                        // packed integer part
-	pack_frac = ((uae_u64)wrd[1] << 32) | wrd[2];   // packed fraction
-	pack_se = (wrd[0] >> 30) & 1;                   // sign of packed exponent
-	pack_sm = (wrd[0] >> 31) & 1;                   // sign of packed significand
-	exp = 0;
-	
-	for (i = 0; i < 3; i++) {
-		exp *= 10;
-		exp += (pack_exp >> (8 - i * 4)) & 0xF;
-	}
-	
-	if (pack_se) {
-		exp = -exp;
-	}
-
-	exp -= 16;
-	
-	if (exp < 0) {
-		exp = -exp;
-		pack_se = 1;
-	}
-	
-	mant = pack_int;
-
-	for (i = 0; i < 16; i++) {
-		mant *= 10;
-		mant += (pack_frac >> (60 - i * 4)) & 0xF;
-	}
-
-	f.high = exp & 0x3FFF;
-	f.high |= pack_se ? 0x4000 : 0;
-	f.high |= pack_sm ? 0x8000 : 0;
-	f.low = mant;
-	
-	fp->fpx = floatdecimal_to_floatx80(f, &fs);
-}
-
-
-static void fp_from_pack(fpdata *fp, uae_u32 *wrd, int kfactor)
-{
-	floatx80 f = floatx80_to_floatdecimal(fp->fpx, &kfactor, &fs);
-	
-	uae_u32 pack_exp, pack_exp4, pack_int, pack_se, pack_sm;
-	uae_u64 pack_frac;    
-
-	uae_u32 exponent;
-	uae_u64 significand;
-
-	uae_s32 len;
-	uae_u64 digit;
- 
-	if ((f.high & 0x7FFF) == 0x7FFF) {
-		wrd[0] = (uae_u32)(f.high << 16);
-		wrd[1] = f.low >> 32;
-		wrd[2] = (uae_u32)f.low;
-	} else {
-		exponent = f.high & 0x3FFF;
-		significand = f.low;
-		
-		pack_int = 0;
-		pack_frac = 0;
-		len = kfactor; // SoftFloat saved len to kfactor variable
-		while (len > 0) {
-			len--;
-			digit = significand % 10;
-			significand /= 10;
-			if (len == 0) {
-				pack_int = digit;
-			} else {
-				pack_frac |= digit << (64 - len * 4);
-			}
-		}
-
-		pack_exp = 0;
-		pack_exp4 = 0;
-		len = 4;
-		while (len > 0) {
-			len--;
-			digit = exponent % 10;
-			exponent /= 10;
-			if (len == 0) {
-				pack_exp4 = digit;
-			} else {
-				pack_exp |= digit << (12 - len * 4);
-			}
-		}
-		
-		pack_se = f.high & 0x4000;
-		pack_sm = f.high & 0x8000;
-		
-		wrd[0] = pack_exp << 16;
-		wrd[0] |= pack_exp4 << 12;
-		wrd[0] |= pack_int;
-		wrd[0] |= pack_se ? 0x40000000 : 0;
-		wrd[0] |= pack_sm ? 0x80000000 : 0;
-		
-		wrd[1] = pack_frac >> 32;
-		wrd[2] = pack_frac & 0xffffffff;
-	}
-}
-
-void fp_init_softfloat(void)
-{
-	float_status fsx = { 0 };
-	set_floatx80_rounding_precision(80, &fsx);
-	set_float_rounding_mode(float_round_to_zero, &fsx);
-
-	fpp_print = fp_print;
-	fpp_is_snan = fp_is_snan;
-	fpp_unset_snan = fp_unset_snan;
-	fpp_is_nan = fp_is_nan;
-	fpp_is_infinity = fp_is_infinity;
-	fpp_is_zero = fp_is_zero;
-	fpp_is_neg = fp_is_neg;
-	fpp_is_denormal = fp_is_denormal;
-	fpp_is_unnormal = fp_is_unnormal;
-
-	fpp_get_status = fp_get_status;
-	fpp_clear_status = fp_clear_status;
-	fpp_set_mode = fp_set_mode;
-
-	fpp_from_native = from_native;
-	fpp_to_native = to_native;
-
-	fpp_to_int = to_int;
-	fpp_from_int = from_int;
-
-	fpp_to_pack = fp_to_pack;
-	fpp_from_pack = fp_from_pack;
-
-	fpp_to_single = to_single;
-	fpp_from_single = from_single;
-	fpp_to_double = to_double;
-	fpp_from_double = from_double;
-	fpp_to_exten = to_exten;
-	fpp_from_exten = from_exten;
-	fpp_to_exten_fmovem = to_exten_fmovem;
-	fpp_from_exten_fmovem = from_exten_fmovem;
-
-	fpp_round_single = fp_round_single;
-	fpp_round_double = fp_round_double;
-	fpp_round32 = fp_round32;
-	fpp_round64 = fp_round64;
-
-	fpp_normalize = fp_normalize;
-	fpp_denormalize = fp_denormalize;
-	fpp_get_internal_overflow = fp_get_internal_overflow;
-	fpp_get_internal_underflow = fp_get_internal_underflow;
-	fpp_get_internal_round_all = fp_get_internal_round_all;
-	fpp_get_internal_round = fp_get_internal_round;
-	fpp_get_internal_round_exten = fp_get_internal_round_exten;
-	fpp_get_internal = fp_get_internal;
-	fpp_get_internal_grs = fp_get_internal_grs;
-
-	fpp_int = fp_int;
-	fpp_sinh = fp_sinh;
-	fpp_intrz = fp_intrz;
-	fpp_sqrt = fp_sqrt;
-	fpp_lognp1 = fp_lognp1;
-	fpp_etoxm1 = fp_etoxm1;
-	fpp_tanh = fp_tanh;
-	fpp_atan = fp_atan;
-	fpp_atanh = fp_atanh;
-	fpp_sin = fp_sin;
-	fpp_asin = fp_asin;
-	fpp_tan = fp_tan;
-	fpp_etox = fp_etox;
-	fpp_twotox = fp_twotox;
-	fpp_tentox = fp_tentox;
-	fpp_logn = fp_logn;
-	fpp_log10 = fp_log10;
-	fpp_log2 = fp_log2;
-	fpp_abs = fp_abs;
-	fpp_cosh = fp_cosh;
-	fpp_neg = fp_neg;
-	fpp_acos = fp_acos;
-	fpp_cos = fp_cos;
-	fpp_getexp = fp_getexp;
-	fpp_getman = fp_getman;
-	fpp_div = fp_div;
-	fpp_mod = fp_mod;
-	fpp_add = fp_add;
-	fpp_mul = fp_mul;
-	fpp_rem = fp_rem;
-	fpp_scale = fp_scale;
-	fpp_sub = fp_sub;
-	fpp_sgldiv = fp_sgldiv;
-	fpp_sglmul = fp_sglmul;
-	fpp_cmp = fp_cmp;
-	fpp_tst = fp_tst;
-	fpp_move = fp_move;
-}
-
--- a/src/include/akiko.h
+++ b/src/include/akiko.h
@ -4,14 +4,14 @@
 #define AKIKO_BASE 0xb80000
 #define AKIKO_BASE_END 0xb80100 /* ?? */

-extern void akiko_reset(void);
-extern int akiko_init(void);
-extern void akiko_free(void);
+extern void akiko_reset (void);
+extern int akiko_init (void);
+extern void akiko_free (void);

-extern void AKIKO_hsync_handler(void);
-extern void akiko_mute(int);
+extern void AKIKO_hsync_handler (void);
+extern void akiko_mute (int);
 extern bool akiko_ntscmode(void);

-extern void rethink_akiko(void);
+extern void rethink_akiko (void);

 #endif /* UAE_AKIKO_H */
--- a/src/include/drawing.h
+++ b/src/include/drawing.h
@ -94,44 +94,38 @@ struct color_entry {
 };

 /* convert 24 bit AGA Amiga RGB to native color */
-// Disabled because it only works for 16-bit modes (wrong colors on AGA modes if running 32-bit)
-//#ifdef ARMV6T2
-//STATIC_INLINE uae_u32 CONVERT_RGB(uae_u32 c)
-//{
-//  uae_u32 ret;
-//  __asm__ (
-//			"ubfx    r1, %[c], #19, #5 \n\t"
-//			"ubfx    r2, %[c], #10, #6 \n\t"
-//			"ubfx    %[v], %[c], #3, #5 \n\t"
-//			"orr     %[v], %[v], r1, lsl #11 \n\t"
-//			"orr     %[v], %[v], r2, lsl #5 \n\t"
-//			"pkhbt   %[v], %[v], %[v], lsl #16 \n\t"
-//           : [v] "=r" (ret) : [c] "r" (c) : "r1", "r2" );
-//  return ret;
-//}
-//STATIC_INLINE uae_u16 CONVERT_RGB_16(uae_u32 c)
-//{
-//  uae_u16 ret;
-//  __asm__ (
-//			"ubfx    r1, %[c], #19, #5 \n\t"
-//			"ubfx    r2, %[c], #10, #6 \n\t"
-//			"ubfx    %[v], %[c], #3, #5 \n\t"
-//			"orr     %[v], %[v], r1, lsl #11 \n\t"
-//			"orr     %[v], %[v], r2, lsl #5 \n\t"
-//           : [v] "=r" (ret) : [c] "r" (c) : "r1", "r2" );
-//  return ret;
-//}
-//#else
-#ifdef WORDS_BIGENDIAN
-# define CONVERT_RGB(c) \
-	( xbluecolors[((uae_u8*)(&c))[3]] | xgreencolors[((uae_u8*)(&c))[2]] | xredcolors[((uae_u8*)(&c))[1]] )
+#ifdef ARMV6T2
+STATIC_INLINE uae_u32 CONVERT_RGB(uae_u32 c)
+{
+  uae_u32 ret;
+  __asm__ (
+			"ubfx    r1, %[c], #19, #5 \n\t"
+			"ubfx    r2, %[c], #10, #6 \n\t"
+			"ubfx    %[v], %[c], #3, #5 \n\t"
+			"orr     %[v], %[v], r1, lsl #11 \n\t"
+			"orr     %[v], %[v], r2, lsl #5 \n\t"
+			"pkhbt   %[v], %[v], %[v], lsl #16 \n\t"
+           : [v] "=r" (ret) : [c] "r" (c) : "r1", "r2" );
+  return ret;
+}
+STATIC_INLINE uae_u16 CONVERT_RGB_16(uae_u32 c)
+{
+  uae_u16 ret;
+  __asm__ (
+			"ubfx    r1, %[c], #19, #5 \n\t"
+			"ubfx    r2, %[c], #10, #6 \n\t"
+			"ubfx    %[v], %[c], #3, #5 \n\t"
+			"orr     %[v], %[v], r1, lsl #11 \n\t"
+			"orr     %[v], %[v], r2, lsl #5 \n\t"
+           : [v] "=r" (ret) : [c] "r" (c) : "r1", "r2" );
+  return ret;
+}
 #else
-# define CONVERT_RGB(c) \
-	( xbluecolors[((uae_u8*)(&c))[0]] | xgreencolors[((uae_u8*)(&c))[1]] | xredcolors[((uae_u8*)(&c))[2]] )
+#define CONVERT_RGB(c) \
+    ( xbluecolors[((uae_u8*)(&c))[0]] | xgreencolors[((uae_u8*)(&c))[1]] | xredcolors[((uae_u8*)(&c))[2]] )
 #define CONVERT_RGB_16(c) \
    ( xbluecolors[((uae_u8*)(&c))[0]] | xgreencolors[((uae_u8*)(&c))[1]] | xredcolors[((uae_u8*)(&c))[2]] )
 #endif
-//#endif

 STATIC_INLINE xcolnr getxcolor (int c)
 {
@ -213,8 +207,9 @@ extern uae_u16 spixels[MAX_SPR_PIXELS * 2];

 /* Way too much... */
 #define MAX_REG_CHANGE ((MAXVPOS + 1) * MAXHPOS)
+#define COLOR_TABLE_SIZE ((MAXVPOS + 2) * 2) /* fully parenthesized so the expansion stays a single operand inside larger expressions */

-extern struct color_entry curr_color_tables[(MAXVPOS + 2) * 2];
+extern struct color_entry curr_color_tables[COLOR_TABLE_SIZE];

 extern struct sprite_entry *curr_sprite_entries;
 extern struct color_change *curr_color_changes;
@ -229,19 +224,14 @@ struct decision {
 	int diwfirstword, diwlastword;
 	int ctable;

-	uae_u16 bplcon0, bplcon2;
-#ifdef AGA
-	uae_u16 bplcon3, bplcon4;
-#endif
-	uae_u8 nr_planes;
-	uae_u8 bplres;
-	bool ehb_seen;
-	bool ham_seen;
-	bool ham_at_start;
-#ifdef AGA
+  uae_u16 bplcon0, bplcon2;
+  uae_u16 bplcon3, bplcon4;
+  uae_u8 nr_planes;
+  uae_u8 bplres;
+  bool ham_seen;
+  bool ham_at_start;
 	bool bordersprite_seen;
 	bool xor_seen;
-#endif
 };

 /* Anything related to changes in hw registers during the DDF for one
--- a/src/include/flags_arm.h
+++ b/src/include/flags_arm.h
@ -32,21 +32,36 @@

 /* Native integer code conditions */
 enum {
-      	NATIVE_CC_EQ = 0,
-        NATIVE_CC_NE = 1,
-        NATIVE_CC_CS = 2,
-        NATIVE_CC_CC = 3,
-        NATIVE_CC_MI = 4,
-        NATIVE_CC_PL = 5,
-        NATIVE_CC_VS = 6,
-        NATIVE_CC_VC = 7,
-        NATIVE_CC_HI = 8,
-        NATIVE_CC_LS = 9,
-        NATIVE_CC_GE = 10,
-        NATIVE_CC_LT = 11,
-        NATIVE_CC_GT = 12,
-        NATIVE_CC_LE = 13,
-        NATIVE_CC_AL = 14
+	NATIVE_CC_EQ = 0,
+  NATIVE_CC_NE = 1,
+  NATIVE_CC_CS = 2,
+  NATIVE_CC_CC = 3,
+  NATIVE_CC_MI = 4,
+  NATIVE_CC_PL = 5,
+  NATIVE_CC_VS = 6,
+  NATIVE_CC_VC = 7,
+  NATIVE_CC_HI = 8,
+  NATIVE_CC_LS = 9,
+  NATIVE_CC_GE = 10,
+  NATIVE_CC_LT = 11,
+  NATIVE_CC_GT = 12,
+  NATIVE_CC_LE = 13,
+  NATIVE_CC_AL = 14,
+  
+  // For FBcc, we need some pseudo condition codes
+  NATIVE_CC_F_OGT = 16 + 2,
+  NATIVE_CC_F_OGE = 16 + 3,
+  NATIVE_CC_F_OLT = 16 + 4,
+  NATIVE_CC_F_OLE = 16 + 5,
+  NATIVE_CC_F_OGL = 16 + 6,
+  NATIVE_CC_F_OR  = 16 + 7,
+  NATIVE_CC_F_UN  = 16 + 8,
+  NATIVE_CC_F_UEQ = 16 + 9,
+  NATIVE_CC_F_UGT = 16 + 10,
+  NATIVE_CC_F_UGE = 16 + 11,
+  NATIVE_CC_F_ULT = 16 + 12,
+  NATIVE_CC_F_ULE = 16 + 13
+  
 };

 #endif /* NATIVE_FLAGS_ARM_H */
--- a/src/include/fpp.h
+++ b/src/include/fpp.h
@ -7,6 +7,7 @@
 /* E = MAX & F # 0 -> NotANumber */
 /* E = biased by 127 (single) ,1023 (double) ,16383 (extended) */

+#pragma once
 #define FPSR_BSUN       0x00008000
 #define FPSR_SNAN       0x00004000
 #define FPSR_OPERR      0x00002000
@ -16,129 +17,8 @@
 #define FPSR_INEX2      0x00000200
 #define FPSR_INEX1      0x00000100

-extern void fp_init_native(void);
-extern void fp_init_softfloat(void);
-extern void fpsr_set_exception(uae_u32 exception);
-extern void fpu_modechange(void);
-
 #if defined(CPU_i386) || defined(CPU_x86_64)
 extern void init_fpucw_x87(void);
 #endif

-typedef void (*FPP_ABQS)(fpdata*, fpdata*, uae_u64*, uae_u8*);
-typedef void (*FPP_AB)(fpdata*, fpdata*);
-typedef void (*FPP_ABP)(fpdata*, fpdata*, int);
-typedef void (*FPP_A)(fpdata*);
-
-typedef bool (*FPP_IS)(fpdata*);
-typedef void (*FPP_SET_MODE)(uae_u32);
-typedef void (*FPP_GET_STATUS)(uae_u32*);
-typedef void (*FPP_CLEAR_STATUS)(void);
-
-typedef void (*FPP_FROM_NATIVE)(fptype, fpdata*);
-typedef void (*FPP_TO_NATIVE)(fptype*, fpdata*);
-
-typedef void (*FPP_FROM_INT)(fpdata*,uae_s32);
-typedef uae_s64 (*FPP_TO_INT)(fpdata*, int);
-
-typedef void (*FPP_TO_SINGLE)(fpdata*, uae_u32);
-typedef uae_u32 (*FPP_FROM_SINGLE)(fpdata*);
-
-typedef void (*FPP_TO_DOUBLE)(fpdata*, uae_u32, uae_u32);
-typedef void (*FPP_FROM_DOUBLE)(fpdata*, uae_u32*, uae_u32*);
-
-typedef void (*FPP_TO_EXTEN)(fpdata*, uae_u32, uae_u32, uae_u32);
-typedef void (*FPP_FROM_EXTEN)(fpdata*, uae_u32*, uae_u32*, uae_u32*);
-
-typedef void (*FPP_PACK)(fpdata*, uae_u32*, int);
-
-typedef const TCHAR* (*FPP_PRINT)(fpdata*,int);
-typedef uae_u32 (*FPP_GET32)(void);
-
-typedef void (*FPP_DENORMALIZE)(fpdata*,int);
-
-extern FPP_PRINT fpp_print;
-
-extern FPP_IS fpp_is_snan;
-extern FPP_IS fpp_unset_snan;
-extern FPP_IS fpp_is_nan;
-extern FPP_IS fpp_is_infinity;
-extern FPP_IS fpp_is_zero;
-extern FPP_IS fpp_is_neg;
-extern FPP_IS fpp_is_denormal;
-extern FPP_IS fpp_is_unnormal;
-
-extern FPP_GET_STATUS fpp_get_status;
-extern FPP_CLEAR_STATUS fpp_clear_status;
-extern FPP_SET_MODE fpp_set_mode;
-
-extern FPP_FROM_NATIVE fpp_from_native;
-extern FPP_TO_NATIVE fpp_to_native;
-
-extern FPP_TO_INT fpp_to_int;
-extern FPP_FROM_INT fpp_from_int;
-
-extern FPP_PACK fpp_to_pack;
-extern FPP_PACK fpp_from_pack;
-
-extern FPP_TO_SINGLE fpp_to_single;
-extern FPP_FROM_SINGLE fpp_from_single;
-extern FPP_TO_DOUBLE fpp_to_double;
-extern FPP_FROM_DOUBLE fpp_from_double;
-extern FPP_TO_EXTEN fpp_to_exten;
-extern FPP_FROM_EXTEN fpp_from_exten;
-extern FPP_TO_EXTEN fpp_to_exten_fmovem;
-extern FPP_FROM_EXTEN fpp_from_exten_fmovem;
-
-extern FPP_A fpp_round_single;
-extern FPP_A fpp_round_double;
-extern FPP_A fpp_round32;
-extern FPP_A fpp_round64;
-
-extern FPP_A fpp_normalize;
-extern FPP_DENORMALIZE fpp_denormalize;
-extern FPP_A fpp_get_internal_overflow;
-extern FPP_A fpp_get_internal_underflow;
-extern FPP_A fpp_get_internal_round_all;
-extern FPP_A fpp_get_internal_round;
-extern FPP_A fpp_get_internal_round_exten;
-extern FPP_A fpp_get_internal;
-extern FPP_GET32 fpp_get_internal_grs;
-
-extern FPP_AB fpp_int;
-extern FPP_AB fpp_sinh;
-extern FPP_AB fpp_intrz;
-extern FPP_ABP fpp_sqrt;
-extern FPP_AB fpp_lognp1;
-extern FPP_AB fpp_etoxm1;
-extern FPP_AB fpp_tanh;
-extern FPP_AB fpp_atan;
-extern FPP_AB fpp_atanh;
-extern FPP_AB fpp_sin;
-extern FPP_AB fpp_asin;
-extern FPP_AB fpp_tan;
-extern FPP_AB fpp_etox;
-extern FPP_AB fpp_twotox;
-extern FPP_AB fpp_tentox;
-extern FPP_AB fpp_logn;
-extern FPP_AB fpp_log10;
-extern FPP_AB fpp_log2;
-extern FPP_ABP fpp_abs;
-extern FPP_AB fpp_cosh;
-extern FPP_ABP fpp_neg;
-extern FPP_AB fpp_acos;
-extern FPP_AB fpp_cos;
-extern FPP_AB fpp_getexp;
-extern FPP_AB fpp_getman;
-extern FPP_ABP fpp_div;
-extern FPP_ABQS fpp_mod;
-extern FPP_ABP fpp_add;
-extern FPP_ABP fpp_mul;
-extern FPP_ABQS fpp_rem;
-extern FPP_AB fpp_scale;
-extern FPP_ABP fpp_sub;
-extern FPP_AB fpp_sgldiv;
-extern FPP_AB fpp_sglmul;
-extern FPP_AB fpp_cmp;
-extern FPP_AB fpp_tst;
-extern FPP_ABP fpp_move;
+extern void fpsr_set_exception(uae_u32 exception);
--- a/src/include/native2amiga_api.h
+++ b/src/include/native2amiga_api.h
@ -18,7 +18,7 @@ void uae_PutMsg(uaecptr port, uaecptr msg);
 void uae_Signal(uaecptr task, uae_u32 mask);
 void uae_NotificationHack(uaecptr, uaecptr);
 #endif
-int native2amiga_isfree (void);
+int native2amiga_isfree(void);
 void uae_nativesem_wait(void);
 void uae_nativesem_post(void);

--- a/src/include/newcpu.h
+++ b/src/include/newcpu.h
@ -12,7 +12,6 @@
 #include "uae/types.h"
 #include "readcpu.h"
 #include "machdep/m68k.h"
-#include <softfloat/softfloat.h>

 extern const int areg_byteinc[];
 extern const int imm8_table[];
@ -68,7 +67,6 @@ typedef double fptype;

 typedef struct
 {
-	floatx80 fpx;
 	fptype fp;
 } fpdata;

@ -101,6 +99,9 @@ struct regstruct

 #ifdef FPUEMU
 	fpdata fp[8];
+#ifdef JIT
+	fpdata fp_result;
+#endif
  uae_u32 fpcr,fpsr, fpiar;
 	uae_u32 fpu_state;
 	uae_u32 fpu_exp_state;
@ -341,9 +342,7 @@ extern void fpuop_trapcc(uae_u32, uaecptr, uae_u16);
 extern void fpuop_bcc(uae_u32, uaecptr, uae_u32);
 extern void fpuop_save(uae_u32);
 extern void fpuop_restore(uae_u32);
-extern uae_u32 fpp_get_fpsr (void);
 extern void fpu_reset (void);
-extern bool fpu_get_constant(fpdata *fp, int cr);
 extern int fpp_cond(int condition);

 extern void exception3_read(uae_u32 opcode, uaecptr addr);
--- a/src/include/options.h
+++ b/src/include/options.h
@ -310,9 +310,9 @@ struct uae_prefs {
 	int sound_volume_paula;
 	int sound_volume_cd;

+	bool compfpu;
 	int cachesize;
 	bool fpu_strict;
-	bool fpu_softfloat;

 	int gfx_framerate;
 	struct wh gfx_size;
--- a/src/include/sysdeps.h
+++ b/src/include/sysdeps.h
@ -408,17 +408,18 @@ extern void gui_message(const TCHAR *, ...);
 */

 #ifdef ARMV6_ASSEMBLY
+
 STATIC_INLINE uae_u32 do_byteswap_32(uae_u32 v) {
-	__asm__(
-			"rev %0, %0"
-            : "=r" (v) : "0" (v)); return v;
+  __asm__ (
+		"rev %0, %0"
+    : "=r" (v) : "0" (v) ); return v;
 }

 STATIC_INLINE uae_u32 do_byteswap_16(uae_u32 v) {
-	__asm__(
-  			"revsh %0, %0\n\t"
-            "uxth %0, %0"
-            : "=r" (v) : "0" (v)); return v;
+  __asm__ (
+    "revsh %0, %0\n\t"
+    "uxth %0, %0"
+    : "=r" (v) : "0" (v) ); return v;
 }
 #define bswap_16(x) do_byteswap_16(x)
 #define bswap_32(x) do_byteswap_32(x)
--- a/src/jit/codegen_arm.cpp
+++ b/src/jit/codegen_arm.cpp
@ -333,6 +333,24 @@ LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, RR4 s))
 }
 LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, RR4 s))

+LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
+{
+  if(s >= (uae_u32) &regs && s < ((uae_u32) &regs) + sizeof(struct regstruct)) {
+    uae_s32 idx = s - (uae_u32) & regs;
+    LDR_rRI(d, R_REGSTRUCT, idx);
+  } else {
+#ifdef ARMV6T2
+    MOVW_ri16(REG_WORK1, s);
+    MOVT_ri16(REG_WORK1, s >> 16);
+#else
+    uae_s32 offs = data_long_offs(s);
+	  LDR_rRI(REG_WORK1, RPC_INDEX, offs);
+#endif
+	  LDR_rR(d, REG_WORK1);
+  }
+}
+LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
+
 LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, RR2 s))
 {
  PKHBT_rrr(d, s, d);
@ -465,11 +483,6 @@ STATIC_INLINE void raw_emit_nop_filler(int nbytes)
 	while(nbytes--) { NOP(); }
 }

-STATIC_INLINE void raw_emit_nop(void)
-{
-  NOP();
-}
-
 //
 // Arm instructions
 //
@ -611,7 +624,7 @@ LOWFUNC(NONE,NONE,3,compemu_raw_lea_l_brr,(W4 d, RR4 s, IMM offset))
 }
 LENDFUNC(NONE,NONE,3,compemu_raw_lea_l_brr,(W4 d, RR4 s, IMM offset))

-LOWFUNC(NONE,WRITE,2,compemu_raw_mov_b_mr,(IMM d, RR1 s))
+LOWFUNC(NONE,WRITE,2,compemu_raw_mov_b_mr,(MEMW d, RR1 s))
 {
  if(d >= (uae_u32) &regs && d < ((uae_u32) &regs) + sizeof(struct regstruct)) {
    uae_s32 idx = d - (uae_u32) & regs;
@ -627,7 +640,7 @@ LOWFUNC(NONE,WRITE,2,compemu_raw_mov_b_mr,(IMM d, RR1 s))
 	  STRB_rR(s, REG_WORK1);
  }
 }
-LENDFUNC(NONE,WRITE,2,compemu_raw_mov_b_mr,(IMM d, RR1 s))
+LENDFUNC(NONE,WRITE,2,compemu_raw_mov_b_mr,(MEMW d, RR1 s))

 LOWFUNC(NONE,WRITE,2,compemu_raw_mov_l_mi,(MEMW d, IMM s))
 {
@ -664,7 +677,7 @@ LOWFUNC(NONE,WRITE,2,compemu_raw_mov_l_mi,(MEMW d, IMM s))
 }
 LENDFUNC(NONE,WRITE,2,compemu_raw_mov_l_mi,(MEMW d, IMM s))

-LOWFUNC(NONE,WRITE,2,compemu_raw_mov_l_mr,(IMM d, RR4 s))
+LOWFUNC(NONE,WRITE,2,compemu_raw_mov_l_mr,(MEMW d, RR4 s))
 {
  if(d >= (uae_u32) &regs && d < ((uae_u32) &regs) + sizeof(struct regstruct)) {
    uae_s32 idx = d - (uae_u32) & regs;
@ -680,7 +693,7 @@ LOWFUNC(NONE,WRITE,2,compemu_raw_mov_l_mr,(IMM d, RR4 s))
 	  STR_rR(s, REG_WORK1);
  }
 }
-LENDFUNC(NONE,WRITE,2,compemu_raw_mov_l_mr,(IMM d, RR4 s))
+LENDFUNC(NONE,WRITE,2,compemu_raw_mov_l_mr,(MEMW d, RR4 s))

 LOWFUNC(NONE,NONE,2,compemu_raw_mov_l_ri,(W4 d, IMM s))
 {
@ -831,26 +844,101 @@ STATIC_INLINE void compemu_raw_call_r(RR4 r)
 STATIC_INLINE void compemu_raw_jcc_l_oponly(int cc)
 {
 	switch (cc) {
-	case 9: // LS
-		BEQ_i(0);										// beq <dojmp>
-		BCC_i(1);										// bcc <jp>
+		case NATIVE_CC_HI: // HI
+			BEQ_i(2);										// beq no jump
+			BCS_i(1);										// bcs no jump
+		  // jump
+			LDR_rRI(RPC_INDEX, RPC_INDEX, -4); 
+			// no jump
+			break;

-		//<dojmp>:
-		LDR_rRI(RPC_INDEX, RPC_INDEX, -4); 	// ldr	pc, [pc]	; <value>
-		break;
+		case NATIVE_CC_LS: // LS
+			BEQ_i(0);										// beq jump
+			BCC_i(1);										// bcc no jump
+			// jump
+			LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
+			// no jump
+			break;

-	case 8: // HI
-		BEQ_i(2);										// beq <jp>
-		BCS_i(1);										// bcs <jp>
+		case NATIVE_CC_F_OGT: // Jump if valid and greater than
+			BVS_i(2);		// do not jump if NaN
+			BLE_i(1);		// do not jump if less or equal
+			LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
+			break;

-		//<dojmp>:
-		LDR_rRI(RPC_INDEX, RPC_INDEX, -4);  	// ldr	pc, [pc]	; <value>
-		break;
+		case NATIVE_CC_F_OGE: // Jump if valid and greater or equal
+			BVS_i(2);		// do not jump if NaN
+			BCC_i(1);		// do not jump if carry cleared
+			LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
+			break;
+			
+		case NATIVE_CC_F_OLT: // Jump if valid and less than
+			BVS_i(2);		// do not jump if NaN
+			BCS_i(1);		// do not jump if carry set
+			LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
+			break;
+			
+		case NATIVE_CC_F_OLE: // Jump if valid and less or equal
+			BVS_i(2);		// do not jump if NaN
+			BGT_i(1);		// do not jump if greater than
+			LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
+			break;
+			
+		case NATIVE_CC_F_OGL: // Jump if valid and greater or less
+			BVS_i(2);		// do not jump if NaN
+			BEQ_i(1);		// do not jump if equal
+			LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
+			break;

-	default:
-    CC_B_i(cc^1, 1);
-    LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
-		break;
+		case NATIVE_CC_F_OR: // Jump if valid
+			BVS_i(1); 	// do not jump if NaN
+			LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
+			break;
+			
+		case NATIVE_CC_F_UN: // Jump if NAN
+			BVC_i(1); 	// do not jump if valid
+			LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
+			break;
+
+		case NATIVE_CC_F_UEQ: // Jump if NAN or equal
+			BVS_i(0); 	// jump if NaN
+			BNE_i(1);		// do not jump if greater or less
+			// jump
+			LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
+			break;
+
+		case NATIVE_CC_F_UGT: // Jump if NAN or greater than
+			BVS_i(0); 	// jump if NaN
+			BLS_i(1);		// do not jump if lower or same
+			// jump
+			LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
+			break;
+
+		case NATIVE_CC_F_UGE: // Jump if NAN or greater or equal
+			BVS_i(0); 	// jump if NaN
+			BMI_i(1);		// do not jump if lower
+			// jump
+			LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
+			break;
+
+		case NATIVE_CC_F_ULT: // Jump if NAN or less than
+			BVS_i(0); 	// jump if NaN
+			BGE_i(1);		// do not jump if greater or equal
+			// jump
+			LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
+			break;
+
+		case NATIVE_CC_F_ULE: // Jump if NAN or less or equal
+			BVS_i(0); 	// jump if NaN
+			BGT_i(1);		// do not jump if greater
+			// jump
+			LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
+			break;
+	
+		default:
+	    CC_B_i(cc^1, 1);
+      LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
+			break;
 	}
  // emit of target will be done by caller
 }
@ -889,11 +977,6 @@ STATIC_INLINE void compemu_raw_jmp_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
 	emit_long(base);
 }

-STATIC_INLINE void compemu_raw_jmp_r(RR4 r)
-{
-	BX_r(r);
-}
-
 STATIC_INLINE void compemu_raw_jnz(uae_u32 t)
 {
 #ifdef ARMV6T2
@ -1009,3 +1092,317 @@ LOWFUNC(NONE,NONE,2,compemu_raw_endblock_pc_isconst,(IMM cycles, IMM v))
  // <target emitted by caller>
 }
 LENDFUNC(NONE,NONE,2,compemu_raw_endblock_pc_isconst,(IMM cycles, IMM v))
+
+
+/*************************************************************************
+* FPU stuff                                                             *
+*************************************************************************/
+
+LOWFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
+{
+	VMOV64_rr(d, s);
+}
+LENDFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
+
+LOWFUNC(NONE,WRITE,2,compemu_raw_fmov_mr_drop,(MEMW mem, FR s))
+{
+  if(mem >= (uae_u32) &regs && mem < (uae_u32) &regs + 1020 && ((mem - (uae_u32) &regs) & 0x3) == 0) {
+    VSTR64(s, R_REGSTRUCT, (mem - (uae_u32) &regs));
+  } else {
+    MOVW_ri16(REG_WORK1, mem);
+    MOVT_ri16(REG_WORK1, mem >> 16);
+    VSTR64(s, REG_WORK1, 0);
+  }
+}
+LENDFUNC(NONE,WRITE,2,compemu_raw_fmov_mr_drop,(MEMW mem, FR s))
+
+
+LOWFUNC(NONE,READ,2,compemu_raw_fmov_rm,(FW d, MEMR mem))
+{
+  if(mem >= (uae_u32) &regs && mem < (uae_u32) &regs + 1020 && ((mem - (uae_u32) &regs) & 0x3) == 0) { // word-aligned and small enough for the 8-bit word offset built by FIMM8
+    VLDR64(d, R_REGSTRUCT, (mem - (uae_u32) &regs)); // load the double relative to the regs base register
+  } else {
+    MOVW_ri16(REG_WORK1, mem); // otherwise build the absolute address in REG_WORK1 (low half, then high half)
+    MOVT_ri16(REG_WORK1, mem >> 16);
+    VLDR64(d, REG_WORK1, 0);
+  }
+}
+LENDFUNC(NONE,READ,2,compemu_raw_fmov_rm,(FW d, MEMR mem))
+
+LOWFUNC(NONE,NONE,2,raw_fmov_l_rr,(FW d, RR4 s))
+{
+  VMOVi_from_ARM(SCRATCH_F64_1, s);
+  VCVT_64_from_i(d, SCRATCH_F32_1);
+}
+LENDFUNC(NONE,NONE,2,raw_fmov_l_rr,(FW d, RR4 s))
+
+LOWFUNC(NONE,NONE,2,raw_fmov_s_rr,(FW d, RR4 s))
+{
+  VMOV32_from_ARM(SCRATCH_F32_1, s);
+  VCVT_32_to_64(d, SCRATCH_F32_1);
+}
+LENDFUNC(NONE,NONE,2,raw_fmov_s_rr,(FW d, RR4 s))
+
+LOWFUNC(NONE,NONE,2,raw_fmov_w_rr,(FW d, RR2 s))
+{
+  SIGN_EXTEND_16_REG_2_REG(REG_WORK1, s);
+  VMOVi_from_ARM(SCRATCH_F64_1, REG_WORK1);
+  VCVT_64_from_i(d, SCRATCH_F32_1);
+}
+LENDFUNC(NONE,NONE,2,raw_fmov_w_rr,(FW d, RR2 s))
+
+LOWFUNC(NONE,NONE,2,raw_fmov_b_rr,(FW d, RR1 s))
+{
+  SIGN_EXTEND_8_REG_2_REG(REG_WORK1, s);
+  VMOVi_from_ARM(SCRATCH_F64_1, REG_WORK1);
+  VCVT_64_from_i(d, SCRATCH_F32_1);
+}
+LENDFUNC(NONE,NONE,2,raw_fmov_b_rr,(FW d, RR1 s))
+
+LOWFUNC(NONE,NONE,3,raw_fmov_d_rrr,(FW d, RR4 s1, RR4 s2))
+{
+  VMOV64_from_ARM(d, s1, s2); // build the double in d from two core registers (s1 -> Rt field, s2 -> Rt2 field)
+}
+LENDFUNC(NONE,NONE,3,raw_fmov_d_rrr,(FW d, RR4 s1, RR4 s2))
+
+LOWFUNC(NONE,NONE,2,raw_fmov_to_l_rr,(W4 d, FR s))
+{
+  VCVTR_64_to_i(SCRATCH_F32_1, s);
+  VMOVi_to_ARM(d, SCRATCH_F64_1);
+}
+LENDFUNC(NONE,NONE,2,raw_fmov_to_l_rr,(W4 d, FR s))
+
+LOWFUNC(NONE,NONE,2,raw_fmov_to_s_rr,(W4 d, FR s))
+{
+  VCVT_64_to_32(SCRATCH_F32_1, s);
+  VMOV32_to_ARM(d, SCRATCH_F32_1);
+}
+LENDFUNC(NONE,NONE,2,raw_fmov_to_s_rr,(W4 d, FR s))
+
+LOWFUNC(NONE,NONE,2,raw_fmov_to_w_rr,(W4 d, FR s))
+{
+  VCVTR_64_to_i(SCRATCH_F32_1, s);
+  VMOVi_to_ARM(REG_WORK1, SCRATCH_F64_1);
+	SSAT_rir(REG_WORK1, 15, REG_WORK1);
+	BFI_rrii(d, REG_WORK1, 0, 15);
+}
+LENDFUNC(NONE,NONE,2,raw_fmov_to_w_rr,(W4 d, FR s))
+
+LOWFUNC(NONE,NONE,2,raw_fmov_to_b_rr,(W4 d, FR s))
+{
+  VCVTR_64_to_i(SCRATCH_F32_1, s);
+  VMOVi_to_ARM(REG_WORK1, SCRATCH_F64_1);
+  SSAT_rir(REG_WORK1, 7, REG_WORK1);
+  BFI_rrii(d, REG_WORK1, 0, 7);
+}
+LENDFUNC(NONE,NONE,2,raw_fmov_to_b_rr,(W4 d, FR s))
+
+LOWFUNC(NONE,NONE,1,raw_fmov_d_ri_0,(FW r))
+{
+  VMOV64_i(r, 0x7, 0x0); // load imm #1 into reg
+  VSUB64(r, r, r); // r - r: turns the loaded 1.0 into 0.0
+}
+LENDFUNC(NONE,NONE,1,raw_fmov_d_ri_0,(FW r))
+
+LOWFUNC(NONE,NONE,1,raw_fmov_d_ri_1,(FW r))
+{
+  VMOV64_i(r, 0x7, 0x0); // load imm #1 into reg
+}
+LENDFUNC(NONE,NONE,1,raw_fmov_d_ri_1,(FW r))
+
+LOWFUNC(NONE,NONE,1,raw_fmov_d_ri_10,(FW r))
+{
+  VMOV64_i(r, 0x2, 0x4); // load imm #10 into reg
+}
+LENDFUNC(NONE,NONE,1,raw_fmov_d_ri_10,(FW r))
+
+LOWFUNC(NONE,NONE,1,raw_fmov_d_ri_100,(FW r))
+{
+  VMOV64_i(r, 0x2, 0x4); // load imm #10 into reg
+  VMUL64(r, r, r); // square it: 10 * 10 = 100
+}
+LENDFUNC(NONE,NONE,1,raw_fmov_d_ri_100,(FW r))
+
+LOWFUNC(NONE,READ,2,raw_fmov_d_rm,(FW r, MEMR m))
+{
+  MOVW_ri16(REG_WORK1, m);
+  MOVT_ri16(REG_WORK1, m >> 16);
+  VLDR64(r, REG_WORK1, 0);
+}
+LENDFUNC(NONE,READ,2,raw_fmov_d_rm,(FW r, MEMR m))
+
+LOWFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
+{
+  MOVW_ri16(REG_WORK1, m);
+  MOVT_ri16(REG_WORK1, m >> 16);
+  VLDR32(SCRATCH_F32_1, REG_WORK1, 0);
+  VCVT_32_to_64(r, SCRATCH_F32_1);
+}
+LENDFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
+
+LOWFUNC(NONE,NONE,3,raw_fmov_to_d_rrr,(W4 d1, W4 d2, FR s))
+{
+  VMOV64_to_ARM(d1, d2, s);
+}
+LENDFUNC(NONE,NONE,3,raw_fmov_to_d_rrr,(W4 d1, W4 d2, FR s))
+
+LOWFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
+{
+	VSQRT64(d, s);
+}
+LENDFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
+
+LOWFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
+{
+	VABS64(d, s);
+}
+LENDFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
+
+LOWFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
+{
+	VNEG64(d, s);
+}
+LENDFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
+
+LOWFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
+{
+	VDIV64(d, d, s);
+}
+LENDFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
+
+LOWFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
+{
+	VADD64(d, d, s);
+}
+LENDFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
+
+LOWFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
+{
+	VMUL64(d, d, s);
+}
+LENDFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
+
+LOWFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
+{
+	VSUB64(d, d, s);
+}
+LENDFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
+
+LOWFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
+{
+	VCVTR_64_to_i(SCRATCH_F32_1, s);
+	VCVT_64_from_i(d, SCRATCH_F32_1);
+}
+LENDFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
+
+LOWFUNC(NONE,NONE,2,raw_frndintz_rr,(FW d, FR s))
+{
+	VCVT_64_to_i(SCRATCH_F32_1, s);
+	VCVT_64_from_i(d, SCRATCH_F32_1);
+}
+LENDFUNC(NONE,NONE,2,raw_frndintz_rr,(FW d, FR s))
+
+LOWFUNC(NONE,NONE,2,raw_fmod_rr,(FRW d, FR s))
+{
+	VDIV64(SCRATCH_F64_2, d, s); // q = d / s
+	VCVT_64_to_i(SCRATCH_F32_1, SCRATCH_F64_2); // q -> integer (non-R VCVT form, same one raw_frndintz_rr uses); NOTE(review): quotient passes through an integer register, range presumably limited to 32 bits - confirm
+	VCVT_64_from_i(SCRATCH_F64_2, SCRATCH_F32_1); // integer quotient back to double
+	VMUL64(SCRATCH_F64_1, SCRATCH_F64_2, s); // int(q) * s
+	VSUB64(d, d, SCRATCH_F64_1); // d = d - int(q) * s
+}
+LENDFUNC(NONE,NONE,2,raw_fmod_rr,(FRW d, FR s))
+
+LOWFUNC(NONE,NONE,2,raw_fsgldiv_rr,(FRW d, FR s))
+{
+	VCVT_64_to_32(SCRATCH_F32_1, d);
+	VCVT_64_to_32(SCRATCH_F32_2, s);
+	VDIV32(SCRATCH_F32_1, SCRATCH_F32_1, SCRATCH_F32_2);
+	VCVT_32_to_64(d, SCRATCH_F32_1);
+}
+LENDFUNC(NONE,NONE,2,raw_fsgldiv_rr,(FRW d, FR s))
+
+LOWFUNC(NONE,NONE,1,raw_fcuts_r,(FRW r))
+{
+	VCVT_64_to_32(SCRATCH_F32_1, r);
+	VCVT_32_to_64(r, SCRATCH_F32_1);
+}
+LENDFUNC(NONE,NONE,1,raw_fcuts_r,(FRW r))
+
+LOWFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
+{
+	VMRS(REG_WORK1); // keep the caller's FPSCR so its rounding-mode bits can be restored below
+	BIC_rri(REG_WORK2, REG_WORK1, 0x00c00000); // clear FPSCR bits 22-23 (rounding-mode field) for the conversion
+	VMSR(REG_WORK2);
+	
+	VDIV64(SCRATCH_F64_2, d, s); // q = d / s
+	VCVTR_64_to_i(SCRATCH_F32_1, SCRATCH_F64_2); // q -> integer, rounded according to the FPSCR mode just written
+	VCVT_64_from_i(SCRATCH_F64_2, SCRATCH_F32_1); // rounded quotient back to double
+	VMUL64(SCRATCH_F64_1, SCRATCH_F64_2, s);
+	VSUB64(d, d, SCRATCH_F64_1); // d = d - round(q) * s
+	
+	VMRS(REG_WORK2); // re-read FPSCR so only bits 22-23 are changed by the restore
+	UBFX_rrii(REG_WORK1, REG_WORK1, 22, 2); // extract the two saved rounding-mode bits
+	BFI_rrii(REG_WORK2, REG_WORK1, 22, 23); // BFI_rrii takes (lsb, msb): reinsert them at bits 22..23
+	VMSR(REG_WORK2);
+}
+LENDFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
+
+LOWFUNC(NONE,NONE,2,raw_fsglmul_rr,(FRW d, FR s))
+{
+	VCVT_64_to_32(SCRATCH_F32_1, d);
+	VCVT_64_to_32(SCRATCH_F32_2, s);
+	VMUL32(SCRATCH_F32_1, SCRATCH_F32_1, SCRATCH_F32_2);
+	VCVT_32_to_64(d, SCRATCH_F32_1);
+}
+LENDFUNC(NONE,NONE,2,raw_fsglmul_rr,(FRW d, FR s))
+
+LOWFUNC(NONE,NONE,2,raw_fmovs_rr,(FW d, FR s))
+{
+	VCVT_64_to_32(SCRATCH_F32_1, s);
+	VCVT_32_to_64(d, SCRATCH_F32_1);
+}
+LENDFUNC(NONE,NONE,2,raw_fmovs_rr,(FW d, FR s))
+
+LOWFUNC(NONE,NONE,3,raw_ffunc_rr,(double (*func)(double), FW d, FR s))
+{
+	VMOV64_rr(0, s);
+
+  MOVW_ri16(REG_WORK1, (uae_u32)func);
+  MOVT_ri16(REG_WORK1, ((uae_u32)func) >> 16);
+
+	PUSH(RLR_INDEX);
+	BLX_r(REG_WORK1);
+	POP(RLR_INDEX);
+
+	VMOV64_rr(d, 0);
+}
+LENDFUNC(NONE,NONE,3,raw_ffunc_rr,(double (*func)(double), FW d, FR s))
+
+LOWFUNC(NONE,NONE,3,raw_fpowx_rr,(uae_u32 x, FW d, FR s))
+{
+	double (*func)(double,double) = pow;
+
+	if(x == 2) {
+		VMOV64_i(0, 0x0, 0x0); // load imm #2 into first reg
+	} else {
+		VMOV64_i(0, 0x2, 0x4); // load imm #10 into first reg
+	}
+
+	VMOV64_rr(1, s);
+		
+  MOVW_ri16(REG_WORK1, (uae_u32)func);
+  MOVT_ri16(REG_WORK1, ((uae_u32)func) >> 16);
+
+	PUSH(RLR_INDEX);
+	BLX_r(REG_WORK1);
+	POP(RLR_INDEX);
+
+	VMOV64_rr(d, 0);
+}
+LENDFUNC(NONE,NONE,3,raw_fpowx_rr,(uae_u32 x, FW d, FR s))
+
+STATIC_INLINE void raw_fflags_into_flags(int r)
+{
+	VCMP64_0(r);
+	VMRS(15); // special case: move flags from FPSCR to APSR_nzcv
+}
--- a/src/jit/codegen_arm.h
+++ b/src/jit/codegen_arm.h
@ -1319,6 +1319,7 @@ enum {

 // ARMv6T2
 #ifdef ARMV6T2
+
 #define CC_BFI_rrii(cc,Rd,Rn,lsb,msb)   _W(((cc) << 28) | (0x3e << 21) | ((msb) << 16) | (Rd << 12) | ((lsb) << 7) | (0x1 << 4) | (Rn))
 #define BFI_rrii(Rd,Rn,lsb,msb)         CC_BFI_rrii(NATIVE_CC_AL,Rd,Rn,lsb,msb)

@ -1333,10 +1334,138 @@ enum {

 #define CC_MOVT_ri16(cc,Rd,i)                _W(((cc) << 28) | (0x34 << 20) | (((i >> 12) & 0xf) << 16) | (Rd << 12) | (i & 0x0fff))
 #define MOVT_ri16(Rd,i)					             CC_MOVT_ri16(NATIVE_CC_AL,Rd,i)
+
+#define CC_SSAT_rir(cc,Rd,i,Rn)			_W(((cc) << 28) | (0x6a << 20) | (i << 16) | (Rd << 12) | (0x1 << 4) | (Rn))
+#define SSAT_rir(Rd,i,Rn)						CC_SSAT_rir(NATIVE_CC_AL,Rd,i,Rn)
+
 #endif

 // Floatingpoint
+#define FADR_ADD(offs)              ((1 << 23) | (offs) >> 2)
+#define FADR_SUB(offs)              ((0 << 23) | (offs) >> 2)
+#define FIMM8(offs)                 (offs >= 0 ? FADR_ADD(offs) : FADR_SUB(-offs))

+#define MAKE_Dd(Dd)                 (((Dd & 0x10) << 18) | ((Dd & 0x0f) << 12))
+#define MAKE_Dm(Dm)                 (((Dm & 0x10) <<  1) | ((Dm & 0x0f) <<  0))
+#define MAKE_Dn(Dn)                 (((Dn & 0x10) <<  3) | ((Dn & 0x0f) << 16))
+#define MAKE_Sd(Sd)                 (((Sd & 0x01) << 22) | ((Sd & 0x1e) << 11))
+#define MAKE_Sm(Sm)                 (((Sm & 0x01) <<  5) | ((Sm & 0x1e) >>  1))
+#define MAKE_Sn(Sn)                 (((Sn & 0x01) <<  7) | ((Sn & 0x1e) << 15))
+
+
+#define CC_VLDR64(cc,Dd,Rn,offs)    _W(((cc) << 28) | (0xd << 24) | (0x1 << 20) | (Rn << 16) | (0xb << 8) | FIMM8(offs) | MAKE_Dd(Dd))
+#define VLDR64(Dd,Rn,offs)          CC_VLDR64(NATIVE_CC_AL,Dd,Rn,offs)
+#define CC_VLDR32(cc,Sd,Rn,offs)    _W(((cc) << 28) | (0xd << 24) | (0x1 << 20) | (Rn << 16) | (0xa << 8) | FIMM8(offs) | MAKE_Sd(Sd))
+#define VLDR32(Sd,Rn,offs)          CC_VLDR32(NATIVE_CC_AL,Sd,Rn,offs)
+
+#define CC_VSTR64(cc,Dd,Rn,offs)    _W(((cc) << 28) | (0xd << 24) | (0x0 << 20) | (Rn << 16) | (0xb << 8) | FIMM8(offs) | MAKE_Dd(Dd))
+#define VSTR64(Dd,Rn,offs)          CC_VSTR64(NATIVE_CC_AL,Dd,Rn,offs)
+#define CC_VSTR32(cc,Sd,Rn,offs)    _W(((cc) << 28) | (0xd << 24) | (0x0 << 20) | (Rn << 16) | (0xa << 8) | FIMM8(offs) | MAKE_Sd(Sd)) /* single-precision store: register field uses the S-register layout, like CC_VLDR32 */
+#define VSTR32(Sd,Rn,offs)          CC_VSTR32(NATIVE_CC_AL,Sd,Rn,offs)
+
+#define CC_VMOV64_rr(cc,Dd,Dm)			_W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0xb << 8) | (0x4 << 4) | MAKE_Dd(Dd) | MAKE_Dm(Dm))
+#define VMOV64_rr(Dd,Dm)						CC_VMOV64_rr(NATIVE_CC_AL,Dd,Dm)
+#define CC_VMOV32_rr(cc,Sd,Sm)			_W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0xa << 8) | (0x4 << 4) | MAKE_Sd(Sd) | MAKE_Sm(Sm))
+#define VMOV32_rr(Sd,Sm)						CC_VMOV32_rr(NATIVE_CC_AL,Sd,Sm)
+
+#define CC_VMOV32_to_ARM(cc,Rt,Sn)        _W(((cc) << 28) | (0xe << 24) | (0x1 << 20) | (Rt << 12) | (0xa << 8) | (0x1 << 4) | MAKE_Sn(Sn))
+#define VMOV32_to_ARM(Rt,Sn)              CC_VMOV32_to_ARM(NATIVE_CC_AL,Rt,Sn)
+#define CC_VMOV32_from_ARM(cc,Sn,Rt)      _W(((cc) << 28) | (0xe << 24) | (0x0 << 20) | (Rt << 12) | (0xa << 8) | (0x1 << 4) | MAKE_Sn(Sn))
+#define VMOV32_from_ARM(Sn,Rt)            CC_VMOV32_from_ARM(NATIVE_CC_AL,Sn,Rt)
+
+#define CC_VMOVi_from_ARM(cc,Dn,Rt)       _W(((cc) << 28) | (0xe << 24) | (0x0 << 20) | (Rt << 12) | (0xb << 8) | (0x1 << 4) | MAKE_Dn(Dn))
+#define VMOVi_from_ARM(Dn,Rt)             CC_VMOVi_from_ARM(NATIVE_CC_AL,Dn,Rt)
+#define CC_VMOVi_to_ARM(cc,Rt,Dn)         _W(((cc) << 28) | (0xe << 24) | (0x1 << 20) | (Rt << 12) | (0xb << 8) | (0x1 << 4) | MAKE_Dn(Dn))
+#define VMOVi_to_ARM(Rt,Dn)               CC_VMOVi_to_ARM(NATIVE_CC_AL,Rt,Dn)
+
+#define CC_VMOV64_to_ARM(cc,Rt,Rt2,Dm)    _W(((cc) << 28) | (0xc << 24) | (0x5 << 20) | (Rt2 << 16) | (Rt << 12) | (0xb << 8) | (0x1 << 4) | MAKE_Dm(Dm))
+#define VMOV64_to_ARM(Rt,Rt2,Dm)          CC_VMOV64_to_ARM(NATIVE_CC_AL,Rt,Rt2,Dm)
+#define CC_VMOV64_from_ARM(cc,Dm,Rt,Rt2)  _W(((cc) << 28) | (0xc << 24) | (0x4 << 20) | (Rt2 << 16) | (Rt << 12) | (0xb << 8) | (0x1 << 4) | MAKE_Dm(Dm))
+#define VMOV64_from_ARM(Dm,Rt,Rt2)        CC_VMOV64_from_ARM(NATIVE_CC_AL,Dm,Rt,Rt2)
+
+#define CC_VCVT_64_to_32(cc,Sd,Dm)  _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x7 << 16) | (0xb << 8) | (0xc << 4) | MAKE_Sd(Sd) | MAKE_Dm(Dm))
+#define VCVT_64_to_32(Sd,Dm)        CC_VCVT_64_to_32(NATIVE_CC_AL,Sd,Dm)
+#define CC_VCVT_32_to_64(cc,Dd,Sm)  _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x7 << 16) | (0xa << 8) | (0xc << 4) | MAKE_Dd(Dd) | MAKE_Sm(Sm))
+#define VCVT_32_to_64(Dd,Sm)        CC_VCVT_32_to_64(NATIVE_CC_AL,Dd,Sm)
+
+#define CC_VCVTR_64_to_i(cc,Sd,Dm)  _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0xd << 16) | (0xb << 8) | (0x4 << 4) | MAKE_Sd(Sd) | MAKE_Dm(Dm))
+#define VCVTR_64_to_i(Sd,Dm)        CC_VCVTR_64_to_i(NATIVE_CC_AL,Sd,Dm)
+#define CC_VCVTR_32_to_i(cc,Sd,Sm)  _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0xd << 16) | (0xa << 8) | (0x4 << 4) | MAKE_Sd(Sd) | MAKE_Sm(Sm))
+#define VCVTR_32_to_i(Sd,Sm)        CC_VCVTR_32_to_i(NATIVE_CC_AL,Sd,Sm)
+
+#define CC_VCVT_64_to_i(cc,Sd,Dm)   _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0xd << 16) | (0xb << 8) | (0xc << 4) | MAKE_Sd(Sd) | MAKE_Dm(Dm))
+#define VCVT_64_to_i(Sd,Dm)         CC_VCVT_64_to_i(NATIVE_CC_AL,Sd,Dm)
+#define CC_VCVT_32_to_i(cc,Sd,Sm)   _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0xd << 16) | (0xa << 8) | (0xc << 4) | MAKE_Sd(Sd) | MAKE_Sm(Sm))
+#define VCVT_32_to_i(Sd,Sm)         CC_VCVT_32_to_i(NATIVE_CC_AL,Sd,Sm)
+
+#define CC_VCVT_64_from_i(cc,Dd,Sm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x8 << 16) | (0xb << 8) | (0xc << 4) | MAKE_Dd(Dd) | MAKE_Sm(Sm))
+#define VCVT_64_from_i(Dd,Sm)       CC_VCVT_64_from_i(NATIVE_CC_AL,Dd,Sm)
+#define CC_VCVT_32_from_i(cc,Sd,Sm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x8 << 16) | (0xa << 8) | (0xc << 4) | MAKE_Sd(Sd) | MAKE_Sm(Sm))
+#define VCVT_32_from_i(Sd,Sm)       CC_VCVT_32_from_i(NATIVE_CC_AL,Sd,Sm) /* forwards its own Sd parameter; the previous text named an undeclared Dd */
+
+// Register-to-register copy: VMOV.F64 Dd, Dm / VMOV.F32 Sd, Sm
+#define CC_VMOV_rr64(cc,Dd,Dm)      _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0xb << 8) | (0x4 << 4) | MAKE_Dd(Dd) | MAKE_Dm(Dm))
+#define VMOV_rr64(Dd,Dm)            CC_VMOV_rr64(NATIVE_CC_AL,Dd,Dm)
+#define CC_VMOV_rr32(cc,Sd,Sm)      _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0xa << 8) | (0x4 << 4) | MAKE_Sd(Sd) | MAKE_Sm(Sm))
+#define VMOV_rr32(Sd,Sm)            CC_VMOV_rr32(NATIVE_CC_AL,Sd,Sm)
+
+// Three-operand arithmetic, d = n <op> m. The *64 forms work on D registers
+// (bits 11-8 = 0xb), the *32 forms on S registers (bits 11-8 = 0xa).
+// VADD
+#define CC_VADD64(cc,Dd,Dn,Dm)      _W(((cc) << 28) | (0xe << 24) | (0x3 << 20) | (0xb << 8) | (0x0 << 4) | MAKE_Dd(Dd) | MAKE_Dn(Dn) | MAKE_Dm(Dm))
+#define VADD64(Dd,Dn,Dm)            CC_VADD64(NATIVE_CC_AL,Dd,Dn,Dm)
+#define CC_VADD32(cc,Sd,Sn,Sm)      _W(((cc) << 28) | (0xe << 24) | (0x3 << 20) | (0xa << 8) | (0x0 << 4) | MAKE_Sd(Sd) | MAKE_Sn(Sn) | MAKE_Sm(Sm))
+#define VADD32(Sd,Sn,Sm)            CC_VADD32(NATIVE_CC_AL,Sd,Sn,Sm)
+
+// VSUB
+#define CC_VSUB64(cc,Dd,Dn,Dm)      _W(((cc) << 28) | (0xe << 24) | (0x3 << 20) | (0xb << 8) | (0x4 << 4) | MAKE_Dd(Dd) | MAKE_Dn(Dn) | MAKE_Dm(Dm))
+#define VSUB64(Dd,Dn,Dm)            CC_VSUB64(NATIVE_CC_AL,Dd,Dn,Dm)
+#define CC_VSUB32(cc,Sd,Sn,Sm)      _W(((cc) << 28) | (0xe << 24) | (0x3 << 20) | (0xa << 8) | (0x4 << 4) | MAKE_Sd(Sd) | MAKE_Sn(Sn) | MAKE_Sm(Sm))
+#define VSUB32(Sd,Sn,Sm)            CC_VSUB32(NATIVE_CC_AL,Sd,Sn,Sm)
+
+// VMUL
+#define CC_VMUL64(cc,Dd,Dn,Dm)      _W(((cc) << 28) | (0xe << 24) | (0x2 << 20) | (0xb << 8) | (0x0 << 4) | MAKE_Dd(Dd) | MAKE_Dn(Dn) | MAKE_Dm(Dm))
+#define VMUL64(Dd,Dn,Dm)            CC_VMUL64(NATIVE_CC_AL,Dd,Dn,Dm)
+#define CC_VMUL32(cc,Sd,Sn,Sm)      _W(((cc) << 28) | (0xe << 24) | (0x2 << 20) | (0xa << 8) | (0x0 << 4) | MAKE_Sd(Sd) | MAKE_Sn(Sn) | MAKE_Sm(Sm))
+#define VMUL32(Sd,Sn,Sm)            CC_VMUL32(NATIVE_CC_AL,Sd,Sn,Sm)
+
+// VDIV
+#define CC_VDIV64(cc,Dd,Dn,Dm)      _W(((cc) << 28) | (0xe << 24) | (0x8 << 20) | (0xb << 8) | (0x0 << 4) | MAKE_Dd(Dd) | MAKE_Dn(Dn) | MAKE_Dm(Dm))
+#define VDIV64(Dd,Dn,Dm)            CC_VDIV64(NATIVE_CC_AL,Dd,Dn,Dm)
+#define CC_VDIV32(cc,Sd,Sn,Sm)      _W(((cc) << 28) | (0xe << 24) | (0x8 << 20) | (0xa << 8) | (0x0 << 4) | MAKE_Sd(Sd) | MAKE_Sn(Sn) | MAKE_Sm(Sm))
+#define VDIV32(Sd,Sn,Sm)            CC_VDIV32(NATIVE_CC_AL,Sd,Sn,Sm)
+
+// Two-operand unary operations, d = op(m).
+// VABS
+#define CC_VABS64(cc,Dd,Dm)         _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0xb << 8) | (0xc << 4) | MAKE_Dd(Dd) | MAKE_Dm(Dm))
+#define VABS64(Dd,Dm)               CC_VABS64(NATIVE_CC_AL,Dd,Dm)
+#define CC_VABS32(cc,Sd,Sm)         _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0xa << 8) | (0xc << 4) | MAKE_Sd(Sd) | MAKE_Sm(Sm))
+#define VABS32(Sd,Sm)               CC_VABS32(NATIVE_CC_AL,Sd,Sm)
+
+// VNEG
+#define CC_VNEG64(cc,Dd,Dm)         _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x1 << 16) | (0xb << 8) | (0x4 << 4) | MAKE_Dd(Dd) | MAKE_Dm(Dm))
+#define VNEG64(Dd,Dm)               CC_VNEG64(NATIVE_CC_AL,Dd,Dm)
+#define CC_VNEG32(cc,Sd,Sm)         _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x1 << 16) | (0xa << 8) | (0x4 << 4) | MAKE_Sd(Sd) | MAKE_Sm(Sm))
+#define VNEG32(Sd,Sm)               CC_VNEG32(NATIVE_CC_AL,Sd,Sm)
+
+// VSQRT
+#define CC_VSQRT64(cc,Dd,Dm)        _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x1 << 16) | (0xb << 8) | (0xc << 4) | MAKE_Dd(Dd) | MAKE_Dm(Dm))
+#define VSQRT64(Dd,Dm)              CC_VSQRT64(NATIVE_CC_AL,Dd,Dm)
+#define CC_VSQRT32(cc,Sd,Sm)        _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x1 << 16) | (0xa << 8) | (0xc << 4) | MAKE_Sd(Sd) | MAKE_Sm(Sm))
+#define VSQRT32(Sd,Sm)              CC_VSQRT32(NATIVE_CC_AL,Sd,Sm)
+
+// Compare encodings. Bit 7 (E) is set in all of them, i.e. these are the VCMPE forms.
+// The result lands in the FPSCR flags; use VMRS to read them back.
+// Register/register compare.
+#define CC_VCMP64(cc,Dd,Dm)         _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x4 << 16) | (0xb << 8) | (0xc << 4) | MAKE_Dd(Dd) | MAKE_Dm(Dm))
+#define VCMP64(Dd,Dm)               CC_VCMP64(NATIVE_CC_AL,Dd,Dm)
+#define CC_VCMP32(cc,Sd,Sm)         _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x4 << 16) | (0xa << 8) | (0xc << 4) | MAKE_Sd(Sd) | MAKE_Sm(Sm))
+#define VCMP32(Sd,Sm)               CC_VCMP32(NATIVE_CC_AL,Sd,Sm)
+
+// Compare Dd with #0.0.
+#define CC_VCMP64_0(cc,Dd)          _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x5 << 16) | (0xb << 8) | (0xc << 4) | MAKE_Dd(Dd))
+#define VCMP64_0(Dd)                CC_VCMP64_0(NATIVE_CC_AL,Dd)
+
+// Compare with #0.0. NOTE: CC_VTST64 emits exactly the same word as CC_VCMP64_0.
+#define CC_VTST64(cc,Dd)            _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x5 << 16) | (0xb << 8) | (0xc << 4) | MAKE_Dd(Dd))
+#define VTST64(Dd)                  CC_VTST64(NATIVE_CC_AL,Dd)
+#define CC_VTST32(cc,Sd)            _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x5 << 16) | (0xa << 8) | (0xc << 4) | MAKE_Sd(Sd))
+#define VTST32(Sd)                  CC_VTST32(NATIVE_CC_AL,Sd)
+
+// VMRS Rt, FPSCR -- copy the FP status/control register into ARM register Rt.
+// Rt is parenthesized so that expression arguments shift as a whole.
+#define CC_VMRS(cc,Rt)              _W(((cc) << 28) | (0xe << 24) | (0xf << 20) | (0x1 << 16) | ((Rt) << 12) | (0xa << 8) | (0x1 << 4))
+#define VMRS(Rt)                    CC_VMRS(NATIVE_CC_AL,Rt)
+
+// VMSR FPSCR, Rt -- copy ARM register Rt into the FP status/control register.
+// Rt is parenthesized so that expression arguments shift as a whole.
+#define CC_VMSR(cc,Rt)              _W(((cc) << 28) | (0xe << 24) | (0xe << 20) | (0x1 << 16) | ((Rt) << 12) | (0xa << 8) | (0x1 << 4))
+#define VMSR(Rt)                    CC_VMSR(NATIVE_CC_AL,Rt)
+
+// VMOV.F64 Dd, #imm -- the 8-bit encoded immediate is passed as two nibbles
+// (imm4H goes to bits 19-16, imm4L to bits 3-0). Both are parenthesized so
+// that expression arguments cannot be split by operator precedence.
+#define CC_VMOV64_i(cc,Dd,imm4H,imm4L) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | ((imm4H) << 16) | (0xb << 8) | (imm4L) | MAKE_Dd(Dd))
+#define VMOV64_i(Dd,imm4H,imm4L)    CC_VMOV64_i(NATIVE_CC_AL,Dd,imm4H,imm4L)
+
+// Floatingpoint used by non FPU JIT
 #define CC_VMOV_sr(cc,Sd,Rn)        _W(((cc) << 28) | (0x70 << 21) | (0 << 20) | (Sd << 16) | (Rn << 12) | (0x0a << 8) | (0x10))
 #define VMOV_sr(Sd,Rn)              CC_VMOV_sr(NATIVE_CC_AL,Sd,Rn)

@ -1352,4 +1481,5 @@ enum {
 #define CC_VDIV_ddd(cc,Dd,Dn,Dm)    _W(((cc) << 28) | (0x1d << 23) | (0x0 << 20) | (Dn << 16) | (Dd << 12) | (0xb << 8) | (0x0 << 4) | (Dm))
 #define VDIV_ddd(Dd,Dn,Dm)          CC_VDIV_ddd(NATIVE_CC_AL,Dd,Dn,Dm)

+
 #endif /* ARM_RTASM_H */
--- a/src/jit/compemu.cpp
+++ b/src/jit/compemu.cpp
--- a/src/jit/compemu.h
+++ b/src/jit/compemu.h
@ -90,7 +90,7 @@ typedef union {

 #define BYTES_PER_INST 10240  /* paranoid ;-) */
 #if defined(CPU_arm)
-#define LONGEST_68K_INST 256 /* The number of bytes the longest possible
+#define LONGEST_68K_INST 128 /* The number of bytes the longest possible
 			       68k instruction takes */
 #else
 #define LONGEST_68K_INST 16 /* The number of bytes the longest possible
@ -127,7 +127,8 @@ typedef union {
 #else
 #define N_REGS 8  /* really only 7, but they are numbered 0,1,2,3,5,6,7 */
 #endif
-#define N_FREGS 6 /* That leaves us two positions on the stack to play with */
+#define N_FREGS 16  // We use 16 regs: 0 - FP_RESULT, 1-3 - SCRATCH, 4-7 - ???, 8-15 - Amiga regs FP0-FP7
+

 /* Functions exposed to newcpu, or to what was moved from newcpu.c to
 * compemu_support.c */
@ -151,11 +152,21 @@ extern int check_for_cache_miss(void);

 #define scaled_cycles(x) (currprefs.m68k_speed<0?(((x)/SCALE)?(((x)/SCALE<MAXCYCLES?((x)/SCALE):MAXCYCLES)):1):(x))

+/* JIT FPU compilation: compile-time handlers for the FPU opcode groups,
+ * implemented in compemu_fpp.cpp. "opcode" is the first instruction word,
+ * "extra" the extension word that follows it. */
+extern void comp_fpp_opp (uae_u32 opcode, uae_u16 extra);
+extern void comp_fbcc_opp (uae_u32 opcode);
+extern void comp_fscc_opp (uae_u32 opcode, uae_u16 extra);
+void comp_fdbcc_opp (uae_u32 opcode, uae_u16 extra);
+void comp_ftrapcc_opp (uae_u32 opcode, uaecptr oldpc);
+void comp_fsave_opp (uae_u32 opcode);
+void comp_frestore_opp (uae_u32 opcode);
+
 extern uae_u32 needed_flags;
 extern uae_u8* comp_pc_p;
 extern void* pushall_call_handler;

 #define VREGS 32
+#define VFREGS 16

 #define INMEM 1
 #define CLEAN 2
@ -173,6 +184,13 @@ typedef struct {
  uae_u8 dirtysize;
 } reg_status;

+/* Allocation state of one virtual FP register (bigstate.fate[] holds VFREGS of these). */
+typedef struct {
+  uae_u32* mem;      /* presumably the in-memory home of the register -- TODO confirm */
+  uae_u8 status;     /* state constant; f_forget_about() sets it to UNDEF */
+  uae_s8 realreg; /* gb-- realreg can hold -1 */
+  uae_u8 needflush;  /* NOTE(review): looks like a "must be written back" flag -- confirm */
+} freg_status;
+
 typedef struct {
    uae_u8 use_flags;
    uae_u8 set_flags;
@ -209,6 +227,13 @@ STATIC_INLINE int end_block(uae_u16 opcode)
 #define FS2 10
 #define FS3 11

+/* Native FP scratch registers. The F64 values are D-register numbers, the F32
+ * values S-register numbers. On VFP, Dn overlays S(2n) and S(2n+1), so each
+ * SCRATCH_F32_n (= 2 * SCRATCH_F64_n) appears to name the low half of the
+ * matching SCRATCH_F64_n. */
+#define SCRATCH_F64_1  1
+#define SCRATCH_F64_2  2
+#define SCRATCH_F64_3  3
+#define SCRATCH_F32_1  2
+#define SCRATCH_F32_2  4
+#define SCRATCH_F32_3  6
+
 typedef struct {
  uae_u32 touched;
  uae_s8 holds[VREGS];
@ -216,6 +241,11 @@ typedef struct {
  uae_u8 locked;
 } n_status;

+/* State of one native FP register (bigstate.fat[] holds N_FREGS of these).
+ * Mirrors n_status for the integer registers, but with a single "holds" slot. */
+typedef struct {
+  uae_s8 holds;   /* presumably the virtual FP register kept here -- TODO confirm */
+  uae_u8 nholds;  /* presumably the number of virtual registers held -- TODO confirm */
+} fn_status;
+
 /* For flag handling */
 #define NADA 1
 #define TRASH 2
@ -233,6 +263,9 @@ typedef struct {
    uae_u32 flags_on_stack;
    uae_u32 flags_in_flags;
    uae_u32 flags_are_important;
+    /* FPU part */
+    freg_status fate[VFREGS];
+    fn_status   fat[N_FREGS];
 } bigstate;

 typedef struct {
@ -276,9 +309,9 @@ extern int touchcnt;
 #include "compemu_midfunc_arm2.h"
 #endif

-//#if defined(CPU_i386) || defined(CPU_x86_64)
-//#include "compemu_midfunc_x86.h"
-//#endif
+#if defined(CPU_i386) || defined(CPU_x86_64)
+#include "compemu_midfunc_x86.h"
+#endif

 #undef DECLARE_MIDFUNC

@ -297,7 +330,7 @@ extern void writelong_clobber(int address, int source, int tmp);
 extern void get_n_addr(int address, int dest, int tmp);
 extern void get_n_addr_jmp(int address, int dest, int tmp);
 extern void calc_disp_ea_020(int base, uae_u32 dp, int target, int tmp);
-#define SYNC_PC_OFFSET 100
+#define SYNC_PC_OFFSET 124
 extern void sync_m68k_pc(void);
 extern uae_u32 get_const(int r);
 extern int  is_const(int r);
@ -374,13 +407,9 @@ void execute_normal(void);
 void exec_nostats(void);
 void do_nothing(void);

-void comp_fdbcc_opp (uae_u32 opcode, uae_u16 extra);
-void comp_fscc_opp (uae_u32 opcode, uae_u16 extra);
-void comp_ftrapcc_opp (uae_u32 opcode, uaecptr oldpc);
-void comp_fbcc_opp (uae_u32 opcode);
-void comp_fsave_opp (uae_u32 opcode);
-void comp_frestore_opp (uae_u32 opcode);
-void comp_fpp_opp (uae_u32 opcode, uae_u16 extra);
+/* ARAnyM uses fpu_register name, used in scratch_t */
+/* FIXME: check that no ARAnyM code assumes different floating point type */
+typedef fptype fpu_register;

 void jit_abort(const TCHAR *format,...);

--- a/src/jit/compemu_fpp.cpp
+++ b/src/jit/compemu_fpp.cpp
@ -8,7 +8,7 @@
  * Modified 2005 Peter Keunecke
 */

-#include <math.h>
+#include <cmath>

 #include "sysconfig.h"
 #include "sysdeps.h"
@ -18,41 +18,794 @@
 #include "custom.h"
 #include "newcpu.h"
 #include "compemu.h"
+#include "flags_arm.h"

 #if defined(JIT)

+extern void fpp_to_exten(fpdata *fpd, uae_u32 wrd1, uae_u32 wrd2, uae_u32 wrd3);
+
+/* Operand size in bytes, indexed by the 3-bit format field (extra >> 10) & 7:
+ * 0 long, 1 single, 2 extended, 3 packed, 4 word, 5 double, 6 byte, 7 undefined.
+ * sz1 is used for (An)+ / -(An) on A0-A6; sz2 is used when the register is A7
+ * and for immediates, where a byte operand takes two bytes. */
+static const int sz1[8] = { 4, 4, 12, 12, 2, 8, 1, 0 };
+static const int sz2[8] = { 4, 4, 12, 12, 2, 8, 2, 0 };
+
+/* Compile the load of an FPU instruction's <EA> source operand into FP
+ * register treg, converting it to floating point where needed.
+ *   opcode - first instruction word; bits 3-5 = EA mode, bits 0-2 = EA register
+ *   extra  - extension word; bits 10-12 = source data format
+ *   treg   - FP register that receives the operand
+ * Extended-precision operands are accepted only as immediates; packed decimal
+ * and the undefined format 7 are always rejected.
+ * return the required floating point precision or -1 for failure, 0=E, 1=S, 2=D */
+STATIC_INLINE int comp_fp_get (uae_u32 opcode, uae_u16 extra, int treg)
+{
+	int reg = opcode & 7;
+	int mode = (opcode >> 3) & 7;
+	int size = (extra >> 10) & 7;
+
+	if ((size == 2 && (mode != 7 || reg != 4)) || size == 3 || size == 7) /* 2 = long double, 3 = packed decimal, 7 is not defined */
+		return -1;
+	switch (mode) {
+		case 0: /* Dn */
+  		switch (size) {
+  			case 0: /* Long */
+    			fmov_l_rr (treg, reg);
+    			return 2;
+  			case 1: /* Single */
+    			fmov_s_rr (treg, reg);
+    			return 1;
+  			case 4: /* Word */
+    			fmov_w_rr (treg, reg);
+    			return 1;
+  			case 6: /* Byte */
+  			  fmov_b_rr (treg, reg);
+    			return 1;
+  			default:
+    			return -1;
+  		}
+		case 1: /* An,  invalid mode */
+  		return -1;
+		case 2: /* (An) */
+  		mov_l_rr (S1, reg + 8);
+  		break;
+		case 3: /* (An)+ */
+  		mov_l_rr (S1, reg + 8);
+      arm_ADD_l_ri8(reg + 8, (reg == 7 ? sz2[size] : sz1[size]));
+  		break;
+		case 4: /* -(An) */
+		  arm_SUB_l_ri8(reg + 8, (reg == 7 ? sz2[size] : sz1[size]));
+  		mov_l_rr (S1, reg + 8);
+  		break;
+		case 5: /* (d16,An)  */
+		{
+			uae_u32 off = (uae_s32) (uae_s16) comp_get_iword ((m68k_pc_offset += 2) - 2);
+			mov_l_rr (S1, reg + 8);
+			lea_l_brr (S1, S1, off);
+			break;
+		}
+		case 6: /* (d8,An,Xn) or (bd,An,Xn) or ([bd,An,Xn],od) or ([bd,An],Xn,od) */
+		{
+			uae_u32 dp = comp_get_iword ((m68k_pc_offset += 2) - 2);
+			calc_disp_ea_020 (reg + 8, dp, S1, S2);
+			break;
+		}
+		case 7:
+  		switch (reg) {
+  			case 0: /* (xxx).W */
+  			{
+  				uae_u32 off = (uae_s32) (uae_s16) comp_get_iword ((m68k_pc_offset += 2) - 2);
+  				mov_l_ri (S1, off);
+  				break;
+  			}
+  			case 1: /* (xxx).L */
+  			{
+  				uae_u32 off = comp_get_ilong ((m68k_pc_offset += 4) - 4);
+  				mov_l_ri (S1, off);
+  				break;
+  			}
+  			case 2: /* (d16,PC) */
+  			{
+  				uae_u32 address = start_pc + ((uae_char*) comp_pc_p - (uae_char*) start_pc_p) +
+  					m68k_pc_offset;
+  				uae_s32 PC16off = (uae_s32) (uae_s16) comp_get_iword ((m68k_pc_offset += 2) - 2);
+  				mov_l_ri (S1, address + PC16off);
+  				break;
+  			}
+  			case 3: /* (d8,PC,Xn) or (bd,PC,Xn) or ([bd,PC,Xn],od) or ([bd,PC],Xn,od) */
+  			  return -1; /* rarely used, fallback to non-JIT */
+  			case 4: /* # < data >; Constants should be converted just once by the JIT */
+    			m68k_pc_offset += sz2[size];
+    			switch (size) {
+    				case 0:
+    				{
+    					uae_s32 li = comp_get_ilong(m68k_pc_offset - 4);
+    					float si = (float)li;
+    
+    					/* Use SINGLE only when the conversion was exact. The test is done
+    					 * in double, where both values are exactly representable: casting
+    					 * si back to int is undefined when (float)li rounded up to 2^31
+    					 * (li just below INT_MAX), and a saturating conversion would then
+    					 * wrongly report the constant as exact. */
+    					if ((double)li == (double)si) {
+						    //write_log ("converted immediate LONG constant to SINGLE\n");
+    						fmov_s_ri(treg, *(uae_u32 *)&si);
+    						return 1;
+    					}
+					    //write_log ("immediate LONG constant\n");
+    					fmov_l_ri(treg, *(uae_u32 *)&li);
+    					return 2;
+    				}
+    				case 1:
+				      //write_log (_T("immediate SINGLE constant\n"));
+      				fmov_s_ri(treg, comp_get_ilong(m68k_pc_offset - 4));
+      				return 1;
+    				case 2:
+            {
+				      //write_log (_T("immediate LONG DOUBLE constant\n"));
+              uae_u32 wrd1, wrd2, wrd3;
+              fpdata tmp;
+              wrd3 = comp_get_ilong(m68k_pc_offset - 4);
+              wrd2 = comp_get_ilong(m68k_pc_offset - 8);
+              wrd1 = comp_get_iword(m68k_pc_offset - 12) << 16;
+              fpp_to_exten(&tmp, wrd1, wrd2, wrd3);
+              mov_l_ri(S1, ((uae_u32*)&tmp)[0]);
+              mov_l_ri(S2, ((uae_u32*)&tmp)[1]);
+              fmov_d_rrr (treg, S1, S2);
+      				return 0;
+            }
+    				case 4:
+    				{
+    					float si = (float)(uae_s16)comp_get_iword(m68k_pc_offset-2);
+    
+					    //write_log (_T("converted immediate WORD constant %f to SINGLE\n"), si);
+    					fmov_s_ri(treg, *(uae_u32 *)&si);
+    					return 1;
+    				}
+    				case 5:
+    				{
+					    //write_log (_T("immediate DOUBLE constant\n"));
+              mov_l_ri(S1, comp_get_ilong(m68k_pc_offset - 4));
+              mov_l_ri(S2, comp_get_ilong(m68k_pc_offset - 8));
+              fmov_d_rrr (treg, S1, S2);
+    					return 2;
+    				}
+    				case 6:
+    				{
+    					float si = (float)(uae_s8)comp_get_ibyte(m68k_pc_offset - 2);
+    
+    					//write_log (_T("converted immediate BYTE constant to SINGLE\n"));
+    					fmov_s_ri(treg, *(uae_u32 *)&si);
+    					return 1;
+    				}
+    				default: /* never reached */
+      				return -1;
+          }
+   			default: /* never reached */
+  	  		return -1;
+     }
+  }
+  
+	/* Memory operand: S1 now holds the 68k address of the source. */
+	switch (size) {
+		case 0: /* Long */
+  		readlong (S1, S2, S3);
+  		fmov_l_rr (treg, S2);
+  		return 2;
+		case 1: /* Single */
+  		readlong (S1, S2, S3);
+			fmov_s_rr (treg, S2);
+  		return 1;
+		case 4: /* Word */
+  		readword (S1, S2, S3);
+			fmov_w_rr (treg, S2);
+  		return 1;
+		case 5: /* Double */
+  		/* S2 = longword at the operand address, S4 = longword at address + 4 */
+  		readlong (S1, S2, S3);
+  		add_l_ri (S1, 4);
+  		readlong (S1, S4, S3);
+  		fmov_d_rrr (treg, S4, S2);
+  		return 2;
+		case 6: /* Byte */
+  		readbyte (S1, S2, S3);
+		  fmov_b_rr (treg, S2);
+  		return 1;
+		default:
+  		return -1;
+  }
+	return -1;
+}
+
+/* Compile the store half of FMOVE FPx,<EA>.
+ * The FP source register is taken from bits 7-9 of "extra", the destination
+ * format from bits 10-12, and the effective address from the low six bits of
+ * "opcode". Formats 2 (extended), 3 (packed) and 7 are rejected.
+ * return of -1 means failure, >=0 means OK */
+STATIC_INLINE int comp_fp_put (uae_u32 opcode, uae_u16 extra)
+{
+	int reg = opcode & 7;
+	int sreg = (extra >> 7) & 7;
+	int mode = (opcode >> 3) & 7;
+	int size = (extra >> 10) & 7;
+
+	if (size == 2 || size == 3 || size == 7) /* 2 = long double, 3 = packed decimal, 7 is not defined */
+		return -1;
+	switch (mode) {
+		case 0: /* Dn */
+  		switch (size) {
+  			case 0: /* FMOVE.L FPx, Dn */
+  			  fmov_to_l_rr(reg, sreg);
+    			return 0;
+  			case 1: /* FMOVE.S FPx, Dn */
+  			  fmov_to_s_rr(reg, sreg);
+    			return 0;
+  			case 4: /* FMOVE.W FPx, Dn */
+  			  fmov_to_w_rr(reg, sreg);
+    			return 0;
+  			case 6: /* FMOVE.B FPx, Dn */
+  			  fmov_to_b_rr(reg, sreg);
+			    return 0;
+			  default:
+			    return -1;
+      }
+		case 1: /* An, invalid mode */
+  		return -1;
+		case 2: /* (An) */
+		  mov_l_rr (S1, reg + 8);
+		  break;
+		case 3: /* (An)+ */
+		  mov_l_rr (S1, reg + 8);
+      arm_ADD_l_ri8(reg + 8, (reg == 7 ? sz2[size] : sz1[size]));
+		  break;
+		case 4: /* -(An) */
+	    arm_SUB_l_ri8(reg + 8, (reg == 7 ? sz2[size] : sz1[size]));
+		  mov_l_rr (S1, reg + 8);
+		  break;
+		case 5: /* (d16,An) */
+		{
+			uae_u32 off = (uae_s32) (uae_s16) comp_get_iword ((m68k_pc_offset += 2) - 2);
+			mov_l_rr (S1, reg + 8);
+			add_l_ri (S1, off);
+			break;
+		}
+		case 6: /* (d8,An,Xn) or (bd,An,Xn) or ([bd,An,Xn],od) or ([bd,An],Xn,od) */
+		{
+			uae_u32 dp = comp_get_iword ((m68k_pc_offset += 2) - 2);
+			calc_disp_ea_020 (reg + 8, dp, S1, S2);
+			break;
+		}
+		case 7:
+		  switch (reg) {
+			  case 0: /* (xxx).W */
+			  {
+				  uae_u32 off = (uae_s32) (uae_s16) comp_get_iword ((m68k_pc_offset += 2) - 2);
+				  mov_l_ri (S1, off);
+				  break;
+			  }
+			  case 1: /* (xxx).L */
+			  {
+				  uae_u32 off = comp_get_ilong ((m68k_pc_offset += 4) - 4);
+				  mov_l_ri (S1, off);
+				  break;
+			  }
+			  default: /* All other modes are not allowed for FPx to <EA> */
+			    write_log (_T ("JIT FMOVE FPx,<EA> Mode is not allowed %04x %04x\n"), opcode, extra);
+		    return -1;
+		  }
+  }
+	/* Memory destination: S1 now holds the 68k address to store to. */
+	switch (size) {
+		case 0: /* Long */
+    	fmov_to_l_rr(S2, sreg);
+		  writelong_clobber (S1, S2, S3);
+		  return 0;
+		case 1: /* Single */
+      fmov_to_s_rr(S2, sreg);
+	    writelong_clobber (S1, S2, S3);
+	    return 0;
+		case 4: /* Word */
+		  fmov_to_w_rr(S2, sreg);
+		  writeword (S1, S2, S3);
+		  return 0;
+		case 5: /* Double */
+      /* S3 is written at the destination address, S2 at address + 4 */
+      fmov_to_d_rrr(S2, S3, sreg);
+  		writelong_clobber (S1, S3, S4);
+      add_l_ri (S1, 4);
+      writelong_clobber (S1, S2, S4);
+		  return 0;
+		case 6: /* Byte */
+      fmov_to_b_rr(S2, sreg);
+		  writebyte (S1, S2, S3);
+		  return 0;
+		default:
+  		return -1;
+  }
+	return -1;
+}
+
+/* Compute the effective address named by the low six opcode bits into S1.
+ * Handles (An), (An)+, -(An) (all three just copy An), (d16,An), (xxx).W and
+ * (xxx).L; every other mode fails.
+ * return -1 for failure, or register number for success */
+STATIC_INLINE int comp_fp_adr (uae_u32 opcode)
+{
+	uae_s32 off;
+	int mode = (opcode >> 3) & 7;
+	int reg = opcode & 7;
+
+	switch (mode) {
+		case 2:
+		case 3:
+		case 4:
+		  mov_l_rr (S1, 8 + reg);
+		  return S1;
+		case 5:
+		  off = (uae_s32) (uae_s16) comp_get_iword ((m68k_pc_offset += 2) - 2);
+      mov_l_rr (S1, 8 + reg);
+		  add_l_ri (S1, off);
+		  return S1;
+		case 7:
+		  switch (reg) {
+			  case 0:
+			    off = (uae_s32) (uae_s16) comp_get_iword ((m68k_pc_offset += 2) - 2);
+			    mov_l_ri (S1, off);
+			    return S1;
+			  case 1:
+			    off = comp_get_ilong ((m68k_pc_offset += 4) - 4);
+			    mov_l_ri (S1, off);
+			    return S1;
+		  }
+		  /* any other mode-7 register falls through to the failure path below */
+		default:
+  		return -1;
+	}
+}
+
+/* FDBcc is not compiled by the JIT: the handler only raises FAIL(1)
+ * (presumably making the caller fall back to the interpreter -- TODO confirm). */
 void comp_fdbcc_opp (uae_u32 opcode, uae_u16 extra)
 {
-  printf("comp_fdbcc_opp not yet implemented\n");
+	FAIL (1);
+	return;
 }

+/* FScc is not compiled by the JIT: both the "compfpu disabled" path and the
+ * normal path raise FAIL(1) (presumably an interpreter fallback -- TODO confirm). */
 void comp_fscc_opp (uae_u32 opcode, uae_u16 extra)
 {
-  printf("comp_fscc_opp not yet implemented\n");
+  //printf("comp_fscc_opp() called (0x%04x, 0x%04x)\n", opcode, extra);
+	if (!currprefs.compfpu) {
+		FAIL (1);
+		return;
+	}
+
+	/* no native implementation yet, so this path fails as well */
+	FAIL (1);
+	return;
 }

+/* FTRAPcc is not compiled by the JIT: the handler only raises FAIL(1)
+ * (presumably making the caller fall back to the interpreter -- TODO confirm). */
 void comp_ftrapcc_opp (uae_u32 opcode, uaecptr oldpc)
 {
-  printf("comp_ftrapcc_opp not yet implemented\n");
+	FAIL (1);
+	return;
 }

+/* Compile FBcc. The condition is taken from the low bits of "opcode"; bit 6
+ * selects a 32-bit instead of a 16-bit displacement. Fails when the JIT FPU
+ * is disabled or an undefined condition (bit 5 set) is used. Condition 0
+ * (never) emits nothing; otherwise both possible successor addresses are
+ * computed as constants and handed to register_branch(). */
 void comp_fbcc_opp (uae_u32 opcode)
 {
-  printf("comp_fbcc_opp not yet implemented\n");
+	uae_u32 start_68k_offset = m68k_pc_offset;
+	uae_u32 off, v1, v2;
+	int cc;
+
+	if (!currprefs.compfpu) {
+		FAIL (1);
+		return;
+	}
+
+	if (opcode & 0x20) {  /* only cc from 00 to 1f are defined */
+		FAIL (1);
+		return;
+	}
+	if (!(opcode & 0x40)) {
+		off = (uae_s32) (uae_s16) comp_get_iword ((m68k_pc_offset += 2) - 2);
+	}
+	else {
+		off = comp_get_ilong ((m68k_pc_offset += 4) - 4);
+	}
+
+	/* according to fpp.c, the 0x10 bit is ignored
+	   (it handles exception handling, which we don't
+	   do, anyway ;-) */
+	cc = opcode & 0x0f;
+  if(cc == 0)
+    return; /* jump never */
+
+	/* Note, "off" will sometimes be (unsigned) "negative", so the following
+         * uintptr can be > 0xffffffff, but the result will be correct due to
+         * wraparound when truncated to 32 bit in the call to mov_l_ri. */
+	mov_l_ri(S1, (uintptr)
+		(comp_pc_p + off - (m68k_pc_offset - start_68k_offset)));
+	mov_l_ri(PC_P, (uintptr) comp_pc_p);
+
+	/* Now they are both constant. Might as well fold in m68k_pc_offset */
+	add_l_ri (S1, m68k_pc_offset);
+	add_l_ri (PC_P, m68k_pc_offset);
+	m68k_pc_offset = 0;
+
+	/* v1 is built from PC_P (next instruction), v2 from S1 (branch target) */
+	v1 = get_const (PC_P);
+	v2 = get_const (S1);
+	fflags_into_flags ();
+
+	switch (cc) {
+		case 1: register_branch (v1, v2, NATIVE_CC_EQ); break;
+		case 2: register_branch (v1, v2, NATIVE_CC_F_OGT); break;
+		case 3: register_branch (v1, v2, NATIVE_CC_F_OGE); break;
+		case 4: register_branch (v1, v2, NATIVE_CC_F_OLT); break;
+		case 5: register_branch (v1, v2, NATIVE_CC_F_OLE); break;
+		case 6: register_branch (v1, v2, NATIVE_CC_F_OGL); break;
+		case 7: register_branch (v1, v2, NATIVE_CC_F_OR); break;
+		case 8: register_branch (v1, v2, NATIVE_CC_F_UN); break;
+		case 9: register_branch (v1, v2, NATIVE_CC_F_UEQ); break;
+		case 10: register_branch (v1, v2, NATIVE_CC_F_UGT); break;
+		case 11: register_branch (v1, v2, NATIVE_CC_F_UGE); break;
+		case 12: register_branch (v1, v2, NATIVE_CC_F_ULT); break;
+		case 13: register_branch (v1, v2, NATIVE_CC_F_ULE); break;
+		case 14: register_branch (v1, v2, NATIVE_CC_NE); break;
+		case 15: register_branch (v2, v2, NATIVE_CC_AL); break; /* always: both successors are the target */
+	}
 }

+/* FSAVE is not compiled by the JIT: the handler only raises FAIL(1)
+ * (presumably making the caller fall back to the interpreter -- TODO confirm). */
 void comp_fsave_opp (uae_u32 opcode)
 {
-  printf("comp_fsave_opp not yet implemented\n");
+	FAIL (1);
+	return;
 }

+/* FRESTORE is not compiled by the JIT: the handler only raises FAIL(1)
+ * (presumably making the caller fall back to the interpreter -- TODO confirm). */
 void comp_frestore_opp (uae_u32 opcode)
 {
-  printf("comp_frestore_opp not yet implemented\n");
+	FAIL (1);
+	return;
 }

+/* IEEE-754 double constants stored as two 32-bit words, low word first.
+ * Most of them are loaded by address (fmov_d_rm) in the FMOVECR handling of
+ * comp_fpp_opp. */
+static uae_u32 dhex_pi[]    ={0x54442D18, 0x400921FB}; /* pi */
+static uae_u32 dhex_exp_1[] ={0x8B145769, 0x4005BF0A}; /* e */
+static uae_u32 dhex_l2_e[]  ={0x652B82FE, 0x3FF71547}; /* log2(e) */
+static uae_u32 dhex_ln_2[]  ={0xFEFA39EF, 0x3FE62E42}; /* ln(2) */
+static uae_u32 dhex_ln_10[] ={0xBBB55516, 0x40026BB1}; /* ln(10) */
+static uae_u32 dhex_l10_2[] ={0x509F79FF, 0x3FD34413}; /* log10(2) */
+static uae_u32 dhex_l10_e[] ={0x1526E50E, 0x3FDBCB7B}; /* log10(e) */
+static uae_u32 dhex_1e16[]  ={0x37E08000, 0x4341C379};
+static uae_u32 dhex_1e32[]  ={0xB5056E17, 0x4693B8B5};
+static uae_u32 dhex_1e64[]  ={0xE93FF9F5, 0x4D384F03};
+static uae_u32 dhex_1e128[] ={0xF9301D32, 0x5A827748};
+static uae_u32 dhex_1e256[] ={0x7F73BF3C, 0x75154FDD};
+static uae_u32 dhex_inf[]   ={0x00000000, 0x7ff00000}; /* +infinity */
+static uae_u32 dhex_nan[]   ={0xffffffff, 0x7fffffff}; /* NaN, all mantissa bits set */
+extern double fp_1e8; /* defined outside this file; used for FMOVECR offset 0x36 */
+
 void comp_fpp_opp (uae_u32 opcode, uae_u16 extra)
 {
-  printf("comp_fpp_opp not yet implemented\n");
+	int reg;
+	int sreg, prec = 0;
+	int	dreg = (extra >> 7) & 7;
+	int source = (extra >> 13) & 7;
+	int	opmode = extra & 0x7f;
+
+	if (!currprefs.compfpu) {
+		FAIL (1);
+		return;
+	}
+	switch (source) {
+		case 3: /* FMOVE FPx, <EA> */
+		  if (comp_fp_put (opcode, extra) < 0)
+			  FAIL (1);
+		  return;
+		case 4: /* FMOVE.L  <EA>, ControlReg */
+		  if (!(opcode & 0x30)) { /* Dn or An */
+			  if (extra & 0x1000) { /* FPCR */
+				  mov_l_mr (uae_p32(&regs.fpcr), opcode & 15);
+				  return;
+			  }
+			  if (extra & 0x0800) { /* FPSR */
+				  FAIL (1);
+				  return;
+				  // set_fpsr(m68k_dreg (regs, opcode & 15));
+			  }
+			  if (extra & 0x0400) { /* FPIAR */
+				  mov_l_mr (uae_p32(&regs.fpiar), opcode & 15); return;
+			  }
+		  }
+		  else if ((opcode & 0x3f) == 0x3c) {
+			  if (extra & 0x1000) { /* FPCR */
+				  uae_u32 val = comp_get_ilong ((m68k_pc_offset += 4) - 4);
+				  mov_l_mi (uae_p32(&regs.fpcr), val);
+				  return;
+			  }
+			  if (extra & 0x0800) { /* FPSR */
+				  FAIL (1);
+				  return;
+			  }
+			  if (extra & 0x0400) { /* FPIAR */
+				  uae_u32 val = comp_get_ilong ((m68k_pc_offset += 4) - 4);
+				  mov_l_mi (uae_p32(&regs.fpiar), val);
+				  return;
+			  }
+		  }
+		  FAIL (1);
+		  return;
+		case 5: /* FMOVE.L  ControlReg, <EA> */
+		  if (!(opcode & 0x30)) { /* Dn or An */
+			  if (extra & 0x1000) { /* FPCR */
+				  mov_l_rm (opcode & 15, uae_p32(&regs.fpcr)); return;
+			  }
+			  if (extra & 0x0800) { /* FPSR */
+				  FAIL (1);
+				  return;
+			  }
+			  if (extra & 0x0400) { /* FPIAR */
+				  mov_l_rm (opcode & 15, uae_p32(&regs.fpiar)); return;
+			  }
+		  }
+		  FAIL (1);
+		  return;
+		case 6:
+		case 7:
+		  FAIL (1);
+		  return;
+		case 2: /* from <EA> to FPx */
+		  dont_care_fflags ();
+		  if ((extra & 0xfc00) == 0x5c00) { /* FMOVECR */
+			  //write_log (_T("JIT FMOVECR %x\n"), opmode);
+			  switch (opmode) {
+				  case 0x00:
+				    fmov_d_rm (dreg, uae_p32(&dhex_pi));
+				    break;
+				  case 0x0b:
+				    fmov_d_rm (dreg, uae_p32(&dhex_l10_2));
+				    break;
+				  case 0x0c:
+				    fmov_d_rm (dreg, uae_p32(&dhex_exp_1));
+				    break;
+				  case 0x0d:
+				    fmov_d_rm (dreg, uae_p32(&dhex_l2_e));
+				    break;
+				  case 0x0e:
+				    fmov_d_rm (dreg, uae_p32(&dhex_l10_e));
+				    break;
+				  case 0x0f:
+            fmov_d_ri_0 (dreg);
+				    break;
+				  case 0x30:
+				    fmov_d_rm (dreg, uae_p32(&dhex_ln_2));
+				    break; 
+				  case 0x31:
+				    fmov_d_rm (dreg, uae_p32(&dhex_ln_10));
+				    break;
+				  case 0x32:
+            fmov_d_ri_1 (dreg);
+				    break;
+				  case 0x33:
+            fmov_d_ri_10 (dreg);
+				    break;
+				  case 0x34:
+				    fmov_d_ri_100 (dreg);
+				    break;
+				  case 0x35:
+				    fmov_l_ri (dreg, 10000);
+				    break;
+				  case 0x36:
+				    fmov_rm (dreg, uae_p32(&fp_1e8));
+				    break;
+				  case 0x37:
+				    fmov_d_rm (dreg, uae_p32(&dhex_1e16));
+				    break;
+				  case 0x38:
+				    fmov_d_rm (dreg, uae_p32(&dhex_1e32));
+				    break;
+				  case 0x39:
+				    fmov_d_rm (dreg, uae_p32(&dhex_1e64));
+				    break;
+				  case 0x3a:
+				    fmov_d_rm (dreg, uae_p32(&dhex_1e128));
+				    break;
+				  case 0x3b:
+				    fmov_d_rm (dreg, uae_p32(&dhex_1e256));
+				    break;
+				  default:
+				    FAIL (1);
+				    return;
+			  }
+			  fmov_rr (FP_RESULT, dreg);
+			  return;
+      }
+		  if (opmode & 0x20) /* two operands, so we need a scratch reg */
+			  sreg = FS1;
+		  else /* one operand only, thus we can load the argument into dreg */
+			  sreg = dreg;
+		  if ((prec = comp_fp_get (opcode, extra, sreg)) < 0) {
+			  FAIL (1);
+			  return;
+		  }
+		  if (!opmode) { /* FMOVE  <EA>,FPx */
+			  fmov_rr (FP_RESULT, dreg);
+			  return;
+		  }
+		  /* no break here for <EA> to dreg */
+		case 0: /* directly from sreg to dreg */
+		  if (!source) { /* no <EA> */
+			  dont_care_fflags ();
+			  sreg = (extra >> 10) & 7;
+		  }
+		  switch (opmode) {
+			  case 0x00: /* FMOVE */
+			    fmov_rr (dreg, sreg);
+			    break;
+			  case 0x01: /* FINT */
+			    frndint_rr (dreg, sreg);
+			    break;
+			  case 0x02: /* FSINH */
+			    ffunc_rr (sinh, dreg, sreg);
+			    break;
+			  case 0x03: /* FINTRZ */
+			    frndintz_rr (dreg, sreg);
+			    break;
+		    case 0x04: /* FSQRT */
+		      fsqrt_rr (dreg, sreg);
+		      break;
+		    case 0x06: /* FLOGNP1 */
+		      ffunc_rr (log1p, dreg, sreg);
+		      break;
+		    case 0x08: /* FETOXM1 */
+		      ffunc_rr (expm1, dreg, sreg);
+		      break;
+		    case 0x09: /* FTANH */
+		      ffunc_rr (tanh, dreg, sreg);
+		      break;
+		    case 0x0a: /* FATAN */
+		      ffunc_rr (atan, dreg, sreg);
+		      break;
+		    case 0x0c: /* FASIN */
+		      ffunc_rr (asin, dreg, sreg);
+		      break;
+		    case 0x0d: /* FATANH */
+		      ffunc_rr (atanh, dreg, sreg);
+		      break;
+		    case 0x0e: /* FSIN */
+		      ffunc_rr (sin, dreg, sreg);
+		      break;
+		    case 0x0f: /* FTAN */
+		      ffunc_rr (tan, dreg, sreg);
+		      break;
+		    case 0x10: /* FETOX */
+		      ffunc_rr (exp, dreg, sreg);
+		      break;
+		    case 0x11: /* FTWOTOX */
+		      fpowx_rr (2, dreg, sreg);
+		      break;
+		    case 0x12: /* FTENTOX */
+		      fpowx_rr (10, dreg, sreg);
+		      break;
+			  case 0x14: /* FLOGN */
+			    ffunc_rr (log, dreg, sreg);
+			    break;
+			  case 0x15: /* FLOG10 */
+			    ffunc_rr (log10, dreg, sreg);
+			    break;
+			  case 0x16: /* FLOG2 */
+			    ffunc_rr (log2, dreg, sreg);
+			    break;
+			  case 0x18: /* FABS */
+			    fabs_rr (dreg, sreg);
+			    break;
+			  case 0x19: /* FCOSH */
+			    ffunc_rr (cosh, dreg, sreg);
+			    break;
+			  case 0x1a: /* FNEG */
+			    fneg_rr (dreg, sreg);
+			    break;
+  			case 0x1c: /* FACOS */
+			    ffunc_rr (acos, dreg, sreg);
+			    break;
+			  case 0x1d: /* FCOS */
+			    ffunc_rr (cos, dreg, sreg);
+			    break;
+			  case 0x20: /* FDIV */
+			    fdiv_rr (dreg, sreg);
+			    break;
+			  case 0x21: /* FMOD */
+			    fmod_rr (dreg, sreg);
+			    break;
+			  case 0x22: /* FADD */
+			    fadd_rr (dreg, sreg);
+			    break;
+			  case 0x23: /* FMUL */
+			    fmul_rr (dreg, sreg);
+			    break;
+			  case 0x24: /* FSGLDIV */
+			    fsgldiv_rr (dreg, sreg);
+			    break;
+			  case 0x60: /* FSDIV */
+			    fdiv_rr (dreg, sreg);
+			    if (!currprefs.fpu_strict) /* faster, but less strict rounding */
+				    break;
+			    fcuts_r (dreg);
+			    break;
+			  case 0x25: /* FREM */
+			    frem1_rr (dreg, sreg);
+			    break;
+        case 0x27: /* FSGLMUL */
+			    fsglmul_rr (dreg, sreg);
+			    break;
+			  case 0x63: /* FSMUL */
+			    fmul_rr (dreg, sreg);
+			    if (!currprefs.fpu_strict) /* faster, but less strict rounding */
+				    break;
+			    fcuts_r (dreg);
+			    break;
+			  case 0x28: /* FSUB */
+			    fsub_rr (dreg, sreg);
+			    break;
+			  case 0x30: /* FSINCOS */
+			  case 0x31:
+			  case 0x32:
+			  case 0x33:
+			  case 0x34:
+			  case 0x35:
+			  case 0x36:
+			  case 0x37:
+			    if (dreg == (extra & 7))
+				    ffunc_rr (sin, dreg, sreg);
+			    else
+				    fsincos_rr (dreg, extra & 7, sreg);
+			    break;
+			  case 0x38: /* FCMP */
+			    fmov_rr (FP_RESULT, dreg);
+			    fsub_rr (FP_RESULT, sreg);
+			    return;
+			  case 0x3a: /* FTST */
+			    fmov_rr (FP_RESULT, sreg);
+			    return;
+		    case 0x40: /* FSMOVE */
+			    if (prec == 1 || !currprefs.fpu_strict) {
+				    if (sreg != dreg) /* no <EA> */
+					    fmov_rr (dreg, sreg);
+			    }
+			    else {
+  			    fmovs_rr (dreg, sreg);
+			    }
+          break;
+			  case 0x44: /* FDMOVE */
+          if (sreg != dreg) /* no <EA> */
+  			    fmov_rr (dreg, sreg);
+          break;
+			  case 0x41: /* FSSQRT */
+			    fsqrt_rr (dreg, sreg);
+			    if (!currprefs.fpu_strict) /* faster, but less strict rounding */
+				    break;
+			    fcuts_r (dreg);
+			    break;
+			  case 0x45: /* FDSQRT */
+          fsqrt_rr (dreg, sreg);
+          break;
+			  case 0x58: /* FSABS */
+			    fabs_rr (dreg, sreg);
+			    if (prec != 1 && currprefs.fpu_strict)
+			      fcuts_r (dreg);
+			    break;
+			  case 0x5a: /* FSNEG */
+			    fneg_rr (dreg, sreg);
+			    if (prec != 1 && currprefs.fpu_strict)
+			      fcuts_r (dreg);
+			    break;
+			  case 0x5c: /* FDABS */
+			    fabs_rr (dreg, sreg);
+			    break;
+			  case 0x5e: /* FDNEG */
+			    fneg_rr (dreg, sreg);
+			    break;
+			  case 0x62: /* FSADD */
+			    fadd_rr (dreg, sreg);
+			    if (!currprefs.fpu_strict) /* faster, but less strict rounding */
+				    break;
+			    fcuts_r (dreg);
+			    break;
+			  case 0x64: /* FDDIV */
+			    fdiv_rr (dreg, sreg);
+			    break;
+			  case 0x66: /* FDADD */
+			    fadd_rr (dreg, sreg);
+			    break;
+			  case 0x67: /* FDMUL */
+			    fmul_rr (dreg, sreg);
+			    break;
+			  case 0x68: /* FSSUB */
+			    fsub_rr (dreg, sreg);
+			    if (!currprefs.fpu_strict) /* faster, but less strict rounding */
+				    break;
+			    fcuts_r (dreg);
+			    break;
+			  case 0x6c: /* FDSUB */
+			    fsub_rr (dreg, sreg);
+			    break;
+		    default:
+			    FAIL (1);
+			    return;
+      }
+		  fmov_rr (FP_RESULT, dreg);
+		  return;
+		default:
+		  write_log (_T ("Unsupported JIT-FPU instruction: 0x%04x %04x\n"), opcode, extra);
+		  FAIL (1);
+		  return;
+  }
 }
 #endif
--- a/src/jit/compemu_midfunc_arm.cpp
+++ b/src/jit/compemu_midfunc_arm.cpp
@ -224,9 +224,6 @@ MIDFUNC(2,mov_l_rr,(W4 d, RR4 s))

 	live.nat[s].holds[live.nat[s].nholds] = d;
 	live.nat[s].nholds++;
-#if defined(DEBUG) && DEBUG > 1	
-	jit_log("Added %d to nreg %d(%d), now holds %d regs", d, s, live.state[d].realind, live.nat[s].nholds);
-#endif
 	unlock2(s);
 }
 MENDFUNC(2,mov_l_rr,(W4 d, RR4 s))
@ -244,6 +241,14 @@ MIDFUNC(2,mov_l_mr,(IMM d, RR4 s))
 }
 MENDFUNC(2,mov_l_mr,(IMM d, RR4 s))

+/* Emit raw_mov_l_rm to fill virtual register d from the immediate operand s.
+ * d is acquired for a 4-byte write and released afterwards.
+ * NOTE(review): s is presumably a host memory address to load from --
+ * confirm against raw_mov_l_rm. */
+MIDFUNC(2,mov_l_rm,(W4 d, IMM s))
+{
+	d = writereg(d, 4);
+	raw_mov_l_rm(d, s);
+	unlock2(d);
+}
+MENDFUNC(2,mov_l_rm,(W4 d, IMM s))
+
 MIDFUNC(2,mov_l_ri,(W4 d, IMM s))
 {
 	set_const(d, s);
@ -480,3 +485,435 @@ STATIC_INLINE void write_jmp_target(uae_u32* jmpaddr, cpuop_func* a) {
 STATIC_INLINE void emit_jmp_target(uae_u32 a) {
 	emit_long((uae_u32)a);
 }
+
+
+/*************************************************************************
+* FPU stuff                                                             *
+*************************************************************************/
+
+/* Discard FPU virtual register r: if it currently sits in a native register
+ * it is disassociated, and its status is then set to UNDEF. */
+MIDFUNC(1,f_forget_about,(FW r))
+{
+	if (f_isinreg(r))
+		f_disassociate(r);
+	live.fate[r].status=UNDEF;
+}
+MENDFUNC(1,f_forget_about,(FW r))
+
+/* Declare the current FP_RESULT value unneeded by disassociating FP_RESULT
+ * from its native register. */
+MIDFUNC(0,dont_care_fflags,(void))
+{
+	f_disassociate(FP_RESULT);
+}
+MENDFUNC(0,dont_care_fflags,(void))
+
+/* Emit an FP register-to-register move from s to d via raw_fmov_rr.
+ * Nothing is emitted when source and destination are the same virtual
+ * register. s is acquired for reading before d is acquired for writing. */
+MIDFUNC(2,fmov_rr,(FW d, FR s))
+{
+	if (d == s) { /* How pointless! */
+		return;
+	}
+	s = f_readreg(s);
+	d = f_writereg(d);
+	raw_fmov_rr(d, s);
+	f_unlock(s);
+	f_unlock(d);
+}
+MENDFUNC(2,fmov_rr,(FW d, FR s))
+
+/* Emit raw_fmov_l_rr to set FP register d from integer register s, which is
+ * acquired as a 4-byte read.
+ * NOTE(review): presumably converts a 32-bit integer to floating point --
+ * confirm against raw_fmov_l_rr. */
+MIDFUNC(2,fmov_l_rr,(FW d, RR4 s))
+{
+	s = readreg(s, 4);
+  d = f_writereg(d);
+  raw_fmov_l_rr(d, s);
+	f_unlock(d);
+  unlock2(s);
+}
+MENDFUNC(2,fmov_l_rr,(FW d, RR4 s))
+
+/* Emit raw_fmov_s_rr to set FP register d from integer register s, which is
+ * acquired as a 4-byte read.
+ * NOTE(review): presumably s carries the bit pattern of a single-precision
+ * value -- confirm against raw_fmov_s_rr. */
+MIDFUNC(2,fmov_s_rr,(FW d, RR4 s))
+{
+	s = readreg(s, 4);
+  d = f_writereg(d);
+  raw_fmov_s_rr(d, s);
+	f_unlock(d);
+  unlock2(s);
+}
+MENDFUNC(2,fmov_s_rr,(FW d, RR4 s))
+
+/* Emit raw_fmov_w_rr to set FP register d from integer register s, which is
+ * acquired as a 2-byte read.
+ * NOTE(review): presumably converts a 16-bit integer to floating point --
+ * confirm against raw_fmov_w_rr. */
+MIDFUNC(2,fmov_w_rr,(FW d, RR2 s))
+{
+	s = readreg(s, 2);
+  d = f_writereg(d);
+  raw_fmov_w_rr(d, s);
+	f_unlock(d);
+  unlock2(s);
+}
+MENDFUNC(2,fmov_w_rr,(FW d, RR2 s))
+
+/* Emit raw_fmov_b_rr to set FP register d from integer register s, which is
+ * acquired as a 1-byte read.
+ * NOTE(review): presumably converts an 8-bit integer to floating point --
+ * confirm against raw_fmov_b_rr. */
+MIDFUNC(2,fmov_b_rr,(FW d, RR1 s))
+{
+	s = readreg(s, 1);
+  d = f_writereg(d);
+  raw_fmov_b_rr(d, s);
+	f_unlock(d);
+  unlock2(s);
+}
+MENDFUNC(2,fmov_b_rr,(FW d, RR1 s))
+
+/* Emit raw_fmov_d_rrr to set FP register d from the two integer registers
+ * s1 and s2, each acquired as a 4-byte read.
+ * NOTE(review): presumably s1/s2 are the two 32-bit halves of a double;
+ * which one is the high word is not visible here -- confirm against
+ * raw_fmov_d_rrr. */
+MIDFUNC(3,fmov_d_rrr,(FW d, RR4 s1, RR4 s2))
+{
+	s1 = readreg(s1, 4);
+	s2 = readreg(s2, 4);
+  d = f_writereg(d);
+  raw_fmov_d_rrr(d, s1, s2);
+	f_unlock(d);
+  unlock2(s2);
+  unlock2(s1);
+}
+MENDFUNC(3,fmov_d_rrr,(FW d, RR4 s1, RR4 s2))
+
+/* Load the immediate i into FP register d.
+ * The values 0, 1, 10 and 100 are handed to the dedicated fmov_d_ri_*
+ * helpers. Any other value is first placed in REG_WORK1 with
+ * compemu_raw_mov_l_ri and then transferred with raw_fmov_l_rr. */
+MIDFUNC(2,fmov_l_ri,(FW d, IMM i))
+{
+	switch(i) {
+		case 0:
+			fmov_d_ri_0(d);
+			break;
+		case 1:
+			fmov_d_ri_1(d);
+			break;
+		case 10:
+			fmov_d_ri_10(d);
+			break;
+		case 100:
+			fmov_d_ri_100(d);
+			break;
+		default:
+		  /* generic path: only here is d acquired by this function itself */
+		  d = f_writereg(d);
+		  compemu_raw_mov_l_ri(REG_WORK1, i);
+		  raw_fmov_l_rr(d, REG_WORK1);
+			f_unlock(d);
+	} 
+}
+MENDFUNC(2,fmov_l_ri,(FW d, IMM i))
+
+/* Load the immediate i into FP register d: i is placed in REG_WORK1 with
+ * compemu_raw_mov_l_ri and then transferred with raw_fmov_s_rr.
+ * NOTE(review): presumably i is the bit pattern of a single-precision
+ * value -- confirm against raw_fmov_s_rr. */
+MIDFUNC(2,fmov_s_ri,(FW d, IMM i))
+{
+  d = f_writereg(d);
+  compemu_raw_mov_l_ri(REG_WORK1, i);
+  raw_fmov_s_rr(d, REG_WORK1);
+	f_unlock(d);
+}
+MENDFUNC(2,fmov_s_ri,(FW d, IMM i))
+
+/* Emit raw_fmov_to_l_rr to set integer register d from FP register s.
+ * d is acquired for a full 4-byte write.
+ * NOTE(review): presumably converts the FP value to a 32-bit integer --
+ * confirm against raw_fmov_to_l_rr. */
+MIDFUNC(2,fmov_to_l_rr,(W4 d, FR s))
+{
+	s = f_readreg(s);
+  d = writereg(d, 4);
+  raw_fmov_to_l_rr(d, s);
+	unlock2(d);
+  f_unlock(s);
+}
+MENDFUNC(2,fmov_to_l_rr,(W4 d, FR s))
+
+/* Emit raw_fmov_to_s_rr to set integer register d from FP register s.
+ * d is acquired for a full 4-byte write.
+ * NOTE(review): presumably stores the single-precision bit pattern of s --
+ * confirm against raw_fmov_to_s_rr. */
+MIDFUNC(2,fmov_to_s_rr,(W4 d, FR s))
+{
+	s = f_readreg(s);
+  d = writereg(d, 4);
+  raw_fmov_to_s_rr(d, s);
+	unlock2(d);
+  f_unlock(s);
+}
+MENDFUNC(2,fmov_to_s_rr,(W4 d, FR s))
+
+/* Emit raw_fmov_to_w_rr to update integer register d from FP register s.
+ * d is acquired read-modify-write (write size 2, read size 4) rather than
+ * write-only.
+ * NOTE(review): presumably only the low 16 bits of d are replaced --
+ * confirm against raw_fmov_to_w_rr. */
+MIDFUNC(2,fmov_to_w_rr,(W4 d, FR s))
+{
+	s = f_readreg(s);
+  d = rmw(d, 2, 4);
+  raw_fmov_to_w_rr(d, s);
+	unlock2(d);
+  f_unlock(s);
+}
+MENDFUNC(2,fmov_to_w_rr,(W4 d, FR s))
+
+/* Emit raw_fmov_to_b_rr to update integer register d from FP register s.
+ * d is acquired read-modify-write (write size 1, read size 4) rather than
+ * write-only.
+ * NOTE(review): presumably only the low 8 bits of d are replaced --
+ * confirm against raw_fmov_to_b_rr. */
+MIDFUNC(2,fmov_to_b_rr,(W4 d, FR s))
+{
+	s = f_readreg(s);
+  d = rmw(d, 1, 4);
+  raw_fmov_to_b_rr(d, s);
+	unlock2(d);
+  f_unlock(s);
+}
+MENDFUNC(2,fmov_to_b_rr,(W4 d, FR s))
+
+/* Emit raw_fmov_d_ri_0 into FP register r, acquired write-only.
+ * fmov_l_ri dispatches here for the immediate value 0. */
+MIDFUNC(1,fmov_d_ri_0,(FW r))
+{
+	r = f_writereg(r);
+	raw_fmov_d_ri_0(r);
+	f_unlock(r);
+}
+MENDFUNC(1,fmov_d_ri_0,(FW r))
+
+/* Emit raw_fmov_d_ri_1 into FP register r, acquired write-only.
+ * fmov_l_ri dispatches here for the immediate value 1. */
+MIDFUNC(1,fmov_d_ri_1,(FW r))
+{
+	r = f_writereg(r);
+	raw_fmov_d_ri_1(r);
+	f_unlock(r);
+}
+MENDFUNC(1,fmov_d_ri_1,(FW r))
+
+/* Emit raw_fmov_d_ri_10 into FP register r, acquired write-only.
+ * fmov_l_ri dispatches here for the immediate value 10. */
+MIDFUNC(1,fmov_d_ri_10,(FW r))
+{
+	r = f_writereg(r);
+	raw_fmov_d_ri_10(r);
+	f_unlock(r);
+}
+MENDFUNC(1,fmov_d_ri_10,(FW r))
+
+/* Emit raw_fmov_d_ri_100 into FP register r, acquired write-only.
+ * fmov_l_ri dispatches here for the immediate value 100. */
+MIDFUNC(1,fmov_d_ri_100,(FW r))
+{
+	r = f_writereg(r);
+	raw_fmov_d_ri_100(r);
+	f_unlock(r);
+}
+MENDFUNC(1,fmov_d_ri_100,(FW r))
+
+/* Emit raw_fmov_d_rm to fill FP register r (acquired write-only) from the
+ * memory operand m.
+ * NOTE(review): presumably m is the host address of a double -- confirm
+ * against raw_fmov_d_rm. */
+MIDFUNC(2,fmov_d_rm,(FW r, MEMR m))
+{
+	r = f_writereg(r);
+	raw_fmov_d_rm(r, m);
+	f_unlock(r);
+}
+MENDFUNC(2,fmov_d_rm,(FW r, MEMR m))
+
+/* Emit raw_fmovs_rm to fill FP register r (acquired write-only) from the
+ * memory operand m.
+ * NOTE(review): presumably m is the host address of a single-precision
+ * value -- confirm against raw_fmovs_rm. */
+MIDFUNC(2,fmovs_rm,(FW r, MEMR m))
+{
+	r = f_writereg(r);
+	raw_fmovs_rm(r, m);
+	f_unlock(r);
+}
+MENDFUNC(2,fmovs_rm,(FW r, MEMR m))
+
+/* Fill FP register r (acquired write-only) from the memory operand m.
+ * Emits raw_fmov_d_rm, i.e. exactly the same code as fmov_d_rm. */
+MIDFUNC(2,fmov_rm,(FW r, MEMR m))
+{
+	r = f_writereg(r);
+	raw_fmov_d_rm(r, m);
+	f_unlock(r);
+}
+MENDFUNC(2,fmov_rm,(FW r, MEMR m))
+
+/* Emit raw_fmov_to_d_rrr to set the two integer registers d1 and d2 (each
+ * acquired for a 4-byte write) from FP register s.
+ * NOTE(review): presumably d1/d2 receive the two 32-bit halves of the
+ * double; which one gets the high word is not visible here -- confirm
+ * against raw_fmov_to_d_rrr. */
+MIDFUNC(3,fmov_to_d_rrr,(W4 d1, W4 d2, FR s))
+{
+	s = f_readreg(s);
+  d1 = writereg(d1, 4);
+  d2 = writereg(d2, 4);
+  raw_fmov_to_d_rrr(d1, d2, s);
+	unlock2(d2);
+	unlock2(d1);
+  f_unlock(s);
+}
+MENDFUNC(3,fmov_to_d_rrr,(W4 d1, W4 d2, FR s))
+
+/* Emit raw_fsqrt_rr with source s and write-only destination d.
+ * The FPU opcode compiler uses this for FSSQRT and FDSQRT. */
+MIDFUNC(2,fsqrt_rr,(FW d, FR s))
+{
+	s = f_readreg(s);
+	d = f_writereg(d);
+	raw_fsqrt_rr(d, s);
+	f_unlock(s);
+	f_unlock(d);
+}
+MENDFUNC(2,fsqrt_rr,(FW d, FR s))
+
+/* Emit raw_fabs_rr with source s and write-only destination d.
+ * The FPU opcode compiler uses this for FSABS and FDABS. */
+MIDFUNC(2,fabs_rr,(FW d, FR s))
+{
+	s = f_readreg(s);
+	d = f_writereg(d);
+	raw_fabs_rr(d, s);
+	f_unlock(s);
+	f_unlock(d);
+}
+MENDFUNC(2,fabs_rr,(FW d, FR s))
+
+/* Emit raw_fneg_rr with source s and write-only destination d.
+ * The FPU opcode compiler uses this for FSNEG and FDNEG. */
+MIDFUNC(2,fneg_rr,(FW d, FR s))
+{
+	s = f_readreg(s);
+	d = f_writereg(d);
+	raw_fneg_rr(d, s);
+	f_unlock(s);
+	f_unlock(d);
+}
+MENDFUNC(2,fneg_rr,(FW d, FR s))
+
+/* Emit raw_fdiv_rr on d and s. d is acquired read-modify-write, so its
+ * current value is an input and it receives the result.
+ * The FPU opcode compiler uses this for FDDIV. */
+MIDFUNC(2,fdiv_rr,(FRW d, FR s))
+{
+	s = f_readreg(s);
+	d = f_rmw(d);
+	raw_fdiv_rr(d, s);
+	f_unlock(s);
+	f_unlock(d);
+}
+MENDFUNC(2,fdiv_rr,(FRW d, FR s))
+
+/* Emit raw_fadd_rr on d and s. d is acquired read-modify-write, so its
+ * current value is an input and it receives the result.
+ * The FPU opcode compiler uses this for FSADD and FDADD. */
+MIDFUNC(2,fadd_rr,(FRW d, FR s))
+{
+	s = f_readreg(s);
+	d = f_rmw(d);
+	raw_fadd_rr(d, s);
+	f_unlock(s);
+	f_unlock(d);
+}
+MENDFUNC(2,fadd_rr,(FRW d, FR s))
+
+/* Emit raw_fmul_rr on d and s. d is acquired read-modify-write, so its
+ * current value is an input and it receives the result.
+ * The FPU opcode compiler uses this for FDMUL. */
+MIDFUNC(2,fmul_rr,(FRW d, FR s))
+{
+	s = f_readreg(s);
+	d = f_rmw(d);
+	raw_fmul_rr(d, s);
+	f_unlock(s);
+	f_unlock(d);
+}
+MENDFUNC(2,fmul_rr,(FRW d, FR s))
+
+/* Emit raw_fsub_rr on d and s. d is acquired read-modify-write, so its
+ * current value is an input and it receives the result.
+ * The FPU opcode compiler uses this for FSSUB and FDSUB. */
+MIDFUNC(2,fsub_rr,(FRW d, FR s))
+{
+	s = f_readreg(s);
+	d = f_rmw(d);
+	raw_fsub_rr(d, s);
+	f_unlock(s);
+	f_unlock(d);
+}
+MENDFUNC(2,fsub_rr,(FRW d, FR s))
+
+/* Emit raw_frndint_rr with source s and write-only destination d.
+ * NOTE(review): presumably rounds s to an integral value using the current
+ * rounding mode -- confirm against raw_frndint_rr. */
+MIDFUNC(2,frndint_rr,(FW d, FR s))
+{
+	s = f_readreg(s);
+	d = f_writereg(d);
+	raw_frndint_rr(d, s);
+	f_unlock(s);
+	f_unlock(d);
+}
+MENDFUNC(2,frndint_rr,(FW d, FR s))
+
+/* Emit raw_frndintz_rr with source s and write-only destination d.
+ * NOTE(review): presumably rounds s to an integral value toward zero --
+ * confirm against raw_frndintz_rr. */
+MIDFUNC(2,frndintz_rr,(FW d, FR s))
+{
+	s = f_readreg(s);
+	d = f_writereg(d);
+	raw_frndintz_rr(d, s);
+	f_unlock(s);
+	f_unlock(d);
+}
+MENDFUNC(2,frndintz_rr,(FW d, FR s))
+
+/* Emit raw_fmod_rr on d and s. d is acquired read-modify-write, so its
+ * current value is an input and it receives the result.
+ * NOTE(review): presumably the 68k FMOD remainder -- confirm against
+ * raw_fmod_rr. */
+MIDFUNC(2,fmod_rr,(FRW d, FR s))
+{
+	s = f_readreg(s);
+	d = f_rmw(d);
+	raw_fmod_rr(d, s);
+	f_unlock(s);
+	f_unlock(d);
+}
+MENDFUNC(2,fmod_rr,(FRW d, FR s))
+
+/* Emit raw_fsgldiv_rr on d and s. d is acquired read-modify-write, so its
+ * current value is an input and it receives the result.
+ * NOTE(review): presumably the single-precision divide FSGLDIV -- confirm
+ * against raw_fsgldiv_rr. */
+MIDFUNC(2,fsgldiv_rr,(FRW d, FR s))
+{
+	s = f_readreg(s);
+	d = f_rmw(d);
+	raw_fsgldiv_rr(d, s);
+	f_unlock(s);
+	f_unlock(d);
+}
+MENDFUNC(2,fsgldiv_rr,(FRW d, FR s))
+
+/* Emit raw_fcuts_r on FP register r in place (r is acquired
+ * read-modify-write). The FPU opcode compiler calls this after
+ * single-precision operations when currprefs.fpu_strict is set.
+ * NOTE(review): presumably reduces the value to single precision --
+ * confirm against raw_fcuts_r. */
+MIDFUNC(1,fcuts_r,(FRW r))
+{
+	r = f_rmw(r);
+	raw_fcuts_r(r);
+	f_unlock(r);
+}
+MENDFUNC(1,fcuts_r,(FRW r))
+
+/* Emit raw_frem1_rr on d and s. d is acquired read-modify-write, so its
+ * current value is an input and it receives the result.
+ * NOTE(review): presumably the IEEE remainder (68k FREM) -- confirm
+ * against raw_frem1_rr. */
+MIDFUNC(2,frem1_rr,(FRW d, FR s))
+{
+	s = f_readreg(s);
+	d = f_rmw(d);
+	raw_frem1_rr(d, s);
+	f_unlock(s);
+	f_unlock(d);
+}
+MENDFUNC(2,frem1_rr,(FRW d, FR s))
+
+/* Emit raw_fsglmul_rr on d and s. d is acquired read-modify-write, so its
+ * current value is an input and it receives the result.
+ * NOTE(review): presumably the single-precision multiply FSGLMUL --
+ * confirm against raw_fsglmul_rr. */
+MIDFUNC(2,fsglmul_rr,(FRW d, FR s))
+{
+	s = f_readreg(s);
+	d = f_rmw(d);
+	raw_fsglmul_rr(d, s);
+	f_unlock(s);
+	f_unlock(d);
+}
+MENDFUNC(2,fsglmul_rr,(FRW d, FR s))
+
+/* Emit raw_fmovs_rr with source s and write-only destination d.
+ * Unlike fmov_rr there is no shortcut for d == s, so code is emitted even
+ * then.
+ * NOTE(review): presumably a move that rounds to single precision --
+ * confirm against raw_fmovs_rr. */
+MIDFUNC(2,fmovs_rr,(FW d, FR s))
+{
+	s = f_readreg(s);
+	d = f_writereg(d);
+	raw_fmovs_rr(d, s);
+	f_unlock(s);
+	f_unlock(d);
+}
+MENDFUNC(2,fmovs_rr,(FW d, FR s))
+
+/* Emit raw_ffunc_rr for the host function func (double -> double) with
+ * source s and write-only destination d.
+ * The flags are clobbered and prepare_for_call_1/2 run first; only then
+ * are the operand registers acquired. */
+MIDFUNC(3,ffunc_rr,(double (*func)(double), FW d, FR s))
+{
+  clobber_flags();
+  prepare_for_call_1();
+  prepare_for_call_2();
+
+	s = f_readreg(s);
+	d = f_writereg(d);
+
+	raw_ffunc_rr(func, d, s);
+
+	f_unlock(s);
+	f_unlock(d);
+}
+MENDFUNC(3,ffunc_rr,(double (*func)(double), FW d, FR s))
+
+/* Emit two raw_ffunc_rr call-outs for the source s: sin() into d and cos()
+ * into c.
+ * The flags are clobbered and prepare_for_call_1/2 run first; only then
+ * are the operand registers acquired.
+ * If c and s resolve to the same native register, writing the cosine first
+ * would overwrite the source before the sine call reads it, so in that case
+ * the sine is emitted first. In every other case the order stays cosine,
+ * then sine, which is also the safe order when d shares a register with s. */
+MIDFUNC(3,fsincos_rr,(FW d, FW c, FR s))
+{
+  clobber_flags();
+  prepare_for_call_1();
+  prepare_for_call_2();
+
+	s = f_readreg(s);  /* s for source */
+	d = f_writereg(d); /* d for sine   */
+	c = f_writereg(c); /* c for cosine */
+
+	if (c == s) {
+		/* cosine target aliases the source: take the sine while s is intact */
+		raw_ffunc_rr(sin, d, s);
+		raw_ffunc_rr(cos, c, s);
+	}
+	else {
+		raw_ffunc_rr(cos, c, s);
+		raw_ffunc_rr(sin, d, s);
+	}
+
+	f_unlock(s);
+	f_unlock(d);
+	f_unlock(c);
+}
+MENDFUNC(3,fsincos_rr,(FW d, FW c, FR s))
+
+/* Emit raw_fpowx_rr with the constant x, source s and write-only
+ * destination d.
+ * The flags are clobbered and prepare_for_call_1/2 run first; only then
+ * are the operand registers acquired.
+ * NOTE(review): presumably computes x raised to the power s (e.g. for
+ * FTWOTOX/FTENTOX) -- confirm against raw_fpowx_rr. */
+MIDFUNC(3,fpowx_rr,(uae_u32 x, FW d, FR s))
+{
+  clobber_flags();
+  prepare_for_call_1();
+  prepare_for_call_2();
+
+	s = f_readreg(s);
+	d = f_writereg(d);
+
+	raw_fpowx_rr(x, d, s);
+
+	f_unlock(s);
+	f_unlock(d);
+
+}
+MENDFUNC(3,fpowx_rr,(uae_u32 x, FW d, FR s))
+
+/* Clobber the current native flags and let fflags_into_flags_internal()
+ * produce new ones.
+ * The function takes no arguments, so the arity passed to MIDFUNC/MENDFUNC
+ * is 0 and the parameter list is (void), the same form dont_care_fflags
+ * uses. */
+MIDFUNC(0,fflags_into_flags,(void))
+{
+	clobber_flags();
+	fflags_into_flags_internal();
+}
+MENDFUNC(0,fflags_into_flags,(void))
--- a/src/jit/compemu_midfunc_arm.h
+++ b/src/jit/compemu_midfunc_arm.h
@ -50,6 +50,7 @@ DECLARE_MIDFUNC(lea_l_brr_indexed(W4 d, RR4 s, RR4 index, IMM factor, IMM offset
 DECLARE_MIDFUNC(lea_l_rr_indexed(W4 d, RR4 s, RR4 index, IMM factor));
 DECLARE_MIDFUNC(mov_l_rr(W4 d, RR4 s));
 DECLARE_MIDFUNC(mov_l_mr(IMM d, RR4 s));
+DECLARE_MIDFUNC(mov_l_rm(W4 d, IMM s));
 DECLARE_MIDFUNC(mov_b_rm(W1 d, IMM s));
 DECLARE_MIDFUNC(mov_l_ri(W4 d, IMM s));
 DECLARE_MIDFUNC(mov_w_ri(W2 d, IMM s));
@ -66,3 +67,44 @@ DECLARE_MIDFUNC(make_flags_live(void));
 DECLARE_MIDFUNC(forget_about(W4 r));

 DECLARE_MIDFUNC(f_forget_about(FW r));
+DECLARE_MIDFUNC(dont_care_fflags(void));
+DECLARE_MIDFUNC(fmov_rr(FW d, FR s));
+
+DECLARE_MIDFUNC(fmov_l_rr(FW d, RR4 s));
+DECLARE_MIDFUNC(fmov_s_rr(FW d, RR4 s));
+DECLARE_MIDFUNC(fmov_w_rr(FW d, RR2 s));
+DECLARE_MIDFUNC(fmov_b_rr(FW d, RR1 s));
+DECLARE_MIDFUNC(fmov_d_rrr(FW d, RR4 s1, RR4 s2));
+DECLARE_MIDFUNC(fmov_l_ri(FW d, IMM i));
+DECLARE_MIDFUNC(fmov_s_ri(FW d, IMM i));
+DECLARE_MIDFUNC(fmov_to_l_rr(W4 d, FR s));
+DECLARE_MIDFUNC(fmov_to_s_rr(W4 d, FR s));
+DECLARE_MIDFUNC(fmov_to_w_rr(W4 d, FR s));
+DECLARE_MIDFUNC(fmov_to_b_rr(W4 d, FR s));
+DECLARE_MIDFUNC(fmov_d_ri_0(FW d));
+DECLARE_MIDFUNC(fmov_d_ri_1(FW d));
+DECLARE_MIDFUNC(fmov_d_ri_10(FW d));
+DECLARE_MIDFUNC(fmov_d_ri_100(FW d));
+DECLARE_MIDFUNC(fmov_d_rm(FW r, MEMR m));
+DECLARE_MIDFUNC(fmovs_rm(FW r, MEMR m));
+DECLARE_MIDFUNC(fmov_rm(FW r, MEMR m));
+DECLARE_MIDFUNC(fmov_to_d_rrr(W4 d1, W4 d2, FR s));
+DECLARE_MIDFUNC(fsqrt_rr(FW d, FR s));
+DECLARE_MIDFUNC(fabs_rr(FW d, FR s));
+DECLARE_MIDFUNC(fneg_rr(FW d, FR s));
+DECLARE_MIDFUNC(fdiv_rr(FRW d, FR s));
+DECLARE_MIDFUNC(fadd_rr(FRW d, FR s));
+DECLARE_MIDFUNC(fmul_rr(FRW d, FR s));
+DECLARE_MIDFUNC(fsub_rr(FRW d, FR s));
+DECLARE_MIDFUNC(frndint_rr(FW d, FR s));
+DECLARE_MIDFUNC(frndintz_rr(FW d, FR s));
+DECLARE_MIDFUNC(fmod_rr(FRW d, FR s));
+DECLARE_MIDFUNC(fsgldiv_rr(FRW d, FR s));
+DECLARE_MIDFUNC(fcuts_r(FRW r));
+DECLARE_MIDFUNC(frem1_rr(FRW d, FR s));
+DECLARE_MIDFUNC(fsglmul_rr(FRW d, FR s));
+DECLARE_MIDFUNC(fmovs_rr(FW d, FR s));
+DECLARE_MIDFUNC(ffunc_rr(double (*func)(double), FW d, FR s));
+DECLARE_MIDFUNC(fsincos_rr(FW d, FW c, FR s));
+DECLARE_MIDFUNC(fpowx_rr(uae_u32 x, FW d, FR s));
+DECLARE_MIDFUNC(fflags_into_flags());
--- a/src/jit/compemu_support.cpp
+++ b/src/jit/compemu_support.cpp
@ -32,6 +32,8 @@
 #define writemem_special writemem
 #define readmem_special  readmem

+#include <math.h>
+
 #include "sysconfig.h"
 #include "sysdeps.h"

@ -108,7 +110,11 @@ const int	follow_const_jumps = 0;

 static uae_u32 cache_size = 0; // Size of total cache allocated for compiled blocks
 static uae_u32 current_cache_size	= 0;		// Cache grows upwards: how much has been consumed already
+#ifdef USE_JIT_FPU
+#define avoid_fpu (!currprefs.compfpu)
+#else
 #define avoid_fpu (true)
+#endif
 static const int align_loops = 0;	      // Align the start of loops
 static const int align_jumps = 0;	      // Align the start of jumps
 static int optcount[10]		= {
@ -646,13 +652,15 @@ bool check_prefs_changed_comp(bool checkonly)
 {
 	bool changed = 0;

-	if (currprefs.fpu_strict != changed_prefs.fpu_strict ||
+	if (currprefs.compfpu != changed_prefs.compfpu ||
+		currprefs.fpu_strict != changed_prefs.fpu_strict ||
 		currprefs.cachesize != changed_prefs.cachesize)
 		changed = 1;

 	if (checkonly)
 		return changed;

+	currprefs.compfpu = changed_prefs.compfpu;
 	currprefs.fpu_strict = changed_prefs.fpu_strict;

 	if (currprefs.cachesize != changed_prefs.cachesize) {
@ -955,6 +963,7 @@ static  void evict(int r)
  if (live.nat[rr].nholds != live.state[r].realind) { /* Was not last */
 	  int topreg = live.nat[rr].holds[live.nat[rr].nholds];
 	  int thisind = live.state[r].realind;
+	
 	  live.nat[rr].holds[thisind] = topreg;
 	  live.state[topreg].realind = thisind;
  }
@ -1343,6 +1352,142 @@ static int rmw(int r, int wsize, int rsize)
  return rmw_general(r, wsize, rsize);
 }

+/********************************************************************
+ * FPU register status handling. EMIT TIME!                         *
+ ********************************************************************/
+
+/* Write FPU virtual register r back to its memory slot if it is DIRTY:
+ * emits compemu_raw_fmov_mr_drop from its native register and marks r
+ * INMEM. Any other status is left untouched. The realreg field is not
+ * changed here. */
+STATIC_INLINE void f_tomem_drop(int r)
+{
+	if (live.fate[r].status == DIRTY) {
+		compemu_raw_fmov_mr_drop((uintptr)live.fate[r].mem, live.fate[r].realreg);
+		live.fate[r].status = INMEM;
+	}
+}
+
+
+/* Return nonzero when FPU virtual register r is held in a native register,
+ * i.e. its status is CLEAN or DIRTY. */
+STATIC_INLINE int f_isinreg(int r)
+{
+	return live.fate[r].status == CLEAN || live.fate[r].status == DIRTY;
+}
+
+/* Remove FPU virtual register r from its native register.
+ * Does nothing if r is not in a register. Otherwise a dirty value is
+ * written back first (f_tomem_drop), the native register's holder count is
+ * cleared, and r ends up INMEM with realreg = -1. */
+STATIC_INLINE void f_evict(int r)
+{
+	int rr;
+
+	if (!f_isinreg(r))
+		return;
+	/* remember the native register before the state is rewritten */
+	rr = live.fate[r].realreg;
+	f_tomem_drop(r);
+
+	live.fat[rr].nholds = 0;
+	live.fate[r].status = INMEM;
+	live.fate[r].realreg = -1;
+}
+
+/* Evict the virtual register recorded as the holder of native FP register
+ * r. nholds is not checked here; prepare_for_call_2 only calls this when
+ * nholds > 0. */
+STATIC_INLINE void f_free_nreg(int r)
+{
+	int vr;
+	vr = live.fat[r].holds;
+	f_evict(vr);
+}
+
+
+/* Use with care!
+ * Force the status of FPU virtual register r to CLEAN if it is in a native
+ * register. A DIRTY value is not stored by this, so a later eviction will
+ * not write it back. Does nothing if r is not in a register. */
+STATIC_INLINE void f_isclean(int r)
+{
+	if (!f_isinreg(r))
+		return;
+	live.fate[r].status = CLEAN;
+}
+
+/* Detach FPU virtual register r from its native register without writing
+ * it back: r is first marked clean, so the following eviction emits no
+ * store. */
+STATIC_INLINE void f_disassociate(int r)
+{
+	f_isclean(r);
+	f_evict(r);
+}
+
+
+
+/* Bind FPU virtual register r to its native register and return that
+ * register's number. The mapping is fixed: r < 8 maps to r + 8, everything
+ * else to r - 8, so no search or spilling takes place here.
+ *
+ * willclobber == 0: if r is INMEM its value is loaded with
+ *   compemu_raw_fmov_rm and r becomes CLEAN; any other status is left
+ *   unchanged and nothing is loaded.
+ * willclobber != 0: nothing is loaded and r becomes DIRTY. */
+static int f_alloc_reg(int r, int willclobber)
+{
+	int bestreg;
+
+	if(r < 8)
+	  bestreg = r + 8; // map real Amiga reg to ARM VFP reg 8-15
+	else
+	  bestreg = r - 8; // map FP_RESULT, FS1, FS2 or FS3 to ARM VFP reg 0-3
+
+	if (!willclobber) {
+		if (live.fate[r].status == INMEM) {
+			compemu_raw_fmov_rm(bestreg, (uintptr)live.fate[r].mem);
+			live.fate[r].status=CLEAN;
+		}
+	}
+	else {
+		live.fate[r].status = DIRTY;
+	}
+	/* record the binding in both directions */
+	live.fate[r].realreg=bestreg;
+	live.fat[bestreg].holds = r;
+	live.fat[bestreg].nholds = 1;
+
+	return bestreg;
+}
+
+/* Counterpart to f_readreg/f_writereg/f_rmw. Intentionally empty in this
+ * implementation: no lock state is kept for FP registers.
+ * NOTE(review): presumably unnecessary because f_alloc_reg uses a fixed
+ * virtual-to-native mapping -- confirm. */
+STATIC_INLINE void f_unlock(int r)
+{
+}
+
+/* Return the native register holding FPU virtual register r for reading.
+ * If r is not in a register yet, it is bound with f_alloc_reg(r, 0), which
+ * loads the value when r is INMEM. */
+STATIC_INLINE int f_readreg(int r)
+{
+	int answer=-1;
+
+	if (f_isinreg(r)) {
+		answer = live.fate[r].realreg;
+	}
+	/* either the value was in memory to start with, or it was evicted and
+	is in memory now */
+	if (answer < 0)
+		answer = f_alloc_reg(r,0);
+
+	return answer;
+}
+
+/* Return the native register for FPU virtual register r for a write that
+ * replaces the whole value. If r is not in a register it is bound with
+ * f_alloc_reg(r, 1), i.e. without loading the old value. r is always left
+ * DIRTY. */
+STATIC_INLINE int f_writereg(int r)
+{
+	int answer = -1;
+
+	if (f_isinreg(r)) {
+		answer = live.fate[r].realreg;
+	}
+	if (answer < 0) {
+		answer = f_alloc_reg(r,1);
+	}
+	live.fate[r].status = DIRTY;
+	return answer;
+}
+
+/* Return the native register for FPU virtual register r for a
+ * read-modify-write access. If r is not in a register it is bound with
+ * f_alloc_reg(r, 0), which loads the current value when r is INMEM.
+ * r is always left DIRTY. */
+STATIC_INLINE int f_rmw(int r)
+{
+	int n;
+
+	if (f_isinreg(r)) {
+		n = live.fate[r].realreg;
+	}
+	else
+		n = f_alloc_reg(r,0);
+	live.fate[r].status = DIRTY;
+	return n;
+}
+
+/* Acquire FP_RESULT for reading, emit raw_fflags_into_flags on its native
+ * register and then call live_flags().
+ * NOTE(review): presumably this leaves the native condition flags
+ * reflecting FP_RESULT -- confirm against raw_fflags_into_flags. */
+static void fflags_into_flags_internal(void)
+{
+	int r;
+
+	r = f_readreg(FP_RESULT);
+  raw_fflags_into_flags(r);
+	f_unlock(r);
+	live_flags();
+}


 #if defined(CPU_arm)
@ -1379,6 +1524,7 @@ void sync_m68k_pc(void)

+/* Scratch storage: init_comp points the memory slot of every FPU virtual
+ * register that is neither FP0-FP7 nor FP_RESULT at its entry in fregs. */
 struct scratch_t {
  uae_u32 regs[VREGS];
+	fpu_register	fregs[VFREGS];
 };

 static scratch_t scratch;
@ -1479,6 +1625,12 @@ void init_comp(void)
 	  set_status(i, UNDEF);
  }

+	for (i=0;i<VFREGS;i++) {
+		live.fate[i].status = UNDEF;
+		live.fate[i].realreg = -1;
+		live.fate[i].needflush = NF_SCRATCH;
+	}
+
  for (i=0; i<VREGS; i++) {
  	if (i < 16) { /* First 16 registers map to 68k registers */
 	    live.state[i].mem = &regs.regs[i];
@ -1502,6 +1654,22 @@ void init_comp(void)

  set_status(NEXT_HANDLER, UNDEF);

+	for (i = 0; i < VFREGS; i++) {
+		if (i < 8) { /* First 8 registers map to 68k FPU registers */
+			live.fate[i].mem = (uae_u32*)(&regs.fp[i].fp);
+			live.fate[i].needflush = NF_TOMEM;
+			live.fate[i].status = INMEM;
+		}
+		else if (i == FP_RESULT) {
+			live.fate[i].mem = (uae_u32*)(&regs.fp_result.fp);
+			live.fate[i].needflush = NF_TOMEM;
+			live.fate[i].status = INMEM;
+		}
+		else
+			live.fate[i].mem = (uae_u32*)(&scratch.fregs[i]);
+	}
+
+
  for (i=0; i<N_REGS; i++) {
 	  live.nat[i].touched = 0;
 	  live.nat[i].nholds = 0;
@ -1512,6 +1680,10 @@ void init_comp(void)
 	  }
  }

+	for (i=0;i<N_FREGS;i++) {
+		live.fat[i].nholds = 0;
+	}
+
  touchcnt = 1;
  m68k_pc_offset = 0;
  live.flags_in_flags = TRASH;
@ -1528,6 +1700,12 @@ void flush(int save_regs)
  sync_m68k_pc(); /* mid level */

  if (save_regs) {
+		for (i = 0; i < VFREGS; i++) {
+			if (live.fate[i].needflush == NF_SCRATCH ||
+				live.fate[i].status == CLEAN) {
+					f_disassociate(i);
+			}
+		}
  	for (i=0; i<=FLAGTMP; i++) {
  		switch(live.state[i].status) {
  		  case INMEM:
@ -1548,6 +1726,11 @@ void flush(int save_regs)
  	      break;
 	    }
 	  }
+		for (i = 0; i <= FP_RESULT; i++) {
+			if (live.fate[i].status == DIRTY) {
+				f_evict(i);
+			}
+		}
  }
 }

@ -1565,6 +1748,9 @@ void freescratch(void)

  for (i = S1; i < VREGS; i++)
    forget_about(i);
+
+	for (i = FS1; i <= FS3; i++) // only FS1-FS3
+		f_forget_about(i);
 }

 /********************************************************************
@ -1598,6 +1784,9 @@ static void flush_all(void)
    		tomem(i);
 	    }
  	}
+		for (i = FP_RESULT; i <= FS3; i++) // only FP_RESULT and FS1-FS3, FP0-FP7 are call save
+			if (f_isinreg(i))
+				f_evict(i);
 }

 /* Make sure all registers that will get clobbered by a call are
@ -1619,6 +1808,10 @@ static void prepare_for_call_2(void)
 	    free_nreg(i);
  }

+	for (i = 0; i < 4; i++) // only FP_RESULT and FS1-FS3, FP0-FP7 are call save
+		if (live.fat[i].nholds > 0)
+			f_free_nreg(i);
+
  live.flags_in_flags = TRASH;  /* Note: We assume we already rescued the
 			         flags at the very start of the call_r
 			         functions! */
@ -2038,7 +2231,6 @@ STATIC_INLINE int block_check_checksum(blockinfo* bi)
 	     means we have to move it into the needs-to-be-flushed list */
 	  bi->handler_to_use = bi->handler;
 	  set_dhtu(bi, bi->direct_handler);
-
 	  bi->status = BI_CHECKING;
 	  isgood = called_check_checksum(bi) != 0;
  }
@ -2694,7 +2886,7 @@ void compile_block(cpu_history* pc_hist, int blocklen, int totcycles)
      if (next_pc_p) { /* A branch was registered */
  		  uintptr t1 = next_pc_p;
  		  uintptr t2 = taken_pc_p;
-  		  int cc = branch_cc;
+  		  int cc = branch_cc; // this is native (ARM) condition code
  
  		  uae_u32* branchadd;
  		  uae_u32* tba;
@ -2707,7 +2899,10 @@ void compile_block(cpu_history* pc_hist, int blocklen, int totcycles)
  		       the 68k branch is taken. */
  		    t1 = taken_pc_p;
  		    t2 = next_pc_p;
-  		    cc = branch_cc^1;
+  		    if(cc < NATIVE_CC_AL)
+  		      cc = branch_cc^1;
+  		    else if(cc > NATIVE_CC_AL)
+  		    	cc = 0x10 | (branch_cc ^ 0xf);
    		}
  
    		tmp = live; /* ouch! This is big... */
--- a/src/jit/compstbl.cpp
+++ b/src/jit/compstbl.cpp
@ -1803,32 +1803,32 @@ extern const struct comptbl op_smalltbl_0_comp_ff[] = {
 { NULL, 0x00000001, 61488 }, /* MMUOP030 */
 { NULL, 0x00000001, 61496 }, /* MMUOP030 */
 { NULL, 0x00000001, 61497 }, /* MMUOP030 */
-{ NULL, 0x00000022, 61952 }, /* FPP */
-{ NULL, 0x00000022, 61960 }, /* FPP */
-{ NULL, 0x00000022, 61968 }, /* FPP */
-{ NULL, 0x00000022, 61976 }, /* FPP */
-{ NULL, 0x00000022, 61984 }, /* FPP */
-{ NULL, 0x00000022, 61992 }, /* FPP */
-{ NULL, 0x00000022, 62000 }, /* FPP */
-{ NULL, 0x00000022, 62008 }, /* FPP */
-{ NULL, 0x00000022, 62009 }, /* FPP */
-{ NULL, 0x00000022, 62010 }, /* FPP */
-{ NULL, 0x00000022, 62011 }, /* FPP */
-{ NULL, 0x00000022, 62012 }, /* FPP */
-{ NULL, 0x00000006, 62016 }, /* FScc */
+{ op_f200_0_comp_ff, 0x00000022, 61952 }, /* FPP */
+{ op_f208_0_comp_ff, 0x00000022, 61960 }, /* FPP */
+{ op_f210_0_comp_ff, 0x00000022, 61968 }, /* FPP */
+{ op_f218_0_comp_ff, 0x00000022, 61976 }, /* FPP */
+{ op_f220_0_comp_ff, 0x00000022, 61984 }, /* FPP */
+{ op_f228_0_comp_ff, 0x00000022, 61992 }, /* FPP */
+{ op_f230_0_comp_ff, 0x00000022, 62000 }, /* FPP */
+{ op_f238_0_comp_ff, 0x00000022, 62008 }, /* FPP */
+{ op_f239_0_comp_ff, 0x00000022, 62009 }, /* FPP */
+{ op_f23a_0_comp_ff, 0x00000022, 62010 }, /* FPP */
+{ op_f23b_0_comp_ff, 0x00000022, 62011 }, /* FPP */
+{ op_f23c_0_comp_ff, 0x00000022, 62012 }, /* FPP */
+{ op_f240_0_comp_ff, 0x00000006, 62016 }, /* FScc */
 { NULL, 0x00000021, 62024 }, /* FDBcc */
-{ NULL, 0x00000006, 62032 }, /* FScc */
-{ NULL, 0x00000006, 62040 }, /* FScc */
-{ NULL, 0x00000006, 62048 }, /* FScc */
-{ NULL, 0x00000006, 62056 }, /* FScc */
-{ NULL, 0x00000006, 62064 }, /* FScc */
-{ NULL, 0x00000006, 62072 }, /* FScc */
-{ NULL, 0x00000006, 62073 }, /* FScc */
+{ op_f250_0_comp_ff, 0x00000006, 62032 }, /* FScc */
+{ op_f258_0_comp_ff, 0x00000006, 62040 }, /* FScc */
+{ op_f260_0_comp_ff, 0x00000006, 62048 }, /* FScc */
+{ op_f268_0_comp_ff, 0x00000006, 62056 }, /* FScc */
+{ op_f270_0_comp_ff, 0x00000006, 62064 }, /* FScc */
+{ op_f278_0_comp_ff, 0x00000006, 62072 }, /* FScc */
+{ op_f279_0_comp_ff, 0x00000006, 62073 }, /* FScc */
 { NULL, 0x00000021, 62074 }, /* FTRAPcc */
 { NULL, 0x00000021, 62075 }, /* FTRAPcc */
 { NULL, 0x00000021, 62076 }, /* FTRAPcc */
-{ NULL, 0x00000005, 62080 }, /* FBcc */
-{ NULL, 0x00000005, 62144 }, /* FBcc */
+{ op_f280_0_comp_ff, 0x00000005, 62080 }, /* FBcc */
+{ op_f2c0_0_comp_ff, 0x00000005, 62144 }, /* FBcc */
 { NULL, 0x00000020, 62224 }, /* FSAVE */
 { NULL, 0x00000020, 62240 }, /* FSAVE */
 { NULL, 0x00000020, 62248 }, /* FSAVE */
@ -3675,32 +3675,32 @@ extern const struct comptbl op_smalltbl_0_comp_nf[] = {
 { NULL, 0x00000001, 61488 }, /* MMUOP030 */
 { NULL, 0x00000001, 61496 }, /* MMUOP030 */
 { NULL, 0x00000001, 61497 }, /* MMUOP030 */
-{ NULL, 0x00000022, 61952 }, /* FPP */
-{ NULL, 0x00000022, 61960 }, /* FPP */
-{ NULL, 0x00000022, 61968 }, /* FPP */
-{ NULL, 0x00000022, 61976 }, /* FPP */
-{ NULL, 0x00000022, 61984 }, /* FPP */
-{ NULL, 0x00000022, 61992 }, /* FPP */
-{ NULL, 0x00000022, 62000 }, /* FPP */
-{ NULL, 0x00000022, 62008 }, /* FPP */
-{ NULL, 0x00000022, 62009 }, /* FPP */
-{ NULL, 0x00000022, 62010 }, /* FPP */
-{ NULL, 0x00000022, 62011 }, /* FPP */
-{ NULL, 0x00000022, 62012 }, /* FPP */
-{ NULL, 0x00000006, 62016 }, /* FScc */
+{ op_f200_0_comp_nf, 0x00000022, 61952 }, /* FPP */
+{ op_f208_0_comp_nf, 0x00000022, 61960 }, /* FPP */
+{ op_f210_0_comp_nf, 0x00000022, 61968 }, /* FPP */
+{ op_f218_0_comp_nf, 0x00000022, 61976 }, /* FPP */
+{ op_f220_0_comp_nf, 0x00000022, 61984 }, /* FPP */
+{ op_f228_0_comp_nf, 0x00000022, 61992 }, /* FPP */
+{ op_f230_0_comp_nf, 0x00000022, 62000 }, /* FPP */
+{ op_f238_0_comp_nf, 0x00000022, 62008 }, /* FPP */
+{ op_f239_0_comp_nf, 0x00000022, 62009 }, /* FPP */
+{ op_f23a_0_comp_nf, 0x00000022, 62010 }, /* FPP */
+{ op_f23b_0_comp_nf, 0x00000022, 62011 }, /* FPP */
+{ op_f23c_0_comp_nf, 0x00000022, 62012 }, /* FPP */
+{ op_f240_0_comp_nf, 0x00000006, 62016 }, /* FScc */
 { NULL, 0x00000021, 62024 }, /* FDBcc */
-{ NULL, 0x00000006, 62032 }, /* FScc */
-{ NULL, 0x00000006, 62040 }, /* FScc */
-{ NULL, 0x00000006, 62048 }, /* FScc */
-{ NULL, 0x00000006, 62056 }, /* FScc */
-{ NULL, 0x00000006, 62064 }, /* FScc */
-{ NULL, 0x00000006, 62072 }, /* FScc */
-{ NULL, 0x00000006, 62073 }, /* FScc */
+{ op_f250_0_comp_nf, 0x00000006, 62032 }, /* FScc */
+{ op_f258_0_comp_nf, 0x00000006, 62040 }, /* FScc */
+{ op_f260_0_comp_nf, 0x00000006, 62048 }, /* FScc */
+{ op_f268_0_comp_nf, 0x00000006, 62056 }, /* FScc */
+{ op_f270_0_comp_nf, 0x00000006, 62064 }, /* FScc */
+{ op_f278_0_comp_nf, 0x00000006, 62072 }, /* FScc */
+{ op_f279_0_comp_nf, 0x00000006, 62073 }, /* FScc */
 { NULL, 0x00000021, 62074 }, /* FTRAPcc */
 { NULL, 0x00000021, 62075 }, /* FTRAPcc */
 { NULL, 0x00000021, 62076 }, /* FTRAPcc */
-{ NULL, 0x00000005, 62080 }, /* FBcc */
-{ NULL, 0x00000005, 62144 }, /* FBcc */
+{ op_f280_0_comp_nf, 0x00000005, 62080 }, /* FBcc */
+{ op_f2c0_0_comp_nf, 0x00000005, 62144 }, /* FBcc */
 { NULL, 0x00000020, 62224 }, /* FSAVE */
 { NULL, 0x00000020, 62240 }, /* FSAVE */
 { NULL, 0x00000020, 62248 }, /* FSAVE */
--- a/src/jit/comptbl.h
+++ b/src/jit/comptbl.h
@ -1446,6 +1446,28 @@ extern compop_func op_e7e8_0_comp_ff;
 extern compop_func op_e7f0_0_comp_ff;
 extern compop_func op_e7f8_0_comp_ff;
 extern compop_func op_e7f9_0_comp_ff;
+extern compop_func op_f200_0_comp_ff;
+extern compop_func op_f208_0_comp_ff;
+extern compop_func op_f210_0_comp_ff;
+extern compop_func op_f218_0_comp_ff;
+extern compop_func op_f220_0_comp_ff;
+extern compop_func op_f228_0_comp_ff;
+extern compop_func op_f230_0_comp_ff;
+extern compop_func op_f238_0_comp_ff;
+extern compop_func op_f239_0_comp_ff;
+extern compop_func op_f23a_0_comp_ff;
+extern compop_func op_f23b_0_comp_ff;
+extern compop_func op_f23c_0_comp_ff;
+extern compop_func op_f240_0_comp_ff;
+extern compop_func op_f250_0_comp_ff;
+extern compop_func op_f258_0_comp_ff;
+extern compop_func op_f260_0_comp_ff;
+extern compop_func op_f268_0_comp_ff;
+extern compop_func op_f270_0_comp_ff;
+extern compop_func op_f278_0_comp_ff;
+extern compop_func op_f279_0_comp_ff;
+extern compop_func op_f280_0_comp_ff;
+extern compop_func op_f2c0_0_comp_ff;
 extern compop_func op_f600_0_comp_ff;
 extern compop_func op_f608_0_comp_ff;
 extern compop_func op_f610_0_comp_ff;
@ -2893,6 +2915,28 @@ extern compop_func op_e7e8_0_comp_nf;
 extern compop_func op_e7f0_0_comp_nf;
 extern compop_func op_e7f8_0_comp_nf;
 extern compop_func op_e7f9_0_comp_nf;
+extern compop_func op_f200_0_comp_nf;
+extern compop_func op_f208_0_comp_nf;
+extern compop_func op_f210_0_comp_nf;
+extern compop_func op_f218_0_comp_nf;
+extern compop_func op_f220_0_comp_nf;
+extern compop_func op_f228_0_comp_nf;
+extern compop_func op_f230_0_comp_nf;
+extern compop_func op_f238_0_comp_nf;
+extern compop_func op_f239_0_comp_nf;
+extern compop_func op_f23a_0_comp_nf;
+extern compop_func op_f23b_0_comp_nf;
+extern compop_func op_f23c_0_comp_nf;
+extern compop_func op_f240_0_comp_nf;
+extern compop_func op_f250_0_comp_nf;
+extern compop_func op_f258_0_comp_nf;
+extern compop_func op_f260_0_comp_nf;
+extern compop_func op_f268_0_comp_nf;
+extern compop_func op_f270_0_comp_nf;
+extern compop_func op_f278_0_comp_nf;
+extern compop_func op_f279_0_comp_nf;
+extern compop_func op_f280_0_comp_nf;
+extern compop_func op_f2c0_0_comp_nf;
 extern compop_func op_f600_0_comp_nf;
 extern compop_func op_f608_0_comp_nf;
 extern compop_func op_f610_0_comp_nf;
--- a/src/jit/flags_arm.h
+++ b/src/jit/flags_arm.h
@ -1,52 +0,0 @@
-/*
- *  compiler/flags_arm.h - Native flags definitions for ARM
- *
- * Copyright (c) 2013 Jens Heitmann of ARAnyM dev team (see AUTHORS)
- * 
- * Inspired by Christian Bauer's Basilisk II
- *
- *  Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
- *
- *  Adaptation for Basilisk II and improvements, copyright 2000-2002
- *    Gwenole Beauchesne
- *
- *  Basilisk II (C) 1997-2002 Christian Bauer
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to the Free Software
- *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-
-#ifndef NATIVE_FLAGS_ARM_H
-#define NATIVE_FLAGS_ARM_H
-
-/* Native integer code conditions */
-enum {
-      	NATIVE_CC_EQ = 0,
-        NATIVE_CC_NE = 1,
-        NATIVE_CC_CS = 2,
-        NATIVE_CC_CC = 3,
-        NATIVE_CC_MI = 4,
-        NATIVE_CC_PL = 5,
-        NATIVE_CC_VS = 6,
-        NATIVE_CC_VC = 7,
-        NATIVE_CC_HI = 8,
-        NATIVE_CC_LS = 9,
-        NATIVE_CC_GE = 10,
-        NATIVE_CC_LT = 11,
-        NATIVE_CC_GT = 12,
-        NATIVE_CC_LE = 13,
-        NATIVE_CC_AL = 14
-};
-
-#endif /* NATIVE_FLAGS_ARM_H */
--- a/src/jit/gencomp_arm.cpp
+++ b/src/jit/gencomp_arm.cpp
@ -7,9 +7,6 @@
 *  Adaptation for ARAnyM/ARM, copyright 2001-2015
 *    Milan Jurik, Jens Heitmann
 * 
- *  Adaptation for Basilisk II and improvements, copyright 2000-2005
- *    Gwenole Beauchesne
- *
 *  Basilisk II (C) 1997-2005 Christian Bauer
 *
 *  This program is free software; you can redistribute it and/or modify
@ -121,13 +118,14 @@
 #define DISABLE_I_ROXLW
 #define DISABLE_I_ROXRW
 //#define DISABLE_I_MULL
-#define DISABLE_I_FPP
-#define DISABLE_I_FBCC
-#define DISABLE_I_FSCC
+//#define DISABLE_I_FPP
+//#define DISABLE_I_FBCC
+//#define DISABLE_I_FSCC
 //#define DISABLE_I_MOVE16

 #define DISABLE_I_DIVU // DIVU works, but we have to think about exceptions. No big performance enhancement.

+
 #define RETURN "return 0;"

 #define BOOL_TYPE		"int"
@ -1222,9 +1220,6 @@ static void gen_dbcc(uae_u32 opcode, struct instr *curi, char* ssize) {
 	comprintf("\tarm_ADD_l_ri(PC_P, m68k_pc_offset);\n");
 	comprintf("\tm68k_pc_offset=0;\n");

-	start_brace();
-	comprintf("\tint nsrc = scratchie++;\n");
-
 	if (curi->cc >= 2) {
 		comprintf("\tmake_flags_live();\n"); /* Load the flags */
 	}
@ -1262,7 +1257,6 @@ static void gen_dbcc(uae_u32 opcode, struct instr *curi, char* ssize) {
 		  break;
 	  default: abort();
 	}
-	genastore("src", curi->smode, "srcreg", curi->size, "src");
 	gen_update_next_handler();
 }

@ -2071,7 +2065,6 @@ gen_opcode(unsigned long int opcode) {

 	case i_SBCD:
    failure;
-	/* I don't think so! */
 		break;

 	case i_ADD:
@ -2097,7 +2090,6 @@ gen_opcode(unsigned long int opcode) {

 	case i_ABCD:
    failure;
-	/* No BCD maths for me.... */
 		break;

 	case i_NEG:
@ -2116,7 +2108,6 @@ gen_opcode(unsigned long int opcode) {

 	case i_NBCD:
    failure;
-	/* Nope! */
 		break;

 	case i_CLR:
@ -2362,7 +2353,8 @@ gen_opcode(unsigned long int opcode) {
 		isjump;
 		genamode(curi->smode, "srcreg", curi->size, "src", 0, 0);
 		start_brace();
-		comprintf("\tuae_u32 retadd=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+m68k_pc_offset;\n");
+		comprintf(
+				"\tuae_u32 retadd=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+m68k_pc_offset;\n");
 		comprintf("\tint ret=scratchie++;\n"
 				"\tmov_l_ri(ret,retadd);\n"
 				"\tsub_l_ri(15,4);\n"
@ -2391,10 +2383,12 @@ gen_opcode(unsigned long int opcode) {
 #ifdef DISABLE_I_BSR
    failure;
 #endif
-		is_const_jump;
+		is_const_jump
+		;
 		genamode(curi->smode, "srcreg", curi->size, "src", 1, 0);
 		start_brace();
-		comprintf("\tuae_u32 retadd=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+m68k_pc_offset;\n");
+		comprintf(
+				"\tuae_u32 retadd=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+m68k_pc_offset;\n");
 		comprintf("\tint ret=scratchie++;\n"
 				"\tmov_l_ri(ret,retadd);\n"
 				"\tsub_l_ri(15,4);\n"
@ -2427,9 +2421,10 @@ gen_opcode(unsigned long int opcode) {
 			comprintf("\tv2 = get_const(src);\n");
 			comprintf("\tregister_branch(v1, v2, %d);\n", cond_codes[curi->cc]);
 			comprintf("\tmake_flags_live();\n"); /* Load the flags */
-			isjump;
+      isjump;
 		} else {
-			is_const_jump;
+			is_const_jump
+			;
 		}

 		switch (curi->cc) {
@ -3124,11 +3119,16 @@ generate_one_opcode(int rp, int noflags)
 			fprintf(stblfile, "{ NULL, 0x%08x, %ld }, /* %s */\n", flags,	opcode, name);
 			com_discard();
 		} else {
-		  const char *tbl = noflags ? "nf" : "ff";
 			printf ("/* %s */\n", outopcode (opcode));
-			fprintf(stblfile,	"{ op_%lx_%d_comp_%s, 0x%08x, %ld }, /* %s */\n",	opcode, postfix, tbl, flags, opcode, name);
-			fprintf(headerfile, "extern compop_func op_%lx_%d_comp_%s;\n", opcode, postfix, tbl);
-			printf("uae_u32 REGPARAM2 op_%lx_%d_comp_%s(uae_u32 opcode)\n{\n",	opcode, postfix, tbl);
+			if (noflags) {
+				fprintf(stblfile,	"{ op_%lx_%d_comp_nf, 0x%08x, %ld }, /* %s */\n",	opcode, postfix, flags, opcode, name);
+				fprintf(headerfile, "extern compop_func op_%lx_%d_comp_nf;\n", opcode, postfix);
+				printf("uae_u32 REGPARAM2 op_%lx_%d_comp_nf(uae_u32 opcode)\n{\n",	opcode, postfix);
+			} else {
+				fprintf(stblfile,	"{ op_%lx_%d_comp_ff, 0x%08x, %ld }, /* %s */\n",	opcode, postfix, flags, opcode, name);
+				fprintf(headerfile, "extern compop_func op_%lx_%d_comp_ff;\n",	opcode, postfix);
+				printf("uae_u32 REGPARAM2 op_%lx_%d_comp_ff(uae_u32 opcode)\n{\n",	opcode, postfix);
+			}
 			com_flush();
 		}
 	}
--- a/src/newcpu.cpp
+++ b/src/newcpu.cpp
@ -348,8 +348,8 @@ static void build_cpufunctbl (void)

 	write_log(_T("CPU=%d, FPU=%d%s, JIT%s=%d."),
 		currprefs.cpu_model,
-		currprefs.fpu_model, currprefs.fpu_model ? (currprefs.fpu_softfloat ? _T(" (softfloat)") : _T(" (host)")) : _T(""),
-	  currprefs.cachesize ? _T("=CPU") : _T(""),
+		currprefs.fpu_model, currprefs.fpu_model ? _T(" (host)") : _T(""),
+		currprefs.cachesize ? (currprefs.compfpu ? _T("=CPU/FPU") : _T("=CPU")) : _T(""),
 	  currprefs.cachesize);

  regs.address_space_mask = 0xffffffff;
@ -428,8 +428,7 @@ static int check_prefs_changed_cpu2(void)
 	|| currprefs.cpu_model != changed_prefs.cpu_model
 	|| currprefs.fpu_model != changed_prefs.fpu_model
 	|| currprefs.fpu_no_unimplemented != changed_prefs.fpu_no_unimplemented
-	|| currprefs.cpu_compatible != changed_prefs.cpu_compatible
-	|| currprefs.fpu_softfloat != changed_prefs.fpu_softfloat) {
+	|| currprefs.cpu_compatible != changed_prefs.cpu_compatible) {
 			cpu_prefs_changed_flag |= 1;
  }
  if (changed 
@ -1740,6 +1739,10 @@ bool is_hardreset(void)
 	return cpu_hardreset;
 }

+#ifdef USE_JIT_FPU
+static uae_u8 fp_buffer[9 * 8]; /* save_host_fp_regs stores d7-d15: nine 8-byte registers */
+#endif
+
 void m68k_go (int may_quit)
 {
  int hardboot = 1;
@ -1750,6 +1753,10 @@ void m68k_go (int may_quit)
 	  abort ();
  }

+#ifdef USE_JIT_FPU
+	save_host_fp_regs(fp_buffer);
+#endif
+
  reset_frame_rate_hack ();
  update_68k_cycles ();

@ -1808,7 +1815,6 @@ void m68k_go (int may_quit)
 			if (cpu_prefs_changed_flag & 1) {
 				uaecptr pc = m68k_getpc();
 				prefs_changed_cpu();
-				fpu_modechange();
 				build_cpufunctbl();
 				m68k_setpc_normal(pc);
 				fill_prefetch();
@ -1862,6 +1868,10 @@ void m68k_go (int may_quit)
  regs.pc_p = NULL;
  regs.pc_oldp = NULL;

+#ifdef USE_JIT_FPU
+  restore_host_fp_regs(fp_buffer);
+#endif
+
  in_m68k_go--;
 }

@ -1977,8 +1987,7 @@ uae_u8 *restore_cpu_extra (uae_u8 *src)
 		currprefs.m68k_speed = changed_prefs.m68k_speed = -1;
 	if (flags & 16)
 		currprefs.m68k_speed = changed_prefs.m68k_speed = (flags >> 24) * CYCLE_UNIT;
-	if (flags & 32)
-		currprefs.m68k_speed = changed_prefs.m68k_speed = -30;
+
 	return src;
 }

@ -1997,7 +2006,6 @@ uae_u8 *save_cpu_extra (int *len, uae_u8 *dstptr)
 	flags |= currprefs.m68k_speed < 0 ? 4 : 0;
 	flags |= currprefs.cachesize > 0 ? 8 : 0;
 	flags |= currprefs.m68k_speed > 0 ? 16 : 0;
-	flags |= currprefs.m68k_speed < -25 ? 32 : 0;
 	if (currprefs.m68k_speed > 0)
 		flags |= (currprefs.m68k_speed / CYCLE_UNIT) << 24;
 	save_u32 (flags);
--- a/src/osdep/amiberry.cpp
+++ b/src/osdep/amiberry.cpp
@ -1011,32 +1011,38 @@ int handle_msgpump()
 			break;

 		case SDL_KEYDOWN:
-			// If the Enter GUI key was pressed, handle it
-			if (enter_gui_key && rEvent.key.keysym.sym == enter_gui_key && rEvent.key.repeat == 0)
+#ifdef USE_SDL2
+			if (rEvent.key.repeat == 0)
 			{
-				inputdevice_add_inputcode(AKS_ENTERGUI, 1, nullptr);
-				break;
-			}
-			
-			// If the Quit emulator key was pressed, handle it
-			if (quit_key && rEvent.key.keysym.sym == quit_key && rEvent.key.repeat == 0)
-			{
-				inputdevice_add_inputcode(AKS_QUIT, 1, nullptr);
-				break;
-			}
+#endif
+				// If the Enter GUI key was pressed, handle it
+				if (enter_gui_key && rEvent.key.keysym.sym == enter_gui_key)
+				{
+					inputdevice_add_inputcode(AKS_ENTERGUI, 1, nullptr);
+					break;
+				}

-			if (action_replay_button && rEvent.key.keysym.sym == action_replay_button && rEvent.key.repeat == 0)
-			{
-				inputdevice_add_inputcode(AKS_FREEZEBUTTON, 1, nullptr);
-				break;
-			}
+				// If the Quit emulator key was pressed, handle it
+				if (quit_key && rEvent.key.keysym.sym == quit_key)
+				{
+					inputdevice_add_inputcode(AKS_QUIT, 1, nullptr);
+					break;
+				}

-			if (fullscreen_key && rEvent.key.keysym.sym == fullscreen_key && rEvent.key.repeat == 0)
-			{
-				inputdevice_add_inputcode(AKS_TOGGLEWINDOWEDFULLSCREEN, 1, nullptr);
-				break;
-			}
+				if (action_replay_button && rEvent.key.keysym.sym == action_replay_button)
+				{
+					inputdevice_add_inputcode(AKS_FREEZEBUTTON, 1, nullptr);
+					break;
+				}

+				if (fullscreen_key && rEvent.key.keysym.sym == fullscreen_key)
+				{
+					inputdevice_add_inputcode(AKS_TOGGLEWINDOWEDFULLSCREEN, 1, nullptr);
+					break;
+				}
+#ifdef USE_SDL2
+			}
+#endif
 			// If the reset combination was pressed, handle it
 #ifdef USE_SDL1
 			// Strangely in FBCON left window is seen as left alt ??
@ -1063,34 +1069,35 @@ int handle_msgpump()
 			if (rEvent.key.keysym.scancode == 58 && rEvent.key.keysym.sym == SDLK_UNKNOWN)
 				rEvent.key.keysym.sym = SDLK_CAPSLOCK;
 #endif
-
-			if (rEvent.key.keysym.sym ==  SDLK_CAPSLOCK && rEvent.key.repeat == 0)
-			{
-				// Treat CAPSLOCK as a toggle. If on, set off and vice/versa
-				ioctl(0, KDGKBLED, &kbd_flags);
-				ioctl(0, KDGETLED, &kbd_led_status);
-				if (kbd_flags & 07 & LED_CAP)
-				{
-					// On, so turn off
-					kbd_led_status &= ~LED_CAP;
-					kbd_flags &= ~LED_CAP;
-					inputdevice_do_keyboard(AK_CAPSLOCK, 0);
-				}
-				else
-				{
-					// Off, so turn on
-					kbd_led_status |= LED_CAP;
-					kbd_flags |= LED_CAP;
-					inputdevice_do_keyboard(AK_CAPSLOCK, 1);
-				}
-				ioctl(0, KDSETLED, kbd_led_status);
-				ioctl(0, KDSKBLED, kbd_flags);
-				break;
-			}
-		
-			// Handle all other keys
+#ifdef USE_SDL2
 			if (rEvent.key.repeat == 0)
 			{
+#endif
+				if (rEvent.key.keysym.sym == SDLK_CAPSLOCK)
+				{
+					// Treat CAPSLOCK as a toggle. If on, set off and vice/versa
+					ioctl(0, KDGKBLED, &kbd_flags);
+					ioctl(0, KDGETLED, &kbd_led_status);
+					if (kbd_flags & 07 & LED_CAP)
+					{
+						// On, so turn off
+						kbd_led_status &= ~LED_CAP;
+						kbd_flags &= ~LED_CAP;
+						inputdevice_do_keyboard(AK_CAPSLOCK, 0);
+					}
+					else
+					{
+						// Off, so turn on
+						kbd_led_status |= LED_CAP;
+						kbd_flags |= LED_CAP;
+						inputdevice_do_keyboard(AK_CAPSLOCK, 1);
+					}
+					ioctl(0, KDSETLED, kbd_led_status);
+					ioctl(0, KDSKBLED, kbd_flags);
+					break;
+				}
+
+				// Handle all other keys
 #ifdef USE_SDL1
 				if (keyboard_type == KEYCODE_UNK)
 					inputdevice_translatekeycode(0, rEvent.key.keysym.sym, 1);
@ -1098,12 +1105,14 @@ int handle_msgpump()
 					inputdevice_translatekeycode(0, rEvent.key.keysym.scancode, 1);
 #elif USE_SDL2
 				inputdevice_translatekeycode(0, rEvent.key.keysym.scancode, 1);
-#endif
 			}
+#endif
 			break;
 		case SDL_KEYUP:
+#ifdef USE_SDL2
 			if (rEvent.key.repeat == 0)
 			{
+#endif
 #ifdef USE_SDL1
 				if (keyboard_type == KEYCODE_UNK)
 					inputdevice_translatekeycode(0, rEvent.key.keysym.sym, 0);
@ -1111,8 +1120,8 @@ int handle_msgpump()
 					inputdevice_translatekeycode(0, rEvent.key.keysym.scancode, 0);
 #elif USE_SDL2
 				inputdevice_translatekeycode(0, rEvent.key.keysym.scancode, 0);
-#endif
 			}
+#endif
 			break;

 		case SDL_MOUSEBUTTONDOWN:
@ -1148,14 +1157,14 @@ int handle_msgpump()
 					const auto x = rEvent.motion.xrel;
 					const auto y = rEvent.motion.yrel;
 #if defined (ANDROIDSDL)
-    				if(rEvent.motion.x == 0 && x > -4)
-    					x = -4;
-    				if(rEvent.motion.y == 0 && y > -4)
-    					y = -4;
-    				if(rEvent.motion.x == currprefs.gfx_size.width - 1 && x < 4)
-    					x = 4;
-    				if(rEvent.motion.y == currprefs.gfx_size.height - 1 && y < 4)
-    					y = 4;
+					if (rEvent.motion.x == 0 && x > -4)
+						x = -4;
+					if (rEvent.motion.y == 0 && y > -4)
+						y = -4;
+					if (rEvent.motion.x == currprefs.gfx_size.width - 1 && x < 4)
+						x = 4;
+					if (rEvent.motion.y == currprefs.gfx_size.height - 1 && y < 4)
+						y = 4;
 #endif //ANDROIDSDL
 					setmousestate(0, 0, x * mouseScale, 0);
 					setmousestate(0, 1, y * mouseScale, 0);
--- a/src/osdep/amiberry_gfx.cpp
+++ b/src/osdep/amiberry_gfx.cpp
@ -359,6 +359,7 @@ int graphics_setup(void)

 void toggle_fullscreen()
 {
+#ifdef USE_SDL2
 	Uint32 FullscreenFlag = SDL_WINDOW_FULLSCREEN;
 	if (sdlWindow)
 	{
@ -366,6 +367,7 @@ void toggle_fullscreen()
 		SDL_SetWindowFullscreen(sdlWindow, is_fullscreen ? 0 : FullscreenFlag);
 		SDL_ShowCursor(is_fullscreen);
 	}
+#endif
 }

 #ifdef USE_DISPMANX
--- a/src/osdep/amiberry_rp9.cpp
+++ b/src/osdep/amiberry_rp9.cpp
@ -131,6 +131,7 @@ static void parse_compatibility(struct uae_prefs* p, xmlNode* node)
 				{
 					p->cachesize = MAX_JIT_CACHE;
 					p->address_space_24 = false;
+          p->compfpu = true;
 				}
 				else if (strcmp(reinterpret_cast<const char *>(content), "flexible-cpu-cycles") == 0)
 					p->cpu_compatible = false;
@ -315,12 +316,20 @ static void parse_peripheral(struct uae_prefs* p, xmlNode* node)
 				}
 				else if (strcmp(reinterpret_cast<const char *>(content), "jit") == 0)
 				{
-					const auto attr = xmlGetProp(curr_node, reinterpret_cast<const xmlChar *>("memory"));
+					auto attr = xmlGetProp(curr_node, reinterpret_cast<const xmlChar *>("memory"));
 					if (attr != nullptr)
 					{
 						p->cachesize = atoi(reinterpret_cast<const char *>(attr)) / 1024;
 						xmlFree(attr);
 					}
+					attr = xmlGetProp(curr_node, (const xmlChar *)_T("fpu"));
+					if (attr != NULL)
+					{
+						if (strcmp((const char *)attr, "false") == 0)
+							p->compfpu = false;
+						// xmlGetProp returns a copy owned by the caller; release it as done for "memory".
+						xmlFree(attr);
+					}
 				}
 				xmlFree(content);
 			}
--- a/src/osdep/arm_helper.s
+++ b/src/osdep/arm_helper.s
@ -2,6 +2,8 @@

 .arm

+.global save_host_fp_regs
+.global restore_host_fp_regs
 .global copy_screen_8bit
 .global copy_screen_16bit_swap_arm
 .global copy_screen_32bit_to_16bit_arm
@ -10,6 +12,20 @@

 .align 8

+@----------------------------------------------------------------
+@ save_host_fp_regs
+@----------------------------------------------------------------
+save_host_fp_regs:
+	vstmia    r0!, {d7-d15}
+  bx        lr
+
+@----------------------------------------------------------------
+@ restore_host_fp_regs
+@----------------------------------------------------------------
+restore_host_fp_regs:
+  vldmia    r0!, {d7-d15}
+  bx        lr
+

@----------------------------------------------------------------
@ copy_screen_8bit
--- a/src/osdep/config.h
+++ b/src/osdep/config.h
@ -6,50 +6,6 @@
  * Copyright 1995 - 1998 Bernd Schmidt
  */

-/*
- * Please note: Many things are configurable with command line parameters,
- * and you can put anything you can pass on the command line into a 
- * configuration file ~/.uaerc. Please read the documentation for more
- * information.
- * 
- * NOTE NOTE NOTE
- * Whenever you change something in this file, you have to "make clean"
- * afterwards.
- * Don't remove the '#' signs. If you want to enable something, move it out
- * of the C comment block, if you want to disable something, move it inside
- * the block.
- */
-
-/*
- * When USE_COMPILER is defined, a m68k->i386 instruction compiler will be
- * used. This is experimental. It has only been tested on a Linux/i386 ELF
- * machine, although it might work on other i386 Unices.
- * This is supposed to speed up application programs. It will not work very
- * well for hardware bangers like games and demos, in fact it will be much
- * slower. It can also be slower for some applications and/or benchmarks.
- * It needs a lot of tuning. Please let me know your results with this.
- * The second define, RELY_ON_LOADSEG_DETECTION, decides how the compiler 
- * tries to detect self-modifying code. If it is not set, the first bytes
- * of every compiled routine are used as checksum before executing the
- * routine. If it is set, the UAE filesystem will perform some checks to 
- * detect whether an executable is being loaded. This is less reliable
- * (it won't work if you don't use the harddisk emulation, so don't try to
- * use floppies or even the RAM disk), but much faster.
- *
- * @@@ NOTE: This option is unfortunately broken in this version. Don't
- * try to use it. @@@
- *
-#define USE_COMPILER
-#define RELY_ON_LOADSEG_DETECTION
- */
-
-/***************************************************************************
- * Operating system/machine specific options
- * Configure these for your CPU. The default settings should work on any
- * machine, but may not give optimal performance everywhere.
- * (These don't do very much yet, except HAVE_RDTSC
- */
-
 /*
 * [pismy] defines virtual keys
 * Still hard-coded but can be easily changed by recompiling the project...
--- a/src/osdep/gui/Navigation.cpp
+++ b/src/osdep/gui/Navigation.cpp
@ -107,15 +107,15 @@ static NavigationMap navMap[] =
 { "68020",          "CPU and FPU",    "68882",          "68010",          "68030" },
 { "68030",          "CPU and FPU",    "CPU internal",   "68020",          "68040" },
 { "68040",          "CPU and FPU",    "FPUstrict",      "68030",          "CPU24Bit" },
-{ "CPU24Bit",       "CPU and FPU",    "SoftFloat",      "68040",          "CPUComp" },
-{ "CPUComp",        "CPU and FPU",    "SoftFloat",      "CPU24Bit",       "JIT" },
-{ "JIT",            "CPU and FPU",    "SoftFloat",      "CPUComp",        "68000" },
-{ "FPUnone",        "68000",          "7 Mhz",          "SoftFloat",      "68881" },
+{ "CPU24Bit",       "CPU and FPU",    "FPUJIT",         "68040",          "CPUComp" },
+{ "CPUComp",        "CPU and FPU",    "FPUJIT",         "CPU24Bit",       "JIT" },
+{ "JIT",            "CPU and FPU",    "FPUJIT",         "CPUComp",        "68000" },
+{ "FPUnone",        "68000",          "7 Mhz",          "FPUJIT",         "68881" },
 { "68881",          "68010",          "14 Mhz",         "FPUnone",        "68882" },
 { "68882",          "68020",          "25 Mhz",         "68881",          "CPU internal" },
 { "CPU internal",   "68030",          "Fastest",        "68882",          "FPUstrict" },
-{ "FPUstrict",      "68040",          "Fastest",        "CPU internal",   "SoftFloat" },
-{ "SoftFloat",      "CPU24Bit",       "Fastest",        "FPUstrict",      "FPUnone" },
+{ "FPUstrict",      "68040",          "Fastest",        "CPU internal",   "FPUJIT" },
+{ "FPUJIT",         "CPU24Bit",       "Fastest",        "FPUstrict",      "FPUnone" },
 { "7 Mhz",          "FPUnone",        "CPU and FPU",    "Fastest",        "14 Mhz" },
 { "14 Mhz",         "68881",          "CPU and FPU",    "7 Mhz",          "25 Mhz" },
 { "25 Mhz",         "68882",          "CPU and FPU",    "14 Mhz",         "Fastest" },
--- a/src/osdep/gui/PanelCPU.cpp
+++ b/src/osdep/gui/PanelCPU.cpp
@ -38,7 +38,7 @@ static gcn::UaeRadioButton* optFPU68881;
 static gcn::UaeRadioButton* optFPU68882;
 static gcn::UaeRadioButton* optFPUinternal;
 static gcn::UaeCheckBox* chkFPUstrict;
-static gcn::UaeCheckBox* chkSoftFloat;
+static gcn::UaeCheckBox* chkFPUJIT;
 static gcn::Window* grpCPUSpeed;
 static gcn::UaeRadioButton* opt7Mhz;
 static gcn::UaeRadioButton* opt14Mhz;
@ -186,14 +186,23 @@ class JITActionListener : public gcn::ActionListener
 public:
 	void action(const gcn::ActionEvent& actionEvent) override
 	{
-		if (chkJIT->isSelected())
+		if (actionEvent.getSource() == chkJIT)
 		{
-			changed_prefs.cpu_compatible = false;
-			changed_prefs.cachesize = MAX_JIT_CACHE;
+			if (chkJIT->isSelected())
+			{
+				changed_prefs.cpu_compatible = 0;
+				changed_prefs.cachesize = MAX_JIT_CACHE;
+				changed_prefs.compfpu = true;
+			}
+			else
+			{
+				changed_prefs.cachesize = 0;
+				changed_prefs.compfpu = false;
+			}
 		}
-		else
+		else if (actionEvent.getSource() == chkFPUJIT)
 		{
-			changed_prefs.cachesize = 0;
+			changed_prefs.compfpu = chkFPUJIT->isSelected();
 		}
 		RefreshPanelCPU();
 	}
@ -209,10 +218,6 @@ public:
 		if (actionEvent.getSource() == chkFPUstrict) {
 			changed_prefs.fpu_strict = chkFPUstrict->isSelected();

-		}
-		else if (actionEvent.getSource() == chkSoftFloat) {
-			changed_prefs.fpu_softfloat = chkSoftFloat->isSelected();
-
 		}
 		RefreshPanelCPU();
 	}
@ -285,9 +290,9 @@ void InitPanelCPU(const struct _ConfigCategory& category)
 	chkFPUstrict->setId("FPUstrict");
 	chkFPUstrict->addActionListener(fpuActionListener);

-	chkSoftFloat = new gcn::UaeCheckBox("Softfloat FPU emul.", true);
-	chkSoftFloat->setId("SoftFloat");
-	chkSoftFloat->addActionListener(fpuActionListener);
+	chkFPUJIT = new gcn::UaeCheckBox("FPU JIT", true);
+	chkFPUJIT->setId("FPUJIT");
+	chkFPUJIT->addActionListener(jitActionListener);

 	grpFPU = new gcn::Window("FPU");
 	grpFPU->setPosition(DISTANCE_BORDER + grpCPU->getWidth() + DISTANCE_NEXT_X, DISTANCE_BORDER);
@ -296,7 +301,7 @@ void InitPanelCPU(const struct _ConfigCategory& category)
 	grpFPU->add(optFPU68882, 5, 70);
 	grpFPU->add(optFPUinternal, 5, 100);
 	grpFPU->add(chkFPUstrict, 5, 140);
-	grpFPU->add(chkSoftFloat, 5, 170);
+	grpFPU->add(chkFPUJIT, 5, 170);
 	grpFPU->setMovable(false);
 	grpFPU->setSize(185, 215);
 	grpFPU->setBaseColor(gui_baseCol);
@ -358,7 +363,7 @@ void ExitPanelCPU()
 	delete optFPU68882;
 	delete optFPUinternal;
 	delete chkFPUstrict;
-	delete chkSoftFloat;
+	delete chkFPUJIT;
 	delete grpFPU;
 	delete fpuButtonActionListener;
 	delete fpuActionListener;
@ -413,7 +418,8 @@ void RefreshPanelCPU()
 	optFPUinternal->setEnabled(changed_prefs.cpu_model == 68040);

 	chkFPUstrict->setSelected(changed_prefs.fpu_strict);
-	chkSoftFloat->setSelected(changed_prefs.fpu_softfloat);
+	chkFPUJIT->setSelected(changed_prefs.compfpu);
+	chkFPUJIT->setEnabled(changed_prefs.cachesize > 0);

 	if (changed_prefs.m68k_speed == M68K_SPEED_7MHZ_CYCLES)
 		opt7Mhz->setSelected(true);
@ -440,8 +446,6 @@ bool HelpPanelCPU(std::vector<std::string> &helptext)
 	helptext.emplace_back("");
 	helptext.emplace_back("The available FPU models depending on the selected CPU.");
 	helptext.emplace_back("The option \"More compatible\" activates more accurate rounding and compare of two floats.");
-	helptext.emplace_back("\"Softfloat FPU emul.\" aktivates the FPU emulation from QEMU. This is more accurate,");
-	helptext.emplace_back("but a bit slower.");
 	helptext.emplace_back("");
 	helptext.emplace_back("With \"CPU Speed\" you can choose the clock rate of the Amiga.");
 	helptext.emplace_back("Use 7MHz for A500 games or 14MHz for A1200 ones. Fastest uses more emulation time");
--- a/src/osdep/neon_helper.s
+++ b/src/osdep/neon_helper.s
@ -2,6 +2,8 @@

 .arm

+.global save_host_fp_regs
+.global restore_host_fp_regs
 .global copy_screen_8bit
 .global copy_screen_16bit_swap
 .global copy_screen_32bit_to_16bit_neon
@ -16,6 +18,20 @@

 .align 8

+@----------------------------------------------------------------
+@ save_host_fp_regs
+@----------------------------------------------------------------
+save_host_fp_regs:
+	vstmia    r0!, {d7-d15}
+  bx        lr
+
+@----------------------------------------------------------------
+@ restore_host_fp_regs
+@----------------------------------------------------------------
+restore_host_fp_regs:
+  vldmia    r0!, {d7-d15}
+  bx        lr
+

@----------------------------------------------------------------
@ copy_screen_8bit
--- a/src/osdep/picasso96.cpp
+++ b/src/osdep/picasso96.cpp
@ -1230,7 +1230,6 @@ static uae_u32 REGPARAM2 picasso_SetSpriteColor (TrapContext *ctx)
 	return 0;
 }

-
 /*
 SetSpriteImage:
 Synopsis: SetSpriteImage(bi, RGBFormat);
@ -1942,7 +1941,7 @@ static void init_picasso_screen(void)
 * This function is called whenever another ModeInfo has to be set. This
 * function simply sets up the CRTC and TS registers to generate the
 * timing used for that screen mode. You should not set the DAC, clocks
- * or linear start adress. They will be set when appropriate by their
+ * or linear start address. They will be set when appropriate by their
 * own functions.
 */
 static uae_u32 REGPARAM2 picasso_SetGC(TrapContext *ctx)
@ -2103,6 +2102,7 @@ static uae_u32 REGPARAM2 picasso_InvertRect(TrapContext *ctx)

 	if (NOBLITTER)
 		return 0;
+
 	if (CopyRenderInfoStructureA2U(ctx, renderinfo, &ri)) {
 		P96TRACE((_T("InvertRect %dbpp 0x%lx\n"), Bpp, (long)mask));

@ -2451,6 +2451,7 @@ static uae_u32 REGPARAM2 picasso_BlitPattern(TrapContext *ctx)

 	if (NOBLITTER)
 		return 0;
+
 	if (CopyRenderInfoStructureA2U(ctx, rinf, &ri) && CopyPatternStructureA2U(ctx, pinf, &pattern)) {
 		if (!validatecoords(ctx, &ri, &X, &Y, &W, &H))
 			return 0;
@ -2497,6 +2498,7 @@ static uae_u32 REGPARAM2 picasso_BlitPattern(TrapContext *ctx)
 				unsigned long cols;

 				d = do_get_mem_word(((uae_u16 *)pattern.Memory) + prow);
+
 				if (xshift != 0)
 					d = (d << xshift) | (d >> (16 - xshift));

@ -3069,6 +3071,7 @@ static uae_u32 REGPARAM2 picasso_BlitPlanar2Direct(TrapContext *ctx)

 	if (NOBLITTER)
 		return 0;
+
 	if (minterm != 0x0C) {
 		write_log(_T("WARNING - BlitPlanar2Direct() has unhandled op-code 0x%x. Using fall-back routine.\n"), minterm);
 		return 0;
--- a/src/osdep/sysconfig.h
+++ b/src/osdep/sysconfig.h
@ -15,7 +15,7 @@
 #define UAE_FILESYS_THREADS
 #define AUTOCONFIG /* autoconfig support, fast ram, harddrives etc.. */
 #define JIT /* JIT compiler support */
-/* #define USE_JIT_FPU */
+#define USE_JIT_FPU
 /* #define NATMEM_OFFSET natmem_offset */
 /* #define CATWEASEL */ /* Catweasel MK2/3 support */
 /* #define AHI */ /* AHI sound emulation */
--- a/src/osdep/target.h
+++ b/src/osdep/target.h
@ -145,3 +145,14 @@ STATIC_INLINE void atomic_set(volatile uae_atomic *p, uae_u32 v)
 {
 	__sync_lock_test_and_set(p, v);
 }
+
+#ifdef USE_JIT_FPU
+#ifdef __cplusplus
+extern "C" {
+#endif
+	void save_host_fp_regs(void* buf);
+	void restore_host_fp_regs(void* buf);
+#ifdef __cplusplus
+}
+#endif
+#endif
--- a/src/softfloat/softfloat-macros.h
+++ b/src/softfloat/softfloat-macros.h
@ -1,793 +0,0 @@
-/*
- * QEMU float support macros
- *
- * The code in this source file is derived from release 2a of the SoftFloat
- * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
- * some later contributions) are provided under that license, as detailed below.
- * It has subsequently been modified by contributors to the QEMU Project,
- * so some portions are provided under:
- *  the SoftFloat-2a license
- *  the BSD license
- *  GPL-v2-or-later
- *
- * Any future contributions to this file after December 1st 2014 will be
- * taken to be licensed under the Softfloat-2a license unless specifically
- * indicated otherwise.
- */
-
-/*
-===============================================================================
-This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
-Arithmetic Package, Release 2a.
-
-Written by John R. Hauser.  This work was made possible in part by the
-International Computer Science Institute, located at Suite 600, 1947 Center
-Street, Berkeley, California 94704.  Funding was partially provided by the
-National Science Foundation under grant MIP-9311980.  The original version
-of this code was written as part of a project to build a fixed-point vector
-processor in collaboration with the University of California at Berkeley,
-overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
-is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
-arithmetic/SoftFloat.html'.
-
-THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
-has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
-TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
-PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
-AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
-
-Derivative works are acceptable, even for commercial purposes, so long as
-(1) they include prominent notice that the work is derivative, and (2) they
-include prominent notice akin to these four paragraphs for those parts of
-this code that are retained.
-
-===============================================================================
-*/
-
-/* BSD licensing:
- * Copyright (c) 2006, Fabrice Bellard
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software without
- * specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
- * THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/* Portions of this work are licensed under the terms of the GNU GPL,
- * version 2 or later. See the COPYING file in the top-level directory.
- */
-
-/*----------------------------------------------------------------------------
-| This macro tests for minimum version of the GNU C compiler.
-*----------------------------------------------------------------------------*/
-#if defined(__GNUC__) && defined(__GNUC_MINOR__)
-# define SOFTFLOAT_GNUC_PREREQ(maj, min) \
-         ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min))
-#else
-# define SOFTFLOAT_GNUC_PREREQ(maj, min) 0
-#endif
-
-
-/*----------------------------------------------------------------------------
-| Shifts `a' right by the number of bits given in `count'.  If any nonzero
-| bits are shifted off, they are ``jammed'' into the least significant bit of
-| the result by setting the least significant bit to 1.  The value of `count'
-| can be arbitrarily large; in particular, if `count' is greater than 32, the
-| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
-| The result is stored in the location pointed to by `zPtr'.
-*----------------------------------------------------------------------------*/
-
-static inline void shift32RightJamming(uint32_t a, int count, uint32_t *zPtr)
-{
-    uint32_t z;
-
-    if ( count == 0 ) {
-        z = a;
-    }
-    else if ( count < 32 ) {
-        z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );
-    }
-    else {
-        z = ( a != 0 );
-    }
-    *zPtr = z;
-
-}
-
-/*----------------------------------------------------------------------------
-| Shifts `a' right by the number of bits given in `count'.  If any nonzero
-| bits are shifted off, they are ``jammed'' into the least significant bit of
-| the result by setting the least significant bit to 1.  The value of `count'
-| can be arbitrarily large; in particular, if `count' is greater than 64, the
-| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
-| The result is stored in the location pointed to by `zPtr'.
-*----------------------------------------------------------------------------*/
-
-static inline void shift64RightJamming(uint64_t a, int count, uint64_t *zPtr)
-{
-    uint64_t z;
-
-    if ( count == 0 ) {
-        z = a;
-    }
-    else if ( count < 64 ) {
-        z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 );
-    }
-    else {
-        z = ( a != 0 );
-    }
-    *zPtr = z;
-
-}
-
-/*----------------------------------------------------------------------------
-| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
-| _plus_ the number of bits given in `count'.  The shifted result is at most
-| 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'.  The
-| bits shifted off form a second 64-bit result as follows:  The _last_ bit
-| shifted off is the most-significant bit of the extra result, and the other
-| 63 bits of the extra result are all zero if and only if _all_but_the_last_
-| bits shifted off were all zero.  This extra result is stored in the location
-| pointed to by `z1Ptr'.  The value of `count' can be arbitrarily large.
-|     (This routine makes more sense if `a0' and `a1' are considered to form a
-| fixed-point value with binary point between `a0' and `a1'.  This fixed-point
-| value is shifted right by the number of bits given in `count', and the
-| integer part of the result is returned at the location pointed to by
-| `z0Ptr'.  The fractional part of the result may be slightly corrupted as
-| described above, and is returned at the location pointed to by `z1Ptr'.)
-*----------------------------------------------------------------------------*/
-
-static inline void
- shift64ExtraRightJamming(
-     uint64_t a0, uint64_t a1, int count, uint64_t *z0Ptr, uint64_t *z1Ptr)
-{
-    uint64_t z0, z1;
-    int8_t negCount = ( - count ) & 63;
-
-    if ( count == 0 ) {
-        z1 = a1;
-        z0 = a0;
-    }
-    else if ( count < 64 ) {
-        z1 = ( a0<<negCount ) | ( a1 != 0 );
-        z0 = a0>>count;
-    }
-    else {
-        if ( count == 64 ) {
-            z1 = a0 | ( a1 != 0 );
-        }
-        else {
-            z1 = ( ( a0 | a1 ) != 0 );
-        }
-        z0 = 0;
-    }
-    *z1Ptr = z1;
-    *z0Ptr = z0;
-
-}
-
-/*----------------------------------------------------------------------------
-| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
-| number of bits given in `count'.  Any bits shifted off are lost.  The value
-| of `count' can be arbitrarily large; in particular, if `count' is greater
-| than 128, the result will be 0.  The result is broken into two 64-bit pieces
-| which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
-*----------------------------------------------------------------------------*/
-
-static inline void
- shift128Right(
-     uint64_t a0, uint64_t a1, int count, uint64_t *z0Ptr, uint64_t *z1Ptr)
-{
-    uint64_t z0, z1;
-    int8_t negCount = ( - count ) & 63;
-
-    if ( count == 0 ) {
-        z1 = a1;
-        z0 = a0;
-    }
-    else if ( count < 64 ) {
-        z1 = ( a0<<negCount ) | ( a1>>count );
-        z0 = a0>>count;
-    }
-    else {
-        z1 = (count < 128) ? (a0 >> (count & 63)) : 0;
-        z0 = 0;
-    }
-    *z1Ptr = z1;
-    *z0Ptr = z0;
-
-}
-
-/*----------------------------------------------------------------------------
-| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
-| number of bits given in `count'.  If any nonzero bits are shifted off, they
-| are ``jammed'' into the least significant bit of the result by setting the
-| least significant bit to 1.  The value of `count' can be arbitrarily large;
-| in particular, if `count' is greater than 128, the result will be either
-| 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
-| nonzero.  The result is broken into two 64-bit pieces which are stored at
-| the locations pointed to by `z0Ptr' and `z1Ptr'.
-*----------------------------------------------------------------------------*/
-
-static inline void
- shift128RightJamming(
-     uint64_t a0, uint64_t a1, int count, uint64_t *z0Ptr, uint64_t *z1Ptr)
-{
-    uint64_t z0, z1;
-    int8_t negCount = ( - count ) & 63;
-
-    if ( count == 0 ) {
-        z1 = a1;
-        z0 = a0;
-    }
-    else if ( count < 64 ) {
-        z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );
-        z0 = a0>>count;
-    }
-    else {
-        if ( count == 64 ) {
-            z1 = a0 | ( a1 != 0 );
-        }
-        else if ( count < 128 ) {
-            z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );
-        }
-        else {
-            z1 = ( ( a0 | a1 ) != 0 );
-        }
-        z0 = 0;
-    }
-    *z1Ptr = z1;
-    *z0Ptr = z0;
-
-}
-
-/*----------------------------------------------------------------------------
-| Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
-| by 64 _plus_ the number of bits given in `count'.  The shifted result is
-| at most 128 nonzero bits; these are broken into two 64-bit pieces which are
-| stored at the locations pointed to by `z0Ptr' and `z1Ptr'.  The bits shifted
-| off form a third 64-bit result as follows:  The _last_ bit shifted off is
-| the most-significant bit of the extra result, and the other 63 bits of the
-| extra result are all zero if and only if _all_but_the_last_ bits shifted off
-| were all zero.  This extra result is stored in the location pointed to by
-| `z2Ptr'.  The value of `count' can be arbitrarily large.
-|     (This routine makes more sense if `a0', `a1', and `a2' are considered
-| to form a fixed-point value with binary point between `a1' and `a2'.  This
-| fixed-point value is shifted right by the number of bits given in `count',
-| and the integer part of the result is returned at the locations pointed to
-| by `z0Ptr' and `z1Ptr'.  The fractional part of the result may be slightly
-| corrupted as described above, and is returned at the location pointed to by
-| `z2Ptr'.)
-*----------------------------------------------------------------------------*/
-
-static inline void
- shift128ExtraRightJamming(
-     uint64_t a0,
-     uint64_t a1,
-     uint64_t a2,
-     int count,
-     uint64_t *z0Ptr,
-     uint64_t *z1Ptr,
-     uint64_t *z2Ptr
- )
-{
-    uint64_t z0, z1, z2;
-    int8_t negCount = ( - count ) & 63;
-
-    if ( count == 0 ) {
-        z2 = a2;
-        z1 = a1;
-        z0 = a0;
-    }
-    else {
-        if ( count < 64 ) {
-            z2 = a1<<negCount;
-            z1 = ( a0<<negCount ) | ( a1>>count );
-            z0 = a0>>count;
-        }
-        else {
-            if ( count == 64 ) {
-                z2 = a1;
-                z1 = a0;
-            }
-            else {
-                a2 |= a1;
-                if ( count < 128 ) {
-                    z2 = a0<<negCount;
-                    z1 = a0>>( count & 63 );
-                }
-                else {
-                    z2 = ( count == 128 ) ? a0 : ( a0 != 0 );
-                    z1 = 0;
-                }
-            }
-            z0 = 0;
-        }
-        z2 |= ( a2 != 0 );
-    }
-    *z2Ptr = z2;
-    *z1Ptr = z1;
-    *z0Ptr = z0;
-
-}
-
-/*----------------------------------------------------------------------------
-| Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
-| number of bits given in `count'.  Any bits shifted off are lost.  The value
-| of `count' must be less than 64.  The result is broken into two 64-bit
-| pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
-*----------------------------------------------------------------------------*/
-
-static inline void
- shortShift128Left(
-     uint64_t a0, uint64_t a1, int count, uint64_t *z0Ptr, uint64_t *z1Ptr)
-{
-
-    *z1Ptr = a1<<count;
-    *z0Ptr =
-        ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );
-
-}
-
-/*----------------------------------------------------------------------------
-| Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
-| by the number of bits given in `count'.  Any bits shifted off are lost.
-| The value of `count' must be less than 64.  The result is broken into three
-| 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
-| `z1Ptr', and `z2Ptr'.
-*----------------------------------------------------------------------------*/
-
-static inline void
- shortShift192Left(
-     uint64_t a0,
-     uint64_t a1,
-     uint64_t a2,
-     int count,
-     uint64_t *z0Ptr,
-     uint64_t *z1Ptr,
-     uint64_t *z2Ptr
- )
-{
-    uint64_t z0, z1, z2;
-    int8_t negCount;
-
-    z2 = a2<<count;
-    z1 = a1<<count;
-    z0 = a0<<count;
-    if ( 0 < count ) {
-        negCount = ( ( - count ) & 63 );
-        z1 |= a2>>negCount;
-        z0 |= a1>>negCount;
-    }
-    *z2Ptr = z2;
-    *z1Ptr = z1;
-    *z0Ptr = z0;
-
-}
-
-/*----------------------------------------------------------------------------
-| Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
-| value formed by concatenating `b0' and `b1'.  Addition is modulo 2^128, so
-| any carry out is lost.  The result is broken into two 64-bit pieces which
-| are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
-*----------------------------------------------------------------------------*/
-
-static inline void
- add128(
-     uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t *z0Ptr, uint64_t *z1Ptr )
-{
-    uint64_t z1;
-
-    z1 = a1 + b1;
-    *z1Ptr = z1;
-    *z0Ptr = a0 + b0 + ( z1 < a1 );
-
-}
-
-/*----------------------------------------------------------------------------
-| Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
-| 192-bit value formed by concatenating `b0', `b1', and `b2'.  Addition is
-| modulo 2^192, so any carry out is lost.  The result is broken into three
-| 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
-| `z1Ptr', and `z2Ptr'.
-*----------------------------------------------------------------------------*/
-
-static inline void
- add192(
-     uint64_t a0,
-     uint64_t a1,
-     uint64_t a2,
-     uint64_t b0,
-     uint64_t b1,
-     uint64_t b2,
-     uint64_t *z0Ptr,
-     uint64_t *z1Ptr,
-     uint64_t *z2Ptr
- )
-{
-    uint64_t z0, z1, z2;
-    uint8_t carry0, carry1;
-
-    z2 = a2 + b2;
-    carry1 = ( z2 < a2 );
-    z1 = a1 + b1;
-    carry0 = ( z1 < a1 );
-    z0 = a0 + b0;
-    z1 += carry1;
-    z0 += ( z1 < carry1 );
-    z0 += carry0;
-    *z2Ptr = z2;
-    *z1Ptr = z1;
-    *z0Ptr = z0;
-
-}
-
-/*----------------------------------------------------------------------------
-| Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
-| 128-bit value formed by concatenating `a0' and `a1'.  Subtraction is modulo
-| 2^128, so any borrow out (carry out) is lost.  The result is broken into two
-| 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
-| `z1Ptr'.
-*----------------------------------------------------------------------------*/
-
-static inline void
- sub128(
-     uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t *z0Ptr, uint64_t *z1Ptr )
-{
-
-    *z1Ptr = a1 - b1;
-    *z0Ptr = a0 - b0 - ( a1 < b1 );
-
-}
-
-/*----------------------------------------------------------------------------
-| Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
-| from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
-| Subtraction is modulo 2^192, so any borrow out (carry out) is lost.  The
-| result is broken into three 64-bit pieces which are stored at the locations
-| pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
-*----------------------------------------------------------------------------*/
-
-static inline void
- sub192(
-     uint64_t a0,
-     uint64_t a1,
-     uint64_t a2,
-     uint64_t b0,
-     uint64_t b1,
-     uint64_t b2,
-     uint64_t *z0Ptr,
-     uint64_t *z1Ptr,
-     uint64_t *z2Ptr
- )
-{
-    uint64_t z0, z1, z2;
-    uint8_t borrow0, borrow1;
-
-    z2 = a2 - b2;
-    borrow1 = ( a2 < b2 );
-    z1 = a1 - b1;
-    borrow0 = ( a1 < b1 );
-    z0 = a0 - b0;
-    z0 -= ( z1 < borrow1 );
-    z1 -= borrow1;
-    z0 -= borrow0;
-    *z2Ptr = z2;
-    *z1Ptr = z1;
-    *z0Ptr = z0;
-
-}
-
-/*----------------------------------------------------------------------------
-| Multiplies `a' by `b' to obtain a 128-bit product.  The product is broken
-| into two 64-bit pieces which are stored at the locations pointed to by
-| `z0Ptr' and `z1Ptr'.
-*----------------------------------------------------------------------------*/
-
-static inline void mul64To128( uint64_t a, uint64_t b, uint64_t *z0Ptr, uint64_t *z1Ptr )
-{
-    uint32_t aHigh, aLow, bHigh, bLow;
-    uint64_t z0, zMiddleA, zMiddleB, z1;
-
-    aLow = a;
-    aHigh = a>>32;
-    bLow = b;
-    bHigh = b>>32;
-    z1 = ( (uint64_t) aLow ) * bLow;
-    zMiddleA = ( (uint64_t) aLow ) * bHigh;
-    zMiddleB = ( (uint64_t) aHigh ) * bLow;
-    z0 = ( (uint64_t) aHigh ) * bHigh;
-    zMiddleA += zMiddleB;
-    z0 += ( ( (uint64_t) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );
-    zMiddleA <<= 32;
-    z1 += zMiddleA;
-    z0 += ( z1 < zMiddleA );
-    *z1Ptr = z1;
-    *z0Ptr = z0;
-
-}
-
-/*----------------------------------------------------------------------------
-| Multiplies the 128-bit value formed by concatenating `a0' and `a1' by
-| `b' to obtain a 192-bit product.  The product is broken into three 64-bit
-| pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
-| `z2Ptr'.
-*----------------------------------------------------------------------------*/
-
-static inline void
- mul128By64To192(
-     uint64_t a0,
-     uint64_t a1,
-     uint64_t b,
-     uint64_t *z0Ptr,
-     uint64_t *z1Ptr,
-     uint64_t *z2Ptr
- )
-{
-    uint64_t z0, z1, z2, more1;
-
-    mul64To128( a1, b, &z1, &z2 );
-    mul64To128( a0, b, &z0, &more1 );
-    add128( z0, more1, 0, z1, &z0, &z1 );
-    *z2Ptr = z2;
-    *z1Ptr = z1;
-    *z0Ptr = z0;
-
-}
-
-/*----------------------------------------------------------------------------
-| Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
-| 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
-| product.  The product is broken into four 64-bit pieces which are stored at
-| the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
-*----------------------------------------------------------------------------*/
-
-static inline void
- mul128To256(
-     uint64_t a0,
-     uint64_t a1,
-     uint64_t b0,
-     uint64_t b1,
-     uint64_t *z0Ptr,
-     uint64_t *z1Ptr,
-     uint64_t *z2Ptr,
-     uint64_t *z3Ptr
- )
-{
-    uint64_t z0, z1, z2, z3;
-    uint64_t more1, more2;
-
-    mul64To128( a1, b1, &z2, &z3 );
-    mul64To128( a1, b0, &z1, &more2 );
-    add128( z1, more2, 0, z2, &z1, &z2 );
-    mul64To128( a0, b0, &z0, &more1 );
-    add128( z0, more1, 0, z1, &z0, &z1 );
-    mul64To128( a0, b1, &more1, &more2 );
-    add128( more1, more2, 0, z2, &more1, &z2 );
-    add128( z0, z1, 0, more1, &z0, &z1 );
-    *z3Ptr = z3;
-    *z2Ptr = z2;
-    *z1Ptr = z1;
-    *z0Ptr = z0;
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns an approximation to the 64-bit integer quotient obtained by dividing
-| `b' into the 128-bit value formed by concatenating `a0' and `a1'.  The
-| divisor `b' must be at least 2^63.  If q is the exact quotient truncated
-| toward zero, the approximation returned lies between q and q + 2 inclusive.
-| If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
-| unsigned integer is returned.
-*----------------------------------------------------------------------------*/
-
-static uint64_t estimateDiv128To64( uint64_t a0, uint64_t a1, uint64_t b )
-{
-    uint64_t b0, b1;
-    uint64_t rem0, rem1, term0, term1;
-    uint64_t z;
-
-    if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF );
-    b0 = b>>32;
-    z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32;
-    mul64To128( b, z, &term0, &term1 );
-    sub128( a0, a1, term0, term1, &rem0, &rem1 );
-    while ( ( (int64_t) rem0 ) < 0 ) {
-        z -= LIT64( 0x100000000 );
-        b1 = b<<32;
-        add128( rem0, rem1, b0, b1, &rem0, &rem1 );
-    }
-    rem0 = ( rem0<<32 ) | ( rem1>>32 );
-    z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0;
-    return z;
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns an approximation to the square root of the 32-bit significand given
-| by `a'.  Considered as an integer, `a' must be at least 2^31.  If bit 0 of
-| `aExp' (the least significant bit) is 1, the integer returned approximates
-| 2^31*sqrt(`a'/2^31), where `a' is considered an integer.  If bit 0 of `aExp'
-| is 0, the integer returned approximates 2^31*sqrt(`a'/2^30).  In either
-| case, the approximation returned lies strictly within +/-2 of the exact
-| value.
-*----------------------------------------------------------------------------*/
-
-static uint32_t estimateSqrt32(int aExp, uint32_t a)
-{
-    static const uint16_t sqrtOddAdjustments[] = {
-        0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
-        0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
-    };
-    static const uint16_t sqrtEvenAdjustments[] = {
-        0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
-        0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
-    };
-    int8_t index;
-    uint32_t z;
-
-    index = ( a>>27 ) & 15;
-    if ( aExp & 1 ) {
-        z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ (int)index ];
-        z = ( ( a / z )<<14 ) + ( z<<15 );
-        a >>= 1;
-    }
-    else {
-        z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ (int)index ];
-        z = a / z + z;
-        z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
-        if ( z <= a ) return (uint32_t) ( ( (int32_t) a )>>1 );
-    }
-    return ( (uint32_t) ( ( ( (uint64_t) a )<<31 ) / z ) ) + ( z>>1 );
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns the number of leading 0 bits before the most-significant 1 bit of
-| `a'.  If `a' is zero, 32 is returned.
-*----------------------------------------------------------------------------*/
-
-static inline int8_t countLeadingZeros32( uint32_t a )
-{
-#if SOFTFLOAT_GNUC_PREREQ(3, 4)
-    if (a) {
-        return __builtin_clz(a);
-    } else {
-        return 32;
-    }
-#else
-    static const int8_t countLeadingZerosHigh[] = {
-        8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
-        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-    };
-    int8_t shiftCount;
-
-    shiftCount = 0;
-    if ( a < 0x10000 ) {
-        shiftCount += 16;
-        a <<= 16;
-    }
-    if ( a < 0x1000000 ) {
-        shiftCount += 8;
-        a <<= 8;
-    }
-    shiftCount += countLeadingZerosHigh[ a>>24 ];
-    return shiftCount;
-#endif
-}
-
-/*----------------------------------------------------------------------------
-| Returns the number of leading 0 bits before the most-significant 1 bit of
-| `a'.  If `a' is zero, 64 is returned.
-*----------------------------------------------------------------------------*/
-
-static inline int8_t countLeadingZeros64( uint64_t a )
-{
-#if SOFTFLOAT_GNUC_PREREQ(3, 4)
-    if (a) {
-        return __builtin_clzll(a);
-    } else {
-        return 64;
-    }
-#else
-    int8_t shiftCount;
-
-    shiftCount = 0;
-    if ( a < ( (uint64_t) 1 )<<32 ) {
-        shiftCount += 32;
-    }
-    else {
-        a >>= 32;
-    }
-    shiftCount += countLeadingZeros32( a );
-    return shiftCount;
-#endif
-}
-
-/*----------------------------------------------------------------------------
-| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
-| is equal to the 128-bit value formed by concatenating `b0' and `b1'.
-| Otherwise, returns 0.
-*----------------------------------------------------------------------------*/
-
-static inline flag eq128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
-{
-
-    return ( a0 == b0 ) && ( a1 == b1 );
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
-| than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
-| Otherwise, returns 0.
-*----------------------------------------------------------------------------*/
-
-static inline flag le128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
-{
-
-    return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
-| than the 128-bit value formed by concatenating `b0' and `b1'.  Otherwise,
-| returns 0.
-*----------------------------------------------------------------------------*/
-
-static inline flag lt128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
-{
-
-    return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is
-| not equal to the 128-bit value formed by concatenating `b0' and `b1'.
-| Otherwise, returns 0.
-*----------------------------------------------------------------------------*/
-
-static inline flag ne128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
-{
-
-    return ( a0 != b0 ) || ( a1 != b1 );
-
-}
--- a/src/softfloat/softfloat-specialize.h
+++ b/src/softfloat/softfloat-specialize.h
@ -1,443 +0,0 @@
-/*
- * QEMU float support
- *
- * The code in this source file is derived from release 2a of the SoftFloat
- * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
- * some later contributions) are provided under that license, as detailed below.
- * It has subsequently been modified by contributors to the QEMU Project,
- * so some portions are provided under:
- *  the SoftFloat-2a license
- *  the BSD license
- *  GPL-v2-or-later
- *
- * Any future contributions to this file after December 1st 2014 will be
- * taken to be licensed under the Softfloat-2a license unless specifically
- * indicated otherwise.
- */
-
-/*
-===============================================================================
-This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
-Arithmetic Package, Release 2a.
-
-Written by John R. Hauser.  This work was made possible in part by the
-International Computer Science Institute, located at Suite 600, 1947 Center
-Street, Berkeley, California 94704.  Funding was partially provided by the
-National Science Foundation under grant MIP-9311980.  The original version
-of this code was written as part of a project to build a fixed-point vector
-processor in collaboration with the University of California at Berkeley,
-overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
-is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
-arithmetic/SoftFloat.html'.
-
-THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
-has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
-TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
-PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
-AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
-
-Derivative works are acceptable, even for commercial purposes, so long as
-(1) they include prominent notice that the work is derivative, and (2) they
-include prominent notice akin to these four paragraphs for those parts of
-this code that are retained.
-
-===============================================================================
-*/
-
-/* BSD licensing:
- * Copyright (c) 2006, Fabrice Bellard
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software without
- * specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
- * THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/* Portions of this work are licensed under the terms of the GNU GPL,
- * version 2 or later. See the COPYING file in the top-level directory.
- */
-
-/*----------------------------------------------------------------------------
-| Returns 1 if the extended double-precision floating-point value `a' is a
-| NaN; otherwise returns 0.
-*----------------------------------------------------------------------------*/
-
-static inline flag floatx80_is_nan( floatx80 a )
-{
-
-    return ( ( a.high & 0x7FFF ) == 0x7FFF ) && (uint64_t) ( a.low<<1 );
-
-}
-
-/*----------------------------------------------------------------------------
-| The pattern for a default generated extended double-precision NaN.
-*----------------------------------------------------------------------------*/
-static inline floatx80 floatx80_default_nan(float_status *status)
-{
-    floatx80 r;
-    r.high = 0x7FFF;
-    r.low = LIT64( 0xFFFFFFFFFFFFFFFF );
-	return r;
-}
-
-/*----------------------------------------------------------------------------
-| Raises the exceptions specified by `flags'.  Floating-point traps can be
-| defined here if desired.  It is currently not possible for such a trap
-| to substitute a result value.  If traps are not implemented, this routine
-| should be simply `float_exception_flags |= flags;'.
-*----------------------------------------------------------------------------*/
-
-static inline void float_raise(uint8_t flags, float_status *status)
-{
-    status->float_exception_flags |= flags;
-}
-
-/*----------------------------------------------------------------------------
-| Internal canonical NaN format.
-*----------------------------------------------------------------------------*/
-typedef struct {
-    flag sign;
-    uint64_t high, low;
-} commonNaNT;
-
-/*----------------------------------------------------------------------------
-| Returns 1 if the single-precision floating-point value `a' is a NaN;
-| otherwise returns 0.
-*----------------------------------------------------------------------------*/
-
-static inline flag float32_is_nan( float32 a )
-{
-
-    return ( 0xFF000000 < (uint32_t) ( a<<1 ) );
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns 1 if the single-precision floating-point value `a' is a signaling
-| NaN; otherwise returns 0.
-*----------------------------------------------------------------------------*/
-
-static inline flag float32_is_signaling_nan( float32 a )
-{
-
-    return ( ( ( a>>22 ) & 0x1FF ) == 0x1FE ) && ( a & 0x003FFFFF );
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns the result of converting the single-precision floating-point NaN
-| `a' to the canonical NaN format.  If `a' is a signaling NaN, the invalid
-| exception is raised.
-*----------------------------------------------------------------------------*/
-
-static inline commonNaNT float32ToCommonNaN( float32 a, float_status *status )
-{
-    commonNaNT z;
-
-    if ( float32_is_signaling_nan( a ) ) float_raise( float_flag_signaling, status );
-    z.sign = a>>31;
-    z.low = 0;
-    z.high = ( (uint64_t) a )<<41;
-    return z;
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns the result of converting the canonical NaN `a' to the single-
-| precision floating-point format.
-*----------------------------------------------------------------------------*/
-
-static inline float32 commonNaNToFloat32( commonNaNT a )
-{
-
-    return ( ( (uint32_t) a.sign )<<31 ) | 0x7FC00000 | ( a.high>>41 );
-
-}
-
-/*----------------------------------------------------------------------------
-| Takes two single-precision floating-point values `a' and `b', one of which
-| is a NaN, and returns the appropriate NaN result.  If either `a' or `b' is a
-| signaling NaN, the invalid exception is raised.
-*----------------------------------------------------------------------------*/
-
-static inline float32 propagateFloat32NaN( float32 a, float32 b, float_status *status )
-{
-    flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
-
-    aIsNaN = float32_is_nan( a );
-    aIsSignalingNaN = float32_is_signaling_nan( a );
-    bIsNaN = float32_is_nan( b );
-    bIsSignalingNaN = float32_is_signaling_nan( b );
-    a |= 0x00400000;
-    b |= 0x00400000;
-    if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_signaling, status );
-    if ( aIsNaN ) {
-        return ( aIsSignalingNaN & bIsNaN ) ? b : a;
-    }
-    else {
-        return b;
-    }
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns 1 if the double-precision floating-point value `a' is a NaN;
-| otherwise returns 0.
-*----------------------------------------------------------------------------*/
-
-static inline flag float64_is_nan( float64 a )
-{
-
-    return ( LIT64( 0xFFE0000000000000 ) < (uint64_t) ( a<<1 ) );
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns 1 if the double-precision floating-point value `a' is a signaling
-| NaN; otherwise returns 0.
-*----------------------------------------------------------------------------*/
-
-static inline flag float64_is_signaling_nan( float64 a )
-{
-
-    return
-           ( ( ( a>>51 ) & 0xFFF ) == 0xFFE )
-        && ( a & LIT64( 0x0007FFFFFFFFFFFF ) );
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns the result of converting the double-precision floating-point NaN
-| `a' to the canonical NaN format.  If `a' is a signaling NaN, the invalid
-| exception is raised.
-*----------------------------------------------------------------------------*/
-
-static inline commonNaNT float64ToCommonNaN(float64 a, float_status *status)
-{
-    commonNaNT z;
-
-    if (float64_is_signaling_nan(a)) {
-        float_raise(float_flag_invalid, status);
-    }
-    z.sign = float64_val(a) >> 63;
-    z.low = 0;
-    z.high = float64_val(a) << 12;
-    return z;
-}
-
-/*----------------------------------------------------------------------------
-| Returns the result of converting the canonical NaN `a' to the double-
-| precision floating-point format.
-*----------------------------------------------------------------------------*/
-
-static inline float64 commonNaNToFloat64(commonNaNT a, float_status *status)
-{
-     return
-          ( ( (uint64_t) a.sign )<<63 )
-        | LIT64( 0x7FF8000000000000 )
-        | ( a.high>>12 );
-}
-
-/*----------------------------------------------------------------------------
-| Returns 1 if the extended double-precision floating-point value `a' is a
-| signaling NaN; otherwise returns 0.
-*----------------------------------------------------------------------------*/
-
-static inline flag floatx80_is_signaling_nan( floatx80 a )
-{
-    uint64_t aLow;
-
-    aLow = a.low & ~ LIT64( 0x4000000000000000 );
-    return
-           ( ( a.high & 0x7FFF ) == 0x7FFF )
-        && (uint64_t) ( aLow<<1 )
-        && ( a.low == aLow );
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns the result of converting the extended double-precision floating-
-| point NaN `a' to the canonical NaN format.  If `a' is a signaling NaN, the
-| invalid exception is raised.
-*----------------------------------------------------------------------------*/
-
-static inline commonNaNT floatx80ToCommonNaN( floatx80 a, float_status *status )
-{
-    commonNaNT z;
-
-    if ( floatx80_is_signaling_nan( a ) ) float_raise( float_flag_signaling, status );
-    z.sign = a.high>>15;
-    z.low = 0;
-    z.high = a.low<<1;
-    return z;
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns the result of converting the canonical NaN `a' to the extended
-| double-precision floating-point format.
-*----------------------------------------------------------------------------*/
-
-static inline floatx80 commonNaNToFloatx80(commonNaNT a, float_status *status)
-{
-    floatx80 z;
-#ifdef SOFTFLOAT_68K
-    z.low = LIT64( 0x4000000000000000 ) | ( a.high>>1 );
-#else
-    z.low = LIT64( 0xC000000000000000 ) | ( a.high>>1 );
-#endif    
-    z.high = ( ( (int16_t) a.sign )<<15 ) | 0x7FFF;
-    return z;
-}
-
-/*----------------------------------------------------------------------------
-| Takes two extended double-precision floating-point values `a' and `b', one
-| of which is a NaN, and returns the appropriate NaN result.  If either `a' or
-| `b' is a signaling NaN, the invalid exception is raised.
-*----------------------------------------------------------------------------*/
-
-static inline floatx80 propagateFloatx80NaN( floatx80 a, floatx80 b, float_status *status )
-{
-    flag aIsNaN, aIsSignalingNaN, bIsSignalingNaN;
-#ifndef SOFTFLOAT_68K
-    flag bIsNaN;
-#endif 
-
-	aIsNaN = floatx80_is_nan( a );
-    aIsSignalingNaN = floatx80_is_signaling_nan( a );
-    bIsSignalingNaN = floatx80_is_signaling_nan( b );
-#ifdef SOFTFLOAT_68K
-    a.low |= LIT64( 0x4000000000000000 );
-    b.low |= LIT64( 0x4000000000000000 );
-    if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_signaling, status );
-    return aIsNaN ? a : b;
-#else
-    bIsNaN = floatx80_is_nan( b );
-    a.low |= LIT64( 0xC000000000000000 );
-    b.low |= LIT64( 0xC000000000000000 );
-    if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_signaling, status );
-    if ( aIsNaN ) {
-        return ( aIsSignalingNaN & bIsNaN ) ? b : a;
-    }
-    else {
-        return b;
-    }
-#endif
-
-}
-
-#ifdef SOFTFLOAT_68K
-/*----------------------------------------------------------------------------
- | Takes extended double-precision floating-point  NaN  `a' and returns the
- | appropriate NaN result. If `a' is a signaling NaN, the invalid exception
- | is raised.
- *----------------------------------------------------------------------------*/
-
-static inline floatx80 propagateFloatx80NaNOneArg(floatx80 a, float_status *status)
-{
-    if ( floatx80_is_signaling_nan( a ) )
-        float_raise( float_flag_signaling, status );
-    a.low |= LIT64( 0x4000000000000000 );
-    
-    return a;
-}
-#endif
-
-// 28-12-2016: Added for Previous:
-
-/*----------------------------------------------------------------------------
- | Returns 1 if the extended double-precision floating-point value `a' is
- | zero; otherwise returns 0.
- *----------------------------------------------------------------------------*/
-
-static inline flag floatx80_is_zero( floatx80 a )
-{
-    
-    return ( ( a.high & 0x7FFF ) < 0x7FFF ) && ( a.low == 0 );
-    
-}
-
-/*----------------------------------------------------------------------------
- | Returns 1 if the extended double-precision floating-point value `a' is
- | infinity; otherwise returns 0.
- *----------------------------------------------------------------------------*/
-
-static inline flag floatx80_is_infinity( floatx80 a )
-{
-    
-    return ( ( a.high & 0x7FFF ) == 0x7FFF ) && ( (uint64_t) ( a.low<<1 ) == 0 );
-    
-}
-
-/*----------------------------------------------------------------------------
- | Returns 1 if the extended double-precision floating-point value `a' is
- | negative; otherwise returns 0.
- *----------------------------------------------------------------------------*/
-
-static inline flag floatx80_is_negative( floatx80 a )
-{
-    
-    return ( ( a.high & 0x8000 ) == 0x8000 );
-    
-}
-
-/*----------------------------------------------------------------------------
- | Returns 1 if the extended double-precision floating-point value `a' is
- | unnormal; otherwise returns 0.
- *----------------------------------------------------------------------------*/
-static inline flag floatx80_is_unnormal( floatx80 a )
-{
-	return
-		( ( a.high & 0x7FFF ) > 0 )
-		&& ( ( a.high & 0x7FFF ) < 0x7FFF)
-		&& ( (uint64_t) ( a.low & LIT64( 0x8000000000000000 ) ) == LIT64( 0x0000000000000000 ) );
-}
-
-/*----------------------------------------------------------------------------
- | Returns 1 if the extended double-precision floating-point value `a' is
- | denormal; otherwise returns 0.
- *----------------------------------------------------------------------------*/
-
-static inline flag floatx80_is_denormal( floatx80 a )
-{
-	return
-		( ( a.high & 0x7FFF ) == 0 )
-		&& ( (uint64_t) ( a.low & LIT64( 0x8000000000000000 ) ) == LIT64( 0x0000000000000000 ) )
-		&& (uint64_t) ( a.low<<1 );
-}
-
-/*----------------------------------------------------------------------------
- | Returns 1 if the extended double-precision floating-point value `a' is
- | normal; otherwise returns 0.
- *----------------------------------------------------------------------------*/
-
-static inline flag floatx80_is_normal( floatx80 a )
-{
-	return
-		( ( a.high & 0x7FFF ) < 0x7FFF )
-		&& ( (uint64_t) ( a.low & LIT64( 0x8000000000000000 ) ) == LIT64( 0x8000000000000000 ) );
-}
-// End of addition for Previous
-
--- a/src/softfloat/softfloat.cpp
+++ b/src/softfloat/softfloat.cpp
--- a/src/softfloat/softfloat.h
+++ b/src/softfloat/softfloat.h
@ -1,488 +0,0 @@
-#define SOFTFLOAT_68K
-
-/*
- * QEMU float support
- *
- * The code in this source file is derived from release 2a of the SoftFloat
- * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
- * some later contributions) are provided under that license, as detailed below.
- * It has subsequently been modified by contributors to the QEMU Project,
- * so some portions are provided under:
- *  the SoftFloat-2a license
- *  the BSD license
- *  GPL-v2-or-later
- *
- * Any future contributions to this file after December 1st 2014 will be
- * taken to be licensed under the Softfloat-2a license unless specifically
- * indicated otherwise.
- */
-
-/*
-===============================================================================
-This C header file is part of the SoftFloat IEC/IEEE Floating-point
-Arithmetic Package, Release 2a.
-
-Written by John R. Hauser.  This work was made possible in part by the
-International Computer Science Institute, located at Suite 600, 1947 Center
-Street, Berkeley, California 94704.  Funding was partially provided by the
-National Science Foundation under grant MIP-9311980.  The original version
-of this code was written as part of a project to build a fixed-point vector
-processor in collaboration with the University of California at Berkeley,
-overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
-is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
-arithmetic/SoftFloat.html'.
-
-THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
-has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
-TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
-PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
-AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
-
-Derivative works are acceptable, even for commercial purposes, so long as
-(1) they include prominent notice that the work is derivative, and (2) they
-include prominent notice akin to these four paragraphs for those parts of
-this code that are retained.
-
-===============================================================================
-*/
-
-/* BSD licensing:
- * Copyright (c) 2006, Fabrice Bellard
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software without
- * specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
- * THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/* Portions of this work are licensed under the terms of the GNU GPL,
- * version 2 or later. See the COPYING file in the top-level directory.
- */
-
-#ifndef SOFTFLOAT_H
-#define SOFTFLOAT_H
-
-#if defined(CONFIG_SOLARIS) && defined(CONFIG_NEEDS_LIBSUNMATH)
-#include <sunmath.h>
-#endif
-
-
-/* This 'flag' type must be able to hold at least 0 and 1. It should
- * probably be replaced with 'bool' but the uses would need to be audited
- * to check that they weren't accidentally relying on it being a larger type.
- */
-typedef uint8_t flag;
-
-#define LIT64( a ) a##ULL
-
-/*----------------------------------------------------------------------------
-| Software IEC/IEEE floating-point ordering relations
-*----------------------------------------------------------------------------*/
-enum {
-    float_relation_less      = -1,
-    float_relation_equal     =  0,
-    float_relation_greater   =  1,
-    float_relation_unordered =  2
-};
-
-/*----------------------------------------------------------------------------
-| Software IEC/IEEE floating-point types.
-*----------------------------------------------------------------------------*/
-/* Use structures for soft-float types.  This prevents accidentally mixing
-   them with native int/float types.  A sufficiently clever compiler and
-   sane ABI should be able to see though these structs.  However
-   x86/gcc 3.x seems to struggle a bit, so leave them disabled by default.  */
-//#define USE_SOFTFLOAT_STRUCT_TYPES
-#ifdef USE_SOFTFLOAT_STRUCT_TYPES
-typedef struct {
-    uint16_t v;
-} float16;
-#define float16_val(x) (((float16)(x)).v)
-#define make_float16(x) __extension__ ({ float16 f16_val = {x}; f16_val; })
-#define const_float16(x) { x }
-typedef struct {
-    uint32_t v;
-} float32;
-/* The cast ensures an error if the wrong type is passed.  */
-#define float32_val(x) (((float32)(x)).v)
-#define make_float32(x) __extension__ ({ float32 f32_val = {x}; f32_val; })
-#define const_float32(x) { x }
-typedef struct {
-    uint64_t v;
-} float64;
-#define float64_val(x) (((float64)(x)).v)
-#define make_float64(x) __extension__ ({ float64 f64_val = {x}; f64_val; })
-#define const_float64(x) { x }
-#else
-typedef uint16_t float16;
-typedef uint32_t float32;
-typedef uint64_t float64;
-#define float16_val(x) (x)
-#define float32_val(x) (x)
-#define float64_val(x) (x)
-#define make_float16(x) (x)
-#define make_float32(x) (x)
-#define make_float64(x) (x)
-#define const_float16(x) (x)
-#define const_float32(x) (x)
-#define const_float64(x) (x)
-#endif
-typedef struct {
-    uint16_t high;
-    uint64_t low;
-} floatx80;
-typedef struct {
-#ifdef HOST_WORDS_BIGENDIAN
-    uint64_t high, low;
-#else
-    uint64_t low, high;
-#endif
-} float128;
-
-/*----------------------------------------------------------------------------
-| Software IEC/IEEE floating-point underflow tininess-detection mode.
-*----------------------------------------------------------------------------*/
-enum {
-    float_tininess_after_rounding  = 0,
-    float_tininess_before_rounding = 1
-};
-
-/*----------------------------------------------------------------------------
-| Software IEC/IEEE floating-point rounding mode.
-*----------------------------------------------------------------------------*/
-enum {
-    float_round_nearest_even = 0,
-    float_round_down         = 1,
-    float_round_up           = 2,
-    float_round_to_zero      = 3,
-    float_round_ties_away    = 4,
-};
-
-/*----------------------------------------------------------------------------
-| Software IEC/IEEE floating-point exception flags.
-*----------------------------------------------------------------------------*/
-enum {
-    float_flag_invalid   = 0x01,
-	float_flag_denormal  = 0x02,
-    float_flag_divbyzero = 0x04,
-    float_flag_overflow  = 0x08,
-    float_flag_underflow = 0x10,
-    float_flag_inexact   = 0x20,
-	float_flag_signaling = 0x40,
-	float_flag_decimal =   0x80
-};
-
-/*----------------------------------------------------------------------------
- | Variables for storing sign, exponent and significand of overflowed or 
- | underflowed extended double-precision floating-point value.
- | Variables for storing sign, exponent and significand of internal extended 
- | double-precision floating-point value for external use.
- *----------------------------------------------------------------------------*/
-
-extern flag floatx80_internal_sign;
-extern int32_t floatx80_internal_exp;
-extern uint64_t floatx80_internal_sig;
-extern int32_t floatx80_internal_exp0;
-extern uint64_t floatx80_internal_sig0;
-extern uint64_t floatx80_internal_sig1;
-extern int8_t floatx80_internal_precision;
-extern int8_t floatx80_internal_mode;
-
-typedef struct float_status {
-    signed char float_detect_tininess;
-    signed char float_rounding_mode;
-    uint8_t     float_exception_flags;
-    signed char floatx80_rounding_precision;
-    /* should denormalised results go to zero and set the inexact flag? */
-    flag flush_to_zero;
-    /* should denormalised inputs go to zero and set the input_denormal flag? */
-    flag flush_inputs_to_zero;
-    flag default_nan_mode;
-    flag snan_bit_is_one;
-} float_status;
-
-/*----------------------------------------------------------------------------
- | Function for getting sign, exponent and significand of extended
- | double-precision floating-point intermediate result for external use.
- *----------------------------------------------------------------------------*/
-floatx80 getFloatInternalOverflow( void );
-floatx80 getFloatInternalUnderflow( void );
-floatx80 getFloatInternalRoundedAll( void );
-floatx80 getFloatInternalRoundedSome( void );
-floatx80 getFloatInternalUnrounded( void );
-floatx80 getFloatInternalFloatx80( void );
-uint64_t getFloatInternalGRS( void );
-
-static inline void set_float_detect_tininess(int val, float_status *status)
-{
-    status->float_detect_tininess = val;
-}
-static inline void set_float_rounding_mode(int val, float_status *status)
-{
-    status->float_rounding_mode = val;
-}
-static inline void set_float_exception_flags(int val, float_status *status)
-{
-    status->float_exception_flags = val;
-}
-static inline void set_floatx80_rounding_precision(int val,
-                                                   float_status *status)
-{
-    status->floatx80_rounding_precision = val;
-}
-static inline void set_flush_to_zero(flag val, float_status *status)
-{
-    status->flush_to_zero = val;
-}
-static inline void set_flush_inputs_to_zero(flag val, float_status *status)
-{
-    status->flush_inputs_to_zero = val;
-}
-static inline void set_default_nan_mode(flag val, float_status *status)
-{
-    status->default_nan_mode = val;
-}
-static inline void set_snan_bit_is_one(flag val, float_status *status)
-{
-    status->snan_bit_is_one = val;
-}
-static inline int get_float_detect_tininess(float_status *status)
-{
-    return status->float_detect_tininess;
-}
-static inline int get_float_rounding_mode(float_status *status)
-{
-    return status->float_rounding_mode;
-}
-static inline int get_float_exception_flags(float_status *status)
-{
-    return status->float_exception_flags;
-}
-static inline int get_floatx80_rounding_precision(float_status *status)
-{
-    return status->floatx80_rounding_precision;
-}
-static inline flag get_flush_to_zero(float_status *status)
-{
-    return status->flush_to_zero;
-}
-static inline flag get_flush_inputs_to_zero(float_status *status)
-{
-    return status->flush_inputs_to_zero;
-}
-static inline flag get_default_nan_mode(float_status *status)
-{
-    return status->default_nan_mode;
-}
-
-/*----------------------------------------------------------------------------
-| Routine to raise any or all of the software IEC/IEEE floating-point
-| exception flags.
-*----------------------------------------------------------------------------*/
-//void float_raise(uint8_t flags, float_status *status);
-
-
-/*----------------------------------------------------------------------------
- | The pattern for a default generated single-precision NaN.
- *----------------------------------------------------------------------------*/
-#define float32_default_nan 0x7FFFFFFF
-
-/*----------------------------------------------------------------------------
- | The pattern for a default generated double-precision NaN.
- *----------------------------------------------------------------------------*/
-#define float64_default_nan LIT64( 0x7FFFFFFFFFFFFFFF )
-
-/*----------------------------------------------------------------------------
- | The pattern for a default generated extended double-precision NaN.  The
- | `high' and `low' values hold the most- and least-significant bits,
- | respectively.
- *----------------------------------------------------------------------------*/
-#define floatx80_default_nan_high 0x7FFF
-#define floatx80_default_nan_low  LIT64( 0xFFFFFFFFFFFFFFFF )
-
-/*----------------------------------------------------------------------------
- | The pattern for a default generated extended double-precision infinity.
- *----------------------------------------------------------------------------*/
-#define floatx80_default_infinity_low  LIT64( 0x0000000000000000 )
-
-/*----------------------------------------------------------------------------
-| If `a' is denormal and we are in flush-to-zero mode then set the
-| input-denormal exception and return zero. Otherwise just return the value.
-*----------------------------------------------------------------------------*/
-float64 float64_squash_input_denormal(float64 a, float_status *status);
-
-/*----------------------------------------------------------------------------
-| Options to indicate which negations to perform in float*_muladd()
-| Using these differs from negating an input or output before calling
-| the muladd function in that this means that a NaN doesn't have its
-| sign bit inverted before it is propagated.
-| We also support halving the result before rounding, as a special
-| case to support the ARM fused-sqrt-step instruction FRSQRTS.
-*----------------------------------------------------------------------------*/
-enum {
-    float_muladd_negate_c = 1,
-    float_muladd_negate_product = 2,
-    float_muladd_negate_result = 4,
-    float_muladd_halve_result = 8,
-};
-
-/*----------------------------------------------------------------------------
-| Software IEC/IEEE integer-to-floating-point conversion routines.
-*----------------------------------------------------------------------------*/
-
-floatx80 int32_to_floatx80(int32_t);
-floatx80 int64_to_floatx80(int64_t);
-
-/*----------------------------------------------------------------------------
-| Software IEC/IEEE single-precision conversion routines.
-*----------------------------------------------------------------------------*/
-floatx80 float32_to_floatx80(float32, float_status *status);
-floatx80 float32_to_floatx80_allowunnormal(float32, float_status *status);
-
-/*----------------------------------------------------------------------------
-| Software IEC/IEEE double-precision conversion routines.
-*----------------------------------------------------------------------------*/
-floatx80 float64_to_floatx80(float64, float_status *status);
-
-floatx80 float64_to_floatx80_allowunnormal( float64 a, float_status *status );
-
-/*----------------------------------------------------------------------------
-| Software IEC/IEEE extended double-precision conversion routines.
-*----------------------------------------------------------------------------*/
-int32_t floatx80_to_int32(floatx80, float_status *status);
-#ifdef SOFTFLOAT_68K
-int16_t floatx80_to_int16(floatx80, float_status *status);
-int8_t floatx80_to_int8(floatx80, float_status *status);
-#endif
-int32_t floatx80_to_int32_round_to_zero(floatx80, float_status *status);
-int64_t floatx80_to_int64(floatx80, float_status *status);
-float32 floatx80_to_float32(floatx80, float_status *status);
-float64 floatx80_to_float64(floatx80, float_status *status);
-#ifdef SOFTFLOAT_68K
-floatx80 floatx80_to_floatx80( floatx80, float_status *status);
-floatx80 floatdecimal_to_floatx80(floatx80, float_status *status);
-floatx80 floatx80_to_floatdecimal(floatx80, int32_t*, float_status *status);
-#endif
-
-uint64_t extractFloatx80Frac( floatx80 a );
-int32_t extractFloatx80Exp( floatx80 a );
-flag extractFloatx80Sign( floatx80 a );
-
-floatx80 floatx80_round_to_int_toward_zero( floatx80 a, float_status *status);
-floatx80 floatx80_round_to_float32( floatx80, float_status *status );
-floatx80 floatx80_round_to_float64( floatx80, float_status *status );
-floatx80 floatx80_round32( floatx80, float_status *status);
-floatx80 floatx80_round64( floatx80, float_status *status);
-
-flag floatx80_eq( floatx80, floatx80, float_status *status);
-flag floatx80_le( floatx80, floatx80, float_status *status);
-flag floatx80_lt( floatx80, floatx80, float_status *status);
-
-#ifdef SOFTFLOAT_68K
-// functions are in softfloat.c
-floatx80 floatx80_move( floatx80 a, float_status *status );
-floatx80 floatx80_abs( floatx80 a, float_status *status );
-floatx80 floatx80_neg( floatx80 a, float_status *status );
-floatx80 floatx80_getexp( floatx80 a, float_status *status );
-floatx80 floatx80_getman( floatx80 a, float_status *status );
-floatx80 floatx80_scale(floatx80 a, floatx80 b, float_status *status );
-floatx80 floatx80_rem( floatx80 a, floatx80 b, uint64_t *q, flag *s, float_status *status );
-floatx80 floatx80_mod( floatx80 a, floatx80 b, uint64_t *q, flag *s, float_status *status );
-floatx80 floatx80_sglmul( floatx80 a, floatx80 b, float_status *status );
-floatx80 floatx80_sgldiv( floatx80 a, floatx80 b, float_status *status );
-floatx80 floatx80_cmp( floatx80 a, floatx80 b, float_status *status );
-floatx80 floatx80_tst( floatx80 a, float_status *status );
-
-// functions are in softfloat_fpsp.c
-floatx80 floatx80_acos(floatx80 a, float_status *status);
-floatx80 floatx80_asin(floatx80 a, float_status *status);
-floatx80 floatx80_atan(floatx80 a, float_status *status);
-floatx80 floatx80_atanh(floatx80 a, float_status *status);
-floatx80 floatx80_cos(floatx80 a, float_status *status);
-floatx80 floatx80_cosh(floatx80 a, float_status *status);
-floatx80 floatx80_etox(floatx80 a, float_status *status);
-floatx80 floatx80_etoxm1(floatx80 a, float_status *status);
-floatx80 floatx80_log10(floatx80 a, float_status *status);
-floatx80 floatx80_log2(floatx80 a, float_status *status);
-floatx80 floatx80_logn(floatx80 a, float_status *status);
-floatx80 floatx80_lognp1(floatx80 a, float_status *status);
-floatx80 floatx80_sin(floatx80 a, float_status *status);
-floatx80 floatx80_sinh(floatx80 a, float_status *status);
-floatx80 floatx80_tan(floatx80 a, float_status *status);
-floatx80 floatx80_tanh(floatx80 a, float_status *status);
-floatx80 floatx80_tentox(floatx80 a, float_status *status);
-floatx80 floatx80_twotox(floatx80 a, float_status *status);
-#endif
-
-// functions originally internal to softfloat.c
-void normalizeFloatx80Subnormal( uint64_t aSig, int32_t *zExpPtr, uint64_t *zSigPtr );
-floatx80 packFloatx80( flag zSign, int32_t zExp, uint64_t zSig );
-floatx80 roundAndPackFloatx80(int8_t roundingPrecision, flag zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1, float_status *status);
-
-/*----------------------------------------------------------------------------
-| Software IEC/IEEE extended double-precision operations.
-*----------------------------------------------------------------------------*/
-floatx80 floatx80_round_to_int(floatx80, float_status *status);
-floatx80 floatx80_add(floatx80, floatx80, float_status *status);
-floatx80 floatx80_sub(floatx80, floatx80, float_status *status);
-floatx80 floatx80_mul(floatx80, floatx80, float_status *status);
-floatx80 floatx80_div(floatx80, floatx80, float_status *status);
-floatx80 floatx80_sqrt(floatx80, float_status *status);
-floatx80 floatx80_normalize(floatx80);
-floatx80 floatx80_denormalize(floatx80, flag);
-
-static inline int floatx80_is_zero_or_denormal(floatx80 a)
-{
-    return (a.high & 0x7fff) == 0;
-}
-
-static inline int floatx80_is_any_nan(floatx80 a)
-{
-    return ((a.high & 0x7fff) == 0x7fff) && (a.low<<1);
-}
-
-/*----------------------------------------------------------------------------
-| Return whether the given value is an invalid floatx80 encoding.
-| Invalid floatx80 encodings arise when the integer bit is not set, but
-| the exponent is not zero. The only times the integer bit is permitted to
-| be zero is in subnormal numbers and the value zero.
-| This includes what the Intel software developer's manual calls pseudo-NaNs,
-| pseudo-infinities and un-normal numbers. It does not include
-| pseudo-denormals, which must still be correctly handled as inputs even
-| if they are never generated as outputs.
-*----------------------------------------------------------------------------*/
-static inline bool floatx80_invalid_encoding(floatx80 a)
-{
-    return (a.low & (1ULL << 63)) == 0 && (a.high & 0x7FFF) != 0 && (a.high & 0x7FFF) != 0x7FFF;
-}
-
-#define floatx80_zero make_floatx80(0x0000, 0x0000000000000000LL)
-#define floatx80_one make_floatx80(0x3fff, 0x8000000000000000LL)
-#define floatx80_ln2 make_floatx80(0x3ffe, 0xb17217f7d1cf79acLL)
-#define floatx80_pi make_floatx80(0x4000, 0xc90fdaa22168c235LL)
-#define floatx80_half make_floatx80(0x3ffe, 0x8000000000000000LL)
-#define floatx80_infinity make_floatx80(0x7fff, 0x8000000000000000LL)
-
-#endif /* SOFTFLOAT_H */
--- a/src/softfloat/softfloat_decimal.cpp
+++ b/src/softfloat/softfloat_decimal.cpp
@ -1,461 +0,0 @@
-/*============================================================================
-
-This C source file is an extension to the SoftFloat IEC/IEEE Floating-point 
-Arithmetic Package, Release 2a.
-
-=============================================================================*/
-
-#include <stdint.h>
-
-#include "sysconfig.h"
-#include "sysdeps.h"
-
-#define DECIMAL_LOG 0
-
-#if DECIMAL_LOG
-#define decimal_log write_log
-#else
-#define decimal_log(fmt, ...)
-#endif
-
-#include "softfloat.h"
-#include "softfloat-macros.h"
-#include "softfloat/softfloat-specialize.h"
-
-/*----------------------------------------------------------------------------
-| Methods for converting decimal floats to binary extended precision floats.
-*----------------------------------------------------------------------------*/
-
-static void round128to64(flag aSign, int32_t *aExp, uint64_t *aSig0, uint64_t *aSig1, float_status *status)
-{
-	flag increment;
-	int32_t zExp;
-	uint64_t zSig0, zSig1;
-	
-	zExp = *aExp;
-	zSig0 = *aSig0;
-	zSig1 = *aSig1;
-	
-	increment = ( (int64_t) zSig1 < 0 );
-	if (status->float_rounding_mode != float_round_nearest_even) {
-		if (status->float_rounding_mode == float_round_to_zero) {
-			increment = 0;
-		} else {
-			if (aSign) {
-				increment = (status->float_rounding_mode == float_round_down) && zSig1;
-			} else {
-				increment = (status->float_rounding_mode == float_round_up) && zSig1;
-			}
-		}
-	}
-	
-	if (increment) {
-		++zSig0;
-		if (zSig0 == 0) {
-			++zExp;
-			zSig0 = LIT64(0x8000000000000000);
-		} else {
-			zSig0 &= ~ (((uint64_t) (zSig1<<1) == 0) & (status->float_rounding_mode == float_round_nearest_even));
-		}
-	} else {
-		if ( zSig0 == 0 ) zExp = 0;
-	}
-	
-	*aExp = zExp;
-	*aSig0 = zSig0;
-	*aSig1 = 0;
-}
-
-static void mul128by128round(int32_t *aExp, uint64_t *aSig0, uint64_t *aSig1, int32_t bExp, uint64_t bSig0, uint64_t bSig1, float_status *status)
-{
-	int32_t zExp;
-	uint64_t zSig0, zSig1, zSig2, zSig3;
-	
-	zExp = *aExp;
-	zSig0 = *aSig0;
-	zSig1 = *aSig1;
-	
-	round128to64(0, &bExp, &bSig0, &bSig1, status);
-	
-	zExp += bExp - 0x3FFE;
-	mul128To256(zSig0, zSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3);
-	zSig1 |= (zSig2 | zSig3) != 0;
-	if ( 0 < (int64_t) zSig0 ) {
-		shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
-		--zExp;
-	}
-	*aExp = zExp;
-	*aSig0 = zSig0;
-	*aSig1 = zSig1;
-	
-	round128to64(0, aExp, aSig0, aSig1, status);
-}
-
-static void mul128by128(int32_t *aExp, uint64_t *aSig0, uint64_t *aSig1, int32_t bExp, uint64_t bSig0, uint64_t bSig1)
-{
-	int32_t zExp;
-	uint64_t zSig0, zSig1, zSig2, zSig3;
-	
-	zExp = *aExp;
-	zSig0 = *aSig0;
-	zSig1 = *aSig1;
-
-	zExp += bExp - 0x3FFE;
-	mul128To256(zSig0, zSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3);
-	zSig1 |= (zSig2 | zSig3) != 0;
-	if ( 0 < (int64_t) zSig0 ) {
-		shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
-		--zExp;
-	}
-	*aExp = zExp;
-	*aSig0 = zSig0;
-	*aSig1 = zSig1;
-}
-
-static void div128by128(int32_t *paExp, uint64_t *paSig0, uint64_t *paSig1, int32_t bExp, uint64_t bSig0, uint64_t bSig1)
-{
-	int32_t zExp, aExp;
-	uint64_t zSig0, zSig1, aSig0, aSig1;
-	uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
-	
-	aExp = *paExp;
-	aSig0 = *paSig0;
-	aSig1 = *paSig1;
-	
-	zExp = aExp - bExp + 0x3FFE;
-	if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
-		shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
-		++zExp;
-	}
-	zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
-	mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
-	sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
-	while ( (int64_t) rem0 < 0 ) {
-		--zSig0;
-		add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
-	}
-	zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
-	if ( ( zSig1 & 0x3FFF ) <= 4 ) {
-		mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
-		sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
-		while ( (int64_t) rem1 < 0 ) {
-			--zSig1;
-			add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
-		}
-		zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
-	}
-
-	*paExp = zExp;
-	*paSig0 = zSig0;
-	*paSig1 = zSig1;
-}
-
-static void tentoint128(flag mSign, flag eSign, int32_t *aExp, uint64_t *aSig0, uint64_t *aSig1, int32_t scale, float_status *status)
- {
-    int8_t save_rounding_mode;
-    int32_t mExp;
-    uint64_t mSig0, mSig1;
-     
-    save_rounding_mode = status->float_rounding_mode;
-    switch (status->float_rounding_mode) {
-        case float_round_nearest_even:
-            break;
-        case float_round_down:
-            if (mSign != eSign) {
-                set_float_rounding_mode(float_round_up, status);
-            }
-            break;
-        case float_round_up:
-            if (mSign != eSign) {
-                set_float_rounding_mode(float_round_down, status);
-            }
-            break;
-        case float_round_to_zero:
-            if (eSign == 0) {
-                set_float_rounding_mode(float_round_down, status);
-            } else {
-                set_float_rounding_mode(float_round_up, status);
-            }
-            break;
-        default:
-            break;
-    }	
-
-	*aExp = 0x3FFF;
-	*aSig0 = LIT64(0x8000000000000000);
-	*aSig1 = 0;
-
-	mExp = 0x4002;
-	mSig0 = LIT64(0xA000000000000000);
-	mSig1 = 0;
-	
-	while (scale) {
-		if (scale & 1) {
-			mul128by128round(aExp, aSig0, aSig1, mExp, mSig0, mSig1, status);
-		}
-		mul128by128(&mExp, &mSig0, &mSig1, mExp, mSig0, mSig1);
-		scale >>= 1;
-	}
-
-	set_float_rounding_mode(save_rounding_mode, status);
-}
-
-static int64_t tentointdec(int32_t scale)
-{
-	uint64_t decM, decX;
-	 
-	decX = 1;
-	decM = 10;
-	 
-	while (scale) {
-		if (scale & 1) {
-			decX *= decM;
-		}
-		decM *= decM;
-		scale >>= 1;
-	 }
-	
-	return decX;
-}
-
-
-static int64_t float128toint64(flag zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1, float_status *status)
-{
-	int8_t roundingMode;
-	flag roundNearestEven, increment;
-	int64_t z;
-	
-	shift128RightJamming(zSig0, zSig1, 0x403E - zExp, &zSig0, &zSig1);
-
-	roundingMode = status->float_rounding_mode;
-	roundNearestEven = (roundingMode == float_round_nearest_even);
-	increment = ((int64_t)zSig1 < 0);
-	if (!roundNearestEven) {
-		if (roundingMode == float_round_to_zero) {
-			increment = 0;
-		} else {
-			if (zSign) {
-				increment = (roundingMode == float_round_down ) && zSig1;
-			} else {
-				increment = (roundingMode == float_round_up ) && zSig1;
-			}
-		}
-	}
-	if (increment) {
-		++zSig0;
-		zSig0 &= ~ (((uint64_t)(zSig1<<1) == 0) & roundNearestEven);
-	}
-	z = zSig0;
-	if (zSig1) float_raise(float_flag_inexact, status);
-	return z;
-}
-
-static int32_t getDecimalExponent(int32_t aExp, uint64_t aSig)
-{
-	flag zSign;
-	int32_t zExp, shiftCount;
-	uint64_t zSig0, zSig1;
-	
-	if (aSig == 0 || aExp == 0x3FFF) {
-		return 0;
-	}
-	if (aExp < 0) {
-		return -4932;
-	}
-
-	aSig ^= LIT64(0x8000000000000000);
-	aExp -= 0x3FFF;
-	zSign = (aExp < 0);
-	aExp = zSign ? -aExp : aExp;
-	shiftCount = 31 - countLeadingZeros32(aExp);
-	zExp = 0x3FFF + shiftCount;
-	
-	if (shiftCount < 0) {
-		shortShift128Left(aSig, 0, -shiftCount, &zSig0, &zSig1);
-	} else {
-		shift128Right(aSig, 0, shiftCount, &zSig0, &zSig1);
-		aSig = (uint64_t)aExp << (63 - shiftCount);
-		if (zSign) {
-			sub128(aSig, 0, zSig0, zSig1, &zSig0, &zSig1);
-		} else {
-			add128(aSig, 0, zSig0, zSig1, &zSig0, &zSig1);
-		}
-	}
-	
-	shiftCount = countLeadingZeros64(zSig0);
-	shortShift128Left(zSig0, zSig1, shiftCount, &zSig0, &zSig1);
-	zExp -= shiftCount;
-	mul128by128(&zExp, &zSig0, &zSig1, 0x3FFD, LIT64(0x9A209A84FBCFF798), LIT64(0x8F8959AC0B7C9178));
-	
-	shiftCount = 0x403E - zExp;
-	shift128RightJamming(zSig0, zSig1, shiftCount, &zSig0, &zSig1);
-
-	if ((int64_t)zSig1 < 0) {
-		++zSig0;
-		zSig0 &= ~(((int64_t)(zSig1<<1) == 0) & 1);
-	}
-	
-	zExp = zSign ? -zSig0 : zSig0;
-
-	return zExp;
-}
-
-/*----------------------------------------------------------------------------
-| Decimal to binary
-*----------------------------------------------------------------------------*/
-
-floatx80 floatdecimal_to_floatx80(floatx80 a, float_status *status)
-{
-	flag decSign, zSign, decExpSign;
-	int32_t decExp, zExp, xExp, shiftCount;
-	uint64_t decSig, zSig0, zSig1, xSig0, xSig1;
-	
-	decSign = extractFloatx80Sign(a);
-	decExp = extractFloatx80Exp(a);
-	decSig = extractFloatx80Frac(a);
-	
-	if (decExp == 0x7FFF) return a;
-	
-	if (decExp == 0 && decSig == 0) return a;
-	
-	decExpSign = (decExp >> 14) & 1;
-	decExp &= 0x3FFF;
-	
-	shiftCount = countLeadingZeros64( decSig );
-	zExp = 0x403E - shiftCount;
-	zSig0 = decSig << shiftCount;
-	zSig1 = 0;
-	zSign = decSign;
-	
-	tentoint128(decSign, decExpSign, &xExp, &xSig0, &xSig1, decExp, status);
-
-	if (decExpSign) {
-		div128by128(&zExp, &zSig0, &zSig1, xExp, xSig0, xSig1);
-	} else {
-		mul128by128(&zExp, &zSig0, &zSig1, xExp, xSig0, xSig1);
-	}
-	
-	if (zSig1) float_raise(float_flag_decimal, status);
-	round128to64(zSign, &zExp, &zSig0, &zSig1, status);
-	
-	return packFloatx80( zSign, zExp, zSig0 );
-	
-}
-
-/*----------------------------------------------------------------------------
- | Binary to decimal
- *----------------------------------------------------------------------------*/
-
-floatx80 floatx80_to_floatdecimal(floatx80 a, int32_t *k, float_status *status)
-{
-	flag aSign, decSign;
-	int32_t aExp, decExp, zExp, xExp;
-	uint64_t aSig, decSig, decX, zSig0, zSig1, xSig0, xSig1;
-	flag ictr, lambda;
-	int32_t kfactor, ilog, iscale, len;
-	
-	aSign = extractFloatx80Sign(a);
-	aExp = extractFloatx80Exp(a);
-	aSig = extractFloatx80Frac(a);
-	
-	if (aExp == 0x7FFF) {
-		if ((uint64_t) (aSig<<1)) return propagateFloatx80NaNOneArg(a, status);
-		return a;
-	}
-	
-	if (aExp == 0) {
-		if (aSig == 0) return packFloatx80(aSign, 0, 0);
-		normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
-	}
-
-	kfactor = *k;
-
-	ilog = getDecimalExponent(aExp, aSig);
-	
-	ictr = 0;
-
-try_again:
-	decimal_log(_T("ILOG = %i\n"), ilog);
-	
-	if (kfactor > 0) {
-		if (kfactor > 17) {
-			kfactor = 17;
-			float_raise(float_flag_invalid, status);
-		}
-		len = kfactor;
-	} else {
-		len = ilog + 1 - kfactor;
-		if (len > 17) {
-			len = 17;
-		}
-		if (len < 1) {
-			len = 1;
-		}
-		if (kfactor > ilog) {
-			ilog = kfactor;
-			decimal_log(_T("ILOG is kfactor = %i\n"), ilog);
-		}
-	}
-	
-	decimal_log(_T("LEN = %i\n"),len);
-	
-	lambda = 0;
-	iscale = ilog + 1 - len;
-
-	if (iscale < 0) {
-		lambda = 1;
-		iscale = -iscale;
-	}
-	
-	decimal_log(_T("ISCALE = %i, LAMBDA = %i\n"),iscale, lambda);
-	
-	tentoint128(lambda, 0, &xExp, &xSig0, &xSig1, iscale, status);
-
-	decimal_log(_T("AFTER tentoint128: zExp = %04x, zSig0 = %16llx, zSig1 = %16llx\n"), xExp, xSig0, xSig1);
-
-	zExp = aExp;
-	zSig0 = aSig;
-	zSig1 = 0;
-	
-	if (lambda) {
-		mul128by128(&zExp, &zSig0, &zSig1, xExp, xSig0, xSig1);
-	} else {
-		div128by128(&zExp, &zSig0, &zSig1, xExp, xSig0, xSig1);
-	}
-
-	decimal_log(_T("BEFORE: zExp = %04x, zSig0 = %16llx, zSig1 = %16llx\n"),zExp,zSig0,zSig1);
-
-	decSig = float128toint64(aSign, zExp, zSig0, zSig1, status);
-
-	decimal_log(_T("AFTER: decSig = %llu\n"),decSig);
-
-	if (ictr == 0) {
-
-		decX = tentointdec(len - 1);
-
-		if (decSig < decX) { // z < x
-			ilog -= 1;
-			ictr = 1;
-			goto try_again;
-		}
-		
-		decX *= 10;
-		
-		if (decSig > decX) { // z > x
-			ilog += 1;
-			ictr = 1;
-			goto try_again;
-		}
-	}
-	
-	decSign = aSign;
-	decExp = (ilog < 0) ? -ilog : ilog;
-	if (decExp > 999) {
-		float_raise(float_flag_invalid, status);
-	}
-	if (ilog < 0) decExp |= 0x4000;
-	
-	*k = len;
-	
-	return packFloatx80(decSign, decExp, decSig);
-}
--- a/src/softfloat/softfloat_fpsp.cpp
+++ b/src/softfloat/softfloat_fpsp.cpp
--- a/src/softfloat/softfloat_fpsp_tables.h
+++ b/src/softfloat/softfloat_fpsp_tables.h
@ -1,528 +0,0 @@
-
-static const floatx80 atan_tbl[128] = {
-	{0x3FFB, LIT64(0x83D152C5060B7A51)},
-	{0x3FFB, LIT64(0x8BC8544565498B8B)},
-	{0x3FFB, LIT64(0x93BE406017626B0D)},
-	{0x3FFB, LIT64(0x9BB3078D35AEC202)},
-	{0x3FFB, LIT64(0xA3A69A525DDCE7DE)},
-	{0x3FFB, LIT64(0xAB98E94362765619)},
-	{0x3FFB, LIT64(0xB389E502F9C59862)},
-	{0x3FFB, LIT64(0xBB797E436B09E6FB)},
-	{0x3FFB, LIT64(0xC367A5C739E5F446)},
-	{0x3FFB, LIT64(0xCB544C61CFF7D5C6)},
-	{0x3FFB, LIT64(0xD33F62F82488533E)},
-	{0x3FFB, LIT64(0xDB28DA8162404C77)},
-	{0x3FFB, LIT64(0xE310A4078AD34F18)},
-	{0x3FFB, LIT64(0xEAF6B0A8188EE1EB)},
-	{0x3FFB, LIT64(0xF2DAF1949DBE79D5)},
-	{0x3FFB, LIT64(0xFABD581361D47E3E)},
-	{0x3FFC, LIT64(0x8346AC210959ECC4)},
-	{0x3FFC, LIT64(0x8B232A08304282D8)},
-	{0x3FFC, LIT64(0x92FB70B8D29AE2F9)},
-	{0x3FFC, LIT64(0x9ACF476F5CCD1CB4)},
-	{0x3FFC, LIT64(0xA29E76304954F23F)},
-	{0x3FFC, LIT64(0xAA68C5D08AB85230)},
-	{0x3FFC, LIT64(0xB22DFFFD9D539F83)},
-	{0x3FFC, LIT64(0xB9EDEF453E900EA5)},
-	{0x3FFC, LIT64(0xC1A85F1CC75E3EA5)},
-	{0x3FFC, LIT64(0xC95D1BE828138DE6)},
-	{0x3FFC, LIT64(0xD10BF300840D2DE4)},
-	{0x3FFC, LIT64(0xD8B4B2BA6BC05E7A)},
-	{0x3FFC, LIT64(0xE0572A6BB42335F6)},
-	{0x3FFC, LIT64(0xE7F32A70EA9CAA8F)},
-	{0x3FFC, LIT64(0xEF88843264ECEFAA)},
-	{0x3FFC, LIT64(0xF7170A28ECC06666)},
-	{0x3FFD, LIT64(0x812FD288332DAD32)},
-	{0x3FFD, LIT64(0x88A8D1B1218E4D64)},
-	{0x3FFD, LIT64(0x9012AB3F23E4AEE8)},
-	{0x3FFD, LIT64(0x976CC3D411E7F1B9)},
-	{0x3FFD, LIT64(0x9EB689493889A227)},
-	{0x3FFD, LIT64(0xA5EF72C34487361B)},
-	{0x3FFD, LIT64(0xAD1700BAF07A7227)},
-	{0x3FFD, LIT64(0xB42CBCFAFD37EFB7)},
-	{0x3FFD, LIT64(0xBB303A940BA80F89)},
-	{0x3FFD, LIT64(0xC22115C6FCAEBBAF)},
-	{0x3FFD, LIT64(0xC8FEF3E686331221)},
-	{0x3FFD, LIT64(0xCFC98330B4000C70)},
-	{0x3FFD, LIT64(0xD6807AA1102C5BF9)},
-	{0x3FFD, LIT64(0xDD2399BC31252AA3)},
-	{0x3FFD, LIT64(0xE3B2A8556B8FC517)},
-	{0x3FFD, LIT64(0xEA2D764F64315989)},
-	{0x3FFD, LIT64(0xF3BF5BF8BAD1A21D)},
-	{0x3FFE, LIT64(0x801CE39E0D205C9A)},
-	{0x3FFE, LIT64(0x8630A2DADA1ED066)},
-	{0x3FFE, LIT64(0x8C1AD445F3E09B8C)},
-	{0x3FFE, LIT64(0x91DB8F1664F350E2)},
-	{0x3FFE, LIT64(0x97731420365E538C)},
-	{0x3FFE, LIT64(0x9CE1C8E6A0B8CDBA)},
-	{0x3FFE, LIT64(0xA22832DBCADAAE09)},
-	{0x3FFE, LIT64(0xA746F2DDB7602294)},
-	{0x3FFE, LIT64(0xAC3EC0FB997DD6A2)},
-	{0x3FFE, LIT64(0xB110688AEBDC6F6A)},
-	{0x3FFE, LIT64(0xB5BCC49059ECC4B0)},
-	{0x3FFE, LIT64(0xBA44BC7DD470782F)},
-	{0x3FFE, LIT64(0xBEA94144FD049AAC)},
-	{0x3FFE, LIT64(0xC2EB4ABB661628B6)},
-	{0x3FFE, LIT64(0xC70BD54CE602EE14)},
-	{0x3FFE, LIT64(0xCD000549ADEC7159)},
-	{0x3FFE, LIT64(0xD48457D2D8EA4EA3)},
-	{0x3FFE, LIT64(0xDB948DA712DECE3B)},
-	{0x3FFE, LIT64(0xE23855F969E8096A)},
-	{0x3FFE, LIT64(0xE8771129C4353259)},
-	{0x3FFE, LIT64(0xEE57C16E0D379C0D)},
-	{0x3FFE, LIT64(0xF3E10211A87C3779)},
-	{0x3FFE, LIT64(0xF919039D758B8D41)},
-	{0x3FFE, LIT64(0xFE058B8F64935FB3)},
-	{0x3FFF, LIT64(0x8155FB497B685D04)},
-	{0x3FFF, LIT64(0x83889E3549D108E1)},
-	{0x3FFF, LIT64(0x859CFA76511D724B)},
-	{0x3FFF, LIT64(0x87952ECFFF8131E7)},
-	{0x3FFF, LIT64(0x89732FD19557641B)},
-	{0x3FFF, LIT64(0x8B38CAD101932A35)},
-	{0x3FFF, LIT64(0x8CE7A8D8301EE6B5)},
-	{0x3FFF, LIT64(0x8F46A39E2EAE5281)},
-	{0x3FFF, LIT64(0x922DA7D791888487)},
-	{0x3FFF, LIT64(0x94D19FCBDEDF5241)},
-	{0x3FFF, LIT64(0x973AB94419D2A08B)},
-	{0x3FFF, LIT64(0x996FF00E08E10B96)},
-	{0x3FFF, LIT64(0x9B773F9512321DA7)},
-	{0x3FFF, LIT64(0x9D55CC320F935624)},
-	{0x3FFF, LIT64(0x9F100575006CC571)},
-	{0x3FFF, LIT64(0xA0A9C290D97CC06C)},
-	{0x3FFF, LIT64(0xA22659EBEBC0630A)},
-	{0x3FFF, LIT64(0xA388B4AFF6EF0EC9)},
-	{0x3FFF, LIT64(0xA4D35F1061D292C4)},
-	{0x3FFF, LIT64(0xA60895DCFBE3187E)},
-	{0x3FFF, LIT64(0xA72A51DC7367BEAC)},
-	{0x3FFF, LIT64(0xA83A51530956168F)},
-	{0x3FFF, LIT64(0xA93A20077539546E)},
-	{0x3FFF, LIT64(0xAA9E7245023B2605)},
-	{0x3FFF, LIT64(0xAC4C84BA6FE4D58F)},
-	{0x3FFF, LIT64(0xADCE4A4A606B9712)},
-	{0x3FFF, LIT64(0xAF2A2DCD8D263C9C)},
-	{0x3FFF, LIT64(0xB0656F81F22265C7)},
-	{0x3FFF, LIT64(0xB18465150F71496A)},
-	{0x3FFF, LIT64(0xB28AAA156F9ADA35)},
-	{0x3FFF, LIT64(0xB37B44FF3766B895)},
-	{0x3FFF, LIT64(0xB458C3DCE9630433)},
-	{0x3FFF, LIT64(0xB525529D562246BD)},
-	{0x3FFF, LIT64(0xB5E2CCA95F9D88CC)},
-	{0x3FFF, LIT64(0xB692CADA7ACA1ADA)},
-	{0x3FFF, LIT64(0xB736AEA7A6925838)},
-	{0x3FFF, LIT64(0xB7CFAB287E9F7B36)},
-	{0x3FFF, LIT64(0xB85ECC66CB219835)},
-	{0x3FFF, LIT64(0xB8E4FD5A20A593DA)},
-	{0x3FFF, LIT64(0xB99F41F64AFF9BB5)},
-	{0x3FFF, LIT64(0xBA7F1E17842BBE7B)},
-	{0x3FFF, LIT64(0xBB4712857637E17D)},
-	{0x3FFF, LIT64(0xBBFABE8A4788DF6F)},
-	{0x3FFF, LIT64(0xBC9D0FAD2B689D79)},
-	{0x3FFF, LIT64(0xBD306A39471ECD86)},
-	{0x3FFF, LIT64(0xBDB6C731856AF18A)},
-	{0x3FFF, LIT64(0xBE31CAC502E80D70)},
-	{0x3FFF, LIT64(0xBEA2D55CE33194E2)},
-	{0x3FFF, LIT64(0xBF0B10B7C03128F0)},
-	{0x3FFF, LIT64(0xBF6B7A18DACB778D)},
-	{0x3FFF, LIT64(0xBFC4EA4663FA18F6)},
-	{0x3FFF, LIT64(0xC0181BDE8B89A454)},
-	{0x3FFF, LIT64(0xC065B066CFBF6439)},
-	{0x3FFF, LIT64(0xC0AE345F56340AE6)},
-	{0x3FFF, LIT64(0xC0F222919CB9E6A7)}
-};
-
-
-static const floatx80 exp_tbl[64] = {
-	{0x3FFF, LIT64(0x8000000000000000)},
-	{0x3FFF, LIT64(0x8164D1F3BC030774)},
-	{0x3FFF, LIT64(0x82CD8698AC2BA1D8)},
-	{0x3FFF, LIT64(0x843A28C3ACDE4048)},
-	{0x3FFF, LIT64(0x85AAC367CC487B14)},
-	{0x3FFF, LIT64(0x871F61969E8D1010)},
-	{0x3FFF, LIT64(0x88980E8092DA8528)},
-	{0x3FFF, LIT64(0x8A14D575496EFD9C)},
-	{0x3FFF, LIT64(0x8B95C1E3EA8BD6E8)},
-	{0x3FFF, LIT64(0x8D1ADF5B7E5BA9E4)},
-	{0x3FFF, LIT64(0x8EA4398B45CD53C0)},
-	{0x3FFF, LIT64(0x9031DC431466B1DC)},
-	{0x3FFF, LIT64(0x91C3D373AB11C338)},
-	{0x3FFF, LIT64(0x935A2B2F13E6E92C)},
-	{0x3FFF, LIT64(0x94F4EFA8FEF70960)},
-	{0x3FFF, LIT64(0x96942D3720185A00)},
-	{0x3FFF, LIT64(0x9837F0518DB8A970)},
-	{0x3FFF, LIT64(0x99E0459320B7FA64)},
-	{0x3FFF, LIT64(0x9B8D39B9D54E5538)},
-	{0x3FFF, LIT64(0x9D3ED9A72CFFB750)},
-	{0x3FFF, LIT64(0x9EF5326091A111AC)},
-	{0x3FFF, LIT64(0xA0B0510FB9714FC4)},
-	{0x3FFF, LIT64(0xA27043030C496818)},
-	{0x3FFF, LIT64(0xA43515AE09E680A0)},
-	{0x3FFF, LIT64(0xA5FED6A9B15138EC)},
-	{0x3FFF, LIT64(0xA7CD93B4E9653568)},
-	{0x3FFF, LIT64(0xA9A15AB4EA7C0EF8)},
-	{0x3FFF, LIT64(0xAB7A39B5A93ED338)},
-	{0x3FFF, LIT64(0xAD583EEA42A14AC8)},
-	{0x3FFF, LIT64(0xAF3B78AD690A4374)},
-	{0x3FFF, LIT64(0xB123F581D2AC2590)},
-	{0x3FFF, LIT64(0xB311C412A9112488)},
-	{0x3FFF, LIT64(0xB504F333F9DE6484)},
-	{0x3FFF, LIT64(0xB6FD91E328D17790)},
-	{0x3FFF, LIT64(0xB8FBAF4762FB9EE8)},
-	{0x3FFF, LIT64(0xBAFF5AB2133E45FC)},
-	{0x3FFF, LIT64(0xBD08A39F580C36C0)},
-	{0x3FFF, LIT64(0xBF1799B67A731084)},
-	{0x3FFF, LIT64(0xC12C4CCA66709458)},
-	{0x3FFF, LIT64(0xC346CCDA24976408)},
-	{0x3FFF, LIT64(0xC5672A115506DADC)},
-	{0x3FFF, LIT64(0xC78D74C8ABB9B15C)},
-	{0x3FFF, LIT64(0xC9B9BD866E2F27A4)},
-	{0x3FFF, LIT64(0xCBEC14FEF2727C5C)},
-	{0x3FFF, LIT64(0xCE248C151F8480E4)},
-	{0x3FFF, LIT64(0xD06333DAEF2B2594)},
-	{0x3FFF, LIT64(0xD2A81D91F12AE45C)},
-	{0x3FFF, LIT64(0xD4F35AABCFEDFA20)},
-	{0x3FFF, LIT64(0xD744FCCAD69D6AF4)},
-	{0x3FFF, LIT64(0xD99D15C278AFD7B4)},
-	{0x3FFF, LIT64(0xDBFBB797DAF23754)},
-	{0x3FFF, LIT64(0xDE60F4825E0E9124)},
-	{0x3FFF, LIT64(0xE0CCDEEC2A94E110)},
-	{0x3FFF, LIT64(0xE33F8972BE8A5A50)},
-	{0x3FFF, LIT64(0xE5B906E77C8348A8)},
-	{0x3FFF, LIT64(0xE8396A503C4BDC68)},
-	{0x3FFF, LIT64(0xEAC0C6E7DD243930)},
-	{0x3FFF, LIT64(0xED4F301ED9942B84)},
-	{0x3FFF, LIT64(0xEFE4B99BDCDAF5CC)},
-	{0x3FFF, LIT64(0xF281773C59FFB138)},
-	{0x3FFF, LIT64(0xF5257D152486CC2C)},
-	{0x3FFF, LIT64(0xF7D0DF730AD13BB8)},
-	{0x3FFF, LIT64(0xFA83B2DB722A033C)},
-	{0x3FFF, LIT64(0xFD3E0C0CF486C174)}
-};
-
-static const float32 exp_tbl2[64] = {
-	0x00000000, 0x9F841A9B, 0x9FC1D5B9, 0xA0728369,
-	0x1FC5C95C, 0x1EE85C9F, 0x9FA20729, 0xA07BF9AF,
-	0xA0020DCF, 0x205A63DA, 0x1EB70051, 0x1F6EB029,
-	0xA0781494, 0x9EB319B0, 0x2017457D, 0x1F11D537,
-	0x9FB952DD, 0x1FE43087, 0x1FA2A818, 0x1FDE494D,
-	0x20504890, 0xA073691C, 0x1F9B7A05, 0xA0797126,
-	0xA071A140, 0x204F62DA, 0x1F283C4A, 0x9F9A7FDC,
-	0xA05B3FAC, 0x1FDF2610, 0x9F705F90, 0x201F678A,
-	0x1F32FB13, 0x20038B30, 0x200DC3CC, 0x9F8B2AE6,
-	0xA02BBF70, 0xA00BF518, 0xA041DD41, 0x9FDF137B,
-	0x201F1568, 0x1FC13A2E, 0xA03F8F03, 0x1FF4907D,
-	0x9E6E53E4, 0x1FD6D45C, 0xA076EDB9, 0x9FA6DE21,
-	0x1EE69A2F, 0x207F439F, 0x201EC207, 0x9E8BE175,
-	0x20032C4B, 0x2004DFF5, 0x1E72F47A, 0x1F722F22,
-	0xA017E945, 0x1F401A5B, 0x9FB9A9E3, 0x20744C05,
-	0x1F773A19, 0x1FFE90D5, 0xA041ED22, 0x1F853F3A
-};
-
-
-static const floatx80 exp2_tbl[64] = {
-	{0x3FFF, LIT64(0x8000000000000000)},
-	{0x3FFF, LIT64(0x8164D1F3BC030773)},
-	{0x3FFF, LIT64(0x82CD8698AC2BA1D7)},
-	{0x3FFF, LIT64(0x843A28C3ACDE4046)},
-	{0x3FFF, LIT64(0x85AAC367CC487B15)},
-	{0x3FFF, LIT64(0x871F61969E8D1010)},
-	{0x3FFF, LIT64(0x88980E8092DA8527)},
-	{0x3FFF, LIT64(0x8A14D575496EFD9A)},
-	{0x3FFF, LIT64(0x8B95C1E3EA8BD6E7)},
-	{0x3FFF, LIT64(0x8D1ADF5B7E5BA9E6)},
-	{0x3FFF, LIT64(0x8EA4398B45CD53C0)},
-	{0x3FFF, LIT64(0x9031DC431466B1DC)},
-	{0x3FFF, LIT64(0x91C3D373AB11C336)},
-	{0x3FFF, LIT64(0x935A2B2F13E6E92C)},
-	{0x3FFF, LIT64(0x94F4EFA8FEF70961)},
-	{0x3FFF, LIT64(0x96942D3720185A00)},
-	{0x3FFF, LIT64(0x9837F0518DB8A96F)},
-	{0x3FFF, LIT64(0x99E0459320B7FA65)},
-	{0x3FFF, LIT64(0x9B8D39B9D54E5539)},
-	{0x3FFF, LIT64(0x9D3ED9A72CFFB751)},
-	{0x3FFF, LIT64(0x9EF5326091A111AE)},
-	{0x3FFF, LIT64(0xA0B0510FB9714FC2)},
-	{0x3FFF, LIT64(0xA27043030C496819)},
-	{0x3FFF, LIT64(0xA43515AE09E6809E)},
-	{0x3FFF, LIT64(0xA5FED6A9B15138EA)},
-	{0x3FFF, LIT64(0xA7CD93B4E965356A)},
-	{0x3FFF, LIT64(0xA9A15AB4EA7C0EF8)},
-	{0x3FFF, LIT64(0xAB7A39B5A93ED337)},
-	{0x3FFF, LIT64(0xAD583EEA42A14AC6)},
-	{0x3FFF, LIT64(0xAF3B78AD690A4375)},
-	{0x3FFF, LIT64(0xB123F581D2AC2590)},
-	{0x3FFF, LIT64(0xB311C412A9112489)},
-	{0x3FFF, LIT64(0xB504F333F9DE6484)},
-	{0x3FFF, LIT64(0xB6FD91E328D17791)},
-	{0x3FFF, LIT64(0xB8FBAF4762FB9EE9)},
-	{0x3FFF, LIT64(0xBAFF5AB2133E45FB)},
-	{0x3FFF, LIT64(0xBD08A39F580C36BF)},
-	{0x3FFF, LIT64(0xBF1799B67A731083)},
-	{0x3FFF, LIT64(0xC12C4CCA66709456)},
-	{0x3FFF, LIT64(0xC346CCDA24976407)},
-	{0x3FFF, LIT64(0xC5672A115506DADD)},
-	{0x3FFF, LIT64(0xC78D74C8ABB9B15D)},
-	{0x3FFF, LIT64(0xC9B9BD866E2F27A3)},
-	{0x3FFF, LIT64(0xCBEC14FEF2727C5D)},
-	{0x3FFF, LIT64(0xCE248C151F8480E4)},
-	{0x3FFF, LIT64(0xD06333DAEF2B2595)},
-	{0x3FFF, LIT64(0xD2A81D91F12AE45A)},
-	{0x3FFF, LIT64(0xD4F35AABCFEDFA1F)},
-	{0x3FFF, LIT64(0xD744FCCAD69D6AF4)},
-	{0x3FFF, LIT64(0xD99D15C278AFD7B6)},
-	{0x3FFF, LIT64(0xDBFBB797DAF23755)},
-	{0x3FFF, LIT64(0xDE60F4825E0E9124)},
-	{0x3FFF, LIT64(0xE0CCDEEC2A94E111)},
-	{0x3FFF, LIT64(0xE33F8972BE8A5A51)},
-	{0x3FFF, LIT64(0xE5B906E77C8348A8)},
-	{0x3FFF, LIT64(0xE8396A503C4BDC68)},
-	{0x3FFF, LIT64(0xEAC0C6E7DD24392F)},
-	{0x3FFF, LIT64(0xED4F301ED9942B84)},
-	{0x3FFF, LIT64(0xEFE4B99BDCDAF5CB)},
-	{0x3FFF, LIT64(0xF281773C59FFB13A)},
-	{0x3FFF, LIT64(0xF5257D152486CC2C)},
-	{0x3FFF, LIT64(0xF7D0DF730AD13BB9)},
-	{0x3FFF, LIT64(0xFA83B2DB722A033A)},
-	{0x3FFF, LIT64(0xFD3E0C0CF486C175)}
-};
-
-
-static const float32 exp2_tbl2[64] = {
-	0x3F738000, 0x3FBEF7CA, 0x3FBDF8A9, 0x3FBCD7C9,
-	0xBFBDE8DA, 0x3FBDE85C, 0x3FBEBBF1, 0x3FBB80CA,
-	0xBFBA8373, 0xBFBE9670, 0x3FBDB700, 0x3FBEEEB0,
-	0x3FBBFD6D, 0xBFBDB319, 0x3FBDBA2B, 0x3FBE91D5,
-	0x3FBE8D5A, 0xBFBCDE7B, 0xBFBEBAAF, 0xBFBD86DA,
-	0xBFBEBEDD, 0x3FBCC96E, 0xBFBEC90B, 0x3FBBD1DB,
-	0x3FBCE5EB, 0xBFBEC274, 0x3FBEA83C, 0x3FBECB00,
-	0x3FBE9301, 0xBFBD8367, 0xBFBEF05F, 0x3FBDFB3C,
-	0x3FBEB2FB, 0x3FBAE2CB, 0x3FBCDC3C, 0x3FBEE9AA,
-	0xBFBEAEFD, 0xBFBCBF51, 0x3FBEF88A, 0x3FBD83B2,
-	0x3FBDF8AB, 0xBFBDFB17, 0xBFBEFE3C, 0xBFBBB6F8,
-	0xBFBCEE53, 0xBFBDA4AE, 0x3FBC9124, 0x3FBEB243,
-	0x3FBDE69A, 0xBFB8BC61, 0x3FBDF610, 0xBFBD8BE1,
-	0x3FBACB12, 0x3FBB9BFE, 0x3FBCF2F4, 0x3FBEF22F,
-	0xBFBDBF4A, 0x3FBEC01A, 0x3FBE8CAC, 0xBFBCBB3F,
-	0x3FBEF73A, 0xBFB8B795, 0x3FBEF84B, 0xBFBEF581
-};
-
-
-static const floatx80 log_tbl[128] = {
-	{0x3FFE, LIT64(0xFE03F80FE03F80FE)},
-	{0x3FF7, LIT64(0xFF015358833C47E2)},
-	{0x3FFE, LIT64(0xFA232CF252138AC0)},
-	{0x3FF9, LIT64(0xBDC8D83EAD88D549)},
-	{0x3FFE, LIT64(0xF6603D980F6603DA)},
-	{0x3FFA, LIT64(0x9CF43DCFF5EAFD48)},
-	{0x3FFE, LIT64(0xF2B9D6480F2B9D65)},
-	{0x3FFA, LIT64(0xDA16EB88CB8DF614)},
-	{0x3FFE, LIT64(0xEF2EB71FC4345238)},
-	{0x3FFB, LIT64(0x8B29B7751BD70743)},
-	{0x3FFE, LIT64(0xEBBDB2A5C1619C8C)},
-	{0x3FFB, LIT64(0xA8D839F830C1FB49)},
-	{0x3FFE, LIT64(0xE865AC7B7603A197)},
-	{0x3FFB, LIT64(0xC61A2EB18CD907AD)},
-	{0x3FFE, LIT64(0xE525982AF70C880E)},
-	{0x3FFB, LIT64(0xE2F2A47ADE3A18AF)},
-	{0x3FFE, LIT64(0xE1FC780E1FC780E2)},
-	{0x3FFB, LIT64(0xFF64898EDF55D551)},
-	{0x3FFE, LIT64(0xDEE95C4CA037BA57)},
-	{0x3FFC, LIT64(0x8DB956A97B3D0148)},
-	{0x3FFE, LIT64(0xDBEB61EED19C5958)},
-	{0x3FFC, LIT64(0x9B8FE100F47BA1DE)},
-	{0x3FFE, LIT64(0xD901B2036406C80E)},
-	{0x3FFC, LIT64(0xA9372F1D0DA1BD17)},
-	{0x3FFE, LIT64(0xD62B80D62B80D62C)},
-	{0x3FFC, LIT64(0xB6B07F38CE90E46B)},
-	{0x3FFE, LIT64(0xD3680D3680D3680D)},
-	{0x3FFC, LIT64(0xC3FD032906488481)},
-	{0x3FFE, LIT64(0xD0B69FCBD2580D0B)},
-	{0x3FFC, LIT64(0xD11DE0FF15AB18CA)},
-	{0x3FFE, LIT64(0xCE168A7725080CE1)},
-	{0x3FFC, LIT64(0xDE1433A16C66B150)},
-	{0x3FFE, LIT64(0xCB8727C065C393E0)},
-	{0x3FFC, LIT64(0xEAE10B5A7DDC8ADD)},
-	{0x3FFE, LIT64(0xC907DA4E871146AD)},
-	{0x3FFC, LIT64(0xF7856E5EE2C9B291)},
-	{0x3FFE, LIT64(0xC6980C6980C6980C)},
-	{0x3FFD, LIT64(0x82012CA5A68206D7)},
-	{0x3FFE, LIT64(0xC4372F855D824CA6)},
-	{0x3FFD, LIT64(0x882C5FCD7256A8C5)},
-	{0x3FFE, LIT64(0xC1E4BBD595F6E947)},
-	{0x3FFD, LIT64(0x8E44C60B4CCFD7DE)},
-	{0x3FFE, LIT64(0xBFA02FE80BFA02FF)},
-	{0x3FFD, LIT64(0x944AD09EF4351AF6)},
-	{0x3FFE, LIT64(0xBD69104707661AA3)},
-	{0x3FFD, LIT64(0x9A3EECD4C3EAA6B2)},
-	{0x3FFE, LIT64(0xBB3EE721A54D880C)},
-	{0x3FFD, LIT64(0xA0218434353F1DE8)},
-	{0x3FFE, LIT64(0xB92143FA36F5E02E)},
-	{0x3FFD, LIT64(0xA5F2FCABBBC506DA)},
-	{0x3FFE, LIT64(0xB70FBB5A19BE3659)},
-	{0x3FFD, LIT64(0xABB3B8BA2AD362A5)},
-	{0x3FFE, LIT64(0xB509E68A9B94821F)},
-	{0x3FFD, LIT64(0xB1641795CE3CA97B)},
-	{0x3FFE, LIT64(0xB30F63528917C80B)},
-	{0x3FFD, LIT64(0xB70475515D0F1C61)},
-	{0x3FFE, LIT64(0xB11FD3B80B11FD3C)},
-	{0x3FFD, LIT64(0xBC952AFEEA3D13E1)},
-	{0x3FFE, LIT64(0xAF3ADDC680AF3ADE)},
-	{0x3FFD, LIT64(0xC2168ED0F458BA4A)},
-	{0x3FFE, LIT64(0xAD602B580AD602B6)},
-	{0x3FFD, LIT64(0xC788F439B3163BF1)},
-	{0x3FFE, LIT64(0xAB8F69E28359CD11)},
-	{0x3FFD, LIT64(0xCCECAC08BF04565D)},
-	{0x3FFE, LIT64(0xA9C84A47A07F5638)},
-	{0x3FFD, LIT64(0xD24204872DD85160)},
-	{0x3FFE, LIT64(0xA80A80A80A80A80B)},
-	{0x3FFD, LIT64(0xD78949923BC3588A)},
-	{0x3FFE, LIT64(0xA655C4392D7B73A8)},
-	{0x3FFD, LIT64(0xDCC2C4B49887DACC)},
-	{0x3FFE, LIT64(0xA4A9CF1D96833751)},
-	{0x3FFD, LIT64(0xE1EEBD3E6D6A6B9E)},
-	{0x3FFE, LIT64(0xA3065E3FAE7CD0E0)},
-	{0x3FFD, LIT64(0xE70D785C2F9F5BDC)},
-	{0x3FFE, LIT64(0xA16B312EA8FC377D)},
-	{0x3FFD, LIT64(0xEC1F392C5179F283)},
-	{0x3FFE, LIT64(0x9FD809FD809FD80A)},
-	{0x3FFD, LIT64(0xF12440D3E36130E6)},
-	{0x3FFE, LIT64(0x9E4CAD23DD5F3A20)},
-	{0x3FFD, LIT64(0xF61CCE92346600BB)},
-	{0x3FFE, LIT64(0x9CC8E160C3FB19B9)},
-	{0x3FFD, LIT64(0xFB091FD38145630A)},
-	{0x3FFE, LIT64(0x9B4C6F9EF03A3CAA)},
-	{0x3FFD, LIT64(0xFFE97042BFA4C2AD)},
-	{0x3FFE, LIT64(0x99D722DABDE58F06)},
-	{0x3FFE, LIT64(0x825EFCED49369330)},
-	{0x3FFE, LIT64(0x9868C809868C8098)},
-	{0x3FFE, LIT64(0x84C37A7AB9A905C9)},
-	{0x3FFE, LIT64(0x97012E025C04B809)},
-	{0x3FFE, LIT64(0x87224C2E8E645FB7)},
-	{0x3FFE, LIT64(0x95A02568095A0257)},
-	{0x3FFE, LIT64(0x897B8CAC9F7DE298)},
-	{0x3FFE, LIT64(0x9445809445809446)},
-	{0x3FFE, LIT64(0x8BCF55DEC4CD05FE)},
-	{0x3FFE, LIT64(0x92F113840497889C)},
-	{0x3FFE, LIT64(0x8E1DC0FB89E125E5)},
-	{0x3FFE, LIT64(0x91A2B3C4D5E6F809)},
-	{0x3FFE, LIT64(0x9066E68C955B6C9B)},
-	{0x3FFE, LIT64(0x905A38633E06C43B)},
-	{0x3FFE, LIT64(0x92AADE74C7BE59E0)},
-	{0x3FFE, LIT64(0x8F1779D9FDC3A219)},
-	{0x3FFE, LIT64(0x94E9BFF615845643)},
-	{0x3FFE, LIT64(0x8DDA520237694809)},
-	{0x3FFE, LIT64(0x9723A1B720134203)},
-	{0x3FFE, LIT64(0x8CA29C046514E023)},
-	{0x3FFE, LIT64(0x995899C890EB8990)},
-	{0x3FFE, LIT64(0x8B70344A139BC75A)},
-	{0x3FFE, LIT64(0x9B88BDAA3A3DAE2F)},
-	{0x3FFE, LIT64(0x8A42F8705669DB46)},
-	{0x3FFE, LIT64(0x9DB4224FFFE1157C)},
-	{0x3FFE, LIT64(0x891AC73AE9819B50)},
-	{0x3FFE, LIT64(0x9FDADC268B7A12DA)},
-	{0x3FFE, LIT64(0x87F78087F78087F8)},
-	{0x3FFE, LIT64(0xA1FCFF17CE733BD4)},
-	{0x3FFE, LIT64(0x86D905447A34ACC6)},
-	{0x3FFE, LIT64(0xA41A9E8F5446FB9F)},
-	{0x3FFE, LIT64(0x85BF37612CEE3C9B)},
-	{0x3FFE, LIT64(0xA633CD7E6771CD8B)},
-	{0x3FFE, LIT64(0x84A9F9C8084A9F9D)},
-	{0x3FFE, LIT64(0xA8489E600B435A5E)},
-	{0x3FFE, LIT64(0x839930523FBE3368)},
-	{0x3FFE, LIT64(0xAA59233CCCA4BD49)},
-	{0x3FFE, LIT64(0x828CBFBEB9A020A3)},
-	{0x3FFE, LIT64(0xAC656DAE6BCC4985)},
-	{0x3FFE, LIT64(0x81848DA8FAF0D277)},
-	{0x3FFE, LIT64(0xAE6D8EE360BB2468)},
-	{0x3FFE, LIT64(0x8080808080808081)},
-	{0x3FFE, LIT64(0xB07197A23C46C654)}
-};
-
-
-static const floatx80 pi_tbl[65] = {
-	{0xC004, LIT64(0xC90FDAA22168C235)},
-	{0xC004, LIT64(0xC2C75BCD105D7C23)},
-	{0xC004, LIT64(0xBC7EDCF7FF523611)},
-	{0xC004, LIT64(0xB6365E22EE46F000)},
-	{0xC004, LIT64(0xAFEDDF4DDD3BA9EE)},
-	{0xC004, LIT64(0xA9A56078CC3063DD)},
-	{0xC004, LIT64(0xA35CE1A3BB251DCB)},
-	{0xC004, LIT64(0x9D1462CEAA19D7B9)},
-	{0xC004, LIT64(0x96CBE3F9990E91A8)},
-	{0xC004, LIT64(0x9083652488034B96)},
-	{0xC004, LIT64(0x8A3AE64F76F80584)},
-	{0xC004, LIT64(0x83F2677A65ECBF73)},
-	{0xC003, LIT64(0xFB53D14AA9C2F2C2)},
-	{0xC003, LIT64(0xEEC2D3A087AC669F)},
-	{0xC003, LIT64(0xE231D5F66595DA7B)},
-	{0xC003, LIT64(0xD5A0D84C437F4E58)},
-	{0xC003, LIT64(0xC90FDAA22168C235)},
-	{0xC003, LIT64(0xBC7EDCF7FF523611)},
-	{0xC003, LIT64(0xAFEDDF4DDD3BA9EE)},
-	{0xC003, LIT64(0xA35CE1A3BB251DCB)},
-	{0xC003, LIT64(0x96CBE3F9990E91A8)},
-	{0xC003, LIT64(0x8A3AE64F76F80584)},
-	{0xC002, LIT64(0xFB53D14AA9C2F2C2)},
-	{0xC002, LIT64(0xE231D5F66595DA7B)},
-	{0xC002, LIT64(0xC90FDAA22168C235)},
-	{0xC002, LIT64(0xAFEDDF4DDD3BA9EE)},
-	{0xC002, LIT64(0x96CBE3F9990E91A8)},
-	{0xC001, LIT64(0xFB53D14AA9C2F2C2)},
-	{0xC001, LIT64(0xC90FDAA22168C235)},
-	{0xC001, LIT64(0x96CBE3F9990E91A8)},
-	{0xC000, LIT64(0xC90FDAA22168C235)},
-	{0xBFFF, LIT64(0xC90FDAA22168C235)},
-	{0x0000, LIT64(0x0000000000000000)},
-	{0x3FFF, LIT64(0xC90FDAA22168C235)},
-	{0x4000, LIT64(0xC90FDAA22168C235)},
-	{0x4001, LIT64(0x96CBE3F9990E91A8)},
-	{0x4001, LIT64(0xC90FDAA22168C235)},
-	{0x4001, LIT64(0xFB53D14AA9C2F2C2)},
-	{0x4002, LIT64(0x96CBE3F9990E91A8)},
-	{0x4002, LIT64(0xAFEDDF4DDD3BA9EE)},
-	{0x4002, LIT64(0xC90FDAA22168C235)},
-	{0x4002, LIT64(0xE231D5F66595DA7B)},
-	{0x4002, LIT64(0xFB53D14AA9C2F2C2)},
-	{0x4003, LIT64(0x8A3AE64F76F80584)},
-	{0x4003, LIT64(0x96CBE3F9990E91A8)},
-	{0x4003, LIT64(0xA35CE1A3BB251DCB)},
-	{0x4003, LIT64(0xAFEDDF4DDD3BA9EE)},
-	{0x4003, LIT64(0xBC7EDCF7FF523611)},
-	{0x4003, LIT64(0xC90FDAA22168C235)},
-	{0x4003, LIT64(0xD5A0D84C437F4E58)},
-	{0x4003, LIT64(0xE231D5F66595DA7B)},
-	{0x4003, LIT64(0xEEC2D3A087AC669F)},
-	{0x4003, LIT64(0xFB53D14AA9C2F2C2)},
-	{0x4004, LIT64(0x83F2677A65ECBF73)},
-	{0x4004, LIT64(0x8A3AE64F76F80584)},
-	{0x4004, LIT64(0x9083652488034B96)},
-	{0x4004, LIT64(0x96CBE3F9990E91A8)},
-	{0x4004, LIT64(0x9D1462CEAA19D7B9)},
-	{0x4004, LIT64(0xA35CE1A3BB251DCB)},
-	{0x4004, LIT64(0xA9A56078CC3063DD)},
-	{0x4004, LIT64(0xAFEDDF4DDD3BA9EE)},
-	{0x4004, LIT64(0xB6365E22EE46F000)},
-	{0x4004, LIT64(0xBC7EDCF7FF523611)},
-	{0x4004, LIT64(0xC2C75BCD105D7C23)},
-	{0x4004, LIT64(0xC90FDAA22168C235)}
-};
-
-
-static const float32 pi_tbl2[65] = {
-	0x21800000, 0xA0D00000, 0xA1E80000, 0x21480000,
-	0xA1200000, 0x21FC0000, 0x21100000, 0xA1580000,
-	0x21E00000, 0x20B00000, 0xA1880000, 0x21C40000,
-	0x20000000, 0x21380000, 0xA1300000, 0x9FC00000,
-	0x21000000, 0xA1680000, 0xA0A00000, 0x20900000,
-	0x21600000, 0xA1080000, 0x1F800000, 0xA0B00000,
-	0x20800000, 0xA0200000, 0x20E00000, 0x1F000000,
-	0x20000000, 0x20600000, 0x1F800000, 0x1F000000,
-	0x00000000,
-	0x9F000000, 0x9F800000, 0xA0600000, 0xA0000000,
-	0x9F000000, 0xA0E00000, 0x20200000, 0xA0800000,
-	0x20B00000, 0x9F800000, 0x21080000, 0xA1600000,
-	0xA0900000, 0x20A00000, 0x21680000, 0xA1000000,
-	0x1FC00000, 0x21300000, 0xA1380000, 0xA0000000,
-	0xA1C40000, 0x21880000, 0xA0B00000, 0xA1E00000,
-	0x21580000, 0xA1100000, 0xA1FC0000, 0x21200000,
-	0xA1480000, 0x21E80000, 0x20D00000, 0xA1800000
-};