diff --git a/Makefile b/Makefile index d57b2675..55820d80 100644 --- a/Makefile +++ b/Makefile @@ -254,11 +254,6 @@ OBJS = \ src/filesys.o \ src/flashrom.o \ src/fpp.o \ - src/fpp_native.o \ - src/fpp_softfloat.o \ - src/softfloat/softfloat.o \ - src/softfloat/softfloat_decimal.o \ - src/softfloat/softfloat_fpsp.o \ src/fsdb.o \ src/fsdb_unix.o \ src/fsusage.o \ diff --git a/VisualGDB/Amiberry-sdl1/Amiberry-sdl1.vcxproj b/VisualGDB/Amiberry-sdl1/Amiberry-sdl1.vcxproj index 87540838..7927d4cb 100644 --- a/VisualGDB/Amiberry-sdl1/Amiberry-sdl1.vcxproj +++ b/VisualGDB/Amiberry-sdl1/Amiberry-sdl1.vcxproj @@ -192,10 +192,6 @@ - - - - @@ -295,7 +291,6 @@ - @@ -372,9 +367,6 @@ - - - diff --git a/VisualGDB/Amiberry-sdl1/Amiberry-sdl1.vcxproj.filters b/VisualGDB/Amiberry-sdl1/Amiberry-sdl1.vcxproj.filters index a695db3a..3b323e10 100644 --- a/VisualGDB/Amiberry-sdl1/Amiberry-sdl1.vcxproj.filters +++ b/VisualGDB/Amiberry-sdl1/Amiberry-sdl1.vcxproj.filters @@ -54,9 +54,6 @@ {d946fd2c-30b2-45d3-9999-ccc3749160b7} - - {628a02d1-51f3-4021-81e5-6103ddf96904} - {49dfa14b-d5bf-4aa3-a660-12f97ae62bdb} @@ -188,9 +185,6 @@ Source files - - Source files - Source files @@ -658,18 +652,6 @@ Source files\osdep\gui - - Source files\softfloat - - - Source files\softfloat - - - Source files\softfloat - - - Source files\softfloat - Source files\sounddep @@ -1007,15 +989,6 @@ Source files\osdep\gui - - Source files\softfloat - - - Source files\softfloat - - - Source files\softfloat - Source files\sounddep diff --git a/VisualGDB/Amiberry/Amiberry-Debug-dispmanx.vgdbsettings b/VisualGDB/Amiberry/Amiberry-Debug-dispmanx.vgdbsettings index 6f745d46..b60ba156 100644 --- a/VisualGDB/Amiberry/Amiberry-Debug-dispmanx.vgdbsettings +++ b/VisualGDB/Amiberry/Amiberry-Debug-dispmanx.vgdbsettings @@ -58,7 +58,6 @@ Amiberry.vcxproj - 1 true diff --git a/VisualGDB/Amiberry/Amiberry-Debug.vgdbsettings.midwan.user b/VisualGDB/Amiberry/Amiberry-Debug.vgdbsettings.midwan.user index be8d5823..5a415744 100644 --- 
a/VisualGDB/Amiberry/Amiberry-Debug.vgdbsettings.midwan.user +++ b/VisualGDB/Amiberry/Amiberry-Debug.vgdbsettings.midwan.user @@ -11,6 +11,7 @@ false + false diff --git a/VisualGDB/Amiberry/Amiberry-Release.vgdbsettings b/VisualGDB/Amiberry/Amiberry-Release.vgdbsettings index bf520402..073ccb58 100644 --- a/VisualGDB/Amiberry/Amiberry-Release.vgdbsettings +++ b/VisualGDB/Amiberry/Amiberry-Release.vgdbsettings @@ -35,6 +35,7 @@ true true + false true @@ -57,7 +58,6 @@ Amiberry.vcxproj - 1 true diff --git a/VisualGDB/Amiberry/Amiberry.vcxproj b/VisualGDB/Amiberry/Amiberry.vcxproj index 7d3a93ce..7a68aba6 100644 --- a/VisualGDB/Amiberry/Amiberry.vcxproj +++ b/VisualGDB/Amiberry/Amiberry.vcxproj @@ -67,7 +67,7 @@ ;%(Link.AdditionalLinkerInputs) =/usr/local/lib;../../src/guisan/lib;%(Link.LibrarySearchDirectories) - SDL2;pthread;z;png;rt;xml2;FLAC;mpg123;dl;mpeg2convert;mpeg2;SDL2_image;SDL2_ttf;guisan;m;%(Link.AdditionalLibraryNames) + SDL2;pthread;z;png;rt;xml2;FLAC;mpg123;dl;mpeg2convert;mpeg2;SDL2_image;SDL2_ttf;guisan;%(Link.AdditionalLibraryNames) @@ -90,7 +90,7 @@ GNUPP14 =/usr/local/include/SDL2;=/usr/include/libxml2;../../src;../../src/osdep;../../src/threaddep;../../src/include;../../src/guisan/include;../../src/archivers;%(ClCompile.AdditionalIncludeDirectories) - NDEBUG=1;RELEASE=1;ARMV6T2;USE_ARMNEON;_REENTRANT;AMIBERRY;CPU_arm;ARMV6_ASSEMBLY;USE_SDL2;%(ClCompile.PreprocessorDefinitions) + NDEBUG=1;RELEASE=1;ARMV6T2;USE_ARMNEON;_REENTRANT;AMIBERRY;CPU_arm;ARMV6_ASSEMBLY;USE_SDL2;USE_RENDER_THREAD;%(ClCompile.PreprocessorDefinitions) -march=armv8-a -mfpu=neon-fp-armv8 -mfloat-abi=hard %(AdditionalOptions) @@ -207,8 +207,6 @@ - - @@ -284,9 +282,6 @@ - - - @@ -434,10 +429,6 @@ - - - - diff --git a/VisualGDB/Amiberry/Amiberry.vcxproj.filters b/VisualGDB/Amiberry/Amiberry.vcxproj.filters index b470a3d5..affb2914 100644 --- a/VisualGDB/Amiberry/Amiberry.vcxproj.filters +++ b/VisualGDB/Amiberry/Amiberry.vcxproj.filters @@ -18,9 +18,6 @@ 
{406f7c18-2b0e-4564-8646-fdaef3089f65} - - {6e21b349-366f-4684-bb77-ead2ccf9c8f4} - {29512242-0e9f-4bfa-b302-f46f792e55cd} @@ -196,12 +193,6 @@ Source files - - Source files - - - Source files - Source files @@ -283,15 +274,6 @@ Source files\sounddep - - Source files\softfloat - - - Source files\softfloat - - - Source files\softfloat - Source files\osdep @@ -633,18 +615,6 @@ Source files\sounddep - - Source files\softfloat - - - Source files\softfloat - - - Source files\softfloat - - - Source files\softfloat - Source files\osdep diff --git a/VisualGDB/genlinetoscr/genlinetoscr-Debug.vgdbsettings b/VisualGDB/genlinetoscr/genlinetoscr-Debug.vgdbsettings index cc9c078f..3ea8cd2e 100644 --- a/VisualGDB/genlinetoscr/genlinetoscr-Debug.vgdbsettings +++ b/VisualGDB/genlinetoscr/genlinetoscr-Debug.vgdbsettings @@ -60,7 +60,6 @@ C:\SysGCC\raspberry\bin;%PATH% - 1 true diff --git a/VisualGDB/genlinetoscr/genlinetoscr-Release.vgdbsettings b/VisualGDB/genlinetoscr/genlinetoscr-Release.vgdbsettings index 608955e8..e1158903 100644 --- a/VisualGDB/genlinetoscr/genlinetoscr-Release.vgdbsettings +++ b/VisualGDB/genlinetoscr/genlinetoscr-Release.vgdbsettings @@ -60,7 +60,6 @@ C:\SysGCC\raspberry\bin;%PATH% - 1 true diff --git a/VisualGDB/genlinetoscr/genlinetoscr.vcxproj b/VisualGDB/genlinetoscr/genlinetoscr.vcxproj index 472bce97..37db1d85 100644 --- a/VisualGDB/genlinetoscr/genlinetoscr.vcxproj +++ b/VisualGDB/genlinetoscr/genlinetoscr.vcxproj @@ -35,26 +35,26 @@ GNUPP14 - C:\SysGCC\raspberry\lib\gcc\arm-linux-gnueabihf\4.9\include;C:\SysGCC\raspberry\arm-linux-gnueabihf\include\c++\4.9;C:\SysGCC\raspberry\arm-linux-gnueabihf\include;C:\SysGCC\raspberry\arm-linux-gnueabihf\sysroot\usr\include;../../src/include;../../src;../../src/osdep;%(ClCompile.AdditionalIncludeDirectories) + 
C:\SysGCC\raspberry\lib\gcc\arm-linux-gnueabihf\4.9\include;C:\SysGCC\raspberry\arm-linux-gnueabihf\include\c++\4.9;C:\SysGCC\raspberry\arm-linux-gnueabihf\include;C:\SysGCC\raspberry\arm-linux-gnueabihf\sysroot\usr\include;../../src/include;../../src;../../src/osdep;=/usr/local/include/SDL2;%(ClCompile.AdditionalIncludeDirectories) DEBUG=1;%(ClCompile.PreprocessorDefinitions) ;%(Link.AdditionalLinkerInputs) - ;%(Link.LibrarySearchDirectories) - ;%(Link.AdditionalLibraryNames) + =/usr/local/lib;%(Link.LibrarySearchDirectories) + SDL2;%(Link.AdditionalLibraryNames) GNUPP14 - C:\SysGCC\raspberry\lib\gcc\arm-linux-gnueabihf\4.9\include;C:\SysGCC\raspberry\arm-linux-gnueabihf\include\c++\4.9;C:\SysGCC\raspberry\arm-linux-gnueabihf\include;C:\SysGCC\raspberry\arm-linux-gnueabihf\sysroot\usr\include;../../src/include;../../src;../../src/osdep;%(ClCompile.AdditionalIncludeDirectories) + C:\SysGCC\raspberry\lib\gcc\arm-linux-gnueabihf\4.9\include;C:\SysGCC\raspberry\arm-linux-gnueabihf\include\c++\4.9;C:\SysGCC\raspberry\arm-linux-gnueabihf\include;C:\SysGCC\raspberry\arm-linux-gnueabihf\sysroot\usr\include;../../src/include;../../src;../../src/osdep;=/usr/local/include/SDL2;%(ClCompile.AdditionalIncludeDirectories) NDEBUG=1;RELEASE=1;%(ClCompile.PreprocessorDefinitions) ;%(Link.AdditionalLinkerInputs) - ;%(Link.LibrarySearchDirectories) - ;%(Link.AdditionalLibraryNames) + =/usr/local/lib;%(Link.LibrarySearchDirectories) + SDL2;%(Link.AdditionalLibraryNames) diff --git a/src/cfgfile.cpp b/src/cfgfile.cpp index 08d2469f..bacd0935 100644 --- a/src/cfgfile.cpp +++ b/src/cfgfile.cpp @@ -197,13 +197,17 @@ static const TCHAR* obsolete[] = { _T("avoid_vid"), _T("avoid_dga"), _T("z3chipmem_size"), _T("state_replay_buffer"), _T("state_replay"), _T("z3realmapping"), _T("force_0x10000000_z3"), _T("fpu_arithmetic_exceptions"), + _T("gfx_filter_vert_zoom"),_T("gfx_filter_horiz_zoom"), _T("gfx_filter_vert_zoom_mult"), _T("gfx_filter_horiz_zoom_mult"), _T("gfx_filter_vert_offset"), 
_T("gfx_filter_horiz_offset"), + _T("pcibridge_rom_file"), _T("pcibridge_rom_options"), + _T("cpuboard_ext_rom_file"), _T("uaeboard_mode"), + _T("comp_oldsegv"), _T("comp_midopt"), _T("comp_lowopt"), @@ -1433,8 +1437,10 @@ void cfgfile_save_options(struct zfile* f, struct uae_prefs* p, int type) cfgfile_dwrite_bool(f, _T("fpu_no_unimplemented"), p->fpu_no_unimplemented); cfgfile_write_bool(f, _T("fpu_strict"), p->fpu_strict); - cfgfile_dwrite_bool(f, _T("fpu_softfloat"), p->fpu_softfloat); +#ifdef USE_JIT_FPU + cfgfile_write_bool(f, _T("compfpu"), p->compfpu); +#endif cfgfile_write(f, _T("cachesize"), _T("%d"), p->cachesize); cfg_write(_T("; "), f); @@ -3566,11 +3572,14 @@ static int cfgfile_parse_hardware(struct uae_prefs* p, const TCHAR* option, TCHA || cfgfile_yesno(option, value, _T("ksmirror_a8"), &p->cs_ksmirror_a8) || cfgfile_yesno(option, value, _T("cia_todbug"), &p->cs_ciatodbug) || cfgfile_yesno(option, value, _T("z3_autoconfig"), &p->cs_z3autoconfig) + || cfgfile_yesno(option, value, _T("ntsc"), &p->ntscmode) || cfgfile_yesno(option, value, _T("cpu_compatible"), &p->cpu_compatible) || cfgfile_yesno(option, value, _T("cpu_24bit_addressing"), &p->address_space_24) || cfgfile_yesno(option, value, _T("fpu_strict"), &p->fpu_strict) - || cfgfile_yesno(option, value, _T("fpu_softfloat"), &p->fpu_softfloat) +#ifdef USE_JIT_FPU + || cfgfile_yesno(option, value, _T("compfpu"), &p->compfpu) +#endif || cfgfile_yesno(option, value, _T("floppy_write_protect"), &p->floppy_read_only) || cfgfile_yesno(option, value, _T("harddrive_write_protect"), &p->harddrive_read_only)) return 1; @@ -5165,6 +5174,11 @@ void default_prefs(struct uae_prefs* p, bool reset, int type) p->sound_filter_type = 0; p->sound_volume_cd = 20; +#ifdef USE_JIT_FPU + p->compfpu = 1; +#else + p->compfpu = 0; +#endif p->cachesize = 0; p->gfx_framerate = 1; @@ -5223,7 +5237,6 @@ void default_prefs(struct uae_prefs* p, bool reset, int type) p->cpu_model = 68000; p->fpu_no_unimplemented = false; 
p->fpu_strict = false; - p->fpu_softfloat = false; p->m68k_speed = 0; p->cpu_compatible = false; p->address_space_24 = true; diff --git a/src/custom.cpp b/src/custom.cpp index 7dfab9ed..860947cd 100644 --- a/src/custom.cpp +++ b/src/custom.cpp @@ -313,7 +313,6 @@ struct color_change *curr_color_changes = 0; struct decision line_decisions[2 * (MAXVPOS + 2) + 1]; struct draw_info curr_drawinfo[2 * (MAXVPOS + 2) + 1]; -#define COLOR_TABLE_SIZE (MAXVPOS + 2) * 2 struct color_entry curr_color_tables[COLOR_TABLE_SIZE]; static int next_sprite_entry = 0; diff --git a/src/fpp.cpp b/src/fpp.cpp index 88b6b40d..d94b29b6 100644 --- a/src/fpp.cpp +++ b/src/fpp.cpp @@ -10,9 +10,9 @@ #define __USE_ISOC9X /* We might be able to pick up a NaN */ -#include +#include #include -#include +#include #include "sysconfig.h" #include "sysdeps.h" @@ -27,92 +27,9 @@ #include "savestate.h" #include "cpu_prefetch.h" -#include "softfloat/softfloat.h" +void fpsr_set_exception(uae_u32 exception); -FPP_PRINT fpp_print; - -FPP_IS fpp_is_snan; -FPP_IS fpp_unset_snan; -FPP_IS fpp_is_nan; -FPP_IS fpp_is_infinity; -FPP_IS fpp_is_zero; -FPP_IS fpp_is_neg; -FPP_IS fpp_is_denormal; -FPP_IS fpp_is_unnormal; - -FPP_GET_STATUS fpp_get_status; -FPP_CLEAR_STATUS fpp_clear_status; -FPP_SET_MODE fpp_set_mode; - -FPP_FROM_NATIVE fpp_from_native; -FPP_TO_NATIVE fpp_to_native; - -FPP_TO_INT fpp_to_int; -FPP_FROM_INT fpp_from_int; - -FPP_PACK fpp_to_pack; -FPP_PACK fpp_from_pack; - -FPP_TO_SINGLE fpp_to_single; -FPP_FROM_SINGLE fpp_from_single; -FPP_TO_DOUBLE fpp_to_double; -FPP_FROM_DOUBLE fpp_from_double; -FPP_TO_EXTEN fpp_to_exten; -FPP_FROM_EXTEN fpp_from_exten; -FPP_TO_EXTEN fpp_to_exten_fmovem; -FPP_FROM_EXTEN fpp_from_exten_fmovem; - -FPP_A fpp_normalize; -FPP_DENORMALIZE fpp_denormalize; -FPP_A fpp_get_internal_overflow; -FPP_A fpp_get_internal_underflow; -FPP_A fpp_get_internal_round_all; -FPP_A fpp_get_internal_round; -FPP_A fpp_get_internal_round_exten; -FPP_A fpp_get_internal; -FPP_GET32 
fpp_get_internal_grs; - -FPP_A fpp_round_single; -FPP_A fpp_round_double; -FPP_A fpp_round32; -FPP_A fpp_round64; -FPP_AB fpp_int; -FPP_AB fpp_sinh; -FPP_AB fpp_intrz; -FPP_ABP fpp_sqrt; -FPP_AB fpp_lognp1; -FPP_AB fpp_etoxm1; -FPP_AB fpp_tanh; -FPP_AB fpp_atan; -FPP_AB fpp_atanh; -FPP_AB fpp_sin; -FPP_AB fpp_asin; -FPP_AB fpp_tan; -FPP_AB fpp_etox; -FPP_AB fpp_twotox; -FPP_AB fpp_tentox; -FPP_AB fpp_logn; -FPP_AB fpp_log10; -FPP_AB fpp_log2; -FPP_ABP fpp_abs; -FPP_AB fpp_cosh; -FPP_ABP fpp_neg; -FPP_AB fpp_acos; -FPP_AB fpp_cos; -FPP_AB fpp_getexp; -FPP_AB fpp_getman; -FPP_ABP fpp_div; -FPP_ABQS fpp_mod; -FPP_ABP fpp_add; -FPP_ABP fpp_mul; -FPP_ABQS fpp_rem; -FPP_AB fpp_scale; -FPP_ABP fpp_sub; -FPP_AB fpp_sgldiv; -FPP_AB fpp_sglmul; -FPP_AB fpp_cmp; -FPP_AB fpp_tst; -FPP_ABP fpp_move; +#include "fpp_native.cpp" #define DEBUG_FPP 0 #define EXCEPTION_FPP 0 @@ -313,10 +230,6 @@ static uae_u32 get_ftag(fpdata *src, int size) { if (fpp_is_zero(src)) { return 1; // ZERO - } else if (fpp_is_unnormal(src) || fpp_is_denormal(src)) { - if (size == 1 || size == 5) - return 5; // Single/double DENORMAL - return 4; // Extended DENORMAL or UNNORMAL } else if (fpp_is_nan(src)) { return 3; // NAN } else if (fpp_is_infinity(src)) { @@ -332,16 +245,6 @@ STATIC_INLINE bool fp_is_dyadic(uae_u16 extra) static bool fp_exception_pending(bool pre) { - // first check for pending arithmetic exceptions - if (currprefs.fpu_softfloat) { - if (regs.fp_exp_pend) { - regs.fpu_exp_pre = pre; - Exception(regs.fp_exp_pend); - if (currprefs.fpu_model != 68882) - regs.fp_exp_pend = 0; - return true; - } - } // no arithmetic exceptions pending, check for unimplemented datatype if (regs.fp_unimp_pend) { regs.fpu_exp_pre = pre; @@ -381,136 +284,11 @@ static uae_u32 fpsr_get_vector(uae_u32 exception) return 0; } -static void fpsr_check_arithmetic_exception(uae_u32 mask, fpdata *src, uae_u32 opcode, uae_u16 extra, uae_u32 ea) -{ - if (!currprefs.fpu_softfloat) - return; - - bool nonmaskable; - uae_u32 
exception; - // Any exception status bit and matching exception enable bits set? - exception = regs.fpsr & regs.fpcr & 0xff00; - // Add 68040/68060 nonmaskable exceptions - if (currprefs.cpu_model >= 68040 && currprefs.fpu_model) - exception |= regs.fpsr & (FPSR_OVFL | FPSR_UNFL | mask); - - if (exception) { - regs.fp_exp_pend = fpsr_get_vector(exception); - nonmaskable = (regs.fp_exp_pend != fpsr_get_vector(regs.fpsr & regs.fpcr)); - - if (!currprefs.fpu_softfloat) { - // log message and exit - regs.fp_exp_pend = 0; - return; - } - - regs.fp_opword = opcode; - regs.fp_ea = ea; - - // data for FSAVE stack frame - fpdata eo; - uae_u32 opclass = (extra >> 13) & 7; - - reset_fsave_data(); - - if (currprefs.fpu_model == 68881 || currprefs.fpu_model == 68882) { - // fsave data for 68881 and 68882 - - if (opclass == 3) { // 011 - fsave_data.ccr = ((uae_u32)extra << 16) | extra; - } else { // 000 or 010 - fsave_data.ccr = ((uae_u32)(opcode | 0x0080) << 16) | extra; - } - if (regs.fp_exp_pend == 54 || regs.fp_exp_pend == 52 || regs.fp_exp_pend == 50) { // SNAN, OPERR, DZ - fpp_from_exten_fmovem(src, &fsave_data.eo[0], &fsave_data.eo[1], &fsave_data.eo[2]); - if (regs.fp_exp_pend == 52 && opclass == 3) { // OPERR from move to integer or packed - fsave_data.eo[0] &= 0x4fff0000; - fsave_data.eo[1] = fsave_data.eo[2] = 0; - } - } else if (regs.fp_exp_pend == 53) { // OVFL - fpp_get_internal_overflow(&eo); - fpp_from_exten_fmovem(&eo, &fsave_data.eo[0], &fsave_data.eo[1], &fsave_data.eo[2]); - } else if (regs.fp_exp_pend == 51) { // UNFL - fpp_get_internal_underflow(&eo); - fpp_from_exten_fmovem(&eo, &fsave_data.eo[0], &fsave_data.eo[1], &fsave_data.eo[2]); - } // else INEX1, INEX2: do nothing - - } else { - // fsave data for 68040 - regs.fpu_exp_state = 1; // 68040 UNIMP frame - - uae_u32 reg = (extra >> 7) & 7; - int size = (extra >> 10) & 7; - - fsave_data.fpiarcu = regs.fpiar; - - if (regs.fp_exp_pend == 54) { // SNAN (undocumented) - fsave_data.wbte15 = 1; - fsave_data.grs 
= 7; - } else { - fsave_data.grs = 1; - } - - if (opclass == 3) { // OPCLASS 011 - fsave_data.cmdreg1b = extra; - fsave_data.e1 = 1; - fsave_data.t = 1; - fsave_data.wbte15 = (regs.fp_exp_pend == 51 || regs.fp_exp_pend == 54) ? 1 : 0; // UNFL, SNAN - - if (fpp_is_snan(src)) { - fpp_unset_snan(src); - } - fpp_from_exten_fmovem(src, &fsave_data.et[0], &fsave_data.et[1], &fsave_data.et[2]); - fsave_data.stag = get_ftag(src, -1); - } else { // OPCLASS 000 and 010 - fsave_data.cmdreg1b = extra; - fsave_data.e1 = 1; - fsave_data.wbte15 = (regs.fp_exp_pend == 54) ? 1 : 0; // SNAN (undocumented) - - if (regs.fp_exp_pend == 51 || regs.fp_exp_pend == 53 || regs.fp_exp_pend == 49) { // UNFL, OVFL, INEX - if ((extra & 0x30) == 0x20 || (extra & 0x3f) == 0x04) { // FADD, FSUB, FMUL, FDIV, FSQRT - regs.fpu_exp_state = 2; // 68040 BUSY frame - fsave_data.e3 = 1; - fsave_data.e1 = 0; - fsave_data.cmdreg3b = (extra & 0x3C3) | ((extra & 0x038)>>1) | ((extra & 0x004)<<3); - if (regs.fp_exp_pend == 51) { // UNFL - fpp_get_internal(&eo); - } else { // OVFL, INEX - fpp_get_internal_round(&eo); - } - fsave_data.grs = fpp_get_internal_grs(); - fpp_from_exten_fmovem(&eo, &fsave_data.wbt[0], &fsave_data.wbt[1], &fsave_data.wbt[2]); - fsave_data.wbte15 = (regs.fp_exp_pend == 51) ? 1 : 0; // UNFL - // src and dst is stored (undocumented) - fpp_from_exten_fmovem(src, &fsave_data.et[0], &fsave_data.et[1], &fsave_data.et[2]); - fsave_data.stag = get_ftag(src, (opclass == 0) ? 
-1 : size); - if (fp_is_dyadic(extra)) { - fpp_from_exten_fmovem(®s.fp[reg], &fsave_data.fpt[0], &fsave_data.fpt[1], &fsave_data.fpt[2]); - fsave_data.dtag = get_ftag(®s.fp[reg], -1); - } - } else { // FMOVE to register, FABS, FNEG - fpp_get_internal_round_exten(&eo); - fsave_data.grs = fpp_get_internal_grs(); - fpp_from_exten_fmovem(&eo, &fsave_data.fpt[0], &fsave_data.fpt[1], &fsave_data.fpt[2]); - fpp_get_internal_round_all(&eo); // weird - fpp_from_exten_fmovem(&eo, &fsave_data.et[0], &fsave_data.et[1], &fsave_data.et[2]); // undocumented - fsave_data.stag = get_ftag(src, (opclass == 0) ? -1 : size); - } - } else { // SNAN, OPERR, DZ - fpp_from_exten_fmovem(src, &fsave_data.et[0], &fsave_data.et[1], &fsave_data.et[2]); - fsave_data.stag = get_ftag(src, (opclass == 0) ? -1 : size); - if (fp_is_dyadic(extra)) { - fpp_from_exten_fmovem(®s.fp[reg], &fsave_data.fpt[0], &fsave_data.fpt[1], &fsave_data.fpt[2]); - fsave_data.dtag = get_ftag(®s.fp[reg], -1); - } - } - } - } - } -} - static void fpsr_set_result(fpdata *result) { +#ifdef JIT + regs.fp_result = *result; +#endif // condition code byte regs.fpsr &= 0x00fffff8; // clear cc if (fpp_is_nan (result)) { @@ -527,18 +305,10 @@ static void fpsr_clear_status(void) { // clear exception status byte only regs.fpsr &= 0x0fff00f8; - - // clear external status - fpp_clear_status(); } static uae_u32 fpsr_make_status(void) { - uae_u32 exception; - - // get external status - fpp_get_status(®s.fpsr); - // update accrued exception byte if (regs.fpsr & (FPSR_BSUN | FPSR_SNAN | FPSR_OPERR)) regs.fpsr |= FPSR_AE_IOP; // IOP = BSUN || SNAN || OPERR @@ -551,15 +321,7 @@ static uae_u32 fpsr_make_status(void) if (regs.fpsr & (FPSR_OVFL | FPSR_INEX2 | FPSR_INEX1)) regs.fpsr |= FPSR_AE_INEX; // INEX = INEX1 || INEX2 || OVFL - if (!currprefs.fpu_softfloat) - return 0; - - // return exceptions that interrupt calculation - exception = regs.fpsr & regs.fpcr & (FPSR_SNAN | FPSR_OPERR | FPSR_DZ); - if (currprefs.cpu_model >= 68040 && 
currprefs.fpu_model) - exception |= regs.fpsr & (FPSR_OVFL | FPSR_UNFL); - - return exception; + return 0; } static int fpsr_set_bsun(void) @@ -567,15 +329,6 @@ static int fpsr_set_bsun(void) regs.fpsr |= FPSR_BSUN; regs.fpsr |= FPSR_AE_IOP; - if (regs.fpcr & FPSR_BSUN) { - // logging only so far - write_log (_T("FPU exception: BSUN! (FPSR: %08x, FPCR: %04x)\n"), regs.fpsr, regs.fpcr); - if (currprefs.fpu_softfloat) { - regs.fp_exp_pend = fpsr_get_vector(FPSR_BSUN); - fp_exception_pending(true); - return 1; - } - } return 0; } @@ -591,8 +344,22 @@ static void fpsr_get_quotient(uae_u64 *quot, uae_u8 *sign) *sign = (regs.fpsr & FPSR_QUOT_SIGN) ? 1 : 0; } -uae_u32 fpp_get_fpsr (void) +static uae_u32 fpp_get_fpsr (void) { +#ifdef JIT + if (currprefs.cachesize && currprefs.compfpu) { + regs.fpsr &= 0x00fffff8; // clear cc + if (fpp_is_nan (®s.fp_result)) { + regs.fpsr |= FPSR_CC_NAN; + } else if (fpp_is_zero(®s.fp_result)) { + regs.fpsr |= FPSR_CC_Z; + } else if (fpp_is_infinity (®s.fp_result)) { + regs.fpsr |= FPSR_CC_I; + } + if (fpp_is_neg(®s.fp_result)) + regs.fpsr |= FPSR_CC_N; + } +#endif return regs.fpsr; } @@ -619,9 +386,23 @@ static void fpset (fpdata *fpd, uae_s32 val) static void fpp_set_fpsr (uae_u32 val) { regs.fpsr = val; + +#ifdef JIT + // check comment in fpp_cond + if (currprefs.cachesize && currprefs.compfpu) { + if (val & 0x01000000) + fpnan(®s.fp_result); + else if (val & 0x04000000) + fpset(®s.fp_result, 0); + else if (val & 0x08000000) + fpset(®s.fp_result, -1); + else + fpset(®s.fp_result, 1); + } +#endif } -bool fpu_get_constant(fpdata *fpd, int cr) +static bool fpu_get_constant(fpdata *fpd, int cr) { uae_u32 f[3] = { 0, 0, 0 }; int entry = 0; @@ -739,16 +520,16 @@ bool fpu_get_constant(fpdata *fpd, int cr) } } } - fpp_to_exten_fmovem(fpd, f[0], f[1], f[2]); + fpp_to_exten(fpd, f[0], f[1], f[2]); if (prec == 1) fpp_round32(fpd); if (prec >= 2) fpp_round64(fpd); if (f1_adjust) { - fpp_from_exten_fmovem(fpd, &f[0], &f[1], &f[2]); + 
fpp_from_exten(fpd, &f[0], &f[1], &f[2]); f[1] += f1_adjust * 0x80; - fpp_to_exten_fmovem(fpd, f[0], f[1], f[2]); + fpp_to_exten(fpd, f[0], f[1], f[2]); } fpsr_set_result(fpd); @@ -767,7 +548,7 @@ bool fpu_get_constant(fpdata *fpd, int cr) f[2] += fpp_cr[entry].rndoff[mode]; } - fpp_to_exten_fmovem(fpd, f[0], f[1], f[2]); + fpp_to_exten(fpd, f[0], f[1], f[2]); if (prec == 1) fpp_round32(fpd); @@ -795,10 +576,10 @@ static void fp_unimp_instruction(uae_u16 opcode, uae_u16 extra, uae_u32 ea, uaec reset_fsave_data(); fsave_data.cmdreg3b = (extra & 0x3C3) | ((extra & 0x038) >> 1) | ((extra & 0x004) << 3); fsave_data.cmdreg1b = extra; - fpp_from_exten_fmovem(src, &fsave_data.et[0], &fsave_data.et[1], &fsave_data.et[2]); + fpp_from_exten(src, &fsave_data.et[0], &fsave_data.et[1], &fsave_data.et[2]); fsave_data.stag = get_ftag(src, size); if (reg >= 0) { - fpp_from_exten_fmovem(®s.fp[reg], &fsave_data.fpt[0], &fsave_data.fpt[1], &fsave_data.fpt[2]); + fpp_from_exten(®s.fp[reg], &fsave_data.fpt[0], &fsave_data.fpt[1], &fsave_data.fpt[2]); fsave_data.dtag = get_ftag(®s.fp[reg], -1); } } @@ -838,9 +619,9 @@ static void fp_unimp_datatype(uae_u16 opcode, uae_u16 extra, uae_u32 ea, uaecptr } if (opclass == 3) { // OPCLASS 011 fsave_data.t = 1; - fpp_from_exten_fmovem(src, &fsave_data.et[0], &fsave_data.et[1], &fsave_data.et[2]); + fpp_from_exten(src, &fsave_data.et[0], &fsave_data.et[1], &fsave_data.et[2]); fsave_data.stag = get_ftag(src, -1); - fpp_from_exten_fmovem(src, &fsave_data.fpt[0], &fsave_data.fpt[1], &fsave_data.fpt[2]); // undocumented + fpp_from_exten(src, &fsave_data.fpt[0], &fsave_data.fpt[1], &fsave_data.fpt[2]); // undocumented fsave_data.dtag = get_ftag(src, -1); // undocumented } else { // OPCLASS 000 and 010 if (packed) { @@ -850,13 +631,13 @@ static void fp_unimp_datatype(uae_u16 opcode, uae_u16 extra, uae_u32 ea, uaecptr fsave_data.et[2] = packed[2]; fsave_data.stag = 7; // undocumented } else { - fpp_from_exten_fmovem(src, &fsave_data.et[0], 
&fsave_data.et[1], &fsave_data.et[2]); + fpp_from_exten(src, &fsave_data.et[0], &fsave_data.et[1], &fsave_data.et[2]); fsave_data.stag = get_ftag(src, (opclass == 0) ? -1 : size); if (fsave_data.stag == 5) { fsave_data.et[0] = (size == 1) ? 0x3f800000 : 0x3c000000; // exponent for denormalized single and double } if (fp_is_dyadic(extra)) { - fpp_from_exten_fmovem(®s.fp[reg], &fsave_data.fpt[0], &fsave_data.fpt[1], &fsave_data.fpt[2]); + fpp_from_exten(®s.fp[reg], &fsave_data.fpt[0], &fsave_data.fpt[1], &fsave_data.fpt[2]); fsave_data.dtag = get_ftag(®s.fp[reg], -1); } } @@ -1026,9 +807,7 @@ static bool fault_if_unimplemented_6888x (uae_u16 opcode, uae_u16 extra, uaecptr static bool fault_if_no_fpu_u (uae_u16 opcode, uae_u16 extra, uaecptr ea, uaecptr oldpc) { - if (fault_if_no_fpu (opcode, extra, ea, oldpc)) - return true; - return false; + return fault_if_no_fpu (opcode, extra, ea, oldpc); } static bool fault_if_no_6888x (uae_u16 opcode, uae_u16 extra, uaecptr oldpc) @@ -1073,44 +852,6 @@ static void fpu_null (void) fpnan (®s.fp[i]); } -// 68040/060 does not support denormals -static bool normalize_or_fault_if_no_denormal_support(uae_u16 opcode, uae_u16 extra, uaecptr ea, uaecptr oldpc, fpdata *src) -{ - if (!currprefs.fpu_softfloat) - return false; - if (fpp_is_unnormal(src) || fpp_is_denormal(src)) { - if (currprefs.cpu_model >= 68040 && currprefs.fpu_model && currprefs.fpu_no_unimplemented) { - if (fpp_is_zero(src)) { - fpp_normalize(src); // 68040/060 can only fix unnormal zeros - } else { - fp_unimp_datatype(opcode, extra, ea, oldpc, src, NULL); - return true; - } - } else { - fpp_normalize(src); - } - } - return false; -} -static bool normalize_or_fault_if_no_denormal_support_dst(uae_u16 opcode, uae_u16 extra, uaecptr ea, uaecptr oldpc, fpdata *dst, fpdata *src) -{ - if (!currprefs.fpu_softfloat) - return false; - if (fpp_is_unnormal(dst) || fpp_is_denormal(dst)) { - if (currprefs.cpu_model >= 68040 && currprefs.fpu_model && currprefs.fpu_no_unimplemented) { 
- if (fpp_is_zero(dst)) { - fpp_normalize(dst); // 68040/060 can only fix unnormal zeros - } else { - fp_unimp_datatype(opcode, extra, ea, oldpc, src, NULL); - return true; - } - } else { - fpp_normalize(dst); - } - } - return false; -} - // 68040/060 does not support packed decimal format static bool fault_if_no_packed_support(uae_u16 opcode, uae_u16 extra, uaecptr ea, uaecptr oldpc, fpdata *src, uae_u32 *packed) { @@ -1119,20 +860,6 @@ static bool fault_if_no_packed_support(uae_u16 opcode, uae_u16 extra, uaecptr ea return true; } return false; - } - -// 68040 does not support move to integer format -static bool fault_if_68040_integer_nonmaskable(uae_u16 opcode, uae_u16 extra, uaecptr ea, uaecptr oldpc, fpdata *src) -{ - if (currprefs.cpu_model == 68040 && currprefs.fpu_model && currprefs.fpu_softfloat) { - fpsr_make_status(); - if (regs.fpsr & (FPSR_SNAN | FPSR_OPERR)) { - fpsr_check_arithmetic_exception(FPSR_SNAN | FPSR_OPERR, src, opcode, extra, ea); - fp_exception_pending(false); // post - return true; - } - } - return false; } static int get_fp_value (uae_u32 opcode, uae_u16 extra, fpdata *src, uaecptr oldpc, uae_u32 *adp) @@ -1148,7 +875,6 @@ static int get_fp_value (uae_u32 opcode, uae_u16 extra, fpdata *src, uaecptr old if (fault_if_no_fpu (opcode, extra, 0, oldpc)) return -1; *src = regs.fp[(extra >> 10) & 7]; - normalize_or_fault_if_no_denormal_support(opcode, extra, 0, oldpc, src); return 1; } mode = (opcode >> 3) & 7; @@ -1172,7 +898,6 @@ static int get_fp_value (uae_u32 opcode, uae_u16 extra, fpdata *src, uaecptr old break; case 1: fpp_to_single (src, m68k_dreg (regs, reg)); - normalize_or_fault_if_no_denormal_support(opcode, extra, 0, oldpc, src); break; default: return 0; @@ -1257,7 +982,6 @@ static int get_fp_value (uae_u32 opcode, uae_u16 extra, fpdata *src, uaecptr old break; case 1: fpp_to_single (src, (doext ? 
exts[0] : x_cp_get_long (ad))); - normalize_or_fault_if_no_denormal_support(opcode, extra, adold, oldpc, src); break; case 2: { @@ -1268,7 +992,6 @@ static int get_fp_value (uae_u32 opcode, uae_u16 extra, fpdata *src, uaecptr old ad += 4; wrd3 = (doext ? exts[2] : x_cp_get_long (ad)); fpp_to_exten (src, wrd1, wrd2, wrd3); - normalize_or_fault_if_no_denormal_support(opcode, extra, adold, oldpc, src); } break; case 3: @@ -1282,7 +1005,6 @@ static int get_fp_value (uae_u32 opcode, uae_u16 extra, fpdata *src, uaecptr old if (fault_if_no_packed_support (opcode, extra, adold, oldpc, NULL, wrd)) return 1; fpp_to_pack (src, wrd, 0); - fpp_normalize(src); return 1; } break; @@ -1296,7 +1018,6 @@ static int get_fp_value (uae_u32 opcode, uae_u16 extra, fpdata *src, uaecptr old ad += 4; wrd2 = (doext ? exts[1] : x_cp_get_long (ad)); fpp_to_double (src, wrd1, wrd2); - normalize_or_fault_if_no_denormal_support(opcode, extra, adold, oldpc, src); } break; case 6: @@ -1331,31 +1052,17 @@ static int put_fp_value (fpdata *value, uae_u32 opcode, uae_u16 extra, uaecptr o switch (size) { case 6: - if (normalize_or_fault_if_no_denormal_support(opcode, extra, 0, oldpc, value)) - return 1; m68k_dreg (regs, reg) = (uae_u32)(((fpp_to_int (value, 0) & 0xff) | (m68k_dreg (regs, reg) & ~0xff))); - if (fault_if_68040_integer_nonmaskable(opcode, extra, ad, oldpc, value)) - return -1; break; case 4: - if (normalize_or_fault_if_no_denormal_support(opcode, extra, 0, oldpc, value)) - return 1; m68k_dreg (regs, reg) = (uae_u32)(((fpp_to_int (value, 1) & 0xffff) | (m68k_dreg (regs, reg) & ~0xffff))); - if (fault_if_68040_integer_nonmaskable(opcode, extra, ad, oldpc, value)) - return -1; break; case 0: - if (normalize_or_fault_if_no_denormal_support(opcode, extra, 0, oldpc, value)) - return 1; m68k_dreg (regs, reg) = (uae_u32)fpp_to_int (value, 2); - if (fault_if_68040_integer_nonmaskable(opcode, extra, ad, oldpc, value)) - return -1; break; case 1: - if 
(normalize_or_fault_if_no_denormal_support(opcode, extra, 0, oldpc, value)) - return 1; m68k_dreg (regs, reg) = fpp_from_single (value); break; default: @@ -1410,21 +1117,13 @@ static int put_fp_value (fpdata *value, uae_u32 opcode, uae_u16 extra, uaecptr o switch (size) { case 0: - if (normalize_or_fault_if_no_denormal_support(opcode, extra, ad, oldpc, value)) - return 1; x_cp_put_long(ad, (uae_u32)fpp_to_int(value, 2)); - if (fault_if_68040_integer_nonmaskable(opcode, extra, ad, oldpc, value)) - return -1; break; case 1: - if (normalize_or_fault_if_no_denormal_support(opcode, extra, ad, oldpc, value)) - return 1; x_cp_put_long(ad, fpp_from_single(value)); break; case 2: { - if (normalize_or_fault_if_no_denormal_support(opcode, extra, ad, oldpc, value)) - return 1; uae_u32 wrd1, wrd2, wrd3; fpp_from_exten(value, &wrd1, &wrd2, &wrd3); x_cp_put_long (ad, wrd1); @@ -1445,7 +1144,6 @@ static int put_fp_value (fpdata *value, uae_u32 opcode, uae_u16 extra, uaecptr o kfactor &= 127; if (kfactor & 64) kfactor |= ~63; - fpp_normalize(value); fpp_from_pack(value, wrd, kfactor); x_cp_put_long (ad, wrd[0]); ad += 4; @@ -1455,16 +1153,10 @@ static int put_fp_value (fpdata *value, uae_u32 opcode, uae_u16 extra, uaecptr o } break; case 4: - if (normalize_or_fault_if_no_denormal_support(opcode, extra, ad, oldpc, value)) - return 1; x_cp_put_word(ad, (uae_s16)fpp_to_int(value, 1)); - if (fault_if_68040_integer_nonmaskable(opcode, extra, ad, oldpc, value)) - return -1; break; case 5: { - if (normalize_or_fault_if_no_denormal_support(opcode, extra, ad, oldpc, value)) - return 1; uae_u32 wrd1, wrd2; fpp_from_double(value, &wrd1, &wrd2); x_cp_put_long (ad, wrd1); @@ -1473,11 +1165,7 @@ static int put_fp_value (fpdata *value, uae_u32 opcode, uae_u16 extra, uaecptr o } break; case 6: - if (normalize_or_fault_if_no_denormal_support(opcode, extra, ad, oldpc, value)) - return 1; x_cp_put_byte(ad, (uae_s8)fpp_to_int(value, 0)); - if (fault_if_68040_integer_nonmaskable(opcode, extra, ad, 
oldpc, value)) - return -1; break; default: return 0; @@ -1539,9 +1227,19 @@ int fpp_cond (int condition) { int NotANumber, N, Z; - NotANumber = (regs.fpsr & FPSR_CC_NAN) != 0; - N = (regs.fpsr & FPSR_CC_N) != 0; - Z = (regs.fpsr & FPSR_CC_Z) != 0; +#ifdef JIT + if (currprefs.cachesize && currprefs.compfpu) { + // JIT reads and writes regs.fpu_result + NotANumber = fpp_is_nan(®s.fp_result); + N = fpp_is_neg(®s.fp_result); + Z = fpp_is_zero(®s.fp_result); + } else +#endif + { + NotANumber = (regs.fpsr & FPSR_CC_NAN) != 0; + N = (regs.fpsr & FPSR_CC_N) != 0; + Z = (regs.fpsr & FPSR_CC_Z) != 0; + } if ((condition & 0x10) && NotANumber) { if (fpsr_set_bsun()) @@ -1994,14 +1692,12 @@ retry: if (cusavepc == 0xFE) { if (opclass == 0 || opclass == 2) { - fpp_to_exten_fmovem(&dst, fsave_data.fpt[0], fsave_data.fpt[1], fsave_data.fpt[2]); - fpp_denormalize(&dst, fpte15); - fpp_to_exten_fmovem(&src, fsave_data.et[0], fsave_data.et[1], fsave_data.et[2]); - fpp_denormalize(&src, et15); + fpp_to_exten(&dst, fsave_data.fpt[0], fsave_data.fpt[1], fsave_data.fpt[2]); + fpp_to_exten(&src, fsave_data.et[0], fsave_data.et[1], fsave_data.et[2]); #if EXCEPTION_FPP uae_u32 tmpsrc[3], tmpdst[3]; - fpp_from_exten_fmovem(&src, &tmpsrc[0], &tmpsrc[1], &tmpsrc[2]); - fpp_from_exten_fmovem(&dst, &tmpdst[0], &tmpdst[1], &tmpdst[2]); + fpp_from_exten(&src, &tmpsrc[0], &tmpsrc[1], &tmpsrc[2]); + fpp_from_exten(&dst, &tmpdst[0], &tmpdst[1], &tmpdst[2]); write_log (_T("FRESTORE src = %08X %08X %08X, dst = %08X %08X %08X, extra = %04X\n"), tmpsrc[0], tmpsrc[1], tmpsrc[2], tmpdst[0], tmpdst[1], tmpdst[2], cmdreg1b); #endif @@ -2011,8 +1707,6 @@ retry: if (v) regs.fp[(cmdreg1b>>7)&7] = dst; - - fpsr_check_arithmetic_exception(0, &src, regs.fp_opword, cmdreg1b, regs.fp_ea); } else { write_log (_T("FRESTORE resume of opclass %d instruction not supported %08x\n"), opclass, ad_orig); } @@ -2117,7 +1811,7 @@ static uaecptr fmovem2mem (uaecptr ad, uae_u32 list, int incr, int regdir) else reg = r; if (list & 
0x80) { - fpp_from_exten_fmovem(®s.fp[reg], &wrd1, &wrd2, &wrd3); + fpp_from_exten(®s.fp[reg], &wrd1, &wrd2, &wrd3); if (incr < 0) ad -= 3 * 4; x_put_long(ad + 0, wrd1); @@ -2164,7 +1858,7 @@ static bool fp_arithmetic(fpdata *src, fpdata *dst, int extra) switch (extra & 0x7f) { case 0x00: /* FMOVE */ - fpp_move(dst, src, 0); + fpp_move(dst, src, fpu_prec); break; case 0x40: /* FSMOVE */ fpp_move(dst, src, 32); @@ -2182,7 +1876,7 @@ static bool fp_arithmetic(fpdata *src, fpdata *dst, int extra) fpp_intrz(dst, src); break; case 0x04: /* FSQRT */ - fpp_sqrt(dst, src, 0); + fpp_sqrt(dst, src, fpu_prec); break; case 0x41: /* FSSQRT */ fpp_sqrt(dst, src, 32); @@ -2233,7 +1927,7 @@ static bool fp_arithmetic(fpdata *src, fpdata *dst, int extra) fpp_log2(dst, src); break; case 0x18: /* FABS */ - fpp_abs(dst, src, 0); + fpp_abs(dst, src, fpu_prec); break; case 0x58: /* FSABS */ fpp_abs(dst, src, 32); @@ -2245,7 +1939,7 @@ static bool fp_arithmetic(fpdata *src, fpdata *dst, int extra) fpp_cosh(dst, src); break; case 0x1a: /* FNEG */ - fpp_neg(dst, src, 0); + fpp_neg(dst, src, fpu_prec); break; case 0x5a: /* FSNEG */ fpp_neg(dst, src, 32); @@ -2266,7 +1960,7 @@ static bool fp_arithmetic(fpdata *src, fpdata *dst, int extra) fpp_getman(dst, src); break; case 0x20: /* FDIV */ - fpp_div(dst, src, 0); + fpp_div(dst, src, fpu_prec); break; case 0x60: /* FSDIV */ fpp_div(dst, src, 32); @@ -2280,7 +1974,7 @@ static bool fp_arithmetic(fpdata *src, fpdata *dst, int extra) fpsr_set_quotient(q, s); break; case 0x22: /* FADD */ - fpp_add(dst, src, 0); + fpp_add(dst, src, fpu_prec); break; case 0x62: /* FSADD */ fpp_add(dst, src, 32); @@ -2289,7 +1983,7 @@ static bool fp_arithmetic(fpdata *src, fpdata *dst, int extra) fpp_add(dst, src, 64); break; case 0x23: /* FMUL */ - fpp_mul(dst, src, 0); + fpp_mul(dst, src, fpu_prec); break; case 0x63: /* FSMUL */ fpp_mul(dst, src, 32); @@ -2312,7 +2006,7 @@ static bool fp_arithmetic(fpdata *src, fpdata *dst, int extra) fpp_sglmul(dst, src); break; 
case 0x28: /* FSUB */ - fpp_sub(dst, src, 0); + fpp_sub(dst, src, fpu_prec); break; case 0x68: /* FSSUB */ fpp_sub(dst, src, 32); @@ -2390,7 +2084,6 @@ static void fpuop_arithmetic2 (uae_u32 opcode, uae_u16 extra) return; } fpsr_make_status(); - fpsr_check_arithmetic_exception(0, &src, opcode, extra, ad); fp_exception_pending(false); // post/mid instruction return; @@ -2598,7 +2291,6 @@ static void fpuop_arithmetic2 (uae_u32 opcode, uae_u16 extra) fpsr_clear_status(); fpu_get_constant(®s.fp[reg], extra & 0x7f); fpsr_make_status(); - fpsr_check_arithmetic_exception(0, &src, opcode, extra, ad); return; } @@ -2620,9 +2312,6 @@ static void fpuop_arithmetic2 (uae_u32 opcode, uae_u16 extra) dst = regs.fp[reg]; - if (fp_is_dyadic(extra)) - normalize_or_fault_if_no_denormal_support_dst(opcode, extra, ad, pc, &dst, &src); - // check for 680x0 unimplemented instruction if (fault_if_unimplemented_680x0 (opcode, extra, ad, pc, &src, reg)) return; @@ -2635,8 +2324,6 @@ static void fpuop_arithmetic2 (uae_u32 opcode, uae_u16 extra) v = fp_arithmetic(&src, &dst, extra); - fpsr_check_arithmetic_exception(0, &src, opcode, extra, ad); - if (v) regs.fp[reg] = dst; @@ -2654,35 +2341,8 @@ void fpuop_arithmetic (uae_u32 opcode, uae_u16 extra) fpuop_arithmetic2 (opcode, extra); } -void fpu_modechange(void) -{ - uae_u32 temp_ext[8][3]; - - if (currprefs.fpu_softfloat == changed_prefs.fpu_softfloat) - return; - currprefs.fpu_softfloat = changed_prefs.fpu_softfloat; - - for (int i = 0; i < 8; i++) { - fpp_from_exten_fmovem(®s.fp[i], &temp_ext[i][0], &temp_ext[i][1], &temp_ext[i][2]); - } - if (currprefs.fpu_softfloat) { - fp_init_softfloat(); - } else { - fp_init_native(); - } - for (int i = 0; i < 8; i++) { - fpp_to_exten_fmovem(®s.fp[i], temp_ext[i][0], temp_ext[i][1], temp_ext[i][2]); - } -} - void fpu_reset (void) { - if (currprefs.fpu_softfloat) { - fp_init_softfloat(); - } else { - fp_init_native(); - } - #if defined(CPU_i386) || defined(CPU_x86_64) init_fpucw_x87(); #endif @@ -2709,7 
+2369,7 @@ uae_u8 *restore_fpu (uae_u8 *src) w1 = restore_u16 () << 16; w2 = restore_u32 (); w3 = restore_u32 (); - fpp_to_exten_fmovem(®s.fp[i], w1, w2, w3); + fpp_to_exten(®s.fp[i], w1, w2, w3); } regs.fpcr = restore_u32 (); regs.fpsr = restore_u32 (); @@ -2776,7 +2436,7 @@ uae_u8 *save_fpu (int *len, uae_u8 *dstptr) save_u32 (currprefs.fpu_model); save_u32 (0x80000000 | 0x20000000); for (i = 0; i < 8; i++) { - fpp_from_exten_fmovem(®s.fp[i], &w1, &w2, &w3); + fpp_from_exten(®s.fp[i], &w1, &w2, &w3); save_u16 (w1 >> 16); save_u32 (w2); save_u32 (w3); diff --git a/src/fpp_native.cpp b/src/fpp_native.cpp index 9d86f55e..d78540cc 100644 --- a/src/fpp_native.cpp +++ b/src/fpp_native.cpp @@ -6,30 +6,34 @@ * Copyright 1996 Herman ten Brugge * Modified 2005 Peter Keunecke * 68040+ exceptions and more by Toni Wilen +* +* This is the version for ARM devices. We have these restrictions: +* - all caclulations are done in double, not in extended format like in MC68881... +* - rounding is only needed by special opcodes (like FSMOVE or FSADD) or if FPCR is set to single */ #define __USE_ISOC9X /* We might be able to pick up a NaN */ -#include -#include -#include +#include +#include +#include #include "sysconfig.h" #include "sysdeps.h" #define USE_HOST_ROUNDING 1 -#define SOFTFLOAT_CONVERSIONS 0 #include "options.h" -#include "memory.h" +#include "include/memory.h" #include "newcpu.h" #include "fpp.h" -#include "uae/attributes.h" -#include "uae/vm.h" -#include "newcpu.h" + +#ifdef JIT +double fp_1e8 = 1.0e8; +#endif static uae_u32 dhex_nan[] ={0xffffffff, 0x7fffffff}; -static double *fp_nan = reinterpret_cast(dhex_nan); +static double *fp_nan = (double *)dhex_nan; static const double twoto32 = 4294967296.0; #define FPCR_ROUNDING_MODE 0x00000030 @@ -43,43 +47,41 @@ static const double twoto32 = 4294967296.0; #define FPCR_PRECISION_DOUBLE 0x00000080 #define FPCR_PRECISION_EXTENDED 0x00000000 -static struct float_status fs; static uae_u32 fpu_mode_control = 0; static int fpu_prec; 
-static int temp_prec; /* Functions for setting host/library modes and getting status */ -static void fp_set_mode(uae_u32 mode_control) +static void fpp_set_mode(uae_u32 mode_control) { - if (mode_control == fpu_mode_control) + if (mode_control == fpu_mode_control && !currprefs.compfpu) return; - switch(mode_control & FPCR_ROUNDING_PRECISION) { - case FPCR_PRECISION_EXTENDED: // X - fpu_prec = 80; - break; - case FPCR_PRECISION_SINGLE: // S - fpu_prec = 32; - break; - case FPCR_PRECISION_DOUBLE: // D - default: // undefined - fpu_prec = 64; - break; - } + switch (mode_control & FPCR_ROUNDING_PRECISION) { + case FPCR_PRECISION_EXTENDED: // X + fpu_prec = 80; + break; + case FPCR_PRECISION_SINGLE: // S + fpu_prec = 32; + break; + case FPCR_PRECISION_DOUBLE: // D + default: // undefined + fpu_prec = 64; + break; + } #if USE_HOST_ROUNDING if ((mode_control & FPCR_ROUNDING_MODE) != (fpu_mode_control & FPCR_ROUNDING_MODE)) { - switch(mode_control & FPCR_ROUNDING_MODE) { - case FPCR_ROUND_NEAR: // to neareset - fesetround(FE_TONEAREST); - break; - case FPCR_ROUND_ZERO: // to zero - fesetround(FE_TOWARDZERO); - break; - case FPCR_ROUND_MINF: // to minus - fesetround(FE_DOWNWARD); - break; - case FPCR_ROUND_PINF: // to plus - fesetround(FE_UPWARD); - break; + switch (mode_control & FPCR_ROUNDING_MODE) { + case FPCR_ROUND_NEAR: // to neareset + fesetround(FE_TONEAREST); + break; + case FPCR_ROUND_ZERO: // to zero + fesetround(FE_TOWARDZERO); + break; + case FPCR_ROUND_MINF: // to minus + fesetround(FE_DOWNWARD); + break; + case FPCR_ROUND_PINF: // to plus + fesetround(FE_UPWARD); + break; } } #endif @@ -87,209 +89,152 @@ static void fp_set_mode(uae_u32 mode_control) } -static void fp_get_status(uae_u32 *status) -{ - // These can't be properly emulated using host FPU. 
-} - -static void fp_clear_status(void) -{ -} - /* Functions for detecting float type */ -static bool fp_is_snan(fpdata *fpd) +static bool fpp_is_nan (fpdata *fpd) { - return false; /* FIXME: how to detect SNAN */ + return ::isnan(fpd->fp) != 0; } -static bool fp_unset_snan(fpdata *fpd) +static bool fpp_is_infinity (fpdata *fpd) { - /* FIXME: how to unset SNAN */ - return false; + return ::isinf(fpd->fp) != 0; } -static bool fp_is_nan (fpdata *fpd) +static bool fpp_is_zero(fpdata *fpd) { - return isnan(fpd->fp) != 0; + return fpd->fp == 0.0; } -static bool fp_is_infinity (fpdata *fpd) +static bool fpp_is_neg(fpdata *fpd) { - return isinf(fpd->fp) != 0; -} -static bool fp_is_zero(fpdata *fpd) -{ - return fpd->fp == 0.0; -} -static bool fp_is_neg(fpdata *fpd) -{ - return signbit(fpd->fp) != 0; -} -static bool fp_is_denormal(fpdata *fpd) -{ - return false; - //return (isnormal(fpd->fp) == 0); /* FIXME: how to differ denormal/unnormal? */ -} -static bool fp_is_unnormal(fpdata *fpd) -{ - return false; - //return (isnormal(fpd->fp) == 0); /* FIXME: how to differ denormal/unnormal? */ + return signbit(fpd->fp) != 0; } /* Functions for converting between float formats */ /* FIXME: how to preserve/fix denormals and unnormals? 
*/ -static void fp_to_native(fptype *fp, fpdata *fpd) +static void fpp_to_native(fptype *fp, fpdata *fpd) { - *fp = fpd->fp; + *fp = fpd->fp; } -static void fp_from_native(fptype fp, fpdata *fpd) +static void fpp_from_native(fptype fp, fpdata *fpd) { - fpd->fp = fp; + fpd->fp = fp; } -static void fp_to_single(fpdata *fpd, uae_u32 wrd1) +static void fpp_to_single(fpdata *fpd, uae_u32 wrd1) { - union { - float f; - uae_u32 u; - } val{}; - - val.u = wrd1; - fpd->fp = fptype(val.f); + union { + float f; + uae_u32 u; + } val; + + val.u = wrd1; + fpd->fp = (fptype)val.f; } -static uae_u32 fp_from_single(fpdata *fpd) +static uae_u32 fpp_from_single(fpdata *fpd) { - union { - float f; - uae_u32 u; - } val{}; - - val.f = float(fpd->fp); - return val.u; + union { + float f; + uae_u32 u; + } val; + + val.f = (float)fpd->fp; + return val.u; } -static void fp_to_double(fpdata *fpd, uae_u32 wrd1, uae_u32 wrd2) +static void fpp_to_double(fpdata *fpd, uae_u32 wrd1, uae_u32 wrd2) { - union { - double d; - uae_u32 u[2]; - } val{}; - #ifdef WORDS_BIGENDIAN - val.u[0] = wrd1; - val.u[1] = wrd2; + ((uae_u32*)&(fpd->fp))[0] = wrd1; + ((uae_u32*)&(fpd->fp))[1] = wrd2; #else - val.u[1] = wrd1; - val.u[0] = wrd2; + ((uae_u32*)&(fpd->fp))[1] = wrd1; + ((uae_u32*)&(fpd->fp))[0] = wrd2; #endif - fpd->fp = fptype(val.d); } -static void fp_from_double(fpdata *fpd, uae_u32 *wrd1, uae_u32 *wrd2) +static void fpp_from_double(fpdata *fpd, uae_u32 *wrd1, uae_u32 *wrd2) { - union { - double d; - uae_u32 u[2]; - } val{}; - - val.d = double(fpd->fp); #ifdef WORDS_BIGENDIAN - *wrd1 = val.u[0]; - *wrd2 = val.u[1]; + *wrd1 = ((uae_u32*)&(fpd->fp))[0]; + *wrd2 = ((uae_u32*)&(fpd->fp))[1]; #else - *wrd1 = val.u[1]; - *wrd2 = val.u[0]; + *wrd1 = ((uae_u32*)&(fpd->fp))[1]; + *wrd2 = ((uae_u32*)&(fpd->fp))[0]; #endif } -static void fp_to_exten(fpdata *fpd, uae_u32 wrd1, uae_u32 wrd2, uae_u32 wrd3) +void fpp_to_exten(fpdata *fpd, uae_u32 wrd1, uae_u32 wrd2, uae_u32 wrd3) { -#if SOFTFLOAT_CONVERSIONS - floatx80 
fx80; - fx80.high = wrd1 >> 16; - fx80.low = (((uae_u64)wrd2) << 32) | wrd3; - float64 f = floatx80_to_float64(fx80, &fs); - fp_to_double(fpd, f >> 32, (uae_u32)f); -#else - double frac; - if ((wrd1 & 0x7fff0000) == 0 && wrd2 == 0 && wrd3 == 0) { - fpd->fp = (wrd1 & 0x80000000) ? -0.0 : +0.0; - return; - } - frac = ((double)wrd2 + ((double)wrd3 / twoto32)) / 2147483648.0; - if (wrd1 & 0x80000000) - frac = -frac; - fpd->fp = ldexp (frac, ((wrd1 >> 16) & 0x7fff) - 16383); -#endif + double frac; + if ((wrd1 & 0x7fff0000) == 0 && wrd2 == 0 && wrd3 == 0) { + fpd->fp = (wrd1 & 0x80000000) ? -0.0 : +0.0; + return; + } + frac = ((double)wrd2 + ((double)wrd3 / twoto32)) / 2147483648.0; + if (wrd1 & 0x80000000) + frac = -frac; + fpd->fp = ldexp (frac, ((wrd1 >> 16) & 0x7fff) - 16383); } -static void fp_from_exten(fpdata *fpd, uae_u32 *wrd1, uae_u32 *wrd2, uae_u32 *wrd3) +static void fpp_from_exten(fpdata *fpd, uae_u32 *wrd1, uae_u32 *wrd2, uae_u32 *wrd3) { -#if SOFTFLOAT_CONVERSIONS - uae_u32 w1, w2; - fp_from_double(fpd, &w1, &w2); - floatx80 f = float64_to_floatx80(((uae_u64)w1 << 32) | w2, &fs); - *wrd1 = f.high << 16; - *wrd2 = f.low >> 32; - *wrd3 = (uae_u32)f.low; -#else - int expon; - double frac; - fptype v; - - v = fpd->fp; - if (v == 0.0) { - *wrd1 = signbit(v) ? 0x80000000 : 0; - *wrd2 = 0; - *wrd3 = 0; - return; - } - if (v < 0) { - *wrd1 = 0x80000000; - v = -v; - } else { - *wrd1 = 0; - } - frac = frexp (v, &expon); - frac += 0.5 / (twoto32 * twoto32); - if (frac >= 1.0) { - frac /= 2.0; - expon++; - } - *wrd1 |= (((expon + 16383 - 1) & 0x7fff) << 16); - *wrd2 = (uae_u32) (frac * twoto32); - *wrd3 = (uae_u32) ((frac * twoto32 - *wrd2) * twoto32); -#endif + int expon; + double frac; + fptype v; + + v = fpd->fp; + if (v == 0.0) { + *wrd1 = signbit(v) ? 
0x80000000 : 0; + *wrd2 = 0; + *wrd3 = 0; + return; + } + if (v < 0) { + *wrd1 = 0x80000000; + v = -v; + } else { + *wrd1 = 0; + } + frac = frexp (v, &expon); + frac += 0.5 / (twoto32 * twoto32); + if (frac >= 1.0) { + frac /= 2.0; + expon++; + } + *wrd1 |= (((expon + 16383 - 1) & 0x7fff) << 16); + *wrd2 = (uae_u32) (frac * twoto32); + *wrd3 = (uae_u32) ((frac * twoto32 - *wrd2) * twoto32); } #if USE_HOST_ROUNDING == 0 -#define fp_round_to_minus_infinity(x) floor(x) -#define fp_round_to_plus_infinity(x) ceil(x) -#define fp_round_to_zero(x) ((x) >= 0.0 ? floor(x) : ceil(x)) -#define fp_round_to_nearest(x) round(x) +#define fpp_round_to_minus_infinity(x) floor(x) +#define fpp_round_to_plus_infinity(x) ceil(x) +#define fpp_round_to_zero(x) ((x) >= 0.0 ? floor(x) : ceil(x)) +#define fpp_round_to_nearest(x) round(x) #endif // USE_HOST_ROUNDING -static uae_s64 fp_to_int(fpdata *src, int size) +static uae_s64 fpp_to_int(fpdata *src, int size) { - static const fptype fxsizes[6] = - { - -128.0, 127.0, - -32768.0, 32767.0, - -2147483648.0, 2147483647.0 - }; + static const fptype fxsizes[6] = + { + -128.0, 127.0, + -32768.0, 32767.0, + -2147483648.0, 2147483647.0 + }; fptype fp = src->fp; - if (fp_is_nan(src)) { + if (fpp_is_nan(src)) { uae_u32 w1, w2, w3; - fp_from_exten(src, &w1, &w2, &w3); + fpp_from_exten(src, &w1, &w2, &w3); uae_s64 v = 0; // return mantissa switch (size) { case 0: - v = w2 >> 24; - break; + v = w2 >> 24; + break; case 1: - v = w2 >> 16; - break; + v = w2 >> 16; + break; case 2: - v = w2 >> 0; - break; + v = w2 >> 0; + break; } return v; } @@ -306,140 +251,77 @@ static uae_s64 fp_to_int(fpdata *src, int size) switch (regs.fpcr & 0x30) { case FPCR_ROUND_ZERO: - result = (int)fp_round_to_zero (fp); + result = (int)fpp_round_to_zero (fp); break; case FPCR_ROUND_MINF: - result = (int)fp_round_to_minus_infinity (fp); + result = (int)fpp_round_to_minus_infinity (fp); break; case FPCR_ROUND_NEAR: - result = fp_round_to_nearest (fp); + result = 
fpp_round_to_nearest (fp); break; case FPCR_ROUND_PINF: - result = (int)fp_round_to_plus_infinity (fp); + result = (int)fpp_round_to_plus_infinity (fp); break; } return result; #endif } -static void fp_from_int(fpdata *fpd, uae_s32 src) +static void fpp_from_int(fpdata *fpd, uae_s32 src) { - fpd->fp = (fptype) src; + fpd->fp = (fptype) src; } /* Functions for rounding */ // round to float with extended precision exponent -static void fp_round32(fpdata *fpd) +static void fpp_round32(fpdata *fpd) { - int expon; - float mant; - mant = (float)(frexpl(fpd->fp, &expon) * 2.0); - fpd->fp = ldexpl((fptype)mant, expon - 1); + int expon; + float mant; + mant = (float)(frexpl(fpd->fp, &expon) * 2.0); + fpd->fp = ldexpl((fptype)mant, expon - 1); } // round to double with extended precision exponent -static void fp_round64(fpdata *fpd) +static void fpp_round64(fpdata *fpd) { - int expon; - double mant; - mant = (double)(frexpl(fpd->fp, &expon) * 2.0); - fpd->fp = ldexpl((fptype)mant, expon - 1); +#if !defined(CPU_arm) + int expon; + double mant; + mant = (double)(frexpl(fpd->fp, &expon) * 2.0); + fpd->fp = ldexpl((fptype)mant, expon - 1); +#endif } // round to float -static void fp_round_single(fpdata *fpd) +static void fpp_round_single(fpdata *fpd) { - fpd->fp = (float) fpd->fp; + fpd->fp = (float) fpd->fp; } -// round to double -static void fp_round_double(fpdata *fpd) +static void fpp_round_prec(fpdata *fpd, int prec) { -} - -static const TCHAR *fp_print(fpdata *fpd, int mode) -{ - static TCHAR fsout[32]; - bool n; - - if (mode < 0) { - uae_u32 w1, w2, w3; - fp_from_exten(fpd, &w1, &w2, &w3); - _stprintf(fsout, _T("%04X-%08X-%08X"), w1 >> 16, w2, w3); - return fsout; - } - - n = signbit(fpd->fp) ? 1 : 0; - - if(isinf(fpd->fp)) { - _stprintf(fsout, _T("%c%s"), n ? '-' : '+', _T("inf")); - } else if(isnan(fpd->fp)) { - _stprintf(fsout, _T("%c%s"), n ? 
'-' : '+', _T("nan")); - } else { - if(n) - fpd->fp *= -1.0; - _stprintf(fsout, _T("#%e"), fpd->fp); - } - if (mode == 0 || mode > _tcslen(fsout)) - return fsout; - fsout[mode] = 0; - return fsout; -} - -static void fp_round_prec(fpdata *fpd, int prec) -{ - if (prec == 64) { - fp_round_double(fpd); - } else if (prec == 32) { - fp_round_single(fpd); + if (prec == 32) { + fpp_round_single(fpd); } } static void fp_round(fpdata *fpd) { - if (!currprefs.fpu_strict) - return; - fp_round_prec(fpd, fpu_prec); -} - - -static void fp_set_prec(int prec) -{ - temp_prec = prec; -} -static void fp_reset_prec(fpdata *fpd) -{ - int prec = temp_prec; - if (temp_prec == 0) - prec = fpu_prec; - fp_round_prec(fpd, prec); -} - -// Use default precision/rounding mode when calling C-library math functions. -static void fp_normal_prec(void) -{ - temp_prec = fpu_mode_control; - if ((fpu_mode_control & FPCR_ROUNDING_PRECISION) == FPCR_PRECISION_SINGLE || (fpu_mode_control & FPCR_ROUNDING_MODE) != FPCR_ROUND_NEAR) { - fp_set_mode(FPCR_PRECISION_DOUBLE | FPCR_ROUND_NEAR); - } -} - -static void fp_reset_normal_prec(void) -{ - fp_set_mode(temp_prec); + fpp_round_prec(fpd, fpu_prec); } /* Arithmetic functions */ -static void fp_move(fpdata *a, fpdata *b, int prec) +static void fpp_move(fpdata *a, fpdata *b, int prec) { - fp_set_prec(prec); a->fp = b->fp; - fp_reset_prec(a); + if (prec == 32) + fpp_round_single(a); } -static void fp_int(fpdata *a, fpdata *b) +static void fpp_int(fpdata *a, fpdata *b) { fptype bb = b->fp; #if USE_HOST_ROUNDING @@ -448,16 +330,16 @@ static void fp_int(fpdata *a, fpdata *b) switch (regs.fpcr & FPCR_ROUNDING_MODE) { case FPCR_ROUND_NEAR: - a->fp = fp_round_to_nearest(bb); + a->fp = fpp_round_to_nearest(bb); break; case FPCR_ROUND_ZERO: - a->fp = fp_round_to_zero(bb); + a->fp = fpp_round_to_zero(bb); break; case FPCR_ROUND_MINF: - a->fp = fp_round_to_minus_infinity(bb); + a->fp = fpp_round_to_minus_infinity(bb); break; case FPCR_ROUND_PINF: - a->fp = 
fp_round_to_plus_infinity(bb); + a->fp = fpp_round_to_plus_infinity(bb); break; default: /* never reached */ break; @@ -465,36 +347,32 @@ static void fp_int(fpdata *a, fpdata *b) #endif } -static void fp_getexp(fpdata *a, fpdata *b) +static void fpp_getexp(fpdata *a, fpdata *b) { int expon; - fp_normal_prec(); frexpl(b->fp, &expon); a->fp = (fptype) (expon - 1); - fp_reset_normal_prec(); fp_round(a); } -static void fp_getman(fpdata *a, fpdata *b) +static void fpp_getman(fpdata *a, fpdata *b) { int expon; - fp_normal_prec(); a->fp = frexpl(b->fp, &expon) * 2.0; - fp_reset_normal_prec(); fp_round(a); } -static void fp_div(fpdata *a, fpdata *b, int prec) +static void fpp_div(fpdata *a, fpdata *b, int prec) { - fp_set_prec(prec); a->fp = a->fp / b->fp; - fp_reset_prec(b); + if (prec == 32) + fpp_round_single(a); } -static void fp_mod(fpdata *a, fpdata *b, uae_u64 *q, uae_u8 *s) +static void fpp_mod(fpdata *a, fpdata *b, uae_u64 *q, uae_u8 *s) { fptype quot; #if USE_HOST_ROUNDING quot = truncl(a->fp / b->fp); #else - quot = fp_round_to_zero(a->fp / b->fp); + quot = fpp_round_to_zero(a->fp / b->fp); #endif if (quot < 0.0) { *s = 1; @@ -507,13 +385,13 @@ static void fp_mod(fpdata *a, fpdata *b, uae_u64 *q, uae_u8 *s) fp_round(a); } -static void fp_rem(fpdata *a, fpdata *b, uae_u64 *q, uae_u8 *s) +static void fpp_rem(fpdata *a, fpdata *b, uae_u64 *q, uae_u8 *s) { fptype quot; #if USE_HOST_ROUNDING quot = roundl(a->fp / b->fp); #else - quot = fp_round_to_nearest(a->fp / b->fp); + quot = fpp_round_to_nearest(a->fp / b->fp); #endif if (quot < 0.0) { *s = 1; @@ -526,212 +404,170 @@ static void fp_rem(fpdata *a, fpdata *b, uae_u64 *q, uae_u8 *s) fp_round(a); } -static void fp_scale(fpdata *a, fpdata *b) +static void fpp_scale(fpdata *a, fpdata *b) { - fp_normal_prec(); a->fp = ldexpl(a->fp, (int)b->fp); - fp_reset_normal_prec(); fp_round(a); } -static void fp_sinh(fpdata *a, fpdata *b) +static void fpp_sinh(fpdata *a, fpdata *b) { - fp_normal_prec(); a->fp = sinhl(b->fp); - 
fp_reset_normal_prec(); fp_round(a); } -static void fp_intrz(fpdata *a, fpdata *b) +static void fpp_intrz(fpdata *a, fpdata *b) { #if USE_HOST_ROUNDING a->fp = truncl(b->fp); #else - a->fp = fp_round_to_zero (b->fp); + a->fp = fpp_round_to_zero (b->fp); #endif fp_round(a); } -static void fp_sqrt(fpdata *a, fpdata *b, int prec) +static void fpp_sqrt(fpdata *a, fpdata *b, int prec) { - fp_set_prec(prec); a->fp = sqrtl(b->fp); - fp_reset_prec(b); + if (prec == 32) + fpp_round_single(a); } -static void fp_lognp1(fpdata *a, fpdata *b) +static void fpp_lognp1(fpdata *a, fpdata *b) { - fp_normal_prec(); a->fp = log1pl(b->fp); - fp_reset_normal_prec(); fp_round(a); } -static void fp_etoxm1(fpdata *a, fpdata *b) +static void fpp_etoxm1(fpdata *a, fpdata *b) { - fp_normal_prec(); a->fp = expm1l(b->fp); - fp_reset_normal_prec(); fp_round(a); } -static void fp_tanh(fpdata *a, fpdata *b) +static void fpp_tanh(fpdata *a, fpdata *b) { - fp_normal_prec(); a->fp = tanhl(b->fp); - fp_reset_normal_prec(); fp_round(a); } -static void fp_atan(fpdata *a, fpdata *b) +static void fpp_atan(fpdata *a, fpdata *b) { - fp_normal_prec(); a->fp = atanl(b->fp); - fp_reset_normal_prec(); fp_round(a); } -static void fp_atanh(fpdata *a, fpdata *b) +static void fpp_atanh(fpdata *a, fpdata *b) { - fp_normal_prec(); a->fp = atanhl(b->fp); - fp_reset_normal_prec(); fp_round(a); } -static void fp_sin(fpdata *a, fpdata *b) +static void fpp_sin(fpdata *a, fpdata *b) { - fp_normal_prec(); a->fp = sinl(b->fp); - fp_reset_normal_prec(); fp_round(a); } -static void fp_asin(fpdata *a, fpdata *b) +static void fpp_asin(fpdata *a, fpdata *b) { - fp_normal_prec(); a->fp = asinl(b->fp); - fp_reset_normal_prec(); fp_round(a); } -static void fp_tan(fpdata *a, fpdata *b) +static void fpp_tan(fpdata *a, fpdata *b) { - fp_normal_prec(); a->fp = tanl(b->fp); - fp_reset_normal_prec(); fp_round(a); } -static void fp_etox(fpdata *a, fpdata *b) +static void fpp_etox(fpdata *a, fpdata *b) { - fp_normal_prec(); a->fp = 
expl(b->fp); - fp_reset_normal_prec(); fp_round(a); } -static void fp_twotox(fpdata *a, fpdata *b) +static void fpp_twotox(fpdata *a, fpdata *b) { - fp_normal_prec(); a->fp = powl(2.0, b->fp); - fp_reset_normal_prec(); fp_round(a); } -static void fp_tentox(fpdata *a, fpdata *b) +static void fpp_tentox(fpdata *a, fpdata *b) { - fp_normal_prec(); a->fp = powl(10.0, b->fp); - fp_reset_normal_prec(); fp_round(a); } -static void fp_logn(fpdata *a, fpdata *b) +static void fpp_logn(fpdata *a, fpdata *b) { - fp_normal_prec(); a->fp = logl(b->fp); - fp_reset_normal_prec(); fp_round(a); } -static void fp_log10(fpdata *a, fpdata *b) +static void fpp_log10(fpdata *a, fpdata *b) { - fp_normal_prec(); a->fp = log10l(b->fp); - fp_reset_normal_prec(); fp_round(a); } -static void fp_log2(fpdata *a, fpdata *b) +static void fpp_log2(fpdata *a, fpdata *b) { - fp_normal_prec(); a->fp = log2l(b->fp); - fp_reset_normal_prec(); fp_round(a); } -static void fp_abs(fpdata *a, fpdata *b, int prec) +static void fpp_abs(fpdata *a, fpdata *b, int prec) { - fp_set_prec(prec); a->fp = b->fp < 0.0 ? 
-b->fp : b->fp; - fp_reset_prec(a); + if (prec == 32) + fpp_round_single(a); } -static void fp_cosh(fpdata *a, fpdata *b) +static void fpp_cosh(fpdata *a, fpdata *b) { - fp_normal_prec(); a->fp = coshl(b->fp); - fp_reset_normal_prec(); fp_round(a); } -static void fp_neg(fpdata *a, fpdata *b, int prec) +static void fpp_neg(fpdata *a, fpdata *b, int prec) { - fp_set_prec(prec); a->fp = -b->fp; - fp_reset_prec(a); + if (prec == 32) + fpp_round_single(a); } -static void fp_acos(fpdata *a, fpdata *b) +static void fpp_acos(fpdata *a, fpdata *b) { - fp_normal_prec(); a->fp = acosl(b->fp); - fp_reset_normal_prec(); fp_round(a); } -static void fp_cos(fpdata *a, fpdata *b) +static void fpp_cos(fpdata *a, fpdata *b) { - fp_normal_prec(); a->fp = cosl(b->fp); - fp_reset_normal_prec(); fp_round(a); } -static void fp_sub(fpdata *a, fpdata *b, int prec) +static void fpp_sub(fpdata *a, fpdata *b, int prec) { - fp_set_prec(prec); a->fp = a->fp - b->fp; - fp_reset_prec(a); + if (prec == 32) + fpp_round_single(a); } -static void fp_add(fpdata *a, fpdata *b, int prec) +static void fpp_add(fpdata *a, fpdata *b, int prec) { - fp_set_prec(prec); a->fp = a->fp + b->fp; - fp_reset_prec(a); + if (prec == 32) + fpp_round_single(a); } -static void fp_mul(fpdata *a, fpdata *b, int prec) +static void fpp_mul(fpdata *a, fpdata *b, int prec) { - fp_set_prec(prec); a->fp = a->fp * b->fp; - fp_reset_prec(a); + if (prec == 32) + fpp_round_single(a); } -static void fp_sglmul(fpdata *a, fpdata *b) +static void fpp_sglmul(fpdata *a, fpdata *b) { - fptype z; - float mant; - int expon; - /* FIXME: truncate mantissa of a and b to single precision */ - z = a->fp * b->fp; + fptype z; + float mant; + int expon; + /* FIXME: truncate mantissa of a and b to single precision */ + z = a->fp * b->fp; - mant = (float)(frexpl(z, &expon) * 2.0); - a->fp = ldexpl((fptype)mant, expon - 1); + mant = (float)(frexpl(z, &expon) * 2.0); + a->fp = ldexpl((fptype)mant, expon - 1); } -static void fp_sgldiv(fpdata *a, fpdata 
*b) +static void fpp_sgldiv(fpdata *a, fpdata *b) { - fptype z; - float mant; - int expon; - z = a->fp / b->fp; - - mant = (float)(frexpl(z, &expon) * 2.0); - a->fp = ldexpl((fptype)mant, expon - 1); + fptype z; + float mant; + int expon; + z = a->fp / b->fp; + + mant = (float)(frexpl(z, &expon) * 2.0); + a->fp = ldexpl((fptype)mant, expon - 1); } -static void fp_normalize(fpdata *a) -{ -} - -static void fp_cmp(fpdata *a, fpdata *b) +static void fpp_cmp(fpdata *a, fpdata *b) { fptype v = 1.0; if (currprefs.fpu_strict) { @@ -775,57 +611,19 @@ static void fp_cmp(fpdata *a, fpdata *b) v = -1.0; } else { v = a->fp - b->fp; - fp_clear_status(); } } else { v = a->fp - b->fp; - fp_clear_status(); } a->fp = v; } -static void fp_tst(fpdata *a, fpdata *b) +static void fpp_tst(fpdata *a, fpdata *b) { a->fp = b->fp; } -/* Functions for returning exception state data */ - -static void fp_get_internal_overflow(fpdata *fpd) -{ - fpd->fp = 0; -} -static void fp_get_internal_underflow(fpdata *fpd) -{ - fpd->fp = 0; -} -static void fp_get_internal_round_all(fpdata *fpd) -{ - fpd->fp = 0; -} -static void fp_get_internal_round(fpdata *fpd) -{ - fpd->fp = 0; -} -static void fp_get_internal_round_exten(fpdata *fpd) -{ - fpd->fp = 0; -} -static void fp_get_internal(fpdata *fpd) -{ - fpd->fp = 0; -} -static uae_u32 fp_get_internal_grs(void) -{ - return 0; -} - -/* Function for denormalizing */ -static void fp_denormalize(fpdata *fpd, int esign) -{ -} - -static void fp_from_pack (fpdata *src, uae_u32 *wrd, int kfactor) +static void fpp_from_pack (fpdata *src, uae_u32 *wrd, int kfactor) { int i, j, t; int exp; @@ -834,17 +632,17 @@ static void fp_from_pack (fpdata *src, uae_u32 *wrd, int kfactor) char str[100]; fptype fp; - if (fpp_is_nan (src)) { - // copy bit by bit, handle signaling nan - fpp_from_exten(src, &wrd[0], &wrd[1], &wrd[2]); - return; - } - if (fpp_is_infinity (src)) { - // extended exponent and all 0 packed fraction - fpp_from_exten(src, &wrd[0], &wrd[1], &wrd[2]); - wrd[1] = 
wrd[2] = 0; - return; - } + if (fpp_is_nan (src)) { + // copy bit by bit, handle signaling nan + fpp_from_exten(src, &wrd[0], &wrd[1], &wrd[2]); + return; + } + if (fpp_is_infinity (src)) { + // extended exponent and all 0 packed fraction + fpp_from_exten(src, &wrd[0], &wrd[1], &wrd[2]); + wrd[1] = wrd[2] = 0; + return; + } wrd[0] = wrd[1] = wrd[2] = 0; @@ -973,24 +771,24 @@ static void fp_from_pack (fpdata *src, uae_u32 *wrd, int kfactor) wrd[0] |= t << 16; } -static void fp_to_pack (fpdata *fpd, uae_u32 *wrd, int dummy) +static void fpp_to_pack (fpdata *fpd, uae_u32 *wrd, int dummy) { fptype d; char *cp; char str[100]; - if (((wrd[0] >> 16) & 0x7fff) == 0x7fff) { - // infinity has extended exponent and all 0 packed fraction - // nans are copies bit by bit - fpp_to_exten(fpd, wrd[0], wrd[1], wrd[2]); - return; - } - if (!(wrd[0] & 0xf) && !wrd[1] && !wrd[2]) { - // exponent is not cared about, if mantissa is zero - wrd[0] &= 0x80000000; - fpp_to_exten(fpd, wrd[0], wrd[1], wrd[2]); - return; - } + if (((wrd[0] >> 16) & 0x7fff) == 0x7fff) { + // infinity has extended exponent and all 0 packed fraction + // nans are copies bit by bit + fpp_to_exten(fpd, wrd[0], wrd[1], wrd[2]); + return; + } + if (!(wrd[0] & 0xf) && !wrd[1] && !wrd[2]) { + // exponent is not cared about, if mantissa is zero + wrd[0] &= 0x80000000; + fpp_to_exten(fpd, wrd[0], wrd[1], wrd[2]); + return; + } cp = str; if (wrd[0] & 0x80000000) @@ -1025,110 +823,7 @@ static void fp_to_pack (fpdata *fpd, uae_u32 *wrd, int dummy) } -void fp_init_native(void) -{ - set_floatx80_rounding_precision(80, &fs); - set_float_rounding_mode(float_round_to_zero, &fs); - - fpp_print = fp_print; - fpp_is_snan = fp_is_snan; - fpp_unset_snan = fp_unset_snan; - fpp_is_nan = fp_is_nan; - fpp_is_infinity = fp_is_infinity; - fpp_is_zero = fp_is_zero; - fpp_is_neg = fp_is_neg; - fpp_is_denormal = fp_is_denormal; - fpp_is_unnormal = fp_is_unnormal; - - fpp_get_status = fp_get_status; - fpp_clear_status = fp_clear_status; - 
fpp_set_mode = fp_set_mode; - - fpp_from_native = fp_from_native; - fpp_to_native = fp_to_native; - - fpp_to_int = fp_to_int; - fpp_from_int = fp_from_int; - - fpp_to_pack = fp_to_pack; - fpp_from_pack = fp_from_pack; - - fpp_to_single = fp_to_single; - fpp_from_single = fp_from_single; - fpp_to_double = fp_to_double; - fpp_from_double = fp_from_double; - fpp_to_exten = fp_to_exten; - fpp_from_exten = fp_from_exten; - fpp_to_exten_fmovem = fp_to_exten; - fpp_from_exten_fmovem = fp_from_exten; - - fpp_round_single = fp_round_single; - fpp_round_double = fp_round_double; - fpp_round32 = fp_round32; - fpp_round64 = fp_round64; - - fpp_normalize = fp_normalize; - fpp_denormalize = fp_denormalize; - fpp_get_internal_overflow = fp_get_internal_overflow; - fpp_get_internal_underflow = fp_get_internal_underflow; - fpp_get_internal_round_all = fp_get_internal_round_all; - fpp_get_internal_round = fp_get_internal_round; - fpp_get_internal_round_exten = fp_get_internal_round_exten; - fpp_get_internal = fp_get_internal; - fpp_get_internal_grs = fp_get_internal_grs; - - fpp_int = fp_int; - fpp_sinh = fp_sinh; - fpp_intrz = fp_intrz; - fpp_sqrt = fp_sqrt; - fpp_lognp1 = fp_lognp1; - fpp_etoxm1 = fp_etoxm1; - fpp_tanh = fp_tanh; - fpp_atan = fp_atan; - fpp_atanh = fp_atanh; - fpp_sin = fp_sin; - fpp_asin = fp_asin; - fpp_tan = fp_tan; - fpp_etox = fp_etox; - fpp_twotox = fp_twotox; - fpp_tentox = fp_tentox; - fpp_logn = fp_logn; - fpp_log10 = fp_log10; - fpp_log2 = fp_log2; - fpp_abs = fp_abs; - fpp_cosh = fp_cosh; - fpp_neg = fp_neg; - fpp_acos = fp_acos; - fpp_cos = fp_cos; - fpp_getexp = fp_getexp; - fpp_getman = fp_getman; - fpp_div = fp_div; - fpp_mod = fp_mod; - fpp_add = fp_add; - fpp_mul = fp_mul; - fpp_rem = fp_rem; - fpp_scale = fp_scale; - fpp_sub = fp_sub; - fpp_sgldiv = fp_sgldiv; - fpp_sglmul = fp_sglmul; - fpp_cmp = fp_cmp; - fpp_tst = fp_tst; - fpp_move = fp_move; -} - double softfloat_tan(double v) { - struct float_status f = { 0 }; - uae_u32 w1, w2; - fpdata fpd 
= { 0 }; - - fpd.fp = v; - set_floatx80_rounding_precision(80, &f); - set_float_rounding_mode(float_round_to_zero, &f); - fp_from_double(&fpd, &w1, &w2); - floatx80 fv = float64_to_floatx80(((uae_u64)w1 << 32) | w2, &fs); - fv = floatx80_tan(fv, &fs); - float64 f64 = floatx80_to_float64(fv, &fs); - fp_to_double(&fpd, f64 >> 32, (uae_u32)f64); - return fpd.fp; + return tanl(v); } diff --git a/src/fpp_softfloat.cpp b/src/fpp_softfloat.cpp deleted file mode 100644 index 08e67c88..00000000 --- a/src/fpp_softfloat.cpp +++ /dev/null @@ -1,780 +0,0 @@ -/* -* UAE - The Un*x Amiga Emulator -* -* MC68881/68882/68040/68060 FPU emulation -* Softfloat version -* -* Andreas Grabher and Toni Wilen -* -*/ -#define __USE_ISOC9X /* We might be able to pick up a NaN */ - -#define SOFTFLOAT_FAST_INT64 - -#include -#include -#include - -#include "sysconfig.h" -#include "sysdeps.h" - -#include "options.h" -#include "memory.h" -#include "newcpu.h" -#include "fpp.h" -#include "newcpu.h" - -#include "softfloat/softfloat-macros.h" -#include "softfloat/softfloat-specialize.h" - -#define FPCR_ROUNDING_MODE 0x00000030 -#define FPCR_ROUND_NEAR 0x00000000 -#define FPCR_ROUND_ZERO 0x00000010 -#define FPCR_ROUND_MINF 0x00000020 -#define FPCR_ROUND_PINF 0x00000030 - -#define FPCR_ROUNDING_PRECISION 0x000000c0 -#define FPCR_PRECISION_SINGLE 0x00000040 -#define FPCR_PRECISION_DOUBLE 0x00000080 -#define FPCR_PRECISION_EXTENDED 0x00000000 - -static struct float_status fs; - -/* Functions for setting host/library modes and getting status */ -static void fp_set_mode(uae_u32 mode_control) -{ - set_float_detect_tininess(float_tininess_before_rounding, &fs); - - switch(mode_control & FPCR_ROUNDING_PRECISION) { - case FPCR_PRECISION_SINGLE: // single - set_floatx80_rounding_precision(32, &fs); - break; - default: // double - case FPCR_PRECISION_DOUBLE: // double - set_floatx80_rounding_precision(64, &fs); - break; - case FPCR_PRECISION_EXTENDED: // extended - set_floatx80_rounding_precision(80, &fs); - 
break; - } - - switch(mode_control & FPCR_ROUNDING_MODE) { - case FPCR_ROUND_NEAR: // to neareset - set_float_rounding_mode(float_round_nearest_even, &fs); - break; - case FPCR_ROUND_ZERO: // to zero - set_float_rounding_mode(float_round_to_zero, &fs); - break; - case FPCR_ROUND_MINF: // to minus - set_float_rounding_mode(float_round_down, &fs); - break; - case FPCR_ROUND_PINF: // to plus - set_float_rounding_mode(float_round_up, &fs); - break; - } -} - -static void fp_get_status(uae_u32 *status) -{ - if (fs.float_exception_flags & float_flag_signaling) - *status |= FPSR_SNAN; - if (fs.float_exception_flags & float_flag_invalid) - *status |= FPSR_OPERR; - if (fs.float_exception_flags & float_flag_divbyzero) - *status |= FPSR_DZ; - if (fs.float_exception_flags & float_flag_overflow) - *status |= FPSR_OVFL; - if (fs.float_exception_flags & float_flag_underflow) - *status |= FPSR_UNFL; - if (fs.float_exception_flags & float_flag_inexact) - *status |= FPSR_INEX2; - if (fs.float_exception_flags & float_flag_decimal) - *status |= FPSR_INEX1; -} -STATIC_INLINE void fp_clear_status(void) -{ - fs.float_exception_flags = 0; -} - - -static const TCHAR *fp_printx80(floatx80 *fx, int mode) -{ - static TCHAR fsout[32]; - flag n, u, d; - - if (mode < 0) { - _stprintf(fsout, _T("%04X-%08X-%08X"), fx->high, (uae_u32)(fx->low >> 32), (uae_u32)fx->low); - return fsout; - } - - n = floatx80_is_negative(*fx); - u = floatx80_is_unnormal(*fx); - d = floatx80_is_denormal(*fx); - - if (floatx80_is_infinity(*fx)) { - _stprintf(fsout, _T("%c%s"), n ? '-' : '+', _T("inf")); - } else if (floatx80_is_signaling_nan(*fx)) { - _stprintf(fsout, _T("%c%s"), n ? '-' : '+', _T("snan")); - } else if (floatx80_is_nan(*fx)) { - _stprintf(fsout, _T("%c%s"), n ? 
'-' : '+', _T("nan")); - } else { - int32_t len = 17; - int8_t save_exception_flags = fs.float_exception_flags; - fs.float_exception_flags = 0; - floatx80 x = floatx80_to_floatdecimal(*fx, &len, &fs); - _stprintf(fsout, _T("%c%01lld.%016llde%c%04d%s%s"), n ? '-' : '+', - x.low / LIT64(10000000000000000), x.low % LIT64(10000000000000000), - (x.high & 0x4000) ? '-' : '+', x.high & 0x3FFF, d ? _T("D") : u ? _T("U") : _T(""), - (fs.float_exception_flags & float_flag_inexact) ? _T("~") : _T("")); - fs.float_exception_flags = save_exception_flags; - } - - if (mode == 0 || mode > _tcslen(fsout)) - return fsout; - fsout[mode] = 0; - return fsout; -} - -static const TCHAR *fp_print(fpdata *fpd, int mode) -{ - return fp_printx80(&fpd->fpx, mode); -} - -/* Functions for detecting float type */ -static bool fp_is_snan(fpdata *fpd) -{ - return floatx80_is_signaling_nan(fpd->fpx) != 0; -} -static bool fp_unset_snan(fpdata *fpd) -{ - fpd->fpx.low |= LIT64(0x4000000000000000); - return 0; -} -static bool fp_is_nan (fpdata *fpd) -{ - return floatx80_is_any_nan(fpd->fpx) != 0; -} -static bool fp_is_infinity (fpdata *fpd) -{ - return floatx80_is_infinity(fpd->fpx) != 0; -} -static bool fp_is_zero(fpdata *fpd) -{ - return floatx80_is_zero(fpd->fpx) != 0; -} -static bool fp_is_neg(fpdata *fpd) -{ - return floatx80_is_negative(fpd->fpx) != 0; -} -static bool fp_is_denormal(fpdata *fpd) -{ - return floatx80_is_denormal(fpd->fpx) != 0; -} -static bool fp_is_unnormal(fpdata *fpd) -{ - return floatx80_is_unnormal(fpd->fpx) != 0; -} - - -static void to_single(fpdata *fpd, uae_u32 wrd1) -{ - float32 f = wrd1; - fpd->fpx = float32_to_floatx80_allowunnormal(f, &fs); -} -static uae_u32 from_single(fpdata *fpd) -{ - float32 f = floatx80_to_float32(fpd->fpx, &fs); - return f; -} - -static void to_double(fpdata *fpd, uae_u32 wrd1, uae_u32 wrd2) -{ - float64 f = ((float64)wrd1 << 32) | wrd2; - fpd->fpx = float64_to_floatx80_allowunnormal(f, &fs); -} -static void from_double(fpdata *fpd, uae_u32 
*wrd1, uae_u32 *wrd2) -{ - float64 f = floatx80_to_float64(fpd->fpx, &fs); - *wrd1 = f >> 32; - *wrd2 = (uae_u32)f; -} - -static void to_exten(fpdata *fpd, uae_u32 wrd1, uae_u32 wrd2, uae_u32 wrd3) -{ - fpd->fpx.high = (uae_u16)(wrd1 >> 16); - fpd->fpx.low = ((uae_u64)wrd2 << 32) | wrd3; -} -static void from_exten(fpdata *fpd, uae_u32 *wrd1, uae_u32 *wrd2, uae_u32 *wrd3) -{ - floatx80 f = floatx80_to_floatx80(fpd->fpx, &fs); - *wrd1 = (uae_u32)(f.high << 16); - *wrd2 = f.low >> 32; - *wrd3 = (uae_u32)f.low; -} - -static void to_exten_fmovem(fpdata *fpd, uae_u32 wrd1, uae_u32 wrd2, uae_u32 wrd3) -{ - fpd->fpx.high = (uae_u16)(wrd1 >> 16); - fpd->fpx.low = ((uae_u64)wrd2 << 32) | wrd3; -} -static void from_exten_fmovem(fpdata *fpd, uae_u32 *wrd1, uae_u32 *wrd2, uae_u32 *wrd3) - { - *wrd1 = (uae_u32)(fpd->fpx.high << 16); - *wrd2 = fpd->fpx.low >> 32; - *wrd3 = (uae_u32)fpd->fpx.low; - } - -static uae_s64 to_int(fpdata *src, int size) -{ - switch (size) { - case 0: return floatx80_to_int8(src->fpx, &fs); - case 1: return floatx80_to_int16(src->fpx, &fs); - case 2: return floatx80_to_int32(src->fpx, &fs); - default: return 0; - } -} -static void from_int(fpdata *fpd, uae_s32 src) -{ - fpd->fpx = int32_to_floatx80(src); -} - -/* Functions for returning exception state data */ -static void fp_get_internal_overflow(fpdata *fpd) -{ - fpd->fpx = getFloatInternalOverflow(); -} -static void fp_get_internal_underflow(fpdata *fpd) - { - fpd->fpx = getFloatInternalUnderflow(); -} -static void fp_get_internal_round_all(fpdata *fpd) -{ - fpd->fpx = getFloatInternalRoundedAll(); -} -static void fp_get_internal_round(fpdata *fpd) -{ - fpd->fpx = getFloatInternalRoundedSome(); -} -static void fp_get_internal_round_exten(fpdata *fpd) -{ - fpd->fpx = getFloatInternalFloatx80(); -} -static void fp_get_internal(fpdata *fpd) -{ - fpd->fpx = getFloatInternalUnrounded(); -} -static uae_u32 fp_get_internal_grs(void) -{ - return (uae_u32)getFloatInternalGRS(); -} -/* Function for 
denormalizing */ -static void fp_denormalize(fpdata *fpd, int esign) -{ - fpd->fpx = floatx80_denormalize(fpd->fpx, esign); -} - -/* Functions for rounding */ - -// round to float with extended precision exponent -static void fp_round32(fpdata *fpd) -{ - fpd->fpx = floatx80_round32(fpd->fpx, &fs); -} - -// round to double with extended precision exponent -static void fp_round64(fpdata *fpd) -{ - fpd->fpx = floatx80_round64(fpd->fpx, &fs); -} - -// round to float -static void fp_round_single(fpdata *fpd) -{ - fpd->fpx = floatx80_round_to_float32(fpd->fpx, &fs); -} - -// round to double -static void fp_round_double(fpdata *fpd) -{ - fpd->fpx = floatx80_round_to_float64(fpd->fpx, &fs); -} - -/* Arithmetic functions */ - -static void fp_int(fpdata *a, fpdata *b) -{ - a->fpx = floatx80_round_to_int(b->fpx, &fs); -} - -static void fp_intrz(fpdata *a, fpdata *b) -{ - a->fpx = floatx80_round_to_int_toward_zero(b->fpx, &fs); -} - -static void fp_getexp(fpdata *a, fpdata *b) -{ - a->fpx = floatx80_getexp(b->fpx, &fs); -} -static void fp_getman(fpdata *a, fpdata *b) -{ - a->fpx = floatx80_getman(b->fpx, &fs); -} -static void fp_mod(fpdata *a, fpdata *b, uae_u64 *q, uae_u8 *s) -{ - a->fpx = floatx80_mod(a->fpx, b->fpx, q, s, &fs); -} -static void fp_sgldiv(fpdata *a, fpdata *b) -{ - a->fpx = floatx80_sgldiv(a->fpx, b->fpx, &fs); -} -static void fp_sglmul(fpdata *a, fpdata *b) -{ - a->fpx = floatx80_sglmul(a->fpx, b->fpx, &fs); -} -static void fp_rem(fpdata *a, fpdata *b, uae_u64 *q, uae_u8 *s) -{ - a->fpx = floatx80_rem(a->fpx, b->fpx, q, s, &fs); -} -static void fp_scale(fpdata *a, fpdata *b) -{ - a->fpx = floatx80_scale(a->fpx, b->fpx, &fs); -} -static void fp_cmp(fpdata *a, fpdata *b) -{ - a->fpx = floatx80_cmp(a->fpx, b->fpx, &fs); -} -static void fp_tst(fpdata *a, fpdata *b) -{ - a->fpx = floatx80_tst(b->fpx, &fs); -} - -#define SETPREC \ - uint8_t oldprec = fs.floatx80_rounding_precision; \ - if (prec > 0) \ - set_floatx80_rounding_precision(prec, &fs); - -#define 
RESETPREC \ - if (prec > 0) \ - set_floatx80_rounding_precision(oldprec, &fs); - - -/* Functions with fixed precision */ -static void fp_move(fpdata *a, fpdata *b, int prec) -{ - SETPREC - a->fpx = floatx80_move(b->fpx, &fs); - RESETPREC -} -static void fp_abs(fpdata *a, fpdata *b, int prec) -{ - SETPREC - a->fpx = floatx80_abs(b->fpx, &fs); - RESETPREC -} -static void fp_neg(fpdata *a, fpdata *b, int prec) -{ - SETPREC - a->fpx = floatx80_neg(b->fpx, &fs); - RESETPREC -} -static void fp_add(fpdata *a, fpdata *b, int prec) -{ - SETPREC - a->fpx = floatx80_add(a->fpx, b->fpx, &fs); - RESETPREC -} -static void fp_sub(fpdata *a, fpdata *b, int prec) -{ - SETPREC - a->fpx = floatx80_sub(a->fpx, b->fpx, &fs); - RESETPREC -} -static void fp_mul(fpdata *a, fpdata *b, int prec) -{ - SETPREC - a->fpx = floatx80_mul(a->fpx, b->fpx, &fs); - RESETPREC -} -static void fp_div(fpdata *a, fpdata *b, int prec) -{ - SETPREC - a->fpx = floatx80_div(a->fpx, b->fpx, &fs); - RESETPREC -} -static void fp_sqrt(fpdata *a, fpdata *b, int prec) -{ - SETPREC - a->fpx = floatx80_sqrt(b->fpx, &fs); - RESETPREC -} - - -static void fp_sinh(fpdata *a, fpdata *b) -{ - a->fpx = floatx80_sinh(b->fpx, &fs); -} -static void fp_lognp1(fpdata *a, fpdata *b) -{ - a->fpx = floatx80_lognp1(b->fpx, &fs); -} -static void fp_etoxm1(fpdata *a, fpdata *b) -{ - a->fpx = floatx80_etoxm1(b->fpx, &fs); -} -static void fp_tanh(fpdata *a, fpdata *b) -{ - a->fpx = floatx80_tanh(b->fpx, &fs); -} -static void fp_atan(fpdata *a, fpdata *b) -{ - a->fpx = floatx80_atan(b->fpx, &fs); -} -static void fp_asin(fpdata *a, fpdata *b) -{ - a->fpx = floatx80_asin(b->fpx, &fs); -} -static void fp_atanh(fpdata *a, fpdata *b) -{ - a->fpx = floatx80_atanh(b->fpx, &fs); -} -static void fp_sin(fpdata *a, fpdata *b) -{ - a->fpx = floatx80_sin(b->fpx, &fs); -} -static void fp_tan(fpdata *a, fpdata *b) -{ - a->fpx = floatx80_tan(b->fpx, &fs); -} -static void fp_etox(fpdata *a, fpdata *b) -{ - a->fpx = floatx80_etox(b->fpx, &fs); -} -static 
void fp_twotox(fpdata *a, fpdata *b) -{ - a->fpx = floatx80_twotox(b->fpx, &fs); -} -static void fp_tentox(fpdata *a, fpdata *b) -{ - a->fpx = floatx80_tentox(b->fpx, &fs); -} -static void fp_logn(fpdata *a, fpdata *b) -{ - a->fpx = floatx80_logn(b->fpx, &fs); -} -static void fp_log10(fpdata *a, fpdata *b) -{ - a->fpx = floatx80_log10(b->fpx, &fs); -} -static void fp_log2(fpdata *a, fpdata *b) -{ - a->fpx = floatx80_log2(b->fpx, &fs); -} -static void fp_cosh(fpdata *a, fpdata *b) -{ - a->fpx = floatx80_cosh(b->fpx, &fs); -} -static void fp_acos(fpdata *a, fpdata *b) -{ - a->fpx = floatx80_acos(b->fpx, &fs); -} -static void fp_cos(fpdata *a, fpdata *b) -{ - a->fpx = floatx80_cos(b->fpx, &fs); -} - -/* Functions for converting between float formats */ -static const fptype twoto32 = 4294967296.0; - -static void to_native(fptype *fp, fpdata *fpd) -{ - int expon; - fptype frac; - - expon = fpd->fpx.high & 0x7fff; - - if (fp_is_zero(fpd)) { - *fp = fp_is_neg(fpd) ? -0.0 : +0.0; - return; - } - if (fp_is_nan(fpd)) { - *fp = sqrt(-1); - return; - } - if (fp_is_infinity(fpd)) { - double zero = 0.0; - *fp = fp_is_neg(fpd) ? 
log(0.0) : (1.0 / zero); - return; - } - - frac = (fptype)fpd->fpx.low / (fptype)(twoto32 * 2147483648.0); - if (fp_is_neg(fpd)) - frac = -frac; - *fp = ldexp (frac, expon - 16383); -} - -static void from_native(fptype fp, fpdata *fpd) -{ - int expon; - fptype frac; - - if (signbit(fp)) - fpd->fpx.high = 0x8000; - else - fpd->fpx.high = 0x0000; - - if (isnan(fp)) { - fpd->fpx.high |= 0x7fff; - fpd->fpx.low = LIT64(0xffffffffffffffff); - return; - } - if (isinf(fp)) { - fpd->fpx.high |= 0x7fff; - fpd->fpx.low = LIT64(0x0000000000000000); - return; - } - if (fp == 0.0) { - fpd->fpx.low = LIT64(0x0000000000000000); - return; - } - if (fp < 0.0) - fp = -fp; - - frac = frexp (fp, &expon); - frac += 0.5 / (twoto32 * twoto32); - if (frac >= 1.0) { - frac /= 2.0; - expon++; - } - fpd->fpx.high |= (expon + 16383 - 1) & 0x7fff; - fpd->fpx.low = (uint64_t)(frac * (fptype)(twoto32 * twoto32)); - - while (!(fpd->fpx.low & LIT64( 0x8000000000000000))) { - if (fpd->fpx.high == 0) { - break; - } - fpd->fpx.low <<= 1; - fpd->fpx.high--; - } -} - -static void fp_normalize(fpdata *a) -{ - a->fpx = floatx80_normalize(a->fpx); -} - -static void fp_to_pack(fpdata *fp, uae_u32 *wrd, int dummy) -{ - floatx80 f; - int i; - uae_s32 exp; - uae_s64 mant; - uae_u32 pack_exp, pack_int, pack_se, pack_sm; - uae_u64 pack_frac; - - if (((wrd[0] >> 16) & 0x7fff) == 0x7fff) { - // infinity has extended exponent and all 0 packed fraction - // nans are copies bit by bit - fpp_to_exten(fp, wrd[0], wrd[1], wrd[2]); - return; - } - if (!(wrd[0] & 0xf) && !wrd[1] && !wrd[2]) { - // exponent is not cared about, if mantissa is zero - wrd[0] &= 0x80000000; - fpp_to_exten(fp, wrd[0], wrd[1], wrd[2]); - return; - } - - pack_exp = (wrd[0] >> 16) & 0xFFF; // packed exponent - pack_int = wrd[0] & 0xF; // packed integer part - pack_frac = ((uae_u64)wrd[1] << 32) | wrd[2]; // packed fraction - pack_se = (wrd[0] >> 30) & 1; // sign of packed exponent - pack_sm = (wrd[0] >> 31) & 1; // sign of packed significand - exp 
= 0; - - for (i = 0; i < 3; i++) { - exp *= 10; - exp += (pack_exp >> (8 - i * 4)) & 0xF; - } - - if (pack_se) { - exp = -exp; - } - - exp -= 16; - - if (exp < 0) { - exp = -exp; - pack_se = 1; - } - - mant = pack_int; - - for (i = 0; i < 16; i++) { - mant *= 10; - mant += (pack_frac >> (60 - i * 4)) & 0xF; - } - - f.high = exp & 0x3FFF; - f.high |= pack_se ? 0x4000 : 0; - f.high |= pack_sm ? 0x8000 : 0; - f.low = mant; - - fp->fpx = floatdecimal_to_floatx80(f, &fs); -} - - -static void fp_from_pack(fpdata *fp, uae_u32 *wrd, int kfactor) -{ - floatx80 f = floatx80_to_floatdecimal(fp->fpx, &kfactor, &fs); - - uae_u32 pack_exp, pack_exp4, pack_int, pack_se, pack_sm; - uae_u64 pack_frac; - - uae_u32 exponent; - uae_u64 significand; - - uae_s32 len; - uae_u64 digit; - - if ((f.high & 0x7FFF) == 0x7FFF) { - wrd[0] = (uae_u32)(f.high << 16); - wrd[1] = f.low >> 32; - wrd[2] = (uae_u32)f.low; - } else { - exponent = f.high & 0x3FFF; - significand = f.low; - - pack_int = 0; - pack_frac = 0; - len = kfactor; // SoftFloat saved len to kfactor variable - while (len > 0) { - len--; - digit = significand % 10; - significand /= 10; - if (len == 0) { - pack_int = digit; - } else { - pack_frac |= digit << (64 - len * 4); - } - } - - pack_exp = 0; - pack_exp4 = 0; - len = 4; - while (len > 0) { - len--; - digit = exponent % 10; - exponent /= 10; - if (len == 0) { - pack_exp4 = digit; - } else { - pack_exp |= digit << (12 - len * 4); - } - } - - pack_se = f.high & 0x4000; - pack_sm = f.high & 0x8000; - - wrd[0] = pack_exp << 16; - wrd[0] |= pack_exp4 << 12; - wrd[0] |= pack_int; - wrd[0] |= pack_se ? 0x40000000 : 0; - wrd[0] |= pack_sm ? 
0x80000000 : 0; - - wrd[1] = pack_frac >> 32; - wrd[2] = pack_frac & 0xffffffff; - } -} - -void fp_init_softfloat(void) -{ - float_status fsx = { 0 }; - set_floatx80_rounding_precision(80, &fsx); - set_float_rounding_mode(float_round_to_zero, &fsx); - - fpp_print = fp_print; - fpp_is_snan = fp_is_snan; - fpp_unset_snan = fp_unset_snan; - fpp_is_nan = fp_is_nan; - fpp_is_infinity = fp_is_infinity; - fpp_is_zero = fp_is_zero; - fpp_is_neg = fp_is_neg; - fpp_is_denormal = fp_is_denormal; - fpp_is_unnormal = fp_is_unnormal; - - fpp_get_status = fp_get_status; - fpp_clear_status = fp_clear_status; - fpp_set_mode = fp_set_mode; - - fpp_from_native = from_native; - fpp_to_native = to_native; - - fpp_to_int = to_int; - fpp_from_int = from_int; - - fpp_to_pack = fp_to_pack; - fpp_from_pack = fp_from_pack; - - fpp_to_single = to_single; - fpp_from_single = from_single; - fpp_to_double = to_double; - fpp_from_double = from_double; - fpp_to_exten = to_exten; - fpp_from_exten = from_exten; - fpp_to_exten_fmovem = to_exten_fmovem; - fpp_from_exten_fmovem = from_exten_fmovem; - - fpp_round_single = fp_round_single; - fpp_round_double = fp_round_double; - fpp_round32 = fp_round32; - fpp_round64 = fp_round64; - - fpp_normalize = fp_normalize; - fpp_denormalize = fp_denormalize; - fpp_get_internal_overflow = fp_get_internal_overflow; - fpp_get_internal_underflow = fp_get_internal_underflow; - fpp_get_internal_round_all = fp_get_internal_round_all; - fpp_get_internal_round = fp_get_internal_round; - fpp_get_internal_round_exten = fp_get_internal_round_exten; - fpp_get_internal = fp_get_internal; - fpp_get_internal_grs = fp_get_internal_grs; - - fpp_int = fp_int; - fpp_sinh = fp_sinh; - fpp_intrz = fp_intrz; - fpp_sqrt = fp_sqrt; - fpp_lognp1 = fp_lognp1; - fpp_etoxm1 = fp_etoxm1; - fpp_tanh = fp_tanh; - fpp_atan = fp_atan; - fpp_atanh = fp_atanh; - fpp_sin = fp_sin; - fpp_asin = fp_asin; - fpp_tan = fp_tan; - fpp_etox = fp_etox; - fpp_twotox = fp_twotox; - fpp_tentox = fp_tentox; - 
fpp_logn = fp_logn; - fpp_log10 = fp_log10; - fpp_log2 = fp_log2; - fpp_abs = fp_abs; - fpp_cosh = fp_cosh; - fpp_neg = fp_neg; - fpp_acos = fp_acos; - fpp_cos = fp_cos; - fpp_getexp = fp_getexp; - fpp_getman = fp_getman; - fpp_div = fp_div; - fpp_mod = fp_mod; - fpp_add = fp_add; - fpp_mul = fp_mul; - fpp_rem = fp_rem; - fpp_scale = fp_scale; - fpp_sub = fp_sub; - fpp_sgldiv = fp_sgldiv; - fpp_sglmul = fp_sglmul; - fpp_cmp = fp_cmp; - fpp_tst = fp_tst; - fpp_move = fp_move; -} - diff --git a/src/include/akiko.h b/src/include/akiko.h index 5b102597..b9dd591a 100644 --- a/src/include/akiko.h +++ b/src/include/akiko.h @@ -4,14 +4,14 @@ #define AKIKO_BASE 0xb80000 #define AKIKO_BASE_END 0xb80100 /* ?? */ -extern void akiko_reset(void); -extern int akiko_init(void); -extern void akiko_free(void); +extern void akiko_reset (void); +extern int akiko_init (void); +extern void akiko_free (void); -extern void AKIKO_hsync_handler(void); -extern void akiko_mute(int); +extern void AKIKO_hsync_handler (void); +extern void akiko_mute (int); extern bool akiko_ntscmode(void); -extern void rethink_akiko(void); +extern void rethink_akiko (void); #endif /* UAE_AKIKO_H */ diff --git a/src/include/drawing.h b/src/include/drawing.h index dc4289c5..0084784d 100644 --- a/src/include/drawing.h +++ b/src/include/drawing.h @@ -94,44 +94,38 @@ struct color_entry { }; /* convert 24 bit AGA Amiga RGB to native color */ -// Disabled because it only works for 16-bit modes (wrong colors on AGA modes if running 32-bit) -//#ifdef ARMV6T2 -//STATIC_INLINE uae_u32 CONVERT_RGB(uae_u32 c) -//{ -// uae_u32 ret; -// __asm__ ( -// "ubfx r1, %[c], #19, #5 \n\t" -// "ubfx r2, %[c], #10, #6 \n\t" -// "ubfx %[v], %[c], #3, #5 \n\t" -// "orr %[v], %[v], r1, lsl #11 \n\t" -// "orr %[v], %[v], r2, lsl #5 \n\t" -// "pkhbt %[v], %[v], %[v], lsl #16 \n\t" -// : [v] "=r" (ret) : [c] "r" (c) : "r1", "r2" ); -// return ret; -//} -//STATIC_INLINE uae_u16 CONVERT_RGB_16(uae_u32 c) -//{ -// uae_u16 ret; -// __asm__ ( -// 
"ubfx r1, %[c], #19, #5 \n\t" -// "ubfx r2, %[c], #10, #6 \n\t" -// "ubfx %[v], %[c], #3, #5 \n\t" -// "orr %[v], %[v], r1, lsl #11 \n\t" -// "orr %[v], %[v], r2, lsl #5 \n\t" -// : [v] "=r" (ret) : [c] "r" (c) : "r1", "r2" ); -// return ret; -//} -//#else -#ifdef WORDS_BIGENDIAN -# define CONVERT_RGB(c) \ - ( xbluecolors[((uae_u8*)(&c))[3]] | xgreencolors[((uae_u8*)(&c))[2]] | xredcolors[((uae_u8*)(&c))[1]] ) +#ifdef ARMV6T2 +STATIC_INLINE uae_u32 CONVERT_RGB(uae_u32 c) +{ + uae_u32 ret; + __asm__ ( + "ubfx r1, %[c], #19, #5 \n\t" + "ubfx r2, %[c], #10, #6 \n\t" + "ubfx %[v], %[c], #3, #5 \n\t" + "orr %[v], %[v], r1, lsl #11 \n\t" + "orr %[v], %[v], r2, lsl #5 \n\t" + "pkhbt %[v], %[v], %[v], lsl #16 \n\t" + : [v] "=r" (ret) : [c] "r" (c) : "r1", "r2" ); + return ret; +} +STATIC_INLINE uae_u16 CONVERT_RGB_16(uae_u32 c) +{ + uae_u16 ret; + __asm__ ( + "ubfx r1, %[c], #19, #5 \n\t" + "ubfx r2, %[c], #10, #6 \n\t" + "ubfx %[v], %[c], #3, #5 \n\t" + "orr %[v], %[v], r1, lsl #11 \n\t" + "orr %[v], %[v], r2, lsl #5 \n\t" + : [v] "=r" (ret) : [c] "r" (c) : "r1", "r2" ); + return ret; +} #else -# define CONVERT_RGB(c) \ - ( xbluecolors[((uae_u8*)(&c))[0]] | xgreencolors[((uae_u8*)(&c))[1]] | xredcolors[((uae_u8*)(&c))[2]] ) +#define CONVERT_RGB(c) \ + ( xbluecolors[((uae_u8*)(&c))[0]] | xgreencolors[((uae_u8*)(&c))[1]] | xredcolors[((uae_u8*)(&c))[2]] ) #define CONVERT_RGB_16(c) \ ( xbluecolors[((uae_u8*)(&c))[0]] | xgreencolors[((uae_u8*)(&c))[1]] | xredcolors[((uae_u8*)(&c))[2]] ) #endif -//#endif STATIC_INLINE xcolnr getxcolor (int c) { @@ -213,8 +207,9 @@ extern uae_u16 spixels[MAX_SPR_PIXELS * 2]; /* Way too much... 
*/ #define MAX_REG_CHANGE ((MAXVPOS + 1) * MAXHPOS) +#define COLOR_TABLE_SIZE (MAXVPOS + 2) * 2 -extern struct color_entry curr_color_tables[(MAXVPOS + 2) * 2]; +extern struct color_entry curr_color_tables[COLOR_TABLE_SIZE]; extern struct sprite_entry *curr_sprite_entries; extern struct color_change *curr_color_changes; @@ -229,19 +224,14 @@ struct decision { int diwfirstword, diwlastword; int ctable; - uae_u16 bplcon0, bplcon2; -#ifdef AGA - uae_u16 bplcon3, bplcon4; -#endif - uae_u8 nr_planes; - uae_u8 bplres; - bool ehb_seen; - bool ham_seen; - bool ham_at_start; -#ifdef AGA + uae_u16 bplcon0, bplcon2; + uae_u16 bplcon3, bplcon4; + uae_u8 nr_planes; + uae_u8 bplres; + bool ham_seen; + bool ham_at_start; bool bordersprite_seen; bool xor_seen; -#endif }; /* Anything related to changes in hw registers during the DDF for one diff --git a/src/include/flags_arm.h b/src/include/flags_arm.h index c9a60490..2633aaf6 100644 --- a/src/include/flags_arm.h +++ b/src/include/flags_arm.h @@ -32,21 +32,36 @@ /* Native integer code conditions */ enum { - NATIVE_CC_EQ = 0, - NATIVE_CC_NE = 1, - NATIVE_CC_CS = 2, - NATIVE_CC_CC = 3, - NATIVE_CC_MI = 4, - NATIVE_CC_PL = 5, - NATIVE_CC_VS = 6, - NATIVE_CC_VC = 7, - NATIVE_CC_HI = 8, - NATIVE_CC_LS = 9, - NATIVE_CC_GE = 10, - NATIVE_CC_LT = 11, - NATIVE_CC_GT = 12, - NATIVE_CC_LE = 13, - NATIVE_CC_AL = 14 + NATIVE_CC_EQ = 0, + NATIVE_CC_NE = 1, + NATIVE_CC_CS = 2, + NATIVE_CC_CC = 3, + NATIVE_CC_MI = 4, + NATIVE_CC_PL = 5, + NATIVE_CC_VS = 6, + NATIVE_CC_VC = 7, + NATIVE_CC_HI = 8, + NATIVE_CC_LS = 9, + NATIVE_CC_GE = 10, + NATIVE_CC_LT = 11, + NATIVE_CC_GT = 12, + NATIVE_CC_LE = 13, + NATIVE_CC_AL = 14, + + // For FBcc, we need some pseudo condition codes + NATIVE_CC_F_OGT = 16 + 2, + NATIVE_CC_F_OGE = 16 + 3, + NATIVE_CC_F_OLT = 16 + 4, + NATIVE_CC_F_OLE = 16 + 5, + NATIVE_CC_F_OGL = 16 + 6, + NATIVE_CC_F_OR = 16 + 7, + NATIVE_CC_F_UN = 16 + 8, + NATIVE_CC_F_UEQ = 16 + 9, + NATIVE_CC_F_UGT = 16 + 10, + NATIVE_CC_F_UGE = 16 + 11, + 
NATIVE_CC_F_ULT = 16 + 12, + NATIVE_CC_F_ULE = 16 + 13 + }; #endif /* NATIVE_FLAGS_ARM_H */ diff --git a/src/include/fpp.h b/src/include/fpp.h index b30c7455..f12f8e66 100644 --- a/src/include/fpp.h +++ b/src/include/fpp.h @@ -7,6 +7,7 @@ /* E = MAX & F # 0 -> NotANumber */ /* E = biased by 127 (single) ,1023 (double) ,16383 (extended) */ +#pragma once #define FPSR_BSUN 0x00008000 #define FPSR_SNAN 0x00004000 #define FPSR_OPERR 0x00002000 @@ -16,129 +17,8 @@ #define FPSR_INEX2 0x00000200 #define FPSR_INEX1 0x00000100 -extern void fp_init_native(void); -extern void fp_init_softfloat(void); -extern void fpsr_set_exception(uae_u32 exception); -extern void fpu_modechange(void); - #if defined(CPU_i386) || defined(CPU_x86_64) extern void init_fpucw_x87(void); #endif -typedef void (*FPP_ABQS)(fpdata*, fpdata*, uae_u64*, uae_u8*); -typedef void (*FPP_AB)(fpdata*, fpdata*); -typedef void (*FPP_ABP)(fpdata*, fpdata*, int); -typedef void (*FPP_A)(fpdata*); - -typedef bool (*FPP_IS)(fpdata*); -typedef void (*FPP_SET_MODE)(uae_u32); -typedef void (*FPP_GET_STATUS)(uae_u32*); -typedef void (*FPP_CLEAR_STATUS)(void); - -typedef void (*FPP_FROM_NATIVE)(fptype, fpdata*); -typedef void (*FPP_TO_NATIVE)(fptype*, fpdata*); - -typedef void (*FPP_FROM_INT)(fpdata*,uae_s32); -typedef uae_s64 (*FPP_TO_INT)(fpdata*, int); - -typedef void (*FPP_TO_SINGLE)(fpdata*, uae_u32); -typedef uae_u32 (*FPP_FROM_SINGLE)(fpdata*); - -typedef void (*FPP_TO_DOUBLE)(fpdata*, uae_u32, uae_u32); -typedef void (*FPP_FROM_DOUBLE)(fpdata*, uae_u32*, uae_u32*); - -typedef void (*FPP_TO_EXTEN)(fpdata*, uae_u32, uae_u32, uae_u32); -typedef void (*FPP_FROM_EXTEN)(fpdata*, uae_u32*, uae_u32*, uae_u32*); - -typedef void (*FPP_PACK)(fpdata*, uae_u32*, int); - -typedef const TCHAR* (*FPP_PRINT)(fpdata*,int); -typedef uae_u32 (*FPP_GET32)(void); - -typedef void (*FPP_DENORMALIZE)(fpdata*,int); - -extern FPP_PRINT fpp_print; - -extern FPP_IS fpp_is_snan; -extern FPP_IS fpp_unset_snan; -extern FPP_IS fpp_is_nan; -extern 
FPP_IS fpp_is_infinity; -extern FPP_IS fpp_is_zero; -extern FPP_IS fpp_is_neg; -extern FPP_IS fpp_is_denormal; -extern FPP_IS fpp_is_unnormal; - -extern FPP_GET_STATUS fpp_get_status; -extern FPP_CLEAR_STATUS fpp_clear_status; -extern FPP_SET_MODE fpp_set_mode; - -extern FPP_FROM_NATIVE fpp_from_native; -extern FPP_TO_NATIVE fpp_to_native; - -extern FPP_TO_INT fpp_to_int; -extern FPP_FROM_INT fpp_from_int; - -extern FPP_PACK fpp_to_pack; -extern FPP_PACK fpp_from_pack; - -extern FPP_TO_SINGLE fpp_to_single; -extern FPP_FROM_SINGLE fpp_from_single; -extern FPP_TO_DOUBLE fpp_to_double; -extern FPP_FROM_DOUBLE fpp_from_double; -extern FPP_TO_EXTEN fpp_to_exten; -extern FPP_FROM_EXTEN fpp_from_exten; -extern FPP_TO_EXTEN fpp_to_exten_fmovem; -extern FPP_FROM_EXTEN fpp_from_exten_fmovem; - -extern FPP_A fpp_round_single; -extern FPP_A fpp_round_double; -extern FPP_A fpp_round32; -extern FPP_A fpp_round64; - -extern FPP_A fpp_normalize; -extern FPP_DENORMALIZE fpp_denormalize; -extern FPP_A fpp_get_internal_overflow; -extern FPP_A fpp_get_internal_underflow; -extern FPP_A fpp_get_internal_round_all; -extern FPP_A fpp_get_internal_round; -extern FPP_A fpp_get_internal_round_exten; -extern FPP_A fpp_get_internal; -extern FPP_GET32 fpp_get_internal_grs; - -extern FPP_AB fpp_int; -extern FPP_AB fpp_sinh; -extern FPP_AB fpp_intrz; -extern FPP_ABP fpp_sqrt; -extern FPP_AB fpp_lognp1; -extern FPP_AB fpp_etoxm1; -extern FPP_AB fpp_tanh; -extern FPP_AB fpp_atan; -extern FPP_AB fpp_atanh; -extern FPP_AB fpp_sin; -extern FPP_AB fpp_asin; -extern FPP_AB fpp_tan; -extern FPP_AB fpp_etox; -extern FPP_AB fpp_twotox; -extern FPP_AB fpp_tentox; -extern FPP_AB fpp_logn; -extern FPP_AB fpp_log10; -extern FPP_AB fpp_log2; -extern FPP_ABP fpp_abs; -extern FPP_AB fpp_cosh; -extern FPP_ABP fpp_neg; -extern FPP_AB fpp_acos; -extern FPP_AB fpp_cos; -extern FPP_AB fpp_getexp; -extern FPP_AB fpp_getman; -extern FPP_ABP fpp_div; -extern FPP_ABQS fpp_mod; -extern FPP_ABP fpp_add; -extern FPP_ABP 
fpp_mul; -extern FPP_ABQS fpp_rem; -extern FPP_AB fpp_scale; -extern FPP_ABP fpp_sub; -extern FPP_AB fpp_sgldiv; -extern FPP_AB fpp_sglmul; -extern FPP_AB fpp_cmp; -extern FPP_AB fpp_tst; -extern FPP_ABP fpp_move; +extern void fpsr_set_exception(uae_u32 exception); \ No newline at end of file diff --git a/src/include/native2amiga_api.h b/src/include/native2amiga_api.h index 44a0369b..e7dfb40a 100644 --- a/src/include/native2amiga_api.h +++ b/src/include/native2amiga_api.h @@ -18,7 +18,7 @@ void uae_PutMsg(uaecptr port, uaecptr msg); void uae_Signal(uaecptr task, uae_u32 mask); void uae_NotificationHack(uaecptr, uaecptr); #endif -int native2amiga_isfree (void); +int native2amiga_isfree(void); void uae_nativesem_wait(void); void uae_nativesem_post(void); diff --git a/src/include/newcpu.h b/src/include/newcpu.h index c048c6cd..78b5e75f 100644 --- a/src/include/newcpu.h +++ b/src/include/newcpu.h @@ -12,7 +12,6 @@ #include "uae/types.h" #include "readcpu.h" #include "machdep/m68k.h" -#include extern const int areg_byteinc[]; extern const int imm8_table[]; @@ -68,7 +67,6 @@ typedef double fptype; typedef struct { - floatx80 fpx; fptype fp; } fpdata; @@ -101,6 +99,9 @@ struct regstruct #ifdef FPUEMU fpdata fp[8]; +#ifdef JIT + fpdata fp_result; +#endif uae_u32 fpcr,fpsr, fpiar; uae_u32 fpu_state; uae_u32 fpu_exp_state; @@ -341,9 +342,7 @@ extern void fpuop_trapcc(uae_u32, uaecptr, uae_u16); extern void fpuop_bcc(uae_u32, uaecptr, uae_u32); extern void fpuop_save(uae_u32); extern void fpuop_restore(uae_u32); -extern uae_u32 fpp_get_fpsr (void); extern void fpu_reset (void); -extern bool fpu_get_constant(fpdata *fp, int cr); extern int fpp_cond(int condition); extern void exception3_read(uae_u32 opcode, uaecptr addr); diff --git a/src/include/options.h b/src/include/options.h index 12170d8a..64d18788 100644 --- a/src/include/options.h +++ b/src/include/options.h @@ -310,9 +310,9 @@ struct uae_prefs { int sound_volume_paula; int sound_volume_cd; + bool compfpu; int 
cachesize; bool fpu_strict; - bool fpu_softfloat; int gfx_framerate; struct wh gfx_size; diff --git a/src/include/sysdeps.h b/src/include/sysdeps.h index 8df3c9bc..2ebcc115 100644 --- a/src/include/sysdeps.h +++ b/src/include/sysdeps.h @@ -408,17 +408,18 @@ extern void gui_message(const TCHAR *, ...); */ #ifdef ARMV6_ASSEMBLY + STATIC_INLINE uae_u32 do_byteswap_32(uae_u32 v) { - __asm__( - "rev %0, %0" - : "=r" (v) : "0" (v)); return v; + __asm__ ( + "rev %0, %0" + : "=r" (v) : "0" (v) ); return v; } STATIC_INLINE uae_u32 do_byteswap_16(uae_u32 v) { - __asm__( - "revsh %0, %0\n\t" - "uxth %0, %0" - : "=r" (v) : "0" (v)); return v; + __asm__ ( + "revsh %0, %0\n\t" + "uxth %0, %0" + : "=r" (v) : "0" (v) ); return v; } #define bswap_16(x) do_byteswap_16(x) #define bswap_32(x) do_byteswap_32(x) diff --git a/src/jit/codegen_arm.cpp b/src/jit/codegen_arm.cpp index ba57c147..dc7c3964 100644 --- a/src/jit/codegen_arm.cpp +++ b/src/jit/codegen_arm.cpp @@ -333,6 +333,24 @@ LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, RR4 s)) } LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, RR4 s)) +LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s)) +{ + if(s >= (uae_u32) &regs && s < ((uae_u32) &regs) + sizeof(struct regstruct)) { + uae_s32 idx = s - (uae_u32) & regs; + LDR_rRI(d, R_REGSTRUCT, idx); + } else { +#ifdef ARMV6T2 + MOVW_ri16(REG_WORK1, s); + MOVT_ri16(REG_WORK1, s >> 16); +#else + uae_s32 offs = data_long_offs(s); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); +#endif + LDR_rR(d, REG_WORK1); + } +} +LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s)) + LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, RR2 s)) { PKHBT_rrr(d, s, d); @@ -465,11 +483,6 @@ STATIC_INLINE void raw_emit_nop_filler(int nbytes) while(nbytes--) { NOP(); } } -STATIC_INLINE void raw_emit_nop(void) -{ - NOP(); -} - // // Arm instructions // @@ -611,7 +624,7 @@ LOWFUNC(NONE,NONE,3,compemu_raw_lea_l_brr,(W4 d, RR4 s, IMM offset)) } LENDFUNC(NONE,NONE,3,compemu_raw_lea_l_brr,(W4 d, RR4 s, IMM offset))
-LOWFUNC(NONE,WRITE,2,compemu_raw_mov_b_mr,(IMM d, RR1 s)) +LOWFUNC(NONE,WRITE,2,compemu_raw_mov_b_mr,(MEMW d, RR1 s)) { if(d >= (uae_u32) &regs && d < ((uae_u32) &regs) + sizeof(struct regstruct)) { uae_s32 idx = d - (uae_u32) & regs; @@ -627,7 +640,7 @@ LOWFUNC(NONE,WRITE,2,compemu_raw_mov_b_mr,(IMM d, RR1 s)) STRB_rR(s, REG_WORK1); } } -LENDFUNC(NONE,WRITE,2,compemu_raw_mov_b_mr,(IMM d, RR1 s)) +LENDFUNC(NONE,WRITE,2,compemu_raw_mov_b_mr,(MEMW d, RR1 s)) LOWFUNC(NONE,WRITE,2,compemu_raw_mov_l_mi,(MEMW d, IMM s)) { @@ -664,7 +677,7 @@ LOWFUNC(NONE,WRITE,2,compemu_raw_mov_l_mi,(MEMW d, IMM s)) } LENDFUNC(NONE,WRITE,2,compemu_raw_mov_l_mi,(MEMW d, IMM s)) -LOWFUNC(NONE,WRITE,2,compemu_raw_mov_l_mr,(IMM d, RR4 s)) +LOWFUNC(NONE,WRITE,2,compemu_raw_mov_l_mr,(MEMW d, RR4 s)) { if(d >= (uae_u32) &regs && d < ((uae_u32) &regs) + sizeof(struct regstruct)) { uae_s32 idx = d - (uae_u32) & regs; @@ -680,7 +693,7 @@ LOWFUNC(NONE,WRITE,2,compemu_raw_mov_l_mr,(IMM d, RR4 s)) STR_rR(s, REG_WORK1); } } -LENDFUNC(NONE,WRITE,2,compemu_raw_mov_l_mr,(IMM d, RR4 s)) +LENDFUNC(NONE,WRITE,2,compemu_raw_mov_l_mr,(MEMW d, RR4 s)) LOWFUNC(NONE,NONE,2,compemu_raw_mov_l_ri,(W4 d, IMM s)) { @@ -831,26 +844,101 @@ STATIC_INLINE void compemu_raw_call_r(RR4 r) STATIC_INLINE void compemu_raw_jcc_l_oponly(int cc) { switch (cc) { - case 9: // LS - BEQ_i(0); // beq - BCC_i(1); // bcc + case NATIVE_CC_HI: // HI + BEQ_i(2); // beq no jump + BCS_i(1); // bcs no jump + // jump + LDR_rRI(RPC_INDEX, RPC_INDEX, -4); + // no jump + break; - //: - LDR_rRI(RPC_INDEX, RPC_INDEX, -4); // ldr pc, [pc] ; - break; + case NATIVE_CC_LS: // LS + BEQ_i(0); // beq jump + BCC_i(1); // bcc no jump + // jump + LDR_rRI(RPC_INDEX, RPC_INDEX, -4); + // no jump + break; - case 8: // HI - BEQ_i(2); // beq - BCS_i(1); // bcs + case NATIVE_CC_F_OGT: // Jump if valid and greater than + BVS_i(2); // do not jump if NaN + BLE_i(1); // do not jump if less or equal + LDR_rRI(RPC_INDEX, RPC_INDEX, -4); + break; - //: - LDR_rRI(RPC_INDEX,
RPC_INDEX, -4); // ldr pc, [pc] ; - break; + case NATIVE_CC_F_OGE: // Jump if valid and greater or equal + BVS_i(2); // do not jump if NaN + BCC_i(1); // do not jump if carry cleared + LDR_rRI(RPC_INDEX, RPC_INDEX, -4); + break; + + case NATIVE_CC_F_OLT: // Jump if valid and less than + BVS_i(2); // do not jump if NaN + BCS_i(1); // do not jump if carry set + LDR_rRI(RPC_INDEX, RPC_INDEX, -4); + break; + + case NATIVE_CC_F_OLE: // Jump if valid and less or equal + BVS_i(2); // do not jump if NaN + BGT_i(1); // do not jump if greater than + LDR_rRI(RPC_INDEX, RPC_INDEX, -4); + break; + + case NATIVE_CC_F_OGL: // Jump if valid and greater or less + BVS_i(2); // do not jump if NaN + BEQ_i(1); // do not jump if equal + LDR_rRI(RPC_INDEX, RPC_INDEX, -4); + break; - default: - CC_B_i(cc^1, 1); - LDR_rRI(RPC_INDEX, RPC_INDEX, -4); - break; + case NATIVE_CC_F_OR: // Jump if valid + BVS_i(1); // do not jump if NaN + LDR_rRI(RPC_INDEX, RPC_INDEX, -4); + break; + + case NATIVE_CC_F_UN: // Jump if NAN + BVC_i(1); // do not jump if valid + LDR_rRI(RPC_INDEX, RPC_INDEX, -4); + break; + + case NATIVE_CC_F_UEQ: // Jump if NAN or equal + BVS_i(0); // jump if NaN + BNE_i(1); // do not jump if greater or less + // jump + LDR_rRI(RPC_INDEX, RPC_INDEX, -4); + break; + + case NATIVE_CC_F_UGT: // Jump if NAN or greater than + BVS_i(0); // jump if NaN + BLS_i(1); // do not jump if lower or same + // jump + LDR_rRI(RPC_INDEX, RPC_INDEX, -4); + break; + + case NATIVE_CC_F_UGE: // Jump if NAN or greater or equal + BVS_i(0); // jump if NaN + BMI_i(1); // do not jump if lower + // jump + LDR_rRI(RPC_INDEX, RPC_INDEX, -4); + break; + + case NATIVE_CC_F_ULT: // Jump if NAN or less than + BVS_i(0); // jump if NaN + BGE_i(1); // do not jump if greater or equal + // jump + LDR_rRI(RPC_INDEX, RPC_INDEX, -4); + break; + + case NATIVE_CC_F_ULE: // Jump if NAN or less or equal + BVS_i(0); // jump if NaN + BGT_i(1); // do not jump if greater + // jump + LDR_rRI(RPC_INDEX, RPC_INDEX, -4); + break; + +
default: + CC_B_i(cc^1, 1); + LDR_rRI(RPC_INDEX, RPC_INDEX, -4); + break; } // emit of target will be done by caller } @@ -889,11 +977,6 @@ STATIC_INLINE void compemu_raw_jmp_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m) emit_long(base); } -STATIC_INLINE void compemu_raw_jmp_r(RR4 r) -{ - BX_r(r); -} - STATIC_INLINE void compemu_raw_jnz(uae_u32 t) { #ifdef ARMV6T2 @@ -1009,3 +1092,317 @@ LOWFUNC(NONE,NONE,2,compemu_raw_endblock_pc_isconst,(IMM cycles, IMM v)) // } LENDFUNC(NONE,NONE,2,compemu_raw_endblock_pc_isconst,(IMM cycles, IMM v)) + + +/************************************************************************* +* FPU stuff * +*************************************************************************/ + +LOWFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s)) +{ + VMOV64_rr(d, s); +} +LENDFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s)) + +LOWFUNC(NONE,WRITE,2,compemu_raw_fmov_mr_drop,(MEMW mem, FR s)) +{ + if(mem >= (uae_u32) &regs && mem < (uae_u32) &regs + 1020 && ((mem - (uae_u32) &regs) & 0x3) == 0) { // word-aligned offset into regs: store relative to R_REGSTRUCT + VSTR64(s, R_REGSTRUCT, (mem - (uae_u32) &regs)); + } else { + MOVW_ri16(REG_WORK1, mem); + MOVT_ri16(REG_WORK1, mem >> 16); + VSTR64(s, REG_WORK1, 0); + } +} +LENDFUNC(NONE,WRITE,2,compemu_raw_fmov_mr_drop,(MEMW mem, FR s)) + + +LOWFUNC(NONE,READ,2,compemu_raw_fmov_rm,(FW d, MEMR mem)) +{ + if(mem >= (uae_u32) &regs && mem < (uae_u32) &regs + 1020 && ((mem - (uae_u32) &regs) & 0x3) == 0) { // word-aligned offset into regs: load relative to R_REGSTRUCT + VLDR64(d, R_REGSTRUCT, (mem - (uae_u32) &regs)); + } else { + MOVW_ri16(REG_WORK1, mem); + MOVT_ri16(REG_WORK1, mem >> 16); + VLDR64(d, REG_WORK1, 0); + } +} +LENDFUNC(NONE,READ,2,compemu_raw_fmov_rm,(FW d, MEMR mem)) + +LOWFUNC(NONE,NONE,2,raw_fmov_l_rr,(FW d, RR4 s)) +{ + VMOVi_from_ARM(SCRATCH_F64_1, s); + VCVT_64_from_i(d, SCRATCH_F32_1); +} +LENDFUNC(NONE,NONE,2,raw_fmov_l_rr,(FW d, RR4 s)) + +LOWFUNC(NONE,NONE,2,raw_fmov_s_rr,(FW d, RR4 s)) +{ + VMOV32_from_ARM(SCRATCH_F32_1, s); + VCVT_32_to_64(d, SCRATCH_F32_1); +} +LENDFUNC(NONE,NONE,2,raw_fmov_s_rr,(FW d, RR4 s)) +
+LOWFUNC(NONE,NONE,2,raw_fmov_w_rr,(FW d, RR2 s)) +{ + SIGN_EXTEND_16_REG_2_REG(REG_WORK1, s); + VMOVi_from_ARM(SCRATCH_F64_1, REG_WORK1); + VCVT_64_from_i(d, SCRATCH_F32_1); +} +LENDFUNC(NONE,NONE,2,raw_fmov_w_rr,(FW d, RR2 s)) + +LOWFUNC(NONE,NONE,2,raw_fmov_b_rr,(FW d, RR1 s)) +{ + SIGN_EXTEND_8_REG_2_REG(REG_WORK1, s); + VMOVi_from_ARM(SCRATCH_F64_1, REG_WORK1); + VCVT_64_from_i(d, SCRATCH_F32_1); +} +LENDFUNC(NONE,NONE,2,raw_fmov_b_rr,(FW d, RR1 s)) + +LOWFUNC(NONE,NONE,3,raw_fmov_d_rrr,(FW d, RR4 s1, RR4 s2)) +{ + VMOV64_from_ARM(d, s1, s2); +} +LENDFUNC(NONE,NONE,3,raw_fmov_d_rrr,(FW d, RR4 s1, RR4 s2)) + +LOWFUNC(NONE,NONE,2,raw_fmov_to_l_rr,(W4 d, FR s)) +{ + VCVTR_64_to_i(SCRATCH_F32_1, s); + VMOVi_to_ARM(d, SCRATCH_F64_1); +} +LENDFUNC(NONE,NONE,2,raw_fmov_to_l_rr,(W4 d, FR s)) + +LOWFUNC(NONE,NONE,2,raw_fmov_to_s_rr,(W4 d, FR s)) +{ + VCVT_64_to_32(SCRATCH_F32_1, s); + VMOV32_to_ARM(d, SCRATCH_F32_1); +} +LENDFUNC(NONE,NONE,2,raw_fmov_to_s_rr,(W4 d, FR s)) + +LOWFUNC(NONE,NONE,2,raw_fmov_to_w_rr,(W4 d, FR s)) +{ + VCVTR_64_to_i(SCRATCH_F32_1, s); + VMOVi_to_ARM(REG_WORK1, SCRATCH_F64_1); + SSAT_rir(REG_WORK1, 15, REG_WORK1); + BFI_rrii(d, REG_WORK1, 0, 15); +} +LENDFUNC(NONE,NONE,2,raw_fmov_to_w_rr,(W4 d, FR s)) + +LOWFUNC(NONE,NONE,2,raw_fmov_to_b_rr,(W4 d, FR s)) +{ + VCVTR_64_to_i(SCRATCH_F32_1, s); + VMOVi_to_ARM(REG_WORK1, SCRATCH_F64_1); + SSAT_rir(REG_WORK1, 7, REG_WORK1); + BFI_rrii(d, REG_WORK1, 0, 7); +} +LENDFUNC(NONE,NONE,2,raw_fmov_to_b_rr,(W4 d, FR s)) + +LOWFUNC(NONE,NONE,1,raw_fmov_d_ri_0,(FW r)) +{ + VMOV64_i(r, 0x7, 0x0); // load imm #1 into reg + VSUB64(r, r, r); // 1 - 1 = 0 +} +LENDFUNC(NONE,NONE,1,raw_fmov_d_ri_0,(FW r)) + +LOWFUNC(NONE,NONE,1,raw_fmov_d_ri_1,(FW r)) +{ + VMOV64_i(r, 0x7, 0x0); // load imm #1 into reg +} +LENDFUNC(NONE,NONE,1,raw_fmov_d_ri_1,(FW r)) + +LOWFUNC(NONE,NONE,1,raw_fmov_d_ri_10,(FW r)) +{ + VMOV64_i(r, 0x2, 0x4); // load imm #10 into reg +} +LENDFUNC(NONE,NONE,1,raw_fmov_d_ri_10,(FW r)) +
+LOWFUNC(NONE,NONE,1,raw_fmov_d_ri_100,(FW r)) +{ + VMOV64_i(r, 0x2, 0x4); // load imm #10 into reg + VMUL64(r, r, r); // 10 * 10 = 100 +} +LENDFUNC(NONE,NONE,1,raw_fmov_d_ri_100,(FW r)) + +LOWFUNC(NONE,READ,2,raw_fmov_d_rm,(FW r, MEMR m)) +{ + MOVW_ri16(REG_WORK1, m); + MOVT_ri16(REG_WORK1, m >> 16); + VLDR64(r, REG_WORK1, 0); +} +LENDFUNC(NONE,READ,2,raw_fmov_d_rm,(FW r, MEMR m)) + +LOWFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m)) +{ + MOVW_ri16(REG_WORK1, m); + MOVT_ri16(REG_WORK1, m >> 16); + VLDR32(SCRATCH_F32_1, REG_WORK1, 0); + VCVT_32_to_64(r, SCRATCH_F32_1); +} +LENDFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m)) + +LOWFUNC(NONE,NONE,3,raw_fmov_to_d_rrr,(W4 d1, W4 d2, FR s)) +{ + VMOV64_to_ARM(d1, d2, s); +} +LENDFUNC(NONE,NONE,3,raw_fmov_to_d_rrr,(W4 d1, W4 d2, FR s)) + +LOWFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s)) +{ + VSQRT64(d, s); +} +LENDFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s)) + +LOWFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s)) +{ + VABS64(d, s); +} +LENDFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s)) + +LOWFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s)) +{ + VNEG64(d, s); +} +LENDFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s)) + +LOWFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s)) +{ + VDIV64(d, d, s); +} +LENDFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s)) + +LOWFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s)) +{ + VADD64(d, d, s); +} +LENDFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s)) + +LOWFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s)) +{ + VMUL64(d, d, s); +} +LENDFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s)) + +LOWFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s)) +{ + VSUB64(d, d, s); +} +LENDFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s)) + +LOWFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s)) +{ + VCVTR_64_to_i(SCRATCH_F32_1, s); + VCVT_64_from_i(d, SCRATCH_F32_1); +} +LENDFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s)) + +LOWFUNC(NONE,NONE,2,raw_frndintz_rr,(FW d, FR s)) +{ + VCVT_64_to_i(SCRATCH_F32_1, s); + VCVT_64_from_i(d, SCRATCH_F32_1); +} +LENDFUNC(NONE,NONE,2,raw_frndintz_rr,(FW d, FR s)) +
+LOWFUNC(NONE,NONE,2,raw_fmod_rr,(FRW d, FR s)) +{ + VDIV64(SCRATCH_F64_2, d, s); + VCVT_64_to_i(SCRATCH_F32_1, SCRATCH_F64_2); + VCVT_64_from_i(SCRATCH_F64_2, SCRATCH_F32_1); + VMUL64(SCRATCH_F64_1, SCRATCH_F64_2, s); + VSUB64(d, d, SCRATCH_F64_1); +} +LENDFUNC(NONE,NONE,2,raw_fmod_rr,(FRW d, FR s)) + +LOWFUNC(NONE,NONE,2,raw_fsgldiv_rr,(FRW d, FR s)) +{ + VCVT_64_to_32(SCRATCH_F32_1, d); + VCVT_64_to_32(SCRATCH_F32_2, s); + VDIV32(SCRATCH_F32_1, SCRATCH_F32_1, SCRATCH_F32_2); + VCVT_32_to_64(d, SCRATCH_F32_1); +} +LENDFUNC(NONE,NONE,2,raw_fsgldiv_rr,(FRW d, FR s)) + +LOWFUNC(NONE,NONE,1,raw_fcuts_r,(FRW r)) +{ + VCVT_64_to_32(SCRATCH_F32_1, r); + VCVT_32_to_64(r, SCRATCH_F32_1); +} +LENDFUNC(NONE,NONE,1,raw_fcuts_r,(FRW r)) + +LOWFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s)) +{ + VMRS(REG_WORK1); + BIC_rri(REG_WORK2, REG_WORK1, 0x00c00000); + VMSR(REG_WORK2); + + VDIV64(SCRATCH_F64_2, d, s); + VCVTR_64_to_i(SCRATCH_F32_1, SCRATCH_F64_2); + VCVT_64_from_i(SCRATCH_F64_2, SCRATCH_F32_1); + VMUL64(SCRATCH_F64_1, SCRATCH_F64_2, s); + VSUB64(d, d, SCRATCH_F64_1); + + VMRS(REG_WORK2); + UBFX_rrii(REG_WORK1, REG_WORK1, 22, 2); + BFI_rrii(REG_WORK2, REG_WORK1, 22, 23); // BFI_rrii takes (lsb, msb): put the saved bits back into 22..23 + VMSR(REG_WORK2); +} +LENDFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s)) + +LOWFUNC(NONE,NONE,2,raw_fsglmul_rr,(FRW d, FR s)) +{ + VCVT_64_to_32(SCRATCH_F32_1, d); + VCVT_64_to_32(SCRATCH_F32_2, s); + VMUL32(SCRATCH_F32_1, SCRATCH_F32_1, SCRATCH_F32_2); + VCVT_32_to_64(d, SCRATCH_F32_1); +} +LENDFUNC(NONE,NONE,2,raw_fsglmul_rr,(FRW d, FR s)) + +LOWFUNC(NONE,NONE,2,raw_fmovs_rr,(FW d, FR s)) +{ + VCVT_64_to_32(SCRATCH_F32_1, s); + VCVT_32_to_64(d, SCRATCH_F32_1); +} +LENDFUNC(NONE,NONE,2,raw_fmovs_rr,(FW d, FR s)) + +LOWFUNC(NONE,NONE,3,raw_ffunc_rr,(double (*func)(double), FW d, FR s)) +{ + VMOV64_rr(0, s); + + MOVW_ri16(REG_WORK1, (uae_u32)func); + MOVT_ri16(REG_WORK1, ((uae_u32)func) >> 16); + + PUSH(RLR_INDEX); + BLX_r(REG_WORK1); + POP(RLR_INDEX); + + VMOV64_rr(d, 0); +}
+LENDFUNC(NONE,NONE,3,raw_ffunc_rr,(double (*func)(double), FW d, FR s)) + +LOWFUNC(NONE,NONE,3,raw_fpowx_rr,(uae_u32 x, FW d, FR s)) +{ + double (*func)(double,double) = pow; + + if(x == 2) { + VMOV64_i(0, 0x0, 0x0); // load imm #2 into first reg + } else { + VMOV64_i(0, 0x2, 0x4); // load imm #10 into first reg + } + + VMOV64_rr(1, s); + + MOVW_ri16(REG_WORK1, (uae_u32)func); + MOVT_ri16(REG_WORK1, ((uae_u32)func) >> 16); + + PUSH(RLR_INDEX); + BLX_r(REG_WORK1); + POP(RLR_INDEX); + + VMOV64_rr(d, 0); +} +LENDFUNC(NONE,NONE,3,raw_fpowx_rr,(uae_u32 x, FW d, FR s)) + +STATIC_INLINE void raw_fflags_into_flags(int r) +{ + VCMP64_0(r); + VMRS(15); // special case: move flags from FPSCR to APSR_nzcv +} diff --git a/src/jit/codegen_arm.h b/src/jit/codegen_arm.h index 2532d3b2..a11c13f7 100644 --- a/src/jit/codegen_arm.h +++ b/src/jit/codegen_arm.h @@ -1319,6 +1319,7 @@ enum { // ARMv6T2 #ifdef ARMV6T2 + #define CC_BFI_rrii(cc,Rd,Rn,lsb,msb) _W(((cc) << 28) | (0x3e << 21) | ((msb) << 16) | (Rd << 12) | ((lsb) << 7) | (0x1 << 4) | (Rn)) #define BFI_rrii(Rd,Rn,lsb,msb) CC_BFI_rrii(NATIVE_CC_AL,Rd,Rn,lsb,msb) @@ -1333,10 +1334,138 @@ enum { #define CC_MOVT_ri16(cc,Rd,i) _W(((cc) << 28) | (0x34 << 20) | (((i >> 12) & 0xf) << 16) | (Rd << 12) | (i & 0x0fff)) #define MOVT_ri16(Rd,i) CC_MOVT_ri16(NATIVE_CC_AL,Rd,i) + +#define CC_SSAT_rir(cc,Rd,i,Rn) _W(((cc) << 28) | (0x6a << 20) | (i << 16) | (Rd << 12) | (0x1 << 4) | (Rn)) +#define SSAT_rir(Rd,i,Rn) CC_SSAT_rir(NATIVE_CC_AL,Rd,i,Rn) + #endif // Floatingpoint +#define FADR_ADD(offs) ((1 << 23) | (offs) >> 2) +#define FADR_SUB(offs) ((0 << 23) | (offs) >> 2) +#define FIMM8(offs) (offs >= 0 ? 
FADR_ADD(offs) : FADR_SUB(-offs)) +#define MAKE_Dd(Dd) (((Dd & 0x10) << 18) | ((Dd & 0x0f) << 12)) +#define MAKE_Dm(Dm) (((Dm & 0x10) << 1) | ((Dm & 0x0f) << 0)) +#define MAKE_Dn(Dn) (((Dn & 0x10) << 3) | ((Dn & 0x0f) << 16)) +#define MAKE_Sd(Sd) (((Sd & 0x01) << 22) | ((Sd & 0x1e) << 11)) +#define MAKE_Sm(Sm) (((Sm & 0x01) << 5) | ((Sm & 0x1e) >> 1)) +#define MAKE_Sn(Sn) (((Sn & 0x01) << 7) | ((Sn & 0x1e) << 15)) + + +#define CC_VLDR64(cc,Dd,Rn,offs) _W(((cc) << 28) | (0xd << 24) | (0x1 << 20) | (Rn << 16) | (0xb << 8) | FIMM8(offs) | MAKE_Dd(Dd)) +#define VLDR64(Dd,Rn,offs) CC_VLDR64(NATIVE_CC_AL,Dd,Rn,offs) +#define CC_VLDR32(cc,Sd,Rn,offs) _W(((cc) << 28) | (0xd << 24) | (0x1 << 20) | (Rn << 16) | (0xa << 8) | FIMM8(offs) | MAKE_Sd(Sd)) +#define VLDR32(Sd,Rn,offs) CC_VLDR32(NATIVE_CC_AL,Sd,Rn,offs) + +#define CC_VSTR64(cc,Dd,Rn,offs) _W(((cc) << 28) | (0xd << 24) | (0x0 << 20) | (Rn << 16) | (0xb << 8) | FIMM8(offs) | MAKE_Dd(Dd)) +#define VSTR64(Dd,Rn,offs) CC_VSTR64(NATIVE_CC_AL,Dd,Rn,offs) +#define CC_VSTR32(cc,Sd,Rn,offs) _W(((cc) << 28) | (0xd << 24) | (0x0 << 20) | (Rn << 16) | (0xa << 8) | FIMM8(offs) | MAKE_Sd(Sd)) // single-precision register: same Sd encoding as CC_VLDR32 +#define VSTR32(Sd,Rn,offs) CC_VSTR32(NATIVE_CC_AL,Sd,Rn,offs) + +#define CC_VMOV64_rr(cc,Dd,Dm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0xb << 8) | (0x4 << 4) | MAKE_Dd(Dd) | MAKE_Dm(Dm)) +#define VMOV64_rr(Dd,Dm) CC_VMOV64_rr(NATIVE_CC_AL,Dd,Dm) +#define CC_VMOV32_rr(cc,Sd,Sm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0xa << 8) | (0x4 << 4) | MAKE_Sd(Sd) | MAKE_Sm(Sm)) +#define VMOV32_rr(Sd,Sm) CC_VMOV32_rr(NATIVE_CC_AL,Sd,Sm) + +#define CC_VMOV32_to_ARM(cc,Rt,Sn) _W(((cc) << 28) | (0xe << 24) | (0x1 << 20) | (Rt << 12) | (0xa << 8) | (0x1 << 4) | MAKE_Sn(Sn)) +#define VMOV32_to_ARM(Rt,Sn) CC_VMOV32_to_ARM(NATIVE_CC_AL,Rt,Sn) +#define CC_VMOV32_from_ARM(cc,Sn,Rt) _W(((cc) << 28) | (0xe << 24) | (0x0 << 20) | (Rt << 12) | (0xa << 8) | (0x1 << 4) | MAKE_Sn(Sn)) +#define VMOV32_from_ARM(Sn,Rt) CC_VMOV32_from_ARM(NATIVE_CC_AL,Sn,Rt)
+ +#define CC_VMOVi_from_ARM(cc,Dn,Rt) _W(((cc) << 28) | (0xe << 24) | (0x0 << 20) | (Rt << 12) | (0xb << 8) | (0x1 << 4) | MAKE_Dn(Dn)) +#define VMOVi_from_ARM(Dn,Rt) CC_VMOVi_from_ARM(NATIVE_CC_AL,Dn,Rt) +#define CC_VMOVi_to_ARM(cc,Rt,Dn) _W(((cc) << 28) | (0xe << 24) | (0x1 << 20) | (Rt << 12) | (0xb << 8) | (0x1 << 4) | MAKE_Dn(Dn)) +#define VMOVi_to_ARM(Rt,Dn) CC_VMOVi_to_ARM(NATIVE_CC_AL,Rt,Dn) + +#define CC_VMOV64_to_ARM(cc,Rt,Rt2,Dm) _W(((cc) << 28) | (0xc << 24) | (0x5 << 20) | (Rt2 << 16) | (Rt << 12) | (0xb << 8) | (0x1 << 4) | MAKE_Dm(Dm)) +#define VMOV64_to_ARM(Rt,Rt2,Dm) CC_VMOV64_to_ARM(NATIVE_CC_AL,Rt,Rt2,Dm) +#define CC_VMOV64_from_ARM(cc,Dm,Rt,Rt2) _W(((cc) << 28) | (0xc << 24) | (0x4 << 20) | (Rt2 << 16) | (Rt << 12) | (0xb << 8) | (0x1 << 4) | MAKE_Dm(Dm)) +#define VMOV64_from_ARM(Dm,Rt,Rt2) CC_VMOV64_from_ARM(NATIVE_CC_AL,Dm,Rt,Rt2) + +#define CC_VCVT_64_to_32(cc,Sd,Dm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x7 << 16) | (0xb << 8) | (0xc << 4) | MAKE_Sd(Sd) | MAKE_Dm(Dm)) +#define VCVT_64_to_32(Sd,Dm) CC_VCVT_64_to_32(NATIVE_CC_AL,Sd,Dm) +#define CC_VCVT_32_to_64(cc,Dd,Sm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x7 << 16) | (0xa << 8) | (0xc << 4) | MAKE_Dd(Dd) | MAKE_Sm(Sm)) +#define VCVT_32_to_64(Dd,Sm) CC_VCVT_32_to_64(NATIVE_CC_AL,Dd,Sm) + +#define CC_VCVTR_64_to_i(cc,Sd,Dm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0xd << 16) | (0xb << 8) | (0x4 << 4) | MAKE_Sd(Sd) | MAKE_Dm(Dm)) +#define VCVTR_64_to_i(Sd,Dm) CC_VCVTR_64_to_i(NATIVE_CC_AL,Sd,Dm) +#define CC_VCVTR_32_to_i(cc,Sd,Sm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0xd << 16) | (0xa << 8) | (0x4 << 4) | MAKE_Sd(Sd) | MAKE_Sm(Sm)) +#define VCVTR_32_to_i(Sd,Sm) CC_VCVTR_32_to_i(NATIVE_CC_AL,Sd,Sm) + +#define CC_VCVT_64_to_i(cc,Sd,Dm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0xd << 16) | (0xb << 8) | (0xc << 4) | MAKE_Sd(Sd) | MAKE_Dm(Dm)) +#define VCVT_64_to_i(Sd,Dm) CC_VCVT_64_to_i(NATIVE_CC_AL,Sd,Dm) +#define CC_VCVT_32_to_i(cc,Sd,Sm) _W(((cc) << 28) 
| (0xe << 24) | (0xb << 20) | (0xd << 16) | (0xa << 8) | (0xc << 4) | MAKE_Sd(Sd) | MAKE_Sm(Sm)) +#define VCVT_32_to_i(Sd,Sm) CC_VCVT_32_to_i(NATIVE_CC_AL,Sd,Sm) + +#define CC_VCVT_64_from_i(cc,Dd,Sm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x8 << 16) | (0xb << 8) | (0xc << 4) | MAKE_Dd(Dd) | MAKE_Sm(Sm)) +#define VCVT_64_from_i(Dd,Sm) CC_VCVT_64_from_i(NATIVE_CC_AL,Dd,Sm) +#define CC_VCVT_32_from_i(cc,Sd,Sm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x8 << 16) | (0xa << 8) | (0xc << 4) | MAKE_Sd(Sd) | MAKE_Sm(Sm)) +#define VCVT_32_from_i(Sd,Sm) CC_VCVT_32_from_i(NATIVE_CC_AL,Sd,Sm) + +#define CC_VMOV_rr64(cc,Dd,Dm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0xb << 8) | (0x4 << 4) | MAKE_Dd(Dd) | MAKE_Dm(Dm)) +#define VMOV_rr64(Dd,Dm) CC_VMOV_rr64(NATIVE_CC_AL,Dd,Dm) +#define CC_VMOV_rr32(cc,Sd,Sm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0xa << 8) | (0x4 << 4) | MAKE_Sd(Sd) | MAKE_Sm(Sm)) +#define VMOV_rr32(Sd,Sm) CC_VMOV_rr32(NATIVE_CC_AL,Sd,Sm) + +#define CC_VADD64(cc,Dd,Dn,Dm) _W(((cc) << 28) | (0xe << 24) | (0x3 << 20) | (0xb << 8) | (0x0 << 4) | MAKE_Dd(Dd) | MAKE_Dn(Dn) | MAKE_Dm(Dm)) +#define VADD64(Dd,Dn,Dm) CC_VADD64(NATIVE_CC_AL,Dd,Dn,Dm) +#define CC_VADD32(cc,Sd,Sn,Sm) _W(((cc) << 28) | (0xe << 24) | (0x3 << 20) | (0xa << 8) | (0x0 << 4) | MAKE_Sd(Sd) | MAKE_Sn(Sn) | MAKE_Sm(Sm)) +#define VADD32(Sd,Sn,Sm) CC_VADD32(NATIVE_CC_AL,Sd,Sn,Sm) + +#define CC_VSUB64(cc,Dd,Dn,Dm) _W(((cc) << 28) | (0xe << 24) | (0x3 << 20) | (0xb << 8) | (0x4 << 4) | MAKE_Dd(Dd) | MAKE_Dn(Dn) | MAKE_Dm(Dm)) +#define VSUB64(Dd,Dn,Dm) CC_VSUB64(NATIVE_CC_AL,Dd,Dn,Dm) +#define CC_VSUB32(cc,Sd,Sn,Sm) _W(((cc) << 28) | (0xe << 24) | (0x3 << 20) | (0xa << 8) | (0x4 << 4) | MAKE_Sd(Sd) | MAKE_Sn(Sn) | MAKE_Sm(Sm)) +#define VSUB32(Sd,Sn,Sm) CC_VSUB32(NATIVE_CC_AL,Sd,Sn,Sm) + +#define CC_VMUL64(cc,Dd,Dn,Dm) _W(((cc) << 28) | (0xe << 24) | (0x2 << 20) | (0xb << 8) | (0x0 << 4) | MAKE_Dd(Dd) | MAKE_Dn(Dn) | MAKE_Dm(Dm)) +#define VMUL64(Dd,Dn,Dm)
CC_VMUL64(NATIVE_CC_AL,Dd,Dn,Dm) +#define CC_VMUL32(cc,Sd,Sn,Sm) _W(((cc) << 28) | (0xe << 24) | (0x2 << 20) | (0xa << 8) | (0x0 << 4) | MAKE_Sd(Sd) | MAKE_Sn(Sn) | MAKE_Sm(Sm)) +#define VMUL32(Sd,Sn,Sm) CC_VMUL32(NATIVE_CC_AL,Sd,Sn,Sm) + +#define CC_VDIV64(cc,Dd,Dn,Dm) _W(((cc) << 28) | (0xe << 24) | (0x8 << 20) | (0xb << 8) | (0x0 << 4) | MAKE_Dd(Dd) | MAKE_Dn(Dn) | MAKE_Dm(Dm)) +#define VDIV64(Dd,Dn,Dm) CC_VDIV64(NATIVE_CC_AL,Dd,Dn,Dm) +#define CC_VDIV32(cc,Sd,Sn,Sm) _W(((cc) << 28) | (0xe << 24) | (0x8 << 20) | (0xa << 8) | (0x0 << 4) | MAKE_Sd(Sd) | MAKE_Sn(Sn) | MAKE_Sm(Sm)) +#define VDIV32(Sd,Sn,Sm) CC_VDIV32(NATIVE_CC_AL,Sd,Sn,Sm) + +#define CC_VABS64(cc,Dd,Dm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0xb << 8) | (0xc << 4) | MAKE_Dd(Dd) | MAKE_Dm(Dm)) +#define VABS64(Dd,Dm) CC_VABS64(NATIVE_CC_AL,Dd,Dm) +#define CC_VABS32(cc,Sd,Sm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0xa << 8) | (0xc << 4) | MAKE_Sd(Sd) | MAKE_Sm(Sm)) +#define VABS32(Sd,Sm) CC_VABS32(NATIVE_CC_AL,Sd,Sm) + +#define CC_VNEG64(cc,Dd,Dm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x1 << 16) | (0xb << 8) | (0x4 << 4) | MAKE_Dd(Dd) | MAKE_Dm(Dm)) +#define VNEG64(Dd,Dm) CC_VNEG64(NATIVE_CC_AL,Dd,Dm) +#define CC_VNEG32(cc,Sd,Sm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x1 << 16) | (0xa << 8) | (0x4 << 4) | MAKE_Sd(Sd) | MAKE_Sm(Sm)) +#define VNEG32(Sd,Sm) CC_VNEG32(NATIVE_CC_AL,Sd,Sm) + +#define CC_VSQRT64(cc,Dd,Dm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x1 << 16) | (0xb << 8) | (0xc << 4) | MAKE_Dd(Dd) | MAKE_Dm(Dm)) +#define VSQRT64(Dd,Dm) CC_VSQRT64(NATIVE_CC_AL,Dd,Dm) +#define CC_VSQRT32(cc,Sd,Sm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x1 << 16) | (0xa << 8) | (0xc << 4) | MAKE_Sd(Sd) | MAKE_Sm(Sm)) +#define VSQRT32(Sd,Sm) CC_VSQRT32(NATIVE_CC_AL,Sd,Sm) + +#define CC_VCMP64(cc,Dd,Dm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x4 << 16) | (0xb << 8) | (0xc << 4) | MAKE_Dd(Dd) | MAKE_Dm(Dm)) +#define VCMP64(Dd,Dm) CC_VCMP64(NATIVE_CC_AL,Dd,Dm) 
+#define CC_VCMP32(cc,Sd,Sm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x4 << 16) | (0xa << 8) | (0xc << 4) | MAKE_Sd(Sd) | MAKE_Sm(Sm)) +#define VCMP32(Sd,Sm) CC_VCMP32(NATIVE_CC_AL,Sd,Sm) + +#define CC_VCMP64_0(cc,Dd) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x5 << 16) | (0xb << 8) | (0xc << 4) | MAKE_Dd(Dd)) +#define VCMP64_0(Dd) CC_VCMP64_0(NATIVE_CC_AL,Dd) + +#define CC_VTST64(cc,Dd) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x5 << 16) | (0xb << 8) | (0xc << 4) | MAKE_Dd(Dd)) +#define VTST64(Dd) CC_VTST64(NATIVE_CC_AL,Dd) +#define CC_VTST32(cc,Sd) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x5 << 16) | (0xa << 8) | (0xc << 4) | MAKE_Sd(Sd)) +#define VTST32(Sd) CC_VTST32(NATIVE_CC_AL,Sd) + +#define CC_VMRS(cc,Rt) _W(((cc) << 28) | (0xe << 24) | (0xf << 20) | (0x1 << 16) | (Rt << 12) | (0xa << 8) | (0x1 << 4)) +#define VMRS(Rt) CC_VMRS(NATIVE_CC_AL,Rt) + +#define CC_VMSR(cc,Rt) _W(((cc) << 28) | (0xe << 24) | (0xe << 20) | (0x1 << 16) | (Rt << 12) | (0xa << 8) | (0x1 << 4)) +#define VMSR(Rt) CC_VMSR(NATIVE_CC_AL,Rt) + +#define CC_VMOV64_i(cc,Dd,imm4H,imm4L) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (imm4H << 16) | (0xb << 8) | (imm4L) | MAKE_Dd(Dd)) +#define VMOV64_i(Dd,imm4H,imm4L) CC_VMOV64_i(NATIVE_CC_AL,Dd,imm4H,imm4L) + +// Floatingpoint used by non FPU JIT #define CC_VMOV_sr(cc,Sd,Rn) _W(((cc) << 28) | (0x70 << 21) | (0 << 20) | (Sd << 16) | (Rn << 12) | (0x0a << 8) | (0x10)) #define VMOV_sr(Sd,Rn) CC_VMOV_sr(NATIVE_CC_AL,Sd,Rn) @@ -1352,4 +1481,5 @@ enum { #define CC_VDIV_ddd(cc,Dd,Dn,Dm) _W(((cc) << 28) | (0x1d << 23) | (0x0 << 20) | (Dn << 16) | (Dd << 12) | (0xb << 8) | (0x0 << 4) | (Dm)) #define VDIV_ddd(Dd,Dn,Dm) CC_VDIV_ddd(NATIVE_CC_AL,Dd,Dn,Dm) + #endif /* ARM_RTASM_H */ diff --git a/src/jit/compemu.cpp b/src/jit/compemu.cpp index 07c1ba4e..881fdc4d 100644 --- a/src/jit/compemu.cpp +++ b/src/jit/compemu.cpp @@ -16874,10 +16874,7 @@ uae_u32 REGPARAM2 op_50c8_0_comp_ff(uae_u32 opcode) arm_ADD_l_ri(offs, m68k_pc_offset); 
arm_ADD_l_ri(PC_P, m68k_pc_offset); m68k_pc_offset=0; -{ int nsrc = scratchie++; - if(srcreg!=src) - mov_w_rr(srcreg,src); -}}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); +}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); return 0; } /* Scc.B (An) */ @@ -17646,15 +17643,12 @@ uae_u32 REGPARAM2 op_51c8_0_comp_ff(uae_u32 opcode) arm_ADD_l_ri(offs, m68k_pc_offset); arm_ADD_l_ri(PC_P, m68k_pc_offset); m68k_pc_offset=0; -{ int nsrc = scratchie++; sub_w_ri(src, 1); { uae_u32 v2; uae_u32 v1=get_const(PC_P); v2=get_const(offs); register_branch(v1, v2, 3); - if(srcreg!=src) - mov_w_rr(srcreg,src); -}}}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); +}}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); return 0; } /* Scc.B (An) */ @@ -17805,15 +17799,12 @@ uae_u32 REGPARAM2 op_52c8_0_comp_ff(uae_u32 opcode) arm_ADD_l_ri(offs, m68k_pc_offset); arm_ADD_l_ri(PC_P, m68k_pc_offset); m68k_pc_offset=0; -{ int nsrc = scratchie++; make_flags_live(); uae_u32 v1=get_const(PC_P); uae_u32 v2=get_const(offs); jff_DBCC(src, 8); register_branch(v1, v2, 2); - if(srcreg!=src) - mov_w_rr(srcreg,src); -}}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); +}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); return 0; } /* Scc.B (An) */ @@ -17971,15 +17962,12 @@ uae_u32 REGPARAM2 op_53c8_0_comp_ff(uae_u32 opcode) arm_ADD_l_ri(offs, m68k_pc_offset); arm_ADD_l_ri(PC_P, m68k_pc_offset); m68k_pc_offset=0; -{ int nsrc = scratchie++; make_flags_live(); uae_u32 v1=get_const(PC_P); uae_u32 v2=get_const(offs); jff_DBCC(src, 9); register_branch(v1, v2, 2); - if(srcreg!=src) - mov_w_rr(srcreg,src); -}}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); +}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); return 0; } /* Scc.B (An) */ @@ -18137,15 +18125,12 @@ uae_u32 REGPARAM2 op_54c8_0_comp_ff(uae_u32 opcode) arm_ADD_l_ri(offs, m68k_pc_offset); arm_ADD_l_ri(PC_P, m68k_pc_offset); m68k_pc_offset=0; -{ int nsrc = scratchie++; make_flags_live(); uae_u32 v1=get_const(PC_P); 
uae_u32 v2=get_const(offs); jff_DBCC(src, 3); register_branch(v1, v2, 2); - if(srcreg!=src) - mov_w_rr(srcreg,src); -}}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); +}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); return 0; } /* Scc.B (An) */ @@ -18303,15 +18288,12 @@ uae_u32 REGPARAM2 op_55c8_0_comp_ff(uae_u32 opcode) arm_ADD_l_ri(offs, m68k_pc_offset); arm_ADD_l_ri(PC_P, m68k_pc_offset); m68k_pc_offset=0; -{ int nsrc = scratchie++; make_flags_live(); uae_u32 v1=get_const(PC_P); uae_u32 v2=get_const(offs); jff_DBCC(src, 2); register_branch(v1, v2, 2); - if(srcreg!=src) - mov_w_rr(srcreg,src); -}}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); +}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); return 0; } /* Scc.B (An) */ @@ -18469,15 +18451,12 @@ uae_u32 REGPARAM2 op_56c8_0_comp_ff(uae_u32 opcode) arm_ADD_l_ri(offs, m68k_pc_offset); arm_ADD_l_ri(PC_P, m68k_pc_offset); m68k_pc_offset=0; -{ int nsrc = scratchie++; make_flags_live(); uae_u32 v1=get_const(PC_P); uae_u32 v2=get_const(offs); jff_DBCC(src, 1); register_branch(v1, v2, 2); - if(srcreg!=src) - mov_w_rr(srcreg,src); -}}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); +}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); return 0; } /* Scc.B (An) */ @@ -18635,15 +18614,12 @@ uae_u32 REGPARAM2 op_57c8_0_comp_ff(uae_u32 opcode) arm_ADD_l_ri(offs, m68k_pc_offset); arm_ADD_l_ri(PC_P, m68k_pc_offset); m68k_pc_offset=0; -{ int nsrc = scratchie++; make_flags_live(); uae_u32 v1=get_const(PC_P); uae_u32 v2=get_const(offs); jff_DBCC(src, 0); register_branch(v1, v2, 2); - if(srcreg!=src) - mov_w_rr(srcreg,src); -}}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); +}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); return 0; } /* Scc.B (An) */ @@ -18801,15 +18777,12 @@ uae_u32 REGPARAM2 op_58c8_0_comp_ff(uae_u32 opcode) arm_ADD_l_ri(offs, m68k_pc_offset); arm_ADD_l_ri(PC_P, m68k_pc_offset); m68k_pc_offset=0; -{ int nsrc = scratchie++; make_flags_live(); uae_u32 v1=get_const(PC_P); 
uae_u32 v2=get_const(offs); jff_DBCC(src, 7); register_branch(v1, v2, 2); - if(srcreg!=src) - mov_w_rr(srcreg,src); -}}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); +}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); return 0; } /* Scc.B (An) */ @@ -18967,15 +18940,12 @@ uae_u32 REGPARAM2 op_59c8_0_comp_ff(uae_u32 opcode) arm_ADD_l_ri(offs, m68k_pc_offset); arm_ADD_l_ri(PC_P, m68k_pc_offset); m68k_pc_offset=0; -{ int nsrc = scratchie++; make_flags_live(); uae_u32 v1=get_const(PC_P); uae_u32 v2=get_const(offs); jff_DBCC(src, 6); register_branch(v1, v2, 2); - if(srcreg!=src) - mov_w_rr(srcreg,src); -}}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); +}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); return 0; } /* Scc.B (An) */ @@ -19133,15 +19103,12 @@ uae_u32 REGPARAM2 op_5ac8_0_comp_ff(uae_u32 opcode) arm_ADD_l_ri(offs, m68k_pc_offset); arm_ADD_l_ri(PC_P, m68k_pc_offset); m68k_pc_offset=0; -{ int nsrc = scratchie++; make_flags_live(); uae_u32 v1=get_const(PC_P); uae_u32 v2=get_const(offs); jff_DBCC(src, 5); register_branch(v1, v2, 2); - if(srcreg!=src) - mov_w_rr(srcreg,src); -}}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); +}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); return 0; } /* Scc.B (An) */ @@ -19299,15 +19266,12 @@ uae_u32 REGPARAM2 op_5bc8_0_comp_ff(uae_u32 opcode) arm_ADD_l_ri(offs, m68k_pc_offset); arm_ADD_l_ri(PC_P, m68k_pc_offset); m68k_pc_offset=0; -{ int nsrc = scratchie++; make_flags_live(); uae_u32 v1=get_const(PC_P); uae_u32 v2=get_const(offs); jff_DBCC(src, 4); register_branch(v1, v2, 2); - if(srcreg!=src) - mov_w_rr(srcreg,src); -}}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); +}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); return 0; } /* Scc.B (An) */ @@ -19465,15 +19429,12 @@ uae_u32 REGPARAM2 op_5cc8_0_comp_ff(uae_u32 opcode) arm_ADD_l_ri(offs, m68k_pc_offset); arm_ADD_l_ri(PC_P, m68k_pc_offset); m68k_pc_offset=0; -{ int nsrc = scratchie++; make_flags_live(); uae_u32 v1=get_const(PC_P); 
uae_u32 v2=get_const(offs); jff_DBCC(src, 10); register_branch(v1, v2, 2); - if(srcreg!=src) - mov_w_rr(srcreg,src); -}}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); +}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); return 0; } /* Scc.B (An) */ @@ -19631,15 +19592,12 @@ uae_u32 REGPARAM2 op_5dc8_0_comp_ff(uae_u32 opcode) arm_ADD_l_ri(offs, m68k_pc_offset); arm_ADD_l_ri(PC_P, m68k_pc_offset); m68k_pc_offset=0; -{ int nsrc = scratchie++; make_flags_live(); uae_u32 v1=get_const(PC_P); uae_u32 v2=get_const(offs); jff_DBCC(src, 11); register_branch(v1, v2, 2); - if(srcreg!=src) - mov_w_rr(srcreg,src); -}}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); +}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); return 0; } /* Scc.B (An) */ @@ -19797,15 +19755,12 @@ uae_u32 REGPARAM2 op_5ec8_0_comp_ff(uae_u32 opcode) arm_ADD_l_ri(offs, m68k_pc_offset); arm_ADD_l_ri(PC_P, m68k_pc_offset); m68k_pc_offset=0; -{ int nsrc = scratchie++; make_flags_live(); uae_u32 v1=get_const(PC_P); uae_u32 v2=get_const(offs); jff_DBCC(src, 12); register_branch(v1, v2, 2); - if(srcreg!=src) - mov_w_rr(srcreg,src); -}}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); +}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); return 0; } /* Scc.B (An) */ @@ -19963,15 +19918,12 @@ uae_u32 REGPARAM2 op_5fc8_0_comp_ff(uae_u32 opcode) arm_ADD_l_ri(offs, m68k_pc_offset); arm_ADD_l_ri(PC_P, m68k_pc_offset); m68k_pc_offset=0; -{ int nsrc = scratchie++; make_flags_live(); uae_u32 v1=get_const(PC_P); uae_u32 v2=get_const(offs); jff_DBCC(src, 13); register_branch(v1, v2, 2); - if(srcreg!=src) - mov_w_rr(srcreg,src); -}}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); +}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); return 0; } /* Scc.B (An) */ @@ -31754,6 +31706,395 @@ uae_u32 REGPARAM2 op_e7f9_0_comp_ff(uae_u32 opcode) }}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); return 0; } +/* FPP.L #.W,Dn */ +uae_u32 REGPARAM2 op_f200_0_comp_ff(uae_u32 opcode) +{ + uae_u32 
dstreg = opcode & 7; + uae_u32 dodgy=0; + uae_u32 m68k_pc_offset_thisinst=m68k_pc_offset; + m68k_pc_offset+=2; +{ uae_u8 scratchie=S1; +#ifdef USE_JIT_FPU + uae_u16 extra=comp_get_iword((m68k_pc_offset+=2)-2); + comp_fpp_opp(opcode,extra); +#else + failure = 1; +#endif +} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); + if (failure) m68k_pc_offset=m68k_pc_offset_thisinst; +return 0; +} +/* FPP.L #.W,An */ +uae_u32 REGPARAM2 op_f208_0_comp_ff(uae_u32 opcode) +{ + uae_u32 dstreg = opcode & 7; + uae_u32 dodgy=0; + uae_u32 m68k_pc_offset_thisinst=m68k_pc_offset; + m68k_pc_offset+=2; +{ uae_u8 scratchie=S1; +#ifdef USE_JIT_FPU + uae_u16 extra=comp_get_iword((m68k_pc_offset+=2)-2); + comp_fpp_opp(opcode,extra); +#else + failure = 1; +#endif +} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); + if (failure) m68k_pc_offset=m68k_pc_offset_thisinst; +return 0; +} +/* FPP.L #.W,(An) */ +uae_u32 REGPARAM2 op_f210_0_comp_ff(uae_u32 opcode) +{ + uae_u32 dstreg = opcode & 7; + uae_u32 dodgy=0; + uae_u32 m68k_pc_offset_thisinst=m68k_pc_offset; + m68k_pc_offset+=2; +{ uae_u8 scratchie=S1; +#ifdef USE_JIT_FPU + uae_u16 extra=comp_get_iword((m68k_pc_offset+=2)-2); + comp_fpp_opp(opcode,extra); +#else + failure = 1; +#endif +} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); + if (failure) m68k_pc_offset=m68k_pc_offset_thisinst; +return 0; +} +/* FPP.L #.W,(An)+ */ +uae_u32 REGPARAM2 op_f218_0_comp_ff(uae_u32 opcode) +{ + uae_u32 dstreg = opcode & 7; + uae_u32 dodgy=0; + uae_u32 m68k_pc_offset_thisinst=m68k_pc_offset; + m68k_pc_offset+=2; +{ uae_u8 scratchie=S1; +#ifdef USE_JIT_FPU + uae_u16 extra=comp_get_iword((m68k_pc_offset+=2)-2); + comp_fpp_opp(opcode,extra); +#else + failure = 1; +#endif +} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); + if (failure) m68k_pc_offset=m68k_pc_offset_thisinst; +return 0; +} +/* FPP.L #.W,-(An) */ +uae_u32 REGPARAM2 op_f220_0_comp_ff(uae_u32 opcode) +{ + uae_u32 dstreg = opcode & 7; + uae_u32 dodgy=0; + uae_u32 
m68k_pc_offset_thisinst=m68k_pc_offset; + m68k_pc_offset+=2; +{ uae_u8 scratchie=S1; +#ifdef USE_JIT_FPU + uae_u16 extra=comp_get_iword((m68k_pc_offset+=2)-2); + comp_fpp_opp(opcode,extra); +#else + failure = 1; +#endif +} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); + if (failure) m68k_pc_offset=m68k_pc_offset_thisinst; +return 0; +} +/* FPP.L #.W,(d16,An) */ +uae_u32 REGPARAM2 op_f228_0_comp_ff(uae_u32 opcode) +{ + uae_u32 dstreg = opcode & 7; + uae_u32 dodgy=0; + uae_u32 m68k_pc_offset_thisinst=m68k_pc_offset; + m68k_pc_offset+=2; +{ uae_u8 scratchie=S1; +#ifdef USE_JIT_FPU + uae_u16 extra=comp_get_iword((m68k_pc_offset+=2)-2); + comp_fpp_opp(opcode,extra); +#else + failure = 1; +#endif +} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); + if (failure) m68k_pc_offset=m68k_pc_offset_thisinst; +return 0; +} +/* FPP.L #.W,(d8,An,Xn) */ +uae_u32 REGPARAM2 op_f230_0_comp_ff(uae_u32 opcode) +{ + uae_u32 dstreg = opcode & 7; + uae_u32 dodgy=0; + uae_u32 m68k_pc_offset_thisinst=m68k_pc_offset; + m68k_pc_offset+=2; +{ uae_u8 scratchie=S1; +#ifdef USE_JIT_FPU + uae_u16 extra=comp_get_iword((m68k_pc_offset+=2)-2); + comp_fpp_opp(opcode,extra); +#else + failure = 1; +#endif +} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); + if (failure) m68k_pc_offset=m68k_pc_offset_thisinst; +return 0; +} +/* FPP.L #.W,(xxx).W */ +uae_u32 REGPARAM2 op_f238_0_comp_ff(uae_u32 opcode) +{ + uae_u32 dodgy=0; + uae_u32 m68k_pc_offset_thisinst=m68k_pc_offset; + m68k_pc_offset+=2; +{ uae_u8 scratchie=S1; +#ifdef USE_JIT_FPU + uae_u16 extra=comp_get_iword((m68k_pc_offset+=2)-2); + comp_fpp_opp(opcode,extra); +#else + failure = 1; +#endif +} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); + if (failure) m68k_pc_offset=m68k_pc_offset_thisinst; +return 0; +} +/* FPP.L #.W,(xxx).L */ +uae_u32 REGPARAM2 op_f239_0_comp_ff(uae_u32 opcode) +{ + uae_u32 dodgy=0; + uae_u32 m68k_pc_offset_thisinst=m68k_pc_offset; + m68k_pc_offset+=2; +{ uae_u8 scratchie=S1; +#ifdef USE_JIT_FPU + uae_u16 
extra=comp_get_iword((m68k_pc_offset+=2)-2); + comp_fpp_opp(opcode,extra); +#else + failure = 1; +#endif +} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); + if (failure) m68k_pc_offset=m68k_pc_offset_thisinst; +return 0; +} +/* FPP.L #.W,(d16,PC) */ +uae_u32 REGPARAM2 op_f23a_0_comp_ff(uae_u32 opcode) +{ + uae_s32 dstreg = 2; + uae_u32 dodgy=0; + uae_u32 m68k_pc_offset_thisinst=m68k_pc_offset; + m68k_pc_offset+=2; +{ uae_u8 scratchie=S1; +#ifdef USE_JIT_FPU + uae_u16 extra=comp_get_iword((m68k_pc_offset+=2)-2); + comp_fpp_opp(opcode,extra); +#else + failure = 1; +#endif +} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); + if (failure) m68k_pc_offset=m68k_pc_offset_thisinst; +return 0; +} +/* FPP.L #.W,(d8,PC,Xn) */ +uae_u32 REGPARAM2 op_f23b_0_comp_ff(uae_u32 opcode) +{ + uae_s32 dstreg = 3; + uae_u32 dodgy=0; + uae_u32 m68k_pc_offset_thisinst=m68k_pc_offset; + m68k_pc_offset+=2; +{ uae_u8 scratchie=S1; +#ifdef USE_JIT_FPU + uae_u16 extra=comp_get_iword((m68k_pc_offset+=2)-2); + comp_fpp_opp(opcode,extra); +#else + failure = 1; +#endif +} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); + if (failure) m68k_pc_offset=m68k_pc_offset_thisinst; +return 0; +} +/* FPP.L #.W,#.L */ +uae_u32 REGPARAM2 op_f23c_0_comp_ff(uae_u32 opcode) +{ + uae_u32 dodgy=0; + uae_u32 m68k_pc_offset_thisinst=m68k_pc_offset; + m68k_pc_offset+=2; +{ uae_u8 scratchie=S1; +#ifdef USE_JIT_FPU + uae_u16 extra=comp_get_iword((m68k_pc_offset+=2)-2); + comp_fpp_opp(opcode,extra); +#else + failure = 1; +#endif +} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); + if (failure) m68k_pc_offset=m68k_pc_offset_thisinst; +return 0; +} +/* FScc.L #.W,Dn */ +uae_u32 REGPARAM2 op_f240_0_comp_ff(uae_u32 opcode) +{ + uae_u32 dstreg = opcode & 7; + uae_u32 dodgy=0; + uae_u32 m68k_pc_offset_thisinst=m68k_pc_offset; + m68k_pc_offset+=2; +{ uae_u8 scratchie=S1; +#ifdef USE_JIT_FPU + uae_u16 extra=comp_get_iword((m68k_pc_offset+=2)-2); + comp_fscc_opp(opcode,extra); +#else + failure = 1; +#endif +} if 
(m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); + if (failure) m68k_pc_offset=m68k_pc_offset_thisinst; +return 0; +} +/* FScc.L #.W,(An) */ +uae_u32 REGPARAM2 op_f250_0_comp_ff(uae_u32 opcode) +{ + uae_u32 dstreg = opcode & 7; + uae_u32 dodgy=0; + uae_u32 m68k_pc_offset_thisinst=m68k_pc_offset; + m68k_pc_offset+=2; +{ uae_u8 scratchie=S1; +#ifdef USE_JIT_FPU + uae_u16 extra=comp_get_iword((m68k_pc_offset+=2)-2); + comp_fscc_opp(opcode,extra); +#else + failure = 1; +#endif +} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); + if (failure) m68k_pc_offset=m68k_pc_offset_thisinst; +return 0; +} +/* FScc.L #.W,(An)+ */ +uae_u32 REGPARAM2 op_f258_0_comp_ff(uae_u32 opcode) +{ + uae_u32 dstreg = opcode & 7; + uae_u32 dodgy=0; + uae_u32 m68k_pc_offset_thisinst=m68k_pc_offset; + m68k_pc_offset+=2; +{ uae_u8 scratchie=S1; +#ifdef USE_JIT_FPU + uae_u16 extra=comp_get_iword((m68k_pc_offset+=2)-2); + comp_fscc_opp(opcode,extra); +#else + failure = 1; +#endif +} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); + if (failure) m68k_pc_offset=m68k_pc_offset_thisinst; +return 0; +} +/* FScc.L #.W,-(An) */ +uae_u32 REGPARAM2 op_f260_0_comp_ff(uae_u32 opcode) +{ + uae_u32 dstreg = opcode & 7; + uae_u32 dodgy=0; + uae_u32 m68k_pc_offset_thisinst=m68k_pc_offset; + m68k_pc_offset+=2; +{ uae_u8 scratchie=S1; +#ifdef USE_JIT_FPU + uae_u16 extra=comp_get_iword((m68k_pc_offset+=2)-2); + comp_fscc_opp(opcode,extra); +#else + failure = 1; +#endif +} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); + if (failure) m68k_pc_offset=m68k_pc_offset_thisinst; +return 0; +} +/* FScc.L #.W,(d16,An) */ +uae_u32 REGPARAM2 op_f268_0_comp_ff(uae_u32 opcode) +{ + uae_u32 dstreg = opcode & 7; + uae_u32 dodgy=0; + uae_u32 m68k_pc_offset_thisinst=m68k_pc_offset; + m68k_pc_offset+=2; +{ uae_u8 scratchie=S1; +#ifdef USE_JIT_FPU + uae_u16 extra=comp_get_iword((m68k_pc_offset+=2)-2); + comp_fscc_opp(opcode,extra); +#else + failure = 1; +#endif +} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); + if 
(failure) m68k_pc_offset=m68k_pc_offset_thisinst; +return 0; +} +/* FScc.L #.W,(d8,An,Xn) */ +uae_u32 REGPARAM2 op_f270_0_comp_ff(uae_u32 opcode) +{ + uae_u32 dstreg = opcode & 7; + uae_u32 dodgy=0; + uae_u32 m68k_pc_offset_thisinst=m68k_pc_offset; + m68k_pc_offset+=2; +{ uae_u8 scratchie=S1; +#ifdef USE_JIT_FPU + uae_u16 extra=comp_get_iword((m68k_pc_offset+=2)-2); + comp_fscc_opp(opcode,extra); +#else + failure = 1; +#endif +} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); + if (failure) m68k_pc_offset=m68k_pc_offset_thisinst; +return 0; +} +/* FScc.L #.W,(xxx).W */ +uae_u32 REGPARAM2 op_f278_0_comp_ff(uae_u32 opcode) +{ + uae_u32 dodgy=0; + uae_u32 m68k_pc_offset_thisinst=m68k_pc_offset; + m68k_pc_offset+=2; +{ uae_u8 scratchie=S1; +#ifdef USE_JIT_FPU + uae_u16 extra=comp_get_iword((m68k_pc_offset+=2)-2); + comp_fscc_opp(opcode,extra); +#else + failure = 1; +#endif +} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); + if (failure) m68k_pc_offset=m68k_pc_offset_thisinst; +return 0; +} +/* FScc.L #.W,(xxx).L */ +uae_u32 REGPARAM2 op_f279_0_comp_ff(uae_u32 opcode) +{ + uae_u32 dodgy=0; + uae_u32 m68k_pc_offset_thisinst=m68k_pc_offset; + m68k_pc_offset+=2; +{ uae_u8 scratchie=S1; +#ifdef USE_JIT_FPU + uae_u16 extra=comp_get_iword((m68k_pc_offset+=2)-2); + comp_fscc_opp(opcode,extra); +#else + failure = 1; +#endif +} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); + if (failure) m68k_pc_offset=m68k_pc_offset_thisinst; +return 0; +} +/* FBccQ.L #,#.W */ +uae_u32 REGPARAM2 op_f280_0_comp_ff(uae_u32 opcode) +{ + uae_s32 srcreg = (opcode & 63); + uae_u32 dodgy=0; + uae_u32 m68k_pc_offset_thisinst=m68k_pc_offset; + m68k_pc_offset+=2; +{ uae_u8 scratchie=S1; +#ifdef USE_JIT_FPU + comp_fbcc_opp(opcode); +#else + failure = 1; +#endif +} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); + if (failure) m68k_pc_offset=m68k_pc_offset_thisinst; +return 0; +} +/* FBccQ.L #,#.L */ +uae_u32 REGPARAM2 op_f2c0_0_comp_ff(uae_u32 opcode) +{ + uae_s32 srcreg = (opcode & 63); + 
uae_u32 dodgy=0; + uae_u32 m68k_pc_offset_thisinst=m68k_pc_offset; + m68k_pc_offset+=2; +{ uae_u8 scratchie=S1; +#ifdef USE_JIT_FPU + comp_fbcc_opp(opcode); +#else + failure = 1; +#endif +} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); + if (failure) m68k_pc_offset=m68k_pc_offset_thisinst; +return 0; +} /* MOVE16.L (An)+,(xxx).L */ uae_u32 REGPARAM2 op_f600_0_comp_ff(uae_u32 opcode) { @@ -47591,10 +47932,7 @@ uae_u32 REGPARAM2 op_50c8_0_comp_nf(uae_u32 opcode) arm_ADD_l_ri(offs, m68k_pc_offset); arm_ADD_l_ri(PC_P, m68k_pc_offset); m68k_pc_offset=0; -{ int nsrc = scratchie++; - if(srcreg!=src) - mov_w_rr(srcreg,src); -}}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); +}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); return 0; } /* Scc.B (An) */ @@ -48315,15 +48653,12 @@ uae_u32 REGPARAM2 op_51c8_0_comp_nf(uae_u32 opcode) arm_ADD_l_ri(offs, m68k_pc_offset); arm_ADD_l_ri(PC_P, m68k_pc_offset); m68k_pc_offset=0; -{ int nsrc = scratchie++; sub_w_ri(src, 1); { uae_u32 v2; uae_u32 v1=get_const(PC_P); v2=get_const(offs); register_branch(v1, v2, 3); - if(srcreg!=src) - mov_w_rr(srcreg,src); -}}}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); +}}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); return 0; } /* Scc.B (An) */ @@ -48474,15 +48809,12 @@ uae_u32 REGPARAM2 op_52c8_0_comp_nf(uae_u32 opcode) arm_ADD_l_ri(offs, m68k_pc_offset); arm_ADD_l_ri(PC_P, m68k_pc_offset); m68k_pc_offset=0; -{ int nsrc = scratchie++; make_flags_live(); uae_u32 v1=get_const(PC_P); uae_u32 v2=get_const(offs); jff_DBCC(src, 8); register_branch(v1, v2, 2); - if(srcreg!=src) - mov_w_rr(srcreg,src); -}}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); +}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); return 0; } /* Scc.B (An) */ @@ -48640,15 +48972,12 @@ uae_u32 REGPARAM2 op_53c8_0_comp_nf(uae_u32 opcode) arm_ADD_l_ri(offs, m68k_pc_offset); arm_ADD_l_ri(PC_P, m68k_pc_offset); m68k_pc_offset=0; -{ int nsrc = scratchie++; make_flags_live(); uae_u32 
v1=get_const(PC_P); uae_u32 v2=get_const(offs); jff_DBCC(src, 9); register_branch(v1, v2, 2); - if(srcreg!=src) - mov_w_rr(srcreg,src); -}}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); +}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); return 0; } /* Scc.B (An) */ @@ -48806,15 +49135,12 @@ uae_u32 REGPARAM2 op_54c8_0_comp_nf(uae_u32 opcode) arm_ADD_l_ri(offs, m68k_pc_offset); arm_ADD_l_ri(PC_P, m68k_pc_offset); m68k_pc_offset=0; -{ int nsrc = scratchie++; make_flags_live(); uae_u32 v1=get_const(PC_P); uae_u32 v2=get_const(offs); jff_DBCC(src, 3); register_branch(v1, v2, 2); - if(srcreg!=src) - mov_w_rr(srcreg,src); -}}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); +}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); return 0; } /* Scc.B (An) */ @@ -48972,15 +49298,12 @@ uae_u32 REGPARAM2 op_55c8_0_comp_nf(uae_u32 opcode) arm_ADD_l_ri(offs, m68k_pc_offset); arm_ADD_l_ri(PC_P, m68k_pc_offset); m68k_pc_offset=0; -{ int nsrc = scratchie++; make_flags_live(); uae_u32 v1=get_const(PC_P); uae_u32 v2=get_const(offs); jff_DBCC(src, 2); register_branch(v1, v2, 2); - if(srcreg!=src) - mov_w_rr(srcreg,src); -}}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); +}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); return 0; } /* Scc.B (An) */ @@ -49138,15 +49461,12 @@ uae_u32 REGPARAM2 op_56c8_0_comp_nf(uae_u32 opcode) arm_ADD_l_ri(offs, m68k_pc_offset); arm_ADD_l_ri(PC_P, m68k_pc_offset); m68k_pc_offset=0; -{ int nsrc = scratchie++; make_flags_live(); uae_u32 v1=get_const(PC_P); uae_u32 v2=get_const(offs); jff_DBCC(src, 1); register_branch(v1, v2, 2); - if(srcreg!=src) - mov_w_rr(srcreg,src); -}}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); +}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); return 0; } /* Scc.B (An) */ @@ -49304,15 +49624,12 @@ uae_u32 REGPARAM2 op_57c8_0_comp_nf(uae_u32 opcode) arm_ADD_l_ri(offs, m68k_pc_offset); arm_ADD_l_ri(PC_P, m68k_pc_offset); m68k_pc_offset=0; -{ int nsrc = scratchie++; make_flags_live(); uae_u32 
v1=get_const(PC_P); uae_u32 v2=get_const(offs); jff_DBCC(src, 0); register_branch(v1, v2, 2); - if(srcreg!=src) - mov_w_rr(srcreg,src); -}}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); +}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); return 0; } /* Scc.B (An) */ @@ -49470,15 +49787,12 @@ uae_u32 REGPARAM2 op_58c8_0_comp_nf(uae_u32 opcode) arm_ADD_l_ri(offs, m68k_pc_offset); arm_ADD_l_ri(PC_P, m68k_pc_offset); m68k_pc_offset=0; -{ int nsrc = scratchie++; make_flags_live(); uae_u32 v1=get_const(PC_P); uae_u32 v2=get_const(offs); jff_DBCC(src, 7); register_branch(v1, v2, 2); - if(srcreg!=src) - mov_w_rr(srcreg,src); -}}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); +}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); return 0; } /* Scc.B (An) */ @@ -49636,15 +49950,12 @@ uae_u32 REGPARAM2 op_59c8_0_comp_nf(uae_u32 opcode) arm_ADD_l_ri(offs, m68k_pc_offset); arm_ADD_l_ri(PC_P, m68k_pc_offset); m68k_pc_offset=0; -{ int nsrc = scratchie++; make_flags_live(); uae_u32 v1=get_const(PC_P); uae_u32 v2=get_const(offs); jff_DBCC(src, 6); register_branch(v1, v2, 2); - if(srcreg!=src) - mov_w_rr(srcreg,src); -}}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); +}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); return 0; } /* Scc.B (An) */ @@ -49802,15 +50113,12 @@ uae_u32 REGPARAM2 op_5ac8_0_comp_nf(uae_u32 opcode) arm_ADD_l_ri(offs, m68k_pc_offset); arm_ADD_l_ri(PC_P, m68k_pc_offset); m68k_pc_offset=0; -{ int nsrc = scratchie++; make_flags_live(); uae_u32 v1=get_const(PC_P); uae_u32 v2=get_const(offs); jff_DBCC(src, 5); register_branch(v1, v2, 2); - if(srcreg!=src) - mov_w_rr(srcreg,src); -}}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); +}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); return 0; } /* Scc.B (An) */ @@ -49968,15 +50276,12 @@ uae_u32 REGPARAM2 op_5bc8_0_comp_nf(uae_u32 opcode) arm_ADD_l_ri(offs, m68k_pc_offset); arm_ADD_l_ri(PC_P, m68k_pc_offset); m68k_pc_offset=0; -{ int nsrc = scratchie++; make_flags_live(); uae_u32 
v1=get_const(PC_P); uae_u32 v2=get_const(offs); jff_DBCC(src, 4); register_branch(v1, v2, 2); - if(srcreg!=src) - mov_w_rr(srcreg,src); -}}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); +}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); return 0; } /* Scc.B (An) */ @@ -50134,15 +50439,12 @@ uae_u32 REGPARAM2 op_5cc8_0_comp_nf(uae_u32 opcode) arm_ADD_l_ri(offs, m68k_pc_offset); arm_ADD_l_ri(PC_P, m68k_pc_offset); m68k_pc_offset=0; -{ int nsrc = scratchie++; make_flags_live(); uae_u32 v1=get_const(PC_P); uae_u32 v2=get_const(offs); jff_DBCC(src, 10); register_branch(v1, v2, 2); - if(srcreg!=src) - mov_w_rr(srcreg,src); -}}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); +}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); return 0; } /* Scc.B (An) */ @@ -50300,15 +50602,12 @@ uae_u32 REGPARAM2 op_5dc8_0_comp_nf(uae_u32 opcode) arm_ADD_l_ri(offs, m68k_pc_offset); arm_ADD_l_ri(PC_P, m68k_pc_offset); m68k_pc_offset=0; -{ int nsrc = scratchie++; make_flags_live(); uae_u32 v1=get_const(PC_P); uae_u32 v2=get_const(offs); jff_DBCC(src, 11); register_branch(v1, v2, 2); - if(srcreg!=src) - mov_w_rr(srcreg,src); -}}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); +}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); return 0; } /* Scc.B (An) */ @@ -50466,15 +50765,12 @@ uae_u32 REGPARAM2 op_5ec8_0_comp_nf(uae_u32 opcode) arm_ADD_l_ri(offs, m68k_pc_offset); arm_ADD_l_ri(PC_P, m68k_pc_offset); m68k_pc_offset=0; -{ int nsrc = scratchie++; make_flags_live(); uae_u32 v1=get_const(PC_P); uae_u32 v2=get_const(offs); jff_DBCC(src, 12); register_branch(v1, v2, 2); - if(srcreg!=src) - mov_w_rr(srcreg,src); -}}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); +}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); return 0; } /* Scc.B (An) */ @@ -50632,15 +50928,12 @@ uae_u32 REGPARAM2 op_5fc8_0_comp_nf(uae_u32 opcode) arm_ADD_l_ri(offs, m68k_pc_offset); arm_ADD_l_ri(PC_P, m68k_pc_offset); m68k_pc_offset=0; -{ int nsrc = scratchie++; make_flags_live(); 
uae_u32 v1=get_const(PC_P); uae_u32 v2=get_const(offs); jff_DBCC(src, 13); register_branch(v1, v2, 2); - if(srcreg!=src) - mov_w_rr(srcreg,src); -}}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); +}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); return 0; } /* Scc.B (An) */ @@ -61782,6 +62075,395 @@ uae_u32 REGPARAM2 op_e7f9_0_comp_nf(uae_u32 opcode) }}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); return 0; } +/* FPP.L #.W,Dn */ +uae_u32 REGPARAM2 op_f200_0_comp_nf(uae_u32 opcode) +{ + uae_u32 dstreg = opcode & 7; + uae_u32 dodgy=0; + uae_u32 m68k_pc_offset_thisinst=m68k_pc_offset; + m68k_pc_offset+=2; +{ uae_u8 scratchie=S1; +#ifdef USE_JIT_FPU + uae_u16 extra=comp_get_iword((m68k_pc_offset+=2)-2); + comp_fpp_opp(opcode,extra); +#else + failure = 1; +#endif +} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); + if (failure) m68k_pc_offset=m68k_pc_offset_thisinst; +return 0; +} +/* FPP.L #.W,An */ +uae_u32 REGPARAM2 op_f208_0_comp_nf(uae_u32 opcode) +{ + uae_u32 dstreg = opcode & 7; + uae_u32 dodgy=0; + uae_u32 m68k_pc_offset_thisinst=m68k_pc_offset; + m68k_pc_offset+=2; +{ uae_u8 scratchie=S1; +#ifdef USE_JIT_FPU + uae_u16 extra=comp_get_iword((m68k_pc_offset+=2)-2); + comp_fpp_opp(opcode,extra); +#else + failure = 1; +#endif +} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); + if (failure) m68k_pc_offset=m68k_pc_offset_thisinst; +return 0; +} +/* FPP.L #.W,(An) */ +uae_u32 REGPARAM2 op_f210_0_comp_nf(uae_u32 opcode) +{ + uae_u32 dstreg = opcode & 7; + uae_u32 dodgy=0; + uae_u32 m68k_pc_offset_thisinst=m68k_pc_offset; + m68k_pc_offset+=2; +{ uae_u8 scratchie=S1; +#ifdef USE_JIT_FPU + uae_u16 extra=comp_get_iword((m68k_pc_offset+=2)-2); + comp_fpp_opp(opcode,extra); +#else + failure = 1; +#endif +} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); + if (failure) m68k_pc_offset=m68k_pc_offset_thisinst; +return 0; +} +/* FPP.L #.W,(An)+ */ +uae_u32 REGPARAM2 op_f218_0_comp_nf(uae_u32 opcode) +{ + uae_u32 dstreg = opcode & 7; + uae_u32 
dodgy=0; + uae_u32 m68k_pc_offset_thisinst=m68k_pc_offset; + m68k_pc_offset+=2; +{ uae_u8 scratchie=S1; +#ifdef USE_JIT_FPU + uae_u16 extra=comp_get_iword((m68k_pc_offset+=2)-2); + comp_fpp_opp(opcode,extra); +#else + failure = 1; +#endif +} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); + if (failure) m68k_pc_offset=m68k_pc_offset_thisinst; +return 0; +} +/* FPP.L #.W,-(An) */ +uae_u32 REGPARAM2 op_f220_0_comp_nf(uae_u32 opcode) +{ + uae_u32 dstreg = opcode & 7; + uae_u32 dodgy=0; + uae_u32 m68k_pc_offset_thisinst=m68k_pc_offset; + m68k_pc_offset+=2; +{ uae_u8 scratchie=S1; +#ifdef USE_JIT_FPU + uae_u16 extra=comp_get_iword((m68k_pc_offset+=2)-2); + comp_fpp_opp(opcode,extra); +#else + failure = 1; +#endif +} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); + if (failure) m68k_pc_offset=m68k_pc_offset_thisinst; +return 0; +} +/* FPP.L #.W,(d16,An) */ +uae_u32 REGPARAM2 op_f228_0_comp_nf(uae_u32 opcode) +{ + uae_u32 dstreg = opcode & 7; + uae_u32 dodgy=0; + uae_u32 m68k_pc_offset_thisinst=m68k_pc_offset; + m68k_pc_offset+=2; +{ uae_u8 scratchie=S1; +#ifdef USE_JIT_FPU + uae_u16 extra=comp_get_iword((m68k_pc_offset+=2)-2); + comp_fpp_opp(opcode,extra); +#else + failure = 1; +#endif +} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); + if (failure) m68k_pc_offset=m68k_pc_offset_thisinst; +return 0; +} +/* FPP.L #.W,(d8,An,Xn) */ +uae_u32 REGPARAM2 op_f230_0_comp_nf(uae_u32 opcode) +{ + uae_u32 dstreg = opcode & 7; + uae_u32 dodgy=0; + uae_u32 m68k_pc_offset_thisinst=m68k_pc_offset; + m68k_pc_offset+=2; +{ uae_u8 scratchie=S1; +#ifdef USE_JIT_FPU + uae_u16 extra=comp_get_iword((m68k_pc_offset+=2)-2); + comp_fpp_opp(opcode,extra); +#else + failure = 1; +#endif +} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); + if (failure) m68k_pc_offset=m68k_pc_offset_thisinst; +return 0; +} +/* FPP.L #.W,(xxx).W */ +uae_u32 REGPARAM2 op_f238_0_comp_nf(uae_u32 opcode) +{ + uae_u32 dodgy=0; + uae_u32 m68k_pc_offset_thisinst=m68k_pc_offset; + m68k_pc_offset+=2; +{ uae_u8 
scratchie=S1; +#ifdef USE_JIT_FPU + uae_u16 extra=comp_get_iword((m68k_pc_offset+=2)-2); + comp_fpp_opp(opcode,extra); +#else + failure = 1; +#endif +} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); + if (failure) m68k_pc_offset=m68k_pc_offset_thisinst; +return 0; +} +/* FPP.L #.W,(xxx).L */ +uae_u32 REGPARAM2 op_f239_0_comp_nf(uae_u32 opcode) +{ + uae_u32 dodgy=0; + uae_u32 m68k_pc_offset_thisinst=m68k_pc_offset; + m68k_pc_offset+=2; +{ uae_u8 scratchie=S1; +#ifdef USE_JIT_FPU + uae_u16 extra=comp_get_iword((m68k_pc_offset+=2)-2); + comp_fpp_opp(opcode,extra); +#else + failure = 1; +#endif +} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); + if (failure) m68k_pc_offset=m68k_pc_offset_thisinst; +return 0; +} +/* FPP.L #.W,(d16,PC) */ +uae_u32 REGPARAM2 op_f23a_0_comp_nf(uae_u32 opcode) +{ + uae_s32 dstreg = 2; + uae_u32 dodgy=0; + uae_u32 m68k_pc_offset_thisinst=m68k_pc_offset; + m68k_pc_offset+=2; +{ uae_u8 scratchie=S1; +#ifdef USE_JIT_FPU + uae_u16 extra=comp_get_iword((m68k_pc_offset+=2)-2); + comp_fpp_opp(opcode,extra); +#else + failure = 1; +#endif +} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); + if (failure) m68k_pc_offset=m68k_pc_offset_thisinst; +return 0; +} +/* FPP.L #.W,(d8,PC,Xn) */ +uae_u32 REGPARAM2 op_f23b_0_comp_nf(uae_u32 opcode) +{ + uae_s32 dstreg = 3; + uae_u32 dodgy=0; + uae_u32 m68k_pc_offset_thisinst=m68k_pc_offset; + m68k_pc_offset+=2; +{ uae_u8 scratchie=S1; +#ifdef USE_JIT_FPU + uae_u16 extra=comp_get_iword((m68k_pc_offset+=2)-2); + comp_fpp_opp(opcode,extra); +#else + failure = 1; +#endif +} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); + if (failure) m68k_pc_offset=m68k_pc_offset_thisinst; +return 0; +} +/* FPP.L #.W,#.L */ +uae_u32 REGPARAM2 op_f23c_0_comp_nf(uae_u32 opcode) +{ + uae_u32 dodgy=0; + uae_u32 m68k_pc_offset_thisinst=m68k_pc_offset; + m68k_pc_offset+=2; +{ uae_u8 scratchie=S1; +#ifdef USE_JIT_FPU + uae_u16 extra=comp_get_iword((m68k_pc_offset+=2)-2); + comp_fpp_opp(opcode,extra); +#else + failure = 1; 
+#endif +} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); + if (failure) m68k_pc_offset=m68k_pc_offset_thisinst; +return 0; +} +/* FScc.L #.W,Dn */ +uae_u32 REGPARAM2 op_f240_0_comp_nf(uae_u32 opcode) +{ + uae_u32 dstreg = opcode & 7; + uae_u32 dodgy=0; + uae_u32 m68k_pc_offset_thisinst=m68k_pc_offset; + m68k_pc_offset+=2; +{ uae_u8 scratchie=S1; +#ifdef USE_JIT_FPU + uae_u16 extra=comp_get_iword((m68k_pc_offset+=2)-2); + comp_fscc_opp(opcode,extra); +#else + failure = 1; +#endif +} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); + if (failure) m68k_pc_offset=m68k_pc_offset_thisinst; +return 0; +} +/* FScc.L #.W,(An) */ +uae_u32 REGPARAM2 op_f250_0_comp_nf(uae_u32 opcode) +{ + uae_u32 dstreg = opcode & 7; + uae_u32 dodgy=0; + uae_u32 m68k_pc_offset_thisinst=m68k_pc_offset; + m68k_pc_offset+=2; +{ uae_u8 scratchie=S1; +#ifdef USE_JIT_FPU + uae_u16 extra=comp_get_iword((m68k_pc_offset+=2)-2); + comp_fscc_opp(opcode,extra); +#else + failure = 1; +#endif +} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); + if (failure) m68k_pc_offset=m68k_pc_offset_thisinst; +return 0; +} +/* FScc.L #.W,(An)+ */ +uae_u32 REGPARAM2 op_f258_0_comp_nf(uae_u32 opcode) +{ + uae_u32 dstreg = opcode & 7; + uae_u32 dodgy=0; + uae_u32 m68k_pc_offset_thisinst=m68k_pc_offset; + m68k_pc_offset+=2; +{ uae_u8 scratchie=S1; +#ifdef USE_JIT_FPU + uae_u16 extra=comp_get_iword((m68k_pc_offset+=2)-2); + comp_fscc_opp(opcode,extra); +#else + failure = 1; +#endif +} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); + if (failure) m68k_pc_offset=m68k_pc_offset_thisinst; +return 0; +} +/* FScc.L #.W,-(An) */ +uae_u32 REGPARAM2 op_f260_0_comp_nf(uae_u32 opcode) +{ + uae_u32 dstreg = opcode & 7; + uae_u32 dodgy=0; + uae_u32 m68k_pc_offset_thisinst=m68k_pc_offset; + m68k_pc_offset+=2; +{ uae_u8 scratchie=S1; +#ifdef USE_JIT_FPU + uae_u16 extra=comp_get_iword((m68k_pc_offset+=2)-2); + comp_fscc_opp(opcode,extra); +#else + failure = 1; +#endif +} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); + if 
(failure) m68k_pc_offset=m68k_pc_offset_thisinst; +return 0; +} +/* FScc.L #.W,(d16,An) */ +uae_u32 REGPARAM2 op_f268_0_comp_nf(uae_u32 opcode) +{ + uae_u32 dstreg = opcode & 7; + uae_u32 dodgy=0; + uae_u32 m68k_pc_offset_thisinst=m68k_pc_offset; + m68k_pc_offset+=2; +{ uae_u8 scratchie=S1; +#ifdef USE_JIT_FPU + uae_u16 extra=comp_get_iword((m68k_pc_offset+=2)-2); + comp_fscc_opp(opcode,extra); +#else + failure = 1; +#endif +} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); + if (failure) m68k_pc_offset=m68k_pc_offset_thisinst; +return 0; +} +/* FScc.L #.W,(d8,An,Xn) */ +uae_u32 REGPARAM2 op_f270_0_comp_nf(uae_u32 opcode) +{ + uae_u32 dstreg = opcode & 7; + uae_u32 dodgy=0; + uae_u32 m68k_pc_offset_thisinst=m68k_pc_offset; + m68k_pc_offset+=2; +{ uae_u8 scratchie=S1; +#ifdef USE_JIT_FPU + uae_u16 extra=comp_get_iword((m68k_pc_offset+=2)-2); + comp_fscc_opp(opcode,extra); +#else + failure = 1; +#endif +} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); + if (failure) m68k_pc_offset=m68k_pc_offset_thisinst; +return 0; +} +/* FScc.L #.W,(xxx).W */ +uae_u32 REGPARAM2 op_f278_0_comp_nf(uae_u32 opcode) +{ + uae_u32 dodgy=0; + uae_u32 m68k_pc_offset_thisinst=m68k_pc_offset; + m68k_pc_offset+=2; +{ uae_u8 scratchie=S1; +#ifdef USE_JIT_FPU + uae_u16 extra=comp_get_iword((m68k_pc_offset+=2)-2); + comp_fscc_opp(opcode,extra); +#else + failure = 1; +#endif +} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); + if (failure) m68k_pc_offset=m68k_pc_offset_thisinst; +return 0; +} +/* FScc.L #.W,(xxx).L */ +uae_u32 REGPARAM2 op_f279_0_comp_nf(uae_u32 opcode) +{ + uae_u32 dodgy=0; + uae_u32 m68k_pc_offset_thisinst=m68k_pc_offset; + m68k_pc_offset+=2; +{ uae_u8 scratchie=S1; +#ifdef USE_JIT_FPU + uae_u16 extra=comp_get_iword((m68k_pc_offset+=2)-2); + comp_fscc_opp(opcode,extra); +#else + failure = 1; +#endif +} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); + if (failure) m68k_pc_offset=m68k_pc_offset_thisinst; +return 0; +} +/* FBccQ.L #,#.W */ +uae_u32 REGPARAM2 
op_f280_0_comp_nf(uae_u32 opcode) +{ + uae_s32 srcreg = (opcode & 63); + uae_u32 dodgy=0; + uae_u32 m68k_pc_offset_thisinst=m68k_pc_offset; + m68k_pc_offset+=2; +{ uae_u8 scratchie=S1; +#ifdef USE_JIT_FPU + comp_fbcc_opp(opcode); +#else + failure = 1; +#endif +} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); + if (failure) m68k_pc_offset=m68k_pc_offset_thisinst; +return 0; +} +/* FBccQ.L #,#.L */ +uae_u32 REGPARAM2 op_f2c0_0_comp_nf(uae_u32 opcode) +{ + uae_s32 srcreg = (opcode & 63); + uae_u32 dodgy=0; + uae_u32 m68k_pc_offset_thisinst=m68k_pc_offset; + m68k_pc_offset+=2; +{ uae_u8 scratchie=S1; +#ifdef USE_JIT_FPU + comp_fbcc_opp(opcode); +#else + failure = 1; +#endif +} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); + if (failure) m68k_pc_offset=m68k_pc_offset_thisinst; +return 0; +} /* MOVE16.L (An)+,(xxx).L */ uae_u32 REGPARAM2 op_f600_0_comp_nf(uae_u32 opcode) { diff --git a/src/jit/compemu.h b/src/jit/compemu.h index 247dfed4..fac4bcf9 100644 --- a/src/jit/compemu.h +++ b/src/jit/compemu.h @@ -90,7 +90,7 @@ typedef union { #define BYTES_PER_INST 10240 /* paranoid ;-) */ #if defined(CPU_arm) -#define LONGEST_68K_INST 256 /* The number of bytes the longest possible +#define LONGEST_68K_INST 128 /* The number of bytes the longest possible 68k instruction takes */ #else #define LONGEST_68K_INST 16 /* The number of bytes the longest possible @@ -127,7 +127,8 @@ typedef union { #else #define N_REGS 8 /* really only 7, but they are numbered 0,1,2,3,5,6,7 */ #endif -#define N_FREGS 6 /* That leaves us two positions on the stack to play with */ +#define N_FREGS 16 // We use 16 regs: 0 - FP_RESULT, 1-3 - SCRATCH, 4-7 - ???, 8-15 - Amiga regs FP0-FP7 + /* Functions exposed to newcpu, or to what was moved from newcpu.c to * compemu_support.c */ @@ -151,11 +152,21 @@ extern int check_for_cache_miss(void); #define scaled_cycles(x) (currprefs.m68k_speed<0?(((x)/SCALE)?(((x)/SCALE +#include #include "sysconfig.h" #include "sysdeps.h" @@ -18,41 +18,794 @@ #include 
"custom.h" #include "newcpu.h" #include "compemu.h" +#include "flags_arm.h" #if defined(JIT) +extern void fpp_to_exten(fpdata *fpd, uae_u32 wrd1, uae_u32 wrd2, uae_u32 wrd3); + +static const int sz1[8] = { 4, 4, 12, 12, 2, 8, 1, 0 }; +static const int sz2[8] = { 4, 4, 12, 12, 2, 8, 2, 0 }; + +/* return the required floating point precision or -1 for failure, 0=E, 1=S, 2=D */ +STATIC_INLINE int comp_fp_get (uae_u32 opcode, uae_u16 extra, int treg) +{ + int reg = opcode & 7; + int mode = (opcode >> 3) & 7; + int size = (extra >> 10) & 7; + + if ((size == 2 && (mode != 7 || reg != 4)) || size == 3 || size == 7) /* 2 = long double, 3 = packed decimal, 7 is not defined */ + return -1; + switch (mode) { + case 0: /* Dn */ + switch (size) { + case 0: /* Long */ + fmov_l_rr (treg, reg); + return 2; + case 1: /* Single */ + fmov_s_rr (treg, reg); + return 1; + case 4: /* Word */ + fmov_w_rr (treg, reg); + return 1; + case 6: /* Byte */ + fmov_b_rr (treg, reg); + return 1; + default: + return -1; + } + case 1: /* An, invalid mode */ + return -1; + case 2: /* (An) */ + mov_l_rr (S1, reg + 8); + break; + case 3: /* (An)+ */ + mov_l_rr (S1, reg + 8); + arm_ADD_l_ri8(reg + 8, (reg == 7 ? sz2[size] : sz1[size])); + break; + case 4: /* -(An) */ + arm_SUB_l_ri8(reg + 8, (reg == 7 ? 
sz2[size] : sz1[size])); + mov_l_rr (S1, reg + 8); + break; + case 5: /* (d16,An) */ + { + uae_u32 off = (uae_s32) (uae_s16) comp_get_iword ((m68k_pc_offset += 2) - 2); + mov_l_rr (S1, reg + 8); + lea_l_brr (S1, S1, off); + break; + } + case 6: /* (d8,An,Xn) or (bd,An,Xn) or ([bd,An,Xn],od) or ([bd,An],Xn,od) */ + { + uae_u32 dp = comp_get_iword ((m68k_pc_offset += 2) - 2); + calc_disp_ea_020 (reg + 8, dp, S1, S2); + break; + } + case 7: + switch (reg) { + case 0: /* (xxx).W */ + { + uae_u32 off = (uae_s32) (uae_s16) comp_get_iword ((m68k_pc_offset += 2) - 2); + mov_l_ri (S1, off); + break; + } + case 1: /* (xxx).L */ + { + uae_u32 off = comp_get_ilong ((m68k_pc_offset += 4) - 4); + mov_l_ri (S1, off); + break; + } + case 2: /* (d16,PC) */ + { + uae_u32 address = start_pc + ((uae_char*) comp_pc_p - (uae_char*) start_pc_p) + + m68k_pc_offset; + uae_s32 PC16off = (uae_s32) (uae_s16) comp_get_iword ((m68k_pc_offset += 2) - 2); + mov_l_ri (S1, address + PC16off); + break; + } + case 3: /* (d8,PC,Xn) or (bd,PC,Xn) or ([bd,PC,Xn],od) or ([bd,PC],Xn,od) */ + return -1; /* rarely used, fallback to non-JIT */ + case 4: /* # < data >; Constants should be converted just once by the JIT */ + m68k_pc_offset += sz2[size]; + switch (size) { + case 0: + { + uae_s32 li = comp_get_ilong(m68k_pc_offset - 4); + float si = (float)li; + + if (li == (int)si) { + //write_log ("converted immediate LONG constant to SINGLE\n"); + fmov_s_ri(treg, *(uae_u32 *)&si); + return 1; + } + //write_log ("immediate LONG constant\n"); + fmov_l_ri(treg, *(uae_u32 *)&li); + return 2; + } + case 1: + //write_log (_T("immediate SINGLE constant\n")); + fmov_s_ri(treg, comp_get_ilong(m68k_pc_offset - 4)); + return 1; + case 2: + { + //write_log (_T("immediate LONG DOUBLE constant\n")); + uae_u32 wrd1, wrd2, wrd3; + fpdata tmp; + wrd3 = comp_get_ilong(m68k_pc_offset - 4); + wrd2 = comp_get_ilong(m68k_pc_offset - 8); + wrd1 = comp_get_iword(m68k_pc_offset - 12) << 16; + fpp_to_exten(&tmp, wrd1, wrd2, wrd3); + 
mov_l_ri(S1, ((uae_u32*)&tmp)[0]); + mov_l_ri(S2, ((uae_u32*)&tmp)[1]); + fmov_d_rrr (treg, S1, S2); + return 0; + } + case 4: + { + float si = (float)(uae_s16)comp_get_iword(m68k_pc_offset-2); + + //write_log (_T("converted immediate WORD constant %f to SINGLE\n"), si); + fmov_s_ri(treg, *(uae_u32 *)&si); + return 1; + } + case 5: + { + //write_log (_T("immediate DOUBLE constant\n")); + mov_l_ri(S1, comp_get_ilong(m68k_pc_offset - 4)); + mov_l_ri(S2, comp_get_ilong(m68k_pc_offset - 8)); + fmov_d_rrr (treg, S1, S2); + return 2; + } + case 6: + { + float si = (float)(uae_s8)comp_get_ibyte(m68k_pc_offset - 2); + + //write_log (_T("converted immediate BYTE constant to SINGLE\n")); + fmov_s_ri(treg, *(uae_u32 *)&si); + return 1; + } + default: /* never reached */ + return -1; + } + default: /* never reached */ + return -1; + } + } + + switch (size) { + case 0: /* Long */ + readlong (S1, S2, S3); + fmov_l_rr (treg, S2); + return 2; + case 1: /* Single */ + readlong (S1, S2, S3); + fmov_s_rr (treg, S2); + return 1; + case 4: /* Word */ + readword (S1, S2, S3); + fmov_w_rr (treg, S2); + return 1; + case 5: /* Double */ + readlong (S1, S2, S3); + add_l_ri (S1, 4); + readlong (S1, S4, S3); + fmov_d_rrr (treg, S4, S2); + return 2; + case 6: /* Byte */ + readbyte (S1, S2, S3); + fmov_b_rr (treg, S2); + return 1; + default: + return -1; + } + return -1; +} + +/* return of -1 means failure, >=0 means OK */ +STATIC_INLINE int comp_fp_put (uae_u32 opcode, uae_u16 extra) +{ + int reg = opcode & 7; + int sreg = (extra >> 7) & 7; + int mode = (opcode >> 3) & 7; + int size = (extra >> 10) & 7; + + if (size == 2 || size == 3 || size == 7) /* 2 = long double, 3 = packed decimal, 7 is not defined */ + return -1; + switch (mode) { + case 0: /* Dn */ + switch (size) { + case 0: /* FMOVE.L FPx, Dn */ + fmov_to_l_rr(reg, sreg); + return 0; + case 1: /* FMOVE.S FPx, Dn */ + fmov_to_s_rr(reg, sreg); + return 0; + case 4: /* FMOVE.W FPx, Dn */ + fmov_to_w_rr(reg, sreg); + return 0; + case 6: 
/* FMOVE.B FPx, Dn */ + fmov_to_b_rr(reg, sreg); + return 0; + default: + return -1; + } + case 1: /* An, invalid mode */ + return -1; + case 2: /* (An) */ + mov_l_rr (S1, reg + 8); + break; + case 3: /* (An)+ */ + mov_l_rr (S1, reg + 8); + arm_ADD_l_ri8(reg + 8, (reg == 7 ? sz2[size] : sz1[size])); + break; + case 4: /* -(An) */ + arm_SUB_l_ri8(reg + 8, (reg == 7 ? sz2[size] : sz1[size])); + mov_l_rr (S1, reg + 8); + break; + case 5: /* (d16,An) */ + { + uae_u32 off = (uae_s32) (uae_s16) comp_get_iword ((m68k_pc_offset += 2) - 2); + mov_l_rr (S1, reg + 8); + add_l_ri (S1, off); + break; + } + case 6: /* (d8,An,Xn) or (bd,An,Xn) or ([bd,An,Xn],od) or ([bd,An],Xn,od) */ + { + uae_u32 dp = comp_get_iword ((m68k_pc_offset += 2) - 2); + calc_disp_ea_020 (reg + 8, dp, S1, S2); + break; + } + case 7: + switch (reg) { + case 0: /* (xxx).W */ + { + uae_u32 off = (uae_s32) (uae_s16) comp_get_iword ((m68k_pc_offset += 2) - 2); + mov_l_ri (S1, off); + break; + } + case 1: /* (xxx).L */ + { + uae_u32 off = comp_get_ilong ((m68k_pc_offset += 4) - 4); + mov_l_ri (S1, off); + break; + } + default: /* All other modes are not allowed for FPx to */ + write_log (_T ("JIT FMOVE FPx, Mode is not allowed %04x %04x\n"), opcode, extra); + return -1; + } + } + switch (size) { + case 0: /* Long */ + fmov_to_l_rr(S2, sreg); + writelong_clobber (S1, S2, S3); + return 0; + case 1: /* Single */ + fmov_to_s_rr(S2, sreg); + writelong_clobber (S1, S2, S3); + return 0; + case 4: /* Word */ + fmov_to_w_rr(S2, sreg); + writeword (S1, S2, S3); + return 0; + case 5: /* Double */ + fmov_to_d_rrr(S2, S3, sreg); + writelong_clobber (S1, S3, S4); + add_l_ri (S1, 4); + writelong_clobber (S1, S2, S4); + return 0; + case 6: /* Byte */ + fmov_to_b_rr(S2, sreg); + writebyte (S1, S2, S3); + return 0; + default: + return -1; + } + return -1; +} + +/* return -1 for failure, or register number for success */ +STATIC_INLINE int comp_fp_adr (uae_u32 opcode) +{ + uae_s32 off; + int mode = (opcode >> 3) & 7; + int reg 
= opcode & 7; + + switch (mode) { + case 2: + case 3: + case 4: + mov_l_rr (S1, 8 + reg); + return S1; + case 5: + off = (uae_s32) (uae_s16) comp_get_iword ((m68k_pc_offset += 2) - 2); + mov_l_rr (S1, 8 + reg); + add_l_ri (S1, off); + return S1; + case 7: + switch (reg) { + case 0: + off = (uae_s32) (uae_s16) comp_get_iword ((m68k_pc_offset += 2) - 2); + mov_l_ri (S1, off); + return S1; + case 1: + off = comp_get_ilong ((m68k_pc_offset += 4) - 4); + mov_l_ri (S1, off); + return S1; + } + default: + return -1; + } +} + void comp_fdbcc_opp (uae_u32 opcode, uae_u16 extra) { - printf("comp_fdbcc_opp not yet implemented\n"); + FAIL (1); + return; } void comp_fscc_opp (uae_u32 opcode, uae_u16 extra) { - printf("comp_fscc_opp not yet implemented\n"); + //printf("comp_fscc_opp() called (0x%04x, 0x%04x)\n", opcode, extra); + if (!currprefs.compfpu) { + FAIL (1); + return; + } + + FAIL (1); + return; } void comp_ftrapcc_opp (uae_u32 opcode, uaecptr oldpc) { - printf("comp_ftrapcc_opp not yet implemented\n"); + FAIL (1); + return; } void comp_fbcc_opp (uae_u32 opcode) { - printf("comp_fbcc_opp not yet implemented\n"); + uae_u32 start_68k_offset = m68k_pc_offset; + uae_u32 off, v1, v2; + int cc; + + if (!currprefs.compfpu) { + FAIL (1); + return; + } + + if (opcode & 0x20) { /* only cc from 00 to 1f are defined */ + FAIL (1); + return; + } + if (!(opcode & 0x40)) { + off = (uae_s32) (uae_s16) comp_get_iword ((m68k_pc_offset += 2) - 2); + } + else { + off = comp_get_ilong ((m68k_pc_offset += 4) - 4); + } + + /* according to fpp.c, the 0x10 bit is ignored + (it handles exception handling, which we don't + do, anyway ;-) */ + cc = opcode & 0x0f; + if(cc == 0) + return; /* jump never */ + + /* Note, "off" will sometimes be (unsigned) "negative", so the following + * uintptr can be > 0xffffffff, but the result will be correct due to + * wraparound when truncated to 32 bit in the call to mov_l_ri. 
*/ + mov_l_ri(S1, (uintptr) + (comp_pc_p + off - (m68k_pc_offset - start_68k_offset))); + mov_l_ri(PC_P, (uintptr) comp_pc_p); + + /* Now they are both constant. Might as well fold in m68k_pc_offset */ + add_l_ri (S1, m68k_pc_offset); + add_l_ri (PC_P, m68k_pc_offset); + m68k_pc_offset = 0; + + v1 = get_const (PC_P); + v2 = get_const (S1); + fflags_into_flags (); + + switch (cc) { + case 1: register_branch (v1, v2, NATIVE_CC_EQ); break; + case 2: register_branch (v1, v2, NATIVE_CC_F_OGT); break; + case 3: register_branch (v1, v2, NATIVE_CC_F_OGE); break; + case 4: register_branch (v1, v2, NATIVE_CC_F_OLT); break; + case 5: register_branch (v1, v2, NATIVE_CC_F_OLE); break; + case 6: register_branch (v1, v2, NATIVE_CC_F_OGL); break; + case 7: register_branch (v1, v2, NATIVE_CC_F_OR); break; + case 8: register_branch (v1, v2, NATIVE_CC_F_UN); break; + case 9: register_branch (v1, v2, NATIVE_CC_F_UEQ); break; + case 10: register_branch (v1, v2, NATIVE_CC_F_UGT); break; + case 11: register_branch (v1, v2, NATIVE_CC_F_UGE); break; + case 12: register_branch (v1, v2, NATIVE_CC_F_ULT); break; + case 13: register_branch (v1, v2, NATIVE_CC_F_ULE); break; + case 14: register_branch (v1, v2, NATIVE_CC_NE); break; + case 15: register_branch (v2, v2, NATIVE_CC_AL); break; + } } void comp_fsave_opp (uae_u32 opcode) { - printf("comp_fsave_opp not yet implemented\n"); + FAIL (1); + return; } void comp_frestore_opp (uae_u32 opcode) { - printf("comp_frestore_opp not yet implemented\n"); + FAIL (1); + return; } +static uae_u32 dhex_pi[] ={0x54442D18, 0x400921FB}; +static uae_u32 dhex_exp_1[] ={0x8B145769, 0x4005BF0A}; +static uae_u32 dhex_l2_e[] ={0x652B82FE, 0x3FF71547}; +static uae_u32 dhex_ln_2[] ={0xFEFA39EF, 0x3FE62E42}; +static uae_u32 dhex_ln_10[] ={0xBBB55516, 0x40026BB1}; +static uae_u32 dhex_l10_2[] ={0x509F79FF, 0x3FD34413}; +static uae_u32 dhex_l10_e[] ={0x1526E50E, 0x3FDBCB7B}; +static uae_u32 dhex_1e16[] ={0x37E08000, 0x4341C379}; +static uae_u32 dhex_1e32[] 
={0xB5056E17, 0x4693B8B5}; +static uae_u32 dhex_1e64[] ={0xE93FF9F5, 0x4D384F03}; +static uae_u32 dhex_1e128[] ={0xF9301D32, 0x5A827748}; +static uae_u32 dhex_1e256[] ={0x7F73BF3C, 0x75154FDD}; +static uae_u32 dhex_inf[] ={0x00000000, 0x7ff00000}; +static uae_u32 dhex_nan[] ={0xffffffff, 0x7fffffff}; +extern double fp_1e8; + void comp_fpp_opp (uae_u32 opcode, uae_u16 extra) { - printf("comp_fpp_opp not yet implemented\n"); + int reg; + int sreg, prec = 0; + int dreg = (extra >> 7) & 7; + int source = (extra >> 13) & 7; + int opmode = extra & 0x7f; + + if (!currprefs.compfpu) { + FAIL (1); + return; + } + switch (source) { + case 3: /* FMOVE FPx, */ + if (comp_fp_put (opcode, extra) < 0) + FAIL (1); + return; + case 4: /* FMOVE.L , ControlReg */ + if (!(opcode & 0x30)) { /* Dn or An */ + if (extra & 0x1000) { /* FPCR */ + mov_l_mr (uae_p32(®s.fpcr), opcode & 15); + return; + } + if (extra & 0x0800) { /* FPSR */ + FAIL (1); + return; + // set_fpsr(m68k_dreg (regs, opcode & 15)); + } + if (extra & 0x0400) { /* FPIAR */ + mov_l_mr (uae_p32(®s.fpiar), opcode & 15); return; + } + } + else if ((opcode & 0x3f) == 0x3c) { + if (extra & 0x1000) { /* FPCR */ + uae_u32 val = comp_get_ilong ((m68k_pc_offset += 4) - 4); + mov_l_mi (uae_p32(®s.fpcr), val); + return; + } + if (extra & 0x0800) { /* FPSR */ + FAIL (1); + return; + } + if (extra & 0x0400) { /* FPIAR */ + uae_u32 val = comp_get_ilong ((m68k_pc_offset += 4) - 4); + mov_l_mi (uae_p32(®s.fpiar), val); + return; + } + } + FAIL (1); + return; + case 5: /* FMOVE.L ControlReg, */ + if (!(opcode & 0x30)) { /* Dn or An */ + if (extra & 0x1000) { /* FPCR */ + mov_l_rm (opcode & 15, uae_p32(®s.fpcr)); return; + } + if (extra & 0x0800) { /* FPSR */ + FAIL (1); + return; + } + if (extra & 0x0400) { /* FPIAR */ + mov_l_rm (opcode & 15, uae_p32(®s.fpiar)); return; + } + } + FAIL (1); + return; + case 6: + case 7: + FAIL (1); + return; + case 2: /* from to FPx */ + dont_care_fflags (); + if ((extra & 0xfc00) == 0x5c00) { /* FMOVECR 
*/ + //write_log (_T("JIT FMOVECR %x\n"), opmode); + switch (opmode) { + case 0x00: + fmov_d_rm (dreg, uae_p32(&dhex_pi)); + break; + case 0x0b: + fmov_d_rm (dreg, uae_p32(&dhex_l10_2)); + break; + case 0x0c: + fmov_d_rm (dreg, uae_p32(&dhex_exp_1)); + break; + case 0x0d: + fmov_d_rm (dreg, uae_p32(&dhex_l2_e)); + break; + case 0x0e: + fmov_d_rm (dreg, uae_p32(&dhex_l10_e)); + break; + case 0x0f: + fmov_d_ri_0 (dreg); + break; + case 0x30: + fmov_d_rm (dreg, uae_p32(&dhex_ln_2)); + break; + case 0x31: + fmov_d_rm (dreg, uae_p32(&dhex_ln_10)); + break; + case 0x32: + fmov_d_ri_1 (dreg); + break; + case 0x33: + fmov_d_ri_10 (dreg); + break; + case 0x34: + fmov_d_ri_100 (dreg); + break; + case 0x35: + fmov_l_ri (dreg, 10000); + break; + case 0x36: + fmov_rm (dreg, uae_p32(&fp_1e8)); + break; + case 0x37: + fmov_d_rm (dreg, uae_p32(&dhex_1e16)); + break; + case 0x38: + fmov_d_rm (dreg, uae_p32(&dhex_1e32)); + break; + case 0x39: + fmov_d_rm (dreg, uae_p32(&dhex_1e64)); + break; + case 0x3a: + fmov_d_rm (dreg, uae_p32(&dhex_1e128)); + break; + case 0x3b: + fmov_d_rm (dreg, uae_p32(&dhex_1e256)); + break; + default: + FAIL (1); + return; + } + fmov_rr (FP_RESULT, dreg); + return; + } + if (opmode & 0x20) /* two operands, so we need a scratch reg */ + sreg = FS1; + else /* one operand only, thus we can load the argument into dreg */ + sreg = dreg; + if ((prec = comp_fp_get (opcode, extra, sreg)) < 0) { + FAIL (1); + return; + } + if (!opmode) { /* FMOVE ,FPx */ + fmov_rr (FP_RESULT, dreg); + return; + } + /* no break here for to dreg */ + case 0: /* directly from sreg to dreg */ + if (!source) { /* no */ + dont_care_fflags (); + sreg = (extra >> 10) & 7; + } + switch (opmode) { + case 0x00: /* FMOVE */ + fmov_rr (dreg, sreg); + break; + case 0x01: /* FINT */ + frndint_rr (dreg, sreg); + break; + case 0x02: /* FSINH */ + ffunc_rr (sinh, dreg, sreg); + break; + case 0x03: /* FINTRZ */ + frndintz_rr (dreg, sreg); + break; + case 0x04: /* FSQRT */ + fsqrt_rr (dreg, sreg); + 
break; + case 0x06: /* FLOGNP1 */ + ffunc_rr (log1p, dreg, sreg); + break; + case 0x08: /* FETOXM1 */ + ffunc_rr (expm1, dreg, sreg); + break; + case 0x09: /* FTANH */ + ffunc_rr (tanh, dreg, sreg); + break; + case 0x0a: /* FATAN */ + ffunc_rr (atan, dreg, sreg); + break; + case 0x0c: /* FASIN */ + ffunc_rr (asin, dreg, sreg); + break; + case 0x0d: /* FATANH */ + ffunc_rr (atanh, dreg, sreg); + break; + case 0x0e: /* FSIN */ + ffunc_rr (sin, dreg, sreg); + break; + case 0x0f: /* FTAN */ + ffunc_rr (tan, dreg, sreg); + break; + case 0x10: /* FETOX */ + ffunc_rr (exp, dreg, sreg); + break; + case 0x11: /* FTWOTOX */ + fpowx_rr (2, dreg, sreg); + break; + case 0x12: /* FTENTOX */ + fpowx_rr (10, dreg, sreg); + break; + case 0x14: /* FLOGN */ + ffunc_rr (log, dreg, sreg); + break; + case 0x15: /* FLOG10 */ + ffunc_rr (log10, dreg, sreg); + break; + case 0x16: /* FLOG2 */ + ffunc_rr (log2, dreg, sreg); + break; + case 0x18: /* FABS */ + fabs_rr (dreg, sreg); + break; + case 0x19: /* FCOSH */ + ffunc_rr (cosh, dreg, sreg); + break; + case 0x1a: /* FNEG */ + fneg_rr (dreg, sreg); + break; + case 0x1c: /* FACOS */ + ffunc_rr (acos, dreg, sreg); + break; + case 0x1d: /* FCOS */ + ffunc_rr (cos, dreg, sreg); + break; + case 0x20: /* FDIV */ + fdiv_rr (dreg, sreg); + break; + case 0x21: /* FMOD */ + fmod_rr (dreg, sreg); + break; + case 0x22: /* FADD */ + fadd_rr (dreg, sreg); + break; + case 0x23: /* FMUL */ + fmul_rr (dreg, sreg); + break; + case 0x24: /* FSGLDIV */ + fsgldiv_rr (dreg, sreg); + break; + case 0x60: /* FSDIV */ + fdiv_rr (dreg, sreg); + if (!currprefs.fpu_strict) /* faster, but less strict rounding */ + break; + fcuts_r (dreg); + break; + case 0x25: /* FREM */ + frem1_rr (dreg, sreg); + break; + case 0x27: /* FSGLMUL */ + fsglmul_rr (dreg, sreg); + break; + case 0x63: /* FSMUL */ + fmul_rr (dreg, sreg); + if (!currprefs.fpu_strict) /* faster, but less strict rounding */ + break; + fcuts_r (dreg); + break; + case 0x28: /* FSUB */ + fsub_rr (dreg, sreg); + 
break; + case 0x30: /* FSINCOS */ + case 0x31: + case 0x32: + case 0x33: + case 0x34: + case 0x35: + case 0x36: + case 0x37: + if (dreg == (extra & 7)) + ffunc_rr (sin, dreg, sreg); + else + fsincos_rr (dreg, extra & 7, sreg); + break; + case 0x38: /* FCMP */ + fmov_rr (FP_RESULT, dreg); + fsub_rr (FP_RESULT, sreg); + return; + case 0x3a: /* FTST */ + fmov_rr (FP_RESULT, sreg); + return; + case 0x40: /* FSMOVE */ + if (prec == 1 || !currprefs.fpu_strict) { + if (sreg != dreg) /* no */ + fmov_rr (dreg, sreg); + } + else { + fmovs_rr (dreg, sreg); + } + break; + case 0x44: /* FDMOVE */ + if (sreg != dreg) /* no */ + fmov_rr (dreg, sreg); + break; + case 0x41: /* FSSQRT */ + fsqrt_rr (dreg, sreg); + if (!currprefs.fpu_strict) /* faster, but less strict rounding */ + break; + fcuts_r (dreg); + break; + case 0x45: /* FDSQRT */ + fsqrt_rr (dreg, sreg); + break; + case 0x58: /* FSABS */ + fabs_rr (dreg, sreg); + if (prec != 1 && currprefs.fpu_strict) + fcuts_r (dreg); + break; + case 0x5a: /* FSNEG */ + fneg_rr (dreg, sreg); + if (prec != 1 && currprefs.fpu_strict) + fcuts_r (dreg); + break; + case 0x5c: /* FDABS */ + fabs_rr (dreg, sreg); + break; + case 0x5e: /* FDNEG */ + fneg_rr (dreg, sreg); + break; + case 0x62: /* FSADD */ + fadd_rr (dreg, sreg); + if (!currprefs.fpu_strict) /* faster, but less strict rounding */ + break; + fcuts_r (dreg); + break; + case 0x64: /* FDDIV */ + fdiv_rr (dreg, sreg); + break; + case 0x66: /* FDADD */ + fadd_rr (dreg, sreg); + break; + case 0x67: /* FDMUL */ + fmul_rr (dreg, sreg); + break; + case 0x68: /* FSSUB */ + fsub_rr (dreg, sreg); + if (!currprefs.fpu_strict) /* faster, but less strict rounding */ + break; + fcuts_r (dreg); + break; + case 0x6c: /* FDSUB */ + fsub_rr (dreg, sreg); + break; + default: + FAIL (1); + return; + } + fmov_rr (FP_RESULT, dreg); + return; + default: + write_log (_T ("Unsupported JIT-FPU instruction: 0x%04x %04x\n"), opcode, extra); + FAIL (1); + return; + } } #endif diff --git 
a/src/jit/compemu_midfunc_arm.cpp b/src/jit/compemu_midfunc_arm.cpp index 08ed40ce..8342fbb8 100644 --- a/src/jit/compemu_midfunc_arm.cpp +++ b/src/jit/compemu_midfunc_arm.cpp @@ -224,9 +224,6 @@ MIDFUNC(2,mov_l_rr,(W4 d, RR4 s)) live.nat[s].holds[live.nat[s].nholds] = d; live.nat[s].nholds++; -#if defined(DEBUG) && DEBUG > 1 - jit_log("Added %d to nreg %d(%d), now holds %d regs", d, s, live.state[d].realind, live.nat[s].nholds); -#endif unlock2(s); } MENDFUNC(2,mov_l_rr,(W4 d, RR4 s)) @@ -244,6 +241,14 @@ MIDFUNC(2,mov_l_mr,(IMM d, RR4 s)) } MENDFUNC(2,mov_l_mr,(IMM d, RR4 s)) +MIDFUNC(2,mov_l_rm,(W4 d, IMM s)) +{ + d = writereg(d, 4); + raw_mov_l_rm(d, s); + unlock2(d); +} +MENDFUNC(2,mov_l_rm,(W4 d, IMM s)) + MIDFUNC(2,mov_l_ri,(W4 d, IMM s)) { set_const(d, s); @@ -480,3 +485,435 @@ STATIC_INLINE void write_jmp_target(uae_u32* jmpaddr, cpuop_func* a) { STATIC_INLINE void emit_jmp_target(uae_u32 a) { emit_long((uae_u32)a); } + + +/************************************************************************* +* FPU stuff * +*************************************************************************/ + +MIDFUNC(1,f_forget_about,(FW r)) +{ + if (f_isinreg(r)) + f_disassociate(r); + live.fate[r].status=UNDEF; +} +MENDFUNC(1,f_forget_about,(FW r)) + +MIDFUNC(0,dont_care_fflags,(void)) +{ + f_disassociate(FP_RESULT); +} +MENDFUNC(0,dont_care_fflags,(void)) + +MIDFUNC(2,fmov_rr,(FW d, FR s)) +{ + if (d == s) { /* How pointless! 
*/ + return; + } + s = f_readreg(s); + d = f_writereg(d); + raw_fmov_rr(d, s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,fmov_rr,(FW d, FR s)) + +MIDFUNC(2,fmov_l_rr,(FW d, RR4 s)) +{ + s = readreg(s, 4); + d = f_writereg(d); + raw_fmov_l_rr(d, s); + f_unlock(d); + unlock2(s); +} +MENDFUNC(2,fmov_l_rr,(FW d, RR4 s)) + +MIDFUNC(2,fmov_s_rr,(FW d, RR4 s)) +{ + s = readreg(s, 4); + d = f_writereg(d); + raw_fmov_s_rr(d, s); + f_unlock(d); + unlock2(s); +} +MENDFUNC(2,fmov_s_rr,(FW d, RR4 s)) + +MIDFUNC(2,fmov_w_rr,(FW d, RR2 s)) +{ + s = readreg(s, 2); + d = f_writereg(d); + raw_fmov_w_rr(d, s); + f_unlock(d); + unlock2(s); +} +MENDFUNC(2,fmov_w_rr,(FW d, RR2 s)) + +MIDFUNC(2,fmov_b_rr,(FW d, RR1 s)) +{ + s = readreg(s, 1); + d = f_writereg(d); + raw_fmov_b_rr(d, s); + f_unlock(d); + unlock2(s); +} +MENDFUNC(2,fmov_b_rr,(FW d, RR1 s)) + +MIDFUNC(3,fmov_d_rrr,(FW d, RR4 s1, RR4 s2)) +{ + s1 = readreg(s1, 4); + s2 = readreg(s2, 4); + d = f_writereg(d); + raw_fmov_d_rrr(d, s1, s2); + f_unlock(d); + unlock2(s2); + unlock2(s1); +} +MENDFUNC(3,fmov_d_rrr,(FW d, RR4 s1, RR4 s2)) + +MIDFUNC(2,fmov_l_ri,(FW d, IMM i)) +{ + switch(i) { + case 0: + fmov_d_ri_0(d); + break; + case 1: + fmov_d_ri_1(d); + break; + case 10: + fmov_d_ri_10(d); + break; + case 100: + fmov_d_ri_100(d); + break; + default: + d = f_writereg(d); + compemu_raw_mov_l_ri(REG_WORK1, i); + raw_fmov_l_rr(d, REG_WORK1); + f_unlock(d); + } +} +MENDFUNC(2,fmov_l_ri,(FW d, IMM i)) + +MIDFUNC(2,fmov_s_ri,(FW d, IMM i)) +{ + d = f_writereg(d); + compemu_raw_mov_l_ri(REG_WORK1, i); + raw_fmov_s_rr(d, REG_WORK1); + f_unlock(d); +} +MENDFUNC(2,fmov_s_ri,(FW d, IMM i)) + +MIDFUNC(2,fmov_to_l_rr,(W4 d, FR s)) +{ + s = f_readreg(s); + d = writereg(d, 4); + raw_fmov_to_l_rr(d, s); + unlock2(d); + f_unlock(s); +} +MENDFUNC(2,fmov_to_l_rr,(W4 d, FR s)) + +MIDFUNC(2,fmov_to_s_rr,(W4 d, FR s)) +{ + s = f_readreg(s); + d = writereg(d, 4); + raw_fmov_to_s_rr(d, s); + unlock2(d); + f_unlock(s); +} +MENDFUNC(2,fmov_to_s_rr,(W4 d, 
FR s)) + +MIDFUNC(2,fmov_to_w_rr,(W4 d, FR s)) +{ + s = f_readreg(s); + d = rmw(d, 2, 4); + raw_fmov_to_w_rr(d, s); + unlock2(d); + f_unlock(s); +} +MENDFUNC(2,fmov_to_w_rr,(W4 d, FR s)) + +MIDFUNC(2,fmov_to_b_rr,(W4 d, FR s)) +{ + s = f_readreg(s); + d = rmw(d, 1, 4); + raw_fmov_to_b_rr(d, s); + unlock2(d); + f_unlock(s); +} +MENDFUNC(2,fmov_to_b_rr,(W4 d, FR s)) + +MIDFUNC(1,fmov_d_ri_0,(FW r)) +{ + r = f_writereg(r); + raw_fmov_d_ri_0(r); + f_unlock(r); +} +MENDFUNC(1,fmov_d_ri_0,(FW r)) + +MIDFUNC(1,fmov_d_ri_1,(FW r)) +{ + r = f_writereg(r); + raw_fmov_d_ri_1(r); + f_unlock(r); +} +MENDFUNC(1,fmov_d_ri_1,(FW r)) + +MIDFUNC(1,fmov_d_ri_10,(FW r)) +{ + r = f_writereg(r); + raw_fmov_d_ri_10(r); + f_unlock(r); +} +MENDFUNC(1,fmov_d_ri_10,(FW r)) + +MIDFUNC(1,fmov_d_ri_100,(FW r)) +{ + r = f_writereg(r); + raw_fmov_d_ri_100(r); + f_unlock(r); +} +MENDFUNC(1,fmov_d_ri_100,(FW r)) + +MIDFUNC(2,fmov_d_rm,(FW r, MEMR m)) +{ + r = f_writereg(r); + raw_fmov_d_rm(r, m); + f_unlock(r); +} +MENDFUNC(2,fmov_d_rm,(FW r, MEMR m)) + +MIDFUNC(2,fmovs_rm,(FW r, MEMR m)) +{ + r = f_writereg(r); + raw_fmovs_rm(r, m); + f_unlock(r); +} +MENDFUNC(2,fmovs_rm,(FW r, MEMR m)) + +MIDFUNC(2,fmov_rm,(FW r, MEMR m)) +{ + r = f_writereg(r); + raw_fmov_d_rm(r, m); + f_unlock(r); +} +MENDFUNC(2,fmov_rm,(FW r, MEMR m)) + +MIDFUNC(3,fmov_to_d_rrr,(W4 d1, W4 d2, FR s)) +{ + s = f_readreg(s); + d1 = writereg(d1, 4); + d2 = writereg(d2, 4); + raw_fmov_to_d_rrr(d1, d2, s); + unlock2(d2); + unlock2(d1); + f_unlock(s); +} +MENDFUNC(3,fmov_to_d_rrr,(W4 d1, W4 d2, FR s)) + +MIDFUNC(2,fsqrt_rr,(FW d, FR s)) +{ + s = f_readreg(s); + d = f_writereg(d); + raw_fsqrt_rr(d, s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,fsqrt_rr,(FW d, FR s)) + +MIDFUNC(2,fabs_rr,(FW d, FR s)) +{ + s = f_readreg(s); + d = f_writereg(d); + raw_fabs_rr(d, s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,fabs_rr,(FW d, FR s)) + +MIDFUNC(2,fneg_rr,(FW d, FR s)) +{ + s = f_readreg(s); + d = f_writereg(d); + raw_fneg_rr(d, s); + 
f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,fneg_rr,(FW d, FR s)) + +MIDFUNC(2,fdiv_rr,(FRW d, FR s)) +{ + s = f_readreg(s); + d = f_rmw(d); + raw_fdiv_rr(d, s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,fdiv_rr,(FRW d, FR s)) + +MIDFUNC(2,fadd_rr,(FRW d, FR s)) +{ + s = f_readreg(s); + d = f_rmw(d); + raw_fadd_rr(d, s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,fadd_rr,(FRW d, FR s)) + +MIDFUNC(2,fmul_rr,(FRW d, FR s)) +{ + s = f_readreg(s); + d = f_rmw(d); + raw_fmul_rr(d, s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,fmul_rr,(FRW d, FR s)) + +MIDFUNC(2,fsub_rr,(FRW d, FR s)) +{ + s = f_readreg(s); + d = f_rmw(d); + raw_fsub_rr(d, s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,fsub_rr,(FRW d, FR s)) + +MIDFUNC(2,frndint_rr,(FW d, FR s)) +{ + s = f_readreg(s); + d = f_writereg(d); + raw_frndint_rr(d, s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,frndint_rr,(FW d, FR s)) + +MIDFUNC(2,frndintz_rr,(FW d, FR s)) +{ + s = f_readreg(s); + d = f_writereg(d); + raw_frndintz_rr(d, s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,frndintz_rr,(FW d, FR s)) + +MIDFUNC(2,fmod_rr,(FRW d, FR s)) +{ + s = f_readreg(s); + d = f_rmw(d); + raw_fmod_rr(d, s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,fmod_rr,(FRW d, FR s)) + +MIDFUNC(2,fsgldiv_rr,(FRW d, FR s)) +{ + s = f_readreg(s); + d = f_rmw(d); + raw_fsgldiv_rr(d, s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,fsgldiv_rr,(FRW d, FR s)) + +MIDFUNC(1,fcuts_r,(FRW r)) +{ + r = f_rmw(r); + raw_fcuts_r(r); + f_unlock(r); +} +MENDFUNC(1,fcuts_r,(FRW r)) + +MIDFUNC(2,frem1_rr,(FRW d, FR s)) +{ + s = f_readreg(s); + d = f_rmw(d); + raw_frem1_rr(d, s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,frem1_rr,(FRW d, FR s)) + +MIDFUNC(2,fsglmul_rr,(FRW d, FR s)) +{ + s = f_readreg(s); + d = f_rmw(d); + raw_fsglmul_rr(d, s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,fsglmul_rr,(FRW d, FR s)) + +MIDFUNC(2,fmovs_rr,(FW d, FR s)) +{ + s = f_readreg(s); + d = f_writereg(d); + raw_fmovs_rr(d, s); + f_unlock(s); + f_unlock(d); 
+} +MENDFUNC(2,fmovs_rr,(FW d, FR s)) + +MIDFUNC(3,ffunc_rr,(double (*func)(double), FW d, FR s)) +{ + clobber_flags(); + prepare_for_call_1(); + prepare_for_call_2(); + + s = f_readreg(s); + d = f_writereg(d); + + raw_ffunc_rr(func, d, s); + + f_unlock(s); + f_unlock(d); +} +MENDFUNC(3,ffunc_rr,(double (*func)(double), FW d, FR s)) + +MIDFUNC(3,fsincos_rr,(FW d, FW c, FR s)) +{ + clobber_flags(); + prepare_for_call_1(); + prepare_for_call_2(); + + s = f_readreg(s); /* s for source */ + d = f_writereg(d); /* d for sine */ + c = f_writereg(c); /* c for cosine */ + + raw_ffunc_rr(cos, c, s); + raw_ffunc_rr(sin, d, s); + + f_unlock(s); + f_unlock(d); + f_unlock(c); +} +MENDFUNC(3,fsincos_rr,(FW d, FW c, FR s)) + +MIDFUNC(3,fpowx_rr,(uae_u32 x, FW d, FR s)) +{ + clobber_flags(); + prepare_for_call_1(); + prepare_for_call_2(); + + s = f_readreg(s); + d = f_writereg(d); + + raw_fpowx_rr(x, d, s); + + f_unlock(s); + f_unlock(d); + +} +MENDFUNC(3,fpowx_rr,(uae_u32 x, FW d, FR s)) + +MIDFUNC(1,fflags_into_flags,()) +{ + clobber_flags(); + fflags_into_flags_internal(); +} +MENDFUNC(1,fflags_into_flags,()) diff --git a/src/jit/compemu_midfunc_arm.h b/src/jit/compemu_midfunc_arm.h index 78eacc57..03fa9652 100644 --- a/src/jit/compemu_midfunc_arm.h +++ b/src/jit/compemu_midfunc_arm.h @@ -50,6 +50,7 @@ DECLARE_MIDFUNC(lea_l_brr_indexed(W4 d, RR4 s, RR4 index, IMM factor, IMM offset DECLARE_MIDFUNC(lea_l_rr_indexed(W4 d, RR4 s, RR4 index, IMM factor)); DECLARE_MIDFUNC(mov_l_rr(W4 d, RR4 s)); DECLARE_MIDFUNC(mov_l_mr(IMM d, RR4 s)); +DECLARE_MIDFUNC(mov_l_rm(W4 d, IMM s)); DECLARE_MIDFUNC(mov_b_rm(W1 d, IMM s)); DECLARE_MIDFUNC(mov_l_ri(W4 d, IMM s)); DECLARE_MIDFUNC(mov_w_ri(W2 d, IMM s)); @@ -66,3 +67,44 @@ DECLARE_MIDFUNC(make_flags_live(void)); DECLARE_MIDFUNC(forget_about(W4 r)); DECLARE_MIDFUNC(f_forget_about(FW r)); +DECLARE_MIDFUNC(dont_care_fflags(void)); +DECLARE_MIDFUNC(fmov_rr(FW d, FR s)); + +DECLARE_MIDFUNC(fmov_l_rr(FW d, RR4 s)); +DECLARE_MIDFUNC(fmov_s_rr(FW d, 
RR4 s)); +DECLARE_MIDFUNC(fmov_w_rr(FW d, RR2 s)); +DECLARE_MIDFUNC(fmov_b_rr(FW d, RR1 s)); +DECLARE_MIDFUNC(fmov_d_rrr(FW d, RR4 s1, RR4 s2)); +DECLARE_MIDFUNC(fmov_l_ri(FW d, IMM i)); +DECLARE_MIDFUNC(fmov_s_ri(FW d, IMM i)); +DECLARE_MIDFUNC(fmov_to_l_rr(W4 d, FR s)); +DECLARE_MIDFUNC(fmov_to_s_rr(W4 d, FR s)); +DECLARE_MIDFUNC(fmov_to_w_rr(W4 d, FR s)); +DECLARE_MIDFUNC(fmov_to_b_rr(W4 d, FR s)); +DECLARE_MIDFUNC(fmov_d_ri_0(FW d)); +DECLARE_MIDFUNC(fmov_d_ri_1(FW d)); +DECLARE_MIDFUNC(fmov_d_ri_10(FW d)); +DECLARE_MIDFUNC(fmov_d_ri_100(FW d)); +DECLARE_MIDFUNC(fmov_d_rm(FW r, MEMR m)); +DECLARE_MIDFUNC(fmovs_rm(FW r, MEMR m)); +DECLARE_MIDFUNC(fmov_rm(FW r, MEMR m)); +DECLARE_MIDFUNC(fmov_to_d_rrr(W4 d1, W4 d2, FR s)); +DECLARE_MIDFUNC(fsqrt_rr(FW d, FR s)); +DECLARE_MIDFUNC(fabs_rr(FW d, FR s)); +DECLARE_MIDFUNC(fneg_rr(FW d, FR s)); +DECLARE_MIDFUNC(fdiv_rr(FRW d, FR s)); +DECLARE_MIDFUNC(fadd_rr(FRW d, FR s)); +DECLARE_MIDFUNC(fmul_rr(FRW d, FR s)); +DECLARE_MIDFUNC(fsub_rr(FRW d, FR s)); +DECLARE_MIDFUNC(frndint_rr(FW d, FR s)); +DECLARE_MIDFUNC(frndintz_rr(FW d, FR s)); +DECLARE_MIDFUNC(fmod_rr(FRW d, FR s)); +DECLARE_MIDFUNC(fsgldiv_rr(FRW d, FR s)); +DECLARE_MIDFUNC(fcuts_r(FRW r)); +DECLARE_MIDFUNC(frem1_rr(FRW d, FR s)); +DECLARE_MIDFUNC(fsglmul_rr(FRW d, FR s)); +DECLARE_MIDFUNC(fmovs_rr(FW d, FR s)); +DECLARE_MIDFUNC(ffunc_rr(double (*func)(double), FW d, FR s)); +DECLARE_MIDFUNC(fsincos_rr(FW d, FW c, FR s)); +DECLARE_MIDFUNC(fpowx_rr(uae_u32 x, FW d, FR s)); +DECLARE_MIDFUNC(fflags_into_flags()); diff --git a/src/jit/compemu_support.cpp b/src/jit/compemu_support.cpp index 699033ce..2f2d6b33 100644 --- a/src/jit/compemu_support.cpp +++ b/src/jit/compemu_support.cpp @@ -32,6 +32,8 @@ #define writemem_special writemem #define readmem_special readmem +#include + #include "sysconfig.h" #include "sysdeps.h" @@ -108,7 +110,11 @@ const int follow_const_jumps = 0; static uae_u32 cache_size = 0; // Size of total cache allocated for compiled blocks static 
uae_u32 current_cache_size = 0; // Cache grows upwards: how much has been consumed already +#ifdef USE_JIT_FPU +#define avoid_fpu (!currprefs.compfpu) +#else #define avoid_fpu (true) +#endif static const int align_loops = 0; // Align the start of loops static const int align_jumps = 0; // Align the start of jumps static int optcount[10] = { @@ -646,13 +652,15 @@ bool check_prefs_changed_comp(bool checkonly) { bool changed = 0; - if (currprefs.fpu_strict != changed_prefs.fpu_strict || + if (currprefs.compfpu != changed_prefs.compfpu || + currprefs.fpu_strict != changed_prefs.fpu_strict || currprefs.cachesize != changed_prefs.cachesize) changed = 1; if (checkonly) return changed; + currprefs.compfpu = changed_prefs.compfpu; currprefs.fpu_strict = changed_prefs.fpu_strict; if (currprefs.cachesize != changed_prefs.cachesize) { @@ -955,6 +963,7 @@ static void evict(int r) if (live.nat[rr].nholds != live.state[r].realind) { /* Was not last */ int topreg = live.nat[rr].holds[live.nat[rr].nholds]; int thisind = live.state[r].realind; + live.nat[rr].holds[thisind] = topreg; live.state[topreg].realind = thisind; } @@ -1343,6 +1352,142 @@ static int rmw(int r, int wsize, int rsize) return rmw_general(r, wsize, rsize); } +/******************************************************************** + * FPU register status handling. EMIT TIME! 
* + ********************************************************************/ + +STATIC_INLINE void f_tomem_drop(int r) +{ + if (live.fate[r].status == DIRTY) { + compemu_raw_fmov_mr_drop((uintptr)live.fate[r].mem, live.fate[r].realreg); + live.fate[r].status = INMEM; + } +} + + +STATIC_INLINE int f_isinreg(int r) +{ + return live.fate[r].status == CLEAN || live.fate[r].status == DIRTY; +} + +STATIC_INLINE void f_evict(int r) +{ + int rr; + + if (!f_isinreg(r)) + return; + rr = live.fate[r].realreg; + f_tomem_drop(r); + + live.fat[rr].nholds = 0; + live.fate[r].status = INMEM; + live.fate[r].realreg = -1; +} + +STATIC_INLINE void f_free_nreg(int r) +{ + int vr; + vr = live.fat[r].holds; + f_evict(vr); +} + + +/* Use with care! */ +STATIC_INLINE void f_isclean(int r) +{ + if (!f_isinreg(r)) + return; + live.fate[r].status = CLEAN; +} + +STATIC_INLINE void f_disassociate(int r) +{ + f_isclean(r); + f_evict(r); +} + + + +static int f_alloc_reg(int r, int willclobber) +{ + int bestreg; + + if(r < 8) + bestreg = r + 8; // map real Amiga reg to ARM VFP reg 8-15 + else + bestreg = r - 8; // map FP_RESULT, FS1, FS2 or FS3 to ARM VFP reg 0-3 + + if (!willclobber) { + if (live.fate[r].status == INMEM) { + compemu_raw_fmov_rm(bestreg, (uintptr)live.fate[r].mem); + live.fate[r].status=CLEAN; + } + } + else { + live.fate[r].status = DIRTY; + } + live.fate[r].realreg=bestreg; + live.fat[bestreg].holds = r; + live.fat[bestreg].nholds = 1; + + return bestreg; +} + +STATIC_INLINE void f_unlock(int r) +{ +} + +STATIC_INLINE int f_readreg(int r) +{ + int answer=-1; + + if (f_isinreg(r)) { + answer = live.fate[r].realreg; + } + /* either the value was in memory to start with, or it was evicted and + is in memory now */ + if (answer < 0) + answer = f_alloc_reg(r,0); + + return answer; +} + +STATIC_INLINE int f_writereg(int r) +{ + int answer = -1; + + if (f_isinreg(r)) { + answer = live.fate[r].realreg; + } + if (answer < 0) { + answer = f_alloc_reg(r,1); + } + live.fate[r].status = DIRTY; 
+ return answer; +} + +STATIC_INLINE int f_rmw(int r) +{ + int n; + + if (f_isinreg(r)) { + n = live.fate[r].realreg; + } + else + n = f_alloc_reg(r,0); + live.fate[r].status = DIRTY; + return n; +} + +static void fflags_into_flags_internal(void) +{ + int r; + + r = f_readreg(FP_RESULT); + raw_fflags_into_flags(r); + f_unlock(r); + live_flags(); +} #if defined(CPU_arm) @@ -1379,6 +1524,7 @@ void sync_m68k_pc(void) struct scratch_t { uae_u32 regs[VREGS]; + fpu_register fregs[VFREGS]; }; static scratch_t scratch; @@ -1479,6 +1625,12 @@ void init_comp(void) set_status(i, UNDEF); } + for (i=0;i 0) + f_free_nreg(i); + live.flags_in_flags = TRASH; /* Note: We assume we already rescued the flags at the very start of the call_r functions! */ @@ -2038,7 +2231,6 @@ STATIC_INLINE int block_check_checksum(blockinfo* bi) means we have to move it into the needs-to-be-flushed list */ bi->handler_to_use = bi->handler; set_dhtu(bi, bi->direct_handler); - bi->status = BI_CHECKING; isgood = called_check_checksum(bi) != 0; } @@ -2694,7 +2886,7 @@ void compile_block(cpu_history* pc_hist, int blocklen, int totcycles) if (next_pc_p) { /* A branch was registered */ uintptr t1 = next_pc_p; uintptr t2 = taken_pc_p; - int cc = branch_cc; + int cc = branch_cc; // this is native (ARM) condition code uae_u32* branchadd; uae_u32* tba; @@ -2707,7 +2899,10 @@ void compile_block(cpu_history* pc_hist, int blocklen, int totcycles) the 68k branch is taken. */ t1 = taken_pc_p; t2 = next_pc_p; - cc = branch_cc^1; + if(cc < NATIVE_CC_AL) + cc = branch_cc^1; + else if(cc > NATIVE_CC_AL) + cc = 0x10 | (branch_cc ^ 0xf); } tmp = live; /* ouch! This is big... 
*/ diff --git a/src/jit/compstbl.cpp b/src/jit/compstbl.cpp index c651d410..c04e1724 100644 --- a/src/jit/compstbl.cpp +++ b/src/jit/compstbl.cpp @@ -1803,32 +1803,32 @@ extern const struct comptbl op_smalltbl_0_comp_ff[] = { { NULL, 0x00000001, 61488 }, /* MMUOP030 */ { NULL, 0x00000001, 61496 }, /* MMUOP030 */ { NULL, 0x00000001, 61497 }, /* MMUOP030 */ -{ NULL, 0x00000022, 61952 }, /* FPP */ -{ NULL, 0x00000022, 61960 }, /* FPP */ -{ NULL, 0x00000022, 61968 }, /* FPP */ -{ NULL, 0x00000022, 61976 }, /* FPP */ -{ NULL, 0x00000022, 61984 }, /* FPP */ -{ NULL, 0x00000022, 61992 }, /* FPP */ -{ NULL, 0x00000022, 62000 }, /* FPP */ -{ NULL, 0x00000022, 62008 }, /* FPP */ -{ NULL, 0x00000022, 62009 }, /* FPP */ -{ NULL, 0x00000022, 62010 }, /* FPP */ -{ NULL, 0x00000022, 62011 }, /* FPP */ -{ NULL, 0x00000022, 62012 }, /* FPP */ -{ NULL, 0x00000006, 62016 }, /* FScc */ +{ op_f200_0_comp_ff, 0x00000022, 61952 }, /* FPP */ +{ op_f208_0_comp_ff, 0x00000022, 61960 }, /* FPP */ +{ op_f210_0_comp_ff, 0x00000022, 61968 }, /* FPP */ +{ op_f218_0_comp_ff, 0x00000022, 61976 }, /* FPP */ +{ op_f220_0_comp_ff, 0x00000022, 61984 }, /* FPP */ +{ op_f228_0_comp_ff, 0x00000022, 61992 }, /* FPP */ +{ op_f230_0_comp_ff, 0x00000022, 62000 }, /* FPP */ +{ op_f238_0_comp_ff, 0x00000022, 62008 }, /* FPP */ +{ op_f239_0_comp_ff, 0x00000022, 62009 }, /* FPP */ +{ op_f23a_0_comp_ff, 0x00000022, 62010 }, /* FPP */ +{ op_f23b_0_comp_ff, 0x00000022, 62011 }, /* FPP */ +{ op_f23c_0_comp_ff, 0x00000022, 62012 }, /* FPP */ +{ op_f240_0_comp_ff, 0x00000006, 62016 }, /* FScc */ { NULL, 0x00000021, 62024 }, /* FDBcc */ -{ NULL, 0x00000006, 62032 }, /* FScc */ -{ NULL, 0x00000006, 62040 }, /* FScc */ -{ NULL, 0x00000006, 62048 }, /* FScc */ -{ NULL, 0x00000006, 62056 }, /* FScc */ -{ NULL, 0x00000006, 62064 }, /* FScc */ -{ NULL, 0x00000006, 62072 }, /* FScc */ -{ NULL, 0x00000006, 62073 }, /* FScc */ +{ op_f250_0_comp_ff, 0x00000006, 62032 }, /* FScc */ +{ op_f258_0_comp_ff, 0x00000006, 62040 }, /* 
FScc */ +{ op_f260_0_comp_ff, 0x00000006, 62048 }, /* FScc */ +{ op_f268_0_comp_ff, 0x00000006, 62056 }, /* FScc */ +{ op_f270_0_comp_ff, 0x00000006, 62064 }, /* FScc */ +{ op_f278_0_comp_ff, 0x00000006, 62072 }, /* FScc */ +{ op_f279_0_comp_ff, 0x00000006, 62073 }, /* FScc */ { NULL, 0x00000021, 62074 }, /* FTRAPcc */ { NULL, 0x00000021, 62075 }, /* FTRAPcc */ { NULL, 0x00000021, 62076 }, /* FTRAPcc */ -{ NULL, 0x00000005, 62080 }, /* FBcc */ -{ NULL, 0x00000005, 62144 }, /* FBcc */ +{ op_f280_0_comp_ff, 0x00000005, 62080 }, /* FBcc */ +{ op_f2c0_0_comp_ff, 0x00000005, 62144 }, /* FBcc */ { NULL, 0x00000020, 62224 }, /* FSAVE */ { NULL, 0x00000020, 62240 }, /* FSAVE */ { NULL, 0x00000020, 62248 }, /* FSAVE */ @@ -3675,32 +3675,32 @@ extern const struct comptbl op_smalltbl_0_comp_nf[] = { { NULL, 0x00000001, 61488 }, /* MMUOP030 */ { NULL, 0x00000001, 61496 }, /* MMUOP030 */ { NULL, 0x00000001, 61497 }, /* MMUOP030 */ -{ NULL, 0x00000022, 61952 }, /* FPP */ -{ NULL, 0x00000022, 61960 }, /* FPP */ -{ NULL, 0x00000022, 61968 }, /* FPP */ -{ NULL, 0x00000022, 61976 }, /* FPP */ -{ NULL, 0x00000022, 61984 }, /* FPP */ -{ NULL, 0x00000022, 61992 }, /* FPP */ -{ NULL, 0x00000022, 62000 }, /* FPP */ -{ NULL, 0x00000022, 62008 }, /* FPP */ -{ NULL, 0x00000022, 62009 }, /* FPP */ -{ NULL, 0x00000022, 62010 }, /* FPP */ -{ NULL, 0x00000022, 62011 }, /* FPP */ -{ NULL, 0x00000022, 62012 }, /* FPP */ -{ NULL, 0x00000006, 62016 }, /* FScc */ +{ op_f200_0_comp_nf, 0x00000022, 61952 }, /* FPP */ +{ op_f208_0_comp_nf, 0x00000022, 61960 }, /* FPP */ +{ op_f210_0_comp_nf, 0x00000022, 61968 }, /* FPP */ +{ op_f218_0_comp_nf, 0x00000022, 61976 }, /* FPP */ +{ op_f220_0_comp_nf, 0x00000022, 61984 }, /* FPP */ +{ op_f228_0_comp_nf, 0x00000022, 61992 }, /* FPP */ +{ op_f230_0_comp_nf, 0x00000022, 62000 }, /* FPP */ +{ op_f238_0_comp_nf, 0x00000022, 62008 }, /* FPP */ +{ op_f239_0_comp_nf, 0x00000022, 62009 }, /* FPP */ +{ op_f23a_0_comp_nf, 0x00000022, 62010 }, /* FPP */ +{ 
op_f23b_0_comp_nf, 0x00000022, 62011 }, /* FPP */ +{ op_f23c_0_comp_nf, 0x00000022, 62012 }, /* FPP */ +{ op_f240_0_comp_nf, 0x00000006, 62016 }, /* FScc */ { NULL, 0x00000021, 62024 }, /* FDBcc */ -{ NULL, 0x00000006, 62032 }, /* FScc */ -{ NULL, 0x00000006, 62040 }, /* FScc */ -{ NULL, 0x00000006, 62048 }, /* FScc */ -{ NULL, 0x00000006, 62056 }, /* FScc */ -{ NULL, 0x00000006, 62064 }, /* FScc */ -{ NULL, 0x00000006, 62072 }, /* FScc */ -{ NULL, 0x00000006, 62073 }, /* FScc */ +{ op_f250_0_comp_nf, 0x00000006, 62032 }, /* FScc */ +{ op_f258_0_comp_nf, 0x00000006, 62040 }, /* FScc */ +{ op_f260_0_comp_nf, 0x00000006, 62048 }, /* FScc */ +{ op_f268_0_comp_nf, 0x00000006, 62056 }, /* FScc */ +{ op_f270_0_comp_nf, 0x00000006, 62064 }, /* FScc */ +{ op_f278_0_comp_nf, 0x00000006, 62072 }, /* FScc */ +{ op_f279_0_comp_nf, 0x00000006, 62073 }, /* FScc */ { NULL, 0x00000021, 62074 }, /* FTRAPcc */ { NULL, 0x00000021, 62075 }, /* FTRAPcc */ { NULL, 0x00000021, 62076 }, /* FTRAPcc */ -{ NULL, 0x00000005, 62080 }, /* FBcc */ -{ NULL, 0x00000005, 62144 }, /* FBcc */ +{ op_f280_0_comp_nf, 0x00000005, 62080 }, /* FBcc */ +{ op_f2c0_0_comp_nf, 0x00000005, 62144 }, /* FBcc */ { NULL, 0x00000020, 62224 }, /* FSAVE */ { NULL, 0x00000020, 62240 }, /* FSAVE */ { NULL, 0x00000020, 62248 }, /* FSAVE */ diff --git a/src/jit/comptbl.h b/src/jit/comptbl.h index 63caadfa..a7b1550b 100644 --- a/src/jit/comptbl.h +++ b/src/jit/comptbl.h @@ -1446,6 +1446,28 @@ extern compop_func op_e7e8_0_comp_ff; extern compop_func op_e7f0_0_comp_ff; extern compop_func op_e7f8_0_comp_ff; extern compop_func op_e7f9_0_comp_ff; +extern compop_func op_f200_0_comp_ff; +extern compop_func op_f208_0_comp_ff; +extern compop_func op_f210_0_comp_ff; +extern compop_func op_f218_0_comp_ff; +extern compop_func op_f220_0_comp_ff; +extern compop_func op_f228_0_comp_ff; +extern compop_func op_f230_0_comp_ff; +extern compop_func op_f238_0_comp_ff; +extern compop_func op_f239_0_comp_ff; +extern compop_func 
op_f23a_0_comp_ff; +extern compop_func op_f23b_0_comp_ff; +extern compop_func op_f23c_0_comp_ff; +extern compop_func op_f240_0_comp_ff; +extern compop_func op_f250_0_comp_ff; +extern compop_func op_f258_0_comp_ff; +extern compop_func op_f260_0_comp_ff; +extern compop_func op_f268_0_comp_ff; +extern compop_func op_f270_0_comp_ff; +extern compop_func op_f278_0_comp_ff; +extern compop_func op_f279_0_comp_ff; +extern compop_func op_f280_0_comp_ff; +extern compop_func op_f2c0_0_comp_ff; extern compop_func op_f600_0_comp_ff; extern compop_func op_f608_0_comp_ff; extern compop_func op_f610_0_comp_ff; @@ -2893,6 +2915,28 @@ extern compop_func op_e7e8_0_comp_nf; extern compop_func op_e7f0_0_comp_nf; extern compop_func op_e7f8_0_comp_nf; extern compop_func op_e7f9_0_comp_nf; +extern compop_func op_f200_0_comp_nf; +extern compop_func op_f208_0_comp_nf; +extern compop_func op_f210_0_comp_nf; +extern compop_func op_f218_0_comp_nf; +extern compop_func op_f220_0_comp_nf; +extern compop_func op_f228_0_comp_nf; +extern compop_func op_f230_0_comp_nf; +extern compop_func op_f238_0_comp_nf; +extern compop_func op_f239_0_comp_nf; +extern compop_func op_f23a_0_comp_nf; +extern compop_func op_f23b_0_comp_nf; +extern compop_func op_f23c_0_comp_nf; +extern compop_func op_f240_0_comp_nf; +extern compop_func op_f250_0_comp_nf; +extern compop_func op_f258_0_comp_nf; +extern compop_func op_f260_0_comp_nf; +extern compop_func op_f268_0_comp_nf; +extern compop_func op_f270_0_comp_nf; +extern compop_func op_f278_0_comp_nf; +extern compop_func op_f279_0_comp_nf; +extern compop_func op_f280_0_comp_nf; +extern compop_func op_f2c0_0_comp_nf; extern compop_func op_f600_0_comp_nf; extern compop_func op_f608_0_comp_nf; extern compop_func op_f610_0_comp_nf; diff --git a/src/jit/flags_arm.h b/src/jit/flags_arm.h deleted file mode 100644 index c9a60490..00000000 --- a/src/jit/flags_arm.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * compiler/flags_arm.h - Native flags definitions for ARM - * - * Copyright (c) 
2013 Jens Heitmann of ARAnyM dev team (see AUTHORS) - * - * Inspired by Christian Bauer's Basilisk II - * - * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer - * - * Adaptation for Basilisk II and improvements, copyright 2000-2002 - * Gwenole Beauchesne - * - * Basilisk II (C) 1997-2002 Christian Bauer - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#ifndef NATIVE_FLAGS_ARM_H -#define NATIVE_FLAGS_ARM_H - -/* Native integer code conditions */ -enum { - NATIVE_CC_EQ = 0, - NATIVE_CC_NE = 1, - NATIVE_CC_CS = 2, - NATIVE_CC_CC = 3, - NATIVE_CC_MI = 4, - NATIVE_CC_PL = 5, - NATIVE_CC_VS = 6, - NATIVE_CC_VC = 7, - NATIVE_CC_HI = 8, - NATIVE_CC_LS = 9, - NATIVE_CC_GE = 10, - NATIVE_CC_LT = 11, - NATIVE_CC_GT = 12, - NATIVE_CC_LE = 13, - NATIVE_CC_AL = 14 -}; - -#endif /* NATIVE_FLAGS_ARM_H */ diff --git a/src/jit/gencomp_arm.cpp b/src/jit/gencomp_arm.cpp index eab27497..5eb0791b 100644 --- a/src/jit/gencomp_arm.cpp +++ b/src/jit/gencomp_arm.cpp @@ -7,9 +7,6 @@ * Adaptation for ARAnyM/ARM, copyright 2001-2015 * Milan Jurik, Jens Heitmann * - * Adaptation for Basilisk II and improvements, copyright 2000-2005 - * Gwenole Beauchesne - * * Basilisk II (C) 1997-2005 Christian Bauer * * This program is free software; you can redistribute it and/or modify @@ -121,13 +118,14 @@ #define 
DISABLE_I_ROXLW #define DISABLE_I_ROXRW //#define DISABLE_I_MULL -#define DISABLE_I_FPP -#define DISABLE_I_FBCC -#define DISABLE_I_FSCC +//#define DISABLE_I_FPP +//#define DISABLE_I_FBCC +//#define DISABLE_I_FSCC //#define DISABLE_I_MOVE16 #define DISABLE_I_DIVU // DIVU works, but we have to think about exceptions. No big performance enhancement. + #define RETURN "return 0;" #define BOOL_TYPE "int" @@ -1222,9 +1220,6 @@ static void gen_dbcc(uae_u32 opcode, struct instr *curi, char* ssize) { comprintf("\tarm_ADD_l_ri(PC_P, m68k_pc_offset);\n"); comprintf("\tm68k_pc_offset=0;\n"); - start_brace(); - comprintf("\tint nsrc = scratchie++;\n"); - if (curi->cc >= 2) { comprintf("\tmake_flags_live();\n"); /* Load the flags */ } @@ -1262,7 +1257,6 @@ static void gen_dbcc(uae_u32 opcode, struct instr *curi, char* ssize) { break; default: abort(); } - genastore("src", curi->smode, "srcreg", curi->size, "src"); gen_update_next_handler(); } @@ -2071,7 +2065,6 @@ gen_opcode(unsigned long int opcode) { case i_SBCD: failure; - /* I don't think so! */ break; case i_ADD: @@ -2097,7 +2090,6 @@ gen_opcode(unsigned long int opcode) { case i_ABCD: failure; - /* No BCD maths for me.... */ break; case i_NEG: @@ -2116,7 +2108,6 @@ gen_opcode(unsigned long int opcode) { case i_NBCD: failure; - /* Nope! 
*/ break; case i_CLR: @@ -2362,7 +2353,8 @@ gen_opcode(unsigned long int opcode) { isjump; genamode(curi->smode, "srcreg", curi->size, "src", 0, 0); start_brace(); - comprintf("\tuae_u32 retadd=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+m68k_pc_offset;\n"); + comprintf( + "\tuae_u32 retadd=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+m68k_pc_offset;\n"); comprintf("\tint ret=scratchie++;\n" "\tmov_l_ri(ret,retadd);\n" "\tsub_l_ri(15,4);\n" @@ -2391,10 +2383,12 @@ gen_opcode(unsigned long int opcode) { #ifdef DISABLE_I_BSR failure; #endif - is_const_jump; + is_const_jump + ; genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); start_brace(); - comprintf("\tuae_u32 retadd=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+m68k_pc_offset;\n"); + comprintf( + "\tuae_u32 retadd=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+m68k_pc_offset;\n"); comprintf("\tint ret=scratchie++;\n" "\tmov_l_ri(ret,retadd);\n" "\tsub_l_ri(15,4);\n" @@ -2427,9 +2421,10 @@ gen_opcode(unsigned long int opcode) { comprintf("\tv2 = get_const(src);\n"); comprintf("\tregister_branch(v1, v2, %d);\n", cond_codes[curi->cc]); comprintf("\tmake_flags_live();\n"); /* Load the flags */ - isjump; + isjump; } else { - is_const_jump; + is_const_jump + ; } switch (curi->cc) { @@ -3124,11 +3119,16 @@ generate_one_opcode(int rp, int noflags) fprintf(stblfile, "{ NULL, 0x%08x, %ld }, /* %s */\n", flags, opcode, name); com_discard(); } else { - const char *tbl = noflags ? 
"nf" : "ff"; printf ("/* %s */\n", outopcode (opcode)); - fprintf(stblfile, "{ op_%lx_%d_comp_%s, 0x%08x, %ld }, /* %s */\n", opcode, postfix, tbl, flags, opcode, name); - fprintf(headerfile, "extern compop_func op_%lx_%d_comp_%s;\n", opcode, postfix, tbl); - printf("uae_u32 REGPARAM2 op_%lx_%d_comp_%s(uae_u32 opcode)\n{\n", opcode, postfix, tbl); + if (noflags) { + fprintf(stblfile, "{ op_%lx_%d_comp_nf, 0x%08x, %ld }, /* %s */\n", opcode, postfix, flags, opcode, name); + fprintf(headerfile, "extern compop_func op_%lx_%d_comp_nf;\n", opcode, postfix); + printf("uae_u32 REGPARAM2 op_%lx_%d_comp_nf(uae_u32 opcode)\n{\n", opcode, postfix); + } else { + fprintf(stblfile, "{ op_%lx_%d_comp_ff, 0x%08x, %ld }, /* %s */\n", opcode, postfix, flags, opcode, name); + fprintf(headerfile, "extern compop_func op_%lx_%d_comp_ff;\n", opcode, postfix); + printf("uae_u32 REGPARAM2 op_%lx_%d_comp_ff(uae_u32 opcode)\n{\n", opcode, postfix); + } com_flush(); } } diff --git a/src/newcpu.cpp b/src/newcpu.cpp index 3d23bfac..6870e919 100644 --- a/src/newcpu.cpp +++ b/src/newcpu.cpp @@ -348,8 +348,8 @@ static void build_cpufunctbl (void) write_log(_T("CPU=%d, FPU=%d%s, JIT%s=%d."), currprefs.cpu_model, - currprefs.fpu_model, currprefs.fpu_model ? (currprefs.fpu_softfloat ? _T(" (softfloat)") : _T(" (host)")) : _T(""), - currprefs.cachesize ? _T("=CPU") : _T(""), + currprefs.fpu_model, currprefs.fpu_model ? _T(" (host)") : _T(""), + currprefs.cachesize ? (currprefs.compfpu ? 
_T("=CPU/FPU") : _T("=CPU")) : _T(""), currprefs.cachesize); regs.address_space_mask = 0xffffffff; @@ -428,8 +428,7 @@ static int check_prefs_changed_cpu2(void) || currprefs.cpu_model != changed_prefs.cpu_model || currprefs.fpu_model != changed_prefs.fpu_model || currprefs.fpu_no_unimplemented != changed_prefs.fpu_no_unimplemented - || currprefs.cpu_compatible != changed_prefs.cpu_compatible - || currprefs.fpu_softfloat != changed_prefs.fpu_softfloat) { + || currprefs.cpu_compatible != changed_prefs.cpu_compatible) { cpu_prefs_changed_flag |= 1; } if (changed @@ -1740,6 +1739,10 @@ bool is_hardreset(void) return cpu_hardreset; } +#ifdef USE_JIT_FPU +static uae_u8 fp_buffer[9 * 8]; /* save_host_fp_regs stores d7-d15: nine 64-bit registers = 72 bytes */ +#endif + void m68k_go (int may_quit) { int hardboot = 1; @@ -1750,6 +1753,10 @@ void m68k_go (int may_quit) abort (); } +#ifdef USE_JIT_FPU + save_host_fp_regs(fp_buffer); +#endif + reset_frame_rate_hack (); update_68k_cycles (); @@ -1808,7 +1815,6 @@ void m68k_go (int may_quit) if (cpu_prefs_changed_flag & 1) { uaecptr pc = m68k_getpc(); prefs_changed_cpu(); - fpu_modechange(); build_cpufunctbl(); m68k_setpc_normal(pc); fill_prefetch(); @@ -1862,6 +1868,10 @@ void m68k_go (int may_quit) regs.pc_p = NULL; regs.pc_oldp = NULL; +#ifdef USE_JIT_FPU + restore_host_fp_regs(fp_buffer); +#endif + in_m68k_go--; } @@ -1977,8 +1987,7 @@ uae_u8 *restore_cpu_extra (uae_u8 *src) currprefs.m68k_speed = changed_prefs.m68k_speed = -1; if (flags & 16) currprefs.m68k_speed = changed_prefs.m68k_speed = (flags >> 24) * CYCLE_UNIT; - if (flags & 32) - currprefs.m68k_speed = changed_prefs.m68k_speed = -30; + return src; } @@ -1997,7 +2006,6 @@ uae_u8 *save_cpu_extra (int *len, uae_u8 *dstptr) flags |= currprefs.m68k_speed < 0 ? 4 : 0; flags |= currprefs.cachesize > 0 ? 8 : 0; flags |= currprefs.m68k_speed > 0 ? 16 : 0; - flags |= currprefs.m68k_speed < -25 ?
32 : 0; if (currprefs.m68k_speed > 0) flags |= (currprefs.m68k_speed / CYCLE_UNIT) << 24; save_u32 (flags); diff --git a/src/osdep/amiberry.cpp b/src/osdep/amiberry.cpp index 5ddcd296..6ebc6d67 100644 --- a/src/osdep/amiberry.cpp +++ b/src/osdep/amiberry.cpp @@ -1011,32 +1011,38 @@ int handle_msgpump() break; case SDL_KEYDOWN: - // If the Enter GUI key was pressed, handle it - if (enter_gui_key && rEvent.key.keysym.sym == enter_gui_key && rEvent.key.repeat == 0) +#ifdef USE_SDL2 + if (rEvent.key.repeat == 0) { - inputdevice_add_inputcode(AKS_ENTERGUI, 1, nullptr); - break; - } - - // If the Quit emulator key was pressed, handle it - if (quit_key && rEvent.key.keysym.sym == quit_key && rEvent.key.repeat == 0) - { - inputdevice_add_inputcode(AKS_QUIT, 1, nullptr); - break; - } +#endif + // If the Enter GUI key was pressed, handle it + if (enter_gui_key && rEvent.key.keysym.sym == enter_gui_key) + { + inputdevice_add_inputcode(AKS_ENTERGUI, 1, nullptr); + break; + } - if (action_replay_button && rEvent.key.keysym.sym == action_replay_button && rEvent.key.repeat == 0) - { - inputdevice_add_inputcode(AKS_FREEZEBUTTON, 1, nullptr); - break; - } + // If the Quit emulator key was pressed, handle it + if (quit_key && rEvent.key.keysym.sym == quit_key) + { + inputdevice_add_inputcode(AKS_QUIT, 1, nullptr); + break; + } - if (fullscreen_key && rEvent.key.keysym.sym == fullscreen_key && rEvent.key.repeat == 0) - { - inputdevice_add_inputcode(AKS_TOGGLEWINDOWEDFULLSCREEN, 1, nullptr); - break; - } + if (action_replay_button && rEvent.key.keysym.sym == action_replay_button) + { + inputdevice_add_inputcode(AKS_FREEZEBUTTON, 1, nullptr); + break; + } + if (fullscreen_key && rEvent.key.keysym.sym == fullscreen_key) + { + inputdevice_add_inputcode(AKS_TOGGLEWINDOWEDFULLSCREEN, 1, nullptr); + break; + } +#ifdef USE_SDL2 + } +#endif // If the reset combination was pressed, handle it #ifdef USE_SDL1 // Strangely in FBCON left window is seen as left alt ?? 
@@ -1063,34 +1069,35 @@ int handle_msgpump() if (rEvent.key.keysym.scancode == 58 && rEvent.key.keysym.sym == SDLK_UNKNOWN) rEvent.key.keysym.sym = SDLK_CAPSLOCK; #endif - - if (rEvent.key.keysym.sym == SDLK_CAPSLOCK && rEvent.key.repeat == 0) - { - // Treat CAPSLOCK as a toggle. If on, set off and vice/versa - ioctl(0, KDGKBLED, &kbd_flags); - ioctl(0, KDGETLED, &kbd_led_status); - if (kbd_flags & 07 & LED_CAP) - { - // On, so turn off - kbd_led_status &= ~LED_CAP; - kbd_flags &= ~LED_CAP; - inputdevice_do_keyboard(AK_CAPSLOCK, 0); - } - else - { - // Off, so turn on - kbd_led_status |= LED_CAP; - kbd_flags |= LED_CAP; - inputdevice_do_keyboard(AK_CAPSLOCK, 1); - } - ioctl(0, KDSETLED, kbd_led_status); - ioctl(0, KDSKBLED, kbd_flags); - break; - } - - // Handle all other keys +#ifdef USE_SDL2 if (rEvent.key.repeat == 0) { +#endif + if (rEvent.key.keysym.sym == SDLK_CAPSLOCK) + { + // Treat CAPSLOCK as a toggle. If on, set off and vice/versa + ioctl(0, KDGKBLED, &kbd_flags); + ioctl(0, KDGETLED, &kbd_led_status); + if (kbd_flags & 07 & LED_CAP) + { + // On, so turn off + kbd_led_status &= ~LED_CAP; + kbd_flags &= ~LED_CAP; + inputdevice_do_keyboard(AK_CAPSLOCK, 0); + } + else + { + // Off, so turn on + kbd_led_status |= LED_CAP; + kbd_flags |= LED_CAP; + inputdevice_do_keyboard(AK_CAPSLOCK, 1); + } + ioctl(0, KDSETLED, kbd_led_status); + ioctl(0, KDSKBLED, kbd_flags); + break; + } + + // Handle all other keys #ifdef USE_SDL1 if (keyboard_type == KEYCODE_UNK) inputdevice_translatekeycode(0, rEvent.key.keysym.sym, 1); @@ -1098,12 +1105,14 @@ int handle_msgpump() inputdevice_translatekeycode(0, rEvent.key.keysym.scancode, 1); #elif USE_SDL2 inputdevice_translatekeycode(0, rEvent.key.keysym.scancode, 1); -#endif } +#endif break; case SDL_KEYUP: +#ifdef USE_SDL2 if (rEvent.key.repeat == 0) { +#endif #ifdef USE_SDL1 if (keyboard_type == KEYCODE_UNK) inputdevice_translatekeycode(0, rEvent.key.keysym.sym, 0); @@ -1111,8 +1120,8 @@ int handle_msgpump() 
inputdevice_translatekeycode(0, rEvent.key.keysym.scancode, 0); #elif USE_SDL2 inputdevice_translatekeycode(0, rEvent.key.keysym.scancode, 0); -#endif } +#endif break; case SDL_MOUSEBUTTONDOWN: @@ -1148,14 +1157,14 @@ int handle_msgpump() const auto x = rEvent.motion.xrel; const auto y = rEvent.motion.yrel; #if defined (ANDROIDSDL) - if(rEvent.motion.x == 0 && x > -4) - x = -4; - if(rEvent.motion.y == 0 && y > -4) - y = -4; - if(rEvent.motion.x == currprefs.gfx_size.width - 1 && x < 4) - x = 4; - if(rEvent.motion.y == currprefs.gfx_size.height - 1 && y < 4) - y = 4; + if (rEvent.motion.x == 0 && x > -4) + x = -4; + if (rEvent.motion.y == 0 && y > -4) + y = -4; + if (rEvent.motion.x == currprefs.gfx_size.width - 1 && x < 4) + x = 4; + if (rEvent.motion.y == currprefs.gfx_size.height - 1 && y < 4) + y = 4; #endif //ANDROIDSDL setmousestate(0, 0, x * mouseScale, 0); setmousestate(0, 1, y * mouseScale, 0); diff --git a/src/osdep/amiberry_gfx.cpp b/src/osdep/amiberry_gfx.cpp index 170dcd30..1b72baa4 100644 --- a/src/osdep/amiberry_gfx.cpp +++ b/src/osdep/amiberry_gfx.cpp @@ -359,6 +359,7 @@ int graphics_setup(void) void toggle_fullscreen() { +#ifdef USE_SDL2 Uint32 FullscreenFlag = SDL_WINDOW_FULLSCREEN; if (sdlWindow) { @@ -366,6 +367,7 @@ void toggle_fullscreen() SDL_SetWindowFullscreen(sdlWindow, is_fullscreen ? 
0 : FullscreenFlag); SDL_ShowCursor(is_fullscreen); } +#endif } #ifdef USE_DISPMANX diff --git a/src/osdep/amiberry_rp9.cpp b/src/osdep/amiberry_rp9.cpp index 9878692d..e3986fe7 100644 --- a/src/osdep/amiberry_rp9.cpp +++ b/src/osdep/amiberry_rp9.cpp @@ -131,6 +131,7 @@ static void parse_compatibility(struct uae_prefs* p, xmlNode* node) { p->cachesize = MAX_JIT_CACHE; p->address_space_24 = false; + p->compfpu = true; } else if (strcmp(reinterpret_cast(content), "flexible-cpu-cycles") == 0) p->cpu_compatible = false; @@ -315,12 +316,19 @@ static void parse_peripheral(struct uae_prefs* p, xmlNode* node) } else if (strcmp(reinterpret_cast(content), "jit") == 0) { - const auto attr = xmlGetProp(curr_node, reinterpret_cast("memory")); + auto attr = xmlGetProp(curr_node, reinterpret_cast("memory")); if (attr != nullptr) { p->cachesize = atoi(reinterpret_cast(attr)) / 1024; xmlFree(attr); } + attr = xmlGetProp(curr_node, (const xmlChar *)_T("fpu")); + if (attr != NULL) + { + if (strcmp((const char *)attr, "false") == 0) + p->compfpu = false; + xmlFree(attr); /* xmlGetProp returns an allocated copy that the caller must free */ + } } xmlFree(content); } diff --git a/src/osdep/arm_helper.s b/src/osdep/arm_helper.s index c7500ea3..d83ab7e3 100644 --- a/src/osdep/arm_helper.s +++ b/src/osdep/arm_helper.s @@ -2,6 +2,8 @@ .arm +.global save_host_fp_regs +.global restore_host_fp_regs .global copy_screen_8bit .global copy_screen_16bit_swap_arm .global copy_screen_32bit_to_16bit_arm @@ -10,6 +12,20 @@ .align 8 +@---------------------------------------------------------------- +@ save_host_fp_regs +@---------------------------------------------------------------- +save_host_fp_regs: + vstmia r0!, {d7-d15} + bx lr + +@---------------------------------------------------------------- +@ restore_host_fp_regs +@---------------------------------------------------------------- +restore_host_fp_regs: + vldmia r0!, {d7-d15} + bx lr + @---------------------------------------------------------------- @ copy_screen_8bit diff --git a/src/osdep/config.h b/src/osdep/config.h index
74e7eaa3..384c5aa3 100644 --- a/src/osdep/config.h +++ b/src/osdep/config.h @@ -6,50 +6,6 @@ * Copyright 1995 - 1998 Bernd Schmidt */ -/* - * Please note: Many things are configurable with command line parameters, - * and you can put anything you can pass on the command line into a - * configuration file ~/.uaerc. Please read the documentation for more - * information. - * - * NOTE NOTE NOTE - * Whenever you change something in this file, you have to "make clean" - * afterwards. - * Don't remove the '#' signs. If you want to enable something, move it out - * of the C comment block, if you want to disable something, move it inside - * the block. - */ - -/* - * When USE_COMPILER is defined, a m68k->i386 instruction compiler will be - * used. This is experimental. It has only been tested on a Linux/i386 ELF - * machine, although it might work on other i386 Unices. - * This is supposed to speed up application programs. It will not work very - * well for hardware bangers like games and demos, in fact it will be much - * slower. It can also be slower for some applications and/or benchmarks. - * It needs a lot of tuning. Please let me know your results with this. - * The second define, RELY_ON_LOADSEG_DETECTION, decides how the compiler - * tries to detect self-modifying code. If it is not set, the first bytes - * of every compiled routine are used as checksum before executing the - * routine. If it is set, the UAE filesystem will perform some checks to - * detect whether an executable is being loaded. This is less reliable - * (it won't work if you don't use the harddisk emulation, so don't try to - * use floppies or even the RAM disk), but much faster. - * - * @@@ NOTE: This option is unfortunately broken in this version. Don't - * try to use it. 
@@@ - * -#define USE_COMPILER -#define RELY_ON_LOADSEG_DETECTION - */ - -/*************************************************************************** - * Operating system/machine specific options - * Configure these for your CPU. The default settings should work on any - * machine, but may not give optimal performance everywhere. - * (These don't do very much yet, except HAVE_RDTSC - */ - /* * [pismy] defines virtual keys * Still hard-coded but can be easily changed by recompiling the project... diff --git a/src/osdep/gui/Navigation.cpp b/src/osdep/gui/Navigation.cpp index 32149376..facdc746 100644 --- a/src/osdep/gui/Navigation.cpp +++ b/src/osdep/gui/Navigation.cpp @@ -107,15 +107,15 @@ static NavigationMap navMap[] = { "68020", "CPU and FPU", "68882", "68010", "68030" }, { "68030", "CPU and FPU", "CPU internal", "68020", "68040" }, { "68040", "CPU and FPU", "FPUstrict", "68030", "CPU24Bit" }, -{ "CPU24Bit", "CPU and FPU", "SoftFloat", "68040", "CPUComp" }, -{ "CPUComp", "CPU and FPU", "SoftFloat", "CPU24Bit", "JIT" }, -{ "JIT", "CPU and FPU", "SoftFloat", "CPUComp", "68000" }, -{ "FPUnone", "68000", "7 Mhz", "SoftFloat", "68881" }, +{ "CPU24Bit", "CPU and FPU", "FPUJIT", "68040", "CPUComp" }, +{ "CPUComp", "CPU and FPU", "FPUJIT", "CPU24Bit", "JIT" }, +{ "JIT", "CPU and FPU", "FPUJIT", "CPUComp", "68000" }, +{ "FPUnone", "68000", "7 Mhz", "FPUJIT", "68881" }, { "68881", "68010", "14 Mhz", "FPUnone", "68882" }, { "68882", "68020", "25 Mhz", "68881", "CPU internal" }, { "CPU internal", "68030", "Fastest", "68882", "FPUstrict" }, -{ "FPUstrict", "68040", "Fastest", "CPU internal", "SoftFloat" }, -{ "SoftFloat", "CPU24Bit", "Fastest", "FPUstrict", "FPUnone" }, +{ "FPUstrict", "68040", "Fastest", "CPU internal", "FPUJIT" }, +{ "FPUJIT", "CPU24Bit", "Fastest", "FPUstrict", "FPUnone" }, { "7 Mhz", "FPUnone", "CPU and FPU", "Fastest", "14 Mhz" }, { "14 Mhz", "68881", "CPU and FPU", "7 Mhz", "25 Mhz" }, { "25 Mhz", "68882", "CPU and FPU", "14 Mhz", "Fastest" }, diff 
--git a/src/osdep/gui/PanelCPU.cpp b/src/osdep/gui/PanelCPU.cpp index 331eefaf..49e848af 100644 --- a/src/osdep/gui/PanelCPU.cpp +++ b/src/osdep/gui/PanelCPU.cpp @@ -38,7 +38,7 @@ static gcn::UaeRadioButton* optFPU68881; static gcn::UaeRadioButton* optFPU68882; static gcn::UaeRadioButton* optFPUinternal; static gcn::UaeCheckBox* chkFPUstrict; -static gcn::UaeCheckBox* chkSoftFloat; +static gcn::UaeCheckBox* chkFPUJIT; static gcn::Window* grpCPUSpeed; static gcn::UaeRadioButton* opt7Mhz; static gcn::UaeRadioButton* opt14Mhz; @@ -186,14 +186,23 @@ class JITActionListener : public gcn::ActionListener public: void action(const gcn::ActionEvent& actionEvent) override { - if (chkJIT->isSelected()) + if (actionEvent.getSource() == chkJIT) { - changed_prefs.cpu_compatible = false; - changed_prefs.cachesize = MAX_JIT_CACHE; + if (chkJIT->isSelected()) + { + changed_prefs.cpu_compatible = 0; + changed_prefs.cachesize = MAX_JIT_CACHE; + changed_prefs.compfpu = true; + } + else + { + changed_prefs.cachesize = 0; + changed_prefs.compfpu = false; + } } - else + else if (actionEvent.getSource() == chkFPUJIT) { - changed_prefs.cachesize = 0; + changed_prefs.compfpu = chkFPUJIT->isSelected(); } RefreshPanelCPU(); } @@ -209,10 +218,6 @@ public: if (actionEvent.getSource() == chkFPUstrict) { changed_prefs.fpu_strict = chkFPUstrict->isSelected(); - } - else if (actionEvent.getSource() == chkSoftFloat) { - changed_prefs.fpu_softfloat = chkSoftFloat->isSelected(); - } RefreshPanelCPU(); } @@ -285,9 +290,9 @@ void InitPanelCPU(const struct _ConfigCategory& category) chkFPUstrict->setId("FPUstrict"); chkFPUstrict->addActionListener(fpuActionListener); - chkSoftFloat = new gcn::UaeCheckBox("Softfloat FPU emul.", true); - chkSoftFloat->setId("SoftFloat"); - chkSoftFloat->addActionListener(fpuActionListener); + chkFPUJIT = new gcn::UaeCheckBox("FPU JIT", true); + chkFPUJIT->setId("FPUJIT"); + chkFPUJIT->addActionListener(jitActionListener); grpFPU = new gcn::Window("FPU"); 
grpFPU->setPosition(DISTANCE_BORDER + grpCPU->getWidth() + DISTANCE_NEXT_X, DISTANCE_BORDER); @@ -296,7 +301,7 @@ void InitPanelCPU(const struct _ConfigCategory& category) grpFPU->add(optFPU68882, 5, 70); grpFPU->add(optFPUinternal, 5, 100); grpFPU->add(chkFPUstrict, 5, 140); - grpFPU->add(chkSoftFloat, 5, 170); + grpFPU->add(chkFPUJIT, 5, 170); grpFPU->setMovable(false); grpFPU->setSize(185, 215); grpFPU->setBaseColor(gui_baseCol); @@ -358,7 +363,7 @@ void ExitPanelCPU() delete optFPU68882; delete optFPUinternal; delete chkFPUstrict; - delete chkSoftFloat; + delete chkFPUJIT; delete grpFPU; delete fpuButtonActionListener; delete fpuActionListener; @@ -413,7 +418,8 @@ void RefreshPanelCPU() optFPUinternal->setEnabled(changed_prefs.cpu_model == 68040); chkFPUstrict->setSelected(changed_prefs.fpu_strict); - chkSoftFloat->setSelected(changed_prefs.fpu_softfloat); + chkFPUJIT->setSelected(changed_prefs.compfpu); + chkFPUJIT->setEnabled(changed_prefs.cachesize > 0); if (changed_prefs.m68k_speed == M68K_SPEED_7MHZ_CYCLES) opt7Mhz->setSelected(true); @@ -440,8 +446,6 @@ bool HelpPanelCPU(std::vector &helptext) helptext.emplace_back(""); helptext.emplace_back("The available FPU models depending on the selected CPU."); helptext.emplace_back("The option \"More compatible\" activates more accurate rounding and compare of two floats."); - helptext.emplace_back("\"Softfloat FPU emul.\" aktivates the FPU emulation from QEMU. This is more accurate,"); - helptext.emplace_back("but a bit slower."); helptext.emplace_back(""); helptext.emplace_back("With \"CPU Speed\" you can choose the clock rate of the Amiga."); helptext.emplace_back("Use 7MHz for A500 games or 14MHz for A1200 ones. 
Fastest uses more emulation time"); diff --git a/src/osdep/neon_helper.s b/src/osdep/neon_helper.s index 0a82e752..727e31e4 100644 --- a/src/osdep/neon_helper.s +++ b/src/osdep/neon_helper.s @@ -2,6 +2,8 @@ .arm +.global save_host_fp_regs +.global restore_host_fp_regs .global copy_screen_8bit .global copy_screen_16bit_swap .global copy_screen_32bit_to_16bit_neon @@ -16,6 +18,20 @@ .align 8 +@---------------------------------------------------------------- +@ save_host_fp_regs +@---------------------------------------------------------------- +save_host_fp_regs: + vstmia r0!, {d7-d15} + bx lr + +@---------------------------------------------------------------- +@ restore_host_fp_regs +@---------------------------------------------------------------- +restore_host_fp_regs: + vldmia r0!, {d7-d15} + bx lr + @---------------------------------------------------------------- @ copy_screen_8bit diff --git a/src/osdep/picasso96.cpp b/src/osdep/picasso96.cpp index ff2178ed..0672163a 100644 --- a/src/osdep/picasso96.cpp +++ b/src/osdep/picasso96.cpp @@ -1230,7 +1230,6 @@ static uae_u32 REGPARAM2 picasso_SetSpriteColor (TrapContext *ctx) return 0; } - /* SetSpriteImage: Synopsis: SetSpriteImage(bi, RGBFormat); @@ -1942,7 +1941,7 @@ static void init_picasso_screen(void) * This function is called whenever another ModeInfo has to be set. This * function simply sets up the CRTC and TS registers to generate the * timing used for that screen mode. You should not set the DAC, clocks - * or linear start adress. They will be set when appropriate by their + * or linear start address. They will be set when appropriate by their * own functions. 
*/ static uae_u32 REGPARAM2 picasso_SetGC(TrapContext *ctx) @@ -2103,6 +2102,7 @@ static uae_u32 REGPARAM2 picasso_InvertRect(TrapContext *ctx) if (NOBLITTER) return 0; + if (CopyRenderInfoStructureA2U(ctx, renderinfo, &ri)) { P96TRACE((_T("InvertRect %dbpp 0x%lx\n"), Bpp, (long)mask)); @@ -2451,6 +2451,7 @@ static uae_u32 REGPARAM2 picasso_BlitPattern(TrapContext *ctx) if (NOBLITTER) return 0; + if (CopyRenderInfoStructureA2U(ctx, rinf, &ri) && CopyPatternStructureA2U(ctx, pinf, &pattern)) { if (!validatecoords(ctx, &ri, &X, &Y, &W, &H)) return 0; @@ -2497,6 +2498,7 @@ static uae_u32 REGPARAM2 picasso_BlitPattern(TrapContext *ctx) unsigned long cols; d = do_get_mem_word(((uae_u16 *)pattern.Memory) + prow); + if (xshift != 0) d = (d << xshift) | (d >> (16 - xshift)); @@ -3069,6 +3071,7 @@ static uae_u32 REGPARAM2 picasso_BlitPlanar2Direct(TrapContext *ctx) if (NOBLITTER) return 0; + if (minterm != 0x0C) { write_log(_T("WARNING - BlitPlanar2Direct() has unhandled op-code 0x%x. Using fall-back routine.\n"), minterm); return 0; diff --git a/src/osdep/sysconfig.h b/src/osdep/sysconfig.h index 0834f4b3..2735c6ff 100644 --- a/src/osdep/sysconfig.h +++ b/src/osdep/sysconfig.h @@ -15,7 +15,7 @@ #define UAE_FILESYS_THREADS #define AUTOCONFIG /* autoconfig support, fast ram, harddrives etc.. 
*/ #define JIT /* JIT compiler support */ -/* #define USE_JIT_FPU */ +#define USE_JIT_FPU /* #define NATMEM_OFFSET natmem_offset */ /* #define CATWEASEL */ /* Catweasel MK2/3 support */ /* #define AHI */ /* AHI sound emulation */ diff --git a/src/osdep/target.h b/src/osdep/target.h index 3277e5df..05718903 100644 --- a/src/osdep/target.h +++ b/src/osdep/target.h @@ -145,3 +145,14 @@ STATIC_INLINE void atomic_set(volatile uae_atomic *p, uae_u32 v) { __sync_lock_test_and_set(p, v); } + +#ifdef USE_JIT_FPU +#ifdef __cplusplus +extern "C" { +#endif + void save_host_fp_regs(void* buf); + void restore_host_fp_regs(void* buf); +#ifdef __cplusplus +} +#endif +#endif \ No newline at end of file diff --git a/src/softfloat/softfloat-macros.h b/src/softfloat/softfloat-macros.h deleted file mode 100644 index aa6665e1..00000000 --- a/src/softfloat/softfloat-macros.h +++ /dev/null @@ -1,793 +0,0 @@ -/* - * QEMU float support macros - * - * The code in this source file is derived from release 2a of the SoftFloat - * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and - * some later contributions) are provided under that license, as detailed below. - * It has subsequently been modified by contributors to the QEMU Project, - * so some portions are provided under: - * the SoftFloat-2a license - * the BSD license - * GPL-v2-or-later - * - * Any future contributions to this file after December 1st 2014 will be - * taken to be licensed under the Softfloat-2a license unless specifically - * indicated otherwise. - */ - -/* -=============================================================================== -This C source fragment is part of the SoftFloat IEC/IEEE Floating-point -Arithmetic Package, Release 2a. - -Written by John R. Hauser. This work was made possible in part by the -International Computer Science Institute, located at Suite 600, 1947 Center -Street, Berkeley, California 94704. 
Funding was partially provided by the -National Science Foundation under grant MIP-9311980. The original version -of this code was written as part of a project to build a fixed-point vector -processor in collaboration with the University of California at Berkeley, -overseen by Profs. Nelson Morgan and John Wawrzynek. More information -is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ -arithmetic/SoftFloat.html'. - -THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort -has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT -TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO -PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY -AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. - -Derivative works are acceptable, even for commercial purposes, so long as -(1) they include prominent notice that the work is derivative, and (2) they -include prominent notice akin to these four paragraphs for those parts of -this code that are retained. - -=============================================================================== -*/ - -/* BSD licensing: - * Copyright (c) 2006, Fabrice Bellard - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its contributors - * may be used to endorse or promote products derived from this software without - * specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF - * THE POSSIBILITY OF SUCH DAMAGE. - */ - -/* Portions of this work are licensed under the terms of the GNU GPL, - * version 2 or later. See the COPYING file in the top-level directory. - */ - -/*---------------------------------------------------------------------------- -| This macro tests for minimum version of the GNU C compiler. -*----------------------------------------------------------------------------*/ -#if defined(__GNUC__) && defined(__GNUC_MINOR__) -# define SOFTFLOAT_GNUC_PREREQ(maj, min) \ - ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min)) -#else -# define SOFTFLOAT_GNUC_PREREQ(maj, min) 0 -#endif - - -/*---------------------------------------------------------------------------- -| Shifts `a' right by the number of bits given in `count'. If any nonzero -| bits are shifted off, they are ``jammed'' into the least significant bit of -| the result by setting the least significant bit to 1. The value of `count' -| can be arbitrarily large; in particular, if `count' is greater than 32, the -| result will be either 0 or 1, depending on whether `a' is zero or nonzero. -| The result is stored in the location pointed to by `zPtr'. 
-*----------------------------------------------------------------------------*/ - -static inline void shift32RightJamming(uint32_t a, int count, uint32_t *zPtr) -{ - uint32_t z; - - if ( count == 0 ) { - z = a; - } - else if ( count < 32 ) { - z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 ); - } - else { - z = ( a != 0 ); - } - *zPtr = z; - -} - -/*---------------------------------------------------------------------------- -| Shifts `a' right by the number of bits given in `count'. If any nonzero -| bits are shifted off, they are ``jammed'' into the least significant bit of -| the result by setting the least significant bit to 1. The value of `count' -| can be arbitrarily large; in particular, if `count' is greater than 64, the -| result will be either 0 or 1, depending on whether `a' is zero or nonzero. -| The result is stored in the location pointed to by `zPtr'. -*----------------------------------------------------------------------------*/ - -static inline void shift64RightJamming(uint64_t a, int count, uint64_t *zPtr) -{ - uint64_t z; - - if ( count == 0 ) { - z = a; - } - else if ( count < 64 ) { - z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 ); - } - else { - z = ( a != 0 ); - } - *zPtr = z; - -} - -/*---------------------------------------------------------------------------- -| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64 -| _plus_ the number of bits given in `count'. The shifted result is at most -| 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'. The -| bits shifted off form a second 64-bit result as follows: The _last_ bit -| shifted off is the most-significant bit of the extra result, and the other -| 63 bits of the extra result are all zero if and only if _all_but_the_last_ -| bits shifted off were all zero. This extra result is stored in the location -| pointed to by `z1Ptr'. The value of `count' can be arbitrarily large. 
-| (This routine makes more sense if `a0' and `a1' are considered to form a -| fixed-point value with binary point between `a0' and `a1'. This fixed-point -| value is shifted right by the number of bits given in `count', and the -| integer part of the result is returned at the location pointed to by -| `z0Ptr'. The fractional part of the result may be slightly corrupted as -| described above, and is returned at the location pointed to by `z1Ptr'.) -*----------------------------------------------------------------------------*/ - -static inline void - shift64ExtraRightJamming( - uint64_t a0, uint64_t a1, int count, uint64_t *z0Ptr, uint64_t *z1Ptr) -{ - uint64_t z0, z1; - int8_t negCount = ( - count ) & 63; - - if ( count == 0 ) { - z1 = a1; - z0 = a0; - } - else if ( count < 64 ) { - z1 = ( a0<>count; - } - else { - if ( count == 64 ) { - z1 = a0 | ( a1 != 0 ); - } - else { - z1 = ( ( a0 | a1 ) != 0 ); - } - z0 = 0; - } - *z1Ptr = z1; - *z0Ptr = z0; - -} - -/*---------------------------------------------------------------------------- -| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the -| number of bits given in `count'. Any bits shifted off are lost. The value -| of `count' can be arbitrarily large; in particular, if `count' is greater -| than 128, the result will be 0. The result is broken into two 64-bit pieces -| which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. -*----------------------------------------------------------------------------*/ - -static inline void - shift128Right( - uint64_t a0, uint64_t a1, int count, uint64_t *z0Ptr, uint64_t *z1Ptr) -{ - uint64_t z0, z1; - int8_t negCount = ( - count ) & 63; - - if ( count == 0 ) { - z1 = a1; - z0 = a0; - } - else if ( count < 64 ) { - z1 = ( a0<>count ); - z0 = a0>>count; - } - else { - z1 = (count < 128) ? 
(a0 >> (count & 63)) : 0; - z0 = 0; - } - *z1Ptr = z1; - *z0Ptr = z0; - -} - -/*---------------------------------------------------------------------------- -| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the -| number of bits given in `count'. If any nonzero bits are shifted off, they -| are ``jammed'' into the least significant bit of the result by setting the -| least significant bit to 1. The value of `count' can be arbitrarily large; -| in particular, if `count' is greater than 128, the result will be either -| 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or -| nonzero. The result is broken into two 64-bit pieces which are stored at -| the locations pointed to by `z0Ptr' and `z1Ptr'. -*----------------------------------------------------------------------------*/ - -static inline void - shift128RightJamming( - uint64_t a0, uint64_t a1, int count, uint64_t *z0Ptr, uint64_t *z1Ptr) -{ - uint64_t z0, z1; - int8_t negCount = ( - count ) & 63; - - if ( count == 0 ) { - z1 = a1; - z0 = a0; - } - else if ( count < 64 ) { - z1 = ( a0<>count ) | ( ( a1<>count; - } - else { - if ( count == 64 ) { - z1 = a0 | ( a1 != 0 ); - } - else if ( count < 128 ) { - z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<>count ); - z0 = a0>>count; - } - else { - if ( count == 64 ) { - z2 = a1; - z1 = a0; - } - else { - a2 |= a1; - if ( count < 128 ) { - z2 = a0<>( count & 63 ); - } - else { - z2 = ( count == 128 ) ? a0 : ( a0 != 0 ); - z1 = 0; - } - } - z0 = 0; - } - z2 |= ( a2 != 0 ); - } - *z2Ptr = z2; - *z1Ptr = z1; - *z0Ptr = z0; - -} - -/*---------------------------------------------------------------------------- -| Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the -| number of bits given in `count'. Any bits shifted off are lost. The value -| of `count' must be less than 64. The result is broken into two 64-bit -| pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. 
-*----------------------------------------------------------------------------*/ - -static inline void - shortShift128Left( - uint64_t a0, uint64_t a1, int count, uint64_t *z0Ptr, uint64_t *z1Ptr) -{ - - *z1Ptr = a1<>( ( - count ) & 63 ) ); - -} - -/*---------------------------------------------------------------------------- -| Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left -| by the number of bits given in `count'. Any bits shifted off are lost. -| The value of `count' must be less than 64. The result is broken into three -| 64-bit pieces which are stored at the locations pointed to by `z0Ptr', -| `z1Ptr', and `z2Ptr'. -*----------------------------------------------------------------------------*/ - -static inline void - shortShift192Left( - uint64_t a0, - uint64_t a1, - uint64_t a2, - int count, - uint64_t *z0Ptr, - uint64_t *z1Ptr, - uint64_t *z2Ptr - ) -{ - uint64_t z0, z1, z2; - int8_t negCount; - - z2 = a2<>negCount; - z0 |= a1>>negCount; - } - *z2Ptr = z2; - *z1Ptr = z1; - *z0Ptr = z0; - -} - -/*---------------------------------------------------------------------------- -| Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit -| value formed by concatenating `b0' and `b1'. Addition is modulo 2^128, so -| any carry out is lost. The result is broken into two 64-bit pieces which -| are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. -*----------------------------------------------------------------------------*/ - -static inline void - add128( - uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t *z0Ptr, uint64_t *z1Ptr ) -{ - uint64_t z1; - - z1 = a1 + b1; - *z1Ptr = z1; - *z0Ptr = a0 + b0 + ( z1 < a1 ); - -} - -/*---------------------------------------------------------------------------- -| Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the -| 192-bit value formed by concatenating `b0', `b1', and `b2'. Addition is -| modulo 2^192, so any carry out is lost. 
The result is broken into three -| 64-bit pieces which are stored at the locations pointed to by `z0Ptr', -| `z1Ptr', and `z2Ptr'. -*----------------------------------------------------------------------------*/ - -static inline void - add192( - uint64_t a0, - uint64_t a1, - uint64_t a2, - uint64_t b0, - uint64_t b1, - uint64_t b2, - uint64_t *z0Ptr, - uint64_t *z1Ptr, - uint64_t *z2Ptr - ) -{ - uint64_t z0, z1, z2; - uint8_t carry0, carry1; - - z2 = a2 + b2; - carry1 = ( z2 < a2 ); - z1 = a1 + b1; - carry0 = ( z1 < a1 ); - z0 = a0 + b0; - z1 += carry1; - z0 += ( z1 < carry1 ); - z0 += carry0; - *z2Ptr = z2; - *z1Ptr = z1; - *z0Ptr = z0; - -} - -/*---------------------------------------------------------------------------- -| Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the -| 128-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo -| 2^128, so any borrow out (carry out) is lost. The result is broken into two -| 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and -| `z1Ptr'. -*----------------------------------------------------------------------------*/ - -static inline void - sub128( - uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t *z0Ptr, uint64_t *z1Ptr ) -{ - - *z1Ptr = a1 - b1; - *z0Ptr = a0 - b0 - ( a1 < b1 ); - -} - -/*---------------------------------------------------------------------------- -| Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2' -| from the 192-bit value formed by concatenating `a0', `a1', and `a2'. -| Subtraction is modulo 2^192, so any borrow out (carry out) is lost. The -| result is broken into three 64-bit pieces which are stored at the locations -| pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'. 
-*----------------------------------------------------------------------------*/ - -static inline void - sub192( - uint64_t a0, - uint64_t a1, - uint64_t a2, - uint64_t b0, - uint64_t b1, - uint64_t b2, - uint64_t *z0Ptr, - uint64_t *z1Ptr, - uint64_t *z2Ptr - ) -{ - uint64_t z0, z1, z2; - uint8_t borrow0, borrow1; - - z2 = a2 - b2; - borrow1 = ( a2 < b2 ); - z1 = a1 - b1; - borrow0 = ( a1 < b1 ); - z0 = a0 - b0; - z0 -= ( z1 < borrow1 ); - z1 -= borrow1; - z0 -= borrow0; - *z2Ptr = z2; - *z1Ptr = z1; - *z0Ptr = z0; - -} - -/*---------------------------------------------------------------------------- -| Multiplies `a' by `b' to obtain a 128-bit product. The product is broken -| into two 64-bit pieces which are stored at the locations pointed to by -| `z0Ptr' and `z1Ptr'. -*----------------------------------------------------------------------------*/ - -static inline void mul64To128( uint64_t a, uint64_t b, uint64_t *z0Ptr, uint64_t *z1Ptr ) -{ - uint32_t aHigh, aLow, bHigh, bLow; - uint64_t z0, zMiddleA, zMiddleB, z1; - - aLow = a; - aHigh = a>>32; - bLow = b; - bHigh = b>>32; - z1 = ( (uint64_t) aLow ) * bLow; - zMiddleA = ( (uint64_t) aLow ) * bHigh; - zMiddleB = ( (uint64_t) aHigh ) * bLow; - z0 = ( (uint64_t) aHigh ) * bHigh; - zMiddleA += zMiddleB; - z0 += ( ( (uint64_t) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 ); - zMiddleA <<= 32; - z1 += zMiddleA; - z0 += ( z1 < zMiddleA ); - *z1Ptr = z1; - *z0Ptr = z0; - -} - -/*---------------------------------------------------------------------------- -| Multiplies the 128-bit value formed by concatenating `a0' and `a1' by -| `b' to obtain a 192-bit product. The product is broken into three 64-bit -| pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and -| `z2Ptr'. 
-*----------------------------------------------------------------------------*/ - -static inline void - mul128By64To192( - uint64_t a0, - uint64_t a1, - uint64_t b, - uint64_t *z0Ptr, - uint64_t *z1Ptr, - uint64_t *z2Ptr - ) -{ - uint64_t z0, z1, z2, more1; - - mul64To128( a1, b, &z1, &z2 ); - mul64To128( a0, b, &z0, &more1 ); - add128( z0, more1, 0, z1, &z0, &z1 ); - *z2Ptr = z2; - *z1Ptr = z1; - *z0Ptr = z0; - -} - -/*---------------------------------------------------------------------------- -| Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the -| 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit -| product. The product is broken into four 64-bit pieces which are stored at -| the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'. -*----------------------------------------------------------------------------*/ - -static inline void - mul128To256( - uint64_t a0, - uint64_t a1, - uint64_t b0, - uint64_t b1, - uint64_t *z0Ptr, - uint64_t *z1Ptr, - uint64_t *z2Ptr, - uint64_t *z3Ptr - ) -{ - uint64_t z0, z1, z2, z3; - uint64_t more1, more2; - - mul64To128( a1, b1, &z2, &z3 ); - mul64To128( a1, b0, &z1, &more2 ); - add128( z1, more2, 0, z2, &z1, &z2 ); - mul64To128( a0, b0, &z0, &more1 ); - add128( z0, more1, 0, z1, &z0, &z1 ); - mul64To128( a0, b1, &more1, &more2 ); - add128( more1, more2, 0, z2, &more1, &z2 ); - add128( z0, z1, 0, more1, &z0, &z1 ); - *z3Ptr = z3; - *z2Ptr = z2; - *z1Ptr = z1; - *z0Ptr = z0; - -} - -/*---------------------------------------------------------------------------- -| Returns an approximation to the 64-bit integer quotient obtained by dividing -| `b' into the 128-bit value formed by concatenating `a0' and `a1'. The -| divisor `b' must be at least 2^63. If q is the exact quotient truncated -| toward zero, the approximation returned lies between q and q + 2 inclusive. -| If the exact quotient q is larger than 64 bits, the maximum positive 64-bit -| unsigned integer is returned. 
-*----------------------------------------------------------------------------*/ - -static uint64_t estimateDiv128To64( uint64_t a0, uint64_t a1, uint64_t b ) -{ - uint64_t b0, b1; - uint64_t rem0, rem1, term0, term1; - uint64_t z; - - if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF ); - b0 = b>>32; - z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32; - mul64To128( b, z, &term0, &term1 ); - sub128( a0, a1, term0, term1, &rem0, &rem1 ); - while ( ( (int64_t) rem0 ) < 0 ) { - z -= LIT64( 0x100000000 ); - b1 = b<<32; - add128( rem0, rem1, b0, b1, &rem0, &rem1 ); - } - rem0 = ( rem0<<32 ) | ( rem1>>32 ); - z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0; - return z; - -} - -/*---------------------------------------------------------------------------- -| Returns an approximation to the square root of the 32-bit significand given -| by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of -| `aExp' (the least significant bit) is 1, the integer returned approximates -| 2^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp' -| is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either -| case, the approximation returned lies strictly within +/-2 of the exact -| value. 
-*----------------------------------------------------------------------------*/ - -static uint32_t estimateSqrt32(int aExp, uint32_t a) -{ - static const uint16_t sqrtOddAdjustments[] = { - 0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0, - 0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67 - }; - static const uint16_t sqrtEvenAdjustments[] = { - 0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E, - 0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002 - }; - int8_t index; - uint32_t z; - - index = ( a>>27 ) & 15; - if ( aExp & 1 ) { - z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ (int)index ]; - z = ( ( a / z )<<14 ) + ( z<<15 ); - a >>= 1; - } - else { - z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ (int)index ]; - z = a / z + z; - z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 ); - if ( z <= a ) return (uint32_t) ( ( (int32_t) a )>>1 ); - } - return ( (uint32_t) ( ( ( (uint64_t) a )<<31 ) / z ) ) + ( z>>1 ); - -} - -/*---------------------------------------------------------------------------- -| Returns the number of leading 0 bits before the most-significant 1 bit of -| `a'. If `a' is zero, 32 is returned. 
-*----------------------------------------------------------------------------*/ - -static inline int8_t countLeadingZeros32( uint32_t a ) -{ -#if SOFTFLOAT_GNUC_PREREQ(3, 4) - if (a) { - return __builtin_clz(a); - } else { - return 32; - } -#else - static const int8_t countLeadingZerosHigh[] = { - 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - }; - int8_t shiftCount; - - shiftCount = 0; - if ( a < 0x10000 ) { - shiftCount += 16; - a <<= 16; - } - if ( a < 0x1000000 ) { - shiftCount += 8; - a <<= 8; - } - shiftCount += countLeadingZerosHigh[ a>>24 ]; - return shiftCount; -#endif -} - -/*---------------------------------------------------------------------------- -| Returns the number of leading 0 bits before the most-significant 1 bit of -| `a'. If `a' is zero, 64 is returned. 
-*----------------------------------------------------------------------------*/ - -static inline int8_t countLeadingZeros64( uint64_t a ) -{ -#if SOFTFLOAT_GNUC_PREREQ(3, 4) - if (a) { - return __builtin_clzll(a); - } else { - return 64; - } -#else - int8_t shiftCount; - - shiftCount = 0; - if ( a < ( (uint64_t) 1 )<<32 ) { - shiftCount += 32; - } - else { - a >>= 32; - } - shiftCount += countLeadingZeros32( a ); - return shiftCount; -#endif -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' -| is equal to the 128-bit value formed by concatenating `b0' and `b1'. -| Otherwise, returns 0. -*----------------------------------------------------------------------------*/ - -static inline flag eq128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 ) -{ - - return ( a0 == b0 ) && ( a1 == b1 ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less -| than or equal to the 128-bit value formed by concatenating `b0' and `b1'. -| Otherwise, returns 0. -*----------------------------------------------------------------------------*/ - -static inline flag le128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 ) -{ - - return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less -| than the 128-bit value formed by concatenating `b0' and `b1'. Otherwise, -| returns 0. 
-*----------------------------------------------------------------------------*/ - -static inline flag lt128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 ) -{ - - return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is -| not equal to the 128-bit value formed by concatenating `b0' and `b1'. -| Otherwise, returns 0. -*----------------------------------------------------------------------------*/ - -static inline flag ne128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 ) -{ - - return ( a0 != b0 ) || ( a1 != b1 ); - -} diff --git a/src/softfloat/softfloat-specialize.h b/src/softfloat/softfloat-specialize.h deleted file mode 100644 index 003953fd..00000000 --- a/src/softfloat/softfloat-specialize.h +++ /dev/null @@ -1,443 +0,0 @@ -/* - * QEMU float support - * - * The code in this source file is derived from release 2a of the SoftFloat - * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and - * some later contributions) are provided under that license, as detailed below. - * It has subsequently been modified by contributors to the QEMU Project, - * so some portions are provided under: - * the SoftFloat-2a license - * the BSD license - * GPL-v2-or-later - * - * Any future contributions to this file after December 1st 2014 will be - * taken to be licensed under the Softfloat-2a license unless specifically - * indicated otherwise. - */ - -/* -=============================================================================== -This C source fragment is part of the SoftFloat IEC/IEEE Floating-point -Arithmetic Package, Release 2a. - -Written by John R. Hauser. This work was made possible in part by the -International Computer Science Institute, located at Suite 600, 1947 Center -Street, Berkeley, California 94704. 
Funding was partially provided by the -National Science Foundation under grant MIP-9311980. The original version -of this code was written as part of a project to build a fixed-point vector -processor in collaboration with the University of California at Berkeley, -overseen by Profs. Nelson Morgan and John Wawrzynek. More information -is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ -arithmetic/SoftFloat.html'. - -THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort -has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT -TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO -PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY -AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. - -Derivative works are acceptable, even for commercial purposes, so long as -(1) they include prominent notice that the work is derivative, and (2) they -include prominent notice akin to these four paragraphs for those parts of -this code that are retained. - -=============================================================================== -*/ - -/* BSD licensing: - * Copyright (c) 2006, Fabrice Bellard - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its contributors - * may be used to endorse or promote products derived from this software without - * specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF - * THE POSSIBILITY OF SUCH DAMAGE. - */ - -/* Portions of this work are licensed under the terms of the GNU GPL, - * version 2 or later. See the COPYING file in the top-level directory. - */ - -/*---------------------------------------------------------------------------- -| Returns 1 if the extended double-precision floating-point value `a' is a -| NaN; otherwise returns 0. -*----------------------------------------------------------------------------*/ - -static inline flag floatx80_is_nan( floatx80 a ) -{ - - return ( ( a.high & 0x7FFF ) == 0x7FFF ) && (uint64_t) ( a.low<<1 ); - -} - -/*---------------------------------------------------------------------------- -| The pattern for a default generated extended double-precision NaN. -*----------------------------------------------------------------------------*/ -static inline floatx80 floatx80_default_nan(float_status *status) -{ - floatx80 r; - r.high = 0x7FFF; - r.low = LIT64( 0xFFFFFFFFFFFFFFFF ); - return r; -} - -/*---------------------------------------------------------------------------- -| Raises the exceptions specified by `flags'. Floating-point traps can be -| defined here if desired. 
It is currently not possible for such a trap -| to substitute a result value. If traps are not implemented, this routine -| should be simply `float_exception_flags |= flags;'. -*----------------------------------------------------------------------------*/ - -static inline void float_raise(uint8_t flags, float_status *status) -{ - status->float_exception_flags |= flags; -} - -/*---------------------------------------------------------------------------- -| Internal canonical NaN format. -*----------------------------------------------------------------------------*/ -typedef struct { - flag sign; - uint64_t high, low; -} commonNaNT; - -/*---------------------------------------------------------------------------- -| Returns 1 if the single-precision floating-point value `a' is a NaN; -| otherwise returns 0. -*----------------------------------------------------------------------------*/ - -static inline flag float32_is_nan( float32 a ) -{ - - return ( 0xFF000000 < (uint32_t) ( a<<1 ) ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the single-precision floating-point value `a' is a signaling -| NaN; otherwise returns 0. -*----------------------------------------------------------------------------*/ - -static inline flag float32_is_signaling_nan( float32 a ) -{ - - return ( ( ( a>>22 ) & 0x1FF ) == 0x1FE ) && ( a & 0x003FFFFF ); - -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the single-precision floating-point NaN -| `a' to the canonical NaN format. If `a' is a signaling NaN, the invalid -| exception is raised. 
-*----------------------------------------------------------------------------*/ - -static inline commonNaNT float32ToCommonNaN( float32 a, float_status *status ) -{ - commonNaNT z; - - if ( float32_is_signaling_nan( a ) ) float_raise( float_flag_signaling, status ); - z.sign = a>>31; - z.low = 0; - z.high = ( (uint64_t) a )<<41; - return z; - -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the canonical NaN `a' to the single- -| precision floating-point format. -*----------------------------------------------------------------------------*/ - -static inline float32 commonNaNToFloat32( commonNaNT a ) -{ - - return ( ( (uint32_t) a.sign )<<31 ) | 0x7FC00000 | ( a.high>>41 ); - -} - -/*---------------------------------------------------------------------------- -| Takes two single-precision floating-point values `a' and `b', one of which -| is a NaN, and returns the appropriate NaN result. If either `a' or `b' is a -| signaling NaN, the invalid exception is raised. -*----------------------------------------------------------------------------*/ - -static inline float32 propagateFloat32NaN( float32 a, float32 b, float_status *status ) -{ - flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN; - - aIsNaN = float32_is_nan( a ); - aIsSignalingNaN = float32_is_signaling_nan( a ); - bIsNaN = float32_is_nan( b ); - bIsSignalingNaN = float32_is_signaling_nan( b ); - a |= 0x00400000; - b |= 0x00400000; - if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_signaling, status ); - if ( aIsNaN ) { - return ( aIsSignalingNaN & bIsNaN ) ? b : a; - } - else { - return b; - } - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the double-precision floating-point value `a' is a NaN; -| otherwise returns 0. 
-*----------------------------------------------------------------------------*/ - -static inline flag float64_is_nan( float64 a ) -{ - - return ( LIT64( 0xFFE0000000000000 ) < (uint64_t) ( a<<1 ) ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the double-precision floating-point value `a' is a signaling -| NaN; otherwise returns 0. -*----------------------------------------------------------------------------*/ - -static inline flag float64_is_signaling_nan( float64 a ) -{ - - return - ( ( ( a>>51 ) & 0xFFF ) == 0xFFE ) - && ( a & LIT64( 0x0007FFFFFFFFFFFF ) ); - -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the double-precision floating-point NaN -| `a' to the canonical NaN format. If `a' is a signaling NaN, the invalid -| exception is raised. -*----------------------------------------------------------------------------*/ - -static inline commonNaNT float64ToCommonNaN(float64 a, float_status *status) -{ - commonNaNT z; - - if (float64_is_signaling_nan(a)) { - float_raise(float_flag_invalid, status); - } - z.sign = float64_val(a) >> 63; - z.low = 0; - z.high = float64_val(a) << 12; - return z; -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the canonical NaN `a' to the double- -| precision floating-point format. -*----------------------------------------------------------------------------*/ - -static inline float64 commonNaNToFloat64(commonNaNT a, float_status *status) -{ - return - ( ( (uint64_t) a.sign )<<63 ) - | LIT64( 0x7FF8000000000000 ) - | ( a.high>>12 ); -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the extended double-precision floating-point value `a' is a -| signaling NaN; otherwise returns 0. 
-*----------------------------------------------------------------------------*/ - -static inline flag floatx80_is_signaling_nan( floatx80 a ) -{ - uint64_t aLow; - - aLow = a.low & ~ LIT64( 0x4000000000000000 ); - return - ( ( a.high & 0x7FFF ) == 0x7FFF ) - && (uint64_t) ( aLow<<1 ) - && ( a.low == aLow ); - -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the extended double-precision floating- -| point NaN `a' to the canonical NaN format. If `a' is a signaling NaN, the -| invalid exception is raised. -*----------------------------------------------------------------------------*/ - -static inline commonNaNT floatx80ToCommonNaN( floatx80 a, float_status *status ) -{ - commonNaNT z; - - if ( floatx80_is_signaling_nan( a ) ) float_raise( float_flag_signaling, status ); - z.sign = a.high>>15; - z.low = 0; - z.high = a.low<<1; - return z; - -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the canonical NaN `a' to the extended -| double-precision floating-point format. -*----------------------------------------------------------------------------*/ - -static inline floatx80 commonNaNToFloatx80(commonNaNT a, float_status *status) -{ - floatx80 z; -#ifdef SOFTFLOAT_68K - z.low = LIT64( 0x4000000000000000 ) | ( a.high>>1 ); -#else - z.low = LIT64( 0xC000000000000000 ) | ( a.high>>1 ); -#endif - z.high = ( ( (int16_t) a.sign )<<15 ) | 0x7FFF; - return z; -} - -/*---------------------------------------------------------------------------- -| Takes two extended double-precision floating-point values `a' and `b', one -| of which is a NaN, and returns the appropriate NaN result. If either `a' or -| `b' is a signaling NaN, the invalid exception is raised. 
-*----------------------------------------------------------------------------*/ - -static inline floatx80 propagateFloatx80NaN( floatx80 a, floatx80 b, float_status *status ) -{ - flag aIsNaN, aIsSignalingNaN, bIsSignalingNaN; -#ifndef SOFTFLOAT_68K - flag bIsNaN; -#endif - - aIsNaN = floatx80_is_nan( a ); - aIsSignalingNaN = floatx80_is_signaling_nan( a ); - bIsSignalingNaN = floatx80_is_signaling_nan( b ); -#ifdef SOFTFLOAT_68K - a.low |= LIT64( 0x4000000000000000 ); - b.low |= LIT64( 0x4000000000000000 ); - if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_signaling, status ); - return aIsNaN ? a : b; -#else - bIsNaN = floatx80_is_nan( b ); - a.low |= LIT64( 0xC000000000000000 ); - b.low |= LIT64( 0xC000000000000000 ); - if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_signaling, status ); - if ( aIsNaN ) { - return ( aIsSignalingNaN & bIsNaN ) ? b : a; - } - else { - return b; - } -#endif - -} - -#ifdef SOFTFLOAT_68K -/*---------------------------------------------------------------------------- - | Takes extended double-precision floating-point NaN `a' and returns the - | appropriate NaN result. If `a' is a signaling NaN, the invalid exception - | is raised. - *----------------------------------------------------------------------------*/ - -static inline floatx80 propagateFloatx80NaNOneArg(floatx80 a, float_status *status) -{ - if ( floatx80_is_signaling_nan( a ) ) - float_raise( float_flag_signaling, status ); - a.low |= LIT64( 0x4000000000000000 ); - - return a; -} -#endif - -// 28-12-2016: Added for Previous: - -/*---------------------------------------------------------------------------- - | Returns 1 if the extended double-precision floating-point value `a' is - | zero; otherwise returns 0. 
- *----------------------------------------------------------------------------*/ - -static inline flag floatx80_is_zero( floatx80 a ) -{ - - return ( ( a.high & 0x7FFF ) < 0x7FFF ) && ( a.low == 0 ); - -} - -/*---------------------------------------------------------------------------- - | Returns 1 if the extended double-precision floating-point value `a' is - | infinity; otherwise returns 0. - *----------------------------------------------------------------------------*/ - -static inline flag floatx80_is_infinity( floatx80 a ) -{ - - return ( ( a.high & 0x7FFF ) == 0x7FFF ) && ( (uint64_t) ( a.low<<1 ) == 0 ); - -} - -/*---------------------------------------------------------------------------- - | Returns 1 if the extended double-precision floating-point value `a' is - | negative; otherwise returns 0. - *----------------------------------------------------------------------------*/ - -static inline flag floatx80_is_negative( floatx80 a ) -{ - - return ( ( a.high & 0x8000 ) == 0x8000 ); - -} - -/*---------------------------------------------------------------------------- - | Returns 1 if the extended double-precision floating-point value `a' is - | unnormal; otherwise returns 0. - *----------------------------------------------------------------------------*/ -static inline flag floatx80_is_unnormal( floatx80 a ) -{ - return - ( ( a.high & 0x7FFF ) > 0 ) - && ( ( a.high & 0x7FFF ) < 0x7FFF) - && ( (uint64_t) ( a.low & LIT64( 0x8000000000000000 ) ) == LIT64( 0x0000000000000000 ) ); -} - -/*---------------------------------------------------------------------------- - | Returns 1 if the extended double-precision floating-point value `a' is - | denormal; otherwise returns 0. 
- *----------------------------------------------------------------------------*/ - -static inline flag floatx80_is_denormal( floatx80 a ) -{ - return - ( ( a.high & 0x7FFF ) == 0 ) - && ( (uint64_t) ( a.low & LIT64( 0x8000000000000000 ) ) == LIT64( 0x0000000000000000 ) ) - && (uint64_t) ( a.low<<1 ); -} - -/*---------------------------------------------------------------------------- - | Returns 1 if the extended double-precision floating-point value `a' is - | normal; otherwise returns 0. - *----------------------------------------------------------------------------*/ - -static inline flag floatx80_is_normal( floatx80 a ) -{ - return - ( ( a.high & 0x7FFF ) < 0x7FFF ) - && ( (uint64_t) ( a.low & LIT64( 0x8000000000000000 ) ) == LIT64( 0x8000000000000000 ) ); -} -// End of addition for Previous - diff --git a/src/softfloat/softfloat.cpp b/src/softfloat/softfloat.cpp deleted file mode 100644 index 8f8e0197..00000000 --- a/src/softfloat/softfloat.cpp +++ /dev/null @@ -1,3556 +0,0 @@ - -#define SOFTFLOAT_68K - -#include -#include -#include "softfloat/softfloat.h" - - -/* - * QEMU float support - * - * The code in this source file is derived from release 2a of the SoftFloat - * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and - * some later contributions) are provided under that license, as detailed below. - * It has subsequently been modified by contributors to the QEMU Project, - * so some portions are provided under: - * the SoftFloat-2a license - * the BSD license - * GPL-v2-or-later - * - * Any future contributions to this file after December 1st 2014 will be - * taken to be licensed under the Softfloat-2a license unless specifically - * indicated otherwise. - */ - -/* -=============================================================================== -This C source file is part of the SoftFloat IEC/IEEE Floating-point -Arithmetic Package, Release 2a. - -Written by John R. Hauser. 
This work was made possible in part by the -International Computer Science Institute, located at Suite 600, 1947 Center -Street, Berkeley, California 94704. Funding was partially provided by the -National Science Foundation under grant MIP-9311980. The original version -of this code was written as part of a project to build a fixed-point vector -processor in collaboration with the University of California at Berkeley, -overseen by Profs. Nelson Morgan and John Wawrzynek. More information -is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ -arithmetic/SoftFloat.html'. - -THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort -has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT -TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO -PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY -AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. - -Derivative works are acceptable, even for commercial purposes, so long as -(1) they include prominent notice that the work is derivative, and (2) they -include prominent notice akin to these four paragraphs for those parts of -this code that are retained. - -=============================================================================== -*/ - -/* BSD licensing: - * Copyright (c) 2006, Fabrice Bellard - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. 
Neither the name of the copyright holder nor the names of its contributors - * may be used to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF - * THE POSSIBILITY OF SUCH DAMAGE. - */ - -/* Portions of this work are licensed under the terms of the GNU GPL, - * version 2 or later. See the COPYING file in the top-level directory. - */ - -/* We only need stdlib for abort() */ - -/*---------------------------------------------------------------------------- -| Primitive arithmetic functions, including multi-word arithmetic, and -| division and square root approximations. (Can be specialized to target if -| desired.) -*----------------------------------------------------------------------------*/ -#include "softfloat-macros.h" - -/*---------------------------------------------------------------------------- - | Variables for storing sign, exponent and significand of internal extended - | double-precision floating-point value for external use. 
- *----------------------------------------------------------------------------*/ -flag floatx80_internal_sign = 0; -int32_t floatx80_internal_exp = 0; -uint64_t floatx80_internal_sig = 0; -int32_t floatx80_internal_exp0 = 0; -uint64_t floatx80_internal_sig0 = 0; -uint64_t floatx80_internal_sig1 = 0; -int8_t floatx80_internal_precision = 80; -int8_t floatx80_internal_mode = float_round_nearest_even; - -/*---------------------------------------------------------------------------- - | Functions for storing sign, exponent and significand of extended - | double-precision floating-point intermediate result for external use. - *----------------------------------------------------------------------------*/ -floatx80 roundSaveFloatx80Internal( int8_t roundingPrecision, flag zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1, float_status *status ) -{ - int64_t roundIncrement, roundMask, roundBits; - flag increment; - - if ( roundingPrecision == 80 ) { - goto precision80; - } else if ( roundingPrecision == 64 ) { - roundIncrement = LIT64( 0x0000000000000400 ); - roundMask = LIT64( 0x00000000000007FF ); - } else if ( roundingPrecision == 32 ) { - roundIncrement = LIT64( 0x0000008000000000 ); - roundMask = LIT64( 0x000000FFFFFFFFFF ); - } else { - goto precision80; - } - - zSig0 |= ( zSig1 != 0 ); - if ( status->float_rounding_mode != float_round_nearest_even ) { - if ( status->float_rounding_mode == float_round_to_zero ) { - roundIncrement = 0; - } else { - roundIncrement = roundMask; - if ( zSign ) { - if ( status->float_rounding_mode == float_round_up ) roundIncrement = 0; - } else { - if ( status->float_rounding_mode == float_round_down ) roundIncrement = 0; - } - } - } - - roundBits = zSig0 & roundMask; - - zSig0 += roundIncrement; - if ( zSig0 < roundIncrement ) { - ++zExp; - zSig0 = LIT64( 0x8000000000000000 ); - } - roundIncrement = roundMask + 1; - if ( status->float_rounding_mode == float_round_nearest_even && ( roundBits<<1 == roundIncrement ) ) { - roundMask |= 
roundIncrement; - } - zSig0 &= ~ roundMask; - if ( zSig0 == 0 ) zExp = 0; - return packFloatx80( zSign, zExp, zSig0 ); - -precision80: - increment = ( (int64_t) zSig1 < 0 ); - if ( status->float_rounding_mode != float_round_nearest_even ) { - if ( status->float_rounding_mode == float_round_to_zero ) { - increment = 0; - } else { - if ( zSign ) { - increment = ( status->float_rounding_mode == float_round_down ) && zSig1; - } else { - increment = ( status->float_rounding_mode == float_round_up ) && zSig1; - } - } - } - if ( increment ) { - ++zSig0; - if ( zSig0 == 0 ) { - ++zExp; - zSig0 = LIT64( 0x8000000000000000 ); - } else { - zSig0 &= ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & ( status->float_rounding_mode == float_round_nearest_even ) ); - } - } else { - if ( zSig0 == 0 ) zExp = 0; - } - return packFloatx80( zSign, zExp, zSig0 ); -} - -static void saveFloatx80Internal( int8_t prec, flag zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1, float_status *status ) -{ - floatx80_internal_sign = zSign; - floatx80_internal_exp = zExp; - floatx80_internal_sig0 = zSig0; - floatx80_internal_sig1 = zSig1; - floatx80_internal_precision = prec; - floatx80_internal_mode = status->float_rounding_mode; -} - -static void saveFloat64Internal( flag zSign, int16_t zExp, uint64_t zSig, float_status *status ) -{ - floatx80_internal_sign = zSign; - floatx80_internal_exp = zExp + 0x3C01; - floatx80_internal_sig0 = zSig<<1; - floatx80_internal_sig1 = 0; - floatx80_internal_precision = 64; - floatx80_internal_mode = status->float_rounding_mode; -} - -static void saveFloat32Internal( flag zSign, int16_t zExp, uint32_t zSig, float_status *status ) -{ - floatx80 z = roundSaveFloatx80Internal( 32, zSign, zExp + 0x3F81, ( (uint64_t) zSig )<<33, 0, status ); - - floatx80_internal_sign = zSign; - floatx80_internal_exp = extractFloatx80Exp( z ); - floatx80_internal_sig = extractFloatx80Frac( z ); - floatx80_internal_exp0 = zExp + 0x3F81; - floatx80_internal_sig0 = ( (uint64_t) zSig )<<33; - 
floatx80_internal_sig1 = 0; -} - -/*---------------------------------------------------------------------------- - | Functions for returning sign, exponent and significand of extended - | double-precision floating-point intermediate result for external use. - *----------------------------------------------------------------------------*/ - -void getRoundedFloatInternal( int8_t roundingPrecision, flag *pzSign, int32_t *pzExp, uint64_t *pzSig ) -{ - int64_t roundIncrement, roundMask, roundBits; - flag increment; - - flag zSign = floatx80_internal_sign; - int32_t zExp = floatx80_internal_exp; - uint64_t zSig0 = floatx80_internal_sig0; - uint64_t zSig1 = floatx80_internal_sig1; - - if ( roundingPrecision == 80 ) { - goto precision80; - } else if ( roundingPrecision == 64 ) { - roundIncrement = LIT64( 0x0000000000000400 ); - roundMask = LIT64( 0x00000000000007FF ); - } else if ( roundingPrecision == 32 ) { - roundIncrement = LIT64( 0x0000008000000000 ); - roundMask = LIT64( 0x000000FFFFFFFFFF ); - } else { - goto precision80; - } - - zSig0 |= ( zSig1 != 0 ); - if ( floatx80_internal_mode != float_round_nearest_even ) { - if ( floatx80_internal_mode == float_round_to_zero ) { - roundIncrement = 0; - } else { - roundIncrement = roundMask; - if ( zSign ) { - if ( floatx80_internal_mode == float_round_up ) roundIncrement = 0; - } else { - if ( floatx80_internal_mode == float_round_down ) roundIncrement = 0; - } - } - } - - roundBits = zSig0 & roundMask; - - zSig0 += roundIncrement; - if ( zSig0 < roundIncrement ) { - ++zExp; - zSig0 = LIT64( 0x8000000000000000 ); - } - roundIncrement = roundMask + 1; - if ( floatx80_internal_mode == float_round_nearest_even && ( roundBits<<1 == roundIncrement ) ) { - roundMask |= roundIncrement; - } - zSig0 &= ~ roundMask; - if ( zSig0 == 0 ) zExp = 0; - - *pzSign = zSign; - *pzExp = zExp; - *pzSig = zSig0; - return; - -precision80: - increment = ( (int64_t) zSig1 < 0 ); - if ( floatx80_internal_mode != float_round_nearest_even ) { - if ( 
floatx80_internal_mode == float_round_to_zero ) { - increment = 0; - } else { - if ( zSign ) { - increment = ( floatx80_internal_mode == float_round_down ) && zSig1; - } else { - increment = ( floatx80_internal_mode == float_round_up ) && zSig1; - } - } - } - if ( increment ) { - ++zSig0; - if ( zSig0 == 0 ) { - ++zExp; - zSig0 = LIT64( 0x8000000000000000 ); - } else { - zSig0 &= ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & ( floatx80_internal_mode == float_round_nearest_even ) ); - } - } else { - if ( zSig0 == 0 ) zExp = 0; - } - - *pzSign = zSign; - *pzExp = zExp; - *pzSig = zSig0; -} - -floatx80 getFloatInternalOverflow( void ) -{ - flag zSign; - int32_t zExp; - uint64_t zSig; - - getRoundedFloatInternal( floatx80_internal_precision, &zSign, &zExp, &zSig ); - - if (zExp > (0x7fff + 0x6000)) { // catastrophic - zExp = 0; - } else { - zExp -= 0x6000; - } - - return packFloatx80( zSign, zExp, zSig ); - -} - -floatx80 getFloatInternalUnderflow( void ) -{ - flag zSign; - int32_t zExp; - uint64_t zSig; - - getRoundedFloatInternal( floatx80_internal_precision, &zSign, &zExp, &zSig ); - - if (zExp < (0x0000 - 0x6000)) { // catastrophic - zExp = 0; - } else { - zExp += 0x6000; - } - - return packFloatx80( zSign, zExp, zSig ); - -} - -floatx80 getFloatInternalRoundedAll( void ) -{ - flag zSign; - int32_t zExp; - uint64_t zSig, zSig32, zSig64, zSig80; - - if (floatx80_internal_precision == 80) { - getRoundedFloatInternal( 80, &zSign, &zExp, &zSig80 ); - zSig = zSig80; - } else if (floatx80_internal_precision == 64) { - getRoundedFloatInternal( 80, &zSign, &zExp, &zSig80 ); - getRoundedFloatInternal( 64, &zSign, &zExp, &zSig64 ); - zSig = zSig64; - zSig |= zSig80 & LIT64( 0x00000000000007FF ); - } else { - getRoundedFloatInternal( 80, &zSign, &zExp, &zSig80 ); - getRoundedFloatInternal( 64, &zSign, &zExp, &zSig64 ); - getRoundedFloatInternal( 32, &zSign, &zExp, &zSig32 ); - zSig = zSig32; - zSig |= zSig64 & LIT64( 0x000000FFFFFFFFFF ); - zSig |= zSig80 & LIT64( 0x00000000000007FF 
); - } - - return packFloatx80( zSign, zExp & 0x7FFF, zSig ); - -} - -floatx80 getFloatInternalRoundedSome( void ) -{ - flag zSign; - int32_t zExp; - uint64_t zSig, zSig32, zSig64, zSig80; - - if (floatx80_internal_precision == 80) { - getRoundedFloatInternal( 80, &zSign, &zExp, &zSig80 ); - zSig = zSig80; - } else if (floatx80_internal_precision == 64) { - getRoundedFloatInternal( 64, &zSign, &zExp, &zSig64 ); - zSig80 = floatx80_internal_sig0; - if (zSig64 != (zSig80 & LIT64( 0xFFFFFFFFFFFFF800 ))) { - zSig80++; - } - zSig = zSig64; - zSig |= zSig80 & LIT64( 0x00000000000007FF ); - } else { - getRoundedFloatInternal( 32, &zSign, &zExp, &zSig32 ); - zSig80 = floatx80_internal_sig0; - if (zSig32 != (zSig80 & LIT64( 0xFFFFFF0000000000 ))) { - zSig80++; - } - zSig = zSig32; - zSig |= zSig80 & LIT64( 0x000000FFFFFFFFFF ); - } - - return packFloatx80( zSign, zExp & 0x7FFF, zSig ); - -} - -floatx80 getFloatInternalFloatx80( void ) -{ - flag zSign; - int32_t zExp; - uint64_t zSig; - - getRoundedFloatInternal( 80, &zSign, &zExp, &zSig ); - - return packFloatx80( zSign, zExp & 0x7FFF, zSig ); - -} - -floatx80 getFloatInternalUnrounded( void ) -{ - flag zSign = floatx80_internal_sign; - int32_t zExp = floatx80_internal_exp; - uint64_t zSig = floatx80_internal_sig0; - - return packFloatx80( zSign, zExp & 0x7FFF, zSig ); - -} - -uint64_t getFloatInternalGRS( void ) -{ -#if 1 - if (floatx80_internal_sig1) - return 5; - - if (floatx80_internal_precision == 64 && - floatx80_internal_sig0 & LIT64( 0x00000000000007FF )) { - return 1; - } - if (floatx80_internal_precision == 32 && - floatx80_internal_sig0 & LIT64( 0x000000FFFFFFFFFF )) { - return 1; - } - - return 0; -#else - uint64_t roundbits; - shift64RightJamming(floatx80_internal_sig1, 61, &roundbits); - - return roundbits; -#endif -} - -/*---------------------------------------------------------------------------- -| Functions and definitions to determine: (1) whether tininess for underflow -| is detected before or after 
rounding by default, (2) what (if anything) -| happens when exceptions are raised, (3) how signaling NaNs are distinguished -| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs -| are propagated from function inputs to output. These details are target- -| specific. -*----------------------------------------------------------------------------*/ -#include "softfloat-specialize.h" - -/*---------------------------------------------------------------------------- -| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6 -| and 7, and returns the properly rounded 32-bit integer corresponding to the -| input. If `zSign' is 1, the input is negated before being converted to an -| integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input -| is simply rounded to an integer, with the inexact exception raised if the -| input cannot be represented exactly as an integer. However, if the fixed- -| point input is too large, the invalid exception is raised and the largest -| positive or negative integer is returned. -*----------------------------------------------------------------------------*/ - -static int32_t roundAndPackInt32(flag zSign, uint64_t absZ, float_status *status) -{ - int8_t roundingMode; - flag roundNearestEven; - int8_t roundIncrement, roundBits; - int32_t z; - - roundingMode = status->float_rounding_mode; - roundNearestEven = ( roundingMode == float_round_nearest_even ); - switch (roundingMode) { - case float_round_nearest_even: - case float_round_ties_away: - roundIncrement = 0x40; - break; - case float_round_to_zero: - roundIncrement = 0; - break; - case float_round_up: - roundIncrement = zSign ? 0 : 0x7f; - break; - case float_round_down: - roundIncrement = zSign ? 
0x7f : 0; - break; - default: - abort(); - } - roundBits = absZ & 0x7F; - absZ = ( absZ + roundIncrement )>>7; - absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven ); - z = absZ; - if ( zSign ) z = - z; - if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) { - float_raise(float_flag_invalid, status); - return zSign ? (int32_t) 0x80000000 : 0x7FFFFFFF; - } - if (roundBits) { - status->float_exception_flags |= float_flag_inexact; - } - return z; - -} - - -#ifdef SOFTFLOAT_68K // 30-01-2017: Added for Previous -static int16_t roundAndPackInt16( flag zSign, uint64_t absZ, float_status *status ) -{ - int8_t roundingMode; - flag roundNearestEven; - int8_t roundIncrement, roundBits; - int16_t z; - - roundingMode = status->float_rounding_mode; - roundNearestEven = ( roundingMode == float_round_nearest_even ); - roundIncrement = 0x40; - if ( ! roundNearestEven ) { - if ( roundingMode == float_round_to_zero ) { - roundIncrement = 0; - } - else { - roundIncrement = 0x7F; - if ( zSign ) { - if ( roundingMode == float_round_up ) roundIncrement = 0; - } - else { - if ( roundingMode == float_round_down ) roundIncrement = 0; - } - } - } - roundBits = absZ & 0x7F; - absZ = ( absZ + roundIncrement )>>7; - absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven ); - z = absZ; - if ( zSign ) z = - z; - z = (int16_t) z; - if ( ( absZ>>16 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) { - float_raise( float_flag_invalid, status ); - return zSign ? (int16_t) 0x8000 : 0x7FFF; - } - if ( roundBits ) status->float_exception_flags |= float_flag_inexact; - return z; - -} - -static int8_t roundAndPackInt8( flag zSign, uint64_t absZ, float_status *status ) -{ - int8_t roundingMode; - flag roundNearestEven; - int8_t roundIncrement, roundBits; - int8_t z; - - roundingMode = status->float_rounding_mode; - roundNearestEven = ( roundingMode == float_round_nearest_even ); - roundIncrement = 0x40; - if ( ! 
roundNearestEven ) { - if ( roundingMode == float_round_to_zero ) { - roundIncrement = 0; - } - else { - roundIncrement = 0x7F; - if ( zSign ) { - if ( roundingMode == float_round_up ) roundIncrement = 0; - } - else { - if ( roundingMode == float_round_down ) roundIncrement = 0; - } - } - } - roundBits = absZ & 0x7F; - absZ = ( absZ + roundIncrement )>>7; - absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven ); - z = absZ; - if ( zSign ) z = - z; - z = (int8_t) z; - if ( ( absZ>>8 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) { - float_raise( float_flag_invalid, status ); - return zSign ? (int8_t) 0x80 : 0x7F; - } - if ( roundBits ) status->float_exception_flags |= float_flag_inexact; - return z; - -} -#endif // End of addition for Previous - -/*---------------------------------------------------------------------------- -| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and -| `absZ1', with binary point between bits 63 and 64 (between the input words), -| and returns the properly rounded 64-bit integer corresponding to the input. -| If `zSign' is 1, the input is negated before being converted to an integer. -| Ordinarily, the fixed-point input is simply rounded to an integer, with -| the inexact exception raised if the input cannot be represented exactly as -| an integer. However, if the fixed-point input is too large, the invalid -| exception is raised and the largest positive or negative integer is -| returned. 
-*----------------------------------------------------------------------------*/ - -static int64_t roundAndPackInt64(flag zSign, uint64_t absZ0, uint64_t absZ1, - float_status *status) -{ - int8_t roundingMode; - flag roundNearestEven, increment; - int64_t z; - - roundingMode = status->float_rounding_mode; - roundNearestEven = ( roundingMode == float_round_nearest_even ); - switch (roundingMode) { - case float_round_nearest_even: - case float_round_ties_away: - increment = ((int64_t) absZ1 < 0); - break; - case float_round_to_zero: - increment = 0; - break; - case float_round_up: - increment = !zSign && absZ1; - break; - case float_round_down: - increment = zSign && absZ1; - break; - default: - abort(); - } - if ( increment ) { - ++absZ0; - if ( absZ0 == 0 ) goto overflow; - absZ0 &= ~ ( ( (uint64_t) ( absZ1<<1 ) == 0 ) & roundNearestEven ); - } - z = absZ0; - if ( zSign ) z = - z; - if ( z && ( ( z < 0 ) ^ zSign ) ) { - overflow: - float_raise(float_flag_invalid, status); - return - zSign ? (int64_t) LIT64( 0x8000000000000000 ) - : LIT64( 0x7FFFFFFFFFFFFFFF ); - } - if (absZ1) { - status->float_exception_flags |= float_flag_inexact; - } - return z; - -} - -/*---------------------------------------------------------------------------- -| Returns the fraction bits of the single-precision floating-point value `a'. -*----------------------------------------------------------------------------*/ - -static inline uint32_t extractFloat32Frac( float32 a ) -{ - - return float32_val(a) & 0x007FFFFF; - -} - -/*---------------------------------------------------------------------------- -| Returns the exponent bits of the single-precision floating-point value `a'. 
-*----------------------------------------------------------------------------*/ - -static inline int extractFloat32Exp(float32 a) -{ - - return ( float32_val(a)>>23 ) & 0xFF; - -} - -/*---------------------------------------------------------------------------- -| Returns the sign bit of the single-precision floating-point value `a'. -*----------------------------------------------------------------------------*/ - -static inline flag extractFloat32Sign( float32 a ) -{ - - return float32_val(a)>>31; - -} - -/*---------------------------------------------------------------------------- -| Normalizes the subnormal single-precision floating-point value represented -| by the denormalized significand `aSig'. The normalized exponent and -| significand are stored at the locations pointed to by `zExpPtr' and -| `zSigPtr', respectively. -*----------------------------------------------------------------------------*/ - -static void - normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr) -{ - int8_t shiftCount; - - shiftCount = countLeadingZeros32( aSig ) - 8; - *zSigPtr = aSig<float_rounding_mode; - roundNearestEven = ( roundingMode == float_round_nearest_even ); - switch (roundingMode) { - case float_round_nearest_even: - case float_round_ties_away: - roundIncrement = 0x40; - break; - case float_round_to_zero: - roundIncrement = 0; - break; - case float_round_up: - roundIncrement = zSign ? 0 : 0x7f; - break; - case float_round_down: - roundIncrement = zSign ? 
0x7f : 0; - break; - default: - abort(); - break; - } - roundBits = zSig & 0x7F; - if ( 0xFD <= (uint16_t) zExp ) { - if ( ( 0xFD < zExp ) - || ( ( zExp == 0xFD ) - && ( (int32_t) ( zSig + roundIncrement ) < 0 ) ) - ) { -#ifdef SOFTFLOAT_68K - float_raise( float_flag_overflow, status ); - saveFloat32Internal( zSign, zExp, zSig, status ); - if ( roundBits ) float_raise( float_flag_inexact, status ); -#else - float_raise(float_flag_overflow | float_flag_inexact, status); -#endif - return packFloat32( zSign, 0xFF, - ( roundIncrement == 0 )); - } - if ( zExp < 0 ) { - if (status->flush_to_zero) { - //float_raise(float_flag_output_denormal, status); - return packFloat32(zSign, 0, 0); - } - isTiny = - (status->float_detect_tininess - == float_tininess_before_rounding) - || ( zExp < -1 ) - || ( zSig + roundIncrement < 0x80000000 ); -#ifdef SOFTFLOAT_68K - if ( isTiny ) { - float_raise( float_flag_underflow, status ); - saveFloat32Internal( zSign, zExp, zSig, status ); - } -#endif - shift32RightJamming( zSig, - zExp, &zSig ); - zExp = 0; - roundBits = zSig & 0x7F; -#ifndef SOFTFLOAT_68K - if (isTiny && roundBits) - float_raise(float_flag_underflow, status); -#endif - } - } - if (roundBits) { - status->float_exception_flags |= float_flag_inexact; - } - zSig = ( zSig + roundIncrement )>>7; - zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven ); - if ( zSig == 0 ) zExp = 0; - return packFloat32( zSign, zExp, zSig ); - -} - -/*---------------------------------------------------------------------------- -| Returns the fraction bits of the double-precision floating-point value `a'. -*----------------------------------------------------------------------------*/ - -static inline uint64_t extractFloat64Frac( float64 a ) -{ - - return float64_val(a) & LIT64( 0x000FFFFFFFFFFFFF ); - -} - -/*---------------------------------------------------------------------------- -| Returns the exponent bits of the double-precision floating-point value `a'. 
-*----------------------------------------------------------------------------*/ - -static inline int extractFloat64Exp(float64 a) -{ - - return ( float64_val(a)>>52 ) & 0x7FF; - -} - -/*---------------------------------------------------------------------------- -| Returns the sign bit of the double-precision floating-point value `a'. -*----------------------------------------------------------------------------*/ - -static inline flag extractFloat64Sign( float64 a ) -{ - - return float64_val(a)>>63; - -} - -/*---------------------------------------------------------------------------- -| If `a' is denormal and we are in flush-to-zero mode then set the -| input-denormal exception and return zero. Otherwise just return the value. -*----------------------------------------------------------------------------*/ -float64 float64_squash_input_denormal(float64 a, float_status *status) -{ - if (status->flush_inputs_to_zero) { - if (extractFloat64Exp(a) == 0 && extractFloat64Frac(a) != 0) { - //float_raise(float_flag_input_denormal, status); - return make_float64(float64_val(a) & (1ULL << 63)); - } - } - return a; -} - -/*---------------------------------------------------------------------------- -| Normalizes the subnormal double-precision floating-point value represented -| by the denormalized significand `aSig'. The normalized exponent and -| significand are stored at the locations pointed to by `zExpPtr' and -| `zSigPtr', respectively. 
-*----------------------------------------------------------------------------*/ - -static void - normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr, uint64_t *zSigPtr) -{ - int8_t shiftCount; - - shiftCount = countLeadingZeros64( aSig ) - 11; - *zSigPtr = aSig<float_rounding_mode; - roundNearestEven = ( roundingMode == float_round_nearest_even ); - switch (roundingMode) { - case float_round_nearest_even: - case float_round_ties_away: - roundIncrement = 0x200; - break; - case float_round_to_zero: - roundIncrement = 0; - break; - case float_round_up: - roundIncrement = zSign ? 0 : 0x3ff; - break; - case float_round_down: - roundIncrement = zSign ? 0x3ff : 0; - break; - default: - abort(); - } - roundBits = zSig & 0x3FF; - if ( 0x7FD <= (uint16_t) zExp ) { - if ( ( 0x7FD < zExp ) - || ( ( zExp == 0x7FD ) - && ( (int64_t) ( zSig + roundIncrement ) < 0 ) ) - ) { -#ifdef SOFTFLOAT_68K - float_raise( float_flag_overflow, status ); - saveFloat64Internal( zSign, zExp, zSig, status ); - if ( roundBits ) float_raise( float_flag_inexact, status ); -#else - float_raise(float_flag_overflow | float_flag_inexact, status); -#endif - return packFloat64( zSign, 0x7FF, - ( roundIncrement == 0 )); - } - if ( zExp < 0 ) { - if (status->flush_to_zero) { - //float_raise(float_flag_output_denormal, status); - return packFloat64(zSign, 0, 0); - } - isTiny = - (status->float_detect_tininess - == float_tininess_before_rounding) - || ( zExp < -1 ) - || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) ); -#ifdef SOFTFLOAT_68K - if ( isTiny ) { - float_raise( float_flag_underflow, status ); - saveFloat64Internal( zSign, zExp, zSig, status ); - } -#endif - shift64RightJamming( zSig, - zExp, &zSig ); - zExp = 0; - roundBits = zSig & 0x3FF; -#ifndef SOFTFLOAT_68K - if (isTiny && roundBits) - float_raise(float_flag_underflow, status); -#endif - } - } - if (roundBits) { - status->float_exception_flags |= float_flag_inexact; - } - zSig = ( zSig + roundIncrement )>>10; - zSig &= ~ ( ( ( 
roundBits ^ 0x200 ) == 0 ) & roundNearestEven ); - if ( zSig == 0 ) zExp = 0; - return packFloat64( zSign, zExp, zSig ); - -} - -/*---------------------------------------------------------------------------- -| Returns the fraction bits of the extended double-precision floating-point -| value `a'. -*----------------------------------------------------------------------------*/ - -uint64_t extractFloatx80Frac( floatx80 a ) -{ - - return a.low; - -} - -/*---------------------------------------------------------------------------- -| Returns the exponent bits of the extended double-precision floating-point -| value `a'. -*----------------------------------------------------------------------------*/ - -int32_t extractFloatx80Exp( floatx80 a ) -{ - - return a.high & 0x7FFF; - -} - -/*---------------------------------------------------------------------------- -| Returns the sign bit of the extended double-precision floating-point value -| `a'. -*----------------------------------------------------------------------------*/ - -flag extractFloatx80Sign( floatx80 a ) -{ - - return a.high>>15; - -} - -/*---------------------------------------------------------------------------- -| Normalizes the subnormal extended double-precision floating-point value -| represented by the denormalized significand `aSig'. The normalized exponent -| and significand are stored at the locations pointed to by `zExpPtr' and -| `zSigPtr', respectively. 
-*----------------------------------------------------------------------------*/ - -void normalizeFloatx80Subnormal( uint64_t aSig, int32_t *zExpPtr, uint64_t *zSigPtr ) -{ - int8_t shiftCount; - - shiftCount = countLeadingZeros64( aSig ); - *zSigPtr = aSig<float_rounding_mode; - roundNearestEven = ( roundingMode == float_round_nearest_even ); - if ( roundingPrecision == 80 ) goto precision80; - if ( roundingPrecision == 64 ) { - roundIncrement = LIT64( 0x0000000000000400 ); - roundMask = LIT64( 0x00000000000007FF ); - } - else if ( roundingPrecision == 32 ) { - roundIncrement = LIT64( 0x0000008000000000 ); - roundMask = LIT64( 0x000000FFFFFFFFFF ); - } - else { - goto precision80; - } - zSig0 |= ( zSig1 != 0 ); - switch (roundingMode) { - case float_round_nearest_even: - case float_round_ties_away: - break; - case float_round_to_zero: - roundIncrement = 0; - break; - case float_round_up: - roundIncrement = zSign ? 0 : roundMask; - break; - case float_round_down: - roundIncrement = zSign ? 
roundMask : 0; - break; - default: - abort(); - } - roundBits = zSig0 & roundMask; -#ifdef SOFTFLOAT_68K - if ( 0x7FFE <= (uint32_t) zExp ) { -#else - if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) { -#endif - if ( ( 0x7FFE < zExp ) - || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) ) - ) { - goto overflow; - } -#ifdef SOFTFLOAT_68K - if ( zExp < 0 ) { -#else - if ( zExp <= 0 ) { -#endif - if (status->flush_to_zero) { - //float_raise(float_flag_output_denormal, status); - return packFloatx80(zSign, 0, 0); - } - isTiny = - (status->float_detect_tininess - == float_tininess_before_rounding) -#ifdef SOFTFLOAT_68K - || ( zExp < -1 ) -#else - || ( zExp < 0 ) -#endif - || ( zSig0 <= zSig0 + roundIncrement ); -#ifdef SOFTFLOAT_68K - if ( isTiny ) { - float_raise( float_flag_underflow, status ); - saveFloatx80Internal( zSign, zExp, zSig0, zSig1, status ); - } - shift64RightJamming( zSig0, -zExp, &zSig0 ); -#else - shift64RightJamming( zSig0, 1 - zExp, &zSig0 ); -#endif - zExp = 0; - roundBits = zSig0 & roundMask; -#ifdef SOFTFLOAT_68K - if ( isTiny ) float_raise( float_flag_underflow, status ); -#else - if (isTiny && roundBits) { - float_raise(float_flag_underflow, status); - } -#endif -if (roundBits) { - status->float_exception_flags |= float_flag_inexact; - } - zSig0 += roundIncrement; -#ifndef SOFTFLOAT_68K - if ( (int64_t) zSig0 < 0 ) zExp = 1; -#endif - roundIncrement = roundMask + 1; - if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) { - roundMask |= roundIncrement; - } - zSig0 &= ~ roundMask; - return packFloatx80( zSign, zExp, zSig0 ); - } - } - if (roundBits) { - status->float_exception_flags |= float_flag_inexact; - } - zSig0 += roundIncrement; - if ( zSig0 < roundIncrement ) { - ++zExp; - zSig0 = LIT64( 0x8000000000000000 ); - } - roundIncrement = roundMask + 1; - if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) { - roundMask |= roundIncrement; - } - zSig0 &= ~ roundMask; - if ( zSig0 == 0 ) zExp = 0; - return packFloatx80( 
zSign, zExp, zSig0 ); - precision80: - switch (roundingMode) { - case float_round_nearest_even: - case float_round_ties_away: - increment = ((int64_t)zSig1 < 0); - break; - case float_round_to_zero: - increment = 0; - break; - case float_round_up: - increment = !zSign && zSig1; - break; - case float_round_down: - increment = zSign && zSig1; - break; - default: - abort(); - } -#ifdef SOFTFLOAT_68K - if ( 0x7FFE <= (uint32_t) zExp ) { -#else - if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) { -#endif - if ( ( 0x7FFE < zExp ) - || ( ( zExp == 0x7FFE ) - && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) ) - && increment - ) - ) { - roundMask = 0; - overflow: -#ifndef SOFTFLOAT_68K - float_raise(float_flag_overflow | float_flag_inexact, status); -#else - float_raise( float_flag_overflow, status ); - saveFloatx80Internal( zSign, zExp, zSig0, zSig1, status ); - if ( ( zSig0 & roundMask ) || zSig1 ) float_raise( float_flag_inexact, status ); -#endif - if ( ( roundingMode == float_round_to_zero ) - || ( zSign && ( roundingMode == float_round_up ) ) - || ( ! zSign && ( roundingMode == float_round_down ) ) - ) { - return packFloatx80( zSign, 0x7FFE, ~ roundMask ); - } - return packFloatx80( zSign, 0x7FFF, floatx80_default_infinity_low ); - } -#ifdef SOFTFLOAT_68K - if ( zExp < 0 ) { -#else - if ( zExp <= 0 ) { -#endif - isTiny = - (status->float_detect_tininess - == float_tininess_before_rounding) -#ifdef SOFTFLOAT_68K - || ( zExp < -1 ) -#else - || ( zExp < 0 ) -#endif - || ! 
increment - || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) ); -#ifdef SOFTFLOAT_68K - if ( isTiny ) { - float_raise( float_flag_underflow, status ); - saveFloatx80Internal( zSign, zExp, zSig0, zSig1, status ); - } - shift64ExtraRightJamming( zSig0, zSig1, -zExp, &zSig0, &zSig1 ); -#else - shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 ); -#endif - zExp = 0; -#ifndef SOFTFLOAT_68K - if ( isTiny && zSig1 ) float_raise( float_flag_underflow, status ); -#endif - if (zSig1) float_raise(float_flag_inexact, status); - switch (roundingMode) { - case float_round_nearest_even: - case float_round_ties_away: - increment = ((int64_t)zSig1 < 0); - break; - case float_round_to_zero: - increment = 0; - break; - case float_round_up: - increment = !zSign && zSig1; - break; - case float_round_down: - increment = zSign && zSig1; - break; - default: - abort(); - } - if ( increment ) { - ++zSig0; - zSig0 &= - ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven ); -#ifndef SOFTFLOAT_68K - if ( (int64_t) zSig0 < 0 ) zExp = 1; -#endif - } - return packFloatx80( zSign, zExp, zSig0 ); - } - } - if (zSig1) { - status->float_exception_flags |= float_flag_inexact; - } - if ( increment ) { - ++zSig0; - if ( zSig0 == 0 ) { - ++zExp; - zSig0 = LIT64( 0x8000000000000000 ); - } - else { - zSig0 &= ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven ); - } - } - else { - if ( zSig0 == 0 ) zExp = 0; - } - return packFloatx80( zSign, zExp, zSig0 ); - -} - -#else // SOFTFLOAT_68K - -floatx80 roundAndPackFloatx80( int8_t roundingPrecision, flag zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1, float_status *status ) -{ - int8_t roundingMode; - flag roundNearestEven, increment; - int64_t roundIncrement, roundMask, roundBits; - int32_t expOffset; - - roundingMode = status->float_rounding_mode; - roundNearestEven = ( roundingMode == float_round_nearest_even ); - if ( roundingPrecision == 80 ) goto precision80; - if ( roundingPrecision == 64 ) { - roundIncrement = LIT64( 
0x0000000000000400 ); - roundMask = LIT64( 0x00000000000007FF ); - expOffset = 0x3C00; - } else if ( roundingPrecision == 32 ) { - roundIncrement = LIT64( 0x0000008000000000 ); - roundMask = LIT64( 0x000000FFFFFFFFFF ); - expOffset = 0x3F80; - } else { - goto precision80; - } - zSig0 |= ( zSig1 != 0 ); - if ( ! roundNearestEven ) { - if ( roundingMode == float_round_to_zero ) { - roundIncrement = 0; - } else { - roundIncrement = roundMask; - if ( zSign ) { - if ( roundingMode == float_round_up ) roundIncrement = 0; - } else { - if ( roundingMode == float_round_down ) roundIncrement = 0; - } - } - } - roundBits = zSig0 & roundMask; - if ( ( ( 0x7FFE - expOffset ) < zExp ) || - ( ( zExp == ( 0x7FFE - expOffset ) ) && ( zSig0 + roundIncrement < zSig0 ) ) ) { - float_raise( float_flag_overflow, status ); - saveFloatx80Internal( roundingPrecision, zSign, zExp, zSig0, zSig1, status ); - if ( zSig0 & roundMask ) float_raise( float_flag_inexact, status ); - if ( ( roundingMode == float_round_to_zero ) - || ( zSign && ( roundingMode == float_round_up ) ) - || ( ! 
zSign && ( roundingMode == float_round_down ) ) - ) { - return packFloatx80( zSign, 0x7FFE - expOffset, ~ roundMask ); - } - return packFloatx80( zSign, 0x7FFF, floatx80_default_infinity_low ); - } - if ( zExp < ( expOffset + 1 ) ) { - float_raise( float_flag_underflow, status ); - saveFloatx80Internal( roundingPrecision, zSign, zExp, zSig0, zSig1, status ); - shift64RightJamming( zSig0, -( zExp - ( expOffset + 1 ) ), &zSig0 ); - zExp = expOffset + 1; - roundBits = zSig0 & roundMask; - if ( roundBits ) float_raise( float_flag_inexact, status ); - zSig0 += roundIncrement; - roundIncrement = roundMask + 1; - if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) { - roundMask |= roundIncrement; - } - zSig0 &= ~ roundMask; - return packFloatx80( zSign, zExp, zSig0 ); - } - if ( roundBits ) { - float_raise( float_flag_inexact, status ); - saveFloatx80Internal( roundingPrecision, zSign, zExp, zSig0, zSig1, status); - } - zSig0 += roundIncrement; - if ( zSig0 < roundIncrement ) { - ++zExp; - zSig0 = LIT64( 0x8000000000000000 ); - } - roundIncrement = roundMask + 1; - if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) { - roundMask |= roundIncrement; - } - zSig0 &= ~ roundMask; - if ( zSig0 == 0 ) zExp = 0; - return packFloatx80( zSign, zExp, zSig0 ); -precision80: - increment = ( (int64_t) zSig1 < 0 ); - if ( ! 
roundNearestEven ) { - if ( roundingMode == float_round_to_zero ) { - increment = 0; - } else { - if ( zSign ) { - increment = ( roundingMode == float_round_down ) && zSig1; - } else { - increment = ( roundingMode == float_round_up ) && zSig1; - } - } - } - if ( 0x7FFE <= (uint32_t) zExp ) { - if ( ( 0x7FFE < zExp ) || - ( ( zExp == 0x7FFE ) && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) ) && increment ) - ) { - roundMask = 0; - float_raise( float_flag_overflow, status ); - saveFloatx80Internal( roundingPrecision, zSign, zExp, zSig0, zSig1, status ); - if ( ( zSig0 & roundMask ) || zSig1 ) float_raise( float_flag_inexact, status ); - if ( ( roundingMode == float_round_to_zero ) - || ( zSign && ( roundingMode == float_round_up ) ) - || ( ! zSign && ( roundingMode == float_round_down ) ) - ) { - return packFloatx80( zSign, 0x7FFE, ~ roundMask ); - } - return packFloatx80( zSign, 0x7FFF, floatx80_default_infinity_low ); - } - if ( zExp < 0 ) { - float_raise( float_flag_underflow, status ); - saveFloatx80Internal( roundingPrecision, zSign, zExp, zSig0, zSig1, status); - shift64ExtraRightJamming( zSig0, zSig1, -zExp, &zSig0, &zSig1 ); - zExp = 0; - if ( zSig1 ) float_raise( float_flag_inexact, status ); - if ( roundNearestEven ) { - increment = ( (int64_t) zSig1 < 0 ); - } else { - if ( zSign ) { - increment = ( roundingMode == float_round_down ) && zSig1; - } else { - increment = ( roundingMode == float_round_up ) && zSig1; - } - } - if ( increment ) { - ++zSig0; - zSig0 &= - ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven ); - } - return packFloatx80( zSign, zExp, zSig0 ); - } - } - if ( zSig1 ) { - float_raise( float_flag_inexact, status ); - saveFloatx80Internal( roundingPrecision, zSign, zExp, zSig0, zSig1, status ); - } - if ( increment ) { - ++zSig0; - if ( zSig0 == 0 ) { - ++zExp; - zSig0 = LIT64( 0x8000000000000000 ); - } else { - zSig0 &= ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven ); - } - } else { - if ( zSig0 == 0 ) zExp = 0; - } - return 
packFloatx80( zSign, zExp, zSig0 ); - -} - -#endif - -#ifdef SOFTFLOAT_68K // 21-01-2017: Added for Previous -floatx80 roundSigAndPackFloatx80( int8_t roundingPrecision, flag zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1, float_status *status ) -{ - int8_t roundingMode; - flag roundNearestEven, isTiny; - int64_t roundIncrement, roundMask, roundBits; - - roundingMode = status->float_rounding_mode; - roundNearestEven = ( roundingMode == float_round_nearest_even ); - if ( roundingPrecision == 32 ) { - roundIncrement = LIT64( 0x0000008000000000 ); - roundMask = LIT64( 0x000000FFFFFFFFFF ); - } else if ( roundingPrecision == 64 ) { - roundIncrement = LIT64( 0x0000000000000400 ); - roundMask = LIT64( 0x00000000000007FF ); - } else { - return roundAndPackFloatx80( 80, zSign, zExp, zSig0, zSig1, status ); - } - zSig0 |= ( zSig1 != 0 ); - if ( ! roundNearestEven ) { - if ( roundingMode == float_round_to_zero ) { - roundIncrement = 0; - } - else { - roundIncrement = roundMask; - if ( zSign ) { - if ( roundingMode == float_round_up ) roundIncrement = 0; - } - else { - if ( roundingMode == float_round_down ) roundIncrement = 0; - } - } - } - roundBits = zSig0 & roundMask; - - if ( 0x7FFE <= (uint32_t) zExp ) { - if ( ( 0x7FFE < zExp ) - || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) ) - ) { - float_raise( float_flag_overflow, status ); - saveFloatx80Internal( roundingPrecision, zSign, zExp, zSig0, zSig1, status); - if ( zSig0 & roundMask ) float_raise( float_flag_inexact, status ); - if ( ( roundingMode == float_round_to_zero ) - || ( zSign && ( roundingMode == float_round_up ) ) - || ( ! 
zSign && ( roundingMode == float_round_down ) ) - ) { - return packFloatx80( zSign, 0x7FFE, LIT64( 0xFFFFFFFFFFFFFFFF ) ); - } - return packFloatx80( zSign, 0x7FFF, floatx80_default_infinity_low ); - } - - if ( zExp < 0 ) { - isTiny = - ( status->float_detect_tininess == float_tininess_before_rounding ) - || ( zExp < -1 ) - || ( zSig0 <= zSig0 + roundIncrement ); - if ( isTiny ) { - float_raise( float_flag_underflow, status ); - saveFloatx80Internal( roundingPrecision, zSign, zExp, zSig0, zSig1, status ); - } - shift64RightJamming( zSig0, -zExp, &zSig0 ); - zExp = 0; - roundBits = zSig0 & roundMask; - if ( roundBits ) float_raise ( float_flag_inexact, status ); - zSig0 += roundIncrement; - if ( roundNearestEven && ( roundBits == roundIncrement ) ) { - roundMask |= roundIncrement<<1; - } - zSig0 &= ~roundMask; - return packFloatx80( zSign, zExp, zSig0 ); - } - } - if ( roundBits ) { - float_raise( float_flag_inexact, status ); - saveFloatx80Internal( roundingPrecision, zSign, zExp, zSig0, zSig1, status ); - } - zSig0 += roundIncrement; - if ( zSig0 < roundIncrement ) { - ++zExp; - zSig0 = LIT64( 0x8000000000000000 ); - } - roundIncrement = roundMask + 1; - if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) { - roundMask |= roundIncrement; - } - zSig0 &= ~ roundMask; - if ( zSig0 == 0 ) zExp = 0; - return packFloatx80( zSign, zExp, zSig0 ); - -} -#endif // End of Addition for Previous - - -/*---------------------------------------------------------------------------- -| Takes an abstract floating-point value having sign `zSign', exponent -| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1', -| and returns the proper extended double-precision floating-point value -| corresponding to the abstract input. This routine is just like -| `roundAndPackFloatx80' except that the input significand does not have to be -| normalized. 
-*----------------------------------------------------------------------------*/ - -static floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision, - flag zSign, int32_t zExp, - uint64_t zSig0, uint64_t zSig1, - float_status *status) -{ - int8_t shiftCount; - - if ( zSig0 == 0 ) { - zSig0 = zSig1; - zSig1 = 0; - zExp -= 64; - } - shiftCount = countLeadingZeros64( zSig0 ); - shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 ); - zExp -= shiftCount; - return roundAndPackFloatx80(roundingPrecision, zSign, zExp, - zSig0, zSig1, status); - -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the 32-bit two's complement integer `a' -| to the extended double-precision floating-point format. The conversion -| is performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic. -*----------------------------------------------------------------------------*/ - -floatx80 int32_to_floatx80(int32_t a) -{ - flag zSign; - uint32_t absA; - int8_t shiftCount; - uint64_t zSig; - - if ( a == 0 ) return packFloatx80( 0, 0, 0 ); - zSign = ( a < 0 ); - absA = zSign ? - a : a; - shiftCount = countLeadingZeros32( absA ) + 32; - zSig = absA; - return packFloatx80( zSign, 0x403E - shiftCount, zSig<>32); - } - float_raise( float_flag_invalid, status ); - return aSign ? 
(int32_t) 0x80000000 : 0x7FFFFFFF; - } -#else - if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0; -#endif - shiftCount = 0x4037 - aExp; - if ( shiftCount <= 0 ) shiftCount = 1; - shift64RightJamming( aSig, shiftCount, &aSig ); - return roundAndPackInt32(aSign, aSig, status); - -} - -#ifdef SOFTFLOAT_68K // 30-01-2017: Addition for Previous -int16_t floatx80_to_int16( floatx80 a, float_status *status) -{ - flag aSign; - int32_t aExp, shiftCount; - uint64_t aSig; - - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - if ( aExp == 0x7FFF ) { - float_raise( float_flag_invalid, status ); - if ( (uint64_t) ( aSig<<1 ) ) { - a = propagateFloatx80NaNOneArg( a, status ); - if ( a.low == aSig ) float_raise( float_flag_invalid, status ); - return (int16_t)(a.low>>48); - } - return aSign ? (int16_t) 0x8000 : 0x7FFF; - } - shiftCount = 0x4037 - aExp; - if ( shiftCount <= 0 ) shiftCount = 1; - shift64RightJamming( aSig, shiftCount, &aSig ); - return roundAndPackInt16( aSign, aSig, status ); - -} -int8_t floatx80_to_int8( floatx80 a, float_status *status) -{ - flag aSign; - int32_t aExp, shiftCount; - uint64_t aSig; - - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - if ( aExp == 0x7FFF ) { - if ( (uint64_t) ( aSig<<1 ) ) { - a = propagateFloatx80NaNOneArg( a, status ); - if ( a.low == aSig ) float_raise( float_flag_invalid, status ); - return (int8_t)(a.low>>56); - } - float_raise( float_flag_invalid, status ); - return aSign ? 
(int8_t) 0x80 : 0x7F; - } - shiftCount = 0x4037 - aExp; - if ( shiftCount <= 0 ) shiftCount = 1; - shift64RightJamming( aSig, shiftCount, &aSig ); - return roundAndPackInt8( aSign, aSig, status ); - -} -#endif // End of addition for Previous - - -/*---------------------------------------------------------------------------- -| Returns the result of converting the extended double-precision floating- -| point value `a' to the 32-bit two's complement integer format. The -| conversion is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic, except that the conversion is always rounded -| toward zero. If `a' is a NaN, the largest positive integer is returned. -| Otherwise, if the conversion overflows, the largest integer with the same -| sign as `a' is returned. -*----------------------------------------------------------------------------*/ - -int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status) -{ - flag aSign; - int32_t aExp, shiftCount; - uint64_t aSig, savedASig; - int32_t z; - - if (floatx80_invalid_encoding(a)) { - float_raise(float_flag_invalid, status); - return 1 << 31; - } - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - if ( 0x401E < aExp ) { - if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0; - goto invalid; - } - else if ( aExp < 0x3FFF ) { - if (aExp || aSig) { - status->float_exception_flags |= float_flag_inexact; - } - return 0; - } - shiftCount = 0x403E - aExp; - savedASig = aSig; - aSig >>= shiftCount; - z = aSig; - if ( aSign ) z = - z; - if ( ( z < 0 ) ^ aSign ) { - invalid: - float_raise(float_flag_invalid, status); - return aSign ? 
(int32_t) 0x80000000 : 0x7FFFFFFF; - } - if ( ( aSig<float_exception_flags |= float_flag_inexact; - } - return z; - -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the extended double-precision floating- -| point value `a' to the 64-bit two's complement integer format. The -| conversion is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic---which means in particular that the conversion -| is rounded according to the current rounding mode. If `a' is a NaN, -| the largest positive integer is returned. Otherwise, if the conversion -| overflows, the largest integer with the same sign as `a' is returned. -*----------------------------------------------------------------------------*/ - -int64_t floatx80_to_int64(floatx80 a, float_status *status) -{ - flag aSign; - int32_t aExp, shiftCount; - uint64_t aSig, aSigExtra; - - if (floatx80_invalid_encoding(a)) { - float_raise(float_flag_invalid, status); - return 1ULL << 63; - } - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - shiftCount = 0x403E - aExp; - if ( shiftCount <= 0 ) { - if ( shiftCount ) { - float_raise(float_flag_invalid, status); - if ( ! aSign - || ( ( aExp == 0x7FFF ) - && ( aSig != LIT64( 0x8000000000000000 ) ) ) - ) { - return LIT64( 0x7FFFFFFFFFFFFFFF ); - } - return (int64_t) LIT64( 0x8000000000000000 ); - } - aSigExtra = 0; - } - else { - shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra ); - } - return roundAndPackInt64(aSign, aSig, aSigExtra, status); - -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the extended double-precision floating- -| point value `a' to the single-precision floating-point format. The -| conversion is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. 
-*----------------------------------------------------------------------------*/ - -float32 floatx80_to_float32(floatx80 a, float_status *status) -{ - flag aSign; - int32_t aExp; - uint64_t aSig; - - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - if ( aExp == 0x7FFF ) { - if ( (uint64_t) ( aSig<<1 ) ) { - return commonNaNToFloat32(floatx80ToCommonNaN(a, status)); - } - return packFloat32( aSign, 0xFF, 0 ); - } -#ifdef SOFTFLOAT_68K - if ( aExp == 0 ) { - if ( aSig == 0) return packFloat32( aSign, 0, 0 ); - normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); - } - shift64RightJamming( aSig, 33, &aSig ); - aExp -= 0x3F81; -#else - shift64RightJamming( aSig, 33, &aSig ); - if ( aExp || aSig ) aExp -= 0x3F81; -#endif - return roundAndPackFloat32(aSign, aExp, aSig, status); - -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the extended double-precision floating- -| point value `a' to the double-precision floating-point format. The -| conversion is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. 
-*----------------------------------------------------------------------------*/ - -float64 floatx80_to_float64(floatx80 a, float_status *status) -{ - flag aSign; - int32_t aExp; - uint64_t aSig, zSig; - - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - if ( aExp == 0x7FFF ) { - if ( (uint64_t) ( aSig<<1 ) ) { - return commonNaNToFloat64(floatx80ToCommonNaN(a, status), status); - } - return packFloat64( aSign, 0x7FF, 0 ); - } -#ifdef SOFTFLOAT_68K - if ( aExp == 0 ) { - if ( aSig == 0) return packFloat64( aSign, 0, 0 ); - normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); - } - shift64RightJamming( aSig, 1, &zSig ); - aExp -= 0x3C01; -#else - shift64RightJamming( aSig, 1, &zSig ); - if ( aExp || aSig ) aExp -= 0x3C01; -#endif - return roundAndPackFloat64(aSign, aExp, zSig, status); - -} - -#ifdef SOFTFLOAT_68K // 31-01-2017 -/*---------------------------------------------------------------------------- - | Returns the result of converting the extended double-precision floating- - | point value `a' to the extended double-precision floating-point format. - | The conversion is performed according to the IEC/IEEE Standard for Binary - | Floating-Point Arithmetic. 
- *----------------------------------------------------------------------------*/ - -floatx80 floatx80_to_floatx80( floatx80 a, float_status *status ) -{ - flag aSign; - int32_t aExp; - uint64_t aSig; - - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - - if ( aExp == 0x7FFF && (uint64_t) ( aSig<<1 ) ) { - return propagateFloatx80NaNOneArg( a, status ); - } - if ( aExp == 0 && aSig != 0 ) { - return normalizeRoundAndPackFloatx80( status->floatx80_rounding_precision, aSign, aExp, aSig, 0, status ); - } - return a; - -} -#endif - -#ifdef SOFTFLOAT_68K // 30-01-2016: Added for Previous -floatx80 floatx80_round32( floatx80 a, float_status *status ) -{ - flag aSign; - int32_t aExp; - uint64_t aSig; - - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - - if ( aExp == 0x7FFF || aSig == 0 ) { - return a; - } - if ( aExp == 0 ) { - normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); - } - - return roundSigAndPackFloatx80( 32, aSign, aExp, aSig, 0, status ); -} - -floatx80 floatx80_round64( floatx80 a, float_status *status ) -{ - flag aSign; - int32_t aExp; - uint64_t aSig; - - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - - if ( aExp == 0x7FFF || aSig == 0 ) { - return a; - } - if ( aExp == 0 ) { - normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); - } - - return roundSigAndPackFloatx80( 64, aSign, aExp, aSig, 0, status ); -} - -floatx80 floatx80_round_to_float32( floatx80 a, float_status *status ) -{ - flag aSign; - int32_t aExp; - uint64_t aSig; - - aSign = extractFloatx80Sign( a ); - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - - if ( aExp == 0x7FFF ) { - if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaNOneArg( a, status ); - return a; - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return a; - normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); - } - - return roundAndPackFloatx80( 
32, aSign, aExp, aSig, 0, status ); -} - -floatx80 floatx80_round_to_float64( floatx80 a, float_status *status ) -{ - flag aSign; - int32_t aExp; - uint64_t aSig; - - aSign = extractFloatx80Sign( a ); - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - - if ( aExp == 0x7FFF ) { - if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaNOneArg( a, status ); - return a; - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return a; - normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); - } - - return roundAndPackFloatx80( 64, aSign, aExp, aSig, 0, status ); -} - - -floatx80 floatx80_normalize( floatx80 a ) -{ - flag aSign; - int16_t aExp; - uint64_t aSig; - int8_t shiftCount; - - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - - if ( aExp == 0x7FFF || aExp == 0 ) return a; - if ( aSig == 0 ) return packFloatx80(aSign, 0, 0); - - shiftCount = countLeadingZeros64( aSig ); - - if ( shiftCount > aExp ) shiftCount = aExp; - - aExp -= shiftCount; - aSig <<= shiftCount; - - return packFloatx80( aSign, aExp, aSig ); -} -#endif // end of addition for Previous - -/*---------------------------------------------------------------------------- -| Rounds the extended double-precision floating-point value `a' to an integer, -| and returns the result as an extended quadruple-precision floating-point -| value. The operation is performed according to the IEC/IEEE Standard for -| Binary Floating-Point Arithmetic. 
-*----------------------------------------------------------------------------*/ - -floatx80 floatx80_round_to_int(floatx80 a, float_status *status) -{ - flag aSign; - int32_t aExp; - uint64_t lastBitMask, roundBitsMask; - floatx80 z; - - if (floatx80_invalid_encoding(a)) { - float_raise(float_flag_invalid, status); - return floatx80_default_nan(status); - } - aExp = extractFloatx80Exp( a ); - if ( 0x403E <= aExp ) { - if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) { - return propagateFloatx80NaNOneArg(a, status); - } - return a; - } - if ( aExp < 0x3FFF ) { - if ( ( aExp == 0 ) - #ifdef SOFTFLOAT_68K - && ( (uint64_t) extractFloatx80Frac( a ) == 0 ) ) { -#else - && ( (uint64_t) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) { -#endif - return a; - } - status->float_exception_flags |= float_flag_inexact; - aSign = extractFloatx80Sign( a ); - switch (status->float_rounding_mode) { - case float_round_nearest_even: - if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) - ) { - return - packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) ); - } - break; - case float_round_ties_away: - if (aExp == 0x3FFE) { - return packFloatx80(aSign, 0x3FFF, LIT64(0x8000000000000000)); - } - break; - case float_round_down: - return - aSign ? - packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) ) - : packFloatx80( 0, 0, 0 ); - case float_round_up: - return - aSign ? 
packFloatx80( 1, 0, 0 ) - : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) ); - } - return packFloatx80( aSign, 0, 0 ); - } - lastBitMask = 1; - lastBitMask <<= 0x403E - aExp; - roundBitsMask = lastBitMask - 1; - z = a; - switch (status->float_rounding_mode) { - case float_round_nearest_even: - z.low += lastBitMask>>1; - if ((z.low & roundBitsMask) == 0) { - z.low &= ~lastBitMask; - } - break; - case float_round_ties_away: - z.low += lastBitMask >> 1; - break; - case float_round_to_zero: - break; - case float_round_up: - if (!extractFloatx80Sign(z)) { - z.low += roundBitsMask; - } - break; - case float_round_down: - if (extractFloatx80Sign(z)) { - z.low += roundBitsMask; - } - break; - default: - abort(); - } - z.low &= ~ roundBitsMask; - if ( z.low == 0 ) { - ++z.high; - z.low = LIT64( 0x8000000000000000 ); - } - if (z.low != a.low) { - status->float_exception_flags |= float_flag_inexact; - } - return z; - -} - -#ifdef SOFTFLOAT_68K // 09-01-2017: Added for Previous -floatx80 floatx80_round_to_int_toward_zero( floatx80 a, float_status *status) -{ - flag aSign; - int32_t aExp; - uint64_t lastBitMask, roundBitsMask; - floatx80 z; - - aExp = extractFloatx80Exp( a ); - if ( 0x403E <= aExp ) { - if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) { - return propagateFloatx80NaNOneArg( a, status ); - } - return a; - } - if ( aExp < 0x3FFF ) { - if ( ( aExp == 0 ) -#ifdef SOFTFLOAT_68K - && ( (uint64_t) extractFloatx80Frac( a ) == 0 ) ) { -#else - && ( (uint64_t) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) { -#endif - return a; - } - status->float_exception_flags |= float_flag_inexact; - aSign = extractFloatx80Sign( a ); - return packFloatx80( aSign, 0, 0 ); - } - lastBitMask = 1; - lastBitMask <<= 0x403E - aExp; - roundBitsMask = lastBitMask - 1; - z = a; - z.low &= ~ roundBitsMask; - if ( z.low == 0 ) { - ++z.high; - z.low = LIT64( 0x8000000000000000 ); - } - if ( z.low != a.low ) status->float_exception_flags |= float_flag_inexact; - return z; - 
-} -#endif // End of addition for Previous - -/*---------------------------------------------------------------------------- -| Returns the result of adding the absolute values of the extended double- -| precision floating-point values `a' and `b'. If `zSign' is 1, the sum is -| negated before being returned. `zSign' is ignored if the result is a NaN. -| The addition is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, flag zSign, - float_status *status) -{ - int32_t aExp, bExp, zExp; - uint64_t aSig, bSig, zSig0, zSig1; - int32_t expDiff; - - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - bSig = extractFloatx80Frac( b ); - bExp = extractFloatx80Exp( b ); -#ifdef SOFTFLOAT_68K - if ( aExp == 0 ) { - normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); - } - if ( bExp == 0 ) { - normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); - } -#endif - expDiff = aExp - bExp; - if ( 0 < expDiff ) { - if ( aExp == 0x7FFF ) { - if ((uint64_t)(aSig << 1)) { - return propagateFloatx80NaN(a, b, status); - } - return a; - } -#ifndef SOFTFLOAT_68K - if ( bExp == 0 ) --expDiff; -#endif - shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 ); - zExp = aExp; - } - else if ( expDiff < 0 ) { - if ( bExp == 0x7FFF ) { - if ((uint64_t)(bSig << 1)) { - return propagateFloatx80NaN(a, b, status); - } - return packFloatx80( zSign, 0x7FFF, floatx80_default_infinity_low ); - } -#ifndef SOFTFLOAT_68K - if ( aExp == 0 ) ++expDiff; -#endif - shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 ); - zExp = bExp; - } - else { - if ( aExp == 0x7FFF ) { - if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) { - return propagateFloatx80NaN(a, b, status); - } - return a; - } - zSig1 = 0; - zSig0 = aSig + bSig; - #ifndef SOFTFLOAT_68K - if ( aExp == 0 ) { - normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 ); - goto 
roundAndPack; - } -#endif - zExp = aExp; -#ifdef SOFTFLOAT_68K - if ( aSig == 0 && bSig == 0 ) return packFloatx80( zSign, 0, 0 ); - if ( aSig == 0 || bSig == 0 ) goto roundAndPack; -#endif - goto shiftRight1; - } - zSig0 = aSig + bSig; - if ( (int64_t) zSig0 < 0 ) goto roundAndPack; - shiftRight1: - shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 ); - zSig0 |= LIT64( 0x8000000000000000 ); - ++zExp; - roundAndPack: - return roundAndPackFloatx80(status->floatx80_rounding_precision, - zSign, zExp, zSig0, zSig1, status); -} - -/*---------------------------------------------------------------------------- -| Returns the result of subtracting the absolute values of the extended -| double-precision floating-point values `a' and `b'. If `zSign' is 1, the -| difference is negated before being returned. `zSign' is ignored if the -| result is a NaN. The subtraction is performed according to the IEC/IEEE -| Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, flag zSign, - float_status *status) -{ - int32_t aExp, bExp, zExp; - uint64_t aSig, bSig, zSig0, zSig1; - int32_t expDiff; - - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - bSig = extractFloatx80Frac( b ); - bExp = extractFloatx80Exp( b ); - expDiff = aExp - bExp; - if ( 0 < expDiff ) goto aExpBigger; - if ( expDiff < 0 ) goto bExpBigger; - if ( aExp == 0x7FFF ) { - if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) { - return propagateFloatx80NaN(a, b, status); - } - float_raise(float_flag_invalid, status); - return floatx80_default_nan(status); - } - #ifndef SOFTFLOAT_68K - if ( aExp == 0 ) { - aExp = 1; - bExp = 1; - } -#endif - zSig1 = 0; - if ( bSig < aSig ) goto aBigger; - if ( aSig < bSig ) goto bBigger; - return packFloatx80(status->float_rounding_mode == float_round_down, 0, 0); - bExpBigger: - if ( bExp == 0x7FFF ) { - if ((uint64_t)(bSig << 1)) { - return 
propagateFloatx80NaN(a, b, status); - } - return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) ); - } -#ifndef SOFTFLOAT_68K - if ( aExp == 0 ) ++expDiff; -#endif - shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 ); - bBigger: - sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 ); - zExp = bExp; - zSign ^= 1; - goto normalizeRoundAndPack; - aExpBigger: - if ( aExp == 0x7FFF ) { - if ((uint64_t)(aSig << 1)) { - return propagateFloatx80NaN(a, b, status); - } - return a; - } -#ifndef SOFTFLOAT_68K - if ( bExp == 0 ) --expDiff; -#endif - shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 ); - aBigger: - sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 ); - zExp = aExp; - normalizeRoundAndPack: - return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision, - zSign, zExp, zSig0, zSig1, status); -} - -/*---------------------------------------------------------------------------- -| Returns the result of adding the extended double-precision floating-point -| values `a' and `b'. The operation is performed according to the IEC/IEEE -| Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status) -{ - flag aSign, bSign; - - if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { - float_raise(float_flag_invalid, status); - return floatx80_default_nan(status); - } - aSign = extractFloatx80Sign( a ); - bSign = extractFloatx80Sign( b ); - if ( aSign == bSign ) { - return addFloatx80Sigs(a, b, aSign, status); - } - else { - return subFloatx80Sigs(a, b, aSign, status); - } - -} - -/*---------------------------------------------------------------------------- -| Returns the result of subtracting the extended double-precision floating- -| point values `a' and `b'. The operation is performed according to the -| IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
-*----------------------------------------------------------------------------*/ - -floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status) -{ - flag aSign, bSign; - - if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { - float_raise(float_flag_invalid, status); - return floatx80_default_nan(status); - } - aSign = extractFloatx80Sign( a ); - bSign = extractFloatx80Sign( b ); - if ( aSign == bSign ) { - return subFloatx80Sigs(a, b, aSign, status); - } - else { - return addFloatx80Sigs(a, b, aSign, status); - } - -} - -/*---------------------------------------------------------------------------- -| Returns the result of multiplying the extended double-precision floating- -| point values `a' and `b'. The operation is performed according to the -| IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status) -{ - flag aSign, bSign, zSign; - int32_t aExp, bExp, zExp; - uint64_t aSig, bSig, zSig0, zSig1; - - if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { - float_raise(float_flag_invalid, status); - return floatx80_default_nan(status); - } - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - bSig = extractFloatx80Frac( b ); - bExp = extractFloatx80Exp( b ); - bSign = extractFloatx80Sign( b ); - zSign = aSign ^ bSign; - if ( aExp == 0x7FFF ) { - if ( (uint64_t) ( aSig<<1 ) - || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) { - return propagateFloatx80NaN(a, b, status); - } - if ( ( bExp | bSig ) == 0 ) goto invalid; - return packFloatx80( zSign, 0x7FFF, floatx80_default_infinity_low ); - } - if ( bExp == 0x7FFF ) { - if ((uint64_t)(bSig << 1)) { - return propagateFloatx80NaN(a, b, status); - } - if ( ( aExp | aSig ) == 0 ) { - invalid: - float_raise(float_flag_invalid, status); - return floatx80_default_nan(status); - } - 
return packFloatx80( zSign, 0x7FFF, floatx80_default_infinity_low ); - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 ); - normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); - } - if ( bExp == 0 ) { - if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 ); - normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); - } - zExp = aExp + bExp - 0x3FFE; - mul64To128( aSig, bSig, &zSig0, &zSig1 ); - if ( 0 < (int64_t) zSig0 ) { - shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 ); - --zExp; - } - return roundAndPackFloatx80(status->floatx80_rounding_precision, - zSign, zExp, zSig0, zSig1, status); -} - -#ifdef SOFTFLOAT_68K // 21-01-2017: Added for Previous -floatx80 floatx80_sglmul( floatx80 a, floatx80 b, float_status *status ) -{ - flag aSign, bSign, zSign; - int32_t aExp, bExp, zExp; - uint64_t aSig, bSig, zSig0, zSig1; - floatx80 z; - - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - bSig = extractFloatx80Frac( b ); - bExp = extractFloatx80Exp( b ); - bSign = extractFloatx80Sign( b ); - zSign = aSign ^ bSign; - if ( aExp == 0x7FFF ) { - if ( (uint64_t) ( aSig<<1 ) - || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) { - return propagateFloatx80NaN( a, b, status ); - } - if ( ( bExp | bSig ) == 0 ) goto invalid; - return packFloatx80( zSign, 0x7FFF, floatx80_default_infinity_low ); - } - if ( bExp == 0x7FFF ) { - if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b, status ); - if ( ( aExp | aSig ) == 0 ) { - invalid: - float_raise( float_flag_invalid, status ); - z.low = floatx80_default_nan_low; - z.high = floatx80_default_nan_high; - return z; - } - return packFloatx80( zSign, 0x7FFF, floatx80_default_infinity_low ); - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 ); - normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); - } - if ( bExp == 0 ) { - if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 ); - normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); 
- } - aSig &= LIT64( 0xFFFFFF0000000000 ); - bSig &= LIT64( 0xFFFFFF0000000000 ); - zExp = aExp + bExp - 0x3FFE; - mul64To128( aSig, bSig, &zSig0, &zSig1 ); - if ( 0 < (int64_t) zSig0 ) { - shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 ); - --zExp; - } - return roundSigAndPackFloatx80( 32, zSign, zExp, zSig0, zSig1, status); - -} -#endif // End of addition for Previous - - -/*---------------------------------------------------------------------------- -| Returns the result of dividing the extended double-precision floating-point -| value `a' by the corresponding value `b'. The operation is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status) -{ - flag aSign, bSign, zSign; - int32_t aExp, bExp, zExp; - uint64_t aSig, bSig, zSig0, zSig1; - uint64_t rem0, rem1, rem2, term0, term1, term2; - - if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { - float_raise(float_flag_invalid, status); - return floatx80_default_nan(status); - } - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - bSig = extractFloatx80Frac( b ); - bExp = extractFloatx80Exp( b ); - bSign = extractFloatx80Sign( b ); - zSign = aSign ^ bSign; - if ( aExp == 0x7FFF ) { - if ((uint64_t)(aSig << 1)) { - return propagateFloatx80NaN(a, b, status); - } - if ( bExp == 0x7FFF ) { - if ((uint64_t)(bSig << 1)) { - return propagateFloatx80NaN(a, b, status); - } - goto invalid; - } - return packFloatx80( zSign, 0x7FFF, floatx80_default_infinity_low ); - } - if ( bExp == 0x7FFF ) { - if ((uint64_t)(bSig << 1)) { - return propagateFloatx80NaN(a, b, status); - } - return packFloatx80( zSign, 0, 0 ); - } - if ( bExp == 0 ) { - if ( bSig == 0 ) { - if ( ( aExp | aSig ) == 0 ) { - invalid: - float_raise(float_flag_invalid, status); - return 
floatx80_default_nan(status); - } - float_raise(float_flag_divbyzero, status); - return packFloatx80( zSign, 0x7FFF, floatx80_default_infinity_low ); - } - normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 ); - normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); - } - zExp = aExp - bExp + 0x3FFE; - rem1 = 0; - if ( bSig <= aSig ) { - shift128Right( aSig, 0, 1, &aSig, &rem1 ); - ++zExp; - } - zSig0 = estimateDiv128To64( aSig, rem1, bSig ); - mul64To128( bSig, zSig0, &term0, &term1 ); - sub128( aSig, rem1, term0, term1, &rem0, &rem1 ); - while ( (int64_t) rem0 < 0 ) { - --zSig0; - add128( rem0, rem1, 0, bSig, &rem0, &rem1 ); - } - zSig1 = estimateDiv128To64( rem1, 0, bSig ); - if ( (uint64_t) ( zSig1<<1 ) <= 8 ) { - mul64To128( bSig, zSig1, &term1, &term2 ); - sub128( rem1, 0, term1, term2, &rem1, &rem2 ); - while ( (int64_t) rem1 < 0 ) { - --zSig1; - add128( rem1, rem2, 0, bSig, &rem1, &rem2 ); - } - zSig1 |= ( ( rem1 | rem2 ) != 0 ); - } - return roundAndPackFloatx80(status->floatx80_rounding_precision, - zSign, zExp, zSig0, zSig1, status); -} - -#ifdef SOFTFLOAT_68K // 21-01-2017: Addition for Previous -floatx80 floatx80_sgldiv( floatx80 a, floatx80 b, float_status *status ) -{ - flag aSign, bSign, zSign; - int32_t aExp, bExp, zExp; - uint64_t aSig, bSig, zSig0, zSig1; - uint64_t rem0, rem1, rem2, term0, term1, term2; - floatx80 z; - - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - bSig = extractFloatx80Frac( b ); - bExp = extractFloatx80Exp( b ); - bSign = extractFloatx80Sign( b ); - zSign = aSign ^ bSign; - if ( aExp == 0x7FFF ) { - if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b, status ); - if ( bExp == 0x7FFF ) { - if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b, status ); - goto invalid; - } - return packFloatx80( zSign, 0x7FFF, floatx80_default_infinity_low ); - } - if ( bExp == 0x7FFF ) { - if 
( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b, status ); - return packFloatx80( zSign, 0, 0 ); - } - if ( bExp == 0 ) { - if ( bSig == 0 ) { - if ( ( aExp | aSig ) == 0 ) { - invalid: - float_raise( float_flag_invalid, status ); - z.low = floatx80_default_nan_low; - z.high = floatx80_default_nan_high; - return z; - } - float_raise( float_flag_divbyzero, status ); - return packFloatx80( zSign, 0x7FFF, floatx80_default_infinity_low ); - } - normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 ); - normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); - } - - zExp = aExp - bExp + 0x3FFE; - rem1 = 0; - if ( bSig <= aSig ) { - shift128Right( aSig, 0, 1, &aSig, &rem1 ); - ++zExp; - } - zSig0 = estimateDiv128To64( aSig, rem1, bSig ); - mul64To128( bSig, zSig0, &term0, &term1 ); - sub128( aSig, rem1, term0, term1, &rem0, &rem1 ); - while ( (int64_t) rem0 < 0 ) { - --zSig0; - add128( rem0, rem1, 0, bSig, &rem0, &rem1 ); - } - zSig1 = estimateDiv128To64( rem1, 0, bSig ); - if ( (uint64_t) ( zSig1<<1 ) <= 8 ) { - mul64To128( bSig, zSig1, &term1, &term2 ); - sub128( rem1, 0, term1, term2, &rem1, &rem2 ); - while ( (int64_t) rem1 < 0 ) { - --zSig1; - add128( rem1, rem2, 0, bSig, &rem1, &rem2 ); - } - zSig1 |= ( ( rem1 | rem2 ) != 0 ); - } - return roundSigAndPackFloatx80( 32, zSign, zExp, zSig0, zSig1, status); - -} -#endif // End of addition for Previous - - -/*---------------------------------------------------------------------------- -| Returns the remainder of the extended double-precision floating-point value -| `a' with respect to the corresponding value `b'. The operation is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
-*----------------------------------------------------------------------------*/ - -#ifndef SOFTFLOAT_68K -floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status) -{ - flag aSign, zSign; - int32_t aExp, bExp, expDiff; - uint64_t aSig0, aSig1, bSig; - uint64_t q, term0, term1, alternateASig0, alternateASig1; - - if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { - float_raise(float_flag_invalid, status); - return floatx80_default_nan(status); - } - aSig0 = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - bSig = extractFloatx80Frac( b ); - bExp = extractFloatx80Exp( b ); - if ( aExp == 0x7FFF ) { - if ( (uint64_t) ( aSig0<<1 ) - || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) { - return propagateFloatx80NaN(a, b, status); - } - goto invalid; - } - if ( bExp == 0x7FFF ) { - if ((uint64_t)(bSig << 1)) { - return propagateFloatx80NaN(a, b, status); - } - return a; - } - if ( bExp == 0 ) { - if ( bSig == 0 ) { - invalid: - float_raise(float_flag_invalid, status); - return floatx80_default_nan(status); - } - normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); - } - if ( aExp == 0 ) { - if ( (uint64_t) ( aSig0<<1 ) == 0 ) return a; - normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 ); - } - bSig |= LIT64( 0x8000000000000000 ); - zSign = aSign; - expDiff = aExp - bExp; - aSig1 = 0; - if ( expDiff < 0 ) { - if ( expDiff < -1 ) return a; - shift128Right( aSig0, 0, 1, &aSig0, &aSig1 ); - expDiff = 0; - } - q = ( bSig <= aSig0 ); - if ( q ) aSig0 -= bSig; - expDiff -= 64; - while ( 0 < expDiff ) { - q = estimateDiv128To64( aSig0, aSig1, bSig ); - q = ( 2 < q ) ? q - 2 : 0; - mul64To128( bSig, q, &term0, &term1 ); - sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); - shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 ); - expDiff -= 62; - } - expDiff += 64; - if ( 0 < expDiff ) { - q = estimateDiv128To64( aSig0, aSig1, bSig ); - q = ( 2 < q ) ? 
q - 2 : 0; - q >>= 64 - expDiff; - mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 ); - sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); - shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 ); - while ( le128( term0, term1, aSig0, aSig1 ) ) { - ++q; - sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); - } - } - else { - term1 = 0; - term0 = bSig; - } - sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 ); - if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 ) - || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 ) - && ( q & 1 ) ) - ) { - aSig0 = alternateASig0; - aSig1 = alternateASig1; - zSign = ! zSign; - } - return - normalizeRoundAndPackFloatx80( - 80, zSign, bExp + expDiff, aSig0, aSig1, status); - -} -#else // 09-01-2017: Modified version for Previous -floatx80 floatx80_rem( floatx80 a, floatx80 b, uint64_t *q, flag *s, float_status *status ) -{ - flag aSign, bSign, zSign; - int32_t aExp, bExp, expDiff; - uint64_t aSig0, aSig1, bSig; - uint64_t qTemp, term0, term1, alternateASig0, alternateASig1; - floatx80 z; - - aSig0 = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - bSig = extractFloatx80Frac( b ); - bExp = extractFloatx80Exp( b ); - bSign = extractFloatx80Sign( b ); - - if ( aExp == 0x7FFF ) { - if ( (uint64_t) ( aSig0<<1 ) - || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) { - return propagateFloatx80NaN( a, b, status ); - } - goto invalid; - } - if ( bExp == 0x7FFF ) { - if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b, status ); - *s = (aSign != bSign); - *q = 0; - return a; - } - if ( bExp == 0 ) { - if ( bSig == 0 ) { - invalid: - float_raise( float_flag_invalid, status ); - z.low = floatx80_default_nan_low; - z.high = floatx80_default_nan_high; - return z; - } - normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); - } - if ( aExp == 0 ) { -#ifdef SOFTFLOAT_68K - if ( aSig0 == 0 ) { - *s = (aSign != bSign); - *q = 0; - return a; - } 
-#else - if ( (uint64_t) ( aSig0<<1 ) == 0 ) return a; -#endif - normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 ); - } - bSig |= LIT64( 0x8000000000000000 ); - zSign = aSign; - expDiff = aExp - bExp; - *s = (aSign != bSign); - aSig1 = 0; - if ( expDiff < 0 ) { - if ( expDiff < -1 ) return a; - shift128Right( aSig0, 0, 1, &aSig0, &aSig1 ); - expDiff = 0; - } - qTemp = ( bSig <= aSig0 ); - if ( qTemp ) aSig0 -= bSig; - *q = ( expDiff > 63 ) ? 0 : ( qTemp< 63 ) ? 0 : ( qTemp<>= 64 - expDiff; - mul64To128( bSig, qTemp<<( 64 - expDiff ), &term0, &term1 ); - sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); - shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 ); - while ( le128( term0, term1, aSig0, aSig1 ) ) { - ++qTemp; - sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); - } - *q += qTemp; - } - else { - term1 = 0; - term0 = bSig; - } - sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 ); - if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 ) - || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 ) - && ( qTemp & 1 ) ) - ) { - aSig0 = alternateASig0; - aSig1 = alternateASig1; - zSign = ! zSign; - ++*q; - } - return - normalizeRoundAndPackFloatx80( - 80, zSign, bExp + expDiff, aSig0, aSig1, status ); - -} -#endif // End of modification - - -#ifdef SOFTFLOAT_68K // 08-01-2017: Added for Previous -/*---------------------------------------------------------------------------- - | Returns the modulo remainder of the extended double-precision floating-point - | value `a' with respect to the corresponding value `b'. 
- *----------------------------------------------------------------------------*/ - -floatx80 floatx80_mod( floatx80 a, floatx80 b, uint64_t *q, flag *s, float_status *status ) -{ - flag aSign, bSign, zSign; - int32_t aExp, bExp, expDiff; - uint64_t aSig0, aSig1, bSig; - uint64_t qTemp, term0, term1; - floatx80 z; - - aSig0 = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - bSig = extractFloatx80Frac( b ); - bExp = extractFloatx80Exp( b ); - bSign = extractFloatx80Sign( b ); - - if ( aExp == 0x7FFF ) { - if ( (uint64_t) ( aSig0<<1 ) - || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) { - return propagateFloatx80NaN( a, b, status ); - } - goto invalid; - } - if ( bExp == 0x7FFF ) { - if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b, status ); - *s = (aSign != bSign); - *q = 0; - return a; - } - if ( bExp == 0 ) { - if ( bSig == 0 ) { - invalid: - float_raise( float_flag_invalid, status ); - z.low = floatx80_default_nan_low; - z.high = floatx80_default_nan_high; - return z; - } - normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); - } - if ( aExp == 0 ) { -#ifdef SOFTFLOAT_68K - if ( aSig0 == 0 ) { - *s = (aSign != bSign); - *q = 0; - return a; - } -#else - if ( (uint64_t) ( aSig0<<1 ) == 0 ) return a; -#endif - normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 ); - } - bSig |= LIT64( 0x8000000000000000 ); - zSign = aSign; - expDiff = aExp - bExp; - *s = (aSign != bSign); - aSig1 = 0; - if ( expDiff < 0 ) return a; - qTemp = ( bSig <= aSig0 ); - if ( qTemp ) aSig0 -= bSig; - *q = ( expDiff > 63 ) ? 0 : ( qTemp< 63 ) ? 
0 : ( qTemp<>= 64 - expDiff; - mul64To128( bSig, qTemp<<( 64 - expDiff ), &term0, &term1 ); - sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); - shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 ); - while ( le128( term0, term1, aSig0, aSig1 ) ) { - ++qTemp; - sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); - } - *q += qTemp; - } - return - normalizeRoundAndPackFloatx80( - 80, zSign, bExp + expDiff, aSig0, aSig1, status ); - -} -#endif // end of addition for Previous - - -/*---------------------------------------------------------------------------- -| Returns the square root of the extended double-precision floating-point -| value `a'. The operation is performed according to the IEC/IEEE Standard -| for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -floatx80 floatx80_sqrt(floatx80 a, float_status *status) -{ - flag aSign; - int32_t aExp, zExp; - uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0; - uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3; - - if (floatx80_invalid_encoding(a)) { - float_raise(float_flag_invalid, status); - return floatx80_default_nan(status); - } - aSig0 = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - if ( aExp == 0x7FFF ) { - if ((uint64_t)(aSig0 << 1)) { - return propagateFloatx80NaNOneArg(a, status); - } - if ( ! 
aSign ) return a; - goto invalid; - } - if ( aSign ) { - if ( ( aExp | aSig0 ) == 0 ) return a; - invalid: - float_raise(float_flag_invalid, status); - return floatx80_default_nan(status); - } - if ( aExp == 0 ) { - if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 ); - normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 ); - } - zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF; - zSig0 = estimateSqrt32( aExp, aSig0>>32 ); - shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 ); - zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 ); - doubleZSig0 = zSig0<<1; - mul64To128( zSig0, zSig0, &term0, &term1 ); - sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 ); - while ( (int64_t) rem0 < 0 ) { - --zSig0; - doubleZSig0 -= 2; - add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 ); - } - zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 ); - if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) { - if ( zSig1 == 0 ) zSig1 = 1; - mul64To128( doubleZSig0, zSig1, &term1, &term2 ); - sub128( rem1, 0, term1, term2, &rem1, &rem2 ); - mul64To128( zSig1, zSig1, &term2, &term3 ); - sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 ); - while ( (int64_t) rem1 < 0 ) { - --zSig1; - shortShift128Left( 0, zSig1, 1, &term2, &term3 ); - term3 |= 1; - term2 |= doubleZSig0; - add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 ); - } - zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 ); - } - shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 ); - zSig0 |= doubleZSig0; - return roundAndPackFloatx80(status->floatx80_rounding_precision, - 0, zExp, zSig0, zSig1, status); -} - - -#ifdef SOFTFLOAT_68K // 07-01-2017: Added for Previous -/*---------------------------------------------------------------------------- - | Returns the mantissa of the extended double-precision floating-point - | value `a'. 
- *----------------------------------------------------------------------------*/ - -floatx80 floatx80_getman( floatx80 a, float_status *status) -{ - flag aSign; - int32_t aExp; - uint64_t aSig; - - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - - if ( aExp == 0x7FFF ) { - if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaNOneArg( a, status ); - float_raise( float_flag_invalid, status ); - a.low = floatx80_default_nan_low; - a.high = floatx80_default_nan_high; - return a; - } - - if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 ); - normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); - } - - return roundAndPackFloatx80(status->floatx80_rounding_precision, aSign, 0x3FFF, aSig, 0, status); -} - -/*---------------------------------------------------------------------------- - | Returns the exponent of the extended double-precision floating-point - | value `a' as an extended double-precision value. - *----------------------------------------------------------------------------*/ - -floatx80 floatx80_getexp( floatx80 a, float_status *status) -{ - flag aSign; - int32_t aExp; - uint64_t aSig; - - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - - if ( aExp == 0x7FFF ) { - if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaNOneArg( a, status ); - float_raise( float_flag_invalid, status ); - a.low = floatx80_default_nan_low; - a.high = floatx80_default_nan_high; - return a; - } - - if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 ); - normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); - } - - return int32_to_floatx80(aExp - 0x3FFF); -} - -/*---------------------------------------------------------------------------- - | Scales extended double-precision floating-point value in operand `a' by - | value `b'. 
The function truncates the value in the second operand 'b' to - | an integral value and adds that value to the exponent of the operand 'a'. - | The operation performed according to the IEC/IEEE Standard for Binary - | Floating-Point Arithmetic. - *----------------------------------------------------------------------------*/ - -floatx80 floatx80_scale(floatx80 a, floatx80 b, float_status *status) -{ - flag aSign, bSign; - int32_t aExp, bExp, shiftCount; - uint64_t aSig, bSig; - - aSig = extractFloatx80Frac(a); - aExp = extractFloatx80Exp(a); - aSign = extractFloatx80Sign(a); - bSig = extractFloatx80Frac(b); - bExp = extractFloatx80Exp(b); - bSign = extractFloatx80Sign(b); - - if ( bExp == 0x7FFF ) { - if ( (uint64_t) ( bSig<<1 ) || - ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) ) { - return propagateFloatx80NaN( a, b, status ); - } - float_raise( float_flag_invalid, status ); - a.low = floatx80_default_nan_low; - a.high = floatx80_default_nan_high; - return a; - } - if ( aExp == 0x7FFF ) { - if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b, status ); - return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloatx80( aSign, 0, 0); - if ( bExp < 0x3FFF ) return a; - normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); - } - - if ( bExp < 0x3FFF ) return a; - - if ( 0x400F < bExp ) { - aExp = bSign ? -0x6001 : 0xE000; - return roundAndPackFloatx80( - status->floatx80_rounding_precision, aSign, aExp, aSig, 0, status ); - } - - shiftCount = 0x403E - bExp; - bSig >>= shiftCount; - aExp = bSign ? ( aExp - bSig ) : ( aExp + bSig ); - - return roundAndPackFloatx80( - status->floatx80_rounding_precision, aSign, aExp, aSig, 0, status); - -} - -/*----------------------------------------------------------------------------- - | Calculates the absolute value of the extended double-precision floating-point - | value `a'. 
The operation is performed according to the IEC/IEEE Standard - | for Binary Floating-Point Arithmetic. - *----------------------------------------------------------------------------*/ - -floatx80 floatx80_abs(floatx80 a, float_status *status) -{ - int32_t aExp; - uint64_t aSig; - - aSig = extractFloatx80Frac(a); - aExp = extractFloatx80Exp(a); - - if ( aExp == 0x7FFF && (uint64_t) ( aSig<<1 ) ) { - return propagateFloatx80NaNOneArg( a, status ); - } - - if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloatx80( 0, 0, 0 ); - normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); - } - - return roundAndPackFloatx80( - status->floatx80_rounding_precision, 0, aExp, aSig, 0, status ); - -} - -/*----------------------------------------------------------------------------- - | Changes the sign of the extended double-precision floating-point value 'a'. - | The operation is performed according to the IEC/IEEE Standard for Binary - | Floating-Point Arithmetic. - *----------------------------------------------------------------------------*/ - -floatx80 floatx80_neg(floatx80 a, float_status *status) -{ - flag aSign; - int32_t aExp; - uint64_t aSig; - - aSig = extractFloatx80Frac(a); - aExp = extractFloatx80Exp(a); - aSign = extractFloatx80Sign(a); - - if ( aExp == 0x7FFF && (uint64_t) ( aSig<<1 ) ) { - return propagateFloatx80NaNOneArg( a, status ); - } - - aSign = !aSign; - - if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 ); - normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); - } - - return roundAndPackFloatx80( - status->floatx80_rounding_precision, aSign, aExp, aSig, 0, status ); - -} - -/*---------------------------------------------------------------------------- - | Returns the result of comparing the extended double-precision floating- - | point values `a' and `b'. The result is abstracted for matching the - | corresponding condition codes. 
- *----------------------------------------------------------------------------*/ - -floatx80 floatx80_cmp( floatx80 a, floatx80 b, float_status *status ) -{ - flag aSign, bSign; - int32_t aExp, bExp; - uint64_t aSig, bSig; - - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - bSig = extractFloatx80Frac( b ); - bExp = extractFloatx80Exp( b ); - bSign = extractFloatx80Sign( b ); - - if ( ( aExp == 0x7FFF && (uint64_t) ( aSig<<1 ) ) || - ( bExp == 0x7FFF && (uint64_t) ( bSig<<1 ) ) ) { - return propagateFloatx80NaN( packFloatx80( 0, aExp, aSig ), - packFloatx80( 0, bExp, bSig ), status ); - } - - if ( bExp < aExp ) return packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) ); - if ( aExp < bExp ) return packFloatx80( bSign ^ 1, 0x3FFF, LIT64( 0x8000000000000000 ) ); - - if ( aExp == 0x7FFF ) { - if ( aSign == bSign ) return packFloatx80( aSign, 0, 0 ); - return packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) ); - } - - if ( bSig < aSig ) return packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) ); - if ( aSig < bSig ) return packFloatx80( bSign ^ 1, 0x3FFF, LIT64( 0x8000000000000000 ) ); - - if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 ); - - if ( aSign == bSign ) return packFloatx80( 0, 0, 0 ); - - return packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) ); - -} - -floatx80 floatx80_tst( floatx80 a, float_status *status ) -{ - int32_t aExp; - uint64_t aSig; - - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - - if ( aExp == 0x7FFF && (uint64_t) ( aSig<<1 ) ) - return propagateFloatx80NaNOneArg( a, status ); - return a; -} - -floatx80 floatx80_move( floatx80 a, float_status *status ) -{ - flag aSign; - int32_t aExp; - uint64_t aSig; - - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - - if ( aExp == 0x7FFF ) { - if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaNOneArg( a, status ); - return a; - } - 
if ( aExp == 0 ) { - if ( aSig == 0 ) return a; - normalizeRoundAndPackFloatx80( status->floatx80_rounding_precision, aSign, aExp, aSig, 0, status ); - } - return roundAndPackFloatx80( status->floatx80_rounding_precision, aSign, aExp, aSig, 0, status ); -} - -floatx80 floatx80_denormalize( floatx80 a, flag eSign) -{ - flag aSign; - int32_t aExp; - uint64_t aSig; - int32_t shiftCount; - - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - - if ( eSign ) { - shiftCount = 0x8000 - aExp; - aExp = 0; - if (shiftCount > 63) { - aSig = 0; - } else { - aSig >>= shiftCount; - } - } - return packFloatx80(aSign, aExp, aSig); -} - -#endif // End of addition for Previous - -/*---------------------------------------------------------------------------- -| Returns 1 if the extended double-precision floating-point value `a' is -| equal to the corresponding value `b', and 0 otherwise. The comparison is -| performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic. -*----------------------------------------------------------------------------*/ - -flag floatx80_eq( floatx80 a, floatx80 b, float_status *status ) -{ - if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) - && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) - || ( ( extractFloatx80Exp( b ) == 0x7FFF ) - && (uint64_t) ( extractFloatx80Frac( b )<<1 ) ) - ) { - if ( floatx80_is_signaling_nan( a ) - || floatx80_is_signaling_nan( b ) ) { - float_raise( float_flag_invalid, status ); - } - return 0; - } - return - ( a.low == b.low ) - && ( ( a.high == b.high ) - || ( ( a.low == 0 ) - && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) ) - ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the extended double-precision floating-point value `a' is -| less than or equal to the corresponding value `b', and 0 otherwise. 
The -| comparison is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -flag floatx80_le( floatx80 a, floatx80 b, float_status *status ) -{ - flag aSign, bSign; - - if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) - && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) - || ( ( extractFloatx80Exp( b ) == 0x7FFF ) - && (uint64_t) ( extractFloatx80Frac( b )<<1 ) ) - ) { - float_raise( float_flag_invalid, status ); - return 0; - } - aSign = extractFloatx80Sign( a ); - bSign = extractFloatx80Sign( b ); - if ( aSign != bSign ) { - return - aSign - || ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) - == 0 ); - } - return - aSign ? le128( b.high, b.low, a.high, a.low ) - : le128( a.high, a.low, b.high, b.low ); -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the extended double-precision floating-point value `a' is -| less than the corresponding value `b', and 0 otherwise. The comparison -| is performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic. -*----------------------------------------------------------------------------*/ - -flag floatx80_lt( floatx80 a, floatx80 b, float_status *status ) -{ - flag aSign, bSign; - - if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) - && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) - || ( ( extractFloatx80Exp( b ) == 0x7FFF ) - && (uint64_t) ( extractFloatx80Frac( b )<<1 ) ) - ) { - float_raise( float_flag_invalid, status ); - return 0; - } - aSign = extractFloatx80Sign( a ); - bSign = extractFloatx80Sign( b ); - if ( aSign != bSign ) { - return - aSign - && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) - != 0 ); - } - return - aSign ? 
lt128( b.high, b.low, a.high, a.low ) - : lt128( a.high, a.low, b.high, b.low ); - -} - - -/*---------------------------------------------------------------------------- -| Returns the result of converting the 64-bit two's complement integer `a' -| to the extended double-precision floating-point format. The conversion -| is performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic. -*----------------------------------------------------------------------------*/ - -floatx80 int64_to_floatx80( int64_t a ) -{ - flag zSign; - uint64_t absA; - int8_t shiftCount; - - if ( a == 0 ) return packFloatx80( 0, 0, 0 ); - zSign = ( a < 0 ); - absA = zSign ? - a : a; - shiftCount = countLeadingZeros64( absA ); - return packFloatx80( zSign, 0x403E - shiftCount, absA< -#endif - - -/* This 'flag' type must be able to hold at least 0 and 1. It should - * probably be replaced with 'bool' but the uses would need to be audited - * to check that they weren't accidentally relying on it being a larger type. - */ -typedef uint8_t flag; - -#define LIT64( a ) a##ULL - -/*---------------------------------------------------------------------------- -| Software IEC/IEEE floating-point ordering relations -*----------------------------------------------------------------------------*/ -enum { - float_relation_less = -1, - float_relation_equal = 0, - float_relation_greater = 1, - float_relation_unordered = 2 -}; - -/*---------------------------------------------------------------------------- -| Software IEC/IEEE floating-point types. -*----------------------------------------------------------------------------*/ -/* Use structures for soft-float types. This prevents accidentally mixing - them with native int/float types. A sufficiently clever compiler and - sane ABI should be able to see though these structs. However - x86/gcc 3.x seems to struggle a bit, so leave them disabled by default. 
*/ -//#define USE_SOFTFLOAT_STRUCT_TYPES -#ifdef USE_SOFTFLOAT_STRUCT_TYPES -typedef struct { - uint16_t v; -} float16; -#define float16_val(x) (((float16)(x)).v) -#define make_float16(x) __extension__ ({ float16 f16_val = {x}; f16_val; }) -#define const_float16(x) { x } -typedef struct { - uint32_t v; -} float32; -/* The cast ensures an error if the wrong type is passed. */ -#define float32_val(x) (((float32)(x)).v) -#define make_float32(x) __extension__ ({ float32 f32_val = {x}; f32_val; }) -#define const_float32(x) { x } -typedef struct { - uint64_t v; -} float64; -#define float64_val(x) (((float64)(x)).v) -#define make_float64(x) __extension__ ({ float64 f64_val = {x}; f64_val; }) -#define const_float64(x) { x } -#else -typedef uint16_t float16; -typedef uint32_t float32; -typedef uint64_t float64; -#define float16_val(x) (x) -#define float32_val(x) (x) -#define float64_val(x) (x) -#define make_float16(x) (x) -#define make_float32(x) (x) -#define make_float64(x) (x) -#define const_float16(x) (x) -#define const_float32(x) (x) -#define const_float64(x) (x) -#endif -typedef struct { - uint16_t high; - uint64_t low; -} floatx80; -typedef struct { -#ifdef HOST_WORDS_BIGENDIAN - uint64_t high, low; -#else - uint64_t low, high; -#endif -} float128; - -/*---------------------------------------------------------------------------- -| Software IEC/IEEE floating-point underflow tininess-detection mode. -*----------------------------------------------------------------------------*/ -enum { - float_tininess_after_rounding = 0, - float_tininess_before_rounding = 1 -}; - -/*---------------------------------------------------------------------------- -| Software IEC/IEEE floating-point rounding mode. 
-*----------------------------------------------------------------------------*/ -enum { - float_round_nearest_even = 0, - float_round_down = 1, - float_round_up = 2, - float_round_to_zero = 3, - float_round_ties_away = 4, -}; - -/*---------------------------------------------------------------------------- -| Software IEC/IEEE floating-point exception flags. -*----------------------------------------------------------------------------*/ -enum { - float_flag_invalid = 0x01, - float_flag_denormal = 0x02, - float_flag_divbyzero = 0x04, - float_flag_overflow = 0x08, - float_flag_underflow = 0x10, - float_flag_inexact = 0x20, - float_flag_signaling = 0x40, - float_flag_decimal = 0x80 -}; - -/*---------------------------------------------------------------------------- - | Variables for storing sign, exponent and significand of overflowed or - | underflowed extended double-precision floating-point value. - | Variables for storing sign, exponent and significand of internal extended - | double-precision floating-point value for external use. - *----------------------------------------------------------------------------*/ - -extern flag floatx80_internal_sign; -extern int32_t floatx80_internal_exp; -extern uint64_t floatx80_internal_sig; -extern int32_t floatx80_internal_exp0; -extern uint64_t floatx80_internal_sig0; -extern uint64_t floatx80_internal_sig1; -extern int8_t floatx80_internal_precision; -extern int8_t floatx80_internal_mode; - -typedef struct float_status { - signed char float_detect_tininess; - signed char float_rounding_mode; - uint8_t float_exception_flags; - signed char floatx80_rounding_precision; - /* should denormalised results go to zero and set the inexact flag? */ - flag flush_to_zero; - /* should denormalised inputs go to zero and set the input_denormal flag? 
*/ - flag flush_inputs_to_zero; - flag default_nan_mode; - flag snan_bit_is_one; -} float_status; - -/*---------------------------------------------------------------------------- - | Function for getting sign, exponent and significand of extended - | double-precision floating-point intermediate result for external use. - *----------------------------------------------------------------------------*/ -floatx80 getFloatInternalOverflow( void ); -floatx80 getFloatInternalUnderflow( void ); -floatx80 getFloatInternalRoundedAll( void ); -floatx80 getFloatInternalRoundedSome( void ); -floatx80 getFloatInternalUnrounded( void ); -floatx80 getFloatInternalFloatx80( void ); -uint64_t getFloatInternalGRS( void ); - -static inline void set_float_detect_tininess(int val, float_status *status) -{ - status->float_detect_tininess = val; -} -static inline void set_float_rounding_mode(int val, float_status *status) -{ - status->float_rounding_mode = val; -} -static inline void set_float_exception_flags(int val, float_status *status) -{ - status->float_exception_flags = val; -} -static inline void set_floatx80_rounding_precision(int val, - float_status *status) -{ - status->floatx80_rounding_precision = val; -} -static inline void set_flush_to_zero(flag val, float_status *status) -{ - status->flush_to_zero = val; -} -static inline void set_flush_inputs_to_zero(flag val, float_status *status) -{ - status->flush_inputs_to_zero = val; -} -static inline void set_default_nan_mode(flag val, float_status *status) -{ - status->default_nan_mode = val; -} -static inline void set_snan_bit_is_one(flag val, float_status *status) -{ - status->snan_bit_is_one = val; -} -static inline int get_float_detect_tininess(float_status *status) -{ - return status->float_detect_tininess; -} -static inline int get_float_rounding_mode(float_status *status) -{ - return status->float_rounding_mode; -} -static inline int get_float_exception_flags(float_status *status) -{ - return status->float_exception_flags; 
-} -static inline int get_floatx80_rounding_precision(float_status *status) -{ - return status->floatx80_rounding_precision; -} -static inline flag get_flush_to_zero(float_status *status) -{ - return status->flush_to_zero; -} -static inline flag get_flush_inputs_to_zero(float_status *status) -{ - return status->flush_inputs_to_zero; -} -static inline flag get_default_nan_mode(float_status *status) -{ - return status->default_nan_mode; -} - -/*---------------------------------------------------------------------------- -| Routine to raise any or all of the software IEC/IEEE floating-point -| exception flags. -*----------------------------------------------------------------------------*/ -//void float_raise(uint8_t flags, float_status *status); - - -/*---------------------------------------------------------------------------- - | The pattern for a default generated single-precision NaN. - *----------------------------------------------------------------------------*/ -#define float32_default_nan 0x7FFFFFFF - -/*---------------------------------------------------------------------------- - | The pattern for a default generated double-precision NaN. - *----------------------------------------------------------------------------*/ -#define float64_default_nan LIT64( 0x7FFFFFFFFFFFFFFF ) - -/*---------------------------------------------------------------------------- - | The pattern for a default generated extended double-precision NaN. The - | `high' and `low' values hold the most- and least-significant bits, - | respectively. - *----------------------------------------------------------------------------*/ -#define floatx80_default_nan_high 0x7FFF -#define floatx80_default_nan_low LIT64( 0xFFFFFFFFFFFFFFFF ) - -/*---------------------------------------------------------------------------- - | The pattern for a default generated extended double-precision infinity. 
- *----------------------------------------------------------------------------*/ -#define floatx80_default_infinity_low LIT64( 0x0000000000000000 ) - -/*---------------------------------------------------------------------------- -| If `a' is denormal and we are in flush-to-zero mode then set the -| input-denormal exception and return zero. Otherwise just return the value. -*----------------------------------------------------------------------------*/ -float64 float64_squash_input_denormal(float64 a, float_status *status); - -/*---------------------------------------------------------------------------- -| Options to indicate which negations to perform in float*_muladd() -| Using these differs from negating an input or output before calling -| the muladd function in that this means that a NaN doesn't have its -| sign bit inverted before it is propagated. -| We also support halving the result before rounding, as a special -| case to support the ARM fused-sqrt-step instruction FRSQRTS. -*----------------------------------------------------------------------------*/ -enum { - float_muladd_negate_c = 1, - float_muladd_negate_product = 2, - float_muladd_negate_result = 4, - float_muladd_halve_result = 8, -}; - -/*---------------------------------------------------------------------------- -| Software IEC/IEEE integer-to-floating-point conversion routines. -*----------------------------------------------------------------------------*/ - -floatx80 int32_to_floatx80(int32_t); -floatx80 int64_to_floatx80(int64_t); - -/*---------------------------------------------------------------------------- -| Software IEC/IEEE single-precision conversion routines. 
-*----------------------------------------------------------------------------*/ -floatx80 float32_to_floatx80(float32, float_status *status); -floatx80 float32_to_floatx80_allowunnormal(float32, float_status *status); - -/*---------------------------------------------------------------------------- -| Software IEC/IEEE double-precision conversion routines. -*----------------------------------------------------------------------------*/ -floatx80 float64_to_floatx80(float64, float_status *status); - -floatx80 float64_to_floatx80_allowunnormal( float64 a, float_status *status ); - -/*---------------------------------------------------------------------------- -| Software IEC/IEEE extended double-precision conversion routines. -*----------------------------------------------------------------------------*/ -int32_t floatx80_to_int32(floatx80, float_status *status); -#ifdef SOFTFLOAT_68K -int16_t floatx80_to_int16(floatx80, float_status *status); -int8_t floatx80_to_int8(floatx80, float_status *status); -#endif -int32_t floatx80_to_int32_round_to_zero(floatx80, float_status *status); -int64_t floatx80_to_int64(floatx80, float_status *status); -float32 floatx80_to_float32(floatx80, float_status *status); -float64 floatx80_to_float64(floatx80, float_status *status); -#ifdef SOFTFLOAT_68K -floatx80 floatx80_to_floatx80( floatx80, float_status *status); -floatx80 floatdecimal_to_floatx80(floatx80, float_status *status); -floatx80 floatx80_to_floatdecimal(floatx80, int32_t*, float_status *status); -#endif - -uint64_t extractFloatx80Frac( floatx80 a ); -int32_t extractFloatx80Exp( floatx80 a ); -flag extractFloatx80Sign( floatx80 a ); - -floatx80 floatx80_round_to_int_toward_zero( floatx80 a, float_status *status); -floatx80 floatx80_round_to_float32( floatx80, float_status *status ); -floatx80 floatx80_round_to_float64( floatx80, float_status *status ); -floatx80 floatx80_round32( floatx80, float_status *status); -floatx80 floatx80_round64( floatx80, float_status *status); 
- -flag floatx80_eq( floatx80, floatx80, float_status *status); -flag floatx80_le( floatx80, floatx80, float_status *status); -flag floatx80_lt( floatx80, floatx80, float_status *status); - -#ifdef SOFTFLOAT_68K -// functions are in softfloat.c -floatx80 floatx80_move( floatx80 a, float_status *status ); -floatx80 floatx80_abs( floatx80 a, float_status *status ); -floatx80 floatx80_neg( floatx80 a, float_status *status ); -floatx80 floatx80_getexp( floatx80 a, float_status *status ); -floatx80 floatx80_getman( floatx80 a, float_status *status ); -floatx80 floatx80_scale(floatx80 a, floatx80 b, float_status *status ); -floatx80 floatx80_rem( floatx80 a, floatx80 b, uint64_t *q, flag *s, float_status *status ); -floatx80 floatx80_mod( floatx80 a, floatx80 b, uint64_t *q, flag *s, float_status *status ); -floatx80 floatx80_sglmul( floatx80 a, floatx80 b, float_status *status ); -floatx80 floatx80_sgldiv( floatx80 a, floatx80 b, float_status *status ); -floatx80 floatx80_cmp( floatx80 a, floatx80 b, float_status *status ); -floatx80 floatx80_tst( floatx80 a, float_status *status ); - -// functions are in softfloat_fpsp.c -floatx80 floatx80_acos(floatx80 a, float_status *status); -floatx80 floatx80_asin(floatx80 a, float_status *status); -floatx80 floatx80_atan(floatx80 a, float_status *status); -floatx80 floatx80_atanh(floatx80 a, float_status *status); -floatx80 floatx80_cos(floatx80 a, float_status *status); -floatx80 floatx80_cosh(floatx80 a, float_status *status); -floatx80 floatx80_etox(floatx80 a, float_status *status); -floatx80 floatx80_etoxm1(floatx80 a, float_status *status); -floatx80 floatx80_log10(floatx80 a, float_status *status); -floatx80 floatx80_log2(floatx80 a, float_status *status); -floatx80 floatx80_logn(floatx80 a, float_status *status); -floatx80 floatx80_lognp1(floatx80 a, float_status *status); -floatx80 floatx80_sin(floatx80 a, float_status *status); -floatx80 floatx80_sinh(floatx80 a, float_status *status); -floatx80 floatx80_tan(floatx80 a, 
float_status *status); -floatx80 floatx80_tanh(floatx80 a, float_status *status); -floatx80 floatx80_tentox(floatx80 a, float_status *status); -floatx80 floatx80_twotox(floatx80 a, float_status *status); -#endif - -// functions originally internal to softfloat.c -void normalizeFloatx80Subnormal( uint64_t aSig, int32_t *zExpPtr, uint64_t *zSigPtr ); -floatx80 packFloatx80( flag zSign, int32_t zExp, uint64_t zSig ); -floatx80 roundAndPackFloatx80(int8_t roundingPrecision, flag zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1, float_status *status); - -/*---------------------------------------------------------------------------- -| Software IEC/IEEE extended double-precision operations. -*----------------------------------------------------------------------------*/ -floatx80 floatx80_round_to_int(floatx80, float_status *status); -floatx80 floatx80_add(floatx80, floatx80, float_status *status); -floatx80 floatx80_sub(floatx80, floatx80, float_status *status); -floatx80 floatx80_mul(floatx80, floatx80, float_status *status); -floatx80 floatx80_div(floatx80, floatx80, float_status *status); -floatx80 floatx80_sqrt(floatx80, float_status *status); -floatx80 floatx80_normalize(floatx80); -floatx80 floatx80_denormalize(floatx80, flag); - -static inline int floatx80_is_zero_or_denormal(floatx80 a) -{ - return (a.high & 0x7fff) == 0; -} - -static inline int floatx80_is_any_nan(floatx80 a) -{ - return ((a.high & 0x7fff) == 0x7fff) && (a.low<<1); -} - -/*---------------------------------------------------------------------------- -| Return whether the given value is an invalid floatx80 encoding. -| Invalid floatx80 encodings arise when the integer bit is not set, but -| the exponent is not zero. The only times the integer bit is permitted to -| be zero is in subnormal numbers and the value zero. -| This includes what the Intel software developer's manual calls pseudo-NaNs, -| pseudo-infinities and un-normal numbers. 
It does not include -| pseudo-denormals, which must still be correctly handled as inputs even -| if they are never generated as outputs. -*----------------------------------------------------------------------------*/ -static inline bool floatx80_invalid_encoding(floatx80 a) -{ - return (a.low & (1ULL << 63)) == 0 && (a.high & 0x7FFF) != 0 && (a.high & 0x7FFF) != 0x7FFF; -} - -#define floatx80_zero make_floatx80(0x0000, 0x0000000000000000LL) -#define floatx80_one make_floatx80(0x3fff, 0x8000000000000000LL) -#define floatx80_ln2 make_floatx80(0x3ffe, 0xb17217f7d1cf79acLL) -#define floatx80_pi make_floatx80(0x4000, 0xc90fdaa22168c235LL) -#define floatx80_half make_floatx80(0x3ffe, 0x8000000000000000LL) -#define floatx80_infinity make_floatx80(0x7fff, 0x8000000000000000LL) - -#endif /* SOFTFLOAT_H */ diff --git a/src/softfloat/softfloat_decimal.cpp b/src/softfloat/softfloat_decimal.cpp deleted file mode 100644 index afb5b8b1..00000000 --- a/src/softfloat/softfloat_decimal.cpp +++ /dev/null @@ -1,461 +0,0 @@ -/*============================================================================ - -This C source file is an extension to the SoftFloat IEC/IEEE Floating-point -Arithmetic Package, Release 2a. - -=============================================================================*/ - -#include - -#include "sysconfig.h" -#include "sysdeps.h" - -#define DECIMAL_LOG 0 - -#if DECIMAL_LOG -#define decimal_log write_log -#else -#define decimal_log(fmt, ...) -#endif - -#include "softfloat.h" -#include "softfloat-macros.h" -#include "softfloat/softfloat-specialize.h" - -/*---------------------------------------------------------------------------- -| Methods for converting decimal floats to binary extended precision floats. 
-*----------------------------------------------------------------------------*/ - -static void round128to64(flag aSign, int32_t *aExp, uint64_t *aSig0, uint64_t *aSig1, float_status *status) -{ - flag increment; - int32_t zExp; - uint64_t zSig0, zSig1; - - zExp = *aExp; - zSig0 = *aSig0; - zSig1 = *aSig1; - - increment = ( (int64_t) zSig1 < 0 ); - if (status->float_rounding_mode != float_round_nearest_even) { - if (status->float_rounding_mode == float_round_to_zero) { - increment = 0; - } else { - if (aSign) { - increment = (status->float_rounding_mode == float_round_down) && zSig1; - } else { - increment = (status->float_rounding_mode == float_round_up) && zSig1; - } - } - } - - if (increment) { - ++zSig0; - if (zSig0 == 0) { - ++zExp; - zSig0 = LIT64(0x8000000000000000); - } else { - zSig0 &= ~ (((uint64_t) (zSig1<<1) == 0) & (status->float_rounding_mode == float_round_nearest_even)); - } - } else { - if ( zSig0 == 0 ) zExp = 0; - } - - *aExp = zExp; - *aSig0 = zSig0; - *aSig1 = 0; -} - -static void mul128by128round(int32_t *aExp, uint64_t *aSig0, uint64_t *aSig1, int32_t bExp, uint64_t bSig0, uint64_t bSig1, float_status *status) -{ - int32_t zExp; - uint64_t zSig0, zSig1, zSig2, zSig3; - - zExp = *aExp; - zSig0 = *aSig0; - zSig1 = *aSig1; - - round128to64(0, &bExp, &bSig0, &bSig1, status); - - zExp += bExp - 0x3FFE; - mul128To256(zSig0, zSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3); - zSig1 |= (zSig2 | zSig3) != 0; - if ( 0 < (int64_t) zSig0 ) { - shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 ); - --zExp; - } - *aExp = zExp; - *aSig0 = zSig0; - *aSig1 = zSig1; - - round128to64(0, aExp, aSig0, aSig1, status); -} - -static void mul128by128(int32_t *aExp, uint64_t *aSig0, uint64_t *aSig1, int32_t bExp, uint64_t bSig0, uint64_t bSig1) -{ - int32_t zExp; - uint64_t zSig0, zSig1, zSig2, zSig3; - - zExp = *aExp; - zSig0 = *aSig0; - zSig1 = *aSig1; - - zExp += bExp - 0x3FFE; - mul128To256(zSig0, zSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3); - zSig1 
|= (zSig2 | zSig3) != 0; - if ( 0 < (int64_t) zSig0 ) { - shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 ); - --zExp; - } - *aExp = zExp; - *aSig0 = zSig0; - *aSig1 = zSig1; -} - -static void div128by128(int32_t *paExp, uint64_t *paSig0, uint64_t *paSig1, int32_t bExp, uint64_t bSig0, uint64_t bSig1) -{ - int32_t zExp, aExp; - uint64_t zSig0, zSig1, aSig0, aSig1; - uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3; - - aExp = *paExp; - aSig0 = *paSig0; - aSig1 = *paSig1; - - zExp = aExp - bExp + 0x3FFE; - if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) { - shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 ); - ++zExp; - } - zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 ); - mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 ); - sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 ); - while ( (int64_t) rem0 < 0 ) { - --zSig0; - add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 ); - } - zSig1 = estimateDiv128To64( rem1, rem2, bSig0 ); - if ( ( zSig1 & 0x3FFF ) <= 4 ) { - mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 ); - sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 ); - while ( (int64_t) rem1 < 0 ) { - --zSig1; - add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 ); - } - zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 ); - } - - *paExp = zExp; - *paSig0 = zSig0; - *paSig1 = zSig1; -} - -static void tentoint128(flag mSign, flag eSign, int32_t *aExp, uint64_t *aSig0, uint64_t *aSig1, int32_t scale, float_status *status) - { - int8_t save_rounding_mode; - int32_t mExp; - uint64_t mSig0, mSig1; - - save_rounding_mode = status->float_rounding_mode; - switch (status->float_rounding_mode) { - case float_round_nearest_even: - break; - case float_round_down: - if (mSign != eSign) { - set_float_rounding_mode(float_round_up, status); - } - break; - case float_round_up: - if (mSign != eSign) { - set_float_rounding_mode(float_round_down, status); - } - break; - case float_round_to_zero: - if 
(eSign == 0) { - set_float_rounding_mode(float_round_down, status); - } else { - set_float_rounding_mode(float_round_up, status); - } - break; - default: - break; - } - - *aExp = 0x3FFF; - *aSig0 = LIT64(0x8000000000000000); - *aSig1 = 0; - - mExp = 0x4002; - mSig0 = LIT64(0xA000000000000000); - mSig1 = 0; - - while (scale) { - if (scale & 1) { - mul128by128round(aExp, aSig0, aSig1, mExp, mSig0, mSig1, status); - } - mul128by128(&mExp, &mSig0, &mSig1, mExp, mSig0, mSig1); - scale >>= 1; - } - - set_float_rounding_mode(save_rounding_mode, status); -} - -static int64_t tentointdec(int32_t scale) -{ - uint64_t decM, decX; - - decX = 1; - decM = 10; - - while (scale) { - if (scale & 1) { - decX *= decM; - } - decM *= decM; - scale >>= 1; - } - - return decX; -} - - -static int64_t float128toint64(flag zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1, float_status *status) -{ - int8_t roundingMode; - flag roundNearestEven, increment; - int64_t z; - - shift128RightJamming(zSig0, zSig1, 0x403E - zExp, &zSig0, &zSig1); - - roundingMode = status->float_rounding_mode; - roundNearestEven = (roundingMode == float_round_nearest_even); - increment = ((int64_t)zSig1 < 0); - if (!roundNearestEven) { - if (roundingMode == float_round_to_zero) { - increment = 0; - } else { - if (zSign) { - increment = (roundingMode == float_round_down ) && zSig1; - } else { - increment = (roundingMode == float_round_up ) && zSig1; - } - } - } - if (increment) { - ++zSig0; - zSig0 &= ~ (((uint64_t)(zSig1<<1) == 0) & roundNearestEven); - } - z = zSig0; - if (zSig1) float_raise(float_flag_inexact, status); - return z; -} - -static int32_t getDecimalExponent(int32_t aExp, uint64_t aSig) -{ - flag zSign; - int32_t zExp, shiftCount; - uint64_t zSig0, zSig1; - - if (aSig == 0 || aExp == 0x3FFF) { - return 0; - } - if (aExp < 0) { - return -4932; - } - - aSig ^= LIT64(0x8000000000000000); - aExp -= 0x3FFF; - zSign = (aExp < 0); - aExp = zSign ? 
-aExp : aExp; - shiftCount = 31 - countLeadingZeros32(aExp); - zExp = 0x3FFF + shiftCount; - - if (shiftCount < 0) { - shortShift128Left(aSig, 0, -shiftCount, &zSig0, &zSig1); - } else { - shift128Right(aSig, 0, shiftCount, &zSig0, &zSig1); - aSig = (uint64_t)aExp << (63 - shiftCount); - if (zSign) { - sub128(aSig, 0, zSig0, zSig1, &zSig0, &zSig1); - } else { - add128(aSig, 0, zSig0, zSig1, &zSig0, &zSig1); - } - } - - shiftCount = countLeadingZeros64(zSig0); - shortShift128Left(zSig0, zSig1, shiftCount, &zSig0, &zSig1); - zExp -= shiftCount; - mul128by128(&zExp, &zSig0, &zSig1, 0x3FFD, LIT64(0x9A209A84FBCFF798), LIT64(0x8F8959AC0B7C9178)); - - shiftCount = 0x403E - zExp; - shift128RightJamming(zSig0, zSig1, shiftCount, &zSig0, &zSig1); - - if ((int64_t)zSig1 < 0) { - ++zSig0; - zSig0 &= ~(((int64_t)(zSig1<<1) == 0) & 1); - } - - zExp = zSign ? -zSig0 : zSig0; - - return zExp; -} - -/*---------------------------------------------------------------------------- -| Decimal to binary -*----------------------------------------------------------------------------*/ - -floatx80 floatdecimal_to_floatx80(floatx80 a, float_status *status) -{ - flag decSign, zSign, decExpSign; - int32_t decExp, zExp, xExp, shiftCount; - uint64_t decSig, zSig0, zSig1, xSig0, xSig1; - - decSign = extractFloatx80Sign(a); - decExp = extractFloatx80Exp(a); - decSig = extractFloatx80Frac(a); - - if (decExp == 0x7FFF) return a; - - if (decExp == 0 && decSig == 0) return a; - - decExpSign = (decExp >> 14) & 1; - decExp &= 0x3FFF; - - shiftCount = countLeadingZeros64( decSig ); - zExp = 0x403E - shiftCount; - zSig0 = decSig << shiftCount; - zSig1 = 0; - zSign = decSign; - - tentoint128(decSign, decExpSign, &xExp, &xSig0, &xSig1, decExp, status); - - if (decExpSign) { - div128by128(&zExp, &zSig0, &zSig1, xExp, xSig0, xSig1); - } else { - mul128by128(&zExp, &zSig0, &zSig1, xExp, xSig0, xSig1); - } - - if (zSig1) float_raise(float_flag_decimal, status); - round128to64(zSign, &zExp, &zSig0, &zSig1, 
status); - - return packFloatx80( zSign, zExp, zSig0 ); - -} - -/*---------------------------------------------------------------------------- - | Binary to decimal - *----------------------------------------------------------------------------*/ - -floatx80 floatx80_to_floatdecimal(floatx80 a, int32_t *k, float_status *status) -{ - flag aSign, decSign; - int32_t aExp, decExp, zExp, xExp; - uint64_t aSig, decSig, decX, zSig0, zSig1, xSig0, xSig1; - flag ictr, lambda; - int32_t kfactor, ilog, iscale, len; - - aSign = extractFloatx80Sign(a); - aExp = extractFloatx80Exp(a); - aSig = extractFloatx80Frac(a); - - if (aExp == 0x7FFF) { - if ((uint64_t) (aSig<<1)) return propagateFloatx80NaNOneArg(a, status); - return a; - } - - if (aExp == 0) { - if (aSig == 0) return packFloatx80(aSign, 0, 0); - normalizeFloatx80Subnormal(aSig, &aExp, &aSig); - } - - kfactor = *k; - - ilog = getDecimalExponent(aExp, aSig); - - ictr = 0; - -try_again: - decimal_log(_T("ILOG = %i\n"), ilog); - - if (kfactor > 0) { - if (kfactor > 17) { - kfactor = 17; - float_raise(float_flag_invalid, status); - } - len = kfactor; - } else { - len = ilog + 1 - kfactor; - if (len > 17) { - len = 17; - } - if (len < 1) { - len = 1; - } - if (kfactor > ilog) { - ilog = kfactor; - decimal_log(_T("ILOG is kfactor = %i\n"), ilog); - } - } - - decimal_log(_T("LEN = %i\n"),len); - - lambda = 0; - iscale = ilog + 1 - len; - - if (iscale < 0) { - lambda = 1; - iscale = -iscale; - } - - decimal_log(_T("ISCALE = %i, LAMBDA = %i\n"),iscale, lambda); - - tentoint128(lambda, 0, &xExp, &xSig0, &xSig1, iscale, status); - - decimal_log(_T("AFTER tentoint128: zExp = %04x, zSig0 = %16llx, zSig1 = %16llx\n"), xExp, xSig0, xSig1); - - zExp = aExp; - zSig0 = aSig; - zSig1 = 0; - - if (lambda) { - mul128by128(&zExp, &zSig0, &zSig1, xExp, xSig0, xSig1); - } else { - div128by128(&zExp, &zSig0, &zSig1, xExp, xSig0, xSig1); - } - - decimal_log(_T("BEFORE: zExp = %04x, zSig0 = %16llx, zSig1 = %16llx\n"),zExp,zSig0,zSig1); - - decSig = 
float128toint64(aSign, zExp, zSig0, zSig1, status); - - decimal_log(_T("AFTER: decSig = %llu\n"),decSig); - - if (ictr == 0) { - - decX = tentointdec(len - 1); - - if (decSig < decX) { // z < x - ilog -= 1; - ictr = 1; - goto try_again; - } - - decX *= 10; - - if (decSig > decX) { // z > x - ilog += 1; - ictr = 1; - goto try_again; - } - } - - decSign = aSign; - decExp = (ilog < 0) ? -ilog : ilog; - if (decExp > 999) { - float_raise(float_flag_invalid, status); - } - if (ilog < 0) decExp |= 0x4000; - - *k = len; - - return packFloatx80(decSign, decExp, decSig); -} diff --git a/src/softfloat/softfloat_fpsp.cpp b/src/softfloat/softfloat_fpsp.cpp deleted file mode 100644 index 61b6296e..00000000 --- a/src/softfloat/softfloat_fpsp.cpp +++ /dev/null @@ -1,2172 +0,0 @@ - -/*============================================================================ - - This C source file is an extension to the SoftFloat IEC/IEEE Floating-point - Arithmetic Package, Release 2a. - - Written by Andreas Grabher for Previous, NeXT Computer Emulator. - -=============================================================================*/ - -#include -#include - -#include "softfloat.h" -#include "softfloat-specialize.h" -#include "softfloat_fpsp_tables.h" - - -/*---------------------------------------------------------------------------- -| Algorithms for transcendental functions supported by MC68881 and MC68882 -| mathematical coprocessors. The functions are derived from FPSP library. 
-*----------------------------------------------------------------------------*/ - -#define pi_sig LIT64(0xc90fdaa22168c235) -#define pi_sig0 LIT64(0xc90fdaa22168c234) -#define pi_sig1 LIT64(0xc4c6628b80dc1cd1) - -#define pi_exp 0x4000 -#define piby2_exp 0x3FFF -#define piby4_exp 0x3FFE - -#define one_exp 0x3FFF -#define one_sig LIT64(0x8000000000000000) - -#define SET_PREC \ - int8_t user_rnd_mode, user_rnd_prec; \ - user_rnd_mode = status->float_rounding_mode; \ - user_rnd_prec = status->floatx80_rounding_precision; \ - status->float_rounding_mode = float_round_nearest_even; \ - status->floatx80_rounding_precision = 80 - -#define RESET_PREC \ - status->float_rounding_mode = user_rnd_mode; \ - status->floatx80_rounding_precision = user_rnd_prec - -/*---------------------------------------------------------------------------- - | Function for compactifying extended double-precision floating point values. - *----------------------------------------------------------------------------*/ - -static int32_t floatx80_make_compact(int32_t aExp, uint64_t aSig) -{ - return (aExp<<16)|(aSig>>48); -} - - -/*---------------------------------------------------------------------------- - | Arc cosine - *----------------------------------------------------------------------------*/ - -floatx80 floatx80_acos(floatx80 a, float_status *status) -{ - flag aSign; - int32_t aExp; - uint64_t aSig; - - - int32_t compact; - floatx80 fp0, fp1, one; - - aSig = extractFloatx80Frac(a); - aExp = extractFloatx80Exp(a); - aSign = extractFloatx80Sign(a); - - if (aExp == 0x7FFF && (uint64_t) (aSig<<1)) { - return propagateFloatx80NaNOneArg(a, status); - } - if (aExp == 0 && aSig == 0) { - float_raise(float_flag_inexact, status); - return roundAndPackFloatx80(status->floatx80_rounding_precision, 0, piby2_exp, pi_sig, 0, status); - } - - compact = floatx80_make_compact(aExp, aSig); - - if (compact >= 0x3FFF8000) { // |X| >= 1 - if (aExp == one_exp && aSig == one_sig) { // |X| == 1 - if (aSign) { // X 
== -1 - a = packFloatx80(0, pi_exp, pi_sig); - float_raise(float_flag_inexact, status); - return floatx80_move(a, status); - } else { // X == +1 - return packFloatx80(0, 0, 0); - } - } else { // |X| > 1 - float_raise(float_flag_invalid, status); - a.low = floatx80_default_nan_low; - a.high = floatx80_default_nan_high; - return a; - } - } // |X| < 1 - - SET_PREC; - - one = packFloatx80(0, one_exp, one_sig); - fp0 = a; - - fp1 = floatx80_add(one, fp0, status); // 1 + X - fp0 = floatx80_sub(one, fp0, status); // 1 - X - fp0 = floatx80_div(fp0, fp1, status); // (1-X)/(1+X) - fp0 = floatx80_sqrt(fp0, status); // SQRT((1-X)/(1+X)) - fp0 = floatx80_atan(fp0, status); // ATAN(SQRT((1-X)/(1+X))) - - RESET_PREC; - - a = floatx80_add(fp0, fp0, status); // 2 * ATAN(SQRT((1-X)/(1+X))) - - float_raise(float_flag_inexact, status); - - return a; -} - -/*---------------------------------------------------------------------------- - | Arc sine - *----------------------------------------------------------------------------*/ - -floatx80 floatx80_asin(floatx80 a, float_status *status) -{ - flag aSign; - int32_t aExp; - uint64_t aSig; - - int32_t compact; - floatx80 fp0, fp1, fp2, one; - - aSig = extractFloatx80Frac(a); - aExp = extractFloatx80Exp(a); - aSign = extractFloatx80Sign(a); - - if (aExp == 0x7FFF && (uint64_t) (aSig<<1)) { - return propagateFloatx80NaNOneArg(a, status); - } - - if (aExp == 0 && aSig == 0) { - return packFloatx80(aSign, 0, 0); - } - - compact = floatx80_make_compact(aExp, aSig); - - if (compact >= 0x3FFF8000) { // |X| >= 1 - if (aExp == one_exp && aSig == one_sig) { // |X| == 1 - float_raise(float_flag_inexact, status); - a = packFloatx80(aSign, piby2_exp, pi_sig); - return floatx80_move(a, status); - } else { // |X| > 1 - float_raise(float_flag_invalid, status); - a.low = floatx80_default_nan_low; - a.high = floatx80_default_nan_high; - return a; - } - - } // |X| < 1 - - SET_PREC; - - one = packFloatx80(0, one_exp, one_sig); - fp0 = a; - - fp1 = 
floatx80_sub(one, fp0, status); // 1 - X - fp2 = floatx80_add(one, fp0, status); // 1 + X - fp1 = floatx80_mul(fp2, fp1, status); // (1+X)*(1-X) - fp1 = floatx80_sqrt(fp1, status); // SQRT((1+X)*(1-X)) - fp0 = floatx80_div(fp0, fp1, status); // X/SQRT((1+X)*(1-X)) - - RESET_PREC; - - a = floatx80_atan(fp0, status); // ATAN(X/SQRT((1+X)*(1-X))) - - float_raise(float_flag_inexact, status); - - return a; -} - -/*---------------------------------------------------------------------------- - | Arc tangent - *----------------------------------------------------------------------------*/ - -floatx80 floatx80_atan(floatx80 a, float_status *status) -{ - flag aSign; - int32_t aExp; - uint64_t aSig; - - int32_t compact, tbl_index; - floatx80 fp0, fp1, fp2, fp3, xsave; - - aSig = extractFloatx80Frac(a); - aExp = extractFloatx80Exp(a); - aSign = extractFloatx80Sign(a); - - if (aExp == 0x7FFF) { - if ((uint64_t) (aSig<<1)) return propagateFloatx80NaNOneArg(a, status); - a = packFloatx80(aSign, piby2_exp, pi_sig); - float_raise(float_flag_inexact, status); - return floatx80_move(a, status); - } - - if (aExp == 0 && aSig == 0) { - return packFloatx80(aSign, 0, 0); - } - - compact = floatx80_make_compact(aExp, aSig); - - SET_PREC; - - if (compact < 0x3FFB8000 || compact > 0x4002FFFF) { // |X| >= 16 or |X| < 1/16 - if (compact > 0x3FFF8000) { // |X| >= 16 - if (compact > 0x40638000) { // |X| > 2^(100) - fp0 = packFloatx80(aSign, piby2_exp, pi_sig); - fp1 = packFloatx80(aSign, 0x0001, one_sig); - - RESET_PREC; - - a = floatx80_sub(fp0, fp1, status); - - float_raise(float_flag_inexact, status); - - return a; - } else { - fp0 = a; - fp1 = packFloatx80(1, one_exp, one_sig); // -1 - fp1 = floatx80_div(fp1, fp0, status); // X' = -1/X - xsave = fp1; - fp0 = floatx80_mul(fp1, fp1, status); // Y = X'*X' - fp1 = floatx80_mul(fp0, fp0, status); // Z = Y*Y - fp3 = float64_to_floatx80(LIT64(0xBFB70BF398539E6A), status); // C5 - fp2 = float64_to_floatx80(LIT64(0x3FBC7187962D1D7D), status); // C4 
- fp3 = floatx80_mul(fp3, fp1, status); // Z*C5 - fp2 = floatx80_mul(fp2, fp1, status); // Z*C4 - fp3 = floatx80_add(fp3, float64_to_floatx80(LIT64(0xBFC24924827107B8), status), status); // C3+Z*C5 - fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0x3FC999999996263E), status), status); // C2+Z*C4 - fp1 = floatx80_mul(fp1, fp3, status); // Z*(C3+Z*C5) - fp2 = floatx80_mul(fp2, fp0, status); // Y*(C2+Z*C4) - fp1 = floatx80_add(fp1, float64_to_floatx80(LIT64(0xBFD5555555555536), status), status); // C1+Z*(C3+Z*C5) - fp0 = floatx80_mul(fp0, xsave, status); // X'*Y - fp1 = floatx80_add(fp1, fp2, status); // [Y*(C2+Z*C4)]+[C1+Z*(C3+Z*C5)] - fp0 = floatx80_mul(fp0, fp1, status); // X'*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]) ?? - fp0 = floatx80_add(fp0, xsave, status); - fp1 = packFloatx80(aSign, piby2_exp, pi_sig); - - RESET_PREC; - - a = floatx80_add(fp0, fp1, status); - - float_raise(float_flag_inexact, status); - - return a; - } - } else { // |X| < 1/16 - if (compact < 0x3FD78000) { // |X| < 2^(-40) - RESET_PREC; - - a = floatx80_move(a, status); - - float_raise(float_flag_inexact, status); - - return a; - } else { - fp0 = a; - xsave = a; - fp0 = floatx80_mul(fp0, fp0, status); // Y = X*X - fp1 = floatx80_mul(fp0, fp0, status); // Z = Y*Y - fp2 = float64_to_floatx80(LIT64(0x3FB344447F876989), status); // B6 - fp3 = float64_to_floatx80(LIT64(0xBFB744EE7FAF45DB), status); // B5 - fp2 = floatx80_mul(fp2, fp1, status); // Z*B6 - fp3 = floatx80_mul(fp3, fp1, status); // Z*B5 - fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0x3FBC71C646940220), status), status); // B4+Z*B6 - fp3 = floatx80_add(fp3, float64_to_floatx80(LIT64(0xBFC24924921872F9), status), status); // B3+Z*B5 - fp2 = floatx80_mul(fp2, fp1, status); // Z*(B4+Z*B6) - fp1 = floatx80_mul(fp1, fp3, status); // Z*(B3+Z*B5) - fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0x3FC9999999998FA9), status), status); // B2+Z*(B4+Z*B6) - fp1 = floatx80_add(fp1, float64_to_floatx80(LIT64(0xBFD5555555555555), status), 
status); // B1+Z*(B3+Z*B5) - fp2 = floatx80_mul(fp2, fp0, status); // Y*(B2+Z*(B4+Z*B6)) - fp0 = floatx80_mul(fp0, xsave, status); // X*Y - fp1 = floatx80_add(fp1, fp2, status); // [B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))] - fp0 = floatx80_mul(fp0, fp1, status); // X*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]) - - RESET_PREC; - - a = floatx80_add(fp0, xsave, status); - - float_raise(float_flag_inexact, status); - - return a; - } - } - } else { - aSig &= LIT64(0xF800000000000000); - aSig |= LIT64(0x0400000000000000); - xsave = packFloatx80(aSign, aExp, aSig); // F - fp0 = a; - fp1 = a; // X - fp2 = packFloatx80(0, one_exp, one_sig); // 1 - fp1 = floatx80_mul(fp1, xsave, status); // X*F - fp0 = floatx80_sub(fp0, xsave, status); // X-F - fp1 = floatx80_add(fp1, fp2, status); // 1 + X*F - fp0 = floatx80_div(fp0, fp1, status); // U = (X-F)/(1+X*F) - - tbl_index = compact; - - tbl_index &= 0x7FFF0000; - tbl_index -= 0x3FFB0000; - tbl_index >>= 1; - tbl_index += compact&0x00007800; - tbl_index >>= 11; - - fp3 = atan_tbl[tbl_index]; - - fp3.high |= aSign ? 
0x8000 : 0; // ATAN(F) - - fp1 = floatx80_mul(fp0, fp0, status); // V = U*U - fp2 = float64_to_floatx80(LIT64(0xBFF6687E314987D8), status); // A3 - fp2 = floatx80_add(fp2, fp1, status); // A3+V - fp2 = floatx80_mul(fp2, fp1, status); // V*(A3+V) - fp1 = floatx80_mul(fp1, fp0, status); // U*V - fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0x4002AC6934A26DB3), status), status); // A2+V*(A3+V) - fp1 = floatx80_mul(fp1, float64_to_floatx80(LIT64(0xBFC2476F4E1DA28E), status), status); // A1+U*V - fp1 = floatx80_mul(fp1, fp2, status); // A1*U*V*(A2+V*(A3+V)) - fp0 = floatx80_add(fp0, fp1, status); // ATAN(U) - - RESET_PREC; - - a = floatx80_add(fp0, fp3, status); // ATAN(X) - - float_raise(float_flag_inexact, status); - - return a; - } -} - -/*---------------------------------------------------------------------------- - | Hyperbolic arc tangent - *----------------------------------------------------------------------------*/ - -floatx80 floatx80_atanh(floatx80 a, float_status *status) -{ - flag aSign; - int32_t aExp; - uint64_t aSig; - - int32_t compact; - floatx80 fp0, fp1, fp2, one; - - aSig = extractFloatx80Frac(a); - aExp = extractFloatx80Exp(a); - aSign = extractFloatx80Sign(a); - - if (aExp == 0x7FFF && (uint64_t) (aSig<<1)) { - return propagateFloatx80NaNOneArg(a, status); - } - - if (aExp == 0 && aSig == 0) { - return packFloatx80(aSign, 0, 0); - } - - compact = floatx80_make_compact(aExp, aSig); - - if (compact >= 0x3FFF8000) { // |X| >= 1 - if (aExp == one_exp && aSig == one_sig) { // |X| == 1 - float_raise(float_flag_divbyzero, status); - return packFloatx80(aSign, 0x7FFF, floatx80_default_infinity_low); - } else { // |X| > 1 - float_raise(float_flag_invalid, status); - a.low = floatx80_default_nan_low; - a.high = floatx80_default_nan_high; - return a; - } - } // |X| < 1 - - SET_PREC; - - one = packFloatx80(0, one_exp, one_sig); - fp2 = packFloatx80(aSign, 0x3FFE, one_sig); // SIGN(X) * (1/2) - fp0 = packFloatx80(0, aExp, aSig); // Y = |X| - fp1 = 
packFloatx80(1, aExp, aSig); // -Y - fp0 = floatx80_add(fp0, fp0, status); // 2Y - fp1 = floatx80_add(fp1, one, status); // 1-Y - fp0 = floatx80_div(fp0, fp1, status); // Z = 2Y/(1-Y) - fp0 = floatx80_lognp1(fp0, status); // LOG1P(Z) - - RESET_PREC; - - a = floatx80_mul(fp0, fp2, status); // ATANH(X) = SIGN(X) * (1/2) * LOG1P(Z) - - float_raise(float_flag_inexact, status); - - return a; -} - -/*---------------------------------------------------------------------------- - | Cosine - *----------------------------------------------------------------------------*/ - -floatx80 floatx80_cos(floatx80 a, float_status *status) -{ - flag aSign, xSign; - int32_t aExp, xExp; - uint64_t aSig, xSig; - - int32_t compact, l, n, j; - floatx80 fp0, fp1, fp2, fp3, fp4, fp5, x, invtwopi, twopi1, twopi2; - float32 posneg1, twoto63; - flag adjn, endflag; - - aSig = extractFloatx80Frac(a); - aExp = extractFloatx80Exp(a); - aSign = extractFloatx80Sign(a); - - if (aExp == 0x7FFF) { - if ((uint64_t) (aSig<<1)) return propagateFloatx80NaNOneArg(a, status); - float_raise(float_flag_invalid, status); - a.low = floatx80_default_nan_low; - a.high = floatx80_default_nan_high; - return a; - } - - if (aExp == 0 && aSig == 0) { - return packFloatx80(0, one_exp, one_sig); - } - - adjn = 1; - - SET_PREC; - - compact = floatx80_make_compact(aExp, aSig); - - fp0 = a; - - if (compact < 0x3FD78000 || compact > 0x4004BC7E) { // 2^(-40) > |X| > 15 PI - if (compact > 0x3FFF8000) { // |X| >= 15 PI - // REDUCEX - fp1 = packFloatx80(0, 0, 0); - if (compact == 0x7FFEFFFF) { - twopi1 = packFloatx80(aSign ^ 1, 0x7FFE, LIT64(0xC90FDAA200000000)); - twopi2 = packFloatx80(aSign ^ 1, 0x7FDC, LIT64(0x85A308D300000000)); - fp0 = floatx80_add(fp0, twopi1, status); - fp1 = fp0; - fp0 = floatx80_add(fp0, twopi2, status); - fp1 = floatx80_sub(fp1, fp0, status); - fp1 = floatx80_add(fp1, twopi2, status); - } - loop: - xSign = extractFloatx80Sign(fp0); - xExp = extractFloatx80Exp(fp0); - xExp -= 0x3FFF; - if (xExp <= 28) { - 
l = 0; - endflag = 1; - } else { - l = xExp - 27; - endflag = 0; - } - invtwopi = packFloatx80(0, 0x3FFE - l, LIT64(0xA2F9836E4E44152A)); // INVTWOPI - twopi1 = packFloatx80(0, 0x3FFF + l, LIT64(0xC90FDAA200000000)); - twopi2 = packFloatx80(0, 0x3FDD + l, LIT64(0x85A308D300000000)); - - twoto63 = 0x5F000000; - twoto63 |= xSign ? 0x80000000 : 0x00000000; // SIGN(INARG)*2^63 IN SGL - - fp2 = floatx80_mul(fp0, invtwopi, status); - fp2 = floatx80_add(fp2, float32_to_floatx80(twoto63, status), status); // THE FRACTIONAL PART OF FP2 IS ROUNDED - fp2 = floatx80_sub(fp2, float32_to_floatx80(twoto63, status), status); // FP2 is N - fp4 = floatx80_mul(twopi1, fp2, status); // W = N*P1 - fp5 = floatx80_mul(twopi2, fp2, status); // w = N*P2 - fp3 = floatx80_add(fp4, fp5, status); // FP3 is P - fp4 = floatx80_sub(fp4, fp3, status); // W-P - fp0 = floatx80_sub(fp0, fp3, status); // FP0 is A := R - P - fp4 = floatx80_add(fp4, fp5, status); // FP4 is p = (W-P)+w - fp3 = fp0; // FP3 is A - fp1 = floatx80_sub(fp1, fp4, status); // FP1 is a := r - p - fp0 = floatx80_add(fp0, fp1, status); // FP0 is R := A+a - - if (endflag > 0) { - n = floatx80_to_int32(fp2, status); - goto sincont; - } - fp3 = floatx80_sub(fp3, fp0, status); // A-R - fp1 = floatx80_add(fp1, fp3, status); // FP1 is r := (A-R)+a - goto loop; - } else { - // SINSM - fp0 = float32_to_floatx80(0x3F800000, status); // 1 - - RESET_PREC; - - if (adjn) { - // COSTINY - a = floatx80_sub(fp0, float32_to_floatx80(0x00800000, status), status); - } else { - // SINTINY - a = floatx80_move(a, status); - } - float_raise(float_flag_inexact, status); - - return a; - } - } else { - fp1 = floatx80_mul(fp0, float64_to_floatx80(LIT64(0x3FE45F306DC9C883), status), status); // X*2/PI - - n = floatx80_to_int32(fp1, status); - j = 32 + n; - - fp0 = floatx80_sub(fp0, pi_tbl[j], status); // X-Y1 - fp0 = floatx80_sub(fp0, float32_to_floatx80(pi_tbl2[j], status), status); // FP0 IS R = (X-Y1)-Y2 - - sincont: - if ((n + adjn) & 1) { - // COSPOLY - 
fp0 = floatx80_mul(fp0, fp0, status); // FP0 IS S - fp1 = floatx80_mul(fp0, fp0, status); // FP1 IS T - fp2 = float64_to_floatx80(LIT64(0x3D2AC4D0D6011EE3), status); // B8 - fp3 = float64_to_floatx80(LIT64(0xBDA9396F9F45AC19), status); // B7 - - xSign = extractFloatx80Sign(fp0); // X IS S - xExp = extractFloatx80Exp(fp0); - xSig = extractFloatx80Frac(fp0); - - if (((n + adjn) >> 1) & 1) { - xSign ^= 1; - posneg1 = 0xBF800000; // -1 - } else { - xSign ^= 0; - posneg1 = 0x3F800000; // 1 - } // X IS NOW R'= SGN*R - - fp2 = floatx80_mul(fp2, fp1, status); // TB8 - fp3 = floatx80_mul(fp3, fp1, status); // TB7 - fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0x3E21EED90612C972), status), status); // B6+TB8 - fp3 = floatx80_add(fp3, float64_to_floatx80(LIT64(0xBE927E4FB79D9FCF), status), status); // B5+TB7 - fp2 = floatx80_mul(fp2, fp1, status); // T(B6+TB8) - fp3 = floatx80_mul(fp3, fp1, status); // T(B5+TB7) - fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0x3EFA01A01A01D423), status), status); // B4+T(B6+TB8) - fp4 = packFloatx80(1, 0x3FF5, LIT64(0xB60B60B60B61D438)); - fp3 = floatx80_add(fp3, fp4, status); // B3+T(B5+TB7) - fp2 = floatx80_mul(fp2, fp1, status); // T(B4+T(B6+TB8)) - fp1 = floatx80_mul(fp1, fp3, status); // T(B3+T(B5+TB7)) - fp4 = packFloatx80(0, 0x3FFA, LIT64(0xAAAAAAAAAAAAAB5E)); - fp2 = floatx80_add(fp2, fp4, status); // B2+T(B4+T(B6+TB8)) - fp1 = floatx80_add(fp1, float32_to_floatx80(0xBF000000, status), status); // B1+T(B3+T(B5+TB7)) - fp0 = floatx80_mul(fp0, fp2, status); // S(B2+T(B4+T(B6+TB8))) - fp0 = floatx80_add(fp0, fp1, status); // [B1+T(B3+T(B5+TB7))]+[S(B2+T(B4+T(B6+TB8)))] - - x = packFloatx80(xSign, xExp, xSig); - fp0 = floatx80_mul(fp0, x, status); - - RESET_PREC; - - a = floatx80_add(fp0, float32_to_floatx80(posneg1, status), status); - - float_raise(float_flag_inexact, status); - - return a; - } else { - // SINPOLY - xSign = extractFloatx80Sign(fp0); // X IS R - xExp = extractFloatx80Exp(fp0); - xSig = extractFloatx80Frac(fp0); 
- - xSign ^= ((n + adjn) >> 1) & 1; // X IS NOW R'= SGN*R - - fp0 = floatx80_mul(fp0, fp0, status); // FP0 IS S - fp1 = floatx80_mul(fp0, fp0, status); // FP1 IS T - fp3 = float64_to_floatx80(LIT64(0xBD6AAA77CCC994F5), status); // A7 - fp2 = float64_to_floatx80(LIT64(0x3DE612097AAE8DA1), status); // A6 - fp3 = floatx80_mul(fp3, fp1, status); // T*A7 - fp2 = floatx80_mul(fp2, fp1, status); // T*A6 - fp3 = floatx80_add(fp3, float64_to_floatx80(LIT64(0xBE5AE6452A118AE4), status), status); // A5+T*A7 - fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0x3EC71DE3A5341531), status), status); // A4+T*A6 - fp3 = floatx80_mul(fp3, fp1, status); // T(A5+TA7) - fp2 = floatx80_mul(fp2, fp1, status); // T(A4+TA6) - fp3 = floatx80_add(fp3, float64_to_floatx80(LIT64(0xBF2A01A01A018B59), status), status); // A3+T(A5+TA7) - fp4 = packFloatx80(0, 0x3FF8, LIT64(0x88888888888859AF)); - fp2 = floatx80_add(fp2, fp4, status); // A2+T(A4+TA6) - fp1 = floatx80_mul(fp1, fp3, status); // T(A3+T(A5+TA7)) - fp2 = floatx80_mul(fp2, fp0, status); // S(A2+T(A4+TA6)) - fp4 = packFloatx80(1, 0x3FFC, LIT64(0xAAAAAAAAAAAAAA99)); - fp1 = floatx80_add(fp1, fp4, status); // A1+T(A3+T(A5+TA7)) - fp1 = floatx80_add(fp1, fp2, status); // [A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))] - - x = packFloatx80(xSign, xExp, xSig); - fp0 = floatx80_mul(fp0, x, status); // R'*S - fp0 = floatx80_mul(fp0, fp1, status); // SIN(R')-R' - - RESET_PREC; - - a = floatx80_add(fp0, x, status); - - float_raise(float_flag_inexact, status); - - return a; - } - } -} - -/*---------------------------------------------------------------------------- - | Hyperbolic cosine - *----------------------------------------------------------------------------*/ - -floatx80 floatx80_cosh(floatx80 a, float_status *status) -{ - flag aSign; - int32_t aExp; - uint64_t aSig; - - int32_t compact; - floatx80 fp0, fp1; - - aSig = extractFloatx80Frac(a); - aExp = extractFloatx80Exp(a); - aSign = extractFloatx80Sign(a); - - if (aExp == 0x7FFF) { - if 
((uint64_t) (aSig<<1)) return propagateFloatx80NaNOneArg(a, status); - return packFloatx80(0, 0x7FFF, floatx80_default_infinity_low); - } - - if (aExp == 0 && aSig == 0) { - return packFloatx80(0, one_exp, one_sig); - } - - SET_PREC; - - compact = floatx80_make_compact(aExp, aSig); - - if (compact > 0x400CB167) { - if (compact > 0x400CB2B3) { - RESET_PREC; - return roundAndPackFloatx80(status->floatx80_rounding_precision, 0, 0x8000, one_sig, 0, status); - } else { - fp0 = packFloatx80(0, aExp, aSig); - fp0 = floatx80_sub(fp0, float64_to_floatx80(LIT64(0x40C62D38D3D64634), status), status); - fp0 = floatx80_sub(fp0, float64_to_floatx80(LIT64(0x3D6F90AEB1E75CC7), status), status); - fp0 = floatx80_etox(fp0, status); - fp1 = packFloatx80(0, 0x7FFB, one_sig); - - RESET_PREC; - - a = floatx80_mul(fp0, fp1, status); - - float_raise(float_flag_inexact, status); - - return a; - } - } - - fp0 = packFloatx80(0, aExp, aSig); // |X| - fp0 = floatx80_etox(fp0, status); // EXP(|X|) - fp0 = floatx80_mul(fp0, float32_to_floatx80(0x3F000000, status), status); // (1/2)*EXP(|X|) - fp1 = float32_to_floatx80(0x3E800000, status); // 1/4 - fp1 = floatx80_div(fp1, fp0, status); // 1/(2*EXP(|X|)) - - RESET_PREC; - - a = floatx80_add(fp0, fp1, status); - - float_raise(float_flag_inexact, status); - - return a; -} - -/*---------------------------------------------------------------------------- - | e to x - *----------------------------------------------------------------------------*/ - -floatx80 floatx80_etox(floatx80 a, float_status *status) -{ - flag aSign; - int32_t aExp; - uint64_t aSig; - - int32_t compact, n, j, k, m, m1; - floatx80 fp0, fp1, fp2, fp3, l2, scale, adjscale; - flag adjflag; - - aSig = extractFloatx80Frac(a); - aExp = extractFloatx80Exp(a); - aSign = extractFloatx80Sign(a); - - if (aExp == 0x7FFF) { - if ((uint64_t) (aSig<<1)) return propagateFloatx80NaNOneArg(a, status); - if (aSign) return packFloatx80(0, 0, 0); - return packFloatx80(0, 0x7FFF, 
floatx80_default_infinity_low); - } - - if (aExp == 0 && aSig == 0) { - return packFloatx80(0, one_exp, one_sig); - } - - SET_PREC; - - adjflag = 0; - - if (aExp >= 0x3FBE) { // |X| >= 2^(-65) - compact = floatx80_make_compact(aExp, aSig); - - if (compact < 0x400CB167) { // |X| < 16380 log2 - fp0 = a; - fp1 = a; - fp0 = floatx80_mul(fp0, float32_to_floatx80(0x42B8AA3B, status), status); // 64/log2 * X - adjflag = 0; - n = floatx80_to_int32(fp0, status); // int(64/log2*X) - fp0 = int32_to_floatx80(n); - - j = n & 0x3F; // J = N mod 64 - m = n / 64; // NOTE: this is really arithmetic right shift by 6 - if (n < 0 && j) { // arithmetic right shift is division and round towards minus infinity - m--; - } - m += 0x3FFF; // biased exponent of 2^(M) - - expcont1: - fp2 = fp0; // N - fp0 = floatx80_mul(fp0, float32_to_floatx80(0xBC317218, status), status); // N * L1, L1 = lead(-log2/64) - l2 = packFloatx80(0, 0x3FDC, LIT64(0x82E308654361C4C6)); - fp2 = floatx80_mul(fp2, l2, status); // N * L2, L1+L2 = -log2/64 - fp0 = floatx80_add(fp0, fp1, status); // X + N*L1 - fp0 = floatx80_add(fp0, fp2, status); // R - - fp1 = floatx80_mul(fp0, fp0, status); // S = R*R - fp2 = float32_to_floatx80(0x3AB60B70, status); // A5 - fp2 = floatx80_mul(fp2, fp1, status); // fp2 is S*A5 - fp3 = floatx80_mul(float32_to_floatx80(0x3C088895, status), fp1, status); // fp3 is S*A4 - fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0x3FA5555555554431), status), status); // fp2 is A3+S*A5 - fp3 = floatx80_add(fp3, float64_to_floatx80(LIT64(0x3FC5555555554018), status), status); // fp3 is A2+S*A4 - fp2 = floatx80_mul(fp2, fp1, status); // fp2 is S*(A3+S*A5) - fp3 = floatx80_mul(fp3, fp1, status); // fp3 is S*(A2+S*A4) - fp2 = floatx80_add(fp2, float32_to_floatx80(0x3F000000, status), status); // fp2 is A1+S*(A3+S*A5) - fp3 = floatx80_mul(fp3, fp0, status); // fp3 IS R*S*(A2+S*A4) - fp2 = floatx80_mul(fp2, fp1, status); // fp2 IS S*(A1+S*(A3+S*A5)) - fp0 = floatx80_add(fp0, fp3, status); // fp0 IS 
R+R*S*(A2+S*A4) - fp0 = floatx80_add(fp0, fp2, status); // fp0 IS EXP(R) - 1 - - fp1 = exp_tbl[j]; - fp0 = floatx80_mul(fp0, fp1, status); // 2^(J/64)*(Exp(R)-1) - fp0 = floatx80_add(fp0, float32_to_floatx80(exp_tbl2[j], status), status); // accurate 2^(J/64) - fp0 = floatx80_add(fp0, fp1, status); // 2^(J/64) + 2^(J/64)*(Exp(R)-1) - - scale = packFloatx80(0, m, one_sig); - if (adjflag) { - adjscale = packFloatx80(0, m1, one_sig); - fp0 = floatx80_mul(fp0, adjscale, status); - } - - RESET_PREC; - - a = floatx80_mul(fp0, scale, status); - - float_raise(float_flag_inexact, status); - - return a; - } else { // |X| >= 16380 log2 - if (compact > 0x400CB27C) { // |X| >= 16480 log2 - RESET_PREC; - if (aSign) { - a = roundAndPackFloatx80(status->floatx80_rounding_precision, 0, -0x1000, aSig, 0, status); - } else { - a = roundAndPackFloatx80(status->floatx80_rounding_precision, 0, 0x8000, aSig, 0, status); - } - float_raise(float_flag_inexact, status); - - return a; - } else { - fp0 = a; - fp1 = a; - fp0 = floatx80_mul(fp0, float32_to_floatx80(0x42B8AA3B, status), status); // 64/log2 * X - adjflag = 1; - n = floatx80_to_int32(fp0, status); // int(64/log2*X) - fp0 = int32_to_floatx80(n); - - j = n & 0x3F; // J = N mod 64 - k = n / 64; // NOTE: this is really arithmetic right shift by 6 - if (n < 0 && j) { // arithmetic right shift is division and round towards minus infinity - k--; - } - m1 = k / 2; // NOTE: this is really arithmetic right shift by 1 - if (k < 0 && (k & 1)) { // arithmetic right shift is division and round towards minus infinity - m1--; - } - m = k - m1; - m1 += 0x3FFF; // biased exponent of 2^(M1) - m += 0x3FFF; // biased exponent of 2^(M) - - goto expcont1; - } - } - } else { // |X| < 2^(-65) - RESET_PREC; - - a = floatx80_add(a, float32_to_floatx80(0x3F800000, status), status); // 1 + X - - float_raise(float_flag_inexact, status); - - return a; - } -} - -/*---------------------------------------------------------------------------- - | e to x minus 1 - 
*----------------------------------------------------------------------------*/ - -floatx80 floatx80_etoxm1(floatx80 a, float_status *status) -{ - flag aSign; - int32_t aExp; - uint64_t aSig; - - int32_t compact, n, j, m, m1; - floatx80 fp0, fp1, fp2, fp3, l2, sc, onebysc; - - aSig = extractFloatx80Frac(a); - aExp = extractFloatx80Exp(a); - aSign = extractFloatx80Sign(a); - - if (aExp == 0x7FFF) { - if ((uint64_t) (aSig<<1)) return propagateFloatx80NaNOneArg(a, status); - if (aSign) return packFloatx80(aSign, one_exp, one_sig); - return packFloatx80(0, 0x7FFF, floatx80_default_infinity_low); - } - - if (aExp == 0 && aSig == 0) { - return packFloatx80(aSign, 0, 0); - } - - SET_PREC; - - if (aExp >= 0x3FFD) { // |X| >= 1/4 - compact = floatx80_make_compact(aExp, aSig); - - if (compact <= 0x4004C215) { // |X| <= 70 log2 - fp0 = a; - fp1 = a; - fp0 = floatx80_mul(fp0, float32_to_floatx80(0x42B8AA3B, status), status); // 64/log2 * X - n = floatx80_to_int32(fp0, status); // int(64/log2*X) - fp0 = int32_to_floatx80(n); - - j = n & 0x3F; // J = N mod 64 - m = n / 64; // NOTE: this is really arithmetic right shift by 6 - if (n < 0 && j) { // arithmetic right shift is division and round towards minus infinity - m--; - } - m1 = -m; - //m += 0x3FFF; // biased exponent of 2^(M) - //m1 += 0x3FFF; // biased exponent of -2^(-M) - - fp2 = fp0; // N - fp0 = floatx80_mul(fp0, float32_to_floatx80(0xBC317218, status), status); // N * L1, L1 = lead(-log2/64) - l2 = packFloatx80(0, 0x3FDC, LIT64(0x82E308654361C4C6)); - fp2 = floatx80_mul(fp2, l2, status); // N * L2, L1+L2 = -log2/64 - fp0 = floatx80_add(fp0, fp1, status); // X + N*L1 - fp0 = floatx80_add(fp0, fp2, status); // R - - fp1 = floatx80_mul(fp0, fp0, status); // S = R*R - fp2 = float32_to_floatx80(0x3950097B, status); // A6 - fp2 = floatx80_mul(fp2, fp1, status); // fp2 is S*A6 - fp3 = floatx80_mul(float32_to_floatx80(0x3AB60B6A, status), fp1, status); // fp3 is S*A5 - fp2 = floatx80_add(fp2, 
float64_to_floatx80(LIT64(0x3F81111111174385), status), status); // fp2 IS A4+S*A6 - fp3 = floatx80_add(fp3, float64_to_floatx80(LIT64(0x3FA5555555554F5A), status), status); // fp3 is A3+S*A5 - fp2 = floatx80_mul(fp2, fp1, status); // fp2 IS S*(A4+S*A6) - fp3 = floatx80_mul(fp3, fp1, status); // fp3 IS S*(A3+S*A5) - fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0x3FC5555555555555), status), status); // fp2 IS A2+S*(A4+S*A6) - fp3 = floatx80_add(fp3, float32_to_floatx80(0x3F000000, status), status); // fp3 IS A1+S*(A3+S*A5) - fp2 = floatx80_mul(fp2, fp1, status); // fp2 IS S*(A2+S*(A4+S*A6)) - fp1 = floatx80_mul(fp1, fp3, status); // fp1 IS S*(A1+S*(A3+S*A5)) - fp2 = floatx80_mul(fp2, fp0, status); // fp2 IS R*S*(A2+S*(A4+S*A6)) - fp0 = floatx80_add(fp0, fp1, status); // fp0 IS R+S*(A1+S*(A3+S*A5)) - fp0 = floatx80_add(fp0, fp2, status); // fp0 IS EXP(R) - 1 - - fp0 = floatx80_mul(fp0, exp_tbl[j], status); // 2^(J/64)*(Exp(R)-1) - - if (m >= 64) { - fp1 = float32_to_floatx80(exp_tbl2[j], status); - onebysc = packFloatx80(1, m1 + 0x3FFF, one_sig); // -2^(-M) - fp1 = floatx80_add(fp1, onebysc, status); - fp0 = floatx80_add(fp0, fp1, status); - fp0 = floatx80_add(fp0, exp_tbl[j], status); - } else if (m < -3) { - fp0 = floatx80_add(fp0, float32_to_floatx80(exp_tbl2[j], status), status); - fp0 = floatx80_add(fp0, exp_tbl[j], status); - onebysc = packFloatx80(1, m1 + 0x3FFF, one_sig); // -2^(-M) - fp0 = floatx80_add(fp0, onebysc, status); - } else { // -3 <= m <= 63 - fp1 = exp_tbl[j]; - fp0 = floatx80_add(fp0, float32_to_floatx80(exp_tbl2[j], status), status); - onebysc = packFloatx80(1, m1 + 0x3FFF, one_sig); // -2^(-M) - fp1 = floatx80_add(fp1, onebysc, status); - fp0 = floatx80_add(fp0, fp1, status); - } - - sc = packFloatx80(0, m + 0x3FFF, one_sig); - - RESET_PREC; - - a = floatx80_mul(fp0, sc, status); - - float_raise(float_flag_inexact, status); - - return a; - } else { // |X| > 70 log2 - if (aSign) { - fp0 = float32_to_floatx80(0xBF800000, status); // -1 - - 
RESET_PREC; - - a = floatx80_add(fp0, float32_to_floatx80(0x00800000, status), status); // -1 + 2^(-126) - - float_raise(float_flag_inexact, status); - - return a; - } else { - RESET_PREC; - - return floatx80_etox(a, status); - } - } - } else { // |X| < 1/4 - if (aExp >= 0x3FBE) { - fp0 = a; - fp0 = floatx80_mul(fp0, fp0, status); // S = X*X - fp1 = float32_to_floatx80(0x2F30CAA8, status); // B12 - fp1 = floatx80_mul(fp1, fp0, status); // S * B12 - fp2 = float32_to_floatx80(0x310F8290, status); // B11 - fp1 = floatx80_add(fp1, float32_to_floatx80(0x32D73220, status), status); // B10 - fp2 = floatx80_mul(fp2, fp0, status); - fp1 = floatx80_mul(fp1, fp0, status); - fp2 = floatx80_add(fp2, float32_to_floatx80(0x3493F281, status), status); // B9 - fp1 = floatx80_add(fp1, float64_to_floatx80(LIT64(0x3EC71DE3A5774682), status), status); // B8 - fp2 = floatx80_mul(fp2, fp0, status); - fp1 = floatx80_mul(fp1, fp0, status); - fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0x3EFA01A019D7CB68), status), status); // B7 - fp1 = floatx80_add(fp1, float64_to_floatx80(LIT64(0x3F2A01A01A019DF3), status), status); // B6 - fp2 = floatx80_mul(fp2, fp0, status); - fp1 = floatx80_mul(fp1, fp0, status); - fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0x3F56C16C16C170E2), status), status); // B5 - fp1 = floatx80_add(fp1, float64_to_floatx80(LIT64(0x3F81111111111111), status), status); // B4 - fp2 = floatx80_mul(fp2, fp0, status); - fp1 = floatx80_mul(fp1, fp0, status); - fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0x3FA5555555555555), status), status); // B3 - fp3 = packFloatx80(0, 0x3FFC, LIT64(0xAAAAAAAAAAAAAAAB)); - fp1 = floatx80_add(fp1, fp3, status); // B2 - fp2 = floatx80_mul(fp2, fp0, status); - fp1 = floatx80_mul(fp1, fp0, status); - - fp2 = floatx80_mul(fp2, fp0, status); - fp1 = floatx80_mul(fp1, a, status); - - fp0 = floatx80_mul(fp0, float32_to_floatx80(0x3F000000, status), status); // S*B1 - fp1 = floatx80_add(fp1, fp2, status); // Q - fp0 = floatx80_add(fp0, 
fp1, status); // S*B1+Q - - RESET_PREC; - - a = floatx80_add(fp0, a, status); - - float_raise(float_flag_inexact, status); - - return a; - } else { // |X| < 2^(-65) - sc = packFloatx80(1, 1, one_sig); - fp0 = a; - - if (aExp < 0x0033) { // |X| < 2^(-16382) - fp0 = floatx80_mul(fp0, float64_to_floatx80(LIT64(0x48B0000000000000), status), status); - fp0 = floatx80_add(fp0, sc, status); - - RESET_PREC; - - a = floatx80_mul(fp0, float64_to_floatx80(LIT64(0x3730000000000000), status), status); - } else { - RESET_PREC; - - a = floatx80_add(fp0, sc, status); - } - - float_raise(float_flag_inexact, status); - - return a; - } - } -} - -/*---------------------------------------------------------------------------- - | Log base 10 - *----------------------------------------------------------------------------*/ - -floatx80 floatx80_log10(floatx80 a, float_status *status) -{ - flag aSign; - int32_t aExp; - uint64_t aSig; - - floatx80 fp0, fp1; - - aSig = extractFloatx80Frac(a); - aExp = extractFloatx80Exp(a); - aSign = extractFloatx80Sign(a); - - if (aExp == 0x7FFF) { - if ((uint64_t) (aSig<<1)) propagateFloatx80NaNOneArg(a, status); - if (aSign == 0) - return packFloatx80(0, 0x7FFF, floatx80_default_infinity_low); - } - - if (aExp == 0 && aSig == 0) { - float_raise(float_flag_divbyzero, status); - return packFloatx80(1, 0x7FFF, floatx80_default_infinity_low); - } - - if (aSign) { - float_raise(float_flag_invalid, status); - a.low = floatx80_default_nan_low; - a.high = floatx80_default_nan_high; - return a; - } - - SET_PREC; - - fp0 = floatx80_logn(a, status); - fp1 = packFloatx80(0, 0x3FFD, LIT64(0xDE5BD8A937287195)); // INV_L10 - - RESET_PREC; - - a = floatx80_mul(fp0, fp1, status); // LOGN(X)*INV_L10 - - float_raise(float_flag_inexact, status); - - return a; -} - -/*---------------------------------------------------------------------------- - | Log base 2 - *----------------------------------------------------------------------------*/ - -floatx80 floatx80_log2(floatx80 a, 
float_status *status) -{ - flag aSign; - int32_t aExp; - uint64_t aSig; - - floatx80 fp0, fp1; - - aSig = extractFloatx80Frac(a); - aExp = extractFloatx80Exp(a); - aSign = extractFloatx80Sign(a); - - if (aExp == 0x7FFF) { - if ((uint64_t) (aSig<<1)) propagateFloatx80NaNOneArg(a, status); - if (aSign == 0) - return packFloatx80(0, 0x7FFF, floatx80_default_infinity_low); - } - - if (aExp == 0) { - if (aSig == 0) { - float_raise(float_flag_divbyzero, status); - return packFloatx80(1, 0x7FFF, floatx80_default_infinity_low); - } - normalizeFloatx80Subnormal(aSig, &aExp, &aSig); - } - - if (aSign) { - float_raise(float_flag_invalid, status); - a.low = floatx80_default_nan_low; - a.high = floatx80_default_nan_high; - return a; - } - - SET_PREC; - - if (aSig == one_sig) { // X is 2^k - RESET_PREC; - - a = int32_to_floatx80(aExp-0x3FFF); - } else { - fp0 = floatx80_logn(a, status); - fp1 = packFloatx80(0, 0x3FFF, LIT64(0xB8AA3B295C17F0BC)); // INV_L2 - - RESET_PREC; - - a = floatx80_mul(fp0, fp1, status); // LOGN(X)*INV_L2 - } - - float_raise(float_flag_inexact, status); - - return a; -} - -/*---------------------------------------------------------------------------- - | Log base e - *----------------------------------------------------------------------------*/ - -floatx80 floatx80_logn(floatx80 a, float_status *status) -{ - flag aSign; - int32_t aExp; - uint64_t aSig, fSig; - - int32_t compact, j, k, adjk; - floatx80 fp0, fp1, fp2, fp3, f, logof2, klog2, saveu; - - aSig = extractFloatx80Frac(a); - aExp = extractFloatx80Exp(a); - aSign = extractFloatx80Sign(a); - - if (aExp == 0x7FFF) { - if ((uint64_t) (aSig<<1)) propagateFloatx80NaNOneArg(a, status); - if (aSign == 0) - return packFloatx80(0, 0x7FFF, floatx80_default_infinity_low); - } - - adjk = 0; - - if (aExp == 0) { - if (aSig == 0) { // zero - float_raise(float_flag_divbyzero, status); - return packFloatx80(1, 0x7FFF, floatx80_default_infinity_low); - } -#if 1 - if ((aSig & one_sig) == 0) { // denormal - 
normalizeFloatx80Subnormal(aSig, &aExp, &aSig); - adjk = -100; - aExp += 100; - a = packFloatx80(aSign, aExp, aSig); - } -#else - normalizeFloatx80Subnormal(aSig, &aExp, &aSig); -#endif - } - - if (aSign) { - float_raise(float_flag_invalid, status); - a.low = floatx80_default_nan_low; - a.high = floatx80_default_nan_high; - return a; - } - - SET_PREC; - - compact = floatx80_make_compact(aExp, aSig); - - if (compact < 0x3FFEF07D || compact > 0x3FFF8841) { // |X| < 15/16 or |X| > 17/16 - k = aExp - 0x3FFF; - k += adjk; - fp1 = int32_to_floatx80(k); - - fSig = (aSig & LIT64(0xFE00000000000000)) | LIT64(0x0100000000000000); - j = (fSig >> 56) & 0x7E; // DISPLACEMENT FOR 1/F - - f = packFloatx80(0, 0x3FFF, fSig); // F - fp0 = packFloatx80(0, 0x3FFF, aSig); // Y - - fp0 = floatx80_sub(fp0, f, status); // Y-F - - // LP1CONT1 - fp0 = floatx80_mul(fp0, log_tbl[j], status); // FP0 IS U = (Y-F)/F - logof2 = packFloatx80(0, 0x3FFE, LIT64(0xB17217F7D1CF79AC)); - klog2 = floatx80_mul(fp1, logof2, status); // FP1 IS K*LOG2 - fp2 = floatx80_mul(fp0, fp0, status); // FP2 IS V=U*U - - fp3 = fp2; - fp1 = fp2; - - fp1 = floatx80_mul(fp1, float64_to_floatx80(LIT64(0x3FC2499AB5E4040B), status), status); // V*A6 - fp2 = floatx80_mul(fp2, float64_to_floatx80(LIT64(0xBFC555B5848CB7DB), status), status); // V*A5 - fp1 = floatx80_add(fp1, float64_to_floatx80(LIT64(0x3FC99999987D8730), status), status); // A4+V*A6 - fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0xBFCFFFFFFF6F7E97), status), status); // A3+V*A5 - fp1 = floatx80_mul(fp1, fp3, status); // V*(A4+V*A6) - fp2 = floatx80_mul(fp2, fp3, status); // V*(A3+V*A5) - fp1 = floatx80_add(fp1, float64_to_floatx80(LIT64(0x3FD55555555555A4), status), status); // A2+V*(A4+V*A6) - fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0xBFE0000000000008), status), status); // A1+V*(A3+V*A5) - fp1 = floatx80_mul(fp1, fp3, status); // V*(A2+V*(A4+V*A6)) - fp2 = floatx80_mul(fp2, fp3, status); // V*(A1+V*(A3+V*A5)) - fp1 = floatx80_mul(fp1, fp0, 
status); // U*V*(A2+V*(A4+V*A6)) - fp0 = floatx80_add(fp0, fp2, status); // U+V*(A1+V*(A3+V*A5)) - - fp1 = floatx80_add(fp1, log_tbl[j+1], status); // LOG(F)+U*V*(A2+V*(A4+V*A6)) - fp0 = floatx80_add(fp0, fp1, status); // FP0 IS LOG(F) + LOG(1+U) - - RESET_PREC; - - a = floatx80_add(fp0, klog2, status); - - float_raise(float_flag_inexact, status); - - return a; - } else { // |X-1| >= 1/16 - fp0 = a; - fp1 = a; - fp1 = floatx80_sub(fp1, float32_to_floatx80(0x3F800000, status), status); // FP1 IS X-1 - fp0 = floatx80_add(fp0, float32_to_floatx80(0x3F800000, status), status); // FP0 IS X+1 - fp1 = floatx80_add(fp1, fp1, status); // FP1 IS 2(X-1) - - // LP1CONT2 - fp1 = floatx80_div(fp1, fp0, status); // U - saveu = fp1; - fp0 = floatx80_mul(fp1, fp1, status); // FP0 IS V = U*U - fp1 = floatx80_mul(fp0, fp0, status); // FP1 IS W = V*V - - fp3 = float64_to_floatx80(LIT64(0x3F175496ADD7DAD6), status); // B5 - fp2 = float64_to_floatx80(LIT64(0x3F3C71C2FE80C7E0), status); // B4 - fp3 = floatx80_mul(fp3, fp1, status); // W*B5 - fp2 = floatx80_mul(fp2, fp1, status); // W*B4 - fp3 = floatx80_add(fp3, float64_to_floatx80(LIT64(0x3F624924928BCCFF), status), status); // B3+W*B5 - fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0x3F899999999995EC), status), status); // B2+W*B4 - fp1 = floatx80_mul(fp1, fp3, status); // W*(B3+W*B5) - fp2 = floatx80_mul(fp2, fp0, status); // V*(B2+W*B4) - fp1 = floatx80_add(fp1, float64_to_floatx80(LIT64(0x3FB5555555555555), status), status); // B1+W*(B3+W*B5) - - fp0 = floatx80_mul(fp0, saveu, status); // FP0 IS U*V - fp1 = floatx80_add(fp1, fp2, status); // B1+W*(B3+W*B5) + V*(B2+W*B4) - fp0 = floatx80_mul(fp0, fp1, status); // U*V*( [B1+W*(B3+W*B5)] + [V*(B2+W*B4)] ) - - RESET_PREC; - - a = floatx80_add(fp0, saveu, status); - - float_raise(float_flag_inexact, status); - - return a; - } -} - -/*---------------------------------------------------------------------------- - | Log base e of x plus 1 - 
*----------------------------------------------------------------------------*/ - -floatx80 floatx80_lognp1(floatx80 a, float_status *status) -{ - flag aSign; - int32_t aExp; - uint64_t aSig, fSig; - - int32_t compact, j, k; - floatx80 fp0, fp1, fp2, fp3, f, logof2, klog2, saveu; - - aSig = extractFloatx80Frac(a); - aExp = extractFloatx80Exp(a); - aSign = extractFloatx80Sign(a); - - if (aExp == 0x7FFF) { - if ((uint64_t) (aSig<<1)) propagateFloatx80NaNOneArg(a, status); - if (aSign) { - float_raise(float_flag_invalid, status); - a.low = floatx80_default_nan_low; - a.high = floatx80_default_nan_high; - return a; - } - return packFloatx80(0, 0x7FFF, floatx80_default_infinity_low); - } - - if (aExp == 0 && aSig == 0) { - return packFloatx80(aSign, 0, 0); - } - - if (aSign && aExp >= one_exp) { - if (aExp == one_exp && aSig == one_sig) { - float_raise(float_flag_divbyzero, status); - packFloatx80(aSign, 0x7FFF, floatx80_default_infinity_low); - } - float_raise(float_flag_invalid, status); - a.low = floatx80_default_nan_low; - a.high = floatx80_default_nan_high; - return a; - } - - if (aExp < 0x3f99 || (aExp == 0x3f99 && aSig == one_sig)) { // <= min threshold - float_raise(float_flag_inexact, status); - return floatx80_move(a, status); - } - - SET_PREC; - - compact = floatx80_make_compact(aExp, aSig); - - fp0 = a; // Z - fp1 = a; - - fp0 = floatx80_add(fp0, float32_to_floatx80(0x3F800000, status), status); // X = (1+Z) - - aExp = extractFloatx80Exp(fp0); - aSig = extractFloatx80Frac(fp0); - - compact = floatx80_make_compact(aExp, aSig); - - if (compact < 0x3FFE8000 || compact > 0x3FFFC000) { // |X| < 1/2 or |X| > 3/2 - k = aExp - 0x3FFF; - fp1 = int32_to_floatx80(k); - - fSig = (aSig & LIT64(0xFE00000000000000)) | LIT64(0x0100000000000000); - j = (fSig >> 56) & 0x7E; // DISPLACEMENT FOR 1/F - - f = packFloatx80(0, 0x3FFF, fSig); // F - fp0 = packFloatx80(0, 0x3FFF, aSig); // Y - - fp0 = floatx80_sub(fp0, f, status); // Y-F - - lp1cont1: - // LP1CONT1 - fp0 = 
floatx80_mul(fp0, log_tbl[j], status); // FP0 IS U = (Y-F)/F - logof2 = packFloatx80(0, 0x3FFE, LIT64(0xB17217F7D1CF79AC)); - klog2 = floatx80_mul(fp1, logof2, status); // FP1 IS K*LOG2 - fp2 = floatx80_mul(fp0, fp0, status); // FP2 IS V=U*U - - fp3 = fp2; - fp1 = fp2; - - fp1 = floatx80_mul(fp1, float64_to_floatx80(LIT64(0x3FC2499AB5E4040B), status), status); // V*A6 - fp2 = floatx80_mul(fp2, float64_to_floatx80(LIT64(0xBFC555B5848CB7DB), status), status); // V*A5 - fp1 = floatx80_add(fp1, float64_to_floatx80(LIT64(0x3FC99999987D8730), status), status); // A4+V*A6 - fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0xBFCFFFFFFF6F7E97), status), status); // A3+V*A5 - fp1 = floatx80_mul(fp1, fp3, status); // V*(A4+V*A6) - fp2 = floatx80_mul(fp2, fp3, status); // V*(A3+V*A5) - fp1 = floatx80_add(fp1, float64_to_floatx80(LIT64(0x3FD55555555555A4), status), status); // A2+V*(A4+V*A6) - fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0xBFE0000000000008), status), status); // A1+V*(A3+V*A5) - fp1 = floatx80_mul(fp1, fp3, status); // V*(A2+V*(A4+V*A6)) - fp2 = floatx80_mul(fp2, fp3, status); // V*(A1+V*(A3+V*A5)) - fp1 = floatx80_mul(fp1, fp0, status); // U*V*(A2+V*(A4+V*A6)) - fp0 = floatx80_add(fp0, fp2, status); // U+V*(A1+V*(A3+V*A5)) - - fp1 = floatx80_add(fp1, log_tbl[j+1], status); // LOG(F)+U*V*(A2+V*(A4+V*A6)) - fp0 = floatx80_add(fp0, fp1, status); // FP0 IS LOG(F) + LOG(1+U) - - RESET_PREC; - - a = floatx80_add(fp0, klog2, status); - - float_raise(float_flag_inexact, status); - - return a; - } else if (compact < 0x3FFEF07D || compact > 0x3FFF8841) { // |X| < 1/16 or |X| > -1/16 - // LP1CARE - fSig = (aSig & LIT64(0xFE00000000000000)) | LIT64(0x0100000000000000); - f = packFloatx80(0, 0x3FFF, fSig); // F - j = (fSig >> 56) & 0x7E; // DISPLACEMENT FOR 1/F - - if (compact >= 0x3FFF8000) { // 1+Z >= 1 - // KISZERO - fp0 = floatx80_sub(float32_to_floatx80(0x3F800000, status), f, status); // 1-F - fp0 = floatx80_add(fp0, fp1, status); // FP0 IS Y-F = (1-F)+Z - fp1 
= packFloatx80(0, 0, 0); // K = 0 - } else { - // KISNEG - fp0 = floatx80_sub(float32_to_floatx80(0x40000000, status), f, status); // 2-F - fp1 = floatx80_add(fp1, fp1, status); // 2Z - fp0 = floatx80_add(fp0, fp1, status); // FP0 IS Y-F = (2-F)+2Z - fp1 = packFloatx80(1, one_exp, one_sig); // K = -1 - } - goto lp1cont1; - } else { - // LP1ONE16 - fp1 = floatx80_add(fp1, fp1, status); // FP1 IS 2Z - fp0 = floatx80_add(fp0, float32_to_floatx80(0x3F800000, status), status); // FP0 IS 1+X - - // LP1CONT2 - fp1 = floatx80_div(fp1, fp0, status); // U - saveu = fp1; - fp0 = floatx80_mul(fp1, fp1, status); // FP0 IS V = U*U - fp1 = floatx80_mul(fp0, fp0, status); // FP1 IS W = V*V - - fp3 = float64_to_floatx80(LIT64(0x3F175496ADD7DAD6), status); // B5 - fp2 = float64_to_floatx80(LIT64(0x3F3C71C2FE80C7E0), status); // B4 - fp3 = floatx80_mul(fp3, fp1, status); // W*B5 - fp2 = floatx80_mul(fp2, fp1, status); // W*B4 - fp3 = floatx80_add(fp3, float64_to_floatx80(LIT64(0x3F624924928BCCFF), status), status); // B3+W*B5 - fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0x3F899999999995EC), status), status); // B2+W*B4 - fp1 = floatx80_mul(fp1, fp3, status); // W*(B3+W*B5) - fp2 = floatx80_mul(fp2, fp0, status); // V*(B2+W*B4) - fp1 = floatx80_add(fp1, float64_to_floatx80(LIT64(0x3FB5555555555555), status), status); // B1+W*(B3+W*B5) - - fp0 = floatx80_mul(fp0, saveu, status); // FP0 IS U*V - fp1 = floatx80_add(fp1, fp2, status); // B1+W*(B3+W*B5) + V*(B2+W*B4) - fp0 = floatx80_mul(fp0, fp1, status); // U*V*( [B1+W*(B3+W*B5)] + [V*(B2+W*B4)] ) - - RESET_PREC; - - a = floatx80_add(fp0, saveu, status); - - float_raise(float_flag_inexact, status); - - return a; - } -} - -/*---------------------------------------------------------------------------- - | Sine - *----------------------------------------------------------------------------*/ - -floatx80 floatx80_sin(floatx80 a, float_status *status) -{ - flag aSign, xSign; - int32_t aExp, xExp; - uint64_t aSig, xSig; - - int32_t 
compact, l, n, j; - floatx80 fp0, fp1, fp2, fp3, fp4, fp5, x, invtwopi, twopi1, twopi2; - float32 posneg1, twoto63; - flag adjn, endflag; - - aSig = extractFloatx80Frac(a); - aExp = extractFloatx80Exp(a); - aSign = extractFloatx80Sign(a); - - if (aExp == 0x7FFF) { - if ((uint64_t) (aSig<<1)) return propagateFloatx80NaNOneArg(a, status); - float_raise(float_flag_invalid, status); - a.low = floatx80_default_nan_low; - a.high = floatx80_default_nan_high; - return a; - } - - if (aExp == 0 && aSig == 0) { - return packFloatx80(aSign, 0, 0); - } - - adjn = 0; - - SET_PREC; - - compact = floatx80_make_compact(aExp, aSig); - - fp0 = a; - - if (compact < 0x3FD78000 || compact > 0x4004BC7E) { // 2^(-40) > |X| > 15 PI - if (compact > 0x3FFF8000) { // |X| >= 15 PI - // REDUCEX - fp1 = packFloatx80(0, 0, 0); - if (compact == 0x7FFEFFFF) { - twopi1 = packFloatx80(aSign ^ 1, 0x7FFE, LIT64(0xC90FDAA200000000)); - twopi2 = packFloatx80(aSign ^ 1, 0x7FDC, LIT64(0x85A308D300000000)); - fp0 = floatx80_add(fp0, twopi1, status); - fp1 = fp0; - fp0 = floatx80_add(fp0, twopi2, status); - fp1 = floatx80_sub(fp1, fp0, status); - fp1 = floatx80_add(fp1, twopi2, status); - } - loop: - xSign = extractFloatx80Sign(fp0); - xExp = extractFloatx80Exp(fp0); - xExp -= 0x3FFF; - if (xExp <= 28) { - l = 0; - endflag = 1; - } else { - l = xExp - 27; - endflag = 0; - } - invtwopi = packFloatx80(0, 0x3FFE - l, LIT64(0xA2F9836E4E44152A)); // INVTWOPI - twopi1 = packFloatx80(0, 0x3FFF + l, LIT64(0xC90FDAA200000000)); - twopi2 = packFloatx80(0, 0x3FDD + l, LIT64(0x85A308D300000000)); - - twoto63 = 0x5F000000; - twoto63 |= xSign ? 
0x80000000 : 0x00000000; // SIGN(INARG)*2^63 IN SGL - - fp2 = floatx80_mul(fp0, invtwopi, status); - fp2 = floatx80_add(fp2, float32_to_floatx80(twoto63, status), status); // THE FRACTIONAL PART OF FP2 IS ROUNDED - fp2 = floatx80_sub(fp2, float32_to_floatx80(twoto63, status), status); // FP2 is N - fp4 = floatx80_mul(twopi1, fp2, status); // W = N*P1 - fp5 = floatx80_mul(twopi2, fp2, status); // w = N*P2 - fp3 = floatx80_add(fp4, fp5, status); // FP3 is P - fp4 = floatx80_sub(fp4, fp3, status); // W-P - fp0 = floatx80_sub(fp0, fp3, status); // FP0 is A := R - P - fp4 = floatx80_add(fp4, fp5, status); // FP4 is p = (W-P)+w - fp3 = fp0; // FP3 is A - fp1 = floatx80_sub(fp1, fp4, status); // FP1 is a := r - p - fp0 = floatx80_add(fp0, fp1, status); // FP0 is R := A+a - - if (endflag > 0) { - n = floatx80_to_int32(fp2, status); - goto sincont; - } - fp3 = floatx80_sub(fp3, fp0, status); // A-R - fp1 = floatx80_add(fp1, fp3, status); // FP1 is r := (A-R)+a - goto loop; - } else { - // SINSM - fp0 = float32_to_floatx80(0x3F800000, status); // 1 - - RESET_PREC; - - if (adjn) { - // COSTINY - a = floatx80_sub(fp0, float32_to_floatx80(0x00800000, status), status); - } else { - // SINTINY - a = floatx80_move(a, status); - } - float_raise(float_flag_inexact, status); - - return a; - } - } else { - fp1 = floatx80_mul(fp0, float64_to_floatx80(LIT64(0x3FE45F306DC9C883), status), status); // X*2/PI - - n = floatx80_to_int32(fp1, status); - j = 32 + n; - - fp0 = floatx80_sub(fp0, pi_tbl[j], status); // X-Y1 - fp0 = floatx80_sub(fp0, float32_to_floatx80(pi_tbl2[j], status), status); // FP0 IS R = (X-Y1)-Y2 - - sincont: - if ((n + adjn) & 1) { - // COSPOLY - fp0 = floatx80_mul(fp0, fp0, status); // FP0 IS S - fp1 = floatx80_mul(fp0, fp0, status); // FP1 IS T - fp2 = float64_to_floatx80(LIT64(0x3D2AC4D0D6011EE3), status); // B8 - fp3 = float64_to_floatx80(LIT64(0xBDA9396F9F45AC19), status); // B7 - - xSign = extractFloatx80Sign(fp0); // X IS S - xExp = extractFloatx80Exp(fp0); - xSig 
= extractFloatx80Frac(fp0); - - if (((n + adjn) >> 1) & 1) { - xSign ^= 1; - posneg1 = 0xBF800000; // -1 - } else { - xSign ^= 0; - posneg1 = 0x3F800000; // 1 - } // X IS NOW R'= SGN*R - - fp2 = floatx80_mul(fp2, fp1, status); // TB8 - fp3 = floatx80_mul(fp3, fp1, status); // TB7 - fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0x3E21EED90612C972), status), status); // B6+TB8 - fp3 = floatx80_add(fp3, float64_to_floatx80(LIT64(0xBE927E4FB79D9FCF), status), status); // B5+TB7 - fp2 = floatx80_mul(fp2, fp1, status); // T(B6+TB8) - fp3 = floatx80_mul(fp3, fp1, status); // T(B5+TB7) - fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0x3EFA01A01A01D423), status), status); // B4+T(B6+TB8) - fp4 = packFloatx80(1, 0x3FF5, LIT64(0xB60B60B60B61D438)); - fp3 = floatx80_add(fp3, fp4, status); // B3+T(B5+TB7) - fp2 = floatx80_mul(fp2, fp1, status); // T(B4+T(B6+TB8)) - fp1 = floatx80_mul(fp1, fp3, status); // T(B3+T(B5+TB7)) - fp4 = packFloatx80(0, 0x3FFA, LIT64(0xAAAAAAAAAAAAAB5E)); - fp2 = floatx80_add(fp2, fp4, status); // B2+T(B4+T(B6+TB8)) - fp1 = floatx80_add(fp1, float32_to_floatx80(0xBF000000, status), status); // B1+T(B3+T(B5+TB7)) - fp0 = floatx80_mul(fp0, fp2, status); // S(B2+T(B4+T(B6+TB8))) - fp0 = floatx80_add(fp0, fp1, status); // [B1+T(B3+T(B5+TB7))]+[S(B2+T(B4+T(B6+TB8)))] - - x = packFloatx80(xSign, xExp, xSig); - fp0 = floatx80_mul(fp0, x, status); - - RESET_PREC; - - a = floatx80_add(fp0, float32_to_floatx80(posneg1, status), status); - - float_raise(float_flag_inexact, status); - - return a; - } else { - // SINPOLY - xSign = extractFloatx80Sign(fp0); // X IS R - xExp = extractFloatx80Exp(fp0); - xSig = extractFloatx80Frac(fp0); - - xSign ^= ((n + adjn) >> 1) & 1; // X IS NOW R'= SGN*R - - fp0 = floatx80_mul(fp0, fp0, status); // FP0 IS S - fp1 = floatx80_mul(fp0, fp0, status); // FP1 IS T - fp3 = float64_to_floatx80(LIT64(0xBD6AAA77CCC994F5), status); // A7 - fp2 = float64_to_floatx80(LIT64(0x3DE612097AAE8DA1), status); // A6 - fp3 = floatx80_mul(fp3, 
fp1, status); // T*A7 - fp2 = floatx80_mul(fp2, fp1, status); // T*A6 - fp3 = floatx80_add(fp3, float64_to_floatx80(LIT64(0xBE5AE6452A118AE4), status), status); // A5+T*A7 - fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0x3EC71DE3A5341531), status), status); // A4+T*A6 - fp3 = floatx80_mul(fp3, fp1, status); // T(A5+TA7) - fp2 = floatx80_mul(fp2, fp1, status); // T(A4+TA6) - fp3 = floatx80_add(fp3, float64_to_floatx80(LIT64(0xBF2A01A01A018B59), status), status); // A3+T(A5+TA7) - fp4 = packFloatx80(0, 0x3FF8, LIT64(0x88888888888859AF)); - fp2 = floatx80_add(fp2, fp4, status); // A2+T(A4+TA6) - fp1 = floatx80_mul(fp1, fp3, status); // T(A3+T(A5+TA7)) - fp2 = floatx80_mul(fp2, fp0, status); // S(A2+T(A4+TA6)) - fp4 = packFloatx80(1, 0x3FFC, LIT64(0xAAAAAAAAAAAAAA99)); - fp1 = floatx80_add(fp1, fp4, status); // A1+T(A3+T(A5+TA7)) - fp1 = floatx80_add(fp1, fp2, status); // [A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))] - - x = packFloatx80(xSign, xExp, xSig); - fp0 = floatx80_mul(fp0, x, status); // R'*S - fp0 = floatx80_mul(fp0, fp1, status); // SIN(R')-R' - - RESET_PREC; - - a = floatx80_add(fp0, x, status); - - float_raise(float_flag_inexact, status); - - return a; - } - } -} - -/*---------------------------------------------------------------------------- - | Hyperbolic sine - *----------------------------------------------------------------------------*/ - -floatx80 floatx80_sinh(floatx80 a, float_status *status) -{ - flag aSign; - int32_t aExp; - uint64_t aSig; - - int32_t compact; - floatx80 fp0, fp1, fp2; - float32 fact; - - aSig = extractFloatx80Frac(a); - aExp = extractFloatx80Exp(a); - aSign = extractFloatx80Sign(a); - - if (aExp == 0x7FFF) { - if ((uint64_t) (aSig<<1)) return propagateFloatx80NaNOneArg(a, status); - return packFloatx80(aSign, 0x7FFF, floatx80_default_infinity_low); - } - - if (aExp == 0 && aSig == 0) { - return packFloatx80(aSign, 0, 0); - } - - SET_PREC; - - compact = floatx80_make_compact(aExp, aSig); - - if (compact > 0x400CB167) { - // 
SINHBIG - if (compact > 0x400CB2B3) { - RESET_PREC; - - return roundAndPackFloatx80(status->floatx80_rounding_precision, aSign, 0x8000, aSig, 0, status); - } else { - fp0 = floatx80_abs(a, status); // Y = |X| - fp0 = floatx80_sub(fp0, float64_to_floatx80(LIT64(0x40C62D38D3D64634), status), status); // (|X|-16381LOG2_LEAD) - fp0 = floatx80_sub(fp0, float64_to_floatx80(LIT64(0x3D6F90AEB1E75CC7), status), status); // |X| - 16381 LOG2, ACCURATE - fp0 = floatx80_etox(fp0, status); - fp2 = packFloatx80(aSign, 0x7FFB, one_sig); - - RESET_PREC; - - a = floatx80_mul(fp0, fp2, status); - - float_raise(float_flag_inexact, status); - - return a; - } - } else { // |X| < 16380 LOG2 - fp0 = floatx80_abs(a, status); // Y = |X| - fp0 = floatx80_etoxm1(fp0, status); // FP0 IS Z = EXPM1(Y) - fp1 = floatx80_add(fp0, float32_to_floatx80(0x3F800000, status), status); // 1+Z - fp2 = fp0; - fp0 = floatx80_div(fp0, fp1, status); // Z/(1+Z) - fp0 = floatx80_add(fp0, fp2, status); - - fact = 0x3F000000; - fact |= aSign ? 
0x80000000 : 0x00000000; - - RESET_PREC; - - a = floatx80_mul(fp0, float32_to_floatx80(fact, status), status); - - float_raise(float_flag_inexact, status); - - return a; - } -} - -/*---------------------------------------------------------------------------- - | Tangent - *----------------------------------------------------------------------------*/ - -floatx80 floatx80_tan(floatx80 a, float_status *status) -{ - flag aSign, xSign; - int32_t aExp, xExp; - uint64_t aSig, xSig; - - int32_t compact, l, n, j; - floatx80 fp0, fp1, fp2, fp3, fp4, fp5, invtwopi, twopi1, twopi2; - float32 twoto63; - flag endflag; - - aSig = extractFloatx80Frac(a); - aExp = extractFloatx80Exp(a); - aSign = extractFloatx80Sign(a); - - if (aExp == 0x7FFF) { - if ((uint64_t) (aSig<<1)) return propagateFloatx80NaNOneArg(a, status); - float_raise(float_flag_invalid, status); - a.low = floatx80_default_nan_low; - a.high = floatx80_default_nan_high; - return a; - } - - if (aExp == 0 && aSig == 0) { - return packFloatx80(aSign, 0, 0); - } - - SET_PREC; - - compact = floatx80_make_compact(aExp, aSig); - - fp0 = a; - - if (compact < 0x3FD78000 || compact > 0x4004BC7E) { // 2^(-40) > |X| > 15 PI - if (compact > 0x3FFF8000) { // |X| >= 15 PI - // REDUCEX - fp1 = packFloatx80(0, 0, 0); - if (compact == 0x7FFEFFFF) { - twopi1 = packFloatx80(aSign ^ 1, 0x7FFE, LIT64(0xC90FDAA200000000)); - twopi2 = packFloatx80(aSign ^ 1, 0x7FDC, LIT64(0x85A308D300000000)); - fp0 = floatx80_add(fp0, twopi1, status); - fp1 = fp0; - fp0 = floatx80_add(fp0, twopi2, status); - fp1 = floatx80_sub(fp1, fp0, status); - fp1 = floatx80_add(fp1, twopi2, status); - } - loop: - xSign = extractFloatx80Sign(fp0); - xExp = extractFloatx80Exp(fp0); - xExp -= 0x3FFF; - if (xExp <= 28) { - l = 0; - endflag = 1; - } else { - l = xExp - 27; - endflag = 0; - } - invtwopi = packFloatx80(0, 0x3FFE - l, LIT64(0xA2F9836E4E44152A)); // INVTWOPI - twopi1 = packFloatx80(0, 0x3FFF + l, LIT64(0xC90FDAA200000000)); - twopi2 = packFloatx80(0, 0x3FDD + 
l, LIT64(0x85A308D300000000)); - - twoto63 = 0x5F000000; - twoto63 |= xSign ? 0x80000000 : 0x00000000; // SIGN(INARG)*2^63 IN SGL - - fp2 = floatx80_mul(fp0, invtwopi, status); - fp2 = floatx80_add(fp2, float32_to_floatx80(twoto63, status), status); // THE FRACTIONAL PART OF FP2 IS ROUNDED - fp2 = floatx80_sub(fp2, float32_to_floatx80(twoto63, status), status); // FP2 is N - fp4 = floatx80_mul(twopi1, fp2, status); // W = N*P1 - fp5 = floatx80_mul(twopi2, fp2, status); // w = N*P2 - fp3 = floatx80_add(fp4, fp5, status); // FP3 is P - fp4 = floatx80_sub(fp4, fp3, status); // W-P - fp0 = floatx80_sub(fp0, fp3, status); // FP0 is A := R - P - fp4 = floatx80_add(fp4, fp5, status); // FP4 is p = (W-P)+w - fp3 = fp0; // FP3 is A - fp1 = floatx80_sub(fp1, fp4, status); // FP1 is a := r - p - fp0 = floatx80_add(fp0, fp1, status); // FP0 is R := A+a - - if (endflag > 0) { - n = floatx80_to_int32(fp2, status); - goto tancont; - } - fp3 = floatx80_sub(fp3, fp0, status); // A-R - fp1 = floatx80_add(fp1, fp3, status); // FP1 is r := (A-R)+a - goto loop; - } else { - RESET_PREC; - - a = floatx80_move(a, status); - - float_raise(float_flag_inexact, status); - - return a; - } - } else { - fp1 = floatx80_mul(fp0, float64_to_floatx80(LIT64(0x3FE45F306DC9C883), status), status); // X*2/PI - - n = floatx80_to_int32(fp1, status); - j = 32 + n; - - fp0 = floatx80_sub(fp0, pi_tbl[j], status); // X-Y1 - fp0 = floatx80_sub(fp0, float32_to_floatx80(pi_tbl2[j], status), status); // FP0 IS R = (X-Y1)-Y2 - - tancont: - if (n & 1) { - // NODD - fp1 = fp0; // R - fp0 = floatx80_mul(fp0, fp0, status); // S = R*R - fp3 = float64_to_floatx80(LIT64(0x3EA0B759F50F8688), status); // Q4 - fp2 = float64_to_floatx80(LIT64(0xBEF2BAA5A8924F04), status); // P3 - fp3 = floatx80_mul(fp3, fp0, status); // SQ4 - fp2 = floatx80_mul(fp2, fp0, status); // SP3 - fp3 = floatx80_add(fp3, float64_to_floatx80(LIT64(0xBF346F59B39BA65F), status), status); // Q3+SQ4 - fp4 = packFloatx80(0, 0x3FF6, 
LIT64(0xE073D3FC199C4A00)); - fp2 = floatx80_add(fp2, fp4, status); // P2+SP3 - fp3 = floatx80_mul(fp3, fp0, status); // S(Q3+SQ4) - fp2 = floatx80_mul(fp2, fp0, status); // S(P2+SP3) - fp4 = packFloatx80(0, 0x3FF9, LIT64(0xD23CD68415D95FA1)); - fp3 = floatx80_add(fp3, fp4, status); // Q2+S(Q3+SQ4) - fp4 = packFloatx80(1, 0x3FFC, LIT64(0x8895A6C5FB423BCA)); - fp2 = floatx80_add(fp2, fp4, status); // P1+S(P2+SP3) - fp3 = floatx80_mul(fp3, fp0, status); // S(Q2+S(Q3+SQ4)) - fp2 = floatx80_mul(fp2, fp0, status); // S(P1+S(P2+SP3)) - fp4 = packFloatx80(1, 0x3FFD, LIT64(0xEEF57E0DA84BC8CE)); - fp3 = floatx80_add(fp3, fp4, status); // Q1+S(Q2+S(Q3+SQ4)) - fp2 = floatx80_mul(fp2, fp1, status); // RS(P1+S(P2+SP3)) - fp0 = floatx80_mul(fp0, fp3, status); // S(Q1+S(Q2+S(Q3+SQ4))) - fp1 = floatx80_add(fp1, fp2, status); // R+RS(P1+S(P2+SP3)) - fp0 = floatx80_add(fp0, float32_to_floatx80(0x3F800000, status), status); // 1+S(Q1+S(Q2+S(Q3+SQ4))) - - xSign = extractFloatx80Sign(fp1); - xExp = extractFloatx80Exp(fp1); - xSig = extractFloatx80Frac(fp1); - xSign ^= 1; - fp1 = packFloatx80(xSign, xExp, xSig); - - RESET_PREC; - - a = floatx80_div(fp0, fp1, status); - - float_raise(float_flag_inexact, status); - - return a; - } else { - fp1 = floatx80_mul(fp0, fp0, status); // S = R*R - fp3 = float64_to_floatx80(LIT64(0x3EA0B759F50F8688), status); // Q4 - fp2 = float64_to_floatx80(LIT64(0xBEF2BAA5A8924F04), status); // P3 - fp3 = floatx80_mul(fp3, fp1, status); // SQ4 - fp2 = floatx80_mul(fp2, fp1, status); // SP3 - fp3 = floatx80_add(fp3, float64_to_floatx80(LIT64(0xBF346F59B39BA65F), status), status); // Q3+SQ4 - fp4 = packFloatx80(0, 0x3FF6, LIT64(0xE073D3FC199C4A00)); - fp2 = floatx80_add(fp2, fp4, status); // P2+SP3 - fp3 = floatx80_mul(fp3, fp1, status); // S(Q3+SQ4) - fp2 = floatx80_mul(fp2, fp1, status); // S(P2+SP3) - fp4 = packFloatx80(0, 0x3FF9, LIT64(0xD23CD68415D95FA1)); - fp3 = floatx80_add(fp3, fp4, status); // Q2+S(Q3+SQ4) - fp4 = packFloatx80(1, 0x3FFC, 
LIT64(0x8895A6C5FB423BCA)); - fp2 = floatx80_add(fp2, fp4, status); // P1+S(P2+SP3) - fp3 = floatx80_mul(fp3, fp1, status); // S(Q2+S(Q3+SQ4)) - fp2 = floatx80_mul(fp2, fp1, status); // S(P1+S(P2+SP3)) - fp4 = packFloatx80(1, 0x3FFD, LIT64(0xEEF57E0DA84BC8CE)); - fp3 = floatx80_add(fp3, fp4, status); // Q1+S(Q2+S(Q3+SQ4)) - fp2 = floatx80_mul(fp2, fp0, status); // RS(P1+S(P2+SP3)) - fp1 = floatx80_mul(fp1, fp3, status); // S(Q1+S(Q2+S(Q3+SQ4))) - fp0 = floatx80_add(fp0, fp2, status); // R+RS(P1+S(P2+SP3)) - fp1 = floatx80_add(fp1, float32_to_floatx80(0x3F800000, status), status); // 1+S(Q1+S(Q2+S(Q3+SQ4))) - - RESET_PREC; - - a = floatx80_div(fp0, fp1, status); - - float_raise(float_flag_inexact, status); - - return a; - } - } -} - -/*---------------------------------------------------------------------------- - | Hyperbolic tangent - *----------------------------------------------------------------------------*/ - -floatx80 floatx80_tanh(floatx80 a, float_status *status) -{ - flag aSign, vSign; - int32_t aExp, vExp; - uint64_t aSig, vSig; - - int32_t compact; - floatx80 fp0, fp1; - float32 sign; - - aSig = extractFloatx80Frac(a); - aExp = extractFloatx80Exp(a); - aSign = extractFloatx80Sign(a); - - if (aExp == 0x7FFF) { - if ((uint64_t) (aSig<<1)) return propagateFloatx80NaNOneArg(a, status); - return packFloatx80(aSign, one_exp, one_sig); - } - - if (aExp == 0 && aSig == 0) { - return packFloatx80(aSign, 0, 0); - } - - SET_PREC; - - compact = floatx80_make_compact(aExp, aSig); - - if (compact < 0x3FD78000 || compact > 0x3FFFDDCE) { - // TANHBORS - if (compact < 0x3FFF8000) { - // TANHSM - RESET_PREC; - - a = floatx80_move(a, status); - - float_raise(float_flag_inexact, status); - - return a; - } else { - if (compact > 0x40048AA1) { - // TANHHUGE - sign = 0x3F800000; - sign |= aSign ? 
0x80000000 : 0x00000000; - fp0 = float32_to_floatx80(sign, status); - sign &= 0x80000000; - sign ^= 0x80800000; // -SIGN(X)*EPS - - RESET_PREC; - - a = floatx80_add(fp0, float32_to_floatx80(sign, status), status); - - float_raise(float_flag_inexact, status); - - return a; - } else { - fp0 = packFloatx80(0, aExp+1, aSig); // Y = 2|X| - fp0 = floatx80_etox(fp0, status); // FP0 IS EXP(Y) - fp0 = floatx80_add(fp0, float32_to_floatx80(0x3F800000, status), status); // EXP(Y)+1 - sign = aSign ? 0x80000000 : 0x00000000; - fp1 = floatx80_div(float32_to_floatx80(sign^0xC0000000, status), fp0, status); // -SIGN(X)*2 / [EXP(Y)+1] - fp0 = float32_to_floatx80(sign | 0x3F800000, status); // SIGN - - RESET_PREC; - - a = floatx80_add(fp1, fp0, status); - - float_raise(float_flag_inexact, status); - - return a; - } - } - } else { // 2**(-40) < |X| < (5/2)LOG2 - fp0 = packFloatx80(0, aExp+1, aSig); // Y = 2|X| - fp0 = floatx80_etoxm1(fp0, status); // FP0 IS Z = EXPM1(Y) - fp1 = floatx80_add(fp0, float32_to_floatx80(0x40000000, status), status); // Z+2 - - vSign = extractFloatx80Sign(fp1); - vExp = extractFloatx80Exp(fp1); - vSig = extractFloatx80Frac(fp1); - - fp1 = packFloatx80(vSign ^ aSign, vExp, vSig); - - RESET_PREC; - - a = floatx80_div(fp0, fp1, status); - - float_raise(float_flag_inexact, status); - - return a; - } -} - -/*---------------------------------------------------------------------------- - | 10 to x - *----------------------------------------------------------------------------*/ - -floatx80 floatx80_tentox(floatx80 a, float_status *status) -{ - flag aSign; - int32_t aExp; - uint64_t aSig; - - int32_t compact, n, j, l, m, m1; - floatx80 fp0, fp1, fp2, fp3, adjfact, fact1, fact2; - - aSig = extractFloatx80Frac(a); - aExp = extractFloatx80Exp(a); - aSign = extractFloatx80Sign(a); - - if (aExp == 0x7FFF) { - if ((uint64_t) (aSig<<1)) return propagateFloatx80NaNOneArg(a, status); - if (aSign) return packFloatx80(0, 0, 0); - return packFloatx80(0, 0x7FFF, 
floatx80_default_infinity_low); - } - - if (aExp == 0 && aSig == 0) { - return packFloatx80(0, one_exp, one_sig); - } - - SET_PREC; - - fp0 = a; - - compact = floatx80_make_compact(aExp, aSig); - - if (compact < 0x3FB98000 || compact > 0x400B9B07) { // |X| > 16480 LOG2/LOG10 or |X| < 2^(-70) - if (compact > 0x3FFF8000) { // |X| > 16480 - RESET_PREC; - - if (aSign) { - return roundAndPackFloatx80(status->floatx80_rounding_precision, 0, -0x1000, aSig, 0, status); - } else { - return roundAndPackFloatx80(status->floatx80_rounding_precision, 0, 0x8000, aSig, 0, status); - } - } else { // |X| < 2^(-70) - RESET_PREC; - - a = floatx80_add(fp0, float32_to_floatx80(0x3F800000, status), status); // 1 + X - - float_raise(float_flag_inexact, status); - - return a; - } - } else { // 2^(-70) <= |X| <= 16480 LOG 2 / LOG 10 - fp1 = fp0; // X - fp1 = floatx80_mul(fp1, float64_to_floatx80(LIT64(0x406A934F0979A371), status), status); // X*64*LOG10/LOG2 - n = floatx80_to_int32(fp1, status); // N=INT(X*64*LOG10/LOG2) - fp1 = int32_to_floatx80(n); - - j = n & 0x3F; - l = n / 64; // NOTE: this is really arithmetic right shift by 6 - if (n < 0 && j) { // arithmetic right shift is division and round towards minus infinity - l--; - } - m = l / 2; // NOTE: this is really arithmetic right shift by 1 - if (l < 0 && (l & 1)) { // arithmetic right shift is division and round towards minus infinity - m--; - } - m1 = l - m; - m1 += 0x3FFF; // ADJFACT IS 2^(M') - - adjfact = packFloatx80(0, m1, one_sig); - fact1 = exp2_tbl[j]; - fact1.high += m; - fact2.high = exp2_tbl2[j]>>16; - fact2.high += m; - fact2.low = (uint64_t)(exp2_tbl2[j] & 0xFFFF); - fact2.low <<= 48; - - fp2 = fp1; // N - fp1 = floatx80_mul(fp1, float64_to_floatx80(LIT64(0x3F734413509F8000), status), status); // N*(LOG2/64LOG10)_LEAD - fp3 = packFloatx80(1, 0x3FCD, LIT64(0xC0219DC1DA994FD2)); - fp2 = floatx80_mul(fp2, fp3, status); // N*(LOG2/64LOG10)_TRAIL - fp0 = floatx80_sub(fp0, fp1, status); // X - N L_LEAD - fp0 = 
floatx80_sub(fp0, fp2, status); // X - N L_TRAIL - fp2 = packFloatx80(0, 0x4000, LIT64(0x935D8DDDAAA8AC17)); // LOG10 - fp0 = floatx80_mul(fp0, fp2, status); // R - - // EXPR - fp1 = floatx80_mul(fp0, fp0, status); // S = R*R - fp2 = float64_to_floatx80(LIT64(0x3F56C16D6F7BD0B2), status); // A5 - fp3 = float64_to_floatx80(LIT64(0x3F811112302C712C), status); // A4 - fp2 = floatx80_mul(fp2, fp1, status); // S*A5 - fp3 = floatx80_mul(fp3, fp1, status); // S*A4 - fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0x3FA5555555554CC1), status), status); // A3+S*A5 - fp3 = floatx80_add(fp3, float64_to_floatx80(LIT64(0x3FC5555555554A54), status), status); // A2+S*A4 - fp2 = floatx80_mul(fp2, fp1, status); // S*(A3+S*A5) - fp3 = floatx80_mul(fp3, fp1, status); // S*(A2+S*A4) - fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0x3FE0000000000000), status), status); // A1+S*(A3+S*A5) - fp3 = floatx80_mul(fp3, fp0, status); // R*S*(A2+S*A4) - - fp2 = floatx80_mul(fp2, fp1, status); // S*(A1+S*(A3+S*A5)) - fp0 = floatx80_add(fp0, fp3, status); // R+R*S*(A2+S*A4) - fp0 = floatx80_add(fp0, fp2, status); // EXP(R) - 1 - - fp0 = floatx80_mul(fp0, fact1, status); - fp0 = floatx80_add(fp0, fact2, status); - fp0 = floatx80_add(fp0, fact1, status); - - RESET_PREC; - - a = floatx80_mul(fp0, adjfact, status); - - float_raise(float_flag_inexact, status); - - return a; - } -} - -/*---------------------------------------------------------------------------- - | 2 to x - *----------------------------------------------------------------------------*/ - -floatx80 floatx80_twotox(floatx80 a, float_status *status) -{ - flag aSign; - int32_t aExp; - uint64_t aSig; - - int32_t compact, n, j, l, m, m1; - floatx80 fp0, fp1, fp2, fp3, adjfact, fact1, fact2; - - aSig = extractFloatx80Frac(a); - aExp = extractFloatx80Exp(a); - aSign = extractFloatx80Sign(a); - - if (aExp == 0x7FFF) { - if ((uint64_t) (aSig<<1)) return propagateFloatx80NaNOneArg(a, status); - if (aSign) return packFloatx80(0, 0, 0); - 
return packFloatx80(0, 0x7FFF, floatx80_default_infinity_low); - } - - if (aExp == 0 && aSig == 0) { - return packFloatx80(0, one_exp, one_sig); - } - - SET_PREC; - - fp0 = a; - - compact = floatx80_make_compact(aExp, aSig); - - if (compact < 0x3FB98000 || compact > 0x400D80C0) { // |X| > 16480 or |X| < 2^(-70) - if (compact > 0x3FFF8000) { // |X| > 16480 - RESET_PREC;; - - if (aSign) { - return roundAndPackFloatx80(status->floatx80_rounding_precision, 0, -0x1000, aSig, 0, status); - } else { - return roundAndPackFloatx80(status->floatx80_rounding_precision, 0, 0x8000, aSig, 0, status); - } - } else { // |X| < 2^(-70) - RESET_PREC;; - - a = floatx80_add(fp0, float32_to_floatx80(0x3F800000, status), status); // 1 + X - - float_raise(float_flag_inexact, status); - - return a; - } - } else { // 2^(-70) <= |X| <= 16480 - fp1 = fp0; // X - fp1 = floatx80_mul(fp1, float32_to_floatx80(0x42800000, status), status); // X * 64 - n = floatx80_to_int32(fp1, status); - fp1 = int32_to_floatx80(n); - j = n & 0x3F; - l = n / 64; // NOTE: this is really arithmetic right shift by 6 - if (n < 0 && j) { // arithmetic right shift is division and round towards minus infinity - l--; - } - m = l / 2; // NOTE: this is really arithmetic right shift by 1 - if (l < 0 && (l & 1)) { // arithmetic right shift is division and round towards minus infinity - m--; - } - m1 = l - m; - m1 += 0x3FFF; // ADJFACT IS 2^(M') - - adjfact = packFloatx80(0, m1, one_sig); - fact1 = exp2_tbl[j]; - fact1.high += m; - fact2.high = exp2_tbl2[j]>>16; - fact2.high += m; - fact2.low = (uint64_t)(exp2_tbl2[j] & 0xFFFF); - fact2.low <<= 48; - - fp1 = floatx80_mul(fp1, float32_to_floatx80(0x3C800000, status), status); // (1/64)*N - fp0 = floatx80_sub(fp0, fp1, status); // X - (1/64)*INT(64 X) - fp2 = packFloatx80(0, 0x3FFE, LIT64(0xB17217F7D1CF79AC)); // LOG2 - fp0 = floatx80_mul(fp0, fp2, status); // R - - // EXPR - fp1 = floatx80_mul(fp0, fp0, status); // S = R*R - fp2 = float64_to_floatx80(LIT64(0x3F56C16D6F7BD0B2), 
status); // A5 - fp3 = float64_to_floatx80(LIT64(0x3F811112302C712C), status); // A4 - fp2 = floatx80_mul(fp2, fp1, status); // S*A5 - fp3 = floatx80_mul(fp3, fp1, status); // S*A4 - fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0x3FA5555555554CC1), status), status); // A3+S*A5 - fp3 = floatx80_add(fp3, float64_to_floatx80(LIT64(0x3FC5555555554A54), status), status); // A2+S*A4 - fp2 = floatx80_mul(fp2, fp1, status); // S*(A3+S*A5) - fp3 = floatx80_mul(fp3, fp1, status); // S*(A2+S*A4) - fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0x3FE0000000000000), status), status); // A1+S*(A3+S*A5) - fp3 = floatx80_mul(fp3, fp0, status); // R*S*(A2+S*A4) - - fp2 = floatx80_mul(fp2, fp1, status); // S*(A1+S*(A3+S*A5)) - fp0 = floatx80_add(fp0, fp3, status); // R+R*S*(A2+S*A4) - fp0 = floatx80_add(fp0, fp2, status); // EXP(R) - 1 - - fp0 = floatx80_mul(fp0, fact1, status); - fp0 = floatx80_add(fp0, fact2, status); - fp0 = floatx80_add(fp0, fact1, status); - - RESET_PREC; - - a = floatx80_mul(fp0, adjfact, status); - - float_raise(float_flag_inexact, status); - - return a; - } -} diff --git a/src/softfloat/softfloat_fpsp_tables.h b/src/softfloat/softfloat_fpsp_tables.h deleted file mode 100644 index b2b29527..00000000 --- a/src/softfloat/softfloat_fpsp_tables.h +++ /dev/null @@ -1,528 +0,0 @@ - -static const floatx80 atan_tbl[128] = { - {0x3FFB, LIT64(0x83D152C5060B7A51)}, - {0x3FFB, LIT64(0x8BC8544565498B8B)}, - {0x3FFB, LIT64(0x93BE406017626B0D)}, - {0x3FFB, LIT64(0x9BB3078D35AEC202)}, - {0x3FFB, LIT64(0xA3A69A525DDCE7DE)}, - {0x3FFB, LIT64(0xAB98E94362765619)}, - {0x3FFB, LIT64(0xB389E502F9C59862)}, - {0x3FFB, LIT64(0xBB797E436B09E6FB)}, - {0x3FFB, LIT64(0xC367A5C739E5F446)}, - {0x3FFB, LIT64(0xCB544C61CFF7D5C6)}, - {0x3FFB, LIT64(0xD33F62F82488533E)}, - {0x3FFB, LIT64(0xDB28DA8162404C77)}, - {0x3FFB, LIT64(0xE310A4078AD34F18)}, - {0x3FFB, LIT64(0xEAF6B0A8188EE1EB)}, - {0x3FFB, LIT64(0xF2DAF1949DBE79D5)}, - {0x3FFB, LIT64(0xFABD581361D47E3E)}, - {0x3FFC, 
LIT64(0x8346AC210959ECC4)}, - {0x3FFC, LIT64(0x8B232A08304282D8)}, - {0x3FFC, LIT64(0x92FB70B8D29AE2F9)}, - {0x3FFC, LIT64(0x9ACF476F5CCD1CB4)}, - {0x3FFC, LIT64(0xA29E76304954F23F)}, - {0x3FFC, LIT64(0xAA68C5D08AB85230)}, - {0x3FFC, LIT64(0xB22DFFFD9D539F83)}, - {0x3FFC, LIT64(0xB9EDEF453E900EA5)}, - {0x3FFC, LIT64(0xC1A85F1CC75E3EA5)}, - {0x3FFC, LIT64(0xC95D1BE828138DE6)}, - {0x3FFC, LIT64(0xD10BF300840D2DE4)}, - {0x3FFC, LIT64(0xD8B4B2BA6BC05E7A)}, - {0x3FFC, LIT64(0xE0572A6BB42335F6)}, - {0x3FFC, LIT64(0xE7F32A70EA9CAA8F)}, - {0x3FFC, LIT64(0xEF88843264ECEFAA)}, - {0x3FFC, LIT64(0xF7170A28ECC06666)}, - {0x3FFD, LIT64(0x812FD288332DAD32)}, - {0x3FFD, LIT64(0x88A8D1B1218E4D64)}, - {0x3FFD, LIT64(0x9012AB3F23E4AEE8)}, - {0x3FFD, LIT64(0x976CC3D411E7F1B9)}, - {0x3FFD, LIT64(0x9EB689493889A227)}, - {0x3FFD, LIT64(0xA5EF72C34487361B)}, - {0x3FFD, LIT64(0xAD1700BAF07A7227)}, - {0x3FFD, LIT64(0xB42CBCFAFD37EFB7)}, - {0x3FFD, LIT64(0xBB303A940BA80F89)}, - {0x3FFD, LIT64(0xC22115C6FCAEBBAF)}, - {0x3FFD, LIT64(0xC8FEF3E686331221)}, - {0x3FFD, LIT64(0xCFC98330B4000C70)}, - {0x3FFD, LIT64(0xD6807AA1102C5BF9)}, - {0x3FFD, LIT64(0xDD2399BC31252AA3)}, - {0x3FFD, LIT64(0xE3B2A8556B8FC517)}, - {0x3FFD, LIT64(0xEA2D764F64315989)}, - {0x3FFD, LIT64(0xF3BF5BF8BAD1A21D)}, - {0x3FFE, LIT64(0x801CE39E0D205C9A)}, - {0x3FFE, LIT64(0x8630A2DADA1ED066)}, - {0x3FFE, LIT64(0x8C1AD445F3E09B8C)}, - {0x3FFE, LIT64(0x91DB8F1664F350E2)}, - {0x3FFE, LIT64(0x97731420365E538C)}, - {0x3FFE, LIT64(0x9CE1C8E6A0B8CDBA)}, - {0x3FFE, LIT64(0xA22832DBCADAAE09)}, - {0x3FFE, LIT64(0xA746F2DDB7602294)}, - {0x3FFE, LIT64(0xAC3EC0FB997DD6A2)}, - {0x3FFE, LIT64(0xB110688AEBDC6F6A)}, - {0x3FFE, LIT64(0xB5BCC49059ECC4B0)}, - {0x3FFE, LIT64(0xBA44BC7DD470782F)}, - {0x3FFE, LIT64(0xBEA94144FD049AAC)}, - {0x3FFE, LIT64(0xC2EB4ABB661628B6)}, - {0x3FFE, LIT64(0xC70BD54CE602EE14)}, - {0x3FFE, LIT64(0xCD000549ADEC7159)}, - {0x3FFE, LIT64(0xD48457D2D8EA4EA3)}, - {0x3FFE, LIT64(0xDB948DA712DECE3B)}, - {0x3FFE, 
LIT64(0xE23855F969E8096A)}, - {0x3FFE, LIT64(0xE8771129C4353259)}, - {0x3FFE, LIT64(0xEE57C16E0D379C0D)}, - {0x3FFE, LIT64(0xF3E10211A87C3779)}, - {0x3FFE, LIT64(0xF919039D758B8D41)}, - {0x3FFE, LIT64(0xFE058B8F64935FB3)}, - {0x3FFF, LIT64(0x8155FB497B685D04)}, - {0x3FFF, LIT64(0x83889E3549D108E1)}, - {0x3FFF, LIT64(0x859CFA76511D724B)}, - {0x3FFF, LIT64(0x87952ECFFF8131E7)}, - {0x3FFF, LIT64(0x89732FD19557641B)}, - {0x3FFF, LIT64(0x8B38CAD101932A35)}, - {0x3FFF, LIT64(0x8CE7A8D8301EE6B5)}, - {0x3FFF, LIT64(0x8F46A39E2EAE5281)}, - {0x3FFF, LIT64(0x922DA7D791888487)}, - {0x3FFF, LIT64(0x94D19FCBDEDF5241)}, - {0x3FFF, LIT64(0x973AB94419D2A08B)}, - {0x3FFF, LIT64(0x996FF00E08E10B96)}, - {0x3FFF, LIT64(0x9B773F9512321DA7)}, - {0x3FFF, LIT64(0x9D55CC320F935624)}, - {0x3FFF, LIT64(0x9F100575006CC571)}, - {0x3FFF, LIT64(0xA0A9C290D97CC06C)}, - {0x3FFF, LIT64(0xA22659EBEBC0630A)}, - {0x3FFF, LIT64(0xA388B4AFF6EF0EC9)}, - {0x3FFF, LIT64(0xA4D35F1061D292C4)}, - {0x3FFF, LIT64(0xA60895DCFBE3187E)}, - {0x3FFF, LIT64(0xA72A51DC7367BEAC)}, - {0x3FFF, LIT64(0xA83A51530956168F)}, - {0x3FFF, LIT64(0xA93A20077539546E)}, - {0x3FFF, LIT64(0xAA9E7245023B2605)}, - {0x3FFF, LIT64(0xAC4C84BA6FE4D58F)}, - {0x3FFF, LIT64(0xADCE4A4A606B9712)}, - {0x3FFF, LIT64(0xAF2A2DCD8D263C9C)}, - {0x3FFF, LIT64(0xB0656F81F22265C7)}, - {0x3FFF, LIT64(0xB18465150F71496A)}, - {0x3FFF, LIT64(0xB28AAA156F9ADA35)}, - {0x3FFF, LIT64(0xB37B44FF3766B895)}, - {0x3FFF, LIT64(0xB458C3DCE9630433)}, - {0x3FFF, LIT64(0xB525529D562246BD)}, - {0x3FFF, LIT64(0xB5E2CCA95F9D88CC)}, - {0x3FFF, LIT64(0xB692CADA7ACA1ADA)}, - {0x3FFF, LIT64(0xB736AEA7A6925838)}, - {0x3FFF, LIT64(0xB7CFAB287E9F7B36)}, - {0x3FFF, LIT64(0xB85ECC66CB219835)}, - {0x3FFF, LIT64(0xB8E4FD5A20A593DA)}, - {0x3FFF, LIT64(0xB99F41F64AFF9BB5)}, - {0x3FFF, LIT64(0xBA7F1E17842BBE7B)}, - {0x3FFF, LIT64(0xBB4712857637E17D)}, - {0x3FFF, LIT64(0xBBFABE8A4788DF6F)}, - {0x3FFF, LIT64(0xBC9D0FAD2B689D79)}, - {0x3FFF, LIT64(0xBD306A39471ECD86)}, - {0x3FFF, 
LIT64(0xBDB6C731856AF18A)}, - {0x3FFF, LIT64(0xBE31CAC502E80D70)}, - {0x3FFF, LIT64(0xBEA2D55CE33194E2)}, - {0x3FFF, LIT64(0xBF0B10B7C03128F0)}, - {0x3FFF, LIT64(0xBF6B7A18DACB778D)}, - {0x3FFF, LIT64(0xBFC4EA4663FA18F6)}, - {0x3FFF, LIT64(0xC0181BDE8B89A454)}, - {0x3FFF, LIT64(0xC065B066CFBF6439)}, - {0x3FFF, LIT64(0xC0AE345F56340AE6)}, - {0x3FFF, LIT64(0xC0F222919CB9E6A7)} -}; - - -static const floatx80 exp_tbl[64] = { - {0x3FFF, LIT64(0x8000000000000000)}, - {0x3FFF, LIT64(0x8164D1F3BC030774)}, - {0x3FFF, LIT64(0x82CD8698AC2BA1D8)}, - {0x3FFF, LIT64(0x843A28C3ACDE4048)}, - {0x3FFF, LIT64(0x85AAC367CC487B14)}, - {0x3FFF, LIT64(0x871F61969E8D1010)}, - {0x3FFF, LIT64(0x88980E8092DA8528)}, - {0x3FFF, LIT64(0x8A14D575496EFD9C)}, - {0x3FFF, LIT64(0x8B95C1E3EA8BD6E8)}, - {0x3FFF, LIT64(0x8D1ADF5B7E5BA9E4)}, - {0x3FFF, LIT64(0x8EA4398B45CD53C0)}, - {0x3FFF, LIT64(0x9031DC431466B1DC)}, - {0x3FFF, LIT64(0x91C3D373AB11C338)}, - {0x3FFF, LIT64(0x935A2B2F13E6E92C)}, - {0x3FFF, LIT64(0x94F4EFA8FEF70960)}, - {0x3FFF, LIT64(0x96942D3720185A00)}, - {0x3FFF, LIT64(0x9837F0518DB8A970)}, - {0x3FFF, LIT64(0x99E0459320B7FA64)}, - {0x3FFF, LIT64(0x9B8D39B9D54E5538)}, - {0x3FFF, LIT64(0x9D3ED9A72CFFB750)}, - {0x3FFF, LIT64(0x9EF5326091A111AC)}, - {0x3FFF, LIT64(0xA0B0510FB9714FC4)}, - {0x3FFF, LIT64(0xA27043030C496818)}, - {0x3FFF, LIT64(0xA43515AE09E680A0)}, - {0x3FFF, LIT64(0xA5FED6A9B15138EC)}, - {0x3FFF, LIT64(0xA7CD93B4E9653568)}, - {0x3FFF, LIT64(0xA9A15AB4EA7C0EF8)}, - {0x3FFF, LIT64(0xAB7A39B5A93ED338)}, - {0x3FFF, LIT64(0xAD583EEA42A14AC8)}, - {0x3FFF, LIT64(0xAF3B78AD690A4374)}, - {0x3FFF, LIT64(0xB123F581D2AC2590)}, - {0x3FFF, LIT64(0xB311C412A9112488)}, - {0x3FFF, LIT64(0xB504F333F9DE6484)}, - {0x3FFF, LIT64(0xB6FD91E328D17790)}, - {0x3FFF, LIT64(0xB8FBAF4762FB9EE8)}, - {0x3FFF, LIT64(0xBAFF5AB2133E45FC)}, - {0x3FFF, LIT64(0xBD08A39F580C36C0)}, - {0x3FFF, LIT64(0xBF1799B67A731084)}, - {0x3FFF, LIT64(0xC12C4CCA66709458)}, - {0x3FFF, LIT64(0xC346CCDA24976408)}, - {0x3FFF, 
LIT64(0xC5672A115506DADC)}, - {0x3FFF, LIT64(0xC78D74C8ABB9B15C)}, - {0x3FFF, LIT64(0xC9B9BD866E2F27A4)}, - {0x3FFF, LIT64(0xCBEC14FEF2727C5C)}, - {0x3FFF, LIT64(0xCE248C151F8480E4)}, - {0x3FFF, LIT64(0xD06333DAEF2B2594)}, - {0x3FFF, LIT64(0xD2A81D91F12AE45C)}, - {0x3FFF, LIT64(0xD4F35AABCFEDFA20)}, - {0x3FFF, LIT64(0xD744FCCAD69D6AF4)}, - {0x3FFF, LIT64(0xD99D15C278AFD7B4)}, - {0x3FFF, LIT64(0xDBFBB797DAF23754)}, - {0x3FFF, LIT64(0xDE60F4825E0E9124)}, - {0x3FFF, LIT64(0xE0CCDEEC2A94E110)}, - {0x3FFF, LIT64(0xE33F8972BE8A5A50)}, - {0x3FFF, LIT64(0xE5B906E77C8348A8)}, - {0x3FFF, LIT64(0xE8396A503C4BDC68)}, - {0x3FFF, LIT64(0xEAC0C6E7DD243930)}, - {0x3FFF, LIT64(0xED4F301ED9942B84)}, - {0x3FFF, LIT64(0xEFE4B99BDCDAF5CC)}, - {0x3FFF, LIT64(0xF281773C59FFB138)}, - {0x3FFF, LIT64(0xF5257D152486CC2C)}, - {0x3FFF, LIT64(0xF7D0DF730AD13BB8)}, - {0x3FFF, LIT64(0xFA83B2DB722A033C)}, - {0x3FFF, LIT64(0xFD3E0C0CF486C174)} -}; - -static const float32 exp_tbl2[64] = { - 0x00000000, 0x9F841A9B, 0x9FC1D5B9, 0xA0728369, - 0x1FC5C95C, 0x1EE85C9F, 0x9FA20729, 0xA07BF9AF, - 0xA0020DCF, 0x205A63DA, 0x1EB70051, 0x1F6EB029, - 0xA0781494, 0x9EB319B0, 0x2017457D, 0x1F11D537, - 0x9FB952DD, 0x1FE43087, 0x1FA2A818, 0x1FDE494D, - 0x20504890, 0xA073691C, 0x1F9B7A05, 0xA0797126, - 0xA071A140, 0x204F62DA, 0x1F283C4A, 0x9F9A7FDC, - 0xA05B3FAC, 0x1FDF2610, 0x9F705F90, 0x201F678A, - 0x1F32FB13, 0x20038B30, 0x200DC3CC, 0x9F8B2AE6, - 0xA02BBF70, 0xA00BF518, 0xA041DD41, 0x9FDF137B, - 0x201F1568, 0x1FC13A2E, 0xA03F8F03, 0x1FF4907D, - 0x9E6E53E4, 0x1FD6D45C, 0xA076EDB9, 0x9FA6DE21, - 0x1EE69A2F, 0x207F439F, 0x201EC207, 0x9E8BE175, - 0x20032C4B, 0x2004DFF5, 0x1E72F47A, 0x1F722F22, - 0xA017E945, 0x1F401A5B, 0x9FB9A9E3, 0x20744C05, - 0x1F773A19, 0x1FFE90D5, 0xA041ED22, 0x1F853F3A -}; - - -static const floatx80 exp2_tbl[64] = { - {0x3FFF, LIT64(0x8000000000000000)}, - {0x3FFF, LIT64(0x8164D1F3BC030773)}, - {0x3FFF, LIT64(0x82CD8698AC2BA1D7)}, - {0x3FFF, LIT64(0x843A28C3ACDE4046)}, - {0x3FFF, 
LIT64(0x85AAC367CC487B15)}, - {0x3FFF, LIT64(0x871F61969E8D1010)}, - {0x3FFF, LIT64(0x88980E8092DA8527)}, - {0x3FFF, LIT64(0x8A14D575496EFD9A)}, - {0x3FFF, LIT64(0x8B95C1E3EA8BD6E7)}, - {0x3FFF, LIT64(0x8D1ADF5B7E5BA9E6)}, - {0x3FFF, LIT64(0x8EA4398B45CD53C0)}, - {0x3FFF, LIT64(0x9031DC431466B1DC)}, - {0x3FFF, LIT64(0x91C3D373AB11C336)}, - {0x3FFF, LIT64(0x935A2B2F13E6E92C)}, - {0x3FFF, LIT64(0x94F4EFA8FEF70961)}, - {0x3FFF, LIT64(0x96942D3720185A00)}, - {0x3FFF, LIT64(0x9837F0518DB8A96F)}, - {0x3FFF, LIT64(0x99E0459320B7FA65)}, - {0x3FFF, LIT64(0x9B8D39B9D54E5539)}, - {0x3FFF, LIT64(0x9D3ED9A72CFFB751)}, - {0x3FFF, LIT64(0x9EF5326091A111AE)}, - {0x3FFF, LIT64(0xA0B0510FB9714FC2)}, - {0x3FFF, LIT64(0xA27043030C496819)}, - {0x3FFF, LIT64(0xA43515AE09E6809E)}, - {0x3FFF, LIT64(0xA5FED6A9B15138EA)}, - {0x3FFF, LIT64(0xA7CD93B4E965356A)}, - {0x3FFF, LIT64(0xA9A15AB4EA7C0EF8)}, - {0x3FFF, LIT64(0xAB7A39B5A93ED337)}, - {0x3FFF, LIT64(0xAD583EEA42A14AC6)}, - {0x3FFF, LIT64(0xAF3B78AD690A4375)}, - {0x3FFF, LIT64(0xB123F581D2AC2590)}, - {0x3FFF, LIT64(0xB311C412A9112489)}, - {0x3FFF, LIT64(0xB504F333F9DE6484)}, - {0x3FFF, LIT64(0xB6FD91E328D17791)}, - {0x3FFF, LIT64(0xB8FBAF4762FB9EE9)}, - {0x3FFF, LIT64(0xBAFF5AB2133E45FB)}, - {0x3FFF, LIT64(0xBD08A39F580C36BF)}, - {0x3FFF, LIT64(0xBF1799B67A731083)}, - {0x3FFF, LIT64(0xC12C4CCA66709456)}, - {0x3FFF, LIT64(0xC346CCDA24976407)}, - {0x3FFF, LIT64(0xC5672A115506DADD)}, - {0x3FFF, LIT64(0xC78D74C8ABB9B15D)}, - {0x3FFF, LIT64(0xC9B9BD866E2F27A3)}, - {0x3FFF, LIT64(0xCBEC14FEF2727C5D)}, - {0x3FFF, LIT64(0xCE248C151F8480E4)}, - {0x3FFF, LIT64(0xD06333DAEF2B2595)}, - {0x3FFF, LIT64(0xD2A81D91F12AE45A)}, - {0x3FFF, LIT64(0xD4F35AABCFEDFA1F)}, - {0x3FFF, LIT64(0xD744FCCAD69D6AF4)}, - {0x3FFF, LIT64(0xD99D15C278AFD7B6)}, - {0x3FFF, LIT64(0xDBFBB797DAF23755)}, - {0x3FFF, LIT64(0xDE60F4825E0E9124)}, - {0x3FFF, LIT64(0xE0CCDEEC2A94E111)}, - {0x3FFF, LIT64(0xE33F8972BE8A5A51)}, - {0x3FFF, LIT64(0xE5B906E77C8348A8)}, - {0x3FFF, 
LIT64(0xE8396A503C4BDC68)}, - {0x3FFF, LIT64(0xEAC0C6E7DD24392F)}, - {0x3FFF, LIT64(0xED4F301ED9942B84)}, - {0x3FFF, LIT64(0xEFE4B99BDCDAF5CB)}, - {0x3FFF, LIT64(0xF281773C59FFB13A)}, - {0x3FFF, LIT64(0xF5257D152486CC2C)}, - {0x3FFF, LIT64(0xF7D0DF730AD13BB9)}, - {0x3FFF, LIT64(0xFA83B2DB722A033A)}, - {0x3FFF, LIT64(0xFD3E0C0CF486C175)} -}; - - -static const float32 exp2_tbl2[64] = { - 0x3F738000, 0x3FBEF7CA, 0x3FBDF8A9, 0x3FBCD7C9, - 0xBFBDE8DA, 0x3FBDE85C, 0x3FBEBBF1, 0x3FBB80CA, - 0xBFBA8373, 0xBFBE9670, 0x3FBDB700, 0x3FBEEEB0, - 0x3FBBFD6D, 0xBFBDB319, 0x3FBDBA2B, 0x3FBE91D5, - 0x3FBE8D5A, 0xBFBCDE7B, 0xBFBEBAAF, 0xBFBD86DA, - 0xBFBEBEDD, 0x3FBCC96E, 0xBFBEC90B, 0x3FBBD1DB, - 0x3FBCE5EB, 0xBFBEC274, 0x3FBEA83C, 0x3FBECB00, - 0x3FBE9301, 0xBFBD8367, 0xBFBEF05F, 0x3FBDFB3C, - 0x3FBEB2FB, 0x3FBAE2CB, 0x3FBCDC3C, 0x3FBEE9AA, - 0xBFBEAEFD, 0xBFBCBF51, 0x3FBEF88A, 0x3FBD83B2, - 0x3FBDF8AB, 0xBFBDFB17, 0xBFBEFE3C, 0xBFBBB6F8, - 0xBFBCEE53, 0xBFBDA4AE, 0x3FBC9124, 0x3FBEB243, - 0x3FBDE69A, 0xBFB8BC61, 0x3FBDF610, 0xBFBD8BE1, - 0x3FBACB12, 0x3FBB9BFE, 0x3FBCF2F4, 0x3FBEF22F, - 0xBFBDBF4A, 0x3FBEC01A, 0x3FBE8CAC, 0xBFBCBB3F, - 0x3FBEF73A, 0xBFB8B795, 0x3FBEF84B, 0xBFBEF581 -}; - - -static const floatx80 log_tbl[128] = { - {0x3FFE, LIT64(0xFE03F80FE03F80FE)}, - {0x3FF7, LIT64(0xFF015358833C47E2)}, - {0x3FFE, LIT64(0xFA232CF252138AC0)}, - {0x3FF9, LIT64(0xBDC8D83EAD88D549)}, - {0x3FFE, LIT64(0xF6603D980F6603DA)}, - {0x3FFA, LIT64(0x9CF43DCFF5EAFD48)}, - {0x3FFE, LIT64(0xF2B9D6480F2B9D65)}, - {0x3FFA, LIT64(0xDA16EB88CB8DF614)}, - {0x3FFE, LIT64(0xEF2EB71FC4345238)}, - {0x3FFB, LIT64(0x8B29B7751BD70743)}, - {0x3FFE, LIT64(0xEBBDB2A5C1619C8C)}, - {0x3FFB, LIT64(0xA8D839F830C1FB49)}, - {0x3FFE, LIT64(0xE865AC7B7603A197)}, - {0x3FFB, LIT64(0xC61A2EB18CD907AD)}, - {0x3FFE, LIT64(0xE525982AF70C880E)}, - {0x3FFB, LIT64(0xE2F2A47ADE3A18AF)}, - {0x3FFE, LIT64(0xE1FC780E1FC780E2)}, - {0x3FFB, LIT64(0xFF64898EDF55D551)}, - {0x3FFE, LIT64(0xDEE95C4CA037BA57)}, - {0x3FFC, 
LIT64(0x8DB956A97B3D0148)}, - {0x3FFE, LIT64(0xDBEB61EED19C5958)}, - {0x3FFC, LIT64(0x9B8FE100F47BA1DE)}, - {0x3FFE, LIT64(0xD901B2036406C80E)}, - {0x3FFC, LIT64(0xA9372F1D0DA1BD17)}, - {0x3FFE, LIT64(0xD62B80D62B80D62C)}, - {0x3FFC, LIT64(0xB6B07F38CE90E46B)}, - {0x3FFE, LIT64(0xD3680D3680D3680D)}, - {0x3FFC, LIT64(0xC3FD032906488481)}, - {0x3FFE, LIT64(0xD0B69FCBD2580D0B)}, - {0x3FFC, LIT64(0xD11DE0FF15AB18CA)}, - {0x3FFE, LIT64(0xCE168A7725080CE1)}, - {0x3FFC, LIT64(0xDE1433A16C66B150)}, - {0x3FFE, LIT64(0xCB8727C065C393E0)}, - {0x3FFC, LIT64(0xEAE10B5A7DDC8ADD)}, - {0x3FFE, LIT64(0xC907DA4E871146AD)}, - {0x3FFC, LIT64(0xF7856E5EE2C9B291)}, - {0x3FFE, LIT64(0xC6980C6980C6980C)}, - {0x3FFD, LIT64(0x82012CA5A68206D7)}, - {0x3FFE, LIT64(0xC4372F855D824CA6)}, - {0x3FFD, LIT64(0x882C5FCD7256A8C5)}, - {0x3FFE, LIT64(0xC1E4BBD595F6E947)}, - {0x3FFD, LIT64(0x8E44C60B4CCFD7DE)}, - {0x3FFE, LIT64(0xBFA02FE80BFA02FF)}, - {0x3FFD, LIT64(0x944AD09EF4351AF6)}, - {0x3FFE, LIT64(0xBD69104707661AA3)}, - {0x3FFD, LIT64(0x9A3EECD4C3EAA6B2)}, - {0x3FFE, LIT64(0xBB3EE721A54D880C)}, - {0x3FFD, LIT64(0xA0218434353F1DE8)}, - {0x3FFE, LIT64(0xB92143FA36F5E02E)}, - {0x3FFD, LIT64(0xA5F2FCABBBC506DA)}, - {0x3FFE, LIT64(0xB70FBB5A19BE3659)}, - {0x3FFD, LIT64(0xABB3B8BA2AD362A5)}, - {0x3FFE, LIT64(0xB509E68A9B94821F)}, - {0x3FFD, LIT64(0xB1641795CE3CA97B)}, - {0x3FFE, LIT64(0xB30F63528917C80B)}, - {0x3FFD, LIT64(0xB70475515D0F1C61)}, - {0x3FFE, LIT64(0xB11FD3B80B11FD3C)}, - {0x3FFD, LIT64(0xBC952AFEEA3D13E1)}, - {0x3FFE, LIT64(0xAF3ADDC680AF3ADE)}, - {0x3FFD, LIT64(0xC2168ED0F458BA4A)}, - {0x3FFE, LIT64(0xAD602B580AD602B6)}, - {0x3FFD, LIT64(0xC788F439B3163BF1)}, - {0x3FFE, LIT64(0xAB8F69E28359CD11)}, - {0x3FFD, LIT64(0xCCECAC08BF04565D)}, - {0x3FFE, LIT64(0xA9C84A47A07F5638)}, - {0x3FFD, LIT64(0xD24204872DD85160)}, - {0x3FFE, LIT64(0xA80A80A80A80A80B)}, - {0x3FFD, LIT64(0xD78949923BC3588A)}, - {0x3FFE, LIT64(0xA655C4392D7B73A8)}, - {0x3FFD, LIT64(0xDCC2C4B49887DACC)}, - {0x3FFE, 
LIT64(0xA4A9CF1D96833751)}, - {0x3FFD, LIT64(0xE1EEBD3E6D6A6B9E)}, - {0x3FFE, LIT64(0xA3065E3FAE7CD0E0)}, - {0x3FFD, LIT64(0xE70D785C2F9F5BDC)}, - {0x3FFE, LIT64(0xA16B312EA8FC377D)}, - {0x3FFD, LIT64(0xEC1F392C5179F283)}, - {0x3FFE, LIT64(0x9FD809FD809FD80A)}, - {0x3FFD, LIT64(0xF12440D3E36130E6)}, - {0x3FFE, LIT64(0x9E4CAD23DD5F3A20)}, - {0x3FFD, LIT64(0xF61CCE92346600BB)}, - {0x3FFE, LIT64(0x9CC8E160C3FB19B9)}, - {0x3FFD, LIT64(0xFB091FD38145630A)}, - {0x3FFE, LIT64(0x9B4C6F9EF03A3CAA)}, - {0x3FFD, LIT64(0xFFE97042BFA4C2AD)}, - {0x3FFE, LIT64(0x99D722DABDE58F06)}, - {0x3FFE, LIT64(0x825EFCED49369330)}, - {0x3FFE, LIT64(0x9868C809868C8098)}, - {0x3FFE, LIT64(0x84C37A7AB9A905C9)}, - {0x3FFE, LIT64(0x97012E025C04B809)}, - {0x3FFE, LIT64(0x87224C2E8E645FB7)}, - {0x3FFE, LIT64(0x95A02568095A0257)}, - {0x3FFE, LIT64(0x897B8CAC9F7DE298)}, - {0x3FFE, LIT64(0x9445809445809446)}, - {0x3FFE, LIT64(0x8BCF55DEC4CD05FE)}, - {0x3FFE, LIT64(0x92F113840497889C)}, - {0x3FFE, LIT64(0x8E1DC0FB89E125E5)}, - {0x3FFE, LIT64(0x91A2B3C4D5E6F809)}, - {0x3FFE, LIT64(0x9066E68C955B6C9B)}, - {0x3FFE, LIT64(0x905A38633E06C43B)}, - {0x3FFE, LIT64(0x92AADE74C7BE59E0)}, - {0x3FFE, LIT64(0x8F1779D9FDC3A219)}, - {0x3FFE, LIT64(0x94E9BFF615845643)}, - {0x3FFE, LIT64(0x8DDA520237694809)}, - {0x3FFE, LIT64(0x9723A1B720134203)}, - {0x3FFE, LIT64(0x8CA29C046514E023)}, - {0x3FFE, LIT64(0x995899C890EB8990)}, - {0x3FFE, LIT64(0x8B70344A139BC75A)}, - {0x3FFE, LIT64(0x9B88BDAA3A3DAE2F)}, - {0x3FFE, LIT64(0x8A42F8705669DB46)}, - {0x3FFE, LIT64(0x9DB4224FFFE1157C)}, - {0x3FFE, LIT64(0x891AC73AE9819B50)}, - {0x3FFE, LIT64(0x9FDADC268B7A12DA)}, - {0x3FFE, LIT64(0x87F78087F78087F8)}, - {0x3FFE, LIT64(0xA1FCFF17CE733BD4)}, - {0x3FFE, LIT64(0x86D905447A34ACC6)}, - {0x3FFE, LIT64(0xA41A9E8F5446FB9F)}, - {0x3FFE, LIT64(0x85BF37612CEE3C9B)}, - {0x3FFE, LIT64(0xA633CD7E6771CD8B)}, - {0x3FFE, LIT64(0x84A9F9C8084A9F9D)}, - {0x3FFE, LIT64(0xA8489E600B435A5E)}, - {0x3FFE, LIT64(0x839930523FBE3368)}, - {0x3FFE, 
LIT64(0xAA59233CCCA4BD49)}, - {0x3FFE, LIT64(0x828CBFBEB9A020A3)}, - {0x3FFE, LIT64(0xAC656DAE6BCC4985)}, - {0x3FFE, LIT64(0x81848DA8FAF0D277)}, - {0x3FFE, LIT64(0xAE6D8EE360BB2468)}, - {0x3FFE, LIT64(0x8080808080808081)}, - {0x3FFE, LIT64(0xB07197A23C46C654)} -}; - - -static const floatx80 pi_tbl[65] = { - {0xC004, LIT64(0xC90FDAA22168C235)}, - {0xC004, LIT64(0xC2C75BCD105D7C23)}, - {0xC004, LIT64(0xBC7EDCF7FF523611)}, - {0xC004, LIT64(0xB6365E22EE46F000)}, - {0xC004, LIT64(0xAFEDDF4DDD3BA9EE)}, - {0xC004, LIT64(0xA9A56078CC3063DD)}, - {0xC004, LIT64(0xA35CE1A3BB251DCB)}, - {0xC004, LIT64(0x9D1462CEAA19D7B9)}, - {0xC004, LIT64(0x96CBE3F9990E91A8)}, - {0xC004, LIT64(0x9083652488034B96)}, - {0xC004, LIT64(0x8A3AE64F76F80584)}, - {0xC004, LIT64(0x83F2677A65ECBF73)}, - {0xC003, LIT64(0xFB53D14AA9C2F2C2)}, - {0xC003, LIT64(0xEEC2D3A087AC669F)}, - {0xC003, LIT64(0xE231D5F66595DA7B)}, - {0xC003, LIT64(0xD5A0D84C437F4E58)}, - {0xC003, LIT64(0xC90FDAA22168C235)}, - {0xC003, LIT64(0xBC7EDCF7FF523611)}, - {0xC003, LIT64(0xAFEDDF4DDD3BA9EE)}, - {0xC003, LIT64(0xA35CE1A3BB251DCB)}, - {0xC003, LIT64(0x96CBE3F9990E91A8)}, - {0xC003, LIT64(0x8A3AE64F76F80584)}, - {0xC002, LIT64(0xFB53D14AA9C2F2C2)}, - {0xC002, LIT64(0xE231D5F66595DA7B)}, - {0xC002, LIT64(0xC90FDAA22168C235)}, - {0xC002, LIT64(0xAFEDDF4DDD3BA9EE)}, - {0xC002, LIT64(0x96CBE3F9990E91A8)}, - {0xC001, LIT64(0xFB53D14AA9C2F2C2)}, - {0xC001, LIT64(0xC90FDAA22168C235)}, - {0xC001, LIT64(0x96CBE3F9990E91A8)}, - {0xC000, LIT64(0xC90FDAA22168C235)}, - {0xBFFF, LIT64(0xC90FDAA22168C235)}, - {0x0000, LIT64(0x0000000000000000)}, - {0x3FFF, LIT64(0xC90FDAA22168C235)}, - {0x4000, LIT64(0xC90FDAA22168C235)}, - {0x4001, LIT64(0x96CBE3F9990E91A8)}, - {0x4001, LIT64(0xC90FDAA22168C235)}, - {0x4001, LIT64(0xFB53D14AA9C2F2C2)}, - {0x4002, LIT64(0x96CBE3F9990E91A8)}, - {0x4002, LIT64(0xAFEDDF4DDD3BA9EE)}, - {0x4002, LIT64(0xC90FDAA22168C235)}, - {0x4002, LIT64(0xE231D5F66595DA7B)}, - {0x4002, LIT64(0xFB53D14AA9C2F2C2)}, - {0x4003, 
LIT64(0x8A3AE64F76F80584)}, - {0x4003, LIT64(0x96CBE3F9990E91A8)}, - {0x4003, LIT64(0xA35CE1A3BB251DCB)}, - {0x4003, LIT64(0xAFEDDF4DDD3BA9EE)}, - {0x4003, LIT64(0xBC7EDCF7FF523611)}, - {0x4003, LIT64(0xC90FDAA22168C235)}, - {0x4003, LIT64(0xD5A0D84C437F4E58)}, - {0x4003, LIT64(0xE231D5F66595DA7B)}, - {0x4003, LIT64(0xEEC2D3A087AC669F)}, - {0x4003, LIT64(0xFB53D14AA9C2F2C2)}, - {0x4004, LIT64(0x83F2677A65ECBF73)}, - {0x4004, LIT64(0x8A3AE64F76F80584)}, - {0x4004, LIT64(0x9083652488034B96)}, - {0x4004, LIT64(0x96CBE3F9990E91A8)}, - {0x4004, LIT64(0x9D1462CEAA19D7B9)}, - {0x4004, LIT64(0xA35CE1A3BB251DCB)}, - {0x4004, LIT64(0xA9A56078CC3063DD)}, - {0x4004, LIT64(0xAFEDDF4DDD3BA9EE)}, - {0x4004, LIT64(0xB6365E22EE46F000)}, - {0x4004, LIT64(0xBC7EDCF7FF523611)}, - {0x4004, LIT64(0xC2C75BCD105D7C23)}, - {0x4004, LIT64(0xC90FDAA22168C235)} -}; - - -static const float32 pi_tbl2[65] = { - 0x21800000, 0xA0D00000, 0xA1E80000, 0x21480000, - 0xA1200000, 0x21FC0000, 0x21100000, 0xA1580000, - 0x21E00000, 0x20B00000, 0xA1880000, 0x21C40000, - 0x20000000, 0x21380000, 0xA1300000, 0x9FC00000, - 0x21000000, 0xA1680000, 0xA0A00000, 0x20900000, - 0x21600000, 0xA1080000, 0x1F800000, 0xA0B00000, - 0x20800000, 0xA0200000, 0x20E00000, 0x1F000000, - 0x20000000, 0x20600000, 0x1F800000, 0x1F000000, - 0x00000000, - 0x9F000000, 0x9F800000, 0xA0600000, 0xA0000000, - 0x9F000000, 0xA0E00000, 0x20200000, 0xA0800000, - 0x20B00000, 0x9F800000, 0x21080000, 0xA1600000, - 0xA0900000, 0x20A00000, 0x21680000, 0xA1000000, - 0x1FC00000, 0x21300000, 0xA1380000, 0xA0000000, - 0xA1C40000, 0x21880000, 0xA0B00000, 0xA1E00000, - 0x21580000, 0xA1100000, 0xA1FC0000, 0x21200000, - 0xA1480000, 0x21E80000, 0x20D00000, 0xA1800000 -};