From 1b7525ddc05cfaa65c501f7cf7f23ceb1a69b814 Mon Sep 17 00:00:00 2001 From: Dimitris Panokostas Date: Sat, 3 Feb 2018 01:30:57 +0100 Subject: [PATCH] Added more FPU opcodes (further improves performance of JIT FPU), improved makefile profiler options --- Makefile | 19 +- src/fpp_native.cpp | 5 + src/include/flags_arm.h | 3 +- src/include/newcpu.h | 1 + src/jit/codegen_arm.cpp | 372 ++++++++++++---- src/jit/codegen_arm.h | 207 ++++----- src/jit/compemu.h | 11 +- src/jit/compemu_fpp.cpp | 179 +++++++- src/jit/compemu_midfunc_arm.cpp | 74 +++- src/jit/compemu_midfunc_arm.h | 3 + src/jit/compemu_midfunc_arm2.cpp | 163 ++----- src/jit/compemu_support.cpp | 130 ++---- src/osdep/amiberry_mem.cpp | 464 +++++++++----------- src/osdep/gui/PanelCPU.cpp | 10 +- src/osdep/picasso96.cpp | 4 +- src/osdep/sigsegv_handler.cpp | 719 +++++++++++++++---------------- src/osdep/sysconfig.h | 2 +- src/osdep/target.h | 8 +- 18 files changed, 1320 insertions(+), 1054 deletions(-) diff --git a/Makefile b/Makefile index a48d8a15..32c75936 100644 --- a/Makefile +++ b/Makefile @@ -26,7 +26,6 @@ ifeq ($(PLATFORM),rpi3) CFLAGS += ${DISPMANX_FLAGS} -DARMV6T2 -DUSE_ARMNEON -DUSE_SDL1 LDFLAGS += ${DISPMANX_LDFLAGS} HAVE_NEON = 1 - PROFILER_PATH = /home/pi/projects/amiberry NAME = amiberry-rpi3-sdl1-dev else ifeq ($(PLATFORM),rpi2) @@ -34,14 +33,12 @@ else ifeq ($(PLATFORM),rpi2) CFLAGS += ${DISPMANX_FLAGS} -DARMV6T2 -DUSE_ARMNEON -DUSE_SDL1 LDFLAGS += ${DISPMANX_LDFLAGS} HAVE_NEON = 1 - PROFILER_PATH = /home/pi/projects/amiberry NAME = amiberry-rpi2-sdl1-dev else ifeq ($(PLATFORM),rpi1) CPU_FLAGS += -march=armv6zk -mtune=arm1176jzf-s -mfpu=vfp CFLAGS += ${DISPMANX_FLAGS} -DUSE_SDL1 LDFLAGS += ${DISPMANX_LDFLAGS} - PROFILER_PATH = /home/pi/projects/amiberry NAME = amiberry-rpi1-sdl1-dev else ifeq ($(PLATFORM),xu4) @@ -72,7 +69,6 @@ USE_SDL2 = 1 CFLAGS += -DARMV6T2 -DUSE_ARMNEON -DUSE_SDL2 ${DISPMANX_FLAGS} LDFLAGS += ${DISPMANX_LDFLAGS} HAVE_NEON = 1 - PROFILER_PATH = 
/home/pi/projects/amiberry/amiberry-sdl2-prof NAME = amiberry-rpi3-sdl2-dispmanx-dev else ifeq ($(PLATFORM),rpi2-sdl2-dispmanx) @@ -81,7 +77,6 @@ USE_SDL2 = 1 CFLAGS += -DARMV6T2 -DUSE_ARMNEON -DUSE_SDL2 ${DISPMANX_FLAGS} LDFLAGS += ${DISPMANX_LDFLAGS} HAVE_NEON = 1 - PROFILER_PATH = /home/pi/projects/amiberry/amiberry-sdl2-prof NAME = amiberry-rpi2-sdl2-dispmanx-dev else ifeq ($(PLATFORM),rpi1-sdl2-dispmanx) @@ -89,7 +84,6 @@ USE_SDL2 = 1 CPU_FLAGS += -march=armv6zk -mtune=arm1176jzf-s -mfpu=vfp CFLAGS += -DUSE_SDL2 ${DISPMANX_FLAGS} LDFLAGS += ${DISPMANX_LDFLAGS} - PROFILER_PATH = /home/pi/projects/amiberry/amiberry-sdl2-prof NAME = amiberry-rpi1-sdl2-dispmanx-dev # @@ -100,7 +94,6 @@ USE_SDL2 = 1 CPU_FLAGS += -march=armv8-a -mtune=cortex-a53 -mfpu=neon-fp-armv8 CFLAGS += -DARMV6T2 -DUSE_ARMNEON -DUSE_SDL2 HAVE_NEON = 1 - PROFILER_PATH = /home/pi/projects/amiberry/amiberry-sdl2-prof NAME = amiberry-rpi3-sdl2-dev else ifeq ($(PLATFORM),rpi2-sdl2) @@ -108,14 +101,12 @@ USE_SDL2 = 1 CPU_FLAGS += -march=armv7-a -mtune=cortex-a7 -mfpu=neon-vfpv4 CFLAGS += -DARMV6T2 -DUSE_ARMNEON -DUSE_SDL2 HAVE_NEON = 1 - PROFILER_PATH = /home/pi/projects/amiberry/amiberry-sdl2-prof NAME = amiberry-rpi2-sdl2-dev else ifeq ($(PLATFORM),rpi1-sdl2) USE_SDL2 = 1 CPU_FLAGS += -march=armv6zk -mtune=arm1176jzf-s -mfpu=vfp CFLAGS += -DUSE_SDL2 - PROFILER_PATH = /home/pi/projects/amiberry/amiberry-sdl2-prof NAME = amiberry-rpi1-sdl2-dev else ifeq ($(PLATFORM),pine64) @@ -191,7 +182,7 @@ DEFS += `xml2-config --cflags` DEFS += -DAMIBERRY -DARMV6_ASSEMBLY ifndef DEBUG - CFLAGS += -std=gnu++14 -Ofast + CFLAGS += -std=gnu++14 -Ofast -frename-registers else CFLAGS += -std=gnu++14 -g -rdynamic -funwind-tables -mapcs-frame -DDEBUG -Wl,--export-dynamic endif @@ -206,10 +197,13 @@ ifdef GCC_PROFILE endif ifdef GEN_PROFILE - CFLAGS += -fprofile-generate=$(PROFILER_PATH) -fprofile-arcs -fvpt + CFLAGS += -fprofile-generate -fprofile-arcs -fvpt + LDFLAGS += -lgcov endif + ifdef USE_PROFILE CFLAGS += 
-fprofile-use -fprofile-correction -fbranch-probabilities -fvpt + LDFLAGS += -lgcov endif ifdef SANITIZE @@ -476,6 +470,9 @@ clean: $(RM) $(PROG) $(PROG)-debug $(OBJS) $(ASMS) $(OBJS:%.o=%.d) $(MAKE) -C src/guisan clean +cleanprofile: + $(RM) $(OBJS:%.o=%.gcda) + delasm: $(RM) $(ASMS) diff --git a/src/fpp_native.cpp b/src/fpp_native.cpp index ce13fc8d..17345a26 100644 --- a/src/fpp_native.cpp +++ b/src/fpp_native.cpp @@ -184,6 +184,11 @@ static void fpp_from_exten(fpdata *fpd, uae_u32 *wrd1, uae_u32 *wrd2, uae_u32 *w *wrd2 = 0; *wrd3 = 0; return; + } else if (fpp_is_nan(fpd)) { + *wrd1 = 0x7fff0000; + *wrd2 = 0xffffffff; + *wrd3 = 0xffffffff; + return; } if (v < 0) { *wrd1 = 0x80000000; diff --git a/src/include/flags_arm.h b/src/include/flags_arm.h index 2633aaf6..c1606cc1 100644 --- a/src/include/flags_arm.h +++ b/src/include/flags_arm.h @@ -60,7 +60,8 @@ enum { NATIVE_CC_F_UGT = 16 + 10, NATIVE_CC_F_UGE = 16 + 11, NATIVE_CC_F_ULT = 16 + 12, - NATIVE_CC_F_ULE = 16 + 13 + NATIVE_CC_F_ULE = 16 + 13, + NATIVE_CC_F_NEVER = 32 }; diff --git a/src/include/newcpu.h b/src/include/newcpu.h index 78b5e75f..db495ed2 100644 --- a/src/include/newcpu.h +++ b/src/include/newcpu.h @@ -121,6 +121,7 @@ struct regstruct uae_u32 address_space_mask; uae_s32 pissoff; + uae_u8* natmem_offset; }; extern struct regstruct regs; diff --git a/src/jit/codegen_arm.cpp b/src/jit/codegen_arm.cpp index dc7c3964..21539067 100644 --- a/src/jit/codegen_arm.cpp +++ b/src/jit/codegen_arm.cpp @@ -92,7 +92,7 @@ extern void __clear_cache (char*, char*); #define STACK_OFFSET sizeof(void *) #define R_REGSTRUCT 11 -uae_s8 always_used[]={2,3,R_REGSTRUCT,12,-1}; // r12 is scratch register in C functions calls, I don't think it's save to use it here... 
+uae_s8 always_used[]={2,3,R_REGSTRUCT,12,-1}; // r2, r3 and r12 are work register in emitted code uae_u8 call_saved[]={0,0,0,0, 1,1,1,1, 1,1,1,1, 0,1,1,1}; @@ -1098,20 +1098,22 @@ LENDFUNC(NONE,NONE,2,compemu_raw_endblock_pc_isconst,(IMM cycles, IMM v)) * FPU stuff * *************************************************************************/ +#ifdef USE_JIT_FPU + LOWFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s)) { - VMOV64_rr(d, s); + VMOV64_dd(d, s); } LENDFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s)) LOWFUNC(NONE,WRITE,2,compemu_raw_fmov_mr_drop,(MEMW mem, FR s)) { if(mem >= (uae_u32) &regs && mem < (uae_u32) &regs + 1020 && ((mem - (uae_u32) &regs) & 0x3) == 0) { - VSTR64(s, R_REGSTRUCT, (mem - (uae_u32) &regs)); + VSTR64_dRi(s, R_REGSTRUCT, (mem - (uae_u32) &regs)); } else { MOVW_ri16(REG_WORK1, mem); MOVT_ri16(REG_WORK1, mem >> 16); - VSTR64(s, REG_WORK1, 0); + VSTR64_dRi(s, REG_WORK1, 0); } } LENDFUNC(NONE,WRITE,2,compemu_raw_fmov_mr_drop,(MEMW mem, FR s)) @@ -1120,69 +1122,69 @@ LENDFUNC(NONE,WRITE,2,compemu_raw_fmov_mr_drop,(MEMW mem, FR s)) LOWFUNC(NONE,READ,2,compemu_raw_fmov_rm,(FW d, MEMR mem)) { if(mem >= (uae_u32) &regs && mem < (uae_u32) &regs + 1020 && ((mem - (uae_u32) &regs) & 0x3) == 0) { - VLDR64(d, R_REGSTRUCT, (mem - (uae_u32) &regs)); + VLDR64_dRi(d, R_REGSTRUCT, (mem - (uae_u32) &regs)); } else { MOVW_ri16(REG_WORK1, mem); MOVT_ri16(REG_WORK1, mem >> 16); - VLDR64(d, REG_WORK1, 0); + VLDR64_dRi(d, REG_WORK1, 0); } } LENDFUNC(NONE,READ,2,compemu_raw_fmov_rm,(FW d, MEMW mem)) LOWFUNC(NONE,NONE,2,raw_fmov_l_rr,(FW d, RR4 s)) { - VMOVi_from_ARM(SCRATCH_F64_1, s); - VCVT_64_from_i(d, SCRATCH_F32_1); + VMOVi_from_ARM_dr(SCRATCH_F64_1, s, 0); + VCVTIto64_ds(d, SCRATCH_F32_1); } LENDFUNC(NONE,NONE,2,raw_fmov_l_rr,(FW d, RR4 s)) LOWFUNC(NONE,NONE,2,raw_fmov_s_rr,(FW d, RR4 s)) { - VMOV32_from_ARM(SCRATCH_F32_1, s); - VCVT_32_to_64(d, SCRATCH_F32_1); + VMOV32_sr(SCRATCH_F32_1, s); + VCVT32to64_ds(d, SCRATCH_F32_1); } LENDFUNC(NONE,NONE,2,raw_fmov_s_rr,(FW d, RR4 s)) 
LOWFUNC(NONE,NONE,2,raw_fmov_w_rr,(FW d, RR2 s)) { SIGN_EXTEND_16_REG_2_REG(REG_WORK1, s); - VMOVi_from_ARM(SCRATCH_F64_1, REG_WORK1); - VCVT_64_from_i(d, SCRATCH_F32_1); + VMOVi_from_ARM_dr(SCRATCH_F64_1, REG_WORK1, 0); + VCVTIto64_ds(d, SCRATCH_F32_1); } LENDFUNC(NONE,NONE,2,raw_fmov_w_rr,(FW d, RR2 s)) LOWFUNC(NONE,NONE,2,raw_fmov_b_rr,(FW d, RR1 s)) { SIGN_EXTEND_8_REG_2_REG(REG_WORK1, s); - VMOVi_from_ARM(SCRATCH_F64_1, REG_WORK1); - VCVT_64_from_i(d, SCRATCH_F32_1); + VMOVi_from_ARM_dr(SCRATCH_F64_1, REG_WORK1, 0); + VCVTIto64_ds(d, SCRATCH_F32_1); } LENDFUNC(NONE,NONE,2,raw_fmov_b_rr,(FW d, RR1 s)) LOWFUNC(NONE,NONE,2,raw_fmov_d_rrr,(FW d, RR4 s1, RR4 s2)) { - VMOV64_from_ARM(d, s1, s2); + VMOV64_drr(d, s1, s2); } LENDFUNC(NONE,NONE,2,raw_fmov_d_rrr,(FW d, RR4 s1, RR4 s2)) LOWFUNC(NONE,NONE,2,raw_fmov_to_l_rr,(W4 d, FR s)) { - VCVTR_64_to_i(SCRATCH_F32_1, s); - VMOVi_to_ARM(d, SCRATCH_F64_1); + VCVTR64toI_sd(SCRATCH_F32_1, s); + VMOV32_rs(d, SCRATCH_F32_1); } LENDFUNC(NONE,NONE,2,raw_fmov_to_l_rr,(W4 d, FR s)) LOWFUNC(NONE,NONE,2,raw_fmov_to_s_rr,(W4 d, FR s)) { - VCVT_64_to_32(SCRATCH_F32_1, s); - VMOV32_to_ARM(d, SCRATCH_F32_1); + VCVT64to32_sd(SCRATCH_F32_1, s); + VMOV32_rs(d, SCRATCH_F32_1); } LENDFUNC(NONE,NONE,2,raw_fmov_to_s_rr,(W4 d, FR s)) LOWFUNC(NONE,NONE,2,raw_fmov_to_w_rr,(W4 d, FR s)) { - VCVTR_64_to_i(SCRATCH_F32_1, s); - VMOVi_to_ARM(REG_WORK1, SCRATCH_F64_1); + VCVTR64toI_sd(SCRATCH_F32_1, s); + VMOV32_rs(REG_WORK1, SCRATCH_F32_1); SSAT_rir(REG_WORK1, 15, REG_WORK1); BFI_rrii(d, REG_WORK1, 0, 15); } @@ -1190,8 +1192,8 @@ LENDFUNC(NONE,NONE,2,raw_fmov_to_w_rr,(W4 d, FR s)) LOWFUNC(NONE,NONE,2,raw_fmov_to_b_rr,(W4 d, FR s)) { - VCVTR_64_to_i(SCRATCH_F32_1, s); - VMOVi_to_ARM(REG_WORK1, SCRATCH_F64_1); + VCVTR64toI_sd(SCRATCH_F32_1, s); + VMOV32_rs(REG_WORK1, SCRATCH_F32_1); SSAT_rir(REG_WORK1, 7, REG_WORK1); BFI_rrii(d, REG_WORK1, 0, 7); } @@ -1199,27 +1201,26 @@ LENDFUNC(NONE,NONE,2,raw_fmov_to_b_rr,(W4 d, FR s)) 
LOWFUNC(NONE,NONE,1,raw_fmov_d_ri_0,(FW r)) { - VMOV64_i(r, 0x7, 0x0); // load imm #1 into reg - VSUB64(r, r, r); + VMOV_I64_dimmI(r, 0x00); // load imm #0 into reg } LENDFUNC(NONE,NONE,1,raw_fmov_d_ri_0,(FW r)) LOWFUNC(NONE,NONE,1,raw_fmov_d_ri_1,(FW r)) { - VMOV64_i(r, 0x7, 0x0); // load imm #1 into reg + VMOV_F64_dimmF(r, 0x70); // load imm #1 into reg } LENDFUNC(NONE,NONE,1,raw_fmov_d_ri_1,(FW r)) LOWFUNC(NONE,NONE,1,raw_fmov_d_ri_10,(FW r)) { - VMOV64_i(r, 0x2, 0x4); // load imm #10 into reg + VMOV_F64_dimmF(r, 0x24); // load imm #10 into reg } LENDFUNC(NONE,NONE,1,raw_fmov_d_ri_10,(FW r)) LOWFUNC(NONE,NONE,1,raw_fmov_d_ri_100,(FW r)) { - VMOV64_i(r, 0x2, 0x4); // load imm #10 into reg - VMUL64(r, r, r); + VMOV_F64_dimmF(r, 0x24); // load imm #10 into reg + VMUL64_ddd(r, r, r); } LENDFUNC(NONE,NONE,1,raw_fmov_d_ri_10,(FW r)) @@ -1227,7 +1228,7 @@ LOWFUNC(NONE,READ,2,raw_fmov_d_rm,(FW r, MEMR m)) { MOVW_ri16(REG_WORK1, m); MOVT_ri16(REG_WORK1, m >> 16); - VLDR64(r, REG_WORK1, 0); + VLDR64_dRi(r, REG_WORK1, 0); } LENDFUNC(NONE,READ,2,raw_fmov_d_rm,(FW r, MEMR m)) @@ -1235,137 +1236,137 @@ LOWFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m)) { MOVW_ri16(REG_WORK1, m); MOVT_ri16(REG_WORK1, m >> 16); - VLDR32(SCRATCH_F32_1, REG_WORK1, 0); - VCVT_32_to_64(r, SCRATCH_F32_1); + VLDR32_sRi(SCRATCH_F32_1, REG_WORK1, 0); + VCVT32to64_ds(r, SCRATCH_F32_1); } LENDFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m)) LOWFUNC(NONE,NONE,3,raw_fmov_to_d_rrr,(W4 d1, W4 d2, FR s)) { - VMOV64_to_ARM(d1, d2, s); + VMOV64_rrd(d1, d2, s); } LENDFUNC(NONE,NONE,3,raw_fmov_to_d_rrr,(W4 d1, W4 d2, FR s)) LOWFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s)) { - VSQRT64(d, s); + VSQRT64_dd(d, s); } LENDFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s)) LOWFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s)) { - VABS64(d, s); + VABS64_dd(d, s); } LENDFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s)) LOWFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s)) { - VNEG64(d, s); + VNEG64_dd(d, s); } LENDFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s)) 
LOWFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s)) { - VDIV64(d, d, s); + VDIV64_ddd(d, d, s); } LENDFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s)) LOWFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s)) { - VADD64(d, d, s); + VADD64_ddd(d, d, s); } LENDFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s)) LOWFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s)) { - VMUL64(d, d, s); + VMUL64_ddd(d, d, s); } LENDFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s)) LOWFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s)) { - VSUB64(d, d, s); + VSUB64_ddd(d, d, s); } LENDFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s)) LOWFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s)) { - VCVTR_64_to_i(SCRATCH_F32_1, s); - VCVT_64_from_i(d, SCRATCH_F32_1); + VCVTR64toI_sd(SCRATCH_F32_1, s); + VCVTIto64_ds(d, SCRATCH_F32_1); } LENDFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s)) LOWFUNC(NONE,NONE,2,raw_frndintz_rr,(FW d, FR s)) { - VCVT_64_to_i(SCRATCH_F32_1, s); - VCVT_64_from_i(d, SCRATCH_F32_1); + VCVT64toI_sd(SCRATCH_F32_1, s); + VCVTIto64_ds(d, SCRATCH_F32_1); } LENDFUNC(NONE,NONE,2,raw_frndintz_rr,(FW d, FR s)) LOWFUNC(NONE,NONE,2,raw_fmod_rr,(FRW d, FR s)) { - VDIV64(SCRATCH_F64_2, d, s); - VCVT_64_to_i(SCRATCH_F32_1, SCRATCH_F64_2); - VCVT_64_from_i(SCRATCH_F64_2, SCRATCH_F32_1); - VMUL64(SCRATCH_F64_1, SCRATCH_F64_2, s); - VSUB64(d, d, SCRATCH_F64_1); + VDIV64_ddd(SCRATCH_F64_2, d, s); + VCVT64toI_sd(SCRATCH_F32_1, SCRATCH_F64_2); + VCVTIto64_ds(SCRATCH_F64_2, SCRATCH_F32_1); + VMUL64_ddd(SCRATCH_F64_1, SCRATCH_F64_2, s); + VSUB64_ddd(d, d, SCRATCH_F64_1); } LENDFUNC(NONE,NONE,2,raw_fmod_rr,(FRW d, FR s)) LOWFUNC(NONE,NONE,2,raw_fsgldiv_rr,(FRW d, FR s)) { - VCVT_64_to_32(SCRATCH_F32_1, d); - VCVT_64_to_32(SCRATCH_F32_2, s); - VDIV32(SCRATCH_F32_1, SCRATCH_F32_1, SCRATCH_F32_2); - VCVT_32_to_64(d, SCRATCH_F32_1); + VCVT64to32_sd(SCRATCH_F32_1, d); + VCVT64to32_sd(SCRATCH_F32_2, s); + VDIV32_sss(SCRATCH_F32_1, SCRATCH_F32_1, SCRATCH_F32_2); + VCVT32to64_ds(d, SCRATCH_F32_1); } LENDFUNC(NONE,NONE,2,raw_fsgldiv_rr,(FRW d, FR s)) 
LOWFUNC(NONE,NONE,1,raw_fcuts_r,(FRW r)) { - VCVT_64_to_32(SCRATCH_F32_1, r); - VCVT_32_to_64(r, SCRATCH_F32_1); + VCVT64to32_sd(SCRATCH_F32_1, r); + VCVT32to64_ds(r, SCRATCH_F32_1); } LENDFUNC(NONE,NONE,1,raw_fcuts_r,(FRW r)) LOWFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s)) { - VMRS(REG_WORK1); + VMRS_r(REG_WORK1); BIC_rri(REG_WORK2, REG_WORK1, 0x00c00000); - VMSR(REG_WORK2); + VMSR_r(REG_WORK2); - VDIV64(SCRATCH_F64_2, d, s); - VCVTR_64_to_i(SCRATCH_F32_1, SCRATCH_F64_2); - VCVT_64_from_i(SCRATCH_F64_2, SCRATCH_F32_1); - VMUL64(SCRATCH_F64_1, SCRATCH_F64_2, s); - VSUB64(d, d, SCRATCH_F64_1); + VDIV64_ddd(SCRATCH_F64_2, d, s); + VCVTR64toI_sd(SCRATCH_F32_1, SCRATCH_F64_2); + VCVTIto64_ds(SCRATCH_F64_2, SCRATCH_F32_1); + VMUL64_ddd(SCRATCH_F64_1, SCRATCH_F64_2, s); + VSUB64_ddd(d, d, SCRATCH_F64_1); - VMRS(REG_WORK2); + VMRS_r(REG_WORK2); UBFX_rrii(REG_WORK1, REG_WORK1, 22, 2); BFI_rrii(REG_WORK2, REG_WORK1, 22, 2); - VMSR(REG_WORK2); + VMSR_r(REG_WORK2); } LENDFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s)) LOWFUNC(NONE,NONE,2,raw_fsglmul_rr,(FRW d, FR s)) { - VCVT_64_to_32(SCRATCH_F32_1, d); - VCVT_64_to_32(SCRATCH_F32_2, s); - VMUL32(SCRATCH_F32_1, SCRATCH_F32_1, SCRATCH_F32_2); - VCVT_32_to_64(d, SCRATCH_F32_1); + VCVT64to32_sd(SCRATCH_F32_1, d); + VCVT64to32_sd(SCRATCH_F32_2, s); + VMUL32_sss(SCRATCH_F32_1, SCRATCH_F32_1, SCRATCH_F32_2); + VCVT32to64_ds(d, SCRATCH_F32_1); } LENDFUNC(NONE,NONE,2,raw_fsglmul_rr,(FRW d, FR s)) LOWFUNC(NONE,NONE,2,raw_fmovs_rr,(FW d, FR s)) { - VCVT_64_to_32(SCRATCH_F32_1, s); - VCVT_32_to_64(d, SCRATCH_F32_1); + VCVT64to32_sd(SCRATCH_F32_1, s); + VCVT32to64_ds(d, SCRATCH_F32_1); } LENDFUNC(NONE,NONE,2,raw_fmovs_rr,(FW d, FR s)) LOWFUNC(NONE,NONE,3,raw_ffunc_rr,(double (*func)(double), FW d, FR s)) { - VMOV64_rr(0, s); + VMOV64_dd(0, s); MOVW_ri16(REG_WORK1, (uae_u32)func); MOVT_ri16(REG_WORK1, ((uae_u32)func) >> 16); @@ -1374,7 +1375,7 @@ LOWFUNC(NONE,NONE,3,raw_ffunc_rr,(double (*func)(double), FW d, FR s)) BLX_r(REG_WORK1); 
POP(RLR_INDEX); - VMOV64_rr(d, 0); + VMOV64_dd(d, 0); } LENDFUNC(NONE,NONE,3,raw_ffunc_rr,(double (*func)(double), FW d, FR s)) @@ -1383,12 +1384,12 @@ LOWFUNC(NONE,NONE,3,raw_fpowx_rr,(uae_u32 x, FW d, FR s)) double (*func)(double,double) = pow; if(x == 2) { - VMOV64_i(0, 0x0, 0x0); // load imm #2 into first reg + VMOV_F64_dimmF(0, 0x00); // load imm #2 into first reg } else { - VMOV64_i(0, 0x2, 0x4); // load imm #10 into first reg + VMOV_F64_dimmF(0, 0x24); // load imm #10 into first reg } - VMOV64_rr(1, s); + VMOV64_dd(1, s); MOVW_ri16(REG_WORK1, (uae_u32)func); MOVT_ri16(REG_WORK1, ((uae_u32)func) >> 16); @@ -1397,12 +1398,227 @@ LOWFUNC(NONE,NONE,3,raw_fpowx_rr,(uae_u32 x, FW d, FR s)) BLX_r(REG_WORK1); POP(RLR_INDEX); - VMOV64_rr(d, 0); + VMOV64_dd(d, 0); } LENDFUNC(NONE,NONE,3,raw_fpowx_rr,(uae_u32 x, FW d, FR s)) +LOWFUNC(NONE,WRITE,2,raw_fp_from_exten_mr,(RR4 adr, FR s)) +{ + uae_s32 offs = (uae_u32)&NATMEM_OFFSETX - (uae_u32) &regs; + + VMOVi_to_ARM_rd(REG_WORK2, s, 1); // get high part of double + VCMP64_d0(s); + VMRS_CPSR(); + BEQ_i(22); // iszero + + UBFX_rrii(REG_WORK3, REG_WORK2, 20, 11); // get exponent + MOVW_ri16(REG_WORK1, 2047); + CMP_rr(REG_WORK3, REG_WORK1); + BEQ_i(15); // isnan + + MOVW_ri16(REG_WORK1, 15360); // diff of bias between double and long double + ADD_rrr(REG_WORK3, REG_WORK3, REG_WORK1); // exponent done + AND_rri(REG_WORK2, REG_WORK2, 0x80000000); // extract sign + ORR_rrrLSLi(REG_WORK3, REG_WORK2, REG_WORK3, 16); // merge sign and exponent + + LDR_rRI(REG_WORK1, R_REGSTRUCT, offs); + ADD_rrr(REG_WORK1, adr, REG_WORK1); + + REV_rr(REG_WORK3, REG_WORK3); + STR_rR(REG_WORK3, REG_WORK1); // write exponent + + VSHL64_ddi(SCRATCH_F64_1, s, 11); // shift mantissa to correct position + VMOV64_rrd(REG_WORK3, REG_WORK2, SCRATCH_F64_1); + ORR_rri(REG_WORK2, REG_WORK2, 0x80000000); // insert explicit 1 + REV_rr(REG_WORK2, REG_WORK2); + REV_rr(REG_WORK3, REG_WORK3); + STR_rRI(REG_WORK2, REG_WORK1, 4); + STR_rRI(REG_WORK3, REG_WORK1, 8); + 
B_i(10); // end_of_op + +// isnan + MOVW_ri16(REG_WORK2, 0x7fff); + LSL_rri(REG_WORK2, REG_WORK2, 16); + MVN_ri(REG_WORK3, 0); + +// iszero + CC_AND_rri(NATIVE_CC_EQ, REG_WORK2, REG_WORK2, 0x80000000); // extract sign + CC_MOV_ri(NATIVE_CC_EQ, REG_WORK3, 0); + + LDR_rRI(REG_WORK1, R_REGSTRUCT, offs); + ADD_rrr(REG_WORK1, adr, REG_WORK1); + + REV_rr(REG_WORK2, REG_WORK2); + STR_rR(REG_WORK2, REG_WORK1); + STR_rRI(REG_WORK3, REG_WORK1, 4); + STR_rRI(REG_WORK3, REG_WORK1, 8); + +// end_of_op + +} +LENDFUNC(NONE,WRITE,2,raw_fp_from_exten_mr,(RR4 adr, FR s)) + +LOWFUNC(NONE,READ,2,raw_fp_to_exten_rm,(FW d, RR4 adr)) +{ + uae_s32 offs = (uae_u32)&NATMEM_OFFSETX - (uae_u32) &regs; + + LDR_rRI(REG_WORK1, R_REGSTRUCT, offs); + ADD_rrr(REG_WORK1, adr, REG_WORK1); + + LDR_rRI(REG_WORK2, REG_WORK1, 4); + LDR_rRI(REG_WORK3, REG_WORK1, 8); + REV_rr(REG_WORK2, REG_WORK2); + REV_rr(REG_WORK3, REG_WORK3); + BIC_rri(REG_WORK2, REG_WORK2, 0x80000000); // clear explicit 1 + VMOV64_drr(d, REG_WORK3, REG_WORK2); + + LDR_rR(REG_WORK2, REG_WORK1); + REV_rr(REG_WORK2, REG_WORK2); + LSR_rri(REG_WORK2, REG_WORK2, 16); // exponent now in lower half + MOVW_ri16(REG_WORK3, 0x7fff); + ANDS_rrr(REG_WORK3, REG_WORK3, REG_WORK2); + BNE_i(9); // not_zero + VCMP64_d0(d); + VMRS_CPSR(); + BNE_i(6); // not zero +// zero + VMOV_I64_dimmI(d, 0x00); + TST_ri(REG_WORK2, 0x8000); // check sign + BEQ_i(12); // end_of_op + MOV_ri(REG_WORK2, 0x80000000); + MOV_ri(REG_WORK3, 0); + VMOV64_drr(d, REG_WORK3, REG_WORK2); + B_i(8); // end_of_op + +// not_zero + MOVW_ri16(REG_WORK1, 15360); // diff of bias between double and long double + SUB_rrr(REG_WORK3, REG_WORK3, REG_WORK1); // exponent done, ToDo: check for carry -> result gets Inf in double + UBFX_rrii(REG_WORK2, REG_WORK2, 15, 1); // extract sign + BFI_rrii(REG_WORK3, REG_WORK2, 11, 1); // insert sign + VSHR64_ddi(d, d, 11); // shift mantissa to correct position + LSL_rri(REG_WORK3, REG_WORK3, 20); + VMOV_I64_dimmI(0, 0x00); + VMOVi_from_ARM_dr(0, REG_WORK3, 1); 
+ VORR_ddd(d, d, 0); +// end_of_op + +} +LENDFUNC(NONE,READ,2,raw_fp_from_exten_mr,(FW d, RR4 adr)) + STATIC_INLINE void raw_fflags_into_flags(int r) { - VCMP64_0(r); - VMRS(15); // special case: move flags from FPSCR to APSR_nzcv + VCMP64_d0(r); + VMRS_CPSR(); } + +LOWFUNC(NONE,NONE,2,raw_fp_fscc_ri,(RW4 d, int cc)) +{ + switch (cc) { + case NATIVE_CC_F_NEVER: + BIC_rri(d, d, 0xff); + break; + + case NATIVE_CC_NE: // Set if not equal + CC_BIC_rri(NATIVE_CC_EQ, d, d, 0xff); // do not set if equal + CC_ORR_rri(NATIVE_CC_NE, d, d, 0xff); + break; + + case NATIVE_CC_EQ: // Set if equal + CC_BIC_rri(NATIVE_CC_NE, d, d, 0xff); // do not set if not equal + CC_ORR_rri(NATIVE_CC_EQ, d, d, 0xff); + break; + + case NATIVE_CC_F_OGT: // Set if valid and greater than + BVS_i(2); // do not set if NaN + BLE_i(1); // do not set if less or equal + ORR_rri(d, d, 0xff); + B_i(0); + BIC_rri(d, d, 0xff); + break; + + case NATIVE_CC_F_OGE: // Set if valid and greater or equal + BVS_i(2); // do not set if NaN + BCC_i(1); // do not set if carry cleared + ORR_rri(d, d, 0xff); + B_i(0); + BIC_rri(d, d, 0xff); + break; + + case NATIVE_CC_F_OLT: // Set if valid and less than + BVS_i(2); // do not set if NaN + BCS_i(1); // do not set if carry set + ORR_rri(d, d, 0xff); + B_i(0); + BIC_rri(d, d, 0xff); + break; + + case NATIVE_CC_F_OLE: // Set if valid and less or equal + BVS_i(2); // do not set if NaN + BGT_i(1); // do not set if greater than + ORR_rri(d, d, 0xff); + B_i(0); + BIC_rri(d, d, 0xff); + break; + + case NATIVE_CC_F_OGL: // Set if valid and greater or less + BVS_i(2); // do not set if NaN + BEQ_i(1); // do not set if equal + ORR_rri(d, d, 0xff); + B_i(0); + BIC_rri(d, d, 0xff); + break; + + case NATIVE_CC_F_OR: // Set if valid + CC_BIC_rri(NATIVE_CC_VS, d, d, 0xff); // do not set if NaN + CC_ORR_rri(NATIVE_CC_VC, d, d, 0xff); + break; + + case NATIVE_CC_F_UN: // Set if NAN + CC_BIC_rri(NATIVE_CC_VC, d, d, 0xff); // do not set if valid + CC_ORR_rri(NATIVE_CC_VS, d, d, 0xff); + break; 
+ + case NATIVE_CC_F_UEQ: // Set if NAN or equal + BVS_i(0); // set if NaN + BNE_i(1); // do not set if greater or less + ORR_rri(d, d, 0xff); + B_i(0); + BIC_rri(d, d, 0xff); + break; + + case NATIVE_CC_F_UGT: // Set if NAN or greater than + BVS_i(0); // set if NaN + BLS_i(1); // do not set if lower or same + ORR_rri(d, d, 0xff); + B_i(0); + BIC_rri(d, d, 0xff); + break; + + case NATIVE_CC_F_UGE: // Set if NAN or greater or equal + BVS_i(0); // set if NaN + BMI_i(1); // do not set if lower + ORR_rri(d, d, 0xff); + B_i(0); + BIC_rri(d, d, 0xff); + break; + + case NATIVE_CC_F_ULT: // Set if NAN or less than + BVS_i(0); // set if NaN + BGE_i(1); // do not set if greater or equal + ORR_rri(d, d, 0xff); + B_i(0); + BIC_rri(d, d, 0xff); + break; + + case NATIVE_CC_F_ULE: // Set if NAN or less or equal + BVS_i(0); // set if NaN + BGT_i(1); // do not set if greater + ORR_rri(d, d, 0xff); + B_i(0); + BIC_rri(d, d, 0xff); + break; + } +} +LENDFUNC(NONE,NONE,2,raw_fp_fscc_ri,(RW4 d, int cc)) + +#endif // USE_JIT_FPU diff --git a/src/jit/codegen_arm.h b/src/jit/codegen_arm.h index a11c13f7..7e72901d 100644 --- a/src/jit/codegen_arm.h +++ b/src/jit/codegen_arm.h @@ -1317,6 +1317,9 @@ enum { #define CC_SXTAH_rrr(cc,Rd,Rn,Rm) _W(((cc) << 28) | (0x6b << 20) | (Rn << 16) | ((Rd) << 12) | (0x7 << 4) | SHIFT_REG(Rm)) #define SXTAH_rrr(Rd,Rn,Rm) CC_SXTAH_rrr(NATIVE_CC_AL,Rd,Rn,Rm) +#define CC_STM_Ri(cc,Rn,i) _W(((cc) << 28) | (0x8 << 24) | (0x8 << 20) | ((Rn) << 16) | i) +#define STM_Ri(Rn,i) CC_STM_Ri(NATIVE_CC_AL,Rn,i) + // ARMv6T2 #ifdef ARMV6T2 @@ -1343,7 +1346,7 @@ enum { // Floatingpoint #define FADR_ADD(offs) ((1 << 23) | (offs) >> 2) #define FADR_SUB(offs) ((0 << 23) | (offs) >> 2) -#define FIMM8(offs) (offs >= 0 ? FADR_ADD(offs) : FADR_SUB(-offs)) +#define FOFFSET8(offs) (offs >= 0 ? 
FADR_ADD(offs) : FADR_SUB(-offs)) #define MAKE_Dd(Dd) (((Dd & 0x10) << 18) | ((Dd & 0x0f) << 12)) #define MAKE_Dm(Dm) (((Dm & 0x10) << 1) | ((Dm & 0x0f) << 0)) @@ -1353,133 +1356,131 @@ enum { #define MAKE_Sn(Sn) (((Sn & 0x01) << 7) | ((Sn & 0x1e) << 15)) -#define CC_VLDR64(cc,Dd,Rn,offs) _W(((cc) << 28) | (0xd << 24) | (0x1 << 20) | (Rn << 16) | (0xb << 8) | FIMM8(offs) | MAKE_Dd(Dd)) -#define VLDR64(Dd,Rn,offs) CC_VLDR64(NATIVE_CC_AL,Dd,Rn,offs) -#define CC_VLDR32(cc,Sd,Rn,offs) _W(((cc) << 28) | (0xd << 24) | (0x1 << 20) | (Rn << 16) | (0xa << 8) | FIMM8(offs) | MAKE_Sd(Sd)) -#define VLDR32(Sd,Rn,offs) CC_VLDR32(NATIVE_CC_AL,Sd,Rn,offs) +#define CC_VLDR64_dRi(cc,Dd,Rn,offs) _W(((cc) << 28) | (0xd << 24) | (0x1 << 20) | (Rn << 16) | (0xb << 8) | FOFFSET8(offs) | MAKE_Dd(Dd)) +#define VLDR64_dRi(Dd,Rn,offs) CC_VLDR64_dRi(NATIVE_CC_AL,Dd,Rn,offs) +#define CC_VLDR32_sRi(cc,Sd,Rn,offs) _W(((cc) << 28) | (0xd << 24) | (0x1 << 20) | (Rn << 16) | (0xa << 8) | FOFFSET8(offs) | MAKE_Sd(Sd)) +#define VLDR32_sRi(Sd,Rn,offs) CC_VLDR32_sRi(NATIVE_CC_AL,Sd,Rn,offs) -#define CC_VSTR64(cc,Dd,Rn,offs) _W(((cc) << 28) | (0xd << 24) | (0x0 << 20) | (Rn << 16) | (0xb << 8) | FIMM8(offs) | MAKE_Dd(Dd)) -#define VSTR64(Dd,Rn,offs) CC_VSTR64(NATIVE_CC_AL,Dd,Rn,offs) -#define CC_VSTR32(cc,Dd,Rn,offs) _W(((cc) << 28) | (0xd << 24) | (0x0 << 20) | (Rn << 16) | (0xa << 8) | FIMM8(offs) | MAKE_Dd(Dd)) -#define VSTR32(Dd,Rn,offs) CC_VSTR32(NATIVE_CC_AL,Dd,Rn,offs) +#define CC_VSTR64_dRi(cc,Dd,Rn,offs) _W(((cc) << 28) | (0xd << 24) | (0x0 << 20) | (Rn << 16) | (0xb << 8) | FOFFSET8(offs) | MAKE_Dd(Dd)) +#define VSTR64_dRi(Dd,Rn,offs) CC_VSTR64_dRi(NATIVE_CC_AL,Dd,Rn,offs) +#define CC_VSTR32_sRi(cc,Sd,Rn,offs) _W(((cc) << 28) | (0xd << 24) | (0x0 << 20) | (Rn << 16) | (0xa << 8) | FOFFSET8(offs) | MAKE_Sd(Sd)) +#define VSTR32_sRi(Sd,Rn,offs) CC_VSTR32_sRi(NATIVE_CC_AL,Sd,Rn,offs) -#define CC_VMOV64_rr(cc,Dd,Dm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0xb << 8) | (0x4 << 4) | MAKE_Dd(Dd) 
| MAKE_Dm(Dm)) -#define VMOV64_rr(Dd,Dm) CC_VMOV64_rr(NATIVE_CC_AL,Dd,Dm) -#define CC_VMOV32_rr(cc,Sd,Sm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0xa << 8) | (0x4 << 4) | MAKE_Sd(Sd) | MAKE_Sm(Sm)) -#define VMOV32_rr(Sd,Sm) CC_VMOV32_rr(NATIVE_CC_AL,Sd,Sm) +#define CC_VMOV64_dd(cc,Dd,Dm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0xb << 8) | (0x4 << 4) | MAKE_Dd(Dd) | MAKE_Dm(Dm)) +#define VMOV64_dd(Dd,Dm) CC_VMOV64_dd(NATIVE_CC_AL,Dd,Dm) +#define CC_VMOV32_ss(cc,Sd,Sm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0xa << 8) | (0x4 << 4) | MAKE_Sd(Sd) | MAKE_Sm(Sm)) +#define VMOV32_ss(Sd,Sm) CC_VMOV32_ss(NATIVE_CC_AL,Sd,Sm) -#define CC_VMOV32_to_ARM(cc,Rt,Sn) _W(((cc) << 28) | (0xe << 24) | (0x1 << 20) | (Rt << 12) | (0xa << 8) | (0x1 << 4) | MAKE_Sn(Sn)) -#define VMOV32_to_ARM(Rt,Sn) CC_VMOV32_to_ARM(NATIVE_CC_AL,Rt,Sn) -#define CC_VMOV32_from_ARM(cc,Sn,Rt) _W(((cc) << 28) | (0xe << 24) | (0x0 << 20) | (Rt << 12) | (0xa << 8) | (0x1 << 4) | MAKE_Sn(Sn)) -#define VMOV32_from_ARM(Sn,Rt) CC_VMOV32_from_ARM(NATIVE_CC_AL,Sn,Rt) +#define CC_VMOV32_rs(cc,Rt,Sn) _W(((cc) << 28) | (0xe << 24) | (0x1 << 20) | (Rt << 12) | (0xa << 8) | (0x1 << 4) | MAKE_Sn(Sn)) +#define VMOV32_rs(Rt,Sn) CC_VMOV32_rs(NATIVE_CC_AL,Rt,Sn) +#define CC_VMOV32_sr(cc,Sn,Rt) _W(((cc) << 28) | (0xe << 24) | (0x0 << 20) | (Rt << 12) | (0xa << 8) | (0x1 << 4) | MAKE_Sn(Sn)) +#define VMOV32_sr(Sn,Rt) CC_VMOV32_sr(NATIVE_CC_AL,Sn,Rt) -#define CC_VMOVi_from_ARM(cc,Dn,Rt) _W(((cc) << 28) | (0xe << 24) | (0x0 << 20) | (Rt << 12) | (0xb << 8) | (0x1 << 4) | MAKE_Dn(Dn)) -#define VMOVi_from_ARM(Dn,Rt) CC_VMOVi_from_ARM(NATIVE_CC_AL,Dn,Rt) -#define CC_VMOVi_to_ARM(cc,Rt,Dn) _W(((cc) << 28) | (0xe << 24) | (0x1 << 20) | (Rt << 12) | (0xb << 8) | (0x1 << 4) | MAKE_Dn(Dn)) -#define VMOVi_to_ARM(Rt,Dn) CC_VMOVi_to_ARM(NATIVE_CC_AL,Rt,Dn) +#define CC_VMOVi_from_ARM_dr(cc,Dn,Rt,x) _W(((cc) << 28) | (0xe << 24) | (0x0 << 20) | (Rt << 12) | (0xb << 8) | (0x1 << 4) | ((x) << 21) | MAKE_Dn(Dn)) +#define 
VMOVi_from_ARM_dr(Dn,Rt,x) CC_VMOVi_from_ARM_dr(NATIVE_CC_AL,Dn,Rt,x) +#define CC_VMOVi_to_ARM_rd(cc,Rt,Dn,x) _W(((cc) << 28) | (0xe << 24) | (0x1 << 20) | (Rt << 12) | (0xb << 8) | (0x1 << 4) | ((x) << 21) | MAKE_Dn(Dn)) +#define VMOVi_to_ARM_rd(Rt,Dn,x) CC_VMOVi_to_ARM_rd(NATIVE_CC_AL,Rt,Dn,x) -#define CC_VMOV64_to_ARM(cc,Rt,Rt2,Dm) _W(((cc) << 28) | (0xc << 24) | (0x5 << 20) | (Rt2 << 16) | (Rt << 12) | (0xb << 8) | (0x1 << 4) | MAKE_Dm(Dm)) -#define VMOV64_to_ARM(Rt,Rt2,Dm) CC_VMOV64_to_ARM(NATIVE_CC_AL,Rt,Rt2,Dm) -#define CC_VMOV64_from_ARM(cc,Dm,Rt,Rt2) _W(((cc) << 28) | (0xc << 24) | (0x4 << 20) | (Rt2 << 16) | (Rt << 12) | (0xb << 8) | (0x1 << 4) | MAKE_Dm(Dm)) -#define VMOV64_from_ARM(Dm,Rt,Rt2) CC_VMOV64_from_ARM(NATIVE_CC_AL,Dm,Rt,Rt2) +#define CC_VMOV64_rrd(cc,Rt,Rt2,Dm) _W(((cc) << 28) | (0xc << 24) | (0x5 << 20) | (Rt2 << 16) | (Rt << 12) | (0xb << 8) | (0x1 << 4) | MAKE_Dm(Dm)) +#define VMOV64_rrd(Rt,Rt2,Dm) CC_VMOV64_rrd(NATIVE_CC_AL,Rt,Rt2,Dm) +#define CC_VMOV64_drr(cc,Dm,Rt,Rt2) _W(((cc) << 28) | (0xc << 24) | (0x4 << 20) | (Rt2 << 16) | (Rt << 12) | (0xb << 8) | (0x1 << 4) | MAKE_Dm(Dm)) +#define VMOV64_drr(Dm,Rt,Rt2) CC_VMOV64_drr(NATIVE_CC_AL,Dm,Rt,Rt2) -#define CC_VCVT_64_to_32(cc,Sd,Dm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x7 << 16) | (0xb << 8) | (0xc << 4) | MAKE_Sd(Sd) | MAKE_Dm(Dm)) -#define VCVT_64_to_32(Sd,Dm) CC_VCVT_64_to_32(NATIVE_CC_AL,Sd,Dm) -#define CC_VCVT_32_to_64(cc,Dd,Sm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x7 << 16) | (0xa << 8) | (0xc << 4) | MAKE_Dd(Dd) | MAKE_Sm(Sm)) -#define VCVT_32_to_64(Dd,Sm) CC_VCVT_32_to_64(NATIVE_CC_AL,Dd,Sm) +#define CC_VCVT64to32_sd(cc,Sd,Dm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x7 << 16) | (0xb << 8) | (0xc << 4) | MAKE_Sd(Sd) | MAKE_Dm(Dm)) +#define VCVT64to32_sd(Sd,Dm) CC_VCVT64to32_sd(NATIVE_CC_AL,Sd,Dm) +#define CC_VCVT32to64_ds(cc,Dd,Sm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x7 << 16) | (0xa << 8) | (0xc << 4) | MAKE_Dd(Dd) | MAKE_Sm(Sm)) +#define 
VCVT32to64_ds(Dd,Sm) CC_VCVT32to64_ds(NATIVE_CC_AL,Dd,Sm) -#define CC_VCVTR_64_to_i(cc,Sd,Dm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0xd << 16) | (0xb << 8) | (0x4 << 4) | MAKE_Sd(Sd) | MAKE_Dm(Dm)) -#define VCVTR_64_to_i(Sd,Dm) CC_VCVTR_64_to_i(NATIVE_CC_AL,Sd,Dm) -#define CC_VCVTR_32_to_i(cc,Sd,Sm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0xd << 16) | (0xa << 8) | (0x4 << 4) | MAKE_Sd(Sd) | MAKE_Sm(Sm)) -#define VCVTR_32_to_i(Sd,Sm) CC_VCVTR_32_to_i(NATIVE_CC_AL,Sd,Sm) +#define CC_VCVTR64toI_sd(cc,Sd,Dm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0xd << 16) | (0xb << 8) | (0x4 << 4) | MAKE_Sd(Sd) | MAKE_Dm(Dm)) +#define VCVTR64toI_sd(Sd,Dm) CC_VCVTR64toI_sd(NATIVE_CC_AL,Sd,Dm) +#define CC_VCVTR32toI_ss(cc,Sd,Sm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0xd << 16) | (0xa << 8) | (0x4 << 4) | MAKE_Sd(Sd) | MAKE_Sm(Sm)) +#define VCVTR32toI_ss(Sd,Sm) CC_VCVTR32toI_ss(NATIVE_CC_AL,Sd,Sm) -#define CC_VCVT_64_to_i(cc,Sd,Dm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0xd << 16) | (0xb << 8) | (0xc << 4) | MAKE_Sd(Sd) | MAKE_Dm(Dm)) -#define VCVT_64_to_i(Sd,Dm) CC_VCVT_64_to_i(NATIVE_CC_AL,Sd,Dm) -#define CC_VCVT_32_to_i(cc,Sd,Sm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0xd << 16) | (0xa << 8) | (0xc << 4) | MAKE_Sd(Sd) | MAKE_Sm(Sm)) -#define VCVT_32_to_i(Sd,Sm) CC_VCVT_32_to_i(NATIVE_CC_AL,Sd,Sm) +#define CC_VCVT64toI_sd(cc,Sd,Dm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0xd << 16) | (0xb << 8) | (0xc << 4) | MAKE_Sd(Sd) | MAKE_Dm(Dm)) +#define VCVT64toI_sd(Sd,Dm) CC_VCVT64toI_sd(NATIVE_CC_AL,Sd,Dm) +#define CC_VCVT32toI_ss(cc,Sd,Sm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0xd << 16) | (0xa << 8) | (0xc << 4) | MAKE_Sd(Sd) | MAKE_Sm(Sm)) +#define VCVT32toI_ss(Sd,Sm) CC_VCVT32toI_ss(NATIVE_CC_AL,Sd,Sm) -#define CC_VCVT_64_from_i(cc,Dd,Sm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x8 << 16) | (0xb << 8) | (0xc << 4) | MAKE_Dd(Dd) | MAKE_Sm(Sm)) -#define VCVT_64_from_i(Dd,Sm) CC_VCVT_64_from_i(NATIVE_CC_AL,Dd,Sm) 
-#define CC_VCVT_32_from_i(cc,Sd,Sm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x8 << 16) | (0xa << 8) | (0xc << 4) | MAKE_Sd(Sd) | MAKE_Sm(Sm)) -#define VCVT_32_from_i(Sd,Sm) CC_VCVT_32_from_i(NATIVE_CC_AL,Dd,Sm) +#define CC_VCVTIto64_ds(cc,Dd,Sm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x8 << 16) | (0xb << 8) | (0xc << 4) | MAKE_Dd(Dd) | MAKE_Sm(Sm)) +#define VCVTIto64_ds(Dd,Sm) CC_VCVTIto64_ds(NATIVE_CC_AL,Dd,Sm) +#define CC_VCVTIto32_ss(cc,Sd,Sm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x8 << 16) | (0xa << 8) | (0xc << 4) | MAKE_Sd(Sd) | MAKE_Sm(Sm)) +#define VCVTIto32_ss(Sd,Sm) CC_VCVTIto32_ss(NATIVE_CC_AL,Sd,Sm) -#define CC_VMOV_rr64(cc,Dd,Dm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0xb << 8) | (0x4 << 4) | MAKE_Dd(Dd) | MAKE_Dm(Dm)) -#define VMOV_rr64(Dd,Dm) CC_VMOV_rr64(NATIVE_CC_AL,Dd,Dm) -#define CC_VMOV_rr32(cc,Sd,Sm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0xa << 8) | (0x4 << 4) | MAKE_Sd(Sd) | MAKE_Sm(Sm)) -#define VMOV_rr32(Sd,Sm) CC_VMOV_rr32(NATIVE_CC_AL,Sd,Sm) +#define CC_VADD64_ddd(cc,Dd,Dn,Dm) _W(((cc) << 28) | (0xe << 24) | (0x3 << 20) | (0xb << 8) | (0x0 << 4) | MAKE_Dd(Dd) | MAKE_Dn(Dn) | MAKE_Dm(Dm)) +#define VADD64_ddd(Dd,Dn,Dm) CC_VADD64_ddd(NATIVE_CC_AL,Dd,Dn,Dm) +#define CC_VADD32_sss(cc,Sd,Sn,Sm) _W(((cc) << 28) | (0xe << 24) | (0x3 << 20) | (0xa << 8) | (0x0 << 4) | MAKE_Sd(Sd) | MAKE_Sn(Sn) | MAKE_Sm(Sm)) +#define VADD32_sss(Sd,Sn,Sm) CC_VADD32_sss(NATIVE_CC_AL,Sd,Sn,Sm) -#define CC_VADD64(cc,Dd,Dn,Dm) _W(((cc) << 28) | (0xe << 24) | (0x3 << 20) | (0xb << 8) | (0x0 << 4) | MAKE_Dd(Dd) | MAKE_Dn(Dn) | MAKE_Dm(Dm)) -#define VADD64(Dd,Dn,Dm) CC_VADD64(NATIVE_CC_AL,Dd,Dn,Dm) -#define CC_VADD32(cc,Sd,Sn,Sm) _W(((cc) << 28) | (0xe << 24) | (0x3 << 20) | (0xa << 8) | (0x0 << 4) | MAKE_Sd(Sd) | MAKE_Sn(Sn) | MAKE_Sm(Sm)) -#define VADD32(Sd,Sn,Sm) CC_VADD32(NATIVE_CC_AL,Sd,Sn,Sm) +#define CC_VSUB64_ddd(cc,Dd,Dn,Dm) _W(((cc) << 28) | (0xe << 24) | (0x3 << 20) | (0xb << 8) | (0x4 << 4) | MAKE_Dd(Dd) | MAKE_Dn(Dn) 
| MAKE_Dm(Dm)) +#define VSUB64_ddd(Dd,Dn,Dm) CC_VSUB64_ddd(NATIVE_CC_AL,Dd,Dn,Dm) +#define CC_VSUB32_sss(cc,Sd,Sn,Sm) _W(((cc) << 28) | (0xe << 24) | (0x3 << 20) | (0xa << 8) | (0x4 << 4) | MAKE_Sd(Sd) | MAKE_Sn(Sn) | MAKE_Sm(Sm)) +#define VSUB32_sss(Sd,Sn,Sm) CC_VSUB32_sss(NATIVE_CC_AL,Sd,Sn,Sm) -#define CC_VSUB64(cc,Dd,Dn,Dm) _W(((cc) << 28) | (0xe << 24) | (0x3 << 20) | (0xb << 8) | (0x4 << 4) | MAKE_Dd(Dd) | MAKE_Dn(Dn) | MAKE_Dm(Dm)) -#define VSUB64(Dd,Dn,Dm) CC_VSUB64(NATIVE_CC_AL,Dd,Dn,Dm) -#define CC_VSUB32(cc,Sd,Sn,Sm) _W(((cc) << 28) | (0xe << 24) | (0x3 << 20) | (0xa << 8) | (0x4 << 4) | MAKE_Sd(Sd) | MAKE_Sn(Sn) | MAKE_Sm(Sm)) -#define VSUB32(Sd,Sn,Sm) CC_VSUB32(NATIVE_CC_AL,Sd,Sn,Sm) +#define CC_VMUL64_ddd(cc,Dd,Dn,Dm) _W(((cc) << 28) | (0xe << 24) | (0x2 << 20) | (0xb << 8) | (0x0 << 4) | MAKE_Dd(Dd) | MAKE_Dn(Dn) | MAKE_Dm(Dm)) +#define VMUL64_ddd(Dd,Dn,Dm) CC_VMUL64_ddd(NATIVE_CC_AL,Dd,Dn,Dm) +#define CC_VMUL32_sss(cc,Sd,Sn,Sm) _W(((cc) << 28) | (0xe << 24) | (0x2 << 20) | (0xa << 8) | (0x0 << 4) | MAKE_Sd(Sd) | MAKE_Sn(Sn) | MAKE_Sm(Sm)) +#define VMUL32_sss(Sd,Sn,Sm) CC_VMUL32_sss(NATIVE_CC_AL,Sd,Sn,Sm) -#define CC_VMUL64(cc,Dd,Dn,Dm) _W(((cc) << 28) | (0xe << 24) | (0x2 << 20) | (0xb << 8) | (0x0 << 4) | MAKE_Dd(Dd) | MAKE_Dn(Dn) | MAKE_Dm(Dm)) -#define VMUL64(Dd,Dn,Dm) CC_VMUL64(NATIVE_CC_AL,Dd,Dn,Dm) -#define CC_VMUL32(cc,Sd,Sn,Sm) _W(((cc) << 28) | (0xe << 24) | (0x2 << 20) | (0xa << 8) | (0x0 << 4) | MAKE_Sd(Sd) | MAKE_Sn(Sn) | MAKE_Sm(Sm)) -#define VMUL32(Sd,Sn,Sm) CC_VMUL32(NATIVE_CC_AL,Sd,Sn,Sm) +#define CC_VDIV64_ddd(cc,Dd,Dn,Dm) _W(((cc) << 28) | (0xe << 24) | (0x8 << 20) | (0xb << 8) | (0x0 << 4) | MAKE_Dd(Dd) | MAKE_Dn(Dn) | MAKE_Dm(Dm)) +#define VDIV64_ddd(Dd,Dn,Dm) CC_VDIV64_ddd(NATIVE_CC_AL,Dd,Dn,Dm) +#define CC_VDIV32_sss(cc,Sd,Sn,Sm) _W(((cc) << 28) | (0xe << 24) | (0x8 << 20) | (0xa << 8) | (0x0 << 4) | MAKE_Sd(Sd) | MAKE_Sn(Sn) | MAKE_Sm(Sm)) +#define VDIV32_sss(Sd,Sn,Sm) CC_VDIV32_sss(NATIVE_CC_AL,Sd,Sn,Sm) -#define 
CC_VDIV64(cc,Dd,Dn,Dm) _W(((cc) << 28) | (0xe << 24) | (0x8 << 20) | (0xb << 8) | (0x0 << 4) | MAKE_Dd(Dd) | MAKE_Dn(Dn) | MAKE_Dm(Dm)) -#define VDIV64(Dd,Dn,Dm) CC_VDIV64(NATIVE_CC_AL,Dd,Dn,Dm) -#define CC_VDIV32(cc,Sd,Sn,Sm) _W(((cc) << 28) | (0xe << 24) | (0x8 << 20) | (0xa << 8) | (0x0 << 4) | MAKE_Sd(Sd) | MAKE_Sn(Sn) | MAKE_Sm(Sm)) -#define VDIV32(Sd,Sn,Sm) CC_VDIV32(NATIVE_CC_AL,Sd,Sn,Sm) +#define CC_VABS64_dd(cc,Dd,Dm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0xb << 8) | (0xc << 4) | MAKE_Dd(Dd) | MAKE_Dm(Dm)) +#define VABS64_dd(Dd,Dm) CC_VABS64_dd(NATIVE_CC_AL,Dd,Dm) +#define CC_VABS32_ss(cc,Sd,Sm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0xa << 8) | (0xc << 4) | MAKE_Sd(Sd) | MAKE_Sm(Sm)) +#define VABS32_ss(Sd,Sm) CC_VABS32_ss(NATIVE_CC_AL,Sd,Sm) -#define CC_VABS64(cc,Dd,Dm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0xb << 8) | (0xc << 4) | MAKE_Dd(Dd) | MAKE_Dm(Dm)) -#define VABS64(Dd,Dm) CC_VABS64(NATIVE_CC_AL,Dd,Dm) -#define CC_VABS32(cc,Sd,Sm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0xa << 8) | (0xc << 4) | MAKE_Sd(Sd) | MAKE_Sm(Sm)) -#define VABS32(Sd,Sm) CC_VABS32(NATIVE_CC_AL,Sd,Sm) +#define CC_VNEG64_dd(cc,Dd,Dm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x1 << 16) | (0xb << 8) | (0x4 << 4) | MAKE_Dd(Dd) | MAKE_Dm(Dm)) +#define VNEG64_dd(Dd,Dm) CC_VNEG64_dd(NATIVE_CC_AL,Dd,Dm) +#define CC_VNEG32_ss(cc,Sd,Sm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x1 << 16) | (0xa << 8) | (0x4 << 4) | MAKE_Sd(Sd) | MAKE_Sm(Sm)) +#define VNEG32_ss(Sd,Sm) CC_VNEG32_ss(NATIVE_CC_AL,Sd,Sm) -#define CC_VNEG64(cc,Dd,Dm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x1 << 16) | (0xb << 8) | (0x4 << 4) | MAKE_Dd(Dd) | MAKE_Dm(Dm)) -#define VNEG64(Dd,Dm) CC_VNEG64(NATIVE_CC_AL,Dd,Dm) -#define CC_VNEG32(cc,Sd,Sm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x1 << 16) | (0xa << 8) | (0x4 << 4) | MAKE_Sd(Sd) | MAKE_Sm(Sm)) -#define VNEG32(Sd,Sm) CC_VNEG32(NATIVE_CC_AL,Sd,Sm) +#define CC_VSQRT64_dd(cc,Dd,Dm) _W(((cc) << 28) | (0xe << 
24) | (0xb << 20) | (0x1 << 16) | (0xb << 8) | (0xc << 4) | MAKE_Dd(Dd) | MAKE_Dm(Dm)) +#define VSQRT64_dd(Dd,Dm) CC_VSQRT64_dd(NATIVE_CC_AL,Dd,Dm) +#define CC_VSQRT32_ss(cc,Sd,Sm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x1 << 16) | (0xa << 8) | (0xc << 4) | MAKE_Sd(Sd) | MAKE_Sm(Sm)) +#define VSQRT32_ss(Sd,Sm) CC_VSQRT32_ss(NATIVE_CC_AL,Sd,Sm) -#define CC_VSQRT64(cc,Dd,Dm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x1 << 16) | (0xb << 8) | (0xc << 4) | MAKE_Dd(Dd) | MAKE_Dm(Dm)) -#define VSQRT64(Dd,Dm) CC_VSQRT64(NATIVE_CC_AL,Dd,Dm) -#define CC_VSQRT32(cc,Sd,Sm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x1 << 16) | (0xa << 8) | (0xc << 4) | MAKE_Sd(Sd) | MAKE_Sm(Sm)) -#define VSQRT32(Sd,Sm) CC_VSQRT32(NATIVE_CC_AL,Sd,Sm) +#define CC_VCMP64_dd(cc,Dd,Dm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x4 << 16) | (0xb << 8) | (0xc << 4) | MAKE_Dd(Dd) | MAKE_Dm(Dm)) +#define VCMP64_dd(Dd,Dm) CC_VCMP64_dd(NATIVE_CC_AL,Dd,Dm) +#define CC_VCMP32_ss(cc,Sd,Sm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x4 << 16) | (0xa << 8) | (0xc << 4) | MAKE_Sd(Sd) | MAKE_Sm(Sm)) +#define VCMP32_ss(Sd,Sm) CC_VCMP32_ss(NATIVE_CC_AL,Sd,Sm) -#define CC_VCMP64(cc,Dd,Dm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x4 << 16) | (0xb << 8) | (0xc << 4) | MAKE_Dd(Dd) | MAKE_Dm(Dm)) -#define VCMP64(Dd,Dm) CC_VCMP64(NATIVE_CC_AL,Dd,Dm) -#define CC_VCMP32(cc,Sd,Sm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x4 << 16) | (0xa << 8) | (0xc << 4) | MAKE_Sd(Sd) | MAKE_Sm(Sm)) -#define VCMP32(Sd,Sm) CC_VCMP32(NATIVE_CC_AL,Sd,Sm) +#define CC_VCMP64_d0(cc,Dd) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x5 << 16) | (0xb << 8) | (0xc << 4) | MAKE_Dd(Dd)) +#define VCMP64_d0(Dd) CC_VCMP64_d0(NATIVE_CC_AL,Dd) -#define CC_VCMP64_0(cc,Dd) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x5 << 16) | (0xb << 8) | (0xc << 4) | MAKE_Dd(Dd)) -#define VCMP64_0(Dd) CC_VCMP64_0(NATIVE_CC_AL,Dd) +#define CC_VMRS_r(cc,Rt) _W(((cc) << 28) | (0xe << 24) | (0xf << 20) | (0x1 << 16) | (Rt 
<< 12) | (0xa << 8) | (0x1 << 4)) +#define VMRS_r(Rt) CC_VMRS_r(NATIVE_CC_AL,Rt) +#define VMRS_CPSR() VMRS_r(15) -#define CC_VTST64(cc,Dd) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x5 << 16) | (0xb << 8) | (0xc << 4) | MAKE_Dd(Dd)) -#define VTST64(Dd) CC_VTST64(NATIVE_CC_AL,Dd) -#define CC_VTST32(cc,Sd) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x5 << 16) | (0xa << 8) | (0xc << 4) | MAKE_Sd(Sd)) -#define VTST32(Sd) CC_VTST32(NATIVE_CC_AL,Sd) +#define CC_VMSR_r(cc,Rt) _W(((cc) << 28) | (0xe << 24) | (0xe << 20) | (0x1 << 16) | (Rt << 12) | (0xa << 8) | (0x1 << 4)) +#define VMSR_r(Rt) CC_VMSR_r(NATIVE_CC_AL,Rt) -#define CC_VMRS(cc,Rt) _W(((cc) << 28) | (0xe << 24) | (0xf << 20) | (0x1 << 16) | (Rt << 12) | (0xa << 8) | (0x1 << 4)) -#define VMRS(Rt) CC_VMRS(NATIVE_CC_AL,Rt) +// Immediate values for VBIC, VMOV (I32), VMVN (I32) and VORR +#define FIMMVAL(imm) (((imm & 0x80) << 17) | ((imm & 0x70) << 12) | ((imm & 0x0f) << 0)) +#define FIMM32(imm) ((imm & 0xffffff00) == 0 ? (FIMMVAL(imm >> 0) | (0x0 << 8)) : \ + (imm & 0xffff00ff) == 0 ? (FIMMVAL(imm >> 8) | (0x1 << 8)) : \ + (imm & 0xff00ffff) == 0 ? 
(FIMMVAL(imm >> 16) | (0x2 << 8)) : \ + (FIMMVAL(imm >> 24) | (0x3 << 8))) -#define CC_VMSR(cc,Rt) _W(((cc) << 28) | (0xe << 24) | (0xe << 20) | (0x1 << 16) | (Rt << 12) | (0xa << 8) | (0x1 << 4)) -#define VMSR(Rt) CC_VMSR(NATIVE_CC_AL,Rt) +// VMOV I64: each bit of imm defines the value for an entire byte +// imm -> aaaaaaaa bbbbbbbb cccccccc dddddddd eeeeeeee ffffffff gggggggg hhhhhhhh +#define VMOV_I64_dimmI(Dd,imm) _W((0xf << 28) | (0x2 << 24) | (0x8 << 20) | (0x0 << 16) | (0xe << 8) | (0x3 << 4) | MAKE_Dd(Dd) | FIMMVAL(imm)) -#define CC_VMOV64_i(cc,Dd,imm4H,imm4L) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (imm4H << 16) | (0xb << 8) | (imm4L) | MAKE_Dd(Dd)) -#define VMOV64_i(Dd,imm4H,imm4L) CC_VMOV64_i(NATIVE_CC_AL,Dd,imm4H,imm4L) +// VMOV F64: imm -> aBbbbbbb bbcdefgh 00000000 00000000 00000000 00000000 00000000 00000000 (B = not b) +#define FIMMF64(imm) (((imm & 0xf0) << 12) | ((imm & 0x0f) << 0)) +#define CC_VMOV_F64_dimmF(cc,Dd,imm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0xb << 8) | MAKE_Dd(Dd) | FIMMF64(imm)) +#define VMOV_F64_dimmF(Dd,imm) CC_VMOV_F64_dimmF(NATIVE_CC_AL,Dd,imm) -// Floatingpoint used by non FPU JIT -#define CC_VMOV_sr(cc,Sd,Rn) _W(((cc) << 28) | (0x70 << 21) | (0 << 20) | (Sd << 16) | (Rn << 12) | (0x0a << 8) | (0x10)) -#define VMOV_sr(Sd,Rn) CC_VMOV_sr(NATIVE_CC_AL,Sd,Rn) +// VMOV F32: imm -> aBbbbbbc defgh000 00000000 00000000 (B = not b) +#define CC_VMOV_F32_si(cc,Sd,imm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0xa << 8) | MAKE_Sd(Sd) | FIMMF64(imm)) +#define VMOV_F32_si(Sd,imm) CC_VMOV_F32_si(NATIVE_CC_AL,Sd,imm) -#define CC_VMOV_rs(cc,Rd,Sn) _W(((cc) << 28) | (0x70 << 21) | (1 << 20) | (Sn << 16) | (Rd << 12) | (0x0a << 8) | (0x10)) -#define VMOV_rs(Rd,Sn) CC_VMOV_rs(NATIVE_CC_AL,Rd,Sn) - -#define CC_VCVT_f64_u32(cc,Dd,Sn) _W(((cc) << 28) | (0x1d << 23) | (0x7 << 19) | (0x0 << 16) | (Dd << 12) | (0xb << 8) | (0x4 << 4) | (Sn)) -#define VCVT_f64_u32(Dd,Sn) CC_VCVT_f64_u32(NATIVE_CC_AL,Dd,Sn) - -#define 
CC_VCVT_u32_f64(cc,Sd,Dn) _W(((cc) << 28) | (0x1d << 23) | (0x7 << 19) | (0x4 << 16) | (Sd << 12) | (0xb << 8) | (0xc << 4) | (Dn)) -#define VCVT_u32_f64(Sd,Dn) CC_VCVT_u32_f64(NATIVE_CC_AL,Sd,Dn) - -#define CC_VDIV_ddd(cc,Dd,Dn,Dm) _W(((cc) << 28) | (0x1d << 23) | (0x0 << 20) | (Dn << 16) | (Dd << 12) | (0xb << 8) | (0x0 << 4) | (Dm)) -#define VDIV_ddd(Dd,Dn,Dm) CC_VDIV_ddd(NATIVE_CC_AL,Dd,Dn,Dm) +// Immediate value for shift +#define FIMM6(imm) ((imm) << 16) +#define VSHL64_ddi(Dd,Dm,imm) _W((0xf << 28) | (0x2 << 24) | (0x8 << 20) | (0x5 << 8) | (0x9 << 4) | MAKE_Dd(Dd) | MAKE_Dm(Dm) | FIMM6(imm)) +#define VSHR64_ddi(Dd,Dm,imm) _W((0xf << 28) | (0x3 << 24) | (0x8 << 20) | (0x0 << 8) | (0x9 << 4) | MAKE_Dd(Dd) | MAKE_Dm(Dm) | FIMM6(64-imm)) +#define VORR_ddd(Dd,Dn,Dm) _W((0xf << 28) | (0x2 << 24) | (0x2 << 20) | (0x1 << 8) | (0x1 << 4) | MAKE_Dd(Dd) | MAKE_Dn(Dn) | MAKE_Dm(Dm)) #endif /* ARM_RTASM_H */ diff --git a/src/jit/compemu.h b/src/jit/compemu.h index fac4bcf9..eaaad979 100644 --- a/src/jit/compemu.h +++ b/src/jit/compemu.h @@ -90,7 +90,7 @@ typedef union { #define BYTES_PER_INST 10240 /* paranoid ;-) */ #if defined(CPU_arm) -#define LONGEST_68K_INST 128 /* The number of bytes the longest possible +#define LONGEST_68K_INST 256 /* The number of bytes the longest possible 68k instruction takes */ #else #define LONGEST_68K_INST 16 /* The number of bytes the longest possible @@ -127,7 +127,7 @@ typedef union { #else #define N_REGS 8 /* really only 7, but they are numbered 0,1,2,3,5,6,7 */ #endif -#define N_FREGS 16 // We use 16 regs: 0 - FP_RESULT, 1-3 - SCRATCH, 4-7 - ???, 8-15 - Amiga regs FP0-FP7 +#define N_FREGS 16 // We use 10 regs: 6 - FP_RESULT, 7 - SCRATCH, 8-15 - Amiga regs FP0-FP7 /* Functions exposed to newcpu, or to what was moved from newcpu.c to @@ -140,8 +140,6 @@ extern void set_target(uae_u8* t); extern void freescratch(void); extern void build_comp(void); extern void set_cache_state(int enabled); -extern int get_cache_state(void); -extern 
uae_u32 get_jitted_size(void); #ifdef JIT extern void flush_icache(int n); extern void flush_icache_hard(int n); @@ -166,7 +164,7 @@ extern uae_u8* comp_pc_p; extern void* pushall_call_handler; #define VREGS 32 -#define VFREGS 16 +#define VFREGS 10 #define INMEM 1 #define CLEAN 2 @@ -224,8 +222,6 @@ STATIC_INLINE int end_block(uae_u16 opcode) #define FP_RESULT 8 #define FS1 9 -#define FS2 10 -#define FS3 11 #define SCRATCH_F64_1 1 #define SCRATCH_F64_2 2 @@ -326,6 +322,7 @@ extern void readlong(int address, int dest, int tmp); extern void writebyte(int address, int source, int tmp); extern void writeword(int address, int source, int tmp); extern void writelong(int address, int source, int tmp); +extern void writeword_clobber(int address, int source, int tmp); extern void writelong_clobber(int address, int source, int tmp); extern void get_n_addr(int address, int dest, int tmp); extern void get_n_addr_jmp(int address, int dest, int tmp); diff --git a/src/jit/compemu_fpp.cpp b/src/jit/compemu_fpp.cpp index 91606382..80118207 100644 --- a/src/jit/compemu_fpp.cpp +++ b/src/jit/compemu_fpp.cpp @@ -34,7 +34,7 @@ STATIC_INLINE int comp_fp_get (uae_u32 opcode, uae_u16 extra, int treg) int mode = (opcode >> 3) & 7; int size = (extra >> 10) & 7; - if ((size == 2 && (mode != 7 || reg != 4)) || size == 3 || size == 7) /* 2 = long double, 3 = packed decimal, 7 is not defined */ + if (size == 3 || size == 7) /* 3 = packed decimal, 7 is not defined */ return -1; switch (mode) { case 0: /* Dn */ @@ -180,6 +180,9 @@ STATIC_INLINE int comp_fp_get (uae_u32 opcode, uae_u16 extra, int treg) readlong (S1, S2, S3); fmov_s_rr (treg, S2); return 1; + case 2: /* Long Double */ + fp_to_exten_rm (treg, S1); + return 0; case 4: /* Word */ readword (S1, S2, S3); fmov_w_rr (treg, S2); @@ -208,7 +211,7 @@ STATIC_INLINE int comp_fp_put (uae_u32 opcode, uae_u16 extra) int mode = (opcode >> 3) & 7; int size = (extra >> 10) & 7; - if (size == 2 || size == 3 || size == 7) /* 2 = long double, 3 = 
packed decimal, 7 is not defined */ + if (size == 3 || size == 7) /* 3 = packed decimal, 7 is not defined */ return -1; switch (mode) { case 0: /* Dn */ @@ -282,9 +285,12 @@ STATIC_INLINE int comp_fp_put (uae_u32 opcode, uae_u16 extra) fmov_to_s_rr(S2, sreg); writelong_clobber (S1, S2, S3); return 0; + case 2:/* Long Double */ + fp_from_exten_mr (S1, sreg); + return 0; case 4: /* Word */ fmov_to_w_rr(S2, sreg); - writeword (S1, S2, S3); + writeword_clobber (S1, S2, S3); return 0; case 5: /* Double */ fmov_to_d_rrr(S2, S3, sreg); @@ -344,14 +350,45 @@ void comp_fdbcc_opp (uae_u32 opcode, uae_u16 extra) void comp_fscc_opp (uae_u32 opcode, uae_u16 extra) { - //printf("comp_fscc_opp() called (0x%04x, 0x%04x)\n", opcode, extra); + int reg; + if (!currprefs.compfpu) { FAIL (1); return; } - FAIL (1); - return; + if (extra & 0x20) { /* only cc from 00 to 1f are defined */ + FAIL (1); + return; + } + if ((opcode & 0x38) != 0) { /* We can only do to integer register */ + FAIL (1); + return; + } + + fflags_into_flags (); + reg = (opcode & 7); + + if (!(opcode & 0x38)) { + switch (extra & 0x0f) { /* according to fpp.c, the 0x10 bit is ignored */ + case 0: fp_fscc_ri(reg, NATIVE_CC_F_NEVER); break; + case 1: fp_fscc_ri(reg, NATIVE_CC_EQ); break; + case 2: fp_fscc_ri(reg, NATIVE_CC_F_OGT); break; + case 3: fp_fscc_ri(reg, NATIVE_CC_F_OGE); break; + case 4: fp_fscc_ri(reg, NATIVE_CC_F_OLT); break; + case 5: fp_fscc_ri(reg, NATIVE_CC_F_OLE); break; + case 6: fp_fscc_ri(reg, NATIVE_CC_F_OGL); break; + case 7: fp_fscc_ri(reg, NATIVE_CC_F_OR); break; + case 8: fp_fscc_ri(reg, NATIVE_CC_F_UN); break; + case 9: fp_fscc_ri(reg, NATIVE_CC_F_UEQ); break; + case 10: fp_fscc_ri(reg, NATIVE_CC_F_UGT); break; + case 11: fp_fscc_ri(reg, NATIVE_CC_F_UGE); break; + case 12: fp_fscc_ri(reg, NATIVE_CC_F_ULT); break; + case 13: fp_fscc_ri(reg, NATIVE_CC_F_ULE); break; + case 14: fp_fscc_ri(reg, NATIVE_CC_NE); break; + case 15: fp_fscc_ri(reg, NATIVE_CC_AL); break; + } + } } void comp_ftrapcc_opp 
(uae_u32 opcode, uaecptr oldpc) @@ -519,8 +556,122 @@ void comp_fpp_opp (uae_u32 opcode, uae_u16 extra) return; case 6: case 7: - FAIL (1); - return; + { + uae_u32 list = 0; + int incr = 0; + if (extra & 0x2000) { + int ad; + + /* FMOVEM FPP->memory */ + switch ((extra >> 11) & 3) { /* Get out early if failure */ + case 0: + case 2: + break; + case 1: + case 3: + default: + FAIL (1); + return; + } + ad = comp_fp_adr (opcode); + if (ad < 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + return; + } + switch ((extra >> 11) & 3) { + case 0: /* static pred */ + list = extra & 0xff; + incr = -1; + break; + case 2: /* static postinc */ + list = extra & 0xff; + incr = 1; + break; + case 1: /* dynamic pred */ + case 3: /* dynamic postinc */ + abort (); + } + if (incr < 0) { /* Predecrement */ + for (reg = 7; reg >= 0; reg--) { + if (list & 0x80) { + sub_l_ri (ad, 12); + fp_from_exten_mr (ad, reg); + } + list <<= 1; + } + } else { /* Postincrement */ + for (reg = 0; reg <= 7; reg++) { + if (list & 0x80) { + fp_from_exten_mr (ad, reg); + add_l_ri (ad, 12); + } + list <<= 1; + } + } + if ((opcode & 0x38) == 0x18) + mov_l_rr ((opcode & 7) + 8, ad); + if ((opcode & 0x38) == 0x20) + mov_l_rr ((opcode & 7) + 8, ad); + } else { + /* FMOVEM memory->FPP */ + + int ad; + switch ((extra >> 11) & 3) { /* Get out early if failure */ + case 0: + case 2: + break; + case 1: + case 3: + default: + FAIL (1); + return; + } + ad = comp_fp_adr (opcode); + if (ad < 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + return; + } + switch ((extra >> 11) & 3) { + case 0: /* static pred */ + list = extra & 0xff; + incr = -1; + break; + case 2: /* static postinc */ + list = extra & 0xff; + incr = 1; + break; + case 1: /* dynamic pred */ + case 3: /* dynamic postinc */ + abort (); + } + + if (incr < 0) { + // not reached + for (reg = 7; reg >= 0; reg--) { + if (list & 0x80) { + sub_l_ri (ad, 12); + fp_to_exten_rm(reg, ad); + } + list <<= 1; + } + } else { + for (reg = 0; reg <= 7; 
reg++) { + if (list & 0x80) { + fp_to_exten_rm(reg, ad); + add_l_ri (ad, 12); + } + list <<= 1; + } + } + if ((opcode & 0x38) == 0x18) + mov_l_rr ((opcode & 7) + 8, ad); + if ((opcode & 0x38) == 0x20) + mov_l_rr ((opcode & 7) + 8, ad); + } + } + return; case 2: /* from to FPx */ dont_care_fflags (); if ((extra & 0xfc00) == 0x5c00) { /* FMOVECR */ @@ -722,11 +873,13 @@ void comp_fpp_opp (uae_u32 opcode, uae_u16 extra) case 0x35: case 0x36: case 0x37: - if (dreg == (extra & 7)) - ffunc_rr (sin, dreg, sreg); - else - fsincos_rr (dreg, extra & 7, sreg); - break; + FAIL (1); + return; +// if (dreg == (extra & 7)) +// ffunc_rr (sin, dreg, sreg); +// else +// fsincos_rr (dreg, extra & 7, sreg); +// break; case 0x38: /* FCMP */ fmov_rr (FP_RESULT, dreg); fsub_rr (FP_RESULT, sreg); diff --git a/src/jit/compemu_midfunc_arm.cpp b/src/jit/compemu_midfunc_arm.cpp index 8342fbb8..a4f7c2c3 100644 --- a/src/jit/compemu_midfunc_arm.cpp +++ b/src/jit/compemu_midfunc_arm.cpp @@ -491,6 +491,8 @@ STATIC_INLINE void emit_jmp_target(uae_u32 a) { * FPU stuff * *************************************************************************/ +#ifdef USE_JIT_FPU + MIDFUNC(1,f_forget_about,(FW r)) { if (f_isinreg(r)) @@ -875,24 +877,26 @@ MIDFUNC(3,ffunc_rr,(double (*func)(double), FW d, FR s)) } MENDFUNC(3,ffunc_rr,(double (*func)(double), FW d, FR s)) -MIDFUNC(3,fsincos_rr,(FW d, FW c, FR s)) -{ - clobber_flags(); - prepare_for_call_1(); - prepare_for_call_2(); - - s = f_readreg(s); /* s for source */ - d = f_writereg(d); /* d for sine */ - c = f_writereg(c); /* c for cosine */ - - raw_ffunc_rr(cos, c, s); - raw_ffunc_rr(sin, d, s); - - f_unlock(s); - f_unlock(d); - f_unlock(c); -} -MENDFUNC(3,fsincos_rr,(FW d, FW c, FR s)) +//MIDFUNC(3,fsincos_rr,(FW d, FW c, FR s)) +//{ +// clobber_flags(); +// prepare_for_call_1(); +// prepare_for_call_2(); +// +// s = f_readreg(s); /* s for source */ +// d = f_writereg(d); /* d for sine */ +// c = f_writereg(c); /* c for cosine */ +// +// // s may be FS1, so 
we need to save it before we call external func +// +// raw_ffunc_rr(cos, c, s); +// raw_ffunc_rr(sin, d, s); +// +// f_unlock(s); +// f_unlock(d); +// f_unlock(c); +//} +//MENDFUNC(3,fsincos_rr,(FW d, FW c, FR s)) MIDFUNC(3,fpowx_rr,(uae_u32 x, FW d, FR s)) { @@ -917,3 +921,37 @@ MIDFUNC(1,fflags_into_flags,()) fflags_into_flags_internal(); } MENDFUNC(1,fflags_into_flags,()) + +MIDFUNC(2,fp_from_exten_mr,(RR4 adr, FR s)) +{ + clobber_flags(); + + adr = readreg(adr, 4); + s = f_readreg(s); + raw_fp_from_exten_mr(adr, s); + f_unlock(s); + unlock2(adr); +} +MENDFUNC(2,fp_from_exten_mr,(RR4 adr, FR s)) + +MIDFUNC(2,fp_to_exten_rm,(FW d, RR4 adr)) +{ + clobber_flags(); + + adr = readreg(adr, 4); + d = f_writereg(d); + raw_fp_to_exten_rm(d, adr); + unlock2(adr); + f_unlock(d); +} +MENDFUNC(2,fp_to_exten_rm,(FW d, RR4 adr)) + +MIDFUNC(2,fp_fscc_ri,(RW4 d, int cc)) +{ + d = rmw(d, 4, 4); + raw_fp_fscc_ri(d, cc); + unlock2(d); +} +MENDFUNC(2,fp_fscc_ri,(RW4 d, int cc)) + +#endif // USE_JIT_FPU diff --git a/src/jit/compemu_midfunc_arm.h b/src/jit/compemu_midfunc_arm.h index 03fa9652..cc52356c 100644 --- a/src/jit/compemu_midfunc_arm.h +++ b/src/jit/compemu_midfunc_arm.h @@ -108,3 +108,6 @@ DECLARE_MIDFUNC(ffunc_rr(double (*func)(double), FW d, FR s)); DECLARE_MIDFUNC(fsincos_rr(FW d, FW c, FR s)); DECLARE_MIDFUNC(fpowx_rr(uae_u32 x, FW d, FR s)); DECLARE_MIDFUNC(fflags_into_flags()); +DECLARE_MIDFUNC(fp_from_exten_mr(RR4 adr, FR s)); +DECLARE_MIDFUNC(fp_to_exten_rm(FW d, RR4 adr)); +DECLARE_MIDFUNC(fp_fscc_ri(RW4, int cc)); diff --git a/src/jit/compemu_midfunc_arm2.cpp b/src/jit/compemu_midfunc_arm2.cpp index 89b7b202..d0491be8 100644 --- a/src/jit/compemu_midfunc_arm2.cpp +++ b/src/jit/compemu_midfunc_arm2.cpp @@ -1859,13 +1859,13 @@ MENDFUNC(2,jff_DBCC,(RR2 d, IMM cc)) s2 = readreg(s2, 4); d = writereg(d, 4); - VMOV_sr(0, s1); // move to s0 - VMOV_sr(1, s2); // move to s1 - VCVT_f64_u32(2, 0); // convert s0 to d2 (int to float) - VCVT_f64_u32(3, 1); // convert s1 to d3 
(int to float) - VDIV_ddd(4, 2, 3); // d4 = d2 / d3 - VCVT_u32_f64(0, 4); // convert d4 to s0 (float to int) - VMOV_rs(REG_WORK1, 0); // move from s0 + VMOV32_sr(0, s1); // move to s0 + VMOV32_sr(1, s2); // move to s1 + VCVTIto64_ds(2, 0); // convert s0 to d2 (int to float) + VCVTIto64_ds(3, 1); // convert s1 to d3 (int to float) + VDIV64_ddd(4, 2, 3); // d4 = d2 / d3 + VCVT64toI_sd(0, 4); // convert d4 to s0 (float to int) + VMOV32_rs(REG_WORK1, 0); // move from s0 LSRS_rri(REG_WORK2, REG_WORK1, 16); // if result of this is not 0, DIVU overflows -> no result BNE_i(2); @@ -1887,13 +1887,13 @@ MIDFUNC(3,jff_DIVU,(W4 d, RR4 s1, RR4 s2)) s2 = readreg(s2, 4); d = writereg(d, 4); - VMOV_sr(0, s1); // move to s0 - VMOV_sr(1, s2); // move to s1 - VCVT_f64_u32(2, 0); // convert s0 to d2 (int to float) - VCVT_f64_u32(3, 1); // convert s1 to d3 (int to float) - VDIV_ddd(4, 2, 3); // d4 = d2 / d3 - VCVT_u32_f64(0, 4); // convert d4 to s0 (float to int) - VMOV_rs(REG_WORK1, 0); // move from s0 + VMOV32_sr(0, s1); // move to s0 + VMOV32_sr(1, s2); // move to s1 + VCVTIto64_ds(2, 0); // convert s0 to d2 (int to float) + VCVTIto64_ds(3, 1); // convert s1 to d3 (int to float) + VDIV64_ddd(4, 2, 3); // d4 = d2 / d3 + VCVT64toI_sd(0, 4); // convert d4 to s0 (float to int) + VMOV32_rs(REG_WORK1, 0); // move from s0 LSRS_rri(REG_WORK2, REG_WORK1, 16); // if result of this is not 0, DIVU overflows BEQ_i(2); @@ -2855,13 +2855,8 @@ MIDFUNC(2,jnf_MOVE16,(RR4 d, RR4 s)) BIC_rri(s, s, 0x0000000F); BIC_rri(d, d, 0x0000000F); -#ifdef ARMV6T2 - MOVW_ri16(REG_WORK1, NATMEM_OFFSETX); - MOVT_ri16(REG_WORK1, NATMEM_OFFSETX >> 16); -#else - uae_s32 offs = get_data_natmem(); - LDR_rRI(REG_WORK1, RPC_INDEX, offs); -#endif + uae_s32 offs = (uae_u32)&NATMEM_OFFSETX - (uae_u32) ®s; + LDR_rRI(REG_WORK1, R_REGSTRUCT, offs); ADD_rrr(s, s, REG_WORK1); ADD_rrr(d, d, REG_WORK1); @@ -5115,13 +5110,8 @@ MENDFUNC(1,jff_TST_l,(RR4 s)) */ MIDFUNC(2,jnf_MEM_WRITE_OFF_b,(RR4 adr, RR4 b)) { -#ifdef ARMV6T2 - 
MOVW_ri16(REG_WORK2, NATMEM_OFFSETX); - MOVT_ri16(REG_WORK2, NATMEM_OFFSETX >> 16); -#else - uae_s32 offs = get_data_natmem(); - LDR_rRI(REG_WORK2, RPC_INDEX, offs); -#endif + uae_s32 offs = (uae_u32)&NATMEM_OFFSETX - (uae_u32) &regs; + LDR_rRI(REG_WORK2, R_REGSTRUCT, offs); adr = readreg(adr, 4); b = readreg(b, 4); @@ -5135,13 +5125,8 @@ MENDFUNC(2,jnf_MEM_WRITE_OFF_b,(RR4 adr, RR4 b)) MIDFUNC(2,jnf_MEM_WRITE_OFF_w,(RR4 adr, RR4 w)) { -#ifdef ARMV6T2 - MOVW_ri16(REG_WORK2, NATMEM_OFFSETX); - MOVT_ri16(REG_WORK2, NATMEM_OFFSETX >> 16); -#else - uae_s32 offs = get_data_natmem(); - LDR_rRI(REG_WORK2, RPC_INDEX, offs); -#endif + uae_s32 offs = (uae_u32)&NATMEM_OFFSETX - (uae_u32) &regs; + LDR_rRI(REG_WORK2, R_REGSTRUCT, offs); adr = readreg(adr, 4); w = readreg(w, 4); @@ -5156,13 +5141,8 @@ MENDFUNC(2,jnf_MEM_WRITE_OFF_w,(RR4 adr, RR4 w)) MIDFUNC(2,jnf_MEM_WRITE_OFF_l,(RR4 adr, RR4 l)) { -#ifdef ARMV6T2 - MOVW_ri16(REG_WORK2, NATMEM_OFFSETX); - MOVT_ri16(REG_WORK2, NATMEM_OFFSETX >> 16); -#else - uae_s32 offs = get_data_natmem(); - LDR_rRI(REG_WORK2, RPC_INDEX, offs); -#endif + uae_s32 offs = (uae_u32)&NATMEM_OFFSETX - (uae_u32) &regs; + LDR_rRI(REG_WORK2, R_REGSTRUCT, offs); adr = readreg(adr, 4); l = readreg(l, 4); @@ -5178,13 +5158,8 @@ MENDFUNC(2,jnf_MEM_WRITE_OFF_l,(RR4 adr, RR4 l)) MIDFUNC(2,jnf_MEM_READ_OFF_b,(W4 d, RR4 adr)) { -#ifdef ARMV6T2 - MOVW_ri16(REG_WORK2, NATMEM_OFFSETX); - MOVT_ri16(REG_WORK2, NATMEM_OFFSETX >> 16); -#else - uae_s32 offs = get_data_natmem(); - LDR_rRI(REG_WORK2, RPC_INDEX, offs); -#endif + uae_s32 offs = (uae_u32)&NATMEM_OFFSETX - (uae_u32) &regs; + LDR_rRI(REG_WORK2, R_REGSTRUCT, offs); adr = readreg(adr, 4); d = writereg(d, 4); @@ -5198,13 +5173,8 @@ MENDFUNC(2,jnf_MEM_READ_OFF_b,(W4 d, RR4 adr)) MIDFUNC(2,jnf_MEM_READ_OFF_w,(W4 d, RR4 adr)) { -#ifdef ARMV6T2 - MOVW_ri16(REG_WORK2, NATMEM_OFFSETX); - MOVT_ri16(REG_WORK2, NATMEM_OFFSETX >> 16); -#else - uae_s32 offs = get_data_natmem(); - LDR_rRI(REG_WORK2, RPC_INDEX, offs); -#endif + uae_s32 
offs = (uae_u32)&NATMEM_OFFSETX - (uae_u32) &regs; + LDR_rRI(REG_WORK2, R_REGSTRUCT, offs); adr = readreg(adr, 4); d = writereg(d, 4); @@ -5219,13 +5189,8 @@ MENDFUNC(2,jnf_MEM_READ_OFF_w,(W4 d, RR4 adr)) MIDFUNC(2,jnf_MEM_READ_OFF_l,(W4 d, RR4 adr)) { -#ifdef ARMV6T2 - MOVW_ri16(REG_WORK2, NATMEM_OFFSETX); - MOVT_ri16(REG_WORK2, NATMEM_OFFSETX >> 16); -#else - uae_s32 offs = get_data_natmem(); - LDR_rRI(REG_WORK2, RPC_INDEX, offs); -#endif + uae_s32 offs = (uae_u32)&NATMEM_OFFSETX - (uae_u32) &regs; + LDR_rRI(REG_WORK2, R_REGSTRUCT, offs); adr = readreg(adr, 4); d = writereg(d, 4); @@ -5241,13 +5206,8 @@ MENDFUNC(2,jnf_MEM_READ_OFF_l,(W4 d, RR4 adr)) MIDFUNC(2,jnf_MEM_WRITE24_OFF_b,(RR4 adr, RR4 b)) { -#ifdef ARMV6T2 - MOVW_ri16(REG_WORK2, NATMEM_OFFSETX); - MOVT_ri16(REG_WORK2, NATMEM_OFFSETX >> 16); -#else - uae_s32 offs = get_data_natmem(); - LDR_rRI(REG_WORK2, RPC_INDEX, offs); -#endif + uae_s32 offs = (uae_u32)&NATMEM_OFFSETX - (uae_u32) &regs; + LDR_rRI(REG_WORK2, R_REGSTRUCT, offs); adr = readreg(adr, 4); b = readreg(b, 4); @@ -5262,13 +5222,8 @@ MENDFUNC(2,jnf_MEM_WRITE24_OFF_b,(RR4 adr, RR4 b)) MIDFUNC(2,jnf_MEM_WRITE24_OFF_w,(RR4 adr, RR4 w)) { -#ifdef ARMV6T2 - MOVW_ri16(REG_WORK2, NATMEM_OFFSETX); - MOVT_ri16(REG_WORK2, NATMEM_OFFSETX >> 16); -#else - uae_s32 offs = get_data_natmem(); - LDR_rRI(REG_WORK2, RPC_INDEX, offs); -#endif + uae_s32 offs = (uae_u32)&NATMEM_OFFSETX - (uae_u32) &regs; + LDR_rRI(REG_WORK2, R_REGSTRUCT, offs); adr = readreg(adr, 4); w = readreg(w, 4); @@ -5284,13 +5239,8 @@ MENDFUNC(2,jnf_MEM_WRITE24_OFF_w,(RR4 adr, RR4 w)) MIDFUNC(2,jnf_MEM_WRITE24_OFF_l,(RR4 adr, RR4 l)) { -#ifdef ARMV6T2 - MOVW_ri16(REG_WORK2, NATMEM_OFFSETX); - MOVT_ri16(REG_WORK2, NATMEM_OFFSETX >> 16); -#else - uae_s32 offs = get_data_natmem(); - LDR_rRI(REG_WORK2, RPC_INDEX, offs); -#endif + uae_s32 offs = (uae_u32)&NATMEM_OFFSETX - (uae_u32) &regs; + LDR_rRI(REG_WORK2, R_REGSTRUCT, offs); adr = readreg(adr, 4); l = readreg(l, 4); @@ -5307,13 +5257,8 @@ 
MENDFUNC(2,jnf_MEM_WRITE24_OFF_l,(RR4 adr, RR4 l)) MIDFUNC(2,jnf_MEM_READ24_OFF_b,(W4 d, RR4 adr)) { -#ifdef ARMV6T2 - MOVW_ri16(REG_WORK2, NATMEM_OFFSETX); - MOVT_ri16(REG_WORK2, NATMEM_OFFSETX >> 16); -#else - uae_s32 offs = get_data_natmem(); - LDR_rRI(REG_WORK2, RPC_INDEX, offs); -#endif + uae_s32 offs = (uae_u32)&NATMEM_OFFSETX - (uae_u32) &regs; + LDR_rRI(REG_WORK2, R_REGSTRUCT, offs); adr = readreg(adr, 4); d = writereg(d, 4); @@ -5328,13 +5273,8 @@ MENDFUNC(2,jnf_MEM_READ24_OFF_b,(W4 d, RR4 adr)) MIDFUNC(2,jnf_MEM_READ24_OFF_w,(W4 d, RR4 adr)) { -#ifdef ARMV6T2 - MOVW_ri16(REG_WORK2, NATMEM_OFFSETX); - MOVT_ri16(REG_WORK2, NATMEM_OFFSETX >> 16); -#else - uae_s32 offs = get_data_natmem(); - LDR_rRI(REG_WORK2, RPC_INDEX, offs); -#endif + uae_s32 offs = (uae_u32)&NATMEM_OFFSETX - (uae_u32) &regs; + LDR_rRI(REG_WORK2, R_REGSTRUCT, offs); adr = readreg(adr, 4); d = writereg(d, 4); @@ -5350,13 +5290,8 @@ MENDFUNC(2,jnf_MEM_READ24_OFF_w,(W4 d, RR4 adr)) MIDFUNC(2,jnf_MEM_READ24_OFF_l,(W4 d, RR4 adr)) { -#ifdef ARMV6T2 - MOVW_ri16(REG_WORK2, NATMEM_OFFSETX); - MOVT_ri16(REG_WORK2, NATMEM_OFFSETX >> 16); -#else - uae_s32 offs = get_data_natmem(); - LDR_rRI(REG_WORK2, RPC_INDEX, offs); -#endif + uae_s32 offs = (uae_u32)&NATMEM_OFFSETX - (uae_u32) &regs; + LDR_rRI(REG_WORK2, R_REGSTRUCT, offs); adr = readreg(adr, 4); d = writereg(d, 4); @@ -5373,13 +5308,8 @@ MENDFUNC(2,jnf_MEM_READ24_OFF_l,(W4 d, RR4 adr)) MIDFUNC(2,jnf_MEM_GETADR_OFF,(W4 d, RR4 adr)) { -#ifdef ARMV6T2 - MOVW_ri16(REG_WORK2, NATMEM_OFFSETX); - MOVT_ri16(REG_WORK2, NATMEM_OFFSETX >> 16); -#else - uae_s32 offs = get_data_natmem(); - LDR_rRI(REG_WORK2, RPC_INDEX, offs); -#endif + uae_s32 offs = (uae_u32)&NATMEM_OFFSETX - (uae_u32) &regs; + LDR_rRI(REG_WORK2, R_REGSTRUCT, offs); adr = readreg(adr, 4); d = writereg(d, 4); @@ -5393,13 +5323,8 @@ MENDFUNC(2,jnf_MEM_GETADR_OFF,(W4 d, RR4 adr)) MIDFUNC(2,jnf_MEM_GETADR24_OFF,(W4 d, RR4 adr)) { -#ifdef ARMV6T2 - MOVW_ri16(REG_WORK2, NATMEM_OFFSETX); - MOVT_ri16(REG_WORK2, 
NATMEM_OFFSETX >> 16); -#else - uae_s32 offs = get_data_natmem(); - LDR_rRI(REG_WORK2, RPC_INDEX, offs); -#endif + uae_s32 offs = (uae_u32)&NATMEM_OFFSETX - (uae_u32) &regs; + LDR_rRI(REG_WORK2, R_REGSTRUCT, offs); adr = readreg(adr, 4); d = writereg(d, 4); diff --git a/src/jit/compemu_support.cpp b/src/jit/compemu_support.cpp index 2f2d6b33..0d70883d 100644 --- a/src/jit/compemu_support.cpp +++ b/src/jit/compemu_support.cpp @@ -81,7 +81,7 @@ static clock_t emul_end_time = 0; #endif #ifdef PROFILE_UNTRANSLATED_INSNS -static int untranslated_top_ten = 30; +static int untranslated_top_ten = 20; static uae_u32 raw_cputbl_count[65536] = { 0, }; static uae_u16 opcode_nums[65536]; @@ -92,7 +92,7 @@ static int untranslated_compfn(const void *e1, const void *e2) } #endif -#define NATMEM_OFFSETX (uae_u32)natmem_offset +#define NATMEM_OFFSETX regs.natmem_offset static compop_func *compfunctbl[65536]; static compop_func *nfcompfunctbl[65536]; @@ -735,8 +735,6 @@ static uae_u32 data_wasted = 0; static uae_u32 data_buffers_used = 0; #endif -static uae_s32 data_natmem_pos = 0; - STATIC_INLINE void compemu_raw_branch(IMM d); STATIC_INLINE void data_check_end(uae_s32 n, uae_s32 codesize) @@ -753,8 +751,6 @@ STATIC_INLINE void data_check_end(uae_s32 n, uae_s32 codesize) data_writepos = get_target_noopt(); data_endpos = data_writepos + DATA_BUFFER_SIZE; set_target(get_target_noopt() + DATA_BUFFER_SIZE); - - data_natmem_pos = 0; } } @@ -789,15 +785,6 @@ STATIC_INLINE uae_s32 get_data_offset(uae_s32 t) return t - (uae_s32)get_target_noopt() - 8; } -STATIC_INLINE uae_s32 get_data_natmem(void) -{ - if(data_natmem_pos == 0 || (uae_s32)get_target_noopt() - data_natmem_pos >= DATA_BUFFER_MAXOFFSET) - { - data_natmem_pos = data_long(NATMEM_OFFSETX, 4); - } - return get_data_offset(data_natmem_pos); -} - STATIC_INLINE void reset_data_buffer(void) { data_writepos = 0; @@ -1407,26 +1394,27 @@ STATIC_INLINE void f_disassociate(int r) } - static int f_alloc_reg(int r, int willclobber) { int 
bestreg; if(r < 8) - bestreg = r + 8; // map real Amiga reg to ARM VFP reg 8-15 - else - bestreg = r - 8; // map FP_RESULT, FS1, FS2 or FS3 to ARM VFP reg 0-3 + bestreg = r + 8; // map real Amiga reg to ARM VFP reg 8-15 (call save) + else if(r == FP_RESULT) + bestreg = 6; // map FP_RESULT to ARM VFP reg 6 + else // FS1 + bestreg = 7; // map FS1 to ARM VFP reg 7 if (!willclobber) { if (live.fate[r].status == INMEM) { compemu_raw_fmov_rm(bestreg, (uintptr)live.fate[r].mem); - live.fate[r].status=CLEAN; + live.fate[r].status = CLEAN; } } else { live.fate[r].status = DIRTY; } - live.fate[r].realreg=bestreg; + live.fate[r].realreg = bestreg; live.fat[bestreg].holds = r; live.fat[bestreg].nholds = 1; @@ -1533,24 +1521,6 @@ static scratch_t scratch; * Support functions exposed to newcpu * ********************************************************************/ -static void compiler_init(void) -{ - static bool initialized = false; - if (initialized) - return; - - initialized = true; - -#ifdef PROFILE_UNTRANSLATED_INSNS - jit_log(" : gather statistics on untranslated insns count"); -#endif - -#ifdef PROFILE_COMPILE_TIME - jit_log(" : gather statistics on translation time"); - emul_start_time = clock(); -#endif -} - void compiler_exit(void) { #ifdef PROFILE_COMPILE_TIME @@ -1601,15 +1571,7 @@ void compiler_exit(void) dp = table68k + opcode_nums[i]; for (lookup = lookuptab; lookup->mnemo != (instrmnem)dp->mnemo; lookup++) ; - if(strcmp(lookup->name, "FPP") == 0 - || strcmp(lookup->name, "FBcc") == 0 - || strcmp(lookup->name, "DIVS") == 0 - || strcmp(lookup->name, "DIVU") == 0 - || strcmp(lookup->name, "DIVL") == 0) { - untranslated_top_ten++; // Ignore this - } - else - jit_log("%03d: %04x %10u %s", i, opcode_nums[i], count, lookup->name); + jit_log("%03d: %04x %10u %s", i, opcode_nums[i], count, lookup->name); } #endif } @@ -1749,8 +1711,7 @@ void freescratch(void) for (i = S1; i < VREGS; i++) forget_about(i); - for (i = FS1; i <= FS3; i++) // only FS1-FS3 - f_forget_about(i); 
+ f_forget_about(FS1); } /******************************************************************** @@ -1784,9 +1745,10 @@ static void flush_all(void) tomem(i); } } - for (i = FP_RESULT; i <= FS3; i++) // only FP_RESULT and FS1-FS3, FP0-FP7 are call save - if (f_isinreg(i)) - f_evict(i); + if (f_isinreg(FP_RESULT)) + f_evict(FP_RESULT); + if (f_isinreg(FS1)) + f_evict(FS1); } /* Make sure all registers that will get clobbered by a call are @@ -1808,7 +1770,7 @@ static void prepare_for_call_2(void) free_nreg(i); } - for (i = 0; i < 4; i++) // only FP_RESULT and FS1-FS3, FP0-FP7 are call save + for (i = 6; i <= 7; i++) // only FP_RESULT and FS1, FP0-FP7 are call save if (live.fat[i].nholds > 0) f_free_nreg(i); @@ -1915,6 +1877,14 @@ STATIC_INLINE void writemem_real_clobber(int address, int source, int size) forget_about(source); } +void writeword_clobber(int address, int source, int tmp) +{ + if (special_mem & S_WRITE) + writemem_special(address, source, 16, 2, tmp); + else + writemem_real_clobber(address, source, 2); +} + void writelong_clobber(int address, int source, int tmp) { if (special_mem & S_WRITE) @@ -2084,18 +2054,6 @@ void set_cache_state(int enabled) letit = enabled; } -int get_cache_state(void) -{ - return letit; -} - -uae_u32 get_jitted_size(void) -{ - if (compiled_code) - return current_compile_p - compiled_code; - return 0; -} - void alloc_cache(void) { if (compiled_code) { @@ -2900,7 +2858,7 @@ void compile_block(cpu_history* pc_hist, int blocklen, int totcycles) t1 = taken_pc_p; t2 = next_pc_p; if(cc < NATIVE_CC_AL) - cc = branch_cc^1; + cc = branch_cc^1; else if(cc > NATIVE_CC_AL) cc = 0x10 | (branch_cc ^ 0xf); } @@ -2941,40 +2899,40 @@ void compile_block(cpu_history* pc_hist, int blocklen, int totcycles) else { if (was_comp) { - flush(1); + flush(1); } - /* Let's find out where next_handler is... */ - if (was_comp && isinreg(PC_P)) { + /* Let's find out where next_handler is... 
*/ + if (was_comp && isinreg(PC_P)) { #if defined(CPU_arm) && !defined(ARMV6T2) data_check_end(4, 52); #endif - r = live.state[PC_P].realreg; - compemu_raw_endblock_pc_inreg(r, scaled_cycles(totcycles)); - } + r = live.state[PC_P].realreg; + compemu_raw_endblock_pc_inreg(r, scaled_cycles(totcycles)); + } else if (was_comp && isconst(PC_P)) { - uintptr v = live.state[PC_P].val; - uae_u32* tba; - blockinfo* tbi; - - tbi = get_blockinfo_addr_new((void*)v, 1); - match_states(tbi); + uintptr v = live.state[PC_P].val; + uae_u32* tba; + blockinfo* tbi; + + tbi = get_blockinfo_addr_new((void*)v, 1); + match_states(tbi); #if defined(CPU_arm) && !defined(ARMV6T2) data_check_end(4, 56); #endif - compemu_raw_endblock_pc_isconst(scaled_cycles(totcycles), v); - tba = (uae_u32*)get_target(); - emit_jmp_target(get_handler(v)); - create_jmpdep(bi, 0, tba, v); + compemu_raw_endblock_pc_isconst(scaled_cycles(totcycles), v); + tba = (uae_u32*)get_target(); + emit_jmp_target(get_handler(v)); + create_jmpdep(bi, 0, tba, v); } else { - r = REG_PC_TMP; - compemu_raw_mov_l_rm(r, (uintptr)®s.pc_p); + r = REG_PC_TMP; + compemu_raw_mov_l_rm(r, (uintptr)®s.pc_p); #if defined(CPU_arm) && !defined(ARMV6T2) data_check_end(4, 52); #endif - compemu_raw_endblock_pc_inreg(r, scaled_cycles(totcycles)); + compemu_raw_endblock_pc_inreg(r, scaled_cycles(totcycles)); } } } diff --git a/src/osdep/amiberry_mem.cpp b/src/osdep/amiberry_mem.cpp index 442ea38d..c0e29dd6 100644 --- a/src/osdep/amiberry_mem.cpp +++ b/src/osdep/amiberry_mem.cpp @@ -15,22 +15,21 @@ #include -uae_u8* natmem_offset = nullptr; static uae_u32 natmem_size; uae_u32 max_z3fastmem; /* JIT can access few bytes outside of memory block of it executes code at the very end of memory block */ #define BARRIER 32 -static uae_u8* additional_mem = (uae_u8*)MAP_FAILED; -#define ADDITIONAL_MEMSIZE ((128 + 16) * 1024 * 1024) +static uae_u8* additional_mem = (uae_u8*) MAP_FAILED; +#define ADDITIONAL_MEMSIZE (128 + 16) * 1024 * 1024 -static uae_u8* 
a3000_mem = (uae_u8*)MAP_FAILED; +static uae_u8* a3000_mem = (uae_u8*) MAP_FAILED; static int a3000_totalsize = 0; #define A3000MEM_START 0x08000000 -static unsigned int lastLowSize = 0; -static unsigned int lastHighSize = 0; +static int lastLowSize = 0; +static int lastHighSize = 0; int z3base_adr = 0; @@ -38,26 +37,26 @@ int z3base_adr = 0; void free_AmigaMem(void) { - if (natmem_offset != nullptr) - { + if(regs.natmem_offset != 0) + { #ifdef AMIBERRY - munmap(natmem_offset, natmem_size + BARRIER); + munmap(regs.natmem_offset, natmem_size + BARRIER); #else - free(natmem_offset); + free(regs.natmem_offset); #endif - natmem_offset = nullptr; - } - if (additional_mem != MAP_FAILED) - { - munmap(additional_mem, ADDITIONAL_MEMSIZE + BARRIER); - additional_mem = (uae_u8*)MAP_FAILED; - } - if (a3000_mem != MAP_FAILED) - { - munmap(a3000_mem, a3000_totalsize); - a3000_mem = (uae_u8*)MAP_FAILED; - a3000_totalsize = 0; - } + regs.natmem_offset = 0; + } + if(additional_mem != MAP_FAILED) + { + munmap(additional_mem, ADDITIONAL_MEMSIZE + BARRIER); + additional_mem = (uae_u8*) MAP_FAILED; + } + if(a3000_mem != MAP_FAILED) + { + munmap(a3000_mem, a3000_totalsize); + a3000_mem = (uae_u8*) MAP_FAILED; + a3000_totalsize = 0; + } } @@ -67,140 +66,137 @@ void alloc_AmigaMem(void) uae_u64 total; int max_allowed_mman; - free_AmigaMem(); + free_AmigaMem(); set_expamem_z3_hack_mode(Z3MAPPING_AUTO); - // First attempt: allocate 16 MB for all memory in 24-bit area - // and additional mem for Z3 and RTG at correct offset - natmem_size = 16 * 1024 * 1024; + // First attempt: allocate 16 MB for all memory in 24-bit area + // and additional mem for Z3 and RTG at correct offset + natmem_size = 16 * 1024 * 1024; #ifdef AMIBERRY - // address returned by valloc() too high for later mmap() calls. Use mmap() also for first area. 
- natmem_offset = (uae_u8*)mmap(reinterpret_cast(0x20000000), natmem_size + BARRIER, - PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); + // address returned by valloc() too high for later mmap() calls. Use mmap() also for first area. + regs.natmem_offset = (uae_u8*) mmap((void *)0x20000000, natmem_size + BARRIER, + PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); #else - natmem_offset = (uae_u8*)valloc(natmem_size + BARRIER); + regs.natmem_offset = (uae_u8*)valloc (natmem_size + BARRIER); #endif - max_z3fastmem = ADDITIONAL_MEMSIZE - (16 * 1024 * 1024); - if (!natmem_offset) { + max_z3fastmem = ADDITIONAL_MEMSIZE - (16 * 1024 * 1024); + if (!regs.natmem_offset) { write_log("Can't allocate 16M of virtual address space!?\n"); - abort(); - } - // FIXME This part of code caused crash on Android devices -#ifndef ANDROID - additional_mem = (uae_u8*)mmap(natmem_offset + Z3BASE_REAL, ADDITIONAL_MEMSIZE + BARRIER, - PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); - if (additional_mem != MAP_FAILED) - { - // Allocation successful -> we can use natmem_offset for entire memory access at real address - changed_prefs.z3autoconfig_start = currprefs.z3autoconfig_start = Z3BASE_REAL; - z3base_adr = Z3BASE_REAL; - write_log("Allocated 16 MB for 24-bit area (0x%08x) and %d MB for Z3 and RTG at real address (0x%08x - 0x%08x)\n", - natmem_offset, ADDITIONAL_MEMSIZE / (1024 * 1024), additional_mem, additional_mem + ADDITIONAL_MEMSIZE + BARRIER); - set_expamem_z3_hack_mode(Z3MAPPING_REAL); - return; + abort(); } + additional_mem = (uae_u8*) mmap(regs.natmem_offset + Z3BASE_REAL, ADDITIONAL_MEMSIZE + BARRIER, + PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); + if(additional_mem != MAP_FAILED) + { + // Allocation successful -> we can use natmem_offset for entire memory access at real address + changed_prefs.z3autoconfig_start = currprefs.z3autoconfig_start = Z3BASE_REAL; + z3base_adr = 
Z3BASE_REAL; + write_log("Allocated 16 MB for 24-bit area (0x%08x) and %d MB for Z3 and RTG at real address (0x%08x - 0x%08x)\n", + regs.natmem_offset, ADDITIONAL_MEMSIZE / (1024 * 1024), additional_mem, additional_mem + ADDITIONAL_MEMSIZE + BARRIER); + set_expamem_z3_hack_mode(Z3MAPPING_REAL); + return; + } - additional_mem = (uae_u8*)mmap(natmem_offset + Z3BASE_UAE, ADDITIONAL_MEMSIZE + BARRIER, - PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); - if (additional_mem != MAP_FAILED) - { - // Allocation successful -> we can use natmem_offset for entire memory access at fake address - changed_prefs.z3autoconfig_start = currprefs.z3autoconfig_start = Z3BASE_UAE; - z3base_adr = Z3BASE_UAE; - write_log("Allocated 16 MB for 24-bit area (0x%08x) and %d MB for Z3 and RTG at fake address (0x%08x - 0x%08x)\n", - natmem_offset, ADDITIONAL_MEMSIZE / (1024 * 1024), additional_mem, additional_mem + ADDITIONAL_MEMSIZE + BARRIER); - set_expamem_z3_hack_mode(Z3MAPPING_UAE); - return; - } -#endif + additional_mem = (uae_u8*) mmap(regs.natmem_offset + Z3BASE_UAE, ADDITIONAL_MEMSIZE + BARRIER, + PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); + if(additional_mem != MAP_FAILED) + { + // Allocation successful -> we can use natmem_offset for entire memory access at fake address + changed_prefs.z3autoconfig_start = currprefs.z3autoconfig_start = Z3BASE_UAE; + z3base_adr = Z3BASE_UAE; + write_log("Allocated 16 MB for 24-bit area (0x%08x) and %d MB for Z3 and RTG at fake address (0x%08x - 0x%08x)\n", + regs.natmem_offset, ADDITIONAL_MEMSIZE / (1024 * 1024), additional_mem, additional_mem + ADDITIONAL_MEMSIZE + BARRIER); + set_expamem_z3_hack_mode(Z3MAPPING_UAE); + return; + } #ifdef AMIBERRY - munmap(natmem_offset, natmem_size + BARRIER); + munmap(regs.natmem_offset, natmem_size + BARRIER); #else - free(natmem_offset); + free(regs.natmem_offset); #endif + + // Next attempt: allocate huge memory block for entire area + natmem_size = 
ADDITIONAL_MEMSIZE + 256 * 1024 * 1024; + regs.natmem_offset = (uae_u8*)valloc (natmem_size + BARRIER); + if(regs.natmem_offset) + { + // Allocation successful + changed_prefs.z3autoconfig_start = currprefs.z3autoconfig_start = Z3BASE_UAE; + z3base_adr = Z3BASE_UAE; + write_log("Allocated %d MB for entire memory\n", natmem_size / (1024 * 1024)); + return; + } - // Next attempt: allocate huge memory block for entire area - natmem_size = ADDITIONAL_MEMSIZE + 256 * 1024 * 1024; - natmem_offset = (uae_u8*)valloc(natmem_size + BARRIER); - if (natmem_offset) - { - // Allocation successful - changed_prefs.z3autoconfig_start = currprefs.z3autoconfig_start = Z3BASE_UAE; - z3base_adr = Z3BASE_UAE; - write_log("Allocated %d MB for entire memory\n", natmem_size / (1024 * 1024)); - return; - } - - // No mem for Z3 or RTG at all + // No mem for Z3 or RTG at all natmem_size = 16 * 1024 * 1024; - natmem_offset = (uae_u8*)valloc(natmem_size + BARRIER); + regs.natmem_offset = (uae_u8*)valloc (natmem_size + BARRIER); - if (!natmem_offset) { + if (!regs.natmem_offset) { write_log("Can't allocate 16M of virtual address space!?\n"); - abort(); + abort(); } - changed_prefs.z3autoconfig_start = currprefs.z3autoconfig_start = 0x00000000; // No mem for Z3 - z3base_adr = 0x00000000; - max_z3fastmem = 0; + changed_prefs.z3autoconfig_start = currprefs.z3autoconfig_start = 0x00000000; // No mem for Z3 + z3base_adr = 0x00000000; + max_z3fastmem = 0; - write_log("Reserved: %p-%p (0x%08x %dM)\n", natmem_offset, (uae_u8*)natmem_offset + natmem_size, + write_log("Reserved: %p-%p (0x%08x %dM)\n", regs.natmem_offset, (uae_u8*)regs.natmem_offset + natmem_size, natmem_size, natmem_size >> 20); } -static bool HandleA3000Mem(unsigned int lowsize, unsigned int highsize) +static bool HandleA3000Mem(int lowsize, int highsize) { - auto result = true; + bool result = true; + + if(lowsize == lastLowSize && highsize == lastHighSize) + return result; + + if(a3000_mem != MAP_FAILED) + { + 
write_log("HandleA3000Mem(): Free A3000 memory (0x%08x). %d MB.\n", a3000_mem, a3000_totalsize / (1024 * 1024)); + munmap(a3000_mem, a3000_totalsize); + a3000_mem = (uae_u8*) MAP_FAILED; + a3000_totalsize = 0; + lastLowSize = 0; + lastHighSize = 0; + } + if(lowsize + highsize > 0) + { + // Try to get memory for A3000 motherboard + write_log("Try to get A3000 memory at correct place (0x%08x). %d MB and %d MB.\n", A3000MEM_START, + lowsize / (1024 * 1024), highsize / (1024 * 1024)); + a3000_totalsize = lowsize + highsize; + a3000_mem = (uae_u8*) mmap(regs.natmem_offset + (A3000MEM_START - lowsize), a3000_totalsize, + PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); + if(a3000_mem != MAP_FAILED) + { + lastLowSize = lowsize; + lastHighSize = highsize; + write_log(_T("Succeeded: location at 0x%08x (Amiga: 0x%08x)\n"), a3000_mem, (A3000MEM_START - lowsize)); + } + else + { + write_log("Failed.\n"); + a3000_totalsize = 0; + result = false; + } + } - if (lowsize == lastLowSize && highsize == lastHighSize) - return result; - - if (a3000_mem != MAP_FAILED) - { - write_log("HandleA3000Mem(): Free A3000 memory (0x%08x). %d MB.\n", a3000_mem, a3000_totalsize / (1024 * 1024)); - munmap(a3000_mem, a3000_totalsize); - a3000_mem = (uae_u8*)MAP_FAILED; - a3000_totalsize = 0; - lastLowSize = 0; - lastHighSize = 0; - } - if (lowsize + highsize > 0) - { - // Try to get memory for A3000 motherboard - write_log("Try to get A3000 memory at correct place (0x%08x). 
%d MB and %d MB.\n", A3000MEM_START, - lowsize / (1024 * 1024), highsize / (1024 * 1024)); - a3000_totalsize = lowsize + highsize; - a3000_mem = (uae_u8*)mmap(natmem_offset + (A3000MEM_START - lowsize), a3000_totalsize, - PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); - if (a3000_mem != MAP_FAILED) - { - lastLowSize = lowsize; - lastHighSize = highsize; - write_log(_T("Succeeded: location at 0x%08x (Amiga: 0x%08x)\n"), a3000_mem, (A3000MEM_START - lowsize)); - } - else - { - write_log("Failed.\n"); - a3000_totalsize = 0; - result = false; - } - } - - return result; + return result; } static bool A3000MemAvailable(void) { - return (a3000_mem != MAP_FAILED); + return (a3000_mem != MAP_FAILED); } bool uae_mman_info(addrbank *ab, struct uae_mman_data *md) { - auto got = false; - auto readonly = false; + bool got = false; + bool readonly = false; uaecptr start; uae_u32 size = ab->reserved_size; uae_u32 readonlysize = size; @@ -211,173 +207,139 @@ bool uae_mman_info(addrbank *ab, struct uae_mman_data *md) got = true; if (expansion_get_autoconfig_by_address(&currprefs, ab->start) && !expansion_get_autoconfig_by_address(&currprefs, ab->start + size)) barrier = true; - } - else if (!_tcscmp(ab->label, _T("*B"))) { + } else if (!_tcscmp(ab->label, _T("*B"))) { start = ab->start; got = true; barrier = true; - } - else if (!_tcscmp(ab->label, _T("chip"))) { + } else if (!_tcscmp(ab->label, _T("chip"))) { start = 0; got = true; if (!expansion_get_autoconfig_by_address(&currprefs, 0x00200000) && currprefs.chipmem_size == 2 * 1024 * 1024) barrier = true; if (currprefs.chipmem_size > 2 * 1024 * 1024) barrier = true; - } - else if (!_tcscmp(ab->label, _T("kick"))) { + } else if (!_tcscmp(ab->label, _T("kick"))) { start = 0xf80000; got = true; barrier = true; readonly = true; - } - else if (!_tcscmp(ab->label, _T("rom_a8"))) { + } else if (!_tcscmp(ab->label, _T("rom_a8"))) { start = 0xa80000; got = true; readonly = true; - } - else if (!_tcscmp(ab->label, 
_T("rom_e0"))) { + } else if (!_tcscmp(ab->label, _T("rom_e0"))) { start = 0xe00000; got = true; readonly = true; - } - else if (!_tcscmp(ab->label, _T("rom_f0"))) { + } else if (!_tcscmp(ab->label, _T("rom_f0"))) { start = 0xf00000; got = true; readonly = true; - } - else if (!_tcscmp(ab->label, _T("rom_f0_ppc"))) { + } else if (!_tcscmp(ab->label, _T("rom_f0_ppc"))) { // this is flash and also contains IO start = 0xf00000; got = true; readonly = false; - } - else if (!_tcscmp(ab->label, _T("rtarea"))) { + } else if (!_tcscmp(ab->label, _T("rtarea"))) { start = rtarea_base; got = true; readonly = true; readonlysize = RTAREA_TRAPS; - } - else if (!_tcscmp(ab->label, _T("ramsey_low"))) { - if (ab->reserved_size != lastLowSize) - HandleA3000Mem(ab->reserved_size, lastHighSize); - if (A3000MemAvailable()) { - start = a3000lmem_bank.start; - got = true; - } - } - else if (!_tcscmp(ab->label, _T("csmk1_maprom"))) { + } else if (!_tcscmp(ab->label, _T("ramsey_low"))) { + if(ab->reserved_size != lastLowSize) + HandleA3000Mem(ab->reserved_size, lastHighSize); + if(A3000MemAvailable()) { + start = a3000lmem_bank.start; + got = true; + } + } else if (!_tcscmp(ab->label, _T("csmk1_maprom"))) { start = 0x07f80000; got = true; - } - else if (!_tcscmp(ab->label, _T("25bitram"))) { + } else if (!_tcscmp(ab->label, _T("25bitram"))) { start = 0x01000000; got = true; - } - else if (!_tcscmp(ab->label, _T("ramsey_high"))) { - if (ab->reserved_size != lastHighSize) - HandleA3000Mem(lastLowSize, ab->reserved_size); - if (A3000MemAvailable()) { - start = 0x08000000; - got = true; - } - } - else if (!_tcscmp(ab->label, _T("dkb"))) { + } else if (!_tcscmp(ab->label, _T("ramsey_high"))) { + if(ab->reserved_size != lastHighSize) + HandleA3000Mem(lastLowSize, ab->reserved_size); + if(A3000MemAvailable()) { + start = 0x08000000; + got = true; + } + } else if (!_tcscmp(ab->label, _T("dkb"))) { start = 0x10000000; got = true; - } - else if (!_tcscmp(ab->label, _T("fusionforty"))) { + } else if 
(!_tcscmp(ab->label, _T("fusionforty"))) { start = 0x11000000; got = true; - } - else if (!_tcscmp(ab->label, _T("blizzard_40"))) { + } else if (!_tcscmp(ab->label, _T("blizzard_40"))) { start = 0x40000000; got = true; - } - else if (!_tcscmp(ab->label, _T("blizzard_48"))) { + } else if (!_tcscmp(ab->label, _T("blizzard_48"))) { start = 0x48000000; got = true; - } - else if (!_tcscmp(ab->label, _T("blizzard_68"))) { + } else if (!_tcscmp(ab->label, _T("blizzard_68"))) { start = 0x68000000; got = true; - } - else if (!_tcscmp(ab->label, _T("blizzard_70"))) { + } else if (!_tcscmp(ab->label, _T("blizzard_70"))) { start = 0x70000000; got = true; - } - else if (!_tcscmp(ab->label, _T("cyberstorm"))) { + } else if (!_tcscmp(ab->label, _T("cyberstorm"))) { start = 0x0c000000; got = true; - } - else if (!_tcscmp(ab->label, _T("cyberstormmaprom"))) { + } else if (!_tcscmp(ab->label, _T("cyberstormmaprom"))) { start = 0xfff00000; got = true; - } - else if (!_tcscmp(ab->label, _T("bogo"))) { + } else if (!_tcscmp(ab->label, _T("bogo"))) { start = 0x00C00000; got = true; if (currprefs.bogomem_size <= 0x100000) barrier = true; - } - else if (!_tcscmp(ab->label, _T("custmem1"))) { + } else if (!_tcscmp(ab->label, _T("custmem1"))) { start = currprefs.custom_memory_addrs[0]; got = true; - } - else if (!_tcscmp(ab->label, _T("custmem2"))) { + } else if (!_tcscmp(ab->label, _T("custmem2"))) { start = currprefs.custom_memory_addrs[1]; got = true; - } - else if (!_tcscmp(ab->label, _T("hrtmem"))) { + } else if (!_tcscmp(ab->label, _T("hrtmem"))) { start = 0x00a10000; got = true; - } - else if (!_tcscmp(ab->label, _T("arhrtmon"))) { + } else if (!_tcscmp(ab->label, _T("arhrtmon"))) { start = 0x00800000; barrier = true; got = true; - } - else if (!_tcscmp(ab->label, _T("xpower_e2"))) { + } else if (!_tcscmp(ab->label, _T("xpower_e2"))) { start = 0x00e20000; barrier = true; got = true; - } - else if (!_tcscmp(ab->label, _T("xpower_f2"))) { + } else if (!_tcscmp(ab->label, 
_T("xpower_f2"))) { start = 0x00f20000; barrier = true; got = true; - } - else if (!_tcscmp(ab->label, _T("nordic_f0"))) { + } else if (!_tcscmp(ab->label, _T("nordic_f0"))) { start = 0x00f00000; barrier = true; got = true; - } - else if (!_tcscmp(ab->label, _T("nordic_f4"))) { + } else if (!_tcscmp(ab->label, _T("nordic_f4"))) { start = 0x00f40000; barrier = true; got = true; - } - else if (!_tcscmp(ab->label, _T("nordic_f6"))) { + } else if (!_tcscmp(ab->label, _T("nordic_f6"))) { start = 0x00f60000; barrier = true; got = true; - } - else if (!_tcscmp(ab->label, _T("superiv_b0"))) { + } else if (!_tcscmp(ab->label, _T("superiv_b0"))) { start = 0x00b00000; barrier = true; got = true; - } - else if (!_tcscmp(ab->label, _T("superiv_d0"))) { + } else if (!_tcscmp(ab->label, _T("superiv_d0"))) { start = 0x00d00000; barrier = true; got = true; - } - else if (!_tcscmp(ab->label, _T("superiv_e0"))) { + } else if (!_tcscmp(ab->label, _T("superiv_e0"))) { start = 0x00e00000; barrier = true; got = true; - } - else if (!_tcscmp(ab->label, _T("ram_a8"))) { + } else if (!_tcscmp(ab->label, _T("ram_a8"))) { start = 0x00a80000; barrier = true; got = true; @@ -396,8 +358,8 @@ bool uae_mman_info(addrbank *ab, struct uae_mman_data *md) return got; } -ATTRIBUTE_NO_SANITIZE_ADDRESS -bool mapped_malloc(addrbank *ab) + +bool mapped_malloc (addrbank *ab) { if (ab->allocated_size) { write_log(_T("mapped_malloc with memory bank '%s' already allocated!?\n"), ab->name); @@ -415,7 +377,7 @@ bool mapped_malloc(addrbank *ab) uaecptr start = ab->start; if (uae_mman_info(ab, &md)) { start = md.start; - ab->baseaddr = natmem_offset + start; + ab->baseaddr = regs.natmem_offset + start; } if (ab->baseaddr) { @@ -424,54 +386,56 @@ bool mapped_malloc(addrbank *ab) put_long_host(ab->baseaddr + ab->reserved_size, 0x4afc4afc); } ab->allocated_size = ab->reserved_size; - write_log("mapped_malloc(): 0x%08x - 0x%08x (0x%08x - 0x%08x) -> %s (%s)\n", - ab->baseaddr - natmem_offset, ab->baseaddr - 
natmem_offset + ab->allocated_size, - ab->baseaddr, ab->baseaddr + ab->allocated_size, ab->name, ab->label); + write_log("mapped_malloc(): 0x%08x - 0x%08x (0x%08x - 0x%08x) -> %s (%s)\n", + ab->baseaddr - regs.natmem_offset, ab->baseaddr - regs.natmem_offset + ab->allocated_size, + ab->baseaddr, ab->baseaddr + ab->allocated_size, ab->name, ab->label); } - ab->flags |= ABFLAG_DIRECTMAP; - - return (ab->baseaddr != nullptr); + ab->flags |= ABFLAG_DIRECTMAP; + + return (ab->baseaddr != NULL); } -ATTRIBUTE_NO_SANITIZE_ADDRESS -void mapped_free(addrbank *ab) + +void mapped_free (addrbank *ab) { - if (ab->label != nullptr && !strcmp(ab->label, "filesys") && ab->baseaddr != nullptr) { - free(ab->baseaddr); - write_log("mapped_free(): 0x%08x - 0x%08x (0x%08x - 0x%08x) -> %s (%s)\n", - ab->baseaddr - natmem_offset, ab->baseaddr - natmem_offset + ab->allocated_size, - ab->baseaddr, ab->baseaddr + ab->allocated_size, ab->name, ab->label); - } - ab->baseaddr = nullptr; - ab->allocated_size = 0; + if(ab->label != NULL && !strcmp(ab->label, "filesys") && ab->baseaddr != NULL) { + free(ab->baseaddr); + write_log("mapped_free(): 0x%08x - 0x%08x (0x%08x - 0x%08x) -> %s (%s)\n", + ab->baseaddr - regs.natmem_offset, ab->baseaddr - regs.natmem_offset + ab->allocated_size, + ab->baseaddr, ab->baseaddr + ab->allocated_size, ab->name, ab->label); + } + ab->baseaddr = NULL; + ab->allocated_size = 0; } -void protect_roms(bool protect) -{ - /* - If this code is enabled, we can't switch back from JIT to nonJIT emulation... +void protect_roms (bool protect) +{ +/* + If this code is enabled, we can't switch back from JIT to nonJIT emulation... 
+ if (protect) { - // protect only if JIT enabled, always allow unprotect - if (!currprefs.cachesize) - return; + // protect only if JIT enabled, always allow unprotect + if (!currprefs.cachesize) + return; } - // Protect all regions, which contains ROM - if(extendedkickmem_bank.baseaddr != NULL) - mprotect(extendedkickmem_bank.baseaddr, 0x80000, protect ? PROT_READ : PROT_READ | PROT_WRITE); - if(extendedkickmem2_bank.baseaddr != NULL) - mprotect(extendedkickmem2_bank.baseaddr, 0x80000, protect ? PROT_READ : PROT_READ | PROT_WRITE); - if(kickmem_bank.baseaddr != NULL) - mprotect(kickmem_bank.baseaddr, 0x80000, protect ? PROT_READ : PROT_READ | PROT_WRITE); - if(rtarea != NULL) - mprotect(rtarea, RTAREA_SIZE, protect ? PROT_READ : PROT_READ | PROT_WRITE); - if(filesysory != NULL) - mprotect(filesysory, 0x10000, protect ? PROT_READ : PROT_READ | PROT_WRITE); - */ + // Protect all regions, which contains ROM + if(extendedkickmem_bank.baseaddr != NULL) + mprotect(extendedkickmem_bank.baseaddr, 0x80000, protect ? PROT_READ : PROT_READ | PROT_WRITE); + if(extendedkickmem2_bank.baseaddr != NULL) + mprotect(extendedkickmem2_bank.baseaddr, 0x80000, protect ? PROT_READ : PROT_READ | PROT_WRITE); + if(kickmem_bank.baseaddr != NULL) + mprotect(kickmem_bank.baseaddr, 0x80000, protect ? PROT_READ : PROT_READ | PROT_WRITE); + if(rtarea != NULL) + mprotect(rtarea, RTAREA_SIZE, protect ? PROT_READ : PROT_READ | PROT_WRITE); + if(filesysory != NULL) + mprotect(filesysory, 0x10000, protect ? 
PROT_READ : PROT_READ | PROT_WRITE); +*/ } -static int doinit_shm() + +static int doinit_shm (void) { expansion_scan_autoconfig(&currprefs, true); @@ -484,17 +448,17 @@ static uae_u32 ofastmem_size[MAX_RAM_BOARDS]; static uae_u32 ortgmem_size[MAX_RTG_BOARDS]; static int ortgmem_type[MAX_RTG_BOARDS]; -bool init_shm() +bool init_shm (void) { - auto changed = false; + bool changed = false; - for (auto i = 0; i < MAX_RAM_BOARDS; i++) { + for (int i = 0; i < MAX_RAM_BOARDS; i++) { if (oz3fastmem_size[i] != changed_prefs.z3fastmem[i].size) changed = true; if (ofastmem_size[i] != changed_prefs.fastmem[i].size) changed = true; } - for (auto i = 0; i < MAX_RTG_BOARDS; i++) { + for (int i = 0; i < MAX_RTG_BOARDS; i++) { if (ortgmem_size[i] != changed_prefs.rtgboards[i].rtgmem_size) changed = true; if (ortgmem_type[i] != changed_prefs.rtgboards[i].rtgmem_type) @@ -503,18 +467,18 @@ bool init_shm() if (!changed) return true; - for (auto i = 0; i < MAX_RAM_BOARDS; i++) { + for (int i = 0; i < MAX_RAM_BOARDS;i++) { oz3fastmem_size[i] = changed_prefs.z3fastmem[i].size; ofastmem_size[i] = changed_prefs.fastmem[i].size; } - for (auto i = 0; i < MAX_RTG_BOARDS; i++) { + for (int i = 0; i < MAX_RTG_BOARDS; i++) { ortgmem_size[i] = changed_prefs.rtgboards[i].rtgmem_size; ortgmem_type[i] = changed_prefs.rtgboards[i].rtgmem_type; } - if (doinit_shm() < 0) + if (doinit_shm () < 0) return false; - memory_hardreset(2); + memory_hardreset (2); return true; } diff --git a/src/osdep/gui/PanelCPU.cpp b/src/osdep/gui/PanelCPU.cpp index 49e848af..e709da1b 100644 --- a/src/osdep/gui/PanelCPU.cpp +++ b/src/osdep/gui/PanelCPU.cpp @@ -418,8 +418,14 @@ void RefreshPanelCPU() optFPUinternal->setEnabled(changed_prefs.cpu_model == 68040); chkFPUstrict->setSelected(changed_prefs.fpu_strict); - chkFPUJIT->setSelected(changed_prefs.compfpu); - chkFPUJIT->setEnabled(changed_prefs.cachesize > 0); + +#ifdef USE_JIT_FPU + chkFPUJIT->setSelected(changed_prefs.compfpu); + 
chkFPUJIT->setEnabled(changed_prefs.cachesize > 0); +#else + chkFPUJIT->setSelected(false); + chkFPUJIT->setEnabled(false); +#endif if (changed_prefs.m68k_speed == M68K_SPEED_7MHZ_CYCLES) opt7Mhz->setSelected(true); diff --git a/src/osdep/picasso96.cpp b/src/osdep/picasso96.cpp index 0672163a..c8d808f4 100644 --- a/src/osdep/picasso96.cpp +++ b/src/osdep/picasso96.cpp @@ -314,8 +314,6 @@ static void ShowSupportedResolutions (void) #endif -extern uae_u8 *natmem_offset; - static uae_u8 GetBytesPerPixel (uae_u32 RGBfmt) { switch (RGBfmt) @@ -596,7 +594,7 @@ static bool rtg_render (void) bool flushed = false; if (!doskip ()) - flushed = picasso_flushpixels (gfxmem_banks[0]->start + natmem_offset, picasso96_state.XYOffset - gfxmem_banks[0]->start); + flushed = picasso_flushpixels (gfxmem_banks[0]->start + regs.natmem_offset, picasso96_state.XYOffset - gfxmem_banks[0]->start); return flushed; } diff --git a/src/osdep/sigsegv_handler.cpp b/src/osdep/sigsegv_handler.cpp index 10a15c7b..5fb5f49e 100644 --- a/src/osdep/sigsegv_handler.cpp +++ b/src/osdep/sigsegv_handler.cpp @@ -1,28 +1,28 @@ /* -* sigsegv_linux_arm.cpp - x86_64 Linux SIGSEGV handler -* -* Copyright (c) 2014 Jens Heitmann ARAnyM dev team (see AUTHORS) -* -* Inspired by Bernie Meyer's UAE-JIT and Gwenole Beauchesne's Basilisk II-JIT -* -* This file is part of the ARAnyM project which builds a new and powerful -* TOS/FreeMiNT compatible virtual machine running on almost any hardware. -* -* ARAnyM is free software; you can redistribute it and/or modify -* it under the terms of the GNU General Public License as published by -* the Free Software Foundation; either version 2 of the License, or -* (at your option) any later version. -* -* ARAnyM is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -* GNU General Public License for more details. 
-* -* You should have received a copy of the GNU General Public License -* along with ARAnyM; if not, write to the Free Software -* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -* -*/ + * sigsegv_linux_arm.cpp - x86_64 Linux SIGSEGV handler + * + * Copyright (c) 2014 Jens Heitmann ARAnyM dev team (see AUTHORS) + * + * Inspired by Bernie Meyer's UAE-JIT and Gwenole Beauchesne's Basilisk II-JIT + * + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. + * + * ARAnyM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * ARAnyM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with ARAnyM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ #include "sysconfig.h" #include "sysdeps.h" @@ -78,27 +78,27 @@ enum type_size_t { SIZE_INT }; -enum style_type_t { - STYLE_SIGNED, - STYLE_UNSIGNED +enum style_type_t { + STYLE_SIGNED, + STYLE_UNSIGNED }; static int in_handler = 0; -static int max_signals = 200; +static int max_signals = 200; void init_max_signals(void) { #ifdef WITH_LOGGING - max_signals = 20; + max_signals = 20; #else - max_signals = 200; + max_signals = 200; #endif } enum { - ARM_REG_PC = 15, - ARM_REG_CPSR = 16 + ARM_REG_PC = 15, + ARM_REG_CPSR = 16 }; static const char * reg_names[] = { @@ -110,7 +110,7 @@ static const char * reg_names[] = { static int delete_trigger(blockinfo *bi, void *pc) { while (bi) { - if (bi->handler && (uae_u8*)bi->direct_handler <= pc && (uae_u8*)bi->nexthandler > pc) { + if (bi->handler && (uae_u8*)bi->direct_handler <= pc && (uae_u8*)bi->nexthandler > pc) { output_log(_T("JIT: Deleted trigger (0x%08x < 0x%08x < 0x%08x) 0x%08x\n"), bi->handler, pc, bi->nexthandler, bi->pc_p); invalidate_block(bi); @@ -130,183 +130,182 @@ static int delete_trigger(blockinfo *bi, void *pc) #define HANDLE_EXCEPTION_A4000RAM 2 -static int handle_exception(unsigned long *pregs, uintptr fault_addr) +static int handle_exception(unsigned long *pregs, uintptr fault_addr) { - int handled = HANDLE_EXCEPTION_NONE; + int handled = HANDLE_EXCEPTION_NONE; unsigned int *fault_pc = (unsigned int *)pregs[ARM_REG_PC]; - + if (fault_pc == 0) { - output_log(_T("PC is NULL.\n")); - return HANDLE_EXCEPTION_NONE; - } + output_log(_T("PC is NULL.\n")); + return HANDLE_EXCEPTION_NONE; + } // Check for exception in handler if (in_handler > 0) { - output_log(_T("Segmentation fault in handler.\n")); - return HANDLE_EXCEPTION_NONE; - } - ++in_handler; + output_log(_T("Segmentation fault in 
handler.\n")); + return HANDLE_EXCEPTION_NONE; + } + ++in_handler; - for (;;) { - // We analyse only exceptions from JIT - if (currprefs.cachesize == 0) { - output_log(_T("JIT not in use.\n")); - break; - } + for(;;) { + // We analyse only exceptions from JIT + if(currprefs.cachesize == 0) { + output_log(_T("JIT not in use.\n")); + break; + } - // Did the error happens in compiled code? - if ((uae_u8*)fault_pc >= compiled_code && (uae_u8*)fault_pc < current_compile_p) - output_log(_T("Error in compiled code.\n")); - else if ((uae_u8*)fault_pc >= popallspace && (uae_u8*)fault_pc < popallspace + POPALLSPACE_SIZE) - output_log(_T("Error in popallspace code.\n")); - else { - output_log(_T("Error not in JIT code.\n")); - break; - } + // Did the error happens in compiled code? + if ((uae_u8*)fault_pc >= compiled_code && (uae_u8*)fault_pc < current_compile_p) + output_log(_T("Error in compiled code.\n")); + else if((uae_u8*)fault_pc >= popallspace && (uae_u8*)fault_pc < popallspace + POPALLSPACE_SIZE) + output_log(_T("Error in popallspace code.\n")); + else { + output_log(_T("Error not in JIT code.\n")); + break; + } - // Get Amiga address of illegal memory address - uintptr amiga_addr = (uae_u32)fault_addr - (uae_u32)natmem_offset; - - // Check for stupid RAM detection of kickstart - if (a3000lmem_bank.allocated_size > 0 && amiga_addr >= a3000lmem_bank.start - 0x00100000 && amiga_addr < a3000lmem_bank.start - 0x00100000 + 8) { - output_log(_T(" Stupid kickstart detection for size of ramsey_low at 0x%08x.\n"), amiga_addr); - pregs[ARM_REG_PC] += 4; - handled = HANDLE_EXCEPTION_A4000RAM; - break; - } - - // Check for stupid RAM detection of kickstart - if (a3000hmem_bank.allocated_size > 0 && amiga_addr >= a3000hmem_bank.start + a3000hmem_bank.allocated_size && amiga_addr < a3000hmem_bank.start + a3000hmem_bank.allocated_size + 8) { - output_log(_T(" Stupid kickstart detection for size of ramsey_high at 0x%08x.\n"), amiga_addr); - pregs[ARM_REG_PC] += 4; - handled = 
HANDLE_EXCEPTION_A4000RAM; - break; - } - - // Get memory bank of address - addrbank *ab = &get_mem_bank(amiga_addr); - if (ab) - output_log(_T("JIT: Address bank: %s, address %08x\n"), ab->name ? ab->name : _T("NONE"), amiga_addr); - - // Analyse ARM instruction - const unsigned int opcode = fault_pc[0]; - transfer_type_t transfer_type = TYPE_UNKNOWN; - int transfer_size = SIZE_UNKNOWN; - int style = STYLE_UNSIGNED; + // Get Amiga address of illegal memory address + uintptr amiga_addr = (uae_u32) fault_addr - (uae_u32) regs.natmem_offset; + + // Check for stupid RAM detection of kickstart + if(a3000lmem_bank.allocated_size > 0 && amiga_addr >= a3000lmem_bank.start - 0x00100000 && amiga_addr < a3000lmem_bank.start - 0x00100000 + 8) { + output_log(_T(" Stupid kickstart detection for size of ramsey_low at 0x%08x.\n"), amiga_addr); + pregs[ARM_REG_PC] += 4; + handled = HANDLE_EXCEPTION_A4000RAM; + break; + } + + // Check for stupid RAM detection of kickstart + if(a3000hmem_bank.allocated_size > 0 && amiga_addr >= a3000hmem_bank.start + a3000hmem_bank.allocated_size && amiga_addr < a3000hmem_bank.start + a3000hmem_bank.allocated_size + 8) { + output_log(_T(" Stupid kickstart detection for size of ramsey_high at 0x%08x.\n"), amiga_addr); + pregs[ARM_REG_PC] += 4; + handled = HANDLE_EXCEPTION_A4000RAM; + break; + } + + // Get memory bank of address + addrbank *ab = &get_mem_bank(amiga_addr); + if (ab) + output_log(_T("JIT: Address bank: %s, address %08x\n"), ab->name ? 
ab->name : _T("NONE"), amiga_addr); + + // Analyse ARM instruction + const unsigned int opcode = fault_pc[0]; + transfer_type_t transfer_type = TYPE_UNKNOWN; + int transfer_size = SIZE_UNKNOWN; + int style = STYLE_UNSIGNED; output_log(_T("JIT: ARM opcode = 0x%08x\n"), opcode); + + // Handle load/store instructions only + switch ((opcode >> 25) & 7) { + case 0: // Halfword and Signed Data Transfer (LDRH, STRH, LDRSB, LDRSH) + // Determine transfer size (S/H bits) + switch ((opcode >> 5) & 3) { + case 0: // SWP instruction + output_log(_T("ARM: SWP Instruction, not supported (0x%08x)\n"), opcode); + break; + case 1: // Unsigned halfwords + transfer_size = SIZE_WORD; + break; + case 3: // Signed halfwords + style = STYLE_SIGNED; + transfer_size = SIZE_WORD; + break; + case 2: // Signed byte + style = STYLE_SIGNED; + transfer_size = SIZE_BYTE; + break; + } + break; - // Handle load/store instructions only - switch ((opcode >> 25) & 7) { - case 0: // Halfword and Signed Data Transfer (LDRH, STRH, LDRSB, LDRSH) - // Determine transfer size (S/H bits) - switch ((opcode >> 5) & 3) { - case 0: // SWP instruction - output_log(_T("ARM: SWP Instruction, not supported (0x%08x)\n"), opcode); - break; - case 1: // Unsigned halfwords - transfer_size = SIZE_WORD; - break; - case 3: // Signed halfwords - style = STYLE_SIGNED; - transfer_size = SIZE_WORD; - break; - case 2: // Signed byte - style = STYLE_SIGNED; - transfer_size = SIZE_BYTE; - break; - } - break; + case 2: + case 3: // Single Data Transfer (LDR, STR) + style = STYLE_UNSIGNED; + // Determine transfer size (B bit) + if (((opcode >> 22) & 1) == 1) + transfer_size = SIZE_BYTE; + else + transfer_size = SIZE_INT; + break; - case 2: - case 3: // Single Data Transfer (LDR, STR) - style = STYLE_UNSIGNED; - // Determine transfer size (B bit) - if (((opcode >> 22) & 1) == 1) - transfer_size = SIZE_BYTE; - else - transfer_size = SIZE_INT; - break; + default: + output_log(_T("ARM: Handling of instruction 0x%08x not supported.\n"), 
opcode); + } - default: - output_log(_T("ARM: Handling of instruction 0x%08x not supported.\n"), opcode); - } + // Determine transfer type (L bit) + if (((opcode >> 20) & 1) == 1) + transfer_type = TYPE_LOAD; + else + transfer_type = TYPE_STORE; - // Determine transfer type (L bit) - if (((opcode >> 20) & 1) == 1) - transfer_type = TYPE_LOAD; - else - transfer_type = TYPE_STORE; + // Get ARM register + int rd = (opcode >> 12) & 0xf; - // Get ARM register - int rd = (opcode >> 12) & 0xf; + output_log(_T("%s %s register %s\n"), + transfer_size == SIZE_BYTE ? _T("byte") : transfer_size == SIZE_WORD ? _T("word") : transfer_size == SIZE_INT ? _T("long") : _T("unknown"), + transfer_type == TYPE_LOAD ? _T("load to") : _T("store from"), + reg_names[rd]); + + if (transfer_size != SIZE_UNKNOWN) { + if (transfer_type == TYPE_LOAD) { + // Perform load via indirect memory call + uae_u32 oldval = pregs[rd]; + switch(transfer_size) { + case SIZE_BYTE: + pregs[rd] = style == STYLE_SIGNED ? (uae_s8)get_byte(amiga_addr) : (uae_u8)get_byte(amiga_addr); + break; - output_log(_T("%s %s register %s\n"), - transfer_size == SIZE_BYTE ? _T("byte") : transfer_size == SIZE_WORD ? _T("word") : transfer_size == SIZE_INT ? _T("long") : _T("unknown"), - transfer_type == TYPE_LOAD ? _T("load to") : _T("store from"), - reg_names[rd]); + case SIZE_WORD: + pregs[rd] = do_byteswap_16(style == STYLE_SIGNED ? (uae_s16)get_word(amiga_addr) : (uae_u16)get_word(amiga_addr)); + break; - if (transfer_size != SIZE_UNKNOWN) { - if (transfer_type == TYPE_LOAD) { - // Perform load via indirect memory call - uae_u32 oldval = pregs[rd]; - switch (transfer_size) { - case SIZE_BYTE: - pregs[rd] = style == STYLE_SIGNED ? 
(uae_s8)get_byte(amiga_addr) : (uae_u8)get_byte(amiga_addr); - break; + case SIZE_INT: + pregs[rd] = do_byteswap_32(get_long(amiga_addr)); + break; + } + output_log(_T("New value in %s: 0x%08x (old: 0x%08x)\n"), reg_names[rd], pregs[rd], oldval); + } else { + // Perform store via indirect memory call + switch(transfer_size) { + case SIZE_BYTE: { + put_byte(amiga_addr, pregs[rd]); + break; + } + case SIZE_WORD: { + put_word(amiga_addr, do_byteswap_16(pregs[rd])); + break; + } + case SIZE_INT: { + put_long(amiga_addr, do_byteswap_32(pregs[rd])); + break; + } + } + output_log(_T("Stored value from %s to 0x%08x\n"), reg_names[rd], amiga_addr); + } + + // Go to next instruction + pregs[ARM_REG_PC] += 4; + handled = HANDLE_EXCEPTION_OK; + + if (!delete_trigger(active, fault_pc)) { + /* Not found in the active list. Might be a rom routine that + * is in the dormant list */ + delete_trigger(dormant, fault_pc); + } + } + + break; + } - case SIZE_WORD: - pregs[rd] = do_byteswap_16(style == STYLE_SIGNED ? (uae_s16)get_word(amiga_addr) : (uae_u16)get_word(amiga_addr)); - break; - - case SIZE_INT: - pregs[rd] = do_byteswap_32(get_long(amiga_addr)); - break; - } - output_log(_T("New value in %s: 0x%08x (old: 0x%08x)\n"), reg_names[rd], pregs[rd], oldval); - } - else { - // Perform store via indirect memory call - switch (transfer_size) { - case SIZE_BYTE: { - put_byte(amiga_addr, pregs[rd]); - break; - } - case SIZE_WORD: { - put_word(amiga_addr, do_byteswap_16(pregs[rd])); - break; - } - case SIZE_INT: { - put_long(amiga_addr, do_byteswap_32(pregs[rd])); - break; - } - } - output_log(_T("Stored value from %s to 0x%08x\n"), reg_names[rd], amiga_addr); - } - - // Go to next instruction - pregs[ARM_REG_PC] += 4; - handled = HANDLE_EXCEPTION_OK; - - if (!delete_trigger(active, fault_pc)) { - /* Not found in the active list. 
Might be a rom routine that - * is in the dormant list */ - delete_trigger(dormant, fault_pc); - } - } - - break; - } - - in_handler--; + in_handler--; return handled; -} +} -void signal_segv(int signum, siginfo_t* info, void*ptr) +void signal_segv(int signum, siginfo_t* info, void*ptr) { - ucontext_t *ucontext = (ucontext_t*)ptr; - Dl_info dlinfo; + ucontext_t *ucontext = (ucontext_t*)ptr; + Dl_info dlinfo; - output_log(_T("--- New exception ---\n")); + output_log(_T("--- New exception ---\n")); #ifdef TRACER trace_end(); @@ -316,121 +315,121 @@ void signal_segv(int signum, siginfo_t* info, void*ptr) unsigned long *regs = &context->arm_r0; uintptr addr = (uintptr)info->si_addr; - int handled = handle_exception(regs, addr); + int handled = handle_exception(regs, addr); #if SHOW_DETAILS - if (handled != HANDLE_EXCEPTION_A4000RAM) { - if (signum == 4) - output_log(_T("Illegal Instruction\n")); - else - output_log(_T("Segmentation Fault\n")); - - output_log(_T("info.si_signo = %d\n"), signum); - output_log(_T("info.si_errno = %d\n"), info->si_errno); - output_log(_T("info.si_code = %d\n"), info->si_code); - output_log(_T("info.si_addr = %08x\n"), info->si_addr); - if (signum == 4) - output_log(_T(" value = 0x%08x\n"), *((uae_u32*)(info->si_addr))); - output_log(_T("r0 = 0x%08x\n"), ucontext->uc_mcontext.arm_r0); - output_log(_T("r1 = 0x%08x\n"), ucontext->uc_mcontext.arm_r1); - output_log(_T("r2 = 0x%08x\n"), ucontext->uc_mcontext.arm_r2); - output_log(_T("r3 = 0x%08x\n"), ucontext->uc_mcontext.arm_r3); - output_log(_T("r4 = 0x%08x\n"), ucontext->uc_mcontext.arm_r4); - output_log(_T("r5 = 0x%08x\n"), ucontext->uc_mcontext.arm_r5); - output_log(_T("r6 = 0x%08x\n"), ucontext->uc_mcontext.arm_r6); - output_log(_T("r7 = 0x%08x\n"), ucontext->uc_mcontext.arm_r7); - output_log(_T("r8 = 0x%08x\n"), ucontext->uc_mcontext.arm_r8); - output_log(_T("r9 = 0x%08x\n"), ucontext->uc_mcontext.arm_r9); - output_log(_T("r10 = 0x%08x\n"), ucontext->uc_mcontext.arm_r10); - 
output_log(_T("FP = 0x%08x\n"), ucontext->uc_mcontext.arm_fp); - output_log(_T("IP = 0x%08x\n"), ucontext->uc_mcontext.arm_ip); - output_log(_T("SP = 0x%08x\n"), ucontext->uc_mcontext.arm_sp); - output_log(_T("LR = 0x%08x\n"), ucontext->uc_mcontext.arm_lr); - output_log(_T("PC = 0x%08x\n"), ucontext->uc_mcontext.arm_pc); - output_log(_T("CPSR = 0x%08x\n"), ucontext->uc_mcontext.arm_cpsr); - output_log(_T("Fault Address = 0x%08x\n"), ucontext->uc_mcontext.fault_address); - output_log(_T("Trap no = 0x%08x\n"), ucontext->uc_mcontext.trap_no); - output_log(_T("Err Code = 0x%08x\n"), ucontext->uc_mcontext.error_code); - output_log(_T("Old Mask = 0x%08x\n"), ucontext->uc_mcontext.oldmask); - - void *getaddr = (void *)ucontext->uc_mcontext.arm_lr; - if (dladdr(getaddr, &dlinfo)) - output_log(_T("LR - 0x%08X: <%s> (%s)\n"), getaddr, dlinfo.dli_sname, dlinfo.dli_fname); - else - output_log(_T("LR - 0x%08X: symbol not found\n"), getaddr); - } + if(handled != HANDLE_EXCEPTION_A4000RAM) { + if(signum == 4) + output_log(_T("Illegal Instruction\n")); + else + output_log(_T("Segmentation Fault\n")); + + output_log(_T("info.si_signo = %d\n"), signum); + output_log(_T("info.si_errno = %d\n"), info->si_errno); + output_log(_T("info.si_code = %d\n"), info->si_code); + output_log(_T("info.si_addr = %08x\n"), info->si_addr); + if(signum == 4) + output_log(_T(" value = 0x%08x\n"), *((uae_u32*)(info->si_addr))); + output_log(_T("r0 = 0x%08x\n"), ucontext->uc_mcontext.arm_r0); + output_log(_T("r1 = 0x%08x\n"), ucontext->uc_mcontext.arm_r1); + output_log(_T("r2 = 0x%08x\n"), ucontext->uc_mcontext.arm_r2); + output_log(_T("r3 = 0x%08x\n"), ucontext->uc_mcontext.arm_r3); + output_log(_T("r4 = 0x%08x\n"), ucontext->uc_mcontext.arm_r4); + output_log(_T("r5 = 0x%08x\n"), ucontext->uc_mcontext.arm_r5); + output_log(_T("r6 = 0x%08x\n"), ucontext->uc_mcontext.arm_r6); + output_log(_T("r7 = 0x%08x\n"), ucontext->uc_mcontext.arm_r7); + output_log(_T("r8 = 0x%08x\n"), ucontext->uc_mcontext.arm_r8); + 
output_log(_T("r9 = 0x%08x\n"), ucontext->uc_mcontext.arm_r9); + output_log(_T("r10 = 0x%08x\n"), ucontext->uc_mcontext.arm_r10); + output_log(_T("FP = 0x%08x\n"), ucontext->uc_mcontext.arm_fp); + output_log(_T("IP = 0x%08x\n"), ucontext->uc_mcontext.arm_ip); + output_log(_T("SP = 0x%08x\n"), ucontext->uc_mcontext.arm_sp); + output_log(_T("LR = 0x%08x\n"), ucontext->uc_mcontext.arm_lr); + output_log(_T("PC = 0x%08x\n"), ucontext->uc_mcontext.arm_pc); + output_log(_T("CPSR = 0x%08x\n"), ucontext->uc_mcontext.arm_cpsr); + output_log(_T("Fault Address = 0x%08x\n"), ucontext->uc_mcontext.fault_address); + output_log(_T("Trap no = 0x%08x\n"), ucontext->uc_mcontext.trap_no); + output_log(_T("Err Code = 0x%08x\n"), ucontext->uc_mcontext.error_code); + output_log(_T("Old Mask = 0x%08x\n"), ucontext->uc_mcontext.oldmask); + + void *getaddr = (void *)ucontext->uc_mcontext.arm_lr; + if(dladdr(getaddr, &dlinfo)) + output_log(_T("LR - 0x%08X: <%s> (%s)\n"), getaddr, dlinfo.dli_sname, dlinfo.dli_fname); + else + output_log(_T("LR - 0x%08X: symbol not found\n"), getaddr); + } #endif #if SHOW_DETAILS > 1 - if (handled != HANDLE_EXCEPTION_A4000RAM) { - output_log(_T("Stack trace:\n")); - -#define MAX_BACKTRACE 20 - - void *array[MAX_BACKTRACE]; - int size = backtrace(array, MAX_BACKTRACE); - for (int i = 0; i (%s)\n"), array[i], symname, - (unsigned long)array[i] - (unsigned long)dlinfo.dli_saddr, dlinfo.dli_fname); - } - } - - void *ip = (void*)ucontext->uc_mcontext.arm_r10; - void **bp = (void**)ucontext->uc_mcontext.arm_r10; - int f = 0; - while (bp && ip) { - if (!dladdr(ip, &dlinfo)) { - output_log(_T("IP out of range\n")); - break; - } - const char *symname = dlinfo.dli_sname; - output_log(_T("%02d: 0x%08x <%s + 0x%08x> (%s)\n"), ++f, ip, symname, - (unsigned long)ip - (unsigned long)dlinfo.dli_saddr, dlinfo.dli_fname); - if (dlinfo.dli_sname && !strcmp(dlinfo.dli_sname, "main")) - break; - ip = bp[1]; - bp = (void**)bp[0]; - } - - output_log(_T("Stack trace 
(non-dedicated):\n")); - char **strings; - void *bt[100]; - int sz = backtrace(bt, 100); - strings = backtrace_symbols(bt, sz); - for (int i = 0; i < sz; ++i) - output_log(_T("%s\n"), strings[i]); - output_log(_T("End of stack trace.\n")); - } + if(handled != HANDLE_EXCEPTION_A4000RAM) { + output_log(_T("Stack trace:\n")); + + #define MAX_BACKTRACE 20 + + void *array[MAX_BACKTRACE]; + int size = backtrace(array, MAX_BACKTRACE); + for(int i=0; i (%s)\n"), array[i], symname, + (unsigned long)array[i] - (unsigned long)dlinfo.dli_saddr, dlinfo.dli_fname); + } + } + + void *ip = (void*)ucontext->uc_mcontext.arm_r10; + void **bp = (void**)ucontext->uc_mcontext.arm_r10; + int f = 0; + while(bp && ip) { + if (!dladdr(ip, &dlinfo)) { + output_log(_T("IP out of range\n")); + break; + } + const char *symname = dlinfo.dli_sname; + output_log(_T("%02d: 0x%08x <%s + 0x%08x> (%s)\n"), ++f, ip, symname, + (unsigned long)ip - (unsigned long)dlinfo.dli_saddr, dlinfo.dli_fname); + if(dlinfo.dli_sname && !strcmp(dlinfo.dli_sname, "main")) + break; + ip = bp[1]; + bp = (void**)bp[0]; + } + + output_log(_T("Stack trace (non-dedicated):\n")); + char **strings; + void *bt[100]; + int sz = backtrace(bt, 100); + strings = backtrace_symbols(bt, sz); + for(int i = 0; i < sz; ++i) + output_log(_T("%s\n"), strings[i]); + output_log(_T("End of stack trace.\n")); + } #endif - output_log(_T("--- end exception ---\n")); + output_log(_T("--- end exception ---\n")); - if (handled != HANDLE_EXCEPTION_A4000RAM) { - --max_signals; - if (max_signals <= 0) { - target_startup_msg(_T("Exception"), _T("Too many access violations. Please turn off JIT.")); - uae_restart(1, NULL); - return; - } - } + if (handled != HANDLE_EXCEPTION_A4000RAM) { + --max_signals; + if(max_signals <= 0) { + target_startup_msg(_T("Exception"), _T("Too many access violations. 
Please turn off JIT.")); + uae_restart(1, NULL); + return; + } + } if (handled != HANDLE_EXCEPTION_NONE) - return; + return; - SDL_Quit(); - exit(1); + SDL_Quit(); + exit(1); } -void signal_buserror(int signum, siginfo_t* info, void*ptr) +void signal_buserror(int signum, siginfo_t* info, void*ptr) { - ucontext_t *ucontext = (ucontext_t*)ptr; - Dl_info dlinfo; + ucontext_t *ucontext = (ucontext_t*)ptr; + Dl_info dlinfo; - output_log(_T("--- New exception ---\n")); + output_log(_T("--- New exception ---\n")); #ifdef TRACER trace_end(); @@ -440,96 +439,96 @@ void signal_buserror(int signum, siginfo_t* info, void*ptr) unsigned long *regs = &context->arm_r0; uintptr addr = (uintptr)info->si_addr; - output_log(_T("info.si_signo = %d\n"), signum); - output_log(_T("info.si_errno = %d\n"), info->si_errno); - output_log(_T("info.si_code = %d\n"), info->si_code); - output_log(_T("info.si_addr = %08x\n"), info->si_addr); - if (signum == 4) - output_log(_T(" value = 0x%08x\n"), *((uae_u32*)(info->si_addr))); - output_log(_T("r0 = 0x%08x\n"), ucontext->uc_mcontext.arm_r0); - output_log(_T("r1 = 0x%08x\n"), ucontext->uc_mcontext.arm_r1); - output_log(_T("r2 = 0x%08x\n"), ucontext->uc_mcontext.arm_r2); - output_log(_T("r3 = 0x%08x\n"), ucontext->uc_mcontext.arm_r3); - output_log(_T("r4 = 0x%08x\n"), ucontext->uc_mcontext.arm_r4); - output_log(_T("r5 = 0x%08x\n"), ucontext->uc_mcontext.arm_r5); - output_log(_T("r6 = 0x%08x\n"), ucontext->uc_mcontext.arm_r6); - output_log(_T("r7 = 0x%08x\n"), ucontext->uc_mcontext.arm_r7); - output_log(_T("r8 = 0x%08x\n"), ucontext->uc_mcontext.arm_r8); - output_log(_T("r9 = 0x%08x\n"), ucontext->uc_mcontext.arm_r9); - output_log(_T("r10 = 0x%08x\n"), ucontext->uc_mcontext.arm_r10); - output_log(_T("FP = 0x%08x\n"), ucontext->uc_mcontext.arm_fp); - output_log(_T("IP = 0x%08x\n"), ucontext->uc_mcontext.arm_ip); - output_log(_T("SP = 0x%08x\n"), ucontext->uc_mcontext.arm_sp); - output_log(_T("LR = 0x%08x\n"), ucontext->uc_mcontext.arm_lr); - 
output_log(_T("PC = 0x%08x\n"), ucontext->uc_mcontext.arm_pc); - output_log(_T("CPSR = 0x%08x\n"), ucontext->uc_mcontext.arm_cpsr); - output_log(_T("Fault Address = 0x%08x\n"), ucontext->uc_mcontext.fault_address); - output_log(_T("Trap no = 0x%08x\n"), ucontext->uc_mcontext.trap_no); - output_log(_T("Err Code = 0x%08x\n"), ucontext->uc_mcontext.error_code); - output_log(_T("Old Mask = 0x%08x\n"), ucontext->uc_mcontext.oldmask); + output_log(_T("info.si_signo = %d\n"), signum); + output_log(_T("info.si_errno = %d\n"), info->si_errno); + output_log(_T("info.si_code = %d\n"), info->si_code); + output_log(_T("info.si_addr = %08x\n"), info->si_addr); + if(signum == 4) + output_log(_T(" value = 0x%08x\n"), *((uae_u32*)(info->si_addr))); + output_log(_T("r0 = 0x%08x\n"), ucontext->uc_mcontext.arm_r0); + output_log(_T("r1 = 0x%08x\n"), ucontext->uc_mcontext.arm_r1); + output_log(_T("r2 = 0x%08x\n"), ucontext->uc_mcontext.arm_r2); + output_log(_T("r3 = 0x%08x\n"), ucontext->uc_mcontext.arm_r3); + output_log(_T("r4 = 0x%08x\n"), ucontext->uc_mcontext.arm_r4); + output_log(_T("r5 = 0x%08x\n"), ucontext->uc_mcontext.arm_r5); + output_log(_T("r6 = 0x%08x\n"), ucontext->uc_mcontext.arm_r6); + output_log(_T("r7 = 0x%08x\n"), ucontext->uc_mcontext.arm_r7); + output_log(_T("r8 = 0x%08x\n"), ucontext->uc_mcontext.arm_r8); + output_log(_T("r9 = 0x%08x\n"), ucontext->uc_mcontext.arm_r9); + output_log(_T("r10 = 0x%08x\n"), ucontext->uc_mcontext.arm_r10); + output_log(_T("FP = 0x%08x\n"), ucontext->uc_mcontext.arm_fp); + output_log(_T("IP = 0x%08x\n"), ucontext->uc_mcontext.arm_ip); + output_log(_T("SP = 0x%08x\n"), ucontext->uc_mcontext.arm_sp); + output_log(_T("LR = 0x%08x\n"), ucontext->uc_mcontext.arm_lr); + output_log(_T("PC = 0x%08x\n"), ucontext->uc_mcontext.arm_pc); + output_log(_T("CPSR = 0x%08x\n"), ucontext->uc_mcontext.arm_cpsr); + output_log(_T("Fault Address = 0x%08x\n"), ucontext->uc_mcontext.fault_address); + output_log(_T("Trap no = 0x%08x\n"), 
ucontext->uc_mcontext.trap_no); + output_log(_T("Err Code = 0x%08x\n"), ucontext->uc_mcontext.error_code); + output_log(_T("Old Mask = 0x%08x\n"), ucontext->uc_mcontext.oldmask); - void *getaddr = (void *)ucontext->uc_mcontext.arm_lr; - if (dladdr(getaddr, &dlinfo)) - output_log(_T("LR - 0x%08X: <%s> (%s)\n"), getaddr, dlinfo.dli_sname, dlinfo.dli_fname); - else - output_log(_T("LR - 0x%08X: symbol not found\n"), getaddr); + void *getaddr = (void *)ucontext->uc_mcontext.arm_lr; + if(dladdr(getaddr, &dlinfo)) + output_log(_T("LR - 0x%08X: <%s> (%s)\n"), getaddr, dlinfo.dli_sname, dlinfo.dli_fname); + else + output_log(_T("LR - 0x%08X: symbol not found\n"), getaddr); output_log(_T("Stack trace:\n")); -#define MAX_BACKTRACE 20 + #define MAX_BACKTRACE 20 + + void *array[MAX_BACKTRACE]; + int size = backtrace(array, MAX_BACKTRACE); + for(int i=0; i (%s)\n"), array[i], symname, + (unsigned long)array[i] - (unsigned long)dlinfo.dli_saddr, dlinfo.dli_fname); + } + } - void *array[MAX_BACKTRACE]; - int size = backtrace(array, MAX_BACKTRACE); - for (int i = 0; i (%s)\n"), array[i], symname, - (unsigned long)array[i] - (unsigned long)dlinfo.dli_saddr, dlinfo.dli_fname); - } - } - - void *ip = (void*)ucontext->uc_mcontext.arm_r10; - void **bp = (void**)ucontext->uc_mcontext.arm_r10; - int f = 0; - while (bp && ip) { - if (!dladdr(ip, &dlinfo)) { - output_log(_T("IP out of range\n")); - break; - } - const char *symname = dlinfo.dli_sname; - output_log(_T("%02d: 0x%08x <%s + 0x%08x> (%s)\n"), ++f, ip, symname, - (unsigned long)ip - (unsigned long)dlinfo.dli_saddr, dlinfo.dli_fname); - if (dlinfo.dli_sname && !strcmp(dlinfo.dli_sname, "main")) - break; - ip = bp[1]; - bp = (void**)bp[0]; - } + void *ip = (void*)ucontext->uc_mcontext.arm_r10; + void **bp = (void**)ucontext->uc_mcontext.arm_r10; + int f = 0; + while(bp && ip) { + if (!dladdr(ip, &dlinfo)) { + output_log(_T("IP out of range\n")); + break; + } + const char *symname = dlinfo.dli_sname; + output_log(_T("%02d: 0x%08x 
<%s + 0x%08x> (%s)\n"), ++f, ip, symname, + (unsigned long)ip - (unsigned long)dlinfo.dli_saddr, dlinfo.dli_fname); + if(dlinfo.dli_sname && !strcmp(dlinfo.dli_sname, "main")) + break; + ip = bp[1]; + bp = (void**)bp[0]; + } output_log(_T("Stack trace (non-dedicated):\n")); - char **strings; - void *bt[100]; - int sz = backtrace(bt, 100); - strings = backtrace_symbols(bt, sz); - for (int i = 0; i < sz; ++i) - output_log(_T("%s\n"), strings[i]); + char **strings; + void *bt[100]; + int sz = backtrace(bt, 100); + strings = backtrace_symbols(bt, sz); + for(int i = 0; i < sz; ++i) + output_log(_T("%s\n"), strings[i]); output_log(_T("End of stack trace.\n")); - output_log(_T("--- end exception ---\n")); + output_log(_T("--- end exception ---\n")); - SDL_Quit(); - exit(1); + SDL_Quit(); + exit(1); } -void signal_term(int signum, siginfo_t* info, void*ptr) +void signal_term(int signum, siginfo_t* info, void*ptr) { - output_log(_T("--- SIGTERM ---\n")); + output_log(_T("--- SIGTERM ---\n")); #ifdef TRACER trace_end(); #endif - SDL_Quit(); - exit(1); + SDL_Quit(); + exit(1); } diff --git a/src/osdep/sysconfig.h b/src/osdep/sysconfig.h index 2735c6ff..2b5471c4 100644 --- a/src/osdep/sysconfig.h +++ b/src/osdep/sysconfig.h @@ -16,7 +16,7 @@ #define AUTOCONFIG /* autoconfig support, fast ram, harddrives etc.. 
*/ #define JIT /* JIT compiler support */ #define USE_JIT_FPU -/* #define NATMEM_OFFSET natmem_offset */ +/* #define NATMEM_OFFSET regs.natmem_offset */ /* #define CATWEASEL */ /* Catweasel MK2/3 support */ /* #define AHI */ /* AHI sound emulation */ /* #define ENFORCER */ /* UAE Enforcer */ diff --git a/src/osdep/target.h b/src/osdep/target.h index 05718903..7aef65c6 100644 --- a/src/osdep/target.h +++ b/src/osdep/target.h @@ -13,7 +13,12 @@ #define OPTIONSFILENAME "uaeconfig" -STATIC_INLINE FILE *uae_tfopen(const char *path, const char *mode) +#ifndef ARMV6T2 +#undef USE_JIT_FPU +#endif + + +STATIC_INLINE FILE *uae_tfopen(const TCHAR *path, const TCHAR *mode) { return fopen(path, mode); } @@ -27,7 +32,6 @@ extern int generic_main (int argc, char *argv[]); extern int emulating; -extern uae_u8* natmem_offset; extern int z3base_adr; extern unsigned long time_per_frame;