From 4b6ecbaab7b45817ada883a49ee94b5c7f897083 Mon Sep 17 00:00:00 2001
From: Dimitris Panokostas
Date: Thu, 20 Apr 2017 13:30:02 +0200
Subject: [PATCH] Added arm_helper for RPI1 platforms, moved gperftools to DEBUG only

---
 Makefile               | 16 +++++--
 src/osdep/arm_helper.s | 97 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 109 insertions(+), 4 deletions(-)
 create mode 100644 src/osdep/arm_helper.s

diff --git a/Makefile b/Makefile
index 4e2e133b..69a1d02f 100644
--- a/Makefile
+++ b/Makefile
@@ -38,16 +38,16 @@ DEFS += -DUSE_SDL
 MORE_CFLAGS += -Isrc -Isrc/osdep -Isrc/threaddep -Isrc/include -Isrc/guisan/include
 MORE_CFLAGS += -Wno-unused -Wno-format -DGCCCONSTFUNC="__attribute__((const))"
 MORE_CFLAGS += -fexceptions -fpermissive
-MORE_CFLAGS += -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free
 
-LDFLAGS += -lpthread -lm -lz -lpng -lrt -lxml2 -lFLAC -lmpg123 -ldl -ltcmalloc
+LDFLAGS += -lpthread -lm -lz -lpng -lrt -lxml2 -lFLAC -lmpg123 -ldl
 LDFLAGS += -lSDL2 -lSDL2_image -lSDL2_ttf -lguisan -L/opt/vc/lib -Lsrc/guisan/lib
 
 ifndef DEBUG
 MORE_CFLAGS += -Ofast -pipe -Wno-write-strings
 else
 MORE_CFLAGS += -g -DDEBUG -Wl,--export-dynamic
-LDFLAGS += -lprofiler
+MORE_CFLAGS += -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free
+LDFLAGS += -ltcmalloc -lprofiler
 endif
 
 ASFLAGS += $(CPU_FLAGS)
@@ -183,7 +183,12 @@ OBJS = \
 	src/osdep/gui/Navigation.o
 
 OBJS += src/osdep/picasso96.o
-OBJS += src/osdep/neon_helper.o
+
+ifeq ($(PLATFORM),rpi1)
+  OBJS += src/osdep/arm_helper.o
+else
+  OBJS += src/osdep/neon_helper.o
+endif
 
 OBJS += src/newcpu.o
 OBJS += src/newcpu_common.o
@@ -201,6 +206,9 @@ OBJS += src/jit/compemu_support.o
 src/osdep/neon_helper.o: src/osdep/neon_helper.s
 	$(CXX) $(CPU_FLAGS) -Wall -o src/osdep/neon_helper.o -c src/osdep/neon_helper.s
 
+src/osdep/arm_helper.o: src/osdep/arm_helper.s
+	$(CXX) $(CPU_FLAGS) -Wall -o src/osdep/arm_helper.o -c src/osdep/arm_helper.s
+
 $(PROG): $(OBJS)
 	$(CXX) $(CXXFLAGS) -o $(PROG) $(OBJS) $(LDFLAGS)
 ifndef DEBUG
diff --git a/src/osdep/arm_helper.s b/src/osdep/arm_helper.s
new file mode 100644
index 00000000..9db26dd0
--- /dev/null
+++ b/src/osdep/arm_helper.s
@@ -0,0 +1,97 @@
+@ Some functions and tests to increase performance in drawing.cpp and custom.cpp
+
+.arm
+
+.global copy_screen_8bit
+.global copy_screen_16bit_swap
+.global copy_screen_32bit_to_16bit
+
+.text
+
+.align 8    @ on ARM, gas reads the operand as a power of two: 2^8 = 256-byte boundary
+
+
+@----------------------------------------------------------------
+@ copy_screen_8bit
+@ Expands 8-bit palette indices to 16-bit pixels: each src byte indexes clut and the low halfword of that entry is stored to dst.
+@ r0: uae_u8 *dst
+@ r1: uae_u8 *src
+@ r2: int bytes always a multiple of 64: even number of lines, number of pixel per line is multiple of 32 (320, 640, 800, 1024, 1152, 1280)
+@ r3: uae_u32 *clut
+@ bytes counts src bytes; src is consumed in 64-byte chunks and two dst bytes are written per src byte.
+@ void copy_screen_8bit(uae_u8 *dst, uae_u8 *src, int bytes, uae_u32 *clut);
+@ r4-r6 and lr are saved on entry and restored on return; lr doubles as the inner loop counter.
+@----------------------------------------------------------------
+copy_screen_8bit:
+	stmdb sp!, {r4-r6, lr}          @ save scratch registers and the return address
+copy_screen_8bit_loop:
+	pld [r1, #192]                  @ preload hint for src data 192 bytes ahead
+	mov lr, #64                     @ lr = src bytes left in this 64-byte chunk
+copy_screen_8bit_loop_2:
+	ldr r4, [r1], #4                @ fetch four palette indices at once, advance src
+	and r5, r4, #255                @ index 0 = bits 0-7
+	ldr r6, [r3, r5, lsl #2]        @ r6 = clut[index 0] (4-byte entries)
+	lsr r5, r4, #8
+	and r5, r5, #255                @ index 1 = bits 8-15
+	strh r6, [r0], #2               @ store low 16 bits of the entry, advance dst
+	ldr r6, [r3, r5, lsl #2]        @ r6 = clut[index 1]
+	lsr r5, r4, #16
+	and r5, r5, #255                @ index 2 = bits 16-23
+	strh r6, [r0], #2
+	ldr r6, [r3, r5, lsl #2]        @ r6 = clut[index 2]
+	lsr r5, r4, #24                 @ index 3 = bits 24-31, the shift leaves nothing to mask
+	strh r6, [r0], #2
+	ldr r6, [r3, r5, lsl #2]        @ r6 = clut[index 3]
+	subs lr, lr, #4                 @ four src bytes consumed; sets the flags for bgt below
+	strh r6, [r0], #2               @ strh leaves the flags from subs untouched
+	bgt copy_screen_8bit_loop_2     @ continue until the 64-byte chunk is done
+	subs r2, r2, #64
+	bgt copy_screen_8bit_loop       @ next chunk while bytes remain
+	ldmia sp!, {r4-r6, pc}          @ restore registers; loading pc returns to the caller
+
+
+@----------------------------------------------------------------
+@ copy_screen_16bit_swap
+@ Copies src to dst one 32-bit word at a time, swapping the two bytes inside each 16-bit halfword.
+@ r0: uae_u8 *dst
+@ r1: uae_u8 *src
+@ r2: int bytes always a multiple of 128: even number of lines, 2 bytes per pixel, number of pixel per line is multiple of 32 (320, 640, 800, 1024, 1152, 1280)
+@ Uses r3 as scratch; nothing is pushed on the stack.
+@ void copy_screen_16bit_swap(uae_u8 *dst, uae_u8 *src, int bytes);
+@ The loop exits only when the count reaches exactly zero, so bytes must be a non-zero multiple of 4.
+@----------------------------------------------------------------
+copy_screen_16bit_swap:
+ldr r3, [r1], #4              @ load one word (two 16-bit pixels), advance src
+rev16 r3, r3                  @ byte-swap each halfword independently
+str r3, [r0], #4              @ store the swapped word, advance dst
+subs r2, r2, #4               @ four bytes done
+bne copy_screen_16bit_swap    @ the function label is also the loop head
+bx lr
+
+
+@----------------------------------------------------------------
+@ copy_screen_32bit_to_16bit
+@ Packs each 4-byte source pixel into one 16-bit pixel, keeping the top 5/6/5 bits of the first three bytes; the fourth byte is dropped.
+@ r0: uae_u8 *dst - Format (bits): rrrr rggg gggb bbbb
+@ r1: uae_u8 *src - Format (bytes) in memory rgba
+@ r2: int bytes - counts source bytes (decremented by 4 for every pixel read)
+@ r4-r6 and lr are saved on entry and restored on return; r3 is used as scratch.
+@ void copy_screen_32bit_to_16bit(uae_u8 *dst, uae_u8 *src, int bytes);
+@ The loop exits only when the count reaches exactly zero, so bytes must be a non-zero multiple of 4.
+@----------------------------------------------------------------
+copy_screen_32bit_to_16bit:
+stmdb sp!, {r4-r6, lr}               @ save scratch registers and the return address
+copy_screen_32bit_to_16bit_loop:
+ldr r3, [r1], #4                     @ load one source pixel, advance src
+rev r3, r3                           @ reverse byte order; with a little-endian load the first memory byte lands in bits 24-31
+lsr r4, r3, #27                      @ r4 = bits 27-31: top 5 bits of r
+lsr r5, r3, #18
+and r5, r5, #63                      @ r5 = bits 18-23: top 6 bits of g
+lsr r6, r3, #11
+and r6, r6, #31                      @ r6 = bits 11-15: top 5 bits of b
+orr r6, r6, r5, lsl #5               @ place g in bits 5-10
+orr r6, r6, r4, lsl #11              @ place r in bits 11-15
+strh r6, [r0], #2                    @ store the 16-bit pixel, advance dst
+subs r2, r2, #4                      @ one 4-byte source pixel consumed
+bne copy_screen_32bit_to_16bit_loop  @ repeat until the count is exactly zero
+ldmia sp!, {r4-r6, pc}               @ restore registers; loading pc returns to the caller
\ No newline at end of file