From 8b5f8f8bb57a38a958dea26a492a08a66640500d Mon Sep 17 00:00:00 2001
From: Dimitris Panokostas
Date: Sat, 17 Dec 2016 11:42:10 +0100
Subject: [PATCH] Added missing arm_helper.s

Added missing file to allow Picasso96 on Pi 1/Zero again
---
Reviewer note: builds that define USE_ARMNEON keep linking neon_helper.o;
builds where it is unset or empty link the new arm_helper.o instead.

 Makefile                    | 11 ++++-
 src/od-pandora/arm_helper.s | 97 +++++++++++++++++++++++++++++++++++++
 2 files changed, 107 insertions(+), 1 deletion(-)
 create mode 100644 src/od-pandora/arm_helper.s

diff --git a/Makefile b/Makefile
index 542a40c4..f7e71d1b 100644
--- a/Makefile
+++ b/Makefile
@@ -197,8 +197,13 @@ OBJS = \
 OBJS += src/od-rasp/rasp_gfx.o
 OBJS += src/od-pandora/gui/sdltruetypefont.o
 OBJS += src/od-pandora/picasso96.o
-OBJS += src/od-pandora/neon_helper.o
+ifdef USE_ARMNEON
+  OBJS += src/od-pandora/neon_helper.o
+else
+  OBJS += src/od-pandora/arm_helper.o
+endif
 
+
 OBJS += src/newcpu.o
 OBJS += src/newcpu_common.o
 OBJS += src/readcpu.o
@@ -215,6 +220,10 @@ OBJS += src/jit/compemu_support.o
 src/od-pandora/neon_helper.o: src/od-pandora/neon_helper.s
 	$(CXX) $(CPU_FLAGS) -Wall -o src/od-pandora/neon_helper.o -c src/od-pandora/neon_helper.s
 
+src/od-pandora/arm_helper.o: src/od-pandora/arm_helper.s
+	$(CXX) $(CPU_FLAGS) -Wall -o src/od-pandora/arm_helper.o -c src/od-pandora/arm_helper.s
+
+
 src/trace.o: src/trace.c
 	$(CC) $(MORE_CFLAGS) -c src/trace.c -o src/trace.o
 
diff --git a/src/od-pandora/arm_helper.s b/src/od-pandora/arm_helper.s
new file mode 100644
index 00000000..9db26dd0
--- /dev/null
+++ b/src/od-pandora/arm_helper.s
@@ -0,0 +1,97 @@
+@ Some functions and tests to increase performance in drawing.cpp and custom.cpp
+
+.arm
+
+.global copy_screen_8bit
+.global copy_screen_16bit_swap
+.global copy_screen_32bit_to_16bit
+
+.text
+
+.align 8        @ NOTE(review): GNU as on ARM treats the operand as a power of two (2^8 = 256 bytes) - confirm intended
+
+
+@----------------------------------------------------------------
+@ copy_screen_8bit
+@
+@ r0: uae_u8 *dst
+@ r1: uae_u8 *src
+@ r2: int bytes always a multiple of 64: even number of lines, number of pixel per line is multiple of 32 (320, 640, 800, 1024, 1152, 1280)
+@ r3: uae_u32 *clut
+@
+@ void copy_screen_8bit(uae_u8 *dst, uae_u8 *src, int bytes, uae_u32 *clut);
+@ Each source byte indexes clut; the low 16 bits of that entry are written to dst.
+@----------------------------------------------------------------
+copy_screen_8bit:
+  stmdb sp!, {r4-r6, lr}        @ save the scratch registers used below
+copy_screen_8bit_loop:
+  pld [r1, #192]                @ prefetch source data 192 bytes ahead
+  mov lr, #64                   @ lr = source bytes left in this 64-byte chunk
+copy_screen_8bit_loop_2:
+  ldr r4, [r1], #4              @ fetch four 8-bit pixels, advance src
+  and r5, r4, #255              @ pixel 0 = bits 7..0
+  ldr r6, [r3, r5, lsl #2]      @ clut entries are 4 bytes apart
+  lsr r5, r4, #8
+  and r5, r5, #255              @ pixel 1 = bits 15..8
+  strh r6, [r0], #2             @ emit pixel 0 (low halfword of the entry)
+  ldr r6, [r3, r5, lsl #2]
+  lsr r5, r4, #16
+  and r5, r5, #255              @ pixel 2 = bits 23..16
+  strh r6, [r0], #2             @ emit pixel 1
+  ldr r6, [r3, r5, lsl #2]
+  lsr r5, r4, #24               @ pixel 3 = bits 31..24, no mask needed
+  strh r6, [r0], #2             @ emit pixel 2
+  ldr r6, [r3, r5, lsl #2]
+  subs lr, lr, #4               @ four source bytes done; sets the flags for bgt
+  strh r6, [r0], #2             @ emit pixel 3 (strh leaves the flags alone)
+  bgt copy_screen_8bit_loop_2
+  subs r2, r2, #64              @ one 64-byte chunk finished
+  bgt copy_screen_8bit_loop
+  ldmia sp!, {r4-r6, pc}        @ restore registers and return
+
+
+@----------------------------------------------------------------
+@ copy_screen_16bit_swap
+@
+@ r0: uae_u8 *dst
+@ r1: uae_u8 *src
+@ r2: int bytes always a multiple of 128: even number of lines, 2 bytes per pixel, number of pixel per line is multiple of 32 (320, 640, 800, 1024, 1152, 1280)
+@ The loop moves one 32-bit word per pass and always runs at least once, so bytes must be > 0.
+@ void copy_screen_16bit_swap(uae_u8 *dst, uae_u8 *src, int bytes);
+@
+@----------------------------------------------------------------
+copy_screen_16bit_swap:
+  ldr r3, [r1], #4              @ load two 16-bit pixels, advance src
+  rev16 r3, r3                  @ swap the two bytes inside each halfword
+  str r3, [r0], #4              @ store both swapped pixels, advance dst
+  subs r2, r2, #4               @ four bytes done
+  bgt copy_screen_16bit_swap    @ bgt rather than bne: also stops when the count steps past zero
+  bx lr
+
+
+@----------------------------------------------------------------
+@ copy_screen_32bit_to_16bit
+@
+@ r0: uae_u8 *dst - Format (bits): rrrr rggg gggb bbbb
+@ r1: uae_u8 *src - Format (bytes) in memory rgba
+@ r2: int bytes   - source byte count, 4 per pixel; the loop always runs at least once
+@ The output keeps the top 5/6/5 bits of r/g/b; the fourth (a) byte is dropped.
+@ void copy_screen_32bit_to_16bit(uae_u8 *dst, uae_u8 *src, int bytes);
+@
+@----------------------------------------------------------------
+copy_screen_32bit_to_16bit:
+  stmdb sp!, {r4-r6, lr}        @ save the scratch registers used below
+copy_screen_32bit_to_16bit_loop:
+  ldr r3, [r1], #4              @ load one source pixel, advance src
+  rev r3, r3                    @ byte-reverse the word; assumes little-endian loads so r lands in bits 31..24 - confirm
+  lsr r4, r3, #27               @ r4 = bits 31..27 (top 5 bits of r)
+  lsr r5, r3, #18
+  and r5, r5, #63               @ r5 = bits 23..18 (top 6 bits of g)
+  lsr r6, r3, #11
+  and r6, r6, #31               @ r6 = bits 15..11 (top 5 bits of b)
+  orr r6, r6, r5, lsl #5        @ g -> bits 10..5
+  orr r6, r6, r4, lsl #11       @ r -> bits 15..11
+  strh r6, [r0], #2             @ write the 16-bit pixel, advance dst
+  subs r2, r2, #4               @ four source bytes done
+  bgt copy_screen_32bit_to_16bit_loop   @ bgt rather than bne: also stops when the count steps past zero
+  ldmia sp!, {r4-r6, pc}
\ No newline at end of file