Added missing arm_helper.s

Added missing file to allow Picasso96 on Pi 1/Zero again
This commit is contained in:
Dimitris Panokostas 2016-12-17 11:42:10 +01:00
parent 595f5a92b7
commit 8b5f8f8bb5
2 changed files with 107 additions and 1 deletions

View file

@ -197,8 +197,13 @@ OBJS = \
OBJS += src/od-rasp/rasp_gfx.o
OBJS += src/od-pandora/gui/sdltruetypefont.o
OBJS += src/od-pandora/picasso96.o
OBJS += src/od-pandora/neon_helper.o
ifdef USE_ARMNEON
OBJS += src/od-pandora/neon_helper.o
else
OBJS += src/od-pandora/arm_helper.o
endif
OBJS += src/newcpu.o
OBJS += src/newcpu_common.o
OBJS += src/readcpu.o
@ -215,6 +220,10 @@ OBJS += src/jit/compemu_support.o
src/od-pandora/neon_helper.o: src/od-pandora/neon_helper.s
$(CXX) $(CPU_FLAGS) -Wall -o src/od-pandora/neon_helper.o -c src/od-pandora/neon_helper.s
src/od-pandora/arm_helper.o: src/od-pandora/arm_helper.s
$(CXX) $(CPU_FLAGS) -Wall -o src/od-pandora/arm_helper.o -c src/od-pandora/arm_helper.s
src/trace.o: src/trace.c
$(CC) $(MORE_CFLAGS) -c src/trace.c -o src/trace.o

View file

@ -0,0 +1,97 @@
@ Some functions and tests to increase performance in drawing.cpp and custom.cpp
@
@ GNU assembler source for 32-bit ARM. The three routines exported below are
@ screen-copy helpers; each is documented in the comment header in front of
@ its label.
@
@ Assemble the following code as ARM (A32) instructions rather than Thumb.
.arm
@ Symbols made visible to the linker so C/C++ code can call them.
.global copy_screen_8bit
.global copy_screen_16bit_swap
.global copy_screen_32bit_to_16bit
.text
@ NOTE(review): on ARM targets gas interprets the .align operand as a power
@ of two, so this requests 2^8 = 256-byte alignment of the first routine.
@ Confirm that this (rather than 8-byte alignment) is what is intended.
.align 8
@----------------------------------------------------------------
@ copy_screen_8bit
@
@ Expands 8-bit palette-indexed pixels to 16-bit pixels: every source
@ byte is used as an index into clut, and the low halfword of the
@ 32-bit clut entry is stored to dst.
@
@ r0: uae_u8 *dst   (receives 2 bytes for every source byte)
@ r1: uae_u8 *src   (read one 32-bit word, i.e. 4 pixels, at a time)
@ r2: int bytes always a multiple of 64: even number of lines, number of pixel per line is multiple of 32 (320, 640, 800, 1024, 1152, 1280)
@ r3: uae_u32 *clut (256 entries, indexed by the source byte)
@
@ Work is done in chunks of 64 source bytes, so a count that is not a
@ multiple of 64 is rounded up to the next multiple.
@ bytes <= 0 returns immediately without reading or writing anything.
@
@ Modifies r0-r2 and the flags; r4-r6 are saved and restored on the stack.
@
@ void copy_screen_8bit(uae_u8 *dst, uae_u8 *src, int bytes, uae_u32 *clut);
@
@----------------------------------------------------------------
.type copy_screen_8bit, %function
copy_screen_8bit:
    cmp     r2, #0                      @ nothing to convert?
    bxle    lr                          @ return before touching the stack or memory
    stmdb   sp!, {r4-r6, lr}
copy_screen_8bit_loop:
    pld     [r1, #192]                  @ prefetch source data ahead of the read pointer
    mov     lr, #64                     @ lr = source bytes left in this 64-byte chunk
copy_screen_8bit_loop_2:
    ldr     r4, [r1], #4                @ r4 = next four 8-bit pixels
    and     r5, r4, #255                @ pixel 0 (bits 7..0)
    ldr     r6, [r3, r5, lsl #2]        @ r6 = clut[pixel 0]
    lsr     r5, r4, #8
    and     r5, r5, #255                @ pixel 1 (bits 15..8)
    strh    r6, [r0], #2                @ store low halfword of clut[pixel 0]
    ldr     r6, [r3, r5, lsl #2]        @ r6 = clut[pixel 1]
    lsr     r5, r4, #16
    and     r5, r5, #255                @ pixel 2 (bits 23..16)
    strh    r6, [r0], #2
    ldr     r6, [r3, r5, lsl #2]        @ r6 = clut[pixel 2]
    lsr     r5, r4, #24                 @ pixel 3 (bits 31..24), no mask needed
    strh    r6, [r0], #2
    ldr     r6, [r3, r5, lsl #2]        @ r6 = clut[pixel 3]
    subs    lr, lr, #4                  @ four source bytes consumed
    strh    r6, [r0], #2                @ strh leaves the flags from subs intact
    bgt     copy_screen_8bit_loop_2
    subs    r2, r2, #64                 @ one whole chunk done
    bgt     copy_screen_8bit_loop
    ldmia   sp!, {r4-r6, pc}            @ restore registers and return
@----------------------------------------------------------------
@ copy_screen_16bit_swap
@
@ Copies 16-bit pixels from src to dst while swapping the two bytes of
@ every pixel. Two pixels (one 32-bit word) are handled per iteration.
@
@ r0: uae_u8 *dst
@ r1: uae_u8 *src
@ r2: int bytes always a multiple of 128: even number of lines, 2 bytes per pixel, number of pixel per line is multiple of 32 (320, 640, 800, 1024, 1152, 1280)
@
@ bytes <= 0 returns immediately without reading or writing anything.
@ The loop ends once the remaining count is no longer positive, so a
@ count that is not a multiple of 4 is rounded up to the next word
@ instead of wrapping the counter.
@
@ Modifies r0-r3 and the flags; no stack use.
@
@ void copy_screen_16bit_swap(uae_u8 *dst, uae_u8 *src, int bytes);
@
@----------------------------------------------------------------
.type copy_screen_16bit_swap, %function
copy_screen_16bit_swap:
    cmp     r2, #0                      @ nothing to copy?
    bxle    lr
copy_screen_16bit_swap_loop:
    ldr     r3, [r1], #4                @ r3 = two source pixels
    rev16   r3, r3                      @ swap the bytes inside each halfword
    str     r3, [r0], #4
    subs    r2, r2, #4                  @ four bytes done
    bgt     copy_screen_16bit_swap_loop @ signed > 0: cannot run away on odd counts
    bx      lr
@----------------------------------------------------------------
@ copy_screen_32bit_to_16bit
@
@ Converts 32-bit pixels to 16-bit pixels by keeping the top 5 bits of
@ red, the top 6 bits of green and the top 5 bits of blue. The fourth
@ source byte (alpha) is discarded.
@
@ r0: uae_u8 *dst - Format (bits): rrrr rggg gggb bbbb
@ r1: uae_u8 *src - Format (bytes) in memory rgba
@ r2: int bytes   - number of source bytes (4 per pixel)
@
@ bytes <= 0 returns immediately without reading or writing anything.
@ The loop ends once the remaining count is no longer positive, so a
@ count that is not a multiple of 4 is rounded up to the next pixel
@ instead of wrapping the counter.
@
@ Modifies r0-r3 and the flags; r4-r6 are saved and restored on the stack.
@
@ void copy_screen_32bit_to_16bit(uae_u8 *dst, uae_u8 *src, int bytes);
@
@----------------------------------------------------------------
.type copy_screen_32bit_to_16bit, %function
copy_screen_32bit_to_16bit:
    cmp     r2, #0                      @ nothing to convert?
    bxle    lr                          @ return before touching the stack or memory
    stmdb   sp!, {r4-r6, lr}
copy_screen_32bit_to_16bit_loop:
    ldr     r3, [r1], #4                @ r3 = one source pixel
    rev     r3, r3                      @ byte-reverse: r in bits 31..24, g 23..16, b 15..8, a 7..0
    lsr     r4, r3, #27                 @ r4 = top 5 bits of red
    lsr     r5, r3, #18
    and     r5, r5, #63                 @ r5 = top 6 bits of green
    lsr     r6, r3, #11
    and     r6, r6, #31                 @ r6 = top 5 bits of blue
    orr     r6, r6, r5, lsl #5          @ green into bits 10..5
    orr     r6, r6, r4, lsl #11         @ red into bits 15..11
    strh    r6, [r0], #2
    subs    r2, r2, #4                  @ one source pixel (4 bytes) done
    bgt     copy_screen_32bit_to_16bit_loop @ signed > 0: cannot run away on odd counts
    ldmia   sp!, {r4-r6, pc}            @ restore registers and return