Added arm_helper for RPI1 platforms, moved gperftools to DEBUG only
This commit is contained in:
parent
e4d9e40563
commit
4b6ecbaab7
2 changed files with 109 additions and 4 deletions
16
Makefile
16
Makefile
|
@ -38,16 +38,16 @@ DEFS += -DUSE_SDL
|
|||
MORE_CFLAGS += -Isrc -Isrc/osdep -Isrc/threaddep -Isrc/include -Isrc/guisan/include
|
||||
MORE_CFLAGS += -Wno-unused -Wno-format -DGCCCONSTFUNC="__attribute__((const))"
|
||||
MORE_CFLAGS += -fexceptions -fpermissive
|
||||
MORE_CFLAGS += -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free
|
||||
|
||||
LDFLAGS += -lpthread -lm -lz -lpng -lrt -lxml2 -lFLAC -lmpg123 -ldl -ltcmalloc
|
||||
LDFLAGS += -lpthread -lm -lz -lpng -lrt -lxml2 -lFLAC -lmpg123 -ldl
|
||||
LDFLAGS += -lSDL2 -lSDL2_image -lSDL2_ttf -lguisan -L/opt/vc/lib -Lsrc/guisan/lib
|
||||
|
||||
ifndef DEBUG
|
||||
MORE_CFLAGS += -Ofast -pipe -Wno-write-strings
|
||||
else
|
||||
MORE_CFLAGS += -g -DDEBUG -Wl,--export-dynamic
|
||||
LDFLAGS += -lprofiler
|
||||
MORE_CFLAGS += -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free
|
||||
LDFLAGS += -ltcmalloc -lprofiler
|
||||
endif
|
||||
|
||||
ASFLAGS += $(CPU_FLAGS)
|
||||
|
@ -183,7 +183,12 @@ OBJS = \
|
|||
src/osdep/gui/Navigation.o
|
||||
|
||||
OBJS += src/osdep/picasso96.o
|
||||
OBJS += src/osdep/neon_helper.o
|
||||
|
||||
ifeq ($(PLATFORM),rpi1)
|
||||
OBJS += src/osdep/arm_helper.o
|
||||
else
|
||||
OBJS += src/osdep/neon_helper.o
|
||||
endif
|
||||
|
||||
OBJS += src/newcpu.o
|
||||
OBJS += src/newcpu_common.o
|
||||
|
@ -201,6 +206,9 @@ OBJS += src/jit/compemu_support.o
|
|||
src/osdep/neon_helper.o: src/osdep/neon_helper.s
|
||||
$(CXX) $(CPU_FLAGS) -Wall -o src/osdep/neon_helper.o -c src/osdep/neon_helper.s
|
||||
|
||||
src/osdep/arm_helper.o: src/osdep/arm_helper.s
|
||||
$(CXX) $(CPU_FLAGS) -Wall -o src/osdep/arm_helper.o -c src/osdep/arm_helper.s
|
||||
|
||||
$(PROG): $(OBJS)
|
||||
$(CXX) $(CXXFLAGS) -o $(PROG) $(OBJS) $(LDFLAGS)
|
||||
ifndef DEBUG
|
||||
|
|
97
src/osdep/arm_helper.s
Normal file
97
src/osdep/arm_helper.s
Normal file
|
@ -0,0 +1,97 @@
|
|||
@ Some functions and tests to increase performance in drawing.cpp and custom.cpp
|
||||
|
||||
.arm
|
||||
|
||||
.global copy_screen_8bit
|
||||
.global copy_screen_16bit_swap
|
||||
.global copy_screen_32bit_to_16bit
|
||||
|
||||
.text
|
||||
|
||||
.align 8
|
||||
|
||||
|
||||
@----------------------------------------------------------------
|
||||
@ copy_screen_8bit
|
||||
@
|
||||
@ r0: uae_u8 *dst
|
||||
@ r1: uae_u8 *src
|
||||
@ r2: int bytes - always a multiple of 64: an even number of lines, and the number of pixels per line is a multiple of 32 (320, 640, 800, 1024, 1152, 1280)
|
||||
@ r3: uae_u32 *clut
|
||||
@
|
||||
@ void copy_screen_8bit(uae_u8 *dst, uae_u8 *src, int bytes, uae_u32 *clut);
|
||||
@
|
||||
@----------------------------------------------------------------
|
||||
copy_screen_8bit:
@ Expands 8-bit palette indices from src (r1) into 16-bit pixels at dst (r0).
@ Each index selects a 32-bit entry of clut (r3); only the low halfword of the
@ entry is stored. The source is consumed in 64-byte chunks, r2 bytes in total.
@ Scratch: r4 = current source word, r5 = palette index, r6 = clut entry,
@ lr = bytes left in the current chunk (hence lr is saved and restored).
  push      {r4-r6, lr}
1:                                    @ outer loop: one 64-byte source chunk per pass
  pld       [r1, #192]                @ preload hint for source data 192 bytes ahead
  mov       lr, #64                   @ source bytes remaining in this chunk
2:                                    @ inner loop: 4 indices in, 4 halfwords out
  ldr       r4, [r1], #4              @ r4 = four packed 8-bit indices
  uxtb      r5, r4                    @ index 0: bits 0-7
  ldr       r6, [r3, r5, lsl #2]      @ r6 = clut[index 0]
  uxtb      r5, r4, ror #8            @ index 1: bits 8-15
  strh      r6, [r0], #2
  ldr       r6, [r3, r5, lsl #2]      @ r6 = clut[index 1]
  uxtb      r5, r4, ror #16           @ index 2: bits 16-23
  strh      r6, [r0], #2
  ldr       r6, [r3, r5, lsl #2]      @ r6 = clut[index 2]
  mov       r5, r4, lsr #24           @ index 3: bits 24-31
  strh      r6, [r0], #2
  ldr       r6, [r3, r5, lsl #2]      @ r6 = clut[index 3]
  subs      lr, lr, #4                @ 4 source bytes done; flags feed the bgt below
  strh      r6, [r0], #2              @ store does not disturb the flags
  bgt       2b
  subs      r2, r2, #64               @ one whole chunk finished
  bgt       1b
  pop       {r4-r6, pc}               @ restore and return
|
||||
|
||||
|
||||
@----------------------------------------------------------------
|
||||
@ copy_screen_16bit_swap
|
||||
@
|
||||
@ r0: uae_u8 *dst
|
||||
@ r1: uae_u8 *src
|
||||
@ r2: int bytes - always a multiple of 128: an even number of lines, 2 bytes per pixel, and the number of pixels per line is a multiple of 32 (320, 640, 800, 1024, 1152, 1280)
|
||||
@
|
||||
@ void copy_screen_16bit_swap(uae_u8 *dst, uae_u8 *src, int bytes);
|
||||
@
|
||||
@----------------------------------------------------------------
|
||||
copy_screen_16bit_swap:
@ Copies r2 bytes from src (r1) to dst (r0), swapping the two bytes inside
@ every 16-bit halfword. Works on one 32-bit word (two pixels) per iteration.
@ Leaf routine: only r3 is used as scratch, so nothing is saved on the stack.
@
@ The loop ends as soon as the remaining count is <= 0 rather than only when
@ it is exactly 0, so a zero, negative or non-multiple-of-4 byte count can no
@ longer send the copy running far past both buffers.
  cmp       r2, #0                    @ nothing to copy for a non-positive count
  bxle      lr
1:
  ldr       r3, [r1], #4              @ fetch two 16-bit pixels
  rev16     r3, r3                    @ byte-swap each halfword independently
  str       r3, [r0], #4
  subs      r2, r2, #4                @ 4 bytes handled
  bgt       1b                        @ continue while bytes remain
  bx        lr
|
||||
|
||||
|
||||
@----------------------------------------------------------------
|
||||
@ copy_screen_32bit_to_16bit
|
||||
@
|
||||
@ r0: uae_u8 *dst - Format (bits): rrrr rggg gggb bbbb
|
||||
@ r1: uae_u8 *src - Format (bytes) in memory rgba
|
||||
@ r2: int bytes - size of the source in bytes; must be a positive multiple of 4 (4 source bytes per pixel)
|
||||
@
|
||||
@ void copy_screen_32bit_to_16bit(uae_u8 *dst, uae_u8 *src, int bytes);
|
||||
@
|
||||
@----------------------------------------------------------------
|
||||
copy_screen_32bit_to_16bit:
@ Converts 32-bit pixels at src (r1, bytes in memory: r g b a) into 16-bit
@ pixels at dst (r0, bits: rrrr rggg gggb bbbb). r2 is the source size in
@ bytes; one 4-byte source pixel yields one 2-byte destination pixel.
@ Scratch: r3 = source pixel, r4 = red, r5 = green, r6 = blue / packed result.
@
@ The loop ends as soon as the remaining count is <= 0 rather than only when
@ it is exactly 0, so a zero, negative or non-multiple-of-4 byte count can no
@ longer send the conversion running far past both buffers.
  cmp       r2, #0                    @ nothing to convert for a non-positive count
  bxle      lr
  stmdb     sp!, {r4-r6, lr}
1:
  ldr       r3, [r1], #4              @ little-endian load: a<<24 | b<<16 | g<<8 | r
  rev       r3, r3                    @ now r<<24 | g<<16 | b<<8 | a
  lsr       r4, r3, #27               @ top 5 bits of red
  lsr       r5, r3, #18
  and       r5, r5, #63               @ top 6 bits of green
  lsr       r6, r3, #11
  and       r6, r6, #31               @ top 5 bits of blue
  orr       r6, r6, r5, lsl #5        @ green goes to bits 5-10
  orr       r6, r6, r4, lsl #11       @ red goes to bits 11-15
  strh      r6, [r0], #2
  subs      r2, r2, #4                @ one source pixel (4 bytes) handled
  bgt       1b                        @ continue while bytes remain
  ldmia     sp!, {r4-r6, pc}
|
Loading…
Add table
Add a link
Reference in a new issue