Added arm_helper for RPI1 platforms, moved gperftools to DEBUG only
This commit is contained in:
parent
e4d9e40563
commit
4b6ecbaab7
2 changed files with 109 additions and 4 deletions
16
Makefile
16
Makefile
|
@ -38,16 +38,16 @@ DEFS += -DUSE_SDL
|
||||||
MORE_CFLAGS += -Isrc -Isrc/osdep -Isrc/threaddep -Isrc/include -Isrc/guisan/include
|
MORE_CFLAGS += -Isrc -Isrc/osdep -Isrc/threaddep -Isrc/include -Isrc/guisan/include
|
||||||
MORE_CFLAGS += -Wno-unused -Wno-format -DGCCCONSTFUNC="__attribute__((const))"
|
MORE_CFLAGS += -Wno-unused -Wno-format -DGCCCONSTFUNC="__attribute__((const))"
|
||||||
MORE_CFLAGS += -fexceptions -fpermissive
|
MORE_CFLAGS += -fexceptions -fpermissive
|
||||||
MORE_CFLAGS += -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free
|
|
||||||
|
|
||||||
LDFLAGS += -lpthread -lm -lz -lpng -lrt -lxml2 -lFLAC -lmpg123 -ldl -ltcmalloc
|
LDFLAGS += -lpthread -lm -lz -lpng -lrt -lxml2 -lFLAC -lmpg123 -ldl
|
||||||
LDFLAGS += -lSDL2 -lSDL2_image -lSDL2_ttf -lguisan -L/opt/vc/lib -Lsrc/guisan/lib
|
LDFLAGS += -lSDL2 -lSDL2_image -lSDL2_ttf -lguisan -L/opt/vc/lib -Lsrc/guisan/lib
|
||||||
|
|
||||||
ifndef DEBUG
|
ifndef DEBUG
|
||||||
MORE_CFLAGS += -Ofast -pipe -Wno-write-strings
|
MORE_CFLAGS += -Ofast -pipe -Wno-write-strings
|
||||||
else
|
else
|
||||||
MORE_CFLAGS += -g -DDEBUG -Wl,--export-dynamic
|
MORE_CFLAGS += -g -DDEBUG -Wl,--export-dynamic
|
||||||
LDFLAGS += -lprofiler
|
MORE_CFLAGS += -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free
|
||||||
|
LDFLAGS += -ltcmalloc -lprofiler
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ASFLAGS += $(CPU_FLAGS)
|
ASFLAGS += $(CPU_FLAGS)
|
||||||
|
@ -183,7 +183,12 @@ OBJS = \
|
||||||
src/osdep/gui/Navigation.o
|
src/osdep/gui/Navigation.o
|
||||||
|
|
||||||
OBJS += src/osdep/picasso96.o
|
OBJS += src/osdep/picasso96.o
|
||||||
OBJS += src/osdep/neon_helper.o
|
|
||||||
|
ifeq ($(PLATFORM),rpi1)
|
||||||
|
OBJS += src/osdep/arm_helper.o
|
||||||
|
else
|
||||||
|
OBJS += src/osdep/neon_helper.o
|
||||||
|
endif
|
||||||
|
|
||||||
OBJS += src/newcpu.o
|
OBJS += src/newcpu.o
|
||||||
OBJS += src/newcpu_common.o
|
OBJS += src/newcpu_common.o
|
||||||
|
@ -201,6 +206,9 @@ OBJS += src/jit/compemu_support.o
|
||||||
src/osdep/neon_helper.o: src/osdep/neon_helper.s
|
src/osdep/neon_helper.o: src/osdep/neon_helper.s
|
||||||
$(CXX) $(CPU_FLAGS) -Wall -o src/osdep/neon_helper.o -c src/osdep/neon_helper.s
|
$(CXX) $(CPU_FLAGS) -Wall -o src/osdep/neon_helper.o -c src/osdep/neon_helper.s
|
||||||
|
|
||||||
|
src/osdep/arm_helper.o: src/osdep/arm_helper.s
|
||||||
|
$(CXX) $(CPU_FLAGS) -Wall -o src/osdep/arm_helper.o -c src/osdep/arm_helper.s
|
||||||
|
|
||||||
$(PROG): $(OBJS)
|
$(PROG): $(OBJS)
|
||||||
$(CXX) $(CXXFLAGS) -o $(PROG) $(OBJS) $(LDFLAGS)
|
$(CXX) $(CXXFLAGS) -o $(PROG) $(OBJS) $(LDFLAGS)
|
||||||
ifndef DEBUG
|
ifndef DEBUG
|
||||||
|
|
97
src/osdep/arm_helper.s
Normal file
97
src/osdep/arm_helper.s
Normal file
|
@ -0,0 +1,97 @@
|
||||||
|
@ Hand-written ARM routines that speed up screen copies for drawing.cpp and custom.cpp.
@ The Makefile links this file instead of neon_helper.s when PLATFORM is rpi1.

        .text
        .arm

        .globl  copy_screen_8bit
        .globl  copy_screen_16bit_swap
        .globl  copy_screen_32bit_to_16bit

        @ On ARM targets GNU as reads the .align operand as a power of two, so
        @ ".align 8" asks for 2^8 = 256-byte alignment. .p2align states the
        @ same request without the ambiguity.
        .p2align 8
|
||||||
|
|
||||||
|
|
||||||
|
@----------------------------------------------------------------
@ copy_screen_8bit
@
@ void copy_screen_8bit(uae_u8 *dst, uae_u8 *src, int bytes, uae_u32 *clut);
@
@ Palette expansion: every source byte is used as an index into clut, and
@ the low halfword of the 32-bit entry found there is stored to dst.
@
@ In:
@   r0  dst    output pointer, advanced by 2 bytes per pixel
@   r1  src    input pointer, one index byte per pixel
@   r2  bytes  source byte count, always a multiple of 64: even number of
@              lines, pixels per line a multiple of 32
@              (320, 640, 800, 1024, 1152, 1280)
@   r3  clut   table of 32-bit entries, indexed by pixel value
@
@ Working registers (r4-r6 and lr are saved on entry and restored on exit):
@   r4  four packed source pixels
@   r5  index of the pixel being converted
@   r6  clut entry for that pixel
@   lr  bytes left in the current 64-byte chunk
@
@ Both loops test their counter after the body has run, so one 64-byte
@ chunk is always processed before bytes is examined.
@----------------------------------------------------------------
copy_screen_8bit:
        stmfd   sp!, {r4-r6, lr}
.Lcopy_screen_8bit_chunk:
        pld     [r1, #192]              @ prefetch source 192 bytes ahead
        mov     lr, #64                 @ one chunk = 64 source bytes
.Lcopy_screen_8bit_word:
        ldr     r4, [r1], #4            @ fetch 4 pixels at once
        and     r5, r4, #0xff           @ pixel in bits 7-0
        ldr     r6, [r3, r5, lsl #2]
        mov     r5, r4, lsr #8
        and     r5, r5, #0xff           @ pixel in bits 15-8
        strh    r6, [r0], #2
        ldr     r6, [r3, r5, lsl #2]
        mov     r5, r4, lsr #16
        and     r5, r5, #0xff           @ pixel in bits 23-16
        strh    r6, [r0], #2
        ldr     r6, [r3, r5, lsl #2]
        mov     r5, r4, lsr #24         @ pixel in bits 31-24, no mask needed
        strh    r6, [r0], #2
        ldr     r6, [r3, r5, lsl #2]
        subs    lr, lr, #4              @ flags survive the strh below
        strh    r6, [r0], #2
        bgt     .Lcopy_screen_8bit_word
        subs    r2, r2, #64
        bgt     .Lcopy_screen_8bit_chunk
        ldmfd   sp!, {r4-r6, pc}        @ restore and return
|
||||||
|
|
||||||
|
|
||||||
|
@----------------------------------------------------------------
@ copy_screen_16bit_swap
@
@ void copy_screen_16bit_swap(uae_u8 *dst, uae_u8 *src, int bytes);
@
@ Copies src to dst one 32-bit word (two 16-bit pixels) at a time,
@ swapping the two bytes inside each halfword with rev16.
@
@ r0: uae_u8 *dst
@ r1: uae_u8 *src
@ r2: int bytes   callers pass a multiple of 128: even number of lines,
@                 2 bytes per pixel, number of pixel per line is multiple
@                 of 32 (320, 640, 800, 1024, 1152, 1280).
@                 Other values are tolerated: only complete 4-byte words
@                 are copied, and a count below 4 (zero or negative
@                 included) copies nothing.
@
@ Clobbers r0-r3 and the flags. Uses no stack.
@----------------------------------------------------------------
copy_screen_16bit_swap:
        subs    r2, r2, #4              @ r2 = bytes remaining after the next word
        bxlt    lr                      @ fewer than 4 bytes: nothing to copy
.Lcopy_screen_16bit_swap_loop:
        ldr     r3, [r1], #4
        rev16   r3, r3                  @ swap bytes within each halfword
        str     r3, [r0], #4
        subs    r2, r2, #4
        bge     .Lcopy_screen_16bit_swap_loop   @ signed test: a stray count cannot wrap past zero
        bx      lr
|
||||||
|
|
||||||
|
|
||||||
|
@----------------------------------------------------------------
@ copy_screen_32bit_to_16bit
@
@ void copy_screen_32bit_to_16bit(uae_u8 *dst, uae_u8 *src, int bytes);
@
@ Converts 32-bit pixels to 16-bit pixels, keeping the top 5 bits of red,
@ the top 6 bits of green and the top 5 bits of blue. The fourth source
@ byte is discarded.
@
@ r0: uae_u8 *dst - Format (bits): rrrr rggg gggb bbbb
@ r1: uae_u8 *src - Format (bytes) in memory rgba
@ r2: int bytes   - number of SOURCE bytes (4 per pixel). Only complete
@                   4-byte pixels are converted; a count below 4 (zero or
@                   negative included) converts nothing.
@
@ Clobbers r0-r3 and the flags. r4-r6 are saved and restored.
@----------------------------------------------------------------
copy_screen_32bit_to_16bit:
        subs    r2, r2, #4              @ r2 = bytes remaining after the next pixel
        bxlt    lr                      @ no complete pixel: return before touching the stack
        stmdb   sp!, {r4-r6, lr}
copy_screen_32bit_to_16bit_loop:
        ldr     r3, [r1], #4
        rev     r3, r3                  @ byte-reverse the loaded word so the shifts below pick out r, g, b
        lsr     r4, r3, #27             @ r4 = bits 31-27 (5 bits, red)
        lsr     r5, r3, #18
        and     r5, r5, #63             @ r5 = bits 23-18 (6 bits, green)
        lsr     r6, r3, #11
        and     r6, r6, #31             @ r6 = bits 15-11 (5 bits, blue)
        orr     r6, r6, r5, lsl #5      @ green into bits 10-5
        orr     r6, r6, r4, lsl #11     @ red into bits 15-11
        strh    r6, [r0], #2
        subs    r2, r2, #4
        bge     copy_screen_32bit_to_16bit_loop  @ signed test: a stray count cannot wrap past zero
        ldmia   sp!, {r4-r6, pc}
|
Loading…
Add table
Add a link
Reference in a new issue