ARM assembly helpers update

Merged from TomB (not really used)
This commit is contained in:
Dimitris Panokostas 2020-06-29 08:26:20 +02:00
parent 4789eea572
commit b022f3d313
3 changed files with 264 additions and 15 deletions

View file

@ -4,9 +4,12 @@
.global save_host_fp_regs
.global restore_host_fp_regs
.global copy_screen_8bit
.global copy_screen_8bit_to_16bit
.global copy_screen_8bit_to_32bit
.global copy_screen_16bit_swap
.global copy_screen_16bit_to_32bit
.global copy_screen_32bit_to_16bit
.global copy_screen_32bit_to_32bit
.global ARM_doline_n1
.global NEON_doline_n2
.global NEON_doline_n3
@ -40,17 +43,17 @@ restore_host_fp_regs:
//----------------------------------------------------------------
// copy_screen_8bit
// copy_screen_8bit_to_16bit
//
// x0: uae_u8 *dst
// x1: uae_u8 *src
// x2: int bytes always a multiple of 64 (an even number of lines; the number of pixels per line is a multiple of 32: 320, 640, 800, 1024, 1152, 1280)
// x3: uae_u32 *clut
//
// void copy_screen_8bit(uae_u8 *dst, uae_u8 *src, int bytes, uae_u32 *clut);
// void copy_screen_8bit_to_16bit(uae_u8 *dst, uae_u8 *src, int bytes, uae_u32 *clut);
//
//----------------------------------------------------------------
copy_screen_8bit:
copy_screen_8bit_to_16bit:
mov x7, #64
copy_screen_8bit_loop:
ldrsw x4, [x1], #4
@ -69,10 +72,40 @@ copy_screen_8bit_loop:
strh w6, [x0], #2
bgt copy_screen_8bit_loop
subs x2, x2, #64
bgt copy_screen_8bit
bgt copy_screen_8bit_to_16bit
ret
//----------------------------------------------------------------
// copy_screen_8bit_to_32bit
//
// Expands 8-bit pixels to 32-bit pixels: every source byte is used as
// an index into clut and the selected 32-bit entry is stored to dst.
//
// x0: uae_u8 *dst
// x1: uae_u8 *src
// w2: int bytes always a multiple of 64: even number of lines, number of pixels per line is a multiple of 32 (320, 640, 800, 1024, 1152, 1280)
// x3: uae_u32 *clut
//
// void copy_screen_8bit_to_32bit(uae_u8 *dst, uae_u8 *src, int bytes, uae_u32 *clut);
//
// Four source bytes are converted per pass and the remaining count is
// tested only after a pass, so the body always runs at least once.
// Scratch registers: x4, x5, x6.
//----------------------------------------------------------------
copy_screen_8bit_to_32bit:
    ldr     w4, [x1], #4                // w4 = next four 8-bit pixels
    // "bytes" is a 32-bit int: AAPCS64 leaves bits 63:32 of x2
    // unspecified, so the count must be kept in w2, not x2.
    subs    w2, w2, #4                  // flags are consumed by the bgt below
    ubfx    w5, w4, #0, #8              // pixel 0 (writing w5 clears x5[63:32])
    ldr     w6, [x3, x5, lsl #2]        // w6 = clut[pixel 0]
    ubfx    w5, w4, #8, #8              // pixel 1
    str     w6, [x0], #4
    ldr     w6, [x3, x5, lsl #2]        // w6 = clut[pixel 1]
    ubfx    w5, w4, #16, #8             // pixel 2
    str     w6, [x0], #4
    ldr     w6, [x3, x5, lsl #2]        // w6 = clut[pixel 2]
    ubfx    w5, w4, #24, #8             // pixel 3
    str     w6, [x0], #4
    ldr     w6, [x3, x5, lsl #2]        // w6 = clut[pixel 3]
    str     w6, [x0], #4
    bgt     copy_screen_8bit_to_32bit   // loop while bytes remain
    ret
//----------------------------------------------------------------
// copy_screen_16bit_swap
//
@ -95,11 +128,36 @@ copy_screen_16bit_swap:
ret
//----------------------------------------------------------------
// copy_screen_16bit_to_32bit
//
// Widens byte-swapped 16-bit 5:6:5 pixels to 32-bit pixels.
//
// x0: uae_u8 *dst - one 32-bit word per pixel:
//                   bits 23..19 = r, bits 15..10 = g, bits 7..3 = b,
//                   every other bit is zero
// x1: uae_u8 *src - Format (bits) of each halfword as loaded: gggb bbbb rrrr rggg
// w2: int bytes always a multiple of 128: even number of lines, 2 bytes per pixel, number of pixel per line is multiple of 32 (320, 640, 800, 1024, 1152, 1280)
//
// void copy_screen_16bit_to_32bit(uae_u8 *dst, uae_u8 *src, int bytes);
//
// One pixel per pass; the loop ends when the byte count reaches
// exactly zero. Scratch registers: w3, w4, w5, w6.
//----------------------------------------------------------------
copy_screen_16bit_to_32bit:
    ldrh    w3, [x1], #2                // next source pixel
    subs    w2, w2, #2                  // flags are consumed by the bne below
    rev16   w3, w3                      // swap the two bytes -> rrrr rggg gggb bbbb
    ubfiz   w4, w3, #3, #5              // b: bits 4..0   -> bits 7..3
    ubfx    w5, w3, #5, #6              // g: bits 10..5
    ubfx    w6, w3, #11, #5             // r: bits 15..11
    orr     w4, w4, w5, lsl #10         // g -> bits 15..10
    orr     w4, w4, w6, lsl #19         // r -> bits 23..19
    str     w4, [x0], #4
    bne     copy_screen_16bit_to_32bit
    ret
//----------------------------------------------------------------
// copy_screen_32bit_to_16bit
//
// x0: uae_u8 *dst - Format (bits): rrrr rggg gggb bbbb
// x1: uae_u8 *src - Format (bytes) in memory rgba
// x1: uae_u8 *src - Format (bytes) in memory abgr
// x2: int bytes
//
// void copy_screen_32bit_to_16bit(uae_u8 *dst, uae_u8 *src, int bytes);
@ -124,6 +182,26 @@ copy_screen_32bit_to_16bit_loop:
ret
//----------------------------------------------------------------
// copy_screen_32bit_to_32bit
//
// Reorders the channels of 32-bit pixels: every 32-bit word is
// byte-reversed and then shifted right by 8, which leaves the top
// byte of each result word zero.
//
// x0: uae_u8 *dst - Format (bytes): in memory argb
// x1: uae_u8 *src - Format (bytes): in memory abgr
// w2: int bytes   - processed 16 at a time (four pixels per pass);
//                   the loop ends when the count reaches exactly zero
//
// void copy_screen_32bit_to_32bit(uae_u8 *dst, uae_u8 *src, int bytes);
//
// Scratch registers: v0.
//----------------------------------------------------------------
copy_screen_32bit_to_32bit:
    ld1     {v0.4s}, [x1], #16          // four source pixels
    rev32   v0.16b, v0.16b              // reverse the bytes inside each 32-bit lane
    subs    w2, w2, #16                 // flags are consumed by the bne below
    ushr    v0.4s, v0.4s, #8            // drop the low byte, zero the top byte
    st1     {v0.4s}, [x0], #16
    bne     copy_screen_32bit_to_32bit
    ret
//----------------------------------------------------------------
// ARM_doline_n1
//

View file

@ -4,9 +4,12 @@
.global save_host_fp_regs
.global restore_host_fp_regs
.global copy_screen_8bit
.global copy_screen_8bit_to_16bit
.global copy_screen_8bit_to_32bit
.global copy_screen_16bit_swap
.global copy_screen_16bit_to_32bit
.global copy_screen_32bit_to_16bit
.global copy_screen_32bit_to_32bit
.text
@ -28,17 +31,17 @@ restore_host_fp_regs:
@----------------------------------------------------------------
@ copy_screen_8bit
@ copy_screen_8bit_to_16bit
@
@ r0: uae_u8 *dst
@ r1: uae_u8 *src
@ r2: int bytes always a multiple of 64 (an even number of lines; the number of pixels per line is a multiple of 32: 320, 640, 800, 1024, 1152, 1280)
@ r3: uae_u32 *clut
@
@ void copy_screen_8bit(uae_u8 *dst, uae_u8 *src, int bytes, uae_u32 *clut);
@ void copy_screen_8bit_to_16bit(uae_u8 *dst, uae_u8 *src, int bytes, uae_u32 *clut);
@
@----------------------------------------------------------------
copy_screen_8bit:
copy_screen_8bit_to_16bit:
stmdb sp!, {r4-r6, lr}
copy_screen_8bit_loop:
pld [r1, #192]
@ -66,6 +69,41 @@ copy_screen_8bit_loop_2:
ldmia sp!, {r4-r6, pc}
@----------------------------------------------------------------
@ copy_screen_8bit_to_32bit
@
@ Expands 8-bit pixels to 32-bit pixels: every source byte is used as
@ an index into clut and the selected 32-bit entry is stored to dst.
@
@ r0: uae_u8 *dst
@ r1: uae_u8 *src
@ r2: int bytes always a multiple of 64: even number of lines, number of pixel per line is multiple of 32 (320, 640, 800, 1024, 1152, 1280)
@ r3: uae_u32 *clut
@
@ void copy_screen_8bit_to_32bit(uae_u8 *dst, uae_u8 *src, int bytes, uae_u32 *clut);
@
@ Four source bytes are converted per pass and the remaining count is
@ tested only after a pass, so the body always runs at least once.
@ r4, r5 and lr are used as scratch and restored on return.
@----------------------------------------------------------------
copy_screen_8bit_to_32bit:
    stmfd   sp!, {r4-r5, lr}
copy_screen_8bit_to_32bit_loop:
    ldr     r4, [r1], #4                @ r4 = next four 8-bit pixels
    subs    r2, r2, #4                  @ flags are consumed by the bgt below
    uxtb    r5, r4                      @ pixel 0
    ldr     lr, [r3, r5, lsl #2]        @ lr = clut[pixel 0]
    uxtb    r5, r4, ror #8              @ pixel 1
    str     lr, [r0], #4
    ldr     lr, [r3, r5, lsl #2]        @ lr = clut[pixel 1]
    uxtb    r5, r4, ror #16             @ pixel 2
    str     lr, [r0], #4
    ldr     lr, [r3, r5, lsl #2]        @ lr = clut[pixel 2]
    mov     r5, r4, lsr #24             @ pixel 3 (top byte, no mask needed)
    str     lr, [r0], #4
    ldr     lr, [r3, r5, lsl #2]        @ lr = clut[pixel 3]
    str     lr, [r0], #4
    bgt     copy_screen_8bit_to_32bit_loop
    ldmfd   sp!, {r4-r5, pc}            @ restore scratch registers and return
@----------------------------------------------------------------
@ copy_screen_16bit_swap
@
@ -85,6 +123,35 @@ bne copy_screen_16bit_swap
bx lr
@----------------------------------------------------------------
@ copy_screen_16bit_to_32bit
@
@ Widens 16-bit pixels to 32-bit pixels. The two bytes of each source
@ halfword are swapped first; the swapped value is then split as
@ bits 15..11 / 10..5 / 4..0 and written to bits 23..19 / 15..10 / 7..3
@ of the 32-bit result. Every other result bit is zero.
@
@ r0: uae_u8 *dst
@ r1: uae_u8 *src
@ r2: int bytes always a multiple of 128: even number of lines, 2 bytes per pixel, number of pixel per line is multiple of 32 (320, 640, 800, 1024, 1152, 1280)
@
@ void copy_screen_16bit_to_32bit(uae_u8 *dst, uae_u8 *src, int bytes);
@
@ One pixel per pass; the loop ends when the byte count reaches
@ exactly zero. r3 is scratch; r4 and lr are scratch and restored.
@----------------------------------------------------------------
copy_screen_16bit_to_32bit:
    stmfd   sp!, {r4, lr}
copy_screen_16bit_to_32bit_loop:
    ldrh    r3, [r1], #2                @ next source pixel
    subs    r2, r2, #2                  @ flags are consumed by the bne below
    rev16   r3, r3                      @ swap the two bytes of the pixel
    and     lr, r3, #0x1f               @ low field, bits 4..0
    and     r4, r3, #0x7e0              @ middle field, still at bits 10..5
    mov     lr, lr, lsl #3              @ low field    -> bits 7..3
    orr     lr, lr, r4, lsl #5          @ middle field -> bits 15..10
    and     r4, r3, #0xf800             @ high field, still at bits 15..11
    orr     lr, lr, r4, lsl #8          @ high field   -> bits 23..19
    str     lr, [r0], #4
    bne     copy_screen_16bit_to_32bit_loop
    ldmfd   sp!, {r4, pc}               @ restore r4 and return
@----------------------------------------------------------------
@ copy_screen_32bit_to_16bit
@
@ -111,3 +178,23 @@ strh r6, [r0], #2
subs r2, r2, #4
bne copy_screen_32bit_to_16bit_loop
ldmia sp!, {r4-r6, pc}
@----------------------------------------------------------------
@ copy_screen_32bit_to_32bit
@
@ Reorders the channels of 32-bit pixels: every 32-bit word is
@ byte-reversed and then shifted right by 8. On a little-endian
@ target the source bytes s0 s1 s2 s3 are therefore written back
@ as s2 s1 s0 00.
@
@ r0: uae_u8 *dst
@ r1: uae_u8 *src
@ r2: int bytes - processed 4 at a time (one pixel per pass); the
@                 loop ends when the count reaches exactly zero
@
@ void copy_screen_32bit_to_32bit(uae_u8 *dst, uae_u8 *src, int bytes);
@
@ Scratch registers: r12.
@----------------------------------------------------------------
copy_screen_32bit_to_32bit:
    ldr     r12, [r1], #4               @ next source pixel
    subs    r2, r2, #4                  @ flags are consumed by the bne below
    rev     r12, r12                    @ reverse the four bytes
    mov     r12, r12, lsr #8            @ drop the low byte, zero the top byte
    str     r12, [r0], #4
    bne     copy_screen_32bit_to_32bit
    bx      lr

View file

@ -4,9 +4,12 @@
.global save_host_fp_regs
.global restore_host_fp_regs
.global copy_screen_8bit
.global copy_screen_8bit_to_16bit
.global copy_screen_8bit_to_32bit
.global copy_screen_16bit_swap
.global copy_screen_16bit_to_32bit
.global copy_screen_32bit_to_16bit
.global copy_screen_32bit_to_32bit
.global ARM_doline_n1
.global NEON_doline_n2
.global NEON_doline_n3
@ -36,17 +39,17 @@ restore_host_fp_regs:
@----------------------------------------------------------------
@ copy_screen_8bit
@ copy_screen_8bit_to_16bit
@
@ r0: uae_u8 *dst
@ r1: uae_u8 *src
@ r2: int bytes always a multiple of 64 (an even number of lines; the number of pixels per line is a multiple of 32: 320, 640, 800, 1024, 1152, 1280)
@ r3: uae_u32 *clut
@
@ void copy_screen_8bit(uae_u8 *dst, uae_u8 *src, int bytes, uae_u32 *clut);
@ void copy_screen_8bit_to_16bit(uae_u8 *dst, uae_u8 *src, int bytes, uae_u32 *clut);
@
@----------------------------------------------------------------
copy_screen_8bit:
copy_screen_8bit_to_16bit:
stmdb sp!, {r4-r6, lr}
copy_screen_8bit_loop:
pld [r1, #192]
@ -72,6 +75,38 @@ copy_screen_8bit_loop_2:
ldmia sp!, {r4-r6, pc}
@----------------------------------------------------------------
@ copy_screen_8bit_to_32bit
@
@ Expands 8-bit pixels to 32-bit pixels: every source byte is used as
@ an index into clut and the selected 32-bit entry is stored to dst.
@
@ r0: uae_u8 *dst
@ r1: uae_u8 *src
@ r2: int bytes always a multiple of 64: even number of lines, number of pixel per line is multiple of 32 (320, 640, 800, 1024, 1152, 1280)
@ r3: uae_u32 *clut
@
@ void copy_screen_8bit_to_32bit(uae_u8 *dst, uae_u8 *src, int bytes, uae_u32 *clut);
@
@ Four source bytes are converted per pass and the remaining count is
@ tested only after a pass, so the body always runs at least once.
@ r4, r5 and lr are used as scratch and restored on return.
@----------------------------------------------------------------
copy_screen_8bit_to_32bit:
    stmfd   sp!, {r4-r5, lr}
copy_screen_8bit_to_32bit_loop:
    ldr     r4, [r1], #4                @ r4 = next four 8-bit pixels
    subs    r2, r2, #4                  @ flags are consumed by the bgt below
    and     r5, r4, #0xff               @ pixel 0
    ldr     lr, [r3, r5, lsl #2]        @ lr = clut[pixel 0]
    uxtb    r5, r4, ror #8              @ pixel 1
    str     lr, [r0], #4
    ldr     lr, [r3, r5, lsl #2]        @ lr = clut[pixel 1]
    uxtb    r5, r4, ror #16             @ pixel 2
    str     lr, [r0], #4
    ldr     lr, [r3, r5, lsl #2]        @ lr = clut[pixel 2]
    mov     r5, r4, lsr #24             @ pixel 3 (top byte, no mask needed)
    str     lr, [r0], #4
    ldr     lr, [r3, r5, lsl #2]        @ lr = clut[pixel 3]
    str     lr, [r0], #4
    bgt     copy_screen_8bit_to_32bit_loop
    ldmfd   sp!, {r4-r5, pc}            @ restore scratch registers and return
@----------------------------------------------------------------
@ copy_screen_16bit_swap
@
@ -105,6 +140,33 @@ copy_screen_16bit_swap:
bx lr
@----------------------------------------------------------------
@ copy_screen_16bit_to_32bit
@
@ Widens 16-bit pixels to 32-bit pixels. The two bytes of each source
@ halfword are swapped first; the swapped value is then split as
@ bits 15..11 / 10..5 / 4..0 and written to bits 23..19 / 15..10 / 7..3
@ of the 32-bit result. Every other result bit is zero.
@
@ r0: uae_u8 *dst
@ r1: uae_u8 *src
@ r2: int bytes always a multiple of 128: even number of lines, 2 bytes per pixel, number of pixel per line is multiple of 32 (320, 640, 800, 1024, 1152, 1280)
@
@ void copy_screen_16bit_to_32bit(uae_u8 *dst, uae_u8 *src, int bytes);
@
@ One pixel per pass; the loop ends when the byte count reaches
@ exactly zero. r3 and r12 are scratch; lr is scratch and restored.
@----------------------------------------------------------------
copy_screen_16bit_to_32bit:
    stmfd   sp!, {lr}
copy_screen_16bit_to_32bit_loop:
    ldrh    r3, [r1], #2                @ next source pixel
    subs    r2, r2, #2                  @ flags are consumed by the bne below
    rev16   r3, r3                      @ swap the two bytes of the pixel
    and     lr, r3, #0x1f               @ low field, bits 4..0
    ubfx    r12, r3, #5, #6             @ middle field, bits 10..5
    mov     lr, lr, lsl #3              @ low field    -> bits 7..3
    orr     lr, lr, r12, lsl #10        @ middle field -> bits 15..10
    ubfx    r12, r3, #11, #5            @ high field, bits 15..11
    orr     lr, lr, r12, lsl #19        @ high field   -> bits 23..19
    str     lr, [r0], #4
    bne     copy_screen_16bit_to_32bit_loop
    ldmfd   sp!, {pc}                   @ return
@----------------------------------------------------------------
@ copy_screen_32bit_to_16bit
@
@ -131,7 +193,29 @@ copy_screen_32bit_to_16bit:
vst2.8 {d18-d19}, [r0]!
bne copy_screen_32bit_to_16bit
bx lr
@----------------------------------------------------------------
@ copy_screen_32bit_to_32bit
@
@ Reorders the channels of 32-bit pixels: every 32-bit word is
@ byte-reversed and then shifted right by 8. On a little-endian
@ target the source bytes s0 s1 s2 s3 are therefore written back
@ as s2 s1 s0 00.
@
@ r0: uae_u8 *dst
@ r1: uae_u8 *src
@ r2: int bytes - processed 16 at a time (four pixels per pass);
@                 the loop ends when the count reaches exactly zero
@
@ void copy_screen_32bit_to_32bit(uae_u8 *dst, uae_u8 *src, int bytes);
@
@ Scratch registers: q9 (d18/d19).
@----------------------------------------------------------------
copy_screen_32bit_to_32bit:
    vld1.64     {d18-d19}, [r1]!        @ four source pixels into q9
    subs        r2, r2, #16             @ flags are consumed by the bne below
    vrev32.8    q9, q9                  @ reverse the bytes inside each 32-bit lane
    vshr.u32    q9, q9, #8              @ drop the low byte, zero the top byte
    vst1.64     {d18-d19}, [r0]!
    bne         copy_screen_32bit_to_32bit
    bx          lr
@----------------------------------------------------------------
@ ARM_doline_n1