ARM assembly helpers update
Merged from TomB (not really used)
This commit is contained in:
parent
4789eea572
commit
b022f3d313
3 changed files with 264 additions and 15 deletions
|
@ -4,9 +4,12 @@
|
|||
|
||||
.global save_host_fp_regs
|
||||
.global restore_host_fp_regs
|
||||
.global copy_screen_8bit
|
||||
.global copy_screen_8bit_to_16bit
|
||||
.global copy_screen_8bit_to_32bit
|
||||
.global copy_screen_16bit_swap
|
||||
.global copy_screen_16bit_to_32bit
|
||||
.global copy_screen_32bit_to_16bit
|
||||
.global copy_screen_32bit_to_32bit
|
||||
.global ARM_doline_n1
|
||||
.global NEON_doline_n2
|
||||
.global NEON_doline_n3
|
||||
|
@ -40,17 +43,17 @@ restore_host_fp_regs:
|
|||
|
||||
|
||||
//----------------------------------------------------------------
|
||||
// copy_screen_8bit
|
||||
// copy_screen_8bit_to_16bit
|
||||
//
|
||||
// x0: uae_u8 *dst
|
||||
// x1: uae_u8 *src
|
||||
// x2: int bytes always a multiple of 64: even number of lines, number of pixel per line is multiple of 32 (320, 640, 800, 1024, 1152, 1280)
|
||||
// x3: uae_u32 *clut
|
||||
//
|
||||
// void copy_screen_8bit(uae_u8 *dst, uae_u8 *src, int bytes, uae_u32 *clut);
|
||||
// void copy_screen_8bit_to_16bit(uae_u8 *dst, uae_u8 *src, int bytes, uae_u32 *clut);
|
||||
//
|
||||
//----------------------------------------------------------------
|
||||
copy_screen_8bit:
|
||||
copy_screen_8bit_to_16bit:
|
||||
mov x7, #64
|
||||
copy_screen_8bit_loop:
|
||||
ldrsw x4, [x1], #4
|
||||
|
@ -69,10 +72,40 @@ copy_screen_8bit_loop:
|
|||
strh w6, [x0], #2
|
||||
bgt copy_screen_8bit_loop
|
||||
subs x2, x2, #64
|
||||
bgt copy_screen_8bit
|
||||
bgt copy_screen_8bit_to_16bit
|
||||
ret
|
||||
|
||||
|
||||
//----------------------------------------------------------------
// copy_screen_8bit_to_32bit
//
// Expands 8-bit colour indices to 32-bit pixels: every source byte
// selects one 32-bit entry of clut, which is written to dst.
//
// x0: uae_u8 *dst
// x1: uae_u8 *src
// w2: int bytes     always a multiple of 64: even number of lines, number of pixel per line is multiple of 32 (320, 640, 800, 1024, 1152, 1280)
// x3: uae_u32 *clut
//
// void copy_screen_8bit_to_32bit(uae_u8 *dst, uae_u8 *src, int bytes, uae_u32 *clut);
//
// Four source bytes are handled per iteration; the first group is
// processed before the remaining count is tested.
// Scratch registers: x4, x5, x6 and the condition flags.
//
//----------------------------------------------------------------
copy_screen_8bit_to_32bit:
  ldr   w4, [x1], #4                // four palette indices in one load
  subs  w2, w2, #4                  // 'bytes' is an int: only the low 32 bits of x2 are defined by the ABI
  ubfx  w5, w4, #0, #8              // index 0 (writing w5 also clears the top half of x5)
  ldr   w6, [x3, x5, lsl #2]        // clut[index 0]
  ubfx  w5, w4, #8, #8              // index 1
  str   w6, [x0], #4
  ldr   w6, [x3, x5, lsl #2]        // clut[index 1]
  ubfx  w5, w4, #16, #8             // index 2
  str   w6, [x0], #4
  ldr   w6, [x3, x5, lsl #2]        // clut[index 2]
  ubfx  w5, w4, #24, #8             // index 3
  str   w6, [x0], #4
  ldr   w6, [x3, x5, lsl #2]        // clut[index 3]
  str   w6, [x0], #4
  bgt   copy_screen_8bit_to_32bit   // flags are still those of the subs above
  ret
|
||||
|
||||
|
||||
//----------------------------------------------------------------
|
||||
// copy_screen_16bit_swap
|
||||
//
|
||||
|
@ -95,11 +128,36 @@ copy_screen_16bit_swap:
|
|||
ret
|
||||
|
||||
|
||||
//----------------------------------------------------------------
// copy_screen_16bit_to_32bit
//
// Widens 16-bit pixels to 32-bit pixels, one pixel per iteration.
//
// x0: uae_u8 *dst - one 32-bit word per pixel: blue in bits 7:3,
//                   green in bits 15:10, red in bits 23:19, rest zero
// x1: uae_u8 *src - Format (bits): gggb bbbb rrrr rggg
//                   (the two bytes are swapped before the fields are split)
// w2: int bytes     always a multiple of 128: even number of lines, 2 bytes per pixel, number of pixel per line is multiple of 32 (320, 640, 800, 1024, 1152, 1280)
//
// void copy_screen_16bit_to_32bit(uae_u8 *dst, uae_u8 *src, int bytes);
//
// The loop ends only when the count reaches exactly zero.
// Scratch registers: w3, w4 and the condition flags.
//
//----------------------------------------------------------------
copy_screen_16bit_to_32bit:
1:
  ldrh  w3, [x1], #2                // next source pixel
  rev16 w3, w3                      // byte swap -> rrrr rggg gggb bbbb
  ubfiz w4, w3, #3, #5              // blue: (pixel & 31) << 3, every other bit cleared
  lsr   w3, w3, #5                  // green is now in the low 6 bits
  bfi   w4, w3, #10, #6
  lsr   w3, w3, #6                  // red is now in the low 5 bits
  bfi   w4, w3, #19, #5
  subs  w2, w2, #2                  // two source bytes consumed
  str   w4, [x0], #4                // store does not touch the flags
  bne   1b
  ret
|
||||
|
||||
|
||||
//----------------------------------------------------------------
|
||||
// copy_screen_32bit_to_16bit
|
||||
//
|
||||
// x0: uae_u8 *dst - Format (bits): rrrr rggg gggb bbbb
|
||||
// x1: uae_u8 *src - Format (bytes) in memory rgba
|
||||
// x1: uae_u8 *src - Format (bytes) in memory abgr
|
||||
// x2: int bytes
|
||||
//
|
||||
// void copy_screen_32bit_to_16bit(uae_u8 *dst, uae_u8 *src, int bytes);
|
||||
|
@ -124,6 +182,26 @@ copy_screen_32bit_to_16bit_loop:
|
|||
ret
|
||||
|
||||
|
||||
//----------------------------------------------------------------
// copy_screen_32bit_to_32bit
//
// Copies 32-bit pixels while reordering their bytes: every 32-bit
// word is byte-reversed and then shifted right by 8 bits, so the
// top byte of each stored word is zero
// (e.g. the word 0xAABBGGRR is stored as 0x00RRGGBB).
//
// x0: uae_u8 *dst
// x1: uae_u8 *src
// w2: int bytes     consumed 16 at a time; the loop ends only when
//                   the count reaches exactly zero
//
// void copy_screen_32bit_to_32bit(uae_u8 *dst, uae_u8 *src, int bytes);
//
// Scratch registers: v3 and the condition flags.
//
//----------------------------------------------------------------
copy_screen_32bit_to_32bit:
1:
  ld1   {v3.4S}, [x1], #16          // four pixels per iteration
  rev32 v3.16B, v3.16B              // reverse the bytes inside each word
  ushr  v3.4S, v3.4S, #8            // drop the low byte, zero the top byte
  subs  w2, w2, #16
  st1   {v3.4S}, [x0], #16          // vector store leaves the flags alone
  bne   1b
  ret
|
||||
|
||||
|
||||
//----------------------------------------------------------------
|
||||
// ARM_doline_n1
|
||||
//
|
||||
|
|
|
@ -4,9 +4,12 @@
|
|||
|
||||
.global save_host_fp_regs
|
||||
.global restore_host_fp_regs
|
||||
.global copy_screen_8bit
|
||||
.global copy_screen_8bit_to_16bit
|
||||
.global copy_screen_8bit_to_32bit
|
||||
.global copy_screen_16bit_swap
|
||||
.global copy_screen_16bit_to_32bit
|
||||
.global copy_screen_32bit_to_16bit
|
||||
.global copy_screen_32bit_to_32bit
|
||||
|
||||
.text
|
||||
|
||||
|
@ -28,17 +31,17 @@ restore_host_fp_regs:
|
|||
|
||||
|
||||
@----------------------------------------------------------------
|
||||
@ copy_screen_8bit
|
||||
@ copy_screen_8bit_to_16bit
|
||||
@
|
||||
@ r0: uae_u8 *dst
|
||||
@ r1: uae_u8 *src
|
||||
@ r2: int bytes always a multiple of 64: even number of lines, number of pixel per line is multiple of 32 (320, 640, 800, 1024, 1152, 1280)
|
||||
@ r3: uae_u32 *clut
|
||||
@
|
||||
@ void copy_screen_8bit(uae_u8 *dst, uae_u8 *src, int bytes, uae_u32 *clut);
|
||||
@ void copy_screen_8bit_to_16bit(uae_u8 *dst, uae_u8 *src, int bytes, uae_u32 *clut);
|
||||
@
|
||||
@----------------------------------------------------------------
|
||||
copy_screen_8bit:
|
||||
copy_screen_8bit_to_16bit:
|
||||
stmdb sp!, {r4-r6, lr}
|
||||
copy_screen_8bit_loop:
|
||||
pld [r1, #192]
|
||||
|
@ -66,6 +69,41 @@ copy_screen_8bit_loop_2:
|
|||
ldmia sp!, {r4-r6, pc}
|
||||
|
||||
|
||||
@----------------------------------------------------------------
@ copy_screen_8bit_to_32bit
@
@ Expands 8-bit colour indices to 32-bit pixels: every source byte
@ selects one 32-bit entry of clut, which is written to dst.
@
@ r0: uae_u8 *dst
@ r1: uae_u8 *src
@ r2: int bytes     always a multiple of 64: even number of lines, number of pixel per line is multiple of 32 (320, 640, 800, 1024, 1152, 1280)
@ r3: uae_u32 *clut
@
@ void copy_screen_8bit_to_32bit(uae_u8 *dst, uae_u8 *src, int bytes, uae_u32 *clut);
@
@ Four source bytes are handled per iteration; the first group is
@ processed before the remaining count is tested.
@ r4, r5 and lr are saved on entry and restored on return.
@
@----------------------------------------------------------------
copy_screen_8bit_to_32bit:
  push  {r4-r5, lr}
.Lcs8to32_quad:
  ldr   r4, [r1], #4                @ four palette indices in one load
  subs  r2, r2, #4                  @ nothing below changes the flags again
  uxtb  r5, r4                      @ index 0
  ldr   lr, [r3, r5, lsl #2]
  uxtb  r5, r4, ror #8              @ index 1
  str   lr, [r0], #4
  ldr   lr, [r3, r5, lsl #2]
  uxtb  r5, r4, ror #16             @ index 2
  str   lr, [r0], #4
  ldr   lr, [r3, r5, lsl #2]
  mov   r5, r4, lsr #24             @ index 3 (the shift already isolates the top byte)
  str   lr, [r0], #4
  ldr   lr, [r3, r5, lsl #2]
  str   lr, [r0], #4
  bgt   .Lcs8to32_quad
  pop   {r4-r5, pc}
|
||||
|
||||
|
||||
@----------------------------------------------------------------
|
||||
@ copy_screen_16bit_swap
|
||||
@
|
||||
|
@ -85,6 +123,35 @@ bne copy_screen_16bit_swap
|
|||
bx lr
|
||||
|
||||
|
||||
@----------------------------------------------------------------
@ copy_screen_16bit_to_32bit
@
@ Widens 16-bit pixels to 32-bit pixels, one pixel per iteration.
@ Each source halfword is byte-swapped and then split into a 5-bit,
@ a 6-bit and a 5-bit field (low to high); the stored word carries
@ them in bits 7:3, 15:10 and 23:19, all other bits are zero.
@
@ r0: uae_u8 *dst
@ r1: uae_u8 *src
@ r2: int bytes     always a multiple of 128: even number of lines, 2 bytes per pixel, number of pixel per line is multiple of 32 (320, 640, 800, 1024, 1152, 1280)
@
@ void copy_screen_16bit_to_32bit(uae_u8 *dst, uae_u8 *src, int bytes);
@
@ The loop ends only when the count reaches exactly zero.
@ r4 and lr are saved on entry and restored on return; r3 is scratch.
@
@----------------------------------------------------------------
copy_screen_16bit_to_32bit:
  push  {r4, lr}
.Lcs16to32_pixel:
  ldrh  r3, [r1], #2                @ next source pixel
  subs  r2, r2, #2                  @ nothing below changes the flags again
  rev16 r3, r3                      @ swap the two bytes of the pixel
  and   lr, r3, #0x1F               @ low 5-bit field
  and   r4, r3, #0x7E0              @ middle 6-bit field, still at bits 10:5
  mov   lr, lr, lsl #3              @ low field    -> bits 7:3
  orr   lr, lr, r4, lsl #5          @ middle field -> bits 15:10
  and   r4, r3, #0xF800             @ high 5-bit field, still at bits 15:11
  orr   lr, lr, r4, lsl #8          @ high field   -> bits 23:19
  str   lr, [r0], #4
  bne   .Lcs16to32_pixel
  pop   {r4, pc}
|
||||
|
||||
|
||||
@----------------------------------------------------------------
|
||||
@ copy_screen_32bit_to_16bit
|
||||
@
|
||||
|
@ -111,3 +178,23 @@ strh r6, [r0], #2
|
|||
subs r2, r2, #4
|
||||
bne copy_screen_32bit_to_16bit_loop
|
||||
ldmia sp!, {r4-r6, pc}
|
||||
|
||||
|
||||
@----------------------------------------------------------------
@ copy_screen_32bit_to_32bit
@
@ Copies 32-bit pixels while reordering their bytes: every 32-bit
@ word is byte-reversed and then shifted right by 8 bits, so the
@ top byte of each stored word is zero
@ (e.g. the word 0xAABBGGRR is stored as 0x00RRGGBB).
@
@ r0: uae_u8 *dst
@ r1: uae_u8 *src
@ r2: int bytes     consumed 4 at a time; the loop ends only when
@                   the count reaches exactly zero
@
@ void copy_screen_32bit_to_32bit(uae_u8 *dst, uae_u8 *src, int bytes);
@
@ Scratch registers: r3 and the condition flags.
@
@----------------------------------------------------------------
copy_screen_32bit_to_32bit:
.Lcs32to32_pixel:
  ldr   r3, [r1], #4                @ one pixel per iteration
  subs  r2, r2, #4                  @ nothing below changes the flags again
  rev   r3, r3                      @ reverse the four bytes
  mov   r3, r3, lsr #8              @ drop the low byte, zero the top byte
  str   r3, [r0], #4
  bne   .Lcs32to32_pixel
  bx    lr
|
||||
|
|
|
@ -4,9 +4,12 @@
|
|||
|
||||
.global save_host_fp_regs
|
||||
.global restore_host_fp_regs
|
||||
.global copy_screen_8bit
|
||||
.global copy_screen_8bit_to_16bit
|
||||
.global copy_screen_8bit_to_32bit
|
||||
.global copy_screen_16bit_swap
|
||||
.global copy_screen_16bit_to_32bit
|
||||
.global copy_screen_32bit_to_16bit
|
||||
.global copy_screen_32bit_to_32bit
|
||||
.global ARM_doline_n1
|
||||
.global NEON_doline_n2
|
||||
.global NEON_doline_n3
|
||||
|
@ -36,17 +39,17 @@ restore_host_fp_regs:
|
|||
|
||||
|
||||
@----------------------------------------------------------------
|
||||
@ copy_screen_8bit
|
||||
@ copy_screen_8bit_to_16bit
|
||||
@
|
||||
@ r0: uae_u8 *dst
|
||||
@ r1: uae_u8 *src
|
||||
@ r2: int bytes always a multiple of 64: even number of lines, number of pixel per line is multiple of 32 (320, 640, 800, 1024, 1152, 1280)
|
||||
@ r3: uae_u32 *clut
|
||||
@
|
||||
@ void copy_screen_8bit(uae_u8 *dst, uae_u8 *src, int bytes, uae_u32 *clut);
|
||||
@ void copy_screen_8bit_to_16bit(uae_u8 *dst, uae_u8 *src, int bytes, uae_u32 *clut);
|
||||
@
|
||||
@----------------------------------------------------------------
|
||||
copy_screen_8bit:
|
||||
copy_screen_8bit_to_16bit:
|
||||
stmdb sp!, {r4-r6, lr}
|
||||
copy_screen_8bit_loop:
|
||||
pld [r1, #192]
|
||||
|
@ -72,6 +75,38 @@ copy_screen_8bit_loop_2:
|
|||
ldmia sp!, {r4-r6, pc}
|
||||
|
||||
|
||||
@----------------------------------------------------------------
@ copy_screen_8bit_to_32bit
@
@ Expands 8-bit colour indices to 32-bit pixels: every source byte
@ selects one 32-bit entry of clut, which is written to dst.
@
@ r0: uae_u8 *dst
@ r1: uae_u8 *src
@ r2: int bytes     always a multiple of 64: even number of lines, number of pixel per line is multiple of 32 (320, 640, 800, 1024, 1152, 1280)
@ r3: uae_u32 *clut
@
@ void copy_screen_8bit_to_32bit(uae_u8 *dst, uae_u8 *src, int bytes, uae_u32 *clut);
@
@ Four source bytes are handled per iteration; the first group is
@ processed before the remaining count is tested.
@ r4, r5 and lr are saved on entry and restored on return.
@
@----------------------------------------------------------------
copy_screen_8bit_to_32bit:
  push  {r4-r5, lr}
.Lclut32_quad:
  ldr   r4, [r1], #4                @ four palette indices in one load
  subs  r2, r2, #4                  @ nothing below changes the flags again
  uxtb  r5, r4                      @ index 0
  ldr   lr, [r3, r5, lsl #2]
  ubfx  r5, r4, #8, #8              @ index 1
  str   lr, [r0], #4
  ldr   lr, [r3, r5, lsl #2]
  ubfx  r5, r4, #16, #8             @ index 2
  str   lr, [r0], #4
  ldr   lr, [r3, r5, lsl #2]
  lsr   r5, r4, #24                 @ index 3 (the shift already isolates the top byte)
  str   lr, [r0], #4
  ldr   lr, [r3, r5, lsl #2]
  str   lr, [r0], #4
  bgt   .Lclut32_quad
  pop   {r4-r5, pc}
|
||||
|
||||
|
||||
@----------------------------------------------------------------
|
||||
@ copy_screen_16bit_swap
|
||||
@
|
||||
|
@ -105,6 +140,33 @@ copy_screen_16bit_swap:
|
|||
bx lr
|
||||
|
||||
|
||||
@----------------------------------------------------------------
@ copy_screen_16bit_to_32bit
@
@ Widens 16-bit pixels to 32-bit pixels, one pixel per iteration.
@ Each source halfword is byte-swapped and then split into a 5-bit,
@ a 6-bit and a 5-bit field (low to high); the stored word carries
@ them in bits 7:3, 15:10 and 23:19, all other bits are zero.
@
@ r0: uae_u8 *dst
@ r1: uae_u8 *src
@ r2: int bytes     always a multiple of 128: even number of lines, 2 bytes per pixel, number of pixel per line is multiple of 32 (320, 640, 800, 1024, 1152, 1280)
@
@ void copy_screen_16bit_to_32bit(uae_u8 *dst, uae_u8 *src, int bytes);
@
@ The loop ends only when the count reaches exactly zero.
@ lr is saved on entry and used as the pixel accumulator; r3 is scratch.
@
@----------------------------------------------------------------
copy_screen_16bit_to_32bit:
  push  {lr}
.Lwiden16_pixel:
  ldrh  r3, [r1], #2                @ next source pixel
  rev16 r3, r3                      @ swap the two bytes of the pixel
  and   lr, r3, #31                 @ low 5-bit field
  subs  r2, r2, #2                  @ nothing below changes the flags again
  mov   lr, lr, lsl #3              @ low field    -> bits 7:3
  mov   r3, r3, lsr #5              @ middle field now in the low 6 bits
  bfi   lr, r3, #10, #6             @ middle field -> bits 15:10
  mov   r3, r3, lsr #6              @ high field now in the low 5 bits
  bfi   lr, r3, #19, #5             @ high field   -> bits 23:19
  str   lr, [r0], #4
  bne   .Lwiden16_pixel
  pop   {pc}
|
||||
|
||||
|
||||
@----------------------------------------------------------------
|
||||
@ copy_screen_32bit_to_16bit
|
||||
@
|
||||
|
@ -131,7 +193,29 @@ copy_screen_32bit_to_16bit:
|
|||
vst2.8 {d18-d19}, [r0]!
|
||||
bne copy_screen_32bit_to_16bit
|
||||
bx lr
|
||||
|
||||
|
||||
|
||||
@----------------------------------------------------------------
@ copy_screen_32bit_to_32bit
@
@ Copies 32-bit pixels while reordering their bytes: every 32-bit
@ word is byte-reversed and then shifted right by 8 bits, so the
@ top byte of each stored word is zero
@ (e.g. the word 0xAABBGGRR is stored as 0x00RRGGBB).
@
@ r0: uae_u8 *dst
@ r1: uae_u8 *src
@ r2: int bytes     consumed 16 at a time; the loop ends only when
@                   the count reaches exactly zero
@
@ void copy_screen_32bit_to_32bit(uae_u8 *dst, uae_u8 *src, int bytes);
@
@ Scratch registers: q9 (d18/d19) and the condition flags.
@
@----------------------------------------------------------------
copy_screen_32bit_to_32bit:
.Lswz32_block:
  vld1.64   {d18-d19}, [r1]!        @ four pixels per iteration
  subs      r2, r2, #16             @ the NEON ops below leave the flags alone
  vrev32.8  q9, q9                  @ q9 = d18:d19, reverse the bytes inside each word
  vshr.u32  q9, q9, #8              @ drop the low byte, zero the top byte
  vst1.64   {d18-d19}, [r0]!
  bne       .Lswz32_block
  bx        lr
|
||||
|
||||
|
||||
@----------------------------------------------------------------
|
||||
@ ARM_doline_n1
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue