/* drivers/soc/allwinner/pm/mem_mmu_pc_asm.S */

#include "./pm_assembler_i.h"
/*save_sp*/
/*save_sp_nommu*/
/*restore_sp*/
/*get_sp*/
.text
.globl save_sp
save_sp:
mov r0, r13
ldr r13, =SP_IN_SRAM
mov pc, lr
.text
.globl save_sp_nommu
save_sp_nommu:
mov r0, r13
ldr r13, =SP_IN_SRAM_PA
mov pc, lr
.text
.globl restore_sp
restore_sp:
mov r13, r0
mov pc, lr
.text
.globl get_sp
get_sp:
mov r0, r13
mov pc, lr
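/*
* Hedged usage sketch (not from this file): the C-side suspend path is
* assumed to stash the kernel stack pointer and switch to the SRAM stack
* roughly like this; "saved_sp" is a hypothetical local:
*
*	unsigned long saved_sp = save_sp();	/* sp now points at SP_IN_SRAM */
*	/* ...run the suspend steps on the SRAM stack... */
*	restore_sp(saved_sp);			/* back on the kernel stack */
*/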
.align 4
.globl get_cur_cluster_id
get_cur_cluster_id:
mrc p15, 0, r0, c0, c0, 5 /*read the MPIDR*/
ubfx r0, r0, #8, #4 /*extract Aff1; r0 == 1 means cluster 1*/
/* return*/
mov pc, lr
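/*
* For reference, a minimal C equivalent of the routine above, assuming a
* hypothetical read_mpidr() accessor: the Aff1 field, MPIDR[11:8], holds
* the cluster number on this family of SoCs.
*
*	unsigned int get_cur_cluster_id(void)
*	{
*		return (read_mpidr() >> 8) & 0xf;	/* Aff1 = cluster */
*	}
*/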
/*--------------------------------cache related api: ----------------------------------------------- */
/*invalidate_dcache*/
/*invalidate_icache*/
/*flush_dcache*/
/*flush_icache*/
/*disable_cache*/
/*disable_dcache*/
/*disable_icache*/
/*disable_l2cache*/
/*enable_cache*/
/*enable_icache*/
.align 4
.text
.globl invalidate_dcache /*cannot use push/pop here: the invalidate would discard the data on the stack*/
invalidate_dcache:
/* Corrupted registers: r0-r7, r9-r11 (r6 only in Thumb mode) */
dmb /* ensure ordering with previous memory accesses */
MRC p15, 1, r0, c0, c0, 1 /*read clidr */
ANDS r3, r0, #0x7000000 /*extract loc from clidr */
MOV r3, r3, lsr #23 /*left align loc bit field */
BEQ inv_finished /*if loc is 0, then no need to clean */
mov r10, #0 /*start clean at cache level 0 */
inv_loop1:
ADD r2, r10, r10, lsr #1 /*work out 3x current cache level */
MOV r1, r0, lsr r2 /*extract cache type bits from clidr */
AND r1, r1, #7 /*mask off the bits for the current cache only */
CMP r1, #2 /*see what cache we have at this level */
BLT inv_skip /*skip if no cache, or just i-cache */
MCR p15, 2, r10, c0, c0, 0 /*select current cache level in cssr */
ISB /*isb to sync the new cssr & csidr */
MRC p15, 1, r1, c0, c0, 0 /*read the new csidr */
AND r2, r1, #7 /*extract the length of the cache lines */
ADD r2, r2, #4 /*add 4 (line length offset) */
LDR r4, =0x3ff
ANDS r4, r4, r1, lsr #3 /*find maximum way number (associativity - 1) */
CLZ r5, r4 /*find bit position of way size increment */
LDR r7, =0x7fff
ANDS r7, r7, r1, lsr #13 /*extract maximum set (index) number */
inv_loop2:
MOV r9, r4 /*create working copy of max way size */
inv_loop3:
ORR r11, r10, r9, lsl r5 /*factor way and cache number into r11 */
ORR r11, r11, r7, lsl r2 /*factor index number into r11 */
MCR p15, 0, r11, c7, c6, 2 /*invalidate by set/way */
SUBS r9, r9, #1 /*decrement the way */
BGE inv_loop3 /* */
SUBS r7, r7, #1 /*decrement the index */
BGE inv_loop2 /* */
inv_skip: /* */
ADD r10, r10, #2 /*increment cache number */
CMP r3, r10 /* */
BGT inv_loop1 /* */
inv_finished: /* */
MOV r10, #0 /*switch back to cache level 0 */
MCR p15, 2, r10, c0, c0, 0 /*select current cache level in cssr */
dsb
ISB /* */
MOV pc, lr /* */
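/*
* Worked example of the set/way word built in r11 above, assuming a
* typical 32KB 4-way L1 with 64-byte lines: csidr yields line length
* offset r2 = 6 and max way number r4 = 3, so clz gives r5 = 30; a DCISW
* operand is then (way << 30) | (set << 6) | (level << 1), which is
* exactly r11 = (r9 << r5) | (r7 << r2) | r10 as composed in inv_loop3.
*/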
.text
.globl invalidate_icache
invalidate_icache:
mov r0, #0
mcr p15, 0, r0, c7, c1, 0 @ invalidate I-cache inner shareable
mcr p15, 0, r0, c7, c5, 0 @ I+BTB cache invalidate
dsb
ISB
mov pc, lr
.text
.globl flush_dcache
flush_dcache:
push {r0-r12}
dmb /* ensure ordering with previous memory accesses */
MRC p15, 1, r0, c0, c0, 1 /*read clidr */
ANDS r3, r0, #0x7000000 /*extract loc from clidr */
MOV r3, r3, lsr #23 /*left align loc bit field */
BEQ finished /*if loc is 0, then no need to clean */
mov r10, #0 /*start clean at cache level 0 */
loop1:
ADD r2, r10, r10, lsr #1 /*work out 3x current cache level */
MOV r1, r0, lsr r2 /*extract cache type bits from clidr */
AND r1, r1, #7 /*mask off the bits for the current cache only */
CMP r1, #2 /*see what cache we have at this level */
BLT skip /*skip if no cache, or just i-cache */
MCR p15, 2, r10, c0, c0, 0 /*select current cache level in cssr */
ISB /*isb to sync the new cssr & csidr */
MRC p15, 1, r1, c0, c0, 0 /*read the new csidr */
AND r2, r1, #7 /*extract the length of the cache lines */
ADD r2, r2, #4 /*add 4 (line length offset) */
LDR r4, =0x3ff
ANDS r4, r4, r1, lsr #3 /*find maximum way number (associativity - 1) */
CLZ r5, r4 /*find bit position of way size increment */
LDR r7, =0x7fff
ANDS r7, r7, r1, lsr #13 /*extract maximum set (index) number */
loop2:
MOV r9, r4 /*create working copy of max way size */
loop3:
ORR r11, r10, r9, lsl r5 /*factor way and cache number into r11 */
ORR r11, r11, r7, lsl r2 /*factor index number into r11 */
MCR p15, 0, r11, c7, c14, 2 /*clean & invalidate by set/way */
SUBS r9, r9, #1 /*decrement the way */
BGE loop3 /* */
SUBS r7, r7, #1 /*decrement the index */
BGE loop2 /* */
skip: /* */
ADD r10, r10, #2 /*increment cache number */
CMP r3, r10 /* */
BGT loop1 /* */
finished: /* */
MOV r10, #0 /*switch back to cache level 0 */
MCR p15, 2, r10, c0, c0, 0 /*select current cache level in cssr */
dsb
ISB /* */
pop {r0-r12}
MOV pc, lr /* */
.text
.globl flush_icache
flush_icache:
push {r0-r3}
MOV r0, #0
MCR p15, 0, r0, c7, c5, 0 /*Instruction cache invalidate all to PoU */
MCR p15, 0, r0, c7, c1, 0 /*Instruction cache invalidate all to PoU Inner Shareable*/
MCR p15, 0, r0, c7, c1, 6 /*Branch predictor invalidate all Inner Shareable*/
dsb
ISB
pop {r0-r3}
MOV pc, lr
.text
.globl disable_cache
disable_cache:
push {r0-r3}
MRC p15, 0, r0, c1, c0, 0 /*read cr */
BIC r0, r0, #0x1 << 12
BIC r0, r0, #0x1 << 2
MCR p15, 0, r0, c1, c0, 0 /*disable cache */
ISB
pop {r0-r3}
MOV pc, lr
.text
.globl disable_dcache
disable_dcache:
push {r0-r3}
MRC p15, 0, r0, c1, c0, 0 /*read cr */
BIC r0, r0, #(0x1 << 2)
MCR p15, 0, r0, c1, c0, 0 /*disable dcache */
ISB
pop {r0-r3}
MOV pc, lr
.text
.globl disable_icache
disable_icache:
push {r0-r3}
MRC p15, 0, r0, c1, c0, 0 /*read cr */
BIC r0, r0, #(0x1 << 12)
MCR p15, 0, r0, c1, c0, 0 /*disable icache */
ISB
pop {r0-r3}
MOV pc, lr
.text
.globl disable_l2cache
disable_l2cache:
push {r0-r3}
MRC p15, 0, r0, c1, c0, 1 /*read acr */
BIC r0, r0, #0x1 << 1
MCR p15, 0, r0, c1, c0, 1 /*disable l2cache */
ISB
pop {r0-r3}
MOV pc, lr
.text
.globl enable_cache
enable_cache:
push {r0-r3}
MRC p15, 0, r0, c1, c0, 0 /*read cr */
ORR r0, r0, #0x1 << 12
ORR r0, r0, #0x1 << 2
MCR p15, 0, r0, c1, c0, 0 /*enable cache */
ISB
pop {r0-r3}
MOV pc, lr
.text
.globl enable_icache
enable_icache:
push {r0-r3}
MRC p15, 0, r0, c1, c0, 0 /*read cr */
ORR r0, r0, #(0x1 << 12)
MCR p15, 0, r0, c1, c0, 0 /*enable icache */
ISB
pop {r0-r3}
MOV pc, lr
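/*
* The enable/disable pairs above all toggle SCTLR bits: C (d-cache) is
* bit 2 (0x1 << 2), I (i-cache) is bit 12 (0x1 << 12), and Z (branch
* prediction, used further below) is bit 11 (0x800). A hedged C view of
* enable_cache, assuming hypothetical read_sctlr()/write_sctlr() helpers:
*
*	void enable_cache(void)
*	{
*		write_sctlr(read_sctlr() | (1 << 12) | (1 << 2));
*	}
*/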
/*--------------------------------------prediction----------------------------------------*/
/*invalidate_branch_predictor*/
/*disable_program_flow_prediction*/
/*enable_program_flow_prediction*/
.text
.globl invalidate_branch_predictor
invalidate_branch_predictor:
push {r0-r3}
MOV r0, #0
MCR p15, 0, r0, c7, c5, 6 /*(invalidate entire branch predictor array)*/
ISB
pop {r0-r3}
MOV pc, lr
.text
.globl disable_program_flow_prediction
disable_program_flow_prediction:
push {r0-r3}
MRC p15, 0, r0, c1, c0, 0 /*read cr */
BIC r0, r0, #0x800
MCR p15, 0, r0, c1, c0, 0 /*disable program_flow_prediction */
ISB
pop {r0-r3}
MOV pc, lr
.text
.globl enable_program_flow_prediction
enable_program_flow_prediction:
push {r0-r3}
MRC p15, 0, r0, c1, c0, 0 /*read cr */
ORR r0, r0, #0x800
MCR p15, 0, r0, c1, c0, 0 /*enable program_flow_prediction */
ISB
pop {r0-r3}
MOV pc, lr
/*-------------------------------------tlb related api:----------------------------------------*/
/*mem_flush_tlb*/
/*mem_preload_tlb*/
.text
.globl mem_flush_tlb
mem_flush_tlb:
push {r0-r3}
MOV r0, #0
/*invalidate entire instruction TLB*/
mcr p15, 0, r0, c8, c5, 0
/*invalidate entire data TLB*/
mcr p15, 0, r0, c8, c6, 0
/*invalidate entire unified TLB inner shareable*/
mcr p15, 0, r0, c8, c3, 0
dsb
ISB
pop {r0-r3}
mov pc, lr
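/*
* The three mcr ops above are ITLBIALL (c8, c5, 0), DTLBIALL (c8, c6, 0)
* and TLBIALLIS (c8, c3, 0); on cores with a unified TLB the first two
* behave like TLBIALL, and the inner-shareable op broadcasts the
* invalidate to the other cores.
*/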
.text
.globl mem_preload_tlb
mem_preload_tlb:
push {r0-r3}
#if defined(CONFIG_ARCH_SUN8IW10P1)
/*48k: touch one word every 0x400 bytes from 0xf0000000 up to 0xf000c000*/
mov r2, #0xc000
movt r2, #0xf000 /*r2 = 0xf000c000, end of the walk*/
mov r3, #0xf0000000
preload_continue:
ldr r1, [r3], #0x400
cmp r3, r2
bne preload_continue
#elif defined(CONFIG_ARCH_SUN8IW11P1)
/*48k*/
ldr r0, =IO_ADDRESS(AW_SRAM_A1_BASE)
ldr r1, [r0]
ldr r0, =IO_ADDRESS(AW_SRAM_A1_BASE + 0x1000)
ldr r1, [r0]
ldr r0, =IO_ADDRESS(AW_SRAM_A1_BASE + 0x2000)
ldr r1, [r0]
ldr r0, =IO_ADDRESS(AW_SRAM_A1_BASE + 0x3000)
ldr r1, [r0]
ldr r0, =IO_ADDRESS(AW_SRAM_A2_BASE)
ldr r1, [r0]
ldr r0, =IO_ADDRESS(AW_SRAM_A2_BASE + 0x1000)
ldr r1, [r0]
ldr r0, =IO_ADDRESS(AW_SRAM_A2_BASE + 0x2000)
ldr r1, [r0]
ldr r0, =IO_ADDRESS(AW_SRAM_A2_BASE + 0x3000)
ldr r1, [r0]
ldr r0, =IO_ADDRESS(AW_SRAM_A3_BASE)
ldr r1, [r0]
ldr r0, =IO_ADDRESS(AW_SRAM_A3_BASE + 0x1000)
ldr r1, [r0]
ldr r0, =IO_ADDRESS(AW_SRAM_A3_BASE + 0x2000)
ldr r1, [r0]
ldr r0, =IO_ADDRESS(AW_SRAM_A3_BASE + 0x3000) /* covers SRAM_A4 3K */
ldr r1, [r0]
#elif defined(CONFIG_ARCH_SUN50IW1P1) || defined(CONFIG_ARCH_SUN50IW2P1) || \
defined(CONFIG_ARCH_SUN50IW3P1) || defined(CONFIG_ARCH_SUN50IW6P1)
/*80k*/
ldr r0, =IO_ADDRESS(AW_SRAM_A2_BASE)
ldr r1, [r0]
ldr r0, =IO_ADDRESS(AW_SRAM_A2_BASE + 0x1000)
ldr r1, [r0]
ldr r0, =IO_ADDRESS(AW_SRAM_A2_BASE + 0x2000)
ldr r1, [r0]
ldr r0, =IO_ADDRESS(AW_SRAM_A2_BASE + 0x3000)
ldr r1, [r0]
ldr r0, =IO_ADDRESS(AW_SRAM_A2_BASE + 0x4000)
ldr r1, [r0]
ldr r0, =IO_ADDRESS(AW_SRAM_A2_BASE + 0x5000)
ldr r1, [r0]
ldr r0, =IO_ADDRESS(AW_SRAM_A2_BASE + 0x6000)
ldr r1, [r0]
ldr r0, =IO_ADDRESS(AW_SRAM_A2_BASE + 0x7000)
ldr r1, [r0]
ldr r0, =IO_ADDRESS(AW_SRAM_A2_BASE + 0x8000)
ldr r1, [r0]
ldr r0, =IO_ADDRESS(AW_SRAM_A2_BASE + 0x9000)
ldr r1, [r0]
ldr r0, =IO_ADDRESS(AW_SRAM_A2_BASE + 0xa000)
ldr r1, [r0]
ldr r0, =IO_ADDRESS(AW_SRAM_A2_BASE + 0xb000)
ldr r1, [r0]
ldr r0, =IO_ADDRESS(AW_SRAM_A2_BASE + 0xc000)
ldr r1, [r0]
ldr r0, =IO_ADDRESS(AW_SRAM_A2_BASE + 0xd000)
ldr r1, [r0]
ldr r0, =IO_ADDRESS(AW_SRAM_A2_BASE + 0xe000)
ldr r1, [r0]
ldr r0, =IO_ADDRESS(AW_SRAM_A2_BASE + 0xf000)
ldr r1, [r0]
ldr r0, =IO_ADDRESS(AW_SRAM_A2_BASE + 0x10000)
ldr r1, [r0]
ldr r0, =IO_ADDRESS(AW_SRAM_A2_BASE + 0x11000)
ldr r1, [r0]
ldr r0, =IO_ADDRESS(AW_SRAM_A2_BASE + 0x12000)
ldr r1, [r0]
ldr r0, =IO_ADDRESS(AW_SRAM_A2_BASE + 0x13000)
ldr r1, [r0]
ldr r0, =IO_ADDRESS(AW_MSGBOX_BASE)
ldr r1, [r0]
ldr r0, =IO_ADDRESS(AW_SPINLOCK_BASE)
ldr r1, [r0]
ldr r0, =IO_ADDRESS(AW_R_PRCM_BASE)
ldr r1, [r0]
#endif
ldr r0, =IO_ADDRESS(AW_PIO_BASE)
ldr r1, [r0]
ldr r0, =IO_ADDRESS(AW_UART0_BASE)
ldr r1, [r0]
ldr r0, =IO_ADDRESS(AW_RTC_BASE)
ldr r1, [r0]
ldr r0, =IO_ADDRESS(AW_CCM_BASE)
ldr r1, [r0]
ldr r0, =IO_ADDRESS(AW_TWI0_BASE)
ldr r1, [r0]
dsb
isb
pop {r0-r3}
mov pc, lr
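/*
* Why one load per 0x1000 above is enough: with 4KB small pages a single
* read anywhere in a page forces a page-table walk and installs that
* page's TLB entry. Touching each SRAM page plus the PIO/UART/RTC/CCM/TWI
* register pages lets the standby code in SRAM keep running without new
* walks once DRAM is unavailable. (A hedged reading of the intent; the
* page size assumption is not spelled out in this file.)
*/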
/*--------------------------------------mmu----------------------------------------*/
/*disable_mmu*/
/*enable_mmu*/
.text
.globl disable_mmu
disable_mmu:
/*read cr*/
MRC p15, 0, r1, c1, c0, 0
BIC r1, #0x1000
BIC r1, #0x0005
b __turn_mmu_off
.align 5
.type __turn_mmu_off, %function
__turn_mmu_off:
/*write cr: disable cache and mmu*/
MCR p15, 0, r1, c1, c0, 0
/*read id reg*/
mrc p15, 0, r3, c0, c0, 0
mov r3, r3 /*two nops so the SCTLR write takes effect before the return*/
mov r3, r3
/*return*/
mov pc, lr
.text
.globl enable_mmu
enable_mmu:
/*read cr*/
MRC p15, 0, r1, c1, c0, 0
ORR r1, #0x1000
ORR r1, #0x0005
b __turn_mmu_on
.align 5
.type __turn_mmu_on, %function
__turn_mmu_on:
/*write cr: enable cache and mmu*/
MCR p15, 0, r1, c1, c0, 0
/*read id reg*/
mrc p15, 0, r3, c0, c0, 0
mov r3, r3 /*two nops so the SCTLR write takes effect before the return*/
mov r3, r3
/*return*/
mov pc, lr
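/*
* Note on __turn_mmu_off/__turn_mmu_on: ".align 5" puts each stub on a
* 32-byte boundary so the SCTLR write and the instructions around it sit
* in a single cache line, and the mrc from the ID register plus the two
* "mov r3, r3" nops give the MMU state change time to take effect before
* the return. The caller is assumed to run from an identity-mapped
* region while the MMU state flips.
*/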
/*----------------------------------------pc related api:---------------------------------------*/
/*jump_to_suspend*/
/*jump_to_resume*/
/*jump_to_resume0*/
/*jump_to_resume0_nommu*/
.text
.globl jump_to_suspend
jump_to_suspend:
/*enable the 0x0000 <--> 0x0000 identity mapping */
/*write ttbr0*/
mcr p15, 0, r0, c2, c0, 0
dsb
isb
mov pc, r1
.align 4
.text
.globl jump_to_resume
jump_to_resume:
/*before jumping to resume:
* 1st: invalidate the d-cache.
* 2nd: restore r0-r13 from the save area (parameter 2).
* 3rd: jump to the entry point (parameter 1).
*/
/* Set the return pointer */
mov r12, r0
mov r8, r1
bl invalidate_dcache
mov r1, r8
mov lr, r12
ldmia r1, {r0 - r13}
mov pc, lr
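/*
* A hedged sketch of the save area consumed by the ldmia above: parameter
* 2 (r1) is assumed to point at fourteen consecutive words holding r0-r13,
* i.e. in C terms something like the hypothetical:
*
*	struct resume_regs {
*		unsigned long r[14];	/* r[0] = r0, ..., r[13] = sp */
*	};
*
* jump_to_resume(entry, &regs) then invalidates the d-cache, reloads
* r0-r13 from the block, and branches to "entry".
*/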
.align 4
.globl jump_to_resume0
jump_to_resume0:
/* Set the return pointer */
mov lr, r0
mov pc, lr
.align 4
.globl jump_to_resume0_nommu
jump_to_resume0_nommu:
/*read cr*/
MRC p15, 0, r1, c1, c0, 0
BIC r1, #0x1000
BIC r1, #0x0007
/*write cr: disable cache and mmu*/
MCR p15, 0, r1, c1, c0, 0
/*read id reg*/
mrc p15, 0, r3, c0, c0, 0
mov r3, r3 /*two nops so the SCTLR write takes effect before the return*/
mov r3, r3
/* Set the return pointer */
mov lr, r0
isb
mov pc, lr
.align 4
.global disable_prefetch
disable_prefetch:
/*
* Disable all forms of branch prediction.
* Actually this op is not needed, because branch prediction is always
* enabled while the MMU is enabled, so the code below is commented out:
*
* mrc p15, 0, r1, c1, c0, 0
* bic r1, r1, #(0x1 << 11)
* mcr p15, 0, r1, c1, c0, 0
*/
/*
* Disable data prefetching. This covers:
* - optimized data memory barrier behavior,
* - Lx data cache read-allocate mode,
* - data prefetching itself,
* - Distributed Virtual Memory (DVM) transactions,
* - dual issue.
* The previous ACTLR value is returned in r0.
*/
mrc p15, 0, r1, c1, c0, 1
mov r0, r1
orr r1, r1, #(0x7 << 10)
bic r1, r1, #(0x3 << 13)
orr r1, r1, #(0x1 << 15)
orr r1, r1, #(0xE << 28)
mcr p15, 0, r1, c1, c0, 1
/* return*/
mov pc, lr
.align 4
.global restore_prefetch
restore_prefetch:
/*
* Revert the disable_prefetch ops: write back the ACTLR value that
* disable_prefetch returned (passed in via r0).
*/
mcr p15, 0, r0, c1, c0, 1
/* return*/
mov pc, lr
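/*
* Hedged usage sketch: disable_prefetch returns the previous ACTLR value
* in r0, which the caller is expected to hand back to restore_prefetch,
* e.g. (with hypothetical C prototypes):
*
*	unsigned long old_actlr = disable_prefetch();
*	/* ...enter the low-power window... */
*	restore_prefetch(old_actlr);
*/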