/* File: startup_armv8-r.S
 * Purpose: startup file for armv8-r architecture devices.
 *          Should be used with GCC for ARM Embedded Processors
 */

	.syntax	unified
	.arch	armv8-r

	.section .stack
/*  Stack reservation.
 *
 *  __StackLimit labels the lowest address of the reserved area and
 *  __StackTop the first address past it.  The size comes from the
 *  preprocessor macro __STACK_SIZE when it is defined, otherwise it
 *  defaults to 0x4000 bytes.  The area starts on an 8-byte boundary.
 */
	.p2align	3
#ifdef __STACK_SIZE
	.set	Stack_Size, __STACK_SIZE
#else
	.set	Stack_Size, 0x00004000
#endif
	.global	__StackLimit
	.global	__StackTop
__StackLimit:
	.skip	Stack_Size		/* reserve Stack_Size bytes */
	.size	__StackLimit, . - __StackLimit
__StackTop:
	.size	__StackTop, . - __StackTop

	.section .heap
/*  Heap reservation.
 *
 *  __HeapBase labels the start of the reserved area and __HeapLimit the
 *  first address past it.  The size comes from the preprocessor macro
 *  __HEAP_SIZE when it is defined, otherwise it defaults to 0xC000 bytes.
 *  A size of zero reserves nothing, in which case both labels coincide.
 *  The area starts on an 8-byte boundary.
 */
	.p2align	3
#ifdef __HEAP_SIZE
	.set	Heap_Size, __HEAP_SIZE
#else
	.set	Heap_Size, 0x0000C000
#endif
	.global	__HeapBase
	.global	__HeapLimit
__HeapBase:
	.if	Heap_Size
	.skip	Heap_Size		/* reserve Heap_Size bytes */
	.endif
	.size	__HeapBase, . - __HeapBase
__HeapLimit:
	.size	__HeapLimit, . - __HeapLimit

	.section .vectors
/*  Exception vector table.
 *
 *  Eight branch slots, one per exception entry, in the order fixed by the
 *  architecture.  Reset_Handler installs the address of this table in both
 *  VBAR and HVBAR.  The table is aligned to 32 bytes (2^5).
 *
 *  The offsets quoted below assume each branch assembles to a 4-byte A32
 *  instruction (no .thumb directive is present in this file).
 *
 *  Every handler except Reset_Handler is a weak alias of Default_Handler
 *  unless the application provides its own definition.
 *
 *  NOTE(review): because the same table is used for HVBAR, the slot at
 *  0x14 would be the Hyp Trap entry while running in Hyp mode - confirm
 *  that spinning in place is the intended behaviour there.
 */
	.align	5
	.globl	__Vectors
__Vectors:
	b	Reset_Handler         /* 0x00: Reset */
	b	Undefined_Handler     /* 0x04: Undefined instruction */
	b	SVC_Handler           /* 0x08: Supervisor call */
	b	Prefetch_Handler      /* 0x0C: Prefetch abort */
	b	Abort_Handler         /* 0x10: Data abort */
	b	.                     /* 0x14: Reserved - spin in place */
	b	IRQ_Handler           /* 0x18: IRQ */
	b	FIQ_Handler           /* 0x1C: FIQ */

	.size	__Vectors, . - __Vectors

	.text
	.align	2
	.globl	Reset_Handler
	.type	Reset_Handler, %function
/*  Reset_Handler: reset entry point, reached from the first slot of __Vectors.
 *
 *  Sequence:
 *    1. Zero r0-r14 of the current mode.
 *    2. Copy initialised data from its load address to its run address
 *       (table driven if __STARTUP_COPY_MULTIPLE is defined).
 *    3. Optionally clear BSS (__STARTUP_CLEAR_BSS_MULTIPLE or
 *       __STARTUP_CLEAR_BSS).
 *    4. Install __Vectors in VBAR and HVBAR and update the CPSR mask bits.
 *    5. Program the A/B/C TCM region registers and fill each TCM with zeros
 *       in 32-byte bursts.
 *    6. Leave EL2 with ERET, continuing at __START in SVC mode.
 *
 *  Does not return.  No stack is used: SP is zero throughout and the only
 *  subroutine call (init_tcm_loop) returns through LR.
 */
Reset_Handler:
	/* Start from a known register state. */
	mov	r0, #0
	mov	r1, r0
	mov	r2, r0
	mov	r3, r0
	mov	r4, r0
	mov	r5, r0
	mov	r6, r0
	mov	r7, r0
	mov	r8, r0
	mov	r9, r0
	mov	r10, r0
	mov	r11, r0
	mov	r12, r0
	mov	r13, r0
	mov	r14, r0

/*  First, copy data from read-only memory to RAM.  There are two schemes:
 *  one can copy several sections, the other copies exactly one section.
 *  The former needs more instructions and read-only data than the latter.
 *  Macro __STARTUP_COPY_MULTIPLE selects between the two schemes.  */

#ifdef __STARTUP_COPY_MULTIPLE
/*  Multiple sections scheme.
 *
 *  Between symbol addresses __copy_table_start__ and __copy_table_end__
 *  there is an array of triplets, each of which specifies:
 *    offset 0: LMA of start of a section to copy from
 *    offset 4: VMA of start of a section to copy to
 *    offset 8: size of the section to copy.  Must be a multiple of 4
 *
 *  All addresses must be aligned to a 4-byte boundary.
 */
	ldr	r4, =__copy_table_start__	/* r4 = current table entry */
	ldr	r5, =__copy_table_end__		/* r5 = end of table */

.L_loop0:
	cmp	r4, r5
	bhs	.L_loop0_done		/* addresses are unsigned quantities */
	ldr	r1, [r4]		/* r1 = source (LMA) */
	ldr	r2, [r4, #4]		/* r2 = destination (VMA) */
	ldr	r3, [r4, #8]		/* r3 = bytes remaining */

.L_loop0_0:
	/* Copy words from the top of the section downwards until the
	 * remaining byte count goes negative. */
	subs	r3, r3, #4
	ldrge	r0, [r1, r3]
	strge	r0, [r2, r3]
	bge	.L_loop0_0

	adds	r4, r4, #12		/* next triplet */
	b	.L_loop0

.L_loop0_done:
#else
/*  Single section scheme.
 *
 *  The ranges to copy from/to are specified by the following symbols:
 *    __etext: LMA of start of the section to copy from.  Usually end of text
 *    __data_start__: VMA of start of the section to copy to
 *    __data_end__: VMA of end of the section to copy to
 *
 *  All addresses must be aligned to a 4-byte boundary.
 */
	ldr	r1, =__etext
	ldr	r2, =__data_start__
	ldr	r3, =__data_end__

.L_loop1:
	cmp	r2, r3			/* unsigned: destination below end? */
	ldrlo	r0, [r1], #4
	strlo	r0, [r2], #4
	blo	.L_loop1
#endif /*__STARTUP_COPY_MULTIPLE */

/*  Clearing BSS is usually done by the C library startup code.  Otherwise,
 *  define one of the macros below to have this startup file do it.
 *
 *  There are two schemes here as well: one can clear multiple BSS sections,
 *  the other clears exactly one.  The former costs more code size than the
 *  latter.
 *
 *  Define macro __STARTUP_CLEAR_BSS_MULTIPLE to choose the former.
 *  Otherwise define macro __STARTUP_CLEAR_BSS to choose the latter.
 */
#ifdef __STARTUP_CLEAR_BSS_MULTIPLE
/*  Multiple sections scheme.
 *
 *  Between symbol addresses __zero_table_start__ and __zero_table_end__
 *  there is an array of tuples specifying:
 *    offset 0: Start of a BSS section
 *    offset 4: Size of this BSS section.  Must be a multiple of 4
 */
	ldr	r3, =__zero_table_start__	/* r3 = current table entry */
	ldr	r4, =__zero_table_end__		/* r4 = end of table */
	movs	r0, #0				/* fill value, loop invariant */

.L_loop2:
	cmp	r3, r4
	bhs	.L_loop2_done		/* addresses are unsigned quantities */
	ldr	r1, [r3]		/* r1 = section start */
	ldr	r2, [r3, #4]		/* r2 = bytes remaining */

.L_loop2_0:
	/* Clear words from the top of the section downwards. */
	subs	r2, r2, #4
	strge	r0, [r1, r2]
	bge	.L_loop2_0

	adds	r3, r3, #8		/* next tuple */
	b	.L_loop2
.L_loop2_done:
#elif defined (__STARTUP_CLEAR_BSS)
/*  Single BSS section scheme.
 *
 *  The BSS section is specified by the following symbols:
 *    __bss_start__: start of the BSS section.
 *    __bss_end__: end of the BSS section.
 *
 *  Both addresses must be aligned to a 4-byte boundary.
 */
	ldr	r1, =__bss_start__
	ldr	r2, =__bss_end__

	movs	r0, #0
.L_loop3:
	cmp	r1, r2			/* unsigned: cursor below end? */
	strlo	r0, [r1], #4
	blo	.L_loop3
#endif /* __STARTUP_CLEAR_BSS_MULTIPLE || __STARTUP_CLEAR_BSS */

	/* Vector table initialization: the same table serves EL1 and EL2. */
	ldr	r0, =__Vectors
	mcr	p15, 0, r0, c12, c0, 0	/* Write to VBAR */
	mcr	p15, 4, r0, c12, c0, 0	/* Write to HVBAR */

	/* Clear CPSR bits 6-8 (F, I and A masks).
	 * NOTE(review): GAS is believed to treat a bare "cpsr" operand as
	 * cpsr_fc, in which case bit 8 (A) is not actually written - confirm
	 * against the assembler documentation before relying on it.
	 * NOTE(review): this runs before any stack pointer is set up - confirm
	 * that no interrupt can be taken between here and __START. */
	mrs	r0, cpsr
	ldr	r1, =0xFFFFFE3F
	and	r0, r0, r1
	msr	cpsr, r0

	/* TCM initialization.  The $(TCMx_INIT) values are placeholders that
	 * must be substituted before this file is assembled. */
	ldr r0, =$(TCMA_INIT)		/* Load new BASE address; 32k; EL0/1=ON L2=ON */
	mcr p15, 0, r0, c9, c1, 0	/* Write to A-TCM config reg */

	ldr r0, =$(TCMB_INIT)		/* Load new BASE address; 32k; EL0/1=ON L2=ON */
	mcr p15, 0, r0, c9, c1, 1	/* Write to B-TCM config reg */

	ldr r0, =$(TCMC_INIT)		/* Load new BASE address; 32k; EL0/1=ON L2=ON */
	mcr p15, 0, r0, c9, c1, 2	/* Write to C-TCM config reg */

	/* r4-r11 supply the fill data for init_tcm_loop.  The table-driven
	 * copy/clear loops above use r4 and r5 as pointers, so zero them again
	 * to keep the fill pattern independent of the build configuration. */
	mov	r4, #0
	mov	r5, #0

	ldr	r0, =__TCMA_Start
	ldr	r1, =__TCMA_Length
	lsr	r1, r1, #5			/* Divide by 32 */
	bl	init_tcm_loop

	ldr	r0, =__TCMB_Start
	ldr	r1, =__TCMB_Length
	lsr	r1, r1, #5			/* Divide by 32 */
	bl	init_tcm_loop

	ldr	r0, =__TCMC_Start
	ldr	r1, =__TCMC_Length
	lsr	r1, r1, #5			/* Divide by 32 */
	bl	init_tcm_loop

#ifndef __START
#define __START _start
#endif

/* ARMv8-R cores are in EL2 (hypervisor mode) after reset, and we need
   to first descend to EL1 (supervisor mode) before the traditional SP
   setting code can be run.
   NOTE(review): only the mode field of SPSR_hyp is rewritten; the other
   bits keep whatever value the register held - confirm this is acceptable
   for the target core. */
	ldr	r0, =__START
	msr	ELR_hyp, r0		/* ERET target */
	mrs	r0, SPSR_hyp
	bic	r0, r0, #0x001F		/* clear mode field M[4:0] */
	orr	r0, r0, #0x0013		/* select SVC mode */
	msr	SPSR_hyp, r0
	eret
	/* No return here */

/*  init_tcm_loop: fill a memory range with the contents of r4-r11.
 *
 *  In:     r0 = start address
 *          r1 = number of 32-byte blocks to write (0 writes nothing)
 *          r4-r11 = data stored into every block
 *  Out:    r0 = address just past the last block written
 *          r1 = 0
 *  Clobbers: flags
 *  Returns through lr; uses no stack.
 */
init_tcm_loop:
	cmp	r1, #0
	beq	.L_tcm_done		/* guard: a zero count must not wrap */
.L_tcm_fill:
	stmia	r0!, {r4-r11}		/* store 8 words = 32 bytes, advance r0 */
	subs	r1, r1, #1		/* one block fewer to go */
	bne	.L_tcm_fill
.L_tcm_done:
	bx	lr

	.pool
	.size	Reset_Handler, . - Reset_Handler

	.align	2			/* A32 instructions need 4-byte alignment */
	.weak	Default_Handler
	.type	Default_Handler, %function
/*  Default_Handler: weak catch-all exception handler.
 *
 *  Executes a breakpoint instruction and then spins forever.  It never
 *  returns.  Being weak, it is replaced by any strong definition of
 *  Default_Handler supplied elsewhere.
 */
Default_Handler:
	bkpt	#0
	b	.			/* spin in place */
	.pool
	.size	Default_Handler, . - Default_Handler

/*    def_irq_handler: declare \handler_name as a weak symbol whose value
 *    is Default_Handler.  A strong definition of the same name elsewhere
 *    overrides the alias. */
	.macro	def_irq_handler	handler_name
	.weak	\handler_name
	.equ	\handler_name, Default_Handler
	.endm

/*    Create one weak alias per exception handler branched to from
 *    __Vectors (Reset_Handler is defined explicitly in this file). */
	.irp	vec, Undefined_Handler, SVC_Handler, Prefetch_Handler, Abort_Handler, IRQ_Handler, FIQ_Handler
	def_irq_handler	\vec
	.endr

	.end
