patch-2.4.22 linux-2.4.22/arch/arm/boot/compressed/head.S


diff -urN linux-2.4.21/arch/arm/boot/compressed/head.S linux-2.4.22/arch/arm/boot/compressed/head.S
@@ -1,7 +1,7 @@
 /*
  *  linux/arch/arm/boot/compressed/head.S
  *
- *  Copyright (C) 1996-1999 Russell King
+ *  Copyright (C) 1996-2002 Russell King
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -19,6 +19,13 @@
  */
 #ifdef DEBUG
 #if defined(CONFIG_DEBUG_DC21285_PORT)
+		.macro	loadsp, rb
+		mov	\rb, #0x42000000
+		.endm
+		.macro	writeb, rb
+		str	\rb, [r3, #0x160]
+		.endm
+#elif defined(CONFIG_FOOTBRIDGE)
 		.macro	loadsp,	rb
 		mov	\rb, #0x7c000000
 		.endm
@@ -40,6 +47,15 @@
 		.macro	writeb, rb
 		strb	\rb, [r3, #0]
 		.endm
+#elif defined(CONFIG_ARCH_AT91RM9200)
+		.macro	loadsp, rb
+		mov	\rb, #0xFF000000	@ BASE_DBGU (we cannot use ldr \reg, =AT91_DBGU_BASE)
+		add	\rb, \rb, #0x00FF0000
+		add	\rb, \rb, #0x0000F200
+		.endm
+		.macro	writeb, rb
+		strb	\rb, [r3, #0x1C]	@ DBGU_THR (Transmitter Holding Register)
+		.endm
 #elif defined(CONFIG_ARCH_SA1100)
 		.macro	loadsp, rb
 		mov	\rb, #0x80000000	@ physical base address
@@ -50,6 +66,11 @@
 #  endif
 		.endm
 		.macro	writeb, rb
+/*
+ * "The ARM peripheral bus does not support byte or half-word operations.
+ * All reads and writes of the UART by the CPU should be wordwide."
+ *   - SA-1100 Developer's Manual, August 1999
+ */
 		str	\rb, [r3, #0x14]	@ UTDR
 		.endm
 #else
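
The quoted manual text explains why this writeb uses a word store (str)
where the other ports use strb: the SA-1100 peripheral bus ignores byte
lane strobes.  A minimal C sketch of the same rule, assuming a
memory-mapped UART at a hypothetical base address:

        /* Illustration only: UART_BASE is an assumption; UTDR (0x14)
         * matches the offset used above. */
        #include <stdint.h>

        #define UART_BASE 0x80010000u   /* hypothetical serial port base */
        #define UTDR      0x14u         /* transmit data register offset */

        static void sa1100_putc(char c)
        {
                volatile uint32_t *tx =
                        (volatile uint32_t *)(UART_BASE + UTDR);
                *tx = (uint32_t)(uint8_t)c;   /* word-wide, per the manual */
        }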
@@ -144,33 +165,98 @@
 		 */
 
 		.text
-1:		adr	r2, LC0
-		ldmia	r2, {r2, r3, r4, r5, sp}
+		adr	r0, LC0
+		ldmia	r0, {r1, r2, r3, r4, r5, r6, ip, sp}
+		subs	r0, r0, r1		@ calculate the delta offset
+
+		teq	r0, #0			@ if delta is zero, we're
+		beq	not_relocated		@ running at the address we
+						@ were linked at.
 
-		mov	r0, #0
+		/*
+		 * We're running at a different address.  We need to fix
+		 * up various pointers:
+		 *   r5 - zImage base address
+		 *   r6 - GOT start
+		 *   ip - GOT end
+		 */
+		add	r5, r5, r0
+		add	r6, r6, r0
+		add	ip, ip, r0
+
+#ifndef CONFIG_ZBOOT_ROM
+		/*
+		 * If we're running fully PIC === CONFIG_ZBOOT_ROM = n,
+		 * we need to fix up pointers into the BSS region.
+		 *   r2 - BSS start
+		 *   r3 - BSS end
+		 *   sp - stack pointer
+		 */
+		add	r2, r2, r0
+		add	r3, r3, r0
+		add	sp, sp, r0
+
+		/*
+		 * Relocate all entries in the GOT table.
+		 */
+1:		ldr	r1, [r6, #0]
+		add	r1, r1, r0
+		str	r1, [r6], #4
+		cmp	r6, ip
+		blo	1b
+#else
+
+		/*
+		 * Relocate entries in the GOT table.  We only relocate
+		 * the entries that are outside the (relocated) BSS region.
+		 */
+1:		ldr	r1, [r6, #0]
+		cmp	r1, r2			@ entry < bss_start ||
+		cmphs	r3, r1			@ _end < entry
+		addlo	r1, r1, r0
+		str	r1, [r6], #4
+		cmp	r6, ip
+		blo	1b
+
+#endif
+
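
These loops are the core of the new self-relocating startup.  A rough C
rendering of the GOT fix-up, assuming delta, got/got_end and the BSS
bounds mirror r0, r6/ip and r2/r3:

        #include <stdint.h>

        static void fixup_got(uint32_t delta, uint32_t *got,
                              uint32_t *got_end, uint32_t bss_start,
                              uint32_t bss_end, int zboot_rom)
        {
                for (; got < got_end; got++) {
                        if (zboot_rom) {
                                /* ROM build: only entries outside the
                                 * BSS region are adjusted. */
                                if (*got < bss_start || *got > bss_end)
                                        *got += delta;
                        } else {
                                /* Fully PIC: every entry moves by delta. */
                                *got += delta;
                        }
                }
        }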
+not_relocated:	mov	r0, #0
 1:		str	r0, [r2], #4		@ clear bss
 		str	r0, [r2], #4
 		str	r0, [r2], #4
 		str	r0, [r2], #4
 		cmp	r2, r3
-		blt	1b
+		blo	1b
 
-		mrc	p15, 0, r6, c0, c0	@ get processor ID
+		/*
+		 * The C runtime environment should now be setup
+		 * sufficiently.  Turn the cache on, set up some
+		 * pointers, and start decompressing.
+		 */
 		bl	cache_on
 
 		mov	r1, sp			@ malloc space above stack
 		add	r2, sp, #0x10000	@ 64k max
 
-		teq	r4, r5			@ will we overwrite ourselves?
-		moveq	r5, r2			@ decompress after image
-		movne	r5, r4			@ decompress to final location
+/*
+ * Check to see if we will overwrite ourselves.
+ *   r4 = final kernel address
+ *   r5 = start of this image
+ *   r2 = end of malloc space (and therefore this image)
+ * We basically want:
+ *   r4 >= r2 -> OK
+ *   r4 + image length <= r5 -> OK
+ */
+		cmp	r4, r2
+		bhs	wont_overwrite
+		add	r0, r4, #4096*1024	@ 4MB largest kernel size
+		cmp	r0, r5
+		bls	wont_overwrite
 
+		mov	r5, r2			@ decompress after malloc space
 		mov	r0, r5
 		mov	r3, r7
-		bl	SYMBOL_NAME(decompress_kernel)
-
-		teq	r4, r5			@ do we need to relocate
-		beq	call_kernel		@ the kernel?
+		bl	decompress_kernel
 
 		add	r0, r0, #127
 		bic	r0, r0, #127		@ align the kernel length
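
The new range test replaces the old equality check (teq r4, r5) with
proper interval arithmetic.  In C, with kernel_dst = r4, image_start = r5,
image_end = r2, and 4 MB as the assumed worst-case decompressed size:

        #include <stdint.h>

        static int will_overwrite(uint32_t kernel_dst, uint32_t image_start,
                                  uint32_t image_end)
        {
                if (kernel_dst >= image_end)
                        return 0;   /* target starts above this image */
                if (kernel_dst + 4096 * 1024 <= image_start)
                        return 0;   /* target + 4 MB ends below it */
                return 1;           /* overlap: decompress elsewhere,
                                       then relocate */
        }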
@@ -185,23 +271,39 @@
  */
 		add	r1, r5, r0		@ end of decompressed kernel
 		adr	r2, reloc_start
-		adr	r3, reloc_end
+		ldr	r3, LC1
+		add	r3, r2, r3
 1:		ldmia	r2!, {r8 - r13}		@ copy relocation code
 		stmia	r1!, {r8 - r13}
 		ldmia	r2!, {r8 - r13}
 		stmia	r1!, {r8 - r13}
 		cmp	r2, r3
-		blt	1b
+		blo	1b
 
 		bl	cache_clean_flush
 		add	pc, r5, r0		@ call relocation code
 
+/*
+ * We're not in danger of overwriting ourselves.  Do this the simple way.
+ *
+ * r4     = kernel execution address
+ * r7     = architecture ID
+ */
+wont_overwrite:	mov	r0, r4
+		mov	r3, r7
+		bl	decompress_kernel
+		b	call_kernel
+
 		.type	LC0, #object
-LC0:		.word	__bss_start
-		.word	_end
-		.word	_load_addr
-		.word	_start
-		.word	user_stack+4096
+LC0:		.word	LC0			@ r1
+		.word	__bss_start		@ r2
+		.word	_end			@ r3
+		.word	_load_addr		@ r4
+		.word	_start			@ r5
+		.word	_got_start		@ r6
+		.word	_got_end		@ ip
+		.word	user_stack+4096		@ sp
+LC1:		.word	reloc_end - reloc_start
 		.size	LC0, . - LC0
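
Note the trick in LC0: its first word is the table's own link-time
address, while the adr at entry yields its run-time address, so the
initial subs computes the relocation delta directly.  For example, if
the image was linked to run at 0x00008000 but is actually executing at
0x10008000, adr returns the 0x10008000-based address, LC0's first word
still holds the 0x00008000-based one, and the difference, 0x10000000,
is exactly the offset applied to every other pointer above.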
 
 /*
@@ -218,22 +320,15 @@
  *  r7 = architecture number
  *  r8 = run-time address of "start"
  * On exit,
- *  r0, r1, r2, r3, r8, r9 corrupted
+ *  r1, r2, r3, r8, r9, r12 corrupted
  * This routine must preserve:
  *  r4, r5, r6, r7
  */
 		.align	5
-cache_on:	ldr	r1, proc_sa110_type
-		eor	r1, r1, r6
-		movs	r1, r1, lsr #5		@ catch SA110 and SA1100
-		beq	1f
-		ldr     r1, proc_sa1110_type
-		eor	r1, r1, r6
-		movs	r1, r1, lsr #4
-@		movne	pc, lr
-		bne	cache_off
-1:
-		sub	r3, r4, #16384		@ Page directory size
+cache_on:	mov	r3, #8			@ cache_on function
+		b	call_cache_fn
+
+__setup_mmu:	sub	r3, r4, #16384		@ Page directory size
 		bic	r3, r3, #0xff		@ Align the pointer
 		bic	r3, r3, #0x3f00
 /*
@@ -248,9 +343,9 @@
 		orr	r1, r1, #3 << 10
 		add	r2, r3, #16384
 1:		cmp	r1, r8			@ if virt > start of RAM
-		orrge	r1, r1, #0x0c		@ set cacheable, bufferable
+		orrhs	r1, r1, #0x0c		@ set cacheable, bufferable
 		cmp	r1, r9			@ if virt > end of RAM
-		bicge	r1, r1, #0x0c		@ clear cacheable, bufferable
+		bichs	r1, r1, #0x0c		@ clear cacheable, bufferable
 		str	r1, [r0], #4		@ 1:1 mapping
 		add	r1, r1, #1048576
 		teq	r0, r2
@@ -269,24 +364,42 @@
 		str	r1, [r0], #4
 		add	r1, r1, #1048576
 		str	r1, [r0]
+		mov	pc, lr
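
__setup_mmu builds a flat (1:1) section map covering the whole 4 GB
address space, with the cacheable and bufferable bits set only across
RAM (r8..r9).  A rough C equivalent, assuming the usual ARMv4 section
descriptor (type 0b10, AP=11 in bits 11:10) that the code composes
before the loop shown above:

        #include <stdint.h>

        /* pgdir must be 16 KB aligned; ram_start/ram_end mirror r8/r9. */
        static void setup_flat_map(uint32_t *pgdir,
                                   uint32_t ram_start, uint32_t ram_end)
        {
                uint32_t desc = 0x12 | (3u << 10);    /* section, AP=11 */
                unsigned i;

                for (i = 0; i < 4096; i++) {          /* 4096 x 1 MB */
                        uint32_t virt = (uint32_t)i << 20;
                        uint32_t entry = virt | desc;
                        /* Cacheable + bufferable (bits 3:2), RAM only. */
                        if (virt >= ram_start && virt < ram_end)
                                entry |= 0x0c;
                        pgdir[i] = entry;
                }
        }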
 
+__armv4_cache_on:
+		mov	r12, lr
+		bl	__setup_mmu
 		mov	r0, #0
 		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
-		mcr	p15, 0, r0, c8, c7	@ flush I,D TLBs
-		mcr	p15, 0, r3, c2, c0	@ load page table pointer
+		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
+		mcr	p15, 0, r3, c2, c0, 0	@ load page table pointer
 		mov	r0, #-1
-		mcr	p15, 0, r0, c3, c0	@ load domain access register
-		mrc	p15, 0, r0, c1, c0
+		mcr	p15, 0, r0, c3, c0, 0	@ load domain access register
+		mrc	p15, 0, r0, c1, c0, 0
 		orr	r0, r0, #0x1000		@ I-cache enable
 #ifndef DEBUG
 		orr	r0, r0, #0x003d		@ Write buffer, mmu
 #endif
-		mcr	p15, 0, r0, c1, c0
-		mov	pc, lr
+		mcr	p15, 0, r0, c1, c0, 0
+		mov	pc, r12
+
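
For reference, the control register bits set by __armv4_cache_on decode
as follows (a sketch from the ARM architecture manual, not part of the
patch):

        #define CR_M  (1u << 0)         /* MMU enable */
        #define CR_C  (1u << 2)         /* data cache enable */
        #define CR_W  (1u << 3)         /* write buffer enable */
        #define CR_P  (1u << 4)         /* should-be-one on ARMv4 */
        #define CR_D  (1u << 5)         /* should-be-one on ARMv4 */
        #define CR_I  (1u << 12)        /* instruction cache enable */

        /* 0x003d == CR_M | CR_C | CR_W | CR_P | CR_D;  0x1000 == CR_I */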
+__arm6_cache_on:
+		mov	r12, lr
+		bl	__setup_mmu
+		mov	r0, #0
+		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
+		mcr	p15, 0, r0, c5, c0, 0	@ invalidate whole TLB v3
+		mcr	p15, 0, r3, c2, c0, 0	@ load page table pointer
+		mov	r0, #-1
+		mcr	p15, 0, r0, c3, c0, 0	@ load domain access control
+		mov	r0, #0x3d
+		mcr	p15, 0, r0, c1, c0, 0	@ load control register
+		mov	pc, r12
 
 /*
- * This code is relocatable.  It is relocated by the above code to the end
- * of the kernel and executed there.  During this time, we have no stacks.
+ * All code following this line is relocatable.  It is relocated by
+ * the above code to the end of the decompressed kernel image and
+ * executed there.  During this time, we have no stacks.
  *
  * r0     = decompressed kernel length
  * r1-r3  = unused
@@ -307,7 +420,7 @@
 		.endr
 
 		cmp	r5, r8
-		blt	1b
+		blo	1b
 		debug_reloc_end
 
 call_kernel:	bl	cache_clean_flush
@@ -317,47 +430,117 @@
 		mov	pc, r4			@ call kernel
 
 /*
- * Here follow the relocatable cache support functions for
- * the various processors.
+ * Here follow the relocatable cache support functions for the
+ * various processors.  This is a generic hook for locating an
+ * entry and jumping to an instruction at the specified offset
+ * from the start of the block.  Please note this is all position
+ * independent code.
+ *
+ *  r1  = corrupted
+ *  r2  = corrupted
+ *  r3  = block offset
+ *  r6  = corrupted
+ *  r12 = corrupted
  */
 
-		.type	proc_sa110_type,#object
-proc_sa110_type:
-		.word	0x4401a100
-		.size	proc_sa110_type, . - proc_sa110_type
-
-		.type	proc_sa1110_type,#object
-proc_sa1110_type:
-		.word	0x6901b110
-		.size	proc_sa1110_type, . - proc_sa1110_type
+call_cache_fn:	adr	r12, proc_types
+		mrc	p15, 0, r6, c0, c0	@ get processor ID
+1:		ldr	r1, [r12, #0]		@ get value
+		ldr	r2, [r12, #4]		@ get mask
+		eor	r1, r1, r6		@ (real ^ match)
+		tst	r1, r2			@       & mask
+		addeq	pc, r12, r3		@ call cache function
+		add	r12, r12, #4*5
+		b	1b
+
+/*
+ * Table for cache operations.  This is basically:
+ *   - CPU ID match
+ *   - CPU ID mask
+ *   - 'cache on' method instruction
+ *   - 'cache off' method instruction
+ *   - 'cache flush' method instruction
+ *
+ * We match an entry using: ((real_id ^ match) & mask) == 0
+ *
+ * Writethrough caches generally only need 'on' and 'off'
+ * methods.  Writeback caches _must_ have the flush method
+ * defined.
+ */
+		.type	proc_types,#object
+proc_types:
+		.word	0x41560600		@ ARM6/610
+		.word	0xffffffe0
+		b	__arm6_cache_off	@ works, but slow
+		b	__arm6_cache_off
+		mov	pc, lr
+@		b	__arm6_cache_on		@ untested
+@		b	__arm6_cache_off
+@		b	__armv3_cache_flush
+
+		.word	0x41007000		@ ARM7/710
+		.word	0xfff8fe00
+		b	__arm7_cache_off
+		b	__arm7_cache_off
+		mov	pc, lr
+
+		.word	0x41807200		@ ARM720T (writethrough)
+		.word	0xffffff00
+		b	__armv4_cache_on
+		b	__armv4_cache_off
+		mov	pc, lr
+
+		.word	0x41129200		@ ARM920T
+		.word	0xff00fff0
+		b	__armv4_cache_on
+		b	__armv4_cache_off
+		b	__armv4_cache_flush
+
+		.word	0x41029220		@ ARM922T
+		.word	0xff00fff0
+		b	__armv4_cache_on
+		b	__armv4_cache_off
+		b	__armv4_cache_flush
+
+		.word	0x4401a100		@ sa110 / sa1100
+		.word	0xffffffe0
+		b	__armv4_cache_on
+		b	__armv4_cache_off
+		b	__armv4_cache_flush
+
+		.word	0x6901b110		@ sa1110
+		.word	0xfffffff0
+		b	__armv4_cache_on
+		b	__armv4_cache_off
+		b	__armv4_cache_flush
+
+		.word	0x69050000		@ xscale
+		.word	0xffff0000
+		b	__armv4_cache_on
+		b	__armv4_cache_off
+		b	__armv4_cache_flush
+
+		.word	0			@ unrecognised type
+		.word	0
+		mov	pc, lr
+		mov	pc, lr
+		mov	pc, lr
+
+		.size	proc_types, . - proc_types
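
call_cache_fn treats each proc_types entry as five words: an ID match
value, an ID mask, then three branch instructions at byte offsets 8, 12
and 16 (cache on, off and flush).  That is why cache_on, cache_off and
cache_clean_flush load 8, 12 or 16 into r3 before branching here, and
why 'addeq pc, r12, r3' lands in the right slot.  Modelled in C with
function pointers standing in for the in-place branches (names are
assumptions):

        #include <stdint.h>

        struct proc_type {
                uint32_t match;          /* expected CPU ID bits */
                uint32_t mask;           /* which ID bits to compare */
                void (*cache_on)(void);  /* entry offset 8 */
                void (*cache_off)(void); /* entry offset 12 */
                void (*flush)(void);     /* entry offset 16 */
        };

        static const struct proc_type *find_proc(const struct proc_type *t,
                                                 uint32_t cpu_id)
        {
                /* The final entry's mask is 0, so it matches any ID and
                 * the walk always terminates. */
                while (((cpu_id ^ t->match) & t->mask) != 0)
                        t++;
                return t;
        }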
 
 /*
  * Turn off the Cache and MMU.  ARMv3 does not support
  * reading the control register, but ARMv4 does.
  *
  * On entry,  r6 = processor ID
- * On exit,   r0, r1 corrupted
+ * On exit,   r0, r1, r2, r3, r12 corrupted
  * This routine must preserve: r4, r6, r7
  */
 		.align	5
-cache_off:
-#ifdef CONFIG_CPU_ARM610
-		eor	r1, r6, #0x41000000
-		eor	r1, r1, #0x00560000
-		bic	r1, r1, #0x0000001f
-		teq	r1, #0x00000600
-		mov	r0, #0x00000060		@ ARM6 control reg.
-		beq	__armv3_cache_off
-#endif
-#ifdef CONFIG_CPU_ARM710
-		eor	r1, r6, #0x41000000
-		bic	r1, r1, #0x00070000
-		bic	r1, r1, #0x000000ff
-		teq	r1, #0x00007000		@ ARM7
-		teqne	r1, #0x00007100		@ ARM710
-		mov	r0, #0x00000070		@ ARM7 control reg.
-		beq	__armv3_cache_off
-#endif
+cache_off:	mov	r3, #12			@ cache_off function
+		b	call_cache_fn
+
+__armv4_cache_off:
 		mrc	p15, 0, r0, c1, c0
 		bic	r0, r0, #0x000d
 		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
@@ -366,11 +549,19 @@
 		mcr	p15, 0, r0, c8, c7	@ invalidate whole TLB v4
 		mov	pc, lr
 
+__arm6_cache_off:
+		mov	r0, #0x00000030		@ ARM6 control reg.
+		b	__armv3_cache_off
+
+__arm7_cache_off:
+		mov	r0, #0x00000070		@ ARM7 control reg.
+		b	__armv3_cache_off
+
 __armv3_cache_off:
-		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
+		mcr	p15, 0, r0, c1, c0, 0	@ turn MMU and cache off
 		mov	r0, #0
-		mcr	p15, 0, r0, c7, c0	@ invalidate whole cache v3
-		mcr	p15, 0, r0, c5, c0	@ invalidate whole TLB v3
+		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
+		mcr	p15, 0, r0, c5, c0, 0	@ invalidate whole TLB v3
 		mov	pc, lr
 
 /*
@@ -379,23 +570,18 @@
  * On entry,
  *  r6 = processor ID
  * On exit,
- *  r1, r2, r12 corrupted
+ *  r1, r2, r3, r12 corrupted
  * This routine must preserve:
- *  r4, r6, r7
+ *  r0, r4, r5, r6, r7
  */
 		.align	5
 cache_clean_flush:
-		ldr	r1, proc_sa110_type
-		eor	r1, r1, r6
-		movs	r1, r1, lsr #5		@ catch SA110 and SA1100
-		beq	1f
-		ldr	r1, proc_sa1110_type
-		eor	r1, r1, r6
-		movs	r1, r1, lsr #4
-		movne	pc, lr
-1:
+		mov	r3, #16
+		b	call_cache_fn
+
+__armv4_cache_flush:
 		bic	r1, pc, #31
-		add	r2, r1, #32768
+		add	r2, r1, #65536		@ 2x the largest dcache size
 1:		ldr	r12, [r1], #32		@ s/w flush D cache
 		teq	r1, r2
 		bne	1b
@@ -404,6 +590,11 @@
 		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
 		mov	pc, lr
 
+__armv3_cache_flush:
+		mov	r1, #0
+		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
+		mov	pc, lr
+
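
__armv4_cache_flush cleans the data cache without any model-specific
clean instruction: reading a 64 KB span (twice the largest expected
D-cache) through a read-allocate cache evicts, and therefore writes
back, every dirty line; the explicit v4 invalidate and write buffer
drain then finish the job.  As a sketch, with the 32-byte line size the
code itself assumes:

        static void clean_dcache_by_reading(const volatile char *base)
        {
                unsigned off;

                /* Each load allocates a line, displacing a dirty one. */
                for (off = 0; off < 2 * 32768; off += 32)
                        (void)base[off];
        }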
 /*
  * Various debugging routines for printing hex characters and
  * memory, which again must be relocatable.
@@ -479,5 +670,5 @@
 reloc_end:
 
 		.align
-		.section	".stack"
+		.section ".stack", "w"
 user_stack:	.space	4096
