patch-2.1.79 linux/arch/ppc/kernel/head.S


diff -u --recursive --new-file v2.1.78/linux/arch/ppc/kernel/head.S linux/arch/ppc/kernel/head.S
@@ -30,7 +30,50 @@
 #include <linux/errno.h>
 #include <linux/config.h>
 
-#define SYNC() \
+#ifdef CONFIG_APUS
+/* At CYBERBASEp we'll find the following sum:
+ * -KERNELBASE+CyberStormMemoryBase
+ */
+#define CYBERBASEp (0xfff00000)
+#endif
+
+/* optimization for 603 to load the tlb directly from the linux table */
+#define NO_RELOAD_HTAB 1
+	
+CACHE_LINE_SIZE = 32
+LG_CACHE_LINE_SIZE = 5
+
+#define TOPHYS(x)	(x - KERNELBASE)
+
+/*
+ * Macros for storing registers into and loading registers from
+ * exception frames.
+ */
+#define SAVE_GPR(n, base)	stw	n,GPR0+4*(n)(base)
+#define SAVE_2GPRS(n, base)	SAVE_GPR(n, base); SAVE_GPR(n+1, base)
+#define SAVE_4GPRS(n, base)	SAVE_2GPRS(n, base); SAVE_2GPRS(n+2, base)
+#define SAVE_8GPRS(n, base)	SAVE_4GPRS(n, base); SAVE_4GPRS(n+4, base)
+#define SAVE_10GPRS(n, base)	SAVE_8GPRS(n, base); SAVE_2GPRS(n+8, base)
+#define REST_GPR(n, base)	lwz	n,GPR0+4*(n)(base)
+#define REST_2GPRS(n, base)	REST_GPR(n, base); REST_GPR(n+1, base)
+#define REST_4GPRS(n, base)	REST_2GPRS(n, base); REST_2GPRS(n+2, base)
+#define REST_8GPRS(n, base)	REST_4GPRS(n, base); REST_4GPRS(n+4, base)
+#define REST_10GPRS(n, base)	REST_8GPRS(n, base); REST_2GPRS(n+8, base)
+
+#define SAVE_FPR(n, base)	stfd	n,TSS_FPR0+8*(n)(base)
+#define SAVE_2FPRS(n, base)	SAVE_FPR(n, base); SAVE_FPR(n+1, base)
+#define SAVE_4FPRS(n, base)	SAVE_2FPRS(n, base); SAVE_2FPRS(n+2, base)
+#define SAVE_8FPRS(n, base)	SAVE_4FPRS(n, base); SAVE_4FPRS(n+4, base)
+#define SAVE_16FPRS(n, base)	SAVE_8FPRS(n, base); SAVE_8FPRS(n+8, base)
+#define SAVE_32FPRS(n, base)	SAVE_16FPRS(n, base); SAVE_16FPRS(n+16, base)
+#define REST_FPR(n, base)	lfd	n,TSS_FPR0+8*(n)(base)
+#define REST_2FPRS(n, base)	REST_FPR(n, base); REST_FPR(n+1, base)
+#define REST_4FPRS(n, base)	REST_2FPRS(n, base); REST_2FPRS(n+2, base)
+#define REST_8FPRS(n, base)	REST_4FPRS(n, base); REST_4FPRS(n+4, base)
+#define REST_16FPRS(n, base)	REST_8FPRS(n, base); REST_8FPRS(n+8, base)
+#define REST_32FPRS(n, base)	REST_16FPRS(n, base); REST_16FPRS(n+16, base)
+
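The SAVE_*/REST_* macros above build long save/restore sequences by pairwise
composition; each GPR lands in a fixed slot at GPR0 + 4*n in the exception
frame, each FPR at TSS_FPR0 + 8*n in the thread struct.  As a rough sketch of
what the assembler ends up seeing (assuming GPR0 is the byte offset of the r0
slot, as generated from the C structure offsets), SAVE_4GPRS(3, r21) expands to:

	stw	3,GPR0+12(r21)		/* r3 */
	stw	4,GPR0+16(r21)		/* r4 */
	stw	5,GPR0+20(r21)		/* r5 */
	stw	6,GPR0+24(r21)		/* r6 */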
+#define SYNC \
 	sync; \
 	isync
 
@@ -43,94 +86,16 @@
 	addi	r4,r4,0x1000; \
 	bdnz	0b
 
-#define TOPHYS(x)	(x - KERNELBASE)
+#define LOAD_BAT(n, offset, reg, RA, RB) \
+	lwz	RA,offset+0(reg); \
+	lwz	RB,offset+4(reg);	\
+	mtspr	IBAT##n##U,RA;	\
+	mtspr	IBAT##n##L,RB;	\
+	lwz	RA,offset+8(reg);	\
+	lwz	RB,offset+12(reg);	\
+	mtspr	DBAT##n##U,RA;	\
+	mtspr	DBAT##n##L,RB
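The new LOAD_BAT macro replaces the long per-machine LOAD_BATS kludge removed
below: each call loads one BAT pair from four consecutive words of a table,
offset+0/+4 into IBATnU/IBATnL and offset+8/+12 into DBATnU/DBATnL.  A minimal
usage sketch, assuming the 16-byte-per-BAT layout of the BATS table that
MMU_init fills in (this mirrors code added later in this patch):

	lis	r3,BATS@ha
	addi	r3,r3,BATS@l
	tophys(r3,r3,r4)		/* table is read with translation off */
	LOAD_BAT(0,0,r3,r4,r5)		/* words 0-3 -> IBAT0U/L, DBAT0U/L */
	LOAD_BAT(1,16,r3,r4,r5)		/* words 4-7 -> IBAT1U/L, DBAT1U/L */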
 
-/* this is a very kludgey way of loading up the BATs on the
-   prep system.  I'll kill this horrible macro and write
-   something clean when I have a chance -- Cort
- */	
-#define LOAD_BATS(RA,RB) \
-	mfspr	RA,PVR		; \
-	srwi	RA,RA,16	; \
-	cmpi	0,RA,1		; \
-	beq	199f		; \
-	/* load bats for 60x */	; \
-	lis	RA,BAT0@h	; \
-	ori	RA,RA,BAT0@l	; \
-	addis	RA,RA,-KERNELBASE@h;\
-	lwz	RB,0(RA)	; \
-	mtspr	IBAT0U,RB	; \
-	mtspr	DBAT0U,RB	; \
-	lwz	RB,4(RA)	; \
-	mtspr	IBAT0L,RB	; \
-	mtspr	DBAT0L,RB	; \
-	lis	RA,BAT1@h	; \
-	ori	RA,RA,BAT1@l	; \
-	addis	RA,RA,-KERNELBASE@h;\
-	lwz	RB,0(RA)	; \
-	mtspr	IBAT1U,RB	; \
-	mtspr	DBAT1U,RB	; \
-	lwz	RB,4(RA)	; \
-	mtspr	IBAT1L,RB	; \
-	mtspr	DBAT1L,RB	; \
-	lis	RA,BAT2@h	; \
-	ori	RA,RA,BAT2@l	; \
-	addis	RA,RA,-KERNELBASE@h;\
-	lwz	RB,0(RA)	; \
-	mtspr	IBAT2U,RB	; \
-	mtspr	DBAT2U,RB	; \
-	lwz	RB,4(RA)	; \
-	mtspr	IBAT2L,RB	; \
-	mtspr	DBAT2L,RB	; \
-	lis	RA,BAT3@h	; \
-	ori	RA,RA,BAT3@l	; \
-	addis	RA,RA,-KERNELBASE@h;\
-	lwz	RB,0(RA)	; \
-	mtspr	IBAT3U,RB	; \
-	mtspr	DBAT3U,RB	; \
-	lwz	RB,4(RA)	; \
-	mtspr	IBAT3L,RB	; \
-	mtspr	DBAT3L,RB	; \
-	b	200f		; \
-199:	/*load bats for 601 */	; \
-	lis	RA,BAT0_601@h	; \
-	ori	RA,RA,BAT0_601@l; \
-	addis	RA,RA,-KERNELBASE@h;\
-	lwz	RB,0(RA)	; \
-	mtspr	IBAT0U,RB	; \
-	mtspr	DBAT0U,RB	; \
-	lwz	RB,4(RA)	; \
-	mtspr	IBAT0L,RB	; \
-	mtspr	DBAT0L,RB	; \
-	lis	RA,BAT1_601@h	; \
-	ori	RA,RA,BAT1_601@l; \
-	addis	RA,RA,-KERNELBASE@h;\
-	lwz	RB,0(RA)	; \
-	mtspr	IBAT1U,RB	; \
-	mtspr	DBAT1U,RB	; \
-	lwz	RB,4(RA)	; \
-	mtspr	IBAT1L,RB	; \
-	mtspr	DBAT1L,RB	; \
-	lis	RA,BAT2_601@h	; \
-	ori	RA,RA,BAT2_601@l; \
-	addis	RA,RA,-KERNELBASE@h;\
-	lwz	RB,0(RA)	; \
-	mtspr	IBAT2U,RB	; \
-	mtspr	DBAT2U,RB	; \
-	lwz	RB,4(RA)	; \
-	mtspr	IBAT2L,RB	; \
-	mtspr	DBAT2L,RB	; \
-	lis	RA,BAT3_601@h	; \
-	ori	RA,RA,BAT3_601@l; \
-	addis	RA,RA,-KERNELBASE@h;\
-	lwz	RB,0(RA)	; \
-	mtspr	IBAT3U,RB	; \
-	mtspr	DBAT3U,RB	; \
-	lwz	RB,4(RA)	; \
-	mtspr	IBAT3L,RB	; \
-	mtspr	DBAT3L,RB	; \
-200:
-	
 	.text
 	.globl	_stext
 _stext:
@@ -152,8 +117,8 @@
  * managing the hash table.  Interrupts are disabled.  The stack
  * pointer (r1) points to just below the end of the half-meg region
  * from 0x380000 - 0x400000, which is mapped in already.
- */
-/* PREP
+ *
+ * PREP
  * This is jumped to on prep systems right after the kernel is relocated
  * to its proper place in memory by the boot loader.  The expected layout
  * of the regs is:	
@@ -172,30 +137,47 @@
 __start:
 
 /*
+ * We have to do any OF calls before we map ourselves to KERNELBASE,
+ * because OF may have I/O devices mapped in in that area
+ * (particularly on CHRP).
+ */
+	mr	r31,r3			/* save parameters */
+	mr	r30,r4
+	mr	r29,r5
+	mr	r28,r6
+	mr	r29,r7
+	bl	prom_init
+
+/*
  * Use the first pair of BAT registers to map the 1st 16MB
  * of RAM to KERNELBASE.
  */
-        mfspr   r9,PVR
-        rlwinm  r9,r9,16,16,31          /* r9 = 1 for 601, 4 for 604 */
-        cmpi    0,r9,1
-        lis     r11,KERNELBASE@h
-        bne     4f
-        ori     r11,r11,4               /* set up BAT registers for 601 */
-        li      r8,0x7f
-        ori     r11,r11,4               /* set up BAT registers for 601 */
-        li      r8,0x7f
-        oris    r9,r11,0x800000@h       /* set up BAT reg for 2nd 8M */
-        oris    r10,r8,0x800000@h       /* set up BAT reg for 2nd 8M */
-        mtspr   IBAT1U,r9
-        mtspr   IBAT1L,r10
-        b       5f
-4:      ori     r11,r11,0x1ff           /* set up BAT registers for 604 */
-        li      r8,2
-        mtspr   DBAT0U,r11
-        mtspr   DBAT0L,r8
-5:      mtspr   IBAT0U,r11
-        mtspr   IBAT0L,r8
-        isync
+	mfspr	r9,PVR
+	rlwinm	r9,r9,16,16,31		/* r9 = 1 for 601, 4 for 604 */
+	cmpi	0,r9,1
+	lis	r11,KERNELBASE@h
+	bne	4f
+	ori	r11,r11,4		/* set up BAT registers for 601 */
+	li	r8,0x7f
+	oris	r9,r11,0x800000@h	/* set up BAT reg for 2nd 8M */
+	oris	r10,r8,0x800000@h	/* set up BAT reg for 2nd 8M */
+	mtspr	IBAT1U,r9
+	mtspr	IBAT1L,r10
+	b	5f
+4:	ori	r11,r11,0x1ff		/* set up BAT registers for 604 */
+#ifndef CONFIG_APUS
+	li	r8,2
+#else
+	lis	r8,CYBERBASEp@h
+	lwz	r8,0(r8)
+	addis	r8,r8,KERNELBASE@h
+	addi	r8,r8,2
+#endif
+	mtspr	DBAT0U,r11
+	mtspr	DBAT0L,r8
+5:	mtspr	IBAT0U,r11
+	mtspr	IBAT0L,r8
+	isync
 /*
  * we now have the 1st 16M of ram mapped with the bats.
  * prep needs the mmu to be turned on here, but pmac already has it on.
@@ -247,6 +229,20 @@
 #define REST_16FPRS(n, base)	REST_8FPRS(n, base); REST_8FPRS(n+8, base)
 #define REST_32FPRS(n, base)	REST_16FPRS(n, base); REST_16FPRS(n+16, base)
 
+#ifndef CONFIG_APUS
+#define tophys(rd,rs,rt)	addis	rd,rs,-KERNELBASE@h
+#define tovirt(rd,rs,rt)	addis	rd,rs,KERNELBASE@h
+#else
+#define tophys(rd,rs,rt)	 \
+	lis	rt,CYBERBASEp@h; \
+	lwz	rt,0(rt);	 \
+	add	rd,rs,rt
+#define tovirt(rd,rs,rt)	 \
+	lis	rt,CYBERBASEp@h; \
+	lwz	rt,0(rt);	 \
+	sub	rd,rs,rt
+#endif
+	
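tophys/tovirt convert between the kernel's link-time virtual addresses and
physical addresses while translation is off.  On everything except APUS the
offset is simply KERNELBASE, so a single addis does the job and the scratch
register rt is unused; on APUS the offset is fetched at run time from the word
at CYBERBASEp.  A minimal sketch of the non-APUS case, assuming KERNELBASE is
0xc0000000:

	/* tophys(r21,r1,r21):  physical = virtual - KERNELBASE */
	addis	r21,r1,-KERNELBASE@h	/* r21 = r1 - 0xc0000000 */
	/* tovirt(r1,r21,r1):   virtual  = physical + KERNELBASE */
	addis	r1,r21,KERNELBASE@h	/* r1 = r21 + 0xc0000000 */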
 /*
  * Exception entry code.  This code runs with address translation
  * turned off, i.e. using physical addresses.
@@ -254,21 +250,15 @@
  * task's thread_struct.
  */
 #define EXCEPTION_PROLOG	\
-0:	mtspr	SPRG0,r20;	\
+	mtspr	SPRG0,r20;	\
 	mtspr	SPRG1,r21;	\
 	mfcr	r20;		\
-	mfspr	r21,SRR1;		/* test whether from user or kernel */\
-	andi.	r21,r21,MSR_PR;	\
-	mr	r21,r1;			/* from kernel - use current sp */\
-	beq	1f;		\
-	mfspr	r21,SPRG3;		/* from user - load kernel sp */\
-	lwz	r21,KSP(r21);	\
-1:	addis	r21,r21,-KERNELBASE@h;	/* convert sp to physical */	\
+	mfspr	r21,SPRG2;		/* exception stack to use from */ \
+	cmpwi	0,r21,0;		/* user mode or RTAS */ \
+	bne	1f;		\
+	tophys(r21,r1,r21);		/* use tophys(kernel sp) otherwise */ \
 	subi	r21,r21,INT_FRAME_SIZE+STACK_UNDERHEAD;	/* alloc exc. frame */\
-	stw	r1,GPR1(r21);	\
-	stw	r1,0(r21);	\
-	addis	r1,r21,KERNELBASE@h;	/* set new kernel sp */		\
-	stw	r20,_CCR(r21);		/* save registers */		\
+1:	stw	r20,_CCR(r21);		/* save registers */ \
 	stw	r22,GPR22(r21);	\
 	stw	r23,GPR23(r21);	\
 	mfspr	r20,SPRG0;	\
@@ -282,9 +272,12 @@
 	mfspr	r20,XER;	\
 	stw	r20,_XER(r21);	\
 	mfspr	r22,SRR0;	\
-	mfspr	r23,SRR1;		/* we can now take exceptions */\
+	mfspr	r23,SRR1;	\
 	stw	r0,GPR0(r21);	\
+	stw	r1,GPR1(r21);	\
 	stw	r2,GPR2(r21);	\
+	stw	r1,0(r21);	\
+	tovirt(r1,r21,r1);		/* set new kernel sp */	\
 	SAVE_4GPRS(3, r21);
 /*
  * Note: code which follows this uses cr0.eq (set if from kernel),
@@ -315,7 +308,7 @@
 DataAccess:
 	EXCEPTION_PROLOG
 	mfspr	r20,DSISR
-	andis.	r0,r20,0x8470		/* weird error? */
+	andis.	r0,r20,0xa470		/* weird error? */
 	bne	1f			/* if not, try to put a PTE */
 	mfspr	r3,DAR			/* into the hash table */
 	rlwinm	r4,r23,32-13,30,30	/* MSR_PR -> _PAGE_USER */
@@ -419,6 +412,47 @@
  */
 	. = 0x1000
 InstructionTLBMiss:
+#ifdef NO_RELOAD_HTAB
+/*
+ * r0:	stored ctr
+ * r1:	linux style pte ( later becomes ppc hardware pte )
+ * r2:	ptr to linux-style pte
+ * r3:	scratch
+ */
+	mfctr	r0
+	/* Get PTE (linux-style) and check access */
+	mfspr	r2,SPRG3
+	lwz	r2,PG_TABLES(r2)
+	tophys(r2,r2,r3)
+	mfspr	r3,IMISS
+	rlwimi	r2,r3,12,20,29		/* insert top 10 bits of address */
+	lwz	r2,0(r2)		/* get pmd entry */
+	rlwinm.	r2,r2,0,0,19		/* extract address of pte page */
+	beq-	InstructionAddressInvalid	/* return if no mapping */
+	tophys(r2,r2,r1)
+	rlwimi	r2,r3,22,20,29		/* insert next 10 bits of address */
+	lwz	r1,0(r2)		/* get linux-style pte */
+	/* setup access flags in r3 */
+	mfmsr	r3
+	rlwinm	r3,r3,32-13,30,30	/* MSR_PR -> _PAGE_USER */
+	ori	r3,r3,1			/* set _PAGE_PRESENT bit in access */
+	andc.	r3,r3,r1		/* check access & ~permission */
+	bne-	InstructionAddressInvalid /* return if access not permitted */
+	ori	r1,r1,0x100		/* set _PAGE_ACCESSED in pte */
+	stw	r1,0(r2)		/* update PTE (accessed bit) */
+	/* Convert linux-style PTE to low word of PPC-style PTE */
+	/* this computation could be done better -- Cort */
+	rlwinm	r3,r1,32-9,31,31	/* _PAGE_HWWRITE -> PP lsb */
+	rlwimi	r1,r1,32-1,31,31	/* _PAGE_USER -> PP (both bits now) */
+	ori	r3,r3,0xe04		/* clear out reserved bits */
+	andc	r1,r1,r3		/* PP=2 or 0, when _PAGE_HWWRITE */
+	mtspr	RPA,r1
+	mfspr	r3,IMISS
+	tlbli	r3
+	mfspr	r3,SRR1		/* Need to restore CR0 */
+	mtcrf	0x80,r3
+	rfi	
+#else
 	mfctr	r0		/* Need to save this - CTR can't be touched! */
 	mfspr	r2,HASH1	/* Get PTE pointer */
 	mfspr	r3,ICMP		/* Partial item compare value */
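In the NO_RELOAD_HTAB path added above, the 603 reloads its TLB straight from
the Linux page tables instead of the hash table: SPRG3 gives the current
task's physical TSS, PG_TABLES gives the pgd, the top 10 bits of the missing
address index the pgd, the next 10 bits index the pte page, and the Linux pte
is then massaged into the low word of a hardware PTE (PP bits derived from
_PAGE_USER and _PAGE_HWWRITE, reserved bits cleared) before being handed to
RPA and tlbli.  A rough sketch of the address arithmetic the two rlwimi
instructions encode, assuming 4 kB pages and 4-byte table entries, for a miss
at EA = 0x10345678:

	/* pgd index = EA >> 22           = 0x040 -> entry at pgd base + 0x040*4 */
	/* pte index = (EA >> 12) & 0x3ff = 0x345 -> entry at pte page + 0x345*4 */
	rlwimi	r2,r3,12,20,29		/* r2 = pgd base | (EA >> 22) * 4 */
	rlwimi	r2,r3,22,20,29		/* r2 = pte page | ((EA >> 12) & 0x3ff) * 4 */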
@@ -443,20 +477,25 @@
 	mfspr	r2,HASH2	/* Get hash table pointer */
 	ori	r3,r3,0x40	/* Set secondary hash */
 	b	00b		/* Try lookup again */
+#endif /* NO_RELOAD_HTAB */
 InstructionAddressInvalid:
 	mfspr	r3,SRR1
 	rlwinm	r1,r3,9,6,6	/* Get load/store bit */
-	addis	r1,r1,0x4000	/* Set bit 1 -> PTE not found */
-	mtspr	DSISR,r1
+#ifdef NO_RELOAD_HTAB
+	addis	r1,r1,0x2000
+#else
+	addis	r1,r1,0x4000	/* Set bit 1 -> PTE not found (in HTAB) */
+#endif /* NO_RELOAD_HTAB */
+	mtspr	DSISR,r1	/* (shouldn't be needed) */
 	mtctr	r0		/* Restore CTR */
 	andi.	r2,r3,0xFFFF	/* Clear upper bits of SRR1 */
 	or	r2,r2,r1
 	mtspr	SRR1,r2
 	mfspr	r1,IMISS	/* Get failing address */
 	rlwinm.	r2,r2,0,31,31	/* Check for little endian access */
-	beq	20f		/* Jump if big endian */
-	xori	r1,r1,3
-20:	mtspr	DAR,r1		/* Set fault address */
+	rlwimi	r2,r2,1,30,30	/* change 1 -> 3 */
+	xor	r1,r1,r2
+	mtspr	DAR,r1		/* Set fault address */
 	mfmsr	r0		/* Restore "normal" registers */
 	xoris	r0,r0,MSR_TGPR>>16
 	mtcrf	0x80,r3		/* Restore CR0 */
@@ -469,6 +508,48 @@
  */
 	. = 0x1100
 DataLoadTLBMiss:
+#ifdef NO_RELOAD_HTAB
+/*
+ * r0:	stored ctr
+ * r1:	linux style pte ( later becomes ppc hardware pte )
+ * r2:	ptr to linux-style pte
+ * r3:	scratch
+ */
+	mfctr	r0
+	/* Get PTE (linux-style) and check access */
+	mfspr	r2,SPRG3
+	lwz	r2,PG_TABLES(r2)		
+	tophys(r2,r2,r3)
+	mfspr	r3,DMISS
+	rlwimi	r2,r3,12,20,29		/* insert top 10 bits of address */
+	lwz	r2,0(r2)		/* get pmd entry */
+	rlwinm.	r2,r2,0,0,19		/* extract address of pte page */
+	beq-	DataAddressInvalid	/* return if no mapping */
+	tophys(r2,r2,r1)
+	rlwimi	r2,r3,22,20,29		/* insert next 10 bits of address */
+	lwz	r1,0(r2)		/* get linux-style pte */
+	/* setup access flags in r3 */
+	mfmsr	r3
+	rlwinm	r3,r3,32-13,30,30	/* MSR_PR -> _PAGE_USER */
+	ori	r3,r3,1			/* set _PAGE_PRESENT bit in access */
+	/* save r2 and use it as scratch for the andc. */	
+	andc.	r3,r3,r1		/* check access & ~permission */
+	bne-	DataAddressInvalid	/* return if access not permitted */
+	ori	r1,r1,0x100		/* set _PAGE_ACCESSED in pte */
+	stw	r1,0(r2)		/* update PTE (accessed bit) */
+	/* Convert linux-style PTE to low word of PPC-style PTE */
+	/* this computation could be done better -- Cort */
+	rlwinm	r3,r1,32-9,31,31	/* _PAGE_HWWRITE -> PP lsb */
+	rlwimi	r1,r1,32-1,31,31	/* _PAGE_USER -> PP (both bits now) */
+	ori	r3,r3,0xe04		/* clear out reserved bits */
+	andc	r1,r1,r3		/* PP=2 or 0, when _PAGE_HWWRITE */
+	mtspr	RPA,r1
+	mfspr	r3,DMISS
+	tlbld	r3
+	mfspr	r3,SRR1		/* Need to restore CR0 */
+	mtcrf	0x80,r3
+	rfi	
+#else
 	mfctr	r0		/* Need to save this - CTR can't be touched! */
 	mfspr	r2,HASH1	/* Get PTE pointer */
 	mfspr	r3,DCMP		/* Partial item compare value */
@@ -493,10 +574,15 @@
 	mfspr	r2,HASH2	/* Get hash table pointer */
 	ori	r3,r3,0x40	/* Set secondary hash */
 	b	00b		/* Try lookup again */
+#endif /* NO_RELOAD_HTAB */
 DataAddressInvalid:
 	mfspr	r3,SRR1
 	rlwinm	r1,r3,9,6,6	/* Get load/store bit */
+#ifdef NO_RELOAD_HTAB
+	addis	r1,r1,0x2000
+#else
 	addis	r1,r1,0x4000	/* Set bit 1 -> PTE not found */
+#endif /* NO_RELOAD_HTAB */
 	mtspr	DSISR,r1
 	mtctr	r0		/* Restore CTR */
 	andi.	r2,r3,0xFFFF	/* Clear upper bits of SRR1 */
@@ -518,6 +604,48 @@
  */
 	. = 0x1200
 DataStoreTLBMiss:
+#ifdef NO_RELOAD_HTAB
+/*
+ * r0:	stored ctr
+ * r1:	linux style pte ( later becomes ppc hardware pte )
+ * r2:	ptr to linux-style pte
+ * r3:	scratch
+ */
+	mfctr	r0
+	/* Get PTE (linux-style) and check access */
+	mfspr	r2,SPRG3
+	lwz	r2,PG_TABLES(r2)		
+	tophys(r2,r2,r3)
+	mfspr	r3,DMISS
+	rlwimi	r2,r3,12,20,29		/* insert top 10 bits of address */
+	lwz	r2,0(r2)		/* get pmd entry */
+	rlwinm.	r2,r2,0,0,19		/* extract address of pte page */
+	beq-	DataAddressInvalid	/* return if no mapping */
+	tophys(r2,r2,r1)
+	rlwimi	r2,r3,22,20,29		/* insert next 10 bits of address */
+	lwz	r1,0(r2)		/* get linux-style pte */
+	/* setup access flags in r3 */
+	mfmsr	r3
+	rlwinm	r3,r3,32-13,30,30	/* MSR_PR -> _PAGE_USER */
+	ori	r3,r3,0x5		/* _PAGE_PRESENT|_PAGE_RW */
+	/* save r2 and use it as scratch for the andc. */	
+	andc.	r3,r3,r1		/* check access & ~permission */
+	bne-	DataAddressInvalid	/* return if access not permitted */
+	ori	r1,r1,0x384		/* set _PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_RW|_PAGE_HWWRITE in pte */
+	stw	r1,0(r2)		/* update PTE (accessed bit) */
+	/* Convert linux-style PTE to low word of PPC-style PTE */
+	/* this computation could be done better -- Cort */
+	rlwinm	r3,r1,32-9,31,31	/* _PAGE_HWWRITE -> PP lsb */
+	rlwimi	r1,r1,32-1,31,31	/* _PAGE_USER -> PP (both bits now) */
+	ori	r3,r3,0xe04		/* clear out reserved bits */
+	andc	r1,r1,r3		/* PP=2 or 0, when _PAGE_HWWRITE */
+	mtspr	RPA,r1
+	mfspr	r3,DMISS
+	tlbld	r3
+	mfspr	r3,SRR1		/* Need to restore CR0 */
+	mtcrf	0x80,r3
+	rfi	
+#else	
 	mfctr	r0		/* Need to save this - CTR can't be touched! */
 	mfspr	r2,HASH1	/* Get PTE pointer */
 	mfspr	r3,DCMP		/* Partial item compare value */
@@ -542,6 +670,8 @@
 	mfspr	r2,HASH2	/* Get hash table pointer */
 	ori	r3,r3,0x40	/* Set secondary hash */
 	b	00b			/* Try lookup again */
+#endif /* NO_RELOAD_HTAB */
+	
 
 /* Instruction address breakpoint exception (on 603/604) */
 	STD_EXCEPTION(0x1300, Trap_13, InstructionBreakpoint)
@@ -596,17 +726,23 @@
 	SAVE_8GPRS(12, r21)
 	SAVE_8GPRS(24, r21)
 	andi.	r23,r23,MSR_PR
-	mfspr	r23,SPRG3		/* if from user, fix up tss */
+	mfspr	r23,SPRG3		/* if from user, fix up tss.regs */
 	beq	2f
 	addi	r24,r1,STACK_FRAME_OVERHEAD
 	stw	r24,PT_REGS(r23)
 2:	addi	r2,r23,-TSS		/* set r2 to current */
-	addis	r2,r2,KERNELBASE@h
+	tovirt(r2,r2,r23)
 	mflr	r23
 	andi.	r24,r23,0x3f00		/* get vector offset */
 	stw	r24,TRAP(r21)
 	li	r22,0
 	stw	r22,RESULT(r21)
+	mtspr	SPRG2,r22		/* r1 is now kernel sp */
+	addi	r24,r2,TASK_STRUCT_SIZE	/* check for kernel stack overflow */
+	cmplw	0,r1,r2
+	cmplw	1,r1,r24
+	crand	1,1,4
+	bgt	stack_ovf		/* if r2 < r1 < r2+TASK_STRUCT_SIZE */
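The five instructions above are a kernel stack overflow check: the kernel
stack and the task struct share one TASK_UNION_SIZE block with the struct at
the bottom, so a stack pointer strictly between current (r2) and
current + TASK_STRUCT_SIZE means the stack has grown down into the task
struct.  A re-annotated sketch of how the condition-register bits are combined
(cr0.gt is CR bit 1, cr1.lt is CR bit 4):

	cmplw	0,r1,r2			/* cr0.gt = (r1 > r2) */
	cmplw	1,r1,r24		/* cr1.lt = (r1 < r2 + TASK_STRUCT_SIZE) */
	crand	1,1,4			/* cr0.gt &= cr1.lt */
	bgt	stack_ovf		/* taken only if both held */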
 	lwz	r24,0(r23)		/* virtual address of handler */
 	lwz	r23,4(r23)		/* where to go when done */
 	mtspr	SRR0,r24
@@ -616,28 +752,67 @@
 	rfi				/* jump to handler, enable MMU */
 
 /*
+ * On kernel stack overflow, load up an initial stack pointer
+ * and call StackOverflow(regs), which should not return.
+ */
+stack_ovf:
+	addi	r3,r1,STACK_FRAME_OVERHEAD
+	lis	r1,init_task_union@ha
+	addi	r1,r1,init_task_union@l
+	addi	r1,r1,TASK_UNION_SIZE-STACK_FRAME_OVERHEAD
+	lis	r24,StackOverflow@ha
+	addi	r24,r24,StackOverflow@l
+	li	r20,MSR_KERNEL
+	mtspr	SRR0,r24
+	mtspr	SRR1,r20
+	SYNC
+	rfi
+
+/*
  * Continuation of the floating-point unavailable handler.
  */
 load_up_fpu:
-	bl	giveup_fpu_unmapped
-	ori	r23,r23,MSR_FP		/* enable use of FP after return */
+
+/*
+ * Disable FP for the task which had the FPU previously,
+ * and save its floating-point registers in its thread_struct.
+ * Enables the FPU for use in the kernel on return.
+ */
+#ifndef CONFIG_APUS
+	lis	r6,-KERNELBASE@h
+#else
+	lis	r6,CYBERBASEp@h
+	lwz	r6,0(r6)
+#endif
+	addis	r3,r6,last_task_used_math@ha
+	lwz	r4,last_task_used_math@l(r3)
+	mfmsr	r5
+	ori	r5,r5,MSR_FP
+	SYNC
+	mtmsr	r5			/* enable use of fpu now */
+	SYNC
+	cmpi	0,r4,0
+	beq	1f
+	add	r4,r4,r6
+	addi	r4,r4,TSS	        /* want TSS of last_task_used_math */
+	SAVE_32FPRS(0, r4)
+	mffs	fr0
+	stfd	fr0,TSS_FPSCR-4(r4)
+	lwz	r5,PT_REGS(r4)
+	add	r5,r5,r6
+	lwz	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+	li	r20,MSR_FP
+	andc	r4,r4,r20		/* disable FP for previous task */
+	stw	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+
+1:	ori	r23,r23,MSR_FP		/* enable use of FP after return */
 	mfspr	r5,SPRG3		/* current task's TSS (phys) */
 	lfd	fr0,TSS_FPSCR-4(r5)
 	mtfsf	0xff,fr0
 	REST_32FPRS(0, r5)
-
-/* use last_task_used_math instead of fpu_tss */
-	lis	r3,last_task_used_math@ha
-	addis	r3,r3,-KERNELBASE@h
 	subi	r4,r5,TSS
-	addis	r4,r4,KERNELBASE@h
+	sub	r4,r4,r6
 	stw	r4,last_task_used_math@l(r3)
-#if 0	
-	lis	r3,fpu_tss@ha
-	addis	r4,r5,KERNELBASE@h
-	addis	r3,r3,-KERNELBASE@h
-	stw	r4,fpu_tss@l(r3)
-#endif	
 	/* restore registers and return */
 	lwz	r3,_CCR(r21)
 	lwz	r4,_LINK(r21)
@@ -672,27 +847,42 @@
  * physical address of the hash table are known.  These definitions
  * of Hash_base and Hash_bits below are just an example.
  */
+/*
+ * Note that the 603s won't come here, since the 603
+ * loads tlb directly into the tlb from the linux tables, while
+ * others (601, 604, etc.) call hash_page() to load entries from
+ * the linux tables into the hash table.  -- Cort
+ */	
 Hash_base = 0x180000
 Hash_bits = 12				/* e.g. 256kB hash table */
 Hash_msk = (((1 << Hash_bits) - 1) * 64)
 
 	.globl	hash_page
 hash_page:
+#ifdef __SMP__
+	lis	r6,hash_table_lock@h
+	ori	r6,r6,hash_table_lock@l
+	tophys(r6,r6,r2)
+1011:	lwarx	r0,0,r6
+	stwcx.	r6,0,r6
+	bne-	1011b
+	cmpi	0,r0,0
+	bne	1011b
+#endif /* __SMP__ */	
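The __SMP__ block above is a simple spin lock taken on the physical address of
hash_table_lock: lwarx loads the old value and sets a reservation, stwcx.
tries to store a non-zero token (the lock's own address), bne- retries if the
reservation was lost, and the cmpi/bne spins again if the lock was already
held.  The matching release, as it appears further down in this patch, is just
an ordinary store of zero:

	lis	r5,hash_table_lock@h
	ori	r5,r5,hash_table_lock@l
	tophys(r5,r5,r6)
	li	r6,0
	stw	r6,0(r5)		/* drop hash_table_lock */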
 	/* Get PTE (linux-style) and check access */
-	lwz	r5,MM-TSS(r5)		
-	addis	r5,r5,-KERNELBASE@h	/* get physical current->mm */
-	lwz	r5,PGD(r5)		/* get current->mm->pgd */	
-	addis	r5,r5,-KERNELBASE@h	/* convert to phys addr */
+	lwz	r5,PG_TABLES(r5)		
+	tophys(r5,r5,r2)		/* convert to phys addr */
 	rlwimi	r5,r3,12,20,29		/* insert top 10 bits of address */
 	lwz	r5,0(r5)		/* get pmd entry */
 	rlwinm.	r5,r5,0,0,19		/* extract address of pte page */
-	beqlr-				/* return if no mapping */
-	addis	r2,r5,-KERNELBASE@h
+	beq-	hash_page_out		/* return if no mapping */
+	tophys(r2,r5,r2)
 	rlwimi	r2,r3,22,20,29		/* insert next 10 bits of address */
 	lwz	r6,0(r2)		/* get linux-style pte */
 	ori	r4,r4,1			/* set _PAGE_PRESENT bit in access */
 	andc.	r0,r4,r6		/* check access & ~permission */
-	bnelr-				/* return if access not permitted */
+	bne-	hash_page_out		/* return if access not permitted */
+
 	ori	r6,r6,0x100		/* set _PAGE_ACCESSED in pte */
 	rlwinm	r5,r4,5,24,24		/* _PAGE_RW access -> _PAGE_DIRTY */
 	rlwimi	r5,r4,7,22,22		/* _PAGE_RW -> _PAGE_HWWRITE */
@@ -752,7 +942,7 @@
 10:	mtctr	r2
 	addi	r3,r4,-8		/* search primary PTEG */
 1:	lwzu	r0,8(r3)		/* get next PTE */
-	cmpi	0,r0,0			/* empty? */
+	srwi.	r0,r0,31		/* only want to check valid bit */
 	bdnzf	2,1b			/* loop while ctr != 0 && !cr0.eq */
 	beq+	found_empty
 
@@ -765,24 +955,71 @@
 	addi	r3,r3,-8
 	mtctr	r2
 2:	lwzu	r0,8(r3)
-	cmpi	0,r0,0
+	srwi.	r0,r0,31		/* only want to check valid bit */
 	bdnzf	2,2b
 	beq+	found_empty
 
 	/* Choose an arbitrary slot in the primary PTEG to overwrite */
+#if 0
 	xori	r5,r5,0x40		/* clear H bit again */
 	lwz	r2,next_slot@l(0)
 	addi	r2,r2,8
 	andi.	r2,r2,0x38
 	stw	r2,next_slot@l(0)
 	add	r3,r4,r2
+#else
+	/* now, allow 2nd hash as well as 1st */
+	lwz	r2,next_slot@l(0)
+	addi	r2,r2,8
+	andi.	r2,r2,0x78
+	stw	r2,next_slot@l(0)
+	cmpi	0,0,r2,0x38             /* if it's the 2nd hash */
+	bgt	second_evict
+first_evict:
+	xori	r5,r5,0x40		/* clear H bit again */
+	add	r3,r4,r2
+	b	11f
+second_evict:
+	.globl	hash_page_patch_D
+hash_page_patch_D:	
+	xoris	r3,r4,Hash_msk>>16	/* compute secondary hash */
+	xori	r3,r3,0xffc0
+	subi	r2,r2,0x40
+	addi	r3,r3,r2
+#endif
+11:		
+	/* update counter of evicted pages */
+	lis	r2,htab_evicts@h
+	ori	r2,r2,htab_evicts@l
+	tophys(r2,r2,r4)
+	lwz	r4,0(r2)
+	addi	r4,r4,1
+	stw	r4,0(r2)
 
 	/* Store PTE in PTEG */
 found_empty:
 	stw	r5,0(r3)
 found_slot:
 	stw	r6,4(r3)
+
+/*
+ * Update the hash table miss count.  We only want misses here
+ * that _are_ valid addresses and have a pte otherwise we don't
+ * count it as a reload.  do_page_fault() takes care of bad addrs
+ * and entries that need linux-style pte's created.
+ *
+ * safe to use r2 here since we're not using it as current yet 
+ * update the htab misses count
+ *   -- Cort
+ */
+	lis	r2,htab_reloads@h
+	ori	r2,r2,htab_reloads@l
+	tophys(r2,r2,r3)
+	lwz	r3,0(r2)
+	addi	r3,r3,1
+	stw	r3,0(r2)
 	SYNC
+
 	/* Return from the exception */
 	lwz	r3,_CCR(r21)
 	lwz	r4,_LINK(r21)
@@ -790,6 +1027,13 @@
 	mtcrf	0xff,r3
 	mtlr	r4
 	mtctr	r5
+#ifdef __SMP__
+	lis	r5,hash_table_lock@h
+	ori	r5,r5,hash_table_lock@l
+	tophys(r5,r5,r6)
+	li	r6,0
+	stw	r6,0(r5)
+#endif /* __SMP__ */	
 	REST_GPR(0, r21)
 	REST_2GPRS(1, r21)
 	REST_4GPRS(3, r21)
@@ -801,10 +1045,28 @@
 	lwz	r21,GPR21(r21)
 	SYNC
 	rfi
-
+	
+hash_page_out:
+#ifdef __SMP__
+	lis	r5,hash_table_lock@h
+	ori	r5,r5,hash_table_lock@l
+	tophys(r5,r5,r6)
+	li	r6,0
+	stw	r6,0(r5)
+#endif /* __SMP__ */	
+	blr
 next_slot:
 	.long	0
 
+#ifdef CONFIG_APUS
+	/* On APUS the first 0x4000 bytes of the kernel	will be mapped
+	 * at a different physical address than the rest. For this
+	 * reason, the exception code cannot use relative branches to
+	 * access the code below.
+	 */
+	. = 0x4000
+#endif
+
 /*
  * This is where the main kernel code starts.
  */
@@ -831,23 +1093,24 @@
 10:
 	sync
 	mtspr	HID0,r8			/* enable and invalidate caches */
+	sync
 	mtspr	HID0,r11		/* enable caches */
 	sync
 	isync
 	cmpi	0,r9,4			/* check for 604 */
 	cmpi	1,r9,9			/* or 604e */
+	cmpi	2,r9,10			/* or mach5 */
 	cror	2,2,6
+	cror	2,2,10
 	bne	4f
 	ori	r11,r11,HID0_SIED|HID0_BHTE /* for 604[e], enable */
-	mtspr	HID0,r11		/* superscalar exec & br history tbl */
+	bne	2,5f
+	ori	r11,r11,HID0_BTCD
+5:	mtspr	HID0,r11		/* superscalar exec & br history tbl */
 4:
 	/* ptr to current */
 	lis	r2,init_task_union@h
 	ori	r2,r2,init_task_union@l
-	/* ptr to phys current tss */
-	addis	r11,r2,-KERNELBASE@h
-	addi	r11,r11,TSS	/* init task's TSS */
-	mtspr	SPRG3,r11
 	/* stack */
 	addi	r1,r2,TASK_UNION_SIZE
 	li	r0,0
@@ -869,10 +1132,14 @@
 	bdnz	3b
 2:
 /*
- * Initialize the prom stuff and the MMU.
+ * Decide what sort of machine this is and initialize the MMU.
  */
+	mr	r3,r31
+	mr	r4,r30
+	mr	r5,r29
+	mr	r6,r28
+	mr	r7,r27
 	bl	identify_machine
-	bl	prom_init
 	bl	MMU_init
 
 /*
@@ -882,10 +1149,10 @@
  */
 	lis	r6,_SDR1@ha
 	lwz	r6,_SDR1@l(r6)
-	li	r3,MSR_KERNEL & ~(MSR_IR|MSR_DR)
 	lis	r4,2f@h
-	addis	r4,r4,-KERNELBASE@h
 	ori	r4,r4,2f@l
+	tophys(r4,r4,r3)
+	li	r3,MSR_KERNEL & ~(MSR_IR|MSR_DR)
 	mtspr	SRR0,r4
 	mtspr	SRR1,r3
 	rfi
@@ -902,38 +1169,55 @@
 	addi	r3,r3,1		/* increment VSID */
 	addis	r4,r4,0x1000	/* address of next segment */
 	bdnz	3b
-
-	lis	r3,_machine@ha
-	addis	r3,r3,-KERNELBASE@h
-	lwz	r0,_machine@l(r3)
-	cmpi	0,r0,_MACH_Pmac
-	beq	99f
 	
-/* on prep reload the bats now that MMU_init() has setup them up -- Cort */
-	LOAD_BATS(r3,r14)
-	b	100f
-	
-/* on pmac clear the bats out */	
-99:	li	r0,0		/* zot the BATs */
-#if 1
-	mtspr	IBAT0U,r0
-	mtspr	IBAT0L,r0
-	mtspr	DBAT0U,r0
-	mtspr	DBAT0L,r0
-#endif
-	mtspr	IBAT1U,r0
-	mtspr	IBAT1L,r0
-	mtspr	DBAT1U,r0
-	mtspr	DBAT1L,r0
-	mtspr	IBAT2U,r0
-	mtspr	IBAT2L,r0
-	mtspr	DBAT2U,r0
-	mtspr	DBAT2L,r0
-	mtspr	IBAT3U,r0
-	mtspr	IBAT3L,r0
-	mtspr	DBAT3U,r0
-	mtspr	DBAT3L,r0
-100:	
+/* Load the BAT registers with the values set up by MMU_init.
+   MMU_init takes care of whether we're on a 601 or not. */
+	lis	r3,BATS@ha
+	addi	r3,r3,BATS@l
+	tophys(r3,r3,r4)
+	LOAD_BAT(0,0,r3,r4,r5)
+	LOAD_BAT(1,16,r3,r4,r5)
+	LOAD_BAT(2,32,r3,r4,r5)
+	LOAD_BAT(3,48,r3,r4,r5)
+
+/* Set up for using our exception vectors */
+	/* ptr to phys current tss */
+	tophys(r4,r2,r4)
+	addi	r4,r4,TSS	/* init task's TSS */
+	mtspr	SPRG3,r4
+	li	r3,0
+	mtspr	SPRG2,r3	/* 0 => r1 has kernel sp */
+
+/* On CHRP copy exception vectors down to 0 */
+	lis	r5,_stext@ha
+	addi	r5,r5,_stext@l
+	addis	r5,r5,-KERNELBASE@h
+	cmpwi	0,r5,0
+	beq	77f		/* vectors are already at 0 */
+	li	r3,0x1000
+	mtctr	r3
+	li	r4,-4
+	addi	r5,r5,-4
+74:	lwzu	r0,4(r5)
+	stwu	r0,4(r4)
+	bdnz	74b
+	/* need to flush/invalidate caches too */
+	li	r3,0x4000/CACHE_LINE_SIZE
+	li	r4,0
+	mtctr	r3
+73:	dcbst	0,r4
+	addi	r4,r4,CACHE_LINE_SIZE
+	bdnz	73b
+	sync
+	li	r4,0
+	mtctr	r3
+72:	icbi	0,r4
+	addi	r4,r4,CACHE_LINE_SIZE
+	bdnz	72b
+	sync
+	isync
+77:
+
 /* Now turn on the MMU for real! */
 	li	r4,MSR_KERNEL
 	lis	r3,start_kernel@h
@@ -947,25 +1231,25 @@
 reset_SDR1:
 	lis	r6,_SDR1@ha
 	lwz	r6,_SDR1@l(r6)
-	mfmsr	r3
-	li	r4,MSR_IR|MSR_DR
-	andc	r3,r3,r4
+	mfmsr	r5
+	li	r4,0
+	ori	r4,r4,MSR_EE|MSR_IR|MSR_DR
+	andc	r3,r5,r4
 	lis	r4,2f@h
-	addis	r4,r4,-KERNELBASE@h
 	ori	r4,r4,2f@l
+	tophys(r4,r4,r5)
 	mtspr	SRR0,r4
 	mtspr	SRR1,r3
 	rfi
 2:	/* load new SDR1 */
-	tlbia			
+	tlbia
 	mtspr	SDR1,r6
 	/* turn the mmu back on */
-	li	r4,MSR_KERNEL
 	mflr	r3
 	mtspr	SRR0,r3
-	mtspr	SRR1,r4
+	mtspr	SRR1,r5
 	rfi
-	
+
 /*
  * FP unavailable trap from kernel - print a message, but let
  * the task use FP in the kernel until it returns to user mode.
@@ -987,19 +1271,10 @@
  * Disable FP for the task which had the FPU previously,
  * and save its floating-point registers in its thread_struct.
  * Enables the FPU for use in the kernel on return.
- * (If giveup_fpu_unmapped uses any integer registers other than
- * r3 - r6, the return code at load_up_fpu above will have
- * to be adjusted.)
  */
-giveup_fpu_unmapped:
-	lis	r6,-KERNELBASE@h
-	b	1f
-
 	.globl	giveup_fpu
 giveup_fpu:
-	li	r6,0
-1:
-	addis	r3,r6,last_task_used_math@ha
+	lis	r3,last_task_used_math@ha
 	lwz	r4,last_task_used_math@l(r3)
 	mfmsr	r5
 	ori	r5,r5,MSR_FP
@@ -1008,7 +1283,6 @@
 	SYNC
 	cmpi	0,r4,0
 	beqlr-				/* if no previous owner, done */
-	add	r4,r4,r6
 	addi	r4,r4,TSS	        /* want TSS of last_task_used_math */
 	li	r5,0
 	stw	r5,last_task_used_math@l(r3)
@@ -1016,7 +1290,6 @@
 	mffs	fr0
 	stfd	fr0,TSS_FPSCR-4(r4)
 	lwz	r5,PT_REGS(r4)
-	add	r5,r5,r6
 	lwz	r3,_MSR-STACK_FRAME_OVERHEAD(r5)
 	li	r4,MSR_FP
 	andc	r3,r3,r4		/* disable FP for previous task */
@@ -1104,12 +1377,12 @@
 	b	22b
 /* sys_sigreturn */
 10:	addi	r3,r1,STACK_FRAME_OVERHEAD
-	bl	_EXTERN(sys_sigreturn)
+	bl	sys_sigreturn
 	cmpi	0,r3,0		/* Check for restarted system call */
 	bge	int_return
 	b	20b
 /* Traced system call support */
-50:	bl	_EXTERN(syscall_trace)
+50:	bl	syscall_trace
 	lwz	r0,GPR0(r1)	/* Restore original registers */
 	lwz	r3,GPR3(r1)
 	lwz	r4,GPR4(r1)
@@ -1144,7 +1417,7 @@
 	oris	r10,r10,0x1000
 	stw	r10,_CCR(r1)
 60:	stw	r3,GPR3(r1)	/* Update return value */
-	bl	_EXTERN(syscall_trace)
+	bl	syscall_trace
 	b	int_return
 66:	li	r3,ENOSYS
 	b	52b
@@ -1198,7 +1471,7 @@
 	stw	r0,TRAP(r1)
 	stw	r1,KSP(r3)	/* Set old stack pointer */
 	sync
-	addis	r0,r4,-KERNELBASE@h
+	tophys(r0,r4,r3)
 	mtspr	SPRG3,r0	/* Update current TSS phys addr */
 	SYNC
 	lwz	r1,KSP(r4)	/* Load new stack pointer */
@@ -1220,6 +1493,8 @@
 /*
  * Trap exit.
  */
+	.globl	ret_from_syscall
+ret_from_syscall:	
 	.globl	int_return
 int_return:
 0:	mfmsr	r30		/* Disable interrupts */
@@ -1245,34 +1520,31 @@
 	lwz	r5,bh_active@l(r5)
 	and.	r4,r4,r5
 	beq+	2f
-	ori	r31,r30,MSR_EE	/* re-enable interrupts */
-	SYNC
-	mtmsr	r31
-	SYNC
-	bl	_EXTERN(do_bottom_half)
+	bl	do_bottom_half
 	SYNC
 	mtmsr	r30		/* disable interrupts again */
 	SYNC
 2:	lwz	r3,_MSR(r1)	/* Returning to user mode? */
 	andi.	r3,r3,MSR_PR
-	beq+	10f		/* no - no need to mess with stack */
+	beq+	10f		/* if so, check need_resched and signals */
 	lis	r3,need_resched@ha
 	lwz	r3,need_resched@l(r3)
 	cmpi	0,r3,0		/* check need_resched flag */
 	beq+	7f
-	bl	_EXTERN(schedule)
+	bl	schedule
 	b	0b
-7:	lwz	r3,BLOCKED(r2)	/* Check for pending unblocked signals */
-	lwz	r5,SIGNAL(r2)
-	andc.	r0,r5,r3	/* Lets thru any unblocked */
+7:	lwz	r5,SIGPENDING(r2) /* Check for pending unblocked signals */
+	cmpwi	0,r5,0
 	beq+	8f
+	li	r3,0
 	addi	r4,r1,STACK_FRAME_OVERHEAD
-	bl	_EXTERN(do_signal)
+	bl	do_signal
 	b	0b
 8:	addi	r4,r1,INT_FRAME_SIZE+STACK_UNDERHEAD	/* size of frame */
 	stw	r4,TSS+KSP(r2)	/* save kernel stack pointer */
-10:
-	lwz	r2,_CTR(r1)
+	tophys(r3,r1,r3)
+	mtspr	SPRG2,r3	/* phys exception stack pointer */
+10:	lwz	r2,_CTR(r1)
 	lwz	r0,_LINK(r1)
 	mtctr	r2
 	mtlr	r0
@@ -1355,8 +1627,6 @@
  *
  * flush_icache_range(unsigned long start, unsigned long stop)
  */
-CACHE_LINE_SIZE = 32
-LG_CACHE_LINE_SIZE = 5
 _GLOBAL(flush_icache_range)
 	mfspr	r5,PVR
 	rlwinm	r5,r5,16,16,31
@@ -1417,6 +1687,28 @@
  * given.
  */
 _GLOBAL(flush_hash_segments)
+#ifdef NO_RELOAD_HTAB
+/*
+ * Bitmask of PVR numbers of 603-like chips,
+ * for which we don't use the hash table at all.
+ */
+#define PVR_603_LIKE	0x13000000	/* bits 3, 6, 7 set */
+
+	mfspr	r0,PVR
+	rlwinm	r0,r0,16,27,31
+	lis	r9,PVR_603_LIKE@h
+	rlwnm.	r0,r9,r0,0,0
+	bne	99f
+#endif /* NO_RELOAD_HTAB */
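The NO_RELOAD_HTAB guard above makes flush_hash_segments (and flush_hash_page
below) a no-op apart from the tlbia/tlbie on 603-class processors, which never
put anything in the hash table under this scheme.  The test is a rotate-and-
mask trick: the processor version number (low 5 bits of the PVR upper half)
is used as a rotate count into the PVR_603_LIKE bit mask, so the result is
non-zero exactly for version numbers 3, 6 and 7.  Re-annotated sketch:

	mfspr	r0,PVR
	rlwinm	r0,r0,16,27,31		/* r0 = processor version, low 5 bits */
	lis	r9,PVR_603_LIKE@h	/* 0x13000000: bits 3, 6, 7 set */
	rlwnm.	r0,r9,r0,0,0		/* rotate mask left by r0, keep bit 0 */
	bne	99f			/* non-zero -> 603-like, skip hash table */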
+#ifdef __SMP__
+	lis	r6,hash_table_lock@h
+	ori	r6,r6,hash_table_lock@l
+1011:	lwarx	r0,0,r6
+	stwcx.	r6,0,r6
+	bne-	1011b
+	cmpi	0,r0,0
+	bne	1011b
+#endif /* __SMP__ */	
 	rlwinm	r3,r3,7,1,24		/* put VSID lower limit in position */
 	oris	r3,r3,0x8000		/* set V bit */
 	rlwinm	r4,r4,7,1,24		/* put VSID upper limit in position */
@@ -1438,7 +1730,13 @@
 	stw	r0,0(r5)		/* invalidate entry */
 2:	bdnz	1b			/* continue with loop */
 	sync
-	tlbia
+#ifdef __SMP__
+	lis	r5,hash_table_lock@h
+	ori	r5,r5,hash_table_lock@l
+	li	r6,0
+	stw	r6,0(r5)
+#endif /* __SMP__ */	
+99:	tlbia
 	isync
 	blr
 
@@ -1448,6 +1746,22 @@
  * flush_hash_page(unsigned context, unsigned long va)
  */
 _GLOBAL(flush_hash_page)
+#ifdef NO_RELOAD_HTAB
+	mfspr	r0,PVR
+	rlwinm	r0,r0,16,27,31
+	lis	r9,PVR_603_LIKE@h
+	rlwnm.	r0,r9,r0,0,0
+	bne	99f
+#endif /* NO_RELOAD_HTAB */		
+#ifdef __SMP__
+	lis	r6,hash_table_lock@h
+	ori	r6,r6,hash_table_lock@l
+1011:	lwarx	r0,0,r6
+	stwcx.	r6,0,r6
+	bne-	1011b
+	cmpi	0,r0,0
+	bne	1011b
+#endif /* __SMP__ */	
 	rlwinm	r3,r3,11,1,20		/* put context into vsid */
 	rlwimi	r3,r4,11,21,24		/* put top 4 bits of va into vsid */
 	oris	r3,r3,0x8000		/* set V (valid) bit */
@@ -1480,7 +1794,13 @@
 3:	li	r0,0
 	stw	r0,0(r7)		/* invalidate entry */
 4:	sync
-	tlbie	r4			/* in hw tlb too */
+#ifdef __SMP__
+	lis	r5,hash_table_lock@h
+	ori	r5,r5,hash_table_lock@l
+	li	r6,0
+	stw	r6,0(r5)
+#endif /* __SMP__ */	
+99:	tlbie	r4			/* in hw tlb too */
 	isync
 	blr
 
@@ -1491,200 +1811,221 @@
 	blr
 
 /*
- * These exception handlers are used when we have called a prom
- * routine after we have taken over the exception vectors and MMU.
+ * On CHRP, the Run-Time Abstraction Services (RTAS) have to be
+ * called with the MMU off.
  */
-	.globl	prom_exc_table
-prom_exc_table:
-	.long	TOPHYS(prom_exception)		/* 0 */
-	.long	TOPHYS(prom_exception)
-	.long	TOPHYS(prom_exception)
-	.long	TOPHYS(prom_exception)
-	.long	TOPHYS(prom_exception)		/* 400 */
-	.long	TOPHYS(prom_exception)
-	.long	TOPHYS(prom_exception)
-	.long	TOPHYS(prom_exception)
-	.long	TOPHYS(prom_exception)		/* 800 */
-	.long	TOPHYS(prom_exception)
-	.long	TOPHYS(prom_exception)
-	.long	TOPHYS(prom_exception)
-	.long	TOPHYS(prom_exception)		/* c00 */
-	.long	TOPHYS(prom_exception)
-	.long	TOPHYS(prom_exception)
-	.long	TOPHYS(prom_exception)
-	.long	TOPHYS(prom_exception)		/* 1000 */
-	.long	TOPHYS(prom_exception)
-	.long	TOPHYS(prom_exception)
-	.long	TOPHYS(prom_exception)
-	.long	TOPHYS(prom_exception)		/* 1400 */
-	.long	TOPHYS(prom_exception)
-	.long	TOPHYS(prom_exception)
-	.long	TOPHYS(prom_exception)
-	.long	TOPHYS(prom_exception)		/* 1800 */
-	.long	TOPHYS(prom_exception)
-	.long	TOPHYS(prom_exception)
-	.long	TOPHYS(prom_exception)
-	.long	TOPHYS(prom_exception)		/* 1c00 */
-	.long	TOPHYS(prom_exception)
-	.long	TOPHYS(prom_exception)
-	.long	TOPHYS(prom_exception)
-	.long	TOPHYS(prom_exception)		/* 1000 */
-	.long	TOPHYS(prom_exception)
-	.long	TOPHYS(prom_exception)
-	.long	TOPHYS(prom_exception)
-	.long	TOPHYS(prom_exception)		/* 1400 */
-	.long	TOPHYS(prom_exception)
-	.long	TOPHYS(prom_exception)
-	.long	TOPHYS(prom_exception)
-	.long	TOPHYS(prom_exception)		/* 1800 */
-	.long	TOPHYS(prom_exception)
-	.long	TOPHYS(prom_exception)
-	.long	TOPHYS(prom_exception)
-	.long	TOPHYS(prom_exception)		/* 1c00 */
-	.long	TOPHYS(prom_exception)
-	.long	TOPHYS(prom_exception)
-	.long	TOPHYS(prom_exception)
-
-/*
- * When we come in to these prom exceptions, r1 and lr have been
- * saved in sprg1 and sprg2, and lr points to a word containing
- * the vector offset.
- */
-prom_exception:
-	mr	r1,r21			/* save r21 */
-	lis	r21,prom_sp@ha		/* get a stack to use */
-	addis	r21,r21,-KERNELBASE@h
-	lwz	r21,prom_sp@l(r21)
-	addis	r21,r21,-KERNELBASE@h	/* convert to physical addr */
-	subi	r21,r21,INT_FRAME_SIZE+STACK_UNDERHEAD
-	stw	r0,GPR0(r21)
-	stw	r2,GPR2(r21)
-	stw	r3,GPR3(r21)
-	stw	r4,GPR4(r21)
-	stw	r5,GPR5(r21)
-	stw	r6,GPR6(r21)
-	stw	r20,GPR20(r21)
-	stw	r1,GPR21(r21)
-	stw	r22,GPR22(r21)
-	stw	r23,GPR23(r21)
-	mfspr	r1,SPRG1
-	stw	r1,GPR1(r21)
-	mfcr	r3
-	mfspr	r4,SPRG2
-	stw	r3,_CCR(r21)
-	stw	r4,_LINK(r21)
-	mfctr	r3
-	mfspr	r4,XER
-	stw	r3,_CTR(r21)
-	stw	r4,_XER(r21)
-	mfspr	r22,SRR0
-	mfspr	r23,SRR1
-
-	/* at this point we have set things up pretty much exactly
-	   how EXCEPTION_PROLOG does */
-	mflr	r3
-	lwz	r3,0(r3)		/* get exception vector */
-	stw	r3,TRAP(r21)
-	cmpi	0,r3,0x300		/* was it a dsi? */
-	bne	1f
-
-	mfspr	r20,DSISR		/* here on data access exc. */
-	andis.	r0,r20,0x8470		/* weird error? */
-	bne	3f			/* if not, try to put a PTE */
-	mfspr	r3,DAR			/* into the hash table */
-	rlwinm	r4,r23,32-13,30,30	/* MSR_PR -> _PAGE_USER */
-	rlwimi	r4,r20,32-23,29,29	/* DSISR_STORE -> _PAGE_RW */
-	b	2f
-
-1:	cmpi	0,r3,0x400		/* was it an isi? */
-	bne	3f
-	andis.	r0,r23,0x4000		/* if so, check if no pte found */
-	beq	3f			/* if so, try to put a PTE */
-	mr	r3,r22			/* into the hash table */
-	rlwinm	r4,r23,32-13,30,30	/* MSR_PR -> _PAGE_USER */
-	mr	r20,r23			/* SRR1 has reason bits */
-2:	lis	r5,prom_tss@ha		/* phys addr of TSS */
-	addis	r5,r5,-KERNELBASE@h
-	lwz	r5,prom_tss@l(r5)
-	bl	hash_page
-
-3:	addis	r1,r21,KERNELBASE@h	/* restore kernel stack ptr */
-	addi	r3,r1,INT_FRAME_SIZE+STACK_UNDERHEAD
-	stw	r3,0(r21)		/* set stack chain pointer */
-	lis	r5,prom_tss@ha
-	addis	r5,r5,-KERNELBASE@h
-	lwz	r5,prom_tss@l(r5)
-	mtspr	SPRG3,r5		/* reset phys TSS pointer */
-	lwz	r4,TRAP(r21)		/* the real exception vector */
-	addi	r3,r1,STACK_FRAME_OVERHEAD
-	li	r20,MSR_KERNEL
-	bl	transfer_to_handler
-	.long	PromException
-	.long	prom_int_return
-
-	.comm	prom_sp,4
-	.comm	prom_tss,4
-
-	.globl	prom_int_return
-prom_int_return:
-	lis	r3,prom_exc_table@ha	/* restore sprg3 for prom vectors */
-	addi	r3,r3,prom_exc_table@l
+	.globl	enter_rtas
+enter_rtas:
+	stwu	r1,-16(r1)
+	mflr	r0
+	stw	r0,20(r1)
 	addis	r3,r3,-KERNELBASE@h
-	mtspr	SPRG3,r3
-	b	int_return
+	lis	r4,rtas_data@ha
+	lwz	r4,rtas_data@l(r4)
+	lis	r6,1f@ha	/* physical return address for rtas */
+	addi	r6,r6,1f@l
+	addis	r6,r6,-KERNELBASE@h
+	subi	r7,r1,INT_FRAME_SIZE+STACK_UNDERHEAD
+	addis	r7,r7,-KERNELBASE@h
+	lis	r8,rtas_entry@ha
+	lwz	r8,rtas_entry@l(r8)
+	mfmsr	r9
+	stw	r9,8(r1)
+	li	r0,0
+	ori	r0,r0,MSR_EE|MSR_SE|MSR_BE
+	andc	r0,r9,r0
+	andi.	r9,r9,MSR_ME|MSR_RI
+	sync			/* disable interrupts so SRR0/1 */
+	mtmsr	r0		/* don't get trashed */
+	mtlr	r6
+	mtspr	SPRG2,r7
+	mtspr	SRR0,r8
+	mtspr	SRR1,r9
+	rfi
+1:	addis	r9,r1,-KERNELBASE@h
+	lwz	r8,20(r9)	/* get return address */
+	lwz	r9,8(r9)	/* original msr value */
+	li	r0,0
+	mtspr	SPRG2,r0
+	mtspr	SRR0,r8
+	mtspr	SRR1,r9
+	rfi			/* return to caller */
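enter_rtas above is the CHRP RTAS calling sequence: the current MSR is saved
on the stack, interrupts and trace bits are masked so SRR0/SRR1 survive until
the rfi, SPRG2 is pointed at a physical exception stack below the current
frame so EXCEPTION_PROLOG still works while translation is off, and the rfi
enters rtas_entry with translation disabled and the physical address of label
1 in LR as the return point.  The key MSR arithmetic, re-annotated:

	mfmsr	r9			/* r9 = current MSR (also saved on stack) */
	li	r0,0
	ori	r0,r0,MSR_EE|MSR_SE|MSR_BE
	andc	r0,r9,r0		/* r0 = current MSR minus EE/SE/BE */
	andi.	r9,r9,MSR_ME|MSR_RI	/* r9 = MSR for RTAS: IR/DR (and EE) off */
	mtmsr	r0			/* mask interrupts so SRR0/1 survive */
	mtspr	SRR1,r9			/* rfi runs RTAS with translation off */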
 
+	
+	.globl amhere
+amhere:	.long 0
+	
+#ifdef __SMP__
 /*
- * When entering the prom, we have to change to using a different
- * set of exception vectors.
+ * Secondary processor begins executing here.
  */
-	.globl	enter_prom
-enter_prom:
-	stwu	r1,-32(r1)
-	mflr	r0
-	stw	r0,36(r1)
-	stw	r29,20(r1)
-	stw	r30,24(r1)
-	stw	r31,28(r1)
-	lis	r8,prom_entry@ha
-	lwz	r8,prom_entry@l(r8)
-	mfmsr	r31
-	andi.	r0,r31,MSR_IP		/* using our own vectors yet? */
-	beq	1f			/* if so, have to switch */
-	mtlr	r8
-	blrl				/* if not, can just charge ahead */
-	b	2f
-1:	lis	r9,prom_sp@ha		/* save sp for exception handler */
-	stw	r1,prom_sp@l(r9)
-	mfspr	r29,SPRG3		/* save physical tss pointer */
-	lis	r9,prom_tss@ha
-	stw	r29,prom_tss@l(r9)
-	li	r9,0
-	ori	r9,r9,MSR_EE
-	andc	r30,r31,r9
-	lis	r9,prom_exc_table@ha	/* set pointer to exception table */
-	addi	r9,r9,prom_exc_table@l
-	addis	r9,r9,-KERNELBASE@h
-	ori	r0,r31,MSR_IP
+	.globl	secondary_entry
+secondary_entry:
+	lis	r0,amhere@h
+	ori	r0,r0,amhere@l
+	addis	r0,r0,-KERNELBASE@h
+	stw	r0,0(r0)
 	sync
-	mtmsr	r30			/* disable interrupts */
-	mtspr	SPRG3,r9		/* while we update MSR_IP and sprg3 */
+	isync	
+	/* just like __start() with a few changes -- Cort */
+	mfspr	r9,PVR
+	rlwinm	r9,r9,16,16,31		/* r9 = 1 for 601, 4 for 604 */
+	cmpi	0,r9,1
+	lis	r11,KERNELBASE@h
+	bne	4f
+	ori	r11,r11,4		/* set up BAT registers for 601 */
+	li	r8,0x7f
+	oris	r9,r11,0x800000@h	/* set up BAT reg for 2nd 8M */
+	oris	r10,r8,0x800000@h	/* set up BAT reg for 2nd 8M */
+	mtspr	IBAT1U,r9
+	mtspr	IBAT1L,r10
+	b	5f
+4:	ori	r11,r11,0x1ff		/* set up BAT registers for 604 */
+	li	r8,2
+	mtspr	DBAT0U,r11
+	mtspr	DBAT0L,r8
+5:	mtspr	IBAT0U,r11
+	mtspr	IBAT0L,r8
+	isync
+/*
+ * we now have the 1st 16M of ram mapped with the bats.
+ * prep needs the mmu to be turned on here, but pmac already has it on.
+ * this shouldn't bother the pmac since it just gets turned on again
+ * as we jump to our code at KERNELBASE. -- Cort
+ */
+	mfmsr	r0
+	ori	r0,r0,MSR_DR|MSR_IR
+	mtspr	SRR1,r0
+	lis	r0,100f@h
+	ori	r0,r0,100f@l
+	mtspr	SRR0,r0
+	SYNC
+	rfi				/* enables MMU */
+100:
+	/*
+	 * Enable caches and 604-specific features if necessary.
+	 */
+	mfspr	r9,PVR
+	rlwinm	r9,r9,16,16,31
+	cmpi	0,r9,1
+	beq	4f			/* not needed for 601 */
+	mfspr	r11,HID0
+	andi.	r0,r11,HID0_DCE
+	ori	r11,r11,HID0_ICE|HID0_DCE
+	ori	r8,r11,HID0_ICFI
+	bne	3f			/* don't invalidate the D-cache */
+	ori	r8,r8,HID0_DCI		/* unless it wasn't enabled */
+3:
+	/* turn on dpm for 603 */
+	cmpi	0,r9,3
+	bne	10f
+	oris	r11,r11,HID0_DPM@h
+10:
 	sync
-	mtmsr	r0			/* start using exc. vectors in prom */
-	mtlr	r8
-	blrl				/* call prom */
+	mtspr	HID0,r8			/* enable and invalidate caches */
 	sync
-	mtmsr	r30			/* disable interrupts again */
-	mtspr	SPRG3,r29		/* while we restore MSR_IP and sprg3 */
+	mtspr	HID0,r11		/* enable caches */
 	sync
-	mtmsr	r31			/* reenable interrupts */
-2:	lwz	r0,36(r1)
-	mtlr	r0
-	lwz	r29,20(r1)
-	lwz	r30,24(r1)
-	lwz	r31,28(r1)
-	lwz	r1,0(r1)
-	blr
+	isync
+	cmpi	0,r9,4			/* check for 604 */
+	cmpi	1,r9,9			/* or 604e */
+	cmpi	2,r9,10			/* or mach5 */
+	cror	2,2,6
+	cror	2,2,10
+	bne	4f
+	ori	r11,r11,HID0_SIED|HID0_BHTE /* for 604[e], enable */
+	bne	2,5f
+	ori	r11,r11,HID0_BTCD
+5:	mtspr	HID0,r11		/* superscalar exec & br history tbl */
+4:
+	/* get ptr to current */
+	lis	r2,current_set@h
+	ori	r2,r2,current_set@l
+	/* assume we're second processor for now */
+	lwz	r2,4(r2)
+	/* stack */
+	addi	r1,r2,TASK_UNION_SIZE
+	li	r0,0
+	stwu	r0,-STACK_FRAME_OVERHEAD(r1)
+		
+/*
+ * init_MMU on the first processor has setup the variables
+ * for us - all we need to do is load them -- Cort 
+ */
+	
+/*
+ * Go back to running unmapped so we can load up new values
+ * for SDR1 (hash table pointer) and the segment registers
+ * and change to using our exception vectors.
+ */
+	lis	r6,_SDR1@ha
+	lwz	r6,_SDR1@l(r6)
+	lis	r4,2f@h
+	ori	r4,r4,2f@l
+	tophys(r4,r4,r3)
+	li	r3,MSR_KERNEL & ~(MSR_IR|MSR_DR)
+	mtspr	SRR0,r4
+	mtspr	SRR1,r3
+	rfi
+/* Load up the kernel context */
+2:
+	SYNC			/* Force all PTE updates to finish */
+	tlbia			/* Clear all TLB entries */
+	mtspr	SDR1,r6
+	li	r0,16		/* load up segment register values */
+	mtctr	r0		/* for context 0 */
+	lis	r3,0x2000	/* Ku = 1, VSID = 0 */
+	li	r4,0
+3:	mtsrin	r3,r4
+	addi	r3,r3,1		/* increment VSID */
+	addis	r4,r4,0x1000	/* address of next segment */
+	bdnz	3b
+	
+/* Load the BAT registers with the values set up by MMU_init.
+   MMU_init takes care of whether we're on a 601 or not. */
+	lis	r3,BATS@ha
+	addi	r3,r3,BATS@l
+	tophys(r3,r3,r4)
+	LOAD_BAT(0,0,r3,r4,r5)
+	LOAD_BAT(1,16,r3,r4,r5)
+	LOAD_BAT(2,32,r3,r4,r5)
+	LOAD_BAT(3,48,r3,r4,r5)
 
+/* Set up for using our exception vectors */
+	/* ptr to phys current tss */
+	tophys(r4,r2,r4)
+	addi	r4,r4,TSS	/* init task's TSS */
+	mtspr	SPRG3,r4
+	li	r3,0
+	mtspr	SPRG2,r3	/* 0 => r1 has kernel sp */
+
+	/* need to flush/invalidate caches too */
+	li	r3,0x4000/CACHE_LINE_SIZE
+	li	r4,0
+	mtctr	r3
+73:	dcbst	0,r4
+	addi	r4,r4,CACHE_LINE_SIZE
+	bdnz	73b
+	sync
+	li	r4,0
+	mtctr	r3
+72:	icbi	0,r4
+	addi	r4,r4,CACHE_LINE_SIZE
+	bdnz	72b
+	sync
+	isync
+77:
+/* Now turn on the MMU for real! */
+	li	r4,MSR_KERNEL
+	lis	r3,start_secondary@h
+	ori	r3,r3,start_secondary@l
+	mtspr	SRR0,r3
+	mtspr	SRR1,r4
+	rfi			/* enable MMU and jump to start_kernel */
+/* should never return */
+	.long 0
+#endif /* __SMP__ */
+	
 /*
  * We put a few things here that have to be page-aligned.
  * This stuff goes at the beginning of the data segment,
@@ -1708,4 +2049,3 @@
 	.globl	cmd_line
 cmd_line:
 	.space	512
-
