patch-2.4.19 linux-2.4.19/arch/ppc64/kernel/smp.c
- Lines: 764
- Date: Fri Aug 2 17:39:43 2002
- Orig file: linux-2.4.18/arch/ppc64/kernel/smp.c
- Orig date: Wed Dec 31 16:00:00 1969
diff -urN linux-2.4.18/arch/ppc64/kernel/smp.c linux-2.4.19/arch/ppc64/kernel/smp.c
@@ -0,0 +1,763 @@
+/*
+ *
+ *
+ * SMP support for ppc.
+ *
+ * Written by Cort Dougan (cort@cs.nmt.edu) borrowing a great
+ * deal of code from the sparc and intel versions.
+ *
+ * Copyright (C) 1999 Cort Dougan <cort@cs.nmt.edu>
+ *
+ * PowerPC-64 Support added by Dave Engebretsen, Peter Bergner, and
+ * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/interrupt.h>
+#include <linux/kernel_stat.h>
+#include <linux/delay.h>
+#define __KERNEL_SYSCALLS__
+#include <linux/unistd.h>
+#include <linux/init.h>
+/* #include <linux/openpic.h> */
+#include <linux/spinlock.h>
+#include <linux/cache.h>
+
+#include <asm/ptrace.h>
+#include <asm/atomic.h>
+#include <asm/irq.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/hardirq.h>
+#include <asm/softirq.h>
+#include <asm/init.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/smp.h>
+#include <asm/naca.h>
+#include <asm/paca.h>
+#include <asm/iSeries/LparData.h>
+#include <asm/iSeries/HvCall.h>
+#include <asm/iSeries/HvCallCfg.h>
+#include <asm/time.h>
+#include <asm/ppcdebug.h>
+#include "open_pic.h"
+#include <asm/machdep.h>
+
+int smp_threads_ready = 0;
+volatile int smp_commenced = 0;
+int smp_num_cpus = 1;
+int smp_tb_synchronized = 0;
+extern atomic_t ipi_recv;
+extern atomic_t ipi_sent;
+spinlock_t kernel_flag __cacheline_aligned = SPIN_LOCK_UNLOCKED;
+cycles_t cacheflush_time;
+static int max_cpus __initdata = NR_CPUS;
+
+unsigned long cpu_online_map;
+
+volatile unsigned long cpu_callin_map[NR_CPUS] = {0,};
+
+#define TB_SYNC_PASSES 4
+volatile unsigned long __initdata tb_sync_flag = 0;
+volatile unsigned long __initdata tb_offset = 0;
+
+extern unsigned char stab_array[];
+
+int start_secondary(void *);
+extern int cpu_idle(void *unused);
+void smp_call_function_interrupt(void);
+void smp_message_pass(int target, int msg, unsigned long data, int wait);
+static unsigned long iSeries_smp_message[NR_CPUS];
+
+void xics_setup_cpu(void);
+void xics_cause_IPI(int cpu);
+
+/*
+ * XICS only has a single IPI, so encode the messages per CPU
+ */
+volatile unsigned long xics_ipi_message[NR_CPUS] = {0};
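+
+/*
+ * Rough sketch of the receive side (the real handler lives with the
+ * XICS interrupt code, not in this file): the target cpu takes the one
+ * IPI and then decodes every message bit that was set for it, roughly
+ *
+ *	while (xics_ipi_message[cpu]) {
+ *		if (test_and_clear_bit(PPC_MSG_CALL_FUNCTION, &xics_ipi_message[cpu]))
+ *			smp_message_recv(PPC_MSG_CALL_FUNCTION, regs);
+ *		if (test_and_clear_bit(PPC_MSG_RESCHEDULE, &xics_ipi_message[cpu]))
+ *			smp_message_recv(PPC_MSG_RESCHEDULE, regs);
+ *	}
+ */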
+
+#define smp_message_pass(t,m,d,w) \
+ do { atomic_inc(&ipi_sent); \
+ ppc_md.smp_message_pass((t),(m),(d),(w)); \
+ } while(0)
+
+#ifdef CONFIG_KDB
+void smp_kdb_stop(void)
+{
+}
+#endif
+
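+/*
+ * Write the 64-bit timebase as two 32-bit halves.  TBL is cleared
+ * first so that a carry out of the low half cannot bump TBU between
+ * the two writes.
+ */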
+static inline void set_tb(unsigned int upper, unsigned int lower)
+{
+ mtspr(SPRN_TBWL, 0);
+ mtspr(SPRN_TBWU, upper);
+ mtspr(SPRN_TBWL, lower);
+}
+
+void iSeries_smp_message_recv( struct pt_regs * regs )
+{
+ int cpu = smp_processor_id();
+ int msg;
+
+ if ( smp_num_cpus < 2 )
+ return;
+
+ for ( msg = 0; msg < 4; ++msg )
+ if ( test_and_clear_bit( msg, &iSeries_smp_message[cpu] ) )
+ smp_message_recv( msg, regs );
+
+}
+
+static void smp_iSeries_message_pass(int target, int msg, unsigned long data, int wait)
+{
+ int i;
+ for (i = 0; i < smp_num_cpus; ++i) {
+ if ( (target == MSG_ALL) ||
+ (target == i) ||
+ ((target == MSG_ALL_BUT_SELF) && (i != smp_processor_id())) ) {
+ set_bit( msg, &iSeries_smp_message[i] );
+ HvCall_sendIPI(&(paca[i]));
+ }
+ }
+}
+
+static int smp_iSeries_numProcs(void)
+{
+ unsigned np, i;
+ struct ItLpPaca * lpPaca;
+
+ np = 0;
+ for (i=0; i < MAX_PACAS; ++i) {
+ lpPaca = paca[i].xLpPacaPtr;
+ if ( lpPaca->xDynProcStatus < 2 ) {
+ ++np;
+ }
+ }
+ return np;
+}
+
+static int smp_iSeries_probe(void)
+{
+ unsigned i;
+ unsigned np;
+ struct ItLpPaca * lpPaca;
+
+ np = 0;
+ for (i=0; i < MAX_PACAS; ++i) {
+ lpPaca = paca[i].xLpPacaPtr;
+ if ( lpPaca->xDynProcStatus < 2 ) {
+ ++np;
+ paca[i].next_jiffy_update_tb = paca[0].next_jiffy_update_tb;
+ }
+ }
+
+ smp_tb_synchronized = 1;
+ return np;
+}
+
+static void smp_iSeries_kick_cpu(int nr)
+{
+ struct ItLpPaca * lpPaca;
+ /* Verify we have a Paca for processor nr */
+ if ( ( nr <= 0 ) ||
+ ( nr >= MAX_PACAS ) )
+ return;
+ /* Verify that our partition has a processor nr */
+ lpPaca = paca[nr].xLpPacaPtr;
+ if ( lpPaca->xDynProcStatus >= 2 )
+ return;
+
+ /* The information for processor bringup must
+ * be written out to main store before we release
+ * the processor.
+ */
+ mb();
+
+ /* The processor is currently spinning, waiting
+	 * for the xProcStart field to become non-zero.
+ * After we set xProcStart, the processor will
+ * continue on to secondary_start in iSeries_head.S
+ */
+ paca[nr].xProcStart = 1;
+}
+
+static void smp_iSeries_setup_cpu(int nr)
+{
+}
+
+/* This is called very early. */
+void smp_init_iSeries(void)
+{
+ ppc_md.smp_message_pass = smp_iSeries_message_pass;
+ ppc_md.smp_probe = smp_iSeries_probe;
+ ppc_md.smp_kick_cpu = smp_iSeries_kick_cpu;
+ ppc_md.smp_setup_cpu = smp_iSeries_setup_cpu;
+
+ naca->processorCount = smp_iSeries_numProcs();
+}
+
+
+static void
+smp_openpic_message_pass(int target, int msg, unsigned long data, int wait)
+{
+ /* make sure we're sending something that translates to an IPI */
+ if ( msg > 0x3 ){
+ printk("SMP %d: smp_message_pass: unknown msg %d\n",
+ smp_processor_id(), msg);
+ return;
+ }
+ switch ( target )
+ {
+ case MSG_ALL:
+ openpic_cause_IPI(msg, 0xffffffff);
+ break;
+ case MSG_ALL_BUT_SELF:
+ openpic_cause_IPI(msg,
+ 0xffffffff & ~(1 << smp_processor_id()));
+ break;
+ default:
+ openpic_cause_IPI(msg, 1<<target);
+ break;
+ }
+}
+
+static int
+smp_chrp_probe(void)
+{
+ if (naca->processorCount > 1)
+ openpic_request_IPIs();
+
+ return naca->processorCount;
+}
+
+static void
+smp_kick_cpu(int nr)
+{
+ /* Verify we have a Paca for processor nr */
+ if ( ( nr <= 0 ) ||
+ ( nr >= MAX_PACAS ) )
+ return;
+
+ /* The information for processor bringup must
+ * be written out to main store before we release
+ * the processor.
+ */
+ mb();
+
+ /* The processor is currently spinning, waiting
+	 * for the xProcStart field to become non-zero.
+ * After we set xProcStart, the processor will
+ * continue on to secondary_start in iSeries_head.S
+ */
+ paca[nr].xProcStart = 1;
+}
+
+extern struct gettimeofday_struct do_gtod;
+
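+/*
+ * Stagger each cpu's next_jiffy_update_tb so that the cpus do their
+ * per-jiffy timer work spread across the jiffy rather than all on the
+ * same timebase tick.  Purely as an illustration: with nr == 4 and
+ * tb_ticks_per_jiffy == 120000, cpus 1, 2 and 3 end up 30000, 60000
+ * and 90000 ticks after cpu 0.
+ */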
+static void smp_space_timers( unsigned nr )
+{
+ unsigned long offset, i;
+
+ offset = tb_ticks_per_jiffy / nr;
+ for ( i=1; i<nr; ++i ) {
+ paca[i].next_jiffy_update_tb = paca[i-1].next_jiffy_update_tb + offset;
+ }
+}
+
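+/*
+ * Outline of the non-LPAR timebase sync done below: cpu 0 waits until
+ * every secondary has bumped "ready", asks RTAS to freeze the
+ * timebase, zeroes its own timebase and spaces out the
+ * next_jiffy_update_tb values, waits for the secondaries to zero
+ * theirs, then thaws the timebase.  Each secondary spins until
+ * "frozen" is set, zeroes its timebase, bumps "ready" again and spins
+ * until cpu 0 thaws the timebase.  Under an LPAR hypervisor the
+ * timebases are already synchronized, so none of this is needed.
+ */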
+static void
+smp_chrp_setup_cpu(int cpu_nr)
+{
+ static atomic_t ready = ATOMIC_INIT(1);
+ static volatile int frozen = 0;
+
+ if (naca->platform == PLATFORM_PSERIES_LPAR) {
+ /* timebases already synced under the hypervisor. */
+ paca[cpu_nr].next_jiffy_update_tb = tb_last_stamp = get_tb();
+ if (cpu_nr == 0) {
+ do_gtod.tb_orig_stamp = tb_last_stamp;
+ /* Should update do_gtod.stamp_xsec.
+ * For now we leave it which means the time can be some
+ * number of msecs off until someone does a settimeofday()
+ */
+ }
+ smp_tb_synchronized = 1;
+ } else {
+ if (cpu_nr == 0) {
+ /* wait for all the others */
+ while (atomic_read(&ready) < smp_num_cpus)
+ barrier();
+ atomic_set(&ready, 1);
+ /* freeze the timebase */
+ rtas_call(rtas_token("freeze-time-base"), 0, 1, NULL);
+ mb();
+ frozen = 1;
+ set_tb(0, 0);
+ paca[0].next_jiffy_update_tb = 0;
+ smp_space_timers(smp_num_cpus);
+ while (atomic_read(&ready) < smp_num_cpus)
+ barrier();
+ /* thaw the timebase again */
+ rtas_call(rtas_token("thaw-time-base"), 0, 1, NULL);
+ mb();
+ frozen = 0;
+ tb_last_stamp = get_tb();
+ do_gtod.tb_orig_stamp = tb_last_stamp;
+ smp_tb_synchronized = 1;
+ } else {
+ atomic_inc(&ready);
+ while (!frozen)
+ barrier();
+ set_tb(0, 0);
+ mb();
+ atomic_inc(&ready);
+ while (frozen)
+ barrier();
+ }
+ }
+
+ if (OpenPIC_Addr) {
+ do_openpic_setup_cpu();
+ } else {
+ if (cpu_nr > 0)
+ xics_setup_cpu();
+ }
+}
+
+static void
+smp_xics_message_pass(int target, int msg, unsigned long data, int wait)
+{
+ int i;
+
+ for (i = 0; i < smp_num_cpus; ++i) {
+ if (target == MSG_ALL || target == i
+ || (target == MSG_ALL_BUT_SELF
+ && i != smp_processor_id())) {
+ set_bit(msg, &xics_ipi_message[i]);
+ mb();
+ xics_cause_IPI(i);
+ }
+ }
+}
+
+static int
+smp_xics_probe(void)
+{
+ return naca->processorCount;
+}
+
+/* This is called very early */
+void smp_init_pSeries(void)
+{
+ if(naca->interrupt_controller == IC_OPEN_PIC) {
+ ppc_md.smp_message_pass = smp_openpic_message_pass;
+ ppc_md.smp_probe = smp_chrp_probe;
+ ppc_md.smp_kick_cpu = smp_kick_cpu;
+ ppc_md.smp_setup_cpu = smp_chrp_setup_cpu;
+ } else {
+ ppc_md.smp_message_pass = smp_xics_message_pass;
+ ppc_md.smp_probe = smp_xics_probe;
+ ppc_md.smp_kick_cpu = smp_kick_cpu;
+ ppc_md.smp_setup_cpu = smp_chrp_setup_cpu;
+ }
+}
+
+
+void smp_local_timer_interrupt(struct pt_regs * regs)
+{
+ if (!--(get_paca()->prof_counter)) {
+ update_process_times(user_mode(regs));
+ (get_paca()->prof_counter)=get_paca()->prof_multiplier;
+ }
+}
+
+void smp_message_recv(int msg, struct pt_regs *regs)
+{
+ atomic_inc(&ipi_recv);
+
+ switch( msg ) {
+ case PPC_MSG_CALL_FUNCTION:
+ smp_call_function_interrupt();
+ break;
+ case PPC_MSG_RESCHEDULE:
+ current->need_resched = 1;
+ break;
+#ifdef CONFIG_XMON
+ case PPC_MSG_XMON_BREAK:
+ xmon(regs);
+ break;
+#endif /* CONFIG_XMON */
+#ifdef CONFIG_KDB
+ case PPC_MSG_XMON_BREAK:
+ /* This isn't finished yet, obviously -TAI */
+ kdb(KDB_REASON_KEYBOARD,0, (kdb_eframe_t) regs);
+ break;
+#endif
+ default:
+ printk("SMP %d: smp_message_recv(): unknown msg %d\n",
+ smp_processor_id(), msg);
+ break;
+ }
+}
+
+void smp_send_reschedule(int cpu)
+{
+ smp_message_pass(cpu, PPC_MSG_RESCHEDULE, 0, 0);
+}
+
+#ifdef CONFIG_XMON
+void smp_send_xmon_break(int cpu)
+{
+ smp_message_pass(cpu, PPC_MSG_XMON_BREAK, 0, 0);
+}
+#endif /* CONFIG_XMON */
+
+static void stop_this_cpu(void *dummy)
+{
+ __cli();
+ while (1)
+ ;
+}
+
+void smp_send_stop(void)
+{
+ smp_call_function(stop_this_cpu, NULL, 1, 0);
+ smp_num_cpus = 1;
+}
+
+/*
+ * Structure and data for smp_call_function(). This is designed to minimise
+ * static memory requirements. It also looks cleaner.
+ * Stolen from the i386 version.
+ */
+static spinlock_t call_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
+
+static struct call_data_struct {
+ void (*func) (void *info);
+ void *info;
+ atomic_t started;
+ atomic_t finished;
+ int wait;
+} *call_data;
+
+/*
+ * This function sends a 'generic call function' IPI to all other CPUs
+ * in the system.
+ *
+ * [SUMMARY] Run a function on all other CPUs.
+ * <func> The function to run. This must be fast and non-blocking.
+ * <info> An arbitrary pointer to pass to the function.
+ * <nonatomic> currently unused.
+ * <wait> If true, wait (atomically) until function has completed on other CPUs.
+ * [RETURNS] 0 on success, else a negative status code. Does not return until
+ * remote CPUs are nearly ready to execute <<func>> or have already executed it.
+ *
+ * You must not call this function with interrupts disabled or from a
+ * hardware interrupt handler; you may call it from a bottom half handler.
+ */
+int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
+ int wait)
+
+{
+ struct call_data_struct data;
+ int ret = -1, cpus = smp_num_cpus-1;
+ int timeout;
+
+ if (!cpus)
+ return 0;
+
+ data.func = func;
+ data.info = info;
+ atomic_set(&data.started, 0);
+ data.wait = wait;
+ if (wait)
+ atomic_set(&data.finished, 0);
+
+ spin_lock_bh(&call_lock);
+ call_data = &data;
+ /* Send a message to all other CPUs and wait for them to respond */
+ smp_message_pass(MSG_ALL_BUT_SELF, PPC_MSG_CALL_FUNCTION, 0, 0);
+
+ /* Wait for response */
+ timeout = 8000000;
+ while (atomic_read(&data.started) != cpus) {
+ HMT_low();
+ if (--timeout == 0) {
+ printk("smp_call_function on cpu %d: other cpus not responding (%d)\n",
+ smp_processor_id(), atomic_read(&data.started));
+#ifdef CONFIG_XMON
+ xmon(0);
+#endif
+#ifdef CONFIG_PPC_ISERIES
+ HvCall_terminateMachineSrc();
+#endif
+ goto out;
+ }
+ barrier();
+ udelay(1);
+ }
+
+ if (wait) {
+ timeout = 1000000;
+ while (atomic_read(&data.finished) != cpus) {
+ HMT_low();
+ if (--timeout == 0) {
+ printk("smp_call_function on cpu %d: other cpus not finishing (%d/%d)\n",
+ smp_processor_id(), atomic_read(&data.finished), atomic_read(&data.started));
+#ifdef CONFIG_PPC_ISERIES
+ HvCall_terminateMachineSrc();
+#endif
+ goto out;
+ }
+ barrier();
+ udelay(1);
+ }
+ }
+ ret = 0;
+
+ out:
+ HMT_medium();
+ spin_unlock_bh(&call_lock);
+ return ret;
+}
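+
+/*
+ * Minimal usage sketch (hypothetical caller, not part of this file).
+ * The IPI only reaches the other cpus, so a caller that wants the work
+ * done everywhere also runs the function locally:
+ *
+ *	static void drain_local_queue(void *unused) { ... }
+ *
+ *	if (smp_call_function(drain_local_queue, NULL, 1, 1) != 0)
+ *		printk("drain: some cpus did not respond\n");
+ *	drain_local_queue(NULL);
+ */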
+
+void smp_call_function_interrupt(void)
+{
+ void (*func) (void *info) = call_data->func;
+ void *info = call_data->info;
+ int wait = call_data->wait;
+
+ /*
+ * Notify initiating CPU that I've grabbed the data and am
+ * about to execute the function
+ */
+ atomic_inc(&call_data->started);
+ /*
+ * At this point the info structure may be out of scope unless wait==1
+ */
+ (*func)(info);
+ if (wait)
+ atomic_inc(&call_data->finished);
+}
+
+extern unsigned long decr_overclock;
+
+void __init smp_boot_cpus(void)
+{
+ extern struct current_set_struct current_set[];
+ extern void __secondary_start_chrp(void);
+ int i, cpu_nr;
+ struct task_struct *p;
+ unsigned long sp;
+
+ printk("Entering SMP Mode...\n");
+ PPCDBG(PPCDBG_SMP, "smp_boot_cpus: start. NR_CPUS = 0x%lx\n", NR_CPUS);
+
+ smp_num_cpus = 1;
+ smp_store_cpu_info(0);
+ cpu_online_map = 1UL;
+
+ /*
+ * assume for now that the first cpu booted is
+ * cpu 0, the master -- Cort
+ */
+ cpu_callin_map[0] = 1;
+ current->processor = 0;
+
+ init_idle();
+
+ for (i = 0; i < NR_CPUS; i++) {
+ paca[i].prof_counter=1;
+ paca[i].prof_multiplier = 1;
+ if(i != 0) {
+ /*
+ * Processor 0's segment table is statically
+ * initialized to real address 0x5000. The
+			 * other processors' tables are created and
+ * initialized here.
+ */
+ paca[i].xStab_data.virt = (unsigned long)&stab_array[PAGE_SIZE * (i-1)];
+ memset((void *)paca[i].xStab_data.virt, 0, PAGE_SIZE);
+ paca[i].xStab_data.real = __v2a(paca[i].xStab_data.virt);
+ paca[i].default_decr = tb_ticks_per_jiffy / decr_overclock;
+ }
+ }
+
+ /*
+ * XXX very rough, assumes 20 bus cycles to read a cache line,
+ * timebase increments every 4 bus cycles, 32kB L1 data cache.
+ */
+ cacheflush_time = 5 * 1024;
+
+ /* Probe arch for CPUs */
+ cpu_nr = ppc_md.smp_probe();
+
+ printk("Probe found %d CPUs\n", cpu_nr);
+
+ /*
+ * only check for cpus we know exist. We keep the callin map
+ * with cpus at the bottom -- Cort
+ */
+ if (cpu_nr > max_cpus)
+ cpu_nr = max_cpus;
+
+#ifdef CONFIG_ISERIES
+ smp_space_timers( cpu_nr );
+#endif
+
+ printk("Waiting for %d CPUs\n", cpu_nr-1);
+
+ for ( i = 1 ; i < cpu_nr; i++ ) {
+ int c;
+ struct pt_regs regs;
+
+ /* create a process for the processor */
+ /* we don't care about the values in regs since we'll
+ never reschedule the forked task. */
+ /* We DO care about one bit in the pt_regs we
+ pass to do_fork. That is the MSR_FP bit in
+ regs.msr. If that bit is on, then do_fork
+ (via copy_thread) will call giveup_fpu.
+ giveup_fpu will get a pointer to our (current's)
+ last register savearea via current->thread.regs
+ and using that pointer will turn off the MSR_FP,
+ MSR_FE0 and MSR_FE1 bits. At this point, this
+ pointer is pointing to some arbitrary point within
+ our stack */
+
+		memset(&regs, 0, sizeof(struct pt_regs));
+
+		if (do_fork(CLONE_VM|CLONE_PID, 0, &regs, 0) < 0)
+ panic("failed fork for CPU %d", i);
+ p = init_task.prev_task;
+ if (!p)
+ panic("No idle task for CPU %d", i);
+
+ PPCDBG(PPCDBG_SMP,"\tProcessor %d, task = 0x%lx\n", i, p);
+
+ del_from_runqueue(p);
+ unhash_process(p);
+ init_tasks[i] = p;
+
+ p->processor = i;
+ p->cpus_runnable = 1 << i; /* we schedule the first task manually */
+ current_set[i].task = p;
+ sp = ((unsigned long)p) + sizeof(union task_union)
+ - STACK_FRAME_OVERHEAD;
+ current_set[i].sp_real = (void *)__v2a(sp);
+
+ /* wake up cpus */
+ ppc_md.smp_kick_cpu(i);
+
+ /*
+ * wait to see if the cpu made a callin (is actually up).
+ * use this value that I found through experimentation.
+ * -- Cort
+ */
+ for ( c = 5000; c && !cpu_callin_map[i] ; c-- ) {
+ udelay(100);
+ }
+
+ if ( cpu_callin_map[i] )
+ {
+ printk("Processor %d found.\n", i);
+ PPCDBG(PPCDBG_SMP, "\tProcessor %d found.\n", i);
+ /* this sync's the decr's -- Cort */
+ smp_num_cpus++;
+ } else {
+ printk("Processor %d is stuck.\n", i);
+ PPCDBG(PPCDBG_SMP, "\tProcessor %d is stuck.\n", i);
+ }
+ }
+
+ /* Setup CPU 0 last (important) */
+ ppc_md.smp_setup_cpu(0);
+
+ if (smp_num_cpus < 2) {
+ tb_last_stamp = get_tb();
+ smp_tb_synchronized = 1;
+ }
+}
+
+void __init smp_commence(void)
+{
+ /*
+	 * Lets the callins below out of their loop.
+ */
+ PPCDBG(PPCDBG_SMP, "smp_commence: start\n");
+ wmb();
+ smp_commenced = 1;
+}
+
+void __init smp_callin(void)
+{
+ int cpu = current->processor;
+
+ smp_store_cpu_info(cpu);
+ set_dec(paca[cpu].default_decr);
+ cpu_callin_map[cpu] = 1;
+
+ ppc_md.smp_setup_cpu(cpu);
+
+ init_idle();
+
+ set_bit(smp_processor_id(), &cpu_online_map);
+
+ while(!smp_commenced) {
+ barrier();
+ }
+ __sti();
+}
+
+/* intel needs this */
+void __init initialize_secondary(void)
+{
+}
+
+/* Activate a secondary processor. */
+int start_secondary(void *unused)
+{
+ int cpu;
+
+ cpu = current->processor;
+ atomic_inc(&init_mm.mm_count);
+ current->active_mm = &init_mm;
+ smp_callin();
+
+ /* Go into the idle loop. */
+ return cpu_idle(NULL);
+}
+
+void __init smp_setup(char *str, int *ints)
+{
+}
+
+int __init setup_profiling_timer(unsigned int multiplier)
+{
+ return 0;
+}
+
+/* this function is called for each processor
+ */
+void __init smp_store_cpu_info(int id)
+{
+ paca[id].pvr = _get_PVR();
+}
+
+static int __init maxcpus(char *str)
+{
+ get_option(&str, &max_cpus);
+ return 1;
+}
+
+__setup("maxcpus=", maxcpus);