patch-2.2.0-pre1 linux/arch/i386/kernel/smp.c
- Lines: 628
- Date: Mon Dec 28 10:52:01 1998
- Orig file: v2.1.132/linux/arch/i386/kernel/smp.c
- Orig date: Tue Dec 22 14:16:54 1998
diff -u --recursive --new-file v2.1.132/linux/arch/i386/kernel/smp.c linux/arch/i386/kernel/smp.c
@@ -3,12 +3,14 @@
* hosts.
*
* (c) 1995 Alan Cox, CymruNET Ltd <alan@cymru.net>
+ * (c) 1998 Ingo Molnar
+ *
* Supported by Caldera http://www.caldera.com.
* Much of the core SMP work is based on previous work by Thomas Radke, to
* whom a great many thanks are extended.
*
- * Thanks to Intel for making available several different Pentium and
- * Pentium Pro MP machines.
+ * Thanks to Intel for making available several different Pentium,
+ * Pentium Pro and Pentium-II/Xeon MP machines.
*
* This code is released under the GNU public license version 2 or
* later.
@@ -26,6 +28,7 @@
* Ingo Molnar : Added APIC timers, based on code
* from Jose Renau
* Alan Cox : Added EBDA scanning
+ * Ingo Molnar : various cleanups and rewrites
*/
#include <linux/config.h>
@@ -112,6 +115,12 @@
return b;
}
+/*
+ * function prototypes:
+ */
+static void cache_APIC_registers (void);
+
+
static int smp_b_stepping = 0; /* Set if we find a B stepping CPU */
static int max_cpus = -1; /* Setup configured maximum number of CPUs to activate */
@@ -131,19 +140,13 @@
unsigned char boot_cpu_id = 0; /* Processor that is doing the boot up */
static int smp_activated = 0; /* Tripped once we need to start cross invalidating */
int apic_version[NR_CPUS]; /* APIC version number */
-static volatile int smp_commenced=0; /* Tripped when we start scheduling */
unsigned long apic_retval; /* Just debugging the assembler.. */
-static volatile unsigned char smp_cpu_in_msg[NR_CPUS]; /* True if this processor is sending an IPI */
-
volatile unsigned long kernel_counter=0; /* Number of times the processor holds the lock */
volatile unsigned long syscall_count=0; /* Number of times the processor holds the syscall lock */
volatile unsigned long ipi_count; /* Number of IPIs delivered */
-volatile unsigned long smp_proc_in_lock[NR_CPUS] = {0,};/* for computing process time */
-volatile int smp_process_available=0;
-
const char lk_lockmsg[] = "lock from interrupt context at %p\n";
int mp_bus_id_to_type [MAX_MP_BUSSES] = { -1, };
@@ -245,7 +248,7 @@
if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4))
{
- printk("Bad signature [%c%c%c%c].\n",
+ panic("SMP mptable: bad signature [%c%c%c%c]!\n",
mpc->mpc_signature[0],
mpc->mpc_signature[1],
mpc->mpc_signature[2],
@@ -254,7 +257,7 @@
}
if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length))
{
- printk("Checksum error.\n");
+ panic("SMP mptable: checksum error!\n");
return 1;
}
if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04)
@@ -629,13 +632,17 @@
* we use to track CPUs as they power up.
*/
+static atomic_t smp_commenced = ATOMIC_INIT(0);
+
void __init smp_commence(void)
{
/*
* Lets the callins below out of their loop.
*/
SMP_PRINTK(("Setting commenced=1, go go go\n"));
- smp_commenced=1;
+
+ wmb();
+ atomic_set(&smp_commenced,1);
}
void __init enable_local_APIC(void)
@@ -736,8 +743,8 @@
mtrr_init_secondary_cpu ();
#endif
smp_callin();
- while (!smp_commenced)
- barrier();
+ while (!atomic_read(&smp_commenced))
+ /* nothing */ ;
return cpu_idle(NULL);
}
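The new atomic_t flag plus the explicit wmb() makes the release ordering explicit: every store the boot CPU performs before smp_commence() must be visible to a secondary CPU before it sees smp_commenced become non-zero and falls out of the spin loop above. Below is a minimal user-space model of the same handshake, using C11 release/acquire atomics in place of the kernel's wmb()/atomic_set()/atomic_read(); the thread layout and the setup_data payload are illustrative, not from the patch.

/* User-space model of the smp_commence() handshake. C11
 * release/acquire stands in for wmb()/atomic_set()/atomic_read();
 * setup_data is a hypothetical stand-in for boot-time state. */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_int smp_commenced = 0;    /* mirrors ATOMIC_INIT(0) */
static int setup_data;                  /* stores that must be seen first */

static void *secondary_cpu(void *arg)
{
        /* like the loop above: while (!atomic_read(&smp_commenced)) ; */
        while (!atomic_load_explicit(&smp_commenced, memory_order_acquire))
                ;                       /* spin */
        printf("secondary sees setup_data=%d\n", setup_data);
        return NULL;
}

int main(void)
{
        pthread_t ap;

        pthread_create(&ap, NULL, secondary_cpu, NULL);

        setup_data = 42;                /* boot-CPU setup work */
        /* like: wmb(); atomic_set(&smp_commenced, 1); */
        atomic_store_explicit(&smp_commenced, 1, memory_order_release);

        pthread_join(ap, NULL);
        return 0;
}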
@@ -760,11 +767,7 @@
/*
* We don't actually need to load the full TSS,
* basically just the stack pointer and the eip.
- *
- * Get the scheduler lock, because we're going
- * to release it as part of the "reschedule" return.
*/
- spin_lock(&scheduler_lock);
asm volatile(
"movl %0,%%esp\n\t"
@@ -972,6 +975,27 @@
*((volatile unsigned long *)phys_to_virt(8192)) = 0;
}
+cycles_t cacheflush_time;
+extern unsigned long cpu_hz;
+
+static void smp_tune_scheduling (void)
+{
+ /*
+ * Rough estimation for SMP scheduling, this is the number of
+ * cycles it takes for a fully memory-limited process to flush
+ * the SMP-local cache.
+ *
+ * (For a P5 this pretty much means we will choose another idle
+ * CPU almost always at wakeup time (this is due to the small
+ * L1 cache), on PIIs it's around 50-100 usecs, depending on
+ * the cache size)
+ */
+ cacheflush_time = cpu_hz/1024*boot_cpu_data.x86_cache_size/5000;
+ printk("per-CPU timeslice cutoff: %ld.%ld usecs.\n",
+ (long)cacheflush_time/(cpu_hz/1000000),
+ ((long)cacheflush_time*100/(cpu_hz/1000000)) % 100);
+}
+
unsigned int prof_multiplier[NR_CPUS];
unsigned int prof_counter[NR_CPUS];
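The cutoff printed by smp_tune_scheduling() is easy to sanity-check by hand. Evaluated standalone for two assumed configurations (sample numbers, not from the patch), a 133 MHz P5 with 8 KB of cache comes out near 1.5 usecs — so nearly any idle CPU wins at wakeup — while a 400 MHz PII with 512 KB of L2 lands at 100 usecs, the upper end of the 50-100 usec range the comment quotes.

/* Standalone check of the estimate:
 *      cacheflush_time = cpu_hz/1024 * cache_size_kb / 5000   (cycles)
 * The (cpu_hz, cache) pairs below are illustrative assumptions. */
#include <stdio.h>

static void tune(unsigned long cpu_hz, unsigned long cache_kb)
{
        long cacheflush_time = (long)(cpu_hz / 1024 * cache_kb / 5000);

        printf("%4lu MHz, %4lu KB cache: cutoff %ld.%ld usecs\n",
                cpu_hz / 1000000, cache_kb,
                cacheflush_time / (long)(cpu_hz / 1000000),
                (cacheflush_time * 100 / (long)(cpu_hz / 1000000)) % 100);
}

int main(void)
{
        tune(133000000UL, 8);   /* P5-class:  prints 1.55  */
        tune(400000000UL, 512); /* PII-class: prints 100.0 */
        return 0;
}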
@@ -1004,6 +1028,7 @@
*/
smp_store_cpu_info(boot_cpu_id); /* Final full version of the data */
+ smp_tune_scheduling();
printk("CPU%d: ", boot_cpu_id);
print_cpu_info(&cpu_data[boot_cpu_id]);
@@ -1165,6 +1190,7 @@
printk(KERN_WARNING "WARNING: SMP operation may be unreliable with B stepping processors.\n");
SMP_PRINTK(("Boot done.\n"));
+ cache_APIC_registers();
/*
* Here we can be sure that there is an IO-APIC in the system. Let's
* go and set it up:
@@ -1175,257 +1201,270 @@
smp_done:
}
-void send_IPI(int dest, int vector)
-{
- unsigned long cfg;
- unsigned long flags;
- __save_flags(flags);
- __cli();
+/*
+ * the following functions deal with sending IPIs between CPUs.
+ *
+ * We use 'broadcast', CPU->CPU IPIs and self-IPIs too.
+ */
- /*
- * prepare target chip field
- */
- cfg = apic_read(APIC_ICR2) & 0x00FFFFFF;
- apic_write(APIC_ICR2, cfg|SET_APIC_DEST_FIELD(dest));
+/*
+ * Silly serialization to work around CPU bug in P5s.
+ * We can safely turn it off on a 686.
+ */
+#if defined(CONFIG_M686) & !defined(SMP_DEBUG)
+# define FORCE_APIC_SERIALIZATION 0
+#else
+# define FORCE_APIC_SERIALIZATION 1
+#endif
- cfg = apic_read(APIC_ICR);
- cfg &= ~0xFDFFF;
- cfg |= APIC_DEST_FIELD|APIC_DEST_DM_FIXED|vector;
- cfg |= dest;
-
- /*
- * Send the IPI. The write to APIC_ICR fires this off.
- */
-
- apic_write(APIC_ICR, cfg);
- __restore_flags(flags);
-}
+static unsigned int cached_APIC_ICR;
+static unsigned int cached_APIC_ICR2;
/*
- * A non wait message cannot pass data or CPU source info. This current setup
- * is only safe because the kernel lock owner is the only person who can send
- * a message.
+ * Caches reserved bits, APIC reads are (mildly) expensive
+ * and force otherwise unnecessary CPU synchronization.
*
- * Wrapping this whole block in a spinlock is not the safe answer either. A
- * processor may get stuck with IRQs off waiting to send a message and thus
- * not replying to the person spinning for a reply.
- *
- * In the end flush tlb ought to be the NMI and a very short function
- * (to avoid the old IDE disk problems), and other messages sent with IRQs
- * enabled in a civilised fashion. That will also boost performance.
+ * (We could cache other APIC registers too, but these are the
+ * main ones used in RL.)
*/
+#define slow_ICR (apic_read(APIC_ICR) & ~0xFDFFF)
+#define slow_ICR2 (apic_read(APIC_ICR2) & 0x00FFFFFF)
-void smp_message_pass(int target, int msg, unsigned long data, int wait)
+void cache_APIC_registers (void)
{
- unsigned long cfg;
- unsigned long dest = 0;
- unsigned long target_map;
- int p=smp_processor_id();
- int irq;
- int ct=0;
+ cached_APIC_ICR = slow_ICR;
+ cached_APIC_ICR2 = slow_ICR2;
+ mb();
+}
+static inline unsigned int __get_ICR (void)
+{
+#if FORCE_APIC_SERIALIZATION
/*
- * During boot up send no messages
+ * Wait for the APIC to become ready - this should never occur. It's
+ * a debugging check really.
*/
-
- if (!smp_activated || !smp_commenced)
- return;
+ int count = 0;
+ unsigned int cfg;
+ while (count < 1000)
+ {
+ cfg = slow_ICR;
+ if (!(cfg&(1<<12))) {
+ if (count)
+ atomic_add(count, (atomic_t*)&ipi_count);
+ return cfg;
+ }
+ count++;
+ udelay(10);
+ }
+ printk("CPU #%d: previous IPI still not cleared after 10mS\n",
+ smp_processor_id());
+ return cfg;
+#else
+ return cached_APIC_ICR;
+#endif
+}
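In the serialized (P5) case, __get_ICR() bounds its wait on the ICR delivery-status bit (bit 12) at 1000 polls, 10 usecs apart — about 10 ms — and warns instead of hanging forever. The same bounded-poll shape, pulled out into a standalone sketch with usleep() standing in for udelay():

/* Bounded-poll sketch modeled on __get_ICR()'s delivery-status wait.
 * usleep() stands in for udelay(); the fake register is idle here. */
#include <stdio.h>
#include <unistd.h>

static int poll_until_clear(volatile unsigned *reg, unsigned busy_bit)
{
        int count;

        for (count = 0; count < 1000; count++) {
                if (!(*reg & busy_bit))
                        return count;   /* ready; report polls spent */
                usleep(10);
        }
        printf("previous operation still not cleared after 10 ms\n");
        return -1;
}

int main(void)
{
        volatile unsigned fake_icr = 0; /* pretend the APIC is idle */

        printf("waited %d polls\n", poll_until_clear(&fake_icr, 1u << 12));
        return 0;
}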
- /*
- * Skip the reschedule if we are waiting to clear a
- * message at this time. The reschedule cannot wait
- * but is not critical.
- */
+static inline unsigned int __get_ICR2 (void)
+{
+#if FORCE_APIC_SERIALIZATION
+ return slow_ICR2;
+#else
+ return cached_APIC_ICR2;
+#endif
+}
- switch (msg) {
- case MSG_RESCHEDULE:
- irq = 0x30;
- if (smp_cpu_in_msg[p])
- return;
- break;
+static inline int __prepare_ICR (unsigned int shortcut, int vector)
+{
+ unsigned int cfg;
- case MSG_INVALIDATE_TLB:
- /* make this a NMI some day */
- irq = 0x31;
- break;
+ cfg = __get_ICR();
+ cfg |= APIC_DEST_FIELD|APIC_DEST_DM_FIXED|shortcut|vector;
- case MSG_STOP_CPU:
- irq = 0x40;
- break;
+ return cfg;
+}
- case MSG_MTRR_CHANGE:
- irq = 0x50;
- break;
+static inline int __prepare_ICR2 (unsigned int dest)
+{
+ unsigned int cfg;
- default:
- printk("Unknown SMP message %d\n", msg);
- return;
- }
+ cfg = __get_ICR2();
+ cfg |= SET_APIC_DEST_FIELD(dest);
- /*
- * Sanity check we don't re-enter this across CPUs. Only the kernel
- * lock holder may send messages. For a STOP_CPU we are bringing the
- * entire box to the fastest halt we can. A reschedule carries
- * no data and can occur during a flush. Guess what panic
- * I got to notice this bug.
- */
-
- /*
- * We are busy.
- */
-
- smp_cpu_in_msg[p]++;
+ return cfg;
+}
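The & ~0xFDFFF and & 0x00FFFFFF masks strip every field software composes per-IPI (vector, delivery mode, shortcut, destination) and keep only what must be preserved, which is why the cached copies stay valid. The toy below composes both registers the way the two __prepare_*() helpers do; the shortcut encodings (ALLBUT = 0xC0000, destination in ICR2's top byte) are assumptions based on the i82489DX layout, and the "reserved" sample values are invented.

/* Toy model of __prepare_ICR()/__prepare_ICR2() bit composition.
 * Encodings are assumptions from the i82489DX/Pentium local APIC;
 * the reserved-bits samples are invented for the demo. */
#include <stdio.h>

#define APIC_DEST_FIELD         0x00000u        /* physical destination */
#define APIC_DEST_DM_FIXED      0x00000u        /* fixed delivery mode */
#define APIC_DEST_ALLBUT        0xC0000u        /* all-but-self shortcut */
#define SET_APIC_DEST_FIELD(x)  ((unsigned)(x) << 24)

int main(void)
{
        unsigned icr_reserved  = 0x00002000u;   /* pretend cached slow_ICR */
        unsigned icr2_reserved = 0x00005678u;   /* pretend cached slow_ICR2 */
        unsigned vector = 0x31;                 /* TLB flush, per the old 0x31 */

        /* broadcast, as in send_IPI_allbutself(): shortcut+vector, no ICR2 */
        unsigned icr = icr_reserved | APIC_DEST_FIELD | APIC_DEST_DM_FIXED
                                    | APIC_DEST_ALLBUT | vector;

        /* directed, as in send_IPI_single(): shortcut 0, target CPU in ICR2 */
        unsigned icr2 = icr2_reserved | SET_APIC_DEST_FIELD(2);

        printf("ICR  = %#010x\n", icr);         /* 0x000c2031 */
        printf("ICR2 = %#010x\n", icr2);        /* 0x02005678 */
        return 0;
}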
-/* printk("SMP message pass #%d to %d of %d\n",
- p, msg, target);*/
+static inline void __send_IPI_shortcut(unsigned int shortcut, int vector)
+{
+ unsigned int cfg;
+/*
+ * Subtle. In the case of the 'never do double writes' workaround we
+ * have to lock out interrupts to be safe. Otherwise it's just one
+ * single atomic write to the APIC, no need for cli/sti.
+ */
+#if FORCE_APIC_SERIALIZATION
+ unsigned long flags;
- /*
- * Wait for the APIC to become ready - this should never occur. It's
- * a debugging check really.
- */
-
- while (ct<1000)
- {
- cfg=apic_read(APIC_ICR);
- if (!(cfg&(1<<12)))
- break;
- ct++;
- udelay(10);
- }
+ __save_flags(flags);
+ __cli();
+#endif
/*
- * Just pray... there is nothing more we can do
+ * No need to touch the target chip field
*/
-
- if (ct==1000)
- printk("CPU #%d: previous IPI still not cleared after 10mS\n", p);
+
+ cfg = __prepare_ICR(shortcut, vector);
/*
- * Set the target requirement
+ * Send the IPI. The write to APIC_ICR fires this off.
*/
-
- if (target==MSG_ALL_BUT_SELF)
- {
- dest=APIC_DEST_ALLBUT;
- target_map=cpu_present_map;
- cpu_callin_map[0]=(1<<p);
- }
- else if (target==MSG_ALL)
- {
- dest=APIC_DEST_ALLINC;
- target_map=cpu_present_map;
- cpu_callin_map[0]=0;
- }
- else
- {
- dest=0;
- target_map=(1<<target);
- cpu_callin_map[0]=0;
- }
+ apic_write(APIC_ICR, cfg);
+#if FORCE_APIC_SERIALIZATION
+ __restore_flags(flags);
+#endif
+}
+
+static inline void send_IPI_allbutself(int vector)
+{
+ __send_IPI_shortcut(APIC_DEST_ALLBUT, vector);
+}
+
+static inline void send_IPI_all(int vector)
+{
+ __send_IPI_shortcut(APIC_DEST_ALLINC, vector);
+}
+
+void send_IPI_self(int vector)
+{
+ __send_IPI_shortcut(APIC_DEST_SELF, vector);
+}
+
+static inline void send_IPI_single(int dest, int vector)
+{
+ unsigned long cfg;
+#if FORCE_APIC_SERIALIZATION
+ unsigned long flags;
+
+ __save_flags(flags);
+ __cli();
+#endif
/*
- * Program the APIC to deliver the IPI
+ * prepare target chip field
*/
- send_IPI(dest,irq);
+ cfg = __prepare_ICR2(dest);
+ apic_write(APIC_ICR2, cfg);
/*
- * Spin waiting for completion
+ * program the ICR
*/
+ cfg = __prepare_ICR(0, vector);
- switch(wait)
- {
- int stuck;
- case 1:
- stuck = 50000000;
- while(cpu_callin_map[0]!=target_map) {
- --stuck;
- if (!stuck) {
- printk("stuck on target_map IPI wait\n");
- break;
- }
- }
- break;
- case 2:
- stuck = 50000000;
- /* Wait for invalidate map to clear */
- while (smp_invalidate_needed) {
- /* Take care of "crossing" invalidates */
- if (test_bit(p, &smp_invalidate_needed))
- clear_bit(p, &smp_invalidate_needed);
- --stuck;
- if (!stuck) {
- printk("stuck on smp_invalidate_needed IPI wait (CPU#%d)\n",p);
- break;
- }
- }
- break;
- }
-
/*
- * Record our completion
+ * Send the IPI. The write to APIC_ICR fires this off.
*/
-
- smp_cpu_in_msg[p]--;
+ apic_write(APIC_ICR, cfg);
+#if FORCE_APIC_SERIALIZATION
+ __restore_flags(flags);
+#endif
}
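Either way, a directed IPI is two writes whose order matters: the destination byte must land in ICR2 before the ICR write, because it is the ICR write that actually fires the interrupt. A condensed model of that sequence — a plain array stands in for the memory-mapped APIC, with ICR at offset 0x300 and ICR2 at 0x310 as on the real local APIC:

/* Condensed model of send_IPI_single()'s write ordering.
 * apic_regs[] is a fake MMIO window; real code does volatile
 * stores to the memory-mapped local APIC. */
#include <stdio.h>

#define APIC_ICR        0x300
#define APIC_ICR2       0x310

static unsigned apic_regs[0x400 / 4];

static void apic_write(unsigned reg, unsigned val)
{
        apic_regs[reg / 4] = val;
        printf("APIC[%#5x] <- %#010x\n", reg, val);
}

static void send_ipi_single(int dest, int vector)
{
        /* 1. target first: nothing is sent by this write */
        apic_write(APIC_ICR2, (unsigned)dest << 24);
        /* 2. fixed delivery mode (0) + vector; this write sends the IPI */
        apic_write(APIC_ICR, (unsigned)vector);
}

int main(void)
{
        send_ipi_single(1, 0x30);       /* e.g. reschedule CPU#1 */
        return 0;
}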
/*
- * This is fraught with deadlocks. Linus does a flush tlb at a whim
- * even with IRQs off. We have to avoid a pair of crossing flushes
- * or we are doomed. See the notes about smp_message_pass.
+ * This is fraught with deadlocks. Probably the situation is not that
+ * bad as in the early days of SMP, so we might ease some of the
+ * paranoia here.
*/
void smp_flush_tlb(void)
{
+ int cpu = smp_processor_id();
+ int stuck;
unsigned long flags;
-/* printk("SMI-");*/
-
/*
- * The assignment is safe because it's volatile so the compiler cannot reorder it,
- * because the i586 has strict memory ordering and because only the kernel lock holder
- * may issue a tlb flush. If you break any one of those three change this to an atomic
- * bus locked or.
+ * The assignment is safe because it's volatile so the
+ * compiler cannot reorder it, because the i586 has
+ * strict memory ordering and because only the kernel
+ * lock holder may issue a tlb flush. If you break any
+ * one of those three change this to an atomic bus
+ * locked or.
*/
- smp_invalidate_needed=cpu_present_map;
+ smp_invalidate_needed = cpu_present_map;
/*
- * Processors spinning on the lock will see this IRQ late. The smp_invalidate_needed map will
- * ensure they don't do a spurious flush tlb or miss one.
+ * Processors spinning on some lock with IRQs disabled
+ * will see this IRQ late. The smp_invalidate_needed
+ * map will ensure they don't do a spurious flush tlb
+ * or miss one.
*/
__save_flags(flags);
__cli();
- smp_message_pass(MSG_ALL_BUT_SELF, MSG_INVALIDATE_TLB, 0L, 2);
+
+ send_IPI_allbutself(INVALIDATE_TLB_VECTOR);
/*
- * Flush the local TLB
+ * Spin waiting for completion
*/
-
- local_flush_tlb();
- __restore_flags(flags);
+ stuck = 50000000;
+ while (smp_invalidate_needed) {
+ /*
+ * Take care of "crossing" invalidates
+ */
+ if (test_bit(cpu, &smp_invalidate_needed))
+ clear_bit(cpu, &smp_invalidate_needed);
+ --stuck;
+ if (!stuck) {
+ printk("stuck on TLB IPI wait (CPU#%d)\n",cpu);
+ break;
+ }
+ }
/*
- * Completed.
+ * Flush the local TLB
*/
-
-/* printk("SMID\n");*/
+ local_flush_tlb();
+
+ __restore_flags(flags);
}
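The receiving side of this protocol is not in this hunk, but the bitmap contract is visible: each target clears its own bit in smp_invalidate_needed and flushes, while the initiator spins until the map drains, clearing its own bit whenever a crossing flush arrives. A user-space model of that contract — threads stand in for CPUs, and the handler behaviour is an assumption, not the patch's code:

/* User-space model of the smp_invalidate_needed handshake.
 * The handler side (invalidate_handler) is an assumed sketch;
 * the real IPI handler is not part of this hunk. */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define NCPUS 4
static atomic_ulong smp_invalidate_needed;

static void local_flush_tlb(int cpu)    /* placeholder for the real flush */
{
        printf("CPU#%d: flushed local TLB\n", cpu);
}

static void *invalidate_handler(void *arg)
{
        int cpu = (int)(long)arg;

        /* wait until the initiator marks us... */
        while (!(atomic_load(&smp_invalidate_needed) & (1UL << cpu)))
                ;
        /* ...then clear our own bit and flush, as the handler must */
        atomic_fetch_and(&smp_invalidate_needed, ~(1UL << cpu));
        local_flush_tlb(cpu);
        return NULL;
}

int main(void)
{
        pthread_t cpus[NCPUS];
        int initiator = 0;

        for (long i = 1; i < NCPUS; i++)
                pthread_create(&cpus[i], NULL, invalidate_handler, (void *)i);

        /* initiator: mark everyone, "send" the IPI, spin until drained */
        atomic_store(&smp_invalidate_needed, (1UL << NCPUS) - 1);
        while (atomic_load(&smp_invalidate_needed))
                /* crossing case: clear our own bit rather than wait on it */
                atomic_fetch_and(&smp_invalidate_needed, ~(1UL << initiator));
        local_flush_tlb(initiator);

        for (long i = 1; i < NCPUS; i++)
                pthread_join(cpus[i], NULL);
        return 0;
}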
+/*
+ * this function sends a 'reschedule' IPI to another CPU.
+ * it goes straight through and wastes no time serializing
+ * anything. Worst case is that we lose a reschedule ...
+ */
+
void smp_send_reschedule(int cpu)
{
- unsigned long flags;
+ send_IPI_single(cpu, RESCHEDULE_VECTOR);
+}
- __save_flags(flags);
- __cli();
- smp_message_pass(cpu, MSG_RESCHEDULE, 0L, 0);
- __restore_flags(flags);
+/*
+ * this function sends a 'stop' IPI to all other CPUs in the system.
+ * it goes straight through.
+ */
+
+void smp_send_stop(void)
+{
+ send_IPI_allbutself(STOP_CPU_VECTOR);
+}
+
+/*
+ * this function sends a 'reload MTRR state' IPI to all other CPUs
+ * in the system. it goes straight through, completion processing
+ * is done on the mtrr.c level.
+ */
+
+void smp_send_mtrr(void)
+{
+ send_IPI_allbutself(MTRR_CHANGE_VECTOR);
}
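Taken together, these three wrappers replace the old smp_message_pass() multiplexing one-for-one: smp_message_pass(cpu, MSG_RESCHEDULE, 0, 0) becomes smp_send_reschedule(cpu), the MSG_STOP_CPU broadcast becomes smp_send_stop(), and the MSG_MTRR_CHANGE broadcast becomes smp_send_mtrr(). The old per-message trap vectors (0x30, 0x40, 0x50 in the deleted switch) are presumably carried over into the RESCHEDULE_VECTOR, STOP_CPU_VECTOR and MTRR_CHANGE_VECTOR constants defined elsewhere in this patch series.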
/*
@@ -1626,12 +1665,9 @@
* Unfortunately the local APIC timer cannot be set up into NMI
* mode. With the IO APIC we can re-route the external timer
* interrupt and broadcast it as an NMI to all CPUs, so no pain.
- *
- * NOTE: this trap vector (0x41) and the gate in
- * BUILD_SMP_TIMER_INTERRUPT should be the same ;)
*/
tmp_value = apic_read(APIC_LVTT);
- lvtt1_value = APIC_LVT_TIMER_PERIODIC | 0x41;
+ lvtt1_value = APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR;
apic_write(APIC_LVTT , lvtt1_value);
/*