patch-2.4.25 linux-2.4.25/arch/ia64/kernel/salinfo.c

Next file: linux-2.4.25/arch/ia64/kernel/setup.c
Previous file: linux-2.4.25/arch/ia64/kernel/perfmon.c
Back to the patch index
Back to the overall index

diff -urN linux-2.4.24/arch/ia64/kernel/salinfo.c linux-2.4.25/arch/ia64/kernel/salinfo.c
@@ -3,19 +3,29 @@
  *
  * Creates entries in /proc/sal for various system features.
  *
- * Copyright (c) 2001 Silicon Graphics, Inc.  All rights reserved.
+ * Copyright (c) 2003 Silicon Graphics, Inc.  All rights reserved.
  * Copyright (c) 2003 Hewlett-Packard Co
  *	Bjorn Helgaas <bjorn.helgaas@hp.com>
  *
  * 10/30/2001	jbarnes@sgi.com		copied much of Stephane's palinfo
  *					code to create this file
+ * Oct 23 2003	kaos@sgi.com
+ *   Replace IPI with set_cpus_allowed() to read a record from the required cpu.
+ *   Redesign salinfo log processing to separate interrupt and user space
+ *   contexts.
+ *   Cache the record across multi-block reads from user space.
+ *   Support > 64 cpus.
+ *   Delete module_exit and MOD_INC/DEC_COUNT, salinfo cannot be a module.
  */
 
 #include <linux/types.h>
 #include <linux/proc_fs.h>
 #include <linux/module.h>
 #include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/vmalloc.h>
 
+#include <asm/semaphore.h>
 #include <asm/sal.h>
 #include <asm/uaccess.h>
 
@@ -57,48 +67,175 @@
 	(2 * ARRAY_SIZE(salinfo_log_name)) +		/* /proc/sal/mca/{event,data} */
 	1];						/* /proc/sal */
 
-struct salinfo_log_data {
-	int	type;
-	u8	*log_buffer;
-	u64	log_size;
-};
+/* Allow build with or without large SSI support */
+#ifdef CPU_MASK_NONE
+#define SCA(x, y) set_cpus_allowed((x), &(y))
+#else
+#define cpumask_t unsigned long
+#define SCA(x, y) set_cpus_allowed((x), (y))
+#endif
 
-struct salinfo_event {
-	int			type;
-	int			cpu;		/* next CPU to check */
-	volatile unsigned long	cpu_mask;
-	wait_queue_head_t	queue;
+/* Some records we get ourselves, some are accessed as saved data in buffers
+ * that are owned by mca.c.
+ */
+struct salinfo_data_saved {
+	u8*			buffer;
+	u64			size;
+	u64			id;
+	int			cpu;
 };
 
-static struct salinfo_event *salinfo_event[ARRAY_SIZE(salinfo_log_name)];
+/* State transitions.  Actions are :-
+ *   Write "read <cpunum>" to the data file.
+ *   Write "clear <cpunum>" to the data file.
+ *   Write "oemdata <cpunum> <offset>" to the data file.
+ *   Read from the data file.
+ *   Close the data file.
+ *
+ * Start state is NO_DATA.
+ *
+ * NO_DATA
+ *    write "read <cpunum>" -> NO_DATA or LOG_RECORD.
+ *    write "clear <cpunum>" -> NO_DATA or LOG_RECORD.
+ *    write "oemdata <cpunum> <offset>" -> return -EINVAL.
+ *    read data -> return EOF.
+ *    close -> unchanged.  Free record areas.
+ *
+ * LOG_RECORD
+ *    write "read <cpunum>" -> NO_DATA or LOG_RECORD.
+ *    write "clear <cpunum>" -> NO_DATA or LOG_RECORD.
+ *    write "oemdata <cpunum> <offset>" -> format the oem data, goto OEMDATA.
+ *    read data -> return the INIT/MCA/CMC/CPE record.
+ *    close -> unchanged.  Keep record areas.
+ *
+ * OEMDATA
+ *    write "read <cpunum>" -> NO_DATA or LOG_RECORD.
+ *    write "clear <cpunum>" -> NO_DATA or LOG_RECORD.
+ *    write "oemdata <cpunum> <offset>" -> format the oem data, goto OEMDATA.
+ *    read data -> return the formatted oemdata.
+ *    close -> unchanged.  Keep record areas.
+ *
+ * Closing the data file does not change the state.  This allows shell scripts
+ * to manipulate salinfo data, each shell redirection opens the file, does one
+ * action then closes it again.  The record areas are only freed at close when
+ * the state is NO_DATA.
+ */
+enum salinfo_state {
+	STATE_NO_DATA,
+	STATE_LOG_RECORD,
+	STATE_OEMDATA,
+};
 
 struct salinfo_data {
-	int	open;		/* single-open to prevent races */
-	int	type;
-	int	cpu;		/* "current" cpu for reads */
+	volatile cpumask_t	cpu_event;	/* which cpus have outstanding events */
+	struct semaphore	sem;		/* count of cpus with outstanding events (bits set in cpu_event) */
+	u8			*log_buffer;
+	u64			log_size;
+	u8			*oemdata;	/* decoded oem data */
+	u64			oemdata_size;
+	int			open;		/* single-open to prevent races */
+	u8			type;
+	u8			saved_num;	/* using a saved record? */
+	enum salinfo_state	state :8;	/* processing state */
+	u8			padding;
+	int			cpu_check;	/* next CPU to check */
+	struct salinfo_data_saved data_saved[5];/* save last 5 records from mca.c, must be < 255 */
 };
 
 static struct salinfo_data salinfo_data[ARRAY_SIZE(salinfo_log_name)];
 
-static spinlock_t data_lock;
+static spinlock_t data_lock, data_saved_lock;
 
+/** salinfo_platform_oemdata - optional callback to decode oemdata from an error
+ * record.
+ * @sect_header: pointer to the start of the section to decode.
+ * @oemdata: returns vmalloc area containing the decoded output.
+ * @oemdata_size: returns length of decoded output (strlen).
+ *
+ * Description: If user space asks for oem data to be decoded by the kernel
+ * and/or prom and the platform has set salinfo_platform_oemdata to the address
+ * of a platform specific routine then call that routine.  salinfo_platform_oemdata
+ * vmalloc's and formats its output area, returning the address of the text
+ * and its strlen.  Returns 0 for success, -ve for error.  The callback is
+ * invoked on the cpu that generated the error record.
+ */
+int (*salinfo_platform_oemdata)(const u8 *sect_header, u8 **oemdata, u64 *oemdata_size);
+
+struct salinfo_platform_oemdata_parms {
+	const u8 *efi_guid;
+	u8 **oemdata;
+	u64 *oemdata_size;
+	int ret;
+};
+
+static void
+salinfo_platform_oemdata_cpu(void *context)
+{
+	struct salinfo_platform_oemdata_parms *parms = context;
+	parms->ret = salinfo_platform_oemdata(parms->efi_guid, parms->oemdata, parms->oemdata_size);
+}
+
+static void
+shift1_data_saved (struct salinfo_data *data, int shift)
+{	/* Drop data_saved[shift]: slide the later entries down one slot and zero the last slot. */
+	memmove(data->data_saved+shift, data->data_saved+shift+1,	/* ranges overlap, so memmove not memcpy */
+	       (ARRAY_SIZE(data->data_saved) - (shift+1)) * sizeof(data->data_saved[0]));
+	memset(data->data_saved + ARRAY_SIZE(data->data_saved) - 1, 0,
+	       sizeof(data->data_saved[0]));
+}
+
+/* This routine is invoked in interrupt context.  Note: mca.c enables
+ * interrupts before calling this code for CMC/CPE.  MCA and INIT events are
+ * not irq safe, do not call any routines that use spinlocks, they may deadlock.
+ *
+ * The buffer passed from mca.c points to the output from ia64_log_get. This is
+ * a persistent buffer but its contents can change between the interrupt and
+ * when user space processes the record.  Save the record id to identify
+ * changes.
+ */
 void
-salinfo_log_wakeup(int type)
+salinfo_log_wakeup(int type, u8 *buffer, u64 size)
 {
-	if (type < ARRAY_SIZE(salinfo_log_name)) {
-		struct salinfo_event *event = salinfo_event[type];
+	struct salinfo_data *data = salinfo_data + type;
+	struct salinfo_data_saved *data_saved;
+	unsigned long flags = 0;
+	int i, irqsafe = type != SAL_INFO_TYPE_MCA && type != SAL_INFO_TYPE_INIT;
+	int saved_size = ARRAY_SIZE(data->data_saved);
+
+	BUG_ON(type >= ARRAY_SIZE(salinfo_log_name));
+
+	if (irqsafe)
+		spin_lock_irqsave(&data_saved_lock, flags);
+	for (i = 0, data_saved = data->data_saved; i < saved_size; ++i, ++data_saved) {
+		if (!data_saved->buffer)
+			break;
+	}
+	if (i == saved_size) {
+		if (!data->saved_num) {
+			shift1_data_saved(data, 0);
+			data_saved = data->data_saved + saved_size - 1;
+		} else
+			data_saved = NULL;
+	}
+	if (data_saved) {
+		data_saved->cpu = smp_processor_id();
+		data_saved->id = ((sal_log_record_header_t *)buffer)->id;
+		data_saved->size = size;
+		data_saved->buffer = buffer;
+	}
+	if (irqsafe)
+		spin_unlock_irqrestore(&data_saved_lock, flags);
 
-		if (event) {
-			set_bit(smp_processor_id(), &event->cpu_mask);
-			wake_up_interruptible(&event->queue);
-		}
+	if (!test_and_set_bit(smp_processor_id(), &data->cpu_event)) {
+		if (irqsafe)
+			up(&data->sem);
 	}
 }
 
 static int
 salinfo_event_open(struct inode *inode, struct file *file)
 {
-	if (!suser())
+	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 	return 0;
 }
@@ -107,24 +244,23 @@
 salinfo_event_read(struct file *file, char *buffer, size_t count, loff_t *ppos)
 {
 	struct inode *inode = file->f_dentry->d_inode;
-	struct proc_dir_entry *entry = (struct proc_dir_entry *) inode->u.generic_ip;
-	struct salinfo_event *event = entry->data;
+	struct proc_dir_entry *entry = PDE(inode);
+	struct salinfo_data *data = entry->data;
 	char cmd[32];
 	size_t size;
 	int i, n, cpu = -1;
 
 retry:
-	if (!event->cpu_mask) {
+	if (down_trylock(&data->sem)) {
 		if (file->f_flags & O_NONBLOCK)
 			return -EAGAIN;
-		interruptible_sleep_on(&event->queue);
-		if (signal_pending(current))
-			return -EINTR;
+		if (down_interruptible(&data->sem))
+			return -ERESTARTSYS;
 	}
 
-	n = event->cpu;
+	n = data->cpu_check;
 	for (i = 0; i < NR_CPUS; i++) {
-		if (event->cpu_mask & 1UL << n) {
+		if (test_bit(n, &data->cpu_event)) {
 			cpu = n;
 			break;
 		}
@@ -135,10 +271,13 @@
 	if (cpu == -1)
 		goto retry;
 
+	/* events are sticky until the user says "clear" */
+	up(&data->sem);
+
 	/* for next read, start checking at next CPU */
-	event->cpu = cpu;
-	if (++event->cpu == NR_CPUS)
-		event->cpu = 0;
+	data->cpu_check = cpu;
+	if (++data->cpu_check == NR_CPUS)
+		data->cpu_check = 0;
 
 	snprintf(cmd, sizeof(cmd), "read %d\n", cpu);
 
@@ -159,10 +298,10 @@
 static int
 salinfo_log_open(struct inode *inode, struct file *file)
 {
-	struct proc_dir_entry *entry = (struct proc_dir_entry *) inode->u.generic_ip;
+	struct proc_dir_entry *entry = PDE(inode);
 	struct salinfo_data *data = entry->data;
 
-	if (!suser())
+	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
 	spin_lock(&data_lock);
@@ -173,15 +312,27 @@
 	data->open = 1;
 	spin_unlock(&data_lock);
 
+	if (data->state == STATE_NO_DATA &&
+	    !(data->log_buffer = vmalloc(ia64_sal_get_state_info_size(data->type)))) {
+		data->open = 0;
+		return -ENOMEM;
+	}
+
 	return 0;
 }
 
 static int
 salinfo_log_release(struct inode *inode, struct file *file)
 {
-	struct proc_dir_entry *entry = (struct proc_dir_entry *) inode->u.generic_ip;
+	struct proc_dir_entry *entry = PDE(inode);
 	struct salinfo_data *data = entry->data;
 
+	if (data->state == STATE_NO_DATA) {
+		vfree(data->log_buffer);
+		vfree(data->oemdata);
+		data->log_buffer = NULL;
+		data->oemdata = NULL;
+	}
 	spin_lock(&data_lock);
 	data->open = 0;
 	spin_unlock(&data_lock);
@@ -191,95 +342,136 @@
 static void
 call_on_cpu(int cpu, void (*fn)(void *), void *arg)
 {
-	if (cpu == smp_processor_id())
-		(*fn)(arg);
-#ifdef CONFIG_SMP
-	else if (cpu_online(cpu))	/* cpu may not have been validated */
-		smp_call_function_single(cpu, fn, arg, 0, 1);
-#endif
+	cpumask_t save_cpus_allowed, new_cpus_allowed;
+	memcpy(&save_cpus_allowed, &current->cpus_allowed, sizeof(save_cpus_allowed));
+	memset(&new_cpus_allowed, 0, sizeof(new_cpus_allowed));
+	set_bit(cpu, &new_cpus_allowed);
+	SCA(current, new_cpus_allowed);
+	(*fn)(arg);
+	SCA(current, save_cpus_allowed);
 }
 
 static void
 salinfo_log_read_cpu(void *context)
 {
-	struct salinfo_log_data *info = context;
-	struct salinfo_event *event = salinfo_event[info->type];
-	u64 size;
-
-	size = ia64_sal_get_state_info_size(info->type);
-	info->log_buffer = kmalloc(size, GFP_ATOMIC);
-	if (!info->log_buffer)
-		return;
-
-	clear_bit(smp_processor_id(), &event->cpu_mask);
-	info->log_size = ia64_sal_get_state_info(info->type, (u64 *) info->log_buffer);
-	if (info->log_size)
-		salinfo_log_wakeup(info->type);
+	struct salinfo_data *data = context;
+	data->log_size = ia64_sal_get_state_info(data->type, (u64 *) data->log_buffer);
+	if (data->type == SAL_INFO_TYPE_CPE || data->type == SAL_INFO_TYPE_CMC)
+		ia64_sal_clear_state_info(data->type);
+}
+
+static void
+salinfo_log_new_read(int cpu, struct salinfo_data *data)
+{
+	struct salinfo_data_saved *data_saved;
+	unsigned long flags;
+	int i;
+	int saved_size = ARRAY_SIZE(data->data_saved);
+
+	data->saved_num = 0;
+	spin_lock_irqsave(&data_saved_lock, flags);
+retry:
+	for (i = 0, data_saved = data->data_saved; i < saved_size; ++i, ++data_saved) {
+		if (data_saved->buffer && data_saved->cpu == cpu) {
+			sal_log_record_header_t *rh = (sal_log_record_header_t *)(data_saved->buffer);
+			data->log_size = data_saved->size;
+			memcpy(data->log_buffer, rh, data->log_size);
+			barrier();	/* id check must not be moved */
+			if (rh->id == data_saved->id) {
+				data->saved_num = i+1;
+				break;
+			}
+			/* saved record changed by mca.c since interrupt, discard it */
+			shift1_data_saved(data, i);
+			goto retry;
+		}
+	}
+	spin_unlock_irqrestore(&data_saved_lock, flags);
+
+	if (!data->saved_num)
+		call_on_cpu(cpu, salinfo_log_read_cpu, data);
+	data->state = data->log_size ? STATE_LOG_RECORD : STATE_NO_DATA;
 }
 
 static ssize_t
 salinfo_log_read(struct file *file, char *buffer, size_t count, loff_t *ppos)
 {
 	struct inode *inode = file->f_dentry->d_inode;
-	struct proc_dir_entry *entry = (struct proc_dir_entry *) inode->u.generic_ip;
+	struct proc_dir_entry *entry = PDE(inode);
 	struct salinfo_data *data = entry->data;
-	struct salinfo_log_data info;
-	int ret;
 	void *saldata;
 	size_t size;
+	u8 *buf;
+	u64 bufsize;
 
-	info.type = data->type;
-	info.log_buffer = 0;
-	call_on_cpu(data->cpu, salinfo_log_read_cpu, &info);
-	if (!info.log_buffer || *ppos >= info.log_size) {
-		ret = 0;
-		goto out;
+	if (data->state == STATE_LOG_RECORD) {
+		buf = data->log_buffer;
+		bufsize = data->log_size;
+	} else if (data->state == STATE_OEMDATA) {
+		buf = data->oemdata;
+		bufsize = data->oemdata_size;
+	} else {
+		buf = NULL;
+		bufsize = 0;
 	}
+	if (*ppos >= bufsize)
+		return 0;
 
-	saldata = info.log_buffer + file->f_pos;
-	size = info.log_size - file->f_pos;
+	saldata = buf + file->f_pos;
+	size = bufsize - file->f_pos;
 	if (size > count)
 		size = count;
-	if (copy_to_user(buffer, saldata, size)) {
-		ret = -EFAULT;
-		goto out;
-	}
+	if (copy_to_user(buffer, saldata, size))
+		return -EFAULT;
 
 	*ppos += size;
-	ret = size;
-
-out:
-	kfree(info.log_buffer);
-	return ret;
+	return size;
 }
 
 static void
 salinfo_log_clear_cpu(void *context)
 {
 	struct salinfo_data *data = context;
-	struct salinfo_event *event = salinfo_event[data->type];
-	struct salinfo_log_data info;
-
-	clear_bit(smp_processor_id(), &event->cpu_mask);
 	ia64_sal_clear_state_info(data->type);
+}
 
-	/* clearing one record may make another visible */
-	info.type = data->type;
-	salinfo_log_read_cpu(&info);
-	if (info.log_buffer && info.log_size)
-		salinfo_log_wakeup(data->type);
-
-	kfree(info.log_buffer);
+static int
+salinfo_log_clear(struct salinfo_data *data, int cpu)
+{
+	data->state = STATE_NO_DATA;
+	if (!test_bit(cpu, &data->cpu_event))
+		return 0;
+	down(&data->sem);
+	clear_bit(cpu, &data->cpu_event);
+	if (data->saved_num) {
+		unsigned long flags;
+		spin_lock_irqsave(&data_saved_lock, flags);
+		shift1_data_saved(data, data->saved_num - 1 );
+		data->saved_num = 0;
+		spin_unlock_irqrestore(&data_saved_lock, flags);
+	}
+	/* ia64_mca_log_sal_error_record or salinfo_log_read_cpu already cleared
+	 * CPE and CMC errors
+	 */
+	if (data->type != SAL_INFO_TYPE_CPE && data->type != SAL_INFO_TYPE_CMC)
+		call_on_cpu(cpu, salinfo_log_clear_cpu, data);
+	/* clearing a record may make a new record visible */
+	salinfo_log_new_read(cpu, data);
+	if (data->state == STATE_LOG_RECORD &&
+	    !test_and_set_bit(cpu,  &data->cpu_event))
+		up(&data->sem);
+	return 0;
 }
 
 static ssize_t
 salinfo_log_write(struct file *file, const char *buffer, size_t count, loff_t *ppos)
 {
 	struct inode *inode = file->f_dentry->d_inode;
-	struct proc_dir_entry *entry = (struct proc_dir_entry *) inode->u.generic_ip;
+	struct proc_dir_entry *entry = PDE(inode);
 	struct salinfo_data *data = entry->data;
 	char cmd[32];
 	size_t size;
+	u32 offset;
 	int cpu;
 
 	size = sizeof(cmd);
@@ -288,10 +480,31 @@
 	if (copy_from_user(cmd, buffer, size))
 		return -EFAULT;
 
-	if (sscanf(cmd, "read %d", &cpu) == 1)
-		data->cpu = cpu;
-	else if (sscanf(cmd, "clear %d", &cpu) == 1)
-		call_on_cpu(cpu, salinfo_log_clear_cpu, data);
+	if (sscanf(cmd, "read %d", &cpu) == 1) {
+		salinfo_log_new_read(cpu, data);
+	} else if (sscanf(cmd, "clear %d", &cpu) == 1) {
+		int ret;
+		if ((ret = salinfo_log_clear(data, cpu)))
+			count = ret;
+	} else if (sscanf(cmd, "oemdata %d %d", &cpu, &offset) == 2) {
+		if (data->state != STATE_LOG_RECORD && data->state != STATE_OEMDATA)
+			return -EINVAL;
+		if (offset > data->log_size - sizeof(efi_guid_t))
+			return -EINVAL;
+		data->state = STATE_OEMDATA;
+		if (salinfo_platform_oemdata) {
+			struct salinfo_platform_oemdata_parms parms = {
+				.efi_guid = data->log_buffer + offset,
+				.oemdata = &data->oemdata,
+				.oemdata_size = &data->oemdata_size
+			};
+			call_on_cpu(cpu, salinfo_platform_oemdata_cpu, &parms);
+			if (parms.ret)
+				count = parms.ret;
+		} else
+			data->oemdata_size = 0;
+	} else
+		return -EINVAL;
 
 	return count;
 }
@@ -309,9 +522,8 @@
 	struct proc_dir_entry *salinfo_dir; /* /proc/sal dir entry */
 	struct proc_dir_entry **sdir = salinfo_proc_entries; /* keeps track of every entry */
 	struct proc_dir_entry *dir, *entry;
-	struct salinfo_event *event;
 	struct salinfo_data *data;
-	int i, j;
+	int i, j, online;
 
 	salinfo_dir = proc_mkdir("sal", NULL);
 	if (!salinfo_dir)
@@ -324,6 +536,9 @@
 	}
 
 	for (i = 0; i < ARRAY_SIZE(salinfo_log_name); i++) {
+		data = salinfo_data + i;
+		data->type = i;
+		sema_init(&data->sem, 0);
 		dir = proc_mkdir(salinfo_log_name[i], salinfo_dir);
 		if (!dir)
 			continue;
@@ -331,32 +546,26 @@
 		entry = create_proc_entry("event", S_IRUSR, dir);
 		if (!entry)
 			continue;
-
-		event = kmalloc(sizeof(*event), GFP_KERNEL);
-		if (!event)
-			continue;
-		memset(event, 0, sizeof(*event));
-		event->type = i;
-		init_waitqueue_head(&event->queue);
-		salinfo_event[i] = event;
-		/* we missed any events before now */
-		for (j = 0; j < NR_CPUS; j++)
-			if (cpu_online(j))
-				set_bit(j, &event->cpu_mask);
-		entry->data = event;
+		entry->data = data;
 		entry->proc_fops = &salinfo_event_fops;
 		*sdir++ = entry;
 
 		entry = create_proc_entry("data", S_IRUSR | S_IWUSR, dir);
 		if (!entry)
 			continue;
-
-		data = &salinfo_data[i];
-		data->type = i;
 		entry->data = data;
 		entry->proc_fops = &salinfo_data_fops;
 		*sdir++ = entry;
 
+		/* we missed any events before now */
+		online = 0;
+		for (j = 0; j < NR_CPUS; j++)
+			if (cpu_online(j)) {
+				set_bit(j, &data->cpu_event);
+				++online;
+			}
+		sema_init(&data->sem, online);
+
 		*sdir++ = dir;
 	}
 
@@ -365,17 +574,6 @@
 	return 0;
 }
 
-static void __exit
-salinfo_exit(void)
-{
-	int i = 0;
-
-	for (i = 0; i < ARRAY_SIZE(salinfo_proc_entries); i++) {
-		if (salinfo_proc_entries[i])
-			remove_proc_entry (salinfo_proc_entries[i]->name, NULL);
-	}
-}
-
 /*
  * 'data' contains an integer that corresponds to the feature we're
  * testing
@@ -385,8 +583,6 @@
 {
 	int len = 0;
 
-	MOD_INC_USE_COUNT;
-
 	len = sprintf(page, (sal_platform_features & (unsigned long)data) ? "1\n" : "0\n");
 
 	if (len <= off+count) *eof = 1;
@@ -397,10 +593,7 @@
 	if (len>count) len = count;
 	if (len<0) len = 0;
 
-	MOD_DEC_USE_COUNT;
-
 	return len;
 }
 
 module_init(salinfo_init);
-module_exit(salinfo_exit);

FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen (who was at: slshen@lbl.gov)