patch-2.1.45 linux/mm/simp.c
- Lines: 435
- Date: Wed Jul 16 19:22:51 1997
- Orig file: v2.1.44/linux/mm/simp.c
- Orig date: Wed Dec 31 16:00:00 1969
diff -u --recursive --new-file v2.1.44/linux/mm/simp.c linux/mm/simp.c
@@ -0,0 +1,434 @@
+#define NULL 0
+/*
+ * mm/simp.c -- simple allocator for cached objects
+ *
+ * (C) 1997 Thomas Schoebel-Theuer
+ */
+
+#include <linux/simp.h>
+#include <linux/smp.h>
+#include <linux/mm.h>
+#include <asm/spinlock.h>
+
+/* The next two defines can be independently enabled for debugging */
+/*#define DEBUG*/
+/*#define DEAD_BEEF*/
+
+#ifdef DEAD_BEEF
+#define DEBUG_BEEF 1
+#else
+#define DEBUG_BEEF 0
+#endif
+
+#ifdef __SMP__
+#define NR_PROCESSORS NR_CPUS
+#define GLOBAL_SIZE CHUNK_SIZE
+#else
+#define NR_PROCESSORS 1
+#define GLOBAL_SIZE PAGE_SIZE
+#endif
+
+#define POSTBUFFER_SIZE 63
+#define ORDER 2
+#define CHUNK_SIZE (PAGE_SIZE*(1<<ORDER))
+#define CHUNK_BASE(ptr) (struct header*)(((unsigned long)(ptr)) & ~(CHUNK_SIZE-1))
+#define CHUNK_END(hdr) (void**)((char*)(hdr) + CHUNK_SIZE)
+
+#define COLOR_INCREMENT (8*sizeof(void*)) /* should be 1 cache line */
+#define ALIGN_CACHE(adr) ((((((unsigned long)adr) - 1) / COLOR_INCREMENT) + 1) * COLOR_INCREMENT)
+#define HEADER_SIZE ALIGN_CACHE(sizeof(struct header))
+#define ELEM_SIZE ALIGN_CACHE(sizeof(struct elem))
+#define FILL_TYPE(name,wrongsize) char name[ALIGN_CACHE(wrongsize)-(wrongsize)]
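+
+/* Worked example (illustrative, assuming 32-bit x86 with PAGE_SIZE 4096):
+ * CHUNK_SIZE is 16384, so CHUNK_BASE() recovers the chunk header from an
+ * object pointer by masking off the low 14 address bits, e.g.
+ * CHUNK_BASE(0xc1234567) == (struct header*)0xc1234000.
+ * COLOR_INCREMENT is 8*4 == 32 bytes, and ALIGN_CACHE() rounds up to the
+ * next multiple of it, e.g. ALIGN_CACHE(20) == 32 and ALIGN_CACHE(33) == 64.
+ */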
+
+#define MAX_SIMPS ((GLOBAL_SIZE / sizeof(struct simp)) - 1)
+
+struct header { /* this is at the beginning of each memory region */
+ /* 1st cache line */
+ void ** index;
+ void ** fresh;
+ struct simp * father;
+ void ** emptypos;
+ struct header * next;
+ structor again_ctor;
+ structor first_ctor;
+ void * fill[1];
+#ifdef DEBUG
+ /* 2nd cache line */
+ char magic[32];
+#endif
+};
+
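+/* Each CPU gets a private stack of free object pointers (the postbuffer);
+ * simp_alloc() and simp_free() normally just pop and push here without
+ * taking any lock, falling back to the locked bunch_alloc()/bunch_free()
+ * slow paths only when the buffer runs empty or full. */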
+struct per_processor {
+ void ** buffer_pos;
+ void * postbuffer[POSTBUFFER_SIZE];
+};
+
+struct simp {
+ /* 1st cache lines */
+ struct per_processor private[NR_PROCESSORS];
+ /* next cache line */
+ struct header * usable_list;
+ spinlock_t lock;
+ char fill[sizeof(void*) - sizeof(spinlock_t)];
+ long real_size;
+ long max_elems;
+ structor again_ctor;
+ structor first_ctor;
+ structor dtor;
+ long fill2;
+ /* next cache line */
+ long create_offset;
+ long color;
+ long max_color;
+ long size;
+ long fill3[4];
+ /* next cache line */
+ char name[32];
+};
+
+struct global_data {
+ /* 1st cache line */
+ long changed_flag;
+ long nr_simps;
+ spinlock_t lock;
+ char fill[(6+8)*sizeof(void*)+sizeof(void*)-sizeof(spinlock_t)];
+ /* rest */
+ struct simp simps[MAX_SIMPS];
+};
+
+static struct global_data * global = NULL;
+
+#ifdef DEBUG
+static char global_magic[32] = "SIMP header SdC581oi9rY20051962\n";
+#endif
+
+struct simp * simp_create(char * name, long size,
+ structor first_ctor,
+ structor again_ctor,
+ structor dtor)
+{
+ struct simp * simp;
+ long fraction;
+ long real_size;
+ int cpu;
+
+ if(!global) {
+#ifdef __SMP__
+ global = (struct global_data*)__get_free_pages(GFP_KERNEL, ORDER, 0);
+ if(!global)
+ return NULL;
+ memset(global, 0, CHUNK_SIZE);
+#else
+ global = (struct global_data*)get_free_page(GFP_KERNEL);
+ if(!global)
+ return NULL;
+#endif
+ spin_lock_init(&global->lock);
+ }
+
+ spin_lock(&global->lock);
+ if(global->nr_simps >= MAX_SIMPS) {
+ spin_unlock(&global->lock);
+ printk("SIMP: too many simps allocated\n");
+ return NULL;
+ }
+ simp = &global->simps[global->nr_simps++];
+ spin_unlock(&global->lock);
+ memset(simp, 0, sizeof(struct simp));
+ spin_lock_init(&simp->lock);
+ strncpy(simp->name, name, sizeof(simp->name)-1);
+ simp->size = size;
+ simp->real_size = real_size = ALIGN_CACHE(size);
+ /* allow very small objects to be packed into power-of-two
+ * fractions of a cache line */
+ fraction = COLOR_INCREMENT / 2;
+ while(size <= fraction && fraction >= sizeof(void*)) {
+ simp->real_size = fraction;
+ fraction >>= 1;
+ }
+ simp->first_ctor = first_ctor;
+ simp->again_ctor = again_ctor;
+ simp->dtor = dtor;
+
+ real_size += sizeof(void*);
+ simp->max_elems = (CHUNK_SIZE - HEADER_SIZE) / real_size;
+ simp->max_color = (CHUNK_SIZE - HEADER_SIZE) % real_size;
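+ /* Worked example (illustrative, 32-bit x86, DEBUG off): for size 100,
+ * real_size is ALIGN_CACHE(100) == 128, plus 4 bytes for the index slot
+ * gives 132; with CHUNK_SIZE 16384 and HEADER_SIZE 32 this yields
+ * max_elems == 16352/132 == 123 and max_color == 16352%132 == 116. */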
+ for(cpu = 0; cpu < NR_PROCESSORS; cpu++) {
+ struct per_processor * private = &simp->private[cpu];
+ private->buffer_pos = private->postbuffer;
+ }
+ return simp;
+}
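+
+/* A minimal usage sketch (hypothetical: "struct foo" and the foo_*
+ * constructors are illustrations, not part of this patch):
+ *
+ * static struct simp * foo_pool;
+ *
+ * foo_pool = simp_create("foo", sizeof(struct foo),
+ * foo_first_ctor, foo_again_ctor, NULL);
+ * if(!foo_pool)
+ * panic("cannot create foo pool");
+ */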
+
+/* Do *not* inline this, it clobbers too many registers... */
+static void alloc_header(struct simp * simp)
+{
+ struct header * hdr;
+ char * ptr;
+ void ** index;
+ long count;
+
+ spin_unlock(&simp->lock);
+ for(;;) {
+ hdr = (struct header*)__get_free_pages(GFP_KERNEL, ORDER, 0);
+ if(hdr)
+ break;
+ if(!simp_garbage()) {
+ spin_lock(&simp->lock); /* caller expects the lock to be held again */
+ return;
+ }
+ }
+#ifdef DEBUG
+ if(CHUNK_BASE(hdr) != hdr)
+ panic("simp: bad kernel page alignment");
+#endif
+
+ memset(hdr, 0, HEADER_SIZE);
+#ifdef DEBUG
+ memcpy(hdr->magic, global_magic, sizeof(global_magic));
+#endif
+ hdr->father = simp;
+ hdr->again_ctor = simp->again_ctor;
+ hdr->first_ctor = simp->first_ctor;
+
+ /* note: races on simp->color don't produce any error :-) */
+ ptr = ((char*)hdr) + HEADER_SIZE + simp->color;
+ index = CHUNK_END(hdr);
+ for(count = 0; count < simp->max_elems; count++) {
+ *--index = ptr;
+ ptr += simp->real_size;
+ /* note: constructors are not called here in a bunch, but
+ * at each individual simp_alloc(), so that the cache
+ * pollution caused by constructing an object occurs
+ * where the caller touches it anyway, and not here. */
+ }
+ hdr->index = hdr->fresh = hdr->emptypos = index;
+
+ spin_lock(&simp->lock);
+ simp->color += COLOR_INCREMENT;
+ if(simp->color >= simp->max_color)
+ simp->color = 0;
+ hdr->next = simp->usable_list;
+ simp->usable_list = hdr;
+}
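+
+/* Resulting chunk layout (sketch): the header occupies the start of the
+ * CHUNK_SIZE-aligned region, objects begin at HEADER_SIZE plus the current
+ * color offset, and the index -- an array of pointers to free objects,
+ * filled from CHUNK_END() downwards -- sits at the very end:
+ *
+ * [header][color][obj 0][obj 1]...[obj N-1][gap][ptr N-1 ... ptr 1 ptr 0]
+ */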
+
+
+/* The current x86 memcpy() shuffles registers around for nothing,
+ * does unnecessary work when the size is divisible by a power of two,
+ * and clobbers far too many registers.
+ * This results in nearly every other register being spilled to the stack.
+ * Fixing this would be a major win for the whole kernel!
+ */
+static void ** bunch_alloc(struct simp * simp, void ** buffer)
+{
+ struct header * hdr;
+ void ** index;
+ void ** to;
+ void ** end;
+ structor todo;
+ long length;
+
+ spin_lock(&simp->lock);
+ hdr = simp->usable_list;
+ if(!hdr) {
+ alloc_header(simp);
+ hdr = simp->usable_list;
+ if(!hdr) {
+ spin_unlock(&simp->lock);
+ *buffer = NULL;
+ return buffer+1;
+ }
+ }
+
+ index = hdr->index;
+ end = hdr->fresh;
+ todo = hdr->again_ctor;
+ if(index == end) {
+ end = CHUNK_END(hdr);
+ todo = hdr->first_ctor;
+ }
+ to = index + POSTBUFFER_SIZE/2;
+ if(to >= end) {
+ to = end;
+ if(to == CHUNK_END(hdr)) {
+ simp->usable_list = hdr->next;
+ hdr->next = NULL;
+ }
+ }
+ if(to > hdr->fresh)
+ hdr->fresh = to;
+ hdr->index = to;
+ length = ((unsigned long)to) - (unsigned long)index;
+ to = buffer + (length/sizeof(void**));
+
+ memcpy(buffer, index, length);
+
+ spin_unlock(&simp->lock);
+
+ if(todo) {
+ do {
+ todo(*buffer++);
+ } while(buffer < to);
+ }
+ return to;
+}
+
+void * simp_alloc(struct simp * simp)
+{
+#ifdef __SMP__
+ const long cpu = smp_processor_id();
+ struct per_processor * priv = &simp->private[cpu];
+#else
+#define priv (&simp->private[0]) /* fool gcc into using no extra register */
+#endif
+ void ** buffer_pos = priv->buffer_pos;
+ void * res;
+
+ if(buffer_pos == priv->postbuffer) {
+ buffer_pos = bunch_alloc(simp, buffer_pos);
+ }
+ buffer_pos--;
+ res = *buffer_pos;
+ priv->buffer_pos = buffer_pos;
+ return res;
+}
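+
+/* Illustrative fast path (using the hypothetical foo_pool from above):
+ *
+ * struct foo * f = simp_alloc(foo_pool);
+ * if(f) {
+ * ... use the object ...
+ * simp_free(f);
+ * }
+ *
+ * In the common case both calls merely move the per-CPU buffer_pos. */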
+
+#ifdef DEBUG
+long check_header(struct header * hdr, void * ptr)
+{
+ void ** test;
+
+ if(!hdr) {
+ printk("SIMP: simp_free() with NULL pointer\n");
+ return 1;
+ }
+ if(strncmp(hdr->magic, global_magic, 32)) {
+ printk("SIMP: simpe_free() with bad ptr %p, or header corruption\n", ptr);
+ return 1;
+ }
+ /* This is brute force, but I don't want to pay for any
+ * overhead if debugging is not enabled, in particular
+ * no space overhead for keeping hashtables etc. */
+ test = hdr->index;
+ while(test < CHUNK_END(hdr)) {
+ if(*test++ == ptr) {
+ printk("SIMP: trying to simp_free(%p) again\n", ptr);
+ return 1;
+ }
+ }
+ return 0;
+}
+#endif
+
+static void ** bunch_free(struct simp * simp, void ** buffer)
+{
+ void ** stop;
+
+ stop = buffer - POSTBUFFER_SIZE/3;
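+ /* Return about a third of the postbuffer to the chunk free lists;
+ * the loop below is unrolled to handle two elements per iteration. */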
+
+ spin_lock(&simp->lock);
+ while(buffer > stop) {
+ void * elem = buffer[-1];
+ struct header * hdr = CHUNK_BASE(elem);
+ void ** index = hdr->index;
+ index--;
+ hdr->index = index;
+ *index = elem;
+ if(!hdr->next) {
+ hdr->next = simp->usable_list;
+ simp->usable_list = hdr;
+ }
+
+ buffer -= 2;
+ elem = *buffer;
+ hdr = CHUNK_BASE(elem);
+ index = hdr->index;
+ index--;
+ hdr->index = index;
+ *index = elem;
+ if(!hdr->next) {
+ hdr->next = simp->usable_list;
+ simp->usable_list = hdr;
+ }
+ }
+ spin_unlock(&simp->lock);
+ global->changed_flag = 1;
+ return buffer;
+}
+
+void simp_free(void * objp)
+{
+ struct header * hdr;
+ void ** buffer_pos;
+ struct per_processor * private;
+#ifdef __SMP__
+ const long cpu = smp_processor_id();
+#else
+ const long cpu = 0;
+#endif
+
+ hdr = CHUNK_BASE(objp);
+#ifdef DEBUG
+ if(check_header(hdr, objp))
+ return;
+#endif
+
+ private = &hdr->father->private[cpu];
+ buffer_pos = private->buffer_pos;
+ if(buffer_pos >= private->postbuffer+POSTBUFFER_SIZE) {
+ buffer_pos = bunch_free(hdr->father, buffer_pos);
+ }
+
+ *buffer_pos++ = objp;
+ private->buffer_pos = buffer_pos;
+
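+ /* Poison the freed object so that late references show up as
+ * 0xdeadbeef values; the final ELEM_SIZE bytes are left intact. */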
+#ifdef DEAD_BEEF
+ {
+ unsigned int * ptr = (unsigned int*)objp;
+ int count = (hdr->father->real_size - ELEM_SIZE) / sizeof(unsigned int);
+ while(count--)
+ *ptr++ = 0xdeadbeef;
+ }
+#endif
+}
+
+long simp_garbage(void)
+{
+ int i;
+ int res;
+
+ if(!global->changed_flag)
+ return 0; /* shortcut */
+ /* Note: costs do not matter here. Any heavy thrashing of
+ * simp chunks that could be caused by pools stealing each
+ * other's memory has to be considered a BUG :-)
+ * Simply avoid memory shortages by conservative allocation
+ * policies.
+ */
+ global->changed_flag = 0;
+ res = 0;
+ for(i = 0; i < global->nr_simps; i++) {
+ struct simp * simp = &global->simps[i];
+ struct header ** base = &simp->usable_list;
+ struct header * del;
+
+ spin_lock(&simp->lock);
+ del = *base;
+ while(del) {
+ if(del->index == del->emptypos) {
+ if(simp->dtor) {
+ void ** ptr = del->index;
+ while(ptr < CHUNK_END(del)) {
+ simp->dtor(*ptr++);
+ }
+ }
+ *base = del->next;
+#ifdef DEBUG
+ memset(del, 0, CHUNK_SIZE);
+#endif
+ free_pages((unsigned long)del, ORDER);
+ res++;
+ } else
+ base = &del->next;
+ del = *base;
+ }
+ spin_unlock(&simp->lock);
+ }
+ return res;
+}
+