patch-2.1.45 linux/fs/inode.c
- Lines: 1029
- Date: Wed Jul 16 16:56:52 1997
- Orig file: v2.1.44/linux/fs/inode.c
- Orig date: Mon Jul 7 16:28:05 1997
diff -u --recursive --new-file v2.1.44/linux/fs/inode.c linux/fs/inode.c
@@ -1,623 +1,457 @@
/*
- * fs/inode.c
+ * linux/fs/inode.c
*
- * Complete reimplementation
- * (C) 1997 Thomas Schoebel-Theuer
+ * (C) 1997 Linus Torvalds
*/
-/* Everything here is intended to be MP-safe. However, other parts
- * of the kernel are not yet MP-safe, in particular the inode->i_count++
- * that are spread over everywhere. These should be replaced by
- * iinc() as soon as possible. Since I have no MP machine, I could
- * not test it.
- */
-#include <linux/config.h>
-#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/string.h>
#include <linux/mm.h>
-#include <linux/kernel.h>
-#include <linux/dlists.h>
-#include <linux/dalloc.h>
-#include <linux/omirr.h>
-
-/* #define DEBUG */
-
-#define HASH_SIZE 1024 /* must be a power of 2 */
-#define NR_LEVELS 4
-
-#define ST_AGED 1
-#define ST_HASHED 2
-#define ST_EMPTY 4
-#define ST_TO_READ 8
-#define ST_TO_WRITE 16
-#define ST_TO_PUT 32
-#define ST_TO_DROP 64
-#define ST_IO (ST_TO_READ|ST_TO_WRITE|ST_TO_PUT|ST_TO_DROP)
-#define ST_WAITING 128
-#define ST_FREEING 256
-#define ST_IBASKET 512
-
-/* The idea is to keep empty inodes in a separate list, so no search
- * is required as long as empty inodes exist.
- * All reusable inodes occurring in the hash table with i_count==0
- * are also registered in the ringlist aged_i[level], but in LRU order.
- * Used inodes with i_count>0 are kept solely in the hashtable and in
- * all_i, but in no other list.
- * The level is used for multilevel aging to avoid thrashing; each
- * time i_count decreases to 0, the inode is inserted into the next level
- * ringlist. Cache reusage is simply by taking the _last_ element from the
- * lowest-level ringlist that contains inodes.
- * In contrast to the old code, there isn't any O(n) search overhead now
- * in iget/iput (if you make HASH_SIZE large enough).
- */
-static struct inode * hashtable[HASH_SIZE];/* linked with i_hash_{next,prev} */
-static struct inode * all_i = NULL; /* linked with i_{next,prev} */
-static struct inode * empty_i = NULL; /* linked with i_{next,prev} */
-static struct inode * aged_i[NR_LEVELS+1]; /* linked with i_lru_{next,prev} */
-static int aged_reused[NR_LEVELS+1]; /* # removals from aged_i[level] */
-static int age_table[NR_LEVELS+1] = { /* You may tune this. */
- 1, 4, 10, 100, 1000
-}; /* after which # of uses to increase to the next level */
-/* This is for kernel/sysctl.c */
+/*
+ * New inode.c implementation.
+ *
+ * This implementation has the basic premise of trying
+ * to be extremely low-overhead and SMP-safe, yet be
+ * simple enough to be "obviously correct".
+ *
+ * Famous last words.
+ */
+
+/*
+ * Inode lookup is no longer as critical as it used to be:
+ * most of the lookups are going to be through the dcache.
+ */
+#define HASH_BITS 8
+#define HASH_SIZE (1UL << HASH_BITS)
+#define HASH_MASK (HASH_SIZE-1)
+
+/*
+ * Each inode can be on two separate lists. One is
+ * the hash list of the inode, used for lookups. The
+ * other linked list is the "type" list:
+ * "in_use" - valid inode, hashed
+ * "dirty" - valid inode, hashed, dirty.
+ * "unused" - ready to be re-used. Not hashed.
+ *
+ * The first two versions also have a dirty list, allowing
+ * for low-overhead inode sync() operations.
+ */
+
+static LIST_HEAD(inode_in_use);
+static LIST_HEAD(inode_dirty);
+static LIST_HEAD(inode_unused);
+static struct list_head inode_hashtable[HASH_SIZE];
+
+/*
+ * A simple spinlock to protect the list manipulations
+ */
+spinlock_t inode_lock = SPIN_LOCK_UNLOCKED;
-/* Just aligning plain ints and arrays thereof doesn't work reliably.. */
+/*
+ * Statistics gathering.. Not actually done yet.
+ */
struct {
int nr_inodes;
int nr_free_inodes;
- int aged_count[NR_LEVELS+1]; /* # in each level */
+ int dummy[10];
} inodes_stat;
int max_inodes = NR_INODE;
-unsigned long last_inode = 0;
-void inode_init(void)
-{
- memset(hashtable, 0, sizeof(hashtable));
- memset(aged_i, 0, sizeof(aged_i));
- memset(aged_reused, 0, sizeof(aged_reused));
- memset(&inodes_stat, 0, sizeof(inodes_stat));
-}
-
-/* Intended for short locks of the above global data structures.
- * Could be replaced with spinlocks completely, since there is
- * no blocking during manipulation of the static data; however the
- * lock in invalidate_inodes() may last relatively long.
- */
-#ifdef __SMP__
-struct semaphore vfs_sem = MUTEX;
-#endif
-
-DEF_INSERT(all,struct inode,i_next,i_prev)
-DEF_REMOVE(all,struct inode,i_next,i_prev)
-
-DEF_INSERT(lru,struct inode,i_lru_next,i_lru_prev)
-DEF_REMOVE(lru,struct inode,i_lru_next,i_lru_prev)
-
-DEF_INSERT(hash,struct inode,i_hash_next,i_hash_prev)
-DEF_REMOVE(hash,struct inode,i_hash_next,i_hash_prev)
-
-DEF_INSERT(ibasket,struct inode,i_basket_next,i_basket_prev)
-DEF_REMOVE(ibasket,struct inode,i_basket_next,i_basket_prev)
-
-#ifdef DEBUG
-extern void printpath(struct dentry * entry);
-struct inode * xtst[15000];
-int xcnt = 0;
-
-void xcheck(char * txt, struct inode * p)
+void __mark_inode_dirty(struct inode *inode)
{
- int i;
- for(i=xcnt-1; i>=0; i--)
- if (xtst[i] == p)
- return;
- printk("Bogus inode %p in %s\n", p, txt);
-}
-#else
-#define xcheck(t,p) /*nothing*/
-#endif
-
-static inline struct inode * grow_inodes(void)
-{
- struct inode * res;
- struct inode * inode = res = (struct inode*)__get_free_page(GFP_KERNEL);
- int size = PAGE_SIZE;
- if (!inode)
- return NULL;
-
- size -= sizeof(struct inode);
- inode++;
- inodes_stat.nr_inodes++;
-#ifdef DEBUG
-xtst[xcnt++]=res;
-#endif
- while(size >= sizeof(struct inode)) {
-#ifdef DEBUG
-xtst[xcnt++]=inode;
-#endif
- inodes_stat.nr_inodes++;
- inodes_stat.nr_free_inodes++;
- insert_all(&empty_i, inode);
- inode->i_status = ST_EMPTY;
- inode++;
- size -= sizeof(struct inode);
- }
- return res;
+ spin_lock(&inode_lock);
+ list_del(&inode->i_list);
+ list_add(&inode->i_list, &inode_dirty);
+ spin_unlock(&inode_lock);
}
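A filesystem would presumably mark an inode dirty through a thin caller-side wrapper rather than calling __mark_inode_dirty() directly, so the spinlock is only taken the first time an inode turns dirty. The wrapper below is a sketch under that assumption; its name and placement are not part of this patch:

    /* Plausible caller-side wrapper (sketch only, not in this patch):
     * skip the lock round-trip if the inode is already on the dirty list. */
    static inline void mark_inode_dirty(struct inode *inode)
    {
            if (!test_bit(I_DIRTY, &inode->i_state)) {
                    set_bit(I_DIRTY, &inode->i_state);
                    __mark_inode_dirty(inode);
            }
    }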
-static inline int hash(dev_t i_dev, unsigned long i_ino)
+static inline void unlock_inode(struct inode *inode)
{
- return ((int)i_ino ^ ((int)i_dev << 6)) & (HASH_SIZE-1);
+ clear_bit(I_LOCK, &inode->i_state);
+ wake_up(&inode->i_wait);
}
-static inline blocking void wait_io(struct inode * inode, unsigned short flags)
+static void __wait_on_inode(struct inode * inode)
{
- while(inode->i_status & flags) {
- struct wait_queue wait = {current, NULL};
- inode->i_status |= ST_WAITING;
- vfs_unlock();
- add_wait_queue(&inode->i_wait, &wait);
- sleep_on(&inode->i_wait);
- remove_wait_queue(&inode->i_wait, &wait);
- vfs_lock();
- }
-}
+ struct wait_queue wait = { current, NULL };
-static inline blocking void set_io(struct inode * inode,
- unsigned short waitflags,
- unsigned short setflags)
-{
- wait_io(inode, waitflags);
- inode->i_status |= setflags;
- vfs_unlock();
-}
-
-static inline blocking int release_io(struct inode * inode, unsigned short flags)
-{
- int res = 0;
- vfs_lock();
- inode->i_status &= ~flags;
- if (inode->i_status & ST_WAITING) {
- inode->i_status &= ~ST_WAITING;
- vfs_unlock();
- wake_up(&inode->i_wait);
- res = 1;
+ add_wait_queue(&inode->i_wait, &wait);
+repeat:
+ current->state = TASK_UNINTERRUPTIBLE;
+ if (test_bit(I_LOCK, &inode->i_state)) {
+ schedule();
+ goto repeat;
}
- return res;
+ remove_wait_queue(&inode->i_wait, &wait);
+ current->state = TASK_RUNNING;
}
-static inline blocking void _io(void (*op)(struct inode*), struct inode * inode,
- unsigned short waitflags, unsigned short setflags)
+static inline void wait_on_inode(struct inode *inode)
{
- /* Do nothing if the same op is already in progress. */
- if (op && !(inode->i_status & setflags)) {
- set_io(inode, waitflags, setflags);
- op(inode);
- if (release_io(inode, setflags)) {
- /* Somebody grabbed my inode from under me. */
-#ifdef DEBUG
- printk("_io grab!\n");
-#endif
- vfs_lock();
- }
- }
+ if (test_bit(I_LOCK, &inode->i_state))
+ __wait_on_inode(inode);
}
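The I_LOCK bit and i_wait together form a simple ownership hand-off: whoever sets I_LOCK owns the inode for the duration of a blocking operation and wakes any sleepers through unlock_inode() when done. A minimal sketch of that pattern, as used by read_inode() and sync_list() further down (illustration only, not part of the patch):

    /* Illustration of the I_LOCK hand-off used elsewhere in this file. */
    static void example_locked_io(struct inode *inode)
    {
            spin_lock(&inode_lock);
            set_bit(I_LOCK, &inode->i_state);   /* claim the inode */
            spin_unlock(&inode_lock);

            /* ... blocking work, e.g. reading the on-disk inode ... */

            unlock_inode(inode);                /* clear I_LOCK, wake i_wait */
    }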
-blocking void _clear_inode(struct inode * inode, int external, int verbose)
+/*
+ * These are initializations that only need to be done
+ * once, because the fields are idempotent across use
+ * of the inode..
+ */
+static inline void init_once(struct inode * inode)
{
-xcheck("_clear_inode",inode);
- if (inode->i_status & ST_IBASKET) {
- struct super_block * sb = inode->i_sb;
- remove_ibasket(&sb->s_ibasket, inode);
- sb->s_ibasket_count--;
- inode->i_status &= ~ST_IBASKET;
-#if 0
-printpath(inode->i_dentry);
-printk(" put_inode\n");
-#endif
- _io(sb->s_op->put_inode, inode, ST_TO_PUT|ST_TO_WRITE, ST_TO_PUT);
- if (inode->i_status & ST_EMPTY)
- return;
- }
- if (inode->i_status & ST_HASHED)
- remove_hash(&hashtable[hash(inode->i_dev, inode->i_ino)], inode);
- if (inode->i_status & ST_AGED) {
- /* "cannot happen" when called from an fs because at least
- * the caller must use it. Can happen when called from
- * invalidate_inodes(). */
- if (verbose)
- printk("VFS: clearing aged inode\n");
- if (atomic_read(&inode->i_count))
- printk("VFS: aged inode is in use\n");
- remove_lru(&aged_i[inode->i_level], inode);
- inodes_stat.aged_count[inode->i_level]--;
- }
- if (!external && inode->i_status & ST_IO) {
- printk("VFS: clearing inode during IO operation\n");
- }
- if (!(inode->i_status & ST_EMPTY)) {
- remove_all(&all_i, inode);
- inode->i_status = ST_EMPTY;
- if (inode->i_pages) {
- vfs_unlock(); /* may block, can that be revised? */
- truncate_inode_pages(inode, 0);
- vfs_lock();
- }
- insert_all(&empty_i, inode);
- inodes_stat.nr_free_inodes++;
- } else if(external)
- printk("VFS: empty inode is unnecessarily cleared multiple "
- "times by an fs\n");
- else
- printk("VFS: clearing empty inode\n");
- inode->i_status = ST_EMPTY;
- /* The inode is not really cleared any more here, but only once
- * when taken from empty_i. This saves instructions and processor
- * cache pollution.
- */
+ memset(inode, 0, sizeof(*inode));
+ init_waitqueue(&inode->i_wait);
+ INIT_LIST_HEAD(&inode->i_dentry);
+ INIT_LIST_HEAD(&inode->i_hash);
+ sema_init(&inode->i_sem, 1);
}
-void insert_inode_hash(struct inode * inode)
-{
-xcheck("insert_inode_hash",inode);
- vfs_lock();
- if (!(inode->i_status & ST_HASHED)) {
- insert_hash(&hashtable[hash(inode->i_dev, inode->i_ino)], inode);
- inode->i_status |= ST_HASHED;
- } else
- printk("VFS: trying to hash an inode again\n");
- vfs_unlock();
-}
-blocking struct inode * _get_empty_inode(void)
+/*
+ * Look out! This returns with the inode lock held if
+ * it got an inode..
+ */
+static struct inode * grow_inodes(void)
{
- struct inode * inode;
- int retry = 0;
+ struct inode * inode = (struct inode *)__get_free_page(GFP_KERNEL);
-retry:
- inode = empty_i;
if (inode) {
- remove_all(&empty_i, inode);
- inodes_stat.nr_free_inodes--;
- } else if(inodes_stat.nr_inodes < max_inodes || retry > 2) {
- inode = grow_inodes();
- }
- if (!inode) {
- int level;
- int usable = 0;
- for(level = 0; level <= NR_LEVELS; level++)
- if (aged_i[level]) {
- inode = aged_i[level]->i_lru_prev;
- /* Here is the picking strategy, tune this */
- if (aged_reused[level] < (usable++ ?
- inodes_stat.aged_count[level] :
- 2))
- break;
- aged_reused[level] = 0;
- }
- if (inode) {
- if (!(inode->i_status & ST_AGED))
- printk("VFS: inode aging inconsistency\n");
- if (atomic_read(&inode->i_count))
- printk("VFS: i_count of aged inode is not zero\n");
- if (inode->i_dirt)
- printk("VFS: Hey, somebody made my aged inode dirty\n");
- _clear_inode(inode, 0, 0);
- goto retry;
- }
- }
- if (!inode) {
- vfs_unlock();
- schedule();
- if (retry > 10)
- panic("VFS: cannot repair inode shortage");
- if (retry > 2)
- printk("VFS: no free inodes\n");
- retry++;
- vfs_lock();
- goto retry;
+ int size;
+ struct inode * tmp;
+
+ spin_lock(&inode_lock);
+ size = PAGE_SIZE - 2*sizeof(struct inode);
+ tmp = inode;
+ do {
+ tmp++;
+ init_once(tmp);
+ list_add(&tmp->i_list, &inode_unused);
+ size -= sizeof(struct inode);
+ } while (size >= 0);
+ init_once(inode);
}
-xcheck("get_empty_inode",inode);
- memset(inode, 0, sizeof(struct inode));
- atomic_set(&inode->i_count, 1);
- inode->i_nlink = 1;
- sema_init(&inode->i_sem, 1);
- inode->i_ino = ++last_inode;
- inode->i_version = ++event;
- insert_all(&all_i, inode);
return inode;
}
-static inline blocking struct inode * _get_empty_inode_hashed(dev_t i_dev,
- unsigned long i_ino)
+static inline void write_inode(struct inode *inode)
{
- struct inode ** base = &hashtable[hash(i_dev, i_ino)];
- struct inode * inode = *base;
- if (inode) do {
- if (inode->i_ino == i_ino && inode->i_dev == i_dev) {
- atomic_inc(&inode->i_count);
- printk("VFS: inode %lx is already in use\n", i_ino);
- return inode;
- }
- inode = inode->i_hash_next;
- } while(inode != *base);
- inode = _get_empty_inode();
- inode->i_dev = i_dev;
- inode->i_ino = i_ino;
- insert_hash(base, inode);
- inode->i_status |= ST_HASHED;
- return inode;
+ if (inode->i_sb && inode->i_sb->s_op && inode->i_sb->s_op->write_inode)
+ inode->i_sb->s_op->write_inode(inode);
}
-blocking struct inode * get_empty_inode_hashed(dev_t i_dev, unsigned long i_ino)
+static inline void sync_list(struct list_head *head, struct list_head *clean)
{
- struct inode * inode;
+ struct list_head * tmp;
+
+ while ((tmp = head->prev) != head) {
+ struct inode *inode = list_entry(tmp, struct inode, i_list);
+ list_del(tmp);
+
+ /*
+ * If the inode is locked, it's already being written out.
+ * We have to wait for it, though.
+ */
+ if (test_bit(I_LOCK, &inode->i_state)) {
+ list_add(tmp, head);
+ spin_unlock(&inode_lock);
+ __wait_on_inode(inode);
+ } else {
+ list_add(tmp, clean);
+ clear_bit(I_DIRTY, &inode->i_state);
+ set_bit(I_LOCK, &inode->i_state);
+ spin_unlock(&inode_lock);
+ write_inode(inode);
+ unlock_inode(inode);
+ }
+ spin_lock(&inode_lock);
+ }
+}
- vfs_lock();
- inode = _get_empty_inode_hashed(i_dev, i_ino);
- vfs_unlock();
- return inode;
+/*
+ * "sync_inodes()" goes through the dirty list
+ * and writes them out and puts them back on
+ * the normal list.
+ */
+void sync_inodes(kdev_t dev)
+{
+ spin_lock(&inode_lock);
+ sync_list(&inode_dirty, &inode_in_use);
+ spin_unlock(&inode_lock);
}
-void _get_inode(struct inode * inode)
+/*
+ * This is called by the filesystem to tell us
+ * that the inode is no longer useful. We just
+ * terminate it with extreme prejudice.
+ */
+void clear_inode(struct inode *inode)
{
- if (inode->i_status & ST_IBASKET) {
- inode->i_status &= ~ST_IBASKET;
- remove_ibasket(&inode->i_sb->s_ibasket, inode);
- inode->i_sb->s_ibasket_count--;
- }
- if (inode->i_status & ST_AGED) {
- inode->i_status &= ~ST_AGED;
- remove_lru(&aged_i[inode->i_level], inode);
- inodes_stat.aged_count[inode->i_level]--;
- aged_reused[inode->i_level]++;
- if (S_ISDIR(inode->i_mode))
- /* make dirs less thrashable */
- inode->i_level = NR_LEVELS-1;
- else if(inode->i_nlink > 1)
- /* keep hardlinks totally separate */
- inode->i_level = NR_LEVELS;
- else if(++inode->i_reuse_count >= age_table[inode->i_level]
- && inode->i_level < NR_LEVELS-1)
- inode->i_level++;
- if (atomic_read(&inode->i_count) != 1)
- printk("VFS: inode count was not zero (%d after ++)\n", atomic_read(&inode->i_count));
- } else if(inode->i_status & ST_EMPTY)
- printk("VFS: invalid reuse of empty inode\n");
+ truncate_inode_pages(inode, 0);
+ wait_on_inode(inode);
+ if (IS_WRITABLE(inode) && inode->i_sb && inode->i_sb->dq_op)
+ inode->i_sb->dq_op->drop(inode);
+
+ inode->i_state = 0;
}
-blocking struct inode * iget(struct super_block * sb, unsigned long i_ino)
+#define CAN_UNUSE(inode) \
+ (((inode)->i_count == 0) && \
+ ((inode)->i_nrpages == 0) && \
+ (!(inode)->i_state))
+
+static void invalidate_list(struct list_head *head, kdev_t dev)
{
- struct inode ** base;
- struct inode * inode;
- dev_t i_dev;
-
- if (!sb)
- panic("VFS: iget with sb == NULL");
- i_dev = sb->s_dev;
- if (!i_dev)
- panic("VFS: sb->s_dev is NULL\n");
- base = &hashtable[hash(i_dev, i_ino)];
- vfs_lock();
- inode = *base;
- if (inode) do {
- if (inode->i_ino == i_ino && inode->i_dev == i_dev) {
- atomic_inc(&inode->i_count);
- _get_inode(inode);
-
- /* Allow concurrent writes/puts. This is in particular
- * useful e.g. when syncing large chunks.
- * I hope the i_dirty flag is everywhere set as soon
- * as _any_ modification is made and _before_
- * giving up control, so no harm should occur if data
- * is modified during writes, because it will be
- * rewritten again (does a short inconsistency on the
- * disk harm?)
- */
- wait_io(inode, ST_TO_READ);
- vfs_unlock();
- goto done;
- }
- inode = inode->i_hash_next;
- } while(inode != *base);
- inode = _get_empty_inode_hashed(i_dev, i_ino);
- inode->i_sb = sb;
- inode->i_flags = sb->s_flags;
- if (sb->s_op && sb->s_op->read_inode) {
- set_io(inode, 0, ST_TO_READ); /* do not wait at all */
- sb->s_op->read_inode(inode);
- if (release_io(inode, ST_TO_READ))
- goto done;
+ struct list_head *next;
+
+ next = head->next;
+ for (;;) {
+ struct list_head * tmp = next;
+ struct inode * inode;
+
+ next = next->next;
+ if (tmp == head)
+ break;
+ inode = list_entry(tmp, struct inode, i_list);
+ if (inode->i_dev != dev)
+ continue;
+ if (!CAN_UNUSE(inode))
+ continue;
+ list_del(&inode->i_hash);
+ INIT_LIST_HEAD(&inode->i_hash);
+ list_del(&inode->i_list);
+ list_add(&inode->i_list, &inode_unused);
}
- vfs_unlock();
-done:
- return inode;
}
-blocking void __iput(struct inode * inode)
+void invalidate_inodes(kdev_t dev)
{
- struct super_block * sb;
-xcheck("_iput",inode);
-
- if (atomic_read(&inode->i_count) < 0)
- printk("VFS: i_count is negative\n");
+ spin_lock(&inode_lock);
+ invalidate_list(&inode_in_use, dev);
+ invalidate_list(&inode_dirty, dev);
+ spin_unlock(&inode_lock);
+}
- if (atomic_read(&inode->i_count) || (inode->i_status & ST_FREEING))
- return;
+/*
+ * This is called with the inode lock held. It just looks at the last
+ * inode on the in-use list, and if the inode is trivially freeable
+ * we just move it to the unused list.
+ *
+ * Otherwise we just move the inode to be the first inode and expect to
+ * get back to the problem later..
+ */
+static void try_to_free_inodes(void)
+{
+ struct list_head * tmp;
+ struct list_head *head = &inode_in_use;
- inode->i_status |= ST_FREEING;
- if (inode->i_pipe) {
- free_page((unsigned long)PIPE_BASE(*inode));
- PIPE_BASE(*inode)= NULL;
- }
- if ((sb = inode->i_sb)) {
- if (sb->s_op) {
- if (inode->i_nlink <= 0 &&
- !(inode->i_status & (ST_EMPTY|ST_IBASKET))) {
- _clear_inode(inode, 0, 1);
- goto done;
- }
- if (inode->i_dirt) {
- inode->i_dirt = 0;
- _io(sb->s_op->write_inode, inode,
- ST_TO_PUT|ST_TO_WRITE, ST_TO_WRITE);
- if (atomic_read(&inode->i_count))
- goto done;
- }
- }
- if (IS_WRITABLE(inode) && sb->dq_op) {
- /* can operate in parallel to other ops ? */
- _io(sb->dq_op->drop, inode, 0, ST_TO_DROP);
- if (atomic_read(&inode->i_count))
- goto done;
+ tmp = head->prev;
+ if (tmp != head) {
+ struct inode * inode;
+
+ list_del(tmp);
+ inode = list_entry(tmp, struct inode, i_list);
+ if (CAN_UNUSE(inode)) {
+ list_del(&inode->i_hash);
+ INIT_LIST_HEAD(&inode->i_hash);
+ head = &inode_unused;
}
+ list_add(tmp, head);
}
- if (inode->i_mmap)
- printk("VFS: inode has mappings\n");
- if (inode->i_status & ST_AGED) {
- printk("VFS: reaging inode\n");
-#if defined(DEBUG)
-printpath(inode->i_dentry);
-printk("\n");
-#endif
- goto done;
- }
- if (!(inode->i_status & (ST_HASHED|ST_EMPTY))) {
- _clear_inode(inode, 0, 1);
- goto done;
- }
- if (inode->i_status & ST_EMPTY) {
- printk("VFS: aging an empty inode\n");
- goto done;
+}
+
+
+static struct inode * find_inode(struct super_block * sb, unsigned long ino, struct list_head *head)
+{
+ struct list_head *tmp;
+ struct inode * inode;
+
+ tmp = head;
+ for (;;) {
+ tmp = tmp->next;
+ inode = NULL;
+ if (tmp == head)
+ break;
+ inode = list_entry(tmp, struct inode, i_hash);
+ if (inode->i_sb != sb)
+ continue;
+ if (inode->i_ino != ino)
+ continue;
+ inode->i_count++;
+ break;
}
- insert_lru(&aged_i[inode->i_level], inode);
- inodes_stat.aged_count[inode->i_level]++;
- inode->i_status |= ST_AGED;
-done:
- inode->i_status &= ~ST_FREEING;
+ return inode;
}
-blocking void _iput(struct inode * inode)
+/*
+ * This just initializes the inode fields
+ * to known values before returning the inode..
+ *
+ * i_sb, i_ino, i_count, i_state and the lists have
+ * been initialized elsewhere..
+ */
+void clean_inode(struct inode *inode)
{
- vfs_lock();
- __iput(inode);
- vfs_unlock();
+ memset(&inode->u, 0, sizeof(inode->u));
+ inode->i_sock = 0;
+ inode->i_op = NULL;
+ inode->i_nlink = 0;
+ inode->i_writecount = 0;
+ inode->i_size = 0;
+ memset(&inode->i_dquot, 0, sizeof(inode->i_dquot));
+ sema_init(&inode->i_sem, 1);
}
-blocking void sync_inodes(kdev_t dev)
+/*
+ * This gets called with I_LOCK held: it needs
+ * to read the inode and then unlock it
+ */
+static inline void read_inode(struct inode *inode, struct super_block *sb)
{
- struct inode * inode;
- vfs_lock();
- inode = all_i;
- if (inode) do {
-xcheck("sync_inodes",inode);
- if (inode->i_dirt && (inode->i_dev == dev || !dev)) {
- if (inode->i_sb && inode->i_sb->s_op &&
- !(inode->i_status & ST_FREEING)) {
- inode->i_dirt = 0;
- _io(inode->i_sb->s_op->write_inode, inode,
- ST_IO, ST_TO_WRITE);
- }
- }
- inode = inode->i_next;
- } while(inode != all_i);
- vfs_unlock();
+ sb->s_op->read_inode(inode);
+ unlock_inode(inode);
}
-blocking int _check_inodes(kdev_t dev, int complain)
+struct inode * get_empty_inode(void)
{
+ static unsigned long last_ino = 0;
struct inode * inode;
- int bad = 0;
+ struct list_head * tmp;
- vfs_lock();
-startover:
- inode = all_i;
- if (inode) do {
- struct inode * next;
-xcheck("_check_inodes",inode);
- next = inode->i_next;
- if (inode->i_dev == dev) {
- if (inode->i_dirt || atomic_read(&inode->i_count)) {
- bad++;
- } else {
- _clear_inode(inode, 0, 0);
-
- /* _clear_inode() may recursively clear other
- * inodes, probably also the next one.
- */
- if (next->i_status & ST_EMPTY)
- goto startover;
- }
- }
- inode = next;
- } while(inode != all_i);
- vfs_unlock();
- if (complain && bad)
- printk("VFS: %d inode(s) busy on removed device `%s'\n",
- bad, kdevname(dev));
- return (bad == 0);
+ spin_lock(&inode_lock);
+ try_to_free_inodes();
+ tmp = inode_unused.next;
+ if (tmp != &inode_unused) {
+ list_del(tmp);
+ inode = list_entry(tmp, struct inode, i_list);
+add_new_inode:
+ inode->i_sb = NULL;
+ inode->i_ino = ++last_ino;
+ inode->i_count = 1;
+ list_add(&inode->i_list, &inode_in_use);
+ inode->i_state = 0;
+ spin_unlock(&inode_lock);
+ clean_inode(inode);
+ return inode;
+ }
+
+ /*
+ * Warning: if this succeeded, we will now
+ * return with the inode lock.
+ */
+ spin_unlock(&inode_lock);
+ inode = grow_inodes();
+ if (inode)
+ goto add_new_inode;
+
+ return inode;
}
-/*inline*/ void invalidate_inodes(kdev_t dev)
+/*
+ * This is called with the inode lock held.. Be careful.
+ */
+static struct inode * get_new_inode(struct super_block *sb, unsigned long ino, struct list_head *head)
{
- /* Requires two passes, because of the new dcache holding
- * directories with i_count > 1.
+ struct inode * inode;
+ struct list_head * tmp = inode_unused.next;
+
+ if (tmp != &inode_unused) {
+ list_del(tmp);
+ inode = list_entry(tmp, struct inode, i_list);
+add_new_inode:
+ list_add(&inode->i_list, &inode_in_use);
+ list_add(&inode->i_hash, head);
+ inode->i_sb = sb;
+ inode->i_dev = sb->s_dev;
+ inode->i_ino = ino;
+ inode->i_flags = sb->s_flags;
+ inode->i_count = 1;
+ inode->i_state = 1 << I_LOCK;
+ spin_unlock(&inode_lock);
+ clean_inode(inode);
+ read_inode(inode, sb);
+ return inode;
+ }
+
+ /*
+ * Uhhuh.. We need to expand. Unlock for the allocation,
+ * but note that "grow_inodes()" will return with the
+ * lock held again if the allocation succeeded.
*/
- (void)_check_inodes(dev, 0);
- (void)_check_inodes(dev, 1);
+ spin_unlock(&inode_lock);
+ inode = grow_inodes();
+ if (inode) {
+ /* We released the lock, so.. */
+ struct inode * old = find_inode(sb, ino, head);
+ if (!old)
+ goto add_new_inode;
+ list_add(&inode->i_list, &inode_unused);
+ spin_unlock(&inode_lock);
+ wait_on_inode(old);
+ return old;
+ }
+ return inode;
}
-/*inline*/ int fs_may_mount(kdev_t dev)
+static inline unsigned long hash(struct super_block *sb, unsigned long i_ino)
{
- return _check_inodes(dev, 0);
+ unsigned long tmp = i_ino | (unsigned long) sb;
+ tmp = tmp + (tmp >> HASH_BITS) + (tmp >> HASH_BITS*2);
+ return tmp & HASH_MASK;
}
-int fs_may_remount_ro(kdev_t dev)
+struct inode *iget(struct super_block *sb, unsigned long ino)
{
- (void)dev;
- return 1; /* not checked any more */
+ struct list_head * head = inode_hashtable + hash(sb,ino);
+ struct inode * inode;
+
+ spin_lock(&inode_lock);
+ inode = find_inode(sb, ino, head);
+ if (!inode) {
+ try_to_free_inodes();
+ return get_new_inode(sb, ino, head);
+ }
+ spin_unlock(&inode_lock);
+ wait_on_inode(inode);
+ return inode;
}
-int fs_may_umount(kdev_t dev, struct dentry * root)
+void insert_inode_hash(struct inode *inode)
{
- struct inode * inode;
- vfs_lock();
- inode = all_i;
- if (inode) do {
- if (inode->i_dev == dev && atomic_read(&inode->i_count))
- if (inode != root->d_inode) {
- vfs_unlock();
- return 0;
- }
- inode = inode->i_next;
- } while(inode != all_i);
- vfs_unlock();
- return 1;
+ struct list_head *head = inode_hashtable + hash(inode->i_sb, inode->i_ino);
+ list_add(&inode->i_hash, head);
}
-extern struct inode_operations pipe_inode_operations;
-
-blocking struct inode * get_pipe_inode(void)
+void iput(struct inode *inode)
{
- struct inode * inode = get_empty_inode();
+ if (inode) {
+ struct super_operations *op = NULL;
- PIPE_BASE(*inode) = (char*)__get_free_page(GFP_USER);
- if (!(PIPE_BASE(*inode))) {
- iput(inode);
- return NULL;
+ if (inode->i_sb && inode->i_sb->s_op)
+ op = inode->i_sb->s_op;
+ if (op && op->put_inode)
+ op->put_inode(inode);
+
+ spin_lock(&inode_lock);
+ if (!--inode->i_count) {
+ if (!inode->i_nlink) {
+ list_del(&inode->i_hash);
+ INIT_LIST_HEAD(&inode->i_hash);
+ if (op && op->delete_inode) {
+ void (*delete)(struct inode *) = op->delete_inode;
+ spin_unlock(&inode_lock);
+ delete(inode);
+ spin_lock(&inode_lock);
+ }
+ }
+ if (list_empty(&inode->i_hash)) {
+ list_del(&inode->i_list);
+ list_add(&inode->i_list, &inode_unused);
+ }
+ }
+ spin_unlock(&inode_lock);
}
- inode->i_blksize = PAGE_SIZE;
- inode->i_pipe = 1;
- inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
- atomic_inc(&inode->i_count);
- inode->i_uid = current->fsuid;
- inode->i_gid = current->fsgid;
- inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
- inode->i_op = &pipe_inode_operations;
- PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 1;
-
- return inode;
}
int bmap(struct inode * inode, int block)
@@ -625,4 +459,40 @@
if (inode->i_op && inode->i_op->bmap)
return inode->i_op->bmap(inode, block);
return 0;
+}
+
+/*
+ * Initialize the hash tables
+ */
+void inode_init(void)
+{
+ int i;
+ struct list_head *head = inode_hashtable;
+
+ i = HASH_SIZE;
+ do {
+ INIT_LIST_HEAD(head);
+ head++;
+ i--;
+ } while (i);
+}
+
+/*
+ * FIXME! These need to go through the in-use inodes to
+ * check whether we can mount/umount/remount.
+ */
+int fs_may_mount(kdev_t dev)
+{
+ return 1;
+}
+
+int fs_may_umount(struct super_block *sb, struct dentry * root)
+{
+ shrink_dcache();
+ return root->d_count == 1;
+}
+
+int fs_may_remount_ro(struct super_block *sb)
+{
+ return 1;
}