patch-2.1.43 linux/fs/inode.c
Next file: linux/fs/isofs/dir.c
Previous file: linux/fs/hpfs/hpfs_fs.c
Back to the patch index
Back to the overall index
- Lines: 1278
- Date:
Thu Jun 12 21:53:45 1997
- Orig file:
v2.1.42/linux/fs/inode.c
- Orig date:
Thu May 15 16:48:04 1997
diff -u --recursive --new-file v2.1.42/linux/fs/inode.c linux/fs/inode.c
@@ -1,657 +1,708 @@
/*
- * linux/fs/inode.c: Keeping track of inodes.
+ * fs/inode.c
*
- * Copyright (C) 1991, 1992 Linus Torvalds
- * Copyright (C) 1997 David S. Miller
+ * Complete reimplementation
+ * (C) 1997 Thomas Schoebel-Theuer
*/
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/slab.h>
+/* Everything here is intended to be MP-safe. However, other parts
+ * of the kernel are not yet MP-safe, in particular the bare
+ * inode->i_count++ increments that are spread all over the place. These
+ * should be replaced by
+ * iinc() as soon as possible. Since I have no MP machine, I could
+ * not test it.
+ */
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/kernel.h>
+#include <linux/dlists.h>
+#include <linux/dalloc.h>
+#include <linux/omirr.h>
+
+/* #define DEBUG */
+
+#define HASH_SIZE 1024 /* must be a power of 2 */
+#define NR_LEVELS 4
+
+#define ST_AGED 1
+#define ST_HASHED 2
+#define ST_EMPTY 4
+#define ST_TO_READ 8
+#define ST_TO_WRITE 16
+#define ST_TO_PUT 32
+#define ST_TO_DROP 64
+#define ST_IO (ST_TO_READ|ST_TO_WRITE|ST_TO_PUT|ST_TO_DROP)
+#define ST_WAITING 128
+#define ST_FREEING 256
+#define ST_IBASKET 512
+
+/* The idea is to keep empty inodes in a separate list, so no search
+ * is required as long as empty inodes exist.
+ * All reusable inodes occurring in the hash table with i_count==0
+ * are also registered in the ringlist aged_i[level], but in LRU order.
+ * Used inodes with i_count>0 are kept solely in the hashtable and in
+ * all_i, but in no other list.
+ * The level is used for multilevel aging to avoid thrashing; each
+ * time i_count decreases to 0, the inode is inserted into the next level
+ * ringlist. Cache reusage is simply by taking the _last_ element from the
+ * lowest-level ringlist that contains inodes.
+ * In contrast to the old code, there isn't any O(n) search overhead now
+ * in iget/iput (if you make HASH_SIZE large enough).
+ */
+static struct inode * hashtable[HASH_SIZE];/* linked with i_hash_{next,prev} */
+static struct inode * all_i = NULL; /* linked with i_{next,prev} */
+static struct inode * empty_i = NULL; /* linked with i_{next,prev} */
+static struct inode * aged_i[NR_LEVELS+1]; /* linked with i_lru_{next,prev} */
+static int aged_reused[NR_LEVELS+1]; /* # removals from aged_i[level] */
+static int age_table[NR_LEVELS+1] = { /* You may tune this. */
+ 1, 4, 10, 100, 1000
+}; /* after which # of uses to increase to the next level */
+
+/* This is for kernel/sysctl.c */
+
+/* Just aligning plain ints and arrays thereof doesn't work reliably.. */
+struct {
+ int nr_inodes;
+ int nr_free_inodes;
+ int aged_count[NR_LEVELS+1]; /* # in each level */
+} inodes_stat;
-int nr_inodes = 0, nr_free_inodes = 0;
int max_inodes = NR_INODE;
+unsigned long last_inode = 0;
-#define INODE_HASHSZ 1024
-
-static struct inode *inode_hash[INODE_HASHSZ];
-
-/* All the details of hashing and lookup. */
-#define hashfn(dev, i) ((HASHDEV(dev) + ((i) ^ ((i) >> 10))) & (INODE_HASHSZ - 1))
-
-__inline__ void insert_inode_hash(struct inode *inode)
-{
- struct inode **htable = &inode_hash[hashfn(inode->i_dev, inode->i_ino)];
- if((inode->i_hash_next = *htable) != NULL)
- (*htable)->i_hash_pprev = &inode->i_hash_next;
- *htable = inode;
- inode->i_hash_pprev = htable;
-}
-
-#define hash_inode(inode) insert_inode_hash(inode)
-
-static inline void unhash_inode(struct inode *inode)
-{
- if(inode->i_hash_pprev) {
- if(inode->i_hash_next)
- inode->i_hash_next->i_hash_pprev = inode->i_hash_pprev;
- *(inode->i_hash_pprev) = inode->i_hash_next;
- inode->i_hash_pprev = NULL;
- }
-}
-
-static inline struct inode *find_inode(unsigned int hashent,
- kdev_t dev, unsigned long ino)
+void inode_init(void)
{
- struct inode *inode;
-
- for(inode = inode_hash[hashent]; inode; inode = inode->i_hash_next)
- if(inode->i_dev == dev && inode->i_ino == ino)
- break;
- return inode;
+ memset(hashtable, 0, sizeof(hashtable));
+ memset(aged_i, 0, sizeof(aged_i));
+ memset(aged_reused, 0, sizeof(aged_reused));
+ memset(&inodes_stat, 0, sizeof(inodes_stat));
}
-/* Free list queue and management. */
-static struct free_inode_queue {
- struct inode *head;
- struct inode **last;
-} free_inodes = { NULL, &free_inodes.head };
-
-static inline void put_inode_head(struct inode *inode)
-{
- if((inode->i_next = free_inodes.head) != NULL)
- free_inodes.head->i_pprev = &inode->i_next;
- else
- free_inodes.last = &inode->i_next;
- free_inodes.head = inode;
- inode->i_pprev = &free_inodes.head;
- nr_free_inodes++;
-}
+/* Intended for short locks of the above global data structures.
+ * Could be replaced with spinlocks completely, since there is
+ * no blocking during manipulation of the static data; however the
+ * lock in invalidate_inodes() may last relatively long.
+ */
+#ifdef __SMP__
+struct semaphore vfs_sem = MUTEX;
+#endif
+
+DEF_INSERT(all,struct inode,i_next,i_prev)
+DEF_REMOVE(all,struct inode,i_next,i_prev)
+
+DEF_INSERT(lru,struct inode,i_lru_next,i_lru_prev)
+DEF_REMOVE(lru,struct inode,i_lru_next,i_lru_prev)
+
+DEF_INSERT(hash,struct inode,i_hash_next,i_hash_prev)
+DEF_REMOVE(hash,struct inode,i_hash_next,i_hash_prev)
+
+DEF_INSERT(ibasket,struct inode,i_basket_next,i_basket_prev)
+DEF_REMOVE(ibasket,struct inode,i_basket_next,i_basket_prev)
+
+#ifdef DEBUG
+extern void printpath(struct dentry * entry);
+struct inode * xtst[15000];
+int xcnt = 0;
-static inline void put_inode_last(struct inode *inode)
+void xcheck(char * txt, struct inode * p)
{
- inode->i_next = NULL;
- inode->i_pprev = free_inodes.last;
- *free_inodes.last = inode;
- free_inodes.last = &inode->i_next;
- nr_free_inodes++;
+ int i;
+ for(i=xcnt-1; i>=0; i--)
+ if(xtst[i] == p)
+ return;
+ printk("Bogus inode %p in %s\n", p, txt);
}
+#else
+#define xcheck(t,p) /*nothing*/
+#endif
-static inline void remove_free_inode(struct inode *inode)
+static inline struct inode * grow_inodes(void)
{
- if(inode->i_pprev) {
- if(inode->i_next)
- inode->i_next->i_pprev = inode->i_pprev;
- else
- free_inodes.last = inode->i_pprev;
- *inode->i_pprev = inode->i_next;
- inode->i_pprev = NULL;
- nr_free_inodes--;
+ struct inode * res;
+ struct inode * inode = res = (struct inode*)__get_free_page(GFP_KERNEL);
+ int size = PAGE_SIZE;
+ if(!inode)
+ return NULL;
+
+ size -= sizeof(struct inode);
+ inode++;
+ inodes_stat.nr_inodes++;
+#ifdef DEBUG
+xtst[xcnt++]=res;
+#endif
+ while(size >= sizeof(struct inode)) {
+#ifdef DEBUG
+xtst[xcnt++]=inode;
+#endif
+ inodes_stat.nr_inodes++;
+ inodes_stat.nr_free_inodes++;
+ insert_all(&empty_i, inode);
+ inode->i_status = ST_EMPTY;
+ inode++;
+ size -= sizeof(struct inode);
}
+ return res;
}
-/* This is the in-use queue, if i_count > 0 (as far as we can tell)
- * the sucker is here.
- */
-static struct inode *inuse_list = NULL;
-
-static inline void put_inuse(struct inode *inode)
+static inline int hash(dev_t i_dev, unsigned long i_ino)
{
- if((inode->i_next = inuse_list) != NULL)
- inuse_list->i_pprev = &inode->i_next;
- inuse_list = inode;
- inode->i_pprev = &inuse_list;
+ return ((int)i_ino ^ ((int)i_dev << 6)) & (HASH_SIZE-1);
}
-static inline void remove_inuse(struct inode *inode)
+static inline blocking void wait_io(struct inode * inode, unsigned short flags)
{
- if(inode->i_pprev) {
- if(inode->i_next)
- inode->i_next->i_pprev = inode->i_pprev;
- *inode->i_pprev = inode->i_next;
- inode->i_pprev = NULL;
+ while(inode->i_status & flags) {
+ struct wait_queue wait = {current, NULL};
+ inode->i_status |= ST_WAITING;
+ vfs_unlock();
+ add_wait_queue(&inode->i_wait, &wait);
+ sleep_on(&inode->i_wait);
+ remove_wait_queue(&inode->i_wait, &wait);
+ vfs_lock();
}
}
-/* Locking and unlocking inodes, plus waiting for locks to clear. */
-static void __wait_on_inode(struct inode *);
-
-static inline void wait_on_inode(struct inode *inode)
+static inline blocking void set_io(struct inode * inode,
+ unsigned short waitflags,
+ unsigned short setflags)
{
- if(inode->i_lock)
- __wait_on_inode(inode);
+ wait_io(inode, waitflags);
+ inode->i_status |= setflags;
+ vfs_unlock();
}
-static inline void lock_inode(struct inode *inode)
+static inline blocking int release_io(struct inode * inode, unsigned short flags)
{
- if(inode->i_lock)
- __wait_on_inode(inode);
- inode->i_lock = 1;
-}
-
-static inline void unlock_inode(struct inode *inode)
-{
- inode->i_lock = 0;
- wake_up(&inode->i_wait);
+ int res = 0;
+ vfs_lock();
+ inode->i_status &= ~flags;
+ if(inode->i_status & ST_WAITING) {
+ inode->i_status &= ~ST_WAITING;
+ vfs_unlock();
+ wake_up(&inode->i_wait);
+ res = 1;
+ }
+ return res;
}
-static void __wait_on_inode(struct inode * inode)
+static inline blocking void _io(void (*op)(struct inode*), struct inode * inode,
+ unsigned short waitflags, unsigned short setflags)
{
- struct wait_queue wait = { current, NULL };
-
- add_wait_queue(&inode->i_wait, &wait);
-repeat:
- current->state = TASK_UNINTERRUPTIBLE;
- if (inode->i_lock) {
- schedule();
- goto repeat;
+ /* Do nothing if the same op is already in progress. */
+ if(op && !(inode->i_status & setflags)) {
+ set_io(inode, waitflags, setflags);
+ op(inode);
+ if(release_io(inode, setflags)) {
+ /* Somebody grabbed my inode from under me. */
+#ifdef DEBUG
+ printk("_io grab!\n");
+#endif
+ vfs_lock();
+ }
}
- remove_wait_queue(&inode->i_wait, &wait);
- current->state = TASK_RUNNING;
}
-/* Clear an inode of all it's identity, this is exported to the world. */
-void clear_inode(struct inode *inode)
+blocking int _free_ibasket(struct super_block * sb)
{
- struct wait_queue *wait;
-
- /* So we don't disappear. */
- inode->i_count++;
-
- truncate_inode_pages(inode, 0);
- wait_on_inode(inode);
- if(IS_WRITABLE(inode) && inode->i_sb && inode->i_sb->dq_op)
- inode->i_sb->dq_op->drop(inode);
-
- if(--inode->i_count > 0)
- remove_inuse(inode);
- else
- remove_free_inode(inode);
- unhash_inode(inode);
- wait = inode->i_wait;
- memset(inode, 0, sizeof(*inode)); barrier();
- inode->i_wait = wait;
- put_inode_head(inode); /* Pages zapped, put at the front. */
+ if(sb->s_ibasket) {
+ struct inode * delinquish = sb->s_ibasket->i_basket_prev;
+#if 0
+printpath(delinquish->i_dentry);
+printk(" delinquish\n");
+#endif
+ _clear_inode(delinquish, 0, 1);
+ return 1;
+ }
+ return 0;
}
-/* These check the validity of a mount/umount type operation, we essentially
- * check if there are any inodes hanging around which prevent this operation
- * from occurring. We also clear out clean inodes referencing this device.
- */
-int fs_may_mount(kdev_t dev)
+static /*inline*/ void _put_ibasket(struct inode * inode)
{
- struct inode *inode;
- int pass = 0;
-
- inode = free_inodes.head;
-repeat:
- while(inode) {
- struct inode *next = inode->i_next;
- if(inode->i_dev != dev)
- goto next;
- if(inode->i_count || inode->i_dirt || inode->i_lock)
- return 0;
- clear_inode(inode);
- next:
- inode = next;
+ struct super_block * sb = inode->i_sb;
+ if(!(inode->i_status & ST_IBASKET)) {
+ inode->i_status |= ST_IBASKET;
+ insert_ibasket(&sb->s_ibasket, inode);
+ sb->s_ibasket_count++;
+ if(sb->s_ibasket_count > sb->s_ibasket_max)
+ (void)_free_ibasket(sb);
}
- if(pass == 0) {
- inode = inuse_list;
- pass = 1;
- goto repeat;
- }
- return 1; /* Tis' cool bro. */
}
-int fs_may_umount(kdev_t dev, struct inode *iroot)
-{
- struct inode *inode;
- int pass = 0;
-
- inode = free_inodes.head;
-repeat:
- for(; inode; inode = inode->i_next) {
- if(inode->i_dev != dev || !inode->i_count)
- continue;
- if(inode == iroot &&
- (inode->i_count == (inode->i_mount == inode ? 2 : 1)))
- continue;
- return 0;
- }
- if(pass == 0) {
- inode = inuse_list;
- pass = 1;
- goto repeat;
+blocking void _clear_inode(struct inode * inode, int external, int verbose)
+{
+xcheck("_clear_inode",inode);
+ if(inode->i_status & ST_IBASKET) {
+ struct super_block * sb = inode->i_sb;
+ remove_ibasket(&sb->s_ibasket, inode);
+ sb->s_ibasket_count--;
+ inode->i_status &= ~ST_IBASKET;
+#if 0
+printpath(inode->i_dentry);
+printk(" put_inode\n");
+#endif
+ _io(sb->s_op->put_inode, inode, ST_TO_PUT|ST_TO_WRITE, ST_TO_PUT);
+ if(inode->i_status & ST_EMPTY)
+ return;
}
- return 1; /* Tis' cool bro. */
+ if(inode->i_status & ST_HASHED)
+ remove_hash(&hashtable[hash(inode->i_dev, inode->i_ino)], inode);
+ if(inode->i_status & ST_AGED) {
+ /* "cannot happen" when called from an fs because at least
+ * the caller must use it. Can happen when called from
+ * invalidate_inodes(). */
+ if(verbose)
+ printk("VFS: clearing aged inode\n");
+ if(atomic_read(&inode->i_count))
+ printk("VFS: aged inode is in use\n");
+ remove_lru(&aged_i[inode->i_level], inode);
+ inodes_stat.aged_count[inode->i_level]--;
+ }
+ if(!external && inode->i_status & ST_IO) {
+ printk("VFS: clearing inode during IO operation\n");
+ }
+ if(!(inode->i_status & ST_EMPTY)) {
+ remove_all(&all_i, inode);
+ inode->i_status = ST_EMPTY;
+ while(inode->i_dentry) {
+ d_del(inode->i_dentry, D_NO_CLEAR_INODE);
+ }
+ if(inode->i_pages) {
+ vfs_unlock(); /* may block, can that be revised? */
+ truncate_inode_pages(inode, 0);
+ vfs_lock();
+ }
+ insert_all(&empty_i, inode);
+ inodes_stat.nr_free_inodes++;
+ } else if(external)
+ printk("VFS: empty inode is unnecessarily cleared multiple "
+ "times by an fs\n");
+ else
+ printk("VFS: clearing empty inode\n");
+ inode->i_status = ST_EMPTY;
+ /* The inode is not really cleared any more here, but only once
+ * when taken from empty_i. This saves instructions and processor
+ * cache pollution.
+ */
+}
+
+void insert_inode_hash(struct inode * inode)
+{
+xcheck("insert_inode_hash",inode);
+ vfs_lock();
+ if(!(inode->i_status & ST_HASHED)) {
+ insert_hash(&hashtable[hash(inode->i_dev, inode->i_ino)], inode);
+ inode->i_status |= ST_HASHED;
+ } else
+ printk("VFS: trying to hash an inode again\n");
+ vfs_unlock();
}
-/* This belongs in file_table.c, not here... */
-int fs_may_remount_ro(kdev_t dev)
+blocking struct inode * _get_empty_inode(void)
{
- struct file * file;
+ struct inode * inode;
+ int retry = 0;
- /* Check that no files are currently opened for writing. */
- for (file = inuse_filps; file; file = file->f_next) {
- if (!file->f_inode || file->f_inode->i_dev != dev)
- continue;
- if (S_ISREG(file->f_inode->i_mode) && (file->f_mode & 2))
- return 0;
- }
- return 1; /* Tis' cool bro. */
-}
-
-/* Reading/writing inodes. */
-static void write_inode(struct inode *inode)
-{
- if(inode->i_dirt) {
- wait_on_inode(inode);
- if(inode->i_dirt) {
- if(inode->i_sb &&
- inode->i_sb->s_op &&
- inode->i_sb->s_op->write_inode) {
- inode->i_lock = 1;
- inode->i_sb->s_op->write_inode(inode);
- unlock_inode(inode);
- } else {
- inode->i_dirt = 0;
+retry:
+ inode = empty_i;
+ if(inode) {
+ remove_all(&empty_i, inode);
+ inodes_stat.nr_free_inodes--;
+ } else if(inodes_stat.nr_inodes < max_inodes || retry > 2) {
+ inode = grow_inodes();
+ }
+ if(!inode) {
+ int level;
+ int usable = 0;
+ for(level = 0; level <= NR_LEVELS; level++)
+ if(aged_i[level]) {
+ inode = aged_i[level]->i_lru_prev;
+ /* Here is the picking strategy, tune this */
+ if(aged_reused[level] < (usable++ ?
+ inodes_stat.aged_count[level] :
+ 2))
+ break;
+ aged_reused[level] = 0;
}
+ if(inode) {
+ if(!(inode->i_status & ST_AGED))
+ printk("VFS: inode aging inconsistency\n");
+ if(atomic_read(&inode->i_count) + inode->i_ddir_count)
+ printk("VFS: i_count of aged inode is not zero\n");
+ if(inode->i_dirt)
+ printk("VFS: Hey, somebody made my aged inode dirty\n");
+ _clear_inode(inode, 0, 0);
+ goto retry;
}
}
+ if(!inode) {
+ vfs_unlock();
+ schedule();
+ if(retry > 10)
+ panic("VFS: cannot repair inode shortage");
+ if(retry > 2)
+ printk("VFS: no free inodes\n");
+ retry++;
+ vfs_lock();
+ goto retry;
+ }
+xcheck("get_empty_inode",inode);
+ memset(inode, 0, sizeof(struct inode));
+ atomic_set(&inode->i_count, 1);
+ inode->i_nlink = 1;
+ sema_init(&inode->i_sem, 1);
+ inode->i_ino = ++last_inode;
+ inode->i_version = ++event;
+ insert_all(&all_i, inode);
+ return inode;
}
-static inline void read_inode(struct inode *inode)
+static inline blocking struct inode * _get_empty_inode_hashed(dev_t i_dev,
+ unsigned long i_ino)
{
- if(inode->i_sb &&
- inode->i_sb->s_op &&
- inode->i_sb->s_op->read_inode) {
- lock_inode(inode);
- inode->i_sb->s_op->read_inode(inode);
- unlock_inode(inode);
- }
-}
-
-int inode_change_ok(struct inode *inode, struct iattr *attr)
-{
- if(!(attr->ia_valid & ATTR_FORCE)) {
- unsigned short fsuid = current->fsuid;
- uid_t iuid = inode->i_uid;
- int not_fsuser = !fsuser();
-
- if(((attr->ia_valid & ATTR_UID) &&
- ((fsuid != iuid) ||
- (attr->ia_uid != iuid)) && not_fsuser) ||
-
- ((attr->ia_valid & ATTR_GID) &&
- (!in_group_p(attr->ia_gid) &&
- (attr->ia_gid != inode->i_gid)) && not_fsuser) ||
-
- ((attr->ia_valid & (ATTR_ATIME_SET | ATTR_MTIME_SET)) &&
- (fsuid != iuid) && not_fsuser))
- return -EPERM;
-
- if(attr->ia_valid & ATTR_MODE) {
- gid_t grp;
- if(fsuid != iuid && not_fsuser)
- return -EPERM;
- grp = attr->ia_valid & ATTR_GID ? attr->ia_gid : inode->i_gid;
- if(not_fsuser && !in_group_p(grp))
- attr->ia_mode &= ~S_ISGID;
- }
- }
- return 0;
+ struct inode ** base = &hashtable[hash(i_dev, i_ino)];
+ struct inode * inode = *base;
+ if(inode) do {
+ if(inode->i_ino == i_ino && inode->i_dev == i_dev) {
+ atomic_inc(&inode->i_count);
+ printk("VFS: inode %lx is already in use\n", i_ino);
+ return inode;
+ }
+ inode = inode->i_hash_next;
+ } while(inode != *base);
+ inode = _get_empty_inode();
+ inode->i_dev = i_dev;
+ inode->i_ino = i_ino;
+ insert_hash(base, inode);
+ inode->i_status |= ST_HASHED;
+ return inode;
}
-void inode_setattr(struct inode *inode, struct iattr *attr)
+blocking struct inode * get_empty_inode_hashed(dev_t i_dev, unsigned long i_ino)
{
- if (attr->ia_valid & ATTR_UID)
- inode->i_uid = attr->ia_uid;
- if (attr->ia_valid & ATTR_GID)
- inode->i_gid = attr->ia_gid;
- if (attr->ia_valid & ATTR_SIZE)
- inode->i_size = attr->ia_size;
- if (attr->ia_valid & ATTR_ATIME)
- inode->i_atime = attr->ia_atime;
- if (attr->ia_valid & ATTR_MTIME)
- inode->i_mtime = attr->ia_mtime;
- if (attr->ia_valid & ATTR_CTIME)
- inode->i_ctime = attr->ia_ctime;
- if (attr->ia_valid & ATTR_MODE) {
- inode->i_mode = attr->ia_mode;
- if (!fsuser() && !in_group_p(inode->i_gid))
- inode->i_mode &= ~S_ISGID;
- }
- if (attr->ia_valid & ATTR_ATTR_FLAG)
- inode->i_attr_flags = attr->ia_attr_flags;
- inode->i_dirt = 1;
-}
-
-int notify_change(struct inode *inode, struct iattr *attr)
-{
- attr->ia_ctime = CURRENT_TIME;
- if (attr->ia_valid & (ATTR_ATIME | ATTR_MTIME)) {
- if (!(attr->ia_valid & ATTR_ATIME_SET))
- attr->ia_atime = attr->ia_ctime;
- if (!(attr->ia_valid & ATTR_MTIME_SET))
- attr->ia_mtime = attr->ia_ctime;
- }
-
- if (inode->i_sb &&
- inode->i_sb->s_op &&
- inode->i_sb->s_op->notify_change)
- return inode->i_sb->s_op->notify_change(inode, attr);
+ struct inode * inode;
- if(inode_change_ok(inode, attr) != 0)
- return -EPERM;
-
- inode_setattr(inode, attr);
- return 0;
-}
-
-int bmap(struct inode *inode, int block)
-{
- if(inode->i_op && inode->i_op->bmap)
- return inode->i_op->bmap(inode, block);
- return 0;
+ vfs_lock();
+ inode = _get_empty_inode_hashed(i_dev, i_ino);
+ vfs_unlock();
+ return inode;
}
-void invalidate_inodes(kdev_t dev)
+void _get_inode(struct inode * inode)
{
- struct inode *inode;
- int pass = 0;
-
- inode = free_inodes.head;
-repeat:
- while(inode) {
- struct inode *next = inode->i_next;
- if(inode->i_dev != dev)
- goto next;
- clear_inode(inode);
- next:
- inode = next;
- }
- if(pass == 0) {
- inode = inuse_list;
- pass = 1;
- goto repeat;
- }
-}
-
-void sync_inodes(kdev_t dev)
-{
- struct inode *inode;
- int pass = 0;
-
- inode = free_inodes.head;
-repeat:
- while(inode) {
- struct inode *next = inode->i_next;
- if(dev && inode->i_dev != dev)
- goto next;
- wait_on_inode(inode);
- write_inode(inode);
- next:
- inode = next;
- }
- if(pass == 0) {
- inode = inuse_list;
- pass = 1;
- goto repeat;
+ if(inode->i_status & ST_IBASKET) {
+ inode->i_status &= ~ST_IBASKET;
+ remove_ibasket(&inode->i_sb->s_ibasket, inode);
+ inode->i_sb->s_ibasket_count--;
+ }
+ if(inode->i_status & ST_AGED) {
+ inode->i_status &= ~ST_AGED;
+ remove_lru(&aged_i[inode->i_level], inode);
+ inodes_stat.aged_count[inode->i_level]--;
+ aged_reused[inode->i_level]++;
+ if(S_ISDIR(inode->i_mode))
+ /* make dirs less thrashable */
+ inode->i_level = NR_LEVELS-1;
+ else if(inode->i_nlink > 1)
+ /* keep hardlinks totally separate */
+ inode->i_level = NR_LEVELS;
+ else if(++inode->i_reuse_count >= age_table[inode->i_level]
+ && inode->i_level < NR_LEVELS-1)
+ inode->i_level++;
+ if(atomic_read(&inode->i_count) != 1)
+ printk("VFS: inode count was not zero\n");
+ } else if(inode->i_status & ST_EMPTY)
+ printk("VFS: invalid reuse of empty inode\n");
+}
+
+blocking struct inode * __iget(struct super_block * sb,
+ unsigned long i_ino,
+ int crossmntp)
+{
+ struct inode ** base;
+ struct inode * inode;
+ dev_t i_dev;
+
+ if(!sb)
+ panic("VFS: iget with sb == NULL");
+ i_dev = sb->s_dev;
+ if(!i_dev)
+ panic("VFS: sb->s_dev is NULL\n");
+ base = &hashtable[hash(i_dev, i_ino)];
+ vfs_lock();
+ inode = *base;
+ if(inode) do {
+ if(inode->i_ino == i_ino && inode->i_dev == i_dev) {
+ atomic_inc(&inode->i_count);
+ _get_inode(inode);
+
+ /* Allow concurrent writes/puts. This is in particular
+ * useful e.g. when syncing large chunks.
+ * I hope the i_dirty flag is everywhere set as soon
+			 * as _any_ modification is made and _before_
+ * giving up control, so no harm should occur if data
+ * is modified during writes, because it will be
+ * rewritten again (does a short inconsistency on the
+ * disk harm?)
+ */
+ wait_io(inode, ST_TO_READ);
+ vfs_unlock();
+ goto done;
+ }
+ inode = inode->i_hash_next;
+ } while(inode != *base);
+ inode = _get_empty_inode_hashed(i_dev, i_ino);
+ inode->i_sb = sb;
+ inode->i_flags = sb->s_flags;
+ if(sb->s_op && sb->s_op->read_inode) {
+ set_io(inode, 0, ST_TO_READ); /* do not wait at all */
+ sb->s_op->read_inode(inode);
+ if(release_io(inode, ST_TO_READ))
+ goto done;
+ }
+ vfs_unlock();
+done:
+ while(crossmntp && inode->i_mount) {
+ struct inode * tmp = inode->i_mount;
+ iinc(tmp);
+ iput(inode);
+ inode = tmp;
}
+xcheck("_iget",inode);
+ return inode;
}
-static struct wait_queue *inode_wait, *update_wait;
-
-void iput(struct inode *inode)
+blocking void __iput(struct inode * inode)
{
- if(!inode)
- return;
- wait_on_inode(inode);
- if(!inode->i_count) {
- printk("VFS: Freeing free inode, tell DaveM\n");
+ struct super_block * sb;
+xcheck("_iput",inode);
+ if(atomic_read(&inode->i_count) + inode->i_ddir_count < 0)
+ printk("VFS: i_count is negative\n");
+ if((atomic_read(&inode->i_count) + inode->i_ddir_count) ||
+ (inode->i_status & ST_FREEING)) {
return;
}
- if(inode->i_pipe)
- wake_up_interruptible(&PIPE_WAIT(*inode));
-we_slept:
- if(inode->i_count > 1) {
- inode->i_count--;
- } else {
- wake_up(&inode_wait);
- if(inode->i_pipe) {
- free_page((unsigned long)PIPE_BASE(*inode));
- PIPE_BASE(*inode) = NULL;
- }
- if(inode->i_sb &&
- inode->i_sb->s_op &&
- inode->i_sb->s_op->put_inode) {
- inode->i_sb->s_op->put_inode(inode);
- if(!inode->i_nlink)
- return;
- }
- if(inode->i_dirt) {
- write_inode(inode);
- wait_on_inode(inode);
- goto we_slept;
- }
- if(IS_WRITABLE(inode) &&
- inode->i_sb &&
- inode->i_sb->dq_op) {
- inode->i_lock = 1;
- inode->i_sb->dq_op->drop(inode);
- unlock_inode(inode);
- goto we_slept;
- }
- /* There is a serious race leading to here, watch out. */
- if(--inode->i_count == 0) {
- remove_inuse(inode);
- put_inode_last(inode); /* Place at end of LRU free queue */
+ inode->i_status |= ST_FREEING;
+#ifdef CONFIG_OMIRR
+ if(inode->i_status & ST_MODIFIED) {
+ inode->i_status &= ~ST_MODIFIED;
+ omirr_printall(inode, " W %ld ", CURRENT_TIME);
+ }
+#endif
+ if(inode->i_pipe) {
+ free_page((unsigned long)PIPE_BASE(*inode));
+ PIPE_BASE(*inode)= NULL;
+ }
+ if((sb = inode->i_sb)) {
+ if(sb->s_type && (sb->s_type->fs_flags & FS_NO_DCACHE)) {
+ while(inode->i_dentry)
+ d_del(inode->i_dentry, D_NO_CLEAR_INODE);
+ if(atomic_read(&inode->i_count) + inode->i_ddir_count)
+ goto done;
+ }
+ if(sb->s_op) {
+ if(inode->i_nlink <= 0 && inode->i_dent_count &&
+ !(inode->i_status & (ST_EMPTY|ST_IBASKET)) &&
+ (sb->s_type->fs_flags & FS_IBASKET)) {
+ _put_ibasket(inode);
+ goto done;
+ }
+ if(!inode->i_dent_count ||
+ (sb->s_type->fs_flags & FS_NO_DCACHE)) {
+ _io(sb->s_op->put_inode, inode,
+ ST_TO_PUT|ST_TO_WRITE, ST_TO_PUT);
+ if(atomic_read(&inode->i_count) + inode->i_ddir_count)
+ goto done;
+ if(inode->i_nlink <= 0) {
+ if(!(inode->i_status & ST_EMPTY)) {
+ _clear_inode(inode, 0, 1);
+ }
+ goto done;
+ }
+ }
+ if(inode->i_dirt) {
+ inode->i_dirt = 0;
+ _io(sb->s_op->write_inode, inode,
+ ST_TO_PUT|ST_TO_WRITE, ST_TO_WRITE);
+ if(atomic_read(&inode->i_count) + inode->i_ddir_count)
+ goto done;
+ }
}
- }
-}
-
-static kmem_cache_t *inode_cachep;
-
-static void grow_inodes(void)
-{
- int i = 16;
-
- while(i--) {
- struct inode *inode;
-
- inode = kmem_cache_alloc(inode_cachep, SLAB_KERNEL);
- if(!inode)
- return;
- memset(inode, 0, sizeof(*inode));
- put_inode_head(inode);
- nr_inodes++;
- }
-}
-
-/* We have to be really careful, it's really easy to run yourself into
- * inefficient sequences of events. The first problem is that when you
- * steal a non-referenced inode you run the risk of zaping a considerable
- * number of page cache entries, which might get refernced once again.
- * But if you are growing the inode set to quickly, you suck up ram
- * and cause other problems.
- *
- * We approach the problem in the following way, we take two things into
- * consideration. Firstly we take a look at how much we have "committed"
- * to this inode already (i_nrpages), this accounts for the cost of getting
- * those pages back if someone should reference that inode soon. We also
- * attempt to factor in i_blocks, which says "how much of a problem could
- * this potentially be". It still needs some tuning though. -DaveM
- */
-#define BLOCK_FACTOR_SHIFT 5 /* It is not factored in as much. */
-static struct inode *find_best_candidate_weighted(struct inode *inode)
-{
- struct inode *best = NULL;
+ if(IS_WRITABLE(inode) && sb->dq_op) {
+ /* can operate in parallel to other ops ? */
+ _io(sb->dq_op->drop, inode, 0, ST_TO_DROP);
+ if(atomic_read(&inode->i_count) + inode->i_ddir_count)
+ goto done;
+ }
+ }
+ if(inode->i_mmap)
+ printk("VFS: inode has mappings\n");
+ if(inode->i_status & ST_AGED) {
+ printk("VFS: reaging inode\n");
+#if defined(DEBUG)
+printpath(inode->i_dentry);
+printk("\n");
+#endif
+ goto done;
+ }
+ if(!(inode->i_status & (ST_HASHED|ST_EMPTY))) {
+ _clear_inode(inode, 0, 1);
+ goto done;
+ }
+ if(inode->i_status & ST_EMPTY) {
+ printk("VFS: aging an empty inode\n");
+ goto done;
+ }
+ insert_lru(&aged_i[inode->i_level], inode);
+ inodes_stat.aged_count[inode->i_level]++;
+ inode->i_status |= ST_AGED;
+done:
+ inode->i_status &= ~ST_FREEING;
+}
+
+blocking void _iput(struct inode * inode)
+{
+ vfs_lock();
+ __iput(inode);
+ vfs_unlock();
+}
+
+blocking void sync_inodes(kdev_t dev)
+{
+ struct inode * inode;
+ vfs_lock();
+ inode = all_i;
+ if(inode) do {
+xcheck("sync_inodes",inode);
+ if(inode->i_dirt && (inode->i_dev == dev || !dev)) {
+ if(inode->i_sb && inode->i_sb->s_op &&
+ !(inode->i_status & ST_FREEING)) {
+ inode->i_dirt = 0;
+ _io(inode->i_sb->s_op->write_inode, inode,
+ ST_IO, ST_TO_WRITE);
+ }
+ }
+ inode = inode->i_next;
+ } while(inode != all_i);
+ vfs_unlock();
+}
+
+blocking int _check_inodes(kdev_t dev, int complain)
+{
+ struct inode * inode;
+ int bad = 0;
+
+ vfs_lock();
+startover:
+ inode = all_i;
+ if(inode) do {
+ struct inode * next;
+xcheck("_check_inodes",inode);
+ next = inode->i_next;
+ if(inode->i_dev == dev) {
+ if(inode->i_dirt || atomic_read(&inode->i_count)) {
+ bad++;
+ } else {
+ _clear_inode(inode, 0, 0);
- if(inode) {
- unsigned long bestscore = 1000;
- int limit = nr_free_inodes >> 2;
- do {
- if(!(inode->i_lock | inode->i_dirt)) {
- int myscore = inode->i_nrpages;
-
- myscore += (inode->i_blocks >> BLOCK_FACTOR_SHIFT);
- if(myscore < bestscore) {
- bestscore = myscore;
- best = inode;
- }
+ /* _clear_inode() may recursively clear other
+ * inodes, probably also the next one.
+ */
+ if(next->i_status & ST_EMPTY)
+ goto startover;
}
- inode = inode->i_next;
- } while(inode && --limit);
- }
- return best;
+ }
+ inode = next;
+ } while(inode != all_i);
+ vfs_unlock();
+ if(complain && bad)
+ printk("VFS: %d inode(s) busy on removed device `%s'\n",
+ bad, kdevname(dev));
+ return (bad == 0);
}
-static inline struct inode *find_best_free(struct inode *inode)
+/*inline*/ void invalidate_inodes(kdev_t dev)
{
- if(inode) {
- int limit = nr_free_inodes >> 5;
- do {
- if(!inode->i_nrpages)
- return inode;
- inode = inode->i_next;
- } while(inode && --limit);
- }
- return NULL;
+ /* Requires two passes, because of the new dcache holding
+ * directories with i_count > 1.
+ */
+ (void)_check_inodes(dev, 0);
+ (void)_check_inodes(dev, 1);
}
-struct inode *get_empty_inode(void)
+/*inline*/ int fs_may_mount(kdev_t dev)
{
- static int ino = 0;
- struct inode *inode;
-
-repeat:
- inode = find_best_free(free_inodes.head);
- if(!inode)
- goto pressure;
-got_it:
- inode->i_count++;
- truncate_inode_pages(inode, 0);
- wait_on_inode(inode);
- if(IS_WRITABLE(inode) && inode->i_sb && inode->i_sb->dq_op)
- inode->i_sb->dq_op->drop(inode);
- unhash_inode(inode);
- remove_free_inode(inode);
-
- memset(inode, 0, sizeof(*inode));
- inode->i_count = 1;
- inode->i_nlink = 1;
- inode->i_version = ++event;
- sema_init(&inode->i_sem, 1);
- inode->i_ino = ++ino;
- inode->i_dev = 0;
- put_inuse(inode);
- return inode;
-pressure:
- if(nr_inodes < max_inodes) {
- grow_inodes();
- goto repeat;
- }
- inode = find_best_candidate_weighted(free_inodes.head);
- if(!inode) {
- printk("VFS: No free inodes, contact DaveM\n");
- sleep_on(&inode_wait);
- goto repeat;
- }
- if(inode->i_lock) {
- wait_on_inode(inode);
- goto repeat;
- } else if(inode->i_dirt) {
- write_inode(inode);
- goto repeat;
- }
- goto got_it;
+ return _check_inodes(dev, 0);
}
-struct inode *get_pipe_inode(void)
+int fs_may_remount_ro(kdev_t dev)
{
- extern struct inode_operations pipe_inode_operations;
- struct inode *inode = get_empty_inode();
-
- if(inode) {
- unsigned long page = __get_free_page(GFP_USER);
- if(!page) {
- iput(inode);
- inode = NULL;
- } else {
- PIPE_BASE(*inode) = (char *) page;
- inode->i_op = &pipe_inode_operations;
- inode->i_count = 2;
- PIPE_WAIT(*inode) = NULL;
- PIPE_START(*inode) = PIPE_LEN(*inode) = 0;
- PIPE_RD_OPENERS(*inode) = PIPE_WR_OPENERS(*inode) = 0;
- PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 1;
- PIPE_LOCK(*inode) = 0;
- inode->i_pipe = 1;
- inode->i_mode |= S_IFIFO | S_IRUSR | S_IWUSR;
- inode->i_uid = current->fsuid;
- inode->i_gid = current->fsgid;
- inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
- inode->i_blksize = PAGE_SIZE;
- }
- }
- return inode;
+ (void)dev;
+ return 1; /* not checked any more */
}
-static int inode_updating[INODE_HASHSZ];
-
-struct inode *__iget(struct super_block *sb, int nr, int crossmntp)
+int fs_may_umount(kdev_t dev, struct inode * mount_root)
{
- unsigned int hashent = hashfn(sb->s_dev, nr);
- struct inode *inode, *empty = NULL;
-
-we_slept:
- if((inode = find_inode(hashent, sb->s_dev, nr)) == NULL) {
- if(empty == NULL) {
- inode_updating[hashent]++;
- empty = get_empty_inode();
- if(!--inode_updating[hashent])
- wake_up(&update_wait);
- goto we_slept;
- }
- inode = empty;
- inode->i_sb = sb;
- inode->i_dev = sb->s_dev;
- inode->i_ino = nr;
- inode->i_flags = sb->s_flags;
- hash_inode(inode);
- read_inode(inode);
- } else {
- if(!inode->i_count++) {
- remove_free_inode(inode);
- put_inuse(inode);
- }
- wait_on_inode(inode);
- if(crossmntp && inode->i_mount) {
- struct inode *mp = inode->i_mount;
- mp->i_count++;
- iput(inode);
- wait_on_inode(inode = mp);
- }
- if(empty)
- iput(empty);
+ struct inode * inode;
+ vfs_lock();
+ inode = all_i;
+ if(inode) do {
+xcheck("fs_may_umount",inode);
+ if(inode->i_dev == dev && atomic_read(&inode->i_count))
+ if(inode != mount_root || atomic_read(&inode->i_count) >
+ (inode->i_mount == inode ? 2 : 1)) {
+ vfs_unlock();
+ return 0;
+ }
+ inode = inode->i_next;
+ } while(inode != all_i);
+ vfs_unlock();
+ return 1;
+}
+
+extern struct inode_operations pipe_inode_operations;
+
+blocking struct inode * get_pipe_inode(void)
+{
+ struct inode * inode = get_empty_inode();
+
+ PIPE_BASE(*inode) = (char*)__get_free_page(GFP_USER);
+ if(!(PIPE_BASE(*inode))) {
+ iput(inode);
+ return NULL;
+ }
+ inode->i_blksize = PAGE_SIZE;
+ inode->i_pipe = 1;
+ inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
+ atomic_inc(&inode->i_count);
+ inode->i_uid = current->fsuid;
+ inode->i_gid = current->fsgid;
+ inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+ inode->i_op = &pipe_inode_operations;
+ PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 1;
+
+ /* I hope this does not introduce security problems.
+ * Please check and give me response.
+ */
+ {
+ char dummyname[32];
+ struct qstr dummy = { dummyname, 0 };
+ struct dentry * new;
+ sprintf(dummyname, ".anonymous-pipe-%06lud", inode->i_ino);
+ dummy.len = strlen(dummyname);
+ vfs_lock();
+ new = d_alloc(the_root, dummy.len, 0);
+ if(new)
+ d_add(new, inode, &dummy, D_BASKET);
+ vfs_unlock();
}
- while(inode_updating[hashent])
- sleep_on(&update_wait);
return inode;
}
-void inode_init(void)
+int bmap(struct inode * inode, int block)
{
- int i;
-
- inode_cachep = kmem_cache_create("inode", sizeof(struct inode),
- 0,
- SLAB_HWCACHE_ALIGN, NULL, NULL);
- if(!inode_cachep)
- panic("Cannot create inode SLAB cache\n");
-
- for(i = 0; i < INODE_HASHSZ; i++)
- inode_hash[i] = NULL;
+ if (inode->i_op && inode->i_op->bmap)
+ return inode->i_op->bmap(inode, block);
+ return 0;
}
FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen, slshen@lbl.gov