patch-2.4.10 linux/fs/block_dev.c
- Lines: 988
- Date: Sun Sep 23 09:44:37 2001
- Orig file: v2.4.9/linux/fs/block_dev.c
- Orig date: Sun Aug 12 13:28:00 2001
diff -u --recursive --new-file v2.4.9/linux/fs/block_dev.c linux/fs/block_dev.c
@@ -2,6 +2,7 @@
* linux/fs/block_dev.c
*
* Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE
*/
#include <linux/config.h>
@@ -14,311 +15,298 @@
#include <linux/major.h>
#include <linux/devfs_fs_kernel.h>
#include <linux/smp_lock.h>
+#include <linux/iobuf.h>
+#include <linux/highmem.h>
+#include <linux/blkdev.h>
+#include <linux/module.h>
#include <asm/uaccess.h>
-extern int *blk_size[];
-extern int *blksize_size[];
+static inline int blkdev_get_block(struct inode * inode, long iblock, struct buffer_head * bh_result)
+{
+ int err;
+
+ err = -EIO;
+ if (iblock >= buffered_blk_size(inode->i_rdev) >> (BUFFERED_BLOCKSIZE_BITS - BLOCK_SIZE_BITS))
+ goto out;
-#define MAX_BUF_PER_PAGE (PAGE_SIZE / 512)
-#define NBUF 64
+ bh_result->b_blocknr = iblock;
+ bh_result->b_state |= 1UL << BH_Mapped;
+ err = 0;
-ssize_t block_write(struct file * filp, const char * buf,
- size_t count, loff_t *ppos)
+ out:
+ return err;
+}
+
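
The helper above is the entire logical-to-physical story for a raw block device: the mapping is the identity, so the only real work is bounds-checking the request against the device size (the shift converts the 1K BLOCK_SIZE units reported by buffered_blk_size() into BUFFERED_BLOCKSIZE units). A minimal user-space sketch of the same contract, with hypothetical names standing in for the buffer_head plumbing:

    /* Identity mapping plus a bounds check -- illustrative only. */
    #include <errno.h>

    struct mapping { unsigned long blocknr; int mapped; };

    static int identity_get_block(unsigned long iblock,
                                  unsigned long dev_blocks,
                                  struct mapping *res)
    {
            if (iblock >= dev_blocks)
                    return -EIO;    /* past the end of the device */
            res->blocknr = iblock;  /* block N lives at physical block N */
            res->mapped = 1;        /* plays the role of the BH_Mapped bit */
            return 0;
    }
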
+static int blkdev_direct_IO(int rw, struct inode * inode, struct kiobuf * iobuf, unsigned long blocknr, int blocksize)
{
- struct inode * inode = filp->f_dentry->d_inode;
- ssize_t blocksize, blocksize_bits, i, buffercount, write_error;
- ssize_t block, blocks;
- loff_t offset;
- ssize_t chars;
- ssize_t written, retval;
- struct buffer_head * bhlist[NBUF];
- size_t size;
- kdev_t dev = inode->i_rdev;
- struct buffer_head * bh, *bufferlist[NBUF];
- register char * p;
+ int i, nr_blocks, retval, dev = inode->i_rdev;
+ unsigned long * blocks = iobuf->blocks;
- if (is_read_only(dev))
- return -EPERM;
+ if (blocksize != BUFFERED_BLOCKSIZE)
+ BUG();
- retval = written = write_error = buffercount = 0;
- blocksize = BLOCK_SIZE;
- if (blksize_size[MAJOR(dev)] && blksize_size[MAJOR(dev)][MINOR(dev)])
- blocksize = blksize_size[MAJOR(dev)][MINOR(dev)];
-
- i = blocksize;
- blocksize_bits = 0;
- while(i != 1) {
- blocksize_bits++;
- i >>= 1;
- }
-
- block = *ppos >> blocksize_bits;
- offset = *ppos & (blocksize-1);
-
- if (blk_size[MAJOR(dev)])
- size = ((loff_t) blk_size[MAJOR(dev)][MINOR(dev)] << BLOCK_SIZE_BITS) >> blocksize_bits;
- else
- size = INT_MAX;
- while (count>0) {
- if (block >= size) {
- retval = -ENOSPC;
- goto cleanup;
- }
- chars = blocksize - offset;
- if (chars > count)
- chars=count;
-
-#if 0
- /* get the buffer head */
- {
- struct buffer_head * (*fn)(kdev_t, int, int) = getblk;
- if (chars != blocksize)
- fn = bread;
- bh = fn(dev, block, blocksize);
- if (!bh) {
- retval = -EIO;
- goto cleanup;
- }
- if (!buffer_uptodate(bh))
- wait_on_buffer(bh);
+ nr_blocks = iobuf->length >> BUFFERED_BLOCKSIZE_BITS;
+ /* build the blocklist */
+ for (i = 0; i < nr_blocks; i++, blocknr++) {
+ struct buffer_head bh;
+
+ retval = blkdev_get_block(inode, blocknr, &bh);
+ if (retval)
+ goto out;
+
+ blocks[i] = bh.b_blocknr;
+ }
+
+ retval = brw_kiovec(rw, 1, &iobuf, dev, iobuf->blocks, blocksize);
+
+ out:
+ return retval;
+}
+
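
Note the shape of blkdev_direct_IO(): every block number is translated and validated up front, so a request that runs off the end of the device fails before any I/O has been queued; only then is the whole kiovec handed to brw_kiovec() in a single call. A sketch of that validate-then-submit pattern, with submit_blocks() as a hypothetical stand-in for brw_kiovec():

    #include <errno.h>

    extern int submit_blocks(int rw, const unsigned long *blocks, int nr);

    static int direct_io_sketch(int rw, unsigned long first, int nr,
                                unsigned long dev_blocks,
                                unsigned long *blocks)
    {
            int i;

            for (i = 0; i < nr; i++) {
                    if (first + i >= dev_blocks)
                            return -EIO;    /* fail before any I/O starts */
                    blocks[i] = first + i;  /* identity mapping, as above */
            }
            return submit_blocks(rw, blocks, nr);  /* one brw_kiovec()-style call */
    }
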
+static int blkdev_writepage(struct page * page)
+{
+ int err, i;
+ unsigned long block;
+ struct buffer_head *bh, *head;
+ struct inode *inode = page->mapping->host;
+
+ if (!PageLocked(page))
+ BUG();
+
+ if (!page->buffers)
+ create_empty_buffers(page, inode->i_rdev, BUFFERED_BLOCKSIZE);
+ head = page->buffers;
+
+ block = page->index << (PAGE_CACHE_SHIFT - BUFFERED_BLOCKSIZE_BITS);
+
+ bh = head;
+ i = 0;
+
+ /* Stage 1: make sure we have all the buffers mapped! */
+ do {
+ /*
+ * If the buffer isn't up-to-date, we can't be sure
+ * that the buffer has been initialized with the proper
+ * block number information etc..
+ *
+ * Leave it to the low-level FS to make all those
+ * decisions (block #0 may actually be a valid block)
+ */
+ if (!buffer_mapped(bh)) {
+ err = blkdev_get_block(inode, block, bh);
+ if (err)
+ goto out;
}
-#else
- bh = getblk(dev, block, blocksize);
- if (!bh) {
- retval = -EIO;
- goto cleanup;
- }
-
- if (!buffer_uptodate(bh))
- {
- if (chars == blocksize)
- wait_on_buffer(bh);
- else
- {
- bhlist[0] = bh;
- if (!filp->f_reada || !read_ahead[MAJOR(dev)]) {
- /* We do this to force the read of a single buffer */
- blocks = 1;
- } else {
- /* Read-ahead before write */
- blocks = read_ahead[MAJOR(dev)] / (blocksize >> 9) / 2;
- if (block + blocks > size) blocks = size - block;
- if (blocks > NBUF) blocks=NBUF;
- if (!blocks) blocks = 1;
- for(i=1; i<blocks; i++)
- {
- bhlist[i] = getblk (dev, block+i, blocksize);
- if (!bhlist[i])
- {
- while(i >= 0) brelse(bhlist[i--]);
- retval = -EIO;
- goto cleanup;
- }
- }
- }
- ll_rw_block(READ, blocks, bhlist);
- for(i=1; i<blocks; i++) brelse(bhlist[i]);
- wait_on_buffer(bh);
- if (!buffer_uptodate(bh)) {
- brelse(bh);
- retval = -EIO;
- goto cleanup;
- }
- };
- };
-#endif
+ bh = bh->b_this_page;
block++;
- p = offset + bh->b_data;
- offset = 0;
- *ppos += chars;
- written += chars;
- count -= chars;
- copy_from_user(p,buf,chars);
- p += chars;
- buf += chars;
- mark_buffer_uptodate(bh, 1);
- mark_buffer_dirty(bh);
- if (filp->f_flags & O_SYNC)
- bufferlist[buffercount++] = bh;
- else
- brelse(bh);
- if (buffercount == NBUF){
- ll_rw_block(WRITE, buffercount, bufferlist);
- for(i=0; i<buffercount; i++){
- wait_on_buffer(bufferlist[i]);
- if (!buffer_uptodate(bufferlist[i]))
- write_error=1;
- brelse(bufferlist[i]);
+ } while (bh != head);
+
+ /* Stage 2: lock the buffers, mark them clean */
+ do {
+ lock_buffer(bh);
+ set_buffer_async_io(bh);
+ set_bit(BH_Uptodate, &bh->b_state);
+ clear_bit(BH_Dirty, &bh->b_state);
+ bh = bh->b_this_page;
+ } while (bh != head);
+
+ /* Stage 3: submit the IO */
+ do {
+ submit_bh(WRITE, bh);
+ bh = bh->b_this_page;
+ } while (bh != head);
+
+ /* Done - end_buffer_io_async will unlock */
+ SetPageUptodate(page);
+ return 0;
+
+out:
+ ClearPageUptodate(page);
+ UnlockPage(page);
+ return err;
+}
+
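
blkdev_writepage() makes three full passes over the page's buffers: map everything (the only step that can fail), then lock and mark clean, then submit. Keeping the fallible pass first means no buffer is ever left locked or in flight when the function bails out. Each pass walks the same structure: a page's buffers form a circular singly linked ring through b_this_page. A stripped-down sketch of that traversal idiom:

    /* Circular singly linked ring, as page->buffers/b_this_page forms. */
    struct ring { struct ring *next; };

    static void for_each_buffer(struct ring *head, void (*fn)(struct ring *))
    {
            struct ring *cur = head;

            do {                    /* visit each node exactly once... */
                    fn(cur);
                    cur = cur->next;
            } while (cur != head);  /* ...stopping when we wrap around */
    }
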
+static int blkdev_readpage(struct file * file, struct page * page)
+{
+ struct inode *inode = page->mapping->host;
+ kdev_t dev = inode->i_rdev;
+ unsigned long iblock, lblock;
+ struct buffer_head *bh, *head, *arr[1 << (PAGE_CACHE_SHIFT - BUFFERED_BLOCKSIZE_BITS)];
+ unsigned int blocks;
+ int nr, i;
+
+ if (!PageLocked(page))
+ PAGE_BUG(page);
+ if (!page->buffers)
+ create_empty_buffers(page, dev, BUFFERED_BLOCKSIZE);
+ head = page->buffers;
+
+ blocks = PAGE_CACHE_SIZE >> BUFFERED_BLOCKSIZE_BITS;
+ iblock = page->index << (PAGE_CACHE_SHIFT - BUFFERED_BLOCKSIZE_BITS);
+ lblock = buffered_blk_size(dev) >> (BUFFERED_BLOCKSIZE_BITS - BLOCK_SIZE_BITS);
+ bh = head;
+ nr = 0;
+ i = 0;
+
+ do {
+ if (buffer_uptodate(bh))
+ continue;
+
+ if (!buffer_mapped(bh)) {
+ if (iblock <= lblock) {
+ if (blkdev_get_block(inode, iblock, bh))
+ continue;
}
- buffercount=0;
+ if (!buffer_mapped(bh)) {
+ memset(kmap(page) + i * BUFFERED_BLOCKSIZE, 0, BUFFERED_BLOCKSIZE);
+ flush_dcache_page(page);
+ kunmap(page);
+ set_bit(BH_Uptodate, &bh->b_state);
+ continue;
+ }
+ /* get_block() might have updated the buffer synchronously */
+ if (buffer_uptodate(bh))
+ continue;
}
- balance_dirty(dev);
- if (write_error)
- break;
- }
- cleanup:
- if ( buffercount ){
- ll_rw_block(WRITE, buffercount, bufferlist);
- for(i=0; i<buffercount; i++){
- wait_on_buffer(bufferlist[i]);
- if (!buffer_uptodate(bufferlist[i]))
- write_error=1;
- brelse(bufferlist[i]);
- }
- }
- if(!retval)
- filp->f_reada = 1;
- if(write_error)
- return -EIO;
- return written ? written : retval;
-}
-
-ssize_t block_read(struct file * filp, char * buf, size_t count, loff_t *ppos)
-{
- struct inode * inode = filp->f_dentry->d_inode;
- size_t block;
- loff_t offset;
- ssize_t blocksize;
- ssize_t blocksize_bits, i;
- size_t blocks, rblocks, left;
- int bhrequest, uptodate;
- struct buffer_head ** bhb, ** bhe;
- struct buffer_head * buflist[NBUF];
- struct buffer_head * bhreq[NBUF];
- unsigned int chars;
- loff_t size;
- kdev_t dev;
- ssize_t read;
- dev = inode->i_rdev;
- blocksize = BLOCK_SIZE;
- if (blksize_size[MAJOR(dev)] && blksize_size[MAJOR(dev)][MINOR(dev)])
- blocksize = blksize_size[MAJOR(dev)][MINOR(dev)];
- i = blocksize;
- blocksize_bits = 0;
- while (i != 1) {
- blocksize_bits++;
- i >>= 1;
- }
-
- offset = *ppos;
- if (blk_size[MAJOR(dev)])
- size = (loff_t) blk_size[MAJOR(dev)][MINOR(dev)] << BLOCK_SIZE_BITS;
- else
- size = (loff_t) INT_MAX << BLOCK_SIZE_BITS;
-
- if (offset > size)
- left = 0;
- /* size - offset might not fit into left, so check explicitly. */
- else if (size - offset > INT_MAX)
- left = INT_MAX;
- else
- left = size - offset;
- if (left > count)
- left = count;
- if (left <= 0)
+ arr[nr] = bh;
+ nr++;
+ } while (i++, iblock++, (bh = bh->b_this_page) != head);
+
+ if (!nr) {
+ /*
+ * all buffers are uptodate - we can set the page
+ * uptodate as well.
+ */
+ SetPageUptodate(page);
+ UnlockPage(page);
return 0;
- read = 0;
- block = offset >> blocksize_bits;
- offset &= blocksize-1;
- size >>= blocksize_bits;
- rblocks = blocks = (left + offset + blocksize - 1) >> blocksize_bits;
- bhb = bhe = buflist;
- if (filp->f_reada) {
- if (blocks < read_ahead[MAJOR(dev)] / (blocksize >> 9))
- blocks = read_ahead[MAJOR(dev)] / (blocksize >> 9);
- if (rblocks > blocks)
- blocks = rblocks;
-
- }
- if (block + blocks > size) {
- blocks = size - block;
- if (blocks == 0)
- return 0;
- }
-
- /* We do this in a two stage process. We first try to request
- as many blocks as we can, then we wait for the first one to
- complete, and then we try to wrap up as many as are actually
- done. This routine is rather generic, in that it can be used
- in a filesystem by substituting the appropriate function in
- for getblk.
+ }
- This routine is optimized to make maximum use of the various
- buffers and caches. */
+ /* Stage two: lock the buffers */
+ for (i = 0; i < nr; i++) {
+ struct buffer_head * bh = arr[i];
+ lock_buffer(bh);
+ set_buffer_async_io(bh);
+ }
- do {
- bhrequest = 0;
- uptodate = 1;
- while (blocks) {
- --blocks;
- *bhb = getblk(dev, block++, blocksize);
- if (*bhb && !buffer_uptodate(*bhb)) {
- uptodate = 0;
- bhreq[bhrequest++] = *bhb;
- }
+ /* Stage 3: start the IO */
+ for (i = 0; i < nr; i++)
+ submit_bh(READ, arr[i]);
- if (++bhb == &buflist[NBUF])
- bhb = buflist;
+ return 0;
+}
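
Also worth noting in blkdev_readpage(): a buffer that cannot be mapped (one past the end of the device) does not fail the read; it is zero-filled and marked uptodate, and only the buffers that genuinely need device I/O are collected into arr[] and submitted. A user-space sketch of that per-block policy, with hypothetical names:

    #include <string.h>

    /* queue_read() is a hypothetical stand-in for submit_bh(READ, ...). */
    extern void queue_read(unsigned long blocknr, unsigned char *dst);

    static void read_block_sketch(unsigned long iblock,
                                  unsigned long dev_blocks,
                                  unsigned char *dst, size_t blksize)
    {
            if (iblock >= dev_blocks) {
                    memset(dst, 0, blksize);  /* unmappable: hand back zeroes */
                    return;                   /* counts as uptodate */
            }
            queue_read(iblock, dst);          /* real I/O only when needed */
    }
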
- /* If the block we have on hand is uptodate, go ahead
- and complete processing. */
- if (uptodate)
- break;
- if (bhb == bhe)
- break;
- }
-
- /* Now request them all */
- if (bhrequest) {
- ll_rw_block(READ, bhrequest, bhreq);
- }
-
- do { /* Finish off all I/O that has actually completed */
- if (*bhe) {
- wait_on_buffer(*bhe);
- if (!buffer_uptodate(*bhe)) { /* read error? */
- brelse(*bhe);
- if (++bhe == &buflist[NBUF])
- bhe = buflist;
- left = 0;
- break;
- }
- }
- if (left < blocksize - offset)
- chars = left;
- else
- chars = blocksize - offset;
- *ppos += chars;
- left -= chars;
- read += chars;
- if (*bhe) {
- copy_to_user(buf,offset+(*bhe)->b_data,chars);
- brelse(*bhe);
- buf += chars;
- } else {
- while (chars-- > 0)
- put_user(0,buf++);
- }
- offset = 0;
- if (++bhe == &buflist[NBUF])
- bhe = buflist;
- } while (left > 0 && bhe != bhb && (!*bhe || !buffer_locked(*bhe)));
- if (bhe == bhb && !blocks)
+static int __blkdev_prepare_write(struct inode *inode, struct page *page,
+ unsigned from, unsigned to)
+{
+ kdev_t dev = inode->i_rdev;
+ unsigned block_start, block_end;
+ unsigned long block;
+ int err = 0;
+ struct buffer_head *bh, *head, *wait[2], **wait_bh=wait;
+ kmap(page);
+
+ if (!page->buffers)
+ create_empty_buffers(page, dev, BUFFERED_BLOCKSIZE);
+ head = page->buffers;
+
+ block = page->index << (PAGE_CACHE_SHIFT - BUFFERED_BLOCKSIZE_BITS);
+
+ for(bh = head, block_start = 0; bh != head || !block_start;
+ block++, block_start=block_end, bh = bh->b_this_page) {
+ if (!bh)
+ BUG();
+ block_end = block_start + BUFFERED_BLOCKSIZE;
+ if (block_end <= from)
+ continue;
+ if (block_start >= to)
break;
- } while (left > 0);
+ if (!buffer_mapped(bh)) {
+ err = blkdev_get_block(inode, block, bh);
+ if (err)
+ goto out;
+ }
+ if (Page_Uptodate(page)) {
+ set_bit(BH_Uptodate, &bh->b_state);
+ continue;
+ }
+ if (!buffer_uptodate(bh) &&
+ (block_start < from || block_end > to)) {
+ ll_rw_block(READ, 1, &bh);
+ *wait_bh++=bh;
+ }
+ }
+ /*
+ * If we issued read requests - let them complete.
+ */
+ while(wait_bh > wait) {
+ wait_on_buffer(*--wait_bh);
+ err = -EIO;
+ if (!buffer_uptodate(*wait_bh))
+ goto out;
+ }
+ return 0;
+out:
+ return err;
+}
+
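
The core of __blkdev_prepare_write() is the read-modify-write rule: a buffer must be read from disk before the copy only when it is not already uptodate and the write range [from, to) does not cover it completely; otherwise the bytes around the written region would be lost. The predicate, extracted into a standalone sketch:

    /* Does this block need a pre-read before a write of [from, to)? */
    static int needs_preread(unsigned block_start, unsigned block_end,
                             unsigned from, unsigned to, int uptodate)
    {
            if (uptodate)
                    return 0;   /* old contents are already in memory */
            /* pre-read iff some part of the block lies outside the write */
            return block_start < from || block_end > to;
    }
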
+static int blkdev_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to)
+{
+ struct inode *inode = page->mapping->host;
+ int err = __blkdev_prepare_write(inode, page, from, to);
+ if (err) {
+ ClearPageUptodate(page);
+ kunmap(page);
+ }
+ return err;
+}
+
+static int __blkdev_commit_write(struct inode *inode, struct page *page,
+ unsigned from, unsigned to)
+{
+ unsigned block_start, block_end;
+ int partial = 0, need_balance_dirty = 0;
+ struct buffer_head *bh, *head;
-/* Release the read-ahead blocks */
- while (bhe != bhb) {
- brelse(*bhe);
- if (++bhe == &buflist[NBUF])
- bhe = buflist;
- };
- if (!read)
- return -EIO;
- filp->f_reada = 1;
- return read;
+ for(bh = head = page->buffers, block_start = 0;
+ bh != head || !block_start;
+ block_start=block_end, bh = bh->b_this_page) {
+ block_end = block_start + BUFFERED_BLOCKSIZE;
+ if (block_end <= from || block_start >= to) {
+ if (!buffer_uptodate(bh))
+ partial = 1;
+ } else {
+ set_bit(BH_Uptodate, &bh->b_state);
+ if (!atomic_set_buffer_dirty(bh)) {
+ __mark_dirty(bh);
+ buffer_insert_inode_data_queue(bh, inode);
+ need_balance_dirty = 1;
+ }
+ }
+ }
+
+ if (need_balance_dirty)
+ balance_dirty();
+ /*
+	 * If this is a partial write that happened to make all buffers
+	 * uptodate, then we can optimize away a bogus readpage() for
+	 * the next read(). Here we 'discover' whether the page went
+	 * uptodate as a result of this (potentially partial) write.
+ */
+ if (!partial)
+ SetPageUptodate(page);
+ return 0;
+}
+
+static int blkdev_commit_write(struct file *file, struct page *page,
+ unsigned from, unsigned to)
+{
+ struct inode *inode = page->mapping->host;
+ __blkdev_commit_write(inode,page,from,to);
+ kunmap(page);
+ return 0;
}
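
The partial flag computed in __blkdev_commit_write() above decides whether the whole page may be marked uptodate after the write: it may, exactly when every buffer outside the written range was already uptodate. Condensed into a standalone sketch (hypothetical representation of the per-buffer state):

    /* May the page be marked uptodate after writing buffers
     * [first_written, last_written]?  uptodate[] is the prior state. */
    static int page_now_uptodate(const int *uptodate, int nr,
                                 int first_written, int last_written)
    {
            int i;

            for (i = 0; i < nr; i++) {
                    if (i >= first_written && i <= last_written)
                            continue;       /* just written: now uptodate */
                    if (!uptodate[i])
                            return 0;       /* untouched buffer still stale */
            }
            return 1;
    }
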
/*
@@ -354,6 +342,17 @@
}
+static int __block_fsync(struct inode * inode)
+{
+ int ret;
+
+ filemap_fdatasync(inode->i_mapping);
+ ret = sync_buffers(inode->i_rdev, 1);
+ filemap_fdatawait(inode->i_mapping);
+
+ return ret;
+}
+
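
The ordering in __block_fsync() lets the two flushes overlap rather than run back to back: page writeback is started asynchronously, the buffer cache is flushed synchronously while that I/O is in flight, and only then does the caller wait for the pages. Shape of the pattern, with hypothetical helper names:

    extern void start_async_flush(void *mapping);  /* filemap_fdatasync() role */
    extern int  flush_buffers_sync(int dev);       /* sync_buffers() role */
    extern void wait_for_flush(void *mapping);     /* filemap_fdatawait() role */

    static int fsync_sketch(void *mapping, int dev)
    {
            int ret;

            start_async_flush(mapping);     /* kick page writeback, don't wait */
            ret = flush_buffers_sync(dev);  /* flush buffers meanwhile */
            wait_for_flush(mapping);        /* now collect the page I/O */
            return ret;
    }
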
/*
* Filp may be NULL when we are called by an msync of a vma
* since the vma has no handle.
@@ -361,9 +360,43 @@
static int block_fsync(struct file *filp, struct dentry *dentry, int datasync)
{
- return fsync_dev(dentry->d_inode->i_rdev);
+ struct inode * inode = dentry->d_inode;
+
+ return __block_fsync(inode);
+}
+
+/*
+ * pseudo-fs
+ */
+
+static struct super_block *bd_read_super(struct super_block *sb, void *data, int silent)
+{
+ static struct super_operations sops = {};
+ struct inode *root = new_inode(sb);
+ if (!root)
+ return NULL;
+ root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR;
+ root->i_uid = root->i_gid = 0;
+ root->i_atime = root->i_mtime = root->i_ctime = CURRENT_TIME;
+ sb->s_blocksize = 1024;
+ sb->s_blocksize_bits = 10;
+ sb->s_magic = 0x62646576;
+ sb->s_op = &sops;
+ sb->s_root = d_alloc(NULL, &(const struct qstr) { "bdev:", 5, 0 });
+ if (!sb->s_root) {
+ iput(root);
+ return NULL;
+ }
+ sb->s_root->d_sb = sb;
+ sb->s_root->d_parent = sb->s_root;
+ d_instantiate(sb->s_root, root);
+ return sb;
}
+static DECLARE_FSTYPE(bd_type, "bdev", bd_read_super, FS_NOMOUNT);
+
+static struct vfsmount *bd_mnt;
+
/*
* bdev cache handling - shamelessly stolen from inode.c
* We use smaller hashtable, though.
@@ -389,12 +422,13 @@
{
memset(bdev, 0, sizeof(*bdev));
sema_init(&bdev->bd_sem, 1);
+ INIT_LIST_HEAD(&bdev->bd_inodes);
}
}
void __init bdev_cache_init(void)
{
- int i;
+ int i, err;
struct list_head *head = bdev_hashtable;
i = HASH_SIZE;
@@ -410,6 +444,13 @@
NULL);
if (!bdev_cachep)
panic("Cannot create bdev_cache SLAB cache");
+ err = register_filesystem(&bd_type);
+ if (err)
+ panic("Cannot register bdev pseudo-fs");
+ bd_mnt = kern_mount(&bd_type);
+ err = PTR_ERR(bd_mnt);
+ if (IS_ERR(bd_mnt))
+ panic("Cannot create bdev pseudo-fs");
}
/*
@@ -447,34 +488,87 @@
if (bdev)
return bdev;
new_bdev = alloc_bdev();
- if (!new_bdev)
- return NULL;
- atomic_set(&new_bdev->bd_count,1);
- new_bdev->bd_dev = dev;
- new_bdev->bd_op = NULL;
- spin_lock(&bdev_lock);
- bdev = bdfind(dev, head);
- if (!bdev) {
- list_add(&new_bdev->bd_hash, head);
- spin_unlock(&bdev_lock);
- return new_bdev;
+ if (new_bdev) {
+ struct inode *inode = new_inode(bd_mnt->mnt_sb);
+ if (inode) {
+ atomic_set(&new_bdev->bd_count,1);
+ new_bdev->bd_dev = dev;
+ new_bdev->bd_op = NULL;
+ new_bdev->bd_inode = inode;
+ inode->i_rdev = to_kdev_t(dev);
+ inode->i_bdev = new_bdev;
+ inode->i_data.a_ops = &def_blk_aops;
+ spin_lock(&bdev_lock);
+ bdev = bdfind(dev, head);
+ if (!bdev) {
+ list_add(&new_bdev->bd_hash, head);
+ spin_unlock(&bdev_lock);
+ return new_bdev;
+ }
+ spin_unlock(&bdev_lock);
+ iput(new_bdev->bd_inode);
+ }
+ destroy_bdev(new_bdev);
}
- spin_unlock(&bdev_lock);
- destroy_bdev(new_bdev);
return bdev;
}
+static inline void __bd_forget(struct inode *inode)
+{
+ list_del_init(&inode->i_devices);
+ inode->i_bdev = NULL;
+ inode->i_mapping = &inode->i_data;
+}
+
void bdput(struct block_device *bdev)
{
- if (atomic_dec_and_test(&bdev->bd_count)) {
- spin_lock(&bdev_lock);
- if (atomic_read(&bdev->bd_openers))
+ if (atomic_dec_and_lock(&bdev->bd_count, &bdev_lock)) {
+ struct list_head *p;
+ if (bdev->bd_openers)
BUG();
list_del(&bdev->bd_hash);
+ while ( (p = bdev->bd_inodes.next) != &bdev->bd_inodes ) {
+ __bd_forget(list_entry(p, struct inode, i_devices));
+ }
spin_unlock(&bdev_lock);
+ iput(bdev->bd_inode);
destroy_bdev(bdev);
}
}
+
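
bdput() now uses atomic_dec_and_lock(): the hash lock is acquired if and only if this decrement takes the reference count to zero, closing the window in which another CPU could look the device up in the hash between the final drop and the unhash. A semantically equivalent slow-path sketch in user-space terms (the kernel primitive additionally avoids the lock entirely while the count stays above one):

    #include <pthread.h>

    struct obj { int count; };  /* transitions guarded by table_lock */
    static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;

    /* Returns 1 with table_lock held when the last reference is dropped;
     * the caller then unhashes the object and unlocks. */
    static int dec_and_lock(struct obj *o)
    {
            pthread_mutex_lock(&table_lock);
            if (--o->count == 0)
                    return 1;
            pthread_mutex_unlock(&table_lock);
            return 0;
    }
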
+int bd_acquire(struct inode *inode)
+{
+ struct block_device *bdev;
+ spin_lock(&bdev_lock);
+ if (inode->i_bdev) {
+ atomic_inc(&inode->i_bdev->bd_count);
+ spin_unlock(&bdev_lock);
+ return 0;
+ }
+ spin_unlock(&bdev_lock);
+ bdev = bdget(kdev_t_to_nr(inode->i_rdev));
+ if (!bdev)
+ return -ENOMEM;
+ spin_lock(&bdev_lock);
+ if (!inode->i_bdev) {
+ inode->i_bdev = bdev;
+ inode->i_mapping = bdev->bd_inode->i_mapping;
+ list_add(&inode->i_devices, &bdev->bd_inodes);
+ } else if (inode->i_bdev != bdev)
+ BUG();
+ spin_unlock(&bdev_lock);
+ return 0;
+}
+
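
bd_acquire() is a check/unlock/allocate/lock/re-check sequence: bdget() can sleep, so the spinlock is dropped around the allocation and the i_bdev test is repeated afterwards, because another task may have installed the pointer in the meantime (two tasks racing on the same inode get the same hashed bdev back, so losing the race is harmless). Skeleton of the pattern, with hypothetical names:

    extern void take_lock(void), drop_lock(void);
    extern void *lookup_or_alloc(void);  /* may sleep, like bdget() */

    static void *install_once(void **slot)
    {
            void *obj;

            take_lock();
            if (*slot) {
                    drop_lock();
                    return *slot;        /* fast path: already installed */
            }
            drop_lock();

            obj = lookup_or_alloc();     /* cannot hold a spinlock here */

            take_lock();
            if (!*slot)
                    *slot = obj;         /* we won the race */
            /* else another task installed it first; both hold the same
             * hashed object, so there is nothing to undo */
            drop_lock();
            return *slot;
    }
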
+/* Call this when you free an inode. */
+
+void bd_forget(struct inode *inode)
+{
+ spin_lock(&bdev_lock);
+ if (inode->i_bdev)
+ __bd_forget(inode);
+ spin_unlock(&bdev_lock);
+}
static struct {
const char *name;
@@ -595,18 +689,13 @@
int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg)
{
- struct inode inode_fake;
int res;
mm_segment_t old_fs = get_fs();
if (!bdev->bd_op->ioctl)
return -EINVAL;
- memset(&inode_fake, 0, sizeof(inode_fake));
- inode_fake.i_rdev = to_kdev_t(bdev->bd_dev);
- inode_fake.i_bdev = bdev;
- init_waitqueue_head(&inode_fake.i_wait);
set_fs(KERNEL_DS);
- res = bdev->bd_op->ioctl(&inode_fake, NULL, cmd, arg);
+ res = bdev->bd_op->ioctl(bdev->bd_inode, NULL, cmd, arg);
set_fs(old_fs);
return res;
}
@@ -616,6 +705,8 @@
int ret = -ENODEV;
kdev_t rdev = to_kdev_t(bdev->bd_dev); /* this should become bdev */
down(&bdev->bd_sem);
+
+ lock_kernel();
if (!bdev->bd_op)
bdev->bd_op = get_blkfops(MAJOR(rdev));
if (bdev->bd_op) {
@@ -627,33 +718,44 @@
*/
struct file fake_file = {};
struct dentry fake_dentry = {};
- struct inode *fake_inode = get_empty_inode();
ret = -ENOMEM;
- if (fake_inode) {
- fake_file.f_mode = mode;
- fake_file.f_flags = flags;
- fake_file.f_dentry = &fake_dentry;
- fake_dentry.d_inode = fake_inode;
- fake_inode->i_rdev = rdev;
- ret = 0;
- if (bdev->bd_op->open)
- ret = bdev->bd_op->open(fake_inode, &fake_file);
- if (!ret)
- atomic_inc(&bdev->bd_openers);
- else if (!atomic_read(&bdev->bd_openers))
- bdev->bd_op = NULL;
- iput(fake_inode);
- }
+ fake_file.f_mode = mode;
+ fake_file.f_flags = flags;
+ fake_file.f_dentry = &fake_dentry;
+ fake_dentry.d_inode = bdev->bd_inode;
+ ret = 0;
+ if (bdev->bd_op->open)
+ ret = bdev->bd_op->open(bdev->bd_inode, &fake_file);
+ if (!ret) {
+ bdev->bd_openers++;
+ } else if (!bdev->bd_openers)
+ bdev->bd_op = NULL;
}
+ unlock_kernel();
up(&bdev->bd_sem);
+ if (ret)
+ bdput(bdev);
return ret;
}
int blkdev_open(struct inode * inode, struct file * filp)
{
- int ret = -ENXIO;
- struct block_device *bdev = inode->i_bdev;
+ int ret;
+ struct block_device *bdev;
+
+ /*
+ * Preserve backwards compatibility and allow large file access
+	 * even if userspace doesn't ask for it explicitly. Some mkfs
+	 * binaries need it. We might want to drop this workaround
+	 * during an unstable branch.
+ */
+ filp->f_flags |= O_LARGEFILE;
+
+ bd_acquire(inode);
+ bdev = inode->i_bdev;
down(&bdev->bd_sem);
+
+ ret = -ENXIO;
lock_kernel();
if (!bdev->bd_op)
bdev->bd_op = get_blkfops(MAJOR(inode->i_rdev));
@@ -662,12 +764,14 @@
if (bdev->bd_op->open)
ret = bdev->bd_op->open(inode,filp);
if (!ret)
- atomic_inc(&bdev->bd_openers);
- else if (!atomic_read(&bdev->bd_openers))
+ bdev->bd_openers++;
+ else if (!bdev->bd_openers)
bdev->bd_op = NULL;
}
unlock_kernel();
up(&bdev->bd_sem);
+ if (ret)
+ bdput(bdev);
return ret;
}
@@ -675,35 +779,46 @@
{
int ret = 0;
kdev_t rdev = to_kdev_t(bdev->bd_dev); /* this should become bdev */
+ struct inode *bd_inode = bdev->bd_inode;
+
down(&bdev->bd_sem);
- /* syncing will go here */
lock_kernel();
- if (kind == BDEV_FILE)
- fsync_dev(rdev);
- else if (kind == BDEV_FS)
+ if (kind == BDEV_FILE) {
+ struct super_block * sb;
+
+ __block_fsync(bd_inode);
+
+ /* Janitorianism: this shit must go away */
+ sb = get_super(bd_inode->i_rdev);
+ if (sb) {
+ if (sb->s_flags & MS_RDONLY) {
+ shrink_dcache_sb(sb);
+ invalidate_inodes(sb);
+ invalidate_buffers(bd_inode->i_rdev);
+ }
+ lock_super(sb);
+ if (sb->s_flags & MS_RDONLY)
+ update_buffers(bd_inode->i_rdev);
+ unlock_super(sb);
+ drop_super(sb);
+ }
+ } else if (kind == BDEV_FS)
fsync_no_super(rdev);
- if (atomic_dec_and_test(&bdev->bd_openers)) {
- /* invalidating buffers will go here */
+ if (!--bdev->bd_openers) {
+ truncate_inode_pages(bd_inode->i_mapping, 0);
invalidate_buffers(rdev);
}
- if (bdev->bd_op->release) {
- struct inode * fake_inode = get_empty_inode();
- ret = -ENOMEM;
- if (fake_inode) {
- fake_inode->i_rdev = rdev;
- ret = bdev->bd_op->release(fake_inode, NULL);
- iput(fake_inode);
- }
- }
- if (!atomic_read(&bdev->bd_openers))
- bdev->bd_op = NULL; /* we can't rely on driver being */
- /* kind to stay around. */
+ if (bdev->bd_op->release)
+ ret = bdev->bd_op->release(bd_inode, NULL);
+ if (!bdev->bd_openers)
+ bdev->bd_op = NULL;
unlock_kernel();
up(&bdev->bd_sem);
+ bdput(bdev);
return ret;
}
-static int blkdev_close(struct inode * inode, struct file * filp)
+int blkdev_close(struct inode * inode, struct file * filp)
{
return blkdev_put(inode->i_bdev, BDEV_FILE);
}
@@ -716,12 +831,22 @@
return -EINVAL;
}
+struct address_space_operations def_blk_aops = {
+ readpage: blkdev_readpage,
+ writepage: blkdev_writepage,
+ sync_page: block_sync_page,
+ prepare_write: blkdev_prepare_write,
+ commit_write: blkdev_commit_write,
+ direct_IO: blkdev_direct_IO,
+};
+
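
The `readpage:` initializer style in these tables is the pre-C99 GCC extension used throughout 2.4-era kernels. For readers comparing with later trees, the standard C99 spelling of the same table would be:

    struct address_space_operations def_blk_aops = {
            .readpage      = blkdev_readpage,
            .writepage     = blkdev_writepage,
            .sync_page     = block_sync_page,
            .prepare_write = blkdev_prepare_write,
            .commit_write  = blkdev_commit_write,
            .direct_IO     = blkdev_direct_IO,
    };
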
struct file_operations def_blk_fops = {
open: blkdev_open,
release: blkdev_close,
llseek: block_llseek,
- read: block_read,
- write: block_write,
+ read: generic_file_read,
+ write: generic_file_write,
+ mmap: generic_file_mmap,
fsync: block_fsync,
ioctl: blkdev_ioctl,
};