patch-2.3.51 linux/ipc/shm.c
- Lines: 1626
- Date: Fri Mar 10 10:48:52 2000
- Orig file: v2.3.50/linux/ipc/shm.c
- Orig date: Tue Mar 7 14:32:26 2000
diff -u --recursive --new-file v2.3.50/linux/ipc/shm.c linux/ipc/shm.c
@@ -12,6 +12,16 @@
* avoid vmalloc and make shmmax, shmall, shmmni sysctl'able,
* Christoph Rohland <hans-christoph.rohland@sap.com>
* Shared /dev/zero support, Kanoj Sarcar <kanoj@sgi.com>
+ * make it a file system, Christoph Rohland <hans-christoph.rohland@sap.com>
+ *
+ * The filesystem has the following restrictions/bugs:
+ * 1) It can only handle one directory.
+ * 2) Because the directory is represented by the SYSV shm array it
+ * can only be mounted once.
+ * 3) This in turn means SYSV shm does not work properly in a chrooted
+ * environment.
+ * 4) Read and write are not implemented (should they be?)
+ * 5) No special nodes are supported.
*/
#include <linux/config.h>
@@ -20,6 +30,9 @@
#include <linux/swap.h>
#include <linux/smp_lock.h>
#include <linux/init.h>
+#include <linux/locks.h>
+#include <linux/file.h>
+#include <linux/mman.h>
#include <linux/vmalloc.h>
#include <linux/pagemap.h>
#include <linux/proc_fs.h>
@@ -30,23 +43,61 @@
#include "util.h"
+static struct super_block *shm_read_super(struct super_block *,void *, int);
+static void shm_put_super (struct super_block *);
+static int shm_remount_fs (struct super_block *, int *, char *);
+static void shm_read_inode (struct inode *);
+static void shm_write_inode(struct inode *);
+static int shm_statfs (struct super_block *, struct statfs *);
+static int shm_create (struct inode *,struct dentry *,int);
+static struct dentry *shm_lookup (struct inode *,struct dentry *);
+static int shm_unlink (struct inode *,struct dentry *);
+static int shm_setattr (struct dentry *dent, struct iattr *attr);
+static void shm_delete (struct inode *);
+static int shm_mmap (struct file *, struct vm_area_struct *);
+static int shm_readdir (struct file *, void *, filldir_t);
+
+char shm_path[256] = "/var/shm";
+
+#define SHM_NAME_LEN NAME_MAX
+#define SHM_FMT ".IPC_%08x"
+#define SHM_FMT_LEN 13
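
[The SHM_FMT_LEN value of 13 can be checked by hand: the leading '.' plus "IPC_" is five characters and %08x always prints eight hex digits. A minimal userspace sketch, for illustration only:]

    #include <stdio.h>

    int main(void)
    {
            char buf[32];
            int n = sprintf(buf, ".IPC_%08x", 0x1234);  /* same pattern as SHM_FMT */
            printf("%d %s\n", n, buf);                  /* prints: 13 .IPC_00001234 */
            return 0;
    }
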
+
struct shmid_kernel /* private to the kernel */
{
struct kern_ipc_perm shm_perm;
size_t shm_segsz;
- time_t shm_atime;
- time_t shm_dtime;
- time_t shm_ctime;
- pid_t shm_cpid;
- pid_t shm_lpid;
unsigned long shm_nattch;
unsigned long shm_npages; /* size of segment (pages) */
- pte_t **shm_dir; /* ptr to array of ptrs to frames -> SHMMAX */
- struct vm_area_struct *attaches; /* descriptors for attaches */
- int id; /* backreference to id for shm_close */
- struct semaphore sem;
+ pte_t **shm_dir; /* ptr to arr of ptrs to frames */
+ int id;
+ union permap {
+ struct shmem {
+ time_t atime;
+ time_t dtime;
+ time_t ctime;
+ pid_t cpid;
+ pid_t lpid;
+ int nlen;
+ char nm[0];
+ } shmem;
+ struct zero {
+ struct semaphore sema;
+ struct list_head list;
+ } zero;
+ } permap;
};
+#define shm_atim permap.shmem.atime
+#define shm_dtim permap.shmem.dtime
+#define shm_ctim permap.shmem.ctime
+#define shm_cprid permap.shmem.cpid
+#define shm_lprid permap.shmem.lpid
+#define shm_namelen permap.shmem.nlen
+#define shm_name permap.shmem.nm
+#define zsem permap.zero.sema
+#define zero_list permap.zero.list
+
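[The nm[0] member above is the zero-length-array idiom: the segment name lives in bytes allocated past the end of the struct. A sketch of how newseg_alloc() below uses it, kernel context assumed:]

    /* one kmalloc covers the struct plus the trailing name bytes */
    struct shmid_kernel *shp = kmalloc(sizeof(*shp) + namelen, GFP_KERNEL);
    if (shp) {
            shp->shm_namelen = namelen;
            memcpy(shp->shm_name, name, namelen);   /* fills nm[0]'s trailing bytes */
    }
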
static struct ipc_ids shm_ids;
#define shm_lock(id) ((struct shmid_kernel*)ipc_lock(&shm_ids,id))
@@ -60,9 +111,8 @@
#define shm_buildid(id, seq) \
ipc_buildid(&shm_ids, id, seq)
-static int newseg (key_t key, int shmflg, size_t size);
-static int shm_map (struct vm_area_struct *shmd);
-static void killseg (int shmid);
+static int newseg (key_t key, const char *name, int namelen, int shmflg, size_t size);
+static void killseg_core(struct shmid_kernel *shp, int doacc);
static void shm_open (struct vm_area_struct *shmd);
static void shm_close (struct vm_area_struct *shmd);
static struct page * shm_nopage(struct vm_area_struct *, unsigned long, int);
@@ -75,12 +125,57 @@
static void zmap_unuse(swp_entry_t entry, struct page *page);
static void shmzero_open(struct vm_area_struct *shmd);
static void shmzero_close(struct vm_area_struct *shmd);
+static struct page *shmzero_nopage(struct vm_area_struct * shmd, unsigned long address, int no_share);
static int zero_id;
static struct shmid_kernel zshmid_kernel;
+static struct dentry *zdent;
+
+#define SHM_FS_MAGIC 0x02011994
+
+static struct super_block * shm_sb;
+
+static DECLARE_FSTYPE(shm_fs_type, "shm", shm_read_super, 0);
+
+static struct super_operations shm_sops = {
+ read_inode: shm_read_inode,
+ write_inode: shm_write_inode,
+ delete_inode: shm_delete,
+ put_super: shm_put_super,
+ statfs: shm_statfs,
+ remount_fs: shm_remount_fs,
+};
+
+static struct file_operations shm_root_operations = {
+ readdir: shm_readdir,
+};
+
+static struct inode_operations shm_root_inode_operations = {
+ create: shm_create,
+ lookup: shm_lookup,
+ unlink: shm_unlink,
+};
+
+static struct file_operations shm_file_operations = {
+ mmap: shm_mmap,
+};
+
+static struct inode_operations shm_inode_operations = {
+ setattr: shm_setattr,
+};
+
+static struct vm_operations_struct shm_vm_ops = {
+ open: shm_open, /* callback for a new vm-area open */
+ close: shm_close, /* callback for when the vm-area is released */
+ nopage: shm_nopage,
+ swapout: shm_swapout,
+};
size_t shm_ctlmax = SHMMAX;
-int shm_ctlall = SHMALL;
-int shm_ctlmni = SHMMNI;
+
+/* These parameters should be part of the superblock */
+static int shm_ctlall;
+static int shm_ctlmni;
+static int shm_mode;
static int shm_tot = 0; /* total number of shared memory pages */
static int shm_rss = 0; /* number of shared memory pages that are in memory */
@@ -90,7 +185,7 @@
pagecache_lock
shm_lock()/shm_lockall()
kernel lock
- shp->sem
+ inode->i_sem
sem_ids.sem
mmap_sem
@@ -104,18 +199,318 @@
/* some statistics */
static ulong swap_attempts = 0;
static ulong swap_successes = 0;
+static ulong used_segs = 0;
void __init shm_init (void)
{
- ipc_init_ids(&shm_ids, shm_ctlmni);
+ ipc_init_ids(&shm_ids, 1);
+
+ register_filesystem (&shm_fs_type);
#ifdef CONFIG_PROC_FS
create_proc_read_entry("sysvipc/shm", 0, 0, sysvipc_shm_read_proc, NULL);
#endif
- zero_id = ipc_addid(&shm_ids, &zshmid_kernel.shm_perm, shm_ctlmni);
+ zero_id = ipc_addid(&shm_ids, &zshmid_kernel.shm_perm, 1);
shm_unlock(zero_id);
+ INIT_LIST_HEAD(&zshmid_kernel.zero_list);
+ zdent = d_alloc_root(get_empty_inode());
return;
}
+static int shm_parse_options(char *options)
+{
+ int blocks = shm_ctlall;
+ int inodes = shm_ctlmni;
+ umode_t mode = shm_mode;
+ char *this_char, *value;
+
+ this_char = NULL;
+ if ( options )
+ this_char = strtok(options,",");
+ for ( ; this_char; this_char = strtok(NULL,",")) {
+ if ((value = strchr(this_char,'=')) != NULL)
+ *value++ = 0;
+ if (!strcmp(this_char,"nr_blocks")) {
+ if (!value || !*value)
+ return 1;
+ blocks = simple_strtoul(value,&value,0);
+ if (*value)
+ return 1;
+ }
+ else if (!strcmp(this_char,"nr_inodes")) {
+ if (!value || !*value)
+ return 1;
+ inodes = simple_strtoul(value,&value,0);
+ if (*value)
+ return 1;
+ }
+ else if (!strcmp(this_char,"mode")) {
+ if (!value || !*value)
+ return 1;
+ mode = simple_strtoul(value,&value,8);
+ if (*value)
+ return 1;
+ }
+ else
+ return 1;
+ }
+ shm_ctlmni = inodes;
+ shm_ctlall = blocks;
+ shm_mode = mode;
+
+ return 0;
+}
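
[For illustration, the options parsed above could be supplied from userspace with mount(2); the option names match shm_parse_options(), while the mount point and values here are examples only:]

    #include <stdio.h>
    #include <sys/mount.h>

    int main(void)
    {
            if (mount("none", "/var/shm", "shm", 0,
                      "nr_blocks=4096,nr_inodes=128,mode=1777"))
                    perror("mount");
            return 0;
    }
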
+
+static struct super_block *shm_read_super(struct super_block *s,void *data,
+ int silent)
+{
+ struct inode * root_inode;
+
+ if (shm_sb) {
+ printk ("shm fs already mounted\n");
+ return NULL;
+ }
+
+ shm_ctlall = SHMALL;
+ shm_ctlmni = SHMMNI;
+ shm_mode = S_IRWXUGO | S_ISVTX;
+ if (shm_parse_options (data)) {
+ printk ("shm fs invalid option\n");
+ goto out_unlock;
+ }
+
+ s->s_blocksize = PAGE_SIZE;
+ s->s_blocksize_bits = PAGE_SHIFT;
+ s->s_magic = SHM_FS_MAGIC;
+ s->s_op = &shm_sops;
+ root_inode = iget (s, SEQ_MULTIPLIER);
+ if (!root_inode)
+ goto out_no_root;
+ root_inode->i_op = &shm_root_inode_operations;
+ root_inode->i_sb = s;
+ root_inode->i_nlink = 2;
+ root_inode->i_mode = S_IFDIR | shm_mode;
+ s->s_root = d_alloc_root(root_inode);
+ if (!s->s_root)
+ goto out_no_root;
+ s->u.generic_sbp = (void*) shm_sb;
+ shm_sb = s;
+ return s;
+
+out_no_root:
+ printk("proc_read_super: get root inode failed\n");
+ iput(root_inode);
+out_unlock:
+ return NULL;
+}
+
+static int shm_remount_fs (struct super_block *sb, int *flags, char *data)
+{
+ if (shm_parse_options (data))
+ return -EINVAL;
+ return 0;
+}
+
+static void shm_put_super(struct super_block *sb)
+{
+ struct super_block **p = &shm_sb;
+ int i;
+ struct shmid_kernel *shp;
+
+ while (*p != sb) {
+ if (!*p) /* should never happen */
+ return;
+ p = (struct super_block **)&(*p)->u.generic_sbp;
+ }
+ *p = (struct super_block *)(*p)->u.generic_sbp;
+ down(&shm_ids.sem);
+ for(i = 0; i <= shm_ids.max_id; i++) {
+ if (i == zero_id)
+ continue;
+ if (!(shp = shm_lock (i)))
+ continue;
+ if (shp->shm_nattch)
+ printk ("shm_nattch = %ld\n", shp->shm_nattch);
+ shp = shm_rmid(i);
+ shm_unlock(i);
+ killseg_core(shp, 1);
+ }
+ dput (sb->s_root);
+ up(&shm_ids.sem);
+}
+
+static int shm_statfs(struct super_block *sb, struct statfs *buf)
+{
+ buf->f_type = 0;
+ buf->f_bsize = PAGE_SIZE;
+ buf->f_blocks = shm_ctlall;
+ buf->f_bavail = buf->f_bfree = shm_ctlall - shm_tot;
+ buf->f_files = shm_ctlmni;
+ buf->f_ffree = shm_ctlmni - used_segs;
+ buf->f_namelen = SHM_NAME_LEN;
+ return 0;
+}
+
+static void shm_write_inode(struct inode * inode)
+{
+}
+
+static void shm_read_inode(struct inode * inode)
+{
+ int id;
+ struct shmid_kernel *shp;
+
+ id = inode->i_ino;
+ inode->i_op = NULL;
+ inode->i_mode = 0;
+ inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
+
+ if (id < SEQ_MULTIPLIER) {
+ if (!(shp = shm_lock (id)))
+ return;
+ inode->i_mode = shp->shm_perm.mode | S_IFREG;
+ inode->i_uid = shp->shm_perm.uid;
+ inode->i_gid = shp->shm_perm.gid;
+ inode->i_size = shp->shm_segsz;
+ shm_unlock (id);
+ inode->i_op = &shm_inode_operations;
+ inode->i_fop = &shm_file_operations;
+ return;
+ }
+ inode->i_op = &shm_root_inode_operations;
+ inode->i_fop = &shm_root_operations;
+ inode->i_sb = shm_sb;
+ inode->i_nlink = 2;
+ inode->i_mode = S_IFDIR | shm_mode;
+ inode->i_uid = inode->i_gid = 0;
+
+}
+
+static int shm_create (struct inode *dir, struct dentry *dent, int mode)
+{
+ int id, err;
+ struct inode * inode;
+
+ down(&shm_ids.sem);
+ err = id = newseg (IPC_PRIVATE, dent->d_name.name, dent->d_name.len, mode, 0);
+ if (err < 0)
+ goto out;
+
+ err = -ENOMEM;
+ inode = iget (shm_sb, id % SEQ_MULTIPLIER);
+ if (!inode)
+ goto out;
+
+ err = 0;
+ down (&inode->i_sem);
+ inode->i_mode = mode | S_IFREG;
+ inode->i_op = &shm_inode_operations;
+ d_instantiate(dent, inode);
+ up (&inode->i_sem);
+
+out:
+ up(&shm_ids.sem);
+ return err;
+}
+
+static int shm_readdir (struct file *filp, void *dirent, filldir_t filldir)
+{
+ struct inode * inode = filp->f_dentry->d_inode;
+ struct shmid_kernel *shp;
+ off_t nr;
+
+ nr = filp->f_pos;
+
+ switch(nr)
+ {
+ case 0:
+ if (filldir(dirent, ".", 1, nr, inode->i_ino) < 0)
+ return 0;
+ filp->f_pos = ++nr;
+ /* fall through */
+ case 1:
+ if (filldir(dirent, "..", 2, nr, inode->i_ino) < 0)
+ return 0;
+ filp->f_pos = ++nr;
+ /* fall through */
+ default:
+ down(&shm_ids.sem);
+ for (; nr-2 <= shm_ids.max_id; nr++ ) {
+ if (!(shp = shm_get (nr-2)))
+ continue;
+ if (shp->shm_perm.mode & SHM_DEST)
+ continue;
+ if (filldir(dirent, shp->shm_name, shp->shm_namelen, nr, nr) < 0 )
+ break;
+ }
+ filp->f_pos = nr;
+ up(&shm_ids.sem);
+ break;
+ }
+
+ UPDATE_ATIME(inode);
+ return 0;
+}
+
+static struct dentry *shm_lookup (struct inode *dir, struct dentry *dent)
+{
+ int i, err = 0;
+ struct shmid_kernel* shp;
+ struct inode *inode = NULL;
+
+ if (dent->d_name.len > SHM_NAME_LEN)
+ return ERR_PTR(-ENAMETOOLONG);
+
+ down(&shm_ids.sem);
+ for(i = 0; i <= shm_ids.max_id; i++) {
+ if (!(shp = shm_lock(i)))
+ continue;
+ if (!(shp->shm_perm.mode & SHM_DEST) &&
+ dent->d_name.len == shp->shm_namelen &&
+ strncmp(dent->d_name.name, shp->shm_name, shp->shm_namelen) == 0)
+ goto found;
+ shm_unlock(i);
+ }
+
+ /*
+ * Prevent the reserved names from becoming negative dentries.
+ * This also prevents object creation through the filesystem.
+ */
+ if (dent->d_name.len == SHM_FMT_LEN &&
+ memcmp (SHM_FMT, dent->d_name.name, SHM_FMT_LEN - 8) == 0)
+ err = -EINVAL; /* EINVAL to give IPC_RMID the right error */
+
+ goto out;
+
+found:
+ shm_unlock(i);
+ inode = iget(dir->i_sb, i);
+
+ if (!inode)
+ err = -EACCES;
+out:
+ if (err == 0)
+ d_add (dent, inode);
+ up (&shm_ids.sem);
+ return ERR_PTR(err);
+}
+
+static int shm_unlink (struct inode *dir, struct dentry *dent)
+{
+ struct inode * inode = dent->d_inode;
+ struct shmid_kernel *shp;
+
+ down (&shm_ids.sem);
+ if (!(shp = shm_lock (inode->i_ino)))
+ BUG();
+ shp->shm_perm.mode |= SHM_DEST;
+ shp->shm_perm.key = IPC_PRIVATE; /* Do not find it any more */
+ shm_unlock (inode->i_ino);
+ up (&shm_ids.sem);
+ inode->i_nlink -= 1;
+ d_delete (dent);
+ return 0;
+}
+
#define SHM_ENTRY(shp, index) (shp)->shm_dir[(index)/PTRS_PER_PTE][(index)%PTRS_PER_PTE]
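
[A worked example of the two-level lookup in SHM_ENTRY, assuming PTRS_PER_PTE is 1024 as on i386: page index 2500 maps to shm_dir[2][452].]

    #include <stdio.h>

    #define PTRS_PER_PTE 1024       /* i386 value; assumption for illustration */

    int main(void)
    {
            unsigned int idx = 2500;
            printf("shm_dir[%u][%u]\n", idx / PTRS_PER_PTE, idx % PTRS_PER_PTE);
            return 0;               /* prints: shm_dir[2][452] */
    }
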
static pte_t **shm_alloc(unsigned long pages)
@@ -124,9 +519,12 @@
unsigned short last = pages % PTRS_PER_PTE;
pte_t **ret, **ptr;
+ if (pages == 0)
+ return NULL;
+
ret = kmalloc ((dir+1) * sizeof(pte_t *), GFP_KERNEL);
if (!ret)
- goto out;
+ goto nomem;
for (ptr = ret; ptr < ret+dir ; ptr++)
{
@@ -143,7 +541,6 @@
goto free;
memset (*ptr, 0, last*sizeof(pte_t));
}
-out:
return ret;
free:
@@ -152,48 +549,90 @@
free_page ((unsigned long)*ptr);
kfree (ret);
- return NULL;
+nomem:
+ return ERR_PTR(-ENOMEM);
}
-
static void shm_free(pte_t** dir, unsigned long pages)
{
pte_t **ptr = dir+pages/PTRS_PER_PTE;
+ if (!dir)
+ return;
+
/* first the last page */
if (pages%PTRS_PER_PTE)
kfree (*ptr);
/* now the whole pages */
while (--ptr >= dir)
- free_page ((unsigned long)*ptr);
+ if (*ptr)
+ free_page ((unsigned long)*ptr);
/* Now the indirect block */
kfree (dir);
}
-static int shm_revalidate(struct shmid_kernel* shp, int shmid, int pagecount, int flg)
+static int shm_setattr (struct dentry *dentry, struct iattr *attr)
{
- struct shmid_kernel* new;
- new = shm_lock(shmid);
- if(new==NULL) {
- return -EIDRM;
- }
- if(new!=shp || shm_checkid(shp, shmid) || shp->shm_npages != pagecount) {
- shm_unlock(shmid);
- return -EIDRM;
- }
- if (ipcperms(&shp->shm_perm, flg)) {
- shm_unlock(shmid);
- return -EACCES;
+ int error;
+ struct inode *inode = dentry->d_inode;
+ struct shmid_kernel *shp;
+ unsigned long new_pages, old_pages;
+ pte_t **new_dir, **old_dir;
+
+ if ((error = inode_change_ok(inode, attr)))
+ return error;
+ if (!(attr->ia_valid & ATTR_SIZE))
+ goto set_attr;
+ if (attr->ia_size > shm_ctlmax)
+ return -EFBIG;
+
+ /* We set old_pages and old_dir for easier cleanup */
+ old_pages = new_pages = (attr->ia_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ if (shm_tot + new_pages >= shm_ctlall)
+ return -ENOSPC;
+ if (IS_ERR(old_dir = new_dir = shm_alloc(new_pages)))
+ return PTR_ERR(new_dir);
+
+ if (!(shp = shm_lock(inode->i_ino)))
+ BUG();
+ if (shp->shm_segsz == attr->ia_size)
+ goto out;
+ old_dir = shp->shm_dir;
+ old_pages = shp->shm_npages;
+ if (old_dir){
+ pte_t *swap;
+ int i,j;
+ i = old_pages < new_pages ? old_pages : new_pages;
+ j = i % PTRS_PER_PTE;
+ i /= PTRS_PER_PTE;
+ if (j)
+ memcpy (new_dir[i], old_dir[i], j * sizeof (pte_t));
+ while (i--) {
+ swap = new_dir[i];
+ new_dir[i] = old_dir[i];
+ old_dir[i] = swap;
+ }
}
+ shp->shm_dir = new_dir;
+ shp->shm_npages = new_pages;
+ shp->shm_segsz = attr->ia_size;
+out:
+ shm_unlock(inode->i_ino);
+ shm_lockall();
+ shm_tot += new_pages - old_pages;
+ shm_unlockall();
+ shm_free (old_dir, old_pages);
+set_attr:
+ inode_setattr(inode, attr);
return 0;
}
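
[Since shm_setattr() handles ATTR_SIZE, a segment created through the filesystem can presumably be resized with ftruncate(2), which reaches setattr via notify_change. A hypothetical userspace sketch; the path and size are made up:]

    #include <fcntl.h>
    #include <unistd.h>

    int main(void)
    {
            int fd = open("/var/shm/myseg", O_RDWR | O_CREAT, 0600); /* shm_create() */
            if (fd >= 0) {
                    ftruncate(fd, 64 * 1024);       /* resized via shm_setattr() */
                    close(fd);
            }
            return 0;
    }
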
-static inline struct shmid_kernel *newseg_alloc(int numpages)
+static inline struct shmid_kernel *newseg_alloc(int numpages, size_t namelen)
{
struct shmid_kernel *shp;
- shp = (struct shmid_kernel *) kmalloc (sizeof (*shp), GFP_KERNEL);
+ shp = (struct shmid_kernel *) kmalloc (sizeof (*shp) + namelen, GFP_KERNEL);
if (!shp)
return 0;
@@ -203,29 +642,29 @@
return 0;
}
shp->shm_npages = numpages;
- shp->attaches = NULL;
shp->shm_nattch = 0;
- init_MUTEX(&shp->sem);
+ shp->shm_namelen = namelen;
return(shp);
}
-static int newseg (key_t key, int shmflg, size_t size)
+static int newseg (key_t key, const char *name, int namelen,
+ int shmflg, size_t size)
{
struct shmid_kernel *shp;
int numpages = (size + PAGE_SIZE -1) >> PAGE_SHIFT;
int id;
- if (size < SHMMIN)
- return -EINVAL;
+ if (namelen > SHM_NAME_LEN)
+ return -ENAMETOOLONG;
if (size > shm_ctlmax)
return -EINVAL;
if (shm_tot + numpages >= shm_ctlall)
return -ENOSPC;
- if (!(shp = newseg_alloc(numpages)))
+ if (!(shp = newseg_alloc(numpages, namelen ? namelen : SHM_FMT_LEN + 1)))
return -ENOMEM;
- id = ipc_addid(&shm_ids, &shp->shm_perm, shm_ctlmni);
+ id = ipc_addid(&shm_ids, &shp->shm_perm, shm_ctlmni+1);
if(id == -1) {
shm_free(shp->shm_dir,numpages);
kfree(shp);
@@ -234,16 +673,23 @@
shp->shm_perm.key = key;
shp->shm_perm.mode = (shmflg & S_IRWXUGO);
shp->shm_segsz = size;
- shp->shm_cpid = current->pid;
- shp->shm_lpid = 0;
- shp->shm_atime = shp->shm_dtime = 0;
- shp->shm_ctime = CURRENT_TIME;
+ shp->shm_cprid = current->pid;
+ shp->shm_lprid = 0;
+ shp->shm_atim = shp->shm_dtim = 0;
+ shp->shm_ctim = CURRENT_TIME;
shp->id = shm_buildid(id,shp->shm_perm.seq);
+ if (namelen != 0) {
+ shp->shm_namelen = namelen;
+ memcpy (shp->shm_name, name, namelen);
+ } else {
+ shp->shm_namelen = sprintf (shp->shm_name, SHM_FMT, shp->id);
+ }
shm_tot += numpages;
+ used_segs++;
shm_unlock(id);
-
- return shm_buildid(id,shp->shm_perm.seq);
+
+ return shp->id;
}
asmlinkage long sys_shmget (key_t key, size_t size, int shmflg)
@@ -251,21 +697,31 @@
struct shmid_kernel *shp;
int err, id = 0;
+ if (!shm_sb) {
+ printk ("shmget: shm filesystem not mounted\n");
+ return -EINVAL;
+ }
+
+ if (size < SHMMIN)
+ return -EINVAL;
+
down(&shm_ids.sem);
if (key == IPC_PRIVATE) {
- err = newseg(key, shmflg, size);
+ err = newseg(key, NULL, 0, shmflg, size);
} else if ((id = ipc_findkey(&shm_ids,key)) == -1) {
if (!(shmflg & IPC_CREAT))
err = -ENOENT;
else
- err = newseg(key, shmflg, size);
+ err = newseg(key, NULL, 0, shmflg, size);
} else if ((shmflg & IPC_CREAT) && (shmflg & IPC_EXCL)) {
err = -EEXIST;
} else {
shp = shm_lock(id);
if(shp==NULL)
BUG();
- if (ipcperms(&shp->shm_perm, shmflg))
+ if (shp->shm_segsz < size)
+ err = -EINVAL;
+ else if (ipcperms(&shp->shm_perm, shmflg))
err = -EACCES;
else
err = shm_buildid(id, shp->shm_perm.seq);
@@ -300,40 +756,26 @@
shm_rss -= rss;
shm_swp -= swp;
shm_tot -= numpages;
+ used_segs--;
shm_unlockall();
}
}
-/*
- * Only called after testing nattch and SHM_DEST.
- * Here pages, pgtable and shmid_kernel are freed.
- */
-static void killseg (int shmid)
+static void shm_delete (struct inode *ino)
{
+ int shmid = ino->i_ino;
struct shmid_kernel *shp;
down(&shm_ids.sem);
shp = shm_lock(shmid);
if(shp==NULL) {
-out_up:
- up(&shm_ids.sem);
- return;
- }
- if(shm_checkid(shp,shmid) || shp->shm_nattch > 0 ||
- !(shp->shm_perm.mode & SHM_DEST)) {
- shm_unlock(shmid);
- goto out_up;
+ BUG();
}
shp = shm_rmid(shmid);
- if(shp==NULL)
- BUG();
- if (!shp->shm_dir)
- BUG();
shm_unlock(shmid);
up(&shm_ids.sem);
killseg_core(shp, 1);
-
- return;
+ clear_inode(ino);
}
static inline unsigned long copy_shmid_to_user(void *buf, struct shmid64_ds *in, int version)
@@ -427,12 +869,29 @@
}
}
+char * shm_getname(int id)
+{
+ char *result;
+
+ result = __getname ();
+ if (IS_ERR(result))
+ return result;
+
+ sprintf (result, "%s/" SHM_FMT, shm_path, id);
+ return result;
+}
+
asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds *buf)
{
struct shm_setbuf setbuf;
struct shmid_kernel *shp;
int err, version;
+ if (!shm_sb) {
+ printk ("shmctl: shm filesystem not mounted\n");
+ return -EINVAL;
+ }
+
if (cmd < 0 || shmid < 0)
return -EINVAL;
@@ -481,14 +940,12 @@
{
struct shmid64_ds tbuf;
int result;
+ if ((shmid % SEQ_MULTIPLIER) == zero_id)
+ return -EINVAL;
memset(&tbuf, 0, sizeof(tbuf));
shp = shm_lock(shmid);
if(shp==NULL)
return -EINVAL;
- if (shp == &zshmid_kernel) {
- shm_unlock(shmid);
- return -EINVAL;
- }
if(cmd==SHM_STAT) {
err = -EINVAL;
if (shmid > shm_ids.max_id)
@@ -505,11 +962,11 @@
goto out_unlock;
kernel_to_ipc64_perm(&shp->shm_perm, &tbuf.shm_perm);
tbuf.shm_segsz = shp->shm_segsz;
- tbuf.shm_atime = shp->shm_atime;
- tbuf.shm_dtime = shp->shm_dtime;
- tbuf.shm_ctime = shp->shm_ctime;
- tbuf.shm_cpid = shp->shm_cpid;
- tbuf.shm_lpid = shp->shm_lpid;
+ tbuf.shm_atime = shp->shm_atim;
+ tbuf.shm_dtime = shp->shm_dtim;
+ tbuf.shm_ctime = shp->shm_ctim;
+ tbuf.shm_cpid = shp->shm_cprid;
+ tbuf.shm_lpid = shp->shm_lprid;
tbuf.shm_nattch = shp->shm_nattch;
shm_unlock(shmid);
if(copy_shmid_to_user (buf, &tbuf, version))
@@ -523,16 +980,14 @@
/* Should the pages be faulted in here or leave it to user? */
/* need to determine interaction with current->swappable */
struct kern_ipc_perm *ipcp;
+ if ((shmid % SEQ_MULTIPLIER)== zero_id)
+ return -EINVAL;
if (!capable(CAP_IPC_LOCK))
return -EPERM;
shp = shm_lock(shmid);
if(shp==NULL)
return -EINVAL;
- if (shp == &zshmid_kernel) {
- shm_unlock(shmid);
- return -EINVAL;
- }
err=-EIDRM;
if(shm_checkid(shp,shmid))
goto out_unlock;
@@ -552,50 +1007,56 @@
return err;
}
case IPC_RMID:
- case IPC_SET:
- break;
- default:
- return -EINVAL;
+ {
+ char *name;
+ if ((shmid % SEQ_MULTIPLIER)== zero_id)
+ return -EINVAL;
+ name = shm_getname(shmid);
+ if (IS_ERR(name))
+ return PTR_ERR(name);
+ lock_kernel();
+ err = do_unlink (name);
+ unlock_kernel();
+ putname (name);
+ if (err == -ENOENT)
+ err = -EINVAL;
+ return err;
}
- if (cmd == IPC_SET) {
+ case IPC_SET:
+ {
+ if ((shmid % SEQ_MULTIPLIER)== zero_id)
+ return -EINVAL;
+
if(copy_shmid_from_user (&setbuf, buf, version))
return -EFAULT;
- }
- down(&shm_ids.sem);
- shp = shm_lock(shmid);
- err=-EINVAL;
- if(shp==NULL)
- goto out_up;
- if (shp == &zshmid_kernel)
- goto out_unlock_up;
- err=-EIDRM;
- if(shm_checkid(shp,shmid))
- goto out_unlock_up;
- err=-EPERM;
- if (current->euid != shp->shm_perm.uid &&
- current->euid != shp->shm_perm.cuid &&
- !capable(CAP_SYS_ADMIN)) {
- goto out_unlock_up;
- }
+ down(&shm_ids.sem);
+ shp = shm_lock(shmid);
+ err=-EINVAL;
+ if(shp==NULL)
+ goto out_up;
+ err=-EIDRM;
+ if(shm_checkid(shp,shmid))
+ goto out_unlock_up;
+ err=-EPERM;
+ if (current->euid != shp->shm_perm.uid &&
+ current->euid != shp->shm_perm.cuid &&
+ !capable(CAP_SYS_ADMIN)) {
+ goto out_unlock_up;
+ }
- switch (cmd) {
- case IPC_SET:
shp->shm_perm.uid = setbuf.uid;
shp->shm_perm.gid = setbuf.gid;
shp->shm_perm.mode = (shp->shm_perm.mode & ~S_IRWXUGO)
| (setbuf.mode & S_IRWXUGO);
- shp->shm_ctime = CURRENT_TIME;
+ shp->shm_ctim = CURRENT_TIME;
break;
- case IPC_RMID:
- shp->shm_perm.mode |= SHM_DEST;
- if (shp->shm_nattch <= 0) {
- shm_unlock(shmid);
- up(&shm_ids.sem);
- killseg (shmid);
- return 0;
- }
}
+
+ default:
+ return -EINVAL;
+ }
+
err = 0;
out_unlock_up:
shm_unlock(shmid);
@@ -607,65 +1068,24 @@
return err;
}
-/*
- * The per process internal structure for managing segments is
- * `struct vm_area_struct'.
- * A shmat will add to and shmdt will remove from the list.
- * shmd->vm_mm the attacher
- * shmd->vm_start virt addr of attach, multiple of SHMLBA
- * shmd->vm_end multiple of SHMLBA
- * shmd->vm_next next attach for task
- * shmd->vm_next_share next attach for segment
- * shmd->vm_pgoff offset into segment (in pages)
- * shmd->vm_private_data signature for this attach
- */
-
-static struct vm_operations_struct shm_vm_ops = {
- open: shm_open, /* open - callback for a new vm-area open */
- close: shm_close, /* close - callback for when the vm-area is released */
- nopage: shm_nopage,
- swapout: shm_swapout,
-};
-
-/* Insert shmd into the list shp->attaches */
-static inline void insert_attach (struct shmid_kernel * shp, struct vm_area_struct * shmd)
-{
- if((shmd->vm_next_share = shp->attaches) != NULL)
- shp->attaches->vm_pprev_share = &shmd->vm_next_share;
- shp->attaches = shmd;
- shmd->vm_pprev_share = &shp->attaches;
-}
+static inline void shm_inc (int id) {
+ struct shmid_kernel *shp;
-/* Remove shmd from list shp->attaches */
-static inline void remove_attach (struct shmid_kernel * shp, struct vm_area_struct * shmd)
-{
- if(shmd->vm_next_share)
- shmd->vm_next_share->vm_pprev_share = shmd->vm_pprev_share;
- *shmd->vm_pprev_share = shmd->vm_next_share;
+ if(!(shp = shm_lock(id)))
+ BUG();
+ shp->shm_atim = CURRENT_TIME;
+ shp->shm_lprid = current->pid;
+ shp->shm_nattch++;
+ shm_unlock(id);
}
-/*
- * ensure page tables exist
- * mark page table entries with shm_sgn.
- */
-static int shm_map (struct vm_area_struct *shmd)
+static int shm_mmap(struct file * file, struct vm_area_struct * vma)
{
- unsigned long tmp;
-
- /* clear old mappings */
- do_munmap(shmd->vm_start, shmd->vm_end - shmd->vm_start);
-
- /* add new mapping */
- tmp = shmd->vm_end - shmd->vm_start;
- if((current->mm->total_vm << PAGE_SHIFT) + tmp
- > (unsigned long) current->rlim[RLIMIT_AS].rlim_cur)
- return -ENOMEM;
- current->mm->total_vm += tmp >> PAGE_SHIFT;
- vmlist_modify_lock(current->mm);
- insert_vm_struct(current->mm, shmd);
- merge_segments(current->mm, shmd->vm_start, shmd->vm_end);
- vmlist_modify_unlock(current->mm);
-
+ if (!(vma->vm_flags & VM_SHARED))
+ return -EINVAL; /* we cannot do private mappings */
+ UPDATE_ATIME(file->f_dentry->d_inode);
+ vma->vm_ops = &shm_vm_ops;
+ shm_inc(file->f_dentry->d_inode->i_ino);
return 0;
}
@@ -674,137 +1094,57 @@
*/
asmlinkage long sys_shmat (int shmid, char *shmaddr, int shmflg, ulong *raddr)
{
- struct shmid_kernel *shp;
- struct vm_area_struct *shmd;
- int err;
unsigned long addr;
- unsigned long len;
- short flg = shmflg & SHM_RDONLY ? S_IRUGO : S_IRUGO|S_IWUGO;
-
+ struct file * file;
+ int err;
+ int flags;
+ char *name;
- if (shmid < 0)
+ if (!shm_sb || (shmid % SEQ_MULTIPLIER) == zero_id)
return -EINVAL;
- down(¤t->mm->mmap_sem);
- err = -EINVAL;
- shp = shm_lock(shmid);
- if (!shp)
- goto out_up;
- if (shp == &zshmid_kernel)
- goto out_unlock_up;
-
- err = -EACCES;
- if (ipcperms(&shp->shm_perm, flg))
- goto out_unlock_up;
-
- err = -EIDRM;
- if (shm_checkid(shp,shmid))
- goto out_unlock_up;
-
- if (!(addr = (ulong) shmaddr)) {
- if (shmflg & SHM_REMAP)
- goto out_unlock_up;
- err = -ENOMEM;
- addr = 0;
- again:
- if (!(addr = get_unmapped_area(addr, (unsigned long)shp->shm_segsz)))
- goto out_unlock_up;
- if(addr & (SHMLBA - 1)) {
- addr = (addr + (SHMLBA - 1)) & ~(SHMLBA - 1);
- goto again;
+ if ((addr = (ulong)shmaddr))
+ {
+ if(addr & (SHMLBA-1)) {
+ if (shmflg & SHM_RND)
+ addr &= ~(SHMLBA-1); /* round down */
+ else
+ return -EINVAL;
}
- } else if (addr & (SHMLBA-1)) {
- err=-EINVAL;
- if (shmflg & SHM_RND)
- addr &= ~(SHMLBA-1); /* round down */
- else
- goto out_unlock_up;
- }
- /*
- * Check if addr exceeds TASK_SIZE (from do_mmap)
- */
- len = PAGE_SIZE*shp->shm_npages;
- err = -EINVAL;
- if (addr >= TASK_SIZE || len > TASK_SIZE || addr > TASK_SIZE - len)
- goto out_unlock_up;
- /*
- * If shm segment goes below stack, make sure there is some
- * space left for the stack to grow (presently 4 pages).
- */
- if (addr < current->mm->start_stack &&
- addr > current->mm->start_stack - PAGE_SIZE*(shp->shm_npages + 4))
- goto out_unlock_up;
- if (!(shmflg & SHM_REMAP) && find_vma_intersection(current->mm, addr, addr + (unsigned long)shp->shm_segsz))
- goto out_unlock_up;
-
- shm_unlock(shmid);
- err = -ENOMEM;
- shmd = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
- err = shm_revalidate(shp, shmid, len/PAGE_SIZE,flg);
- if(err) {
- kmem_cache_free(vm_area_cachep, shmd);
- goto out_up;
- }
-
- shmd->vm_private_data = shp;
- shmd->vm_start = addr;
- shmd->vm_end = addr + shp->shm_npages * PAGE_SIZE;
- shmd->vm_mm = current->mm;
- shmd->vm_page_prot = (shmflg & SHM_RDONLY) ? PAGE_READONLY : PAGE_SHARED;
- shmd->vm_flags = VM_SHM | VM_MAYSHARE | VM_SHARED
- | VM_MAYREAD | VM_MAYEXEC | VM_READ | VM_EXEC
- | ((shmflg & SHM_RDONLY) ? 0 : VM_MAYWRITE | VM_WRITE);
- shmd->vm_file = NULL;
- shmd->vm_pgoff = 0;
- shmd->vm_ops = &shm_vm_ops;
-
- shp->shm_nattch++; /* prevent destruction */
- shm_unlock(shp->id);
- err = shm_map (shmd);
- shm_lock(shmid); /* cannot fail */
- if (err)
- goto failed_shm_map;
-
- insert_attach(shp,shmd); /* insert shmd into shp->attaches */
-
- shp->shm_lpid = current->pid;
- shp->shm_atime = CURRENT_TIME;
+ flags = MAP_SHARED | MAP_FIXED;
+ } else
+ flags = MAP_SHARED;
- *raddr = addr;
- err = 0;
-out_unlock_up:
- shm_unlock(shmid);
-out_up:
- up(¤t->mm->mmap_sem);
+ name = shm_getname(shmid);
+ if (IS_ERR (name))
+ return PTR_ERR (name);
+
+ file = filp_open (name, O_RDWR, 0);
+ putname (name);
+ if (IS_ERR (file))
+ goto bad_file;
+ lock_kernel();
+ *raddr = do_mmap (file, addr, file->f_dentry->d_inode->i_size,
+ (shmflg & SHM_RDONLY ? PROT_READ :
+ PROT_READ | PROT_WRITE), flags, 0);
+ unlock_kernel();
+ if (IS_ERR(*raddr))
+ err = PTR_ERR(*raddr);
+ else
+ err = 0;
+ fput (file);
return err;
-failed_shm_map:
- {
- int delete = 0;
- if (--shp->shm_nattch <= 0 && shp->shm_perm.mode & SHM_DEST)
- delete = 1;
- shm_unlock(shmid);
- up(¤t->mm->mmap_sem);
- kmem_cache_free(vm_area_cachep, shmd);
- if(delete)
- killseg(shmid);
- return err;
- }
+bad_file:
+ if ((err = PTR_ERR(file)) == -ENOENT)
+ return -EINVAL;
+ return err;
}
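
[The userspace view of the path above, for reference: with a NULL address shmat() takes the plain MAP_SHARED branch, while an explicit address adds MAP_FIXED. A minimal sketch:]

    #include <stddef.h>
    #include <sys/ipc.h>
    #include <sys/shm.h>

    int main(void)
    {
            int id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0600);
            char *p = shmat(id, NULL, 0);   /* MAP_SHARED, kernel picks the address */
            if (p != (char *) -1) {
                    p[0] = 1;
                    shmdt(p);
            }
            return 0;
    }
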
/* This is called by fork, once for every shm attach. */
static void shm_open (struct vm_area_struct *shmd)
{
- struct shmid_kernel *shp;
-
- shp = (struct shmid_kernel *) shmd->vm_private_data;
- if(shp != shm_lock(shp->id))
- BUG();
- insert_attach(shp,shmd); /* insert shmd into shp->attaches */
- shp->shm_nattch++;
- shp->shm_atime = CURRENT_TIME;
- shp->shm_lpid = current->pid;
- shm_unlock(shp->id);
+ shm_inc (shmd->vm_file->f_dentry->d_inode->i_ino);
}
/*
@@ -815,22 +1155,16 @@
*/
static void shm_close (struct vm_area_struct *shmd)
{
+ int id = shmd->vm_file->f_dentry->d_inode->i_ino;
struct shmid_kernel *shp;
- int id;
/* remove from the list of attaches of the shm segment */
- shp = (struct shmid_kernel *) shmd->vm_private_data;
- if(shp != shm_lock(shp->id))
+ if(!(shp = shm_lock(id)))
BUG();
- remove_attach(shp,shmd); /* remove from shp->attaches */
- shp->shm_lpid = current->pid;
- shp->shm_dtime = CURRENT_TIME;
- id=-1;
- if (--shp->shm_nattch <= 0 && shp->shm_perm.mode & SHM_DEST)
- id=shp->id;
- shm_unlock(shp->id);
- if(id!=-1)
- killseg(id);
+ shp->shm_lprid = current->pid;
+ shp->shm_dtim = CURRENT_TIME;
+ shp->shm_nattch--;
+ shm_unlock(id);
}
/*
@@ -868,31 +1202,13 @@
/*
* page not present ... go through shm_dir
*/
-static struct page * shm_nopage(struct vm_area_struct * shmd, unsigned long address, int no_share)
+static struct page * shm_nopage_core(struct shmid_kernel *shp, unsigned int idx, int *swp, int *rss)
{
pte_t pte;
- struct shmid_kernel *shp;
- unsigned int idx;
struct page * page;
- int is_shmzero;
- shp = (struct shmid_kernel *) shmd->vm_private_data;
- idx = (address - shmd->vm_start) >> PAGE_SHIFT;
- idx += shmd->vm_pgoff;
- is_shmzero = (shp->id == zero_id);
-
- /*
- * A shared mapping past the last page of the file is an error
- * and results in a SIGBUS, so logically a shared mapping past
- * the end of a shared memory segment should result in SIGBUS
- * as well.
- */
- if (idx >= shp->shm_npages) {
- return NULL;
- }
- down(&shp->sem);
- if ((shp != shm_lock(shp->id)) && (is_shmzero == 0))
- BUG();
+ if (idx >= shp->shm_npages)
+ goto sigbus;
pte = SHM_ENTRY(shp,idx);
if (!pte_present(pte)) {
@@ -905,7 +1221,7 @@
if (!page)
goto oom;
clear_highpage(page);
- if ((shp != shm_lock(shp->id)) && (is_shmzero == 0))
+ if ((shp != shm_lock(shp->id)) && (shp->id != zero_id))
BUG();
} else {
swp_entry_t entry = pte_to_swp_entry(pte);
@@ -923,11 +1239,11 @@
delete_from_swap_cache(page);
page = replace_with_highmem(page);
swap_free(entry);
- if ((shp != shm_lock(shp->id)) && (is_shmzero == 0))
+ if ((shp != shm_lock(shp->id)) && (shp->id != zero_id))
BUG();
- if (is_shmzero == 0) shm_swp--;
+ (*swp)--;
}
- if (is_shmzero == 0) shm_rss++;
+ (*rss)++;
pte = pte_mkdirty(mk_pte(page, PAGE_SHARED));
SHM_ENTRY(shp, idx) = pte;
} else
@@ -935,14 +1251,32 @@
/* pte_val(pte) == SHM_ENTRY (shp, idx) */
get_page(pte_page(pte));
- shm_unlock(shp->id);
- up(&shp->sem);
current->min_flt++;
return pte_page(pte);
oom:
- up(&shp->sem);
return NOPAGE_OOM;
+sigbus:
+ return NOPAGE_SIGBUS;
+}
+
+static struct page * shm_nopage(struct vm_area_struct * shmd, unsigned long address, int no_share)
+{
+ struct page * page;
+ struct shmid_kernel *shp;
+ unsigned int idx;
+ struct inode * inode = shmd->vm_file->f_dentry->d_inode;
+
+ idx = (address - shmd->vm_start) >> PAGE_SHIFT;
+ idx += shmd->vm_pgoff;
+
+ down(&inode->i_sem);
+ if(!(shp = shm_lock(inode->i_ino)))
+ BUG();
+ page = shm_nopage_core(shp, idx, &shm_swp, &shm_rss);
+ shm_unlock(inode->i_ino);
+ up(&inode->i_sem);
+ return(page);
}
#define OKAY 0
@@ -1127,38 +1461,40 @@
int i, len = 0;
down(&shm_ids.sem);
- len += sprintf(buffer, " key shmid perms size cpid lpid nattch uid gid cuid cgid atime dtime ctime\n");
+ len += sprintf(buffer, " key shmid perms size cpid lpid nattch uid gid cuid cgid atime dtime ctime name\n");
- for(i = 0; i <= shm_ids.max_id; i++) {
- struct shmid_kernel* shp = shm_lock(i);
- if (shp == &zshmid_kernel) {
- shm_unlock(i);
+ for(i = 0; i <= shm_ids.max_id; i++) {
+ struct shmid_kernel* shp;
+
+ if (i == zero_id)
continue;
- }
+ shp = shm_lock(i);
if(shp!=NULL) {
-#define SMALL_STRING "%10d %10d %4o %10u %5u %5u %5d %5u %5u %5u %5u %10lu %10lu %10lu\n"
-#define BIG_STRING "%10d %10d %4o %21u %5u %5u %5d %5u %5u %5u %5u %10lu %10lu %10lu\n"
+#define SMALL_STRING "%10d %10d %4o %10u %5u %5u %5d %5u %5u %5u %5u %10lu %10lu %10lu %.*s\n"
+#define BIG_STRING "%10d %10d %4o %21u %5u %5u %5d %5u %5u %5u %5u %10lu %10lu %10lu %.*s\n"
char *format;
if (sizeof(size_t) <= sizeof(int))
format = SMALL_STRING;
else
format = BIG_STRING;
- len += sprintf(buffer + len, format,
+ len += sprintf(buffer + len, format,
shp->shm_perm.key,
shm_buildid(i, shp->shm_perm.seq),
shp->shm_perm.mode,
shp->shm_segsz,
- shp->shm_cpid,
- shp->shm_lpid,
+ shp->shm_cprid,
+ shp->shm_lprid,
shp->shm_nattch,
shp->shm_perm.uid,
shp->shm_perm.gid,
shp->shm_perm.cuid,
shp->shm_perm.cgid,
- shp->shm_atime,
- shp->shm_dtime,
- shp->shm_ctime);
+ shp->shm_atim,
+ shp->shm_dtim,
+ shp->shm_ctim,
+ shp->shm_namelen,
+ shp->shm_name);
shm_unlock(i);
pos += len;
@@ -1183,31 +1519,84 @@
}
#endif
-static struct shmid_kernel *zmap_list = 0;
+#define VMA_TO_SHP(vma) ((vma)->vm_file->private_data)
+
static spinlock_t zmap_list_lock = SPIN_LOCK_UNLOCKED;
static unsigned long zswap_idx = 0; /* next to swap */
-static struct shmid_kernel *zswap_shp = 0;
+static struct shmid_kernel *zswap_shp = &zshmid_kernel;
+static int zshm_rss;
static struct vm_operations_struct shmzero_vm_ops = {
open: shmzero_open,
close: shmzero_close,
- nopage: shm_nopage,
+ nopage: shmzero_nopage,
swapout: shm_swapout,
};
+/*
+ * In this implementation, the "unuse" and "swapout" interfaces are
+ * interlocked via the kernel_lock, as well as shm_lock(zero_id).
+ * "unuse" and "nopage/swapin", as well as "swapout" and "nopage/swapin"
+ * interlock via shm_lock(zero_id). All these interlocks can be based
+ * on a per mapping lock instead of being a global lock.
+ */
+/*
+ * Reference (existence) counting on the file/dentry/inode is done
+ * by generic vm_file code. The zero code does not hold any reference
+ * on the pseudo-file. This is possible because the open/close calls
+ * are bracketed by the file count update calls.
+ */
+static struct file *file_setup(struct file *fzero, struct shmid_kernel *shp)
+{
+ struct file *filp;
+ struct inode *inp;
+
+ if ((filp = get_empty_filp()) == 0)
+ return(filp);
+ if ((inp = get_empty_inode()) == 0) {
+ put_filp(filp);
+ return(0);
+ }
+ if ((filp->f_dentry = d_alloc(zdent, &(const struct qstr) { "dev/zero",
+ 8, 0 })) == 0) {
+ iput(inp);
+ put_filp(filp);
+ return(0);
+ }
+ d_instantiate(filp->f_dentry, inp);
+
+ /*
+ * Copy over /dev/zero dev/ino for benefit of procfs. Use
+ * ino to indicate separate mappings.
+ */
+ filp->f_dentry->d_inode->i_dev = fzero->f_dentry->d_inode->i_dev;
+ filp->f_dentry->d_inode->i_ino = (unsigned long)shp;
+ fput(fzero); /* release /dev/zero file */
+ return(filp);
+}
+
int map_zero_setup(struct vm_area_struct *vma)
{
+ extern int vm_enough_memory(long pages);
struct shmid_kernel *shp;
+ struct file *filp;
- if (!(shp = newseg_alloc((vma->vm_end - vma->vm_start) / PAGE_SIZE)))
+ if (!vm_enough_memory((vma->vm_end - vma->vm_start) >> PAGE_SHIFT))
+ return -ENOMEM;
+ if (!(shp = newseg_alloc((vma->vm_end - vma->vm_start) / PAGE_SIZE, 0)))
+ return -ENOMEM;
+ if ((filp = file_setup(vma->vm_file, shp)) == 0) {
+ killseg_core(shp, 0);
return -ENOMEM;
- shp->id = zero_id; /* hack for shm_lock et al */
- vma->vm_private_data = shp;
+ }
+ vma->vm_file = filp;
+ VMA_TO_SHP(vma) = (void *)shp;
+ shp->id = zero_id;
+ init_MUTEX(&shp->zsem);
vma->vm_ops = &shmzero_vm_ops;
shmzero_open(vma);
spin_lock(&zmap_list_lock);
- shp->attaches = (struct vm_area_struct *)zmap_list;
- zmap_list = shp;
+ list_add(&shp->zero_list, &zshmid_kernel.zero_list);
spin_unlock(&zmap_list_lock);
return 0;
}
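
[For reference, the path into map_zero_setup() is a shared mapping of /dev/zero; a minimal userspace sketch:]

    #include <fcntl.h>
    #include <stddef.h>
    #include <sys/mman.h>
    #include <unistd.h>

    int main(void)
    {
            int fd = open("/dev/zero", O_RDWR);
            char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
            if (p != MAP_FAILED) {
                    p[0] = 1;       /* faulted in through shmzero_nopage() */
                    munmap(p, 4096);
            }
            close(fd);
            return 0;
    }
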
@@ -1216,53 +1605,66 @@
{
struct shmid_kernel *shp;
- shp = (struct shmid_kernel *) shmd->vm_private_data;
- down(&shp->sem);
+ shp = VMA_TO_SHP(shmd);
+ down(&shp->zsem);
shp->shm_nattch++;
- up(&shp->sem);
+ up(&shp->zsem);
}
static void shmzero_close(struct vm_area_struct *shmd)
{
int done = 0;
- struct shmid_kernel *shp, *prev, *cur;
+ struct shmid_kernel *shp;
- shp = (struct shmid_kernel *) shmd->vm_private_data;
- down(&shp->sem);
+ shp = VMA_TO_SHP(shmd);
+ down(&shp->zsem);
if (--shp->shm_nattch == 0)
done = 1;
- up(&shp->sem);
+ up(&shp->zsem);
if (done) {
spin_lock(&zmap_list_lock);
if (shp == zswap_shp)
- zswap_shp = (struct shmid_kernel *)(shp->attaches);
- if (shp == zmap_list)
- zmap_list = (struct shmid_kernel *)(shp->attaches);
- else {
- prev = zmap_list;
- cur = (struct shmid_kernel *)(prev->attaches);
- while (cur != shp) {
- prev = cur;
- cur = (struct shmid_kernel *)(prev->attaches);
- }
- prev->attaches = (struct vm_area_struct *)(shp->attaches);
- }
+ zswap_shp = list_entry(zswap_shp->zero_list.next,
+ struct shmid_kernel, zero_list);
+ list_del(&shp->zero_list);
spin_unlock(&zmap_list_lock);
killseg_core(shp, 0);
}
}
+static struct page * shmzero_nopage(struct vm_area_struct * shmd, unsigned long address, int no_share)
+{
+ struct page *page;
+ struct shmid_kernel *shp;
+ unsigned int idx;
+ int dummy;
+
+ idx = (address - shmd->vm_start) >> PAGE_SHIFT;
+ idx += shmd->vm_pgoff;
+
+ shp = VMA_TO_SHP(shmd);
+ down(&shp->zsem);
+ shm_lock(zero_id);
+ page = shm_nopage_core(shp, idx, &dummy, &zshm_rss);
+ shm_unlock(zero_id);
+ up(&shp->zsem);
+ return(page);
+}
+
static void zmap_unuse(swp_entry_t entry, struct page *page)
{
struct shmid_kernel *shp;
spin_lock(&zmap_list_lock);
- shp = zmap_list;
- while (shp) {
+ shm_lock(zero_id);
+ for (shp = list_entry(zshmid_kernel.zero_list.next, struct shmid_kernel,
+ zero_list); shp != &zshmid_kernel;
+ shp = list_entry(shp->zero_list.next, struct shmid_kernel,
+ zero_list)) {
if (shm_unuse_core(shp, entry, page))
break;
- shp = (struct shmid_kernel *)shp->attaches;
}
+ shm_unlock(zero_id);
spin_unlock(&zmap_list_lock);
}
@@ -1275,7 +1677,7 @@
int counter;
struct page * page_map;
- counter = 10; /* maybe we should use zshm_rss */
+ counter = zshm_rss >> prio;
if (!counter)
return;
next:
@@ -1283,25 +1685,30 @@
return;
spin_lock(&zmap_list_lock);
- if (zmap_list == 0)
+ shm_lock(zero_id);
+ if (zshmid_kernel.zero_list.next == 0)
goto failed;
next_id:
- if ((shp = zswap_shp) == 0) {
+ if (zswap_shp == &zshmid_kernel) {
if (loop) {
failed:
+ shm_unlock(zero_id);
spin_unlock(&zmap_list_lock);
__swap_free(swap_entry, 2);
return;
}
- zswap_shp = shp = zmap_list;
+ zswap_shp = list_entry(zshmid_kernel.zero_list.next,
+ struct shmid_kernel, zero_list);
zswap_idx = 0;
loop = 1;
}
+ shp = zswap_shp;
check_table:
idx = zswap_idx++;
if (idx >= shp->shm_npages) {
- zswap_shp = (struct shmid_kernel *)(zswap_shp->attaches);
+ zswap_shp = list_entry(zswap_shp->zero_list.next,
+ struct shmid_kernel, zero_list);
zswap_idx = 0;
goto next_id;
}
@@ -1310,6 +1717,7 @@
case RETRY: goto check_table;
case FAILED: goto failed;
}
+ shm_unlock(zero_id);
spin_unlock(&zmap_list_lock);
shm_swap_postop(page_map);
@@ -1317,3 +1725,4 @@
goto next;
return;
}
+