patch-2.1.15 linux/net/socket.c

Next file: linux/net/unix/af_unix.c
Previous file: linux/net/rose/rose_subr.c
Back to the patch index
Back to the overall index

diff -u --recursive --new-file v2.1.14/linux/net/socket.c linux/net/socket.c
@@ -49,6 +49,12 @@
  *
  *	In addition it lacks an effective kernel -> kernel interface to go with
  *	the user one.
+ *
+ * PROBLEMS:
+ *	- CLONE_FILES. Big problem: a cloned thread can close a file
+ *	  while another thread sleeps in the kernel. It can be solved
+ *	  by increasing f_count and releasing it on exit from the syscall.
+ *
  */
 
 #include <linux/config.h>
@@ -76,6 +82,18 @@
 #include <asm/system.h>
 #include <asm/uaccess.h>
 
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <net/ip.h>
+#include <net/protocol.h>
+#include <net/rarp.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/scm.h>
+
+
 #if defined(CONFIG_MODULES) && defined(CONFIG_NET)
 extern void export_net_symbols(void);
 #endif
@@ -116,7 +134,8 @@
 /*
  *	The protocol list. Each protocol is registered in here.
  */
-static struct proto_ops *pops[NPROTO];
+static struct net_proto_family *net_families[NPROTO];
+
 /*
  *	Statistics counters of the socket lists
  */
@@ -147,7 +166,7 @@
 {
 	int err;
 	int len;
-		
+
 	if((err=get_user(len, ulen)))
 		return err;
 	if(len>klen)
@@ -200,15 +219,7 @@
 	return fd;
 }
 
-
-/*
- *	Go from an inode to its socket slot.
- *
- * The original socket implementation wasn't very clever, which is
- * why this exists at all..
- */
-
-__inline struct socket *socki_lookup(struct inode *inode)
+static __inline__ struct socket *socki_lookup(struct inode *inode)
 {
 	return &inode->u.socket_i;
 }
@@ -217,20 +228,23 @@
  *	Go from a file number to its socket slot.
  */
 
-extern __inline struct socket *sockfd_lookup(int fd, struct file **pfile)
+extern __inline__ struct socket *sockfd_lookup(int fd, int *err)
 {
 	struct file *file;
 	struct inode *inode;
 
-	if (fd < 0 || fd >= NR_OPEN || !(file = current->files->fd[fd])) 
+	if (fd < 0 || fd >= NR_OPEN || !(file = current->files->fd[fd]))
+	{
+		*err = -EBADF;
 		return NULL;
+	}
 
 	inode = file->f_inode;
-	if (!inode || !inode->i_sock)
+	if (!inode || !inode->i_sock || !socki_lookup(inode))
+	{
+		*err = -ENOTSOCK;
 		return NULL;
-
-	if (pfile) 
-		*pfile = file;
+	}
 
 	return socki_lookup(inode);
 }
@@ -247,78 +261,87 @@
 	inode = get_empty_inode();
 	if (!inode)
 		return NULL;
+	sock = socki_lookup(inode);
 
 	inode->i_mode = S_IFSOCK;
 	inode->i_sock = 1;
 	inode->i_uid = current->uid;
 	inode->i_gid = current->gid;
 
-	sock = &inode->u.socket_i;
+	sock->inode = inode;
+	sock->fasync_list = NULL;
 	sock->state = SS_UNCONNECTED;
 	sock->flags = 0;
 	sock->ops = NULL;
-	sock->data = NULL;
-	sock->conn = NULL;
-	sock->iconn = NULL;
-	sock->next = NULL;
+	sock->sk = NULL;
 	sock->file = NULL;
-	sock->wait = &inode->i_wait;
-	sock->inode = inode;		/* "backlink": we could use pointer arithmetic instead */
-	sock->fasync_list = NULL;
+
 	sockets_in_use++;
 	return sock;
 }
 
-/*
- *	Release a socket.
- */
-
-static inline void sock_release_peer(struct socket *peer)
-{
-	peer->state = SS_DISCONNECTING;
-	wake_up_interruptible(peer->wait);
-	sock_wake_async(peer, 1);
-}
-
 void sock_release(struct socket *sock)
 {
 	int oldstate;
-	struct socket *peersock, *nextsock;
 
 	if ((oldstate = sock->state) != SS_UNCONNECTED)
 		sock->state = SS_DISCONNECTING;
 
-	/*
-	 *	Wake up anyone waiting for connections. 
-	 */
-
-	for (peersock = sock->iconn; peersock; peersock = nextsock) 
-	{
-		nextsock = peersock->next;
-		sock_release_peer(peersock);
-	}
-
-	/*
-	 * Wake up anyone we're connected to. First, we release the
-	 * protocol, to give it a chance to flush data, etc.
-	 */
-
-	peersock = (oldstate == SS_CONNECTED) ? sock->conn : NULL;
 	if (sock->ops) 
-		sock->ops->release(sock, peersock);
-	if (peersock)
-		sock_release_peer(peersock);
+		sock->ops->release(sock, NULL);
+
 	--sockets_in_use;	/* Bookkeeping.. */
 	sock->file=NULL;
-	iput(SOCK_INODE(sock));
+	iput(sock->inode);
+}
+
+int
+sock_sendmsg(struct socket *sock, struct msghdr *msg, int size)
+{
+	int err;
+	struct scm_cookie scm;
+
+	if (!sock->ops->sendmsg)
+		return -EOPNOTSUPP;
+
+	err = scm_send(sock, msg, &scm);
+	if (err < 0)
+		return err;
+
+	err = sock->ops->sendmsg(sock, msg, size, &scm);
+
+	scm_destroy(&scm);
+
+	return err;
+}
+
+int
+sock_recvmsg(struct socket *sock, struct msghdr *msg, int size, int flags)
+{
+	struct scm_cookie scm;
+
+	if (!sock->ops->recvmsg)
+		return -EOPNOTSUPP;
+
+	memset(&scm, 0, sizeof(scm));
+
+	size = sock->ops->recvmsg(sock, msg, size, flags, &scm);
+
+	if (size < 0)
+		return size;
+
+	scm_recv(sock, msg, &scm, flags);
+
+	return size;
 }
 
+
 /*
  *	Sockets are not seekable.
  */
 
 static long long sock_lseek(struct inode *inode, struct file *file,
-	long long offset, int whence)
+			    long long offset, int whence)
 {
 	return -ESPIPE;
 }
@@ -329,40 +352,42 @@
  */
 
 static long sock_read(struct inode *inode, struct file *file,
-	char *ubuf, unsigned long size)
+		      char *ubuf, unsigned long size)
 {
 	struct socket *sock;
 	int err;
 	struct iovec iov;
 	struct msghdr msg;
-  
-	sock = socki_lookup(inode); 
-	if (sock->flags & SO_ACCEPTCON) 
-		return(-EINVAL);
 
-	if(size<0)
+	sock = socki_lookup(inode); 
+  
+	if (size<0)
 		return -EINVAL;
-	if(size==0)		/* Match SYS5 behaviour */
+	if (size==0)		/* Match SYS5 behaviour */
 		return 0;
 	if ((err=verify_area(VERIFY_WRITE,ubuf,size))<0)
 	  	return err;
 	msg.msg_name=NULL;
+	msg.msg_namelen=0;
 	msg.msg_iov=&iov;
 	msg.msg_iovlen=1;
 	msg.msg_control=NULL;
+	msg.msg_controllen=0;
 	iov.iov_base=ubuf;
 	iov.iov_len=size;
 
-	return(sock->ops->recvmsg(sock, &msg, size,(file->f_flags & O_NONBLOCK), 0,&msg.msg_namelen));
+	return sock_recvmsg(sock, &msg, size,
+			    !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT);
 }
 
+
 /*
  *	Write data to a socket. We verify that the user area ubuf..ubuf+size-1 is
  *	readable by the user process.
  */
 
 static long sock_write(struct inode *inode, struct file *file,
-	const char *ubuf, unsigned long size)
+		       const char *ubuf, unsigned long size)
 {
 	struct socket *sock;
 	int err;
@@ -371,9 +396,6 @@
 	
 	sock = socki_lookup(inode); 
 
-	if (sock->flags & SO_ACCEPTCON) 
-		return(-EINVAL);
-	
 	if(size<0)
 		return -EINVAL;
 	if(size==0)		/* Match SYS5 behaviour */
@@ -383,15 +405,41 @@
 	  	return err;
 	
 	msg.msg_name=NULL;
+	msg.msg_namelen=0;
 	msg.msg_iov=&iov;
 	msg.msg_iovlen=1;
 	msg.msg_control=NULL;
+	msg.msg_controllen=0;
+	msg.msg_flags=!(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
 	iov.iov_base=(void *)ubuf;
 	iov.iov_len=size;
 	
-	return(sock->ops->sendmsg(sock, &msg, size,(file->f_flags & O_NONBLOCK),0));
+	return sock_sendmsg(sock, &msg, size);
+}
+
+int sock_readv_writev(int type, struct inode * inode, struct file * file,
+		      const struct iovec * iov, long count, long size)
+{
+	struct msghdr msg;
+	struct socket *sock;
+
+	sock = socki_lookup(inode);
+
+	msg.msg_name = NULL;
+	msg.msg_namelen = 0;
+	msg.msg_control = NULL;
+	msg.msg_controllen = 0;
+	msg.msg_iov = (struct iovec *) iov;
+	msg.msg_iovlen = count;
+	msg.msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
+
+	/* type is VERIFY_WRITE for a read (data is stored into user memory) */
+	if (type == VERIFY_WRITE)
+		return sock_recvmsg(sock, &msg, size, msg.msg_flags);
+	return sock_sendmsg(sock, &msg, size);
 }
 
+
 /*
  *	With an ioctl arg may well be a user mode pointer, but we don't know what to do
  *	with it - that's up to the protocol still.
@@ -400,9 +448,8 @@
 int sock_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
 	   unsigned long arg)
 {
-	struct socket *sock;
-	sock = socki_lookup(inode); 
-  	return(sock->ops->ioctl(sock, cmd, arg));
+	struct socket *sock = socki_lookup(inode);
+  	return sock->ops->ioctl(sock, cmd, arg);
 }
 
 
@@ -417,7 +464,7 @@
 	 */
 
 	if (sock->ops->select)
-		return(sock->ops->select(sock, sel_type, wait));
+		return sock->ops->select(sock, sel_type, wait);
 	return(0);
 }
 
@@ -425,11 +472,14 @@
 void sock_close(struct inode *inode, struct file *filp)
 {
 	/*
-	 *	It's possible the inode is NULL if we're closing an unfinished socket. 
+	 * It's possible the inode is NULL if we're closing an unfinished socket. 
 	 */
 
-	if (!inode) 
+	if (!inode)
+	{
+		printk(KERN_DEBUG "Nope. It is impossible!\n");
 		return;
+	}
 	sock_fasync(inode, filp, 0);
 	sock_release(socki_lookup(inode));
 }
@@ -458,8 +508,8 @@
 	save_flags(flags);
 	cli();
 	
-	for(fa=*prev; fa!=NULL; prev=&fa->fa_next,fa=*prev)
-		if(fa->fa_file==filp)
+	for (fa=*prev; fa!=NULL; prev=&fa->fa_next,fa=*prev)
+		if (fa->fa_file==filp)
 			break;
 	
 	if(on)
@@ -477,7 +527,7 @@
 	}
 	else
 	{
-		if(fa!=NULL)
+		if (fa!=NULL)
 		{
 			*prev=fa->fa_next;
 			kfree_s(fa,sizeof(struct fasync_struct));
@@ -522,9 +572,9 @@
 	register int i;
 	for (i = 0; i < NPROTO; i++)
 	{
-		if (pops[i] == NULL)
+		if (net_families[i] == NULL)
 			continue;
-		if (pops[i]->family == family)
+		if (net_families[i]->family == family)
 			return i;
 	}
 	return -1;
@@ -534,7 +584,6 @@
 {
 	int i, fd;
 	struct socket *sock;
-	struct proto_ops *ops;
 
 	/* Locate the correct protocol family. */
 	i = find_protocol_family(family);
@@ -556,11 +605,7 @@
 #endif
 
 	if (i < 0)
-	{
   		return -EINVAL;
-	}
-
-	ops = pops[i];
 
 /*
  *	Check that this is a type that we know how to manipulate and
@@ -569,8 +614,8 @@
  */
   
 	if ((type != SOCK_STREAM && type != SOCK_DGRAM &&
-		type != SOCK_SEQPACKET && type != SOCK_RAW &&
-		type != SOCK_PACKET) || protocol < 0)
+	     type != SOCK_SEQPACKET && type != SOCK_RAW &&
+	     type != SOCK_PACKET) || protocol < 0)
 			return(-EINVAL);
 
 /*
@@ -587,20 +632,20 @@
 	}
 
 	sock->type = type;
-	sock->ops = ops;
-	if ((i = sock->ops->create(sock, protocol)) < 0) 
+
+	if ((i = net_families[i]->create(sock, protocol)) < 0) 
 	{
 		sock_release(sock);
 		return(i);
 	}
 
-	if ((fd = get_fd(SOCK_INODE(sock))) < 0) 
+	if ((fd = get_fd(sock->inode)) < 0) 
 	{
 		sock_release(sock);
 		return(-EINVAL);
 	}
 
-	sock->file=current->files->fd[fd];
+	sock->file = current->files->fd[fd];
 
 	return(fd);
 }
@@ -613,7 +658,7 @@
 {
 	int fd1, fd2, i;
 	struct socket *sock1, *sock2;
-	int er;
+	int err;
 
 	/*
 	 * Obtain the first socket and check if the underlying protocol
@@ -622,11 +667,14 @@
 
 	if ((fd1 = sys_socket(family, type, protocol)) < 0) 
 		return(fd1);
-	sock1 = sockfd_lookup(fd1, NULL);
+
+	sock1 = sockfd_lookup(fd1, &err);
+	if (!sock1)
+		return err;
 	if (!sock1->ops->socketpair) 
 	{
 		sys_close(fd1);
-		return(-EINVAL);
+		return -EOPNOTSUPP;
 	}
 
 	/*
@@ -639,7 +687,9 @@
 		return(-EINVAL);
 	}
 
-	sock2 = sockfd_lookup(fd2, NULL);
+	sock2 = sockfd_lookup(fd2,&err);
+	if (!sock2)
+		return err;
 	if ((i = sock1->ops->socketpair(sock1, sock2)) < 0) 
 	{
 		sys_close(fd1);
@@ -647,19 +697,14 @@
 		return(i);
 	}
 
-	sock1->conn = sock2;
-	sock2->conn = sock1;
-	sock1->state = SS_CONNECTED;
-	sock2->state = SS_CONNECTED;
-
-	er = put_user(fd1, &usockvec[0]); 
-	if (!er) 
-		er = put_user(fd2, &usockvec[1]);
-	if (er) {
+	err = put_user(fd1, &usockvec[0]); 
+	if (!err) 
+		err = put_user(fd2, &usockvec[1]);
+	if (err) {
 		sys_close(fd1);
 		sys_close(fd2);
 	}
-	return er;
+	return err;
 }
 
 
@@ -678,19 +723,14 @@
 	char address[MAX_SOCK_ADDR];
 	int err;
 
-	if (fd < 0 || fd >= NR_OPEN || current->files->fd[fd] == NULL)
-		return(-EBADF);
-	
-	if (!(sock = sockfd_lookup(fd, NULL))) 
-		return(-ENOTSOCK);
+	if (!(sock = sockfd_lookup(fd,&err))) 
+		return err;
   
 	if((err=move_addr_to_kernel(umyaddr,addrlen,address))<0)
 	  	return err;
   
 	if ((i = sock->ops->bind(sock, (struct sockaddr *)address, addrlen)) < 0) 
-	{
 		return(i);
-	}
 	return(0);
 }
 
@@ -706,21 +746,12 @@
 	struct socket *sock;
 	int err=-EOPNOTSUPP;
 	
-	if (fd < 0 || fd >= NR_OPEN || current->files->fd[fd] == NULL)
-		return(-EBADF);
-	if (!(sock = sockfd_lookup(fd, NULL))) 
-		return(-ENOTSOCK);
-
-	if (sock->state != SS_UNCONNECTED) 
-		return(-EINVAL);
+	if (!(sock = sockfd_lookup(fd, &err))) 
+		return err;
 
 	if (sock->ops && sock->ops->listen)
-	{
 		err=sock->ops->listen(sock, backlog);
-		if(!err)
-			sock->flags |= SO_ACCEPTCON;
-	}
-	return(err);
+	return err;
 }
 
 
@@ -738,24 +769,17 @@
 
 asmlinkage int sys_accept(int fd, struct sockaddr *upeer_sockaddr, int *upeer_addrlen)
 {
-	struct file *file;
+	struct inode *inode;
 	struct socket *sock, *newsock;
 	int i;
+	int err;
 	char address[MAX_SOCK_ADDR];
 	int len;
 
-	if (fd < 0 || fd >= NR_OPEN || ((file = current->files->fd[fd]) == NULL))
-		return(-EBADF);
-  	if (!(sock = sockfd_lookup(fd, &file))) 
-		return(-ENOTSOCK);
-	if (sock->state != SS_UNCONNECTED) 
-	{
-		return(-EINVAL);
-	}
-	if (!(sock->flags & SO_ACCEPTCON)) 
-	{
-		return(-EINVAL);
-	}
+  	if (!(sock = sockfd_lookup(fd, &err)))
+		return err;
+	if (!sock->ops->accept)
+		return -EOPNOTSUPP;
 
 	if (!(newsock = sock_alloc())) 
 	{
@@ -763,34 +787,39 @@
 		return(-ENOSR);	/* Was: EAGAIN, but we are out of system
 				   resources! */
 	}
+
+	inode = newsock->inode;
 	newsock->type = sock->type;
-	newsock->ops = sock->ops;
+
 	if ((i = sock->ops->dup(newsock, sock)) < 0) 
 	{
 		sock_release(newsock);
 		return(i);
 	}
 
-	i = newsock->ops->accept(sock, newsock, file->f_flags);
-	if ( i < 0) 
+	i = newsock->ops->accept(sock, newsock, current->files->fd[fd]->f_flags);
+
+	if (i < 0)
 	{
 		sock_release(newsock);
 		return(i);
 	}
+	newsock = socki_lookup(inode);
 
-	if ((fd = get_fd(SOCK_INODE(newsock))) < 0) 
+	if ((fd = get_fd(inode)) < 0) 
 	{
 		sock_release(newsock);
 		return(-EINVAL);
 	}
-	newsock->file=current->files->fd[fd];
+
+	newsock->file = current->files->fd[fd];
 	
 	if (upeer_sockaddr)
 	{
 		newsock->ops->getname(newsock, (struct sockaddr *)address, &len, 1);
 		move_addr_to_user(address,len, upeer_sockaddr, upeer_addrlen);
 	}
-	return(fd);
+	return fd;
 }
 
 
@@ -809,46 +838,20 @@
 asmlinkage int sys_connect(int fd, struct sockaddr *uservaddr, int addrlen)
 {
 	struct socket *sock;
-	struct file *file;
 	int i;
 	char address[MAX_SOCK_ADDR];
 	int err;
 
-	if (fd < 0 || fd >= NR_OPEN || (file=current->files->fd[fd]) == NULL)
-		return(-EBADF);
-	if (!(sock = sockfd_lookup(fd, &file)))
-		return(-ENOTSOCK);
+	if (!(sock = sockfd_lookup(fd,&err)))
+		return err;
 
 	if((err=move_addr_to_kernel(uservaddr,addrlen,address))<0)
 	  	return err;
   
-	switch(sock->state) 
-	{
-		case SS_UNCONNECTED:
-			/* This is ok... continue with connect */
-			break;
-		case SS_CONNECTED:
-			/* Socket is already connected */
-			if(sock->type == SOCK_DGRAM) /* Hack for now - move this all into the protocol */
-				break;
-			return -EISCONN;
-		case SS_CONNECTING:
-			/* Not yet connected... we will check this. */
-		
-			/*
-			 *	FIXME:  for all protocols what happens if you start
-			 *	an async connect fork and both children connect. Clean
-			 *	this up in the protocols!
-			 */
-			break;
-		default:
-			return(-EINVAL);
-	}
-	i = sock->ops->connect(sock, (struct sockaddr *)address, addrlen, file->f_flags);
-	if (i < 0) 
-	{
+	i = sock->ops->connect(sock, (struct sockaddr *)address, addrlen,
+			     current->files->fd[fd]->f_flags);
+	if (i < 0)
 		return(i);
-	}
 	return(0);
 }
 
@@ -864,10 +867,8 @@
 	int len;
 	int err;
 	
-	if (fd < 0 || fd >= NR_OPEN || current->files->fd[fd] == NULL)
-		return(-EBADF);
-	if (!(sock = sockfd_lookup(fd, NULL)))
-		return(-ENOTSOCK);
+	if (!(sock = sockfd_lookup(fd, &err)))
+		return err;
 
 	err=sock->ops->getname(sock, (struct sockaddr *)address, &len, 0);
 	if(err)
@@ -889,10 +890,8 @@
 	int len;
 	int err;
 
-	if (fd < 0 || fd >= NR_OPEN || current->files->fd[fd] == NULL)
-		return(-EBADF);
-	if (!(sock = sockfd_lookup(fd, NULL)))
-		return(-ENOTSOCK);
+	if (!(sock = sockfd_lookup(fd, &err)))
+		return err;
 
 	err=sock->ops->getname(sock, (struct sockaddr *)address, &len, 1);
 	if(err)
@@ -910,15 +909,12 @@
 asmlinkage int sys_send(int fd, void * buff, size_t len, unsigned flags)
 {
 	struct socket *sock;
-	struct file *file;
 	int err;
 	struct msghdr msg;
 	struct iovec iov;
 
-	if (fd < 0 || fd >= NR_OPEN || ((file = current->files->fd[fd]) == NULL))
-		return(-EBADF);
-	if (!(sock = sockfd_lookup(fd, NULL))) 
-		return(-ENOTSOCK);
+	if (!(sock = sockfd_lookup(fd, &err))) 
+		return err;
 
 	if(len<0)
 		return -EINVAL;
@@ -929,10 +925,16 @@
 	iov.iov_base=buff;
 	iov.iov_len=len;
 	msg.msg_name=NULL;
+	msg.msg_namelen=0;
 	msg.msg_iov=&iov;
 	msg.msg_iovlen=1;
 	msg.msg_control=NULL;
-	return(sock->ops->sendmsg(sock, &msg, len, (file->f_flags & O_NONBLOCK), flags));
+	msg.msg_controllen=0;
+	if (current->files->fd[fd]->f_flags & O_NONBLOCK)
+		flags |= MSG_DONTWAIT;
+	msg.msg_flags=flags;
+
+	return sock_sendmsg(sock, &msg, len);
 }
 
 /*
@@ -945,31 +947,29 @@
 	   struct sockaddr *addr, int addr_len)
 {
 	struct socket *sock;
-	struct file *file;
 	char address[MAX_SOCK_ADDR];
 	int err;
 	struct msghdr msg;
 	struct iovec iov;
 	
-	if (fd < 0 || fd >= NR_OPEN || ((file = current->files->fd[fd]) == NULL))
-		return(-EBADF);
-	if (!(sock = sockfd_lookup(fd, NULL)))
-		return(-ENOTSOCK);
+	if (!(sock = sockfd_lookup(fd,&err)))
+		return err;
 
 	if(len<0)
 		return -EINVAL;
 	err=verify_area(VERIFY_READ,buff,len);
 	if(err)
 	  	return err;
-
+  	
 	iov.iov_base=buff;
 	iov.iov_len=len;
-	msg.msg_name = NULL;
-	msg.msg_namelen = 0;
+	msg.msg_name=NULL;
+	msg.msg_namelen=0;
 	msg.msg_iov=&iov;
 	msg.msg_iovlen=1;
 	msg.msg_control=NULL;
-	if (addr && addr_len) {
+	msg.msg_controllen=0;
+	if (addr_len) {
 		err=move_addr_to_kernel(addr,addr_len,address);
 		if (err < 0)
 			return err;
@@ -977,8 +977,11 @@
 		msg.msg_namelen=addr_len;
 	}
 	  	
-	return(sock->ops->sendmsg(sock, &msg, len, (file->f_flags & O_NONBLOCK),
-		flags));
+	if (current->files->fd[fd]->f_flags & O_NONBLOCK)
+		flags |= MSG_DONTWAIT;
+	msg.msg_flags=flags;
+
+	return sock_sendmsg(sock, &msg, len);
 }
 
 
@@ -991,14 +994,10 @@
 	struct iovec iov;
 	struct msghdr msg;
 	struct socket *sock;
-	struct file *file;
 	int err;
 
-	if (fd < 0 || fd >= NR_OPEN || ((file = current->files->fd[fd]) == NULL))
-		return(-EBADF);
-
-	if (!(sock = sockfd_lookup(fd, NULL))) 
-		return(-ENOTSOCK);
+	if (!(sock = sockfd_lookup(fd, &err))) 
+		return err;
 		
 	if(size<0)
 		return -EINVAL;
@@ -1012,10 +1011,12 @@
 	msg.msg_iov=&iov;
 	msg.msg_iovlen=1;
 	msg.msg_control=NULL;
+	msg.msg_controllen=0;
 	iov.iov_base=ubuf;
 	iov.iov_len=size;
 
-	return(sock->ops->recvmsg(sock, &msg, size,(file->f_flags & O_NONBLOCK), flags,&msg.msg_namelen));
+	return sock_recvmsg(sock, &msg, size,
+			    (current->files->fd[fd]->f_flags & O_NONBLOCK) ? flags | MSG_DONTWAIT : flags);
 }
 
 /*
@@ -1028,19 +1029,16 @@
 	     struct sockaddr *addr, int *addr_len)
 {
 	struct socket *sock;
-	struct file *file;
 	struct iovec iov;
 	struct msghdr msg;
 	char address[MAX_SOCK_ADDR];
 	int err;
-	int alen;
-	if (fd < 0 || fd >= NR_OPEN || ((file = current->files->fd[fd]) == NULL))
-		return(-EBADF);
-	if (!(sock = sockfd_lookup(fd, NULL))) 
-	  	return(-ENOTSOCK);
-	if(size<0)
+
+	if (!(sock = sockfd_lookup(fd, &err)))
+	  	return err;
+	if (size<0)
 		return -EINVAL;
-	if(size==0)
+	if (size==0)
 		return 0;
 
 	err=verify_area(VERIFY_WRITE,ubuf,size);
@@ -1048,18 +1046,19 @@
 	  	return err;
   
   	msg.msg_control=NULL;
+  	msg.msg_controllen=0;
   	msg.msg_iovlen=1;
   	msg.msg_iov=&iov;
   	iov.iov_len=size;
   	iov.iov_base=ubuf;
   	msg.msg_name=address;
   	msg.msg_namelen=MAX_SOCK_ADDR;
-	size=sock->ops->recvmsg(sock, &msg, size, (file->f_flags & O_NONBLOCK),
-		     flags, &alen);
+	size=sock_recvmsg(sock, &msg, size,
+			  (current->files->fd[fd]->f_flags & O_NONBLOCK) ? (flags | MSG_DONTWAIT) : flags);
 
 	if(size<0)
 	 	return size;
-	if(addr!=NULL && (err=move_addr_to_user(address,alen, addr, addr_len))<0)
+	if(addr!=NULL && (err=move_addr_to_user(address, msg.msg_namelen, addr, addr_len))<0)
 	  	return err;
 
 	return size;
@@ -1072,15 +1071,19 @@
  
 asmlinkage int sys_setsockopt(int fd, int level, int optname, char *optval, int optlen)
 {
+	int err;
 	struct socket *sock;
-	struct file *file;
 	
-	if (fd < 0 || fd >= NR_OPEN || ((file = current->files->fd[fd]) == NULL))
-		return(-EBADF);
-	if (!(sock = sockfd_lookup(fd, NULL))) 
-		return(-ENOTSOCK);
+	if (!(sock = sockfd_lookup(fd, &err))) 
+		return err;
+
+	if (level == SOL_SOCKET)
+		return sock_setsockopt(sock,level,optname,optval,optlen);
+
+	if (sock->ops->setsockopt)
+		return sock->ops->setsockopt(sock, level, optname, optval, optlen);
 
-	return(sock->ops->setsockopt(sock, level, optname, optval, optlen));
+	return -EOPNOTSUPP;
 }
 
 /*
@@ -1090,17 +1093,19 @@
 
 asmlinkage int sys_getsockopt(int fd, int level, int optname, char *optval, int *optlen)
 {
+	int err;
 	struct socket *sock;
-	struct file *file;
 
-	if (fd < 0 || fd >= NR_OPEN || ((file = current->files->fd[fd]) == NULL))
-		return(-EBADF);
-	if (!(sock = sockfd_lookup(fd, NULL)))
-		return(-ENOTSOCK);
+	if (!(sock = sockfd_lookup(fd, &err)))
+		return err;
 	    
-	if (!sock->ops->getsockopt) 
-		return(0);
-	return(sock->ops->getsockopt(sock, level, optname, optval, optlen));
+	if (level == SOL_SOCKET)
+		return sock_getsockopt(sock,level,optname,optval,optlen);
+
+	if (sock->ops->getsockopt)
+		return sock->ops->getsockopt(sock, level, optname, optval, optlen);
+
+	return -EOPNOTSUPP;
 }
 
 
@@ -1110,45 +1115,40 @@
  
 asmlinkage int sys_shutdown(int fd, int how)
 {
+	int err;
 	struct socket *sock;
-	struct file *file;
 
-	if (fd < 0 || fd >= NR_OPEN || ((file = current->files->fd[fd]) == NULL))
-		return(-EBADF);
-	if (!(sock = sockfd_lookup(fd, NULL))) 
-		return(-ENOTSOCK);
+	if (!(sock = sockfd_lookup(fd, &err))) 
+		return err;
 
-	return(sock->ops->shutdown(sock, how));
+	return sock->ops->shutdown(sock, how);
 }
 
 /*
  *	BSD sendmsg interface
  */
  
-asmlinkage int sys_sendmsg(int fd, struct msghdr *msg, unsigned int flags)
+asmlinkage int sys_sendmsg(int fd, struct msghdr *msg, unsigned flags)
 {
 	struct socket *sock;
-	struct file *file;
 	char address[MAX_SOCK_ADDR];
-	struct iovec iov[UIO_MAXIOV];
+	struct iovec iov[UIO_FASTIOV];
 	struct msghdr msg_sys;
 	void * krn_msg_ctl = NULL;
 	int err;
 	int total_len;
 	
-	if (fd < 0 || fd >= NR_OPEN || ((file = current->files->fd[fd]) == NULL))
-		return(-EBADF);
-	if (!(sock = sockfd_lookup(fd, NULL)))
-		return(-ENOTSOCK);
+	if (!(sock = sockfd_lookup(fd,&err)))
+		return err;
 	
-	if(sock->ops->sendmsg==NULL)
+	if (sock->ops->sendmsg==NULL)
 		return -EOPNOTSUPP;
 
 	if (copy_from_user(&msg_sys,msg,sizeof(struct msghdr)))
-		return -EFAULT; 
+		return -EFAULT;
 
 	/* do not move before msg_sys is valid */
-	if(msg_sys.msg_iovlen>UIO_MAXIOV)
+	if (msg_sys.msg_iovlen>UIO_MAXIOV)
 		return -EINVAL;
 
 	/* This will also move the address data into kernel space */
@@ -1156,21 +1156,37 @@
 	if (err < 0)
 		return err;
 	total_len=err;
-		
-	if (msg_sys.msg_control)
+
+	if (msg_sys.msg_control==NULL)
+		msg_sys.msg_controllen = 0;
+
+	if (msg_sys.msg_controllen)
 	{
 		krn_msg_ctl = kmalloc(msg_sys.msg_controllen, GFP_KERNEL);
+		
+		if (!krn_msg_ctl)
+		{
+			err = -ENOBUFS;
+			goto flush_it;
+		}		
 		err = copy_from_user(krn_msg_ctl, msg_sys.msg_control,
 				     msg_sys.msg_controllen);
 		if (err)
-			return -EFAULT;
+			goto flush_it;
 		msg_sys.msg_control = krn_msg_ctl;
 	}
 
-	err = sock->ops->sendmsg(sock, &msg_sys, total_len,
-				 (file->f_flags&O_NONBLOCK), flags);
+	msg_sys.msg_flags = flags;
+	if (current->files->fd[fd]->f_flags & O_NONBLOCK)
+		msg_sys.msg_flags |= MSG_DONTWAIT;
 
-	if (msg_sys.msg_control)
+	err = sock_sendmsg(sock, &msg_sys, total_len);
+
+flush_it:
+	if (msg_sys.msg_iov != iov)
+		kfree(iov);
+
+	if (krn_msg_ctl)
 	{
 		kfree(krn_msg_ctl);
 	}
@@ -1185,32 +1201,28 @@
 asmlinkage int sys_recvmsg(int fd, struct msghdr *msg, unsigned int flags)
 {
 	struct socket *sock;
-	struct file *file;
-	struct iovec iov[UIO_MAXIOV];
+	struct iovec iovstack[UIO_FASTIOV];
+	struct iovec *iov=iovstack;
 	struct msghdr msg_sys;
-	void *usr_msg_ctl = NULL;
-	void *krn_msg_ctl = NULL;
+	void * krn_msg_ctl = NULL;
 	int err;
 	int total_len;
 	int len;
 
 	/* kernel mode address */
 	char addr[MAX_SOCK_ADDR];
-	int addr_len;
 
 	/* user mode address pointers */
 	struct sockaddr *uaddr;
 	int *uaddr_len;
 	
-	if (fd < 0 || fd >= NR_OPEN || ((file = current->files->fd[fd]) == NULL))
-		return(-EBADF);
-	if (!(sock = sockfd_lookup(fd, NULL)))
-		return(-ENOTSOCK);
-	
+	if (!(sock = sockfd_lookup(fd, &err)))
+		return err;
+
 	if (copy_from_user(&msg_sys,msg,sizeof(struct msghdr)))
 		return -EFAULT; 
 
-	if(msg_sys.msg_iovlen>UIO_MAXIOV)
+	if (msg_sys.msg_iovlen>UIO_MAXIOV)
 		return -EINVAL;
 
 	/*
@@ -1219,49 +1231,77 @@
 	 */
 	uaddr = msg_sys.msg_name;
 	uaddr_len = &msg->msg_namelen;
-	err=verify_iovec(&msg_sys,iov,addr, VERIFY_WRITE);
-	if(err<0)
+	err=verify_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
+	if (err<0)
 		return err;
 
 	total_len=err;
 
+	msg_sys.msg_flags = 0;
 	
+	if (msg_sys.msg_control==NULL)
+		msg_sys.msg_controllen = 0;
 
-	if (msg_sys.msg_control)
+	if (msg_sys.msg_controllen)
 	{
-		usr_msg_ctl = msg_sys.msg_control;
 		krn_msg_ctl = kmalloc(msg_sys.msg_controllen, GFP_KERNEL);
-		err = copy_from_user(krn_msg_ctl, usr_msg_ctl,
+		
+		if (!krn_msg_ctl)
+		{
+			err=-ENOBUFS;
+			goto flush_it;		
+		}
+		err = copy_from_user(krn_msg_ctl, msg_sys.msg_control,
 				     msg_sys.msg_controllen);
 		if (err)
-			return -EFAULT;
+			goto flush_it;
 		msg_sys.msg_control = krn_msg_ctl;
 	}
-	
-	if(sock->ops->recvmsg==NULL)
-		return -EOPNOTSUPP;
-	len=sock->ops->recvmsg(sock, &msg_sys, total_len, (file->f_flags&O_NONBLOCK), flags, &addr_len);
-	if(len<0)
-		return len;
 
+	if (current->files->fd[fd]->f_flags&O_NONBLOCK)
+		flags |= MSG_DONTWAIT;
+
+	len=sock_recvmsg(sock, &msg_sys, total_len, flags);
+	if (msg_sys.msg_iov != iov)
+		kfree(iov);
+	if (len<0)
+	{
+		err=len;
+		goto flush_it;
+	}
+	
 	if (uaddr != NULL)
 	{
-		err = move_addr_to_user(addr, addr_len, uaddr, uaddr_len);
+		err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr,
+					uaddr_len);
+
 	}
+	
+	if (!err && msg_sys.msg_controllen)
+	{
+		err = copy_to_user(msg_sys.msg_control, krn_msg_ctl,
+				   msg_sys.msg_controllen);
+	}
+
+flush_it:
+	if (msg_sys.msg_iov != iov)
+		kfree(iov);
 
-	if (msg_sys.msg_control)
+	if (krn_msg_ctl)
 	{
-		if (!err)
-		{
-			err = copy_to_user(usr_msg_ctl, krn_msg_ctl,
-					   msg_sys.msg_controllen);
-			if (err)
-			    err = -EFAULT; 
-		}
-		kfree(krn_msg_ctl);		
+		kfree(krn_msg_ctl);
 	}
 
-	return err ? err : len;
+	if (err)
+		return err;
+
+	if (put_user(msg_sys.msg_flags, &msg->msg_flags))
+		return -EFAULT;
+
+	if (put_user(msg_sys.msg_controllen, &msg->msg_controllen))
+		return -EFAULT;
+
+	return len;
 }
 
 
@@ -1274,8 +1314,8 @@
 	struct socket *sock;
 
 	sock = socki_lookup (filp->f_inode);
-	if (sock != NULL && sock->ops != NULL && sock->ops->fcntl != NULL)
-		return(sock->ops->fcntl(sock, cmd, arg));
+	if (sock && sock->ops && sock->ops->fcntl)
+		return sock->ops->fcntl(sock, cmd, arg);
 	return(-EINVAL);
 }
 
@@ -1383,23 +1423,23 @@
 	return -EINVAL; /* to keep gcc happy */
 }
 
+
 /*
  *	This function is called by a protocol handler that wants to
  *	advertise its address family, and have it linked into the
  *	SOCKET module.
  */
  
-int sock_register(int family, struct proto_ops *ops)
+int sock_register(struct net_proto_family *ops)
 {
 	int i;
 
 	cli();
 	for(i = 0; i < NPROTO; i++) 
 	{
-		if (pops[i] != NULL) 
+		if (net_families[i] != NULL) 
 			continue;
-		pops[i] = ops;
-		pops[i]->family = family;
+		net_families[i] = ops;
 		sti();
 		return(i);
 	}
@@ -1420,11 +1460,11 @@
 	cli();
 	for(i = 0; i < NPROTO; i++) 
 	{
-		if (pops[i] == NULL) 
+		if (net_families[i] == NULL) 
 			continue;
-		if (pops[i]->family == family)
+		if (net_families[i]->family == family)
 		{
-			pops[i]=NULL;
+			net_families[i]=NULL;
 			sti();
 			return(i);
 		}
@@ -1453,13 +1493,13 @@
 {
 	int i;
 
-	printk(KERN_INFO "Swansea University Computer Society NET3.037 for Linux 2.1\n");
+	printk(KERN_INFO "Swansea University Computer Society NET3.038 for Linux 2.1\n");
 
 	/*
 	 *	Initialize all address (protocol) families. 
 	 */
 	 
-	for (i = 0; i < NPROTO; ++i) pops[i] = NULL;
+	for (i = 0; i < NPROTO; i++) net_families[i] = NULL;
 	
 	/*
 	 *	The netlink device handler may be needed early.
@@ -1467,13 +1507,6 @@
 
 #ifdef CONFIG_NETLINK
 	init_netlink();
-#endif		 
-	/*
-	 *	Attach the routing/device information port.
-	 */
-
-#if defined(CONFIG_RTNETLINK)
-	netlink_attach(NETLINK_ROUTE, netlink_donothing);
 #endif
 
 	/*

FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen, slshen@lbl.gov