patch-2.4.23 linux-2.4.23/net/ipv4/ipvs/ip_vs_app.c

Next file: linux-2.4.23/net/ipv4/ipvs/ip_vs_conn.c
Previous file: linux-2.4.23/net/ipv4/ipvs/Makefile
Back to the patch index
Back to the overall index

diff -urN linux-2.4.22/net/ipv4/ipvs/ip_vs_app.c linux-2.4.23/net/ipv4/ipvs/ip_vs_app.c
@@ -0,0 +1,508 @@
+/*
+ * IPVS         Application module
+ *
+ * Version:     $Id: ip_vs_app.c,v 1.14 2001/11/23 14:34:10 wensong Exp $
+ *
+ * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Most code here is taken from ip_masq_app.c in kernel 2.2. The difference
+ * is that ip_vs_app module handles the reverse direction (incoming requests
+ * and outgoing responses). The ip_vs_app modules are only used for VS/NAT.
+ *
+ *		IP_MASQ_APP application masquerading module
+ *
+ * Author:	Juan Jose Ciarlante, <jjciarla@raiz.uncu.edu.ar>
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/init.h>
+#include <net/protocol.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+#include <asm/system.h>
+#include <linux/stat.h>
+#include <linux/proc_fs.h>
+
+#include <net/ip_vs.h>
+
+/*
+ *	Number of hash buckets in the application table. Must be a power
+ *	of 2 because IP_VS_APP_HASH masks with (IP_VS_APP_TAB_SIZE-1).
+ */
+#define IP_VS_APP_TAB_SIZE  16          /* must be power of 2 */
+
+/*
+ *	Key helpers. The "type" key packs the protocol above bit 16 and the
+ *	port in the low 16 bits; IP_VS_APP_PROTO recovers only the low 8
+ *	bits of the protocol. Every macro argument is parenthesized so that
+ *	an arbitrary expression can be passed without precedence surprises.
+ */
+#define IP_VS_APP_HASH(proto, port) (((port)^(proto)) & (IP_VS_APP_TAB_SIZE-1))
+#define IP_VS_APP_TYPE(proto, port) (((proto)<<16) | (port))
+#define IP_VS_APP_PORT(type)        ((type) & 0xffff)
+#define IP_VS_APP_PROTO(type)       (((type)>>16) & 0x00ff)
+
+
+EXPORT_SYMBOL(register_ip_vs_app);
+EXPORT_SYMBOL(unregister_ip_vs_app);
+
+
+/*
+ *	Hash table of registered ip_vs_app objects: one list head per
+ *	bucket, indexed by IP_VS_APP_HASH(proto, port). Entries are linked
+ *	through ip_vs_app.n_list. The heads are initialized in
+ *	ip_vs_app_init().
+ */
+static struct list_head ip_vs_app_base[IP_VS_APP_TAB_SIZE];
+
+/*
+ * Lock for the ip_vs_app table. Taken for writing (write_lock_bh) by the
+ * register/unregister routines and for reading (read_lock_bh) by the
+ * lookup and /proc listing code in this file.
+ */
+static rwlock_t __ip_vs_app_lock = RW_LOCK_UNLOCKED;
+
+
+/*
+ *	Register an application helper in the ip_vs_app table.
+ *
+ *	vapp:  helper object to insert; its type field is overwritten
+ *	       with the key built from (proto, port).
+ *	proto: protocol number the helper is keyed on.
+ *	port:  port the helper is keyed on, in host byte order.
+ *
+ *	Bumps this module's use count and links vapp at the head of its
+ *	hash bucket while holding the table write lock.
+ *
+ *	Returns 0 on success, -EINVAL when vapp is NULL.
+ */
+int register_ip_vs_app(struct ip_vs_app *vapp,
+		       unsigned short proto, __u16 port)
+{
+	struct list_head *bucket;
+
+	if (vapp == NULL) {
+		IP_VS_ERR("register_ip_vs_app(): NULL arg\n");
+		return -EINVAL;
+	}
+
+	MOD_INC_USE_COUNT;
+
+	/* compute the key and the bucket before taking the lock */
+	bucket = &ip_vs_app_base[IP_VS_APP_HASH(proto, port)];
+	vapp->type = IP_VS_APP_TYPE(proto, port);
+
+	write_lock_bh(&__ip_vs_app_lock);
+	list_add(&vapp->n_list, bucket);
+	write_unlock_bh(&__ip_vs_app_lock);
+
+	return 0;
+}
+
+
+/*
+ *	Remove an application helper from the ip_vs_app table.
+ *
+ *	vapp: helper object previously passed to register_ip_vs_app().
+ *
+ *	Unlinks vapp from its hash bucket under the table write lock and
+ *	then drops the module use count taken at registration time.
+ *
+ *	Returns 0 on success, -EINVAL when vapp is NULL.
+ */
+int unregister_ip_vs_app(struct ip_vs_app *vapp)
+{
+	if (vapp == NULL) {
+		IP_VS_ERR("unregister_ip_vs_app(): NULL arg\n");
+		return -EINVAL;
+	}
+
+	write_lock_bh(&__ip_vs_app_lock);
+	list_del(&vapp->n_list);
+	write_unlock_bh(&__ip_vs_app_lock);
+
+	/* balance the MOD_INC_USE_COUNT done in register_ip_vs_app() */
+	MOD_DEC_USE_COUNT;
+
+	return 0;
+}
+
+
+/*
+ *	Look up an ip_vs_app object by protocol and port.
+ *
+ *	proto: protocol number.
+ *	port:  port in network byte order; it is converted with ntohs()
+ *	       because the table is keyed on the host-order port.
+ *
+ *	Returns the matching helper, or NULL if none is registered or the
+ *	only match belongs to a module that is being unloaded. When the
+ *	returned helper has a non-NULL module pointer, a reference on that
+ *	module has been taken with try_inc_mod_count(); it is dropped in
+ *	ip_vs_unbind_app().
+ */
+static struct ip_vs_app * ip_vs_app_get(unsigned short proto, __u16 port)
+{
+	struct list_head *e;
+	struct ip_vs_app *vapp;
+	unsigned hash;
+	unsigned type;
+
+	port = ntohs(port);
+	type = IP_VS_APP_TYPE(proto, port);
+	hash = IP_VS_APP_HASH(proto, port);
+
+	read_lock_bh(&__ip_vs_app_lock);
+
+	list_for_each(e, &ip_vs_app_base[hash]) {
+		vapp = list_entry(e, struct ip_vs_app, n_list);
+
+		/*
+		 * Compare the key first, so that module reference counts
+		 * are only touched for the entry we actually want and not
+		 * for every unrelated helper sharing this bucket.
+		 */
+		if (type != vapp->type)
+			continue;
+
+		/*
+		 * Test and MOD_INC_USE_COUNT atomically. A failure means
+		 * this application module is just being deleted, so keep
+		 * scanning the bucket.
+		 */
+		if (vapp->module && !try_inc_mod_count(vapp->module))
+			continue;
+
+		read_unlock_bh(&__ip_vs_app_lock);
+		return vapp;
+	}
+
+	read_unlock_bh(&__ip_vs_app_lock);
+	return NULL;
+}
+
+
+/*
+ *	Attach a connection to the application helper registered for its
+ *	protocol and virtual port, then run the helper's init_conn hook
+ *	if it has one.
+ *
+ *	Nothing is bound (NULL is returned) when the connection does not
+ *	use the NAT forwarding method, when its protocol is neither TCP
+ *	nor UDP, or when no helper matches. If the connection is already
+ *	bound, an error is logged and the existing helper is returned
+ *	unchanged.
+ */
+struct ip_vs_app * ip_vs_bind_app(struct ip_vs_conn *cp)
+{
+	struct ip_vs_app *app;
+
+	/* application helpers are only used with the NAT method */
+	if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
+		return NULL;
+
+	if (!(cp->protocol == IPPROTO_TCP || cp->protocol == IPPROTO_UDP))
+		return NULL;
+
+	/* refuse to bind twice */
+	if (cp->app != NULL) {
+		IP_VS_ERR("ip_vs_bind_app(): "
+			  "called for already bound object.\n");
+		return cp->app;
+	}
+
+	app = ip_vs_app_get(cp->protocol, cp->vport);
+	if (app == NULL)
+		return NULL;
+
+	cp->app = app;
+	if (app->init_conn)
+		app->init_conn(app, cp);
+
+	return app;
+}
+
+
+/*
+ *	Detach a connection from its application helper.
+ *
+ *	Runs the helper's done_conn hook if present, clears cp->app and
+ *	drops the helper module's use count when the helper has a module
+ *	pointer. Nothing is freed here.
+ *
+ *	Returns 1 if a helper was unbound, 0 if the protocol is neither
+ *	TCP nor UDP or no helper was bound.
+ */
+int ip_vs_unbind_app(struct ip_vs_conn *cp)
+{
+	struct ip_vs_app *app;
+
+	if (!(cp->protocol == IPPROTO_TCP || cp->protocol == IPPROTO_UDP))
+		return 0;
+
+	app = cp->app;
+	if (app == NULL)
+		return 0;
+
+	if (app->done_conn)
+		app->done_conn(app, cp);
+
+	cp->app = NULL;
+
+	if (app->module)
+		__MOD_DEC_USE_COUNT(app->module);
+
+	return 1;
+}
+
+
+/*
+ *	Rewrite th->seq according to the recorded ip_vs_seq offsets.
+ *
+ *	Sequence numbers after vseq->init_seq (the most recent resized
+ *	packet) are shifted by vseq->delta; all others are shifted by
+ *	vseq->previous_delta. The header is left untouched when both
+ *	offsets are zero.
+ */
+static inline void vs_fix_seq(const struct ip_vs_seq *vseq, struct tcphdr *th)
+{
+	__u32 seq;
+
+	if (!vseq->delta && !vseq->previous_delta)
+		return;
+
+	seq = ntohl(th->seq);
+
+	if (!after(seq, vseq->init_seq)) {
+		th->seq = htonl(seq + vseq->previous_delta);
+		IP_VS_DBG(9, "vs_fix_seq(): added previous_delta "
+			  "(%d) to seq\n", vseq->previous_delta);
+		return;
+	}
+
+	th->seq = htonl(seq + vseq->delta);
+	IP_VS_DBG(9, "vs_fix_seq(): added delta (%d) to seq\n",
+		  vseq->delta);
+}
+
+
+/*
+ *	Rewrite th->ack_seq according to the recorded ip_vs_seq offsets.
+ *
+ *	ack_seq names the next octet the sender expects, so it is compared
+ *	against init_seq+delta: acknowledgements beyond that point have
+ *	vseq->delta subtracted, all others have vseq->previous_delta
+ *	subtracted. The header is left untouched when both offsets are
+ *	zero.
+ */
+static inline void
+vs_fix_ack_seq(const struct ip_vs_seq *vseq, struct tcphdr *th)
+{
+	__u32 ack;
+
+	if (!vseq->delta && !vseq->previous_delta)
+		return;
+
+	ack = ntohl(th->ack_seq);
+
+	if (after(ack, vseq->init_seq+vseq->delta)) {
+		th->ack_seq = htonl(ack - vseq->delta);
+		IP_VS_DBG(9, "vs_fix_ack_seq(): subtracted delta "
+			  "(%d) from ack_seq\n", vseq->delta);
+		return;
+	}
+
+	th->ack_seq = htonl(ack - vseq->previous_delta);
+	IP_VS_DBG(9, "vs_fix_ack_seq(): subtracted "
+		  "previous_delta (%d) from ack_seq\n",
+		  vseq->previous_delta);
+}
+
+
+/*
+ *	Record a payload size change in an ip_vs_seq.
+ *
+ *	cp:   connection owning vseq; cp->lock is held while updating.
+ *	vseq: sequence bookkeeping to update.
+ *	flag: cp->flags bit marking that vseq is in use.
+ *	seq:  sequence number of the packet that was resized.
+ *	diff: size change of that packet.
+ *
+ *	The update is applied the first time (flag not yet set) or when
+ *	seq is after the previously recorded init_seq; otherwise nothing
+ *	changes. Callers are expected to have checked that the protocol
+ *	is TCP and that diff is non-zero.
+ */
+static inline void vs_seq_update(struct ip_vs_conn *cp, struct ip_vs_seq *vseq,
+				 unsigned flag, __u32 seq, int diff)
+{
+	/* the spinlock keeps the cp->flags update atomic */
+	spin_lock(&cp->lock);
+
+	if ((cp->flags & flag) && !after(seq, vseq->init_seq))
+		goto out_unlock;
+
+	/* the old delta must be saved before it is advanced */
+	vseq->previous_delta = vseq->delta;
+	vseq->delta += diff;
+	vseq->init_seq = seq;
+	cp->flags |= flag;
+
+  out_unlock:
+	spin_unlock(&cp->lock);
+}
+
+
+/*
+ *	Output packet hook: hands the packet to the pkt_out function of
+ *	the application helper bound to cp. Assumes cp != NULL.
+ *
+ *	For TCP connections the sequence number is first corrected with
+ *	cp->out_seq and the acknowledgement number with cp->in_seq, when
+ *	the corresponding flags are set. If the helper reports a non-zero
+ *	size change on a TCP connection, cp->out_seq is updated using the
+ *	sequence number the packet carried on entry.
+ *
+ *	Returns the value of the helper's pkt_out hook (the new - old
+ *	skb->len difference), or 0 when no helper is bound or the helper
+ *	has no pkt_out hook.
+ */
+int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb)
+{
+	struct ip_vs_app *app = cp->app;
+	struct iphdr *iph;
+	struct tcphdr *th;
+	__u32 orig_seq;
+	int diff;
+
+	/* nothing to do unless a helper is bound to this connection */
+	if (app == NULL)
+		return 0;
+
+	/* the transport header follows the variable-length IP header */
+	iph = skb->nh.iph;
+	th = (struct tcphdr *)((char *)iph + iph->ihl*4);
+
+	/* remember the seq number before any fix-up or resize */
+	orig_seq = ntohl(th->seq);
+
+	if (cp->protocol == IPPROTO_TCP) {
+		if (cp->flags & IP_VS_CONN_F_OUT_SEQ)
+			vs_fix_seq(&cp->out_seq, th);
+		if (cp->flags & IP_VS_CONN_F_IN_SEQ)
+			vs_fix_ack_seq(&cp->in_seq, th);
+	}
+
+	if (app->pkt_out == NULL)
+		return 0;
+
+	diff = app->pkt_out(app, cp, skb);
+
+	/* track the length change for later seq/ack fix-ups */
+	if (cp->protocol == IPPROTO_TCP && diff != 0)
+		vs_seq_update(cp, &cp->out_seq,
+			      IP_VS_CONN_F_OUT_SEQ, orig_seq, diff);
+
+	return diff;
+}
+
+
+/*
+ *	Input packet hook: hands the packet to the pkt_in function of the
+ *	application helper bound to cp. Assumes cp != NULL.
+ *
+ *	For TCP connections the sequence number is first corrected with
+ *	cp->in_seq and the acknowledgement number with cp->out_seq, when
+ *	the corresponding flags are set. If the helper reports a non-zero
+ *	size change on a TCP connection, cp->in_seq is updated using the
+ *	sequence number the packet carried on entry.
+ *
+ *	Returns the value of the helper's pkt_in hook (the new - old
+ *	skb->len difference), or 0 when no helper is bound or the helper
+ *	has no pkt_in hook.
+ */
+int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb)
+{
+	struct ip_vs_app *app = cp->app;
+	struct iphdr *iph;
+	struct tcphdr *th;
+	__u32 orig_seq;
+	int diff;
+
+	/* nothing to do unless a helper is bound to this connection */
+	if (app == NULL)
+		return 0;
+
+	/* the transport header follows the variable-length IP header */
+	iph = skb->nh.iph;
+	th = (struct tcphdr *)((char *)iph + iph->ihl*4);
+
+	/* remember the seq number before any fix-up or resize */
+	orig_seq = ntohl(th->seq);
+
+	if (cp->protocol == IPPROTO_TCP) {
+		if (cp->flags & IP_VS_CONN_F_IN_SEQ)
+			vs_fix_seq(&cp->in_seq, th);
+		if (cp->flags & IP_VS_CONN_F_OUT_SEQ)
+			vs_fix_ack_seq(&cp->out_seq, th);
+	}
+
+	if (app->pkt_in == NULL)
+		return 0;
+
+	diff = app->pkt_in(app, cp, skb);
+
+	/* track the length change for later seq/ack fix-ups */
+	if (cp->protocol == IPPROTO_TCP && diff != 0)
+		vs_seq_update(cp, &cp->in_seq,
+			      IP_VS_CONN_F_IN_SEQ, orig_seq, diff);
+
+	return diff;
+}
+
+
+/*
+ *	/proc/net/ip_vs_app entry function.
+ *
+ *	buffer: output buffer to format into.
+ *	start:  set to the first wanted byte inside buffer.
+ *	offset: position in the virtual file where the read begins.
+ *	length: number of bytes requested.
+ *
+ *	The listing is a header line followed by one line per registered
+ *	helper. Every line is exactly 64 bytes (63 padded characters plus
+ *	a newline), which is what lets "pos" advance in steps of 64 and
+ *	lines wholly before "offset" be skipped without formatting them.
+ *
+ *	Returns the number of valid bytes available at *start, clamped to
+ *	the range [0, length].
+ */
+static int ip_vs_app_getinfo(char *buffer, char **start, off_t offset,
+			     int length)
+{
+	off_t pos=0;
+	int len=0;
+	char temp[64];
+	int idx;
+	struct ip_vs_app *vapp;
+	struct list_head *e;
+
+	pos = 64;
+	if (pos > offset) {
+		len += sprintf(buffer+len, "%-63s\n",
+			       "prot port    usecnt name");
+	}
+
+	read_lock_bh(&__ip_vs_app_lock);
+	for (idx=0 ; idx < IP_VS_APP_TAB_SIZE; idx++) {
+		list_for_each (e, &ip_vs_app_base[idx]) {
+			vapp = list_entry(e, struct ip_vs_app, n_list);
+
+			pos += 64;
+			if (pos <= offset)
+				continue;
+			/*
+			 * Bound the write: "%-17s" pads but does not
+			 * truncate, so a long helper name would otherwise
+			 * overrun temp[] and also make the record longer
+			 * than the 64 bytes accounted for in pos.
+			 */
+			snprintf(temp, sizeof(temp), "%-3s  %-7u %-6d %-17s",
+				 ip_vs_proto_name(IP_VS_APP_PROTO(vapp->type)),
+				 IP_VS_APP_PORT(vapp->type),
+				 vapp->module?GET_USE_COUNT(vapp->module):0,
+				 vapp->name);
+			len += sprintf(buffer+len, "%-63s\n", temp);
+			if (pos >= offset+length)
+				goto done;
+		}
+	}
+  done:
+	read_unlock_bh(&__ip_vs_app_lock);
+
+	*start = buffer+len-(pos-offset);       /* Start of wanted data */
+	len = pos-offset;
+	if (len > length)
+		len = length;
+	if (len < 0)
+		len = 0;
+	return len;
+}
+
+
+/*
+ *	Replace a segment of skb data with a new segment.
+ *
+ *	skb:   packet to edit.
+ *	pri:   allocation priority handed to pskb_expand_head() when the
+ *	       buffer has to grow.
+ *	o_buf: start of the old segment, a pointer into skb->data.
+ *	o_len: length of the old segment.
+ *	n_buf: replacement bytes.
+ *	n_len: length of the replacement.
+ *
+ *	The data following the old segment is shifted to make the new
+ *	segment fit, the skb length is adjusted, and the IP header total
+ *	length is rewritten from skb->len.
+ *
+ *	Returns 0 on success, -ENOMEM if the skb could not be expanded.
+ */
+int ip_vs_skb_replace(struct sk_buff *skb, int pri,
+		      char *o_buf, int o_len, char *n_buf, int n_len)
+{
+	const int diff = n_len - o_len;
+	int o_offset;
+	int o_left;
+	char *seg;
+
+	EnterFunction(9);
+
+	o_offset = o_buf - (char *)skb->data;
+	/* bytes that follow the old segment in the skb data */
+	o_left = skb->len - (o_offset + o_len);
+
+	if (diff > 0) {
+		/* growing: make room first, reallocating if needed */
+		if (diff > skb_tailroom(skb) &&
+		    pskb_expand_head(skb, skb_headroom(skb), diff, pri))
+			return -ENOMEM;
+		skb_put(skb, diff);
+	}
+
+	/*
+	 * Re-derive the segment address from the offset: o_buf is stale
+	 * if the head was reallocated above, and identical otherwise.
+	 */
+	seg = (char *)skb->data + o_offset;
+	memmove(seg + n_len, seg + o_len, o_left);
+	memcpy(seg, n_buf, n_len);
+
+	/* shrinking (or same size): cut the tail after moving the data */
+	if (diff <= 0)
+		skb_trim(skb, skb->len + diff);
+
+	/* must update the iph total length here */
+	skb->nh.iph->tot_len = htons(skb->len);
+
+	LeaveFunction(9);
+	return 0;
+}
+
+
+/*
+ *	Set up the application helper table: make every hash bucket an
+ *	empty list and create the "ip_vs_app" entry under /proc/net.
+ *	Always returns 0.
+ */
+int ip_vs_app_init(void)
+{
+	struct list_head *head = ip_vs_app_base;
+	struct list_head *end = ip_vs_app_base + IP_VS_APP_TAB_SIZE;
+
+	for (; head != end; head++)
+		INIT_LIST_HEAD(head);
+
+	/* we will replace it with proc_net_ipvs_create() soon */
+	proc_net_create("ip_vs_app", 0, ip_vs_app_getinfo);
+	return 0;
+}
+
+/*
+ *	Undo ip_vs_app_init(): remove the "ip_vs_app" entry from /proc/net.
+ */
+void ip_vs_app_cleanup(void)
+{
+	proc_net_remove("ip_vs_app");
+}

FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen (who was at: slshen@lbl.gov)