patch-2.1.11 linux/net/ipv4/tcp_input.c
- Lines: 458
- Date: Mon Nov 18 10:44:23 1996
- Orig file: v2.1.10/linux/net/ipv4/tcp_input.c
- Orig date: Fri Nov 15 23:49:11 1996
diff -u --recursive --new-file v2.1.10/linux/net/ipv4/tcp_input.c linux/net/ipv4/tcp_input.c
@@ -45,12 +45,24 @@
*/
#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/sysctl.h>
#include <net/tcp.h>
-/*
- * Policy code extracted so it's now seperate
- */
+
+typedef void (*tcp_sys_cong_ctl_t)(struct sock *sk,
+ u32 seq, u32 ack,
+ u32 seq_rtt);
+
+static void tcp_cong_avoid_vanj(struct sock *sk, u32 seq, u32 ack,
+ u32 seq_rtt);
+static void tcp_cong_avoid_vegas(struct sock *sk, u32 seq, u32 ack,
+ u32 seq_rtt);
+
+int sysctl_tcp_cong_avoidance = 0;
+
+static tcp_sys_cong_ctl_t tcp_sys_cong_ctl_f = &tcp_cong_avoid_vanj;
/*
* Called each time to estimate the delayed ack timeout. This is
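The hunk above replaces the old boolean Vegas switch with a function pointer: tcp_sys_cong_ctl_f has type tcp_sys_cong_ctl_t and starts out pointing at the Van Jacobson routine, so the ACK path makes one indirect call instead of testing a sysctl flag for every acknowledged segment. A minimal standalone sketch of that dispatch pattern, with illustrative names rather than the kernel's structures:

#include <stdio.h>

/* Shared signature for all congestion-avoidance strategies
 * (modelled on tcp_sys_cong_ctl_t, minus the struct sock). */
typedef void (*cong_ctl_fn)(unsigned int seq, unsigned int ack,
                            unsigned int seq_rtt);

static void cong_avoid_vanj(unsigned int seq, unsigned int ack,
                            unsigned int seq_rtt)
{
        printf("VJ: acked %u-%u, rtt %u\n", seq, ack, seq_rtt);
}

static void cong_avoid_vegas(unsigned int seq, unsigned int ack,
                             unsigned int seq_rtt)
{
        printf("Vegas: acked %u-%u, rtt %u\n", seq, ack, seq_rtt);
}

/* Default strategy; reassigned at runtime (here directly, in the
 * patch via the sysctl handler added near the end of the file). */
static cong_ctl_fn cong_ctl = cong_avoid_vanj;

int main(void)
{
        cong_ctl(1000, 2460, 12);
        cong_ctl = cong_avoid_vegas;
        cong_ctl(2460, 3920, 12);
        return 0;
}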
@@ -60,7 +72,7 @@
* The estimated value is changing to fast
*/
-extern __inline__ void tcp_delack_estimator(struct tcp_opt *tp)
+static void tcp_delack_estimator(struct tcp_opt *tp)
{
int m;
@@ -355,8 +367,6 @@
}
-int sysctl_tcp_vegas_cong_avoidance = 1;
-
/*
* TCP slow start and congestion avoidance in two flavors:
* RFC 1122 and TCP Vegas.
@@ -364,7 +374,7 @@
* This is a /proc/sys configurable option.
*/
-#define SHIFT_FACTOR 12
+#define SHIFT_FACTOR 16
static void tcp_cong_avoid_vegas(struct sock *sk, u32 seq, u32 ack,
u32 seq_rtt)
@@ -380,9 +390,11 @@
* to have improved several things over the initial spec.
*/
- u32 Actual, Expected;
- u32 snt_bytes;
struct tcp_opt * tp;
+ unsigned int Actual, Expected;
+ unsigned int inv_rtt, inv_basertt;
+ u32 snt_bytes;
+
tp = &(sk->tp_pinfo.af_tcp);
@@ -393,8 +405,7 @@
tp->basertt = min(seq_rtt, tp->basertt);
else
tp->basertt = seq_rtt;
-
-
+
/*
*
* Actual = throughput for this segment.
@@ -403,14 +414,13 @@
*/
snt_bytes = (ack - seq) << SHIFT_FACTOR;
-
- Actual = snt_bytes / seq_rtt;
- Expected = ((tp->snd_nxt - tp->snd_una) << SHIFT_FACTOR) / tp->basertt;
+ inv_rtt = (1 << SHIFT_FACTOR) / seq_rtt;
+
+ Actual = snt_bytes * inv_rtt;
+
+ inv_basertt = (1 << SHIFT_FACTOR) / tp->basertt;
+ Expected = ((tp->snd_nxt - tp->snd_una) << SHIFT_FACTOR) * inv_basertt;
-/*
- printk(KERN_DEBUG "A:%x E:%x rtt:%x srtt:%x win: %d\n",
- Actual, Expected, seq_rtt, tp->srtt, sk->cong_window);
- */
/*
* Slow Start
*/
@@ -418,23 +428,20 @@
if (sk->cong_window < sk->ssthresh &&
(seq == tp->snd_nxt ||
(((Expected - Actual) <=
- ((TCP_VEGAS_GAMMA << SHIFT_FACTOR) * sk->mss / tp->basertt))
+ ((TCP_VEGAS_GAMMA << SHIFT_FACTOR) * sk->mss * inv_basertt))
)
))
{
-
/*
* "Vegas allows exponential growth only every other
* RTT"
*/
- if (sk->cong_count || sk->cong_window <= 2)
+ if (!(sk->cong_count++))
{
sk->cong_window++;
sk->cong_count = 0;
}
- else
- sk->cong_count++;
}
else
{
@@ -443,39 +450,32 @@
*/
if (Expected - Actual <=
- ((TCP_VEGAS_ALPHA << SHIFT_FACTOR) * sk->mss / tp->basertt))
+ ((TCP_VEGAS_ALPHA << SHIFT_FACTOR) * sk->mss * inv_basertt))
{
/* Increase Linearly */
- if (sk->cong_count >= sk->cong_window)
+ if (sk->cong_count++ >= sk->cong_window)
{
sk->cong_window++;
sk->cong_count = 0;
}
- else
- sk->cong_count++;
}
if (Expected - Actual >=
- ((TCP_VEGAS_BETA << SHIFT_FACTOR) * sk->mss / tp->basertt))
+ ((TCP_VEGAS_BETA << SHIFT_FACTOR) * sk->mss * inv_basertt))
{
/* Decrease Linearly */
- if (sk->cong_count >= sk->cong_window)
+ if (sk->cong_count++ >= sk->cong_window)
{
sk->cong_window--;
sk->cong_count = 0;
}
- else
- sk->cong_count++;
-
-
+
/* Never less than 2 segments */
if (sk->cong_window < 2)
sk->cong_window = 2;
}
-
-
}
}
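The Vegas rework above raises SHIFT_FACTOR from 12 to 16 and trades the divisions by seq_rtt and tp->basertt for multiplications by the precomputed scaled reciprocals inv_rtt and inv_basertt, so each RTT value is divided into (1 << SHIFT_FACTOR) exactly once per ACK and every threshold comparison becomes a multiply. A rough standalone illustration of the scaled-reciprocal trick (the numbers and the plain bytes/rtt framing are mine; the patch applies the same reciprocals to both Actual and Expected, so the two sides of each comparison stay on the same scale):

#include <stdio.h>

#define SHIFT_FACTOR 16

int main(void)
{
        unsigned int bytes = 1460;      /* one MSS-sized segment   */
        unsigned int rtt   = 30;        /* round trip in jiffies   */

        /* One division yields a Q16 reciprocal of the RTT ...     */
        unsigned int inv_rtt = (1u << SHIFT_FACTOR) / rtt;

        /* ... after which each throughput estimate is a multiply.
         * The reciprocal is truncated, so the result is a slight
         * under-estimate, which is harmless for relative tests.   */
        unsigned int via_div = (bytes << SHIFT_FACTOR) / rtt;
        unsigned int via_mul = bytes * inv_rtt;

        printf("exact %u, via reciprocal %u\n", via_div, via_mul);
        return 0;
}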
@@ -520,6 +520,92 @@
#define FLAG_DATA 0x01
#define FLAG_WIN_UPDATE 0x02
#define FLAG_DATA_ACKED 0x04
+
+static int tcp_clean_rtx_queue(struct sock *sk, __u32 ack, __u32 *seq,
+ __u32 *seq_rtt)
+{
+ struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+ struct sk_buff *skb;
+ unsigned long now = jiffies;
+ int acked = 0;
+
+ while((skb=skb_peek(&sk->write_queue)) && (skb != tp->send_head))
+ {
+
+#ifdef TCP_DEBUG
+ /* Check for a bug. */
+
+ if (skb->next != (struct sk_buff*) &sk->write_queue &&
+ after(skb->end_seq, skb->next->seq))
+ printk("INET: tcp_input.c: *** "
+ "bug send_list out of order.\n");
+#endif
+ /*
+ * If our packet is before the ack sequence we can
+ * discard it as it's confirmed to have arrived the
+ * other end.
+ */
+
+ if (after(skb->end_seq, ack))
+ break;
+
+ if (sk->debug)
+ {
+ printk(KERN_DEBUG "removing seg %x-%x from "
+ "retransmit queue\n", skb->seq, skb->end_seq);
+ }
+
+ acked = FLAG_DATA_ACKED;
+
+ atomic_dec(&sk->packets_out);
+
+ *seq = skb->seq;
+ *seq_rtt = now - skb->when;
+
+ skb_unlink(skb);
+ skb->free = 1;
+
+ kfree_skb(skb, FREE_WRITE);
+ }
+
+ if (acked && !sk->dead)
+ {
+ tp->retrans_head = NULL;
+ sk->write_space(sk);
+ }
+
+ return acked;
+}
+
+static void tcp_ack_probe(struct sock *sk, __u32 ack)
+{
+ struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+
+ /*
+ * Our probe was answered
+ */
+ tp->probes_out = 0;
+
+ /*
+ * Was it a usable window open ?
+ */
+
+ /* should always be non-null */
+ if (tp->send_head != NULL &&
+ !before (ack + tp->snd_wnd, tp->send_head->end_seq))
+ {
+ tp->backoff = 0;
+ tp->pending = 0;
+
+ tcp_clear_xmit_timer(sk, TIME_PROBE0);
+
+ }
+ else
+ {
+ tcp_reset_xmit_timer(sk, TIME_PROBE0,
+ min(tp->rto << tp->backoff, 120*HZ));
+ }
+}
/*
* This routine deals with incoming acks, but not outgoing ones.
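tcp_clean_rtx_queue(), added in the hunk above, is the retransmit-queue walk that used to live inline in tcp_ack(): it frees every segment whose end_seq is at or before the cumulative ACK, stops at the first still-unacknowledged segment (or at tp->send_head), and hands back the seq and jiffies-based RTT sample of the last segment it removed. The wrap-safe sequence test it leans on is the usual signed-difference idiom; a toy version of the walk, with a plain array standing in for the sk_buff queue:

#include <stdio.h>

typedef unsigned int u32;
typedef int s32;

/* Modulo-2^32 comparison: a is "after" b iff the signed
 * difference is positive (valid within half the seq space). */
static int after(u32 a, u32 b)
{
        return (s32)(a - b) > 0;
}

struct seg { u32 seq, end_seq; };

int main(void)
{
        /* Toy retransmit queue, oldest segment first. */
        struct seg q[] = { {1000, 1500}, {1500, 2000}, {2000, 2500} };
        u32 ack = 2000;         /* cumulative ACK from the peer */
        unsigned int i, n = sizeof(q) / sizeof(q[0]);

        for (i = 0; i < n; i++) {
                if (after(q[i].end_seq, ack))
                        break;  /* first segment not fully acked */
                printf("seg %u-%u confirmed, drop it\n",
                       q[i].seq, q[i].end_seq);
        }
        printf("%u segment(s) still in flight\n", n - i);
        return 0;
}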
@@ -603,85 +689,18 @@
* it needs to be for normal retransmission.
*/
- if (tp->pending == TIME_PROBE0)
+ if (tp->pending == TIME_PROBE0)
{
- tp->probes_out = 0; /* Our probe was answered */
-
- /*
- * Was it a usable window open ?
- */
-
- /* should always be non-null */
- if (tp->send_head != NULL &&
- !before (ack + tp->snd_wnd, tp->send_head->end_seq))
- {
- tp->backoff = 0;
- tp->pending = 0;
-
- tcp_clear_xmit_timer(sk, TIME_PROBE0);
-
- }
- else
- {
- tcp_reset_xmit_timer(sk, TIME_PROBE0,
- min(tp->rto << tp->backoff,
- 120*HZ));
- }
+ tcp_ack_probe(sk, ack);
}
/*
* See if we can take anything off of the retransmit queue.
*/
-
- start_bh_atomic();
- while(((skb=skb_peek(&sk->write_queue)) != NULL) &&
- (skb != tp->send_head))
- {
- /* Check for a bug. */
+ if (tcp_clean_rtx_queue(sk, ack, &seq, &seq_rtt))
+ flag |= FLAG_DATA_ACKED;
- if (skb->next != (struct sk_buff*) &sk->write_queue &&
- after(skb->end_seq, skb->next->seq))
- printk("INET: tcp_input.c: *** "
- "bug send_list out of order.\n");
-
- /*
- * If our packet is before the ack sequence we can
- * discard it as it's confirmed to have arrived the
- * other end.
- */
-
- if (!after(skb->end_seq, ack))
- {
- if (sk->debug)
- {
- printk(KERN_DEBUG "removing seg %x-%x from "
- "retransmit queue\n",
- skb->seq, skb->end_seq);
- }
-
- tp->retrans_head = NULL;
-
- flag |= FLAG_DATA_ACKED;
- seq = skb->seq;
- seq_rtt = jiffies - skb->when;
-
- skb_unlink(skb);
- atomic_dec(&sk->packets_out);
- skb->free = 1;
-
- kfree_skb(skb, FREE_WRITE);
-
- if (!sk->dead)
- sk->write_space(sk);
- }
- else
- {
- break;
- }
- }
-
- end_bh_atomic();
/*
* if we where retransmiting don't count rtt estimate
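tcp_ack_probe(), also split out above, handles an ACK that answers a zero-window probe: if the advertised window now covers tp->send_head the probe timer is cleared, otherwise it is re-armed with min(tp->rto << tp->backoff, 120*HZ). A quick look at how that clamped interval grows (the rto and HZ values here are only plausible examples):

#include <stdio.h>

#define HZ 100                  /* jiffies per second, typical for the era */

int main(void)
{
        unsigned long rto = 3 * HZ;     /* example base retransmission timeout */
        unsigned int backoff;

        /* The re-arm interval grows as rto << backoff but is clamped
         * to two minutes, matching the expression in the patch. */
        for (backoff = 0; backoff < 8; backoff++) {
                unsigned long timeout = rto << backoff;

                if (timeout > 120 * HZ)
                        timeout = 120 * HZ;
                printf("backoff %u -> %lu jiffies (%lu s)\n",
                       backoff, timeout, timeout / HZ);
        }
        return 0;
}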
@@ -709,18 +728,13 @@
if (flag & FLAG_DATA_ACKED)
{
tcp_rtt_estimator(tp, seq_rtt);
- if (sysctl_tcp_vegas_cong_avoidance)
- {
- tcp_cong_avoid_vegas(sk, seq, ack, seq_rtt);
- }
- else
- {
- tcp_cong_avoid_vanj(sk, seq, ack, seq_rtt);
- }
+
+ (*tcp_sys_cong_ctl_f)(sk, seq, ack, seq_rtt);
}
}
+#ifdef TCP_DEBUG
/* Sanity check out packets_out counter */
if (skb_queue_len(&sk->write_queue) == 0 ||
@@ -733,7 +747,7 @@
sk->packets_out = 0;
}
}
-
+#endif
if (sk->packets_out)
{
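With the queue walk factored out, this hunk wires the RTT sample into tcp_rtt_estimator() and then calls whichever congestion-avoidance routine tcp_sys_cong_ctl_f currently points at, consistent with the earlier comment about not counting RTT estimates taken from retransmitted data (Karn's rule). For reference, the Jacobson/Karels smoothing that an estimator of this kind is built on looks roughly like this in plain, unscaled form (the kernel keeps srtt and mdev pre-shifted so it can stay in integer arithmetic):

#include <stdio.h>

/* Textbook RTT smoothing with gains 1/8 and 1/4; values in jiffies. */
struct rtt_est { double srtt, mdev, rto; };

static void rtt_sample(struct rtt_est *e, double m)
{
        double err = m - e->srtt;

        e->srtt += err / 8.0;                   /* smoothed RTT       */
        if (err < 0)
                err = -err;
        e->mdev += (err - e->mdev) / 4.0;       /* mean deviation     */
        e->rto = e->srtt + 4.0 * e->mdev;       /* retransmit timeout */
}

int main(void)
{
        struct rtt_est e = { 30.0, 5.0, 50.0 };
        double samples[] = { 28, 35, 31, 90, 30 };
        unsigned int i;

        for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
                rtt_sample(&e, samples[i]);
                printf("sample %3.0f -> srtt %5.1f  rto %5.1f\n",
                       samples[i], e.srtt, e.rto);
        }
        return 0;
}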
@@ -773,12 +787,6 @@
tcp_fast_retrans(sk, ack, (flag & (FLAG_DATA|FLAG_WIN_UPDATE)));
- /*
- * Maybe we can take some stuff off of the write queue,
- * and put it onto the xmit queue.
- */
-
-
return 1;
uninteresting_ack:
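The ACK path finishes by handing tcp_fast_retrans() the ACK together with a flag saying whether it carried data or a window update (a true duplicate ACK does neither). In outline the mechanism is the standard duplicate-ACK heuristic: a few ACKs for the same sequence number signal a lost segment before the retransmit timer fires. A stripped-down sketch of that counting, with the conventional threshold of three and names that are not taken from this file:

#include <stdio.h>

#define DUP_ACK_THRESHOLD 3     /* conventional fast-retransmit trigger */

/* Returns 1 when the oldest unacked segment should be resent now. */
static int dup_ack(unsigned int *last_ack, unsigned int *dups,
                   unsigned int ack)
{
        if (ack == *last_ack) {
                if (++(*dups) == DUP_ACK_THRESHOLD)
                        return 1;
        } else {
                *last_ack = ack;
                *dups = 0;
        }
        return 0;
}

int main(void)
{
        unsigned int acks[] = { 1000, 1500, 1500, 1500, 1500, 2500 };
        unsigned int last = 0, dups = 0, i;

        for (i = 0; i < sizeof(acks) / sizeof(acks[0]); i++)
                if (dup_ack(&last, &dups, acks[i]))
                        printf("third duplicate of %u: fast retransmit\n",
                               acks[i]);
        return 0;
}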
@@ -895,7 +903,7 @@
* out_of_order queue into the receive_queue
*/
-static __inline__ void tcp_ofo_queue(struct sock *sk)
+static void tcp_ofo_queue(struct sock *sk)
{
struct sk_buff * skb;
struct tcp_opt *tp=&(sk->tp_pinfo.af_tcp);
@@ -930,7 +938,7 @@
}
}
-static __inline__ void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
+static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
{
struct sk_buff * skb1;
struct tcp_opt *tp=&(sk->tp_pinfo.af_tcp);
@@ -1255,7 +1263,7 @@
}
-static __inline__ void prune_queue(struct sock *sk)
+static void prune_queue(struct sock *sk)
{
struct sk_buff * skb;
@@ -1832,6 +1840,32 @@
kfree_skb(skb, FREE_READ);
return 0;
+}
+
+int tcp_sysctl_congavoid(ctl_table *ctl, int write, struct file * filp,
+ void *buffer, size_t *lenp)
+{
+ int val = sysctl_tcp_cong_avoidance;
+ int retv;
+
+ retv = proc_dointvec(ctl, write, filp, buffer, lenp);
+
+ if (write)
+ {
+ switch (sysctl_tcp_cong_avoidance) {
+ case 0:
+ tcp_sys_cong_ctl_f = &tcp_cong_avoid_vanj;
+ break;
+ case 1:
+ tcp_sys_cong_ctl_f = &tcp_cong_avoid_vegas;
+ break;
+ default:
+ retv = -EINVAL;
+ sysctl_tcp_cong_avoidance = val;
+ }
+ }
+
+ return retv;
}
/*
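The new tcp_sysctl_congavoid() handler wraps proc_dointvec(): after the write lands in sysctl_tcp_cong_avoidance it installs the matching routine (0 selects the Van Jacobson code, 1 selects Vegas) or, for any other value, puts the old setting back and returns -EINVAL. The validate-then-commit shape, with the kernel plumbing stripped away and illustrative names:

#include <stdio.h>
#include <errno.h>

typedef void (*cong_ctl_fn)(void);

static void cong_avoid_vanj(void)  { puts("using Van Jacobson"); }
static void cong_avoid_vegas(void) { puts("using Vegas"); }

static int cong_setting = 0;
static cong_ctl_fn cong_ctl = cong_avoid_vanj;

/* Accept 0 or 1; anything else restores the previous value. */
static int set_cong_avoidance(int new_val)
{
        int old = cong_setting;

        cong_setting = new_val;         /* what proc_dointvec() has done */

        switch (cong_setting) {
        case 0:
                cong_ctl = cong_avoid_vanj;
                break;
        case 1:
                cong_ctl = cong_avoid_vegas;
                break;
        default:
                cong_setting = old;     /* roll back the bogus write */
                return -EINVAL;
        }
        return 0;
}

int main(void)
{
        set_cong_avoidance(1);
        cong_ctl();                     /* Vegas */
        if (set_cong_avoidance(7) == -EINVAL)
                puts("7 rejected, setting unchanged");
        cong_ctl();                     /* still Vegas */
        return 0;
}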