patch-2.1.92 linux/net/ipv4/tcp_output.c

diff -u --recursive --new-file v2.1.91/linux/net/ipv4/tcp_output.c linux/net/ipv4/tcp_output.c
@@ -5,7 +5,7 @@
  *
  *		Implementation of the Transmission Control Protocol(TCP).
  *
- * Version:	$Id: tcp_output.c,v 1.76 1998/03/22 22:10:24 davem Exp $
+ * Version:	$Id: tcp_output.c,v 1.81 1998/03/30 08:41:36 davem Exp $
  *
  * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -99,12 +99,13 @@
 		/* Build TCP header and checksum it. */
 		th->source		= sk->sport;
 		th->dest		= sk->dport;
-		th->seq			= htonl(skb->seq);
+		th->seq			= htonl(TCP_SKB_CB(skb)->seq);
 		th->ack_seq		= htonl(tp->rcv_nxt);
 		th->doff		= (tcp_header_size >> 2);
 		th->res1		= 0;
 		*(((__u8 *)th) + 13)	= tcb->flags;
-		th->window		= htons(tcp_select_window(sk));
+		if(!(tcb->flags & TCPCB_FLAG_SYN))
+			th->window	= htons(tcp_select_window(sk));
 		th->check		= 0;
 		th->urg_ptr		= ntohs(tcb->urg_ptr);
 		if(tcb->flags & TCPCB_FLAG_SYN) {
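
The guard added above keeps tcp_select_window() out of SYN segments; presumably that helper returns a window already shifted down by the receive window scale, and RFC 1323 says the window field of a SYN is never scaled. A minimal sketch of that distinction (helper and parameter names are illustrative, not the kernel's):

/* Sketch: the advertised window is capped but unscaled on a SYN,
 * and scaled down on every later segment.  Illustrative only.
 */
static __u16 advertised_window(__u32 win, int syn, int rcv_wscale)
{
	if (syn)
		return (__u16)(win > 65535 ? 65535 : win);
	return (__u16)(win >> rcv_wscale);
}
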
@@ -114,10 +115,10 @@
 					      sysctl_tcp_sack,
 					      sysctl_tcp_window_scaling,
 					      tp->rcv_wscale,
-					      skb->when);
+					      TCP_SKB_CB(skb)->when);
 		} else {
 			tcp_build_and_update_options((__u32 *)(th + 1),
-						     tp, skb->when);
+						     tp, TCP_SKB_CB(skb)->when);
 		}
 		tp->af_specific->send_check(sk, th, skb->len, skb);
 
@@ -136,13 +137,13 @@
 	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
 
 	/* Advance write_seq and place onto the write_queue. */
-	tp->write_seq += (skb->end_seq - skb->seq);
-	skb_queue_tail(&sk->write_queue, skb);
+	tp->write_seq += (TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq);
+	__skb_queue_tail(&sk->write_queue, skb);
 
 	if (!force_queue && tp->send_head == NULL && tcp_snd_test(sk, skb)) {
 		/* Send it out now. */
-		skb->when = jiffies;
-		tp->snd_nxt = skb->end_seq;
+		TCP_SKB_CB(skb)->when = jiffies;
+		tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
 		tp->packets_out++;
 		tcp_transmit_skb(sk, skb_clone(skb, GFP_KERNEL));
 		if(!tcp_timer_is_set(sk, TIME_RETRANS))
@@ -171,9 +172,7 @@
 
 	/* Get a new skb... force flag on. */
 	buff = sock_wmalloc(sk,
-			    (nsize +
-			     MAX_HEADER +
-			     sk->prot->max_header + 15),
+			    (nsize + MAX_HEADER + sk->prot->max_header),
 			    1, GFP_ATOMIC);
 	if (buff == NULL)
 		return -1; /* We'll just try again later. */
@@ -182,8 +181,8 @@
 	skb_reserve(buff, MAX_HEADER + sk->prot->max_header);
 		
 	/* Correct the sequence numbers. */
-	buff->seq = skb->seq + len;
-	buff->end_seq = skb->end_seq;
+	TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len;
+	TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq;
 	
 	/* PSH and FIN should only be set in the second packet. */
 	flags = TCP_SKB_CB(skb)->flags;
@@ -209,14 +208,14 @@
 	buff->csum = csum_partial_copy(skb->data + len, skb_put(buff, nsize),
 				       nsize, 0);
 
-	skb->end_seq -= nsize;
+	TCP_SKB_CB(skb)->end_seq -= nsize;
 	skb_trim(skb, skb->len - nsize);
 
 	/* Rechecksum original buffer. */
 	skb->csum = csum_partial(skb->data, skb->len, 0);
 
 	/* Link BUFF into the send queue. */
-	skb_append(skb, buff);
+	__skb_append(skb, buff);
 
 	return 0;
 }
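
For reference, the sequence bookkeeping this hunk preserves: tcp_fragment() splits a queued segment at byte offset len, the freshly allocated buffer takes over the tail of the range, and the original is trimmed back to the cut point. A worked sketch of the invariant, ignoring the FIN/SYN contribution to end_seq (identifiers illustrative):

struct seq_range { __u32 seq, end_seq; };

/* Split [seq, end_seq) at offset len: the original keeps the first
 * len bytes, the tail buffer inherits the rest.  Sketch only.
 */
static void split_range(struct seq_range *orig, struct seq_range *tail,
			__u32 len)
{
	tail->seq     = orig->seq + len;
	tail->end_seq = orig->end_seq;
	orig->end_seq = orig->seq + len;
}
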
@@ -264,8 +263,8 @@
 
 			/* Advance the send_head.  This one is going out. */
 			update_send_head(sk);
-			skb->when = jiffies;
-			tp->snd_nxt = skb->end_seq;
+			TCP_SKB_CB(skb)->when = jiffies;
+			tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
 			tp->packets_out++;
 			tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC));
 			sent_pkts = 1;
@@ -397,7 +396,7 @@
 			return;
 
 		/* Ok.  We will be able to collapse the packet. */
-		skb_unlink(next_skb);
+		__skb_unlink(next_skb, next_skb->list);
 
 		if(skb->len % 4) {
 			/* Must copy and rechecksum all data. */
@@ -413,7 +412,8 @@
 		}
 	
 		/* Update sequence range on original skb. */
-		skb->end_seq += next_skb->end_seq - next_skb->seq;
+		TCP_SKB_CB(skb)->end_seq +=
+			TCP_SKB_CB(next_skb)->end_seq - TCP_SKB_CB(next_skb)->seq;
 
 		/* Merge over control information. */
 		flags |= TCP_SKB_CB(next_skb)->flags; /* This moves PSH/FIN etc. over */
@@ -444,9 +444,12 @@
  	/* Don't muck with the congestion window here. */
  	tp->dup_acks = 0;
  	tp->high_seq = tp->snd_nxt;
-
- 	/* FIXME: make the current rtt sample invalid */
  	tp->retrans_head = NULL; 
+
+ 	/* Input control flow will see that this was retransmitted
+	 * and not use it for RTT calculation in the absence of
+	 * the timestamp option.
+	 */
  	tcp_retransmit_skb(sk, skb_peek(&sk->write_queue)); 
 }
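
The replacement comment above is pointing at Karn's rule: an ACK covering a retransmitted segment is ambiguous, since it may acknowledge either transmission, so the RTT sample is thrown away unless an echoed timestamp pins down which send it answers. A hedged sketch of that check; only TCPCB_SACKED_RETRANS comes from the patch, the helper and its parameters are illustrative:

static int rtt_sample_usable(__u8 sacked, int saw_tstamp)
{
	if (saw_tstamp)
		return 1;	/* timestamp echo disambiguates the sample */
	if (sacked & TCPCB_SACKED_RETRANS)
		return 0;	/* Karn: which transmission was ACKed?     */
	return 1;
}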
 
@@ -496,11 +499,12 @@
 
 	/* Ok, we're gonna send it out, update state. */
 	TCP_SKB_CB(skb)->sacked |= TCPCB_SACKED_RETRANS;
+	tp->retrans_out++;
 
 	/* Make a copy, if the first transmission SKB clone we made
 	 * is still in somebodies hands, else make a clone.
 	 */
-	skb->when = jiffies;
+	TCP_SKB_CB(skb)->when = jiffies;
 	if(skb_cloned(skb))
 		skb = skb_copy(skb, GFP_ATOMIC);
 	else
@@ -518,12 +522,14 @@
  * retransmitted data is acknowledged.  It tries to continue
  * resending the rest of the retransmit queue, until either
  * we've sent it all or the congestion window limit is reached.
+ * If doing SACK, the first ACK which comes back for a timeout
+ * based retransmit packet might feed us FACK information again.
+ * If so, we use it to avoid unnecessary retransmissions.
  */
 void tcp_xmit_retransmit_queue(struct sock *sk)
 {
 	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
 	struct sk_buff *skb;
-	int ct = 0;
 
 	if (tp->retrans_head == NULL)
 		tp->retrans_head = skb_peek(&sk->write_queue);
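
FACK (forward acknowledgment) treats the forward-most SACKed segment as a marker: anything below it that has neither been SACKed nor already retransmitted is presumed lost and worth resending. A sketch of that eligibility test, using only the sacked bits the patch itself manipulates (helper name illustrative):

static int fack_should_retransmit(__u8 sacked)
{
	/* Skip segments the receiver already holds and segments we
	 * have already resent; everything else below the forward-most
	 * SACK is a candidate.  Sketch only.
	 */
	return !(sacked & (TCPCB_SACKED_ACKED | TCPCB_SACKED_RETRANS));
}
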
@@ -539,19 +545,48 @@
 			if(tcp_retransmit_skb(sk, skb))
 				break;
 		
-			/* Count retransmissions locally. */
-			ct++;
-
 			/* Stop retransmitting if we've hit the congestion
 			 * window limit.
 			 */
-			if (ct >= tp->snd_cwnd)
+			if (tp->retrans_out >= (tp->snd_cwnd >> TCP_CWND_SHIFT))
 				break;
 		}
 		update_retrans_head(sk);
 	}
 }
 
+/* Using FACK information, retransmit all missing frames at the receiver
+ * up to the forward most SACK'd packet (tp->fackets_out) if the packet
+ * has not been retransmitted already.
+ */
+void tcp_fack_retransmit(struct sock *sk)
+{
+	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+	struct sk_buff *skb = skb_peek(&sk->write_queue);
+	int packet_cnt = 0;
+
+	while((skb != NULL) &&
+	      (skb != tp->send_head) &&
+	      (skb != (struct sk_buff *)&sk->write_queue)) {
+		__u8 sacked = TCP_SKB_CB(skb)->sacked;
+
+		if(sacked & (TCPCB_SACKED_ACKED | TCPCB_SACKED_RETRANS))
+			goto next_packet;
+
+		/* Ok, retransmit it. */
+		if(tcp_retransmit_skb(sk, skb))
+			break;
+
+		if(tcp_packets_in_flight(tp) >= (tp->snd_cwnd >> TCP_CWND_SHIFT))
+			break;
+next_packet:
+		packet_cnt++;
+		if(packet_cnt >= tp->fackets_out)
+			break;
+		skb = skb->next;
+	}
+}
+
 /* Send a fin.  The caller locks the socket for us.  This cannot be
  * allowed to fail queueing a FIN frame under any circumstances.
  */
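
Both retransmit loops in the hunk above stop once the amount outstanding reaches tp->snd_cwnd >> TCP_CWND_SHIFT, which suggests snd_cwnd is now kept in fixed point so it can be opened by a fraction of a packet per ACK; the shift converts it back to whole packets before the comparison. A sketch of that arithmetic, with the shift value chosen purely for illustration:

#define CWND_SHIFT_SKETCH 5	/* assumption, not the kernel's value */

/* One scaled unit is 1/(1 << CWND_SHIFT_SKETCH) of a packet, so the
 * congestion window in whole packets is the scaled value shifted
 * back down.  Sketch only.
 */
static int may_retransmit(unsigned int snd_cwnd_scaled,
			  unsigned int packets_in_flight)
{
	return packets_in_flight < (snd_cwnd_scaled >> CWND_SHIFT_SKETCH);
}

tcp_fack_retransmit() then walks the write queue from the head, skips anything already SACKed or already retransmitted, and gives up once it has examined tp->fackets_out segments or the window above is full.
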
@@ -573,7 +608,7 @@
 	if((tp->send_head != NULL) && (skb->len < mss_now)) {
 		/* tcp_write_xmit() takes care of the rest. */
 		TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_FIN;
-		skb->end_seq++;
+		TCP_SKB_CB(skb)->end_seq++;
 		tp->write_seq++;
 	} else {
 		/* Socket is locked, keep trying until memory is available. */
@@ -592,8 +627,8 @@
 		TCP_SKB_CB(skb)->urg_ptr = 0;
 
 		/* FIN eats a sequence byte, write_seq advanced by tcp_send_skb(). */
-		skb->seq = tp->write_seq;
-		skb->end_seq = skb->seq + 1;
+		TCP_SKB_CB(skb)->seq = tp->write_seq;
+		TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1;
 		tcp_send_skb(sk, skb, 0);
 	}
 }
@@ -621,9 +656,9 @@
 	TCP_SKB_CB(skb)->urg_ptr = 0;
 
 	/* Send it off. */
-	skb->seq = tp->write_seq;
-	skb->end_seq = skb->seq;
-	skb->when = jiffies;
+	TCP_SKB_CB(skb)->seq = tp->write_seq;
+	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq;
+	TCP_SKB_CB(skb)->when = jiffies;
 	tcp_transmit_skb(sk, skb);
 }
 
@@ -650,10 +685,10 @@
 	TCP_SKB_CB(skb)->urg_ptr = 0;
 
 	/* SYN eats a sequence byte. */
-	skb->seq = tp->snd_una;
-	skb->end_seq = skb->seq + 1;
-	skb_queue_tail(&sk->write_queue, skb);
-	skb->when = jiffies;
+	TCP_SKB_CB(skb)->seq = tp->snd_una;
+	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1;
+	__skb_queue_tail(&sk->write_queue, skb);
+	TCP_SKB_CB(skb)->when = jiffies;
 	tp->packets_out++;
 	tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC));
 	return 0;
@@ -705,9 +740,9 @@
 	th->ack = 1;
 	th->source = sk->sport;
 	th->dest = req->rmt_port;
-	skb->seq = req->snt_isn;
-	skb->end_seq = skb->seq + 1;
-	th->seq = htonl(skb->seq);
+	TCP_SKB_CB(skb)->seq = req->snt_isn;
+	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1;
+	th->seq = htonl(TCP_SKB_CB(skb)->seq);
 	th->ack_seq = htonl(req->rcv_isn + 1);
 	if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */
 		__u8 rcv_wscale; 
@@ -722,10 +757,10 @@
 	}
 	th->window = htons(req->rcv_wnd);
 
-	skb->when = jiffies;
+	TCP_SKB_CB(skb)->when = jiffies;
 	tcp_syn_build_options((__u32 *)(th + 1), req->mss, req->tstamp_ok,
 			      req->sack_ok, req->wscale_ok, req->rcv_wscale,
-			      skb->when);
+			      TCP_SKB_CB(skb)->when);
 
 	skb->csum = 0;
 	th->doff = (tcp_header_size >> 2);
@@ -774,9 +809,9 @@
 	TCP_SKB_CB(buff)->sacked = 0;
 	TCP_SKB_CB(buff)->urg_ptr = 0;
 	buff->csum = 0;
-	buff->seq = tp->write_seq++;
-	buff->end_seq = tp->write_seq;
-	tp->snd_nxt = buff->end_seq;
+	TCP_SKB_CB(buff)->seq = tp->write_seq++;
+	TCP_SKB_CB(buff)->end_seq = tp->write_seq;
+	tp->snd_nxt = TCP_SKB_CB(buff)->end_seq;
 
 	tp->window_clamp = dst->window;
 	tcp_select_initial_window(sock_rspace(sk)/2,sk->mss,
@@ -784,7 +819,6 @@
 		&tp->window_clamp,
 		sysctl_tcp_window_scaling,
 		&tp->rcv_wscale);
-
 	/* Ok, now lock the socket before we make it visible to
 	 * the incoming packet engine.
 	 */
@@ -800,10 +834,12 @@
 	tp->rto = dst->rtt;
 	tcp_init_xmit_timers(sk);
 	tp->retransmits = 0;
+	tp->fackets_out = 0;
+	tp->retrans_out = 0;
 
 	/* Send it off. */
-	skb_queue_tail(&sk->write_queue, buff);
-	buff->when = jiffies;
+	__skb_queue_tail(&sk->write_queue, buff);
+	TCP_SKB_CB(buff)->when = jiffies;
 	tp->packets_out++;
 	tcp_transmit_skb(sk, skb_clone(buff, GFP_KERNEL));
 	tcp_statistics.TcpActiveOpens++;
@@ -870,8 +906,8 @@
 		TCP_SKB_CB(buff)->urg_ptr = 0;
 
 		/* Send it off, this clears delayed acks for us. */
-		buff->seq = buff->end_seq = tp->snd_nxt;
-		buff->when = jiffies;
+		TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tp->snd_nxt;
+		TCP_SKB_CB(buff)->when = jiffies;
 		tcp_transmit_skb(sk, buff);
 	}
 }
@@ -904,13 +940,13 @@
 			 * must have been a result SWS avoidance ( sender )
 			 */
 			win_size = tp->snd_wnd - (tp->snd_nxt - tp->snd_una);
-			if (win_size < skb->end_seq - skb->seq) {
+			if (win_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq) {
 				if (tcp_fragment(sk, skb, win_size))
 					return; /* Let a retransmit get it. */
 			}
 			update_send_head(sk);
-			skb->when = jiffies;
-			tp->snd_nxt = skb->end_seq;
+			TCP_SKB_CB(skb)->when = jiffies;
+			tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
 			tp->packets_out++;
 			tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC));
 			if (!tcp_timer_is_set(sk, TIME_RETRANS))
@@ -933,9 +969,9 @@
 			 * end to send an ack.  Don't queue or clone SKB, just
 			 * send it.
 			 */
-			skb->seq = tp->snd_nxt - 1;
-			skb->end_seq = skb->seq;
-			skb->when = jiffies;
+			TCP_SKB_CB(skb)->seq = tp->snd_nxt - 1;
+			TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq;
+			TCP_SKB_CB(skb)->when = jiffies;
 			tcp_transmit_skb(sk, skb);
 		}
 	}
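
The last two hunks are the window-probe path: if part of a queued segment still fits the peer's window it is fragmented down and transmitted, otherwise a zero-length probe is built over the byte just below snd_nxt purely to make the receiver answer with an ACK carrying a fresh window advertisement. A sketch of the probe's sequence numbers, mirroring the assignments above (identifiers illustrative):

struct probe_seq { __u32 seq, end_seq; };

/* Sketch: the probe carries no payload and a stale sequence number,
 * so the receiver consumes nothing but must still reply with an ACK
 * advertising its current window.
 */
static void build_probe(struct probe_seq *p, __u32 snd_nxt)
{
	p->seq     = snd_nxt - 1;
	p->end_seq = p->seq;
}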
