Diffstat (limited to 'net/ipv4/tcp_input.c')
 net/ipv4/tcp_input.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 60 insertions(+), 13 deletions(-)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 8d821e45b91..b2b36196b34 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -107,6 +107,7 @@ int sysctl_tcp_early_retrans __read_mostly = 3;
 #define FLAG_DATA_SACKED	0x20 /* New SACK.				*/
 #define FLAG_ECE		0x40 /* ECE in this ACK				*/
 #define FLAG_SLOWPATH		0x100 /* Do not skip RFC checks for window update.*/
+#define FLAG_ORIG_SACK_ACKED	0x200 /* Never retransmitted data are (s)acked	*/
 #define FLAG_SND_UNA_ADVANCED	0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */
 #define FLAG_DSACKING_ACK	0x800 /* SACK blocks contained D-SACK info */
 #define FLAG_SACK_RENEGING	0x2000 /* snd_una advanced to a sacked seq */
@@ -1155,6 +1156,8 @@ static u8 tcp_sacktag_one(struct sock *sk,
 					   tcp_highest_sack_seq(tp)))
 					state->reord = min(fack_count,
 							   state->reord);
+				if (!after(end_seq, tp->high_seq))
+					state->flag |= FLAG_ORIG_SACK_ACKED;
 			}
 
 			if (sacked & TCPCB_LOST) {
@@ -1835,10 +1838,13 @@ void tcp_enter_loss(struct sock *sk, int how)
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
+	bool new_recovery = false;
 
 	/* Reduce ssthresh if it has not yet been made inside this window. */
-	if (icsk->icsk_ca_state <= TCP_CA_Disorder || tp->snd_una == tp->high_seq ||
+	if (icsk->icsk_ca_state <= TCP_CA_Disorder ||
+	    !after(tp->high_seq, tp->snd_una) ||
 	    (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) {
+		new_recovery = true;
 		tp->prior_ssthresh = tcp_current_ssthresh(sk);
 		tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
 		tcp_ca_event(sk, CA_EVENT_LOSS);
@@ -1883,6 +1889,14 @@ void tcp_enter_loss(struct sock *sk, int how)
 	tcp_set_ca_state(sk, TCP_CA_Loss);
 	tp->high_seq = tp->snd_nxt;
 	TCP_ECN_queue_cwr(tp);
+
+	/* F-RTO RFC5682 sec 3.1 step 1: retransmit SND.UNA if no previous
+	 * loss recovery is underway except recurring timeout(s) on
+	 * the same SND.UNA (sec 3.2). Disable F-RTO on path MTU probing.
+	 */
+	tp->frto = sysctl_tcp_frto &&
+		   (new_recovery || icsk->icsk_retransmits) &&
+		   !inet_csk(sk)->icsk_mtup.probe_size;
 }
 
 /* If ACK arrived pointing to a remembered SACK, it means that our
@@ -2426,12 +2440,12 @@ static int tcp_try_undo_partial(struct sock *sk, int acked)
 	return failed;
 }
 
-/* Undo during loss recovery after partial ACK. */
-static bool tcp_try_undo_loss(struct sock *sk)
+/* Undo during loss recovery after partial ACK or using F-RTO. */
+static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	if (tcp_may_undo(tp)) {
+	if (frto_undo || tcp_may_undo(tp)) {
 		struct sk_buff *skb;
 		tcp_for_write_queue(skb, sk) {
 			if (skb == tcp_send_head(sk))
@@ -2445,9 +2459,12 @@ static bool tcp_try_undo_loss(struct sock *sk)
 		tp->lost_out = 0;
 		tcp_undo_cwr(sk, true);
 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSUNDO);
+		if (frto_undo)
+			NET_INC_STATS_BH(sock_net(sk),
+					 LINUX_MIB_TCPSPURIOUSRTOS);
 		inet_csk(sk)->icsk_retransmits = 0;
 		tp->undo_marker = 0;
-		if (tcp_is_sack(tp))
+		if (frto_undo || tcp_is_sack(tp))
 			tcp_set_ca_state(sk, TCP_CA_Open);
 		return true;
 	}
@@ -2667,24 +2684,52 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
 /* Process an ACK in CA_Loss state. Move to CA_Open if lost data are
  * recovered or spurious. Otherwise retransmits more on partial ACKs.
  */
-static void tcp_process_loss(struct sock *sk, int flag)
+static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
+	bool recovered = !before(tp->snd_una, tp->high_seq);
 
-	if (!before(tp->snd_una, tp->high_seq)) {
+	if (tp->frto) { /* F-RTO RFC5682 sec 3.1 (sack enhanced version). */
+		if (flag & FLAG_ORIG_SACK_ACKED) {
+			/* Step 3.b. A timeout is spurious if not all data are
+			 * lost, i.e., never-retransmitted data are (s)acked.
+			 */
+			tcp_try_undo_loss(sk, true);
+			return;
+		}
+		if (after(tp->snd_nxt, tp->high_seq) &&
+		    (flag & FLAG_DATA_SACKED || is_dupack)) {
+			tp->frto = 0; /* Loss was real: 2nd part of step 3.a */
+		} else if (flag & FLAG_SND_UNA_ADVANCED && !recovered) {
+			tp->high_seq = tp->snd_nxt;
+			__tcp_push_pending_frames(sk, tcp_current_mss(sk),
						  TCP_NAGLE_OFF);
+			if (after(tp->snd_nxt, tp->high_seq))
+				return; /* Step 2.b */
+			tp->frto = 0;
+		}
+	}
+
+	if (recovered) {
+		/* F-RTO RFC5682 sec 3.1 step 2.a and 1st part of step 3.a */
 		icsk->icsk_retransmits = 0;
 		tcp_try_undo_recovery(sk);
 		return;
 	}
-
 	if (flag & FLAG_DATA_ACKED)
 		icsk->icsk_retransmits = 0;
-	if (tcp_is_reno(tp) && flag & FLAG_SND_UNA_ADVANCED)
-		tcp_reset_reno_sack(tp);
-	if (tcp_try_undo_loss(sk))
+	if (tcp_is_reno(tp)) {
+		/* A Reno DUPACK means new data in F-RTO step 2.b above are
+		 * delivered. Lower inflight to clock out (re)transmissions.
+		 */
+		if (after(tp->snd_nxt, tp->high_seq) && is_dupack)
+			tcp_add_reno_sack(sk);
+		else if (flag & FLAG_SND_UNA_ADVANCED)
+			tcp_reset_reno_sack(tp);
+	}
+	if (tcp_try_undo_loss(sk, false))
 		return;
-	tcp_moderate_cwnd(tp);
 	tcp_xmit_retransmit_queue(sk);
 }
 
@@ -2764,7 +2809,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
 		newly_acked_sacked = pkts_acked + tp->sacked_out - prior_sacked;
 		break;
 	case TCP_CA_Loss:
-		tcp_process_loss(sk, flag);
+		tcp_process_loss(sk, flag, is_dupack);
 		if (icsk->icsk_ca_state != TCP_CA_Open)
 			return;
 		/* Fall through to processing in Open state. */
@@ -3003,6 +3048,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 			}
 			if (!(sacked & TCPCB_SACKED_ACKED))
 				reord = min(pkts_acked, reord);
+			if (!after(scb->end_seq, tp->high_seq))
+				flag |= FLAG_ORIG_SACK_ACKED;
 		}
 
 		if (sacked & TCPCB_SACKED_ACKED)
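
The patch is dense, so here is a compact way to read its control flow. Below is a minimal userspace sketch of the per-ACK decision that tcp_process_loss() now makes for the SACK-enhanced F-RTO variant (RFC 5682 sec 3.1). It is a model, not kernel code: struct frto_state, frto_on_ack(), and enum frto_verdict are illustrative stand-ins for the tcp_sock fields and return paths in the diff, and the step 2.b branch assumes new data was available to send (the kernel checks this after __tcp_push_pending_frames()).

/* Hedged userspace model of the F-RTO decision added above. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define FLAG_DATA_SACKED	0x20	/* same bit values as the patch */
#define FLAG_ORIG_SACK_ACKED	0x200
#define FLAG_SND_UNA_ADVANCED	0x400

/* Wrap-safe sequence comparisons, like the kernel's before()/after(). */
static bool seq_before(uint32_t a, uint32_t b) { return (int32_t)(a - b) < 0; }
static bool seq_after(uint32_t a, uint32_t b) { return seq_before(b, a); }

struct frto_state {			/* stand-in for tcp_sock fields */
	bool frto;			/* tp->frto: armed by tcp_enter_loss() */
	uint32_t snd_una;		/* oldest unacknowledged byte */
	uint32_t snd_nxt;		/* next byte to be sent */
	uint32_t high_seq;		/* snd_nxt when the RTO fired */
};

enum frto_verdict {
	RTO_SPURIOUS,	/* step 3.b: undo, i.e. tcp_try_undo_loss(sk, true) */
	RTO_GENUINE,	/* step 3.a: stay in CA_Loss, keep retransmitting */
	RTO_UNDECIDED,	/* step 2.b: sent new data, decide on a later ACK */
};

static enum frto_verdict frto_on_ack(struct frto_state *s, int flag,
				     bool is_dupack)
{
	bool recovered = !seq_before(s->snd_una, s->high_seq);

	if (!s->frto)		/* F-RTO not armed: ordinary RTO recovery */
		return RTO_GENUINE;

	/* Step 3.b: never-retransmitted data were (s)acked, so the original
	 * flight survived the timeout and the RTO was spurious.
	 */
	if (flag & FLAG_ORIG_SACK_ACKED)
		return RTO_SPURIOUS;

	/* 2nd part of step 3.a: new data sent after the RTO were sacked or
	 * dupacked, so genuine loss occurred.
	 */
	if (seq_after(s->snd_nxt, s->high_seq) &&
	    ((flag & FLAG_DATA_SACKED) || is_dupack)) {
		s->frto = false;
		return RTO_GENUINE;
	}

	/* Step 2.b: snd_una advanced but recovery is not complete; transmit
	 * new (never-sent) data to probe the path. The kernel does this via
	 * __tcp_push_pending_frames(); here we only record the intent.
	 */
	if ((flag & FLAG_SND_UNA_ADVANCED) && !recovered) {
		s->high_seq = s->snd_nxt;
		return RTO_UNDECIDED;
	}

	s->frto = false;
	return RTO_GENUINE;
}

int main(void)
{
	struct frto_state s = { .frto = true, .snd_una = 1000,
				.snd_nxt = 5000, .high_seq = 5000 };

	/* An ACK sacking never-retransmitted data: spurious RTO (prints 0). */
	printf("verdict: %d\n", frto_on_ack(&s, FLAG_ORIG_SACK_ACKED, false));
	return 0;
}

Whether F-RTO is armed at all is decided in tcp_enter_loss() above: sysctl_tcp_frto must be nonzero, the timeout must begin a new recovery episode (or repeat on the same SND.UNA, sec 3.2), and no path-MTU probe may be in flight. The spurious-undo path also bumps the LINUX_MIB_TCPSPURIOUSRTOS counter, so spurious timeouts detected this way are visible in netstat statistics.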