Diffstat (limited to 'net')
-rw-r--r--  net/ipv4/proc.c          |  1 +
-rw-r--r--  net/ipv4/tcp_input.c     | 39 +++++++++++++++++++++++++++++++++++++++
-rw-r--r--  net/ipv4/tcp_minisocks.c |  1 +
-rw-r--r--  net/ipv4/tcp_output.c    |  9 +++++++++
-rw-r--r--  net/ipv4/tcp_timer.c     |  2 ++
5 files changed, 52 insertions(+), 0 deletions(-)
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 4c35911d935..b6f2ea17489 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -225,6 +225,7 @@ static const struct snmp_mib snmp4_net_list[] = {
 	SNMP_MIB_ITEM("TCPSlowStartRetrans", LINUX_MIB_TCPSLOWSTARTRETRANS),
 	SNMP_MIB_ITEM("TCPTimeouts", LINUX_MIB_TCPTIMEOUTS),
 	SNMP_MIB_ITEM("TCPLossProbes", LINUX_MIB_TCPLOSSPROBES),
+	SNMP_MIB_ITEM("TCPLossProbeRecovery", LINUX_MIB_TCPLOSSPROBERECOVERY),
 	SNMP_MIB_ITEM("TCPRenoRecoveryFail", LINUX_MIB_TCPRENORECOVERYFAIL),
 	SNMP_MIB_ITEM("TCPSackRecoveryFail", LINUX_MIB_TCPSACKRECOVERYFAIL),
 	SNMP_MIB_ITEM("TCPSchedulerFailed", LINUX_MIB_TCPSCHEDULERFAILED),
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index b794f89ac1f..836d74dd018 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2682,6 +2682,7 @@ static void tcp_init_cwnd_reduction(struct sock *sk, const bool set_ssthresh)
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	tp->high_seq = tp->snd_nxt;
+	tp->tlp_high_seq = 0;
 	tp->snd_cwnd_cnt = 0;
 	tp->prior_cwnd = tp->snd_cwnd;
 	tp->prr_delivered = 0;
@@ -3569,6 +3570,38 @@ static void tcp_send_challenge_ack(struct sock *sk)
 	}
 }
 
+/* This routine deals with acks during a TLP episode.
+ * Ref: loss detection algorithm in draft-dukkipati-tcpm-tcp-loss-probe.
+ */
+static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	bool is_tlp_dupack = (ack == tp->tlp_high_seq) &&
+			     !(flag & (FLAG_SND_UNA_ADVANCED |
+				       FLAG_NOT_DUP | FLAG_DATA_SACKED));
+
+	/* Mark the end of TLP episode on receiving TLP dupack or when
+	 * ack is after tlp_high_seq.
+	 */
+	if (is_tlp_dupack) {
+		tp->tlp_high_seq = 0;
+		return;
+	}
+
+	if (after(ack, tp->tlp_high_seq)) {
+		tp->tlp_high_seq = 0;
+		/* Don't reduce cwnd if DSACK arrives for TLP retrans. */
+		if (!(flag & FLAG_DSACKING_ACK)) {
+			tcp_init_cwnd_reduction(sk, true);
+			tcp_set_ca_state(sk, TCP_CA_CWR);
+			tcp_end_cwnd_reduction(sk);
+			tcp_set_ca_state(sk, TCP_CA_Open);
+			NET_INC_STATS_BH(sock_net(sk),
+					 LINUX_MIB_TCPLOSSPROBERECOVERY);
+		}
+	}
+}
+
 /* This routine deals with incoming acks, but not outgoing ones. */
 static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 {
@@ -3676,6 +3709,9 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 			tcp_cong_avoid(sk, ack, prior_in_flight);
 	}
 
+	if (tp->tlp_high_seq)
+		tcp_process_tlp_ack(sk, ack, flag);
+
 	if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) {
 		struct dst_entry *dst = __sk_dst_get(sk);
 		if (dst)
@@ -3697,6 +3733,9 @@ no_queue:
 	 */
 	if (tcp_send_head(sk))
 		tcp_ack_probe(sk);
+
+	if (tp->tlp_high_seq)
+		tcp_process_tlp_ack(sk, ack, flag);
 	return 1;
 
 invalid_ack:
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index b83a49cc381..4bdb09fca40 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -440,6 +440,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
 		newtp->fackets_out = 0;
 		newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
 		tcp_enable_early_retrans(newtp);
+		newtp->tlp_high_seq = 0;
 
 		/* So many TCP implementations out there (incorrectly) count the
 		 * initial SYN frame in their delayed-ACK and congestion control
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index beb63dbc85f..8e7742f0b5d 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2132,6 +2132,7 @@ bool tcp_schedule_loss_probe(struct sock *sk)
  */
 void tcp_send_loss_probe(struct sock *sk)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
 	int pcount;
 	int mss = tcp_current_mss(sk);
@@ -2142,6 +2143,10 @@ void tcp_send_loss_probe(struct sock *sk)
 		goto rearm_timer;
 	}
 
+	/* At most one outstanding TLP retransmission. */
+	if (tp->tlp_high_seq)
+		goto rearm_timer;
+
 	/* Retransmit last segment. */
 	skb = tcp_write_queue_tail(sk);
 	if (WARN_ON(!skb))
@@ -2164,6 +2169,10 @@ void tcp_send_loss_probe(struct sock *sk)
 	if (skb->len > 0)
 		err = __tcp_retransmit_skb(sk, skb);
 
+	/* Record snd_nxt for loss detection. */
+	if (likely(!err))
+		tp->tlp_high_seq = tp->snd_nxt;
+
 rearm_timer:
 	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
 				  inet_csk(sk)->icsk_rto,
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index ecd61d54147..eeccf795e91 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -356,6 +356,8 @@ void tcp_retransmit_timer(struct sock *sk)
 
 	WARN_ON(tcp_write_queue_empty(sk));
 
+	tp->tlp_high_seq = 0;
+
 	if (!tp->snd_wnd && !sock_flag(sk, SOCK_DEAD) &&
 	    !((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))) {
 		/* Receiver dastardly shrinks window. Our retransmits
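The loss-detection core of the patch is tcp_process_tlp_ack() above: a pure duplicate ACK for exactly tlp_high_seq means the probe merely duplicated data the receiver already had, while a cumulative ACK beyond tlp_high_seq means the probe repaired a genuine loss, so cwnd is reduced unless a D-SACK marks the retransmission as spurious. The following standalone C model restates that decision logic as a reading aid; the names (tlp_classify_ack, the F_* bits, seq_after) are hypothetical stand-ins for the kernel's FLAG_* masks and after() macro, a sketch of the patch's behaviour rather than kernel code.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define F_SND_UNA_ADVANCED 0x01 /* ACK moved snd_una forward */
#define F_NOT_DUP          0x02 /* ACK is not a duplicate */
#define F_DATA_SACKED      0x04 /* ACK carried new SACK information */
#define F_DSACKING_ACK     0x08 /* ACK carried a D-SACK block */

/* Wrap-safe "a is after b", mirroring the kernel's after() macro. */
static bool seq_after(uint32_t a, uint32_t b)
{
	return (int32_t)(a - b) > 0;
}

enum tlp_verdict { TLP_KEEP_WAITING, TLP_PROBE_DELIVERED, TLP_LOSS_REPAIRED };

/* Classify an ACK received while a TLP episode is open (tlp_high_seq != 0). */
static enum tlp_verdict tlp_classify_ack(uint32_t *tlp_high_seq,
					 uint32_t ack, int flag)
{
	/* A pure dupack for exactly tlp_high_seq: the original data arrived
	 * and only the probe was redundant, so end the episode quietly. */
	if (ack == *tlp_high_seq &&
	    !(flag & (F_SND_UNA_ADVANCED | F_NOT_DUP | F_DATA_SACKED))) {
		*tlp_high_seq = 0;
		return TLP_PROBE_DELIVERED;
	}
	/* An ACK beyond tlp_high_seq: the probe repaired a real loss, which
	 * is where the kernel reduces cwnd, unless a D-SACK shows the TLP
	 * retransmission was spurious. */
	if (seq_after(ack, *tlp_high_seq)) {
		*tlp_high_seq = 0;
		return (flag & F_DSACKING_ACK) ? TLP_PROBE_DELIVERED
					       : TLP_LOSS_REPAIRED;
	}
	return TLP_KEEP_WAITING;
}

int main(void)
{
	uint32_t tlp_high_seq = 1000;

	/* Cumulative ACK past the probed sequence, no D-SACK: real loss,
	 * so this prints the TLP_LOSS_REPAIRED verdict (2). */
	printf("%d\n", tlp_classify_ack(&tlp_high_seq, 1001,
					F_SND_UNA_ADVANCED | F_NOT_DUP));
	return 0;
}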
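On the send side, the patch keeps at most one outstanding TLP retransmission per episode: tcp_send_loss_probe() bails out if tlp_high_seq is already set, records snd_nxt only after the probe goes out successfully, and tcp_retransmit_timer() abandons the episode once a real RTO takes over recovery. A minimal sketch of that bookkeeping, again with hypothetical names and under the same model-not-kernel-code caveat:

#include <stdbool.h>
#include <stdint.h>

struct tlp_state {
	uint32_t snd_nxt;      /* next sequence to be sent */
	uint32_t tlp_high_seq; /* 0 = no TLP episode in progress */
};

/* Called when the probe timer fires; returns false if no probe is sent. */
static bool tlp_send_probe(struct tlp_state *s, bool retransmit_ok)
{
	/* Mirrors "if (tp->tlp_high_seq) goto rearm_timer": at most one
	 * outstanding TLP retransmission per episode. */
	if (s->tlp_high_seq)
		return false;
	if (!retransmit_ok) /* e.g. the retransmit itself failed */
		return false;
	/* Record snd_nxt so later ACKs can be matched against the probe. */
	s->tlp_high_seq = s->snd_nxt;
	return true;
}

/* An RTO takes over loss recovery, so the episode is abandoned, as
 * tcp_retransmit_timer() now does with "tp->tlp_high_seq = 0". */
static void tlp_on_rto(struct tlp_state *s)
{
	s->tlp_high_seq = 0;
}

int main(void)
{
	struct tlp_state s = { .snd_nxt = 1000, .tlp_high_seq = 0 };

	tlp_send_probe(&s, true); /* opens the episode: tlp_high_seq = 1000 */
	tlp_send_probe(&s, true); /* no-op: an episode is already open */
	tlp_on_rto(&s);           /* RTO clears it */
	return (int)s.tlp_high_seq;
}

Because proc.c registers LINUX_MIB_TCPLOSSPROBERECOVERY in snmp4_net_list, successful TLP recoveries become visible as the TCPLossProbeRecovery counter on the TcpExt line of /proc/net/netstat.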