-rw-r--r--   include/net/inet_hashtables.h   48
-rw-r--r--   include/net/tcp.h               21
-rw-r--r--   net/ipv4/inet_hashtables.c      32
-rw-r--r--   net/ipv4/tcp_diag.c              8
-rw-r--r--   net/ipv4/tcp_ipv4.c             70
-rw-r--r--   net/ipv6/tcp_ipv6.c              2
6 files changed, 94 insertions, 87 deletions
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index da07411b36d..f5d65121f7b 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -19,10 +19,14 @@
 #include <linux/list.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
+#include <linux/tcp.h>		/* only for TCP_LISTEN, damn :-( */
 #include <linux/types.h>
+#include <linux/wait.h>
 
 #include <net/sock.h>
 
+#include <asm/atomic.h>
+
 /* This is for all connections with a full identity, no wildcards.
  * New scheme, half the table is for TIME_WAIT, the other half is
  * for the rest.  I'll experiment with dynamic table growth later.
@@ -192,4 +196,48 @@ static inline void inet_inherit_port(struct inet_hashinfo *table,
 
 extern void inet_put_port(struct inet_hashinfo *table, struct sock *sk);
 
+extern void inet_listen_wlock(struct inet_hashinfo *hashinfo);
+
+/*
+ * - We may sleep inside this lock.
+ * - If sleeping is not required (or called from BH),
+ *   use plain read_(un)lock(&inet_hashinfo.lhash_lock).
+ */
+static inline void inet_listen_lock(struct inet_hashinfo *hashinfo)
+{
+	/* read_lock synchronizes to candidates to writers */
+	read_lock(&hashinfo->lhash_lock);
+	atomic_inc(&hashinfo->lhash_users);
+	read_unlock(&hashinfo->lhash_lock);
+}
+
+static inline void inet_listen_unlock(struct inet_hashinfo *hashinfo)
+{
+	if (atomic_dec_and_test(&hashinfo->lhash_users))
+		wake_up(&hashinfo->lhash_wait);
+}
+
+static inline void __inet_hash(struct inet_hashinfo *hashinfo,
+			       struct sock *sk, const int listen_possible)
+{
+	struct hlist_head *list;
+	rwlock_t *lock;
+
+	BUG_TRAP(sk_unhashed(sk));
+	if (listen_possible && sk->sk_state == TCP_LISTEN) {
+		list = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
+		lock = &hashinfo->lhash_lock;
+		inet_listen_wlock(hashinfo);
+	} else {
+		sk->sk_hashent = inet_sk_ehashfn(sk, hashinfo->ehash_size);
+		list = &hashinfo->ehash[sk->sk_hashent].chain;
+		lock = &hashinfo->ehash[sk->sk_hashent].lock;
+		write_lock(lock);
+	}
+	__sk_add_node(sk, list);
+	sock_prot_inc_use(sk->sk_prot);
+	write_unlock(lock);
+	if (listen_possible && sk->sk_state == TCP_LISTEN)
+		wake_up(&hashinfo->lhash_wait);
+}
 #endif /* _INET_HASHTABLES_H */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 99e47695d4b..bc110cc7022 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1447,27 +1447,6 @@ static __inline__ void tcp_openreq_init(struct request_sock *req,
 
 extern void tcp_enter_memory_pressure(void);
 
-extern void tcp_listen_wlock(void);
-
-/* - We may sleep inside this lock.
- * - If sleeping is not required (or called from BH),
- *   use plain read_(un)lock(&inet_hashinfo.lhash_lock).
- */
-
-static inline void tcp_listen_lock(void)
-{
-	/* read_lock synchronizes to candidates to writers */
-	read_lock(&tcp_hashinfo.lhash_lock);
-	atomic_inc(&tcp_hashinfo.lhash_users);
-	read_unlock(&tcp_hashinfo.lhash_lock);
-}
-
-static inline void tcp_listen_unlock(void)
-{
-	if (atomic_dec_and_test(&tcp_hashinfo.lhash_users))
-		wake_up(&tcp_hashinfo.lhash_wait);
-}
-
 static inline int keepalive_intvl_when(const struct tcp_sock *tp)
 {
 	return tp->keepalive_intvl ? : sysctl_tcp_keepalive_intvl;
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 33d6cbe32cd..06cbc6f689c 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -15,7 +15,9 @@
 
 #include <linux/config.h>
 #include <linux/module.h>
+#include <linux/sched.h>
 #include <linux/slab.h>
+#include <linux/wait.h>
 
 #include <net/inet_hashtables.h>
 
@@ -89,3 +91,33 @@ void inet_put_port(struct inet_hashinfo *hashinfo, struct sock *sk)
 }
 
 EXPORT_SYMBOL(inet_put_port);
+
+/*
+ * This lock without WQ_FLAG_EXCLUSIVE is good on UP and it can be very bad on SMP.
+ * Look, when several writers sleep and reader wakes them up, all but one
+ * immediately hit write lock and grab all the cpus. Exclusive sleep solves
+ * this, _but_ remember, it adds useless work on UP machines (wake up each
+ * exclusive lock release). It should be ifdefed really.
+ */
+void inet_listen_wlock(struct inet_hashinfo *hashinfo)
+{
+	write_lock(&hashinfo->lhash_lock);
+
+	if (atomic_read(&hashinfo->lhash_users)) {
+		DEFINE_WAIT(wait);
+
+		for (;;) {
+			prepare_to_wait_exclusive(&hashinfo->lhash_wait,
+						  &wait, TASK_UNINTERRUPTIBLE);
+			if (!atomic_read(&hashinfo->lhash_users))
+				break;
+			write_unlock_bh(&hashinfo->lhash_lock);
+			schedule();
+			write_lock_bh(&hashinfo->lhash_lock);
+		}
+
+		finish_wait(&hashinfo->lhash_wait, &wait);
+	}
+}
+
+EXPORT_SYMBOL(inet_listen_wlock);
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index 0ae738b455f..1a89a03c449 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -589,7 +589,7 @@ static int tcpdiag_dump(struct sk_buff *skb, struct netlink_callback *cb)
 	if (cb->args[0] == 0) {
 		if (!(r->tcpdiag_states&(TCPF_LISTEN|TCPF_SYN_RECV)))
 			goto skip_listen_ht;
-		tcp_listen_lock();
+		inet_listen_lock(&tcp_hashinfo);
 		for (i = s_i; i < INET_LHTABLE_SIZE; i++) {
 			struct sock *sk;
 			struct hlist_node *node;
@@ -613,7 +613,7 @@ static int tcpdiag_dump(struct sk_buff *skb, struct netlink_callback *cb)
 					goto syn_recv;
 
 				if (tcpdiag_dump_sock(skb, sk, cb) < 0) {
-					tcp_listen_unlock();
+					inet_listen_unlock(&tcp_hashinfo);
 					goto done;
 				}
 
@@ -622,7 +622,7 @@ syn_recv:
 					goto next_listen;
 
 				if (tcpdiag_dump_reqs(skb, sk, cb) < 0) {
-					tcp_listen_unlock();
+					inet_listen_unlock(&tcp_hashinfo);
 					goto done;
 				}
 
@@ -636,7 +636,7 @@ next_listen:
 			cb->args[3] = 0;
 			cb->args[4] = 0;
 		}
-		tcp_listen_unlock();
+		inet_listen_unlock(&tcp_hashinfo);
 skip_listen_ht:
 		cb->args[0] = 1;
 		s_i = num = s_num = 0;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index f5373f9f00a..5f9ad95304c 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -228,62 +228,11 @@ fail:
 	return ret;
 }
 
-/* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it can be very bad on SMP.
- * Look, when several writers sleep and reader wakes them up, all but one
- * immediately hit write lock and grab all the cpus. Exclusive sleep solves
- * this, _but_ remember, it adds useless work on UP machines (wake up each
- * exclusive lock release). It should be ifdefed really.
- */
-
-void tcp_listen_wlock(void)
-{
-	write_lock(&tcp_hashinfo.lhash_lock);
-
-	if (atomic_read(&tcp_hashinfo.lhash_users)) {
-		DEFINE_WAIT(wait);
-
-		for (;;) {
-			prepare_to_wait_exclusive(&tcp_hashinfo.lhash_wait,
-						&wait, TASK_UNINTERRUPTIBLE);
-			if (!atomic_read(&tcp_hashinfo.lhash_users))
-				break;
-			write_unlock_bh(&tcp_hashinfo.lhash_lock);
-			schedule();
-			write_lock_bh(&tcp_hashinfo.lhash_lock);
-		}
-
-		finish_wait(&tcp_hashinfo.lhash_wait, &wait);
-	}
-}
-
-static __inline__ void __tcp_v4_hash(struct sock *sk, const int listen_possible)
-{
-	struct hlist_head *list;
-	rwlock_t *lock;
-
-	BUG_TRAP(sk_unhashed(sk));
-	if (listen_possible && sk->sk_state == TCP_LISTEN) {
-		list = &tcp_hashinfo.listening_hash[inet_sk_listen_hashfn(sk)];
-		lock = &tcp_hashinfo.lhash_lock;
-		tcp_listen_wlock();
-	} else {
-		sk->sk_hashent = inet_sk_ehashfn(sk, tcp_hashinfo.ehash_size);
-		list = &tcp_hashinfo.ehash[sk->sk_hashent].chain;
-		lock = &tcp_hashinfo.ehash[sk->sk_hashent].lock;
-		write_lock(lock);
-	}
-	__sk_add_node(sk, list);
-	sock_prot_inc_use(sk->sk_prot);
-	write_unlock(lock);
-	if (listen_possible && sk->sk_state == TCP_LISTEN)
-		wake_up(&tcp_hashinfo.lhash_wait);
-}
-
 static void tcp_v4_hash(struct sock *sk)
 {
 	if (sk->sk_state != TCP_CLOSE) {
 		local_bh_disable();
-		__tcp_v4_hash(sk, 1);
+		__inet_hash(&tcp_hashinfo, sk, 1);
 		local_bh_enable();
 	}
 }
@@ -297,7 +246,7 @@ void tcp_unhash(struct sock *sk)
 
 	if (sk->sk_state == TCP_LISTEN) {
 		local_bh_disable();
-		tcp_listen_wlock();
+		inet_listen_wlock(&tcp_hashinfo);
 		lock = &tcp_hashinfo.lhash_lock;
 	} else {
 		struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[sk->sk_hashent];
@@ -624,7 +573,7 @@ ok:
 		inet_bind_hash(sk, tb, port);
 		if (sk_unhashed(sk)) {
 			inet_sk(sk)->sport = htons(port);
-			__tcp_v4_hash(sk, 0);
+			__inet_hash(&tcp_hashinfo, sk, 0);
 		}
 		spin_unlock(&head->lock);
 
@@ -641,7 +590,7 @@ ok:
 	tb  = inet_sk(sk)->bind_hash;
 	spin_lock_bh(&head->lock);
 	if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
-		__tcp_v4_hash(sk, 0);
+		__inet_hash(&tcp_hashinfo, sk, 0);
 		spin_unlock_bh(&head->lock);
 		return 0;
 	} else {
@@ -1479,7 +1428,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 	newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
 	tcp_initialize_rcv_mss(newsk);
 
-	__tcp_v4_hash(newsk, 0);
+	__inet_hash(&tcp_hashinfo, newsk, 0);
 	__inet_inherit_port(&tcp_hashinfo, sk, newsk);
 
 	return newsk;
@@ -2102,12 +2051,12 @@ static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
 	void *rc;
 	struct tcp_iter_state* st = seq->private;
 
-	tcp_listen_lock();
+	inet_listen_lock(&tcp_hashinfo);
 	st->state = TCP_SEQ_STATE_LISTENING;
 	rc	  = listening_get_idx(seq, &pos);
 
 	if (!rc) {
-		tcp_listen_unlock();
+		inet_listen_unlock(&tcp_hashinfo);
 		local_bh_disable();
 		st->state = TCP_SEQ_STATE_ESTABLISHED;
 		rc	  = established_get_idx(seq, pos);
@@ -2140,7 +2089,7 @@ static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	case TCP_SEQ_STATE_LISTENING:
 		rc = listening_get_next(seq, v);
 		if (!rc) {
-			tcp_listen_unlock();
+			inet_listen_unlock(&tcp_hashinfo);
 			local_bh_disable();
 			st->state = TCP_SEQ_STATE_ESTABLISHED;
 			rc	  = established_get_first(seq);
@@ -2168,7 +2117,7 @@ static void tcp_seq_stop(struct seq_file *seq, void *v)
 		}
 	case TCP_SEQ_STATE_LISTENING:
 		if (v != SEQ_START_TOKEN)
-			tcp_listen_unlock();
+			inet_listen_unlock(&tcp_hashinfo);
 		break;
 	case TCP_SEQ_STATE_TIME_WAIT:
 	case TCP_SEQ_STATE_ESTABLISHED:
@@ -2431,7 +2380,6 @@ void __init tcp_v4_init(struct net_proto_family *ops)
 EXPORT_SYMBOL(ipv4_specific);
 EXPORT_SYMBOL(inet_bind_bucket_create);
 EXPORT_SYMBOL(tcp_hashinfo);
-EXPORT_SYMBOL(tcp_listen_wlock);
 EXPORT_SYMBOL(tcp_prot);
 EXPORT_SYMBOL(tcp_unhash);
 EXPORT_SYMBOL(tcp_v4_conn_request);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 362ef5a6406..93a66b9a76e 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -229,7 +229,7 @@ static __inline__ void __tcp_v6_hash(struct sock *sk)
 	if (sk->sk_state == TCP_LISTEN) {
 		list = &tcp_hashinfo.listening_hash[inet_sk_listen_hashfn(sk)];
 		lock = &tcp_hashinfo.lhash_lock;
-		tcp_listen_wlock();
+		inet_listen_wlock(&tcp_hashinfo);
 	} else {
 		sk->sk_hashent = tcp_v6_sk_hashfn(sk);
 		list = &tcp_hashinfo.ehash[sk->sk_hashent].chain;
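The lhash_lock/lhash_users pair that this patch moves into inet_hashinfo lets listening-hash readers that may sleep hold off writers cheaply: inet_listen_lock() bumps lhash_users under a momentary read_lock, and inet_listen_wlock() does not proceed until that count drains back to zero. A minimal sketch of the read side, modelled on the tcpdiag_dump() loop in the diff above; walk_listening_hash() and dump_one_sock() are hypothetical names, not part of the patch:

static int walk_listening_hash(struct inet_hashinfo *hashinfo,
			       int (*dump_one_sock)(struct sock *sk))
{
	int i, err = 0;

	/* Pin the listening hash: writers in inet_listen_wlock() now wait
	 * until lhash_users returns to zero, so we may block while walking. */
	inet_listen_lock(hashinfo);
	for (i = 0; i < INET_LHTABLE_SIZE; i++) {
		struct sock *sk;
		struct hlist_node *node;

		sk_for_each(sk, node, &hashinfo->listening_hash[i]) {
			err = dump_one_sock(sk);
			if (err)
				goto out;
		}
	}
out:
	/* Drop lhash_users and wake one exclusive waiter, if any. */
	inet_listen_unlock(hashinfo);
	return err;
}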
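The write side is the mirror image: inet_listen_wlock() returns with lhash_lock write-held only once every inet_listen_lock() user is gone, and a finishing writer passes the wakeup along to the next exclusive waiter, as __inet_hash() does. A sketch of removing a listening socket, condensed from the tcp_unhash() path touched above and offered only as an assumed illustration (unhash_listening_sock() is a hypothetical name):

static void unhash_listening_sock(struct inet_hashinfo *hashinfo,
				  struct sock *sk)
{
	local_bh_disable();
	/* Waits for lhash_users to drain, then holds lhash_lock for writing. */
	inet_listen_wlock(hashinfo);
	if (__sk_del_node_init(sk))
		sock_prot_dec_use(sk->sk_prot);
	write_unlock(&hashinfo->lhash_lock);
	local_bh_enable();
	/* Waiting writers sleep exclusively; hand the wakeup to the next one. */
	wake_up(&hashinfo->lhash_wait);
}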