diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2010-10-23 11:47:02 -0700 | 
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-10-23 11:47:02 -0700 | 
| commit | 5f05647dd81c11a6a165ccc8f0c1370b16f3bcb0 (patch) | |
| tree | 7851ef1c93aa1aba7ef327ca4b75fd35e6d10f29 /net/core/neighbour.c | |
| parent | 02f36038c568111ad4fc433f6fa760ff5e38fab4 (diff) | |
| parent | ec37a48d1d16c30b655ac5280209edf52a6775d4 (diff) | |
| download | olio-linux-3.10-5f05647dd81c11a6a165ccc8f0c1370b16f3bcb0.tar.xz olio-linux-3.10-5f05647dd81c11a6a165ccc8f0c1370b16f3bcb0.zip  | |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next-2.6
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next-2.6: (1699 commits)
  bnx2/bnx2x: Unsupported Ethtool operations should return -EINVAL.
  vlan: Calling vlan_hwaccel_do_receive() is always valid.
  tproxy: use the interface primary IP address as a default value for --on-ip
  tproxy: added IPv6 support to the socket match
  cxgb3: function namespace cleanup
  tproxy: added IPv6 support to the TPROXY target
  tproxy: added IPv6 socket lookup function to nf_tproxy_core
  be2net: Changes to use only priority codes allowed by f/w
  tproxy: allow non-local binds of IPv6 sockets if IP_TRANSPARENT is enabled
  tproxy: added tproxy sockopt interface in the IPV6 layer
  tproxy: added udp6_lib_lookup function
  tproxy: added const specifiers to udp lookup functions
  tproxy: split off ipv6 defragmentation to a separate module
  l2tp: small cleanup
  nf_nat: restrict ICMP translation for embedded header
  can: mcp251x: fix generation of error frames
  can: mcp251x: fix endless loop in interrupt handler if CANINTF_MERRF is set
  can-raw: add msg_flags to distinguish local traffic
  9p: client code cleanup
  rds: make local functions/variables static
  ...
Fix up conflicts in net/core/dev.c, drivers/net/pcmcia/smc91c92_cs.c and
drivers/net/wireless/ath/ath9k/debug.c as per David
Diffstat (limited to 'net/core/neighbour.c')
| -rw-r--r-- | net/core/neighbour.c | 482 | 
1 files changed, 301 insertions, 181 deletions
diff --git a/net/core/neighbour.c b/net/core/neighbour.c index a4e0a7482c2..8cc8f9a79db 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -122,7 +122,7 @@ static void neigh_cleanup_and_release(struct neighbour *neigh)  unsigned long neigh_rand_reach_time(unsigned long base)  { -	return (base ? (net_random() % base) + (base >> 1) : 0); +	return base ? (net_random() % base) + (base >> 1) : 0;  }  EXPORT_SYMBOL(neigh_rand_reach_time); @@ -131,15 +131,20 @@ static int neigh_forced_gc(struct neigh_table *tbl)  {  	int shrunk = 0;  	int i; +	struct neigh_hash_table *nht;  	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);  	write_lock_bh(&tbl->lock); -	for (i = 0; i <= tbl->hash_mask; i++) { -		struct neighbour *n, **np; +	nht = rcu_dereference_protected(tbl->nht, +					lockdep_is_held(&tbl->lock)); +	for (i = 0; i <= nht->hash_mask; i++) { +		struct neighbour *n; +		struct neighbour __rcu **np; -		np = &tbl->hash_buckets[i]; -		while ((n = *np) != NULL) { +		np = &nht->hash_buckets[i]; +		while ((n = rcu_dereference_protected(*np, +					lockdep_is_held(&tbl->lock))) != NULL) {  			/* Neighbour record may be discarded if:  			 * - nobody refers to it.  			 * - it is not permanent @@ -147,7 +152,9 @@ static int neigh_forced_gc(struct neigh_table *tbl)  			write_lock(&n->lock);  			if (atomic_read(&n->refcnt) == 1 &&  			    !(n->nud_state & NUD_PERMANENT)) { -				*np	= n->next; +				rcu_assign_pointer(*np, +					rcu_dereference_protected(n->next, +						  lockdep_is_held(&tbl->lock)));  				n->dead = 1;  				shrunk	= 1;  				write_unlock(&n->lock); @@ -199,16 +206,24 @@ static void pneigh_queue_purge(struct sk_buff_head *list)  static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)  {  	int i; +	struct neigh_hash_table *nht; -	for (i = 0; i <= tbl->hash_mask; i++) { -		struct neighbour *n, **np = &tbl->hash_buckets[i]; +	nht = rcu_dereference_protected(tbl->nht, +					lockdep_is_held(&tbl->lock)); -		while ((n = *np) != NULL) { +	for (i = 0; i <= nht->hash_mask; i++) { +		struct neighbour *n; +		struct neighbour __rcu **np = &nht->hash_buckets[i]; + +		while ((n = rcu_dereference_protected(*np, +					lockdep_is_held(&tbl->lock))) != NULL) {  			if (dev && n->dev != dev) {  				np = &n->next;  				continue;  			} -			*np = n->next; +			rcu_assign_pointer(*np, +				   rcu_dereference_protected(n->next, +						lockdep_is_held(&tbl->lock)));  			write_lock(&n->lock);  			neigh_del_timer(n);  			n->dead = 1; @@ -279,6 +294,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl)  	skb_queue_head_init(&n->arp_queue);  	rwlock_init(&n->lock); +	seqlock_init(&n->ha_lock);  	n->updated	  = n->used = now;  	n->nud_state	  = NUD_NONE;  	n->output	  = neigh_blackhole; @@ -297,64 +313,86 @@ out_entries:  	goto out;  } -static struct neighbour **neigh_hash_alloc(unsigned int entries) +static struct neigh_hash_table *neigh_hash_alloc(unsigned int entries)  { -	unsigned long size = entries * sizeof(struct neighbour *); -	struct neighbour **ret; +	size_t size = entries * sizeof(struct neighbour *); +	struct neigh_hash_table *ret; +	struct neighbour **buckets; -	if (size <= PAGE_SIZE) { -		ret = kzalloc(size, GFP_ATOMIC); -	} else { -		ret = (struct neighbour **) -		      __get_free_pages(GFP_ATOMIC|__GFP_ZERO, get_order(size)); +	ret = kmalloc(sizeof(*ret), GFP_ATOMIC); +	if (!ret) +		return NULL; +	if (size <= PAGE_SIZE) +		buckets = kzalloc(size, GFP_ATOMIC); +	else +		buckets = (struct neighbour **) +			  __get_free_pages(GFP_ATOMIC | __GFP_ZERO, +					   get_order(size)); +	if (!buckets) { +		kfree(ret); +		return NULL;  	} +	rcu_assign_pointer(ret->hash_buckets, buckets); +	ret->hash_mask = entries - 1; +	get_random_bytes(&ret->hash_rnd, sizeof(ret->hash_rnd));  	return ret;  } -static void neigh_hash_free(struct neighbour **hash, unsigned int entries) +static void neigh_hash_free_rcu(struct rcu_head *head)  { -	unsigned long size = entries * sizeof(struct neighbour *); +	struct neigh_hash_table *nht = container_of(head, +						    struct neigh_hash_table, +						    rcu); +	size_t size = (nht->hash_mask + 1) * sizeof(struct neighbour *); +	struct neighbour **buckets = nht->hash_buckets;  	if (size <= PAGE_SIZE) -		kfree(hash); +		kfree(buckets);  	else -		free_pages((unsigned long)hash, get_order(size)); +		free_pages((unsigned long)buckets, get_order(size)); +	kfree(nht);  } -static void neigh_hash_grow(struct neigh_table *tbl, unsigned long new_entries) +static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl, +						unsigned long new_entries)  { -	struct neighbour **new_hash, **old_hash; -	unsigned int i, new_hash_mask, old_entries; +	unsigned int i, hash; +	struct neigh_hash_table *new_nht, *old_nht;  	NEIGH_CACHE_STAT_INC(tbl, hash_grows);  	BUG_ON(!is_power_of_2(new_entries)); -	new_hash = neigh_hash_alloc(new_entries); -	if (!new_hash) -		return; - -	old_entries = tbl->hash_mask + 1; -	new_hash_mask = new_entries - 1; -	old_hash = tbl->hash_buckets; +	old_nht = rcu_dereference_protected(tbl->nht, +					    lockdep_is_held(&tbl->lock)); +	new_nht = neigh_hash_alloc(new_entries); +	if (!new_nht) +		return old_nht; -	get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd)); -	for (i = 0; i < old_entries; i++) { +	for (i = 0; i <= old_nht->hash_mask; i++) {  		struct neighbour *n, *next; -		for (n = old_hash[i]; n; n = next) { -			unsigned int hash_val = tbl->hash(n->primary_key, n->dev); +		for (n = rcu_dereference_protected(old_nht->hash_buckets[i], +						   lockdep_is_held(&tbl->lock)); +		     n != NULL; +		     n = next) { +			hash = tbl->hash(n->primary_key, n->dev, +					 new_nht->hash_rnd); -			hash_val &= new_hash_mask; -			next = n->next; +			hash &= new_nht->hash_mask; +			next = rcu_dereference_protected(n->next, +						lockdep_is_held(&tbl->lock)); -			n->next = new_hash[hash_val]; -			new_hash[hash_val] = n; +			rcu_assign_pointer(n->next, +					   rcu_dereference_protected( +						new_nht->hash_buckets[hash], +						lockdep_is_held(&tbl->lock))); +			rcu_assign_pointer(new_nht->hash_buckets[hash], n);  		}  	} -	tbl->hash_buckets = new_hash; -	tbl->hash_mask = new_hash_mask; -	neigh_hash_free(old_hash, old_entries); +	rcu_assign_pointer(tbl->nht, new_nht); +	call_rcu(&old_nht->rcu, neigh_hash_free_rcu); +	return new_nht;  }  struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey, @@ -363,19 +401,26 @@ struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,  	struct neighbour *n;  	int key_len = tbl->key_len;  	u32 hash_val; +	struct neigh_hash_table *nht;  	NEIGH_CACHE_STAT_INC(tbl, lookups); -	read_lock_bh(&tbl->lock); -	hash_val = tbl->hash(pkey, dev); -	for (n = tbl->hash_buckets[hash_val & tbl->hash_mask]; n; n = n->next) { +	rcu_read_lock_bh(); +	nht = rcu_dereference_bh(tbl->nht); +	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) & nht->hash_mask; + +	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]); +	     n != NULL; +	     n = rcu_dereference_bh(n->next)) {  		if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) { -			neigh_hold(n); +			if (!atomic_inc_not_zero(&n->refcnt)) +				n = NULL;  			NEIGH_CACHE_STAT_INC(tbl, hits);  			break;  		}  	} -	read_unlock_bh(&tbl->lock); + +	rcu_read_unlock_bh();  	return n;  }  EXPORT_SYMBOL(neigh_lookup); @@ -386,20 +431,27 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,  	struct neighbour *n;  	int key_len = tbl->key_len;  	u32 hash_val; +	struct neigh_hash_table *nht;  	NEIGH_CACHE_STAT_INC(tbl, lookups); -	read_lock_bh(&tbl->lock); -	hash_val = tbl->hash(pkey, NULL); -	for (n = tbl->hash_buckets[hash_val & tbl->hash_mask]; n; n = n->next) { +	rcu_read_lock_bh(); +	nht = rcu_dereference_bh(tbl->nht); +	hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) & nht->hash_mask; + +	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]); +	     n != NULL; +	     n = rcu_dereference_bh(n->next)) {  		if (!memcmp(n->primary_key, pkey, key_len) &&  		    net_eq(dev_net(n->dev), net)) { -			neigh_hold(n); +			if (!atomic_inc_not_zero(&n->refcnt)) +				n = NULL;  			NEIGH_CACHE_STAT_INC(tbl, hits);  			break;  		}  	} -	read_unlock_bh(&tbl->lock); + +	rcu_read_unlock_bh();  	return n;  }  EXPORT_SYMBOL(neigh_lookup_nodev); @@ -411,6 +463,7 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,  	int key_len = tbl->key_len;  	int error;  	struct neighbour *n1, *rc, *n = neigh_alloc(tbl); +	struct neigh_hash_table *nht;  	if (!n) {  		rc = ERR_PTR(-ENOBUFS); @@ -437,18 +490,24 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,  	n->confirmed = jiffies - (n->parms->base_reachable_time << 1);  	write_lock_bh(&tbl->lock); +	nht = rcu_dereference_protected(tbl->nht, +					lockdep_is_held(&tbl->lock)); -	if (atomic_read(&tbl->entries) > (tbl->hash_mask + 1)) -		neigh_hash_grow(tbl, (tbl->hash_mask + 1) << 1); +	if (atomic_read(&tbl->entries) > (nht->hash_mask + 1)) +		nht = neigh_hash_grow(tbl, (nht->hash_mask + 1) << 1); -	hash_val = tbl->hash(pkey, dev) & tbl->hash_mask; +	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) & nht->hash_mask;  	if (n->parms->dead) {  		rc = ERR_PTR(-EINVAL);  		goto out_tbl_unlock;  	} -	for (n1 = tbl->hash_buckets[hash_val]; n1; n1 = n1->next) { +	for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val], +					    lockdep_is_held(&tbl->lock)); +	     n1 != NULL; +	     n1 = rcu_dereference_protected(n1->next, +			lockdep_is_held(&tbl->lock))) {  		if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {  			neigh_hold(n1);  			rc = n1; @@ -456,10 +515,12 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,  		}  	} -	n->next = tbl->hash_buckets[hash_val]; -	tbl->hash_buckets[hash_val] = n;  	n->dead = 0;  	neigh_hold(n); +	rcu_assign_pointer(n->next, +			   rcu_dereference_protected(nht->hash_buckets[hash_val], +						     lockdep_is_held(&tbl->lock))); +	rcu_assign_pointer(nht->hash_buckets[hash_val], n);  	write_unlock_bh(&tbl->lock);  	NEIGH_PRINTK2("neigh %p is created.\n", n);  	rc = n; @@ -616,6 +677,12 @@ static inline void neigh_parms_put(struct neigh_parms *parms)  		neigh_parms_destroy(parms);  } +static void neigh_destroy_rcu(struct rcu_head *head) +{ +	struct neighbour *neigh = container_of(head, struct neighbour, rcu); + +	kmem_cache_free(neigh->tbl->kmem_cachep, neigh); +}  /*   *	neighbour must already be out of the table;   * @@ -643,8 +710,7 @@ void neigh_destroy(struct neighbour *neigh)  		write_seqlock_bh(&hh->hh_lock);  		hh->hh_output = neigh_blackhole;  		write_sequnlock_bh(&hh->hh_lock); -		if (atomic_dec_and_test(&hh->hh_refcnt)) -			kfree(hh); +		hh_cache_put(hh);  	}  	skb_queue_purge(&neigh->arp_queue); @@ -655,7 +721,7 @@ void neigh_destroy(struct neighbour *neigh)  	NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh);  	atomic_dec(&neigh->tbl->entries); -	kmem_cache_free(neigh->tbl->kmem_cachep, neigh); +	call_rcu(&neigh->rcu, neigh_destroy_rcu);  }  EXPORT_SYMBOL(neigh_destroy); @@ -696,12 +762,16 @@ static void neigh_connect(struct neighbour *neigh)  static void neigh_periodic_work(struct work_struct *work)  {  	struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work); -	struct neighbour *n, **np; +	struct neighbour *n; +	struct neighbour __rcu **np;  	unsigned int i; +	struct neigh_hash_table *nht;  	NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);  	write_lock_bh(&tbl->lock); +	nht = rcu_dereference_protected(tbl->nht, +					lockdep_is_held(&tbl->lock));  	/*  	 *	periodically recompute ReachableTime from random function @@ -715,10 +785,11 @@ static void neigh_periodic_work(struct work_struct *work)  				neigh_rand_reach_time(p->base_reachable_time);  	} -	for (i = 0 ; i <= tbl->hash_mask; i++) { -		np = &tbl->hash_buckets[i]; +	for (i = 0 ; i <= nht->hash_mask; i++) { +		np = &nht->hash_buckets[i]; -		while ((n = *np) != NULL) { +		while ((n = rcu_dereference_protected(*np, +				lockdep_is_held(&tbl->lock))) != NULL) {  			unsigned int state;  			write_lock(&n->lock); @@ -766,9 +837,9 @@ next_elt:  static __inline__ int neigh_max_probes(struct neighbour *n)  {  	struct neigh_parms *p = n->parms; -	return (n->nud_state & NUD_PROBE ? +	return (n->nud_state & NUD_PROBE) ?  		p->ucast_probes : -		p->ucast_probes + p->app_probes + p->mcast_probes); +		p->ucast_probes + p->app_probes + p->mcast_probes;  }  static void neigh_invalidate(struct neighbour *neigh) @@ -945,7 +1016,7 @@ out_unlock_bh:  }  EXPORT_SYMBOL(__neigh_event_send); -static void neigh_update_hhs(struct neighbour *neigh) +static void neigh_update_hhs(const struct neighbour *neigh)  {  	struct hh_cache *hh;  	void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *) @@ -1081,7 +1152,9 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,  	}  	if (lladdr != neigh->ha) { +		write_seqlock(&neigh->ha_lock);  		memcpy(&neigh->ha, lladdr, dev->addr_len); +		write_sequnlock(&neigh->ha_lock);  		neigh_update_hhs(neigh);  		if (!(new & NUD_CONNECTED))  			neigh->confirmed = jiffies - @@ -1139,44 +1212,73 @@ struct neighbour *neigh_event_ns(struct neigh_table *tbl,  }  EXPORT_SYMBOL(neigh_event_ns); +static inline bool neigh_hh_lookup(struct neighbour *n, struct dst_entry *dst, +				   __be16 protocol) +{ +	struct hh_cache *hh; + +	smp_rmb(); /* paired with smp_wmb() in neigh_hh_init() */ +	for (hh = n->hh; hh; hh = hh->hh_next) { +		if (hh->hh_type == protocol) { +			atomic_inc(&hh->hh_refcnt); +			if (unlikely(cmpxchg(&dst->hh, NULL, hh) != NULL)) +				hh_cache_put(hh); +			return true; +		} +	} +	return false; +} + +/* called with read_lock_bh(&n->lock); */  static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst,  			  __be16 protocol)  {  	struct hh_cache	*hh;  	struct net_device *dev = dst->dev; -	for (hh = n->hh; hh; hh = hh->hh_next) -		if (hh->hh_type == protocol) -			break; +	if (likely(neigh_hh_lookup(n, dst, protocol))) +		return; -	if (!hh && (hh = kzalloc(sizeof(*hh), GFP_ATOMIC)) != NULL) { -		seqlock_init(&hh->hh_lock); -		hh->hh_type = protocol; -		atomic_set(&hh->hh_refcnt, 0); -		hh->hh_next = NULL; +	/* slow path */ +	hh = kzalloc(sizeof(*hh), GFP_ATOMIC); +	if (!hh) +		return; -		if (dev->header_ops->cache(n, hh)) { -			kfree(hh); -			hh = NULL; -		} else { -			atomic_inc(&hh->hh_refcnt); -			hh->hh_next = n->hh; -			n->hh	    = hh; -			if (n->nud_state & NUD_CONNECTED) -				hh->hh_output = n->ops->hh_output; -			else -				hh->hh_output = n->ops->output; -		} +	seqlock_init(&hh->hh_lock); +	hh->hh_type = protocol; +	atomic_set(&hh->hh_refcnt, 2); + +	if (dev->header_ops->cache(n, hh)) { +		kfree(hh); +		return;  	} -	if (hh)	{ -		atomic_inc(&hh->hh_refcnt); -		dst->hh = hh; + +	write_lock_bh(&n->lock); + +	/* must check if another thread already did the insert */ +	if (neigh_hh_lookup(n, dst, protocol)) { +		kfree(hh); +		goto end;  	} + +	if (n->nud_state & NUD_CONNECTED) +		hh->hh_output = n->ops->hh_output; +	else +		hh->hh_output = n->ops->output; + +	hh->hh_next = n->hh; +	smp_wmb(); /* paired with smp_rmb() in neigh_hh_lookup() */ +	n->hh	    = hh; + +	if (unlikely(cmpxchg(&dst->hh, NULL, hh) != NULL)) +		hh_cache_put(hh); +end: +	write_unlock_bh(&n->lock);  }  /* This function can be used in contexts, where only old dev_queue_xmit -   worked, f.e. if you want to override normal output path (eql, shaper), -   but resolution is not made yet. + * worked, f.e. if you want to override normal output path (eql, shaper), + * but resolution is not made yet.   */  int neigh_compat_output(struct sk_buff *skb) @@ -1210,19 +1312,19 @@ int neigh_resolve_output(struct sk_buff *skb)  	if (!neigh_event_send(neigh, skb)) {  		int err;  		struct net_device *dev = neigh->dev; -		if (dev->header_ops->cache && !dst->hh) { -			write_lock_bh(&neigh->lock); -			if (!dst->hh) -				neigh_hh_init(neigh, dst, dst->ops->protocol); -			err = dev_hard_header(skb, dev, ntohs(skb->protocol), -					      neigh->ha, NULL, skb->len); -			write_unlock_bh(&neigh->lock); -		} else { -			read_lock_bh(&neigh->lock); +		unsigned int seq; + +		if (dev->header_ops->cache && +		    !dst->hh && +		    !(dst->flags & DST_NOCACHE)) +			neigh_hh_init(neigh, dst, dst->ops->protocol); + +		do { +			seq = read_seqbegin(&neigh->ha_lock);  			err = dev_hard_header(skb, dev, ntohs(skb->protocol),  					      neigh->ha, NULL, skb->len); -			read_unlock_bh(&neigh->lock); -		} +		} while (read_seqretry(&neigh->ha_lock, seq)); +  		if (err >= 0)  			rc = neigh->ops->queue_xmit(skb);  		else @@ -1248,13 +1350,16 @@ int neigh_connected_output(struct sk_buff *skb)  	struct dst_entry *dst = skb_dst(skb);  	struct neighbour *neigh = dst->neighbour;  	struct net_device *dev = neigh->dev; +	unsigned int seq;  	__skb_pull(skb, skb_network_offset(skb)); -	read_lock_bh(&neigh->lock); -	err = dev_hard_header(skb, dev, ntohs(skb->protocol), -			      neigh->ha, NULL, skb->len); -	read_unlock_bh(&neigh->lock); +	do { +		seq = read_seqbegin(&neigh->ha_lock); +		err = dev_hard_header(skb, dev, ntohs(skb->protocol), +				      neigh->ha, NULL, skb->len); +	} while (read_seqretry(&neigh->ha_lock, seq)); +  	if (err >= 0)  		err = neigh->ops->queue_xmit(skb);  	else { @@ -1436,17 +1541,14 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl)  		panic("cannot create neighbour proc dir entry");  #endif -	tbl->hash_mask = 1; -	tbl->hash_buckets = neigh_hash_alloc(tbl->hash_mask + 1); +	tbl->nht = neigh_hash_alloc(8);  	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);  	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL); -	if (!tbl->hash_buckets || !tbl->phash_buckets) +	if (!tbl->nht || !tbl->phash_buckets)  		panic("cannot allocate neighbour cache hashes"); -	get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd)); -  	rwlock_init(&tbl->lock);  	INIT_DELAYED_WORK_DEFERRABLE(&tbl->gc_work, neigh_periodic_work);  	schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time); @@ -1486,8 +1588,7 @@ int neigh_table_clear(struct neigh_table *tbl)  	struct neigh_table **tp;  	/* It is not clean... Fix it to unload IPv6 module safely */ -	cancel_delayed_work(&tbl->gc_work); -	flush_scheduled_work(); +	cancel_delayed_work_sync(&tbl->gc_work);  	del_timer_sync(&tbl->proxy_timer);  	pneigh_queue_purge(&tbl->proxy_queue);  	neigh_ifdown(tbl, NULL); @@ -1502,8 +1603,8 @@ int neigh_table_clear(struct neigh_table *tbl)  	}  	write_unlock(&neigh_tbl_lock); -	neigh_hash_free(tbl->hash_buckets, tbl->hash_mask + 1); -	tbl->hash_buckets = NULL; +	call_rcu(&tbl->nht->rcu, neigh_hash_free_rcu); +	tbl->nht = NULL;  	kfree(tbl->phash_buckets);  	tbl->phash_buckets = NULL; @@ -1529,6 +1630,7 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)  	struct net_device *dev = NULL;  	int err = -EINVAL; +	ASSERT_RTNL();  	if (nlmsg_len(nlh) < sizeof(*ndm))  		goto out; @@ -1538,7 +1640,7 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)  	ndm = nlmsg_data(nlh);  	if (ndm->ndm_ifindex) { -		dev = dev_get_by_index(net, ndm->ndm_ifindex); +		dev = __dev_get_by_index(net, ndm->ndm_ifindex);  		if (dev == NULL) {  			err = -ENODEV;  			goto out; @@ -1554,34 +1656,31 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)  		read_unlock(&neigh_tbl_lock);  		if (nla_len(dst_attr) < tbl->key_len) -			goto out_dev_put; +			goto out;  		if (ndm->ndm_flags & NTF_PROXY) {  			err = pneigh_delete(tbl, net, nla_data(dst_attr), dev); -			goto out_dev_put; +			goto out;  		}  		if (dev == NULL) -			goto out_dev_put; +			goto out;  		neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);  		if (neigh == NULL) {  			err = -ENOENT; -			goto out_dev_put; +			goto out;  		}  		err = neigh_update(neigh, NULL, NUD_FAILED,  				   NEIGH_UPDATE_F_OVERRIDE |  				   NEIGH_UPDATE_F_ADMIN);  		neigh_release(neigh); -		goto out_dev_put; +		goto out;  	}  	read_unlock(&neigh_tbl_lock);  	err = -EAFNOSUPPORT; -out_dev_put: -	if (dev) -		dev_put(dev);  out:  	return err;  } @@ -1595,6 +1694,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)  	struct net_device *dev = NULL;  	int err; +	ASSERT_RTNL();  	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);  	if (err < 0)  		goto out; @@ -1605,14 +1705,14 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)  	ndm = nlmsg_data(nlh);  	if (ndm->ndm_ifindex) { -		dev = dev_get_by_index(net, ndm->ndm_ifindex); +		dev = __dev_get_by_index(net, ndm->ndm_ifindex);  		if (dev == NULL) {  			err = -ENODEV;  			goto out;  		}  		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len) -			goto out_dev_put; +			goto out;  	}  	read_lock(&neigh_tbl_lock); @@ -1626,7 +1726,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)  		read_unlock(&neigh_tbl_lock);  		if (nla_len(tb[NDA_DST]) < tbl->key_len) -			goto out_dev_put; +			goto out;  		dst = nla_data(tb[NDA_DST]);  		lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL; @@ -1639,29 +1739,29 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)  				pn->flags = ndm->ndm_flags;  				err = 0;  			} -			goto out_dev_put; +			goto out;  		}  		if (dev == NULL) -			goto out_dev_put; +			goto out;  		neigh = neigh_lookup(tbl, dst, dev);  		if (neigh == NULL) {  			if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {  				err = -ENOENT; -				goto out_dev_put; +				goto out;  			}  			neigh = __neigh_lookup_errno(tbl, dst, dev);  			if (IS_ERR(neigh)) {  				err = PTR_ERR(neigh); -				goto out_dev_put; +				goto out;  			}  		} else {  			if (nlh->nlmsg_flags & NLM_F_EXCL) {  				err = -EEXIST;  				neigh_release(neigh); -				goto out_dev_put; +				goto out;  			}  			if (!(nlh->nlmsg_flags & NLM_F_REPLACE)) @@ -1674,15 +1774,11 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)  		} else  			err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);  		neigh_release(neigh); -		goto out_dev_put; +		goto out;  	}  	read_unlock(&neigh_tbl_lock);  	err = -EAFNOSUPPORT; - -out_dev_put: -	if (dev) -		dev_put(dev);  out:  	return err;  } @@ -1748,18 +1844,22 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,  		unsigned long now = jiffies;  		unsigned int flush_delta = now - tbl->last_flush;  		unsigned int rand_delta = now - tbl->last_rand; - +		struct neigh_hash_table *nht;  		struct ndt_config ndc = {  			.ndtc_key_len		= tbl->key_len,  			.ndtc_entry_size	= tbl->entry_size,  			.ndtc_entries		= atomic_read(&tbl->entries),  			.ndtc_last_flush	= jiffies_to_msecs(flush_delta),  			.ndtc_last_rand		= jiffies_to_msecs(rand_delta), -			.ndtc_hash_rnd		= tbl->hash_rnd, -			.ndtc_hash_mask		= tbl->hash_mask,  			.ndtc_proxy_qlen	= tbl->proxy_queue.qlen,  		}; +		rcu_read_lock_bh(); +		nht = rcu_dereference_bh(tbl->nht); +		ndc.ndtc_hash_rnd = nht->hash_rnd; +		ndc.ndtc_hash_mask = nht->hash_mask; +		rcu_read_unlock_bh(); +  		NLA_PUT(skb, NDTA_CONFIG, sizeof(ndc), &ndc);  	} @@ -2056,10 +2156,14 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,  	read_lock_bh(&neigh->lock);  	ndm->ndm_state	 = neigh->nud_state; -	if ((neigh->nud_state & NUD_VALID) && -	    nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, neigh->ha) < 0) { -		read_unlock_bh(&neigh->lock); -		goto nla_put_failure; +	if (neigh->nud_state & NUD_VALID) { +		char haddr[MAX_ADDR_LEN]; + +		neigh_ha_snapshot(haddr, neigh, neigh->dev); +		if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) { +			read_unlock_bh(&neigh->lock); +			goto nla_put_failure; +		}  	}  	ci.ndm_used	 = jiffies_to_clock_t(now - neigh->used); @@ -2087,18 +2191,23 @@ static void neigh_update_notify(struct neighbour *neigh)  static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,  			    struct netlink_callback *cb)  { -	struct net * net = sock_net(skb->sk); +	struct net *net = sock_net(skb->sk);  	struct neighbour *n;  	int rc, h, s_h = cb->args[1];  	int idx, s_idx = idx = cb->args[2]; +	struct neigh_hash_table *nht; -	read_lock_bh(&tbl->lock); -	for (h = 0; h <= tbl->hash_mask; h++) { +	rcu_read_lock_bh(); +	nht = rcu_dereference_bh(tbl->nht); + +	for (h = 0; h <= nht->hash_mask; h++) {  		if (h < s_h)  			continue;  		if (h > s_h)  			s_idx = 0; -		for (n = tbl->hash_buckets[h], idx = 0; n; n = n->next) { +		for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0; +		     n != NULL; +		     n = rcu_dereference_bh(n->next)) {  			if (!net_eq(dev_net(n->dev), net))  				continue;  			if (idx < s_idx) @@ -2107,17 +2216,16 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,  					    cb->nlh->nlmsg_seq,  					    RTM_NEWNEIGH,  					    NLM_F_MULTI) <= 0) { -				read_unlock_bh(&tbl->lock);  				rc = -1;  				goto out;  			} -		next: +next:  			idx++;  		}  	} -	read_unlock_bh(&tbl->lock);  	rc = skb->len;  out: +	rcu_read_unlock_bh();  	cb->args[1] = h;  	cb->args[2] = idx;  	return rc; @@ -2150,15 +2258,22 @@ static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)  void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)  {  	int chain; +	struct neigh_hash_table *nht; -	read_lock_bh(&tbl->lock); -	for (chain = 0; chain <= tbl->hash_mask; chain++) { +	rcu_read_lock_bh(); +	nht = rcu_dereference_bh(tbl->nht); + +	read_lock(&tbl->lock); /* avoid resizes */ +	for (chain = 0; chain <= nht->hash_mask; chain++) {  		struct neighbour *n; -		for (n = tbl->hash_buckets[chain]; n; n = n->next) +		for (n = rcu_dereference_bh(nht->hash_buckets[chain]); +		     n != NULL; +		     n = rcu_dereference_bh(n->next))  			cb(n, cookie);  	} -	read_unlock_bh(&tbl->lock); +	read_unlock(&tbl->lock); +	rcu_read_unlock_bh();  }  EXPORT_SYMBOL(neigh_for_each); @@ -2167,18 +2282,25 @@ void __neigh_for_each_release(struct neigh_table *tbl,  			      int (*cb)(struct neighbour *))  {  	int chain; +	struct neigh_hash_table *nht; -	for (chain = 0; chain <= tbl->hash_mask; chain++) { -		struct neighbour *n, **np; +	nht = rcu_dereference_protected(tbl->nht, +					lockdep_is_held(&tbl->lock)); +	for (chain = 0; chain <= nht->hash_mask; chain++) { +		struct neighbour *n; +		struct neighbour __rcu **np; -		np = &tbl->hash_buckets[chain]; -		while ((n = *np) != NULL) { +		np = &nht->hash_buckets[chain]; +		while ((n = rcu_dereference_protected(*np, +					lockdep_is_held(&tbl->lock))) != NULL) {  			int release;  			write_lock(&n->lock);  			release = cb(n);  			if (release) { -				*np = n->next; +				rcu_assign_pointer(*np, +					rcu_dereference_protected(n->next, +						lockdep_is_held(&tbl->lock)));  				n->dead = 1;  			} else  				np = &n->next; @@ -2196,13 +2318,13 @@ static struct neighbour *neigh_get_first(struct seq_file *seq)  {  	struct neigh_seq_state *state = seq->private;  	struct net *net = seq_file_net(seq); -	struct neigh_table *tbl = state->tbl; +	struct neigh_hash_table *nht = state->nht;  	struct neighbour *n = NULL;  	int bucket = state->bucket;  	state->flags &= ~NEIGH_SEQ_IS_PNEIGH; -	for (bucket = 0; bucket <= tbl->hash_mask; bucket++) { -		n = tbl->hash_buckets[bucket]; +	for (bucket = 0; bucket <= nht->hash_mask; bucket++) { +		n = rcu_dereference_bh(nht->hash_buckets[bucket]);  		while (n) {  			if (!net_eq(dev_net(n->dev), net)) @@ -2219,8 +2341,8 @@ static struct neighbour *neigh_get_first(struct seq_file *seq)  				break;  			if (n->nud_state & ~NUD_NOARP)  				break; -		next: -			n = n->next; +next: +			n = rcu_dereference_bh(n->next);  		}  		if (n) @@ -2237,14 +2359,14 @@ static struct neighbour *neigh_get_next(struct seq_file *seq,  {  	struct neigh_seq_state *state = seq->private;  	struct net *net = seq_file_net(seq); -	struct neigh_table *tbl = state->tbl; +	struct neigh_hash_table *nht = state->nht;  	if (state->neigh_sub_iter) {  		void *v = state->neigh_sub_iter(state, n, pos);  		if (v)  			return n;  	} -	n = n->next; +	n = rcu_dereference_bh(n->next);  	while (1) {  		while (n) { @@ -2261,17 +2383,17 @@ static struct neighbour *neigh_get_next(struct seq_file *seq,  			if (n->nud_state & ~NUD_NOARP)  				break; -		next: -			n = n->next; +next: +			n = rcu_dereference_bh(n->next);  		}  		if (n)  			break; -		if (++state->bucket > tbl->hash_mask) +		if (++state->bucket > nht->hash_mask)  			break; -		n = tbl->hash_buckets[state->bucket]; +		n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);  	}  	if (n && pos) @@ -2369,7 +2491,7 @@ static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)  }  void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags) -	__acquires(tbl->lock) +	__acquires(rcu_bh)  {  	struct neigh_seq_state *state = seq->private; @@ -2377,7 +2499,8 @@ void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl  	state->bucket = 0;  	state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH); -	read_lock_bh(&tbl->lock); +	rcu_read_lock_bh(); +	state->nht = rcu_dereference_bh(tbl->nht);  	return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;  } @@ -2411,12 +2534,9 @@ out:  EXPORT_SYMBOL(neigh_seq_next);  void neigh_seq_stop(struct seq_file *seq, void *v) -	__releases(tbl->lock) +	__releases(rcu_bh)  { -	struct neigh_seq_state *state = seq->private; -	struct neigh_table *tbl = state->tbl; - -	read_unlock_bh(&tbl->lock); +	rcu_read_unlock_bh();  }  EXPORT_SYMBOL(neigh_seq_stop);  |