diff options
Diffstat (limited to 'net/ipv4/route.c')
| -rw-r--r-- | net/ipv4/route.c | 124 | 
1 files changed, 113 insertions, 11 deletions
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 46af62363b8..bcacf54e541 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -91,6 +91,7 @@  #include <linux/rcupdate.h>  #include <linux/times.h>  #include <linux/slab.h> +#include <linux/prefetch.h>  #include <net/dst.h>  #include <net/net_namespace.h>  #include <net/protocol.h> @@ -108,7 +109,6 @@  #ifdef CONFIG_SYSCTL  #include <linux/sysctl.h>  #endif -#include <net/atmclip.h>  #include <net/secure_seq.h>  #define RT_FL_TOS(oldflp4) \ @@ -120,6 +120,7 @@  static int ip_rt_max_size;  static int ip_rt_gc_timeout __read_mostly	= RT_GC_TIMEOUT; +static int ip_rt_gc_interval __read_mostly  = 60 * HZ;  static int ip_rt_gc_min_interval __read_mostly	= HZ / 2;  static int ip_rt_redirect_number __read_mostly	= 9;  static int ip_rt_redirect_load __read_mostly	= HZ / 50; @@ -133,6 +134,9 @@ static int ip_rt_min_advmss __read_mostly	= 256;  static int rt_chain_length_max __read_mostly	= 20;  static int redirect_genid; +static struct delayed_work expires_work; +static unsigned long expires_ljiffies; +  /*   *	Interface to generic destination cache.   */ @@ -420,7 +424,7 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v)  		int len, HHUptod;  		rcu_read_lock(); -		n = dst_get_neighbour(&r->dst); +		n = dst_get_neighbour_noref(&r->dst);  		HHUptod = (n && (n->nud_state & NUD_CONNECTED)) ? 1 : 0;  		rcu_read_unlock(); @@ -830,6 +834,97 @@ static int has_noalias(const struct rtable *head, const struct rtable *rth)  	return ONE;  } +static void rt_check_expire(void) +{ +	static unsigned int rover; +	unsigned int i = rover, goal; +	struct rtable *rth; +	struct rtable __rcu **rthp; +	unsigned long samples = 0; +	unsigned long sum = 0, sum2 = 0; +	unsigned long delta; +	u64 mult; + +	delta = jiffies - expires_ljiffies; +	expires_ljiffies = jiffies; +	mult = ((u64)delta) << rt_hash_log; +	if (ip_rt_gc_timeout > 1) +		do_div(mult, ip_rt_gc_timeout); +	goal = (unsigned int)mult; +	if (goal > rt_hash_mask) +		goal = rt_hash_mask + 1; +	for (; goal > 0; goal--) { +		unsigned long tmo = ip_rt_gc_timeout; +		unsigned long length; + +		i = (i + 1) & rt_hash_mask; +		rthp = &rt_hash_table[i].chain; + +		if (need_resched()) +			cond_resched(); + +		samples++; + +		if (rcu_dereference_raw(*rthp) == NULL) +			continue; +		length = 0; +		spin_lock_bh(rt_hash_lock_addr(i)); +		while ((rth = rcu_dereference_protected(*rthp, +					lockdep_is_held(rt_hash_lock_addr(i)))) != NULL) { +			prefetch(rth->dst.rt_next); +			if (rt_is_expired(rth)) { +				*rthp = rth->dst.rt_next; +				rt_free(rth); +				continue; +			} +			if (rth->dst.expires) { +				/* Entry is expired even if it is in use */ +				if (time_before_eq(jiffies, rth->dst.expires)) { +nofree: +					tmo >>= 1; +					rthp = &rth->dst.rt_next; +					/* +					 * We only count entries on +					 * a chain with equal hash inputs once +					 * so that entries for different QOS +					 * levels, and other non-hash input +					 * attributes don't unfairly skew +					 * the length computation +					 */ +					length += has_noalias(rt_hash_table[i].chain, rth); +					continue; +				} +			} else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) +				goto nofree; + +			/* Cleanup aged off entries. */ +			*rthp = rth->dst.rt_next; +			rt_free(rth); +		} +		spin_unlock_bh(rt_hash_lock_addr(i)); +		sum += length; +		sum2 += length*length; +	} +	if (samples) { +		unsigned long avg = sum / samples; +		unsigned long sd = int_sqrt(sum2 / samples - avg*avg); +		rt_chain_length_max = max_t(unsigned long, +					ip_rt_gc_elasticity, +					(avg + 4*sd) >> FRACT_BITS); +	} +	rover = i; +} + +/* + * rt_worker_func() is run in process context. + * we call rt_check_expire() to scan part of the hash table + */ +static void rt_worker_func(struct work_struct *work) +{ +	rt_check_expire(); +	schedule_delayed_work(&expires_work, ip_rt_gc_interval); +} +  /*   * Perturbation of rt_genid by a small quantity [1..256]   * Using 8 bits of shuffling ensure we can call rt_cache_invalidate() @@ -1019,23 +1114,18 @@ static int slow_chain_length(const struct rtable *head)  static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, const void *daddr)  { -	struct neigh_table *tbl = &arp_tbl;  	static const __be32 inaddr_any = 0;  	struct net_device *dev = dst->dev;  	const __be32 *pkey = daddr;  	struct neighbour *n; -#if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE) -	if (dev->type == ARPHRD_ATM) -		tbl = clip_tbl_hook; -#endif  	if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT))  		pkey = &inaddr_any; -	n = __ipv4_neigh_lookup(tbl, dev, *(__force u32 *)pkey); +	n = __ipv4_neigh_lookup(&arp_tbl, dev, *(__force u32 *)pkey);  	if (n)  		return n; -	return neigh_create(tbl, pkey, dev); +	return neigh_create(&arp_tbl, pkey, dev);  }  static int rt_bind_neighbour(struct rtable *rt) @@ -1271,7 +1361,7 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)  {  	struct rtable *rt = (struct rtable *) dst; -	if (rt) { +	if (rt && !(rt->dst.flags & DST_NOPEER)) {  		if (rt->peer == NULL)  			rt_bind_peer(rt, rt->rt_dst, 1); @@ -1282,7 +1372,7 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)  			iph->id = htons(inet_getid(rt->peer, more));  			return;  		} -	} else +	} else if (!rt)  		printk(KERN_DEBUG "rt_bind_peer(0) @%p\n",  		       __builtin_return_address(0)); @@ -3179,6 +3269,13 @@ static ctl_table ipv4_route_table[] = {  		.proc_handler	= proc_dointvec_jiffies,  	},  	{ +		.procname	= "gc_interval", +		.data		= &ip_rt_gc_interval, +		.maxlen		= sizeof(int), +		.mode		= 0644, +		.proc_handler	= proc_dointvec_jiffies, +	}, +	{  		.procname	= "redirect_load",  		.data		= &ip_rt_redirect_load,  		.maxlen		= sizeof(int), @@ -3388,6 +3485,11 @@ int __init ip_rt_init(void)  	devinet_init();  	ip_fib_init(); +	INIT_DELAYED_WORK_DEFERRABLE(&expires_work, rt_worker_func); +	expires_ljiffies = jiffies; +	schedule_delayed_work(&expires_work, +		net_random() % ip_rt_gc_interval + ip_rt_gc_interval); +  	if (ip_rt_proc_init())  		printk(KERN_ERR "Unable to create route proc files\n");  #ifdef CONFIG_XFRM  |