diff options
Diffstat (limited to 'net/xfrm/xfrm_policy.c')
| -rw-r--r-- | net/xfrm/xfrm_policy.c | 845 | 
1 files changed, 440 insertions, 405 deletions
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 843e066649c..7430ac26ec4 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -37,6 +37,8 @@  DEFINE_MUTEX(xfrm_cfg_mutex);  EXPORT_SYMBOL(xfrm_cfg_mutex); +static DEFINE_SPINLOCK(xfrm_policy_sk_bundle_lock); +static struct dst_entry *xfrm_policy_sk_bundles;  static DEFINE_RWLOCK(xfrm_policy_lock);  static DEFINE_RWLOCK(xfrm_policy_afinfo_lock); @@ -44,12 +46,10 @@ static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO];  static struct kmem_cache *xfrm_dst_cache __read_mostly; -static HLIST_HEAD(xfrm_policy_gc_list); -static DEFINE_SPINLOCK(xfrm_policy_gc_lock); -  static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family);  static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo);  static void xfrm_init_pmtu(struct dst_entry *dst); +static int stale_bundle(struct dst_entry *dst);  static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,  						int dir); @@ -156,7 +156,7 @@ static void xfrm_policy_timer(unsigned long data)  	read_lock(&xp->lock); -	if (xp->walk.dead) +	if (unlikely(xp->walk.dead))  		goto out;  	dir = xfrm_policy_id2dir(xp->index); @@ -216,6 +216,35 @@ expired:  	xfrm_pol_put(xp);  } +static struct flow_cache_object *xfrm_policy_flo_get(struct flow_cache_object *flo) +{ +	struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo); + +	if (unlikely(pol->walk.dead)) +		flo = NULL; +	else +		xfrm_pol_hold(pol); + +	return flo; +} + +static int xfrm_policy_flo_check(struct flow_cache_object *flo) +{ +	struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo); + +	return !pol->walk.dead; +} + +static void xfrm_policy_flo_delete(struct flow_cache_object *flo) +{ +	xfrm_pol_put(container_of(flo, struct xfrm_policy, flo)); +} + +static const struct flow_cache_ops xfrm_policy_fc_ops = { +	.get = xfrm_policy_flo_get, +	.check = xfrm_policy_flo_check, +	.delete = xfrm_policy_flo_delete, +};  /* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2   * SPD calls. @@ -236,6 +265,7 @@ struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp)  		atomic_set(&policy->refcnt, 1);  		setup_timer(&policy->timer, xfrm_policy_timer,  				(unsigned long)policy); +		policy->flo.ops = &xfrm_policy_fc_ops;  	}  	return policy;  } @@ -247,8 +277,6 @@ void xfrm_policy_destroy(struct xfrm_policy *policy)  {  	BUG_ON(!policy->walk.dead); -	BUG_ON(policy->bundles); -  	if (del_timer(&policy->timer))  		BUG(); @@ -257,63 +285,20 @@ void xfrm_policy_destroy(struct xfrm_policy *policy)  }  EXPORT_SYMBOL(xfrm_policy_destroy); -static void xfrm_policy_gc_kill(struct xfrm_policy *policy) -{ -	struct dst_entry *dst; - -	while ((dst = policy->bundles) != NULL) { -		policy->bundles = dst->next; -		dst_free(dst); -	} - -	if (del_timer(&policy->timer)) -		atomic_dec(&policy->refcnt); - -	if (atomic_read(&policy->refcnt) > 1) -		flow_cache_flush(); - -	xfrm_pol_put(policy); -} - -static void xfrm_policy_gc_task(struct work_struct *work) -{ -	struct xfrm_policy *policy; -	struct hlist_node *entry, *tmp; -	struct hlist_head gc_list; - -	spin_lock_bh(&xfrm_policy_gc_lock); -	gc_list.first = xfrm_policy_gc_list.first; -	INIT_HLIST_HEAD(&xfrm_policy_gc_list); -	spin_unlock_bh(&xfrm_policy_gc_lock); - -	hlist_for_each_entry_safe(policy, entry, tmp, &gc_list, bydst) -		xfrm_policy_gc_kill(policy); -} -static DECLARE_WORK(xfrm_policy_gc_work, xfrm_policy_gc_task); -  /* Rule must be locked. Release descentant resources, announce   * entry dead. The rule must be unlinked from lists to the moment.   */  static void xfrm_policy_kill(struct xfrm_policy *policy)  { -	int dead; - -	write_lock_bh(&policy->lock); -	dead = policy->walk.dead;  	policy->walk.dead = 1; -	write_unlock_bh(&policy->lock); -	if (unlikely(dead)) { -		WARN_ON(1); -		return; -	} +	atomic_inc(&policy->genid); -	spin_lock_bh(&xfrm_policy_gc_lock); -	hlist_add_head(&policy->bydst, &xfrm_policy_gc_list); -	spin_unlock_bh(&xfrm_policy_gc_lock); +	if (del_timer(&policy->timer)) +		xfrm_pol_put(policy); -	schedule_work(&xfrm_policy_gc_work); +	xfrm_pol_put(policy);  }  static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024; @@ -555,7 +540,6 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)  	struct xfrm_policy *delpol;  	struct hlist_head *chain;  	struct hlist_node *entry, *newpos; -	struct dst_entry *gc_list;  	u32 mark = policy->mark.v & policy->mark.m;  	write_lock_bh(&xfrm_policy_lock); @@ -605,34 +589,6 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)  	else if (xfrm_bydst_should_resize(net, dir, NULL))  		schedule_work(&net->xfrm.policy_hash_work); -	read_lock_bh(&xfrm_policy_lock); -	gc_list = NULL; -	entry = &policy->bydst; -	hlist_for_each_entry_continue(policy, entry, bydst) { -		struct dst_entry *dst; - -		write_lock(&policy->lock); -		dst = policy->bundles; -		if (dst) { -			struct dst_entry *tail = dst; -			while (tail->next) -				tail = tail->next; -			tail->next = gc_list; -			gc_list = dst; - -			policy->bundles = NULL; -		} -		write_unlock(&policy->lock); -	} -	read_unlock_bh(&xfrm_policy_lock); - -	while (gc_list) { -		struct dst_entry *dst = gc_list; - -		gc_list = dst->next; -		dst_free(dst); -	} -  	return 0;  }  EXPORT_SYMBOL(xfrm_policy_insert); @@ -671,10 +627,8 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type,  	}  	write_unlock_bh(&xfrm_policy_lock); -	if (ret && delete) { -		atomic_inc(&flow_cache_genid); +	if (ret && delete)  		xfrm_policy_kill(ret); -	}  	return ret;  }  EXPORT_SYMBOL(xfrm_policy_bysel_ctx); @@ -713,10 +667,8 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type,  	}  	write_unlock_bh(&xfrm_policy_lock); -	if (ret && delete) { -		atomic_inc(&flow_cache_genid); +	if (ret && delete)  		xfrm_policy_kill(ret); -	}  	return ret;  }  EXPORT_SYMBOL(xfrm_policy_byid); @@ -776,7 +728,6 @@ xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audi  int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info)  {  	int dir, err = 0, cnt = 0; -	struct xfrm_policy *dp;  	write_lock_bh(&xfrm_policy_lock); @@ -794,10 +745,9 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info)  				     &net->xfrm.policy_inexact[dir], bydst) {  			if (pol->type != type)  				continue; -			dp = __xfrm_policy_unlink(pol, dir); +			__xfrm_policy_unlink(pol, dir);  			write_unlock_bh(&xfrm_policy_lock); -			if (dp) -				cnt++; +			cnt++;  			xfrm_audit_policy_delete(pol, 1, audit_info->loginuid,  						 audit_info->sessionid, @@ -816,10 +766,9 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info)  					     bydst) {  				if (pol->type != type)  					continue; -				dp = __xfrm_policy_unlink(pol, dir); +				__xfrm_policy_unlink(pol, dir);  				write_unlock_bh(&xfrm_policy_lock); -				if (dp) -					cnt++; +				cnt++;  				xfrm_audit_policy_delete(pol, 1,  							 audit_info->loginuid, @@ -835,7 +784,6 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info)  	}  	if (!cnt)  		err = -ESRCH; -	atomic_inc(&flow_cache_genid);  out:  	write_unlock_bh(&xfrm_policy_lock);  	return err; @@ -989,32 +937,37 @@ fail:  	return ret;  } -static int xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family, -			      u8 dir, void **objp, atomic_t **obj_refp) +static struct xfrm_policy * +__xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family, u8 dir)  { +#ifdef CONFIG_XFRM_SUB_POLICY  	struct xfrm_policy *pol; -	int err = 0; -#ifdef CONFIG_XFRM_SUB_POLICY  	pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family, dir); -	if (IS_ERR(pol)) { -		err = PTR_ERR(pol); -		pol = NULL; -	} -	if (pol || err) -		goto end; -#endif -	pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir); -	if (IS_ERR(pol)) { -		err = PTR_ERR(pol); -		pol = NULL; -	} -#ifdef CONFIG_XFRM_SUB_POLICY -end: +	if (pol != NULL) +		return pol;  #endif -	if ((*objp = (void *) pol) != NULL) -		*obj_refp = &pol->refcnt; -	return err; +	return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir); +} + +static struct flow_cache_object * +xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family, +		   u8 dir, struct flow_cache_object *old_obj, void *ctx) +{ +	struct xfrm_policy *pol; + +	if (old_obj) +		xfrm_pol_put(container_of(old_obj, struct xfrm_policy, flo)); + +	pol = __xfrm_policy_lookup(net, fl, family, dir); +	if (IS_ERR_OR_NULL(pol)) +		return ERR_CAST(pol); + +	/* Resolver returns two references: +	 * one for cache and one for caller of flow_cache_lookup() */ +	xfrm_pol_hold(pol); + +	return &pol->flo;  }  static inline int policy_to_flow_dir(int dir) @@ -1104,8 +1057,6 @@ int xfrm_policy_delete(struct xfrm_policy *pol, int dir)  	pol = __xfrm_policy_unlink(pol, dir);  	write_unlock_bh(&xfrm_policy_lock);  	if (pol) { -		if (dir < XFRM_POLICY_MAX) -			atomic_inc(&flow_cache_genid);  		xfrm_policy_kill(pol);  		return 0;  	} @@ -1132,6 +1083,9 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)  		__xfrm_policy_link(pol, XFRM_POLICY_MAX+dir);  	}  	if (old_pol) +		/* Unlinking succeeds always. This is the only function +		 * allowed to delete or replace socket policy. +		 */  		__xfrm_policy_unlink(old_pol, XFRM_POLICY_MAX+dir);  	write_unlock_bh(&xfrm_policy_lock); @@ -1300,18 +1254,6 @@ xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl,   * still valid.   */ -static struct dst_entry * -xfrm_find_bundle(struct flowi *fl, struct xfrm_policy *policy, unsigned short family) -{ -	struct dst_entry *x; -	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); -	if (unlikely(afinfo == NULL)) -		return ERR_PTR(-EINVAL); -	x = afinfo->find_bundle(fl, policy); -	xfrm_policy_put_afinfo(afinfo); -	return x; -} -  static inline int xfrm_get_tos(struct flowi *fl, int family)  {  	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); @@ -1327,6 +1269,54 @@ static inline int xfrm_get_tos(struct flowi *fl, int family)  	return tos;  } +static struct flow_cache_object *xfrm_bundle_flo_get(struct flow_cache_object *flo) +{ +	struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo); +	struct dst_entry *dst = &xdst->u.dst; + +	if (xdst->route == NULL) { +		/* Dummy bundle - if it has xfrms we were not +		 * able to build bundle as template resolution failed. +		 * It means we need to try again resolving. */ +		if (xdst->num_xfrms > 0) +			return NULL; +	} else { +		/* Real bundle */ +		if (stale_bundle(dst)) +			return NULL; +	} + +	dst_hold(dst); +	return flo; +} + +static int xfrm_bundle_flo_check(struct flow_cache_object *flo) +{ +	struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo); +	struct dst_entry *dst = &xdst->u.dst; + +	if (!xdst->route) +		return 0; +	if (stale_bundle(dst)) +		return 0; + +	return 1; +} + +static void xfrm_bundle_flo_delete(struct flow_cache_object *flo) +{ +	struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo); +	struct dst_entry *dst = &xdst->u.dst; + +	dst_free(dst); +} + +static const struct flow_cache_ops xfrm_bundle_fc_ops = { +	.get = xfrm_bundle_flo_get, +	.check = xfrm_bundle_flo_check, +	.delete = xfrm_bundle_flo_delete, +}; +  static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)  {  	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); @@ -1349,9 +1339,10 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)  		BUG();  	}  	xdst = dst_alloc(dst_ops) ?: ERR_PTR(-ENOBUFS); -  	xfrm_policy_put_afinfo(afinfo); +	xdst->flo.ops = &xfrm_bundle_fc_ops; +  	return xdst;  } @@ -1389,6 +1380,7 @@ static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,  	return err;  } +  /* Allocate chain of dst_entry's, attach known xfrm's, calculate   * all the metrics... Shortly, bundle a bundle.   */ @@ -1452,7 +1444,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,  			dst_hold(dst);  		dst1->xfrm = xfrm[i]; -		xdst->genid = xfrm[i]->genid; +		xdst->xfrm_genid = xfrm[i]->genid;  		dst1->obsolete = -1;  		dst1->flags |= DST_HOST; @@ -1545,7 +1537,186 @@ xfrm_dst_update_origin(struct dst_entry *dst, struct flowi *fl)  #endif  } -static int stale_bundle(struct dst_entry *dst); +static int xfrm_expand_policies(struct flowi *fl, u16 family, +				struct xfrm_policy **pols, +				int *num_pols, int *num_xfrms) +{ +	int i; + +	if (*num_pols == 0 || !pols[0]) { +		*num_pols = 0; +		*num_xfrms = 0; +		return 0; +	} +	if (IS_ERR(pols[0])) +		return PTR_ERR(pols[0]); + +	*num_xfrms = pols[0]->xfrm_nr; + +#ifdef CONFIG_XFRM_SUB_POLICY +	if (pols[0] && pols[0]->action == XFRM_POLICY_ALLOW && +	    pols[0]->type != XFRM_POLICY_TYPE_MAIN) { +		pols[1] = xfrm_policy_lookup_bytype(xp_net(pols[0]), +						    XFRM_POLICY_TYPE_MAIN, +						    fl, family, +						    XFRM_POLICY_OUT); +		if (pols[1]) { +			if (IS_ERR(pols[1])) { +				xfrm_pols_put(pols, *num_pols); +				return PTR_ERR(pols[1]); +			} +			(*num_pols) ++; +			(*num_xfrms) += pols[1]->xfrm_nr; +		} +	} +#endif +	for (i = 0; i < *num_pols; i++) { +		if (pols[i]->action != XFRM_POLICY_ALLOW) { +			*num_xfrms = -1; +			break; +		} +	} + +	return 0; + +} + +static struct xfrm_dst * +xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, +			       struct flowi *fl, u16 family, +			       struct dst_entry *dst_orig) +{ +	struct net *net = xp_net(pols[0]); +	struct xfrm_state *xfrm[XFRM_MAX_DEPTH]; +	struct dst_entry *dst; +	struct xfrm_dst *xdst; +	int err; + +	/* Try to instantiate a bundle */ +	err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family); +	if (err < 0) { +		if (err != -EAGAIN) +			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); +		return ERR_PTR(err); +	} + +	dst = xfrm_bundle_create(pols[0], xfrm, err, fl, dst_orig); +	if (IS_ERR(dst)) { +		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR); +		return ERR_CAST(dst); +	} + +	xdst = (struct xfrm_dst *)dst; +	xdst->num_xfrms = err; +	if (num_pols > 1) +		err = xfrm_dst_update_parent(dst, &pols[1]->selector); +	else +		err = xfrm_dst_update_origin(dst, fl); +	if (unlikely(err)) { +		dst_free(dst); +		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR); +		return ERR_PTR(err); +	} + +	xdst->num_pols = num_pols; +	memcpy(xdst->pols, pols, sizeof(struct xfrm_policy*) * num_pols); +	xdst->policy_genid = atomic_read(&pols[0]->genid); + +	return xdst; +} + +static struct flow_cache_object * +xfrm_bundle_lookup(struct net *net, struct flowi *fl, u16 family, u8 dir, +		   struct flow_cache_object *oldflo, void *ctx) +{ +	struct dst_entry *dst_orig = (struct dst_entry *)ctx; +	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; +	struct xfrm_dst *xdst, *new_xdst; +	int num_pols = 0, num_xfrms = 0, i, err, pol_dead; + +	/* Check if the policies from old bundle are usable */ +	xdst = NULL; +	if (oldflo) { +		xdst = container_of(oldflo, struct xfrm_dst, flo); +		num_pols = xdst->num_pols; +		num_xfrms = xdst->num_xfrms; +		pol_dead = 0; +		for (i = 0; i < num_pols; i++) { +			pols[i] = xdst->pols[i]; +			pol_dead |= pols[i]->walk.dead; +		} +		if (pol_dead) { +			dst_free(&xdst->u.dst); +			xdst = NULL; +			num_pols = 0; +			num_xfrms = 0; +			oldflo = NULL; +		} +	} + +	/* Resolve policies to use if we couldn't get them from +	 * previous cache entry */ +	if (xdst == NULL) { +		num_pols = 1; +		pols[0] = __xfrm_policy_lookup(net, fl, family, dir); +		err = xfrm_expand_policies(fl, family, pols, +					   &num_pols, &num_xfrms); +		if (err < 0) +			goto inc_error; +		if (num_pols == 0) +			return NULL; +		if (num_xfrms <= 0) +			goto make_dummy_bundle; +	} + +	new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family, dst_orig); +	if (IS_ERR(new_xdst)) { +		err = PTR_ERR(new_xdst); +		if (err != -EAGAIN) +			goto error; +		if (oldflo == NULL) +			goto make_dummy_bundle; +		dst_hold(&xdst->u.dst); +		return oldflo; +	} + +	/* Kill the previous bundle */ +	if (xdst) { +		/* The policies were stolen for newly generated bundle */ +		xdst->num_pols = 0; +		dst_free(&xdst->u.dst); +	} + +	/* Flow cache does not have reference, it dst_free()'s, +	 * but we do need to return one reference for original caller */ +	dst_hold(&new_xdst->u.dst); +	return &new_xdst->flo; + +make_dummy_bundle: +	/* We found policies, but there's no bundles to instantiate: +	 * either because the policy blocks, has no transformations or +	 * we could not build template (no xfrm_states).*/ +	xdst = xfrm_alloc_dst(net, family); +	if (IS_ERR(xdst)) { +		xfrm_pols_put(pols, num_pols); +		return ERR_CAST(xdst); +	} +	xdst->num_pols = num_pols; +	xdst->num_xfrms = num_xfrms; +	memcpy(xdst->pols, pols, sizeof(struct xfrm_policy*) * num_pols); + +	dst_hold(&xdst->u.dst); +	return &xdst->flo; + +inc_error: +	XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); +error: +	if (xdst != NULL) +		dst_free(&xdst->u.dst); +	else +		xfrm_pols_put(pols, num_pols); +	return ERR_PTR(err); +}  /* Main function: finds/creates a bundle for given flow.   * @@ -1555,245 +1726,152 @@ static int stale_bundle(struct dst_entry *dst);  int __xfrm_lookup(struct net *net, struct dst_entry **dst_p, struct flowi *fl,  		  struct sock *sk, int flags)  { -	struct xfrm_policy *policy;  	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; -	int npols; -	int pol_dead; -	int xfrm_nr; -	int pi; -	struct xfrm_state *xfrm[XFRM_MAX_DEPTH]; -	struct dst_entry *dst, *dst_orig = *dst_p; -	int nx = 0; -	int err; -	u32 genid; -	u16 family; +	struct flow_cache_object *flo; +	struct xfrm_dst *xdst; +	struct dst_entry *dst, *dst_orig = *dst_p, *route; +	u16 family = dst_orig->ops->family;  	u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT); +	int i, err, num_pols, num_xfrms, drop_pols = 0;  restart: -	genid = atomic_read(&flow_cache_genid); -	policy = NULL; -	for (pi = 0; pi < ARRAY_SIZE(pols); pi++) -		pols[pi] = NULL; -	npols = 0; -	pol_dead = 0; -	xfrm_nr = 0; +	dst = NULL; +	xdst = NULL; +	route = NULL;  	if (sk && sk->sk_policy[XFRM_POLICY_OUT]) { -		policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl); -		err = PTR_ERR(policy); -		if (IS_ERR(policy)) { -			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); +		num_pols = 1; +		pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl); +		err = xfrm_expand_policies(fl, family, pols, +					   &num_pols, &num_xfrms); +		if (err < 0)  			goto dropdst; + +		if (num_pols) { +			if (num_xfrms <= 0) { +				drop_pols = num_pols; +				goto no_transform; +			} + +			xdst = xfrm_resolve_and_create_bundle( +					pols, num_pols, fl, +					family, dst_orig); +			if (IS_ERR(xdst)) { +				xfrm_pols_put(pols, num_pols); +				err = PTR_ERR(xdst); +				goto dropdst; +			} + +			spin_lock_bh(&xfrm_policy_sk_bundle_lock); +			xdst->u.dst.next = xfrm_policy_sk_bundles; +			xfrm_policy_sk_bundles = &xdst->u.dst; +			spin_unlock_bh(&xfrm_policy_sk_bundle_lock); + +			route = xdst->route;  		}  	} -	if (!policy) { +	if (xdst == NULL) {  		/* To accelerate a bit...  */  		if ((dst_orig->flags & DST_NOXFRM) ||  		    !net->xfrm.policy_count[XFRM_POLICY_OUT])  			goto nopol; -		policy = flow_cache_lookup(net, fl, dst_orig->ops->family, -					   dir, xfrm_policy_lookup); -		err = PTR_ERR(policy); -		if (IS_ERR(policy)) { -			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); +		flo = flow_cache_lookup(net, fl, family, dir, +					xfrm_bundle_lookup, dst_orig); +		if (flo == NULL) +			goto nopol; +		if (IS_ERR(flo)) { +			err = PTR_ERR(flo);  			goto dropdst;  		} -	} - -	if (!policy) -		goto nopol; - -	family = dst_orig->ops->family; -	pols[0] = policy; -	npols ++; -	xfrm_nr += pols[0]->xfrm_nr; - -	err = -ENOENT; -	if ((flags & XFRM_LOOKUP_ICMP) && !(policy->flags & XFRM_POLICY_ICMP)) -		goto error; - -	policy->curlft.use_time = get_seconds(); - -	switch (policy->action) { -	default: -	case XFRM_POLICY_BLOCK: -		/* Prohibit the flow */ -		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLBLOCK); -		err = -EPERM; -		goto error; - -	case XFRM_POLICY_ALLOW: -#ifndef CONFIG_XFRM_SUB_POLICY -		if (policy->xfrm_nr == 0) { -			/* Flow passes not transformed. */ -			xfrm_pol_put(policy); -			return 0; -		} -#endif - -		/* Try to find matching bundle. -		 * -		 * LATER: help from flow cache. It is optional, this -		 * is required only for output policy. -		 */ -		dst = xfrm_find_bundle(fl, policy, family); -		if (IS_ERR(dst)) { -			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR); -			err = PTR_ERR(dst); -			goto error; -		} - -		if (dst) -			break; +		xdst = container_of(flo, struct xfrm_dst, flo); -#ifdef CONFIG_XFRM_SUB_POLICY -		if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) { -			pols[1] = xfrm_policy_lookup_bytype(net, -							    XFRM_POLICY_TYPE_MAIN, -							    fl, family, -							    XFRM_POLICY_OUT); -			if (pols[1]) { -				if (IS_ERR(pols[1])) { -					XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); -					err = PTR_ERR(pols[1]); -					goto error; -				} -				if (pols[1]->action == XFRM_POLICY_BLOCK) { -					XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLBLOCK); -					err = -EPERM; -					goto error; -				} -				npols ++; -				xfrm_nr += pols[1]->xfrm_nr; -			} -		} +		num_pols = xdst->num_pols; +		num_xfrms = xdst->num_xfrms; +		memcpy(pols, xdst->pols, sizeof(struct xfrm_policy*) * num_pols); +		route = xdst->route; +	} -		/* -		 * Because neither flowi nor bundle information knows about -		 * transformation template size. On more than one policy usage -		 * we can realize whether all of them is bypass or not after -		 * they are searched. See above not-transformed bypass -		 * is surrounded by non-sub policy configuration, too. -		 */ -		if (xfrm_nr == 0) { -			/* Flow passes not transformed. */ -			xfrm_pols_put(pols, npols); -			return 0; +	dst = &xdst->u.dst; +	if (route == NULL && num_xfrms > 0) { +		/* The only case when xfrm_bundle_lookup() returns a +		 * bundle with null route, is when the template could +		 * not be resolved. It means policies are there, but +		 * bundle could not be created, since we don't yet +		 * have the xfrm_state's. We need to wait for KM to +		 * negotiate new SA's or bail out with error.*/ +		if (net->xfrm.sysctl_larval_drop) { +			/* EREMOTE tells the caller to generate +			 * a one-shot blackhole route. */ +			dst_release(dst); +			xfrm_pols_put(pols, num_pols); +			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); +			return -EREMOTE;  		} +		if (flags & XFRM_LOOKUP_WAIT) { +			DECLARE_WAITQUEUE(wait, current); -#endif -		nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family); - -		if (unlikely(nx<0)) { -			err = nx; -			if (err == -EAGAIN && net->xfrm.sysctl_larval_drop) { -				/* EREMOTE tells the caller to generate -				 * a one-shot blackhole route. -				 */ -				XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); -				xfrm_pol_put(policy); -				return -EREMOTE; -			} -			if (err == -EAGAIN && (flags & XFRM_LOOKUP_WAIT)) { -				DECLARE_WAITQUEUE(wait, current); - -				add_wait_queue(&net->xfrm.km_waitq, &wait); -				set_current_state(TASK_INTERRUPTIBLE); -				schedule(); -				set_current_state(TASK_RUNNING); -				remove_wait_queue(&net->xfrm.km_waitq, &wait); - -				nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family); +			add_wait_queue(&net->xfrm.km_waitq, &wait); +			set_current_state(TASK_INTERRUPTIBLE); +			schedule(); +			set_current_state(TASK_RUNNING); +			remove_wait_queue(&net->xfrm.km_waitq, &wait); -				if (nx == -EAGAIN && signal_pending(current)) { -					XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); -					err = -ERESTART; -					goto error; -				} -				if (nx == -EAGAIN || -				    genid != atomic_read(&flow_cache_genid)) { -					xfrm_pols_put(pols, npols); -					goto restart; -				} -				err = nx; -			} -			if (err < 0) { -				XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); -				goto error; +			if (!signal_pending(current)) { +				dst_release(dst); +				goto restart;  			} -		} -		if (nx == 0) { -			/* Flow passes not transformed. */ -			xfrm_pols_put(pols, npols); -			return 0; -		} -		dst = xfrm_bundle_create(policy, xfrm, nx, fl, dst_orig); -		err = PTR_ERR(dst); -		if (IS_ERR(dst)) { -			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR); -			goto error; -		} +			err = -ERESTART; +		} else +			err = -EAGAIN; -		for (pi = 0; pi < npols; pi++) { -			read_lock_bh(&pols[pi]->lock); -			pol_dead |= pols[pi]->walk.dead; -			read_unlock_bh(&pols[pi]->lock); -		} +		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); +		goto error; +	} -		write_lock_bh(&policy->lock); -		if (unlikely(pol_dead || stale_bundle(dst))) { -			/* Wow! While we worked on resolving, this -			 * policy has gone. Retry. It is not paranoia, -			 * we just cannot enlist new bundle to dead object. -			 * We can't enlist stable bundles either. -			 */ -			write_unlock_bh(&policy->lock); -			dst_free(dst); +no_transform: +	if (num_pols == 0) +		goto nopol; -			if (pol_dead) -				XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLDEAD); -			else -				XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR); -			err = -EHOSTUNREACH; -			goto error; -		} +	if ((flags & XFRM_LOOKUP_ICMP) && +	    !(pols[0]->flags & XFRM_POLICY_ICMP)) { +		err = -ENOENT; +		goto error; +	} -		if (npols > 1) -			err = xfrm_dst_update_parent(dst, &pols[1]->selector); -		else -			err = xfrm_dst_update_origin(dst, fl); -		if (unlikely(err)) { -			write_unlock_bh(&policy->lock); -			dst_free(dst); -			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR); -			goto error; -		} +	for (i = 0; i < num_pols; i++) +		pols[i]->curlft.use_time = get_seconds(); -		dst->next = policy->bundles; -		policy->bundles = dst; -		dst_hold(dst); -		write_unlock_bh(&policy->lock); +	if (num_xfrms < 0) { +		/* Prohibit the flow */ +		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLBLOCK); +		err = -EPERM; +		goto error; +	} else if (num_xfrms > 0) { +		/* Flow transformed */ +		*dst_p = dst; +		dst_release(dst_orig); +	} else { +		/* Flow passes untransformed */ +		dst_release(dst);  	} -	*dst_p = dst; -	dst_release(dst_orig); -	xfrm_pols_put(pols, npols); +ok: +	xfrm_pols_put(pols, drop_pols);  	return 0; +nopol: +	if (!(flags & XFRM_LOOKUP_ICMP)) +		goto ok; +	err = -ENOENT;  error: -	xfrm_pols_put(pols, npols); +	dst_release(dst);  dropdst:  	dst_release(dst_orig);  	*dst_p = NULL; +	xfrm_pols_put(pols, drop_pols);  	return err; - -nopol: -	err = -ENOENT; -	if (flags & XFRM_LOOKUP_ICMP) -		goto dropdst; -	return 0;  }  EXPORT_SYMBOL(__xfrm_lookup); @@ -1952,9 +2030,16 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,  		}  	} -	if (!pol) -		pol = flow_cache_lookup(net, &fl, family, fl_dir, -					xfrm_policy_lookup); +	if (!pol) { +		struct flow_cache_object *flo; + +		flo = flow_cache_lookup(net, &fl, family, fl_dir, +					xfrm_policy_lookup, NULL); +		if (IS_ERR_OR_NULL(flo)) +			pol = ERR_CAST(flo); +		else +			pol = container_of(flo, struct xfrm_policy, flo); +	}  	if (IS_ERR(pol)) {  		XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR); @@ -2138,71 +2223,24 @@ static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)  	return dst;  } -static void prune_one_bundle(struct xfrm_policy *pol, int (*func)(struct dst_entry *), struct dst_entry **gc_list_p) -{ -	struct dst_entry *dst, **dstp; - -	write_lock(&pol->lock); -	dstp = &pol->bundles; -	while ((dst=*dstp) != NULL) { -		if (func(dst)) { -			*dstp = dst->next; -			dst->next = *gc_list_p; -			*gc_list_p = dst; -		} else { -			dstp = &dst->next; -		} -	} -	write_unlock(&pol->lock); -} - -static void xfrm_prune_bundles(struct net *net, int (*func)(struct dst_entry *)) +static void __xfrm_garbage_collect(struct net *net)  { -	struct dst_entry *gc_list = NULL; -	int dir; +	struct dst_entry *head, *next; -	read_lock_bh(&xfrm_policy_lock); -	for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) { -		struct xfrm_policy *pol; -		struct hlist_node *entry; -		struct hlist_head *table; -		int i; - -		hlist_for_each_entry(pol, entry, -				     &net->xfrm.policy_inexact[dir], bydst) -			prune_one_bundle(pol, func, &gc_list); +	flow_cache_flush(); -		table = net->xfrm.policy_bydst[dir].table; -		for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) { -			hlist_for_each_entry(pol, entry, table + i, bydst) -				prune_one_bundle(pol, func, &gc_list); -		} -	} -	read_unlock_bh(&xfrm_policy_lock); +	spin_lock_bh(&xfrm_policy_sk_bundle_lock); +	head = xfrm_policy_sk_bundles; +	xfrm_policy_sk_bundles = NULL; +	spin_unlock_bh(&xfrm_policy_sk_bundle_lock); -	while (gc_list) { -		struct dst_entry *dst = gc_list; -		gc_list = dst->next; -		dst_free(dst); +	while (head) { +		next = head->next; +		dst_free(head); +		head = next;  	}  } -static int unused_bundle(struct dst_entry *dst) -{ -	return !atomic_read(&dst->__refcnt); -} - -static void __xfrm_garbage_collect(struct net *net) -{ -	xfrm_prune_bundles(net, unused_bundle); -} - -static int xfrm_flush_bundles(struct net *net) -{ -	xfrm_prune_bundles(net, stale_bundle); -	return 0; -} -  static void xfrm_init_pmtu(struct dst_entry *dst)  {  	do { @@ -2260,7 +2298,9 @@ int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first,  			return 0;  		if (dst->xfrm->km.state != XFRM_STATE_VALID)  			return 0; -		if (xdst->genid != dst->xfrm->genid) +		if (xdst->xfrm_genid != dst->xfrm->genid) +			return 0; +		if (xdst->policy_genid != atomic_read(&xdst->pols[0]->genid))  			return 0;  		if (strict && fl && @@ -2425,7 +2465,7 @@ static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void  	switch (event) {  	case NETDEV_DOWN: -		xfrm_flush_bundles(dev_net(dev)); +		__xfrm_garbage_collect(dev_net(dev));  	}  	return NOTIFY_DONE;  } @@ -2531,7 +2571,6 @@ static void xfrm_policy_fini(struct net *net)  	audit_info.sessionid = -1;  	audit_info.secid = 0;  	xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, &audit_info); -	flush_work(&xfrm_policy_gc_work);  	WARN_ON(!list_empty(&net->xfrm.policy_all)); @@ -2757,7 +2796,6 @@ static int xfrm_policy_migrate(struct xfrm_policy *pol,  			       struct xfrm_migrate *m, int num_migrate)  {  	struct xfrm_migrate *mp; -	struct dst_entry *dst;  	int i, j, n = 0;  	write_lock_bh(&pol->lock); @@ -2782,10 +2820,7 @@ static int xfrm_policy_migrate(struct xfrm_policy *pol,  			       sizeof(pol->xfrm_vec[i].saddr));  			pol->xfrm_vec[i].encap_family = mp->new_family;  			/* flush bundles */ -			while ((dst = pol->bundles) != NULL) { -				pol->bundles = dst->next; -				dst_free(dst); -			} +			atomic_inc(&pol->genid);  		}  	}  |