diff options
Diffstat (limited to 'net/ipv6/udp.c')
| -rw-r--r-- | net/ipv6/udp.c | 269 | 
1 files changed, 213 insertions, 56 deletions
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 3a60f12b34e..69ebdbe78c4 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -53,7 +53,7 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)  {  	const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr;  	const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2); -	__be32 sk_rcv_saddr = inet_sk(sk)->rcv_saddr; +	__be32 sk1_rcv_saddr = inet_sk(sk)->inet_rcv_saddr;  	__be32 sk2_rcv_saddr = inet_rcv_saddr(sk2);  	int sk_ipv6only = ipv6_only_sock(sk);  	int sk2_ipv6only = inet_v6_ipv6only(sk2); @@ -63,8 +63,8 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)  	/* if both are mapped, treat as IPv4 */  	if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED)  		return (!sk2_ipv6only && -			(!sk_rcv_saddr || !sk2_rcv_saddr || -			  sk_rcv_saddr == sk2_rcv_saddr)); +			(!sk1_rcv_saddr || !sk2_rcv_saddr || +			  sk1_rcv_saddr == sk2_rcv_saddr));  	if (addr_type2 == IPV6_ADDR_ANY &&  	    !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED)) @@ -81,9 +81,33 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)  	return 0;  } +static unsigned int udp6_portaddr_hash(struct net *net, +				       const struct in6_addr *addr6, +				       unsigned int port) +{ +	unsigned int hash, mix = net_hash_mix(net); + +	if (ipv6_addr_any(addr6)) +		hash = jhash_1word(0, mix); +	else if (ipv6_addr_v4mapped(addr6)) +		hash = jhash_1word(addr6->s6_addr32[3], mix); +	else +		hash = jhash2(addr6->s6_addr32, 4, mix); + +	return hash ^ port; +} + +  int udp_v6_get_port(struct sock *sk, unsigned short snum)  { -	return udp_lib_get_port(sk, snum, ipv6_rcv_saddr_equal); +	unsigned int hash2_nulladdr = +		udp6_portaddr_hash(sock_net(sk), &in6addr_any, snum); +	unsigned int hash2_partial =  +		udp6_portaddr_hash(sock_net(sk), &inet6_sk(sk)->rcv_saddr, 0); + +	/* precompute partial secondary hash */ +	udp_sk(sk)->udp_portaddr_hash = hash2_partial; +	return udp_lib_get_port(sk, snum, ipv6_rcv_saddr_equal, hash2_nulladdr);  }  static inline int compute_score(struct sock *sk, struct net *net, @@ -94,14 +118,14 @@ static inline int compute_score(struct sock *sk, struct net *net,  {  	int score = -1; -	if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum && +	if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum &&  			sk->sk_family == PF_INET6) {  		struct ipv6_pinfo *np = inet6_sk(sk);  		struct inet_sock *inet = inet_sk(sk);  		score = 0; -		if (inet->dport) { -			if (inet->dport != sport) +		if (inet->inet_dport) { +			if (inet->inet_dport != sport)  				return -1;  			score++;  		} @@ -124,6 +148,86 @@ static inline int compute_score(struct sock *sk, struct net *net,  	return score;  } +#define SCORE2_MAX (1 + 1 + 1) +static inline int compute_score2(struct sock *sk, struct net *net, +				const struct in6_addr *saddr, __be16 sport, +				const struct in6_addr *daddr, unsigned short hnum, +				int dif) +{ +	int score = -1; + +	if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum && +			sk->sk_family == PF_INET6) { +		struct ipv6_pinfo *np = inet6_sk(sk); +		struct inet_sock *inet = inet_sk(sk); + +		if (!ipv6_addr_equal(&np->rcv_saddr, daddr)) +			return -1; +		score = 0; +		if (inet->inet_dport) { +			if (inet->inet_dport != sport) +				return -1; +			score++; +		} +		if (!ipv6_addr_any(&np->daddr)) { +			if (!ipv6_addr_equal(&np->daddr, saddr)) +				return -1; +			score++; +		} +		if (sk->sk_bound_dev_if) { +			if (sk->sk_bound_dev_if != dif) +				return -1; +			score++; +		} +	} +	return score; +} + + +/* called with read_rcu_lock() */ +static struct sock *udp6_lib_lookup2(struct net *net, +		const struct in6_addr *saddr, __be16 sport, +		const struct in6_addr *daddr, unsigned int hnum, int dif, +		struct udp_hslot *hslot2, unsigned int slot2) +{ +	struct sock *sk, *result; +	struct hlist_nulls_node *node; +	int score, badness; + +begin: +	result = NULL; +	badness = -1; +	udp_portaddr_for_each_entry_rcu(sk, node, &hslot2->head) { +		score = compute_score2(sk, net, saddr, sport, +				      daddr, hnum, dif); +		if (score > badness) { +			result = sk; +			badness = score; +			if (score == SCORE2_MAX) +				goto exact_match; +		} +	} +	/* +	 * if the nulls value we got at the end of this lookup is +	 * not the expected one, we must restart lookup. +	 * We probably met an item that was moved to another chain. +	 */ +	if (get_nulls_value(node) != slot2) +		goto begin; + +	if (result) { +exact_match: +		if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) +			result = NULL; +		else if (unlikely(compute_score2(result, net, saddr, sport, +				  daddr, hnum, dif) < badness)) { +			sock_put(result); +			goto begin; +		} +	} +	return result; +} +  static struct sock *__udp6_lib_lookup(struct net *net,  				      struct in6_addr *saddr, __be16 sport,  				      struct in6_addr *daddr, __be16 dport, @@ -132,11 +236,35 @@ static struct sock *__udp6_lib_lookup(struct net *net,  	struct sock *sk, *result;  	struct hlist_nulls_node *node;  	unsigned short hnum = ntohs(dport); -	unsigned int hash = udp_hashfn(net, hnum); -	struct udp_hslot *hslot = &udptable->hash[hash]; +	unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask); +	struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];  	int score, badness;  	rcu_read_lock(); +	if (hslot->count > 10) { +		hash2 = udp6_portaddr_hash(net, daddr, hnum); +		slot2 = hash2 & udptable->mask; +		hslot2 = &udptable->hash2[slot2]; +		if (hslot->count < hslot2->count) +			goto begin; + +		result = udp6_lib_lookup2(net, saddr, sport, +					  daddr, hnum, dif, +					  hslot2, slot2); +		if (!result) { +			hash2 = udp6_portaddr_hash(net, &in6addr_any, hnum); +			slot2 = hash2 & udptable->mask; +			hslot2 = &udptable->hash2[slot2]; +			if (hslot->count < hslot2->count) +				goto begin; + +			result = udp6_lib_lookup2(net, &in6addr_any, sport, +						  daddr, hnum, dif, +						  hslot2, slot2); +		} +		rcu_read_unlock(); +		return result; +	}  begin:  	result = NULL;  	badness = -1; @@ -152,7 +280,7 @@ begin:  	 * not the expected one, we must restart lookup.  	 * We probably met an item that was moved to another chain.  	 */ -	if (get_nulls_value(node) != hash) +	if (get_nulls_value(node) != slot)  		goto begin;  	if (result) { @@ -252,7 +380,7 @@ try_again:  					UDP_MIB_INDATAGRAMS, is_udplite);  	} -	sock_recv_timestamp(msg, sk, skb); +	sock_recv_ts_and_drops(msg, sk, skb);  	/* Copy the address. */  	if (msg->msg_name) { @@ -265,8 +393,8 @@ try_again:  		sin6->sin6_scope_id = 0;  		if (is_udp4) -			ipv6_addr_set(&sin6->sin6_addr, 0, 0, -				      htonl(0xffff), ip_hdr(skb)->saddr); +			ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, +					       &sin6->sin6_addr);  		else {  			ipv6_addr_copy(&sin6->sin6_addr,  				       &ipv6_hdr(skb)->saddr); @@ -288,9 +416,7 @@ try_again:  		err = ulen;  out_free: -	lock_sock(sk); -	skb_free_datagram(sk, skb); -	release_sock(sk); +	skb_free_datagram_locked(sk, skb);  out:  	return err; @@ -385,18 +511,18 @@ int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)  			goto drop;  	} -	if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) { +	if ((rc = sock_queue_rcv_skb(sk, skb)) < 0) {  		/* Note that an ENOMEM error is charged twice */ -		if (rc == -ENOMEM) { +		if (rc == -ENOMEM)  			UDP6_INC_STATS_BH(sock_net(sk),  					UDP_MIB_RCVBUFERRORS, is_udplite); -			atomic_inc(&sk->sk_drops); -		} -		goto drop; +		goto drop_no_sk_drops_inc;  	}  	return 0;  drop: +	atomic_inc(&sk->sk_drops); +drop_no_sk_drops_inc:  	UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);  	kfree_skb(skb);  	return -1; @@ -417,10 +543,11 @@ static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk,  		if (!net_eq(sock_net(s), net))  			continue; -		if (s->sk_hash == num && s->sk_family == PF_INET6) { +		if (udp_sk(s)->udp_port_hash == num && +		    s->sk_family == PF_INET6) {  			struct ipv6_pinfo *np = inet6_sk(s); -			if (inet->dport) { -				if (inet->dport != rmt_port) +			if (inet->inet_dport) { +				if (inet->inet_dport != rmt_port)  					continue;  			}  			if (!ipv6_addr_any(&np->daddr) && @@ -442,6 +569,33 @@ static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk,  	return NULL;  } +static void flush_stack(struct sock **stack, unsigned int count, +			struct sk_buff *skb, unsigned int final) +{ +	unsigned int i; +	struct sock *sk; +	struct sk_buff *skb1; + +	for (i = 0; i < count; i++) { +		skb1 = (i == final) ? skb : skb_clone(skb, GFP_ATOMIC); + +		sk = stack[i]; +		if (skb1) { +			bh_lock_sock(sk); +			if (!sock_owned_by_user(sk)) +				udpv6_queue_rcv_skb(sk, skb1); +			else +				sk_add_backlog(sk, skb1); +			bh_unlock_sock(sk); +		} else { +			atomic_inc(&sk->sk_drops); +			UDP6_INC_STATS_BH(sock_net(sk), +					UDP_MIB_RCVBUFERRORS, IS_UDPLITE(sk)); +			UDP6_INC_STATS_BH(sock_net(sk), +					UDP_MIB_INERRORS, IS_UDPLITE(sk)); +		} +	} +}  /*   * Note: called only from the BH handler context,   * so we don't need to lock the hashes. @@ -450,41 +604,43 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,  		struct in6_addr *saddr, struct in6_addr *daddr,  		struct udp_table *udptable)  { -	struct sock *sk, *sk2; +	struct sock *sk, *stack[256 / sizeof(struct sock *)];  	const struct udphdr *uh = udp_hdr(skb); -	struct udp_hslot *hslot = &udptable->hash[udp_hashfn(net, ntohs(uh->dest))]; +	struct udp_hslot *hslot = udp_hashslot(udptable, net, ntohs(uh->dest));  	int dif; +	unsigned int i, count = 0;  	spin_lock(&hslot->lock);  	sk = sk_nulls_head(&hslot->head);  	dif = inet6_iif(skb);  	sk = udp_v6_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif); -	if (!sk) { -		kfree_skb(skb); -		goto out; -	} - -	sk2 = sk; -	while ((sk2 = udp_v6_mcast_next(net, sk_nulls_next(sk2), uh->dest, daddr, -					uh->source, saddr, dif))) { -		struct sk_buff *buff = skb_clone(skb, GFP_ATOMIC); -		if (buff) { -			bh_lock_sock(sk2); -			if (!sock_owned_by_user(sk2)) -				udpv6_queue_rcv_skb(sk2, buff); -			else -				sk_add_backlog(sk2, buff); -			bh_unlock_sock(sk2); +	while (sk) { +		stack[count++] = sk; +		sk = udp_v6_mcast_next(net, sk_nulls_next(sk), uh->dest, daddr, +				       uh->source, saddr, dif); +		if (unlikely(count == ARRAY_SIZE(stack))) { +			if (!sk) +				break; +			flush_stack(stack, count, skb, ~0); +			count = 0;  		}  	} -	bh_lock_sock(sk); -	if (!sock_owned_by_user(sk)) -		udpv6_queue_rcv_skb(sk, skb); -	else -		sk_add_backlog(sk, skb); -	bh_unlock_sock(sk); -out: +	/* +	 * before releasing the lock, we must take reference on sockets +	 */ +	for (i = 0; i < count; i++) +		sock_hold(stack[i]); +  	spin_unlock(&hslot->lock); + +	if (count) { +		flush_stack(stack, count, skb, count - 1); + +		for (i = 0; i < count; i++) +			sock_put(stack[i]); +	} else { +		kfree_skb(skb); +	}  	return 0;  } @@ -794,7 +950,7 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,  		if (ipv6_addr_v4mapped(daddr)) {  			struct sockaddr_in sin;  			sin.sin_family = AF_INET; -			sin.sin_port = sin6 ? sin6->sin6_port : inet->dport; +			sin.sin_port = sin6 ? sin6->sin6_port : inet->inet_dport;  			sin.sin_addr.s_addr = daddr->s6_addr32[3];  			msg->msg_name = &sin;  			msg->msg_namelen = sizeof(sin); @@ -867,7 +1023,7 @@ do_udp_sendmsg:  		if (sk->sk_state != TCP_ESTABLISHED)  			return -EDESTADDRREQ; -		fl.fl_ip_dport = inet->dport; +		fl.fl_ip_dport = inet->inet_dport;  		daddr = &np->daddr;  		fl.fl6_flowlabel = np->flow_label;  		connected = 1; @@ -879,6 +1035,8 @@ do_udp_sendmsg:  	if (!fl.oif)  		fl.oif = np->sticky_pktinfo.ipi6_ifindex; +	fl.mark = sk->sk_mark; +  	if (msg->msg_controllen) {  		opt = &opt_space;  		memset(opt, 0, sizeof(struct ipv6_txoptions)); @@ -911,7 +1069,7 @@ do_udp_sendmsg:  		fl.fl6_dst.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */  	if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr))  		ipv6_addr_copy(&fl.fl6_src, &np->saddr); -	fl.fl_ip_sport = inet->sport; +	fl.fl_ip_sport = inet->inet_sport;  	/* merge ip6_build_xmit from ip6_output */  	if (opt && opt->srcrt) { @@ -1192,10 +1350,10 @@ static void udp6_sock_seq_show(struct seq_file *seq, struct sock *sp, int bucket  	dest  = &np->daddr;  	src   = &np->rcv_saddr; -	destp = ntohs(inet->dport); -	srcp  = ntohs(inet->sport); +	destp = ntohs(inet->inet_dport); +	srcp  = ntohs(inet->inet_sport);  	seq_printf(seq, -		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " +		   "%5d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "  		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d\n",  		   bucket,  		   src->s6_addr32[0], src->s6_addr32[1], @@ -1284,7 +1442,6 @@ static struct inet_protosw udpv6_protosw = {  	.protocol =  IPPROTO_UDP,  	.prot =      &udpv6_prot,  	.ops =       &inet6_dgram_ops, -	.capability =-1,  	.no_check =  UDP_CSUM_DEFAULT,  	.flags =     INET_PROTOSW_PERMANENT,  };  |