Diffstat (limited to 'drivers/net/tun.c')
-rw-r--r--	drivers/net/tun.c	153
1 file changed, 140 insertions, 13 deletions
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 987aeefbc77..c62163e272c 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -22,7 +22,7 @@
  *    Add TUNSETLINK ioctl to set the link encapsulation
  *
  *  Mark Smith <markzzzsmith@yahoo.com.au>
- *    Use random_ether_addr() for tap MAC address.
+ *    Use eth_random_addr() for tap MAC address.
  *
  *  Harald Roelle <harald.roelle@ifi.lmu.de>  2004/04/20
  *    Fixes in packet dropping, queue length setting and queue wakeup.
@@ -100,6 +100,8 @@ do {								\
 } while (0)
 #endif
 
+#define GOODCOPY_LEN 128
+
 #define FLT_EXACT_COUNT 8
 struct tap_filter {
 	unsigned int    count;    /* Number of addrs. Zero means disabled */
@@ -358,6 +360,8 @@ static void tun_free_netdev(struct net_device *dev)
 {
 	struct tun_struct *tun = netdev_priv(dev);
 
+	BUG_ON(!test_bit(SOCK_EXTERNALLY_ALLOCATED, &tun->socket.flags));
+
 	sk_release_kernel(tun->socket.sk);
 }
 
@@ -414,6 +418,8 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	/* Orphan the skb - required as we might hang on to it
 	 * for indefinite time. */
+	if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
+		goto drop;
 	skb_orphan(skb);
 
 	/* Enqueue packet */
@@ -600,19 +606,100 @@ static struct sk_buff *tun_alloc_skb(struct tun_struct *tun,
 	return skb;
 }
 
+/* set skb frags from iovec, this can move to core network code for reuse */
+static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from,
+				  int offset, size_t count)
+{
+	int len = iov_length(from, count) - offset;
+	int copy = skb_headlen(skb);
+	int size, offset1 = 0;
+	int i = 0;
+
+	/* Skip over from offset */
+	while (count && (offset >= from->iov_len)) {
+		offset -= from->iov_len;
+		++from;
+		--count;
+	}
+
+	/* copy up to skb headlen */
+	while (count && (copy > 0)) {
+		size = min_t(unsigned int, copy, from->iov_len - offset);
+		if (copy_from_user(skb->data + offset1, from->iov_base + offset,
+				   size))
+			return -EFAULT;
+		if (copy > size) {
+			++from;
+			--count;
+			offset = 0;
+		} else
+			offset += size;
+		copy -= size;
+		offset1 += size;
+	}
+
+	if (len == offset1)
+		return 0;
+
+	while (count--) {
+		struct page *page[MAX_SKB_FRAGS];
+		int num_pages;
+		unsigned long base;
+		unsigned long truesize;
+
+		len = from->iov_len - offset;
+		if (!len) {
+			offset = 0;
+			++from;
+			continue;
+		}
+		base = (unsigned long)from->iov_base + offset;
+		size = ((base & ~PAGE_MASK) + len + ~PAGE_MASK) >> PAGE_SHIFT;
+		if (i + size > MAX_SKB_FRAGS)
+			return -EMSGSIZE;
+		num_pages = get_user_pages_fast(base, size, 0, &page[i]);
+		if (num_pages != size) {
+			for (i = 0; i < num_pages; i++)
+				put_page(page[i]);
+			return -EFAULT;
+		}
+		truesize = size * PAGE_SIZE;
+		skb->data_len += len;
+		skb->len += len;
+		skb->truesize += truesize;
+		atomic_add(truesize, &skb->sk->sk_wmem_alloc);
+		while (len) {
+			int off = base & ~PAGE_MASK;
+			int size = min_t(int, len, PAGE_SIZE - off);
+			__skb_fill_page_desc(skb, i, page[i], off, size);
+			skb_shinfo(skb)->nr_frags++;
+			/* increase sk_wmem_alloc */
+			base += size;
+			len -= size;
+			i++;
+		}
+		offset = 0;
+		++from;
+	}
+	return 0;
+}
+
 /* Get packet from user space buffer */
-static ssize_t tun_get_user(struct tun_struct *tun,
-			    const struct iovec *iv, size_t count,
-			    int noblock)
+static ssize_t tun_get_user(struct tun_struct *tun, void *msg_control,
+			    const struct iovec *iv, size_t total_len,
+			    size_t count, int noblock)
 {
 	struct tun_pi pi = { 0, cpu_to_be16(ETH_P_IP) };
 	struct sk_buff *skb;
-	size_t len = count, align = NET_SKB_PAD;
+	size_t len = total_len, align = NET_SKB_PAD;
 	struct virtio_net_hdr gso = { 0 };
 	int offset = 0;
+	int copylen;
+	bool zerocopy = false;
+	int err;
 
 	if (!(tun->flags & TUN_NO_PI)) {
-		if ((len -= sizeof(pi)) > count)
+		if ((len -= sizeof(pi)) > total_len)
 			return -EINVAL;
 
 		if (memcpy_fromiovecend((void *)&pi, iv, 0, sizeof(pi)))
@@ -621,7 +708,7 @@ static ssize_t tun_get_user(struct tun_struct *tun,
 	}
 
 	if (tun->flags & TUN_VNET_HDR) {
-		if ((len -= tun->vnet_hdr_sz) > count)
+		if ((len -= tun->vnet_hdr_sz) > total_len)
 			return -EINVAL;
 
 		if (memcpy_fromiovecend((void *)&gso, iv, offset, sizeof(gso)))
@@ -643,14 +730,46 @@ static ssize_t tun_get_user(struct tun_struct *tun,
 			return -EINVAL;
 	}
 
-	skb = tun_alloc_skb(tun, align, len, gso.hdr_len, noblock);
+	if (msg_control)
+		zerocopy = true;
+
+	if (zerocopy) {
+		/* Userspace may produce vectors with count greater than
+		 * MAX_SKB_FRAGS, so we need to linearize parts of the skb
+		 * to let the rest of data to be fit in the frags.
+		 */
+		if (count > MAX_SKB_FRAGS) {
+			copylen = iov_length(iv, count - MAX_SKB_FRAGS);
+			if (copylen < offset)
+				copylen = 0;
+			else
+				copylen -= offset;
+		} else
+				copylen = 0;
+		/* There are 256 bytes to be copied in skb, so there is enough
+		 * room for skb expand head in case it is used.
+		 * The rest of the buffer is mapped from userspace.
+		 */
+		if (copylen < gso.hdr_len)
+			copylen = gso.hdr_len;
+		if (!copylen)
+			copylen = GOODCOPY_LEN;
+	} else
+		copylen = len;
+
+	skb = tun_alloc_skb(tun, align, copylen, gso.hdr_len, noblock);
 	if (IS_ERR(skb)) {
 		if (PTR_ERR(skb) != -EAGAIN)
 			tun->dev->stats.rx_dropped++;
 		return PTR_ERR(skb);
 	}
 
-	if (skb_copy_datagram_from_iovec(skb, 0, iv, offset, len)) {
+	if (zerocopy)
+		err = zerocopy_sg_from_iovec(skb, iv, offset, count);
+	else
+		err = skb_copy_datagram_from_iovec(skb, 0, iv, offset, len);
+
+	if (err) {
 		tun->dev->stats.rx_dropped++;
 		kfree_skb(skb);
 		return -EFAULT;
@@ -724,12 +843,18 @@ static ssize_t tun_get_user(struct tun_struct *tun,
 		skb_shinfo(skb)->gso_segs = 0;
 	}
 
+	/* copy skb_ubuf_info for callback when skb has no error */
+	if (zerocopy) {
+		skb_shinfo(skb)->destructor_arg = msg_control;
+		skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
+	}
+
 	netif_rx_ni(skb);
 
 	tun->dev->stats.rx_packets++;
 	tun->dev->stats.rx_bytes += len;
 
-	return count;
+	return total_len;
 }
 
 static ssize_t tun_chr_aio_write(struct kiocb *iocb, const struct iovec *iv,
@@ -744,7 +869,7 @@ static ssize_t tun_chr_aio_write(struct kiocb *iocb, const struct iovec *iv,
 
 	tun_debug(KERN_INFO, tun, "tun_chr_write %ld\n", count);
 
-	result = tun_get_user(tun, iv, iov_length(iv, count),
+	result = tun_get_user(tun, NULL, iv, iov_length(iv, count), count,
 			      file->f_flags & O_NONBLOCK);
 
 	tun_put(tun);
@@ -958,8 +1083,8 @@ static int tun_sendmsg(struct kiocb *iocb, struct socket *sock,
 		       struct msghdr *m, size_t total_len)
 {
 	struct tun_struct *tun = container_of(sock, struct tun_struct, socket);
-	return tun_get_user(tun, m->msg_iov, total_len,
-			    m->msg_flags & MSG_DONTWAIT);
+	return tun_get_user(tun, m->msg_control, m->msg_iov, total_len,
+			    m->msg_iovlen, m->msg_flags & MSG_DONTWAIT);
}
 
 static int tun_recvmsg(struct kiocb *iocb, struct socket *sock,
@@ -1115,6 +1240,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 		tun->flags = flags;
 		tun->txflt.count = 0;
 		tun->vnet_hdr_sz = sizeof(struct virtio_net_hdr);
+		set_bit(SOCK_EXTERNALLY_ALLOCATED, &tun->socket.flags);
 
 		err = -ENOMEM;
 		sk = sk_alloc(&init_net, AF_UNSPEC, GFP_KERNEL, &tun_proto);
@@ -1128,6 +1254,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 		sock_init_data(&tun->socket, sk);
 		sk->sk_write_space = tun_sock_write_space;
 		sk->sk_sndbuf = INT_MAX;
+		sock_set_flag(sk, SOCK_ZEROCOPY);
 
 		tun_sk(sk)->tun = tun;
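
A note on the zerocopy_sg_from_iovec() hunk above: each iovec segment is pinned with get_user_pages_fast(), and the number of pages a segment needs is simply the offset into its first page plus the segment length, rounded up to whole pages. The stand-alone sketch below (not part of the patch) illustrates that arithmetic; it assumes 4 KiB pages and the MAX_SKB_FRAGS value of 17 that mainline used with 4 KiB pages at the time, and count_frag_pages() is a made-up helper name for illustration, not a kernel API.

/*
 * Illustration of the page-rounding arithmetic in zerocopy_sg_from_iovec().
 * Assumptions: 4 KiB pages, MAX_SKB_FRAGS == 17; count_frag_pages() is a
 * hypothetical name used only for this sketch.
 */
#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define PAGE_MASK	(~(PAGE_SIZE - 1))
#define MAX_SKB_FRAGS	17

/* Pages touched by a user buffer [base, base + len): offset into the
 * first page plus the length, rounded up to whole pages. */
static unsigned long count_frag_pages(unsigned long base, unsigned long len)
{
	return ((base & ~PAGE_MASK) + len + ~PAGE_MASK) >> PAGE_SHIFT;
}

int main(void)
{
	/* 6000 bytes starting 100 bytes into a page:
	 * (100 + 6000 + 4095) >> 12 = 2 pages. */
	unsigned long base = 0x7f0000001000UL + 100;
	unsigned long len  = 6000;
	unsigned long pages = count_frag_pages(base, len);

	printf("pages pinned for this segment: %lu\n", pages);
	if (pages > MAX_SKB_FRAGS)
		printf("segment alone would exceed MAX_SKB_FRAGS\n");
	return 0;
}

If a segment would need more frag slots than remain, zerocopy_sg_from_iovec() returns -EMSGSIZE and tun_get_user() counts the packet as an rx drop; the copylen logic in the patch exists precisely to linearize the leading part of oversized vectors so the tail can still fit in MAX_SKB_FRAGS frags.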