diff options
Diffstat (limited to 'drivers/net/macvtap.c')
| -rw-r--r-- | drivers/net/macvtap.c | 119 | 
1 files changed, 63 insertions, 56 deletions
diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 9dccb1edfd2..3381c4f91a8 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -11,7 +11,6 @@  #include <linux/sched.h>  #include <linux/types.h>  #include <linux/slab.h> -#include <linux/init.h>  #include <linux/wait.h>  #include <linux/cdev.h>  #include <linux/idr.h> @@ -70,6 +69,11 @@ static const struct proto_ops macvtap_socket_ops;  #define RX_OFFLOADS (NETIF_F_GRO | NETIF_F_LRO)  #define TAP_FEATURES (NETIF_F_GSO | NETIF_F_SG) +static struct macvlan_dev *macvtap_get_vlan_rcu(const struct net_device *dev) +{ +	return rcu_dereference(dev->rx_handler_data); +} +  /*   * RCU usage:   * The macvtap_queue and the macvlan_dev are loosely coupled, the @@ -219,7 +223,7 @@ static struct macvtap_queue *macvtap_get_queue(struct net_device *dev,  		goto out;  	/* Check if we can use flow to select a queue */ -	rxq = skb_get_rxhash(skb); +	rxq = skb_get_hash(skb);  	if (rxq) {  		tap = rcu_dereference(vlan->taps[rxq % numvtaps]);  		goto out; @@ -271,24 +275,27 @@ static void macvtap_del_queues(struct net_device *dev)  		sock_put(&qlist[j]->sk);  } -/* - * Forward happens for data that gets sent from one macvlan - * endpoint to another one in bridge mode. We just take - * the skb and put it into the receive queue. - */ -static int macvtap_forward(struct net_device *dev, struct sk_buff *skb) +static rx_handler_result_t macvtap_handle_frame(struct sk_buff **pskb)  { -	struct macvlan_dev *vlan = netdev_priv(dev); -	struct macvtap_queue *q = macvtap_get_queue(dev, skb); +	struct sk_buff *skb = *pskb; +	struct net_device *dev = skb->dev; +	struct macvlan_dev *vlan; +	struct macvtap_queue *q;  	netdev_features_t features = TAP_FEATURES; +	vlan = macvtap_get_vlan_rcu(dev); +	if (!vlan) +		return RX_HANDLER_PASS; + +	q = macvtap_get_queue(dev, skb);  	if (!q) -		goto drop; +		return RX_HANDLER_PASS;  	if (skb_queue_len(&q->sk.sk_receive_queue) >= dev->tx_queue_len)  		goto drop; -	skb->dev = dev; +	skb_push(skb, ETH_HLEN); +  	/* Apply the forward feature mask so that we perform segmentation  	 * according to users wishes.  This only works if VNET_HDR is  	 * enabled. @@ -315,27 +322,27 @@ static int macvtap_forward(struct net_device *dev, struct sk_buff *skb)  			segs = nskb;  		}  	} else { +		/* If we receive a partial checksum and the tap side +		 * doesn't support checksum offload, compute the checksum. +		 * Note: it doesn't matter which checksum feature to +		 *        check, we either support them all or none. +		 */ +		if (skb->ip_summed == CHECKSUM_PARTIAL && +		    !(features & NETIF_F_ALL_CSUM) && +		    skb_checksum_help(skb)) +			goto drop;  		skb_queue_tail(&q->sk.sk_receive_queue, skb);  	}  wake_up:  	wake_up_interruptible_poll(sk_sleep(&q->sk), POLLIN | POLLRDNORM | POLLRDBAND); -	return NET_RX_SUCCESS; +	return RX_HANDLER_CONSUMED;  drop: +	/* Count errors/drops only here, thus don't care about args. */ +	macvlan_count_rx(vlan, 0, 0, 0);  	kfree_skb(skb); -	return NET_RX_DROP; -} - -/* - * Receive is for data from the external interface (lowerdev), - * in case of macvtap, we can treat that the same way as - * forward, which macvlan cannot. - */ -static int macvtap_receive(struct sk_buff *skb) -{ -	skb_push(skb, ETH_HLEN); -	return macvtap_forward(skb->dev, skb); +	return RX_HANDLER_CONSUMED;  }  static int macvtap_get_minor(struct macvlan_dev *vlan) @@ -385,6 +392,8 @@ static int macvtap_newlink(struct net *src_net,  			   struct nlattr *data[])  {  	struct macvlan_dev *vlan = netdev_priv(dev); +	int err; +  	INIT_LIST_HEAD(&vlan->queue_list);  	/* Since macvlan supports all offloads by default, make @@ -392,16 +401,20 @@ static int macvtap_newlink(struct net *src_net,  	 */  	vlan->tap_features = TUN_OFFLOADS; +	err = netdev_rx_handler_register(dev, macvtap_handle_frame, vlan); +	if (err) +		return err; +  	/* Don't put anything that may fail after macvlan_common_newlink  	 * because we can't undo what it does.  	 */ -	return macvlan_common_newlink(src_net, dev, tb, data, -				      macvtap_receive, macvtap_forward); +	return macvlan_common_newlink(src_net, dev, tb, data);  }  static void macvtap_dellink(struct net_device *dev,  			    struct list_head *head)  { +	netdev_rx_handler_unregister(dev);  	macvtap_del_queues(dev);  	macvlan_dellink(dev, head);  } @@ -588,7 +601,7 @@ static int macvtap_skb_from_vnet_hdr(struct sk_buff *skb,  	return 0;  } -static int macvtap_skb_to_vnet_hdr(const struct sk_buff *skb, +static void macvtap_skb_to_vnet_hdr(const struct sk_buff *skb,  				   struct virtio_net_hdr *vnet_hdr)  {  	memset(vnet_hdr, 0, sizeof(*vnet_hdr)); @@ -619,8 +632,6 @@ static int macvtap_skb_to_vnet_hdr(const struct sk_buff *skb,  	} else if (skb->ip_summed == CHECKSUM_UNNECESSARY) {  		vnet_hdr->flags = VIRTIO_NET_HDR_F_DATA_VALID;  	} /* else everything is zero */ - -	return 0;  }  /* Get packet from user space buffer */ @@ -628,6 +639,7 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m,  				const struct iovec *iv, unsigned long total_len,  				size_t count, int noblock)  { +	int good_linear = SKB_MAX_HEAD(NET_IP_ALIGN);  	struct sk_buff *skb;  	struct macvlan_dev *vlan;  	unsigned long len = total_len; @@ -670,6 +682,8 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m,  	if (m && m->msg_control && sock_flag(&q->sk, SOCK_ZEROCOPY)) {  		copylen = vnet_hdr.hdr_len ? vnet_hdr.hdr_len : GOODCOPY_LEN; +		if (copylen > good_linear) +			copylen = good_linear;  		linear = copylen;  		if (iov_pages(iv, vnet_hdr_len + copylen, count)  		    <= MAX_SKB_FRAGS) @@ -678,7 +692,10 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m,  	if (!zerocopy) {  		copylen = len; -		linear = vnet_hdr.hdr_len; +		if (vnet_hdr.hdr_len > good_linear) +			linear = good_linear; +		else +			linear = vnet_hdr.hdr_len;  	}  	skb = macvtap_alloc_skb(&q->sk, NET_IP_ALIGN, copylen, @@ -721,9 +738,8 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m,  		skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;  	}  	if (vlan) { -		local_bh_disable(); -		macvlan_start_xmit(skb, vlan->dev); -		local_bh_enable(); +		skb->dev = vlan->dev; +		dev_queue_xmit(skb);  	} else {  		kfree_skb(skb);  	} @@ -738,7 +754,7 @@ err:  	rcu_read_lock();  	vlan = rcu_dereference(q->vlan);  	if (vlan) -		vlan->dev->stats.tx_dropped++; +		this_cpu_inc(vlan->pcpu_stats->tx_dropped);  	rcu_read_unlock();  	return err; @@ -761,11 +777,10 @@ static ssize_t macvtap_put_user(struct macvtap_queue *q,  				const struct sk_buff *skb,  				const struct iovec *iv, int len)  { -	struct macvlan_dev *vlan;  	int ret;  	int vnet_hdr_len = 0;  	int vlan_offset = 0; -	int copied; +	int copied, total;  	if (q->flags & IFF_VNET_HDR) {  		struct virtio_net_hdr vnet_hdr; @@ -773,14 +788,13 @@ static ssize_t macvtap_put_user(struct macvtap_queue *q,  		if ((len -= vnet_hdr_len) < 0)  			return -EINVAL; -		ret = macvtap_skb_to_vnet_hdr(skb, &vnet_hdr); -		if (ret) -			return ret; +		macvtap_skb_to_vnet_hdr(skb, &vnet_hdr);  		if (memcpy_toiovecend(iv, (void *)&vnet_hdr, 0, sizeof(vnet_hdr)))  			return -EFAULT;  	} -	copied = vnet_hdr_len; +	total = copied = vnet_hdr_len; +	total += skb->len;  	if (!vlan_tx_tag_present(skb))  		len = min_t(int, skb->len, len); @@ -795,6 +809,7 @@ static ssize_t macvtap_put_user(struct macvtap_queue *q,  		vlan_offset = offsetof(struct vlan_ethhdr, h_vlan_proto);  		len = min_t(int, skb->len + VLAN_HLEN, len); +		total += VLAN_HLEN;  		copy = min_t(int, vlan_offset, len);  		ret = skb_copy_datagram_const_iovec(skb, 0, iv, copied, copy); @@ -812,22 +827,12 @@ static ssize_t macvtap_put_user(struct macvtap_queue *q,  	}  	ret = skb_copy_datagram_const_iovec(skb, vlan_offset, iv, copied, len); -	copied += len;  done: -	rcu_read_lock(); -	vlan = rcu_dereference(q->vlan); -	if (vlan) { -		preempt_disable(); -		macvlan_count_rx(vlan, copied - vnet_hdr_len, ret == 0, 0); -		preempt_enable(); -	} -	rcu_read_unlock(); - -	return ret ? ret : copied; +	return ret ? ret : total;  } -static ssize_t macvtap_do_read(struct macvtap_queue *q, struct kiocb *iocb, +static ssize_t macvtap_do_read(struct macvtap_queue *q,  			       const struct iovec *iv, unsigned long len,  			       int noblock)  { @@ -878,8 +883,10 @@ static ssize_t macvtap_aio_read(struct kiocb *iocb, const struct iovec *iv,  		goto out;  	} -	ret = macvtap_do_read(q, iocb, iv, len, file->f_flags & O_NONBLOCK); -	ret = min_t(ssize_t, ret, len); /* XXX copied from tun.c. Why? */ +	ret = macvtap_do_read(q, iv, len, file->f_flags & O_NONBLOCK); +	ret = min_t(ssize_t, ret, len); +	if (ret > 0) +		iocb->ki_pos = ret;  out:  	return ret;  } @@ -1110,7 +1117,7 @@ static int macvtap_recvmsg(struct kiocb *iocb, struct socket *sock,  	int ret;  	if (flags & ~(MSG_DONTWAIT|MSG_TRUNC))  		return -EINVAL; -	ret = macvtap_do_read(q, iocb, m->msg_iov, total_len, +	ret = macvtap_do_read(q, m->msg_iov, total_len,  			  flags & MSG_DONTWAIT);  	if (ret > total_len) {  		m->msg_flags |= MSG_TRUNC;  | 
