aboutsummaryrefslogtreecommitdiff
path: root/drivers/net/macvtap.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/macvtap.c')
-rw-r--r--drivers/net/macvtap.c263
1 files changed, 93 insertions, 170 deletions
diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index 876c72246ae..3381c4f91a8 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -11,7 +11,6 @@
#include <linux/sched.h>
#include <linux/types.h>
#include <linux/slab.h>
-#include <linux/init.h>
#include <linux/wait.h>
#include <linux/cdev.h>
#include <linux/idr.h>
@@ -68,6 +67,13 @@ static const struct proto_ops macvtap_socket_ops;
#define TUN_OFFLOADS (NETIF_F_HW_CSUM | NETIF_F_TSO_ECN | NETIF_F_TSO | \
NETIF_F_TSO6 | NETIF_F_UFO)
#define RX_OFFLOADS (NETIF_F_GRO | NETIF_F_LRO)
+#define TAP_FEATURES (NETIF_F_GSO | NETIF_F_SG)
+
+static struct macvlan_dev *macvtap_get_vlan_rcu(const struct net_device *dev)
+{
+ return rcu_dereference(dev->rx_handler_data);
+}
+
/*
* RCU usage:
* The macvtap_queue and the macvlan_dev are loosely coupled, the
@@ -217,7 +223,7 @@ static struct macvtap_queue *macvtap_get_queue(struct net_device *dev,
goto out;
/* Check if we can use flow to select a queue */
- rxq = skb_get_rxhash(skb);
+ rxq = skb_get_hash(skb);
if (rxq) {
tap = rcu_dereference(vlan->taps[rxq % numvtaps]);
goto out;
@@ -269,27 +275,33 @@ static void macvtap_del_queues(struct net_device *dev)
sock_put(&qlist[j]->sk);
}
-/*
- * Forward happens for data that gets sent from one macvlan
- * endpoint to another one in bridge mode. We just take
- * the skb and put it into the receive queue.
- */
-static int macvtap_forward(struct net_device *dev, struct sk_buff *skb)
+static rx_handler_result_t macvtap_handle_frame(struct sk_buff **pskb)
{
- struct macvlan_dev *vlan = netdev_priv(dev);
- struct macvtap_queue *q = macvtap_get_queue(dev, skb);
- netdev_features_t features;
+ struct sk_buff *skb = *pskb;
+ struct net_device *dev = skb->dev;
+ struct macvlan_dev *vlan;
+ struct macvtap_queue *q;
+ netdev_features_t features = TAP_FEATURES;
+
+ vlan = macvtap_get_vlan_rcu(dev);
+ if (!vlan)
+ return RX_HANDLER_PASS;
+
+ q = macvtap_get_queue(dev, skb);
if (!q)
- goto drop;
+ return RX_HANDLER_PASS;
if (skb_queue_len(&q->sk.sk_receive_queue) >= dev->tx_queue_len)
goto drop;
- skb->dev = dev;
+ skb_push(skb, ETH_HLEN);
+
/* Apply the forward feature mask so that we perform segmentation
- * according to users wishes.
+ * according to the user's wishes. This only works if VNET_HDR is
+ * enabled.
*/
- features = netif_skb_features(skb) & vlan->tap_features;
+ if (q->flags & IFF_VNET_HDR)
+ features |= vlan->tap_features;
if (netif_needs_gso(skb, features)) {
struct sk_buff *segs = __skb_gso_segment(skb, features, false);
@@ -310,27 +322,27 @@ static int macvtap_forward(struct net_device *dev, struct sk_buff *skb)
segs = nskb;
}
} else {
+ /* If we receive a partial checksum and the tap side
+ * doesn't support checksum offload, compute the checksum.
+ * Note: it doesn't matter which checksum feature we
+ * check; we either support them all or none.
+ */
+ if (skb->ip_summed == CHECKSUM_PARTIAL &&
+ !(features & NETIF_F_ALL_CSUM) &&
+ skb_checksum_help(skb))
+ goto drop;
skb_queue_tail(&q->sk.sk_receive_queue, skb);
}
wake_up:
wake_up_interruptible_poll(sk_sleep(&q->sk), POLLIN | POLLRDNORM | POLLRDBAND);
- return NET_RX_SUCCESS;
+ return RX_HANDLER_CONSUMED;
drop:
+ /* Count errors/drops only here, thus don't care about args. */
+ macvlan_count_rx(vlan, 0, 0, 0);
kfree_skb(skb);
- return NET_RX_DROP;
-}
-
-/*
- * Receive is for data from the external interface (lowerdev),
- * in case of macvtap, we can treat that the same way as
- * forward, which macvlan cannot.
- */
-static int macvtap_receive(struct sk_buff *skb)
-{
- skb_push(skb, ETH_HLEN);
- return macvtap_forward(skb->dev, skb);
+ return RX_HANDLER_CONSUMED;
}
static int macvtap_get_minor(struct macvlan_dev *vlan)
@@ -380,6 +392,8 @@ static int macvtap_newlink(struct net *src_net,
struct nlattr *data[])
{
struct macvlan_dev *vlan = netdev_priv(dev);
+ int err;
+
INIT_LIST_HEAD(&vlan->queue_list);
/* Since macvlan supports all offloads by default, make
@@ -387,16 +401,20 @@ static int macvtap_newlink(struct net *src_net,
*/
vlan->tap_features = TUN_OFFLOADS;
+ err = netdev_rx_handler_register(dev, macvtap_handle_frame, vlan);
+ if (err)
+ return err;
+
/* Don't put anything that may fail after macvlan_common_newlink
* because we can't undo what it does.
*/
- return macvlan_common_newlink(src_net, dev, tb, data,
- macvtap_receive, macvtap_forward);
+ return macvlan_common_newlink(src_net, dev, tb, data);
}
static void macvtap_dellink(struct net_device *dev,
struct list_head *head)
{
+ netdev_rx_handler_unregister(dev);
macvtap_del_queues(dev);
macvlan_dellink(dev, head);
}
@@ -524,7 +542,7 @@ static inline struct sk_buff *macvtap_alloc_skb(struct sock *sk, size_t prepad,
linear = len;
skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
- err);
+ err, 0);
if (!skb)
return NULL;
@@ -536,86 +554,6 @@ static inline struct sk_buff *macvtap_alloc_skb(struct sock *sk, size_t prepad,
return skb;
}
-/* set skb frags from iovec, this can move to core network code for reuse */
-static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from,
- int offset, size_t count)
-{
- int len = iov_length(from, count) - offset;
- int copy = skb_headlen(skb);
- int size, offset1 = 0;
- int i = 0;
-
- /* Skip over from offset */
- while (count && (offset >= from->iov_len)) {
- offset -= from->iov_len;
- ++from;
- --count;
- }
-
- /* copy up to skb headlen */
- while (count && (copy > 0)) {
- size = min_t(unsigned int, copy, from->iov_len - offset);
- if (copy_from_user(skb->data + offset1, from->iov_base + offset,
- size))
- return -EFAULT;
- if (copy > size) {
- ++from;
- --count;
- offset = 0;
- } else
- offset += size;
- copy -= size;
- offset1 += size;
- }
-
- if (len == offset1)
- return 0;
-
- while (count--) {
- struct page *page[MAX_SKB_FRAGS];
- int num_pages;
- unsigned long base;
- unsigned long truesize;
-
- len = from->iov_len - offset;
- if (!len) {
- offset = 0;
- ++from;
- continue;
- }
- base = (unsigned long)from->iov_base + offset;
- size = ((base & ~PAGE_MASK) + len + ~PAGE_MASK) >> PAGE_SHIFT;
- if (i + size > MAX_SKB_FRAGS)
- return -EMSGSIZE;
- num_pages = get_user_pages_fast(base, size, 0, &page[i]);
- if (num_pages != size) {
- int j;
-
- for (j = 0; j < num_pages; j++)
- put_page(page[i + j]);
- return -EFAULT;
- }
- truesize = size * PAGE_SIZE;
- skb->data_len += len;
- skb->len += len;
- skb->truesize += truesize;
- atomic_add(truesize, &skb->sk->sk_wmem_alloc);
- while (len) {
- int off = base & ~PAGE_MASK;
- int size = min_t(int, len, PAGE_SIZE - off);
- __skb_fill_page_desc(skb, i, page[i], off, size);
- skb_shinfo(skb)->nr_frags++;
- /* increase sk_wmem_alloc */
- base += size;
- len -= size;
- i++;
- }
- offset = 0;
- ++from;
- }
- return 0;
-}
-
/*
* macvtap_skb_from_vnet_hdr and macvtap_skb_to_vnet_hdr should
* be shared with the tun/tap driver.
@@ -663,7 +601,7 @@ static int macvtap_skb_from_vnet_hdr(struct sk_buff *skb,
return 0;
}
-static int macvtap_skb_to_vnet_hdr(const struct sk_buff *skb,
+static void macvtap_skb_to_vnet_hdr(const struct sk_buff *skb,
struct virtio_net_hdr *vnet_hdr)
{
memset(vnet_hdr, 0, sizeof(*vnet_hdr));
@@ -694,16 +632,14 @@ static int macvtap_skb_to_vnet_hdr(const struct sk_buff *skb,
} else if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
vnet_hdr->flags = VIRTIO_NET_HDR_F_DATA_VALID;
} /* else everything is zero */
-
- return 0;
}
-
/* Get packet from user space buffer */
static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m,
const struct iovec *iv, unsigned long total_len,
size_t count, int noblock)
{
+ int good_linear = SKB_MAX_HEAD(NET_IP_ALIGN);
struct sk_buff *skb;
struct macvlan_dev *vlan;
unsigned long len = total_len;
@@ -744,33 +680,22 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m,
if (unlikely(count > UIO_MAXIOV))
goto err;
- if (m && m->msg_control && sock_flag(&q->sk, SOCK_ZEROCOPY))
- zerocopy = true;
-
- if (zerocopy) {
- /* Userspace may produce vectors with count greater than
- * MAX_SKB_FRAGS, so we need to linearize parts of the skb
- * to let the rest of data to be fit in the frags.
- */
- if (count > MAX_SKB_FRAGS) {
- copylen = iov_length(iv, count - MAX_SKB_FRAGS);
- if (copylen < vnet_hdr_len)
- copylen = 0;
- else
- copylen -= vnet_hdr_len;
- }
- /* There are 256 bytes to be copied in skb, so there is enough
- * room for skb expand head in case it is used.
- * The rest buffer is mapped from userspace.
- */
- if (copylen < vnet_hdr.hdr_len)
- copylen = vnet_hdr.hdr_len;
- if (!copylen)
- copylen = GOODCOPY_LEN;
+ if (m && m->msg_control && sock_flag(&q->sk, SOCK_ZEROCOPY)) {
+ copylen = vnet_hdr.hdr_len ? vnet_hdr.hdr_len : GOODCOPY_LEN;
+ if (copylen > good_linear)
+ copylen = good_linear;
linear = copylen;
- } else {
+ if (iov_pages(iv, vnet_hdr_len + copylen, count)
+ <= MAX_SKB_FRAGS)
+ zerocopy = true;
+ }
+
+ if (!zerocopy) {
copylen = len;
- linear = vnet_hdr.hdr_len;
+ if (vnet_hdr.hdr_len > good_linear)
+ linear = good_linear;
+ else
+ linear = vnet_hdr.hdr_len;
}
skb = macvtap_alloc_skb(&q->sk, NET_IP_ALIGN, copylen,
@@ -780,9 +705,15 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m,
if (zerocopy)
err = zerocopy_sg_from_iovec(skb, iv, vnet_hdr_len, count);
- else
+ else {
err = skb_copy_datagram_from_iovec(skb, 0, iv, vnet_hdr_len,
len);
+ if (!err && m && m->msg_control) {
+ struct ubuf_info *uarg = m->msg_control;
+ uarg->callback(uarg, false);
+ }
+ }
+
if (err)
goto err_kfree;
@@ -806,10 +737,12 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m,
skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
}
- if (vlan)
- macvlan_start_xmit(skb, vlan->dev);
- else
+ if (vlan) {
+ skb->dev = vlan->dev;
+ dev_queue_xmit(skb);
+ } else {
kfree_skb(skb);
+ }
rcu_read_unlock();
return total_len;
@@ -821,7 +754,7 @@ err:
rcu_read_lock();
vlan = rcu_dereference(q->vlan);
if (vlan)
- vlan->dev->stats.tx_dropped++;
+ this_cpu_inc(vlan->pcpu_stats->tx_dropped);
rcu_read_unlock();
return err;
@@ -844,11 +777,10 @@ static ssize_t macvtap_put_user(struct macvtap_queue *q,
const struct sk_buff *skb,
const struct iovec *iv, int len)
{
- struct macvlan_dev *vlan;
int ret;
int vnet_hdr_len = 0;
int vlan_offset = 0;
- int copied;
+ int copied, total;
if (q->flags & IFF_VNET_HDR) {
struct virtio_net_hdr vnet_hdr;
@@ -856,14 +788,13 @@ static ssize_t macvtap_put_user(struct macvtap_queue *q,
if ((len -= vnet_hdr_len) < 0)
return -EINVAL;
- ret = macvtap_skb_to_vnet_hdr(skb, &vnet_hdr);
- if (ret)
- return ret;
+ macvtap_skb_to_vnet_hdr(skb, &vnet_hdr);
if (memcpy_toiovecend(iv, (void *)&vnet_hdr, 0, sizeof(vnet_hdr)))
return -EFAULT;
}
- copied = vnet_hdr_len;
+ total = copied = vnet_hdr_len;
+ total += skb->len;
if (!vlan_tx_tag_present(skb))
len = min_t(int, skb->len, len);
@@ -873,11 +804,12 @@ static ssize_t macvtap_put_user(struct macvtap_queue *q,
__be16 h_vlan_proto;
__be16 h_vlan_TCI;
} veth;
- veth.h_vlan_proto = htons(ETH_P_8021Q);
+ veth.h_vlan_proto = skb->vlan_proto;
veth.h_vlan_TCI = htons(vlan_tx_tag_get(skb));
vlan_offset = offsetof(struct vlan_ethhdr, h_vlan_proto);
len = min_t(int, skb->len + VLAN_HLEN, len);
+ total += VLAN_HLEN;
copy = min_t(int, vlan_offset, len);
ret = skb_copy_datagram_const_iovec(skb, 0, iv, copied, copy);
@@ -895,19 +827,12 @@ static ssize_t macvtap_put_user(struct macvtap_queue *q,
}
ret = skb_copy_datagram_const_iovec(skb, vlan_offset, iv, copied, len);
- copied += len;
done:
- rcu_read_lock();
- vlan = rcu_dereference(q->vlan);
- if (vlan)
- macvlan_count_rx(vlan, copied - vnet_hdr_len, ret == 0, 0);
- rcu_read_unlock();
-
- return ret ? ret : copied;
+ return ret ? ret : total;
}
-static ssize_t macvtap_do_read(struct macvtap_queue *q, struct kiocb *iocb,
+static ssize_t macvtap_do_read(struct macvtap_queue *q,
const struct iovec *iv, unsigned long len,
int noblock)
{
@@ -958,8 +883,10 @@ static ssize_t macvtap_aio_read(struct kiocb *iocb, const struct iovec *iv,
goto out;
}
- ret = macvtap_do_read(q, iocb, iv, len, file->f_flags & O_NONBLOCK);
- ret = min_t(ssize_t, ret, len); /* XXX copied from tun.c. Why? */
+ ret = macvtap_do_read(q, iv, len, file->f_flags & O_NONBLOCK);
+ ret = min_t(ssize_t, ret, len);
+ if (ret > 0)
+ iocb->ki_pos = ret;
out:
return ret;
}
@@ -1046,8 +973,7 @@ static int set_offload(struct macvtap_queue *q, unsigned long arg)
/* tap_features are the same as features on tun/tap and
* reflect user expectations.
*/
- vlan->tap_features = vlan->dev->features &
- (feature_mask | ~TUN_OFFLOADS);
+ vlan->tap_features = feature_mask;
vlan->set_features = features;
netdev_update_features(vlan->dev);
@@ -1107,6 +1033,7 @@ static long macvtap_ioctl(struct file *file, unsigned int cmd,
rtnl_lock();
ret = macvtap_ioctl_set_queue(file, u);
rtnl_unlock();
+ return ret;
case TUNGETFEATURES:
if (put_user(IFF_TAP | IFF_NO_PI | IFF_VNET_HDR |
@@ -1142,10 +1069,6 @@ static long macvtap_ioctl(struct file *file, unsigned int cmd,
TUN_F_TSO_ECN | TUN_F_UFO))
return -EINVAL;
- /* TODO: only accept frames with the features that
- got enabled for forwarded frames */
- if (!(q->flags & IFF_VNET_HDR))
- return -EINVAL;
rtnl_lock();
ret = set_offload(q, arg);
rtnl_unlock();
@@ -1194,7 +1117,7 @@ static int macvtap_recvmsg(struct kiocb *iocb, struct socket *sock,
int ret;
if (flags & ~(MSG_DONTWAIT|MSG_TRUNC))
return -EINVAL;
- ret = macvtap_do_read(q, iocb, m->msg_iov, total_len,
+ ret = macvtap_do_read(q, m->msg_iov, total_len,
flags & MSG_DONTWAIT);
if (ret > total_len) {
m->msg_flags |= MSG_TRUNC;