diff options
Diffstat (limited to 'drivers/net/tun.c')
| -rw-r--r-- | drivers/net/tun.c | 445 |
1 files changed, 252 insertions, 193 deletions
diff --git a/drivers/net/tun.c b/drivers/net/tun.c index bfa9bb48e42..98bad1fb1bf 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -60,6 +60,7 @@ #include <linux/if_arp.h> #include <linux/if_ether.h> #include <linux/if_tun.h> +#include <linux/if_vlan.h> #include <linux/crc32.h> #include <linux/nsproxy.h> #include <linux/virtio_net.h> @@ -68,6 +69,7 @@ #include <net/netns/generic.h> #include <net/rtnetlink.h> #include <net/sock.h> +#include <linux/seq_file.h> #include <asm/uaccess.h> @@ -109,7 +111,7 @@ struct tap_filter { unsigned char addr[FLT_EXACT_COUNT][ETH_ALEN]; }; -/* DEFAULT_MAX_NUM_RSS_QUEUES were choosed to let the rx/tx queues allocated for +/* DEFAULT_MAX_NUM_RSS_QUEUES were chosen to let the rx/tx queues allocated for * the netdevice to be fit in one page. So we can make sure the success of * memory allocation. TODO: increase the limit. */ #define MAX_TAP_QUEUES DEFAULT_MAX_NUM_RSS_QUEUES @@ -118,7 +120,7 @@ struct tap_filter { #define TUN_FLOW_EXPIRE (3 * HZ) /* A tun_file connects an open character device to a tuntap netdevice. It - * also contains all socket related strctures (except sock_fprog and tap_filter) + * also contains all socket related structures (except sock_fprog and tap_filter) * to serve as one transmit queue for tuntap device. The sock_fprog and * tap_filter were kept in tun_struct since they were used for filtering for the * netdevice not for a specific queue (at least I didn't see the requirement for @@ -137,7 +139,10 @@ struct tun_file { struct fasync_struct *fasync; /* only used for fasnyc */ unsigned int flags; - u16 queue_index; + union { + u16 queue_index; + unsigned int ifindex; + }; struct list_head next; struct tun_struct *detached; }; @@ -148,6 +153,7 @@ struct tun_flow_entry { struct tun_struct *tun; u32 rxhash; + u32 rps_rxhash; int queue_index; unsigned long updated; }; @@ -216,6 +222,7 @@ static struct tun_flow_entry *tun_flow_create(struct tun_struct *tun, rxhash, queue_index); e->updated = jiffies; e->rxhash = rxhash; + e->rps_rxhash = 0; e->queue_index = queue_index; e->tun = tun; hlist_add_head_rcu(&e->hash_link, head); @@ -228,6 +235,7 @@ static void tun_flow_delete(struct tun_struct *tun, struct tun_flow_entry *e) { tun_debug(KERN_INFO, tun, "delete flow: hash %u index %u\n", e->rxhash, e->queue_index); + sock_rps_reset_flow_hash(e->rps_rxhash); hlist_del_rcu(&e->hash_link); kfree_rcu(e, rcu); --tun->flow_count; @@ -321,6 +329,7 @@ static void tun_flow_update(struct tun_struct *tun, u32 rxhash, /* TODO: keep queueing to old queue until it's empty? */ e->queue_index = queue_index; e->updated = jiffies; + sock_rps_record_flow_hash(e->rps_rxhash); } else { spin_lock_bh(&tun->lock); if (!tun_flow_find(head, rxhash) && @@ -337,14 +346,27 @@ unlock: rcu_read_unlock(); } +/** + * Save the hash received in the stack receive path and update the + * flow_hash table accordingly. + */ +static inline void tun_flow_save_rps_rxhash(struct tun_flow_entry *e, u32 hash) +{ + if (unlikely(e->rps_rxhash != hash)) { + sock_rps_reset_flow_hash(e->rps_rxhash); + e->rps_rxhash = hash; + } +} + /* We try to identify a flow through its rxhash first. The reason that - * we do not check rxq no. is becuase some cards(e.g 82599), chooses + * we do not check rxq no. is because some cards(e.g 82599), chooses * the rxq based on the txq where the last packet of the flow comes. As * the userspace application move between processors, we may get a * different rxq no. here. If we could not get rxhash, then we would * hope the rxq no. may help here. */ -static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb) +static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb, + void *accel_priv, select_queue_fallback_t fallback) { struct tun_struct *tun = netdev_priv(dev); struct tun_flow_entry *e; @@ -354,12 +376,13 @@ static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb) rcu_read_lock(); numqueues = ACCESS_ONCE(tun->numqueues); - txq = skb_get_rxhash(skb); + txq = skb_get_hash(skb); if (txq) { e = tun_flow_find(&tun->flows[tun_hashfn(txq)], txq); - if (e) + if (e) { + tun_flow_save_rps_rxhash(e, txq); txq = e->queue_index; - else + } else /* use multiply and shift instead of expensive divide */ txq = ((u64)txq * numqueues) >> 32; } else if (likely(skb_rx_queue_recorded(skb))) { @@ -405,6 +428,12 @@ static struct tun_struct *tun_enable_queue(struct tun_file *tfile) return tun; } +static void tun_queue_purge(struct tun_file *tfile) +{ + skb_queue_purge(&tfile->sk.sk_receive_queue); + skb_queue_purge(&tfile->sk.sk_error_queue); +} + static void __tun_detach(struct tun_file *tfile, bool clean) { struct tun_file *ntfile; @@ -423,7 +452,7 @@ static void __tun_detach(struct tun_file *tfile, bool clean) --tun->numqueues; if (clean) { - rcu_assign_pointer(tfile->tun, NULL); + RCU_INIT_POINTER(tfile->tun, NULL); sock_put(&tfile->sk); } else tun_disable_queue(tun, tfile); @@ -431,7 +460,7 @@ static void __tun_detach(struct tun_file *tfile, bool clean) synchronize_net(); tun_flow_delete_by_queue(tun, tun->numqueues + 1); /* Drop read queue */ - skb_queue_purge(&tfile->sk.sk_receive_queue); + tun_queue_purge(tfile); tun_set_real_num_queues(tun); } else if (tfile->detached && clean) { tun = tun_enable_queue(tfile); @@ -469,13 +498,13 @@ static void tun_detach_all(struct net_device *dev) for (i = 0; i < n; i++) { tfile = rtnl_dereference(tun->tfiles[i]); BUG_ON(!tfile); - wake_up_all(&tfile->wq.wait); - rcu_assign_pointer(tfile->tun, NULL); + tfile->socket.sk->sk_data_ready(tfile->socket.sk); + RCU_INIT_POINTER(tfile->tun, NULL); --tun->numqueues; } list_for_each_entry(tfile, &tun->disabled, next) { - wake_up_all(&tfile->wq.wait); - rcu_assign_pointer(tfile->tun, NULL); + tfile->socket.sk->sk_data_ready(tfile->socket.sk); + RCU_INIT_POINTER(tfile->tun, NULL); } BUG_ON(tun->numqueues != 0); @@ -483,12 +512,12 @@ static void tun_detach_all(struct net_device *dev) for (i = 0; i < n; i++) { tfile = rtnl_dereference(tun->tfiles[i]); /* Drop read queue */ - skb_queue_purge(&tfile->sk.sk_receive_queue); + tun_queue_purge(tfile); sock_put(&tfile->sk); } list_for_each_entry_safe(tfile, tmp, &tun->disabled, next) { tun_enable_queue(tfile); - skb_queue_purge(&tfile->sk.sk_receive_queue); + tun_queue_purge(tfile); sock_put(&tfile->sk); } BUG_ON(tun->numdisabled != 0); @@ -497,7 +526,7 @@ static void tun_detach_all(struct net_device *dev) module_put(THIS_MODULE); } -static int tun_attach(struct tun_struct *tun, struct file *file) +static int tun_attach(struct tun_struct *tun, struct file *file, bool skip_filter) { struct tun_file *tfile = file->private_data; int err; @@ -521,8 +550,8 @@ static int tun_attach(struct tun_struct *tun, struct file *file) err = 0; - /* Re-attach the filter to presist device */ - if (tun->filter_attached == true) { + /* Re-attach the filter to persist device */ + if (!skip_filter && (tun->filter_attached == true)) { err = sk_attach_filter(&tun->fprog, tfile->socket.sk); if (!err) goto out; @@ -710,14 +739,32 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) struct tun_struct *tun = netdev_priv(dev); int txq = skb->queue_mapping; struct tun_file *tfile; + u32 numqueues = 0; rcu_read_lock(); tfile = rcu_dereference(tun->tfiles[txq]); + numqueues = ACCESS_ONCE(tun->numqueues); /* Drop packet if interface is not attached */ - if (txq >= tun->numqueues) + if (txq >= numqueues) goto drop; + if (numqueues == 1) { + /* Select queue was not called for the skbuff, so we extract the + * RPS hash and save it into the flow_table here. + */ + __u32 rxhash; + + rxhash = skb_get_hash(skb); + if (rxhash) { + struct tun_flow_entry *e; + e = tun_flow_find(&tun->flows[tun_hashfn(rxhash)], + rxhash); + if (e) + tun_flow_save_rps_rxhash(e, rxhash); + } + } + tun_debug(KERN_INFO, tun, "tun_net_xmit %d\n", skb->len); BUG_ON(!tfile); @@ -735,14 +782,21 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) /* Limit the number of packets queued by dividing txq length with the * number of queues. */ - if (skb_queue_len(&tfile->socket.sk->sk_receive_queue) - >= dev->tx_queue_len / tun->numqueues) + if (skb_queue_len(&tfile->socket.sk->sk_receive_queue) * numqueues + >= dev->tx_queue_len) goto drop; - /* Orphan the skb - required as we might hang on to it - * for indefinite time. */ if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC))) goto drop; + + if (skb->sk) { + sock_tx_timestamp(skb->sk, &skb_shinfo(skb)->tx_flags); + sw_tx_timestamp(skb); + } + + /* Orphan the skb - required as we might hang on to it + * for indefinite time. + */ skb_orphan(skb); nf_reset(skb); @@ -753,8 +807,7 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) /* Notify and wake up reader process */ if (tfile->flags & TUN_FASYNC) kill_fasync(&tfile->fasync, SIGIO, POLL_IN); - wake_up_interruptible_poll(&tfile->wq.wait, POLLIN | - POLLRDNORM | POLLRDBAND); + tfile->socket.sk->sk_data_ready(tfile->socket.sk); rcu_read_unlock(); return NETDEV_TX_OK; @@ -802,9 +855,9 @@ static void tun_poll_controller(struct net_device *dev) * Tun only receives frames when: * 1) the char device endpoint gets data from user space * 2) the tun socket gets a sendmsg call from user space - * Since both of those are syncronous operations, we are guaranteed + * Since both of those are synchronous operations, we are guaranteed * never to have pending data when we poll for it - * so theres nothing to do here but return. + * so there is nothing to do here but return. * We need this though so netpoll recognizes us as an interface that * supports polling, which enables bridge devices in virt setups to * still use netconsole @@ -841,7 +894,7 @@ static const struct net_device_ops tap_netdev_ops = { #endif }; -static int tun_flow_init(struct tun_struct *tun) +static void tun_flow_init(struct tun_struct *tun) { int i; @@ -852,8 +905,6 @@ static int tun_flow_init(struct tun_struct *tun) setup_timer(&tun->flow_gc_timer, tun_flow_cleanup, (unsigned long)tun); mod_timer(&tun->flow_gc_timer, round_jiffies_up(jiffies + tun->ageing_time)); - - return 0; } static void tun_flow_uninit(struct tun_struct *tun) @@ -913,7 +964,7 @@ static unsigned int tun_chr_poll(struct file *file, poll_table *wait) tun_debug(KERN_INFO, tun, "tun_chr_poll\n"); - poll_wait(file, &tfile->wq.wait, wait); + poll_wait(file, sk_sleep(sk), wait); if (!skb_queue_empty(&sk->sk_receive_queue)) mask |= POLLIN | POLLRDNORM; @@ -945,7 +996,7 @@ static struct sk_buff *tun_alloc_skb(struct tun_file *tfile, linear = len; skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock, - &err); + &err, 0); if (!skb) return ERR_PTR(err); @@ -957,84 +1008,6 @@ static struct sk_buff *tun_alloc_skb(struct tun_file *tfile, return skb; } -/* set skb frags from iovec, this can move to core network code for reuse */ -static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from, - int offset, size_t count) -{ - int len = iov_length(from, count) - offset; - int copy = skb_headlen(skb); - int size, offset1 = 0; - int i = 0; - - /* Skip over from offset */ - while (count && (offset >= from->iov_len)) { - offset -= from->iov_len; - ++from; - --count; - } - - /* copy up to skb headlen */ - while (count && (copy > 0)) { - size = min_t(unsigned int, copy, from->iov_len - offset); - if (copy_from_user(skb->data + offset1, from->iov_base + offset, - size)) - return -EFAULT; - if (copy > size) { - ++from; - --count; - offset = 0; - } else - offset += size; - copy -= size; - offset1 += size; - } - - if (len == offset1) - return 0; - - while (count--) { - struct page *page[MAX_SKB_FRAGS]; - int num_pages; - unsigned long base; - unsigned long truesize; - - len = from->iov_len - offset; - if (!len) { - offset = 0; - ++from; - continue; - } - base = (unsigned long)from->iov_base + offset; - size = ((base & ~PAGE_MASK) + len + ~PAGE_MASK) >> PAGE_SHIFT; - if (i + size > MAX_SKB_FRAGS) - return -EMSGSIZE; - num_pages = get_user_pages_fast(base, size, 0, &page[i]); - if (num_pages != size) { - for (i = 0; i < num_pages; i++) - put_page(page[i]); - return -EFAULT; - } - truesize = size * PAGE_SIZE; - skb->data_len += len; - skb->len += len; - skb->truesize += truesize; - atomic_add(truesize, &skb->sk->sk_wmem_alloc); - while (len) { - int off = base & ~PAGE_MASK; - int size = min_t(int, len, PAGE_SIZE - off); - __skb_fill_page_desc(skb, i, page[i], off, size); - skb_shinfo(skb)->nr_frags++; - /* increase sk_wmem_alloc */ - base += size; - len -= size; - i++; - } - offset = 0; - ++from; - } - return 0; -} - /* Get packet from user space buffer */ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, void *msg_control, const struct iovec *iv, @@ -1042,8 +1015,9 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, { struct tun_pi pi = { 0, cpu_to_be16(ETH_P_IP) }; struct sk_buff *skb; - size_t len = total_len, align = NET_SKB_PAD; + size_t len = total_len, align = NET_SKB_PAD, linear; struct virtio_net_hdr gso = { 0 }; + int good_linear; int offset = 0; int copylen; bool zerocopy = false; @@ -1051,8 +1025,9 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, u32 rxhash; if (!(tun->flags & TUN_NO_PI)) { - if ((len -= sizeof(pi)) > total_len) + if (len < sizeof(pi)) return -EINVAL; + len -= sizeof(pi); if (memcpy_fromiovecend((void *)&pi, iv, 0, sizeof(pi))) return -EFAULT; @@ -1060,8 +1035,9 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, } if (tun->flags & TUN_VNET_HDR) { - if ((len -= tun->vnet_hdr_sz) > total_len) + if (len < tun->vnet_hdr_sz) return -EINVAL; + len -= tun->vnet_hdr_sz; if (memcpy_fromiovecend((void *)&gso, iv, offset, sizeof(gso))) return -EFAULT; @@ -1082,34 +1058,30 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, return -EINVAL; } - if (msg_control) - zerocopy = true; + good_linear = SKB_MAX_HEAD(align); - if (zerocopy) { - /* Userspace may produce vectors with count greater than - * MAX_SKB_FRAGS, so we need to linearize parts of the skb - * to let the rest of data to be fit in the frags. - */ - if (count > MAX_SKB_FRAGS) { - copylen = iov_length(iv, count - MAX_SKB_FRAGS); - if (copylen < offset) - copylen = 0; - else - copylen -= offset; - } else - copylen = 0; - /* There are 256 bytes to be copied in skb, so there is enough - * room for skb expand head in case it is used. + if (msg_control) { + /* There are 256 bytes to be copied in skb, so there is + * enough room for skb expand head in case it is used. * The rest of the buffer is mapped from userspace. */ - if (copylen < gso.hdr_len) - copylen = gso.hdr_len; - if (!copylen) - copylen = GOODCOPY_LEN; - } else + copylen = gso.hdr_len ? gso.hdr_len : GOODCOPY_LEN; + if (copylen > good_linear) + copylen = good_linear; + linear = copylen; + if (iov_pages(iv, offset + copylen, count) <= MAX_SKB_FRAGS) + zerocopy = true; + } + + if (!zerocopy) { copylen = len; + if (gso.hdr_len > good_linear) + linear = good_linear; + else + linear = gso.hdr_len; + } - skb = tun_alloc_skb(tfile, align, copylen, gso.hdr_len, noblock); + skb = tun_alloc_skb(tfile, align, copylen, linear, noblock); if (IS_ERR(skb)) { if (PTR_ERR(skb) != -EAGAIN) tun->dev->stats.rx_dropped++; @@ -1118,8 +1090,13 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, if (zerocopy) err = zerocopy_sg_from_iovec(skb, iv, offset, count); - else + else { err = skb_copy_datagram_from_iovec(skb, 0, iv, offset, len); + if (!err && msg_control) { + struct ubuf_info *uarg = msg_control; + uarg->callback(uarg, false); + } + } if (err) { tun->dev->stats.rx_dropped++; @@ -1205,7 +1182,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, skb_reset_network_header(skb); skb_probe_transport_header(skb, 0); - rxhash = skb_get_rxhash(skb); + rxhash = skb_get_hash(skb); netif_rx_ni(skb); tun->dev->stats.rx_packets++; @@ -1243,6 +1220,7 @@ static ssize_t tun_put_user(struct tun_struct *tun, { struct tun_pi pi = { 0, skb->protocol }; ssize_t total = 0; + int vlan_offset = 0, copied; if (!(tun->flags & TUN_NO_PI)) { if ((len -= sizeof(pi)) < 0) @@ -1306,11 +1284,42 @@ static ssize_t tun_put_user(struct tun_struct *tun, total += tun->vnet_hdr_sz; } - len = min_t(int, skb->len, len); - - skb_copy_datagram_const_iovec(skb, 0, iv, total, len); + copied = total; total += skb->len; + if (!vlan_tx_tag_present(skb)) { + len = min_t(int, skb->len, len); + } else { + int copy, ret; + struct { + __be16 h_vlan_proto; + __be16 h_vlan_TCI; + } veth; + + veth.h_vlan_proto = skb->vlan_proto; + veth.h_vlan_TCI = htons(vlan_tx_tag_get(skb)); + + vlan_offset = offsetof(struct vlan_ethhdr, h_vlan_proto); + len = min_t(int, skb->len + VLAN_HLEN, len); + total += VLAN_HLEN; + + copy = min_t(int, vlan_offset, len); + ret = skb_copy_datagram_const_iovec(skb, 0, iv, copied, copy); + len -= copy; + copied += copy; + if (ret || !len) + goto done; + + copy = min_t(int, sizeof(veth), len); + ret = memcpy_toiovecend(iv, (void *)&veth, copied, copy); + len -= copy; + copied += copy; + if (ret || !len) + goto done; + } + + skb_copy_datagram_const_iovec(skb, vlan_offset, iv, copied, len); +done: tun->dev->stats.tx_packets++; tun->dev->stats.tx_bytes += len; @@ -1318,48 +1327,28 @@ static ssize_t tun_put_user(struct tun_struct *tun, } static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile, - struct kiocb *iocb, const struct iovec *iv, - ssize_t len, int noblock) + const struct iovec *iv, ssize_t len, int noblock) { - DECLARE_WAITQUEUE(wait, current); struct sk_buff *skb; ssize_t ret = 0; + int peeked, err, off = 0; tun_debug(KERN_INFO, tun, "tun_do_read\n"); - if (unlikely(!noblock)) - add_wait_queue(&tfile->wq.wait, &wait); - while (len) { - current->state = TASK_INTERRUPTIBLE; - - /* Read frames from the queue */ - if (!(skb = skb_dequeue(&tfile->socket.sk->sk_receive_queue))) { - if (noblock) { - ret = -EAGAIN; - break; - } - if (signal_pending(current)) { - ret = -ERESTARTSYS; - break; - } - if (tun->dev->reg_state != NETREG_REGISTERED) { - ret = -EIO; - break; - } + if (!len) + return ret; - /* Nothing to read, let's sleep */ - schedule(); - continue; - } + if (tun->dev->reg_state != NETREG_REGISTERED) + return -EIO; + /* Read frames from queue */ + skb = __skb_recv_datagram(tfile->socket.sk, noblock ? MSG_DONTWAIT : 0, + &peeked, &off, &err); + if (skb) { ret = tun_put_user(tun, tfile, skb, iv, len); kfree_skb(skb); - break; - } - - current->state = TASK_RUNNING; - if (unlikely(!noblock)) - remove_wait_queue(&tfile->wq.wait, &wait); + } else + ret = err; return ret; } @@ -1380,9 +1369,11 @@ static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv, goto out; } - ret = tun_do_read(tun, tfile, iocb, iv, len, + ret = tun_do_read(tun, tfile, iv, len, file->f_flags & O_NONBLOCK); ret = min_t(ssize_t, ret, len); + if (ret > 0) + iocb->ki_pos = ret; out: tun_put(tun); return ret; @@ -1459,7 +1450,6 @@ static int tun_sendmsg(struct kiocb *iocb, struct socket *sock, return ret; } - static int tun_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m, size_t total_len, int flags) @@ -1471,11 +1461,16 @@ static int tun_recvmsg(struct kiocb *iocb, struct socket *sock, if (!tun) return -EBADFD; - if (flags & ~(MSG_DONTWAIT|MSG_TRUNC)) { + if (flags & ~(MSG_DONTWAIT|MSG_TRUNC|MSG_ERRQUEUE)) { ret = -EINVAL; goto out; } - ret = tun_do_read(tun, tfile, iocb, m->msg_iov, total_len, + if (flags & MSG_ERRQUEUE) { + ret = sock_recv_errqueue(sock->sk, m, total_len, + SOL_PACKET, TUN_TX_TIMESTAMP); + goto out; + } + ret = tun_do_read(tun, tfile, m->msg_iov, total_len, flags & MSG_DONTWAIT); if (ret > total_len) { m->msg_flags |= MSG_TRUNC; @@ -1530,6 +1525,9 @@ static int tun_flags(struct tun_struct *tun) if (tun->flags & TUN_TAP_MQ) flags |= IFF_MULTI_QUEUE; + if (tun->flags & TUN_PERSIST) + flags |= IFF_PERSIST; + return flags; } @@ -1595,7 +1593,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) if (err < 0) return err; - err = tun_attach(tun, file); + err = tun_attach(tun, file, ifr->ifr_flags & IFF_NOFILTER); if (err < 0) return err; @@ -1642,6 +1640,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) dev_net_set(dev, net); dev->rtnl_link_ops = &tun_link_ops; + dev->ifindex = tfile->ifindex; tun = netdev_priv(dev); tun->dev = dev; @@ -1659,24 +1658,24 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) goto err_free_dev; tun_net_init(dev); - - err = tun_flow_init(tun); - if (err < 0) - goto err_free_dev; + tun_flow_init(tun); dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST | - TUN_USER_FEATURES; + TUN_USER_FEATURES | NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_STAG_TX; dev->features = dev->hw_features; - dev->vlan_features = dev->features; + dev->vlan_features = dev->features & + ~(NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_STAG_TX); INIT_LIST_HEAD(&tun->disabled); - err = tun_attach(tun, file); + err = tun_attach(tun, file, false); if (err < 0) - goto err_free_dev; + goto err_free_flow; err = register_netdevice(tun->dev); if (err < 0) - goto err_free_dev; + goto err_detach; if (device_create_file(&tun->dev->dev, &dev_attr_tun_flags) || device_create_file(&tun->dev->dev, &dev_attr_owner) || @@ -1720,7 +1719,12 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) strcpy(ifr->ifr_name, tun->dev->name); return 0; - err_free_dev: +err_detach: + tun_detach_all(dev); +err_free_flow: + tun_flow_uninit(tun); + security_tun_dev_free_security(tun->security); +err_free_dev: free_netdev(dev); return err; } @@ -1834,7 +1838,7 @@ static int tun_set_queue(struct file *file, struct ifreq *ifr) ret = security_tun_dev_attach_queue(tun->security); if (ret < 0) goto unlock; - ret = tun_attach(tun, file); + ret = tun_attach(tun, file, false); } else if (ifr->ifr_flags & IFF_DETACH_QUEUE) { tun = rtnl_dereference(tfile->tun); if (!tun || !(tun->flags & TUN_TAP_MQ) || tfile->detached) @@ -1860,6 +1864,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, kgid_t group; int sndbuf; int vnet_hdr_sz; + unsigned int ifindex; int ret; if (cmd == TUNSETIFF || cmd == TUNSETQUEUE || _IOC_TYPE(cmd) == 0x89) { @@ -1894,6 +1899,19 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, ret = -EFAULT; goto unlock; } + if (cmd == TUNSETIFINDEX) { + ret = -EPERM; + if (tun) + goto unlock; + + ret = -EFAULT; + if (copy_from_user(&ifindex, argp, sizeof(ifindex))) + goto unlock; + + ret = 0; + tfile->ifindex = ifindex; + goto unlock; + } ret = -EBADFD; if (!tun) @@ -1906,6 +1924,11 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, case TUNGETIFF: tun_get_iff(current->nsproxy->net_ns, tun, &ifr); + if (tfile->detached) + ifr.ifr_flags |= IFF_DETACH_QUEUE; + if (!tfile->socket.sk->sk_filter) + ifr.ifr_flags |= IFF_NOFILTER; + if (copy_to_user(argp, &ifr, ifreq_len)) ret = -EFAULT; break; @@ -2062,6 +2085,16 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, tun_detach_filter(tun, tun->numqueues); break; + case TUNGETFILTER: + ret = -EINVAL; + if ((tun->flags & TUN_TYPE_MASK) != TUN_TAP_DEV) + break; + ret = -EFAULT; + if (copy_to_user(argp, &tun->fprog, sizeof(tun->fprog))) + break; + ret = 0; + break; + default: ret = -EINVAL; break; @@ -2139,12 +2172,13 @@ static int tun_chr_open(struct inode *inode, struct file * file) &tun_proto); if (!tfile) return -ENOMEM; - rcu_assign_pointer(tfile->tun, NULL); + RCU_INIT_POINTER(tfile->tun, NULL); tfile->net = get_net(current->nsproxy->net_ns); tfile->flags = 0; + tfile->ifindex = 0; - rcu_assign_pointer(tfile->socket.wq, &tfile->wq); init_waitqueue_head(&tfile->wq.wait); + RCU_INIT_POINTER(tfile->socket.wq, &tfile->wq); tfile->socket.file = file; tfile->socket.ops = &tun_socket_ops; @@ -2175,6 +2209,27 @@ static int tun_chr_close(struct inode *inode, struct file *file) return 0; } +#ifdef CONFIG_PROC_FS +static int tun_chr_show_fdinfo(struct seq_file *m, struct file *f) +{ + struct tun_struct *tun; + struct ifreq ifr; + + memset(&ifr, 0, sizeof(ifr)); + + rtnl_lock(); + tun = tun_get(f); + if (tun) + tun_get_iff(current->nsproxy->net_ns, tun, &ifr); + rtnl_unlock(); + + if (tun) + tun_put(tun); + + return seq_printf(m, "iff:\t%s\n", ifr.ifr_name); +} +#endif + static const struct file_operations tun_fops = { .owner = THIS_MODULE, .llseek = no_llseek, @@ -2189,7 +2244,10 @@ static const struct file_operations tun_fops = { #endif .open = tun_chr_open, .release = tun_chr_close, - .fasync = tun_chr_fasync + .fasync = tun_chr_fasync, +#ifdef CONFIG_PROC_FS + .show_fdinfo = tun_chr_show_fdinfo, +#endif }; static struct miscdevice tun_miscdev = { @@ -2257,6 +2315,7 @@ static const struct ethtool_ops tun_ethtool_ops = { .get_msglevel = tun_get_msglevel, .set_msglevel = tun_set_msglevel, .get_link = ethtool_op_get_link, + .get_ts_info = ethtool_op_get_ts_info, }; |
