diff options
Diffstat (limited to 'drivers/net/tun.c')
| -rw-r--r-- | drivers/net/tun.c | 166 |
1 files changed, 107 insertions, 59 deletions
diff --git a/drivers/net/tun.c b/drivers/net/tun.c index bbb69351291..98bad1fb1bf 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -69,6 +69,7 @@ #include <net/netns/generic.h> #include <net/rtnetlink.h> #include <net/sock.h> +#include <linux/seq_file.h> #include <asm/uaccess.h> @@ -152,6 +153,7 @@ struct tun_flow_entry { struct tun_struct *tun; u32 rxhash; + u32 rps_rxhash; int queue_index; unsigned long updated; }; @@ -220,6 +222,7 @@ static struct tun_flow_entry *tun_flow_create(struct tun_struct *tun, rxhash, queue_index); e->updated = jiffies; e->rxhash = rxhash; + e->rps_rxhash = 0; e->queue_index = queue_index; e->tun = tun; hlist_add_head_rcu(&e->hash_link, head); @@ -232,6 +235,7 @@ static void tun_flow_delete(struct tun_struct *tun, struct tun_flow_entry *e) { tun_debug(KERN_INFO, tun, "delete flow: hash %u index %u\n", e->rxhash, e->queue_index); + sock_rps_reset_flow_hash(e->rps_rxhash); hlist_del_rcu(&e->hash_link); kfree_rcu(e, rcu); --tun->flow_count; @@ -325,6 +329,7 @@ static void tun_flow_update(struct tun_struct *tun, u32 rxhash, /* TODO: keep queueing to old queue until it's empty? */ e->queue_index = queue_index; e->updated = jiffies; + sock_rps_record_flow_hash(e->rps_rxhash); } else { spin_lock_bh(&tun->lock); if (!tun_flow_find(head, rxhash) && @@ -341,6 +346,18 @@ unlock: rcu_read_unlock(); } +/** + * Save the hash received in the stack receive path and update the + * flow_hash table accordingly. + */ +static inline void tun_flow_save_rps_rxhash(struct tun_flow_entry *e, u32 hash) +{ + if (unlikely(e->rps_rxhash != hash)) { + sock_rps_reset_flow_hash(e->rps_rxhash); + e->rps_rxhash = hash; + } +} + /* We try to identify a flow through its rxhash first. The reason that * we do not check rxq no. is because some cards(e.g 82599), chooses * the rxq based on the txq where the last packet of the flow comes. As @@ -348,7 +365,8 @@ unlock: * different rxq no. here. If we could not get rxhash, then we would * hope the rxq no. may help here. */ -static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb) +static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb, + void *accel_priv, select_queue_fallback_t fallback) { struct tun_struct *tun = netdev_priv(dev); struct tun_flow_entry *e; @@ -358,12 +376,13 @@ static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb) rcu_read_lock(); numqueues = ACCESS_ONCE(tun->numqueues); - txq = skb_get_rxhash(skb); + txq = skb_get_hash(skb); if (txq) { e = tun_flow_find(&tun->flows[tun_hashfn(txq)], txq); - if (e) + if (e) { + tun_flow_save_rps_rxhash(e, txq); txq = e->queue_index; - else + } else /* use multiply and shift instead of expensive divide */ txq = ((u64)txq * numqueues) >> 32; } else if (likely(skb_rx_queue_recorded(skb))) { @@ -433,7 +452,7 @@ static void __tun_detach(struct tun_file *tfile, bool clean) --tun->numqueues; if (clean) { - rcu_assign_pointer(tfile->tun, NULL); + RCU_INIT_POINTER(tfile->tun, NULL); sock_put(&tfile->sk); } else tun_disable_queue(tun, tfile); @@ -479,13 +498,13 @@ static void tun_detach_all(struct net_device *dev) for (i = 0; i < n; i++) { tfile = rtnl_dereference(tun->tfiles[i]); BUG_ON(!tfile); - wake_up_all(&tfile->wq.wait); - rcu_assign_pointer(tfile->tun, NULL); + tfile->socket.sk->sk_data_ready(tfile->socket.sk); + RCU_INIT_POINTER(tfile->tun, NULL); --tun->numqueues; } list_for_each_entry(tfile, &tun->disabled, next) { - wake_up_all(&tfile->wq.wait); - rcu_assign_pointer(tfile->tun, NULL); + tfile->socket.sk->sk_data_ready(tfile->socket.sk); + RCU_INIT_POINTER(tfile->tun, NULL); } BUG_ON(tun->numqueues != 0); @@ -720,14 +739,32 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) struct tun_struct *tun = netdev_priv(dev); int txq = skb->queue_mapping; struct tun_file *tfile; + u32 numqueues = 0; rcu_read_lock(); tfile = rcu_dereference(tun->tfiles[txq]); + numqueues = ACCESS_ONCE(tun->numqueues); /* Drop packet if interface is not attached */ - if (txq >= tun->numqueues) + if (txq >= numqueues) goto drop; + if (numqueues == 1) { + /* Select queue was not called for the skbuff, so we extract the + * RPS hash and save it into the flow_table here. + */ + __u32 rxhash; + + rxhash = skb_get_hash(skb); + if (rxhash) { + struct tun_flow_entry *e; + e = tun_flow_find(&tun->flows[tun_hashfn(rxhash)], + rxhash); + if (e) + tun_flow_save_rps_rxhash(e, rxhash); + } + } + tun_debug(KERN_INFO, tun, "tun_net_xmit %d\n", skb->len); BUG_ON(!tfile); @@ -745,8 +782,8 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) /* Limit the number of packets queued by dividing txq length with the * number of queues. */ - if (skb_queue_len(&tfile->socket.sk->sk_receive_queue) - >= dev->tx_queue_len / tun->numqueues) + if (skb_queue_len(&tfile->socket.sk->sk_receive_queue) * numqueues + >= dev->tx_queue_len) goto drop; if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC))) @@ -770,8 +807,7 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) /* Notify and wake up reader process */ if (tfile->flags & TUN_FASYNC) kill_fasync(&tfile->fasync, SIGIO, POLL_IN); - wake_up_interruptible_poll(&tfile->wq.wait, POLLIN | - POLLRDNORM | POLLRDBAND); + tfile->socket.sk->sk_data_ready(tfile->socket.sk); rcu_read_unlock(); return NETDEV_TX_OK; @@ -928,7 +964,7 @@ static unsigned int tun_chr_poll(struct file *file, poll_table *wait) tun_debug(KERN_INFO, tun, "tun_chr_poll\n"); - poll_wait(file, &tfile->wq.wait, wait); + poll_wait(file, sk_sleep(sk), wait); if (!skb_queue_empty(&sk->sk_receive_queue)) mask |= POLLIN | POLLRDNORM; @@ -1146,7 +1182,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, skb_reset_network_header(skb); skb_probe_transport_header(skb, 0); - rxhash = skb_get_rxhash(skb); + rxhash = skb_get_hash(skb); netif_rx_ni(skb); tun->dev->stats.rx_packets++; @@ -1184,7 +1220,7 @@ static ssize_t tun_put_user(struct tun_struct *tun, { struct tun_pi pi = { 0, skb->protocol }; ssize_t total = 0; - int vlan_offset = 0; + int vlan_offset = 0, copied; if (!(tun->flags & TUN_NO_PI)) { if ((len -= sizeof(pi)) < 0) @@ -1248,6 +1284,8 @@ static ssize_t tun_put_user(struct tun_struct *tun, total += tun->vnet_hdr_sz; } + copied = total; + total += skb->len; if (!vlan_tx_tag_present(skb)) { len = min_t(int, skb->len, len); } else { @@ -1262,24 +1300,24 @@ static ssize_t tun_put_user(struct tun_struct *tun, vlan_offset = offsetof(struct vlan_ethhdr, h_vlan_proto); len = min_t(int, skb->len + VLAN_HLEN, len); + total += VLAN_HLEN; copy = min_t(int, vlan_offset, len); - ret = skb_copy_datagram_const_iovec(skb, 0, iv, total, copy); + ret = skb_copy_datagram_const_iovec(skb, 0, iv, copied, copy); len -= copy; - total += copy; + copied += copy; if (ret || !len) goto done; copy = min_t(int, sizeof(veth), len); - ret = memcpy_toiovecend(iv, (void *)&veth, total, copy); + ret = memcpy_toiovecend(iv, (void *)&veth, copied, copy); len -= copy; - total += copy; + copied += copy; if (ret || !len) goto done; } - skb_copy_datagram_const_iovec(skb, vlan_offset, iv, total, len); - total += len; + skb_copy_datagram_const_iovec(skb, vlan_offset, iv, copied, len); done: tun->dev->stats.tx_packets++; @@ -1291,47 +1329,26 @@ done: static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile, const struct iovec *iv, ssize_t len, int noblock) { - DECLARE_WAITQUEUE(wait, current); struct sk_buff *skb; ssize_t ret = 0; + int peeked, err, off = 0; tun_debug(KERN_INFO, tun, "tun_do_read\n"); - if (unlikely(!noblock)) - add_wait_queue(&tfile->wq.wait, &wait); - while (len) { - if (unlikely(!noblock)) - current->state = TASK_INTERRUPTIBLE; + if (!len) + return ret; - /* Read frames from the queue */ - if (!(skb = skb_dequeue(&tfile->socket.sk->sk_receive_queue))) { - if (noblock) { - ret = -EAGAIN; - break; - } - if (signal_pending(current)) { - ret = -ERESTARTSYS; - break; - } - if (tun->dev->reg_state != NETREG_REGISTERED) { - ret = -EIO; - break; - } - - /* Nothing to read, let's sleep */ - schedule(); - continue; - } + if (tun->dev->reg_state != NETREG_REGISTERED) + return -EIO; + /* Read frames from queue */ + skb = __skb_recv_datagram(tfile->socket.sk, noblock ? MSG_DONTWAIT : 0, + &peeked, &off, &err); + if (skb) { ret = tun_put_user(tun, tfile, skb, iv, len); kfree_skb(skb); - break; - } - - if (unlikely(!noblock)) { - current->state = TASK_RUNNING; - remove_wait_queue(&tfile->wq.wait, &wait); - } + } else + ret = err; return ret; } @@ -1354,6 +1371,7 @@ static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv, ret = tun_do_read(tun, tfile, iv, len, file->f_flags & O_NONBLOCK); + ret = min_t(ssize_t, ret, len); if (ret > 0) iocb->ki_pos = ret; out: @@ -1454,6 +1472,10 @@ static int tun_recvmsg(struct kiocb *iocb, struct socket *sock, } ret = tun_do_read(tun, tfile, m->msg_iov, total_len, flags & MSG_DONTWAIT); + if (ret > total_len) { + m->msg_flags |= MSG_TRUNC; + ret = flags & MSG_TRUNC ? ret : total_len; + } out: tun_put(tun); return ret; @@ -1642,7 +1664,9 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) TUN_USER_FEATURES | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX; dev->features = dev->hw_features; - dev->vlan_features = dev->features; + dev->vlan_features = dev->features & + ~(NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_STAG_TX); INIT_LIST_HEAD(&tun->disabled); err = tun_attach(tun, file, false); @@ -2148,13 +2172,13 @@ static int tun_chr_open(struct inode *inode, struct file * file) &tun_proto); if (!tfile) return -ENOMEM; - rcu_assign_pointer(tfile->tun, NULL); + RCU_INIT_POINTER(tfile->tun, NULL); tfile->net = get_net(current->nsproxy->net_ns); tfile->flags = 0; tfile->ifindex = 0; - rcu_assign_pointer(tfile->socket.wq, &tfile->wq); init_waitqueue_head(&tfile->wq.wait); + RCU_INIT_POINTER(tfile->socket.wq, &tfile->wq); tfile->socket.file = file; tfile->socket.ops = &tun_socket_ops; @@ -2185,6 +2209,27 @@ static int tun_chr_close(struct inode *inode, struct file *file) return 0; } +#ifdef CONFIG_PROC_FS +static int tun_chr_show_fdinfo(struct seq_file *m, struct file *f) +{ + struct tun_struct *tun; + struct ifreq ifr; + + memset(&ifr, 0, sizeof(ifr)); + + rtnl_lock(); + tun = tun_get(f); + if (tun) + tun_get_iff(current->nsproxy->net_ns, tun, &ifr); + rtnl_unlock(); + + if (tun) + tun_put(tun); + + return seq_printf(m, "iff:\t%s\n", ifr.ifr_name); +} +#endif + static const struct file_operations tun_fops = { .owner = THIS_MODULE, .llseek = no_llseek, @@ -2199,7 +2244,10 @@ static const struct file_operations tun_fops = { #endif .open = tun_chr_open, .release = tun_chr_close, - .fasync = tun_chr_fasync + .fasync = tun_chr_fasync, +#ifdef CONFIG_PROC_FS + .show_fdinfo = tun_chr_show_fdinfo, +#endif }; static struct miscdevice tun_miscdev = { |
