diff options
Diffstat (limited to 'net/packet/af_packet.c')
| -rw-r--r-- | net/packet/af_packet.c | 143 |
1 files changed, 87 insertions, 56 deletions
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index d711ecbbb9d..b85c67ccb79 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -88,7 +88,6 @@ #include <linux/virtio_net.h> #include <linux/errqueue.h> #include <linux/net_tstamp.h> -#include <linux/reciprocal_div.h> #include <linux/percpu.h> #ifdef CONFIG_INET #include <net/inet_common.h> @@ -244,40 +243,41 @@ static int packet_direct_xmit(struct sk_buff *skb) const struct net_device_ops *ops = dev->netdev_ops; netdev_features_t features; struct netdev_queue *txq; + int ret = NETDEV_TX_BUSY; u16 queue_map; - int ret; if (unlikely(!netif_running(dev) || - !netif_carrier_ok(dev))) { - kfree_skb(skb); - return NET_XMIT_DROP; - } + !netif_carrier_ok(dev))) + goto drop; features = netif_skb_features(skb); if (skb_needs_linearize(skb, features) && - __skb_linearize(skb)) { - kfree_skb(skb); - return NET_XMIT_DROP; - } + __skb_linearize(skb)) + goto drop; queue_map = skb_get_queue_mapping(skb); txq = netdev_get_tx_queue(dev, queue_map); - __netif_tx_lock_bh(txq); - if (unlikely(netif_xmit_frozen_or_stopped(txq))) { - ret = NETDEV_TX_BUSY; - kfree_skb(skb); - goto out; + local_bh_disable(); + + HARD_TX_LOCK(dev, txq, smp_processor_id()); + if (!netif_xmit_frozen_or_drv_stopped(txq)) { + ret = ops->ndo_start_xmit(skb, dev); + if (ret == NETDEV_TX_OK) + txq_trans_update(txq); } + HARD_TX_UNLOCK(dev, txq); - ret = ops->ndo_start_xmit(skb, dev); - if (likely(dev_xmit_complete(ret))) - txq_trans_update(txq); - else + local_bh_enable(); + + if (!dev_xmit_complete(ret)) kfree_skb(skb); -out: - __netif_tx_unlock_bh(txq); + return ret; +drop: + atomic_long_inc(&dev->tx_dropped); + kfree_skb(skb); + return NET_XMIT_DROP; } static struct net_device *packet_cached_dev_get(struct packet_sock *po) @@ -309,11 +309,27 @@ static bool packet_use_direct_xmit(const struct packet_sock *po) return po->xmit == packet_direct_xmit; } -static u16 packet_pick_tx_queue(struct net_device *dev) +static u16 __packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb) { return (u16) raw_smp_processor_id() % dev->real_num_tx_queues; } +static void packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb) +{ + const struct net_device_ops *ops = dev->netdev_ops; + u16 queue_index; + + if (ops->ndo_select_queue) { + queue_index = ops->ndo_select_queue(dev, skb, NULL, + __packet_pick_tx_queue); + queue_index = netdev_cap_txqueue(dev, queue_index); + } else { + queue_index = __packet_pick_tx_queue(dev, skb); + } + + skb_set_queue_mapping(skb, queue_index); +} + /* register_prot_hook must be invoked with the po->bind_lock held, * or from a context in which asynchronous accesses to the packet * socket is not possible (packet_create()). @@ -1262,7 +1278,7 @@ static unsigned int fanout_demux_hash(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) { - return reciprocal_divide(skb->rxhash, num); + return reciprocal_scale(skb_get_hash(skb), num); } static unsigned int fanout_demux_lb(struct packet_fanout *f, @@ -1289,7 +1305,7 @@ static unsigned int fanout_demux_rnd(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) { - return reciprocal_divide(prandom_u32(), num); + return prandom_u32_max(num); } static unsigned int fanout_demux_rollover(struct packet_fanout *f, @@ -1313,6 +1329,13 @@ static unsigned int fanout_demux_rollover(struct packet_fanout *f, return idx; } +static unsigned int fanout_demux_qm(struct packet_fanout *f, + struct sk_buff *skb, + unsigned int num) +{ + return skb_get_queue_mapping(skb) % num; +} + static bool fanout_has_flag(struct packet_fanout *f, u16 flag) { return f->flags & (flag >> 8); @@ -1340,7 +1363,6 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev, if (!skb) return 0; } - skb_get_hash(skb); idx = fanout_demux_hash(f, skb, num); break; case PACKET_FANOUT_LB: @@ -1352,6 +1374,9 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev, case PACKET_FANOUT_RND: idx = fanout_demux_rnd(f, skb, num); break; + case PACKET_FANOUT_QM: + idx = fanout_demux_qm(f, skb, num); + break; case PACKET_FANOUT_ROLLOVER: idx = fanout_demux_rollover(f, skb, 0, (unsigned int) -1, num); break; @@ -1422,6 +1447,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) case PACKET_FANOUT_LB: case PACKET_FANOUT_CPU: case PACKET_FANOUT_RND: + case PACKET_FANOUT_QM: break; default: return -EINVAL; @@ -1822,7 +1848,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, skb->dropcount = atomic_read(&sk->sk_drops); __skb_queue_tail(&sk->sk_receive_queue, skb); spin_unlock(&sk->sk_receive_queue.lock); - sk->sk_data_ready(sk, skb->len); + sk->sk_data_ready(sk); return 0; drop_n_acct: @@ -2009,25 +2035,26 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, sll->sll_ifindex = dev->ifindex; smp_mb(); + #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1 - { + if (po->tp_version <= TPACKET_V2) { u8 *start, *end; - if (po->tp_version <= TPACKET_V2) { - end = (u8 *)PAGE_ALIGN((unsigned long)h.raw - + macoff + snaplen); - for (start = h.raw; start < end; start += PAGE_SIZE) - flush_dcache_page(pgv_to_page(start)); - } - smp_wmb(); + end = (u8 *) PAGE_ALIGN((unsigned long) h.raw + + macoff + snaplen); + + for (start = h.raw; start < end; start += PAGE_SIZE) + flush_dcache_page(pgv_to_page(start)); } + smp_wmb(); #endif + if (po->tp_version <= TPACKET_V2) __packet_set_status(po, h.raw, status); else prb_clear_blk_fill_status(&po->rx_ring); - sk->sk_data_ready(sk, 0); + sk->sk_data_ready(sk); drop_n_restore: if (skb_head != skb->data && skb_shared(skb)) { @@ -2042,7 +2069,7 @@ ring_is_full: po->stats.stats1.tp_drops++; spin_unlock(&sk->sk_receive_queue.lock); - sk->sk_data_ready(sk, 0); + sk->sk_data_ready(sk); kfree_skb(copy_skb); goto drop_n_restore; } @@ -2050,9 +2077,9 @@ ring_is_full: static void tpacket_destruct_skb(struct sk_buff *skb) { struct packet_sock *po = pkt_sk(skb->sk); - void *ph; if (likely(po->tx_ring.pg_vec)) { + void *ph; __u32 ts; ph = skb_shinfo(skb)->destructor_arg; @@ -2230,8 +2257,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) if (unlikely(!(dev->flags & IFF_UP))) goto out_put; - reserve = dev->hard_header_len; - + reserve = dev->hard_header_len + VLAN_HLEN; size_max = po->tx_ring.frame_size - (po->tp_hdrlen - sizeof(struct sockaddr_ll)); @@ -2258,8 +2284,19 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) goto out_status; tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto, - addr, hlen); + addr, hlen); + if (tp_len > dev->mtu + dev->hard_header_len) { + struct ethhdr *ehdr; + /* Earlier code assumed this would be a VLAN pkt, + * double-check this now that we have the actual + * packet in hand. + */ + skb_reset_mac_header(skb); + ehdr = eth_hdr(skb); + if (ehdr->h_proto != htons(ETH_P_8021Q)) + tp_len = -EMSGSIZE; + } if (unlikely(tp_len < 0)) { if (po->tp_loss) { __packet_set_status(po, ph, @@ -2274,7 +2311,8 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) } } - skb_set_queue_mapping(skb, packet_pick_tx_queue(dev)); + packet_pick_tx_queue(dev, skb); + skb->destructor = tpacket_destruct_skb; __packet_set_status(po, ph, TP_STATUS_SENDING); packet_inc_pending(&po->tx_ring); @@ -2488,7 +2526,8 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) skb->dev = dev; skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; - skb_set_queue_mapping(skb, packet_pick_tx_queue(dev)); + + packet_pick_tx_queue(dev, skb); if (po->has_vnet_hdr) { if (vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { @@ -3649,34 +3688,26 @@ static void free_pg_vec(struct pgv *pg_vec, unsigned int order, static char *alloc_one_pg_vec_page(unsigned long order) { - char *buffer = NULL; + char *buffer; gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY; buffer = (char *) __get_free_pages(gfp_flags, order); - if (buffer) return buffer; - /* - * __get_free_pages failed, fall back to vmalloc - */ + /* __get_free_pages failed, fall back to vmalloc */ buffer = vzalloc((1 << order) * PAGE_SIZE); - if (buffer) return buffer; - /* - * vmalloc failed, lets dig into swap here - */ + /* vmalloc failed, lets dig into swap here */ gfp_flags &= ~__GFP_NORETRY; - buffer = (char *)__get_free_pages(gfp_flags, order); + buffer = (char *) __get_free_pages(gfp_flags, order); if (buffer) return buffer; - /* - * complete and utter failure - */ + /* complete and utter failure */ return NULL; } @@ -3783,7 +3814,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, */ if (!tx_ring) init_prb_bdqc(po, rb, pg_vec, req_u, tx_ring); - break; + break; default: break; } |
