about summary refs log tree commit diff
path: root/net/packet/af_packet.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/packet/af_packet.c')
-rw-r--r-- net/packet/af_packet.c 143
1 files changed, 87 insertions, 56 deletions
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index d711ecbbb9d..b85c67ccb79 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -88,7 +88,6 @@
#include <linux/virtio_net.h>
#include <linux/errqueue.h>
#include <linux/net_tstamp.h>
-#include <linux/reciprocal_div.h>
#include <linux/percpu.h>
#ifdef CONFIG_INET
#include <net/inet_common.h>
@@ -244,40 +243,41 @@ static int packet_direct_xmit(struct sk_buff *skb)
const struct net_device_ops *ops = dev->netdev_ops;
netdev_features_t features;
struct netdev_queue *txq;
+ int ret = NETDEV_TX_BUSY;
u16 queue_map;
- int ret;
if (unlikely(!netif_running(dev) ||
- !netif_carrier_ok(dev))) {
- kfree_skb(skb);
- return NET_XMIT_DROP;
- }
+ !netif_carrier_ok(dev)))
+ goto drop;
features = netif_skb_features(skb);
if (skb_needs_linearize(skb, features) &&
- __skb_linearize(skb)) {
- kfree_skb(skb);
- return NET_XMIT_DROP;
- }
+ __skb_linearize(skb))
+ goto drop;
queue_map = skb_get_queue_mapping(skb);
txq = netdev_get_tx_queue(dev, queue_map);
- __netif_tx_lock_bh(txq);
- if (unlikely(netif_xmit_frozen_or_stopped(txq))) {
- ret = NETDEV_TX_BUSY;
- kfree_skb(skb);
- goto out;
+ local_bh_disable();
+
+ HARD_TX_LOCK(dev, txq, smp_processor_id());
+ if (!netif_xmit_frozen_or_drv_stopped(txq)) {
+ ret = ops->ndo_start_xmit(skb, dev);
+ if (ret == NETDEV_TX_OK)
+ txq_trans_update(txq);
}
+ HARD_TX_UNLOCK(dev, txq);
- ret = ops->ndo_start_xmit(skb, dev);
- if (likely(dev_xmit_complete(ret)))
- txq_trans_update(txq);
- else
+ local_bh_enable();
+
+ if (!dev_xmit_complete(ret))
kfree_skb(skb);
-out:
- __netif_tx_unlock_bh(txq);
+
return ret;
+drop:
+ atomic_long_inc(&dev->tx_dropped);
+ kfree_skb(skb);
+ return NET_XMIT_DROP;
}
static struct net_device *packet_cached_dev_get(struct packet_sock *po)
@@ -309,11 +309,27 @@ static bool packet_use_direct_xmit(const struct packet_sock *po)
return po->xmit == packet_direct_xmit;
}
-static u16 packet_pick_tx_queue(struct net_device *dev)
+static u16 __packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb)
{
return (u16) raw_smp_processor_id() % dev->real_num_tx_queues;
}
+static void packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb)
+{
+ const struct net_device_ops *ops = dev->netdev_ops;
+ u16 queue_index;
+
+ if (ops->ndo_select_queue) {
+ queue_index = ops->ndo_select_queue(dev, skb, NULL,
+ __packet_pick_tx_queue);
+ queue_index = netdev_cap_txqueue(dev, queue_index);
+ } else {
+ queue_index = __packet_pick_tx_queue(dev, skb);
+ }
+
+ skb_set_queue_mapping(skb, queue_index);
+}
+
/* register_prot_hook must be invoked with the po->bind_lock held,
* or from a context in which asynchronous accesses to the packet
* socket is not possible (packet_create()).
@@ -1262,7 +1278,7 @@ static unsigned int fanout_demux_hash(struct packet_fanout *f,
struct sk_buff *skb,
unsigned int num)
{
- return reciprocal_divide(skb->rxhash, num);
+ return reciprocal_scale(skb_get_hash(skb), num);
}
static unsigned int fanout_demux_lb(struct packet_fanout *f,
@@ -1289,7 +1305,7 @@ static unsigned int fanout_demux_rnd(struct packet_fanout *f,
struct sk_buff *skb,
unsigned int num)
{
- return reciprocal_divide(prandom_u32(), num);
+ return prandom_u32_max(num);
}
static unsigned int fanout_demux_rollover(struct packet_fanout *f,
@@ -1313,6 +1329,13 @@ static unsigned int fanout_demux_rollover(struct packet_fanout *f,
return idx;
}
+static unsigned int fanout_demux_qm(struct packet_fanout *f,
+ struct sk_buff *skb,
+ unsigned int num)
+{
+ return skb_get_queue_mapping(skb) % num;
+}
+
static bool fanout_has_flag(struct packet_fanout *f, u16 flag)
{
return f->flags & (flag >> 8);
@@ -1340,7 +1363,6 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev,
if (!skb)
return 0;
}
- skb_get_hash(skb);
idx = fanout_demux_hash(f, skb, num);
break;
case PACKET_FANOUT_LB:
@@ -1352,6 +1374,9 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev,
case PACKET_FANOUT_RND:
idx = fanout_demux_rnd(f, skb, num);
break;
+ case PACKET_FANOUT_QM:
+ idx = fanout_demux_qm(f, skb, num);
+ break;
case PACKET_FANOUT_ROLLOVER:
idx = fanout_demux_rollover(f, skb, 0, (unsigned int) -1, num);
break;
@@ -1422,6 +1447,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
case PACKET_FANOUT_LB:
case PACKET_FANOUT_CPU:
case PACKET_FANOUT_RND:
+ case PACKET_FANOUT_QM:
break;
default:
return -EINVAL;
@@ -1822,7 +1848,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
skb->dropcount = atomic_read(&sk->sk_drops);
__skb_queue_tail(&sk->sk_receive_queue, skb);
spin_unlock(&sk->sk_receive_queue.lock);
- sk->sk_data_ready(sk, skb->len);
+ sk->sk_data_ready(sk);
return 0;
drop_n_acct:
@@ -2009,25 +2035,26 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
sll->sll_ifindex = dev->ifindex;
smp_mb();
+
#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
- {
+ if (po->tp_version <= TPACKET_V2) {
u8 *start, *end;
- if (po->tp_version <= TPACKET_V2) {
- end = (u8 *)PAGE_ALIGN((unsigned long)h.raw
- + macoff + snaplen);
- for (start = h.raw; start < end; start += PAGE_SIZE)
- flush_dcache_page(pgv_to_page(start));
- }
- smp_wmb();
+ end = (u8 *) PAGE_ALIGN((unsigned long) h.raw +
+ macoff + snaplen);
+
+ for (start = h.raw; start < end; start += PAGE_SIZE)
+ flush_dcache_page(pgv_to_page(start));
}
+ smp_wmb();
#endif
+
if (po->tp_version <= TPACKET_V2)
__packet_set_status(po, h.raw, status);
else
prb_clear_blk_fill_status(&po->rx_ring);
- sk->sk_data_ready(sk, 0);
+ sk->sk_data_ready(sk);
drop_n_restore:
if (skb_head != skb->data && skb_shared(skb)) {
@@ -2042,7 +2069,7 @@ ring_is_full:
po->stats.stats1.tp_drops++;
spin_unlock(&sk->sk_receive_queue.lock);
- sk->sk_data_ready(sk, 0);
+ sk->sk_data_ready(sk);
kfree_skb(copy_skb);
goto drop_n_restore;
}
@@ -2050,9 +2077,9 @@ ring_is_full:
static void tpacket_destruct_skb(struct sk_buff *skb)
{
struct packet_sock *po = pkt_sk(skb->sk);
- void *ph;
if (likely(po->tx_ring.pg_vec)) {
+ void *ph;
__u32 ts;
ph = skb_shinfo(skb)->destructor_arg;
@@ -2230,8 +2257,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
if (unlikely(!(dev->flags & IFF_UP)))
goto out_put;
- reserve = dev->hard_header_len;
-
+ reserve = dev->hard_header_len + VLAN_HLEN;
size_max = po->tx_ring.frame_size
- (po->tp_hdrlen - sizeof(struct sockaddr_ll));
@@ -2258,8 +2284,19 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
goto out_status;
tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto,
- addr, hlen);
+ addr, hlen);
+ if (tp_len > dev->mtu + dev->hard_header_len) {
+ struct ethhdr *ehdr;
+ /* Earlier code assumed this would be a VLAN pkt,
+ * double-check this now that we have the actual
+ * packet in hand.
+ */
+ skb_reset_mac_header(skb);
+ ehdr = eth_hdr(skb);
+ if (ehdr->h_proto != htons(ETH_P_8021Q))
+ tp_len = -EMSGSIZE;
+ }
if (unlikely(tp_len < 0)) {
if (po->tp_loss) {
__packet_set_status(po, ph,
@@ -2274,7 +2311,8 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
}
}
- skb_set_queue_mapping(skb, packet_pick_tx_queue(dev));
+ packet_pick_tx_queue(dev, skb);
+
skb->destructor = tpacket_destruct_skb;
__packet_set_status(po, ph, TP_STATUS_SENDING);
packet_inc_pending(&po->tx_ring);
@@ -2488,7 +2526,8 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
skb->dev = dev;
skb->priority = sk->sk_priority;
skb->mark = sk->sk_mark;
- skb_set_queue_mapping(skb, packet_pick_tx_queue(dev));
+
+ packet_pick_tx_queue(dev, skb);
if (po->has_vnet_hdr) {
if (vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
@@ -3649,34 +3688,26 @@ static void free_pg_vec(struct pgv *pg_vec, unsigned int order,
static char *alloc_one_pg_vec_page(unsigned long order)
{
- char *buffer = NULL;
+ char *buffer;
gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP |
__GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY;
buffer = (char *) __get_free_pages(gfp_flags, order);
-
if (buffer)
return buffer;
- /*
- * __get_free_pages failed, fall back to vmalloc
- */
+ /* __get_free_pages failed, fall back to vmalloc */
buffer = vzalloc((1 << order) * PAGE_SIZE);
-
if (buffer)
return buffer;
- /*
- * vmalloc failed, lets dig into swap here
- */
+ /* vmalloc failed, lets dig into swap here */
gfp_flags &= ~__GFP_NORETRY;
- buffer = (char *)__get_free_pages(gfp_flags, order);
+ buffer = (char *) __get_free_pages(gfp_flags, order);
if (buffer)
return buffer;
- /*
- * complete and utter failure
- */
+ /* complete and utter failure */
return NULL;
}
@@ -3783,7 +3814,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
*/
if (!tx_ring)
init_prb_bdqc(po, rb, pg_vec, req_u, tx_ring);
- break;
+ break;
default:
break;
}