Diffstat (limited to 'net/packet/af_packet.c')
-rw-r--r--  net/packet/af_packet.c | 1181
1 file changed, 679 insertions(+), 502 deletions(-)
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 2dbb32b988c..b85c67ccb79 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -73,7 +73,6 @@ #include <net/sock.h> #include <linux/errno.h> #include <linux/timer.h> -#include <asm/system.h> #include <asm/uaccess.h> #include <asm/ioctls.h> #include <asm/page.h> @@ -89,11 +88,13 @@ #include <linux/virtio_net.h> #include <linux/errqueue.h> #include <linux/net_tstamp.h> - +#include <linux/percpu.h> #ifdef CONFIG_INET #include <net/inet_common.h> #endif +#include "internal.h" + /* Assumptions: - if device has no dev->hard_header routine, it adds and removes ll header @@ -147,14 +148,6 @@ dev->hard_header == NULL (ll header is added by device, we cannot control it) /* Private packet socket structures. */ -struct packet_mclist { - struct packet_mclist *next; - int ifindex; - int count; - unsigned short type; - unsigned short alen; - unsigned char addr[MAX_ADDR_LEN]; -}; /* identical to struct packet_mreq except it has * a longer address field. */ @@ -165,10 +158,16 @@ struct packet_mreq_max { unsigned char mr_address[MAX_ADDR_LEN]; }; +union tpacket_uhdr { + struct tpacket_hdr *h1; + struct tpacket2_hdr *h2; + struct tpacket3_hdr *h3; + void *raw; +}; + static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, int closing, int tx_ring); - #define V3_ALIGNMENT (8) #define BLK_HDR_LEN (ALIGN(sizeof(struct tpacket_block_desc), V3_ALIGNMENT)) @@ -176,63 +175,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, #define BLK_PLUS_PRIV(sz_of_priv) \ (BLK_HDR_LEN + ALIGN((sz_of_priv), V3_ALIGNMENT)) -/* kbdq - kernel block descriptor queue */ -struct tpacket_kbdq_core { - struct pgv *pkbdq; - unsigned int feature_req_word; - unsigned int hdrlen; - unsigned char reset_pending_on_curr_blk; - unsigned char delete_blk_timer; - unsigned short kactive_blk_num; - unsigned short blk_sizeof_priv; - - /* last_kactive_blk_num: - * trick to see if user-space has caught up - * in order to avoid refreshing timer when every single pkt arrives. 
- */ - unsigned short last_kactive_blk_num; - - char *pkblk_start; - char *pkblk_end; - int kblk_size; - unsigned int knum_blocks; - uint64_t knxt_seq_num; - char *prev; - char *nxt_offset; - struct sk_buff *skb; - - atomic_t blk_fill_in_prog; - - /* Default is set to 8ms */ -#define DEFAULT_PRB_RETIRE_TOV (8) - - unsigned short retire_blk_tov; - unsigned short version; - unsigned long tov_in_jiffies; - - /* timer to retire an outstanding block */ - struct timer_list retire_blk_timer; -}; - #define PGV_FROM_VMALLOC 1 -struct pgv { - char *buffer; -}; - -struct packet_ring_buffer { - struct pgv *pg_vec; - unsigned int head; - unsigned int frames_per_block; - unsigned int frame_size; - unsigned int frame_max; - - unsigned int pg_vec_order; - unsigned int pg_vec_pages; - unsigned int pg_vec_len; - - struct tpacket_kbdq_core prb_bdqc; - atomic_t pending; -}; #define BLOCK_STATUS(x) ((x)->hdr.bh1.block_status) #define BLOCK_NUM_PKTS(x) ((x)->hdr.bh1.num_pkts) @@ -244,6 +187,8 @@ struct packet_ring_buffer { struct packet_sock; static int tpacket_snd(struct packet_sock *po, struct msghdr *msg); +static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev); static void *packet_previous_frame(struct packet_sock *po, struct packet_ring_buffer *rb, @@ -270,52 +215,6 @@ static void prb_fill_vlan_info(struct tpacket_kbdq_core *, struct tpacket3_hdr *); static void packet_flush_mclist(struct sock *sk); -struct packet_fanout; -struct packet_sock { - /* struct sock has to be the first member of packet_sock */ - struct sock sk; - struct packet_fanout *fanout; - struct tpacket_stats stats; - union tpacket_stats_u stats_u; - struct packet_ring_buffer rx_ring; - struct packet_ring_buffer tx_ring; - int copy_thresh; - spinlock_t bind_lock; - struct mutex pg_vec_lock; - unsigned int running:1, /* prot_hook is attached*/ - auxdata:1, - origdev:1, - has_vnet_hdr:1; - int ifindex; /* bound device */ - __be16 num; - struct packet_mclist *mclist; - atomic_t mapped; - enum tpacket_versions tp_version; - unsigned int tp_hdrlen; - unsigned int tp_reserve; - unsigned int tp_loss:1; - unsigned int tp_tstamp; - struct packet_type prot_hook ____cacheline_aligned_in_smp; -}; - -#define PACKET_FANOUT_MAX 256 - -struct packet_fanout { -#ifdef CONFIG_NET_NS - struct net *net; -#endif - unsigned int num_members; - u16 id; - u8 type; - u8 defrag; - atomic_t rr_cur; - struct list_head list; - struct sock *arr[PACKET_FANOUT_MAX]; - spinlock_t lock; - atomic_t sk_ref; - struct packet_type prot_hook ____cacheline_aligned_in_smp; -}; - struct packet_skb_cb { unsigned int origlen; union { @@ -335,13 +234,101 @@ struct packet_skb_cb { (((x)->kactive_blk_num < ((x)->knum_blocks-1)) ? 
\ ((x)->kactive_blk_num+1) : 0) -static struct packet_sock *pkt_sk(struct sock *sk) +static void __fanout_unlink(struct sock *sk, struct packet_sock *po); +static void __fanout_link(struct sock *sk, struct packet_sock *po); + +static int packet_direct_xmit(struct sk_buff *skb) { - return (struct packet_sock *)sk; + struct net_device *dev = skb->dev; + const struct net_device_ops *ops = dev->netdev_ops; + netdev_features_t features; + struct netdev_queue *txq; + int ret = NETDEV_TX_BUSY; + u16 queue_map; + + if (unlikely(!netif_running(dev) || + !netif_carrier_ok(dev))) + goto drop; + + features = netif_skb_features(skb); + if (skb_needs_linearize(skb, features) && + __skb_linearize(skb)) + goto drop; + + queue_map = skb_get_queue_mapping(skb); + txq = netdev_get_tx_queue(dev, queue_map); + + local_bh_disable(); + + HARD_TX_LOCK(dev, txq, smp_processor_id()); + if (!netif_xmit_frozen_or_drv_stopped(txq)) { + ret = ops->ndo_start_xmit(skb, dev); + if (ret == NETDEV_TX_OK) + txq_trans_update(txq); + } + HARD_TX_UNLOCK(dev, txq); + + local_bh_enable(); + + if (!dev_xmit_complete(ret)) + kfree_skb(skb); + + return ret; +drop: + atomic_long_inc(&dev->tx_dropped); + kfree_skb(skb); + return NET_XMIT_DROP; } -static void __fanout_unlink(struct sock *sk, struct packet_sock *po); -static void __fanout_link(struct sock *sk, struct packet_sock *po); +static struct net_device *packet_cached_dev_get(struct packet_sock *po) +{ + struct net_device *dev; + + rcu_read_lock(); + dev = rcu_dereference(po->cached_dev); + if (likely(dev)) + dev_hold(dev); + rcu_read_unlock(); + + return dev; +} + +static void packet_cached_dev_assign(struct packet_sock *po, + struct net_device *dev) +{ + rcu_assign_pointer(po->cached_dev, dev); +} + +static void packet_cached_dev_reset(struct packet_sock *po) +{ + RCU_INIT_POINTER(po->cached_dev, NULL); +} + +static bool packet_use_direct_xmit(const struct packet_sock *po) +{ + return po->xmit == packet_direct_xmit; +} + +static u16 __packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb) +{ + return (u16) raw_smp_processor_id() % dev->real_num_tx_queues; +} + +static void packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb) +{ + const struct net_device_ops *ops = dev->netdev_ops; + u16 queue_index; + + if (ops->ndo_select_queue) { + queue_index = ops->ndo_select_queue(dev, skb, NULL, + __packet_pick_tx_queue); + queue_index = netdev_cap_txqueue(dev, queue_index); + } else { + queue_index = __packet_pick_tx_queue(dev, skb); + } + + skb_set_queue_mapping(skb, queue_index); +} /* register_prot_hook must be invoked with the po->bind_lock held, * or from a context in which asynchronous accesses to the packet @@ -350,11 +337,13 @@ static void __fanout_link(struct sock *sk, struct packet_sock *po); static void register_prot_hook(struct sock *sk) { struct packet_sock *po = pkt_sk(sk); + if (!po->running) { if (po->fanout) __fanout_link(sk, po); else dev_add_pack(&po->prot_hook); + sock_hold(sk); po->running = 1; } @@ -372,10 +361,12 @@ static void __unregister_prot_hook(struct sock *sk, bool sync) struct packet_sock *po = pkt_sk(sk); po->running = 0; + if (po->fanout) __fanout_unlink(sk, po); else __dev_remove_pack(&po->prot_hook); + __sock_put(sk); if (sync) { @@ -402,11 +393,7 @@ static inline __pure struct page *pgv_to_page(void *addr) static void __packet_set_status(struct packet_sock *po, void *frame, int status) { - union { - struct tpacket_hdr *h1; - struct tpacket2_hdr *h2; - void *raw; - } h; + union tpacket_uhdr h; h.raw = frame; switch 
(po->tp_version) { @@ -429,11 +416,7 @@ static void __packet_set_status(struct packet_sock *po, void *frame, int status) static int __packet_get_status(struct packet_sock *po, void *frame) { - union { - struct tpacket_hdr *h1; - struct tpacket2_hdr *h2; - void *raw; - } h; + union tpacket_uhdr h; smp_rmb(); @@ -453,17 +436,66 @@ static int __packet_get_status(struct packet_sock *po, void *frame) } } +static __u32 tpacket_get_timestamp(struct sk_buff *skb, struct timespec *ts, + unsigned int flags) +{ + struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb); + + if (shhwtstamps) { + if ((flags & SOF_TIMESTAMPING_SYS_HARDWARE) && + ktime_to_timespec_cond(shhwtstamps->syststamp, ts)) + return TP_STATUS_TS_SYS_HARDWARE; + if ((flags & SOF_TIMESTAMPING_RAW_HARDWARE) && + ktime_to_timespec_cond(shhwtstamps->hwtstamp, ts)) + return TP_STATUS_TS_RAW_HARDWARE; + } + + if (ktime_to_timespec_cond(skb->tstamp, ts)) + return TP_STATUS_TS_SOFTWARE; + + return 0; +} + +static __u32 __packet_set_timestamp(struct packet_sock *po, void *frame, + struct sk_buff *skb) +{ + union tpacket_uhdr h; + struct timespec ts; + __u32 ts_status; + + if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp))) + return 0; + + h.raw = frame; + switch (po->tp_version) { + case TPACKET_V1: + h.h1->tp_sec = ts.tv_sec; + h.h1->tp_usec = ts.tv_nsec / NSEC_PER_USEC; + break; + case TPACKET_V2: + h.h2->tp_sec = ts.tv_sec; + h.h2->tp_nsec = ts.tv_nsec; + break; + case TPACKET_V3: + default: + WARN(1, "TPACKET version not supported.\n"); + BUG(); + } + + /* one flush is safe, as both fields always lie on the same cacheline */ + flush_dcache_page(pgv_to_page(&h.h1->tp_sec)); + smp_wmb(); + + return ts_status; +} + static void *packet_lookup_frame(struct packet_sock *po, struct packet_ring_buffer *rb, unsigned int position, int status) { unsigned int pg_vec_pos, frame_offset; - union { - struct tpacket_hdr *h1; - struct tpacket2_hdr *h2; - void *raw; - } h; + union tpacket_uhdr h; pg_vec_pos = position / rb->frames_per_block; frame_offset = position % rb->frames_per_block; @@ -495,11 +527,12 @@ static void prb_shutdown_retire_blk_timer(struct packet_sock *po, { struct tpacket_kbdq_core *pkc; - pkc = tx_ring ? &po->tx_ring.prb_bdqc : &po->rx_ring.prb_bdqc; + pkc = tx_ring ? GET_PBDQC_FROM_RB(&po->tx_ring) : + GET_PBDQC_FROM_RB(&po->rx_ring); - spin_lock(&rb_queue->lock); + spin_lock_bh(&rb_queue->lock); pkc->delete_blk_timer = 1; - spin_unlock(&rb_queue->lock); + spin_unlock_bh(&rb_queue->lock); prb_del_retire_blk_timer(pkc); } @@ -521,7 +554,8 @@ static void prb_setup_retire_blk_timer(struct packet_sock *po, int tx_ring) if (tx_ring) BUG(); - pkc = tx_ring ? &po->tx_ring.prb_bdqc : &po->rx_ring.prb_bdqc; + pkc = tx_ring ? 
GET_PBDQC_FROM_RB(&po->tx_ring) : + GET_PBDQC_FROM_RB(&po->rx_ring); prb_init_blk_timer(po, pkc, prb_retire_rx_blk_timer_expired); } @@ -532,6 +566,7 @@ static int prb_calc_retire_blk_tmo(struct packet_sock *po, unsigned int mbits = 0, msec = 0, div = 0, tmo = 0; struct ethtool_cmd ecmd; int err; + u32 speed; rtnl_lock(); dev = __dev_get_by_index(sock_net(&po->sk), po->ifindex); @@ -540,25 +575,18 @@ static int prb_calc_retire_blk_tmo(struct packet_sock *po, return DEFAULT_PRB_RETIRE_TOV; } err = __ethtool_get_settings(dev, &ecmd); + speed = ethtool_cmd_speed(&ecmd); rtnl_unlock(); if (!err) { - switch (ecmd.speed) { - case SPEED_10000: - msec = 1; - div = 10000/1000; - break; - case SPEED_1000: - msec = 1; - div = 1000/1000; - break; /* * If the link speed is so slow you don't really * need to worry about perf anyways */ - case SPEED_100: - case SPEED_10: - default: + if (speed < SPEED_1000 || speed == SPEED_UNKNOWN) { return DEFAULT_PRB_RETIRE_TOV; + } else { + msec = 1; + div = speed / 1000; } } @@ -585,7 +613,7 @@ static void init_prb_bdqc(struct packet_sock *po, struct pgv *pg_vec, union tpacket_req_u *req_u, int tx_ring) { - struct tpacket_kbdq_core *p1 = &rb->prb_bdqc; + struct tpacket_kbdq_core *p1 = GET_PBDQC_FROM_RB(rb); struct tpacket_block_desc *pbd; memset(p1, 0x0, sizeof(*p1)); @@ -593,13 +621,13 @@ static void init_prb_bdqc(struct packet_sock *po, p1->knxt_seq_num = 1; p1->pkbdq = pg_vec; pbd = (struct tpacket_block_desc *)pg_vec[0].buffer; - p1->pkblk_start = (char *)pg_vec[0].buffer; + p1->pkblk_start = pg_vec[0].buffer; p1->kblk_size = req_u->req3.tp_block_size; p1->knum_blocks = req_u->req3.tp_block_nr; p1->hdrlen = po->tp_hdrlen; p1->version = po->tp_version; p1->last_kactive_blk_num = 0; - po->stats_u.stats3.tp_freeze_q_cnt = 0; + po->stats.stats3.tp_freeze_q_cnt = 0; if (req_u->req3.tp_retire_blk_tov) p1->retire_blk_tov = req_u->req3.tp_retire_blk_tov; else @@ -649,7 +677,7 @@ static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *pkc) static void prb_retire_rx_blk_timer_expired(unsigned long data) { struct packet_sock *po = (struct packet_sock *)data; - struct tpacket_kbdq_core *pkc = &po->rx_ring.prb_bdqc; + struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(&po->rx_ring); unsigned int frozen; struct tpacket_block_desc *pbd; @@ -767,7 +795,7 @@ static void prb_close_block(struct tpacket_kbdq_core *pkc1, struct tpacket3_hdr *last_pkt; struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1; - if (po->stats.tp_drops) + if (po->stats.stats3.tp_drops) status |= TP_STATUS_LOSING; last_pkt = (struct tpacket3_hdr *)pkc1->prev; @@ -813,37 +841,33 @@ static void prb_open_block(struct tpacket_kbdq_core *pkc1, smp_rmb(); - if (likely(TP_STATUS_KERNEL == BLOCK_STATUS(pbd1))) { + /* We could have just memset this but we will lose the + * flexibility of making the priv area sticky + */ - /* We could have just memset this but we will lose the - * flexibility of making the priv area sticky - */ - BLOCK_SNUM(pbd1) = pkc1->knxt_seq_num++; - BLOCK_NUM_PKTS(pbd1) = 0; - BLOCK_LEN(pbd1) = BLK_PLUS_PRIV(pkc1->blk_sizeof_priv); - getnstimeofday(&ts); - h1->ts_first_pkt.ts_sec = ts.tv_sec; - h1->ts_first_pkt.ts_nsec = ts.tv_nsec; - pkc1->pkblk_start = (char *)pbd1; - pkc1->nxt_offset = (char *)(pkc1->pkblk_start + - BLK_PLUS_PRIV(pkc1->blk_sizeof_priv)); - BLOCK_O2FP(pbd1) = (__u32)BLK_PLUS_PRIV(pkc1->blk_sizeof_priv); - BLOCK_O2PRIV(pbd1) = BLK_HDR_LEN; - pbd1->version = pkc1->version; - pkc1->prev = pkc1->nxt_offset; - pkc1->pkblk_end = pkc1->pkblk_start + pkc1->kblk_size; - 
prb_thaw_queue(pkc1); - _prb_refresh_rx_retire_blk_timer(pkc1); - - smp_wmb(); + BLOCK_SNUM(pbd1) = pkc1->knxt_seq_num++; + BLOCK_NUM_PKTS(pbd1) = 0; + BLOCK_LEN(pbd1) = BLK_PLUS_PRIV(pkc1->blk_sizeof_priv); - return; - } + getnstimeofday(&ts); + + h1->ts_first_pkt.ts_sec = ts.tv_sec; + h1->ts_first_pkt.ts_nsec = ts.tv_nsec; + + pkc1->pkblk_start = (char *)pbd1; + pkc1->nxt_offset = pkc1->pkblk_start + BLK_PLUS_PRIV(pkc1->blk_sizeof_priv); + + BLOCK_O2FP(pbd1) = (__u32)BLK_PLUS_PRIV(pkc1->blk_sizeof_priv); + BLOCK_O2PRIV(pbd1) = BLK_HDR_LEN; + + pbd1->version = pkc1->version; + pkc1->prev = pkc1->nxt_offset; + pkc1->pkblk_end = pkc1->pkblk_start + pkc1->kblk_size; - WARN(1, "ERROR block:%p is NOT FREE status:%d kactive_blk_num:%d\n", - pbd1, BLOCK_STATUS(pbd1), pkc1->kactive_blk_num); - dump_stack(); - BUG(); + prb_thaw_queue(pkc1); + _prb_refresh_rx_retire_blk_timer(pkc1); + + smp_wmb(); } /* @@ -873,7 +897,7 @@ static void prb_freeze_queue(struct tpacket_kbdq_core *pkc, struct packet_sock *po) { pkc->reset_pending_on_curr_blk = 1; - po->stats_u.stats3.tp_freeze_q_cnt++; + po->stats.stats3.tp_freeze_q_cnt++; } #define TOTAL_PKT_LEN_INCL_ALIGN(length) (ALIGN((length), V3_ALIGNMENT)) @@ -934,10 +958,6 @@ static void prb_retire_current_block(struct tpacket_kbdq_core *pkc, prb_close_block(pkc, pbd, po, status); return; } - - WARN(1, "ERROR-pbd[%d]:%p\n", pkc->kactive_blk_num, pbd); - dump_stack(); - BUG(); } static int prb_curr_blk_in_use(struct tpacket_kbdq_core *pkc, @@ -960,7 +980,7 @@ static void prb_clear_blk_fill_status(struct packet_ring_buffer *rb) static void prb_fill_rxhash(struct tpacket_kbdq_core *pkc, struct tpacket3_hdr *ppd) { - ppd->hv1.tp_rxhash = skb_get_rxhash(pkc->skb); + ppd->hv1.tp_rxhash = skb_get_hash(pkc->skb); } static void prb_clear_rxhash(struct tpacket_kbdq_core *pkc, @@ -974,15 +994,19 @@ static void prb_fill_vlan_info(struct tpacket_kbdq_core *pkc, { if (vlan_tx_tag_present(pkc->skb)) { ppd->hv1.tp_vlan_tci = vlan_tx_tag_get(pkc->skb); - ppd->tp_status = TP_STATUS_VLAN_VALID; + ppd->hv1.tp_vlan_tpid = ntohs(pkc->skb->vlan_proto); + ppd->tp_status = TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID; } else { - ppd->hv1.tp_vlan_tci = ppd->tp_status = 0; + ppd->hv1.tp_vlan_tci = 0; + ppd->hv1.tp_vlan_tpid = 0; + ppd->tp_status = TP_STATUS_AVAILABLE; } } static void prb_run_all_ft_ops(struct tpacket_kbdq_core *pkc, struct tpacket3_hdr *ppd) { + ppd->hv1.tp_padding = 0; prb_fill_vlan_info(pkc, ppd); if (pkc->feature_req_word & TP_FT_REQ_FILL_RXHASH) @@ -1019,7 +1043,7 @@ static void *__packet_lookup_frame_in_block(struct packet_sock *po, struct tpacket_block_desc *pbd; char *curr, *end; - pkc = GET_PBDQC_FROM_RB(((struct packet_ring_buffer *)&po->rx_ring)); + pkc = GET_PBDQC_FROM_RB(&po->rx_ring); pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc); /* Queue is frozen when user space is lagging behind */ @@ -1045,7 +1069,7 @@ static void *__packet_lookup_frame_in_block(struct packet_sock *po, smp_mb(); curr = pkc->nxt_offset; pkc->skb = skb; - end = (char *) ((char *)pbd + pkc->kblk_size); + end = (char *)pbd + pkc->kblk_size; /* first try the current block */ if (curr+TOTAL_PKT_LEN_INCL_ALIGN(len) < end) { @@ -1087,17 +1111,17 @@ static void *packet_current_rx_frame(struct packet_sock *po, default: WARN(1, "TPACKET version not supported\n"); BUG(); - return 0; + return NULL; } } static void *prb_lookup_block(struct packet_sock *po, struct packet_ring_buffer *rb, - unsigned int previous, + unsigned int idx, int status) { struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb); - 
struct tpacket_block_desc *pbd = GET_PBLOCK_DESC(pkc, previous); + struct tpacket_block_desc *pbd = GET_PBLOCK_DESC(pkc, idx); if (status != BLOCK_STATUS(pbd)) return NULL; @@ -1161,6 +1185,70 @@ static void packet_increment_head(struct packet_ring_buffer *buff) buff->head = buff->head != buff->frame_max ? buff->head+1 : 0; } +static void packet_inc_pending(struct packet_ring_buffer *rb) +{ + this_cpu_inc(*rb->pending_refcnt); +} + +static void packet_dec_pending(struct packet_ring_buffer *rb) +{ + this_cpu_dec(*rb->pending_refcnt); +} + +static unsigned int packet_read_pending(const struct packet_ring_buffer *rb) +{ + unsigned int refcnt = 0; + int cpu; + + /* We don't use pending refcount in rx_ring. */ + if (rb->pending_refcnt == NULL) + return 0; + + for_each_possible_cpu(cpu) + refcnt += *per_cpu_ptr(rb->pending_refcnt, cpu); + + return refcnt; +} + +static int packet_alloc_pending(struct packet_sock *po) +{ + po->rx_ring.pending_refcnt = NULL; + + po->tx_ring.pending_refcnt = alloc_percpu(unsigned int); + if (unlikely(po->tx_ring.pending_refcnt == NULL)) + return -ENOBUFS; + + return 0; +} + +static void packet_free_pending(struct packet_sock *po) +{ + free_percpu(po->tx_ring.pending_refcnt); +} + +static bool packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb) +{ + struct sock *sk = &po->sk; + bool has_room; + + if (po->prot_hook.func != tpacket_rcv) + return (atomic_read(&sk->sk_rmem_alloc) + skb->truesize) + <= sk->sk_rcvbuf; + + spin_lock(&sk->sk_receive_queue.lock); + if (po->tp_version == TPACKET_V3) + has_room = prb_lookup_block(po, &po->rx_ring, + po->rx_ring.prb_bdqc.kactive_blk_num, + TP_STATUS_KERNEL); + else + has_room = packet_lookup_frame(po, &po->rx_ring, + po->rx_ring.head, + TP_STATUS_KERNEL); + spin_unlock(&sk->sk_receive_queue.lock); + + return has_room; +} + static void packet_sock_destruct(struct sock *sk) { skb_queue_purge(&sk->sk_error_queue); @@ -1186,16 +1274,16 @@ static int fanout_rr_next(struct packet_fanout *f, unsigned int num) return x; } -static struct sock *fanout_demux_hash(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) +static unsigned int fanout_demux_hash(struct packet_fanout *f, + struct sk_buff *skb, + unsigned int num) { - u32 idx, hash = skb->rxhash; - - idx = ((u64)hash * num) >> 32; - - return f->arr[idx]; + return reciprocal_scale(skb_get_hash(skb), num); } -static struct sock *fanout_demux_lb(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) +static unsigned int fanout_demux_lb(struct packet_fanout *f, + struct sk_buff *skb, + unsigned int num) { int cur, old; @@ -1203,14 +1291,54 @@ static struct sock *fanout_demux_lb(struct packet_fanout *f, struct sk_buff *skb while ((old = atomic_cmpxchg(&f->rr_cur, cur, fanout_rr_next(f, num))) != cur) cur = old; - return f->arr[cur]; + return cur; +} + +static unsigned int fanout_demux_cpu(struct packet_fanout *f, + struct sk_buff *skb, + unsigned int num) +{ + return smp_processor_id() % num; +} + +static unsigned int fanout_demux_rnd(struct packet_fanout *f, + struct sk_buff *skb, + unsigned int num) +{ + return prandom_u32_max(num); +} + +static unsigned int fanout_demux_rollover(struct packet_fanout *f, + struct sk_buff *skb, + unsigned int idx, unsigned int skip, + unsigned int num) +{ + unsigned int i, j; + + i = j = min_t(int, f->next[idx], num - 1); + do { + if (i != skip && packet_rcv_has_room(pkt_sk(f->arr[i]), skb)) { + if (i != j) + f->next[idx] = i; + return i; + } + if (++i == num) + i = 0; + } while (i != j); + + return idx; } -static struct 
sock *fanout_demux_cpu(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) +static unsigned int fanout_demux_qm(struct packet_fanout *f, + struct sk_buff *skb, + unsigned int num) { - unsigned int cpu = smp_processor_id(); + return skb_get_queue_mapping(skb) % num; +} - return f->arr[cpu % num]; +static bool fanout_has_flag(struct packet_fanout *f, u16 flag) +{ + return f->flags & (flag >> 8); } static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev, @@ -1219,7 +1347,7 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev, struct packet_fanout *f = pt->af_packet_priv; unsigned int num = f->num_members; struct packet_sock *po; - struct sock *sk; + unsigned int idx; if (!net_eq(dev_net(dev), read_pnet(&f->net)) || !num) { @@ -1230,28 +1358,42 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev, switch (f->type) { case PACKET_FANOUT_HASH: default: - if (f->defrag) { + if (fanout_has_flag(f, PACKET_FANOUT_FLAG_DEFRAG)) { skb = ip_check_defrag(skb, IP_DEFRAG_AF_PACKET); if (!skb) return 0; } - skb_get_rxhash(skb); - sk = fanout_demux_hash(f, skb, num); + idx = fanout_demux_hash(f, skb, num); break; case PACKET_FANOUT_LB: - sk = fanout_demux_lb(f, skb, num); + idx = fanout_demux_lb(f, skb, num); break; case PACKET_FANOUT_CPU: - sk = fanout_demux_cpu(f, skb, num); + idx = fanout_demux_cpu(f, skb, num); + break; + case PACKET_FANOUT_RND: + idx = fanout_demux_rnd(f, skb, num); + break; + case PACKET_FANOUT_QM: + idx = fanout_demux_qm(f, skb, num); + break; + case PACKET_FANOUT_ROLLOVER: + idx = fanout_demux_rollover(f, skb, 0, (unsigned int) -1, num); break; } - po = pkt_sk(sk); + po = pkt_sk(f->arr[idx]); + if (fanout_has_flag(f, PACKET_FANOUT_FLAG_ROLLOVER) && + unlikely(!packet_rcv_has_room(po, skb))) { + idx = fanout_demux_rollover(f, skb, idx, idx, num); + po = pkt_sk(f->arr[idx]); + } return po->prot_hook.func(skb, dev, &po->prot_hook, orig_dev); } -static DEFINE_MUTEX(fanout_mutex); +DEFINE_MUTEX(fanout_mutex); +EXPORT_SYMBOL_GPL(fanout_mutex); static LIST_HEAD(fanout_list); static void __fanout_link(struct sock *sk, struct packet_sock *po) @@ -1281,18 +1423,31 @@ static void __fanout_unlink(struct sock *sk, struct packet_sock *po) spin_unlock(&f->lock); } +static bool match_fanout_group(struct packet_type *ptype, struct sock *sk) +{ + if (ptype->af_packet_priv == (void *)((struct packet_sock *)sk)->fanout) + return true; + + return false; +} + static int fanout_add(struct sock *sk, u16 id, u16 type_flags) { struct packet_sock *po = pkt_sk(sk); struct packet_fanout *f, *match; u8 type = type_flags & 0xff; - u8 defrag = (type_flags & PACKET_FANOUT_FLAG_DEFRAG) ? 
1 : 0; + u8 flags = type_flags >> 8; int err; switch (type) { + case PACKET_FANOUT_ROLLOVER: + if (type_flags & PACKET_FANOUT_FLAG_ROLLOVER) + return -EINVAL; case PACKET_FANOUT_HASH: case PACKET_FANOUT_LB: case PACKET_FANOUT_CPU: + case PACKET_FANOUT_RND: + case PACKET_FANOUT_QM: break; default: return -EINVAL; @@ -1314,7 +1469,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) } } err = -EINVAL; - if (match && match->defrag != defrag) + if (match && match->flags != flags) goto out; if (!match) { err = -ENOMEM; @@ -1324,7 +1479,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) write_pnet(&match->net, sock_net(sk)); match->id = id; match->type = type; - match->defrag = defrag; + match->flags = flags; atomic_set(&match->rr_cur, 0); INIT_LIST_HEAD(&match->list); spin_lock_init(&match->lock); @@ -1333,6 +1488,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) match->prot_hook.dev = po->prot_hook.dev; match->prot_hook.func = packet_rcv_fanout; match->prot_hook.af_packet_priv = match; + match->prot_hook.id_match = match_fanout_group; dev_add_pack(&match->prot_hook); list_add(&match->list, &fanout_list); } @@ -1363,9 +1519,9 @@ static void fanout_release(struct sock *sk) if (!f) return; + mutex_lock(&fanout_mutex); po->fanout = NULL; - mutex_lock(&fanout_mutex); if (atomic_dec_and_test(&f->sk_ref)) { list_del(&f->list); dev_remove_pack(&f->prot_hook); @@ -1454,11 +1610,12 @@ static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; - struct sockaddr_pkt *saddr = (struct sockaddr_pkt *)msg->msg_name; + DECLARE_SOCKADDR(struct sockaddr_pkt *, saddr, msg->msg_name); struct sk_buff *skb = NULL; struct net_device *dev; __be16 proto = 0; int err; + int extra_len = 0; /* * Get and verify the address. @@ -1476,7 +1633,7 @@ static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock, * Find the device first to size check it */ - saddr->spkt_device[13] = 0; + saddr->spkt_device[sizeof(saddr->spkt_device) - 1] = 0; retry: rcu_read_lock(); dev = dev_get_by_name_rcu(sock_net(sk), saddr->spkt_device); @@ -1493,8 +1650,16 @@ retry: * raw protocol and you must do your own fragmentation at this level. */ + if (unlikely(sock_flag(sk, SOCK_NOFCS))) { + if (!netif_supports_nofcs(dev)) { + err = -EPROTONOSUPPORT; + goto out_unlock; + } + extra_len = 4; /* We're doing our own CRC */ + } + err = -EMSGSIZE; - if (len > dev->mtu + dev->hard_header_len + VLAN_HLEN) + if (len > dev->mtu + dev->hard_header_len + VLAN_HLEN + extra_len) goto out_unlock; if (!skb) { @@ -1526,7 +1691,7 @@ retry: goto retry; } - if (len > (dev->mtu + dev->hard_header_len)) { + if (len > (dev->mtu + dev->hard_header_len + extra_len)) { /* Earlier code assumed this would be a VLAN pkt, * double-check this now that we have the actual * packet in hand. 
@@ -1544,9 +1709,13 @@ retry: skb->dev = dev; skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; - err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); - if (err < 0) - goto out_unlock; + + sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); + + if (unlikely(extra_len == 4)) + skb->no_fcs = 1; + + skb_probe_transport_header(skb, 0); dev_queue_xmit(skb); rcu_read_unlock(); @@ -1643,7 +1812,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, skb->data = skb_head; skb->len = skb_len; } - kfree_skb(skb); + consume_skb(skb); skb = nskb; } @@ -1675,16 +1844,16 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, nf_reset(skb); spin_lock(&sk->sk_receive_queue.lock); - po->stats.tp_packets++; + po->stats.stats1.tp_packets++; skb->dropcount = atomic_read(&sk->sk_drops); __skb_queue_tail(&sk->sk_receive_queue, skb); spin_unlock(&sk->sk_receive_queue.lock); - sk->sk_data_ready(sk, skb->len); + sk->sk_data_ready(sk); return 0; drop_n_acct: spin_lock(&sk->sk_receive_queue.lock); - po->stats.tp_drops++; + po->stats.stats1.tp_drops++; atomic_inc(&sk->sk_drops); spin_unlock(&sk->sk_receive_queue.lock); @@ -1704,21 +1873,22 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct sock *sk; struct packet_sock *po; struct sockaddr_ll *sll; - union { - struct tpacket_hdr *h1; - struct tpacket2_hdr *h2; - struct tpacket3_hdr *h3; - void *raw; - } h; + union tpacket_uhdr h; u8 *skb_head = skb->data; int skb_len = skb->len; unsigned int snaplen, res; unsigned long status = TP_STATUS_USER; unsigned short macoff, netoff, hdrlen; struct sk_buff *copy_skb = NULL; - struct timeval tv; struct timespec ts; - struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb); + __u32 ts_status; + + /* struct tpacket{2,3}_hdr is aligned to a multiple of TPACKET_ALIGNMENT. + * We may add members to them until current aligned size without forcing + * userspace to call getsockopt(..., PACKET_HDRLEN, ...). + */ + BUILD_BUG_ON(TPACKET_ALIGN(sizeof(*h.h2)) != 32); + BUILD_BUG_ON(TPACKET_ALIGN(sizeof(*h.h3)) != 48); if (skb->pkt_type == PACKET_LOOPBACK) goto drop; @@ -1753,7 +1923,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, macoff = netoff = TPACKET_ALIGN(po->tp_hdrlen) + 16 + po->tp_reserve; } else { - unsigned maclen = skb_network_offset(skb); + unsigned int maclen = skb_network_offset(skb); netoff = TPACKET_ALIGN(po->tp_hdrlen + (maclen < 16 ? 16 : maclen)) + po->tp_reserve; @@ -1790,10 +1960,10 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, * Anyways, moving it for V1/V2 only as V3 doesn't need this * at packet level. 
*/ - if (po->stats.tp_drops) + if (po->stats.stats1.tp_drops) status |= TP_STATUS_LOSING; } - po->stats.tp_packets++; + po->stats.stats1.tp_packets++; if (copy_skb) { status |= TP_STATUS_COPY; __skb_queue_tail(&sk->sk_receive_queue, copy_skb); @@ -1802,24 +1972,19 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, skb_copy_bits(skb, 0, h.raw + macoff, snaplen); + if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp))) + getnstimeofday(&ts); + + status |= ts_status; + switch (po->tp_version) { case TPACKET_V1: h.h1->tp_len = skb->len; h.h1->tp_snaplen = snaplen; h.h1->tp_mac = macoff; h.h1->tp_net = netoff; - if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE) - && shhwtstamps->syststamp.tv64) - tv = ktime_to_timeval(shhwtstamps->syststamp); - else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE) - && shhwtstamps->hwtstamp.tv64) - tv = ktime_to_timeval(shhwtstamps->hwtstamp); - else if (skb->tstamp.tv64) - tv = ktime_to_timeval(skb->tstamp); - else - do_gettimeofday(&tv); - h.h1->tp_sec = tv.tv_sec; - h.h1->tp_usec = tv.tv_usec; + h.h1->tp_sec = ts.tv_sec; + h.h1->tp_usec = ts.tv_nsec / NSEC_PER_USEC; hdrlen = sizeof(*h.h1); break; case TPACKET_V2: @@ -1827,25 +1992,17 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, h.h2->tp_snaplen = snaplen; h.h2->tp_mac = macoff; h.h2->tp_net = netoff; - if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE) - && shhwtstamps->syststamp.tv64) - ts = ktime_to_timespec(shhwtstamps->syststamp); - else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE) - && shhwtstamps->hwtstamp.tv64) - ts = ktime_to_timespec(shhwtstamps->hwtstamp); - else if (skb->tstamp.tv64) - ts = ktime_to_timespec(skb->tstamp); - else - getnstimeofday(&ts); h.h2->tp_sec = ts.tv_sec; h.h2->tp_nsec = ts.tv_nsec; if (vlan_tx_tag_present(skb)) { h.h2->tp_vlan_tci = vlan_tx_tag_get(skb); - status |= TP_STATUS_VLAN_VALID; + h.h2->tp_vlan_tpid = ntohs(skb->vlan_proto); + status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID; } else { h.h2->tp_vlan_tci = 0; + h.h2->tp_vlan_tpid = 0; } - h.h2->tp_padding = 0; + memset(h.h2->tp_padding, 0, sizeof(h.h2->tp_padding)); hdrlen = sizeof(*h.h2); break; case TPACKET_V3: @@ -1857,18 +2014,9 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, h.h3->tp_snaplen = snaplen; h.h3->tp_mac = macoff; h.h3->tp_net = netoff; - if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE) - && shhwtstamps->syststamp.tv64) - ts = ktime_to_timespec(shhwtstamps->syststamp); - else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE) - && shhwtstamps->hwtstamp.tv64) - ts = ktime_to_timespec(shhwtstamps->hwtstamp); - else if (skb->tstamp.tv64) - ts = ktime_to_timespec(skb->tstamp); - else - getnstimeofday(&ts); h.h3->tp_sec = ts.tv_sec; h.h3->tp_nsec = ts.tv_nsec; + memset(h.h3->tp_padding, 0, sizeof(h.h3->tp_padding)); hdrlen = sizeof(*h.h3); break; default: @@ -1887,25 +2035,26 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, sll->sll_ifindex = dev->ifindex; smp_mb(); + #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1 - { + if (po->tp_version <= TPACKET_V2) { u8 *start, *end; - if (po->tp_version <= TPACKET_V2) { - end = (u8 *)PAGE_ALIGN((unsigned long)h.raw - + macoff + snaplen); - for (start = h.raw; start < end; start += PAGE_SIZE) - flush_dcache_page(pgv_to_page(start)); - } - smp_wmb(); + end = (u8 *) PAGE_ALIGN((unsigned long) h.raw + + macoff + snaplen); + + for (start = h.raw; start < end; start += PAGE_SIZE) + flush_dcache_page(pgv_to_page(start)); } + smp_wmb(); #endif + if 
(po->tp_version <= TPACKET_V2) __packet_set_status(po, h.raw, status); else prb_clear_blk_fill_status(&po->rx_ring); - sk->sk_data_ready(sk, 0); + sk->sk_data_ready(sk); drop_n_restore: if (skb_head != skb->data && skb_shared(skb)) { @@ -1917,10 +2066,10 @@ drop: return 0; ring_is_full: - po->stats.tp_drops++; + po->stats.stats1.tp_drops++; spin_unlock(&sk->sk_receive_queue.lock); - sk->sk_data_ready(sk, 0); + sk->sk_data_ready(sk); kfree_skb(copy_skb); goto drop_n_restore; } @@ -1928,14 +2077,16 @@ ring_is_full: static void tpacket_destruct_skb(struct sk_buff *skb) { struct packet_sock *po = pkt_sk(skb->sk); - void *ph; if (likely(po->tx_ring.pg_vec)) { + void *ph; + __u32 ts; + ph = skb_shinfo(skb)->destructor_arg; - BUG_ON(__packet_get_status(po, ph) != TP_STATUS_SENDING); - BUG_ON(atomic_read(&po->tx_ring.pending) == 0); - atomic_dec(&po->tx_ring.pending); - __packet_set_status(po, ph, TP_STATUS_AVAILABLE); + packet_dec_pending(&po->tx_ring); + + ts = __packet_set_timestamp(po, ph, skb); + __packet_set_status(po, ph, TP_STATUS_AVAILABLE | ts); } sock_wfree(skb); @@ -1945,11 +2096,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, void *frame, struct net_device *dev, int size_max, __be16 proto, unsigned char *addr, int hlen) { - union { - struct tpacket_hdr *h1; - struct tpacket2_hdr *h2; - void *raw; - } ph; + union tpacket_uhdr ph; int to_write, offset, len, tp_len, nr_frags, len_max; struct socket *sock = po->sk.sk_socket; struct page *page; @@ -1962,6 +2109,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, skb->dev = dev; skb->priority = po->sk.sk_priority; skb->mark = po->sk.sk_mark; + sock_tx_timestamp(&po->sk, &skb_shinfo(skb)->tx_flags); skb_shinfo(skb)->destructor_arg = ph.raw; switch (po->tp_version) { @@ -1980,7 +2128,37 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, skb_reserve(skb, hlen); skb_reset_network_header(skb); - data = ph.raw + po->tp_hdrlen - sizeof(struct sockaddr_ll); + if (!packet_use_direct_xmit(po)) + skb_probe_transport_header(skb, 0); + if (unlikely(po->tp_tx_has_off)) { + int off_min, off_max, off; + off_min = po->tp_hdrlen - sizeof(struct sockaddr_ll); + off_max = po->tx_ring.frame_size - tp_len; + if (sock->type == SOCK_DGRAM) { + switch (po->tp_version) { + case TPACKET_V2: + off = ph.h2->tp_net; + break; + default: + off = ph.h1->tp_net; + break; + } + } else { + switch (po->tp_version) { + case TPACKET_V2: + off = ph.h2->tp_mac; + break; + default: + off = ph.h1->tp_mac; + break; + } + } + if (unlikely((off < off_min) || (off_max < off))) + return -EINVAL; + data = ph.raw + off; + } else { + data = ph.raw + po->tp_hdrlen - sizeof(struct sockaddr_ll); + } to_write = tp_len; if (sock->type == SOCK_DGRAM) { @@ -2006,7 +2184,6 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, to_write -= dev->hard_header_len; } - err = -EFAULT; offset = offset_in_page(data); len_max = PAGE_SIZE - offset; len = ((to_write > len_max) ? 
len_max : to_write); @@ -2044,21 +2221,20 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) struct sk_buff *skb; struct net_device *dev; __be16 proto; - bool need_rls_dev = false; int err, reserve = 0; void *ph; - struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name; + DECLARE_SOCKADDR(struct sockaddr_ll *, saddr, msg->msg_name); + bool need_wait = !(msg->msg_flags & MSG_DONTWAIT); int tp_len, size_max; unsigned char *addr; int len_sum = 0; - int status = 0; + int status = TP_STATUS_AVAILABLE; int hlen, tlen; mutex_lock(&po->pg_vec_lock); - err = -EBUSY; - if (saddr == NULL) { - dev = po->prot_hook.dev; + if (likely(saddr == NULL)) { + dev = packet_cached_dev_get(po); proto = po->num; addr = NULL; } else { @@ -2072,19 +2248,16 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) proto = saddr->sll_protocol; addr = saddr->sll_addr; dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex); - need_rls_dev = true; } err = -ENXIO; if (unlikely(dev == NULL)) goto out; - - reserve = dev->hard_header_len; - err = -ENETDOWN; if (unlikely(!(dev->flags & IFF_UP))) goto out_put; + reserve = dev->hard_header_len + VLAN_HLEN; size_max = po->tx_ring.frame_size - (po->tp_hdrlen - sizeof(struct sockaddr_ll)); @@ -2093,10 +2266,10 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) do { ph = packet_current_frame(po, &po->tx_ring, - TP_STATUS_SEND_REQUEST); - + TP_STATUS_SEND_REQUEST); if (unlikely(ph == NULL)) { - schedule(); + if (need_wait && need_resched()) + schedule(); continue; } @@ -2111,8 +2284,19 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) goto out_status; tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto, - addr, hlen); + addr, hlen); + if (tp_len > dev->mtu + dev->hard_header_len) { + struct ethhdr *ehdr; + /* Earlier code assumed this would be a VLAN pkt, + * double-check this now that we have the actual + * packet in hand. + */ + skb_reset_mac_header(skb); + ehdr = eth_hdr(skb); + if (ehdr->h_proto != htons(ETH_P_8021Q)) + tp_len = -EMSGSIZE; + } if (unlikely(tp_len < 0)) { if (po->tp_loss) { __packet_set_status(po, ph, @@ -2127,12 +2311,14 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) } } + packet_pick_tx_queue(dev, skb); + skb->destructor = tpacket_destruct_skb; __packet_set_status(po, ph, TP_STATUS_SENDING); - atomic_inc(&po->tx_ring.pending); + packet_inc_pending(&po->tx_ring); status = TP_STATUS_SEND_REQUEST; - err = dev_queue_xmit(skb); + err = po->xmit(skb); if (unlikely(err > 0)) { err = net_xmit_errno(err); if (err && __packet_get_status(po, ph) == @@ -2150,9 +2336,13 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) packet_increment_head(&po->tx_ring); len_sum += tp_len; } while (likely((ph != NULL) || - ((!(msg->msg_flags & MSG_DONTWAIT)) && - (atomic_read(&po->tx_ring.pending)))) - ); + /* Note: packet_read_pending() might be slow if we have + * to call it as it's per_cpu variable, but in fast-path + * we already short-circuit the loop with the first + * condition, and luckily don't have to go that path + * anyway. 
+ */ + (need_wait && packet_read_pending(&po->tx_ring)))); err = len_sum; goto out_put; @@ -2161,8 +2351,7 @@ out_status: __packet_set_status(po, ph, status); kfree_skb(skb); out_put: - if (need_rls_dev) - dev_put(dev); + dev_put(dev); out: mutex_unlock(&po->pg_vec_lock); return err; @@ -2180,7 +2369,7 @@ static struct sk_buff *packet_alloc_skb(struct sock *sk, size_t prepad, linear = len; skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock, - err); + err, 0); if (!skb) return NULL; @@ -2192,15 +2381,13 @@ static struct sk_buff *packet_alloc_skb(struct sock *sk, size_t prepad, return skb; } -static int packet_snd(struct socket *sock, - struct msghdr *msg, size_t len) +static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; - struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name; + DECLARE_SOCKADDR(struct sockaddr_ll *, saddr, msg->msg_name); struct sk_buff *skb; struct net_device *dev; __be16 proto; - bool need_rls_dev = false; unsigned char *addr; int err, reserve = 0; struct virtio_net_hdr vnet_hdr = { 0 }; @@ -2209,13 +2396,14 @@ static int packet_snd(struct socket *sock, struct packet_sock *po = pkt_sk(sk); unsigned short gso_type = 0; int hlen, tlen; + int extra_len = 0; /* * Get and verify the address. */ - if (saddr == NULL) { - dev = po->prot_hook.dev; + if (likely(saddr == NULL)) { + dev = packet_cached_dev_get(po); proto = po->num; addr = NULL; } else { @@ -2227,19 +2415,17 @@ static int packet_snd(struct socket *sock, proto = saddr->sll_protocol; addr = saddr->sll_addr; dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex); - need_rls_dev = true; } err = -ENXIO; - if (dev == NULL) + if (unlikely(dev == NULL)) goto out_unlock; - if (sock->type == SOCK_RAW) - reserve = dev->hard_header_len; - err = -ENETDOWN; - if (!(dev->flags & IFF_UP)) + if (unlikely(!(dev->flags & IFF_UP))) goto out_unlock; + if (sock->type == SOCK_RAW) + reserve = dev->hard_header_len; if (po->has_vnet_hdr) { vnet_hdr_len = sizeof(vnet_hdr); @@ -2288,8 +2474,16 @@ static int packet_snd(struct socket *sock, } } + if (unlikely(sock_flag(sk, SOCK_NOFCS))) { + if (!netif_supports_nofcs(dev)) { + err = -EPROTONOSUPPORT; + goto out_unlock; + } + extra_len = 4; /* We're doing our own CRC */ + } + err = -EMSGSIZE; - if (!gso_type && (len > dev->mtu + reserve + VLAN_HLEN)) + if (!gso_type && (len > dev->mtu + reserve + VLAN_HLEN + extra_len)) goto out_unlock; err = -ENOBUFS; @@ -2311,11 +2505,10 @@ static int packet_snd(struct socket *sock, err = skb_copy_datagram_from_iovec(skb, offset, msg->msg_iov, 0, len); if (err) goto out_free; - err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); - if (err < 0) - goto out_free; - if (!gso_type && (len > dev->mtu + reserve)) { + sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); + + if (!gso_type && (len > dev->mtu + reserve + extra_len)) { /* Earlier code assumed this would be a VLAN pkt, * double-check this now that we have the actual * packet in hand. 
@@ -2334,6 +2527,8 @@ static int packet_snd(struct socket *sock, skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; + packet_pick_tx_queue(dev, skb); + if (po->has_vnet_hdr) { if (vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { if (!skb_partial_csum_set(skb, vnet_hdr.csum_start, @@ -2353,23 +2548,23 @@ static int packet_snd(struct socket *sock, len += vnet_hdr_len; } - /* - * Now send it - */ + if (!packet_use_direct_xmit(po)) + skb_probe_transport_header(skb, reserve); + if (unlikely(extra_len == 4)) + skb->no_fcs = 1; - err = dev_queue_xmit(skb); + err = po->xmit(skb); if (err > 0 && (err = net_xmit_errno(err)) != 0) goto out_unlock; - if (need_rls_dev) - dev_put(dev); + dev_put(dev); return len; out_free: kfree_skb(skb); out_unlock: - if (dev && need_rls_dev) + if (dev) dev_put(dev); out: return err; @@ -2380,6 +2575,7 @@ static int packet_sendmsg(struct kiocb *iocb, struct socket *sock, { struct sock *sk = sock->sk; struct packet_sock *po = pkt_sk(sk); + if (po->tx_ring.pg_vec) return tpacket_snd(po, msg); else @@ -2404,13 +2600,18 @@ static int packet_release(struct socket *sock) net = sock_net(sk); po = pkt_sk(sk); - spin_lock_bh(&net->packet.sklist_lock); + mutex_lock(&net->packet.sklist_lock); sk_del_node_init_rcu(sk); + mutex_unlock(&net->packet.sklist_lock); + + preempt_disable(); sock_prot_inuse_add(net, sk->sk_prot, -1); - spin_unlock_bh(&net->packet.sklist_lock); + preempt_enable(); spin_lock(&po->bind_lock); unregister_prot_hook(sk, false); + packet_cached_dev_reset(po); + if (po->prot_hook.dev) { dev_put(po->prot_hook.dev); po->prot_hook.dev = NULL; @@ -2419,13 +2620,15 @@ static int packet_release(struct socket *sock) packet_flush_mclist(sk); - memset(&req_u, 0, sizeof(req_u)); - - if (po->rx_ring.pg_vec) + if (po->rx_ring.pg_vec) { + memset(&req_u, 0, sizeof(req_u)); packet_set_ring(sk, &req_u, 1, 0); + } - if (po->tx_ring.pg_vec) + if (po->tx_ring.pg_vec) { + memset(&req_u, 0, sizeof(req_u)); packet_set_ring(sk, &req_u, 1, 1); + } fanout_release(sk); @@ -2439,6 +2642,7 @@ static int packet_release(struct socket *sock) /* Purge queues */ skb_queue_purge(&sk->sk_receive_queue); + packet_free_pending(po); sk_refcnt_debug_release(sk); sock_put(sk); @@ -2449,9 +2653,12 @@ static int packet_release(struct socket *sock) * Attach a packet hook. */ -static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protocol) +static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 proto) { struct packet_sock *po = pkt_sk(sk); + const struct net_device *dev_curr; + __be16 proto_curr; + bool need_rehook; if (po->fanout) { if (dev) @@ -2461,18 +2668,29 @@ static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protoc } lock_sock(sk); - spin_lock(&po->bind_lock); - unregister_prot_hook(sk, true); - po->num = protocol; - po->prot_hook.type = protocol; - if (po->prot_hook.dev) - dev_put(po->prot_hook.dev); - po->prot_hook.dev = dev; - po->ifindex = dev ? dev->ifindex : 0; + proto_curr = po->prot_hook.type; + dev_curr = po->prot_hook.dev; + + need_rehook = proto_curr != proto || dev_curr != dev; - if (protocol == 0) + if (need_rehook) { + unregister_prot_hook(sk, true); + + po->num = proto; + po->prot_hook.type = proto; + + if (po->prot_hook.dev) + dev_put(po->prot_hook.dev); + + po->prot_hook.dev = dev; + + po->ifindex = dev ? 
dev->ifindex : 0; + packet_cached_dev_assign(po, dev); + } + + if (proto == 0 || !need_rehook) goto out_unlock; if (!dev || (dev->flags & IFF_UP)) { @@ -2562,7 +2780,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, __be16 proto = (__force __be16)protocol; /* weird, but documented */ int err; - if (!capable(CAP_NET_RAW)) + if (!ns_capable(net->user_ns, CAP_NET_RAW)) return -EPERM; if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW && sock->type != SOCK_PACKET) @@ -2584,6 +2802,13 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, po = pkt_sk(sk); sk->sk_family = PF_PACKET; po->num = proto; + po->xmit = dev_queue_xmit; + + err = packet_alloc_pending(po); + if (err) + goto out2; + + packet_cached_dev_reset(po); sk->sk_destruct = packet_sock_destruct; sk_refcnt_debug_inc(sk); @@ -2606,57 +2831,17 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, register_prot_hook(sk); } - spin_lock_bh(&net->packet.sklist_lock); + mutex_lock(&net->packet.sklist_lock); sk_add_node_rcu(sk, &net->packet.sklist); + mutex_unlock(&net->packet.sklist_lock); + + preempt_disable(); sock_prot_inuse_add(net, &packet_proto, 1); - spin_unlock_bh(&net->packet.sklist_lock); + preempt_enable(); return 0; -out: - return err; -} - -static int packet_recv_error(struct sock *sk, struct msghdr *msg, int len) -{ - struct sock_exterr_skb *serr; - struct sk_buff *skb, *skb2; - int copied, err; - - err = -EAGAIN; - skb = skb_dequeue(&sk->sk_error_queue); - if (skb == NULL) - goto out; - - copied = skb->len; - if (copied > len) { - msg->msg_flags |= MSG_TRUNC; - copied = len; - } - err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); - if (err) - goto out_free_skb; - - sock_recv_timestamp(msg, sk, skb); - - serr = SKB_EXT_ERR(skb); - put_cmsg(msg, SOL_PACKET, PACKET_TX_TIMESTAMP, - sizeof(serr->ee), &serr->ee); - - msg->msg_flags |= MSG_ERRQUEUE; - err = copied; - - /* Reset and regenerate socket error */ - spin_lock_bh(&sk->sk_error_queue.lock); - sk->sk_err = 0; - if ((skb2 = skb_peek(&sk->sk_error_queue)) != NULL) { - sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno; - spin_unlock_bh(&sk->sk_error_queue.lock); - sk->sk_error_report(sk); - } else - spin_unlock_bh(&sk->sk_error_queue.lock); - -out_free_skb: - kfree_skb(skb); +out2: + sk_free(sk); out: return err; } @@ -2672,7 +2857,6 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, struct sock *sk = sock->sk; struct sk_buff *skb; int copied, err; - struct sockaddr_ll *sll; int vnet_hdr_len = 0; err = -EINVAL; @@ -2686,7 +2870,8 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, #endif if (flags & MSG_ERRQUEUE) { - err = packet_recv_error(sk, msg, len); + err = sock_recv_errqueue(sk, msg, len, + SOL_PACKET, PACKET_TX_TIMESTAMP); goto out; } @@ -2755,22 +2940,10 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, goto out_free; } - /* - * If the address length field is there to be filled in, we fill - * it in now. - */ - - sll = &PACKET_SKB_CB(skb)->sa.ll; - if (sock->type == SOCK_PACKET) - msg->msg_namelen = sizeof(struct sockaddr_pkt); - else - msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr); - - /* - * You lose any data beyond the buffer you gave. If it worries a - * user program they can ask the device for its MTU anyway. + /* You lose any data beyond the buffer you gave. If it worries + * a user program they can ask the device for its MTU + * anyway. 
*/ - copied = skb->len; if (copied > len) { copied = len; @@ -2783,9 +2956,21 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, sock_recv_ts_and_drops(msg, sk, skb); - if (msg->msg_name) + if (msg->msg_name) { + /* If the address length field is there to be filled + * in, we fill it in now. + */ + if (sock->type == SOCK_PACKET) { + __sockaddr_check_size(sizeof(struct sockaddr_pkt)); + msg->msg_namelen = sizeof(struct sockaddr_pkt); + } else { + struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll; + msg->msg_namelen = sll->sll_halen + + offsetof(struct sockaddr_ll, sll_addr); + } memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, msg->msg_namelen); + } if (pkt_sk(sk)->auxdata) { struct tpacket_auxdata aux; @@ -2799,11 +2984,12 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, aux.tp_net = skb_network_offset(skb); if (vlan_tx_tag_present(skb)) { aux.tp_vlan_tci = vlan_tx_tag_get(skb); - aux.tp_status |= TP_STATUS_VLAN_VALID; + aux.tp_vlan_tpid = ntohs(skb->vlan_proto); + aux.tp_status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID; } else { aux.tp_vlan_tci = 0; + aux.tp_vlan_tpid = 0; } - aux.tp_padding = 0; put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux); } @@ -2829,12 +3015,11 @@ static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr, return -EOPNOTSUPP; uaddr->sa_family = AF_PACKET; + memset(uaddr->sa_data, 0, sizeof(uaddr->sa_data)); rcu_read_lock(); dev = dev_get_by_index_rcu(sock_net(sk), pkt_sk(sk)->ifindex); if (dev) - strncpy(uaddr->sa_data, dev->name, 14); - else - memset(uaddr->sa_data, 0, 14); + strlcpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data)); rcu_read_unlock(); *uaddr_len = sizeof(*uaddr); @@ -3192,6 +3377,31 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv return fanout_add(sk, val & 0xffff, val >> 16); } + case PACKET_TX_HAS_OFF: + { + unsigned int val; + + if (optlen != sizeof(val)) + return -EINVAL; + if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) + return -EBUSY; + if (copy_from_user(&val, optval, sizeof(val))) + return -EFAULT; + po->tp_tx_has_off = !!val; + return 0; + } + case PACKET_QDISC_BYPASS: + { + int val; + + if (optlen != sizeof(val)) + return -EINVAL; + if (copy_from_user(&val, optval, sizeof(val))) + return -EFAULT; + + po->xmit = val ? 
packet_direct_xmit : dev_queue_xmit; + return 0; + } default: return -ENOPROTOOPT; } @@ -3201,12 +3411,11 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) { int len; - int val; + int val, lv = sizeof(val); struct sock *sk = sock->sk; struct packet_sock *po = pkt_sk(sk); - void *data; - struct tpacket_stats st; - union tpacket_stats_u st_u; + void *data = &val; + union tpacket_stats_u st; if (level != SOL_PACKET) return -ENOPROTOOPT; @@ -3219,54 +3428,33 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, switch (optname) { case PACKET_STATISTICS: - if (po->tp_version == TPACKET_V3) { - len = sizeof(struct tpacket_stats_v3); - } else { - if (len > sizeof(struct tpacket_stats)) - len = sizeof(struct tpacket_stats); - } spin_lock_bh(&sk->sk_receive_queue.lock); + memcpy(&st, &po->stats, sizeof(st)); + memset(&po->stats, 0, sizeof(po->stats)); + spin_unlock_bh(&sk->sk_receive_queue.lock); + if (po->tp_version == TPACKET_V3) { - memcpy(&st_u.stats3, &po->stats, - sizeof(struct tpacket_stats)); - st_u.stats3.tp_freeze_q_cnt = - po->stats_u.stats3.tp_freeze_q_cnt; - st_u.stats3.tp_packets += po->stats.tp_drops; - data = &st_u.stats3; + lv = sizeof(struct tpacket_stats_v3); + st.stats3.tp_packets += st.stats3.tp_drops; + data = &st.stats3; } else { - st = po->stats; - st.tp_packets += st.tp_drops; - data = &st; + lv = sizeof(struct tpacket_stats); + st.stats1.tp_packets += st.stats1.tp_drops; + data = &st.stats1; } - memset(&po->stats, 0, sizeof(st)); - spin_unlock_bh(&sk->sk_receive_queue.lock); + break; case PACKET_AUXDATA: - if (len > sizeof(int)) - len = sizeof(int); val = po->auxdata; - - data = &val; break; case PACKET_ORIGDEV: - if (len > sizeof(int)) - len = sizeof(int); val = po->origdev; - - data = &val; break; case PACKET_VNET_HDR: - if (len > sizeof(int)) - len = sizeof(int); val = po->has_vnet_hdr; - - data = &val; break; case PACKET_VERSION: - if (len > sizeof(int)) - len = sizeof(int); val = po->tp_version; - data = &val; break; case PACKET_HDRLEN: if (len > sizeof(int)) @@ -3286,39 +3474,35 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, default: return -EINVAL; } - data = &val; break; case PACKET_RESERVE: - if (len > sizeof(unsigned int)) - len = sizeof(unsigned int); val = po->tp_reserve; - data = &val; break; case PACKET_LOSS: - if (len > sizeof(unsigned int)) - len = sizeof(unsigned int); val = po->tp_loss; - data = &val; break; case PACKET_TIMESTAMP: - if (len > sizeof(int)) - len = sizeof(int); val = po->tp_tstamp; - data = &val; break; case PACKET_FANOUT: - if (len > sizeof(int)) - len = sizeof(int); val = (po->fanout ? 
((u32)po->fanout->id | - ((u32)po->fanout->type << 16)) : + ((u32)po->fanout->type << 16) | + ((u32)po->fanout->flags << 24)) : 0); - data = &val; + break; + case PACKET_TX_HAS_OFF: + val = po->tp_tx_has_off; + break; + case PACKET_QDISC_BYPASS: + val = packet_use_direct_xmit(po); break; default: return -ENOPROTOOPT; } + if (len > lv) + len = lv; if (put_user(len, optlen)) return -EFAULT; if (copy_to_user(optval, data, len)) @@ -3327,15 +3511,15 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, } -static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data) +static int packet_notifier(struct notifier_block *this, + unsigned long msg, void *ptr) { struct sock *sk; - struct hlist_node *node; - struct net_device *dev = data; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); rcu_read_lock(); - sk_for_each_rcu(sk, node, &net->packet.sklist) { + sk_for_each_rcu(sk, &net->packet.sklist) { struct packet_sock *po = pkt_sk(sk); switch (msg) { @@ -3354,6 +3538,7 @@ static int packet_notifier(struct notifier_block *this, unsigned long msg, void sk->sk_error_report(sk); } if (msg == NETDEV_UNREGISTER) { + packet_cached_dev_reset(po); po->ifindex = -1; if (po->prot_hook.dev) dev_put(po->prot_hook.dev); @@ -3503,34 +3688,26 @@ static void free_pg_vec(struct pgv *pg_vec, unsigned int order, static char *alloc_one_pg_vec_page(unsigned long order) { - char *buffer = NULL; + char *buffer; gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY; buffer = (char *) __get_free_pages(gfp_flags, order); - if (buffer) return buffer; - /* - * __get_free_pages failed, fall back to vmalloc - */ + /* __get_free_pages failed, fall back to vmalloc */ buffer = vzalloc((1 << order) * PAGE_SIZE); - if (buffer) return buffer; - /* - * vmalloc failed, lets dig into swap here - */ + /* vmalloc failed, lets dig into swap here */ gfp_flags &= ~__GFP_NORETRY; - buffer = (char *)__get_free_pages(gfp_flags, order); + buffer = (char *) __get_free_pages(gfp_flags, order); if (buffer) return buffer; - /* - * complete and utter failure - */ + /* complete and utter failure */ return NULL; } @@ -3585,7 +3762,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, if (!closing) { if (atomic_read(&po->mapped)) goto out; - if (atomic_read(&rb->pending)) + if (packet_read_pending(rb)) goto out; } @@ -3637,7 +3814,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, */ if (!tx_ring) init_prb_bdqc(po, rb, pg_vec, req_u, tx_ring); - break; + break; default: break; } @@ -3860,7 +4037,7 @@ static int packet_seq_show(struct seq_file *seq, void *v) po->ifindex, po->running, atomic_read(&s->sk_rmem_alloc), - sock_i_uid(s), + from_kuid_munged(seq_user_ns(seq), sock_i_uid(s)), sock_i_ino(s)); } @@ -3892,10 +4069,10 @@ static const struct file_operations packet_seq_fops = { static int __net_init packet_net_init(struct net *net) { - spin_lock_init(&net->packet.sklist_lock); + mutex_init(&net->packet.sklist_lock); INIT_HLIST_HEAD(&net->packet.sklist); - if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops)) + if (!proc_create("packet", 0, net->proc_net, &packet_seq_fops)) return -ENOMEM; return 0; @@ -3903,7 +4080,7 @@ static int __net_init packet_net_init(struct net *net) static void __net_exit packet_net_exit(struct net *net) { - proc_net_remove(net, "packet"); + remove_proc_entry("packet", net->proc_net); } static struct pernet_operations packet_net_ops = { |
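The fanout hunks above add two demux modes, PACKET_FANOUT_RND and PACKET_FANOUT_QM, and replace the single `defrag` byte with a `flags` byte so PACKET_FANOUT_FLAG_ROLLOVER can be combined with any mode; `packet_getsockopt(PACKET_FANOUT)` now reports those flags in bits 24-31. Below is a minimal userspace sketch of joining such a group, matching the encoding used by `fanout_add()` (group id in the low 16 bits, type and flags in the high 16 bits). The group id 42 and the ETH_P_ALL protocol are illustrative assumptions, and the QM mode needs a kernel that carries this patch:

```c
#include <arpa/inet.h>
#include <linux/if_ether.h>
#include <linux/if_packet.h>
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	/* Raw packet socket receiving all protocols. */
	int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
	if (fd < 0) {
		perror("socket");
		return 1;
	}

	/* fanout_add() takes the group id in the low 16 bits and the
	 * type/flags word in the high 16 bits; flags such as
	 * PACKET_FANOUT_FLAG_ROLLOVER sit in the top byte, matching the
	 * fanout_has_flag()/packet_getsockopt() encoding in the diff.
	 * Group id 42 is an arbitrary example value. */
	int fanout_arg = 42 |
			 ((PACKET_FANOUT_QM | PACKET_FANOUT_FLAG_ROLLOVER) << 16);

	if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT,
		       &fanout_arg, sizeof(fanout_arg)) < 0)
		perror("setsockopt(PACKET_FANOUT)");

	close(fd);
	return 0;
}
```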

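The diff also introduces PACKET_QDISC_BYPASS, which flips `po->xmit` between `dev_queue_xmit()` and the new `packet_direct_xmit()`. A hedged sketch of enabling it on an already-created AF_PACKET socket follows; it assumes a kernel with this change applied:

```c
#include <linux/if_packet.h>
#include <stdio.h>
#include <sys/socket.h>

/* Enable the PACKET_QDISC_BYPASS option added by this diff on an
 * existing AF_PACKET socket. Returns 0 on success, -1 on failure. */
static int enable_qdisc_bypass(int fd)
{
	int one = 1;

	/* The kernel side sets po->xmit = packet_direct_xmit, so frames
	 * from sendmsg()/TX_RING skip the qdisc layer entirely; drops
	 * are then only visible via the dev->tx_dropped counter that
	 * packet_direct_xmit() increments. */
	if (setsockopt(fd, SOL_PACKET, PACKET_QDISC_BYPASS,
		       &one, sizeof(one)) < 0) {
		perror("setsockopt(PACKET_QDISC_BYPASS)");
		return -1;
	}
	return 0;
}
```

Note the tradeoff this diff encodes: bypassing the qdisc trades queueing, shaping, and backpressure for lower latency, which is why it is opt-in per socket rather than the default transmit path.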