diff options
Diffstat (limited to 'net/core/dev.c')
-rw-r--r-- | net/core/dev.c | 595 |
1 files changed, 328 insertions, 267 deletions
diff --git a/net/core/dev.c b/net/core/dev.c index ba3b7ea5ebb..3721db71635 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -147,6 +147,8 @@ struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly; struct list_head ptype_all __read_mostly; /* Taps */ static struct list_head offload_base __read_mostly; +static int netif_rx_internal(struct sk_buff *skb); + /* * The @dev_base_head list is protected by @dev_base_lock and the rtnl * semaphore. @@ -480,7 +482,7 @@ EXPORT_SYMBOL(dev_add_offload); * and must not be freed until after all the CPU's have gone * through a quiescent state. */ -void __dev_remove_offload(struct packet_offload *po) +static void __dev_remove_offload(struct packet_offload *po) { struct list_head *head = &offload_base; struct packet_offload *po1; @@ -498,7 +500,6 @@ void __dev_remove_offload(struct packet_offload *po) out: spin_unlock(&offload_lock); } -EXPORT_SYMBOL(__dev_remove_offload); /** * dev_remove_offload - remove packet offload handler @@ -1118,6 +1119,8 @@ rollback: write_seqcount_end(&devnet_rename_seq); + netdev_adjacent_rename_links(dev, oldname); + write_lock_bh(&dev_base_lock); hlist_del_rcu(&dev->name_hlist); write_unlock_bh(&dev_base_lock); @@ -1137,6 +1140,7 @@ rollback: err = ret; write_seqcount_begin(&devnet_rename_seq); memcpy(dev->name, oldname, IFNAMSIZ); + memcpy(oldname, newname, IFNAMSIZ); goto rollback; } else { pr_err("%s: name change rollback failed: %d\n", @@ -1566,14 +1570,14 @@ EXPORT_SYMBOL(unregister_netdevice_notifier); * are as for raw_notifier_call_chain(). */ -int call_netdevice_notifiers_info(unsigned long val, struct net_device *dev, - struct netdev_notifier_info *info) +static int call_netdevice_notifiers_info(unsigned long val, + struct net_device *dev, + struct netdev_notifier_info *info) { ASSERT_RTNL(); netdev_notifier_info_init(info, dev); return raw_notifier_call_chain(&netdev_chain, val, info); } -EXPORT_SYMBOL(call_netdevice_notifiers_info); /** * call_netdevice_notifiers - call all network notifier blocks @@ -1699,7 +1703,7 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) skb_scrub_packet(skb, true); skb->protocol = eth_type_trans(skb, dev); - return netif_rx(skb); + return netif_rx_internal(skb); } EXPORT_SYMBOL_GPL(dev_forward_skb); @@ -2079,7 +2083,7 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) } EXPORT_SYMBOL(netif_set_real_num_tx_queues); -#ifdef CONFIG_RPS +#ifdef CONFIG_SYSFS /** * netif_set_real_num_rx_queues - set actual number of RX queues used * @dev: Network device @@ -2145,30 +2149,42 @@ void __netif_schedule(struct Qdisc *q) } EXPORT_SYMBOL(__netif_schedule); -void dev_kfree_skb_irq(struct sk_buff *skb) +struct dev_kfree_skb_cb { + enum skb_free_reason reason; +}; + +static struct dev_kfree_skb_cb *get_kfree_skb_cb(const struct sk_buff *skb) { - if (atomic_dec_and_test(&skb->users)) { - struct softnet_data *sd; - unsigned long flags; + return (struct dev_kfree_skb_cb *)skb->cb; +} + +void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason) +{ + unsigned long flags; - local_irq_save(flags); - sd = &__get_cpu_var(softnet_data); - skb->next = sd->completion_queue; - sd->completion_queue = skb; - raise_softirq_irqoff(NET_TX_SOFTIRQ); - local_irq_restore(flags); + if (likely(atomic_read(&skb->users) == 1)) { + smp_rmb(); + atomic_set(&skb->users, 0); + } else if (likely(!atomic_dec_and_test(&skb->users))) { + return; } + get_kfree_skb_cb(skb)->reason = reason; + local_irq_save(flags); + skb->next = __this_cpu_read(softnet_data.completion_queue); + __this_cpu_write(softnet_data.completion_queue, skb); + raise_softirq_irqoff(NET_TX_SOFTIRQ); + local_irq_restore(flags); } -EXPORT_SYMBOL(dev_kfree_skb_irq); +EXPORT_SYMBOL(__dev_kfree_skb_irq); -void dev_kfree_skb_any(struct sk_buff *skb) +void __dev_kfree_skb_any(struct sk_buff *skb, enum skb_free_reason reason) { if (in_irq() || irqs_disabled()) - dev_kfree_skb_irq(skb); + __dev_kfree_skb_irq(skb, reason); else dev_kfree_skb(skb); } -EXPORT_SYMBOL(dev_kfree_skb_any); +EXPORT_SYMBOL(__dev_kfree_skb_any); /** @@ -2442,13 +2458,8 @@ static void dev_gso_skb_destructor(struct sk_buff *skb) { struct dev_gso_cb *cb; - do { - struct sk_buff *nskb = skb->next; - - skb->next = nskb->next; - nskb->next = NULL; - kfree_skb(nskb); - } while (skb->next); + kfree_skb_list(skb->next); + skb->next = NULL; cb = DEV_GSO_CB(skb); if (cb->destructor) @@ -2523,23 +2534,8 @@ netdev_features_t netif_skb_features(struct sk_buff *skb) } EXPORT_SYMBOL(netif_skb_features); -/* - * Returns true if either: - * 1. skb has frag_list and the device doesn't support FRAGLIST, or - * 2. skb is fragmented and the device does not support SG. - */ -static inline int skb_needs_linearize(struct sk_buff *skb, - netdev_features_t features) -{ - return skb_is_nonlinear(skb) && - ((skb_has_frag_list(skb) && - !(features & NETIF_F_FRAGLIST)) || - (skb_shinfo(skb)->nr_frags && - !(features & NETIF_F_SG))); -} - int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, - struct netdev_queue *txq, void *accel_priv) + struct netdev_queue *txq) { const struct net_device_ops *ops = dev->netdev_ops; int rc = NETDEV_TX_OK; @@ -2605,13 +2601,10 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, dev_queue_xmit_nit(skb, dev); skb_len = skb->len; - if (accel_priv) - rc = ops->ndo_dfwd_start_xmit(skb, dev, accel_priv); - else - rc = ops->ndo_start_xmit(skb, dev); - + trace_net_dev_start_xmit(skb, dev); + rc = ops->ndo_start_xmit(skb, dev); trace_net_dev_xmit(skb, rc, dev, skb_len); - if (rc == NETDEV_TX_OK && txq) + if (rc == NETDEV_TX_OK) txq_trans_update(txq); return rc; } @@ -2627,10 +2620,8 @@ gso: dev_queue_xmit_nit(nskb, dev); skb_len = nskb->len; - if (accel_priv) - rc = ops->ndo_dfwd_start_xmit(nskb, dev, accel_priv); - else - rc = ops->ndo_start_xmit(nskb, dev); + trace_net_dev_start_xmit(nskb, dev); + rc = ops->ndo_start_xmit(nskb, dev); trace_net_dev_xmit(nskb, rc, dev, skb_len); if (unlikely(rc != NETDEV_TX_OK)) { if (rc & ~NETDEV_TX_MASK) @@ -2750,7 +2741,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, return rc; } -#if IS_ENABLED(CONFIG_NETPRIO_CGROUP) +#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO) static void skb_update_prio(struct sk_buff *skb) { struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap); @@ -2787,8 +2778,9 @@ int dev_loopback_xmit(struct sk_buff *skb) EXPORT_SYMBOL(dev_loopback_xmit); /** - * dev_queue_xmit - transmit a buffer + * __dev_queue_xmit - transmit a buffer * @skb: buffer to transmit + * @accel_priv: private data used for L2 forwarding offload * * Queue a buffer for transmission to a network device. The caller must * have set the device and priority and built the buffer before calling @@ -2811,7 +2803,7 @@ EXPORT_SYMBOL(dev_loopback_xmit); * the BH enable code must have IRQs enabled so that it will not deadlock. * --BLG */ -int dev_queue_xmit(struct sk_buff *skb) +int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv) { struct net_device *dev = skb->dev; struct netdev_queue *txq; @@ -2827,7 +2819,7 @@ int dev_queue_xmit(struct sk_buff *skb) skb_update_prio(skb); - txq = netdev_pick_tx(dev, skb); + txq = netdev_pick_tx(dev, skb, accel_priv); q = rcu_dereference_bh(txq->qdisc); #ifdef CONFIG_NET_CLS_ACT @@ -2863,7 +2855,7 @@ int dev_queue_xmit(struct sk_buff *skb) if (!netif_xmit_stopped(txq)) { __this_cpu_inc(xmit_recursion); - rc = dev_hard_start_xmit(skb, dev, txq, NULL); + rc = dev_hard_start_xmit(skb, dev, txq); __this_cpu_dec(xmit_recursion); if (dev_xmit_complete(rc)) { HARD_TX_UNLOCK(dev, txq); @@ -2892,8 +2884,19 @@ out: rcu_read_unlock_bh(); return rc; } + +int dev_queue_xmit(struct sk_buff *skb) +{ + return __dev_queue_xmit(skb, NULL); +} EXPORT_SYMBOL(dev_queue_xmit); +int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv) +{ + return __dev_queue_xmit(skb, accel_priv); +} +EXPORT_SYMBOL(dev_queue_xmit_accel); + /*======================================================================= Receiver routines @@ -3009,7 +3012,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, } skb_reset_network_header(skb); - if (!skb_get_rxhash(skb)) + if (!skb_get_hash(skb)) goto done; flow_table = rcu_dereference(rxqueue->rps_flow_table); @@ -3154,7 +3157,7 @@ static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen) rcu_read_lock(); fl = rcu_dereference(sd->flow_limit); if (fl) { - new_flow = skb_get_rxhash(skb) & (fl->num_buckets - 1); + new_flow = skb_get_hash(skb) & (fl->num_buckets - 1); old_flow = fl->history[fl->history_head]; fl->history[fl->history_head] = new_flow; @@ -3222,22 +3225,7 @@ enqueue: return NET_RX_DROP; } -/** - * netif_rx - post buffer to the network code - * @skb: buffer to post - * - * This function receives a packet from a device driver and queues it for - * the upper (protocol) levels to process. It always succeeds. The buffer - * may be dropped during processing for congestion control or by the - * protocol layers. - * - * return values: - * NET_RX_SUCCESS (no congestion) - * NET_RX_DROP (packet was dropped) - * - */ - -int netif_rx(struct sk_buff *skb) +static int netif_rx_internal(struct sk_buff *skb) { int ret; @@ -3273,14 +3261,38 @@ int netif_rx(struct sk_buff *skb) } return ret; } + +/** + * netif_rx - post buffer to the network code + * @skb: buffer to post + * + * This function receives a packet from a device driver and queues it for + * the upper (protocol) levels to process. It always succeeds. The buffer + * may be dropped during processing for congestion control or by the + * protocol layers. + * + * return values: + * NET_RX_SUCCESS (no congestion) + * NET_RX_DROP (packet was dropped) + * + */ + +int netif_rx(struct sk_buff *skb) +{ + trace_netif_rx_entry(skb); + + return netif_rx_internal(skb); +} EXPORT_SYMBOL(netif_rx); int netif_rx_ni(struct sk_buff *skb) { int err; + trace_netif_rx_ni_entry(skb); + preempt_disable(); - err = netif_rx(skb); + err = netif_rx_internal(skb); if (local_softirq_pending()) do_softirq(); preempt_enable(); @@ -3306,7 +3318,10 @@ static void net_tx_action(struct softirq_action *h) clist = clist->next; WARN_ON(atomic_read(&skb->users)); - trace_kfree_skb(skb, net_tx_action); + if (likely(get_kfree_skb_cb(skb)->reason == SKB_REASON_CONSUMED)) + trace_consume_skb(skb); + else + trace_kfree_skb(skb, net_tx_action); __kfree_skb(skb); } } @@ -3662,22 +3677,7 @@ static int __netif_receive_skb(struct sk_buff *skb) return ret; } -/** - * netif_receive_skb - process receive buffer from network - * @skb: buffer to process - * - * netif_receive_skb() is the main receive data processing function. - * It always succeeds. The buffer may be dropped during processing - * for congestion control or by the protocol layers. - * - * This function may only be called from softirq context and interrupts - * should be enabled. - * - * Return values (usually ignored): - * NET_RX_SUCCESS: no congestion - * NET_RX_DROP: packet was dropped - */ -int netif_receive_skb(struct sk_buff *skb) +static int netif_receive_skb_internal(struct sk_buff *skb) { net_timestamp_check(netdev_tstamp_prequeue, skb); @@ -3703,6 +3703,28 @@ int netif_receive_skb(struct sk_buff *skb) #endif return __netif_receive_skb(skb); } + +/** + * netif_receive_skb - process receive buffer from network + * @skb: buffer to process + * + * netif_receive_skb() is the main receive data processing function. + * It always succeeds. The buffer may be dropped during processing + * for congestion control or by the protocol layers. + * + * This function may only be called from softirq context and interrupts + * should be enabled. + * + * Return values (usually ignored): + * NET_RX_SUCCESS: no congestion + * NET_RX_DROP: packet was dropped + */ +int netif_receive_skb(struct sk_buff *skb) +{ + trace_netif_receive_skb_entry(skb); + + return netif_receive_skb_internal(skb); +} EXPORT_SYMBOL(netif_receive_skb); /* Network device is going away, flush any packets still pending @@ -3752,7 +3774,7 @@ static int napi_gro_complete(struct sk_buff *skb) if (ptype->type != type || !ptype->callbacks.gro_complete) continue; - err = ptype->callbacks.gro_complete(skb); + err = ptype->callbacks.gro_complete(skb, 0); break; } rcu_read_unlock(); @@ -3764,7 +3786,7 @@ static int napi_gro_complete(struct sk_buff *skb) } out: - return netif_receive_skb(skb); + return netif_receive_skb_internal(skb); } /* napi->gro_list contains packets ordered by age. @@ -3800,10 +3822,18 @@ static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb) { struct sk_buff *p; unsigned int maclen = skb->dev->hard_header_len; + u32 hash = skb_get_hash_raw(skb); for (p = napi->gro_list; p; p = p->next) { unsigned long diffs; + NAPI_GRO_CB(p)->flush = 0; + + if (hash != skb_get_hash_raw(p)) { + NAPI_GRO_CB(p)->same_flow = 0; + continue; + } + diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev; diffs |= p->vlan_tci ^ skb->vlan_tci; if (maclen == ETH_HLEN) @@ -3814,7 +3844,23 @@ static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb) skb_gro_mac_header(skb), maclen); NAPI_GRO_CB(p)->same_flow = !diffs; - NAPI_GRO_CB(p)->flush = 0; + } +} + +static void skb_gro_reset_offset(struct sk_buff *skb) +{ + const struct skb_shared_info *pinfo = skb_shinfo(skb); + const skb_frag_t *frag0 = &pinfo->frags[0]; + + NAPI_GRO_CB(skb)->data_offset = 0; + NAPI_GRO_CB(skb)->frag0 = NULL; + NAPI_GRO_CB(skb)->frag0_len = 0; + + if (skb_mac_header(skb) == skb_tail_pointer(skb) && + pinfo->nr_frags && + !PageHighMem(skb_frag_page(frag0))) { + NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0); + NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(frag0); } } @@ -3833,7 +3879,9 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff if (skb_is_gso(skb) || skb_has_frag_list(skb)) goto normal; + skb_gro_reset_offset(skb); gro_list_prepare(napi, skb); + NAPI_GRO_CB(skb)->csum = skb->csum; /* Needed for CHECKSUM_COMPLETE */ rcu_read_lock(); list_for_each_entry_rcu(ptype, head, list) { @@ -3845,6 +3893,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff NAPI_GRO_CB(skb)->same_flow = 0; NAPI_GRO_CB(skb)->flush = 0; NAPI_GRO_CB(skb)->free = 0; + NAPI_GRO_CB(skb)->udp_mark = 0; pp = ptype->callbacks.gro_receive(&napi->gro_list, skb); break; @@ -3869,10 +3918,23 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff if (same_flow) goto ok; - if (NAPI_GRO_CB(skb)->flush || napi->gro_count >= MAX_GRO_SKBS) + if (NAPI_GRO_CB(skb)->flush) goto normal; - napi->gro_count++; + if (unlikely(napi->gro_count >= MAX_GRO_SKBS)) { + struct sk_buff *nskb = napi->gro_list; + + /* locate the end of the list to select the 'oldest' flow */ + while (nskb->next) { + pp = &nskb->next; + nskb = *pp; + } + *pp = NULL; + nskb->next = NULL; + napi_gro_complete(nskb); + } else { + napi->gro_count++; + } NAPI_GRO_CB(skb)->count = 1; NAPI_GRO_CB(skb)->age = jiffies; skb_shinfo(skb)->gso_size = skb_gro_len(skb); @@ -3910,12 +3972,39 @@ normal: goto pull; } +struct packet_offload *gro_find_receive_by_type(__be16 type) +{ + struct list_head *offload_head = &offload_base; + struct packet_offload *ptype; + + list_for_each_entry_rcu(ptype, offload_head, list) { + if (ptype->type != type || !ptype->callbacks.gro_receive) + continue; + return ptype; + } + return NULL; +} +EXPORT_SYMBOL(gro_find_receive_by_type); + +struct packet_offload *gro_find_complete_by_type(__be16 type) +{ + struct list_head *offload_head = &offload_base; + struct packet_offload *ptype; + + list_for_each_entry_rcu(ptype, offload_head, list) { + if (ptype->type != type || !ptype->callbacks.gro_complete) + continue; + return ptype; + } + return NULL; +} +EXPORT_SYMBOL(gro_find_complete_by_type); static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) { switch (ret) { case GRO_NORMAL: - if (netif_receive_skb(skb)) + if (netif_receive_skb_internal(skb)) ret = GRO_DROP; break; @@ -3938,26 +4027,9 @@ static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) return ret; } -static void skb_gro_reset_offset(struct sk_buff *skb) -{ - const struct skb_shared_info *pinfo = skb_shinfo(skb); - const skb_frag_t *frag0 = &pinfo->frags[0]; - - NAPI_GRO_CB(skb)->data_offset = 0; - NAPI_GRO_CB(skb)->frag0 = NULL; - NAPI_GRO_CB(skb)->frag0_len = 0; - - if (skb_mac_header(skb) == skb_tail_pointer(skb) && - pinfo->nr_frags && - !PageHighMem(skb_frag_page(frag0))) { - NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0); - NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(frag0); - } -} - gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { - skb_gro_reset_offset(skb); + trace_napi_gro_receive_entry(skb); return napi_skb_finish(dev_gro_receive(napi, skb), skb); } @@ -3981,8 +4053,7 @@ struct sk_buff *napi_get_frags(struct napi_struct *napi) if (!skb) { skb = netdev_alloc_skb_ip_align(napi->dev, GRO_MAX_HEAD); - if (skb) - napi->skb = skb; + napi->skb = skb; } return skb; } @@ -3993,12 +4064,7 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff * { switch (ret) { case GRO_NORMAL: - case GRO_HELD: - skb->protocol = eth_type_trans(skb, skb->dev); - - if (ret == GRO_HELD) - skb_gro_pull(skb, -ETH_HLEN); - else if (netif_receive_skb(skb)) + if (netif_receive_skb_internal(skb)) ret = GRO_DROP; break; @@ -4007,6 +4073,7 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff * napi_reuse_skb(napi, skb); break; + case GRO_HELD: case GRO_MERGED: break; } @@ -4017,36 +4084,15 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff * static struct sk_buff *napi_frags_skb(struct napi_struct *napi) { struct sk_buff *skb = napi->skb; - struct ethhdr *eth; - unsigned int hlen; - unsigned int off; napi->skb = NULL; - skb_reset_mac_header(skb); - skb_gro_reset_offset(skb); - - off = skb_gro_offset(skb); - hlen = off + sizeof(*eth); - eth = skb_gro_header_fast(skb, off); - if (skb_gro_header_hard(skb, hlen)) { - eth = skb_gro_header_slow(skb, hlen, off); - if (unlikely(!eth)) { - napi_reuse_skb(napi, skb); - skb = NULL; - goto out; - } + if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr)))) { + napi_reuse_skb(napi, skb); + return NULL; } + skb->protocol = eth_type_trans(skb, skb->dev); - skb_gro_pull(skb, sizeof(*eth)); - - /* - * This works because the only protocols we care about don't require - * special handling. We'll fix it up properly at the end. - */ - skb->protocol = eth->h_proto; - -out: return skb; } @@ -4057,12 +4103,14 @@ gro_result_t napi_gro_frags(struct napi_struct *napi) if (!skb) return GRO_DROP; + trace_napi_gro_frags_entry(skb); + return napi_frags_finish(napi, skb, dev_gro_receive(napi, skb)); } EXPORT_SYMBOL(napi_gro_frags); /* - * net_rps_action sends any pending IPI's for rps. + * net_rps_action_and_irq_enable sends any pending IPI's for rps. * Note: called with local irq disabled, but exits with local irq enabled. */ static void net_rps_action_and_irq_enable(struct softnet_data *sd) @@ -4267,17 +4315,10 @@ EXPORT_SYMBOL(netif_napi_add); void netif_napi_del(struct napi_struct *napi) { - struct sk_buff *skb, *next; - list_del_init(&napi->dev_list); napi_free_frags(napi); - for (skb = napi->gro_list; skb; skb = next) { - next = skb->next; - skb->next = NULL; - kfree_skb(skb); - } - + kfree_skb_list(napi->gro_list); napi->gro_list = NULL; napi->gro_count = 0; } @@ -4394,19 +4435,6 @@ struct netdev_adjacent { struct rcu_head rcu; }; -static struct netdev_adjacent *__netdev_find_adj_rcu(struct net_device *dev, - struct net_device *adj_dev, - struct list_head *adj_list) -{ - struct netdev_adjacent *adj; - - list_for_each_entry_rcu(adj, adj_list, list) { - if (adj->dev == adj_dev) - return adj; - } - return NULL; -} - static struct netdev_adjacent *__netdev_find_adj(struct net_device *dev, struct net_device *adj_dev, struct list_head *adj_list) @@ -4445,13 +4473,12 @@ EXPORT_SYMBOL(netdev_has_upper_dev); * Find out if a device is linked to an upper device and return true in case * it is. The caller must hold the RTNL lock. */ -bool netdev_has_any_upper_dev(struct net_device *dev) +static bool netdev_has_any_upper_dev(struct net_device *dev) { ASSERT_RTNL(); return !list_empty(&dev->all_adj_list.upper); } -EXPORT_SYMBOL(netdev_has_any_upper_dev); /** * netdev_master_upper_dev_get - Get master upper device @@ -4500,7 +4527,7 @@ struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev, { struct netdev_adjacent *upper; - WARN_ON_ONCE(!rcu_read_lock_held()); + WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_rtnl_is_held()); upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list); @@ -4571,6 +4598,27 @@ void *netdev_lower_get_next_private_rcu(struct net_device *dev, EXPORT_SYMBOL(netdev_lower_get_next_private_rcu); /** + * netdev_lower_get_first_private_rcu - Get the first ->private from the + * lower neighbour list, RCU + * variant + * @dev: device + * + * Gets the first netdev_adjacent->private from the dev's lower neighbour + * list. The caller must hold RCU read lock. + */ +void *netdev_lower_get_first_private_rcu(struct net_device *dev) +{ + struct netdev_adjacent *lower; + + lower = list_first_or_null_rcu(&dev->adj_list.lower, + struct netdev_adjacent, list); + if (lower) + return lower->private; + return NULL; +} +EXPORT_SYMBOL(netdev_lower_get_first_private_rcu); + +/** * netdev_master_upper_dev_get_rcu - Get master upper device * @dev: device * @@ -4589,13 +4637,36 @@ struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev) } EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu); +int netdev_adjacent_sysfs_add(struct net_device *dev, + struct net_device *adj_dev, + struct list_head *dev_list) +{ + char linkname[IFNAMSIZ+7]; + sprintf(linkname, dev_list == &dev->adj_list.upper ? + "upper_%s" : "lower_%s", adj_dev->name); + return sysfs_create_link(&(dev->dev.kobj), &(adj_dev->dev.kobj), + linkname); +} +void netdev_adjacent_sysfs_del(struct net_device *dev, + char *name, + struct list_head *dev_list) +{ + char linkname[IFNAMSIZ+7]; + sprintf(linkname, dev_list == &dev->adj_list.upper ? + "upper_%s" : "lower_%s", name); + sysfs_remove_link(&(dev->dev.kobj), linkname); +} + +#define netdev_adjacent_is_neigh_list(dev, dev_list) \ + (dev_list == &dev->adj_list.upper || \ + dev_list == &dev->adj_list.lower) + static int __netdev_adjacent_dev_insert(struct net_device *dev, struct net_device *adj_dev, struct list_head *dev_list, void *private, bool master) { struct netdev_adjacent *adj; - char linkname[IFNAMSIZ+7]; int ret; adj = __netdev_find_adj(dev, adj_dev, dev_list); @@ -4618,16 +4689,8 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev, pr_debug("dev_hold for %s, because of link added from %s to %s\n", adj_dev->name, dev->name, adj_dev->name); - if (dev_list == &dev->adj_list.lower) { - sprintf(linkname, "lower_%s", adj_dev->name); - ret = sysfs_create_link(&(dev->dev.kobj), - &(adj_dev->dev.kobj), linkname); - if (ret) - goto free_adj; - } else if (dev_list == &dev->adj_list.upper) { - sprintf(linkname, "upper_%s", adj_dev->name); - ret = sysfs_create_link(&(dev->dev.kobj), - &(adj_dev->dev.kobj), linkname); + if (netdev_adjacent_is_neigh_list(dev, dev_list)) { + ret = netdev_adjacent_sysfs_add(dev, adj_dev, dev_list); if (ret) goto free_adj; } @@ -4647,14 +4710,8 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev, return 0; remove_symlinks: - if (dev_list == &dev->adj_list.lower) { - sprintf(linkname, "lower_%s", adj_dev->name); - sysfs_remove_link(&(dev->dev.kobj), linkname); - } else if (dev_list == &dev->adj_list.upper) { - sprintf(linkname, "upper_%s", adj_dev->name); - sysfs_remove_link(&(dev->dev.kobj), linkname); - } - + if (netdev_adjacent_is_neigh_list(dev, dev_list)) + netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list); free_adj: kfree(adj); dev_put(adj_dev); @@ -4662,12 +4719,11 @@ free_adj: return ret; } -void __netdev_adjacent_dev_remove(struct net_device *dev, - struct net_device *adj_dev, - struct list_head *dev_list) +static void __netdev_adjacent_dev_remove(struct net_device *dev, + struct net_device *adj_dev, + struct list_head *dev_list) { struct netdev_adjacent *adj; - char linkname[IFNAMSIZ+7]; adj = __netdev_find_adj(dev, adj_dev, dev_list); @@ -4687,13 +4743,8 @@ void __netdev_adjacent_dev_remove(struct net_device *dev, if (adj->master) sysfs_remove_link(&(dev->dev.kobj), "master"); - if (dev_list == &dev->adj_list.lower) { - sprintf(linkname, "lower_%s", adj_dev->name); - sysfs_remove_link(&(dev->dev.kobj), linkname); - } else if (dev_list == &dev->adj_list.upper) { - sprintf(linkname, "upper_%s", adj_dev->name); - sysfs_remove_link(&(dev->dev.kobj), linkname); - } + if (netdev_adjacent_is_neigh_list(dev, dev_list)) + netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list); list_del_rcu(&adj->list); pr_debug("dev_put for %s, because link removed from %s to %s\n", @@ -4702,11 +4753,11 @@ void __netdev_adjacent_dev_remove(struct net_device *dev, kfree_rcu(adj, rcu); } -int __netdev_adjacent_dev_link_lists(struct net_device *dev, - struct net_device *upper_dev, - struct list_head *up_list, - struct list_head *down_list, - void *private, bool master) +static int __netdev_adjacent_dev_link_lists(struct net_device *dev, + struct net_device *upper_dev, + struct list_head *up_list, + struct list_head *down_list, + void *private, bool master) { int ret; @@ -4725,8 +4776,8 @@ int __netdev_adjacent_dev_link_lists(struct net_device *dev, return 0; } -int __netdev_adjacent_dev_link(struct net_device *dev, - struct net_device *upper_dev) +static int __netdev_adjacent_dev_link(struct net_device *dev, + struct net_device *upper_dev) { return __netdev_adjacent_dev_link_lists(dev, upper_dev, &dev->all_adj_list.upper, @@ -4734,26 +4785,26 @@ int __netdev_adjacent_dev_link(struct net_device *dev, NULL, false); } -void __netdev_adjacent_dev_unlink_lists(struct net_device *dev, - struct net_device *upper_dev, - struct list_head *up_list, - struct list_head *down_list) +static void __netdev_adjacent_dev_unlink_lists(struct net_device *dev, + struct net_device *upper_dev, + struct list_head *up_list, + struct list_head *down_list) { __netdev_adjacent_dev_remove(dev, upper_dev, up_list); __netdev_adjacent_dev_remove(upper_dev, dev, down_list); } -void __netdev_adjacent_dev_unlink(struct net_device *dev, - struct net_device *upper_dev) +static void __netdev_adjacent_dev_unlink(struct net_device *dev, + struct net_device *upper_dev) { __netdev_adjacent_dev_unlink_lists(dev, upper_dev, &dev->all_adj_list.upper, &upper_dev->all_adj_list.lower); } -int __netdev_adjacent_dev_link_neighbour(struct net_device *dev, - struct net_device *upper_dev, - void *private, bool master) +static int __netdev_adjacent_dev_link_neighbour(struct net_device *dev, + struct net_device *upper_dev, + void *private, bool master) { int ret = __netdev_adjacent_dev_link(dev, upper_dev); @@ -4772,8 +4823,8 @@ int __netdev_adjacent_dev_link_neighbour(struct net_device *dev, return 0; } -void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev, - struct net_device *upper_dev) +static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev, + struct net_device *upper_dev) { __netdev_adjacent_dev_unlink(dev, upper_dev); __netdev_adjacent_dev_unlink_lists(dev, upper_dev, @@ -4962,20 +5013,24 @@ void netdev_upper_dev_unlink(struct net_device *dev, } EXPORT_SYMBOL(netdev_upper_dev_unlink); -void *netdev_lower_dev_get_private_rcu(struct net_device *dev, - struct net_device *lower_dev) +void netdev_adjacent_rename_links(struct net_device *dev, char *oldname) { - struct netdev_adjacent *lower; + struct netdev_adjacent *iter; - if (!lower_dev) - return NULL; - lower = __netdev_find_adj_rcu(dev, lower_dev, &dev->adj_list.lower); - if (!lower) - return NULL; + list_for_each_entry(iter, &dev->adj_list.upper, list) { + netdev_adjacent_sysfs_del(iter->dev, oldname, + &iter->dev->adj_list.lower); + netdev_adjacent_sysfs_add(iter->dev, dev, + &iter->dev->adj_list.lower); + } - return lower->private; + list_for_each_entry(iter, &dev->adj_list.lower, list) { + netdev_adjacent_sysfs_del(iter->dev, oldname, + &iter->dev->adj_list.upper); + netdev_adjacent_sysfs_add(iter->dev, dev, + &iter->dev->adj_list.upper); + } } -EXPORT_SYMBOL(netdev_lower_dev_get_private_rcu); void *netdev_lower_dev_get_private(struct net_device *dev, struct net_device *lower_dev) @@ -5309,6 +5364,17 @@ int dev_change_flags(struct net_device *dev, unsigned int flags) } EXPORT_SYMBOL(dev_change_flags); +static int __dev_set_mtu(struct net_device *dev, int new_mtu) +{ + const struct net_device_ops *ops = dev->netdev_ops; + + if (ops->ndo_change_mtu) + return ops->ndo_change_mtu(dev, new_mtu); + + dev->mtu = new_mtu; + return 0; +} + /** * dev_set_mtu - Change maximum transfer unit * @dev: device @@ -5318,8 +5384,7 @@ EXPORT_SYMBOL(dev_change_flags); */ int dev_set_mtu(struct net_device *dev, int new_mtu) { - const struct net_device_ops *ops = dev->netdev_ops; - int err; + int err, orig_mtu; if (new_mtu == dev->mtu) return 0; @@ -5331,14 +5396,25 @@ int dev_set_mtu(struct net_device *dev, int new_mtu) if (!netif_device_present(dev)) return -ENODEV; - err = 0; - if (ops->ndo_change_mtu) - err = ops->ndo_change_mtu(dev, new_mtu); - else - dev->mtu = new_mtu; + err = call_netdevice_notifiers(NETDEV_PRECHANGEMTU, dev); + err = notifier_to_errno(err); + if (err) + return err; - if (!err) - call_netdevice_notifiers(NETDEV_CHANGEMTU, dev); + orig_mtu = dev->mtu; + err = __dev_set_mtu(dev, new_mtu); + + if (!err) { + err = call_netdevice_notifiers(NETDEV_CHANGEMTU, dev); + err = notifier_to_errno(err); + if (err) { + /* setting mtu back and notifying everyone again, + * so that they have a chance to revert changes. + */ + __dev_set_mtu(dev, orig_mtu); + call_netdevice_notifiers(NETDEV_CHANGEMTU, dev); + } + } return err; } EXPORT_SYMBOL(dev_set_mtu); @@ -5692,7 +5768,7 @@ void netif_stacked_transfer_operstate(const struct net_device *rootdev, } EXPORT_SYMBOL(netif_stacked_transfer_operstate); -#ifdef CONFIG_RPS +#ifdef CONFIG_SYSFS static int netif_alloc_rx_queues(struct net_device *dev) { unsigned int i, count = dev->num_rx_queues; @@ -5831,13 +5907,8 @@ int register_netdevice(struct net_device *dev) dev->features |= NETIF_F_SOFT_FEATURES; dev->wanted_features = dev->features & dev->hw_features; - /* Turn on no cache copy if HW is doing checksum */ if (!(dev->flags & IFF_LOOPBACK)) { dev->hw_features |= NETIF_F_NOCACHE_COPY; - if (dev->features & NETIF_F_ALL_CSUM) { - dev->wanted_features |= NETIF_F_NOCACHE_COPY; - dev->features |= NETIF_F_NOCACHE_COPY; - } } /* Make NETIF_F_HIGHDMA inheritable to VLAN devices. @@ -6224,7 +6295,7 @@ void netdev_freemem(struct net_device *dev) * @rxqs: the number of RX subqueues to allocate * * Allocates a struct net_device with private data area for driver use - * and performs basic initialization. Also allocates subquue structs + * and performs basic initialization. Also allocates subqueue structs * for each queue on the device. */ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, @@ -6242,7 +6313,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, return NULL; } -#ifdef CONFIG_RPS +#ifdef CONFIG_SYSFS if (rxqs < 1) { pr_err("alloc_netdev: Unable to allocate device with zero RX queues\n"); return NULL; @@ -6298,7 +6369,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, if (netif_alloc_netdev_queues(dev)) goto free_all; -#ifdef CONFIG_RPS +#ifdef CONFIG_SYSFS dev->num_rx_queues = rxqs; dev->real_num_rx_queues = rxqs; if (netif_alloc_rx_queues(dev)) @@ -6318,7 +6389,7 @@ free_all: free_pcpu: free_percpu(dev->pcpu_refcnt); netif_free_tx_queues(dev); -#ifdef CONFIG_RPS +#ifdef CONFIG_SYSFS kfree(dev->_rx); #endif @@ -6343,7 +6414,7 @@ void free_netdev(struct net_device *dev) release_net(dev_net(dev)); netif_free_tx_queues(dev); -#ifdef CONFIG_RPS +#ifdef CONFIG_SYSFS kfree(dev->_rx); #endif @@ -6613,11 +6684,11 @@ static int dev_cpu_callback(struct notifier_block *nfb, /* Process offline CPU's input_pkt_queue */ while ((skb = __skb_dequeue(&oldsd->process_queue))) { - netif_rx(skb); + netif_rx_internal(skb); input_queue_head_incr(oldsd); } while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) { - netif_rx(skb); + netif_rx_internal(skb); input_queue_head_incr(oldsd); } @@ -6930,28 +7001,18 @@ static int __init net_dev_init(void) for_each_possible_cpu(i) { struct softnet_data *sd = &per_cpu(softnet_data, i); - memset(sd, 0, sizeof(*sd)); skb_queue_head_init(&sd->input_pkt_queue); skb_queue_head_init(&sd->process_queue); - sd->completion_queue = NULL; INIT_LIST_HEAD(&sd->poll_list); - sd->output_queue = NULL; sd->output_queue_tailp = &sd->output_queue; #ifdef CONFIG_RPS sd->csd.func = rps_trigger_softirq; sd->csd.info = sd; - sd->csd.flags = 0; sd->cpu = i; #endif sd->backlog.poll = process_backlog; sd->backlog.weight = weight_p; - sd->backlog.gro_list = NULL; - sd->backlog.gro_count = 0; - -#ifdef CONFIG_NET_FLOW_LIMIT - sd->flow_limit = NULL; -#endif } dev_boot_phase = 0; |