diff options
Diffstat (limited to 'net/core')
-rw-r--r-- | net/core/datagram.c | 24 | ||||
-rw-r--r-- | net/core/dev.c | 347 | ||||
-rw-r--r-- | net/core/dev_addr_lists.c | 4 | ||||
-rw-r--r-- | net/core/dst.c | 15 | ||||
-rw-r--r-- | net/core/ethtool.c | 20 | ||||
-rw-r--r-- | net/core/fib_rules.c | 13 | ||||
-rw-r--r-- | net/core/filter.c | 4 | ||||
-rw-r--r-- | net/core/flow.c | 50 | ||||
-rw-r--r-- | net/core/kmap_skb.h | 2 | ||||
-rw-r--r-- | net/core/link_watch.c | 9 | ||||
-rw-r--r-- | net/core/neighbour.c | 44 | ||||
-rw-r--r-- | net/core/net-sysfs.c | 12 | ||||
-rw-r--r-- | net/core/netpoll.c | 4 | ||||
-rw-r--r-- | net/core/pktgen.c | 25 | ||||
-rw-r--r-- | net/core/rtnetlink.c | 34 | ||||
-rw-r--r-- | net/core/scm.c | 10 | ||||
-rw-r--r-- | net/core/secure_seq.c | 2 | ||||
-rw-r--r-- | net/core/skbuff.c | 196 | ||||
-rw-r--r-- | net/core/sock.c | 24 | ||||
-rw-r--r-- | net/core/timestamping.c | 12 | ||||
-rw-r--r-- | net/core/user_dma.c | 6 |
21 files changed, 570 insertions, 287 deletions
diff --git a/net/core/datagram.c b/net/core/datagram.c index 18ac112ea7a..68bbf9f65cb 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -324,15 +324,15 @@ int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset, /* Copy paged appendix. Hmm... why does this look so complicated? */ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; + const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; WARN_ON(start > offset + len); - end = start + skb_shinfo(skb)->frags[i].size; + end = start + skb_frag_size(frag); if ((copy = end - offset) > 0) { int err; u8 *vaddr; - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - struct page *page = frag->page; + struct page *page = skb_frag_page(frag); if (copy > len) copy = len; @@ -410,15 +410,15 @@ int skb_copy_datagram_const_iovec(const struct sk_buff *skb, int offset, /* Copy paged appendix. Hmm... why does this look so complicated? */ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; + const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; WARN_ON(start > offset + len); - end = start + skb_shinfo(skb)->frags[i].size; + end = start + skb_frag_size(frag); if ((copy = end - offset) > 0) { int err; u8 *vaddr; - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - struct page *page = frag->page; + struct page *page = skb_frag_page(frag); if (copy > len) copy = len; @@ -500,15 +500,15 @@ int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset, /* Copy paged appendix. Hmm... why does this look so complicated? */ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; + const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; WARN_ON(start > offset + len); - end = start + skb_shinfo(skb)->frags[i].size; + end = start + skb_frag_size(frag); if ((copy = end - offset) > 0) { int err; u8 *vaddr; - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - struct page *page = frag->page; + struct page *page = skb_frag_page(frag); if (copy > len) copy = len; @@ -585,16 +585,16 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; + const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; WARN_ON(start > offset + len); - end = start + skb_shinfo(skb)->frags[i].size; + end = start + skb_frag_size(frag); if ((copy = end - offset) > 0) { __wsum csum2; int err = 0; u8 *vaddr; - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - struct page *page = frag->page; + struct page *page = skb_frag_page(frag); if (copy > len) copy = len; diff --git a/net/core/dev.c b/net/core/dev.c index 17d67b579be..edcf019c056 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -133,6 +133,10 @@ #include <linux/pci.h> #include <linux/inetdevice.h> #include <linux/cpu_rmap.h> +#include <linux/if_tunnel.h> +#include <linux/if_pppox.h> +#include <linux/ppp_defs.h> +#include <linux/net_tstamp.h> #include "net-sysfs.h" @@ -1474,6 +1478,57 @@ static inline void net_timestamp_check(struct sk_buff *skb) __net_timestamp(skb); } +static int net_hwtstamp_validate(struct ifreq *ifr) +{ + struct hwtstamp_config cfg; + enum hwtstamp_tx_types tx_type; + enum hwtstamp_rx_filters rx_filter; + int tx_type_valid = 0; + int rx_filter_valid = 0; + + if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg))) + return -EFAULT; + + if (cfg.flags) /* reserved for future extensions */ + return -EINVAL; + + tx_type = cfg.tx_type; + rx_filter = cfg.rx_filter; + + switch (tx_type) { + case HWTSTAMP_TX_OFF: + case HWTSTAMP_TX_ON: + case HWTSTAMP_TX_ONESTEP_SYNC: + tx_type_valid = 1; + break; + } + + switch (rx_filter) { + case HWTSTAMP_FILTER_NONE: + case HWTSTAMP_FILTER_ALL: + case HWTSTAMP_FILTER_SOME: + case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: + rx_filter_valid = 1; + break; + } + + if (!tx_type_valid || !rx_filter_valid) + return -ERANGE; + + return 0; +} + static inline bool is_skb_forwardable(struct net_device *dev, struct sk_buff *skb) { @@ -1515,6 +1570,14 @@ static inline bool is_skb_forwardable(struct net_device *dev, */ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) { + if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) { + if (skb_copy_ubufs(skb, GFP_ATOMIC)) { + atomic_long_inc(&dev->rx_dropped); + kfree_skb(skb); + return NET_RX_DROP; + } + } + skb_orphan(skb); nf_reset(skb); @@ -1947,9 +2010,11 @@ static int illegal_highdma(struct net_device *dev, struct sk_buff *skb) #ifdef CONFIG_HIGHMEM int i; if (!(dev->features & NETIF_F_HIGHDMA)) { - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) - if (PageHighMem(skb_shinfo(skb)->frags[i].page)) + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + if (PageHighMem(skb_frag_page(frag))) return 1; + } } if (PCI_DMA_BUS_IS_PHYS) { @@ -1958,7 +2023,8 @@ static int illegal_highdma(struct net_device *dev, struct sk_buff *skb) if (!pdev) return 0; for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - dma_addr_t addr = page_to_phys(skb_shinfo(skb)->frags[i].page); + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + dma_addr_t addr = page_to_phys(skb_frag_page(frag)); if (!pdev->dma_mask || addr + PAGE_SIZE - 1 > *pdev->dma_mask) return 1; } @@ -2519,25 +2585,31 @@ static inline void ____napi_schedule(struct softnet_data *sd, /* * __skb_get_rxhash: calculate a flow hash based on src/dst addresses - * and src/dst port numbers. Returns a non-zero hash number on success - * and 0 on failure. + * and src/dst port numbers. Sets rxhash in skb to non-zero hash value + * on success, zero indicates no valid hash. Also, sets l4_rxhash in skb + * if hash is a canonical 4-tuple hash over transport ports. */ -__u32 __skb_get_rxhash(struct sk_buff *skb) +void __skb_get_rxhash(struct sk_buff *skb) { int nhoff, hash = 0, poff; const struct ipv6hdr *ip6; const struct iphdr *ip; + const struct vlan_hdr *vlan; u8 ip_proto; - u32 addr1, addr2, ihl; + u32 addr1, addr2; + u16 proto; union { u32 v32; u16 v16[2]; } ports; nhoff = skb_network_offset(skb); + proto = skb->protocol; - switch (skb->protocol) { +again: + switch (proto) { case __constant_htons(ETH_P_IP): +ip: if (!pskb_may_pull(skb, sizeof(*ip) + nhoff)) goto done; @@ -2548,9 +2620,10 @@ __u32 __skb_get_rxhash(struct sk_buff *skb) ip_proto = ip->protocol; addr1 = (__force u32) ip->saddr; addr2 = (__force u32) ip->daddr; - ihl = ip->ihl; + nhoff += ip->ihl * 4; break; case __constant_htons(ETH_P_IPV6): +ipv6: if (!pskb_may_pull(skb, sizeof(*ip6) + nhoff)) goto done; @@ -2558,20 +2631,71 @@ __u32 __skb_get_rxhash(struct sk_buff *skb) ip_proto = ip6->nexthdr; addr1 = (__force u32) ip6->saddr.s6_addr32[3]; addr2 = (__force u32) ip6->daddr.s6_addr32[3]; - ihl = (40 >> 2); + nhoff += 40; break; + case __constant_htons(ETH_P_8021Q): + if (!pskb_may_pull(skb, sizeof(*vlan) + nhoff)) + goto done; + vlan = (const struct vlan_hdr *) (skb->data + nhoff); + proto = vlan->h_vlan_encapsulated_proto; + nhoff += sizeof(*vlan); + goto again; + case __constant_htons(ETH_P_PPP_SES): + if (!pskb_may_pull(skb, PPPOE_SES_HLEN + nhoff)) + goto done; + proto = *((__be16 *) (skb->data + nhoff + + sizeof(struct pppoe_hdr))); + nhoff += PPPOE_SES_HLEN; + switch (proto) { + case __constant_htons(PPP_IP): + goto ip; + case __constant_htons(PPP_IPV6): + goto ipv6; + default: + goto done; + } default: goto done; } + switch (ip_proto) { + case IPPROTO_GRE: + if (pskb_may_pull(skb, nhoff + 16)) { + u8 *h = skb->data + nhoff; + __be16 flags = *(__be16 *)h; + + /* + * Only look inside GRE if version zero and no + * routing + */ + if (!(flags & (GRE_VERSION|GRE_ROUTING))) { + proto = *(__be16 *)(h + 2); + nhoff += 4; + if (flags & GRE_CSUM) + nhoff += 4; + if (flags & GRE_KEY) + nhoff += 4; + if (flags & GRE_SEQ) + nhoff += 4; + goto again; + } + } + break; + case IPPROTO_IPIP: + goto again; + default: + break; + } + ports.v32 = 0; poff = proto_ports_offset(ip_proto); if (poff >= 0) { - nhoff += ihl * 4 + poff; + nhoff += poff; if (pskb_may_pull(skb, nhoff + 4)) { ports.v32 = * (__force u32 *) (skb->data + nhoff); if (ports.v16[1] < ports.v16[0]) swap(ports.v16[0], ports.v16[1]); + skb->l4_rxhash = 1; } } @@ -2584,7 +2708,7 @@ __u32 __skb_get_rxhash(struct sk_buff *skb) hash = 1; done: - return hash; + skb->rxhash = hash; } EXPORT_SYMBOL(__skb_get_rxhash); @@ -2598,10 +2722,7 @@ static struct rps_dev_flow * set_rps_cpu(struct net_device *dev, struct sk_buff *skb, struct rps_dev_flow *rflow, u16 next_cpu) { - u16 tcpu; - - tcpu = rflow->cpu = next_cpu; - if (tcpu != RPS_NO_CPU) { + if (next_cpu != RPS_NO_CPU) { #ifdef CONFIG_RFS_ACCEL struct netdev_rx_queue *rxqueue; struct rps_dev_flow_table *flow_table; @@ -2629,16 +2750,16 @@ set_rps_cpu(struct net_device *dev, struct sk_buff *skb, goto out; old_rflow = rflow; rflow = &flow_table->flows[flow_id]; - rflow->cpu = next_cpu; rflow->filter = rc; if (old_rflow->filter == rflow->filter) old_rflow->filter = RPS_NO_FILTER; out: #endif rflow->last_qtail = - per_cpu(softnet_data, tcpu).input_queue_head; + per_cpu(softnet_data, next_cpu).input_queue_head; } + rflow->cpu = next_cpu; return rflow; } @@ -2673,13 +2794,13 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, map = rcu_dereference(rxqueue->rps_map); if (map) { if (map->len == 1 && - !rcu_dereference_raw(rxqueue->rps_flow_table)) { + !rcu_access_pointer(rxqueue->rps_flow_table)) { tcpu = map->cpus[0]; if (cpu_online(tcpu)) cpu = tcpu; goto done; } - } else if (!rcu_dereference_raw(rxqueue->rps_flow_table)) { + } else if (!rcu_access_pointer(rxqueue->rps_flow_table)) { goto done; } @@ -3094,8 +3215,8 @@ void netdev_rx_handler_unregister(struct net_device *dev) { ASSERT_RTNL(); - rcu_assign_pointer(dev->rx_handler, NULL); - rcu_assign_pointer(dev->rx_handler_data, NULL); + RCU_INIT_POINTER(dev->rx_handler, NULL); + RCU_INIT_POINTER(dev->rx_handler_data, NULL); } EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister); @@ -3162,6 +3283,17 @@ another_round: ncls: #endif + if (vlan_tx_tag_present(skb)) { + if (pt_prev) { + ret = deliver_skb(skb, pt_prev, orig_dev); + pt_prev = NULL; + } + if (vlan_do_receive(&skb)) + goto another_round; + else if (unlikely(!skb)) + goto out; + } + rx_handler = rcu_dereference(skb->dev->rx_handler); if (rx_handler) { if (pt_prev) { @@ -3182,18 +3314,6 @@ ncls: } } - if (vlan_tx_tag_present(skb)) { - if (pt_prev) { - ret = deliver_skb(skb, pt_prev, orig_dev); - pt_prev = NULL; - } - if (vlan_do_receive(&skb)) { - ret = __netif_receive_skb(skb); - goto out; - } else if (unlikely(!skb)) - goto out; - } - /* deliver only exact match when indicated */ null_or_dev = deliver_exact ? skb->dev : NULL; @@ -3421,10 +3541,10 @@ pull: skb->data_len -= grow; skb_shinfo(skb)->frags[0].page_offset += grow; - skb_shinfo(skb)->frags[0].size -= grow; + skb_frag_size_sub(&skb_shinfo(skb)->frags[0], grow); - if (unlikely(!skb_shinfo(skb)->frags[0].size)) { - put_page(skb_shinfo(skb)->frags[0].page); + if (unlikely(!skb_frag_size(&skb_shinfo(skb)->frags[0]))) { + skb_frag_unref(skb, 0); memmove(skb_shinfo(skb)->frags, skb_shinfo(skb)->frags + 1, --skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t)); @@ -3488,11 +3608,10 @@ void skb_gro_reset_offset(struct sk_buff *skb) NAPI_GRO_CB(skb)->frag0_len = 0; if (skb->mac_header == skb->tail && - !PageHighMem(skb_shinfo(skb)->frags[0].page)) { + !PageHighMem(skb_frag_page(&skb_shinfo(skb)->frags[0]))) { NAPI_GRO_CB(skb)->frag0 = - page_address(skb_shinfo(skb)->frags[0].page) + - skb_shinfo(skb)->frags[0].page_offset; - NAPI_GRO_CB(skb)->frag0_len = skb_shinfo(skb)->frags[0].size; + skb_frag_address(&skb_shinfo(skb)->frags[0]); + NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(&skb_shinfo(skb)->frags[0]); } } EXPORT_SYMBOL(skb_gro_reset_offset); @@ -3974,6 +4093,60 @@ static int dev_ifconf(struct net *net, char __user *arg) } #ifdef CONFIG_PROC_FS + +#define BUCKET_SPACE (32 - NETDEV_HASHBITS) + +struct dev_iter_state { + struct seq_net_private p; + unsigned int pos; /* bucket << BUCKET_SPACE + offset */ +}; + +#define get_bucket(x) ((x) >> BUCKET_SPACE) +#define get_offset(x) ((x) & ((1 << BUCKET_SPACE) - 1)) +#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o)) + +static inline struct net_device *dev_from_same_bucket(struct seq_file *seq) +{ + struct dev_iter_state *state = seq->private; + struct net *net = seq_file_net(seq); + struct net_device *dev; + struct hlist_node *p; + struct hlist_head *h; + unsigned int count, bucket, offset; + + bucket = get_bucket(state->pos); + offset = get_offset(state->pos); + h = &net->dev_name_head[bucket]; + count = 0; + hlist_for_each_entry_rcu(dev, p, h, name_hlist) { + if (count++ == offset) { + state->pos = set_bucket_offset(bucket, count); + return dev; + } + } + + return NULL; +} + +static inline struct net_device *dev_from_new_bucket(struct seq_file *seq) +{ + struct dev_iter_state *state = seq->private; + struct net_device *dev; + unsigned int bucket; + + bucket = get_bucket(state->pos); + do { + dev = dev_from_same_bucket(seq); + if (dev) + return dev; + + bucket++; + state->pos = set_bucket_offset(bucket, 0); + } while (bucket < NETDEV_HASHENTRIES); + + return NULL; +} + /* * This is invoked by the /proc filesystem handler to display a device * in detail. @@ -3981,33 +4154,33 @@ static int dev_ifconf(struct net *net, char __user *arg) void *dev_seq_start(struct seq_file *seq, loff_t *pos) __acquires(RCU) { - struct net *net = seq_file_net(seq); - loff_t off; - struct net_device *dev; + struct dev_iter_state *state = seq->private; rcu_read_lock(); if (!*pos) return SEQ_START_TOKEN; - off = 1; - for_each_netdev_rcu(net, dev) - if (off++ == *pos) - return dev; + /* check for end of the hash */ + if (state->pos == 0 && *pos > 1) + return NULL; - return NULL; + return dev_from_new_bucket(seq); } void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct net_device *dev = v; + struct net_device *dev; + + ++*pos; if (v == SEQ_START_TOKEN) - dev = first_net_device_rcu(seq_file_net(seq)); - else - dev = next_net_device_rcu(dev); + return dev_from_new_bucket(seq); - ++*pos; - return dev; + dev = dev_from_same_bucket(seq); + if (dev) + return dev; + + return dev_from_new_bucket(seq); } void dev_seq_stop(struct seq_file *seq, void *v) @@ -4106,7 +4279,7 @@ static const struct seq_operations dev_seq_ops = { static int dev_seq_open(struct inode *inode, struct file *file) { return seq_open_net(inode, file, &dev_seq_ops, - sizeof(struct seq_net_private)); + sizeof(struct dev_iter_state)); } static const struct file_operations dev_seq_fops = { @@ -4489,9 +4662,7 @@ void __dev_set_rx_mode(struct net_device *dev) if (!netif_device_present(dev)) return; - if (ops->ndo_set_rx_mode) - ops->ndo_set_rx_mode(dev); - else { + if (!(dev->priv_flags & IFF_UNICAST_FLT)) { /* Unicast addresses changes may only happen under the rtnl, * therefore calling __dev_set_promiscuity here is safe. */ @@ -4502,10 +4673,10 @@ void __dev_set_rx_mode(struct net_device *dev) __dev_set_promiscuity(dev, -1); dev->uc_promisc = false; } - - if (ops->ndo_set_multicast_list) - ops->ndo_set_multicast_list(dev); } + + if (ops->ndo_set_rx_mode) + ops->ndo_set_rx_mode(dev); } void dev_set_rx_mode(struct net_device *dev) @@ -4516,30 +4687,6 @@ void dev_set_rx_mode(struct net_device *dev) } /** - * dev_ethtool_get_settings - call device's ethtool_ops::get_settings() - * @dev: device - * @cmd: memory area for ethtool_ops::get_settings() result - * - * The cmd arg is initialized properly (cleared and - * ethtool_cmd::cmd field set to ETHTOOL_GSET). - * - * Return device's ethtool_ops::get_settings() result value or - * -EOPNOTSUPP when device doesn't expose - * ethtool_ops::get_settings() operation. - */ -int dev_ethtool_get_settings(struct net_device *dev, - struct ethtool_cmd *cmd) -{ - if (!dev->ethtool_ops || !dev->ethtool_ops->get_settings) - return -EOPNOTSUPP; - - memset(cmd, 0, sizeof(struct ethtool_cmd)); - cmd->cmd = ETHTOOL_GSET; - return dev->ethtool_ops->get_settings(dev, cmd); -} -EXPORT_SYMBOL(dev_ethtool_get_settings); - -/** * dev_get_flags - get flags reported to userspace * @dev: device * @@ -4855,7 +5002,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) return -EOPNOTSUPP; case SIOCADDMULTI: - if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) || + if (!ops->ndo_set_rx_mode || ifr->ifr_hwaddr.sa_family != AF_UNSPEC) return -EINVAL; if (!netif_device_present(dev)) @@ -4863,7 +5010,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) return dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data); case SIOCDELMULTI: - if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) || + if (!ops->ndo_set_rx_mode || ifr->ifr_hwaddr.sa_family != AF_UNSPEC) return -EINVAL; if (!netif_device_present(dev)) @@ -4880,6 +5027,12 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) ifr->ifr_newname[IFNAMSIZ-1] = '\0'; return dev_change_name(dev, ifr->ifr_newname); + case SIOCSHWTSTAMP: + err = net_hwtstamp_validate(ifr); + if (err) + return err; + /* fall through */ + /* * Unknown or private ioctl */ @@ -5194,7 +5347,7 @@ static void rollback_registered_many(struct list_head *head) dev = list_first_entry(head, struct net_device, unreg_list); call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev); - rcu_barrier(); + synchronize_net(); list_for_each_entry(dev, head, unreg_list) dev_put(dev); @@ -5707,6 +5860,12 @@ void netdev_run_todo(void) __rtnl_unlock(); + /* Wait for rcu callbacks to finish before attempting to drain + * the device list. This usually avoids a 250ms wait. + */ + if (!list_empty(&list)) + rcu_barrier(); + while (!list_empty(&list)) { struct net_device *dev = list_first_entry(&list, struct net_device, todo_list); @@ -5727,8 +5886,8 @@ void netdev_run_todo(void) /* paranoia */ BUG_ON(netdev_refcnt_read(dev)); - WARN_ON(rcu_dereference_raw(dev->ip_ptr)); - WARN_ON(rcu_dereference_raw(dev->ip6_ptr)); + WARN_ON(rcu_access_pointer(dev->ip_ptr)); + WARN_ON(rcu_access_pointer(dev->ip6_ptr)); WARN_ON(dev->dn_ptr); if (dev->destructor) @@ -5932,7 +6091,7 @@ void free_netdev(struct net_device *dev) kfree(dev->_rx); #endif - kfree(rcu_dereference_raw(dev->ingress_queue)); + kfree(rcu_dereference_protected(dev->ingress_queue, 1)); /* Flush device addresses */ dev_addr_flush(dev); @@ -6107,6 +6266,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char */ call_netdevice_notifiers(NETDEV_UNREGISTER, dev); call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev); + rtmsg_ifinfo(RTM_DELLINK, dev, ~0U); /* * Flush the unicast and multicast chains @@ -6290,7 +6450,7 @@ const char *netdev_drivername(const struct net_device *dev) return empty; } -static int __netdev_printk(const char *level, const struct net_device *dev, +int __netdev_printk(const char *level, const struct net_device *dev, struct va_format *vaf) { int r; @@ -6305,6 +6465,7 @@ static int __netdev_printk(const char *level, const struct net_device *dev, return r; } +EXPORT_SYMBOL(__netdev_printk); int netdev_printk(const char *level, const struct net_device *dev, const char *format, ...) diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c index e2e66939ed0..283d1b86387 100644 --- a/net/core/dev_addr_lists.c +++ b/net/core/dev_addr_lists.c @@ -591,8 +591,8 @@ EXPORT_SYMBOL(dev_mc_del_global); * addresses that have no users left. The source device must be * locked by netif_tx_lock_bh. * - * This function is intended to be called from the dev->set_multicast_list - * or dev->set_rx_mode function of layered software devices. + * This function is intended to be called from the ndo_set_rx_mode + * function of layered software devices. */ int dev_mc_sync(struct net_device *to, struct net_device *from) { diff --git a/net/core/dst.c b/net/core/dst.c index 14b33baf073..d5e2c4c0910 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -171,7 +171,7 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev, dst_init_metrics(dst, dst_default_metrics, true); dst->expires = 0UL; dst->path = dst; - dst->_neighbour = NULL; + RCU_INIT_POINTER(dst->_neighbour, NULL); #ifdef CONFIG_XFRM dst->xfrm = NULL; #endif @@ -229,11 +229,11 @@ struct dst_entry *dst_destroy(struct dst_entry * dst) smp_rmb(); again: - neigh = dst->_neighbour; + neigh = rcu_dereference_protected(dst->_neighbour, 1); child = dst->child; if (neigh) { - dst->_neighbour = NULL; + RCU_INIT_POINTER(dst->_neighbour, NULL); neigh_release(neigh); } @@ -360,14 +360,19 @@ static void dst_ifdown(struct dst_entry *dst, struct net_device *dev, if (!unregister) { dst->input = dst->output = dst_discard; } else { + struct neighbour *neigh; + dst->dev = dev_net(dst->dev)->loopback_dev; dev_hold(dst->dev); dev_put(dev); - if (dst->_neighbour && dst->_neighbour->dev == dev) { - dst->_neighbour->dev = dst->dev; + rcu_read_lock(); + neigh = dst_get_neighbour(dst); + if (neigh && neigh->dev == dev) { + neigh->dev = dst->dev; dev_hold(dst->dev); dev_put(dev); } + rcu_read_unlock(); } } diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 6cdba5fc2be..f4448170712 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -569,15 +569,25 @@ int __ethtool_set_flags(struct net_device *dev, u32 data) return 0; } -static int ethtool_get_settings(struct net_device *dev, void __user *useraddr) +int __ethtool_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) { - struct ethtool_cmd cmd = { .cmd = ETHTOOL_GSET }; - int err; + ASSERT_RTNL(); - if (!dev->ethtool_ops->get_settings) + if (!dev->ethtool_ops || !dev->ethtool_ops->get_settings) return -EOPNOTSUPP; - err = dev->ethtool_ops->get_settings(dev, &cmd); + memset(cmd, 0, sizeof(struct ethtool_cmd)); + cmd->cmd = ETHTOOL_GSET; + return dev->ethtool_ops->get_settings(dev, cmd); +} +EXPORT_SYMBOL(__ethtool_get_settings); + +static int ethtool_get_settings(struct net_device *dev, void __user *useraddr) +{ + int err; + struct ethtool_cmd cmd; + + err = __ethtool_get_settings(dev, &cmd); if (err < 0) return err; diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index e7ab0c0285b..57e8f95110e 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -384,8 +384,8 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) */ list_for_each_entry(r, &ops->rules_list, list) { if (r->action == FR_ACT_GOTO && - r->target == rule->pref) { - BUG_ON(rtnl_dereference(r->ctarget) != NULL); + r->target == rule->pref && + rtnl_dereference(r->ctarget) == NULL) { rcu_assign_pointer(r->ctarget, rule); if (--ops->unresolved_rules == 0) break; @@ -475,8 +475,11 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) list_del_rcu(&rule->list); - if (rule->action == FR_ACT_GOTO) + if (rule->action == FR_ACT_GOTO) { ops->nr_goto_rules--; + if (rtnl_dereference(rule->ctarget) == NULL) + ops->unresolved_rules--; + } /* * Check if this rule is a target to any of them. If so, @@ -487,7 +490,7 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) if (ops->nr_goto_rules > 0) { list_for_each_entry(tmp, &ops->rules_list, list) { if (rtnl_dereference(tmp->ctarget) == rule) { - rcu_assign_pointer(tmp->ctarget, NULL); + RCU_INIT_POINTER(tmp->ctarget, NULL); ops->unresolved_rules++; } } @@ -545,7 +548,7 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, frh->flags = rule->flags; if (rule->action == FR_ACT_GOTO && - rcu_dereference_raw(rule->ctarget) == NULL) + rcu_access_pointer(rule->ctarget) == NULL) frh->flags |= FIB_RULE_UNRESOLVED; if (rule->iifname[0]) { diff --git a/net/core/filter.c b/net/core/filter.c index 36f975fa87c..5dea4527921 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -436,7 +436,7 @@ error: * * Returns 0 if the rule set is legal or -EINVAL if not. */ -int sk_chk_filter(struct sock_filter *filter, int flen) +int sk_chk_filter(struct sock_filter *filter, unsigned int flen) { /* * Valid instructions are initialized to non-0. @@ -645,7 +645,7 @@ int sk_detach_filter(struct sock *sk) filter = rcu_dereference_protected(sk->sk_filter, sock_owned_by_user(sk)); if (filter) { - rcu_assign_pointer(sk->sk_filter, NULL); + RCU_INIT_POINTER(sk->sk_filter, NULL); sk_filter_uncharge(sk, filter); ret = 0; } diff --git a/net/core/flow.c b/net/core/flow.c index bf32c33cad3..8ae42de9c79 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -30,6 +30,7 @@ struct flow_cache_entry { struct hlist_node hlist; struct list_head gc_list; } u; + struct net *net; u16 family; u8 dir; u32 genid; @@ -172,29 +173,26 @@ static void flow_new_hash_rnd(struct flow_cache *fc, static u32 flow_hash_code(struct flow_cache *fc, struct flow_cache_percpu *fcp, - const struct flowi *key) + const struct flowi *key, + size_t keysize) { const u32 *k = (const u32 *) key; + const u32 length = keysize * sizeof(flow_compare_t) / sizeof(u32); - return jhash2(k, (sizeof(*key) / sizeof(u32)), fcp->hash_rnd) + return jhash2(k, length, fcp->hash_rnd) & (flow_cache_hash_size(fc) - 1); } -typedef unsigned long flow_compare_t; - /* I hear what you're saying, use memcmp. But memcmp cannot make - * important assumptions that we can here, such as alignment and - * constant size. + * important assumptions that we can here, such as alignment. */ -static int flow_key_compare(const struct flowi *key1, const struct flowi *key2) +static int flow_key_compare(const struct flowi *key1, const struct flowi *key2, + size_t keysize) { const flow_compare_t *k1, *k1_lim, *k2; - const int n_elem = sizeof(struct flowi) / sizeof(flow_compare_t); - - BUILD_BUG_ON(sizeof(struct flowi) % sizeof(flow_compare_t)); k1 = (const flow_compare_t *) key1; - k1_lim = k1 + n_elem; + k1_lim = k1 + keysize; k2 = (const flow_compare_t *) key2; @@ -215,6 +213,7 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, struct flow_cache_entry *fle, *tfle; struct hlist_node *entry; struct flow_cache_object *flo; + size_t keysize; unsigned int hash; local_bh_ |