diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2011-01-11 16:32:41 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-01-11 16:32:41 -0800 |
commit | 4162cf64973df51fc885825bc9ca4d055891c49f (patch) | |
tree | f218c7bd298f4d41be94d08a314eb9fbc3fcb4ea /net | |
parent | fb7b5a956992fdc53d0b9c8ea29b51b92839dc1b (diff) | |
parent | 343a8d13aae58dec562dbb5c7d48a53e9b847871 (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6: (67 commits)
cxgb4vf: recover from failure in cxgb4vf_open()
netfilter: ebtables: make broute table work again
netfilter: fix race in conntrack between dump_table and destroy
ah: reload pointers to skb data after calling skb_cow_data()
ah: update maximum truncated ICV length
xfrm: check trunc_len in XFRMA_ALG_AUTH_TRUNC
ehea: Increase the skb array usage
net/fec: remove config FEC2 as it's used nowhere
pcnet_cs: add new_id
tcp: disallow bind() to reuse addr/port
net/r8169: Update the function of parsing firmware
net: ppp: use {get,put}_unaligned_be{16,32}
CAIF: Fix IPv6 support in receive path for GPRS/3G
arp: allow to invalidate specific ARP entries
net_sched: factorize qdisc stats handling
mlx4: Call alloc_etherdev to allocate RX and TX queues
net: Add alloc_netdev_mqs function
caif: don't set connection request param size before copying data
cxgb4vf: fix mailbox data/control coherency domain race
qlcnic: change module parameter permissions
...
Diffstat (limited to 'net')
45 files changed, 235 insertions, 279 deletions
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 1bf0cf50379..8184c031d02 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -740,12 +740,12 @@ static int setsockopt(struct socket *sock, if (cf_sk->sk.sk_protocol != CAIFPROTO_UTIL) return -ENOPROTOOPT; lock_sock(&(cf_sk->sk)); - cf_sk->conn_req.param.size = ol; if (ol > sizeof(cf_sk->conn_req.param.data) || copy_from_user(&cf_sk->conn_req.param.data, ov, ol)) { release_sock(&cf_sk->sk); return -EINVAL; } + cf_sk->conn_req.param.size = ol; release_sock(&cf_sk->sk); return 0; diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c index 84a422c9894..fa9dab372b6 100644 --- a/net/caif/chnl_net.c +++ b/net/caif/chnl_net.c @@ -76,6 +76,8 @@ static int chnl_recv_cb(struct cflayer *layr, struct cfpkt *pkt) struct chnl_net *priv = container_of(layr, struct chnl_net, chnl); int pktlen; int err = 0; + const u8 *ip_version; + u8 buf; priv = container_of(layr, struct chnl_net, chnl); @@ -90,7 +92,21 @@ static int chnl_recv_cb(struct cflayer *layr, struct cfpkt *pkt) * send the packet to the net stack. */ skb->dev = priv->netdev; - skb->protocol = htons(ETH_P_IP); + + /* check the version of IP */ + ip_version = skb_header_pointer(skb, 0, 1, &buf); + if (!ip_version) + return -EINVAL; + switch (*ip_version >> 4) { + case 4: + skb->protocol = htons(ETH_P_IP); + break; + case 6: + skb->protocol = htons(ETH_P_IPV6); + break; + default: + return -EINVAL; + } /* If we change the header in loop mode, the checksum is corrupted. */ if (priv->conn_req.protocol == CAIFPROTO_DATAGRAM_LOOP) diff --git a/net/core/dev.c b/net/core/dev.c index a215269d2e3..a3ef808b5e3 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1732,33 +1732,6 @@ void netif_device_attach(struct net_device *dev) } EXPORT_SYMBOL(netif_device_attach); -static bool can_checksum_protocol(unsigned long features, __be16 protocol) -{ - return ((features & NETIF_F_NO_CSUM) || - ((features & NETIF_F_V4_CSUM) && - protocol == htons(ETH_P_IP)) || - ((features & NETIF_F_V6_CSUM) && - protocol == htons(ETH_P_IPV6)) || - ((features & NETIF_F_FCOE_CRC) && - protocol == htons(ETH_P_FCOE))); -} - -static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb) -{ - __be16 protocol = skb->protocol; - int features = dev->features; - - if (vlan_tx_tag_present(skb)) { - features &= dev->vlan_features; - } else if (protocol == htons(ETH_P_8021Q)) { - struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; - protocol = veh->h_vlan_encapsulated_proto; - features &= dev->vlan_features; - } - - return can_checksum_protocol(features, protocol); -} - /** * skb_dev_set -- assign a new device to a buffer * @skb: buffer for the new device @@ -1971,16 +1944,14 @@ static void dev_gso_skb_destructor(struct sk_buff *skb) /** * dev_gso_segment - Perform emulated hardware segmentation on skb. * @skb: buffer to segment + * @features: device features as applicable to this skb * * This function segments the given skb and stores the list of segments * in skb->next. */ -static int dev_gso_segment(struct sk_buff *skb) +static int dev_gso_segment(struct sk_buff *skb, int features) { - struct net_device *dev = skb->dev; struct sk_buff *segs; - int features = dev->features & ~(illegal_highdma(dev, skb) ? - NETIF_F_SG : 0); segs = skb_gso_segment(skb, features); @@ -2017,22 +1988,52 @@ static inline void skb_orphan_try(struct sk_buff *skb) } } -int netif_get_vlan_features(struct sk_buff *skb, struct net_device *dev) +static bool can_checksum_protocol(unsigned long features, __be16 protocol) +{ + return ((features & NETIF_F_GEN_CSUM) || + ((features & NETIF_F_V4_CSUM) && + protocol == htons(ETH_P_IP)) || + ((features & NETIF_F_V6_CSUM) && + protocol == htons(ETH_P_IPV6)) || + ((features & NETIF_F_FCOE_CRC) && + protocol == htons(ETH_P_FCOE))); +} + +static int harmonize_features(struct sk_buff *skb, __be16 protocol, int features) +{ + if (!can_checksum_protocol(protocol, features)) { + features &= ~NETIF_F_ALL_CSUM; + features &= ~NETIF_F_SG; + } else if (illegal_highdma(skb->dev, skb)) { + features &= ~NETIF_F_SG; + } + + return features; +} + +int netif_skb_features(struct sk_buff *skb) { __be16 protocol = skb->protocol; + int features = skb->dev->features; if (protocol == htons(ETH_P_8021Q)) { struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; protocol = veh->h_vlan_encapsulated_proto; - } else if (!skb->vlan_tci) - return dev->features; + } else if (!vlan_tx_tag_present(skb)) { + return harmonize_features(skb, protocol, features); + } - if (protocol != htons(ETH_P_8021Q)) - return dev->features & dev->vlan_features; - else - return 0; + features &= skb->dev->vlan_features; + + if (protocol != htons(ETH_P_8021Q)) { + return harmonize_features(skb, protocol, features); + } else { + features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | + NETIF_F_GEN_CSUM; + return harmonize_features(skb, protocol, features); + } } -EXPORT_SYMBOL(netif_get_vlan_features); +EXPORT_SYMBOL(netif_skb_features); /* * Returns true if either: @@ -2042,22 +2043,13 @@ EXPORT_SYMBOL(netif_get_vlan_features); * support DMA from it. */ static inline int skb_needs_linearize(struct sk_buff *skb, - struct net_device *dev) + int features) { - if (skb_is_nonlinear(skb)) { - int features = dev->features; - - if (vlan_tx_tag_present(skb)) - features &= dev->vlan_features; - - return (skb_has_frag_list(skb) && - !(features & NETIF_F_FRAGLIST)) || + return skb_is_nonlinear(skb) && + ((skb_has_frag_list(skb) && + !(features & NETIF_F_FRAGLIST)) || (skb_shinfo(skb)->nr_frags && - (!(features & NETIF_F_SG) || - illegal_highdma(dev, skb))); - } - - return 0; + !(features & NETIF_F_SG))); } int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, @@ -2067,6 +2059,8 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, int rc = NETDEV_TX_OK; if (likely(!skb->next)) { + int features; + /* * If device doesnt need skb->dst, release it right now while * its hot in this cpu cache @@ -2079,8 +2073,10 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, skb_orphan_try(skb); + features = netif_skb_features(skb); + if (vlan_tx_tag_present(skb) && - !(dev->features & NETIF_F_HW_VLAN_TX)) { + !(features & NETIF_F_HW_VLAN_TX)) { skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb)); if (unlikely(!skb)) goto out; @@ -2088,13 +2084,13 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, skb->vlan_tci = 0; } - if (netif_needs_gso(dev, skb)) { - if (unlikely(dev_gso_segment(skb))) + if (netif_needs_gso(skb, features)) { + if (unlikely(dev_gso_segment(skb, features))) goto out_kfree_skb; if (skb->next) goto gso; } else { - if (skb_needs_linearize(skb, dev) && + if (skb_needs_linearize(skb, features) && __skb_linearize(skb)) goto out_kfree_skb; @@ -2105,7 +2101,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, if (skb->ip_summed == CHECKSUM_PARTIAL) { skb_set_transport_header(skb, skb_checksum_start_offset(skb)); - if (!dev_can_checksum(dev, skb) && + if (!(features & NETIF_F_ALL_CSUM) && skb_checksum_help(skb)) goto out_kfree_skb; } @@ -2301,7 +2297,10 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, */ if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE)) skb_dst_force(skb); - __qdisc_update_bstats(q, skb->len); + + qdisc_skb_cb(skb)->pkt_len = skb->len; + qdisc_bstats_update(q, skb); + if (sch_direct_xmit(skb, q, dev, txq, root_lock)) { if (unlikely(contended)) { spin_unlock(&q->busylock); @@ -5621,18 +5620,20 @@ struct netdev_queue *dev_ingress_queue_create(struct net_device *dev) } /** - * alloc_netdev_mq - allocate network device + * alloc_netdev_mqs - allocate network device * @sizeof_priv: size of private data to allocate space for * @name: device name format string * @setup: callback to initialize device - * @queue_count: the number of subqueues to allocate + * @txqs: the number of TX subqueues to allocate + * @rxqs: the number of RX subqueues to allocate * * Allocates a struct net_device with private data area for driver use * and performs basic initialization. Also allocates subquue structs - * for each queue on the device at the end of the netdevice. + * for each queue on the device. */ -struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, - void (*setup)(struct net_device *), unsigned int queue_count) +struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, + void (*setup)(struct net_device *), + unsigned int txqs, unsigned int rxqs) { struct net_device *dev; size_t alloc_size; @@ -5640,12 +5641,20 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, BUG_ON(strlen(name) >= sizeof(dev->name)); - if (queue_count < 1) { + if (txqs < 1) { pr_err("alloc_netdev: Unable to allocate device " "with zero queues.\n"); return NULL; } +#ifdef CONFIG_RPS + if (rxqs < 1) { + pr_err("alloc_netdev: Unable to allocate device " + "with zero RX queues.\n"); + return NULL; + } +#endif + alloc_size = sizeof(struct net_device); if (sizeof_priv) { /* ensure 32-byte alignment of private area */ @@ -5676,14 +5685,14 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, dev_net_set(dev, &init_net); - dev->num_tx_queues = queue_count; - dev->real_num_tx_queues = queue_count; + dev->num_tx_queues = txqs; + dev->real_num_tx_queues = txqs; if (netif_alloc_netdev_queues(dev)) goto free_pcpu; #ifdef CONFIG_RPS - dev->num_rx_queues = queue_count; - dev->real_num_rx_queues = queue_count; + dev->num_rx_queues = rxqs; + dev->real_num_rx_queues = rxqs; if (netif_alloc_rx_queues(dev)) goto free_pcpu; #endif @@ -5711,7 +5720,7 @@ free_p: kfree(p); return NULL; } -EXPORT_SYMBOL(alloc_netdev_mq); +EXPORT_SYMBOL(alloc_netdev_mqs); /** * free_netdev - free network device diff --git a/net/core/filter.c b/net/core/filter.c index 2b27d4efdd4..afc58374ca9 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -158,7 +158,7 @@ EXPORT_SYMBOL(sk_filter); /** * sk_run_filter - run a filter on a socket * @skb: buffer to run the filter on - * @filter: filter to apply + * @fentry: filter to apply * * Decode and apply filter instructions to the skb->data. * Return length to keep, 0 for none. @skb is the data we are diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 750db57f3bb..a5f7535aab5 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1820,7 +1820,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (kind != 2 && security_netlink_recv(skb, CAP_NET_ADMIN)) return -EPERM; - if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) { + if (kind == 2 && (nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) { struct sock *rtnl; rtnl_dumpit_func dumpit; diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 45087052d89..5fdb0722901 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -426,7 +426,8 @@ static inline void dccp_update_gsr(struct sock *sk, u64 seq) { struct dccp_sock *dp = dccp_sk(sk); - dp->dccps_gsr = seq; + if (after48(seq, dp->dccps_gsr)) + dp->dccps_gsr = seq; /* Sequence validity window depends on remote Sequence Window (7.5.1) */ dp->dccps_swl = SUB48(ADD48(dp->dccps_gsr, 1), dp->dccps_r_seq_win / 4); /* diff --git a/net/dccp/input.c b/net/dccp/input.c index 15af247ea00..8cde009e8b8 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -260,7 +260,7 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) */ if (time_before(now, (dp->dccps_rate_last + sysctl_dccp_sync_ratelimit))) - return 0; + return -1; DCCP_WARN("Step 6 failed for %s packet, " "(LSWL(%llu) <= P.seqno(%llu) <= S.SWH(%llu)) and " diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c index 563943822e5..42348824ee3 100644 --- a/net/dccp/sysctl.c +++ b/net/dccp/sysctl.c @@ -21,7 +21,8 @@ /* Boundary values */ static int zero = 0, u8_max = 0xFF; -static unsigned long seqw_min = 32; +static unsigned long seqw_min = DCCPF_SEQ_WMIN, + seqw_max = 0xFFFFFFFF; /* maximum on 32 bit */ static struct ctl_table dccp_default_table[] = { { @@ -31,6 +32,7 @@ static struct ctl_table dccp_default_table[] = { .mode = 0644, .proc_handler = proc_doulongvec_minmax, .extra1 = &seqw_min, /* RFC 4340, 7.5.2 */ + .extra2 = &seqw_max, }, { .procname = "rx_ccid", diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index f00ef2f1d81..f9d7ac924f1 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -347,10 +347,11 @@ void ether_setup(struct net_device *dev) EXPORT_SYMBOL(ether_setup); /** - * alloc_etherdev_mq - Allocates and sets up an Ethernet device + * alloc_etherdev_mqs - Allocates and sets up an Ethernet device * @sizeof_priv: Size of additional driver-private structure to be allocated * for this Ethernet device - * @queue_count: The number of queues this device has. + * @txqs: The number of TX queues this device has. + * @txqs: The number of RX queues this device has. * * Fill in the fields of the device structure with Ethernet-generic * values. Basically does everything except registering the device. @@ -360,11 +361,12 @@ EXPORT_SYMBOL(ether_setup); * this private data area. */ -struct net_device *alloc_etherdev_mq(int sizeof_priv, unsigned int queue_count) +struct net_device *alloc_etherdev_mqs(int sizeof_priv, unsigned int txqs, + unsigned int rxqs) { - return alloc_netdev_mq(sizeof_priv, "eth%d", ether_setup, queue_count); + return alloc_netdev_mqs(sizeof_priv, "eth%d", ether_setup, txqs, rxqs); } -EXPORT_SYMBOL(alloc_etherdev_mq); +EXPORT_SYMBOL(alloc_etherdev_mqs); static size_t _format_mac_addr(char *buf, int buflen, const unsigned char *addr, int len) diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 880a5ec6dce..86961bec70a 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -314,14 +314,15 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb) skb->ip_summed = CHECKSUM_NONE; - ah = (struct ip_auth_hdr *)skb->data; - iph = ip_hdr(skb); - ihl = ip_hdrlen(skb); if ((err = skb_cow_data(skb, 0, &trailer)) < 0) goto out; nfrags = err; + ah = (struct ip_auth_hdr *)skb->data; + iph = ip_hdr(skb); + ihl = ip_hdrlen(skb); + work_iph = ah_alloc_tmp(ahash, nfrags, ihl + ahp->icv_trunc_len); if (!work_iph) goto out; diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index a2fc7b961db..04c8b69fd42 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1143,6 +1143,23 @@ static int arp_req_get(struct arpreq *r, struct net_device *dev) return err; } +int arp_invalidate(struct net_device *dev, __be32 ip) +{ + struct neighbour *neigh = neigh_lookup(&arp_tbl, &ip, dev); + int err = -ENXIO; + + if (neigh) { + if (neigh->nud_state & ~NUD_NOARP) + err = neigh_update(neigh, NULL, NUD_FAILED, + NEIGH_UPDATE_F_OVERRIDE| + NEIGH_UPDATE_F_ADMIN); + neigh_release(neigh); + } + + return err; +} +EXPORT_SYMBOL(arp_invalidate); + static int arp_req_delete_public(struct net *net, struct arpreq *r, struct net_device *dev) { @@ -1163,7 +1180,6 @@ static int arp_req_delete(struct net *net, struct arpreq *r, { int err; __be32 ip; - struct neighbour *neigh; if (r->arp_flags & ATF_PUBL) return arp_req_delete_public(net, r, dev); @@ -1181,16 +1197,7 @@ static int arp_req_delete(struct net *net, struct arpreq *r, if (!dev) return -EINVAL; } - err = -ENXIO; - neigh = neigh_lookup(&arp_tbl, &ip, dev); - if (neigh) { - if (neigh->nud_state & ~NUD_NOARP) - err = neigh_update(neigh, NULL, NUD_FAILED, - NEIGH_UPDATE_F_OVERRIDE| - NEIGH_UPDATE_F_ADMIN); - neigh_release(neigh); - } - return err; + return arp_invalidate(dev, ip); } /* diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 25e318153f1..97e5fb76526 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -73,7 +73,7 @@ int inet_csk_bind_conflict(const struct sock *sk, !sk2->sk_bound_dev_if || sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { if (!reuse || !sk2->sk_reuse || - sk2->sk_state == TCP_LISTEN) { + ((1 << sk2->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))) { const __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2); if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) || sk2_rcv_saddr == sk_rcv_saddr(sk)) @@ -122,7 +122,8 @@ again: (tb->num_owners < smallest_size || smallest_size == -1)) { smallest_size = tb->num_owners; smallest_rover = rover; - if (atomic_read(&hashinfo->bsockets) > (high - low) + 1) { + if (atomic_read(&hashinfo->bsockets) > (high - low) + 1 && + !inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb)) { spin_unlock(&head->lock); snum = smallest_rover; goto have_snum; diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 2ada17129fc..2746c1fa641 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -858,7 +858,7 @@ static int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) nlmsg_len(nlh) < hdrlen) return -EINVAL; - if (nlh->nlmsg_flags & NLM_F_DUMP) { + if ((nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) { if (nlmsg_attrlen(nlh, hdrlen)) { struct nlattr *attr; diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 3fac340a28d..e855fffaed9 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -710,42 +710,25 @@ static void get_counters(const struct xt_table_info *t, struct arpt_entry *iter; unsigned int cpu; unsigned int i; - unsigned int curcpu = get_cpu(); - - /* Instead of clearing (by a previous call to memset()) - * the counters and using adds, we set the counters - * with data used by 'current' CPU - * - * Bottom half has to be disabled to prevent deadlock - * if new softirq were to run and call ipt_do_table - */ - local_bh_disable(); - i = 0; - xt_entry_foreach(iter, t->entries[curcpu], t->size) { - SET_COUNTER(counters[i], iter->counters.bcnt, - iter->counters.pcnt); - ++i; - } - local_bh_enable(); - /* Processing counters from other cpus, we can let bottom half enabled, - * (preemption is disabled) - */ for_each_possible_cpu(cpu) { - if (cpu == curcpu) - continue; + seqlock_t *lock = &per_cpu(xt_info_locks, cpu).lock; + i = 0; - local_bh_disable(); - xt_info_wrlock(cpu); xt_entry_foreach(iter, t->entries[cpu], t->size) { - ADD_COUNTER(counters[i], iter->counters.bcnt, - iter->counters.pcnt); + u64 bcnt, pcnt; + unsigned int start; + + do { + start = read_seqbegin(lock); + bcnt = iter->counters.bcnt; + pcnt = iter->counters.pcnt; + } while (read_seqretry(lock, start)); + + ADD_COUNTER(counters[i], bcnt, pcnt); ++i; } - xt_info_wrunlock(cpu); - local_bh_enable(); } - put_cpu(); } static struct xt_counters *alloc_counters(const struct xt_table *table) @@ -759,7 +742,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table) * about). */ countersize = sizeof(struct xt_counters) * private->number; - counters = vmalloc(countersize); + counters = vzalloc(countersize); if (counters == NULL) return ERR_PTR(-ENOMEM); @@ -1007,7 +990,7 @@ static int __do_replace(struct net *net, const char *name, struct arpt_entry *iter; ret = 0; - counters = vmalloc(num_counters * sizeof(struct xt_counters)); + counters = vzalloc(num_counters * sizeof(struct xt_counters)); if (!counters) { ret = -ENOMEM; goto out; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index a846d633b3b..652efea013d 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -884,42 +884,25 @@ get_counters(const struct xt_table_info *t, struct ipt_entry *iter; unsigned int cpu; unsigned int i; - unsigned int curcpu = get_cpu(); - - /* Instead of clearing (by a previous call to memset()) - * the counters and using adds, we set the counters - * with data used by 'current' CPU. - * - * Bottom half has to be disabled to prevent deadlock - * if new softirq were to run and call ipt_do_table - */ - local_bh_disable(); - i = 0; - xt_entry_foreach(iter, t->entries[curcpu], t->size) { - SET_COUNTER(counters[i], iter->counters.bcnt, - iter->counters.pcnt); - ++i; - } - local_bh_enable(); - /* Processing counters from other cpus, we can let bottom half enabled, - * (preemption is disabled) - */ for_each_possible_cpu(cpu) { - if (cpu == curcpu) - continue; + seqlock_t *lock = &per_cpu(xt_info_locks, cpu).lock; + i = 0; - local_bh_disable(); - xt_info_wrlock(cpu); xt_entry_foreach(iter, t->entries[cpu], t->size) { - ADD_COUNTER(counters[i], iter->counters.bcnt, - iter->counters.pcnt); + u64 bcnt, pcnt; + unsigned int start; + + do { + start = read_seqbegin(lock); + bcnt = iter->counters.bcnt; + pcnt = iter->counters.pcnt; + } while (read_seqretry(lock, start)); + + ADD_COUNTER(counters[i], bcnt, pcnt); ++i; /* macro does multi eval of i |