aboutsummaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@woody.linux-foundation.org>2007-10-18 14:40:30 -0700
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2007-10-18 14:40:30 -0700
commita57793651ff1a09ef18bade998632435ca2dc13f (patch)
treefffc839d7b001f196421f09f0a06491588835fe1 /net
parent9cf52b2921fbe62566b6b2ee79f71203749c9e5e (diff)
parent52f095ee88d8851866bc7694ab991ca5abf21d5e (diff)
Merge branch 'master' of master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6
* 'master' of master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6: (51 commits) [IPV6]: Fix again the fl6_sock_lookup() fixed locking [NETFILTER]: nf_conntrack_tcp: fix connection reopening fix [IPV6]: Fix race in ipv6_flowlabel_opt() when inserting two labels [IPV6]: Lost locking in fl6_sock_lookup [IPV6]: Lost locking when inserting a flowlabel in ipv6_fl_list [NETFILTER]: xt_sctp: fix mistake to pass a pointer where array is required [NET]: Fix OOPS due to missing check in dev_parse_header(). [TCP]: Remove lost_retrans zero seqno special cases [NET]: fix carrier-on bug? [NET]: Fix uninitialised variable in ip_frag_reasm() [IPSEC]: Rename mode to outer_mode and add inner_mode [IPSEC]: Disallow combinations of RO and AH/ESP/IPCOMP [IPSEC]: Use the top IPv4 route's peer instead of the bottom [IPSEC]: Store afinfo pointer in xfrm_mode [IPSEC]: Add missing BEET checks [IPSEC]: Move type and mode map into xfrm_state.c [IPSEC]: Fix length check in xfrm_parse_spi [IPSEC]: Move ip_summed zapping out of xfrm6_rcv_spi [IPSEC]: Get nexthdr from caller in xfrm6_rcv_spi [IPSEC]: Move tunnel parsing for IPv4 out of xfrm4_input ...
Diffstat (limited to 'net')
-rw-r--r--net/atm/br2684.c121
-rw-r--r--net/core/filter.c58
-rw-r--r--net/core/pktgen.c2
-rw-r--r--net/core/sock.c14
-rw-r--r--net/dccp/input.c3
-rw-r--r--net/dccp/sysctl.c3
-rw-r--r--net/ieee80211/ieee80211_crypt_tkip.c2
-rw-r--r--net/ipv4/inet_fragment.c89
-rw-r--r--net/ipv4/ip_fragment.c159
-rw-r--r--net/ipv4/tcp_input.c6
-rw-r--r--net/ipv4/xfrm4_input.c40
-rw-r--r--net/ipv4/xfrm4_mode_beet.c1
-rw-r--r--net/ipv4/xfrm4_mode_tunnel.c1
-rw-r--r--net/ipv4/xfrm4_output.c2
-rw-r--r--net/ipv4/xfrm4_policy.c27
-rw-r--r--net/ipv4/xfrm4_state.c1
-rw-r--r--net/ipv4/xfrm4_tunnel.c11
-rw-r--r--net/ipv6/addrconf.c10
-rw-r--r--net/ipv6/af_inet6.c1
-rw-r--r--net/ipv6/ah6.c11
-rw-r--r--net/ipv6/esp6.c9
-rw-r--r--net/ipv6/ip6_flowlabel.c57
-rw-r--r--net/ipv6/ipcomp6.c9
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c112
-rw-r--r--net/ipv6/reassembly.c131
-rw-r--r--net/ipv6/xfrm6_input.c14
-rw-r--r--net/ipv6/xfrm6_mode_beet.c1
-rw-r--r--net/ipv6/xfrm6_mode_ro.c9
-rw-r--r--net/ipv6/xfrm6_mode_tunnel.c1
-rw-r--r--net/ipv6/xfrm6_output.c2
-rw-r--r--net/ipv6/xfrm6_policy.c17
-rw-r--r--net/ipv6/xfrm6_state.c7
-rw-r--r--net/ipv6/xfrm6_tunnel.c4
-rw-r--r--net/irda/ircomm/ircomm_tty_attach.c15
-rw-r--r--net/mac80211/ieee80211_ioctl.c16
-rw-r--r--net/mac80211/ieee80211_sta.c71
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c11
-rw-r--r--net/netfilter/xt_sctp.c18
-rw-r--r--net/sched/sch_generic.c7
-rw-r--r--net/xfrm/xfrm_input.c5
-rw-r--r--net/xfrm/xfrm_output.c4
-rw-r--r--net/xfrm/xfrm_policy.c179
-rw-r--r--net/xfrm/xfrm_state.c206
43 files changed, 661 insertions, 806 deletions
diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index c742d37bfb9..ba6428f204f 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -24,16 +24,6 @@ Author: Marcell GAL, 2000, XDSL Ltd, Hungary
#include "common.h"
-/*
- * Define this to use a version of the code which interacts with the higher
- * layers in a more intellegent way, by always reserving enough space for
- * our header at the begining of the packet. However, there may still be
- * some problems with programs like tcpdump. In 2.5 we'll sort out what
- * we need to do to get this perfect. For now we just will copy the packet
- * if we need space for the header
- */
-/* #define FASTER_VERSION */
-
#ifdef SKB_DEBUG
static void skb_debug(const struct sk_buff *skb)
{
@@ -69,9 +59,7 @@ struct br2684_vcc {
#ifdef CONFIG_ATM_BR2684_IPFILTER
struct br2684_filter filter;
#endif /* CONFIG_ATM_BR2684_IPFILTER */
-#ifndef FASTER_VERSION
unsigned copies_needed, copies_failed;
-#endif /* FASTER_VERSION */
};
struct br2684_dev {
@@ -147,13 +135,6 @@ static int br2684_xmit_vcc(struct sk_buff *skb, struct br2684_dev *brdev,
struct br2684_vcc *brvcc)
{
struct atm_vcc *atmvcc;
-#ifdef FASTER_VERSION
- if (brvcc->encaps == e_llc)
- memcpy(skb_push(skb, 8), llc_oui_pid_pad, 8);
- /* last 2 bytes of llc_oui_pid_pad are managed by header routines;
- yes, you got it: 8 + 2 = sizeof(llc_oui_pid_pad)
- */
-#else
int minheadroom = (brvcc->encaps == e_llc) ? 10 : 2;
if (skb_headroom(skb) < minheadroom) {
struct sk_buff *skb2 = skb_realloc_headroom(skb, minheadroom);
@@ -170,7 +151,6 @@ static int br2684_xmit_vcc(struct sk_buff *skb, struct br2684_dev *brdev,
skb_copy_to_linear_data(skb, llc_oui_pid_pad, 10);
else
memset(skb->data, 0, 2);
-#endif /* FASTER_VERSION */
skb_debug(skb);
ATM_SKB(skb)->vcc = atmvcc = brvcc->atmvcc;
@@ -237,87 +217,6 @@ static struct net_device_stats *br2684_get_stats(struct net_device *dev)
return &BRPRIV(dev)->stats;
}
-#ifdef FASTER_VERSION
-/*
- * These mirror eth_header and eth_header_cache. They are not usually
- * exported for use in modules, so we grab them from net_device
- * after ether_setup() is done with it. Bit of a hack.
- */
-static int (*my_eth_header)(struct sk_buff *, struct net_device *,
- unsigned short, void *, void *, unsigned);
-static int (*my_eth_header_cache)(struct neighbour *, struct hh_cache *);
-
-static int
-br2684_header(struct sk_buff *skb, struct net_device *dev,
- unsigned short type, void *daddr, void *saddr, unsigned len)
-{
- u16 *pad_before_eth;
- int t = my_eth_header(skb, dev, type, daddr, saddr, len);
- if (t > 0) {
- pad_before_eth = (u16 *) skb_push(skb, 2);
- *pad_before_eth = 0;
- return dev->hard_header_len; /* or return 16; ? */
- } else
- return t;
-}
-
-static int
-br2684_header_cache(struct neighbour *neigh, struct hh_cache *hh)
-{
-/* hh_data is 16 bytes long. if encaps is ether-llc we need 24, so
-xmit will add the additional header part in that case */
- u16 *pad_before_eth = (u16 *)(hh->hh_data);
- int t = my_eth_header_cache(neigh, hh);
- DPRINTK("br2684_header_cache, neigh=%p, hh_cache=%p\n", neigh, hh);
- if (t < 0)
- return t;
- else {
- *pad_before_eth = 0;
- hh->hh_len = PADLEN + ETH_HLEN;
- }
- return 0;
-}
-
-/*
- * This is similar to eth_type_trans, which cannot be used because of
- * our dev->hard_header_len
- */
-static inline __be16 br_type_trans(struct sk_buff *skb, struct net_device *dev)
-{
- struct ethhdr *eth;
- unsigned char *rawp;
- eth = eth_hdr(skb);
-
- if (is_multicast_ether_addr(eth->h_dest)) {
- if (!compare_ether_addr(eth->h_dest, dev->broadcast))
- skb->pkt_type = PACKET_BROADCAST;
- else
- skb->pkt_type = PACKET_MULTICAST;
- }
-
- else if (compare_ether_addr(eth->h_dest, dev->dev_addr))
- skb->pkt_type = PACKET_OTHERHOST;
-
- if (ntohs(eth->h_proto) >= 1536)
- return eth->h_proto;
-
- rawp = skb->data;
-
- /*
- * This is a magic hack to spot IPX packets. Older Novell breaks
- * the protocol design and runs IPX over 802.3 without an 802.2 LLC
- * layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This
- * won't work for fault tolerant netware but does for the rest.
- */
- if (*(unsigned short *) rawp == 0xFFFF)
- return htons(ETH_P_802_3);
-
- /*
- * Real 802.2 LLC
- */
- return htons(ETH_P_802_2);
-}
-#endif /* FASTER_VERSION */
/*
* We remember when the MAC gets set, so we don't override it later with
@@ -448,17 +347,8 @@ static void br2684_push(struct atm_vcc *atmvcc, struct sk_buff *skb)
return;
}
-#ifdef FASTER_VERSION
- /* FIXME: tcpdump shows that pointer to mac header is 2 bytes earlier,
- than should be. What else should I set? */
- skb_pull(skb, plen);
- skb_set_mac_header(skb, -ETH_HLEN);
- skb->pkt_type = PACKET_HOST;
- skb->protocol = br_type_trans(skb, net_dev);
-#else
skb_pull(skb, plen - ETH_HLEN);
skb->protocol = eth_type_trans(skb, net_dev);
-#endif /* FASTER_VERSION */
#ifdef CONFIG_ATM_BR2684_IPFILTER
if (unlikely(packet_fails_filter(skb->protocol, brvcc, skb))) {
brdev->stats.rx_dropped++;
@@ -584,13 +474,6 @@ static void br2684_setup(struct net_device *netdev)
ether_setup(netdev);
brdev->net_dev = netdev;
-#ifdef FASTER_VERSION
- my_eth_header = netdev->hard_header;
- netdev->hard_header = br2684_header;
- my_eth_header_cache = netdev->hard_header_cache;
- netdev->hard_header_cache = br2684_header_cache;
- netdev->hard_header_len = sizeof(llc_oui_pid_pad) + ETH_HLEN; /* 10 + 14 */
-#endif
my_eth_mac_addr = netdev->set_mac_address;
netdev->set_mac_address = br2684_mac_addr;
netdev->hard_start_xmit = br2684_start_xmit;
@@ -719,16 +602,12 @@ static int br2684_seq_show(struct seq_file *seq, void *v)
list_for_each_entry(brvcc, &brdev->brvccs, brvccs) {
seq_printf(seq, " vcc %d.%d.%d: encaps=%s"
-#ifndef FASTER_VERSION
", failed copies %u/%u"
-#endif /* FASTER_VERSION */
"\n", brvcc->atmvcc->dev->number,
brvcc->atmvcc->vpi, brvcc->atmvcc->vci,
(brvcc->encaps == e_llc) ? "LLC" : "VC"
-#ifndef FASTER_VERSION
, brvcc->copies_failed
, brvcc->copies_needed
-#endif /* FASTER_VERSION */
);
#ifdef CONFIG_ATM_BR2684_IPFILTER
#define b1(var, byte) ((u8 *) &brvcc->filter.var)[byte]
diff --git a/net/core/filter.c b/net/core/filter.c
index bd903aaf7aa..1f0068eae50 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -387,6 +387,25 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
}
/**
+ * sk_filter_rcu_release: Release a socket filter by rcu_head
+ * @rcu: rcu_head that contains the sk_filter to free
+ */
+static void sk_filter_rcu_release(struct rcu_head *rcu)
+{
+ struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);
+
+ sk_filter_release(fp);
+}
+
+static void sk_filter_delayed_uncharge(struct sock *sk, struct sk_filter *fp)
+{
+ unsigned int size = sk_filter_len(fp);
+
+ atomic_sub(size, &sk->sk_omem_alloc);
+ call_rcu_bh(&fp->rcu, sk_filter_rcu_release);
+}
+
+/**
* sk_attach_filter - attach a socket filter
* @fprog: the filter program
* @sk: the socket to use
@@ -398,7 +417,7 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
*/
int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
{
- struct sk_filter *fp;
+ struct sk_filter *fp, *old_fp;
unsigned int fsize = sizeof(struct sock_filter) * fprog->len;
int err;
@@ -418,19 +437,34 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
fp->len = fprog->len;
err = sk_chk_filter(fp->insns, fp->len);
- if (!err) {
- struct sk_filter *old_fp;
-
- rcu_read_lock_bh();
- old_fp = rcu_dereference(sk->sk_filter);
- rcu_assign_pointer(sk->sk_filter, fp);
- rcu_read_unlock_bh();
- fp = old_fp;
+ if (err) {
+ sk_filter_uncharge(sk, fp);
+ return err;
}
- if (fp)
- sk_filter_release(sk, fp);
- return err;
+ rcu_read_lock_bh();
+ old_fp = rcu_dereference(sk->sk_filter);
+ rcu_assign_pointer(sk->sk_filter, fp);
+ rcu_read_unlock_bh();
+
+ sk_filter_delayed_uncharge(sk, old_fp);
+ return 0;
+}
+
+int sk_detach_filter(struct sock *sk)
+{
+ int ret = -ENOENT;
+ struct sk_filter *filter;
+
+ rcu_read_lock_bh();
+ filter = rcu_dereference(sk->sk_filter);
+ if (filter) {
+ rcu_assign_pointer(sk->sk_filter, NULL);
+ sk_filter_delayed_uncharge(sk, filter);
+ ret = 0;
+ }
+ rcu_read_unlock_bh();
+ return ret;
}
EXPORT_SYMBOL(sk_chk_filter);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 2100c734b10..8cae60c5338 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2454,7 +2454,7 @@ static int pktgen_output_ipsec(struct sk_buff *skb, struct pktgen_dev *pkt_dev)
spin_lock(&x->lock);
iph = ip_hdr(skb);
- err = x->mode->output(x, skb);
+ err = x->outer_mode->output(x, skb);
if (err)
goto error;
err = x->type->output(x, skb);
diff --git a/net/core/sock.c b/net/core/sock.c
index d45ecdccc6a..d292b4113d6 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -428,7 +428,6 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
char __user *optval, int optlen)
{
struct sock *sk=sock->sk;
- struct sk_filter *filter;
int val;
int valbool;
struct linger ling;
@@ -652,16 +651,7 @@ set_rcvbuf:
break;
case SO_DETACH_FILTER:
- rcu_read_lock_bh();
- filter = rcu_dereference(sk->sk_filter);
- if (filter) {
- rcu_assign_pointer(sk->sk_filter, NULL);
- sk_filter_release(sk, filter);
- rcu_read_unlock_bh();
- break;
- }
- rcu_read_unlock_bh();
- ret = -ENONET;
+ ret = sk_detach_filter(sk);
break;
case SO_PASSSEC:
@@ -925,7 +915,7 @@ void sk_free(struct sock *sk)
filter = rcu_dereference(sk->sk_filter);
if (filter) {
- sk_filter_release(sk, filter);
+ sk_filter_uncharge(sk, filter);
rcu_assign_pointer(sk->sk_filter, NULL);
}
diff --git a/net/dccp/input.c b/net/dccp/input.c
index 19d7e1dbd87..3560a2a875a 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -19,6 +19,9 @@
#include "ccid.h"
#include "dccp.h"
+/* rate-limit for syncs in reply to sequence-invalid packets; RFC 4340, 7.5.4 */
+int sysctl_dccp_sync_ratelimit __read_mostly = HZ / 8;
+
static void dccp_fin(struct sock *sk, struct sk_buff *skb)
{
sk->sk_shutdown |= RCV_SHUTDOWN;
diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c
index 9364b2fb4db..c62c05039f6 100644
--- a/net/dccp/sysctl.c
+++ b/net/dccp/sysctl.c
@@ -18,9 +18,6 @@
#error This file should not be compiled without CONFIG_SYSCTL defined
#endif
-/* rate-limit for syncs in reply to sequence-invalid packets; RFC 4340, 7.5.4 */
-int sysctl_dccp_sync_ratelimit __read_mostly = HZ / 8;
-
static struct ctl_table dccp_default_table[] = {
{
.procname = "seq_window",
diff --git a/net/ieee80211/ieee80211_crypt_tkip.c b/net/ieee80211/ieee80211_crypt_tkip.c
index 6cc54eeca3e..72e6ab66834 100644
--- a/net/ieee80211/ieee80211_crypt_tkip.c
+++ b/net/ieee80211/ieee80211_crypt_tkip.c
@@ -586,7 +586,7 @@ static void michael_mic_hdr(struct sk_buff *skb, u8 * hdr)
if (stype & IEEE80211_STYPE_QOS_DATA) {
const struct ieee80211_hdr_3addrqos *qoshdr =
(struct ieee80211_hdr_3addrqos *)skb->data;
- hdr[12] = qoshdr->qos_ctl & cpu_to_le16(IEEE80211_QCTL_TID);
+ hdr[12] = le16_to_cpu(qoshdr->qos_ctl) & IEEE80211_QCTL_TID;
} else
hdr[12] = 0; /* priority */
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 484cf512858..e15e04fc666 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -136,7 +136,9 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f,
*work -= f->qsize;
atomic_sub(f->qsize, &f->mem);
- f->destructor(q);
+ if (f->destructor)
+ f->destructor(q);
+ kfree(q);
}
EXPORT_SYMBOL(inet_frag_destroy);
@@ -172,3 +174,88 @@ int inet_frag_evictor(struct inet_frags *f)
return evicted;
}
EXPORT_SYMBOL(inet_frag_evictor);
+
+static struct inet_frag_queue *inet_frag_intern(struct inet_frag_queue *qp_in,
+ struct inet_frags *f, unsigned int hash, void *arg)
+{
+ struct inet_frag_queue *qp;
+#ifdef CONFIG_SMP
+ struct hlist_node *n;
+#endif
+
+ write_lock(&f->lock);
+#ifdef CONFIG_SMP
+ /* With SMP race we have to recheck hash table, because
+ * such entry could be created on other cpu, while we
+ * promoted read lock to write lock.
+ */
+ hlist_for_each_entry(qp, n, &f->hash[hash], list) {
+ if (f->match(qp, arg)) {
+ atomic_inc(&qp->refcnt);
+ write_unlock(&f->lock);
+ qp_in->last_in |= COMPLETE;
+ inet_frag_put(qp_in, f);
+ return qp;
+ }
+ }
+#endif
+ qp = qp_in;
+ if (!mod_timer(&qp->timer, jiffies + f->ctl->timeout))
+ atomic_inc(&qp->refcnt);
+
+ atomic_inc(&qp->refcnt);
+ hlist_add_head(&qp->list, &f->hash[hash]);
+ list_add_tail(&qp->lru_list, &f->lru_list);
+ f->nqueues++;
+ write_unlock(&f->lock);
+ return qp;
+}
+
+static struct inet_frag_queue *inet_frag_alloc(struct inet_frags *f, void *arg)
+{
+ struct inet_frag_queue *q;
+
+ q = kzalloc(f->qsize, GFP_ATOMIC);
+ if (q == NULL)
+ return NULL;
+
+ f->constructor(q, arg);
+ atomic_add(f->qsize, &f->mem);
+ setup_timer(&q->timer, f->frag_expire, (unsigned long)q);
+ spin_lock_init(&q->lock);
+ atomic_set(&q->refcnt, 1);
+
+ return q;
+}
+
+static struct inet_frag_queue *inet_frag_create(struct inet_frags *f,
+ void *arg, unsigned int hash)
+{
+ struct inet_frag_queue *q;
+
+ q = inet_frag_alloc(f, arg);
+ if (q == NULL)
+ return NULL;
+
+ return inet_frag_intern(q, f, hash, arg);
+}
+
+struct inet_frag_queue *inet_frag_find(struct inet_frags *f, void *key,
+ unsigned int hash)
+{
+ struct inet_frag_queue *q;
+ struct hlist_node *n;
+
+ read_lock(&f->lock);
+ hlist_for_each_entry(q, n, &f->hash[hash], list) {
+ if (f->match(q, key)) {
+ atomic_inc(&q->refcnt);
+ read_unlock(&f->lock);
+ return q;
+ }
+ }
+ read_unlock(&f->lock);
+
+ return inet_frag_create(f, key, hash);
+}
+EXPORT_SYMBOL(inet_frag_find);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 443b3f89192..2143bf30597 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -108,6 +108,11 @@ int ip_frag_mem(void)
static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
struct net_device *dev);
+struct ip4_create_arg {
+ struct iphdr *iph;
+ u32 user;
+};
+
static unsigned int ipqhashfn(__be16 id, __be32 saddr, __be32 daddr, u8 prot)
{
return jhash_3words((__force u32)id << 16 | prot,
@@ -123,6 +128,19 @@ static unsigned int ip4_hashfn(struct inet_frag_queue *q)
return ipqhashfn(ipq->id, ipq->saddr, ipq->daddr, ipq->protocol);
}
+static int ip4_frag_match(struct inet_frag_queue *q, void *a)
+{
+ struct ipq *qp;
+ struct ip4_create_arg *arg = a;
+
+ qp = container_of(q, struct ipq, q);
+ return (qp->id == arg->iph->id &&
+ qp->saddr == arg->iph->saddr &&
+ qp->daddr == arg->iph->daddr &&
+ qp->protocol == arg->iph->protocol &&
+ qp->user == arg->user);
+}
+
/* Memory Tracking Functions. */
static __inline__ void frag_kfree_skb(struct sk_buff *skb, int *work)
{
@@ -132,6 +150,20 @@ static __inline__ void frag_kfree_skb(struct sk_buff *skb, int *work)
kfree_skb(skb);
}
+static void ip4_frag_init(struct inet_frag_queue *q, void *a)
+{
+ struct ipq *qp = container_of(q, struct ipq, q);
+ struct ip4_create_arg *arg = a;
+
+ qp->protocol = arg->iph->protocol;
+ qp->id = arg->iph->id;
+ qp->saddr = arg->iph->saddr;
+ qp->daddr = arg->iph->daddr;
+ qp->user = arg->user;
+ qp->peer = sysctl_ipfrag_max_dist ?
+ inet_getpeer(arg->iph->saddr, 1) : NULL;
+}
+
static __inline__ void ip4_frag_free(struct inet_frag_queue *q)
{
struct ipq *qp;
@@ -139,17 +171,6 @@ static __inline__ void ip4_frag_free(struct inet_frag_queue *q)
qp = container_of(q, struct ipq, q);
if (qp->peer)
inet_putpeer(qp->peer);
- kfree(qp);
-}
-
-static __inline__ struct ipq *frag_alloc_queue(void)
-{
- struct ipq *qp = kzalloc(sizeof(struct ipq), GFP_ATOMIC);
-
- if (!qp)
- return NULL;
- atomic_add(sizeof(struct ipq), &ip4_frags.mem);
- return qp;
}
@@ -185,7 +206,9 @@ static void ip_evictor(void)
*/
static void ip_expire(unsigned long arg)
{
- struct ipq *qp = (struct ipq *) arg;
+ struct ipq *qp;
+
+ qp = container_of((struct inet_frag_queue *) arg, struct ipq, q);
spin_lock(&qp->q.lock);
@@ -210,112 +233,30 @@ out:
ipq_put(qp);
}
-/* Creation primitives. */
-
-static struct ipq *ip_frag_intern(struct ipq *qp_in)
+/* Find the correct entry in the "incomplete datagrams" queue for
+ * this IP datagram, and create new one, if nothing is found.
+ */
+static inline struct ipq *ip_find(struct iphdr *iph, u32 user)
{
- struct ipq *qp;
-#ifdef CONFIG_SMP
- struct hlist_node *n;
-#endif
+ struct inet_frag_queue *q;
+ struct ip4_create_arg arg;
unsigned int hash;
- write_lock(&ip4_frags.lock);
- hash = ipqhashfn(qp_in->id, qp_in->saddr, qp_in->daddr,
- qp_in->protocol);
-#ifdef CONFIG_SMP
- /* With SMP race we have to recheck hash table, because
- * such entry could be created on other cpu, while we
- * promoted read lock to write lock.
- */
- hlist_for_each_entry(qp, n, &ip4_frags.hash[hash], q.list) {
- if (qp->id == qp_in->id &&
- qp->saddr == qp_in->saddr &&
- qp->daddr == qp_in->daddr &&
- qp->protocol == qp_in->protocol &&
- qp->user == qp_in->user) {
- atomic_inc(&qp->q.refcnt);
- write_unlock(&ip4_frags.lock);
- qp_in->q.last_in |= COMPLETE;
- ipq_put(qp_in);
- return qp;
- }
- }
-#endif
- qp = qp_in;
-
- if (!mod_timer(&qp->q.timer, jiffies + ip4_frags_ctl.timeout))
- atomic_inc(&qp->q.refcnt);
+ arg.iph = iph;
+ arg.user = user;
+ hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol);
- atomic_inc(&qp->q.refcnt);
- hlist_add_head(&qp->q.list, &ip4_frags.hash[hash]);
- INIT_LIST_HEAD(&qp->q.lru_list);
- list_add_tail(&qp->q.lru_list, &ip4_frags.lru_list);
- ip4_frags.nqueues++;
- write_unlock(&ip4_frags.lock);
- return qp;
-}
-
-/* Add an entry to the 'ipq' queue for a newly received IP datagram. */
-static struct ipq *ip_frag_create(struct iphdr *iph, u32 user)
-{
- struct ipq *qp;
-
- if ((qp = frag_alloc_queue()) == NULL)
+ q = inet_frag_find(&ip4_frags, &arg, hash);
+ if (q == NULL)
goto out_nomem;
- qp->protocol = iph->protocol;
- qp->id = iph->id;
- qp->saddr = iph->saddr;
- qp->daddr = iph->daddr;
- qp->user = user;
- qp->peer = sysctl_ipfrag_max_dist ? inet_getpeer(iph->saddr, 1) : NULL;
-
- /* Initialize a timer for this entry. */
- init_timer(&qp->q.timer);
- qp->q.timer.data = (unsigned long) qp; /* pointer to queue */
- qp->q.timer.function = ip_expire; /* expire function */
- spin_lock_init(&qp->q.lock);
- atomic_set(&qp->q.refcnt, 1);
-
- return ip_frag_intern(qp);
+ return container_of(q, struct ipq, q);
out_nomem:
LIMIT_NETDEBUG(KERN_ERR "ip_frag_create: no memory left !\n");
return NULL;
}
-/* Find the correct entry in the "incomplete datagrams" queue for
- * this IP datagram, and create new one, if nothing is found.
- */
-static inline struct ipq *ip_find(struct iphdr *iph, u32 user)
-{
- __be16 id = iph->id;
- __be32 saddr = iph->saddr;
- __be32 daddr = iph->daddr;
- __u8 protocol = iph->protocol;
- unsigned int hash;
- struct ipq *qp;
- struct hlist_node *n;
-
- read_lock(&ip4_frags.lock);
- hash = ipqhashfn(id, saddr, daddr, protocol);
- hlist_for_each_entry(qp, n, &ip4_frags.hash[hash], q.list) {
- if (qp->id == id &&
- qp->saddr == saddr &&
- qp->daddr == daddr &&
- qp->protocol == protocol &&
- qp->user == user) {
- atomic_inc(&qp->q.refcnt);
- read_unlock(&ip4_frags.lock);
- return qp;
- }
- }
- read_unlock(&ip4_frags.lock);
-
- return ip_frag_create(iph, user);
-}
-
/* Is the fragment too far ahead to be part of ipq? */
static inline int ip_frag_too_far(struct ipq *qp)
{
@@ -545,7 +486,6 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
if (prev) {
head = prev->next;
fp = skb_clone(head, GFP_ATOMIC);
-
if (!fp)
goto out_nomem;
@@ -571,7 +511,6 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
goto out_oversize;
/* Head of list must not be cloned. */
- err = -ENOMEM;
if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC))
goto out_nomem;
@@ -627,6 +566,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
out_nomem:
LIMIT_NETDEBUG(KERN_ERR "IP: queue_glue: no memory for gluing "
"queue %p\n", qp);
+ err = -ENOMEM;
goto out_fail;
out_oversize:
if (net_ratelimit())
@@ -671,9 +611,12 @@ void __init ipfrag_init(void)
{
ip4_frags.ctl = &ip4_frags_ctl;
ip4_frags.hashfn = ip4_hashfn;
+ ip4_frags.constructor = ip4_frag_init;
ip4_frags.destructor = ip4_frag_free;
ip4_frags.skb_free = NULL;
ip4_frags.qsize = sizeof(struct ipq);
+ ip4_frags.match = ip4_frag_match;
+ ip4_frags.frag_expire = ip_expire;
inet_frags_init(&ip4_frags);
}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 0f00966b178..9288220b73a 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1121,7 +1121,7 @@ static int tcp_mark_lost_retrans(struct sock *sk, u32 received_upto)
struct sk_buff *skb;
int flag = 0;
int cnt = 0;
- u32 new_low_seq = 0;
+ u32 new_low_seq = tp->snd_nxt;
tcp_for_write_queue(skb, sk) {
u32 ack_seq = TCP_SKB_CB(skb)->ack_seq;
@@ -1153,7 +1153,7 @@ static int tcp_mark_lost_retrans(struct sock *sk, u32 received_upto)
NET_INC_STATS_BH(LINUX_MIB_TCPLOSTRETRANSMIT);
}
} else {
- if (!new_low_seq || before(ack_seq, new_low_seq))
+ if (before(ack_seq, new_low_seq))
new_low_seq = ack_seq;
cnt += tcp_skb_pcount(skb);
}
@@ -1242,7 +1242,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_