aboutsummaryrefslogtreecommitdiff
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/af_inet.c124
-rw-r--r--net/ipv4/arp.c8
-rw-r--r--net/ipv4/fib_trie.c101
-rw-r--r--net/ipv4/inet_timewait_sock.c2
-rw-r--r--net/ipv4/ip_gre.c8
-rw-r--r--net/ipv4/ip_output.c2
-rw-r--r--net/ipv4/ipip.c8
-rw-r--r--net/ipv4/ipmr.c4
-rw-r--r--net/ipv4/netfilter/arp_tables.c47
-rw-r--r--net/ipv4/netfilter/arptable_filter.c4
-rw-r--r--net/ipv4/netfilter/ip_tables.c51
-rw-r--r--net/ipv4/netfilter/iptable_filter.c10
-rw-r--r--net/ipv4/netfilter/iptable_mangle.c16
-rw-r--r--net/ipv4/netfilter/iptable_raw.c10
-rw-r--r--net/ipv4/netfilter/iptable_security.c12
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c22
-rw-r--r--net/ipv4/netfilter/nf_nat_core.c8
-rw-r--r--net/ipv4/netfilter/nf_nat_rule.c6
-rw-r--r--net/ipv4/netfilter/nf_nat_standalone.c8
-rw-r--r--net/ipv4/protocol.c19
-rw-r--r--net/ipv4/raw.c9
-rw-r--r--net/ipv4/route.c22
-rw-r--r--net/ipv4/tcp.c16
-rw-r--r--net/ipv4/tcp_cong.c4
-rw-r--r--net/ipv4/tcp_input.c5
-rw-r--r--net/ipv4/tcp_ipv4.c66
-rw-r--r--net/ipv4/tcp_minisocks.c25
-rw-r--r--net/ipv4/tcp_output.c63
-rw-r--r--net/ipv4/tcp_timer.c16
-rw-r--r--net/ipv4/udp.c156
-rw-r--r--net/ipv4/xfrm4_policy.c37
31 files changed, 528 insertions, 361 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 566ea6c4321..6c30a73f03f 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -124,7 +124,6 @@ static struct list_head inetsw[SOCK_MAX];
static DEFINE_SPINLOCK(inetsw_lock);
struct ipv4_config ipv4_config;
-
EXPORT_SYMBOL(ipv4_config);
/* New destruction routine */
@@ -139,12 +138,12 @@ void inet_sock_destruct(struct sock *sk)
sk_mem_reclaim(sk);
if (sk->sk_type == SOCK_STREAM && sk->sk_state != TCP_CLOSE) {
- printk("Attempt to release TCP socket in state %d %p\n",
+ pr_err("Attempt to release TCP socket in state %d %p\n",
sk->sk_state, sk);
return;
}
if (!sock_flag(sk, SOCK_DEAD)) {
- printk("Attempt to release alive inet socket %p\n", sk);
+ pr_err("Attempt to release alive inet socket %p\n", sk);
return;
}
@@ -157,6 +156,7 @@ void inet_sock_destruct(struct sock *sk)
dst_release(sk->sk_dst_cache);
sk_refcnt_debug_dec(sk);
}
+EXPORT_SYMBOL(inet_sock_destruct);
/*
* The routines beyond this point handle the behaviour of an AF_INET
@@ -219,6 +219,7 @@ out:
release_sock(sk);
return err;
}
+EXPORT_SYMBOL(inet_listen);
u32 inet_ehash_secret __read_mostly;
EXPORT_SYMBOL(inet_ehash_secret);
@@ -435,9 +436,11 @@ int inet_release(struct socket *sock)
}
return 0;
}
+EXPORT_SYMBOL(inet_release);
/* It is off by default, see below. */
int sysctl_ip_nonlocal_bind __read_mostly;
+EXPORT_SYMBOL(sysctl_ip_nonlocal_bind);
int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
@@ -519,6 +522,7 @@ out_release_sock:
out:
return err;
}
+EXPORT_SYMBOL(inet_bind);
int inet_dgram_connect(struct socket *sock, struct sockaddr * uaddr,
int addr_len, int flags)
@@ -532,6 +536,7 @@ int inet_dgram_connect(struct socket *sock, struct sockaddr * uaddr,
return -EAGAIN;
return sk->sk_prot->connect(sk, (struct sockaddr *)uaddr, addr_len);
}
+EXPORT_SYMBOL(inet_dgram_connect);
static long inet_wait_for_connect(struct sock *sk, long timeo)
{
@@ -641,6 +646,7 @@ sock_error:
sock->state = SS_DISCONNECTING;
goto out;
}
+EXPORT_SYMBOL(inet_stream_connect);
/*
* Accept a pending connection. The TCP layer now gives BSD semantics.
@@ -668,6 +674,7 @@ int inet_accept(struct socket *sock, struct socket *newsock, int flags)
do_err:
return err;
}
+EXPORT_SYMBOL(inet_accept);
/*
@@ -699,6 +706,7 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr,
*uaddr_len = sizeof(*sin);
return 0;
}
+EXPORT_SYMBOL(inet_getname);
int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
size_t size)
@@ -711,9 +719,11 @@ int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
return sk->sk_prot->sendmsg(iocb, sk, msg, size);
}
+EXPORT_SYMBOL(inet_sendmsg);
-static ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
+static ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
+ size_t size, int flags)
{
struct sock *sk = sock->sk;
@@ -780,6 +790,7 @@ int inet_shutdown(struct socket *sock, int how)
release_sock(sk);
return err;
}
+EXPORT_SYMBOL(inet_shutdown);
/*
* ioctl() calls you can issue on an INET socket. Most of these are
@@ -798,44 +809,45 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
struct net *net = sock_net(sk);
switch (cmd) {
- case SIOCGSTAMP:
- err = sock_get_timestamp(sk, (struct timeval __user *)arg);
- break;
- case SIOCGSTAMPNS:
- err = sock_get_timestampns(sk, (struct timespec __user *)arg);
- break;
- case SIOCADDRT:
- case SIOCDELRT:
- case SIOCRTMSG:
- err = ip_rt_ioctl(net, cmd, (void __user *)arg);
- break;
- case SIOCDARP:
- case SIOCGARP:
- case SIOCSARP:
- err = arp_ioctl(net, cmd, (void __user *)arg);
- break;
- case SIOCGIFADDR:
- case SIOCSIFADDR:
- case SIOCGIFBRDADDR:
- case SIOCSIFBRDADDR:
- case SIOCGIFNETMASK:
- case SIOCSIFNETMASK:
- case SIOCGIFDSTADDR:
- case SIOCSIFDSTADDR:
- case SIOCSIFPFLAGS:
- case SIOCGIFPFLAGS:
- case SIOCSIFFLAGS:
- err = devinet_ioctl(net, cmd, (void __user *)arg);
- break;
- default:
- if (sk->sk_prot->ioctl)
- err = sk->sk_prot->ioctl(sk, cmd, arg);
- else
- err = -ENOIOCTLCMD;
- break;
+ case SIOCGSTAMP:
+ err = sock_get_timestamp(sk, (struct timeval __user *)arg);
+ break;
+ case SIOCGSTAMPNS:
+ err = sock_get_timestampns(sk, (struct timespec __user *)arg);
+ break;
+ case SIOCADDRT:
+ case SIOCDELRT:
+ case SIOCRTMSG:
+ err = ip_rt_ioctl(net, cmd, (void __user *)arg);
+ break;
+ case SIOCDARP:
+ case SIOCGARP:
+ case SIOCSARP:
+ err = arp_ioctl(net, cmd, (void __user *)arg);
+ break;
+ case SIOCGIFADDR:
+ case SIOCSIFADDR:
+ case SIOCGIFBRDADDR:
+ case SIOCSIFBRDADDR:
+ case SIOCGIFNETMASK:
+ case SIOCSIFNETMASK:
+ case SIOCGIFDSTADDR:
+ case SIOCSIFDSTADDR:
+ case SIOCSIFPFLAGS:
+ case SIOCGIFPFLAGS:
+ case SIOCSIFFLAGS:
+ err = devinet_ioctl(net, cmd, (void __user *)arg);
+ break;
+ default:
+ if (sk->sk_prot->ioctl)
+ err = sk->sk_prot->ioctl(sk, cmd, arg);
+ else
+ err = -ENOIOCTLCMD;
+ break;
}
return err;
}
+EXPORT_SYMBOL(inet_ioctl);
const struct proto_ops inet_stream_ops = {
.family = PF_INET,
@@ -862,6 +874,7 @@ const struct proto_ops inet_stream_ops = {
.compat_getsockopt = compat_sock_common_getsockopt,
#endif
};
+EXPORT_SYMBOL(inet_stream_ops);
const struct proto_ops inet_dgram_ops = {
.family = PF_INET,
@@ -887,6 +900,7 @@ const struct proto_ops inet_dgram_ops = {
.compat_getsockopt = compat_sock_common_getsockopt,
#endif
};
+EXPORT_SYMBOL(inet_dgram_ops);
/*
* For SOCK_RAW sockets; should be the same as inet_dgram_ops but without
@@ -1016,6 +1030,7 @@ out_illegal:
p->type);
goto out;
}
+EXPORT_SYMBOL(inet_register_protosw);
void inet_unregister_protosw(struct inet_protosw *p)
{
@@ -1031,6 +1046,7 @@ void inet_unregister_protosw(struct inet_protosw *p)
synchronize_net();
}
}
+EXPORT_SYMBOL(inet_unregister_protosw);
/*
* Shall we try to damage output packets if routing dev changes?
@@ -1141,7 +1157,6 @@ int inet_sk_rebuild_header(struct sock *sk)
return err;
}
-
EXPORT_SYMBOL(inet_sk_rebuild_header);
static int inet_gso_send_check(struct sk_buff *skb)
@@ -1187,6 +1202,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features)
int proto;
int ihl;
int id;
+ unsigned int offset = 0;
if (!(features & NETIF_F_V4_CSUM))
features &= ~NETIF_F_SG;
@@ -1229,7 +1245,14 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features)
skb = segs;
do {
iph = ip_hdr(skb);
- iph->id = htons(id++);
+ if (proto == IPPROTO_UDP) {
+ iph->id = htons(id);
+ iph->frag_off = htons(offset >> 3);
+ if (skb->next != NULL)
+ iph->frag_off |= htons(IP_MF);
+ offset += (skb->len - skb->mac_len - iph->ihl * 4);
+ } else
+ iph->id = htons(id++);
iph->tot_len = htons(skb->len - skb->mac_len);
iph->check = 0;
iph->check = ip_fast_csum(skb_network_header(skb), iph->ihl);
@@ -1361,7 +1384,6 @@ int inet_ctl_sock_create(struct sock **sk, unsigned short family,
}
return rc;
}
-
EXPORT_SYMBOL_GPL(inet_ctl_sock_create);
unsigned long snmp_fold_field(void *mib[], int offt)
@@ -1425,6 +1447,8 @@ static struct net_protocol tcp_protocol = {
static struct net_protocol udp_protocol = {
.handler = udp_rcv,
.err_handler = udp_err,
+ .gso_send_check = udp4_ufo_send_check,
+ .gso_segment = udp4_ufo_fragment,
.no_policy = 1,
.netns_ok = 1,
};
@@ -1666,19 +1690,3 @@ static int __init ipv4_proc_init(void)
MODULE_ALIAS_NETPROTO(PF_INET);
-EXPORT_SYMBOL(inet_accept);
-EXPORT_SYMBOL(inet_bind);
-EXPORT_SYMBOL(inet_dgram_connect);
-EXPORT_SYMBOL(inet_dgram_ops);
-EXPORT_SYMBOL(inet_getname);
-EXPORT_SYMBOL(inet_ioctl);
-EXPORT_SYMBOL(inet_listen);
-EXPORT_SYMBOL(inet_register_protosw);
-EXPORT_SYMBOL(inet_release);
-EXPORT_SYMBOL(inet_sendmsg);
-EXPORT_SYMBOL(inet_shutdown);
-EXPORT_SYMBOL(inet_sock_destruct);
-EXPORT_SYMBOL(inet_stream_connect);
-EXPORT_SYMBOL(inet_stream_ops);
-EXPORT_SYMBOL(inet_unregister_protosw);
-EXPORT_SYMBOL(sysctl_ip_nonlocal_bind);
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 090e9991ac2..4e80f336c0c 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -130,7 +130,7 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb);
static void arp_error_report(struct neighbour *neigh, struct sk_buff *skb);
static void parp_redo(struct sk_buff *skb);
-static struct neigh_ops arp_generic_ops = {
+static const struct neigh_ops arp_generic_ops = {
.family = AF_INET,
.solicit = arp_solicit,
.error_report = arp_error_report,
@@ -140,7 +140,7 @@ static struct neigh_ops arp_generic_ops = {
.queue_xmit = dev_queue_xmit,
};
-static struct neigh_ops arp_hh_ops = {
+static const struct neigh_ops arp_hh_ops = {
.family = AF_INET,
.solicit = arp_solicit,
.error_report = arp_error_report,
@@ -150,7 +150,7 @@ static struct neigh_ops arp_hh_ops = {
.queue_xmit = dev_queue_xmit,
};
-static struct neigh_ops arp_direct_ops = {
+static const struct neigh_ops arp_direct_ops = {
.family = AF_INET,
.output = dev_queue_xmit,
.connected_output = dev_queue_xmit,
@@ -158,7 +158,7 @@ static struct neigh_ops arp_direct_ops = {
.queue_xmit = dev_queue_xmit,
};
-struct neigh_ops arp_broken_ops = {
+const struct neigh_ops arp_broken_ops = {
.family = AF_INET,
.solicit = arp_solicit,
.error_report = arp_error_report,
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 63c2fa7b68c..291bdf50a21 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -48,7 +48,7 @@
* Patrick McHardy <kaber@trash.net>
*/
-#define VERSION "0.408"
+#define VERSION "0.409"
#include <asm/uaccess.h>
#include <asm/system.h>
@@ -164,6 +164,14 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn);
static struct tnode *halve(struct trie *t, struct tnode *tn);
/* tnodes to free after resize(); protected by RTNL */
static struct tnode *tnode_free_head;
+static size_t tnode_free_size;
+
+/*
+ * synchronize_rcu after call_rcu for that many pages; it should be especially
+ * useful before resizing the root node with PREEMPT_NONE configs; the value was
+ * obtained experimentally, aiming to avoid visible slowdown.
+ */
+static const int sync_pages = 128;
static struct kmem_cache *fn_alias_kmem __read_mostly;
static struct kmem_cache *trie_leaf_kmem __read_mostly;
@@ -317,8 +325,7 @@ static inline void check_tnode(const struct tnode *tn)
static const int halve_threshold = 25;
static const int inflate_threshold = 50;
static const int halve_threshold_root = 15;
-static const int inflate_threshold_root = 25;
-
+static const int inflate_threshold_root = 30;
static void __alias_free_mem(struct rcu_head *head)
{
@@ -393,6 +400,8 @@ static void tnode_free_safe(struct tnode *tn)
BUG_ON(IS_LEAF(tn));
tn->tnode_free = tnode_free_head;
tnode_free_head = tn;
+ tnode_free_size += sizeof(struct tnode) +
+ (sizeof(struct node *) << tn->bits);
}
static void tnode_free_flush(void)
@@ -404,6 +413,11 @@ static void tnode_free_flush(void)
tn->tnode_free = NULL;
tnode_free(tn);
}
+
+ if (tnode_free_size >= PAGE_SIZE * sync_pages) {
+ tnode_free_size = 0;
+ synchronize_rcu();
+ }
}
static struct leaf *leaf_new(void)
@@ -499,14 +513,14 @@ static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n,
rcu_assign_pointer(tn->child[i], n);
}
+#define MAX_WORK 10
static struct node *resize(struct trie *t, struct tnode *tn)
{
int i;
- int err = 0;
struct tnode *old_tn;
int inflate_threshold_use;
int halve_threshold_use;
- int max_resize;
+ int max_work;
if (!tn)
return NULL;
@@ -521,18 +535,7 @@ static struct node *resize(struct trie *t, struct tnode *tn)
}
/* One child */
if (tn->empty_children == tnode_child_length(tn) - 1)
- for (i = 0; i < tnode_child_length(tn); i++) {
- struct node *n;
-
- n = tn->child[i];
- if (!n)
- continue;
-
- /* compress one level */
- node_set_parent(n, NULL);
- tnode_free_safe(tn);
- return n;
- }
+ goto one_child;
/*
* Double as long as the resulting node has a number of
* nonempty nodes that are above the threshold.
@@ -601,14 +604,17 @@ static struct node *resize(struct trie *t, struct tnode *tn)
/* Keep root node larger */
- if (!tn->parent)
+ if (!node_parent((struct node*) tn)) {
inflate_threshold_use = inflate_threshold_root;
- else
+ halve_threshold_use = halve_threshold_root;
+ }
+ else {
inflate_threshold_use = inflate_threshold;
+ halve_threshold_use = halve_threshold;
+ }
- err = 0;
- max_resize = 10;
- while ((tn->full_children > 0 && max_resize-- &&
+ max_work = MAX_WORK;
+ while ((tn->full_children > 0 && max_work-- &&
50 * (tn->full_children + tnode_child_length(tn)
- tn->empty_children)
>= inflate_threshold_use * tnode_child_length(tn))) {
@@ -625,35 +631,19 @@ static struct node *resize(struct trie *t, struct tnode *tn)
}
}
- if (max_resize < 0) {
- if (!tn->parent)
- pr_warning("Fix inflate_threshold_root."
- " Now=%d size=%d bits\n",
- inflate_threshold_root, tn->bits);
- else
- pr_warning("Fix inflate_threshold."
- " Now=%d size=%d bits\n",
- inflate_threshold, tn->bits);
- }
-
check_tnode(tn);
+ /* Return if at least one inflate is run */
+ if( max_work != MAX_WORK)
+ return (struct node *) tn;
+
/*
* Halve as long as the number of empty children in this
* node is above threshold.
*/
-
- /* Keep root node larger */
-
- if (!tn->parent)
- halve_threshold_use = halve_threshold_root;
- else
- halve_threshold_use = halve_threshold;
-
- err = 0;
- max_resize = 10;
- while (tn->bits > 1 && max_resize-- &&
+ max_work = MAX_WORK;
+ while (tn->bits > 1 && max_work-- &&
100 * (tnode_child_length(tn) - tn->empty_children) <
halve_threshold_use * tnode_child_length(tn)) {
@@ -668,19 +658,10 @@ static struct node *resize(struct trie *t, struct tnode *tn)
}
}
- if (max_resize < 0) {
- if (!tn->parent)
- pr_warning("Fix halve_threshold_root."
- " Now=%d size=%d bits\n",
- halve_threshold_root, tn->bits);
- else
- pr_warning("Fix halve_threshold."
- " Now=%d size=%d bits\n",
- halve_threshold, tn->bits);
- }
/* Only one child remains */
- if (tn->empty_children == tnode_child_length(tn) - 1)
+ if (tn->empty_children == tnode_child_length(tn) - 1) {
+one_child:
for (i = 0; i < tnode_child_length(tn); i++) {
struct node *n;
@@ -694,7 +675,7 @@ static struct node *resize(struct trie *t, struct tnode *tn)
tnode_free_safe(tn);
return n;
}
-
+ }
return (struct node *) tn;
}
@@ -1435,7 +1416,7 @@ static int fn_trie_lookup(struct fib_table *tb, const struct flowi *flp,
cindex = tkey_extract_bits(mask_pfx(key, current_prefix_length),
pos, bits);
- n = tnode_get_child(pn, cindex);
+ n = tnode_get_child_rcu(pn, cindex);
if (n == NULL) {
#ifdef CONFIG_IP_FIB_TRIE_STATS
@@ -1570,7 +1551,7 @@ backtrace:
if (chopped_off <= pn->bits) {
cindex &= ~(1 << (chopped_off-1));
} else {
- struct tnode *parent = node_parent((struct node *) pn);
+ struct tnode *parent = node_parent_rcu((struct node *) pn);
if (!parent)
goto failed;
@@ -1783,7 +1764,7 @@ static struct leaf *trie_firstleaf(struct trie *t)
static struct leaf *trie_nextleaf(struct leaf *l)
{
struct node *c = (struct node *) l;
- struct tnode *p = node_parent(c);
+ struct tnode *p = node_parent_rcu(c);
if (!p)
return NULL; /* trie with just one leaf */
@@ -2391,7 +2372,7 @@ static inline const char *rtn_scope(char *buf, size_t len, enum rt_scope_t s)
}
}
-static const char *rtn_type_names[__RTN_MAX] = {
+static const char *const rtn_type_names[__RTN_MAX] = {
[RTN_UNSPEC] = "UNSPEC",
[RTN_UNICAST] = "UNICAST",
[RTN_LOCAL] = "LOCAL",
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 61283f92882..13f0781f35c 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -218,8 +218,8 @@ void inet_twdr_hangman(unsigned long data)
/* We purged the entire slot, anything left? */
if (twdr->tw_count)
need_timer = 1;
+ twdr->slot = ((twdr->slot + 1) & (INET_TWDR_TWKILL_SLOTS - 1));
}
- twdr->slot = ((twdr->slot + 1) & (INET_TWDR_TWKILL_SLOTS - 1));
if (need_timer)
mod_timer(&twdr->tw_timer, jiffies + twdr->period);
out:
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 82c11dd10a6..533afaadefd 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -662,7 +662,7 @@ drop_nolock:
return(0);
}
-static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
struct net_device_stats *stats = &tunnel->dev->stats;
@@ -821,7 +821,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
stats->tx_dropped++;
dev_kfree_skb(skb);
tunnel->recursion--;
- return 0;
+ return NETDEV_TX_OK;
}
if (skb->sk)
skb_set_owner_w(new_skb, skb->sk);
@@ -889,7 +889,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
IPTUNNEL_XMIT();
tunnel->recursion--;
- return 0;
+ return NETDEV_TX_OK;
tx_error_icmp:
dst_link_failure(skb);
@@ -898,7 +898,7 @@ tx_error:
stats->tx_errors++;
dev_kfree_skb(skb);
tunnel->recursion--;
- return 0;
+ return NETDEV_TX_OK;
}
static int ipgre_tunnel_bind_dev(struct net_device *dev)
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 7ffcd96fe59..9fe5d7b8158 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1304,7 +1304,7 @@ int ip_push_pending_frames(struct sock *sk)
err = ip_local_out(skb);
if (err) {
if (err > 0)
- err = inet->recverr ? net_xmit_errno(err) : 0;
+ err = net_xmit_errno(err);
if (err)
goto error;
}
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 93e2b787da2..62548cb0923 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -387,7 +387,7 @@ static int ipip_rcv(struct sk_buff *skb)
* and that skb is filled properly by that function.
*/
-static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
struct net_device_stats *stats = &tunnel->dev->stats;
@@ -486,7 +486,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
stats->tx_dropped++;
dev_kfree_skb(skb);
tunnel->recursion--;
- return 0;
+ return NETDEV_TX_OK;
}
if (skb->sk)
skb_set_owner_w(new_skb, skb->sk);
@@ -524,7 +524,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
IPTUNNEL_XMIT();
tunnel->recursion--;
- return 0;
+ return NETDEV_TX_OK;
tx_error_icmp:
dst_link_failure(skb);
@@ -532,7 +532,7 @@ tx_error:
stats->tx_errors++;
dev_kfree_skb(skb);
tunnel->recursion--;
- return 0;
+ return NETDEV_TX_OK;
}
static void ipip_tunnel_bind_dev(struct net_device *dev)
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 9a8da5ed92b..65d421cf5bc 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -201,7 +201,7 @@ failure:
#ifdef CONFIG_IP_PIMSM
-static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct net *net = dev_net(dev);
@@ -212,7 +212,7 @@ static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
IGMPMSG_WHOLEPKT);
read_unlock(&mrt_lock);
kfree_skb(skb);
- return 0;
+ return NETDEV_TX_OK;
}
static const struct net_device_ops reg_vif_netdev_ops = {
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 7505dff4ffd..27774c99d88 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -8,7 +8,7 @@
* Copyright (C) 2002 David S. Miller (davem@redhat.com)
*
*/
-
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
@@ -341,15 +341,11 @@ unsigned int arpt_do_table(struct sk_buff *skb,
}
/* All zeroes == unconditional rule. */
-static inline int unconditional(const struct arpt_arp *arp)
+static inline bool unconditional(const struct arpt_arp *arp)
{
- unsigned int i;
+ static const struct arpt_arp uncond;
- for (i = 0; i < sizeof(*arp)/sizeof(__u32); i++)
- if (((__u32 *)arp)[i])
- return 0;
-
- return 1;
+ return memcmp(arp, &uncond, sizeof(uncond)) == 0;
}
/* Figures out from what hook each rule can be called: returns 0 if
@@ -537,12 +533,28 @@ out:
return ret;
}
+static bool check_underflow(struct arpt_entry *e)
+{
+ const struct arpt_entry_target *t;
+ unsigned int verdict;
+
+ if (!unconditional(&e->arp))
+ return false;
+ t = arpt_get_target(e);
+ if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0)
+ return false;
+ verdict = ((struct arpt_standard_target *)t)->verdict;
+ verdict = -verdict - 1;
+ return verdict == NF_DROP || verdict == NF_ACCEPT;
+}
+
static inline int check_entry_size_and_hooks(struct arpt_entry *e,
struct xt_table_info *newinfo,
unsigned char *base,
unsigned char *limit,
const unsigned int *hook_entries,
const unsigned int *underflows,
+ unsigned int valid_hooks,
unsigned int *i)
{
unsigned int h;
@@ -562,15 +574,21 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e,
/* Check hooks & underflows */
for (h = 0; h < NF_ARP_NUMHOOKS; h++) {
+ if (!(valid_hooks & (1 << h)))
+ continue;
if ((unsigned char *)e - base == hook_entries[h])
newinfo->hook_entry[h] = hook_entries[h];
- if ((unsigned char *)e - base == underflows[h])
+ if ((unsigned char *)e - base == underflows[h]) {
+ if (!check_underflow(e)) {
+ pr_err("Underflows must be unconditional and "
+ "use the STANDARD target with "
+ "ACCEPT/DROP\n");
+ return -EINVAL;
+ }
newinfo->underflow[h] = underflows[h];
+ }
}
- /* FIXME: underflows must be unconditional, standard verdicts
- < 0 (not ARPT_RETURN). --RR */
-
/* Clear counters and comefrom */
e->counters = ((struct xt_counters) { 0, 0 });
e->comefrom = 0;
@@ -630,7 +648,7 @@ static int translate_table(const char *name,
newinfo,
entry0,
entry0 + size,
- hook_entries, underflows, &i);
+ hook_entries, underflows, valid_hooks, &i);
duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret);
if (ret != 0)
return ret;
@@ -1760,7 +1778,8 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len
return ret;
}
-struct xt_table *arpt_register_table(struct net *net, struct xt_table *table,
+struct xt_table *arpt_register_table(struct net *net,
+ const struct xt_table *table,
const struct arpt_replace *repl)
{
int ret;
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index 6ecfdae7c58..97337601827 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -15,7 +15,7 @@ MODULE_DESCRIPTION("arptables filter table");
#define FILTER_VALID_HOOKS ((1 << NF_ARP_IN) | (1 << NF_ARP_OUT) | \
(1 << NF_ARP_FORWARD))
-static struct
+static const struct
{
struct arpt_replace repl;
struct arpt_standard entries[3];
@@ -45,7 +45,7 @@ static struct
.term = ARPT_ERROR_INIT,
};
-static struct xt_table packet_filter = {
+static const struct xt_table packet_filter = {
.name = "filter",
.valid_hooks = FILTER_VALID_HOOKS,
.me = THIS_MODULE,
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index fdefae6b5df..cde755d5eea 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -8,6 +8,7 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/cache.h>
#include <linux/capability.h>
#include <linux/skbuff.h>
@@ -190,16 +191,11 @@ get_entry(void *base, unsigned int offset)
/* All zeroes == unconditional rule. */
/* Mildly perf critical (only if packet tracing is on) */
-static inline int
-unconditional(const struct ipt_ip *ip)
+static inline bool unconditional(const struct ipt_ip *ip)
{
- unsigned int i;
-
- for (i = 0; i < sizeof(*ip)/sizeof(__u32); i++)
- if (((__u32 *)ip)[i])
- return 0;
+ static const struct ipt_ip uncond;
- return 1;
+ return memcmp(ip, &uncond, sizeof(uncond)) == 0;
#undef FWINV
}
@@ -315,7 +311,6 @@ ipt_do_table(struct sk_buff *skb,
static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
const struct iphdr *ip;
- u_int16_t datalen;
bool hotdrop = false;
/* Initializing verdict to NF_DROP keeps gcc happy. */
unsigned int verdict = NF_DROP;
@@ -328,7 +323,6 @@ ipt_do_table(struct sk_buff *skb,
/* Initialization */
ip = ip_hdr(skb);
- datalen = skb->len - ip->ihl * 4;
indev = in ? in->name : nulldevname;
outdev = out ? out->name : nulldevname;
/* We handle fragments by dealing with the first fragment as
@@ -427,8 +421,6 @@ ipt_do_table(struct sk_buff *skb,
#endif
/* Target might have changed stuff. */
ip = ip_hdr(skb);
- datalen = skb->len - ip->ihl * 4;
-
if (verdict == IPT_CONTINUE)
e = ipt_next_entry(e);
else
@@ -716,6 +708,21 @@ find_check_entry(struct ipt_entry *e, const char *name, unsigned int size,
return ret;
}
+static bool check_underflow(struct ipt_entry *e)
+{
+ const struct ipt_entry_target *t;
+ unsigned int verdict;
+
+ if (!unconditional(&e->ip))
+ return false;
+ t = ipt_get_target(e);
+ if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0)
+ return false;
+ verdict = ((struct ipt_standard_target *)t)->verdict;
+ verdict = -verdict - 1;
+ return verdict == NF_DROP || verdict == NF_ACCEPT;
+}
+
static int
check_entry_size_and_hooks(struct ipt_entry *e,
struct xt_table_info *newinfo,
@@ -723,6 +730,7 @@ check_entry_size_and_hooks(struct ipt_entry *e,
unsigned char *limit,
const unsigned int *hook_entries,
const unsigned int *underflows,
+ unsigned int valid_hooks,
unsigned int *i)
{
unsigned int h;
@@ -742,15 +750,21 @@ check_entry_size_and_hooks(struct ipt_entry *e,
/* Check hooks & underflows */
for (h = 0; h < NF_INET_NUMHOOKS; h++) {
+ if (!(valid_hooks & (1 << h)))
+ continue;
if ((unsigned char *)e - base == hook_entries[h])
newinfo->hook_entry[h] = hook_entries[h];
- if ((unsigned char *)e - base == underflows[h])
+ if ((unsigned char *)e - base == underflows[h]) {
+ if (!check_underflow(e)) {
+ pr_err("Underflows must be unconditional and "
+ "use the STANDARD target with "
+ "ACCEPT/DROP\n");
+ return -EINVAL;
+ }
newinfo->underflow[h] = underflows[h];
+ }
}
- /* FIXME: underflows must be unconditional, standard verdicts
- < 0 (not IPT_RETURN). --RR */
-
/* Clear counters and comefrom */
e->counters = ((struct xt_counters) { 0, 0 });
e->comefrom = 0;
@@ -813,7 +827,7 @@ translate_table(const char *name,
newinfo,
entry0,
entry0 + size,
- hook_entries, underflows, &i);
+ hook_entries, underflows, valid_hooks, &i);
if (ret != 0)
return ret;
@@ -2051,7 +2065,8 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
return ret;
}
-struct xt_table *ipt_register_table(struct net *net, struct xt_table *table,
+struct xt_table *ipt_register_table(struct net *net,
+ const struct xt_table *table,
const struct ipt_replace *repl)
{
int ret;
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index c30a969724f..df566cbd68e 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -53,11 +53,11 @@ static struct
.term = IPT_ERROR_INIT, /* ERROR */
};
-static struct xt_table packet_filter = {
+static const struct xt_table packet_filter = {
.name = "filter",
.valid_hooks = FILTER_VALID_HOOKS,
.me = THIS_MODULE,
- .af = AF_INET,
+ .af = NFPROTO_IPV4,
};
/* The work comes in here from netfilter.c. */
@@ -102,21 +102,21 @@ static struct nf_hook_ops ipt_ops[] __read_mostly = {
{
.hook = ipt_local_in_hook,
.owner = THIS_MODULE,
- .pf = PF_INET,
+ .pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP_PRI_FILTER,
},
{
.hook = ipt_hook,
.owner = THIS_MODULE,
- .pf = PF_INET,
+ .pf = NFPROTO_IPV4,
.hooknum = NF_INET_FORWARD,
.priority = NF_IP_PRI_FILTER,
},
{
.hook = ipt_local_out_hook,
.owner = THIS_MODULE,
- .pf = PF_INET,
+ .pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_OUT,
.priority = NF_IP_PRI_FILTER,
},
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index 4087614d951..036047f9b0f 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -28,7 +28,7 @@ MODULE_DESCRIPTION("iptables mangle table");
(1 << NF_INET_POST_ROUTING))
/* Ouch - five different hooks? Maybe this should be a config option..... -- BC */
-static struct
+static const struct
{
struct ipt_replace repl;
struct ipt_standard entries[5];
@@ -64,11 +64,11 @@ static struct
.term = IPT_ERROR_INIT, /* ERROR */
};
-static struct xt_table packet_mangler = {
+static const struct xt_table packet_mangler = {
.name = "mangle",
.valid_hooks = MANGLE_VALID_HOOKS,
.me = THIS_MODULE,
- .af = AF_INET,
+ .af = NFPROTO_IPV4,
};
/* The work comes in here from netfilter.c. */
@@ -162,35 +162,35 @@ static struct nf_hook_ops ipt_ops[] __read_mostly = {
{
.hook = ipt_pre_routing_hook,
.owner = THIS_MODULE,
- .pf = PF_INET,
+ .pf = NFPROTO_IPV4,
.hooknum = NF_INET_PRE_ROUTING,
.priority = NF_IP_PRI_MANGLE,
},
{
.hook = ipt_local_in_hook,
.owner = THIS_MODULE,
- .pf = PF_INET,
+ .pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP_PRI_MANGLE,
},
{
.hook = ipt_forward_hook,
.owner = THIS_MODULE,
- .pf = PF_INET,
+ .pf = NFPROTO_IPV4,
.hooknum = NF_INET_FORWARD,
.priority = NF_IP_PRI_MANGLE,
},
{
.hook = ipt_local_hook,
.owner = THIS_MODULE,
- .pf = PF_INET,
+ .pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_OUT,
.priority = NF_IP_PRI_MANGLE,
},
{
.hook = ipt_post_routing_hook,
.owner = THIS_MODULE,
- .pf = PF_INET,
+ .pf = NFPROTO_IPV4,
.hooknum = NF_INET_POST_ROUTING,
.priority = NF_IP_PRI_MANGLE,
},
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index e5356da1fb5..993edc23be0 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -9,7 +9,7 @@
#define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT))
-static struct
+static const struct
{
struct ipt_replace repl;
struct ipt_standard entries[2];
@@ -36,11 +36,11 @@ static struct
.term = IPT_ERROR_INIT, /* ERROR */
};
-static struct xt_table packet_raw = {
+static const struct xt_table packet_raw = {
.name = "raw",
.valid_hooks = RAW_VALID_HOOKS,
.me = THIS_MODULE,
- .af = AF_INET,
+ .af = NFPROTO_IPV4,
};
/* The work comes in here from netfilter.c. */
@@ -74,14 +74,14 @@ ipt_local_hook(unsigned int hook,
static struct nf_hook_ops ipt_ops[] __read_mostly = {
{
.hook = ipt_hook,
- .pf = PF_INET,
+ .pf = NFPROTO_IPV4,
.hooknum = NF_INET_PRE_ROUTING,
.priority = NF_IP_PRI_RAW,
.owner = THIS_MODULE,
},
{
.hook = ipt_local_hook,
- .pf = PF_INET,
+ .pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_OUT,
.priority = NF_IP_PRI_RAW,
.owner = THIS_MODULE,
diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c
index 29ab630f240..99eb76c65d2 100644
--- a/net/ipv4/netfilter/iptable_security.c
+++ b/net/ipv4/netfilter/iptable_security.c
@@ -27,7 +27,7 @@ MODULE_DESCRIPTION("iptables security table, for MAC rules");
(1 << NF_INET_FORWARD) | \
(1 << NF_INET_LOCAL_OUT)
-static struct
+static const struct
{
struct ipt_replace repl;
struct ipt_standard entries[3];
@@ -57,11 +57,11 @@ static struct
.term = IPT_ERROR_INIT, /* ERROR */
};
-static struct xt_table security_table = {
+static const struct xt_table security_table = {
.name = "security",
.valid_hooks = SECURITY_VALID_HOOKS,
.me = THIS_MODULE,
- .af = AF_INET,
+ .af = NFPROTO_IPV4,
};
static unsigned int
@@ -105,21 +105,21 @@ static struct nf_hook_ops ipt_ops[] __read_mostly = {
{
.hook = ipt_local_in_hook,
.owner = THIS_MODULE,
- .pf = PF_INET,
+ .pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP_PRI_SECURITY,
},
{
.hook = ipt_forward_hook,
.owner = THIS_MODULE,
- .pf = PF_INET,
+ .pf = NFPROTO_IPV4,
.hooknum = NF_INET_FORWARD,
.priority = NF_IP_PRI_SECURITY,
},
{
.hook = ipt_local_out_hook,
.owner = THIS_MODULE,
- .pf = PF_INET,
+ .pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_OUT,
.priority = NF_IP_PRI_SECURITY,
},
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 7d2ead7228a..aa95bb82ee6 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -26,6 +26,7 @@
#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
#include <net/netfilter/nf_nat_helper.h>
#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
+#include <net/netfilter/nf_log.h>
int (*nf_nat_seq_adjust_hook)(struct sk_buff *skb,
struct nf_conn *ct,
@@ -113,8 +114,11 @@ static unsigned int ipv4_confirm(unsigned int hooknum,
ret = helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb),
ct, ctinfo);
- if (ret != NF_ACCEPT)
+ if (ret != NF_ACCEPT) {
+ nf_log_packet(NFPROTO_IPV4, hooknum, skb, in, out, NULL,
+ "nf_ct_%s: dropping packet", helper->name);
return ret;
+ }
if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status)) {
typeof(nf_nat_seq_adjust_hook) seq_adjust;
@@ -158,28 +162,28 @@ static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = {
{
.hook = ipv4_conntrack_in,
.owner = THIS_MODULE,
- .pf = PF_INET,
+ .pf = NFPROTO_IPV4,
.hooknum = NF_INET_PRE_ROUTING,
.priority = NF_IP_PRI_CONNTRACK,
},
{
.hook = ipv4_conntrack_local,
.owner = THIS_MODULE,
- .pf = PF_INET,
+ .pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_OUT,
.priority = NF_IP_PRI_CONNTRACK,
},
{
.hook = ipv4_confirm,
.owner = THIS_MODULE,
- .pf = PF_INET,
+ .pf = NFPROTO_IPV4,
.hooknum = NF_INET_POST_ROUTING,
.priority = NF_IP_PRI_CONNTRACK_CONFIRM,
},
{
.hook = ipv4_confirm,
.owner = THIS_MODULE,
- .pf = PF_INET,
+ .pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP_PRI_CONNTRACK_CONFIRM,
},
@@ -256,11 +260,11 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len)
tuple.dst.u3.ip = inet->daddr;
tuple.dst.u.tcp.port = inet->dport;
tuple.src.l3num = PF_INET;
- tuple.dst.protonum = IPPROTO_TCP;
+ tuple.dst.protonum = sk->sk_protocol;
- /* We only do TCP at the moment: is there a better way? */
- if (strcmp(sk->sk_prot->name, "TCP")) {
- pr_debug("SO_ORIGINAL_DST: Not a TCP socket\n");
+ /* We only do TCP and SCTP at the moment: is there a better way? */
+ if (sk->sk_protocol != IPPROTO_TCP && sk->sk_protocol != IPPROTO_SCTP) {
+ pr_debug("SO_ORIGINAL_DST: Not a TCP/SCTP socket\n");
return -ENOPROTOOPT;
}
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 3229e0a81ba..68afc6ecd34 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -212,7 +212,7 @@ find_best_ips_proto(struct nf_conntrack_tuple *tuple,
maxip = ntohl(range->max_ip);
j = jhash_2words((__force u32)tuple->src.u3.ip,
range->flags & IP_NAT_RANGE_PERSISTENT ?
- (__force u32)tuple->dst.u3.ip : 0, 0);
+ 0 : (__force u32)tuple->dst.u3.ip, 0);
j = ((u64)j * (maxip - minip + 1)) >> 32;
*var_ipp = htonl(minip + j);
}
@@ -620,7 +620,7 @@ static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = {
};
static int
-nfnetlink_parse_nat(struct nlattr *nat,
+nfnetlink_parse_nat(const struct nlattr *nat,
const struct nf_conn *ct, struct nf_nat_range *range)
{
struct nlattr *tb[CTA_NAT_MAX+1];
@@ -656,7 +656,7 @@ nfnetlink_parse_nat(struct nlattr *nat,
static int
nfnetlink_parse_nat_setup(struct nf_conn *ct,
enum nf_nat_manip_type manip,
- struct nlattr *attr)
+ const struct nlattr *attr)
{
struct nf_nat_range range;
@@ -671,7 +671,7 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct,
static int
nfnetlink_parse_nat_setup(struct nf_conn *ct,
enum nf_nat_manip_type manip,
- struct nlattr *attr)
+ const struct nlattr *attr)
{
return -EOPNOTSUPP;
}
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
index 6348a793936..9e81e0dfb4e 100644
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -28,7 +28,7 @@
(1 << NF_INET_POST_ROUTING) | \
(1 << NF_INET_LOCAL_OUT))
-static struct
+static const struct
{
struct ipt_replace repl;
struct ipt_standard entries[3];
@@ -58,11 +58,11 @@ static struct
.term = IPT_ERROR_INIT, /* ERROR */
};
-static struct xt_table nat_table = {
+static const struct xt_table nat_table = {
.name = "nat",
.valid_hooks = NAT_VALID_HOOKS,
.me = THIS_MODULE,
- .af = AF_INET,
+ .af = NFPROTO_IPV4,
};
/* Source NAT */
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c
index 5567bd0d075..5f41d017ddd 100644
--- a/net/ipv4/netfilter/nf_nat_standalone.c
+++ b/net/ipv4/netfilter/nf_nat_standalone.c
@@ -251,7 +251,7 @@ static struct nf_hook_ops nf_nat_ops[] __read_mostly = {
{
.hook = nf_nat_in,
.owner = THIS_MODULE,
- .pf = PF_INET,
+ .pf = NFPROTO_IPV4,
.hooknum = NF_INET_PRE_ROUTING,
.priority = NF_IP_PRI_NAT_DST,
},
@@ -259,7 +259,7 @@ static struct nf_hook_ops nf_nat_ops[] __read_mostly = {
{
.hook = nf_nat_out,
.owner = THIS_MODULE,
- .pf = PF_INET,
+ .pf = NFPROTO_IPV4,
.hooknum = NF_INET_POST_ROUTING,
.priority = NF_IP_PRI_NAT_SRC,
},
@@ -267,7 +267,7 @@ static struct nf_hook_ops nf_nat_ops[] __read_mostly = {
{
.hook = nf_nat_local_fn,
.owner = THIS_MODULE,
- .pf = PF_INET,
+ .pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_OUT,
.priority = NF_IP_PRI_NAT_DST,
},
@@ -275,7 +275,7 @@ static struct nf_hook_ops nf_nat_ops[] __read_mostly = {
{
.hook = nf_nat_fn,
.owner = THIS_MODULE,
- .pf = PF_INET,
+ .pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP_PRI_NAT_SRC,
},
diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c
index ea50da0649f..a2e5fc0a15e 100644
--- a/net/ipv4/protocol.c
+++ b/net/ipv4/protocol.c
@@ -22,26 +22,11 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
-
-#include <asm/uaccess.h>
-#include <asm/system.h>
+#include <linux/cache.h>
#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/socket.h>
-#include <linux/in.h>
-#include <linux/inet.h>
#include <linux/netdevice.h>
-#include <linux/timer.h>
-#include <net/ip.h>
+#include <linux/spinlock.h>
#include <net/protocol.h>
-#include <linux/skbuff.h>
-#include <net/sock.h>
-#include <net/icmp.h>
-#include <net/udp.h>
-#include <net/ipip.h>
-#include <linux/igmp.h>
struct net_protocol *inet_protos[MAX_INET_PROTOS] ____cacheline_aligned_in_smp;
static DEFINE_SPINLOCK(inet_proto_lock);
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 2979f14bb18..ebb1e5848bc 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -375,7 +375,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
err = NF_HOOK(PF_INET, NF_INET_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
dst_output);
if (err > 0)
- err = inet->recverr ? net_xmit_errno(err) : 0;
+ err = net_xmit_errno(err);
if (err)
goto error;
out:
@@ -386,6 +386,8 @@ error_fault:
kfree_skb(skb);
error:
IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS);
+ if (err == -ENOBUFS && !inet->recverr)
+ err = 0;
return err;
}
@@ -576,8 +578,11 @@ back_from_confirm:
&ipc, &rt, msg->msg_flags);
if (err)
ip_flush_pending_frames(sk);
- else if (!(msg->msg_flags & MSG_MORE))
+ else if (!(msg->msg_flags & MSG_MORE)) {
err = ip_push_pending_frames(sk);
+ if (err == -ENOBUFS && !inet->recverr)
+ err = 0;
+ }
release_sock(sk);
}
done:
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 278f46f5011..91867d3e632 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1514,13 +1514,17 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
void ip_rt_send_redirect(struct sk_buff *skb)
{
struct rtable *rt = skb_rtable(skb);
- struct in_device *in_dev = in_dev_get(rt->u.dst.dev);
+ struct in_device *in_dev;
+ int log_martians;
- if (!in_dev)
+ rcu_read_lock();
+ in_dev = __in_dev_get_rcu(rt->u.dst.dev);
+ if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) {
+ rcu_read_unlock();
return;
-
- if (!IN_DEV_TX_REDIRECTS(in_dev))
- goto out;
+ }
+ log_martians = IN_DEV_LOG_MARTIANS(in_dev);
+ rcu_read_unlock();
/* No redirected packets during ip_rt_redirect_silence;
* reset the algorithm.
@@ -1533,7 +1537,7 @@ void ip_rt_send_redirect(struct sk_buff *skb)
*/
if (rt->u.dst.rate_tokens >= ip_rt_redirect_number) {
rt->u.dst.rate_last = jiffies;
- goto out;
+ return;
}
/* Check for load limit; set rate_last to the latest sent
@@ -1547,7 +1551,7 @@ void ip_rt_send_redirect(struct sk_buff *skb)
rt->u.dst.rate_last = jiffies;
++rt->u.dst.rate_tokens;
#ifdef CONFIG_IP_ROUTE_VERBOSE
- if (IN_DEV_LOG_MARTIANS(in_dev) &&
+ if (log_martians &&
rt->u.dst.rate_tokens == ip_rt_redirect_number &&
net_ratelimit())
printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n",
@@ -1555,8 +1559,6 @@ void ip_rt_send_redirect(struct sk_buff *skb)
&rt->rt_dst, &rt->rt_gateway);
#endif
}
-out:
- in_dev_put(in_dev);
}
static int ip_error(struct sk_buff *skb)
@@ -3442,7 +3444,7 @@ int __init ip_rt_init(void)
printk(KERN_ERR "Unable to create route proc files\n");
#ifdef CONFIG_XFRM
xfrm_init();
- xfrm4_init();
+ xfrm4_init(ip_rt_max_size);
#endif
rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 91145244ea6..edeea060db4 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1839,7 +1839,7 @@ void tcp_close(struct sock *sk, long timeout)
/* Unread data was tossed, zap the connection. */
NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE);
tcp_set_state(sk, TCP_CLOSE);
- tcp_send_active_reset(sk, GFP_KERNEL);
+ tcp_send_active_reset(sk, sk->sk_allocation);
} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
/* Check zero linger _after_ checking for unread data. */
sk->sk_prot->disconnect(sk, 0);
@@ -2336,13 +2336,13 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
val = !!(tp->nonagle&TCP_NAGLE_CORK);
break;
case TCP_KEEPIDLE:
- val = (tp->keepalive_time ? : sysctl_tcp_keepalive_time) / HZ;
+ val = keepalive_time_when(tp) / HZ;
break;
case TCP_KEEPINTVL:
- val = (tp->keepalive_intvl ? : sysctl_tcp_keepalive_intvl) / HZ;
+ val = keepalive_intvl_when(tp) / HZ;
break;
case TCP_KEEPCNT:
- val = tp->keepalive_probes ? : sysctl_tcp_keepalive_probes;
+ val = keepalive_probes(tp);
break;
case TCP_SYNCNT:
val = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries;
@@ -2658,7 +2658,7 @@ void tcp_free_md5sig_pool(void)
EXPORT_SYMBOL(tcp_free_md5sig_pool);
-static struct tcp_md5sig_pool **__tcp_alloc_md5sig_pool(void)
+static struct tcp_md5sig_pool **__tcp_alloc_md5sig_pool(struct sock *sk)
{
int cpu;
struct tcp_md5sig_pool **pool;
@@ -2671,7 +2671,7 @@ static struct tcp_md5sig_pool **__tcp_alloc_md5sig_pool(void)
struct tcp_md5sig_pool *p;
struct crypto_hash *hash;
- p = kzalloc(sizeof(*p), GFP_KERNEL);
+ p = kzalloc(sizeof(*p), sk->sk_allocation);
if (!p)
goto out_free;
*per_cpu_ptr(pool, cpu) = p;
@@ -2688,7 +2688,7 @@ out_free:
return NULL;
}
-struct tcp_md5sig_pool **tcp_alloc_md5sig_pool(void)
+struct tcp_md5sig_pool **tcp_alloc_md5sig_pool(struct sock *sk)
{
struct tcp_md5sig_pool **pool;
int alloc = 0;
@@ -2709,7 +2709,7 @@ retry:
if (alloc) {
/* we cannot hold spinlock here because this may sleep. */
- struct tcp_md5sig_pool **p = __tcp_alloc_md5sig_pool();
+ struct tcp_md5sig_pool **p = __tcp_alloc_md5sig_pool(sk);
spin_lock_bh(&tcp_md5sig_pool_lock);
if (!p) {
tcp_md5sig_users--;
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index e92beb9e55e..6428b342b16 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -116,7 +116,7 @@ int tcp_set_default_congestion_control(const char *name)
spin_lock(&tcp_cong_list_lock);
ca = tcp_ca_find(name);
#ifdef CONFIG_MODULES
- if (!ca && capable(CAP_SYS_MODULE)) {
+ if (!ca && capable(CAP_NET_ADMIN)) {
spin_unlock(&tcp_cong_list_lock);
request_module("tcp_%s", name);
@@ -246,7 +246,7 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
#ifdef CONFIG_MODULES
/* not found attempt to autoload module */
- if (!ca && capable(CAP_SYS_MODULE)) {
+ if (!ca && capable(CAP_NET_ADMIN)) {
rcu_read_unlock();
request_module("tcp_%s", name);
rcu_read_lock();
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 2bdb0da237e..af6d6fa00db 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -685,7 +685,7 @@ static inline void tcp_set_rto(struct sock *sk)
* is invisible. Actually, Linux-2.4 also generates erratic
* ACKs in some circumstances.
*/
- inet_csk(sk)->icsk_rto = (tp->srtt >> 3) + tp->rttvar;
+ inet_csk(sk)->icsk_rto = __tcp_set_rto(tp);
/* 2. Fixups made earlier cannot be right.
* If we do not estimate RTO correctly without them,
@@ -696,8 +696,7 @@ static inline void tcp_set_rto(struct sock *sk)
/* NOTE: clamping at TCP_RTO_MIN is not required, current algo
* guarantees that rto is higher.
*/
- if (inet_csk(sk)->icsk_rto > TCP_RTO_MAX)
- inet_csk(sk)->icsk_rto = TCP_RTO_MAX;
+ tcp_bound_rto(sk);
}
/* Save metrics learned by this TCP session.
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 6d88219c5e2..0543561da99 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -328,26 +328,29 @@ static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu)
*
*/
-void tcp_v4_err(struct sk_buff *skb, u32 info)
+void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
{
- struct iphdr *iph = (struct iphdr *)skb->data;
- struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2));
+ struct iphdr *iph = (struct iphdr *)icmp_skb->data;
+ struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
+ struct inet_connection_sock *icsk;
struct tcp_sock *tp;
struct inet_sock *inet;
- const int type = icmp_hdr(skb)->type;
- const int code = icmp_hdr(skb)->code;
+ const int type = icmp_hdr(icmp_skb)->type;
+ const int code = icmp_hdr(icmp_skb)->code;
struct sock *sk;
+ struct sk_buff *skb;
__u32 seq;
+ __u32 remaining;
int err;
- struct net *net = dev_net(skb->dev);
+ struct net *net = dev_net(icmp_skb->dev);
- if (skb->len < (iph->ihl << 2) + 8) {
+ if (icmp_skb->len < (iph->ihl << 2) + 8) {
ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
return;
}
sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
- iph->saddr, th->source, inet_iif(skb));
+ iph->saddr, th->source, inet_iif(icmp_skb));
if (!sk) {
ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
return;
@@ -367,6 +370,7 @@ void tcp_v4_err(struct sk_buff *skb, u32 info)
if (sk->sk_state == TCP_CLOSE)
goto out;
+ icsk = inet_csk(sk);
tp = tcp_sk(sk);
seq = ntohl(th->seq);
if (sk->sk_state != TCP_LISTEN &&
@@ -393,6 +397,39 @@ void tcp_v4_err(struct sk_buff *skb, u32 info)
}
err = icmp_err_convert[code].errno;
+ /* check if icmp_skb allows revert of backoff
+ * (see draft-zimmermann-tcp-lcd) */
+ if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
+ break;
+ if (seq != tp->snd_una || !icsk->icsk_retransmits ||
+ !icsk->icsk_backoff)
+ break;
+
+ icsk->icsk_backoff--;
+ inet_csk(sk)->icsk_rto = __tcp_set_rto(tp) <<
+ icsk->icsk_backoff;
+ tcp_bound_rto(sk);
+
+ skb = tcp_write_queue_head(sk);
+ BUG_ON(!skb);
+
+ remaining = icsk->icsk_rto - min(icsk->icsk_rto,
+ tcp_time_stamp - TCP_SKB_CB(skb)->when);
+
+ if (remaining) {
+ inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
+ remaining, TCP_RTO_MAX);
+ } else if (sock_owned_by_user(sk)) {
+ /* RTO revert clocked out retransmission,
+ * but socket is locked. Will defer. */
+ inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
+ HZ/20, TCP_RTO_MAX);
+ } else {
+ /* RTO revert clocked out retransmission.
+ * Will retransmit now */
+ tcp_retransmit_timer(sk);
+ }
+
break;
case ICMP_TIME_EXCEEDED:
err = EHOSTUNREACH;
@@ -849,7 +886,7 @@ int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
}
sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
}
- if (tcp_alloc_md5sig_pool() == NULL) {
+ if (tcp_alloc_md5sig_pool(sk) == NULL) {
kfree(newkey);
return -ENOMEM;
}
@@ -970,8 +1007,9 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
if (!tcp_sk(sk)->md5sig_info) {
struct tcp_sock *tp = tcp_sk(sk);
- struct tcp_md5sig_info *p = kzalloc(sizeof(*p), GFP_KERNEL);
+ struct tcp_md5sig_info *p;
+ p = kzalloc(sizeof(*p), sk->sk_allocation);
if (!p)
return -EINVAL;
@@ -979,7 +1017,7 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
}
- newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
+ newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, sk->sk_allocation);
if (!newkey)
return -ENOMEM;
return tcp_v4_md5_do_add(sk, sin->sin_addr.s_addr,
@@ -1158,7 +1196,7 @@ struct request_sock_ops tcp_request_sock_ops __read_mostly = {
};
#ifdef CONFIG_TCP_MD5SIG
-static struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
+static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
.md5_lookup = tcp_v4_reqsk_md5_lookup,
.calc_md5_hash = tcp_v4_md5_hash_skb,
};
@@ -1717,7 +1755,7 @@ int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)
return 0;
}
-struct inet_connection_sock_af_ops ipv4_specific = {
+const struct inet_connection_sock_af_ops ipv4_specific = {
.queue_xmit = ip_queue_xmit,
.send_check = tcp_v4_send_check,
.rebuild_header = inet_sk_rebuild_header,
@@ -1737,7 +1775,7 @@ struct inet_connection_sock_af_ops ipv4_specific = {
};
#ifdef CONFIG_TCP_MD5SIG
-static struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
+static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
.md5_lookup = tcp_v4_md5_lookup,
.calc_md5_hash = tcp_v4_md5_hash_skb,
.md5_add = tcp_v4_md5_add_func,
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index f8d67ccc64f..e48c37d74d7 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -322,7 +322,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
if (key != NULL) {
memcpy(&tcptw->tw_md5_key, key->key, key->keylen);
tcptw->tw_md5_keylen = key->keylen;
- if (tcp_alloc_md5sig_pool() == NULL)
+ if (tcp_alloc_md5sig_pool(sk) == NULL)
BUG();
}
} while (0);
@@ -657,29 +657,6 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
if (child == NULL)
goto listen_overflow;
-#ifdef CONFIG_TCP_MD5SIG
- else {
- /* Copy over the MD5 key from the original socket */
- struct tcp_md5sig_key *key;
- struct tcp_sock *tp = tcp_sk(sk);
- key = tp->af_specific->md5_lookup(sk, child);
- if (key != NULL) {
- /*
- * We're using one, so create a matching key on the
- * newsk structure. If we fail to get memory then we
- * end up not copying the key across. Shucks.
- */
- char *newkey = kmemdup(key->key, key->keylen,
- GFP_ATOMIC);
- if (newkey) {
- if (!tcp_alloc_md5sig_pool())
- BUG();
- tp->af_specific->md5_add(child, child, newkey,
- key->keylen);
- }
- }
- }
-#endif
inet_csk_reqsk_queue_unlink(sk, req, prev);
inet_csk_reqsk_queue_removed(sk, req);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index bd62712848f..5200aab0ca9 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -59,6 +59,7 @@ int sysctl_tcp_base_mss __read_mostly = 512;
/* By default, RFC2861 behavior. */
int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
+/* Account for new data that has been sent to the network. */
static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -142,6 +143,7 @@ static void tcp_cwnd_restart(struct sock *sk, struct dst_entry *dst)
tp->snd_cwnd_used = 0;
}
+/* Congestion state accounting after a packet has been sent. */
static void tcp_event_data_sent(struct tcp_sock *tp,
struct sk_buff *skb, struct sock *sk)
{
@@ -161,6 +163,7 @@ static void tcp_event_data_sent(struct tcp_sock *tp,
icsk->icsk_ack.pingpong = 1;
}
+/* Account for an ACK we sent. */
static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts)
{
tcp_dec_quickack_mode(sk, pkts);
@@ -276,6 +279,7 @@ static u16 tcp_select_window(struct sock *sk)
return new_win;
}
+/* Packet ECN state for a SYN-ACK */
static inline void TCP_ECN_send_synack(struct tcp_sock *tp, struct sk_buff *skb)
{
TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_CWR;
@@ -283,6 +287,7 @@ static inline void TCP_ECN_send_synack(struct tcp_sock *tp, struct sk_buff *skb)
TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_ECE;
}
+/* Packet ECN state for a SYN. */
static inline void TCP_ECN_send_syn(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -301,6 +306,9 @@ TCP_ECN_make_synack(struct request_sock *req, struct tcphdr *th)
th->ece = 1;
}
+/* Set up ECN state for a packet on a ESTABLISHED socket that is about to
+ * be sent.
+ */
static inline void TCP_ECN_send(struct sock *sk, struct sk_buff *skb,
int tcp_header_len)
{
@@ -362,7 +370,9 @@ struct tcp_out_options {
__u32 tsval, tsecr; /* need to include OPTION_TS */
};
-/* Beware: Something in the Internet is very sensitive to the ordering of
+/* Write previously computed TCP options to the packet.
+ *
+ * Beware: Something in the Internet is very sensitive to the ordering of
* TCP options, we learned this through the hard way, so be careful here.
* Luckily we can at least blame others for their non-compliance but from
* inter-operatibility perspective it seems that we're somewhat stuck with
@@ -445,6 +455,9 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
}
}
+/* Compute TCP options for SYN packets. This is not the final
+ * network wire format yet.
+ */
static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb,
struct tcp_out_options *opts,
struct tcp_md5sig_key **md5) {
@@ -493,6 +506,7 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb,
return size;
}
+/* Set up TCP options for SYN-ACKs. */
static unsigned tcp_synack_options(struct sock *sk,
struct request_sock *req,
unsigned mss, struct sk_buff *skb,
@@ -541,6 +555,9 @@ static unsigned tcp_synack_options(struct sock *sk,
return size;
}
+/* Compute TCP options for ESTABLISHED sockets. This is not the
+ * final wire format yet.
+ */
static unsigned tcp_established_options(struct sock *sk, struct sk_buff *skb,
struct tcp_out_options *opts,
struct tcp_md5sig_key **md5) {
@@ -705,7 +722,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
return net_xmit_eval(err);
}
-/* This routine just queue's the buffer
+/* This routine just queues the buffer for sending.
*
* NOTE: probe0 timer is not checked, do not forget tcp_push_pending_frames,
* otherwise socket can stall.
@@ -722,6 +739,7 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
sk_mem_charge(sk, skb->truesize);
}
+/* Initialize TSO segments for a packet. */
static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb,
unsigned int mss_now)
{
@@ -909,6 +927,7 @@ static void __pskb_trim_head(struct sk_buff *skb, int len)
skb->len = skb->data_len;
}
+/* Remove acked data from a packet in the transmit queue. */
int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
{
if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
@@ -937,7 +956,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
return 0;
}
-/* Not accounting for SACKs here. */
+/* Calculate MSS. Not accounting for SACKs here. */
int tcp_mtu_to_mss(struct sock *sk, int pmtu)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -981,6 +1000,7 @@ int tcp_mss_to_mtu(struct sock *sk, int mss)
return mtu;
}
+/* MTU probing init per socket */
void tcp_mtup_init(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -1143,7 +1163,8 @@ static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp,
return 0;
}
-/* This must be invoked the first time we consider transmitting
+/* Intialize TSO state of a skb.
+ * This must be invoked the first time we consider transmitting
* SKB onto the wire.
*/
static int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb,
@@ -1158,6 +1179,7 @@ static int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb,
return tso_segs;
}
+/* Minshall's variant of the Nagle send check. */
static inline int tcp_minshall_check(const struct tcp_sock *tp)
{
return after(tp->snd_sml, tp->snd_una) &&
@@ -1242,6 +1264,7 @@ static unsigned int tcp_snd_test(struct sock *sk, struct sk_buff *skb,
return cwnd_quota;
}
+/* Test if sending is allowed right now. */
int tcp_may_send_now(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -1378,6 +1401,10 @@ send_now:
}
/* Create a new MTU probe if we are ready.
+ * MTU probe is regularly attempting to increase the path MTU by
+ * deliberately sending larger packets. This discovers routing
+ * changes resulting in larger path MTUs.
+ *
* Returns 0 if we should wait to probe (no cwnd available),
* 1 if a probe was sent,
* -1 otherwise
@@ -1790,6 +1817,7 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
sk_wmem_free_skb(sk, next_skb);
}
+/* Check if coalescing SKBs is legal. */
static int tcp_can_collapse(struct sock *sk, struct sk_buff *skb)
{
if (tcp_skb_pcount(skb) > 1)
@@ -1808,6 +1836,9 @@ static int tcp_can_collapse(struct sock *sk, struct sk_buff *skb)
return 1;
}
+/* Collapse packets in the retransmit queue to make to create
+ * less packets on the wire. This is only done on retransmission.
+ */
static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
int space)
{
@@ -1957,6 +1988,9 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
return err;
}
+/* Check if we forward retransmits are possible in the current
+ * window/congestion state.
+ */
static int tcp_can_forward_retransmit(struct sock *sk)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -2101,7 +2135,8 @@ void tcp_send_fin(struct sock *sk)
} else {
/* Socket is locked, keep trying until memory is available. */
for (;;) {
- skb = alloc_skb_fclone(MAX_TCP_HEADER, GFP_KERNEL);
+ skb = alloc_skb_fclone(MAX_TCP_HEADER,
+ sk->sk_allocation);
if (skb)
break;
yield();
@@ -2145,7 +2180,8 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTRSTS);
}
-/* WARNING: This routine must only be called when we have already sent
+/* Send a crossed SYN-ACK during socket establishment.
+ * WARNING: This routine must only be called when we have already sent
* a SYN packet that crossed the incoming SYN that caused this routine
* to get called. If this assumption fails then the initial rcv_wnd
* and rcv_wscale values will not be correct.
@@ -2180,9 +2216,7 @@ int tcp_send_synack(struct sock *sk)
return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
}
-/*
- * Prepare a SYN-ACK.
- */
+/* Prepare a SYN-ACK. */
struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
struct request_sock *req)
{
@@ -2269,9 +2303,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
return skb;
}
-/*
- * Do all connect socket setups that can be done AF independent.
- */
+/* Do all connect socket setups that can be done AF independent. */
static void tcp_connect_init(struct sock *sk)
{
struct dst_entry *dst = __sk_dst_get(sk);
@@ -2330,9 +2362,7 @@ static void tcp_connect_init(struct sock *sk)
tcp_clear_retrans(tp);
}
-/*
- * Build a SYN and send it off.
- */
+/* Build a SYN and send it off. */
int tcp_connect(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -2359,7 +2389,7 @@ int tcp_connect(struct sock *sk)
sk->sk_wmem_queued += buff->truesize;
sk_mem_charge(sk, buff->truesize);
tp->packets_out += tcp_skb_pcount(buff);
- tcp_transmit_skb(sk, buff, 1, GFP_KERNEL);
+ tcp_transmit_skb(sk, buff, 1, sk->sk_allocation);
/* We change tp->snd_nxt after the tcp_transmit_skb() call
* in order to make this packet get counted in tcpOutSegs.
@@ -2493,6 +2523,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC);
}
+/* Initiate keepalive or window probe from timer. */
int tcp_write_wakeup(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index b144a26359b..cdb2ca7684d 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -137,13 +137,14 @@ static int tcp_write_timeout(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
int retry_until;
+ bool do_reset;
if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
if (icsk->icsk_retransmits)
dst_negative_advice(&sk->sk_dst_cache);
retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries;
} else {
- if (icsk->icsk_retransmits >= sysctl_tcp_retries1) {
+ if (retransmits_timed_out(sk, sysctl_tcp_retries1)) {
/* Black hole detection */
tcp_mtu_probing(icsk, sk);
@@ -155,13 +156,15 @@ static int tcp_write_timeout(struct sock *sk)
const int alive = (icsk->icsk_rto < TCP_RTO_MAX);
retry_until = tcp_orphan_retries(sk, alive);
+ do_reset = alive ||
+ !retransmits_timed_out(sk, retry_until);
- if (tcp_out_of_resources(sk, alive || icsk->icsk_retransmits < retry_until))
+ if (tcp_out_of_resources(sk, do_reset))
return 1;
}
}
- if (icsk->icsk_retransmits >= retry_until) {
+ if (retransmits_timed_out(sk, retry_until)) {
/* Has it gone just too far? */
tcp_write_err(sk);
return 1;
@@ -279,7 +282,7 @@ static void tcp_probe_timer(struct sock *sk)
* The TCP retransmit timer.
*/
-static void tcp_retransmit_timer(struct sock *sk)
+void tcp_retransmit_timer(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
@@ -385,7 +388,7 @@ static void tcp_retransmit_timer(struct sock *sk)
out_reset_timer:
icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX);
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX);
- if (icsk->icsk_retransmits > sysctl_tcp_retries1)
+ if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1))
__sk_dst_reset(sk);
out:;
@@ -499,8 +502,7 @@ static void tcp_keepalive_timer (unsigned long data)
elapsed = tcp_time_stamp - tp->rcv_tstamp;
if (elapsed >= keepalive_time_when(tp)) {
- if ((!tp->keepalive_probes && icsk->icsk_probes_out >= sysctl_tcp_keepalive_probes) ||
- (tp->keepalive_probes && icsk->icsk_probes_out >= tp->keepalive_probes)) {
+ if (icsk->icsk_probes_out >= keepalive_probes(tp)) {
tcp_send_active_reset(sk, GFP_ATOMIC);
tcp_write_err(sk);
goto out;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 80e3812837a..ebaaa7f973d 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -110,11 +110,12 @@ struct udp_table udp_table;
EXPORT_SYMBOL(udp_table);
int sysctl_udp_mem[3] __read_mostly;
-int sysctl_udp_rmem_min __read_mostly;
-int sysctl_udp_wmem_min __read_mostly;
-
EXPORT_SYMBOL(sysctl_udp_mem);
+
+int sysctl_udp_rmem_min __read_mostly;
EXPORT_SYMBOL(sysctl_udp_rmem_min);
+
+int sysctl_udp_wmem_min __read_mostly;
EXPORT_SYMBOL(sysctl_udp_wmem_min);
atomic_t udp_memory_allocated;
@@ -158,7 +159,7 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num,
*/
int udp_lib_get_port(struct sock *sk, unsigned short snum,
int (*saddr_comp)(const struct sock *sk1,
- const struct sock *sk2 ) )
+ const struct sock *sk2))
{
struct udp_hslot *hslot;
struct udp_table *udptable = sk->sk_prot->h.udp_table;
@@ -221,14 +222,15 @@ fail_unlock:
fail:
return error;
}
+EXPORT_SYMBOL(udp_lib_get_port);
static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
{
struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);
- return ( !ipv6_only_sock(sk2) &&
- (!inet1->rcv_saddr || !inet2->rcv_saddr ||
- inet1->rcv_saddr == inet2->rcv_saddr ));
+ return (!ipv6_only_sock(sk2) &&
+ (!inet1->rcv_saddr || !inet2->rcv_saddr ||
+ inet1->rcv_saddr == inet2->rcv_saddr));
}
int udp_v4_get_port(struct sock *sk, unsigned short snum)
@@ -383,8 +385,8 @@ found:
void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
{
struct inet_sock *inet;
- struct iphdr *iph = (struct iphdr*)skb->data;
- struct udphdr *uh = (struct udphdr*)(skb->data+(iph->ihl<<2));
+ struct iphdr *iph = (struct iphdr *)skb->data;
+ struct udphdr *uh = (struct udphdr *)(skb->data+(iph->ihl<<2));
const int type = icmp_hdr(skb)->type;
const int code = icmp_hdr(skb)->code;
struct sock *sk;
@@ -439,7 +441,7 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
if (!harderr || sk->sk_state != TCP_ESTABLISHED)
goto out;
} else {
- ip_icmp_error(sk, skb, err, uh->dest, info, (u8*)(uh+1));
+ ip_icmp_error(sk, skb, err, uh->dest, info, (u8 *)(uh+1));
}
sk->sk_err = err;
sk->sk_error_report(sk);
@@ -474,7 +476,7 @@ EXPORT_SYMBOL(udp_flush_pending_frames);
* (checksum field must be zeroed out)
*/
static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
- __be32 src, __be32 dst, int len )
+ __be32 src, __be32 dst, int len)
{
unsigned int offset;
struct udphdr *uh = udp_hdr(skb);
@@ -545,7 +547,7 @@ static int udp_push_pending_frames(struct sock *sk)
} else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
- udp4_hwcsum_outgoing(sk, skb, fl->fl4_src,fl->fl4_dst, up->len);
+ udp4_hwcsum_outgoing(sk, skb, fl->fl4_src, fl->fl4_dst, up->len);
goto send;
} else /* `normal' UDP */
@@ -553,18 +555,24 @@ static int udp_push_pending_frames(struct sock *sk)
/* add protocol-dependent pseudo-header */
uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, up->len,
- sk->sk_protocol, csum );
+ sk->sk_protocol, csum);
if (uh->check == 0)
uh->check = CSUM_MANGLED_0;
send:
err = ip_push_pending_frames(sk);
+ if (err) {
+ if (err == -ENOBUFS && !inet->recverr) {
+ UDP_INC_STATS_USER(sock_net(sk),
+ UDP_MIB_SNDBUFERRORS, is_udplite);
+ err = 0;
+ }
+ } else
+ UDP_INC_STATS_USER(sock_net(sk),
+ UDP_MIB_OUTDATAGRAMS, is_udplite);
out:
up->len = 0;
up->pending = 0;
- if (!err)
- UDP_INC_STATS_USER(sock_net(sk),
- UDP_MIB_OUTDATAGRAMS, is_udplite);
return err;
}
@@ -592,7 +600,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
* Check the flags.
*/
- if (msg->msg_flags&MSG_OOB) /* Mirror BSD error message compatibility */
+ if (msg->msg_flags & MSG_OOB) /* Mirror BSD error message compatibility */
return -EOPNOTSUPP;
ipc.opt = NULL;
@@ -619,7 +627,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
* Get and verify the address.
*/
if (msg->msg_name) {
- struct sockaddr_in * usin = (struct sockaddr_in*)msg->msg_name;
+ struct sockaddr_in * usin = (struct sockaddr_in *)msg->msg_name;
if (msg->msg_namelen < sizeof(*usin))
return -EINVAL;
if (usin->sin_family != AF_INET) {
@@ -684,7 +692,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
}
if (connected)
- rt = (struct rtable*)sk_dst_check(sk, 0);
+ rt = (struct rtable *)sk_dst_check(sk, 0);
if (rt == NULL) {
struct flowi fl = { .oif = ipc.oif,
@@ -782,6 +790,7 @@ do_confirm:
err = 0;
goto out;
}
+EXPORT_SYMBOL(udp_sendmsg);
int udp_sendpage(struct sock *sk, struct page *page, int offset,
size_t size, int flags)
@@ -871,6 +880,7 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
return 0;
}
+EXPORT_SYMBOL(udp_ioctl);
/*
* This should be easy, if there is something there we
@@ -892,7 +902,7 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
* Check any passed addresses
*/
if (addr_len)
- *addr_len=sizeof(*sin);
+ *addr_len = sizeof(*sin);
if (flags & MSG_ERRQUEUE)
return ip_recv_error(sk, msg, len);
@@ -923,9 +933,11 @@ try_again:
if (skb_csum_unnecessary(skb))
err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr),
- msg->msg_iov, copied );
+ msg->msg_iov, copied);
else {
- err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov);
+ err = skb_copy_and_csum_datagram_iovec(skb,
+ sizeof(struct udphdr),
+ msg->msg_iov);
if (err == -EINVAL)
goto csum_copy_err;
@@ -941,8 +953,7 @@ try_again:
sock_recv_timestamp(msg, sk, skb);
/* Copy the address. */
- if (sin)
- {
+ if (sin) {
sin->sin_family = AF_INET;
sin->sin_port = udp_hdr(skb)->source;
sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
@@ -995,6 +1006,7 @@ int udp_disconnect(struct sock *sk, int flags)
sk_dst_reset(sk);
return 0;
}
+EXPORT_SYMBOL(udp_disconnect);
void udp_lib_unhash(struct sock *sk)
{
@@ -1044,7 +1056,7 @@ drop:
* Note that in the success and error cases, the skb is assumed to
* have either been requeued or freed.
*/
-int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
+int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
struct udp_sock *up = udp_sk(sk);
int rc;
@@ -1214,7 +1226,7 @@ static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh,
if (uh->check == 0) {
skb->ip_summed = CHECKSUM_UNNECESSARY;
} else if (skb->ip_summed == CHECKSUM_COMPLETE) {
- if (!csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len,
+ if (!csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len,
proto, skb->csum))
skb->ip_summed = CHECKSUM_UNNECESSARY;
}
@@ -1355,7 +1367,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
int err = 0;
int is_udplite = IS_UDPLITE(sk);
- if (optlen<sizeof(int))
+ if (optlen < sizeof(int))
return -EINVAL;
if (get_user(val, (int __user *)optval))
@@ -1426,6 +1438,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
return err;
}
+EXPORT_SYMBOL(udp_lib_setsockopt);
int udp_setsockopt(struct sock *sk, int level, int optname,
char __user *optval, int optlen)
@@ -1453,7 +1466,7 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname,
struct udp_sock *up = udp_sk(sk);
int val, len;
- if (get_user(len,optlen))
+ if (get_user(len, optlen))
return -EFAULT;
len = min_t(unsigned int, len, sizeof(int));
@@ -1486,10 +1499,11 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname,
if (put_user(len, optlen))
return -EFAULT;
- if (copy_to_user(optval, &val,len))
+ if (copy_to_user(optval, &val, len))
return -EFAULT;
return 0;
}
+EXPORT_SYMBOL(udp_lib_getsockopt);
int udp_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen)
@@ -1528,9 +1542,9 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
int is_lite = IS_UDPLITE(sk);
/* Check for false positives due to checksum errors */
- if ( (mask & POLLRDNORM) &&
- !(file->f_flags & O_NONBLOCK) &&
- !(sk->sk_shutdown & RCV_SHUTDOWN)){
+ if ((mask & POLLRDNORM) &&
+ !(file->f_flags & O_NONBLOCK) &&
+ !(sk->sk_shutdown & RCV_SHUTDOWN)) {
struct sk_buff_head *rcvq = &sk->sk_receive_queue;
struct sk_buff *skb;
@@ -1552,6 +1566,7 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
return mask;
}
+EXPORT_SYMBOL(udp_poll);
struct proto udp_prot = {
.name = "UDP",
@@ -1582,6 +1597,7 @@ struct proto udp_prot = {
.compat_getsockopt = compat_udp_getsockopt,
#endif
};
+EXPORT_SYMBOL(udp_prot);
/* ------------------------------------------------------------------------ */
#ifdef CONFIG_PROC_FS
@@ -1703,11 +1719,13 @@ int udp_proc_register(struct net *net, struct udp_seq_afinfo *afinfo)
rc = -ENOMEM;
return rc;
}
+EXPORT_SYMBOL(udp_proc_register);
void udp_proc_unregister(struct net *net, struct udp_seq_afinfo *afinfo)
{
proc_net_remove(net, afinfo->name);
}
+EXPORT_SYMBOL(udp_proc_unregister);
/* ------------------------------------------------------------------------ */
static void udp4_format_sock(struct sock *sp, struct seq_file *f,
@@ -1741,7 +1759,7 @@ int udp4_seq_show(struct seq_file *seq, void *v)
int len;
udp4_format_sock(v, seq, state->bucket, &len);
- seq_printf(seq, "%*s\n", 127 - len ,"");
+ seq_printf(seq, "%*s\n", 127 - len, "");
}
return 0;
}
@@ -1816,16 +1834,64 @@ void __init udp_init(void)
sysctl_udp_wmem_min = SK_MEM_QUANTUM;
}
-EXPORT_SYMBOL(udp_disconnect);
-EXPORT_SYMBOL(udp_ioctl);
-EXPORT_SYMBOL(udp_prot);
-EXPORT_SYMBOL(udp_sendmsg);
-EXPORT_SYMBOL(udp_lib_getsockopt);
-EXPORT_SYMBOL(udp_lib_setsockopt);
-EXPORT_SYMBOL(udp_poll);
-EXPORT_SYMBOL(udp_lib_get_port);
+int udp4_ufo_send_check(struct sk_buff *skb)
+{
+ const struct iphdr *iph;
+ struct udphdr *uh;
+
+ if (!pskb_may_pull(skb, sizeof(*uh)))
+ return -EINVAL;
+
+ iph = ip_hdr(skb);
+ uh = udp_hdr(skb);
+
+ uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len,
+ IPPROTO_UDP, 0);
+ skb->csum_start = skb_transport_header(skb) - skb->head;
+ skb->csum_offset = offsetof(struct udphdr, check);
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ return 0;
+}
+
+struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, int features)
+{
+ struct sk_buff *segs = ERR_PTR(-EINVAL);
+ unsigned int mss;
+ int offset;
+ __wsum csum;
+
+ mss = skb_shinfo(skb)->gso_size;
+ if (unlikely(skb->len <= mss))
+ goto out;
+
+ if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) {
+ /* Packet is from an untrusted source, reset gso_segs. */
+ int type = skb_shinfo(skb)->gso_type;
+
+ if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY) ||
+ !(type & (SKB_GSO_UDP))))
+ goto out;
+
+ skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss);
+
+ segs = NULL;
+ goto out;
+ }
+
+ /* Do software UFO. Complete and fill in the UDP checksum as HW cannot
+ * do checksum of UDP packets sent as multiple IP fragments.
+ */
+ offset = skb->csum_start - skb_headroom(skb);
+ csum = skb_checksum(skb, offset, skb->len - offset, 0);
+ offset += skb->csum_offset;
+ *(__sum16 *)(skb->data + offset) = csum_fold(csum);
+ skb->ip_summed = CHECKSUM_NONE;
+
+ /* Fragment the skb. IP headers of the fragments are updated in
+ * inet_gso_segment()
+ */
+ segs = skb_segment(skb, features);
+out:
+ return segs;
+}
-#ifdef CONFIG_PROC_FS
-EXPORT_SYMBOL(udp_proc_register);
-EXPORT_SYMBOL(udp_proc_unregister);
-#endif
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 0071ee6f441..74fb2eb833e 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -264,6 +264,22 @@ static struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
.fill_dst = xfrm4_fill_dst,
};
+#ifdef CONFIG_SYSCTL
+static struct ctl_table xfrm4_policy_table[] = {
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "xfrm4_gc_thresh",
+ .data = &xfrm4_dst_ops.gc_thresh,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ { }
+};
+
+static struct ctl_table_header *sysctl_hdr;
+#endif
+
static void __init xfrm4_policy_init(void)
{
xfrm_policy_register_afinfo(&xfrm4_policy_afinfo);
@@ -271,12 +287,31 @@ static void __init xfrm4_policy_init(void)
static void __exit xfrm4_policy_fini(void)
{
+#ifdef CONFIG_SYSCTL
+ if (sysctl_hdr)
+ unregister_net_sysctl_table(sysctl_hdr);
+#endif
xfrm_policy_unregister_afinfo(&xfrm4_policy_afinfo);
}
-void __init xfrm4_init(void)
+void __init xfrm4_init(int rt_max_size)
{
xfrm4_state_init();
xfrm4_policy_init();
+ /*
+ * Select a default value for the gc_thresh based on the main route
+ * table hash size. It seems to me the worst case scenario is when
+ * we have ipsec operating in transport mode, in which we create a
+ * dst_entry per socket. The xfrm gc algorithm starts trying to remove
+ * entries at gc_thresh, and prevents new allocations as 2*gc_thresh
+ * so lets set an initial xfrm gc_thresh value at the rt_max_size/2.
+ * That will let us store an ipsec connection per route table entry,
+ * and start cleaning when were 1/2 full
+ */
+ xfrm4_dst_ops.gc_thresh = rt_max_size/2;
+#ifdef CONFIG_SYSCTL
+ sysctl_hdr = register_net_sysctl_table(&init_net, net_ipv4_ctl_path,
+ xfrm4_policy_table);
+#endif
}