Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next

Pull networking changes from David Miller: 1) Get rid of the error prone NLA_PUT*() macros that used an embedded goto. 2) Kill off the token-ring and MCA networking drivers, from Paul Gortmaker. 3) Reduce high-order allocations made by datagram AF_UNIX sockets, from Eric Dumazet. 4) Add PTP hardware clock support to IGB and IXGBE, from Richard Cochran and Jacob Keller. 5) Allow users to query timestamping capabilities of a card via ethtool, from Richard Cochran. 6) Add loadbalance mode to the teaming driver, from Jiri Pirko. Part of this is that we can now have BPF filters not attached to sockets, and the loadbalancing function is calculated using one. 7) Francois Romieu went through the network drivers removing gratuitous uses of netdev->base_addr, perhaps some day we can remove it completely but it's used for ISA probing still. 8) Add a BPF JIT for sparc. I know, who cares, right? :-) 9) Move networking sysctl registry away from using the compatability mode interfaces in the sysctl code. From Eric W Biederman. 10) Pavel Emelyanov added a way to save and restore TCP socket state via TCP_REPAIR, TCP_REPAIR_QUEUE, and TCP_QUEUE_SEQ socket options as well as a way to forcefully bind a socket to a port via the sk->sk_reuse value SK_FORCE_REUSE. There is also a TCP_REPAIR_OPTIONS which allows to reinstante the TCP options enabled on the connection. 11) Several enhancements from Eric Dumazet that, in particular, can enhance splice performance on TCP sockets significantly. a) Reset the offset of the per-socket sendmsg page when we know we're the only use of the page in linear_to_page(). b) Add facilities such that skb->data can be backed a page rather than SLAB kmalloc'd memory. In particular devices which were receiving into linear RX buffers can now end up providing paged data. The big result is that code like splice and GRO do not have to copy any more. 12) Allow a pure sender to more gracefully handle ACK backlogs in TCP. What can happen at high rates is that the sender hasn't grown his receive buffer limits at all (he's not receiving data so really doesn't need to), but the non-data ACKs consume receive buffer space. sk_add_backlog() is too aggressive in dropping frames in this case, so relax it's requirements by using the receive buffer plus the send buffer limit as the backlog limit instead of just the former. Also from Eric Dumazet. 13) Add ipv6 support to L2TP, from Benjamin LaHaise, James Chapman, and Chris Elston. 14) Implement TCP early retransmit (RFC 5827), from Yuchung Cheng. Basically, we can start fast retransmit before hiting the dupack threshold under certain conditions. 15) New CODEL active queue management packet scheduler, from Eric Dumazet based upon initial work by Dave Taht. Basically, the big feature is that packets are dropped (or ECN bits are set) based upon how long packets live in the queue, rather than the queue length (which is what RED uses). * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1341 commits) drivers/net/stmmac: seq_file fix memory leak ipv6/exthdrs: strict Pad1 and PadN check USB: qmi_wwan: Add ZTE (Vodafone) K3520-Z USB: qmi_wwan: Add ZTE (Vodafone) K3765-Z USB: qmi_wwan: Make forced int 4 whitelist generic net/ipv4: replace simple_strtoul with kstrtoul net/ipv4/ipconfig: neaten __setup placement net: qmi_wwan: Add Vodafone/Huawei K5005 support net: cdc_ether: Add ZTE WWAN matches before generic Ethernet ipv6: use skb coalescing in reassembly ipv4: use skb coalescing in defragmentation net: introduce skb_try_coalesce() net:ipv6:fixed space issues relating to operators. net:ipv6:fixed a trailing white space issue. ipv6: disable GSO on sockets hitting dst_allfrag tg3: use netdev_alloc_frag() API net: napi_frags_skb() is static ppp: avoid false drop_monitor false positives ipv6: bool/const conversions phase2 ipx: Remove spurious NULL checking in ipx_ioctl(). ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2012-05-21 10:03:46 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2012-05-21 10:03:46 -0700
commit: cb62ab71fe2b16e8203a0f0a2ef4eda23d761338 (patch)
tree: 536ba39658e47d511a489c52f7aac60cd78967e5 /net/core/sock.c
parent: 31ed8e6f93a27304c9e157dab0267772cd94eaad (diff)
parent: 74863948f925d9f3bb4e3d3a783e49e9c662d839 (diff)
1 files changed, 44 insertions, 51 deletions
diff --git a/net/core/sock.c b/net/core/sock.c
index b2e14c07d92..5efcd6307fa 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -89,6 +89,8 @@
  *		2 of the License, or (at your option) any later version.
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/capability.h>
 #include <linux/errno.h>
 #include <linux/types.h>
@@ -113,6 +115,7 @@
 #include <linux/user_namespace.h>
 #include <linux/static_key.h>
 #include <linux/memcontrol.h>
+#include <linux/prefetch.h>
 
 #include <asm/uaccess.h>
 
@@ -258,7 +261,9 @@ static struct lock_class_key af_callback_keys[AF_MAX];
 
 /* Run time adjustable parameters. */
 __u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
+EXPORT_SYMBOL(sysctl_wmem_max);
 __u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
+EXPORT_SYMBOL(sysctl_rmem_max);
 __u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
 __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
 
@@ -294,9 +299,8 @@ static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
 		*timeo_p = 0;
 		if (warned < 10 && net_ratelimit()) {
 			warned++;
-			printk(KERN_INFO "sock_set_timeout: `%s' (pid %d) "
-			       "tries to set negative timeout\n",
-				current->comm, task_pid_nr(current));
+			pr_info("%s: `%s' (pid %d) tries to set negative timeout\n",
+				__func__, current->comm, task_pid_nr(current));
 		}
 		return 0;
 	}
@@ -314,8 +318,8 @@ static void sock_warn_obsolete_bsdism(const char *name)
 	static char warncomm[TASK_COMM_LEN];
 	if (strcmp(warncomm, current->comm) && warned < 5) {
 		strcpy(warncomm,  current->comm);
-		printk(KERN_WARNING "process `%s' is using obsolete "
-		       "%s SO_BSDCOMPAT\n", warncomm, name);
+		pr_warn("process `%s' is using obsolete %s SO_BSDCOMPAT\n",
+			warncomm, name);
 		warned++;
 	}
 }
@@ -389,7 +393,7 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
 
 	skb->dev = NULL;
 
-	if (sk_rcvqueues_full(sk, skb)) {
+	if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf)) {
 		atomic_inc(&sk->sk_drops);
 		goto discard_and_relse;
 	}
@@ -406,7 +410,7 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
 		rc = sk_backlog_rcv(sk, skb);
 
 		mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
-	} else if (sk_add_backlog(sk, skb)) {
+	} else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) {
 		bh_unlock_sock(sk);
 		atomic_inc(&sk->sk_drops);
 		goto discard_and_relse;
@@ -561,7 +565,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
 			sock_valbool_flag(sk, SOCK_DBG, valbool);
 		break;
 	case SO_REUSEADDR:
-		sk->sk_reuse = valbool;
+		sk->sk_reuse = (valbool ? SK_CAN_REUSE : SK_NO_REUSE);
 		break;
 	case SO_TYPE:
 	case SO_PROTOCOL:
@@ -577,23 +581,15 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
 		break;
 	case SO_SNDBUF:
 		/* Don't error on this BSD doesn't and if you think
-		   about it this is right. Otherwise apps have to
-		   play 'guess the biggest size' games. RCVBUF/SNDBUF
-		   are treated in BSD as hints */
-
-		if (val > sysctl_wmem_max)
-			val = sysctl_wmem_max;
+		 * about it this is right. Otherwise apps have to
+		 * play 'guess the biggest size' games. RCVBUF/SNDBUF
+		 * are treated in BSD as hints
+		 */
+		val = min_t(u32, val, sysctl_wmem_max);
 set_sndbuf:
 		sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
-		if ((val * 2) < SOCK_MIN_SNDBUF)
-			sk->sk_sndbuf = SOCK_MIN_SNDBUF;
-		else
-			sk->sk_sndbuf = val * 2;
-
-		/*
-		 *	Wake up sending tasks if we
-		 *	upped the value.
-		 */
+		sk->sk_sndbuf = max_t(u32, val * 2, SOCK_MIN_SNDBUF);
+		/* Wake up sending tasks if we upped the value. */
 		sk->sk_write_space(sk);
 		break;
 
@@ -606,12 +602,11 @@ set_sndbuf:
 
 	case SO_RCVBUF:
 		/* Don't error on this BSD doesn't and if you think
-		   about it this is right. Otherwise apps have to
-		   play 'guess the biggest size' games. RCVBUF/SNDBUF
-		   are treated in BSD as hints */
-
-		if (val > sysctl_rmem_max)
-			val = sysctl_rmem_max;
+		 * about it this is right. Otherwise apps have to
+		 * play 'guess the biggest size' games. RCVBUF/SNDBUF
+		 * are treated in BSD as hints
+		 */
+		val = min_t(u32, val, sysctl_rmem_max);
 set_rcvbuf:
 		sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
 		/*
@@ -629,10 +624,7 @@ set_rcvbuf:
 		 * returning the value we actually used in getsockopt
 		 * is the most desirable behavior.
 		 */
-		if ((val * 2) < SOCK_MIN_RCVBUF)
-			sk->sk_rcvbuf = SOCK_MIN_RCVBUF;
-		else
-			sk->sk_rcvbuf = val * 2;
+		sk->sk_rcvbuf = max_t(u32, val * 2, SOCK_MIN_RCVBUF);
 		break;
 
 	case SO_RCVBUFFORCE:
@@ -858,7 +850,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 		break;
 
 	case SO_BROADCAST:
-		v.val = !!sock_flag(sk, SOCK_BROADCAST);
+		v.val = sock_flag(sk, SOCK_BROADCAST);
 		break;
 
 	case SO_SNDBUF:
@@ -874,7 +866,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 		break;
 
 	case SO_KEEPALIVE:
-		v.val = !!sock_flag(sk, SOCK_KEEPOPEN);
+		v.val = sock_flag(sk, SOCK_KEEPOPEN);
 		break;
 
 	case SO_TYPE:
@@ -896,7 +888,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 		break;
 
 	case SO_OOBINLINE:
-		v.val = !!sock_flag(sk, SOCK_URGINLINE);
+		v.val = sock_flag(sk, SOCK_URGINLINE);
 		break;
 
 	case SO_NO_CHECK:
@@ -909,7 +901,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 
 	case SO_LINGER:
 		lv		= sizeof(v.ling);
-		v.ling.l_onoff	= !!sock_flag(sk, SOCK_LINGER);
+		v.ling.l_onoff	= sock_flag(sk, SOCK_LINGER);
 		v.ling.l_linger	= sk->sk_lingertime / HZ;
 		break;
 
@@ -975,7 +967,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 		break;
 
 	case SO_PASSCRED:
-		v.val = test_bit(SOCK_PASSCRED, &sock->flags) ? 1 : 0;
+		v.val = !!test_bit(SOCK_PASSCRED, &sock->flags);
 		break;
 
 	case SO_PEERCRED:
@@ -1010,7 +1002,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 		break;
 
 	case SO_PASSSEC:
-		v.val = test_bit(SOCK_PASSSEC, &sock->flags) ? 1 : 0;
+		v.val = !!test_bit(SOCK_PASSSEC, &sock->flags);
 		break;
 
 	case SO_PEERSEC:
@@ -1021,11 +1013,11 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 		break;
 
 	case SO_RXQ_OVFL:
-		v.val = !!sock_flag(sk, SOCK_RXQ_OVFL);
+		v.val = sock_flag(sk, SOCK_RXQ_OVFL);
 		break;
 
 	case SO_WIFI_STATUS:
-		v.val = !!sock_flag(sk, SOCK_WIFI_STATUS);
+		v.val = sock_flag(sk, SOCK_WIFI_STATUS);
 		break;
 
 	case SO_PEEK_OFF:
@@ -1035,7 +1027,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 		v.val = sk->sk_peek_off;
 		break;
 	case SO_NOFCS:
-		v.val = !!sock_flag(sk, SOCK_NOFCS);
+		v.val = sock_flag(sk, SOCK_NOFCS);
 		break;
 	default:
 		return -ENOPROTOOPT;
@@ -1247,8 +1239,8 @@ static void __sk_free(struct sock *sk)
 	sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP);
 
 	if (atomic_read(&sk->sk_omem_alloc))
-		printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n",
-		       __func__, atomic_read(&sk->sk_omem_alloc));
+		pr_debug("%s: optmem leakage (%d bytes) detected\n",
+			 __func__, atomic_read(&sk->sk_omem_alloc));
 
 	if (sk->sk_peer_cred)
 		put_cred(sk->sk_peer_cred);
@@ -1534,7 +1526,7 @@ struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force,
  */
 void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
 {
-	if ((unsigned)size <= sysctl_optmem_max &&
+	if ((unsigned int)size <= sysctl_optmem_max &&
 	    atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
 		void *mem;
 		/* First do the add, to avoid the race if kmalloc
@@ -1712,6 +1704,7 @@ static void __release_sock(struct sock *sk)
 		do {
 			struct sk_buff *next = skb->next;
 
+			prefetch(next);
 			WARN_ON_ONCE(skb_dst_is_noref(skb));
 			skb->next = NULL;
 			sk_backlog_rcv(sk, skb);
@@ -2432,7 +2425,7 @@ static void assign_proto_idx(struct proto *prot)
 	prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR);
 
 	if (unlikely(prot->inuse_idx == PROTO_INUSE_NR - 1)) {
-		printk(KERN_ERR "PROTO_INUSE_NR exhausted\n");
+		pr_err("PROTO_INUSE_NR exhausted\n");
 		return;
 	}
 
@@ -2462,8 +2455,8 @@ int proto_register(struct proto *prot, int alloc_slab)
 					NULL);
 
 		if (prot->slab == NULL) {
-			printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n",
-			       prot->name);
+			pr_crit("%s: Can't create sock SLAB cache!\n",
+				prot->name);
 			goto out;
 		}
 
@@ -2477,8 +2470,8 @@ int proto_register(struct proto *prot, int alloc_slab)
 								 SLAB_HWCACHE_ALIGN, NULL);
 
 			if (prot->rsk_prot->slab == NULL) {
-				printk(KERN_CRIT "%s: Can't create request sock SLAB cache!\n",
-				       prot->name);
+				pr_crit("%s: Can't create request sock SLAB cache!\n",
+					prot->name);
 				goto out_free_request_sock_slab_name;
 			}
 		}
@@ -2576,7 +2569,7 @@ static char proto_method_implemented(const void *method)
 }
 static long sock_prot_memory_allocated(struct proto *proto)
 {
-	return proto->memory_allocated != NULL ? proto_memory_allocated(proto): -1L;
+	return proto->memory_allocated != NULL ? proto_memory_allocated(proto) : -1L;
 }
 
 static char *sock_prot_memory_pressure(struct proto *proto)
author	Linus Torvalds <torvalds@linux-foundation.org>	2012-05-21 10:03:46 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-05-21 10:03:46 -0700
commit	cb62ab71fe2b16e8203a0f0a2ef4eda23d761338 (patch)
tree	536ba39658e47d511a489c52f7aac60cd78967e5 /net/core/sock.c
parent	31ed8e6f93a27304c9e157dab0267772cd94eaad (diff)
parent	74863948f925d9f3bb4e3d3a783e49e9c662d839 (diff)