From 27bf69708394ca270f9ec53875aa7309c86dfc1d Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Wed, 16 Oct 2013 21:07:39 +0530 Subject: net: use DMA_COMPLETE for dma completion status Acked-by: David S. Miller Acked-by: Dan Williams Acked-by: Linus Walleij Signed-off-by: Vinod Koul --- net/ipv4/tcp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 6e5617b9f9d..d2652fb3232 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1429,7 +1429,7 @@ static void tcp_service_net_dma(struct sock *sk, bool wait) do { if (dma_async_is_tx_complete(tp->ucopy.dma_chan, last_issued, &done, - &used) == DMA_SUCCESS) { + &used) == DMA_COMPLETE) { /* Safe to free early-copied skbs now */ __skb_queue_purge(&sk->sk_async_wait_queue); break; @@ -1437,7 +1437,7 @@ static void tcp_service_net_dma(struct sock *sk, bool wait) struct sk_buff *skb; while ((skb = skb_peek(&sk->sk_async_wait_queue)) && (dma_async_is_complete(skb->dma_cookie, done, - used) == DMA_SUCCESS)) { + used) == DMA_COMPLETE)) { __skb_dequeue(&sk->sk_async_wait_queue); kfree_skb(skb); } -- cgit v1.2.3-70-g09d2 From 60c7a3c9c77239d5a7430c42a2c796881716cca1 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sat, 2 Nov 2013 02:36:31 -0700 Subject: Bluetooth: Fix issue with RFCOMM getsockopt operation The commit 94a86df01082557e2de45865e538d7fb6c46231c seem to have uncovered a long standing bug that did not trigger so far. BUG: unable to handle kernel paging request at 00000009dd503502 IP: [] rfcomm_sock_getsockopt+0x128/0x200 PGD 0 Oops: 0000 [#1] SMP Modules linked in: ath5k ath mac80211 cfg80211 CPU: 2 PID: 1459 Comm: bluetoothd Not tainted 3.11.0-133163-gcebd830 #2 Hardware name: System manufacturer System Product Name/P6T DELUXE V2, BIOS 1202 12/22/2010 task: ffff8803304106a0 ti: ffff88033046a000 task.ti: ffff88033046a000 RIP: 0010:[] [] rfcomm_sock_getsockopt+0x128/0x200 RSP: 0018:ffff88033046bed8 EFLAGS: 00010246 RAX: 00000009dd503502 RBX: 0000000000000003 RCX: 00007fffa2ed5548 RDX: 0000000000000003 RSI: 0000000000000012 RDI: ffff88032fd37480 RBP: ffff88033046bf28 R08: 00007fffa2ed554c R09: ffff88032f5707d8 R10: 00007fffa2ed5548 R11: 0000000000000202 R12: ffff880330bbd000 R13: 00007fffa2ed5548 R14: 0000000000000003 R15: 00007fffa2ed554c FS: 00007fc44cfac700(0000) GS:ffff88033fc80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000009dd503502 CR3: 00000003304c2000 CR4: 00000000000007e0 Stack: ffff88033046bf28 ffffffff815b0f2f ffff88033046bf18 0002ffff81105ef6 0000000600000000 ffff88032fd37480 0000000000000012 00007fffa2ed5548 0000000000000003 00007fffa2ed554c ffff88033046bf78 ffffffff814c0380 Call Trace: [] ? rfcomm_sock_setsockopt+0x5f/0x190 [] SyS_getsockopt+0x60/0xb0 [] system_call_fastpath+0x16/0x1b Code: 02 00 00 00 0f 47 d0 4c 89 ef e8 74 13 cd ff 83 f8 01 19 c9 f7 d1 83 e1 f2 e9 4b ff ff ff 0f 1f 44 00 00 49 8b 84 24 70 02 00 00 <4c> 8b 30 4c 89 c0 e8 2d 19 cd ff 85 c0 49 89 d7 b9 f2 ff ff ff RIP [] rfcomm_sock_getsockopt+0x128/0x200 RSP CR2: 00000009dd503502 It triggers in the following segment of the code: 0x1313 is in rfcomm_sock_getsockopt (net/bluetooth/rfcomm/sock.c:743). 738 739 static int rfcomm_sock_getsockopt_old(struct socket *sock, int optname, char __user *optval, int __user *optlen) 740 { 741 struct sock *sk = sock->sk; 742 struct rfcomm_conninfo cinfo; 743 struct l2cap_conn *conn = l2cap_pi(sk)->chan->conn; 744 int len, err = 0; 745 u32 opt; 746 747 BT_DBG("sk %p", sk); The l2cap_pi(sk) is wrong here since it should have been rfcomm_pi(sk), but that socket of course does not contain the low-level connection details requested here. Tracking down the actual offending commit, it seems that this has been introduced when doing some L2CAP refactoring: commit 8c1d787be4b62d2d1b6f04953eca4bcf7c839d44 Author: Gustavo F. Padovan Date: Wed Apr 13 20:23:55 2011 -0300 @@ -743,6 +743,7 @@ static int rfcomm_sock_getsockopt_old(struct socket *sock, int optname, char __u struct sock *sk = sock->sk; struct sock *l2cap_sk; struct rfcomm_conninfo cinfo; + struct l2cap_conn *conn = l2cap_pi(sk)->chan->conn; int len, err = 0; u32 opt; @@ -787,8 +788,8 @@ static int rfcomm_sock_getsockopt_old(struct socket *sock, int optname, char __u l2cap_sk = rfcomm_pi(sk)->dlc->session->sock->sk; - cinfo.hci_handle = l2cap_pi(l2cap_sk)->conn->hcon->handle; - memcpy(cinfo.dev_class, l2cap_pi(l2cap_sk)->conn->hcon->dev_class, 3); + cinfo.hci_handle = conn->hcon->handle; + memcpy(cinfo.dev_class, conn->hcon->dev_class, 3); The l2cap_sk got accidentally mixed into the sk (which is RFCOMM) and now causing a problem within getsocketopt() system call. To fix this, just re-introduce l2cap_sk and make sure the right socket is used for the low-level connection details. Reported-by: Fabio Rossi Reported-by: Janusz Dziedzic Tested-by: Janusz Dziedzic Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/rfcomm/sock.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 30b3721dc6d..7096cfe3bd5 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -732,8 +732,9 @@ static int rfcomm_sock_setsockopt(struct socket *sock, int level, int optname, c static int rfcomm_sock_getsockopt_old(struct socket *sock, int optname, char __user *optval, int __user *optlen) { struct sock *sk = sock->sk; + struct sock *l2cap_sk; + struct l2cap_conn *conn; struct rfcomm_conninfo cinfo; - struct l2cap_conn *conn = l2cap_pi(sk)->chan->conn; int len, err = 0; u32 opt; @@ -776,6 +777,9 @@ static int rfcomm_sock_getsockopt_old(struct socket *sock, int optname, char __u break; } + l2cap_sk = rfcomm_pi(sk)->dlc->session->sock->sk; + conn = l2cap_pi(l2cap_sk)->chan->conn; + memset(&cinfo, 0, sizeof(cinfo)); cinfo.hci_handle = conn->hcon->handle; memcpy(cinfo.dev_class, conn->hcon->dev_class, 3); -- cgit v1.2.3-70-g09d2 From c507f138fc7c2025d75b65018810dc0561d0bdb9 Mon Sep 17 00:00:00 2001 From: Seung-Woo Kim Date: Tue, 5 Nov 2013 16:02:24 +0900 Subject: Bluetooth: Fix RFCOMM bind fail for L2CAP sock L2CAP socket bind checks its bdaddr type but RFCOMM kernel thread does not assign proper bdaddr type for L2CAP sock. This can cause that RFCOMM failure. Signed-off-by: Seung-Woo Kim Signed-off-by: Marcel Holtmann --- net/bluetooth/rfcomm/core.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index ca957d34b0c..292fa64a19d 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -694,6 +694,7 @@ static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, addr.l2_family = AF_BLUETOOTH; addr.l2_psm = 0; addr.l2_cid = 0; + addr.l2_bdaddr_type = BDADDR_BREDR; *err = kernel_bind(sock, (struct sockaddr *) &addr, sizeof(addr)); if (*err < 0) goto failed; @@ -1983,6 +1984,7 @@ static int rfcomm_add_listener(bdaddr_t *ba) addr.l2_family = AF_BLUETOOTH; addr.l2_psm = __constant_cpu_to_le16(RFCOMM_PSM); addr.l2_cid = 0; + addr.l2_bdaddr_type = BDADDR_BREDR; err = kernel_bind(sock, (struct sockaddr *) &addr, sizeof(addr)); if (err < 0) { BT_ERR("Bind failed %d", err); -- cgit v1.2.3-70-g09d2 From 8992da099332db896144465a01ad636f58bdebd9 Mon Sep 17 00:00:00 2001 From: Seung-Woo Kim Date: Tue, 5 Nov 2013 17:15:42 +0900 Subject: Bluetooth: Fix to set proper bdaddr_type for RFCOMM connect L2CAP socket validates proper bdaddr_type for connect, so this patch fixes to set explictly bdaddr_type for RFCOMM connect. Signed-off-by: Seung-Woo Kim Signed-off-by: Marcel Holtmann --- net/bluetooth/rfcomm/core.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 292fa64a19d..ce7521120d6 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -720,6 +720,7 @@ static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, addr.l2_family = AF_BLUETOOTH; addr.l2_psm = __constant_cpu_to_le16(RFCOMM_PSM); addr.l2_cid = 0; + addr.l2_bdaddr_type = BDADDR_BREDR; *err = kernel_connect(sock, (struct sockaddr *) &addr, sizeof(addr), O_NONBLOCK); if (*err == 0 || *err == -EINPROGRESS) return s; -- cgit v1.2.3-70-g09d2 From 31e8ce80baa7eaed9eec611deba982b24beed9e5 Mon Sep 17 00:00:00 2001 From: Seung-Woo Kim Date: Tue, 5 Nov 2013 18:46:33 +0900 Subject: Bluetooth: Fix crash in l2cap_chan_send after l2cap_chan_del Removing a bond and disconnecting from a specific remote device can cause l2cap_chan_send() is called after l2cap_chan_del() is called. This causes following crash. [ 1384.972086] Unable to handle kernel NULL pointer dereference at virtual address 00000008 [ 1384.972090] pgd = c0004000 [ 1384.972125] [00000008] *pgd=00000000 [ 1384.972137] Internal error: Oops: 17 [#1] PREEMPT SMP ARM [ 1384.972144] Modules linked in: [ 1384.972156] CPU: 0 PID: 841 Comm: krfcommd Not tainted 3.10.14-gdf22a71-dirty #435 [ 1384.972162] task: df29a100 ti: df178000 task.ti: df178000 [ 1384.972182] PC is at l2cap_create_basic_pdu+0x30/0x1ac [ 1384.972191] LR is at l2cap_chan_send+0x100/0x1d4 [ 1384.972198] pc : [] lr : [] psr: 40000113 [ 1384.972198] sp : df179d40 ip : c083a010 fp : 00000008 [ 1384.972202] r10: 00000004 r9 : 0000065a r8 : 000003f5 [ 1384.972206] r7 : 00000000 r6 : 00000000 r5 : df179e84 r4 : da557000 [ 1384.972210] r3 : 00000000 r2 : 00000004 r1 : df179e84 r0 : 00000000 [ 1384.972215] Flags: nZcv IRQs on FIQs on Mode SVC_32 ISA ARM Segment kernel [ 1384.972220] Control: 10c53c7d Table: 5c8b004a DAC: 00000015 [ 1384.972224] Process krfcommd (pid: 841, stack limit = 0xdf178238) [ 1384.972229] Stack: (0xdf179d40 to 0xdf17a000) [ 1384.972238] 9d40: 00000000 da557000 00000004 df179e84 00000004 000003f5 0000065a 00000000 [ 1384.972245] 9d60: 00000008 c0521c78 df179e84 da557000 00000004 da557204 de0c6800 df179e84 [ 1384.972253] 9d80: da557000 00000004 da557204 c0526b7c 00000004 df724000 df179e84 00000004 [ 1384.972260] 9da0: df179db0 df29a100 c083bc48 c045481c 00000001 00000000 00000000 00000000 [ 1384.972267] 9dc0: 00000000 df29a100 00000000 00000000 00000000 00000000 df179e10 00000000 [ 1384.972274] 9de0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 [ 1384.972281] 9e00: 00000000 00000000 00000000 00000000 df179e4c c000ec80 c0b538c0 00000004 [ 1384.972288] 9e20: df724000 df178000 00000000 df179e84 c0b538c0 00000000 df178000 c07f4570 [ 1384.972295] 9e40: dcad9c00 df179e74 c07f4394 df179e60 df178000 00000000 df179e84 de247010 [ 1384.972303] 9e60: 00000043 c0454dec 00000001 00000004 df315c00 c0530598 00000004 df315c0c [ 1384.972310] 9e80: ffffc32c 00000000 00000000 df179ea0 00000001 00000000 00000000 00000000 [ 1384.972317] 9ea0: df179ebc 00000004 df315c00 c05df838 00000000 c0530810 c07d08c0 d7017303 [ 1384.972325] 9ec0: 6ec245b9 00000000 df315c00 c0531b04 c07f3fe0 c07f4018 da67a300 df315c00 [ 1384.972332] 9ee0: 00000000 c05334e0 df315c00 df315b80 df315c00 de0c6800 da67a300 00000000 [ 1384.972339] 9f00: de0c684c c0533674 df204100 df315c00 df315c00 df204100 df315c00 c082b138 [ 1384.972347] 9f20: c053385c c0533754 a0000113 df178000 00000001 c083bc48 00000000 c053385c [ 1384.972354] 9f40: 00000000 00000000 00000000 c05338c4 00000000 df9f0000 df9f5ee4 df179f6c [ 1384.972360] 9f60: df178000 c0049db4 00000000 00000000 c07f3ff8 00000000 00000000 00000000 [ 1384.972368] 9f80: df179f80 df179f80 00000000 00000000 df179f90 df179f90 df9f5ee4 c0049cfc [ 1384.972374] 9fa0: 00000000 00000000 00000000 c000f168 00000000 00000000 00000000 00000000 [ 1384.972381] 9fc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 [ 1384.972388] 9fe0: 00000000 00000000 00000000 00000000 00000013 00000000 00010000 00000600 [ 1384.972411] [] (l2cap_create_basic_pdu+0x30/0x1ac) from [] (l2cap_chan_send+0x100/0x1d4) [ 1384.972425] [] (l2cap_chan_send+0x100/0x1d4) from [] (l2cap_sock_sendmsg+0xa8/0x104) [ 1384.972440] [] (l2cap_sock_sendmsg+0xa8/0x104) from [] (sock_sendmsg+0xac/0xcc) [ 1384.972453] [] (sock_sendmsg+0xac/0xcc) from [] (kernel_sendmsg+0x2c/0x34) [ 1384.972469] [] (kernel_sendmsg+0x2c/0x34) from [] (rfcomm_send_frame+0x58/0x7c) [ 1384.972481] [] (rfcomm_send_frame+0x58/0x7c) from [] (rfcomm_send_ua+0x98/0xbc) [ 1384.972494] [] (rfcomm_send_ua+0x98/0xbc) from [] (rfcomm_recv_disc+0xac/0x100) [ 1384.972506] [] (rfcomm_recv_disc+0xac/0x100) from [] (rfcomm_recv_frame+0x144/0x264) [ 1384.972519] [] (rfcomm_recv_frame+0x144/0x264) from [] (rfcomm_process_rx+0x74/0xfc) [ 1384.972531] [] (rfcomm_process_rx+0x74/0xfc) from [] (rfcomm_process_sessions+0x58/0x160) [ 1384.972543] [] (rfcomm_process_sessions+0x58/0x160) from [] (rfcomm_run+0x68/0x110) [ 1384.972558] [] (rfcomm_run+0x68/0x110) from [] (kthread+0xb8/0xbc) [ 1384.972576] [] (kthread+0xb8/0xbc) from [] (ret_from_fork+0x14/0x2c) [ 1384.972586] Code: e3100004 e1a07003 e5946000 1a000057 (e5969008) [ 1384.972614] ---[ end trace 6170b7ce00144e8c ]--- Signed-off-by: Seung-Woo Kim Signed-off-by: Johan Hedberg --- net/bluetooth/l2cap_core.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 63fa11109a1..11b5d097f60 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -2452,6 +2452,9 @@ int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len, int err; struct sk_buff_head seg_queue; + if (!chan->conn) + return -ENOTCONN; + /* Connectionless channel */ if (chan->chan_type == L2CAP_CHAN_CONN_LESS) { skb = l2cap_create_connless_pdu(chan, msg, len, priority); -- cgit v1.2.3-70-g09d2 From 86ca9eac31d0d8c4fe61b5726e6d63197bc435a6 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Tue, 5 Nov 2013 11:30:39 +0200 Subject: Bluetooth: Fix rejecting SMP security request in slave role The SMP security request is for a slave role device to request the master role device to initiate a pairing request. If we receive this command while we're in the slave role we should reject it appropriately. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/smp.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index b5562abdd6e..ccad6c1e3ec 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -739,6 +739,9 @@ static u8 smp_cmd_security_req(struct l2cap_conn *conn, struct sk_buff *skb) BT_DBG("conn %p", conn); + if (!(conn->hcon->link_mode & HCI_LM_MASTER)) + return SMP_CMD_NOTSUPP; + hcon->pending_sec_level = authreq_to_seclevel(rp->auth_req); if (smp_ltk_encrypt(conn, hcon->pending_sec_level)) -- cgit v1.2.3-70-g09d2 From 4819224853dff325f0aabdb3dc527d768fa482e3 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 6 Nov 2013 21:48:22 -0800 Subject: netfilter: fix connlimit Kconfig prompt string Under Core Netfilter Configuration, connlimit match support has an extra double quote at the end of it. Fixes a portion of kernel bugzilla #52671: https://bugzilla.kernel.org/show_bug.cgi?id=52671 Signed-off-by: Randy Dunlap Reported-by: lailavrazda1979@gmail.com Signed-off-by: Pablo Neira Ayuso --- net/netfilter/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 48acec17e27..c3398cd99b9 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -909,7 +909,7 @@ config NETFILTER_XT_MATCH_CONNLABEL connection simultaneously. config NETFILTER_XT_MATCH_CONNLIMIT - tristate '"connlimit" match support"' + tristate '"connlimit" match support' depends on NF_CONNTRACK depends on NETFILTER_ADVANCED ---help--- -- cgit v1.2.3-70-g09d2 From b26d4cd385fc51e8844e2cdf9ba2051f5bba11a5 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 25 Oct 2013 18:47:37 -0400 Subject: consolidate simple ->d_delete() instances Rename simple_delete_dentry() to always_delete_dentry() and export it. Export simple_dentry_operations, while we are at it, and get rid of their duplicates Signed-off-by: Al Viro --- arch/ia64/kernel/perfmon.c | 8 +------- fs/9p/vfs_dentry.c | 19 +------------------ fs/configfs/dir.c | 12 +----------- fs/efivarfs/super.c | 11 +---------- fs/hostfs/hostfs_kern.c | 11 +---------- fs/libfs.c | 12 +++++++----- fs/proc/generic.c | 18 +----------------- fs/proc/namespaces.c | 8 +------- include/linux/fs.h | 2 ++ kernel/cgroup.c | 7 +------ net/sunrpc/rpc_pipe.c | 11 +---------- 11 files changed, 18 insertions(+), 101 deletions(-) (limited to 'net') diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 5a9ff1c3c3e..cb592773c78 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -2166,12 +2166,6 @@ static const struct file_operations pfm_file_ops = { .flush = pfm_flush }; -static int -pfmfs_delete_dentry(const struct dentry *dentry) -{ - return 1; -} - static char *pfmfs_dname(struct dentry *dentry, char *buffer, int buflen) { return dynamic_dname(dentry, buffer, buflen, "pfm:[%lu]", @@ -2179,7 +2173,7 @@ static char *pfmfs_dname(struct dentry *dentry, char *buffer, int buflen) } static const struct dentry_operations pfmfs_dentry_operations = { - .d_delete = pfmfs_delete_dentry, + .d_delete = always_delete_dentry, .d_dname = pfmfs_dname, }; diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c index f039b104a98..b03dd23feda 100644 --- a/fs/9p/vfs_dentry.c +++ b/fs/9p/vfs_dentry.c @@ -42,23 +42,6 @@ #include "v9fs_vfs.h" #include "fid.h" -/** - * v9fs_dentry_delete - called when dentry refcount equals 0 - * @dentry: dentry in question - * - * By returning 1 here we should remove cacheing of unused - * dentry components. - * - */ - -static int v9fs_dentry_delete(const struct dentry *dentry) -{ - p9_debug(P9_DEBUG_VFS, " dentry: %s (%p)\n", - dentry->d_name.name, dentry); - - return 1; -} - /** * v9fs_cached_dentry_delete - called when dentry refcount equals 0 * @dentry: dentry in question @@ -134,6 +117,6 @@ const struct dentry_operations v9fs_cached_dentry_operations = { }; const struct dentry_operations v9fs_dentry_operations = { - .d_delete = v9fs_dentry_delete, + .d_delete = always_delete_dentry, .d_release = v9fs_dentry_release, }; diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 277bd1be21f..4522e075577 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -66,19 +66,9 @@ static void configfs_d_iput(struct dentry * dentry, iput(inode); } -/* - * We _must_ delete our dentries on last dput, as the chain-to-parent - * behavior is required to clear the parents of default_groups. - */ -static int configfs_d_delete(const struct dentry *dentry) -{ - return 1; -} - const struct dentry_operations configfs_dentry_ops = { .d_iput = configfs_d_iput, - /* simple_delete_dentry() isn't exported */ - .d_delete = configfs_d_delete, + .d_delete = always_delete_dentry, }; #ifdef CONFIG_LOCKDEP diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c index a8766b880c0..becc725a195 100644 --- a/fs/efivarfs/super.c +++ b/fs/efivarfs/super.c @@ -83,19 +83,10 @@ static int efivarfs_d_hash(const struct dentry *dentry, struct qstr *qstr) return 0; } -/* - * Retaining negative dentries for an in-memory filesystem just wastes - * memory and lookup time: arrange for them to be deleted immediately. - */ -static int efivarfs_delete_dentry(const struct dentry *dentry) -{ - return 1; -} - static struct dentry_operations efivarfs_d_ops = { .d_compare = efivarfs_d_compare, .d_hash = efivarfs_d_hash, - .d_delete = efivarfs_delete_dentry, + .d_delete = always_delete_dentry, }; static struct dentry *efivarfs_alloc_dentry(struct dentry *parent, char *name) diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 25437280a20..db23ce1bd90 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -33,15 +33,6 @@ static inline struct hostfs_inode_info *HOSTFS_I(struct inode *inode) #define FILE_HOSTFS_I(file) HOSTFS_I(file_inode(file)) -static int hostfs_d_delete(const struct dentry *dentry) -{ - return 1; -} - -static const struct dentry_operations hostfs_dentry_ops = { - .d_delete = hostfs_d_delete, -}; - /* Changed in hostfs_args before the kernel starts running */ static char *root_ino = ""; static int append = 0; @@ -925,7 +916,7 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent) sb->s_blocksize_bits = 10; sb->s_magic = HOSTFS_SUPER_MAGIC; sb->s_op = &hostfs_sbops; - sb->s_d_op = &hostfs_dentry_ops; + sb->s_d_op = &simple_dentry_operations; sb->s_maxbytes = MAX_LFS_FILESIZE; /* NULL is printed as by sprintf: avoid that. */ diff --git a/fs/libfs.c b/fs/libfs.c index 5de06947ba5..a1844244246 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -47,10 +47,16 @@ EXPORT_SYMBOL(simple_statfs); * Retaining negative dentries for an in-memory filesystem just wastes * memory and lookup time: arrange for them to be deleted immediately. */ -static int simple_delete_dentry(const struct dentry *dentry) +int always_delete_dentry(const struct dentry *dentry) { return 1; } +EXPORT_SYMBOL(always_delete_dentry); + +const struct dentry_operations simple_dentry_operations = { + .d_delete = always_delete_dentry, +}; +EXPORT_SYMBOL(simple_dentry_operations); /* * Lookup the data. This is trivial - if the dentry didn't already @@ -58,10 +64,6 @@ static int simple_delete_dentry(const struct dentry *dentry) */ struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { - static const struct dentry_operations simple_dentry_operations = { - .d_delete = simple_delete_dentry, - }; - if (dentry->d_name.len > NAME_MAX) return ERR_PTR(-ENAMETOOLONG); if (!dentry->d_sb->s_d_op) diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 737e15615b0..cca93b6fb9a 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -174,22 +174,6 @@ static const struct inode_operations proc_link_inode_operations = { .follow_link = proc_follow_link, }; -/* - * As some entries in /proc are volatile, we want to - * get rid of unused dentries. This could be made - * smarter: we could keep a "volatile" flag in the - * inode to indicate which ones to keep. - */ -static int proc_delete_dentry(const struct dentry * dentry) -{ - return 1; -} - -static const struct dentry_operations proc_dentry_operations = -{ - .d_delete = proc_delete_dentry, -}; - /* * Don't create negative dentries here, return -ENOENT by hand * instead. @@ -209,7 +193,7 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir, inode = proc_get_inode(dir->i_sb, de); if (!inode) return ERR_PTR(-ENOMEM); - d_set_d_op(dentry, &proc_dentry_operations); + d_set_d_op(dentry, &simple_dentry_operations); d_add(dentry, inode); return NULL; } diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index 49a7fff2e83..9ae46b87470 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c @@ -42,12 +42,6 @@ static const struct inode_operations ns_inode_operations = { .setattr = proc_setattr, }; -static int ns_delete_dentry(const struct dentry *dentry) -{ - /* Don't cache namespace inodes when not in use */ - return 1; -} - static char *ns_dname(struct dentry *dentry, char *buffer, int buflen) { struct inode *inode = dentry->d_inode; @@ -59,7 +53,7 @@ static char *ns_dname(struct dentry *dentry, char *buffer, int buflen) const struct dentry_operations ns_dentry_operations = { - .d_delete = ns_delete_dentry, + .d_delete = always_delete_dentry, .d_dname = ns_dname, }; diff --git a/include/linux/fs.h b/include/linux/fs.h index bf5d574ebdf..121f11f001c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2622,7 +2622,9 @@ extern int simple_write_begin(struct file *file, struct address_space *mapping, extern int simple_write_end(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, struct page *page, void *fsdata); +extern int always_delete_dentry(const struct dentry *); extern struct inode *alloc_anon_inode(struct super_block *); +extern const struct dentry_operations simple_dentry_operations; extern struct dentry *simple_lookup(struct inode *, struct dentry *, unsigned int flags); extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *); diff --git a/kernel/cgroup.c b/kernel/cgroup.c index e0839bcd48c..4c62513fe19 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -895,11 +895,6 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode) iput(inode); } -static int cgroup_delete(const struct dentry *d) -{ - return 1; -} - static void remove_dir(struct dentry *d) { struct dentry *parent = dget(d->d_parent); @@ -1486,7 +1481,7 @@ static int cgroup_get_rootdir(struct super_block *sb) { static const struct dentry_operations cgroup_dops = { .d_iput = cgroup_diput, - .d_delete = cgroup_delete, + .d_delete = always_delete_dentry, }; struct inode *inode = diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index d0d14a04dce..bf04b30a788 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -471,15 +471,6 @@ struct rpc_filelist { umode_t mode; }; -static int rpc_delete_dentry(const struct dentry *dentry) -{ - return 1; -} - -static const struct dentry_operations rpc_dentry_operations = { - .d_delete = rpc_delete_dentry, -}; - static struct inode * rpc_get_inode(struct super_block *sb, umode_t mode) { @@ -1266,7 +1257,7 @@ rpc_fill_super(struct super_block *sb, void *data, int silent) sb->s_blocksize_bits = PAGE_CACHE_SHIFT; sb->s_magic = RPCAUTH_GSSMAGIC; sb->s_op = &s_ops; - sb->s_d_op = &rpc_dentry_operations; + sb->s_d_op = &simple_dentry_operations; sb->s_time_gran = 1; inode = rpc_get_inode(sb, S_IFDIR | S_IRUGO | S_IXUGO); -- cgit v1.2.3-70-g09d2 From a6441b7a39f18acb68c83cd738f1310881aa8a0b Mon Sep 17 00:00:00 2001 From: Martin Topholm Date: Thu, 14 Nov 2013 15:35:30 +0100 Subject: netfilter: synproxy: send mss option to backend When the synproxy_parse_options is called on the client ack the mss option will not be present. Consequently mss wont be included in the backend syn packet, which falls back to 536 bytes mss. Therefore XT_SYNPROXY_OPT_MSS is explicitly flagged when recovering mss value from cookie. Signed-off-by: Martin Topholm Reviewed-by: Jesper Dangaard Brouer Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/ipt_SYNPROXY.c | 1 + net/ipv6/netfilter/ip6t_SYNPROXY.c | 1 + 2 files changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c index 01cffeaa008..f13bd91d9a5 100644 --- a/net/ipv4/netfilter/ipt_SYNPROXY.c +++ b/net/ipv4/netfilter/ipt_SYNPROXY.c @@ -244,6 +244,7 @@ synproxy_recv_client_ack(const struct synproxy_net *snet, this_cpu_inc(snet->stats->cookie_valid); opts->mss = mss; + opts->options |= XT_SYNPROXY_OPT_MSS; if (opts->options & XT_SYNPROXY_OPT_TIMESTAMP) synproxy_check_timestamp_cookie(opts); diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c index bf9f612c1bc..f78f41aca8e 100644 --- a/net/ipv6/netfilter/ip6t_SYNPROXY.c +++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c @@ -259,6 +259,7 @@ synproxy_recv_client_ack(const struct synproxy_net *snet, this_cpu_inc(snet->stats->cookie_valid); opts->mss = mss; + opts->options |= XT_SYNPROXY_OPT_MSS; if (opts->options & XT_SYNPROXY_OPT_TIMESTAMP) synproxy_check_timestamp_cookie(opts); -- cgit v1.2.3-70-g09d2 From c1898c4c295b735c05af4c09664993fd8f257c2b Mon Sep 17 00:00:00 2001 From: Martin Topholm Date: Thu, 14 Nov 2013 15:35:31 +0100 Subject: netfilter: synproxy: correct wscale option passing Timestamp are used to store additional syncookie parameters such as sack, ecn, and wscale. The wscale value we need to encode is the client's wscale, since we can't recover that later in the session. Next overwrite the wscale option so the later synproxy_send_client_synack will send the backend's wscale to the client. Signed-off-by: Martin Topholm Reviewed-by: Jesper Dangaard Brouer Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_synproxy_core.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index cdf4567ba9b..9858e3e51a3 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -151,9 +151,10 @@ void synproxy_init_timestamp_cookie(const struct xt_synproxy_info *info, opts->tsecr = opts->tsval; opts->tsval = tcp_time_stamp & ~0x3f; - if (opts->options & XT_SYNPROXY_OPT_WSCALE) - opts->tsval |= info->wscale; - else + if (opts->options & XT_SYNPROXY_OPT_WSCALE) { + opts->tsval |= opts->wscale; + opts->wscale = info->wscale; + } else opts->tsval |= 0xf; if (opts->options & XT_SYNPROXY_OPT_SACK_PERM) -- cgit v1.2.3-70-g09d2 From 23dfe136e2bf8d9ea1095704c535368a9bc721da Mon Sep 17 00:00:00 2001 From: Phil Oester Date: Sat, 16 Nov 2013 20:37:46 -0800 Subject: netfilter: fix wrong byte order in nf_ct_seqadj_set internal information In commit 41d73ec053d2, sequence number adjustments were moved to a separate file. Unfortunately, the sequence numbers that are stored in the nf_ct_seqadj structure are expressed in host byte order. The necessary ntohl call was removed when the call to adjust_tcp_sequence was collapsed into nf_ct_seqadj_set. This broke the FTP NAT helper. Fix it by adding back the byte order conversions. Reported-by: Dawid Stawiarski Signed-off-by: Phil Oester Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_seqadj.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_seqadj.c b/net/netfilter/nf_conntrack_seqadj.c index 5f9bfd060de..17c1bcb182c 100644 --- a/net/netfilter/nf_conntrack_seqadj.c +++ b/net/netfilter/nf_conntrack_seqadj.c @@ -41,8 +41,8 @@ int nf_ct_seqadj_set(struct nf_conn *ct, enum ip_conntrack_info ctinfo, spin_lock_bh(&ct->lock); this_way = &seqadj->seq[dir]; if (this_way->offset_before == this_way->offset_after || - before(this_way->correction_pos, seq)) { - this_way->correction_pos = seq; + before(this_way->correction_pos, ntohl(seq))) { + this_way->correction_pos = ntohl(seq); this_way->offset_before = this_way->offset_after; this_way->offset_after += off; } -- cgit v1.2.3-70-g09d2 From 8691a9a3382f17fbf1ed808c956672c70369a2e0 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 16 Nov 2013 22:16:47 +0100 Subject: netfilter: nft_compat: fix error path in nft_parse_compat() The patch 0ca743a55991: "netfilter: nf_tables: add compatibility layer for x_tables", leads to the following Smatch warning: "net/netfilter/nft_compat.c:140 nft_parse_compat() warn: signedness bug returning '(-34)'" This nft_parse_compat function returns error codes but the return type is u8 so the error codes are transformed into small positive values. The callers don't check the return. Reported-by: Dan Carpenter Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_compat.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index a82667c6472..da0c1f4ada1 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -128,7 +128,7 @@ static const struct nla_policy nft_rule_compat_policy[NFTA_RULE_COMPAT_MAX + 1] [NFTA_RULE_COMPAT_FLAGS] = { .type = NLA_U32 }, }; -static u8 nft_parse_compat(const struct nlattr *attr, bool *inv) +static int nft_parse_compat(const struct nlattr *attr, u8 *proto, bool *inv) { struct nlattr *tb[NFTA_RULE_COMPAT_MAX+1]; u32 flags; @@ -148,7 +148,8 @@ static u8 nft_parse_compat(const struct nlattr *attr, bool *inv) if (flags & NFT_RULE_COMPAT_F_INV) *inv = true; - return ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_PROTO])); + *proto = ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_PROTO])); + return 0; } static int @@ -166,8 +167,11 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr, target_compat_from_user(target, nla_data(tb[NFTA_TARGET_INFO]), info); - if (ctx->nla[NFTA_RULE_COMPAT]) - proto = nft_parse_compat(ctx->nla[NFTA_RULE_COMPAT], &inv); + if (ctx->nla[NFTA_RULE_COMPAT]) { + ret = nft_parse_compat(ctx->nla[NFTA_RULE_COMPAT], &proto, &inv); + if (ret < 0) + goto err; + } nft_target_set_tgchk_param(&par, ctx, target, info, &e, proto, inv); @@ -356,8 +360,11 @@ nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr, match_compat_from_user(match, nla_data(tb[NFTA_MATCH_INFO]), info); - if (ctx->nla[NFTA_RULE_COMPAT]) - proto = nft_parse_compat(ctx->nla[NFTA_RULE_COMPAT], &inv); + if (ctx->nla[NFTA_RULE_COMPAT]) { + ret = nft_parse_compat(ctx->nla[NFTA_RULE_COMPAT], &proto, &inv); + if (ret < 0) + goto err; + } nft_match_set_mtchk_param(&par, ctx, match, info, &e, proto, inv); -- cgit v1.2.3-70-g09d2 From 0c3c6c00c69649f4749642b3e5d82125fde1600c Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 18 Nov 2013 12:53:59 +0100 Subject: netfilter: nf_conntrack: decrement global counter after object release nf_conntrack_free() decrements our counter (net->ct.count) before releasing the conntrack object. That counter is used in the nf_conntrack_cleanup_net_list path to check if it's time to kmem_cache_destroy our cache of conntrack objects. I think we have a race there that should be easier to trigger (although still hard) with CONFIG_DEBUG_OBJECTS_FREE as object releases become slowier according to the following splat: [ 1136.321305] WARNING: CPU: 2 PID: 2483 at lib/debugobjects.c:260 debug_print_object+0x83/0xa0() [ 1136.321311] ODEBUG: free active (active state 0) object type: timer_list hint: delayed_work_timer_fn+0x0/0x20 ... [ 1136.321390] Call Trace: [ 1136.321398] [] dump_stack+0x45/0x56 [ 1136.321405] [] warn_slowpath_common+0x78/0xa0 [ 1136.321410] [] warn_slowpath_fmt+0x47/0x50 [ 1136.321414] [] debug_print_object+0x83/0xa0 [ 1136.321420] [] ? execute_in_process_context+0x90/0x90 [ 1136.321424] [] debug_check_no_obj_freed+0x20b/0x250 [ 1136.321429] [] ? kmem_cache_destroy+0x92/0x100 [ 1136.321433] [] kmem_cache_free+0x125/0x210 [ 1136.321436] [] kmem_cache_destroy+0x92/0x100 [ 1136.321443] [] nf_conntrack_cleanup_net_list+0x126/0x160 [nf_conntrack] [ 1136.321449] [] nf_conntrack_pernet_exit+0x6d/0x80 [nf_conntrack] [ 1136.321453] [] ops_exit_list.isra.3+0x53/0x60 [ 1136.321457] [] cleanup_net+0x100/0x1b0 [ 1136.321460] [] process_one_work+0x18e/0x430 [ 1136.321463] [] worker_thread+0x119/0x390 [ 1136.321467] [] ? manage_workers.isra.23+0x2a0/0x2a0 [ 1136.321470] [] kthread+0xbb/0xc0 [ 1136.321472] [] ? kthread_create_on_node+0x110/0x110 [ 1136.321477] [] ret_from_fork+0x7c/0xb0 [ 1136.321479] [] ? kthread_create_on_node+0x110/0x110 [ 1136.321481] ---[ end trace 25f53c192da70825 ]--- Reported-by: Linus Torvalds Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index e22d950c60b..43549eb7a7b 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -764,9 +764,10 @@ void nf_conntrack_free(struct nf_conn *ct) struct net *net = nf_ct_net(ct); nf_ct_ext_destroy(ct); - atomic_dec(&net->ct.count); nf_ct_ext_free(ct); kmem_cache_free(net->ct.nf_conntrack_cachep, ct); + smp_mb__before_atomic_dec(); + atomic_dec(&net->ct.count); } EXPORT_SYMBOL_GPL(nf_conntrack_free); -- cgit v1.2.3-70-g09d2 From acab78b99633f12aa2b697474562e19c5718a1ca Mon Sep 17 00:00:00 2001 From: Luís Fernando Cornachioni Estrozi Date: Wed, 6 Nov 2013 21:39:32 +0000 Subject: netfilter: ebt_ip6: fix source and destination matching MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This bug was introduced on commit 0898f99a2. This just recovers two checks that existed before as suggested by Bart De Schuymer. Signed-off-by: Luís Fernando Cornachioni Estrozi Signed-off-by: Bart De Schuymer Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/ebt_ip6.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c index 99c85668f55..17fd5f2cb4b 100644 --- a/net/bridge/netfilter/ebt_ip6.c +++ b/net/bridge/netfilter/ebt_ip6.c @@ -48,10 +48,12 @@ ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param *par) if (info->bitmask & EBT_IP6_TCLASS && FWINV(info->tclass != ipv6_get_dsfield(ih6), EBT_IP6_TCLASS)) return false; - if (FWINV(ipv6_masked_addr_cmp(&ih6->saddr, &info->smsk, - &info->saddr), EBT_IP6_SOURCE) || + if ((info->bitmask & EBT_IP6_SOURCE && + FWINV(ipv6_masked_addr_cmp(&ih6->saddr, &info->smsk, + &info->saddr), EBT_IP6_SOURCE)) || + (info->bitmask & EBT_IP6_DEST && FWINV(ipv6_masked_addr_cmp(&ih6->daddr, &info->dmsk, - &info->daddr), EBT_IP6_DEST)) + &info->daddr), EBT_IP6_DEST))) return false; if (info->bitmask & EBT_IP6_PROTO) { uint8_t nexthdr = ih6->nexthdr; -- cgit v1.2.3-70-g09d2 From dcdfdf56b4a6c9437fc37dbc9cee94a788f9b0c4 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 19 Nov 2013 19:12:34 -0800 Subject: ipv4: fix race in concurrent ip_route_input_slow() CPUs can ask for local route via ip_route_input_noref() concurrently. if nh_rth_input is not cached yet, CPUs will proceed to allocate equivalent DSTs on 'lo' and then will try to cache them in nh_rth_input via rt_cache_route() Most of the time they succeed, but on occasion the following two lines: orig = *p; prev = cmpxchg(p, orig, rt); in rt_cache_route() do race and one of the cpus fails to complete cmpxchg. But ip_route_input_slow() doesn't check the return code of rt_cache_route(), so dst is leaking. dst_destroy() is never called and 'lo' device refcnt doesn't go to zero, which can be seen in the logs as: unregister_netdevice: waiting for lo to become free. Usage count = 1 Adding mdelay() between above two lines makes it easily reproducible. Fix it similar to nh_pcpu_rth_output case. Fixes: d2d68ba9fe8b ("ipv4: Cache input routes in fib_info nexthops.") Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- net/ipv4/route.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index f428935c50d..f8da2827801 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1776,8 +1776,12 @@ local_input: rth->dst.error= -err; rth->rt_flags &= ~RTCF_LOCAL; } - if (do_cache) - rt_cache_route(&FIB_RES_NH(res), rth); + if (do_cache) { + if (unlikely(!rt_cache_route(&FIB_RES_NH(res), rth))) { + rth->dst.flags |= DST_NOCACHE; + rt_add_uncached_list(rth); + } + } skb_dst_set(skb, &rth->dst); err = 0; goto out; -- cgit v1.2.3-70-g09d2 From d2615bf450694c1302d86b9cc8a8958edfe4c3a4 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Tue, 19 Nov 2013 20:47:15 -0500 Subject: net: core: Always propagate flag changes to interfaces The following commit: b6c40d68ff6498b7f63ddf97cf0aa818d748dee7 net: only invoke dev->change_rx_flags when device is UP tried to fix a problem with VLAN devices and promiscuouse flag setting. The issue was that VLAN device was setting a flag on an interface that was down, thus resulting in bad promiscuity count. This commit blocked flag propagation to any device that is currently down. A later commit: deede2fabe24e00bd7e246eb81cd5767dc6fcfc7 vlan: Don't propagate flag changes on down interfaces fixed VLAN code to only propagate flags when the VLAN interface is up, thus fixing the same issue as above, only localized to VLAN. The problem we have now is that if we have create a complex stack involving multiple software devices like bridges, bonds, and vlans, then it is possible that the flags would not propagate properly to the physical devices. A simple examle of the scenario is the following: eth0----> bond0 ----> bridge0 ---> vlan50 If bond0 or eth0 happen to be down at the time bond0 is added to the bridge, then eth0 will never have promisc mode set which is currently required for operation as part of the bridge. As a result, packets with vlan50 will be dropped by the interface. The only 2 devices that implement the special flag handling are VLAN and DSA and they both have required code to prevent incorrect flag propagation. As a result we can remove the generic solution introduced in b6c40d68ff6498b7f63ddf97cf0aa818d748dee7 and leave it to the individual devices to decide whether they will block flag propagation or not. Reported-by: Stefan Priebe Suggested-by: Veaceslav Falico Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 7e00a7342ee..ba3b7ea5ebb 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4996,7 +4996,7 @@ static void dev_change_rx_flags(struct net_device *dev, int flags) { const struct net_device_ops *ops = dev->netdev_ops; - if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags) + if (ops->ndo_change_rx_flags) ops->ndo_change_rx_flags(dev, flags); } -- cgit v1.2.3-70-g09d2 From f873042093c0b418d2351fe142222b625c740149 Mon Sep 17 00:00:00 2001 From: Ding Tianhong Date: Sat, 7 Dec 2013 22:12:05 +0800 Subject: bridge: flush br's address entry in fdb when remove the bridge dev When the following commands are executed: brctl addbr br0 ifconfig br0 hw ether rmmod bridge The calltrace will occur: [ 563.312114] device eth1 left promiscuous mode [ 563.312188] br0: port 1(eth1) entered disabled state [ 563.468190] kmem_cache_destroy bridge_fdb_cache: Slab cache still has objects [ 563.468197] CPU: 6 PID: 6982 Comm: rmmod Tainted: G O 3.12.0-0.7-default+ #9 [ 563.468199] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007 [ 563.468200] 0000000000000880 ffff88010f111e98 ffffffff814d1c92 ffff88010f111eb8 [ 563.468204] ffffffff81148efd ffff88010f111eb8 0000000000000000 ffff88010f111ec8 [ 563.468206] ffffffffa062a270 ffff88010f111ed8 ffffffffa063ac76 ffff88010f111f78 [ 563.468209] Call Trace: [ 563.468218] [] dump_stack+0x6a/0x78 [ 563.468234] [] kmem_cache_destroy+0xfd/0x100 [ 563.468242] [] br_fdb_fini+0x10/0x20 [bridge] [ 563.468247] [] br_deinit+0x4e/0x50 [bridge] [ 563.468254] [] SyS_delete_module+0x199/0x2b0 [ 563.468259] [] system_call_fastpath+0x16/0x1b [ 570.377958] Bridge firewalling registered --------------------------- cut here ------------------------------- The reason is that when the bridge dev's address is changed, the br_fdb_change_mac_address() will add new address in fdb, but when the bridge was removed, the address entry in the fdb did not free, the bridge_fdb_cache still has objects when destroy the cache, Fix this by flushing the bridge address entry when removing the bridge. v2: according to the Toshiaki Makita and Vlad's suggestion, I only delete the vlan0 entry, it still have a leak here if the vlan id is other number, so I need to call fdb_delete_by_port(br, NULL, 1) to flush all entries whose dst is NULL for the bridge. Suggested-by: Toshiaki Makita Suggested-by: Vlad Yasevich Signed-off-by: Ding Tianhong Signed-off-by: David S. Miller --- net/bridge/br_if.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 6e6194fcd88..4bf02adb5dc 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -172,6 +172,8 @@ void br_dev_delete(struct net_device *dev, struct list_head *head) del_nbp(p); } + br_fdb_delete_by_port(br, NULL, 1); + br_vlan_flush(br); del_timer_sync(&br->gc_timer); -- cgit v1.2.3-70-g09d2 From f3d3342602f8bcbf37d7c46641cb9bca7618eb1c Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Thu, 21 Nov 2013 03:14:22 +0100 Subject: net: rework recvmsg handler msg_name and msg_namelen logic This patch now always passes msg->msg_namelen as 0. recvmsg handlers must set msg_namelen to the proper size <= sizeof(struct sockaddr_storage) to return msg_name to the user. This prevents numerous uninitialized memory leaks we had in the recvmsg handlers and makes it harder for new code to accidentally leak uninitialized memory. Optimize for the case recvfrom is called with NULL as address. We don't need to copy the address at all, so set it to NULL before invoking the recvmsg handler. We can do so, because all the recvmsg handlers must cope with the case a plain read() is called on them. read() also sets msg_name to NULL. Also document these changes in include/linux/net.h as suggested by David Miller. Changes since RFC: Set msg->msg_name = NULL if user specified a NULL in msg_name but had a non-null msg_namelen in verify_iovec/verify_compat_iovec. This doesn't affect sendto as it would bail out earlier while trying to copy-in the address. It also more naturally reflects the logic by the callers of verify_iovec. With this change in place I could remove " if (!uaddr || msg_sys->msg_namelen == 0) msg->msg_name = NULL ". This change does not alter the user visible error logic as we ignore msg_namelen as long as msg_name is NULL. Also remove two unnecessary curly brackets in ___sys_recvmsg and change comments to netdev style. Cc: David Miller Suggested-by: Eric Dumazet Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- crypto/algif_hash.c | 2 -- crypto/algif_skcipher.c | 1 - drivers/isdn/mISDN/socket.c | 13 ++++--------- drivers/net/ppp/pppoe.c | 2 -- include/linux/net.h | 8 ++++++++ net/appletalk/ddp.c | 16 +++++++--------- net/atm/common.c | 2 -- net/ax25/af_ax25.c | 4 ++-- net/bluetooth/af_bluetooth.c | 9 ++------- net/bluetooth/hci_sock.c | 2 -- net/bluetooth/rfcomm/sock.c | 1 - net/bluetooth/sco.c | 1 - net/caif/caif_socket.c | 4 ---- net/compat.c | 3 ++- net/core/iovec.c | 3 ++- net/ipx/af_ipx.c | 3 +-- net/irda/af_irda.c | 4 ---- net/iucv/af_iucv.c | 2 -- net/key/af_key.c | 1 - net/l2tp/l2tp_ppp.c | 2 -- net/llc/af_llc.c | 2 -- net/netlink/af_netlink.c | 2 -- net/netrom/af_netrom.c | 3 +-- net/nfc/llcp_sock.c | 2 -- net/nfc/rawsock.c | 2 -- net/packet/af_packet.c | 32 +++++++++++++++----------------- net/rds/recv.c | 2 -- net/rose/af_rose.c | 8 +++++--- net/rxrpc/ar-recvmsg.c | 9 ++++++--- net/socket.c | 19 +++++++++++-------- net/tipc/socket.c | 6 ------ net/unix/af_unix.c | 5 ----- net/vmw_vsock/af_vsock.c | 2 -- net/vmw_vsock/vmci_transport.c | 2 -- net/x25/af_x25.c | 3 +-- 35 files changed, 67 insertions(+), 115 deletions(-) (limited to 'net') diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c index 0262210cad3..ef5356cd280 100644 --- a/crypto/algif_hash.c +++ b/crypto/algif_hash.c @@ -161,8 +161,6 @@ static int hash_recvmsg(struct kiocb *unused, struct socket *sock, else if (len < ds) msg->msg_flags |= MSG_TRUNC; - msg->msg_namelen = 0; - lock_sock(sk); if (ctx->more) { ctx->more = 0; diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index a1c4f0a5558..6a6dfc062d2 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -432,7 +432,6 @@ static int skcipher_recvmsg(struct kiocb *unused, struct socket *sock, long copied = 0; lock_sock(sk); - msg->msg_namelen = 0; for (iov = msg->msg_iov, iovlen = msg->msg_iovlen; iovlen > 0; iovlen--, iov++) { unsigned long seglen = iov->iov_len; diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c index e47dcb9d1e9..5cefb479c70 100644 --- a/drivers/isdn/mISDN/socket.c +++ b/drivers/isdn/mISDN/socket.c @@ -117,7 +117,6 @@ mISDN_sock_recvmsg(struct kiocb *iocb, struct socket *sock, { struct sk_buff *skb; struct sock *sk = sock->sk; - struct sockaddr_mISDN *maddr; int copied, err; @@ -135,9 +134,9 @@ mISDN_sock_recvmsg(struct kiocb *iocb, struct socket *sock, if (!skb) return err; - if (msg->msg_namelen >= sizeof(struct sockaddr_mISDN)) { - msg->msg_namelen = sizeof(struct sockaddr_mISDN); - maddr = (struct sockaddr_mISDN *)msg->msg_name; + if (msg->msg_name) { + struct sockaddr_mISDN *maddr = msg->msg_name; + maddr->family = AF_ISDN; maddr->dev = _pms(sk)->dev->id; if ((sk->sk_protocol == ISDN_P_LAPD_TE) || @@ -150,11 +149,7 @@ mISDN_sock_recvmsg(struct kiocb *iocb, struct socket *sock, maddr->sapi = _pms(sk)->ch.addr & 0xFF; maddr->tei = (_pms(sk)->ch.addr >> 8) & 0xFF; } - } else { - if (msg->msg_namelen) - printk(KERN_WARNING "%s: too small namelen %d\n", - __func__, msg->msg_namelen); - msg->msg_namelen = 0; + msg->msg_namelen = sizeof(*maddr); } copied = skb->len + MISDN_HEADER_LEN; diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c index 5f66e30d982..82ee6ed954c 100644 --- a/drivers/net/ppp/pppoe.c +++ b/drivers/net/ppp/pppoe.c @@ -979,8 +979,6 @@ static int pppoe_recvmsg(struct kiocb *iocb, struct socket *sock, if (error < 0) goto end; - m->msg_namelen = 0; - if (skb) { total_len = min_t(size_t, total_len, skb->len); error = skb_copy_datagram_iovec(skb, 0, m->msg_iov, total_len); diff --git a/include/linux/net.h b/include/linux/net.h index b292a043557..4bcee94cef9 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -164,6 +164,14 @@ struct proto_ops { #endif int (*sendmsg) (struct kiocb *iocb, struct socket *sock, struct msghdr *m, size_t total_len); + /* Notes for implementing recvmsg: + * =============================== + * msg->msg_namelen should get updated by the recvmsg handlers + * iff msg_name != NULL. It is by default 0 to prevent + * returning uninitialized memory to user space. The recvfrom + * handlers can assume that msg.msg_name is either NULL or has + * a minimum size of sizeof(struct sockaddr_storage). + */ int (*recvmsg) (struct kiocb *iocb, struct socket *sock, struct msghdr *m, size_t total_len, int flags); diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 7fee50d637f..7d424ac6e76 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1735,7 +1735,6 @@ static int atalk_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr size_t size, int flags) { struct sock *sk = sock->sk; - struct sockaddr_at *sat = (struct sockaddr_at *)msg->msg_name; struct ddpehdr *ddp; int copied = 0; int offset = 0; @@ -1764,14 +1763,13 @@ static int atalk_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr } err = skb_copy_datagram_iovec(skb, offset, msg->msg_iov, copied); - if (!err) { - if (sat) { - sat->sat_family = AF_APPLETALK; - sat->sat_port = ddp->deh_sport; - sat->sat_addr.s_node = ddp->deh_snode; - sat->sat_addr.s_net = ddp->deh_snet; - } - msg->msg_namelen = sizeof(*sat); + if (!err && msg->msg_name) { + struct sockaddr_at *sat = msg->msg_name; + sat->sat_family = AF_APPLETALK; + sat->sat_port = ddp->deh_sport; + sat->sat_addr.s_node = ddp->deh_snode; + sat->sat_addr.s_net = ddp->deh_snet; + msg->msg_namelen = sizeof(*sat); } skb_free_datagram(sk, skb); /* Free the datagram. */ diff --git a/net/atm/common.c b/net/atm/common.c index 737bef59ce8..7b491006eaf 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -531,8 +531,6 @@ int vcc_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, struct sk_buff *skb; int copied, error = -EINVAL; - msg->msg_namelen = 0; - if (sock->state != SS_CONNECTED) return -ENOTCONN; diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index a00123ebb0a..7bb1605bdfd 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1636,11 +1636,11 @@ static int ax25_recvmsg(struct kiocb *iocb, struct socket *sock, skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); - if (msg->msg_namelen != 0) { - struct sockaddr_ax25 *sax = (struct sockaddr_ax25 *)msg->msg_name; + if (msg->msg_name) { ax25_digi digi; ax25_address src; const unsigned char *mac = skb_mac_header(skb); + struct sockaddr_ax25 *sax = msg->msg_name; memset(sax, 0, sizeof(struct full_sockaddr_ax25)); ax25_addr_parse(mac + 1, skb->data - mac - 1, &src, NULL, diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index f6a1671ea2f..56ca494621c 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -224,10 +224,9 @@ int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock, skb = skb_recv_datagram(sk, flags, noblock, &err); if (!skb) { - if (sk->sk_shutdown & RCV_SHUTDOWN) { - msg->msg_namelen = 0; + if (sk->sk_shutdown & RCV_SHUTDOWN) return 0; - } + return err; } @@ -245,8 +244,6 @@ int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock, if (bt_sk(sk)->skb_msg_name) bt_sk(sk)->skb_msg_name(skb, msg->msg_name, &msg->msg_namelen); - else - msg->msg_namelen = 0; } skb_free_datagram(sk, skb); @@ -295,8 +292,6 @@ int bt_sock_stream_recvmsg(struct kiocb *iocb, struct socket *sock, if (flags & MSG_OOB) return -EOPNOTSUPP; - msg->msg_namelen = 0; - BT_DBG("sk %p size %zu", sk, size); lock_sock(sk); diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 71f0be17308..6a6c8bb4fd7 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -856,8 +856,6 @@ static int hci_sock_recvmsg(struct kiocb *iocb, struct socket *sock, if (!skb) return err; - msg->msg_namelen = 0; - copied = skb->len; if (len < copied) { msg->msg_flags |= MSG_TRUNC; diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index c4d3d423f89..c80766f892c 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -615,7 +615,6 @@ static int rfcomm_sock_recvmsg(struct kiocb *iocb, struct socket *sock, if (test_and_clear_bit(RFCOMM_DEFER_SETUP, &d->flags)) { rfcomm_dlc_accept(d); - msg->msg_namelen = 0; return 0; } diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 12a0e51e21e..24fa3964b3c 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -711,7 +711,6 @@ static int sco_sock_recvmsg(struct kiocb *iocb, struct socket *sock, test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags)) { sco_conn_defer_accept(pi->conn->hcon, pi->setting); sk->sk_state = BT_CONFIG; - msg->msg_namelen = 0; release_sock(sk); return 0; diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 05a41c7ec30..d6be3edb7a4 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -286,8 +286,6 @@ static int caif_seqpkt_recvmsg(struct kiocb *iocb, struct socket *sock, if (m->msg_flags&MSG_OOB) goto read_error; - m->msg_namelen = 0; - skb = skb_recv_datagram(sk, flags, 0 , &ret); if (!skb) goto read_error; @@ -361,8 +359,6 @@ static int caif_stream_recvmsg(struct kiocb *iocb, struct socket *sock, if (flags&MSG_OOB) goto out; - msg->msg_namelen = 0; - /* * Lock the socket to prevent queue disordering * while sleeps in memcpy_tomsg diff --git a/net/compat.c b/net/compat.c index 89032580bd1..618c6a8a911 100644 --- a/net/compat.c +++ b/net/compat.c @@ -93,7 +93,8 @@ int verify_compat_iovec(struct msghdr *kern_msg, struct iovec *kern_iov, if (err < 0) return err; } - kern_msg->msg_name = kern_address; + if (kern_msg->msg_name) + kern_msg->msg_name = kern_address; } else kern_msg->msg_name = NULL; diff --git a/net/core/iovec.c b/net/core/iovec.c index 4cdb7c48dad..b61869429f4 100644 --- a/net/core/iovec.c +++ b/net/core/iovec.c @@ -48,7 +48,8 @@ int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr_storage *a if (err < 0) return err; } - m->msg_name = address; + if (m->msg_name) + m->msg_name = address; } else { m->msg_name = NULL; } diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index 7a1e0fc1bd4..e096025b477 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -1823,8 +1823,6 @@ static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock, if (skb->tstamp.tv64) sk->sk_stamp = skb->tstamp; - msg->msg_namelen = sizeof(*sipx); - if (sipx) { sipx->sipx_family = AF_IPX; sipx->sipx_port = ipx->ipx_source.sock; @@ -1832,6 +1830,7 @@ static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock, sipx->sipx_network = IPX_SKB_CB(skb)->ipx_source_net; sipx->sipx_type = ipx->ipx_type; sipx->sipx_zero = 0; + msg->msg_namelen = sizeof(*sipx); } rc = copied; diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index 0f676908d15..de7db23049f 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -1385,8 +1385,6 @@ static int irda_recvmsg_dgram(struct kiocb *iocb, struct socket *sock, IRDA_DEBUG(4, "%s()\n", __func__); - msg->msg_namelen = 0; - skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, flags & MSG_DONTWAIT, &err); if (!skb) @@ -1451,8 +1449,6 @@ static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock, target = sock_rcvlowat(sk, flags & MSG_WAITALL, size); timeo = sock_rcvtimeo(sk, noblock); - msg->msg_namelen = 0; - do { int chunk; struct sk_buff *skb = skb_dequeue(&sk->sk_receive_queue); diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 168aff5e60d..c4b7218058b 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1324,8 +1324,6 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, int err = 0; u32 offset; - msg->msg_namelen = 0; - if ((sk->sk_state == IUCV_DISCONN) && skb_queue_empty(&iucv->backlog_skb_q) && skb_queue_empty(&sk->sk_receive_queue) && diff --git a/net/key/af_key.c b/net/key/af_key.c index 911ef03bf8f..545f047868a 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -3616,7 +3616,6 @@ static int pfkey_recvmsg(struct kiocb *kiocb, if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT)) goto out; - msg->msg_namelen = 0; skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err); if (skb == NULL) goto out; diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index ffda81ef1a7..be5fadf3473 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -197,8 +197,6 @@ static int pppol2tp_recvmsg(struct kiocb *iocb, struct socket *sock, if (sk->sk_state & PPPOX_BOUND) goto end; - msg->msg_namelen = 0; - err = 0; skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, flags & MSG_DONTWAIT, &err); diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 6cba486353e..7b01b9f5846 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -720,8 +720,6 @@ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock, int target; /* Read at least this many bytes */ long timeo; - msg->msg_namelen = 0; - lock_sock(sk); copied = -ENOTCONN; if (unlikely(sk->sk_type == SOCK_STREAM && sk->sk_state == TCP_LISTEN)) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index f0176e1a5a8..bca50b95c18 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2335,8 +2335,6 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, } #endif - msg->msg_namelen = 0; - copied = data_skb->len; if (len < copied) { msg->msg_flags |= MSG_TRUNC; diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 698814bfa7a..53c19a35fc6 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -1179,10 +1179,9 @@ static int nr_recvmsg(struct kiocb *iocb, struct socket *sock, sax->sax25_family = AF_NETROM; skb_copy_from_linear_data_offset(skb, 7, sax->sax25_call.ax25_call, AX25_ADDR_LEN); + msg->msg_namelen = sizeof(*sax); } - msg->msg_namelen = sizeof(*sax); - skb_free_datagram(sk, skb); release_sock(sk); diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index d308402b67d..824c6056bf8 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -807,8 +807,6 @@ static int llcp_sock_recvmsg(struct kiocb *iocb, struct socket *sock, pr_debug("%p %zu\n", sk, len); - msg->msg_namelen = 0; - lock_sock(sk); if (sk->sk_state == LLCP_CLOSED && diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c index cd958b381f9..66bcd2eb577 100644 --- a/net/nfc/rawsock.c +++ b/net/nfc/rawsock.c @@ -244,8 +244,6 @@ static int rawsock_recvmsg(struct kiocb *iocb, struct socket *sock, if (!skb) return rc; - msg->msg_namelen = 0; - copied = skb->len; if (len < copied) { msg->msg_flags |= MSG_TRUNC; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 2e8286b47c2..61bd50adead 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2660,7 +2660,6 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, struct sock *sk = sock->sk; struct sk_buff *skb; int copied, err; - struct sockaddr_ll *sll; int vnet_hdr_len = 0; err = -EINVAL; @@ -2744,22 +2743,10 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, goto out_free; } - /* - * If the address length field is there to be filled in, we fill - * it in now. - */ - - sll = &PACKET_SKB_CB(skb)->sa.ll; - if (sock->type == SOCK_PACKET) - msg->msg_namelen = sizeof(struct sockaddr_pkt); - else - msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr); - - /* - * You lose any data beyond the buffer you gave. If it worries a - * user program they can ask the device for its MTU anyway. + /* You lose any data beyond the buffer you gave. If it worries + * a user program they can ask the device for its MTU + * anyway. */ - copied = skb->len; if (copied > len) { copied = len; @@ -2772,9 +2759,20 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, sock_recv_ts_and_drops(msg, sk, skb); - if (msg->msg_name) + if (msg->msg_name) { + /* If the address length field is there to be filled + * in, we fill it in now. + */ + if (sock->type == SOCK_PACKET) { + msg->msg_namelen = sizeof(struct sockaddr_pkt); + } else { + struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll; + msg->msg_namelen = sll->sll_halen + + offsetof(struct sockaddr_ll, sll_addr); + } memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, msg->msg_namelen); + } if (pkt_sk(sk)->auxdata) { struct tpacket_auxdata aux; diff --git a/net/rds/recv.c b/net/rds/recv.c index 9f0f17cf6bf..de339b24ca1 100644 --- a/net/rds/recv.c +++ b/net/rds/recv.c @@ -410,8 +410,6 @@ int rds_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, rdsdebug("size %zu flags 0x%x timeo %ld\n", size, msg_flags, timeo); - msg->msg_namelen = 0; - if (msg_flags & MSG_OOB) goto out; diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index e98fcfbe600..33af77246bf 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -1216,7 +1216,6 @@ static int rose_recvmsg(struct kiocb *iocb, struct socket *sock, { struct sock *sk = sock->sk; struct rose_sock *rose = rose_sk(sk); - struct sockaddr_rose *srose = (struct sockaddr_rose *)msg->msg_name; size_t copied; unsigned char *asmptr; struct sk_buff *skb; @@ -1252,8 +1251,11 @@ static int rose_recvmsg(struct kiocb *iocb, struct socket *sock, skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); - if (srose != NULL) { - memset(srose, 0, msg->msg_namelen); + if (msg->msg_name) { + struct sockaddr_rose *srose; + + memset(msg->msg_name, 0, sizeof(struct full_sockaddr_rose)); + srose = msg->msg_name; srose->srose_family = AF_ROSE; srose->srose_addr = rose->dest_addr; srose->srose_call = rose->dest_call; diff --git a/net/rxrpc/ar-recvmsg.c b/net/rxrpc/ar-recvmsg.c index 4b48687c389..898492a8d61 100644 --- a/net/rxrpc/ar-recvmsg.c +++ b/net/rxrpc/ar-recvmsg.c @@ -143,10 +143,13 @@ int rxrpc_recvmsg(struct kiocb *iocb, struct socket *sock, /* copy the peer address and timestamp */ if (!continue_call) { - if (msg->msg_name && msg->msg_namelen > 0) + if (msg->msg_name) { + size_t len = + sizeof(call->conn->trans->peer->srx); memcpy(msg->msg_name, - &call->conn->trans->peer->srx, - sizeof(call->conn->trans->peer->srx)); + &call->conn->trans->peer->srx, len); + msg->msg_namelen = len; + } sock_recv_ts_and_drops(msg, &rx->sk, skb); } diff --git a/net/socket.c b/net/socket.c index c226aceee65..fc285564e49 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1840,8 +1840,10 @@ SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size, msg.msg_iov = &iov; iov.iov_len = size; iov.iov_base = ubuf; - msg.msg_name = (struct sockaddr *)&address; - msg.msg_namelen = sizeof(address); + /* Save some cycles and don't copy the address if not needed */ + msg.msg_name = addr ? (struct sockaddr *)&address : NULL; + /* We assume all kernel code knows the size of sockaddr_storage */ + msg.msg_namelen = 0; if (sock->file->f_flags & O_NONBLOCK) flags |= MSG_DONTWAIT; err = sock_recvmsg(sock, &msg, size, flags); @@ -2221,16 +2223,14 @@ static int ___sys_recvmsg(struct socket *sock, struct msghdr __user *msg, goto out; } - /* - * Save the user-mode address (verify_iovec will change the - * kernel msghdr to use the kernel address space) + /* Save the user-mode address (verify_iovec will change the + * kernel msghdr to use the kernel address space) */ - uaddr = (__force void __user *)msg_sys->msg_name; uaddr_len = COMPAT_NAMELEN(msg); - if (MSG_CMSG_COMPAT & flags) { + if (MSG_CMSG_COMPAT & flags) err = verify_compat_iovec(msg_sys, iov, &addr, VERIFY_WRITE); - } else + else err = verify_iovec(msg_sys, iov, &addr, VERIFY_WRITE); if (err < 0) goto out_freeiov; @@ -2239,6 +2239,9 @@ static int ___sys_recvmsg(struct socket *sock, struct msghdr __user *msg, cmsg_ptr = (unsigned long)msg_sys->msg_control; msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT); + /* We assume all kernel code knows the size of sockaddr_storage */ + msg_sys->msg_namelen = 0; + if (sock->file->f_flags & O_NONBLOCK) flags |= MSG_DONTWAIT; err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys, diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 3906527259d..3b61851bb92 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -980,9 +980,6 @@ static int recv_msg(struct kiocb *iocb, struct socket *sock, goto exit; } - /* will be updated in set_orig_addr() if needed */ - m->msg_namelen = 0; - timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); restart: @@ -1091,9 +1088,6 @@ static int recv_stream(struct kiocb *iocb, struct socket *sock, goto exit; } - /* will be updated in set_orig_addr() if needed */ - m->msg_namelen = 0; - target = sock_rcvlowat(sk, flags & MSG_WAITALL, buf_len); timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index c1f403bed68..01625ccc3ae 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1754,7 +1754,6 @@ static void unix_copy_addr(struct msghdr *msg, struct sock *sk) { struct unix_sock *u = unix_sk(sk); - msg->msg_namelen = 0; if (u->addr) { msg->msg_namelen = u->addr->len; memcpy(msg->msg_name, u->addr->name, u->addr->len); @@ -1778,8 +1777,6 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, if (flags&MSG_OOB) goto out; - msg->msg_namelen = 0; - err = mutex_lock_interruptible(&u->readlock); if (err) { err = sock_intr_errno(sock_rcvtimeo(sk, noblock)); @@ -1924,8 +1921,6 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, target = sock_rcvlowat(sk, flags&MSG_WAITALL, size); timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT); - msg->msg_namelen = 0; - /* Lock the socket to prevent queue disordering * while sleeps in memcpy_tomsg */ diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 545c08b8a1d..5adfd94c5b8 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1662,8 +1662,6 @@ vsock_stream_recvmsg(struct kiocb *kiocb, vsk = vsock_sk(sk); err = 0; - msg->msg_namelen = 0; - lock_sock(sk); if (sk->sk_state != SS_CONNECTED) { diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index 9d6986634e0..687360da62d 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -1746,8 +1746,6 @@ static int vmci_transport_dgram_dequeue(struct kiocb *kiocb, if (flags & MSG_OOB || flags & MSG_ERRQUEUE) return -EOPNOTSUPP; - msg->msg_namelen = 0; - /* Retrieve the head sk_buff from the socket's receive queue. */ err = 0; skb = skb_recv_datagram(&vsk->sk, flags, noblock, &err); diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 45a3ab5612c..7622789d375 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -1340,10 +1340,9 @@ static int x25_recvmsg(struct kiocb *iocb, struct socket *sock, if (sx25) { sx25->sx25_family = AF_X25; sx25->sx25_addr = x25->dest_addr; + msg->msg_namelen = sizeof(*sx25); } - msg->msg_namelen = sizeof(struct sockaddr_x25); - x25_check_rbuf(sk); rc = copied; out_free_dgram: -- cgit v1.2.3-70-g09d2 From 68c6beb373955da0886d8f4f5995b3922ceda4be Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Thu, 21 Nov 2013 03:14:34 +0100 Subject: net: add BUG_ON if kernel advertises msg_namelen > sizeof(struct sockaddr_storage) In that case it is probable that kernel code overwrote part of the stack. So we should bail out loudly here. The BUG_ON may be removed in future if we are sure all protocols are conformant. Suggested-by: Eric Dumazet Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/socket.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/socket.c b/net/socket.c index fc285564e49..0b18693f2be 100644 --- a/net/socket.c +++ b/net/socket.c @@ -221,12 +221,13 @@ static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen, int err; int len; + BUG_ON(klen > sizeof(struct sockaddr_storage)); err = get_user(len, ulen); if (err) return err; if (len > klen) len = klen; - if (len < 0 || len > sizeof(struct sockaddr_storage)) + if (len < 0) return -EINVAL; if (len) { if (audit_sockaddr(klen, kaddr)) -- cgit v1.2.3-70-g09d2 From aec6f90d412fe30b88fdea3d1880734c837e15d4 Mon Sep 17 00:00:00 2001 From: Michael Opdenacker Date: Thu, 21 Nov 2013 04:43:11 +0100 Subject: wimax: remove dead code This removes a code line that is between a "return 0;" and an error label. This code line can never be reached. Found by Coverity (CID: 1130529) Signed-off-by: Michael Opdenacker Acked-by: Johannes Berg Signed-off-by: David S. Miller --- net/wimax/stack.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/wimax/stack.c b/net/wimax/stack.c index ef2191b969a..ec8b577db13 100644 --- a/net/wimax/stack.c +++ b/net/wimax/stack.c @@ -610,7 +610,6 @@ int __init wimax_subsys_init(void) d_fnend(4, NULL, "() = 0\n"); return 0; - genl_unregister_family(&wimax_gnl_family); error_register_family: d_fnend(4, NULL, "() = %d\n", result); return result; -- cgit v1.2.3-70-g09d2 From e40526cb20b5ee53419452e1f03d97092f144418 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 21 Nov 2013 16:50:58 +0100 Subject: packet: fix use after free race in send path when dev is released Salam reported a use after free bug in PF_PACKET that occurs when we're sending out frames on a socket bound device and suddenly the net device is being unregistered. It appears that commit 827d9780 introduced a possible race condition between {t,}packet_snd() and packet_notifier(). In the case of a bound socket, packet_notifier() can drop the last reference to the net_device and {t,}packet_snd() might end up suddenly sending a packet over a freed net_device. To avoid reverting 827d9780 and thus introducing a performance regression compared to the current state of things, we decided to hold a cached RCU protected pointer to the net device and maintain it on write side via bind spin_lock protected register_prot_hook() and __unregister_prot_hook() calls. In {t,}packet_snd() path, we access this pointer under rcu_read_lock through packet_cached_dev_get() that holds reference to the device to prevent it from being freed through packet_notifier() while we're in send path. This is okay to do as dev_put()/dev_hold() are per-cpu counters, so this should not be a performance issue. Also, the code simplifies a bit as we don't need need_rls_dev anymore. Fixes: 827d978037d7 ("af-packet: Use existing netdev reference for bound sockets.") Reported-by: Salam Noureddine Signed-off-by: Daniel Borkmann Signed-off-by: Salam Noureddine Cc: Ben Greear Cc: Eric Dumazet Signed-off-by: David S. Miller --- net/packet/af_packet.c | 59 ++++++++++++++++++++++++++++++-------------------- net/packet/internal.h | 1 + 2 files changed, 37 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 61bd50adead..ac27c86ef6d 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -244,11 +244,15 @@ static void __fanout_link(struct sock *sk, struct packet_sock *po); static void register_prot_hook(struct sock *sk) { struct packet_sock *po = pkt_sk(sk); + if (!po->running) { - if (po->fanout) + if (po->fanout) { __fanout_link(sk, po); - else + } else { dev_add_pack(&po->prot_hook); + rcu_assign_pointer(po->cached_dev, po->prot_hook.dev); + } + sock_hold(sk); po->running = 1; } @@ -266,10 +270,13 @@ static void __unregister_prot_hook(struct sock *sk, bool sync) struct packet_sock *po = pkt_sk(sk); po->running = 0; - if (po->fanout) + if (po->fanout) { __fanout_unlink(sk, po); - else + } else { __dev_remove_pack(&po->prot_hook); + RCU_INIT_POINTER(po->cached_dev, NULL); + } + __sock_put(sk); if (sync) { @@ -2052,12 +2059,24 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, return tp_len; } +static struct net_device *packet_cached_dev_get(struct packet_sock *po) +{ + struct net_device *dev; + + rcu_read_lock(); + dev = rcu_dereference(po->cached_dev); + if (dev) + dev_hold(dev); + rcu_read_unlock(); + + return dev; +} + static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) { struct sk_buff *skb; struct net_device *dev; __be16 proto; - bool need_rls_dev = false; int err, reserve = 0; void *ph; struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name; @@ -2070,7 +2089,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) mutex_lock(&po->pg_vec_lock); if (saddr == NULL) { - dev = po->prot_hook.dev; + dev = packet_cached_dev_get(po); proto = po->num; addr = NULL; } else { @@ -2084,19 +2103,17 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) proto = saddr->sll_protocol; addr = saddr->sll_addr; dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex); - need_rls_dev = true; } err = -ENXIO; if (unlikely(dev == NULL)) goto out; - - reserve = dev->hard_header_len; - err = -ENETDOWN; if (unlikely(!(dev->flags & IFF_UP))) goto out_put; + reserve = dev->hard_header_len; + size_max = po->tx_ring.frame_size - (po->tp_hdrlen - sizeof(struct sockaddr_ll)); @@ -2173,8 +2190,7 @@ out_status: __packet_set_status(po, ph, status); kfree_skb(skb); out_put: - if (need_rls_dev) - dev_put(dev); + dev_put(dev); out: mutex_unlock(&po->pg_vec_lock); return err; @@ -2212,7 +2228,6 @@ static int packet_snd(struct socket *sock, struct sk_buff *skb; struct net_device *dev; __be16 proto; - bool need_rls_dev = false; unsigned char *addr; int err, reserve = 0; struct virtio_net_hdr vnet_hdr = { 0 }; @@ -2228,7 +2243,7 @@ static int packet_snd(struct socket *sock, */ if (saddr == NULL) { - dev = po->prot_hook.dev; + dev = packet_cached_dev_get(po); proto = po->num; addr = NULL; } else { @@ -2240,19 +2255,17 @@ static int packet_snd(struct socket *sock, proto = saddr->sll_protocol; addr = saddr->sll_addr; dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex); - need_rls_dev = true; } err = -ENXIO; - if (dev == NULL) + if (unlikely(dev == NULL)) goto out_unlock; - if (sock->type == SOCK_RAW) - reserve = dev->hard_header_len; - err = -ENETDOWN; - if (!(dev->flags & IFF_UP)) + if (unlikely(!(dev->flags & IFF_UP))) goto out_unlock; + if (sock->type == SOCK_RAW) + reserve = dev->hard_header_len; if (po->has_vnet_hdr) { vnet_hdr_len = sizeof(vnet_hdr); @@ -2386,15 +2399,14 @@ static int packet_snd(struct socket *sock, if (err > 0 && (err = net_xmit_errno(err)) != 0) goto out_unlock; - if (need_rls_dev) - dev_put(dev); + dev_put(dev); return len; out_free: kfree_skb(skb); out_unlock: - if (dev && need_rls_dev) + if (dev) dev_put(dev); out: return err; @@ -2614,6 +2626,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, po = pkt_sk(sk); sk->sk_family = PF_PACKET; po->num = proto; + RCU_INIT_POINTER(po->cached_dev, NULL); sk->sk_destruct = packet_sock_destruct; sk_refcnt_debug_inc(sk); diff --git a/net/packet/internal.h b/net/packet/internal.h index c4e4b456120..1035fa2d909 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -113,6 +113,7 @@ struct packet_sock { unsigned int tp_loss:1; unsigned int tp_tx_has_off:1; unsigned int tp_tstamp; + struct net_device __rcu *cached_dev; struct packet_type prot_hook ____cacheline_aligned_in_smp; }; -- cgit v1.2.3-70-g09d2 From 220815a9665f7deca98a09ecca655044f94cfa44 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 21 Nov 2013 18:17:04 +0100 Subject: genetlink: fix genlmsg_multicast() bug Unfortunately, I introduced a tremendously stupid bug into genlmsg_multicast() when doing all those multicast group changes: it adjusts the group number, but then passes it to genlmsg_multicast_netns() which does that again. Somehow, my tests failed to catch this, so add a warning into genlmsg_multicast_netns() and remove the offending group ID adjustment. Also add a warning to the similar code in other functions so people who misuse them are more loudly warned. Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- include/net/genetlink.h | 5 +---- net/netlink/genetlink.c | 4 ++-- 2 files changed, 3 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/include/net/genetlink.h b/include/net/genetlink.h index ace4abf118d..771af09e90e 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -265,7 +265,7 @@ static inline int genlmsg_multicast_netns(struct genl_family *family, struct net *net, struct sk_buff *skb, u32 portid, unsigned int group, gfp_t flags) { - if (group >= family->n_mcgrps) + if (WARN_ON_ONCE(group >= family->n_mcgrps)) return -EINVAL; group = family->mcgrp_offset + group; return nlmsg_multicast(net->genl_sock, skb, portid, group, flags); @@ -283,9 +283,6 @@ static inline int genlmsg_multicast(struct genl_family *family, struct sk_buff *skb, u32 portid, unsigned int group, gfp_t flags) { - if (group >= family->n_mcgrps) - return -EINVAL; - group = family->mcgrp_offset + group; return genlmsg_multicast_netns(family, &init_net, skb, portid, group, flags); } diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 7dbc4f732c7..4518a57aa5f 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -1045,7 +1045,7 @@ static int genlmsg_mcast(struct sk_buff *skb, u32 portid, unsigned long group, int genlmsg_multicast_allns(struct genl_family *family, struct sk_buff *skb, u32 portid, unsigned int group, gfp_t flags) { - if (group >= family->n_mcgrps) + if (WARN_ON_ONCE(group >= family->n_mcgrps)) return -EINVAL; group = family->mcgrp_offset + group; return genlmsg_mcast(skb, portid, group, flags); @@ -1062,7 +1062,7 @@ void genl_notify(struct genl_family *family, if (nlh) report = nlmsg_report(nlh); - if (group >= family->n_mcgrps) + if (WARN_ON_ONCE(group >= family->n_mcgrps)) return; group = family->mcgrp_offset + group; nlmsg_notify(sk, skb, portid, group, report, flags); -- cgit v1.2.3-70-g09d2 From 9d8506cc2d7ea1f911c72c100193a3677f6668c3 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 21 Nov 2013 11:10:04 -0800 Subject: gso: handle new frag_list of frags GRO packets Recently GRO started generating packets with frag_lists of frags. This was not handled by GSO, thus leading to a crash. Thankfully these packets are of a regular form and are easy to handle. This patch handles them in two ways. For completely non-linear frag_list entries, we simply continue to iterate over the frag_list frags once we exhaust the normal frags. For frag_list entries with linear parts, we call pskb_trim on the first part of the frag_list skb, and then process the rest of the frags in the usual way. This patch also kills a chunk of dead frag_list code that has obviously never ever been run since it ends up generating a bogus GSO-segmented packet with a frag_list entry. Future work is planned to split super big packets into TSO ones. Fixes: 8a29111c7ca6 ("net: gro: allow to build full sized skb") Reported-by: Christoph Paasch Reported-by: Jerry Chu Reported-by: Sander Eikelenboom Signed-off-by: Herbert Xu Signed-off-by: Eric Dumazet Tested-by: Sander Eikelenboom Tested-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/skbuff.c | 75 ++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 50 insertions(+), 25 deletions(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 8cec1e6b844..2718fed53d8 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2796,6 +2796,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) struct sk_buff *segs = NULL; struct sk_buff *tail = NULL; struct sk_buff *fskb = skb_shinfo(skb)->frag_list; + skb_frag_t *skb_frag = skb_shinfo(skb)->frags; unsigned int mss = skb_shinfo(skb)->gso_size; unsigned int doffset = skb->data - skb_mac_header(skb); unsigned int offset = doffset; @@ -2835,16 +2836,38 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) if (hsize > len || !sg) hsize = len; - if (!hsize && i >= nfrags) { - BUG_ON(fskb->len != len); + if (!hsize && i >= nfrags && skb_headlen(fskb) && + (skb_headlen(fskb) == len || sg)) { + BUG_ON(skb_headlen(fskb) > len); + + i = 0; + nfrags = skb_shinfo(fskb)->nr_frags; + skb_frag = skb_shinfo(fskb)->frags; + pos += skb_headlen(fskb); + + while (pos < offset + len) { + BUG_ON(i >= nfrags); + + size = skb_frag_size(skb_frag); + if (pos + size > offset + len) + break; + + i++; + pos += size; + skb_frag++; + } - pos += len; nskb = skb_clone(fskb, GFP_ATOMIC); fskb = fskb->next; if (unlikely(!nskb)) goto err; + if (unlikely(pskb_trim(nskb, len))) { + kfree_skb(nskb); + goto err; + } + hsize = skb_end_offset(nskb); if (skb_cow_head(nskb, doffset + headroom)) { kfree_skb(nskb); @@ -2881,7 +2904,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) nskb->data - tnl_hlen, doffset + tnl_hlen); - if (fskb != skb_shinfo(skb)->frag_list) + if (nskb->len == len + doffset) goto perform_csum_check; if (!sg) { @@ -2899,8 +2922,28 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) skb_shinfo(nskb)->tx_flags = skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG; - while (pos < offset + len && i < nfrags) { - *frag = skb_shinfo(skb)->frags[i]; + while (pos < offset + len) { + if (i >= nfrags) { + BUG_ON(skb_headlen(fskb)); + + i = 0; + nfrags = skb_shinfo(fskb)->nr_frags; + skb_frag = skb_shinfo(fskb)->frags; + + BUG_ON(!nfrags); + + fskb = fskb->next; + } + + if (unlikely(skb_shinfo(nskb)->nr_frags >= + MAX_SKB_FRAGS)) { + net_warn_ratelimited( + "skb_segment: too many frags: %u %u\n", + pos, mss); + goto err; + } + + *frag = *skb_frag; __skb_frag_ref(frag); size = skb_frag_size(frag); @@ -2913,6 +2956,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) if (pos + size <= offset + len) { i++; + skb_frag++; pos += size; } else { skb_frag_size_sub(frag, pos + size - (offset + len)); @@ -2922,25 +2966,6 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) frag++; } - if (pos < offset + len) { - struct sk_buff *fskb2 = fskb; - - BUG_ON(pos + fskb->len != offset + len); - - pos += fskb->len; - fskb = fskb->next; - - if (fskb2->next) { - fskb2 = skb_clone(fskb2, GFP_ATOMIC); - if (!fskb2) - goto err; - } else - skb_get(fskb2); - - SKB_FRAG_ASSERT(nskb); - skb_shinfo(nskb)->frag_list = fskb2; - } - skip_fraglist: nskb->data_len = len - hsize; nskb->len += nskb->data_len; -- cgit v1.2.3-70-g09d2 From 044c8d4b15743f6e0a4cb6f2aeb32745a116ebff Mon Sep 17 00:00:00 2001 From: Yuanhan Liu Date: Thu, 21 Nov 2013 14:32:01 -0800 Subject: kernel: remove CONFIG_USE_GENERIC_SMP_HELPERS cleanly Remove CONFIG_USE_GENERIC_SMP_HELPERS left by commit 0a06ff068f12 ("kernel: remove CONFIG_USE_GENERIC_SMP_HELPERS"). Signed-off-by: Yuanhan Liu Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/null_blk.c | 8 ++++---- net/Kconfig | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index b5d842370cc..ea192ec029c 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -223,7 +223,7 @@ static void null_softirq_done_fn(struct request *rq) blk_end_request_all(rq, 0); } -#if defined(CONFIG_SMP) && defined(CONFIG_USE_GENERIC_SMP_HELPERS) +#ifdef CONFIG_SMP static void null_ipi_cmd_end_io(void *data) { @@ -260,7 +260,7 @@ static void null_cmd_end_ipi(struct nullb_cmd *cmd) put_cpu(); } -#endif /* CONFIG_SMP && CONFIG_USE_GENERIC_SMP_HELPERS */ +#endif /* CONFIG_SMP */ static inline void null_handle_cmd(struct nullb_cmd *cmd) { @@ -270,7 +270,7 @@ static inline void null_handle_cmd(struct nullb_cmd *cmd) end_cmd(cmd); break; case NULL_IRQ_SOFTIRQ: -#if defined(CONFIG_SMP) && defined(CONFIG_USE_GENERIC_SMP_HELPERS) +#ifdef CONFIG_SMP null_cmd_end_ipi(cmd); #else end_cmd(cmd); @@ -571,7 +571,7 @@ static int __init null_init(void) { unsigned int i; -#if !defined(CONFIG_SMP) || !defined(CONFIG_USE_GENERIC_SMP_HELPERS) +#if !defined(CONFIG_SMP) if (irqmode == NULL_IRQ_SOFTIRQ) { pr_warn("null_blk: softirq completions not available.\n"); pr_warn("null_blk: using direct completions.\n"); diff --git a/net/Kconfig b/net/Kconfig index 0715db64a5c..d334678c0bd 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -224,7 +224,7 @@ source "net/hsr/Kconfig" config RPS boolean - depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS + depends on SMP && SYSFS default y config RFS_ACCEL @@ -235,7 +235,7 @@ config RFS_ACCEL config XPS boolean - depends on SMP && USE_GENERIC_SMP_HELPERS + depends on SMP default y config NETPRIO_CGROUP -- cgit v1.2.3-70-g09d2 From d6c416148545bd99d3cc05e672460168245cc156 Mon Sep 17 00:00:00 2001 From: Chang Xiangzhong Date: Thu, 21 Nov 2013 22:56:28 +0100 Subject: net: sctp: find the correct highest_new_tsn in sack Function sctp_check_transmitted(transport t, ...) would iterate all of transport->transmitted queue and looking for the highest __newly__ acked tsn. The original algorithm would depend on the order of the assoc->transport_list (in function sctp_outq_sack line 1215 - 1226). The result might not be the expected due to the order of the tranport_list. Solution: checking if the exising is smaller than the new one before assigning Signed-off-by: Chang Xiangzhong Acked-by: Vlad Yasevich Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/outqueue.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 94df7587786..abb6db008df 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -1391,7 +1391,8 @@ static void sctp_check_transmitted(struct sctp_outq *q, */ if (!tchunk->tsn_gap_acked) { tchunk->tsn_gap_acked = 1; - *highest_new_tsn_in_sack = tsn; + if (TSN_lt(*highest_new_tsn_in_sack, tsn)) + *highest_new_tsn_in_sack = tsn; bytes_acked += sctp_data_size(tchunk); if (!tchunk->transport) migrate_bytes += sctp_data_size(tchunk); -- cgit v1.2.3-70-g09d2 From cc5c00bbb44c5d68b883aa5cb9d01514a2525d94 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 22 Nov 2013 10:31:29 +0800 Subject: gro: Only verify TCP checksums for candidates In some cases we may receive IP packets that are longer than their stated lengths. Such packets are never merged in GRO. However, we may end up computing their checksums incorrectly and end up allowing packets with a bogus checksum enter our stack with the checksum status set as verified. Since such packets are rare and not performance-critical, this patch simply skips the checksum verification for them. Reported-by: Alexander Duyck Signed-off-by: Herbert Xu Acked-by: Alexander Duyck Thanks, Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_offload.c | 5 +++++ net/ipv6/tcpv6_offload.c | 5 +++++ 2 files changed, 10 insertions(+) (limited to 'net') diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index a2b68a108ea..55aeec93014 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -276,6 +276,10 @@ static struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff * __wsum wsum; __sum16 sum; + /* Don't bother verifying checksum if we're going to flush anyway. */ + if (NAPI_GRO_CB(skb)->flush) + goto skip_csum; + switch (skb->ip_summed) { case CHECKSUM_COMPLETE: if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr, @@ -301,6 +305,7 @@ flush: break; } +skip_csum: return tcp_gro_receive(head, skb); } diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c index c1097c79890..71923d14127 100644 --- a/net/ipv6/tcpv6_offload.c +++ b/net/ipv6/tcpv6_offload.c @@ -39,6 +39,10 @@ static struct sk_buff **tcp6_gro_receive(struct sk_buff **head, __wsum wsum; __sum16 sum; + /* Don't bother verifying checksum if we're going to flush anyway. */ + if (NAPI_GRO_CB(skb)->flush) + goto skip_csum; + switch (skb->ip_summed) { case CHECKSUM_COMPLETE: if (!tcp_v6_check(skb_gro_len(skb), &iph->saddr, &iph->daddr, @@ -65,6 +69,7 @@ flush: break; } +skip_csum: return tcp_gro_receive(head, skb); } -- cgit v1.2.3-70-g09d2 From b8ee93ba80b5a0b6c3c06b65c34dd1276f16c047 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 22 Nov 2013 10:32:11 +0800 Subject: gro: Clean up tcpX_gro_receive checksum verification This patch simplifies the checksum verification in tcpX_gro_receive by reusing the CHECKSUM_COMPLETE code for CHECKSUM_NONE. All it does for CHECKSUM_NONE is compute the partial checksum and then treat it as if it came from the hardware (CHECKSUM_COMPLETE). Signed-off-by: Herbert Xu Cheers, Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_offload.c | 26 ++++++++++---------------- net/ipv6/tcpv6_offload.c | 27 ++++++++++----------------- 2 files changed, 20 insertions(+), 33 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index 55aeec93014..05606353c7e 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -274,35 +274,29 @@ static struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff * { const struct iphdr *iph = skb_gro_network_header(skb); __wsum wsum; - __sum16 sum; /* Don't bother verifying checksum if we're going to flush anyway. */ if (NAPI_GRO_CB(skb)->flush) goto skip_csum; + wsum = skb->csum; + switch (skb->ip_summed) { + case CHECKSUM_NONE: + wsum = skb_checksum(skb, skb_gro_offset(skb), skb_gro_len(skb), + 0); + + /* fall through */ + case CHECKSUM_COMPLETE: if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr, - skb->csum)) { + wsum)) { skb->ip_summed = CHECKSUM_UNNECESSARY; break; } -flush: + NAPI_GRO_CB(skb)->flush = 1; return NULL; - - case CHECKSUM_NONE: - wsum = csum_tcpudp_nofold(iph->saddr, iph->daddr, - skb_gro_len(skb), IPPROTO_TCP, 0); - sum = csum_fold(skb_checksum(skb, - skb_gro_offset(skb), - skb_gro_len(skb), - wsum)); - if (sum) - goto flush; - - skb->ip_summed = CHECKSUM_UNNECESSARY; - break; } skip_csum: diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c index 71923d14127..6d18157dc32 100644 --- a/net/ipv6/tcpv6_offload.c +++ b/net/ipv6/tcpv6_offload.c @@ -37,36 +37,29 @@ static struct sk_buff **tcp6_gro_receive(struct sk_buff **head, { const struct ipv6hdr *iph = skb_gro_network_header(skb); __wsum wsum; - __sum16 sum; /* Don't bother verifying checksum if we're going to flush anyway. */ if (NAPI_GRO_CB(skb)->flush) goto skip_csum; + wsum = skb->csum; + switch (skb->ip_summed) { + case CHECKSUM_NONE: + wsum = skb_checksum(skb, skb_gro_offset(skb), skb_gro_len(skb), + wsum); + + /* fall through */ + case CHECKSUM_COMPLETE: if (!tcp_v6_check(skb_gro_len(skb), &iph->saddr, &iph->daddr, - skb->csum)) { + wsum)) { skb->ip_summed = CHECKSUM_UNNECESSARY; break; } -flush: + NAPI_GRO_CB(skb)->flush = 1; return NULL; - - case CHECKSUM_NONE: - wsum = ~csum_unfold(csum_ipv6_magic(&iph->saddr, &iph->daddr, - skb_gro_len(skb), - IPPROTO_TCP, 0)); - sum = csum_fold(skb_checksum(skb, - skb_gro_offset(skb), - skb_gro_len(skb), - wsum)); - if (sum) - goto flush; - - skb->ip_summed = CHECKSUM_UNNECESSARY; - break; } skip_csum: -- cgit v1.2.3-70-g09d2 From fb10f802b0fb76079612cb78505cbc9ad81e683b Mon Sep 17 00:00:00 2001 From: Gao feng Date: Fri, 22 Nov 2013 14:48:40 +0800 Subject: tcp_memcg: remove useless var old_lim nobody needs it. remove. Signed-off-by: Gao feng Signed-off-by: David S. Miller --- net/ipv4/tcp_memcontrol.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c index 03e9154f7e6..269a89ecd2f 100644 --- a/net/ipv4/tcp_memcontrol.c +++ b/net/ipv4/tcp_memcontrol.c @@ -60,7 +60,6 @@ EXPORT_SYMBOL(tcp_destroy_cgroup); static int tcp_update_limit(struct mem_cgroup *memcg, u64 val) { struct cg_proto *cg_proto; - u64 old_lim; int i; int ret; @@ -71,7 +70,6 @@ static int tcp_update_limit(struct mem_cgroup *memcg, u64 val) if (val > RES_COUNTER_MAX) val = RES_COUNTER_MAX; - old_lim = res_counter_read_u64(&cg_proto->memory_allocated, RES_LIMIT); ret = res_counter_set_limit(&cg_proto->memory_allocated, val); if (ret) return ret; -- cgit v1.2.3-70-g09d2 From ca15a078bd907df5fc1c009477869c5cbde3b753 Mon Sep 17 00:00:00 2001 From: Oussama Ghorbel Date: Fri, 22 Nov 2013 16:23:20 +0100 Subject: sit: generate icmpv6 error when receiving icmpv4 error Send icmpv6 error with type "destination unreachable" and code "address unreachable" when receiving icmpv4 error and sufficient data bytes are available This patch enhances the compliance of sit tunnel with section 3.4 of rfc 4213 Signed-off-by: Oussama Ghorbel Signed-off-by: David S. Miller --- net/ipv6/sit.c | 44 ++++++++++++++++++++++++++++++++++++++------ 1 file changed, 38 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 1b4a4a95367..8435267836a 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -478,14 +478,44 @@ static void ipip6_tunnel_uninit(struct net_device *dev) dev_put(dev); } +/* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH + * if sufficient data bytes are available + */ +static int ipip6_err_gen_icmpv6_unreach(struct sk_buff *skb) +{ + const struct iphdr *iph = (const struct iphdr *) skb->data; + struct rt6_info *rt; + struct sk_buff *skb2; + + if (!pskb_may_pull(skb, iph->ihl * 4 + sizeof(struct ipv6hdr) + 8)) + return 1; + + skb2 = skb_clone(skb, GFP_ATOMIC); + + if (!skb2) + return 1; + + skb_dst_drop(skb2); + skb_pull(skb2, iph->ihl * 4); + skb_reset_network_header(skb2); + + rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0, 0); + + if (rt && rt->dst.dev) + skb2->dev = rt->dst.dev; + + icmpv6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0); + + if (rt) + ip6_rt_put(rt); + + kfree_skb(skb2); + + return 0; +} static int ipip6_err(struct sk_buff *skb, u32 info) { - -/* All the routers (except for Linux) return only - 8 bytes of packet payload. It means, that precise relaying of - ICMP in the real Internet is absolutely infeasible. - */ const struct iphdr *iph = (const struct iphdr *)skb->data; const int type = icmp_hdr(skb)->type; const int code = icmp_hdr(skb)->code; @@ -500,7 +530,6 @@ static int ipip6_err(struct sk_buff *skb, u32 info) case ICMP_DEST_UNREACH: switch (code) { case ICMP_SR_FAILED: - case ICMP_PORT_UNREACH: /* Impossible event. */ return 0; default: @@ -545,6 +574,9 @@ static int ipip6_err(struct sk_buff *skb, u32 info) goto out; err = 0; + if (!ipip6_err_gen_icmpv6_unreach(skb)) + goto out; + if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) goto out; -- cgit v1.2.3-70-g09d2 From 85fbaa75037d0b6b786ff18658ddf0b4014ce2a4 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Sat, 23 Nov 2013 00:46:12 +0100 Subject: inet: fix addr_len/msg->msg_namelen assignment in recv_error and rxpmtu functions Commit bceaa90240b6019ed73b49965eac7d167610be69 ("inet: prevent leakage of uninitialized memory to user in recv syscalls") conditionally updated addr_len if the msg_name is written to. The recv_error and rxpmtu functions relied on the recvmsg functions to set up addr_len before. As this does not happen any more we have to pass addr_len to those functions as well and set it to the size of the corresponding sockaddr length. This broke traceroute and such. Fixes: bceaa90240b6 ("inet: prevent leakage of uninitialized memory to user in recv syscalls") Reported-by: Brad Spengler Reported-by: Tom Labanowski Cc: mpb Cc: David S. Miller Cc: Eric Dumazet Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/net/ip.h | 2 +- include/net/ipv6.h | 6 ++++-- include/net/ping.h | 3 ++- net/ipv4/ip_sockglue.c | 3 ++- net/ipv4/ping.c | 5 +++-- net/ipv4/raw.c | 2 +- net/ipv4/udp.c | 2 +- net/ipv6/datagram.c | 7 +++++-- net/ipv6/ping.c | 3 ++- net/ipv6/raw.c | 4 ++-- net/ipv6/udp.c | 4 ++-- net/l2tp/l2tp_ip6.c | 2 +- 12 files changed, 26 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/include/net/ip.h b/include/net/ip.h index 217bc5bfc6c..5a25f36fe3a 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -473,7 +473,7 @@ int compat_ip_getsockopt(struct sock *sk, int level, int optname, int ip_ra_control(struct sock *sk, unsigned char on, void (*destructor)(struct sock *)); -int ip_recv_error(struct sock *sk, struct msghdr *msg, int len); +int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len); void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err, __be16 port, u32 info, u8 *payload); void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 dport, diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 2a5f668cd68..eb198acaac1 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -776,8 +776,10 @@ int compat_ipv6_getsockopt(struct sock *sk, int level, int optname, int ip6_datagram_connect(struct sock *sk, struct sockaddr *addr, int addr_len); -int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len); -int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len); +int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, + int *addr_len); +int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len, + int *addr_len); void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err, __be16 port, u32 info, u8 *payload); void ipv6_local_error(struct sock *sk, int err, struct flowi6 *fl6, u32 info); diff --git a/include/net/ping.h b/include/net/ping.h index 3f67704f374..90f48417b03 100644 --- a/include/net/ping.h +++ b/include/net/ping.h @@ -31,7 +31,8 @@ /* Compatibility glue so we can support IPv6 when it's compiled as a module */ struct pingv6_ops { - int (*ipv6_recv_error)(struct sock *sk, struct msghdr *msg, int len); + int (*ipv6_recv_error)(struct sock *sk, struct msghdr *msg, int len, + int *addr_len); int (*ip6_datagram_recv_ctl)(struct sock *sk, struct msghdr *msg, struct sk_buff *skb); int (*icmpv6_err_convert)(u8 type, u8 code, int *err); diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 3f858266fa7..ddf32a6bc41 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -386,7 +386,7 @@ void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 inf /* * Handle MSG_ERRQUEUE */ -int ip_recv_error(struct sock *sk, struct msghdr *msg, int len) +int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) { struct sock_exterr_skb *serr; struct sk_buff *skb, *skb2; @@ -423,6 +423,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len) serr->addr_offset); sin->sin_port = serr->port; memset(&sin->sin_zero, 0, sizeof(sin->sin_zero)); + *addr_len = sizeof(*sin); } memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err)); diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 876c6ca2d8f..840cf1b9e6e 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -841,10 +841,11 @@ int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (flags & MSG_ERRQUEUE) { if (family == AF_INET) { - return ip_recv_error(sk, msg, len); + return ip_recv_error(sk, msg, len, addr_len); #if IS_ENABLED(CONFIG_IPV6) } else if (family == AF_INET6) { - return pingv6_ops.ipv6_recv_error(sk, msg, len); + return pingv6_ops.ipv6_recv_error(sk, msg, len, + addr_len); #endif } } diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 5cb8ddb505e..23c3e5b5bb5 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -697,7 +697,7 @@ static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, goto out; if (flags & MSG_ERRQUEUE) { - err = ip_recv_error(sk, msg, len); + err = ip_recv_error(sk, msg, len, addr_len); goto out; } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 5944d7d668d..44dfaa09b58 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1236,7 +1236,7 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, bool slow; if (flags & MSG_ERRQUEUE) - return ip_recv_error(sk, msg, len); + return ip_recv_error(sk, msg, len, addr_len); try_again: skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index a454b0ff57c..d690204a027 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -318,7 +318,7 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu) /* * Handle MSG_ERRQUEUE */ -int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) +int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) { struct ipv6_pinfo *np = inet6_sk(sk); struct sock_exterr_skb *serr; @@ -369,6 +369,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) &sin->sin6_addr); sin->sin6_scope_id = 0; } + *addr_len = sizeof(*sin); } memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err)); @@ -423,7 +424,8 @@ EXPORT_SYMBOL_GPL(ipv6_recv_error); /* * Handle IPV6_RECVPATHMTU */ -int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len) +int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len, + int *addr_len) { struct ipv6_pinfo *np = inet6_sk(sk); struct sk_buff *skb; @@ -457,6 +459,7 @@ int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len) sin->sin6_port = 0; sin->sin6_scope_id = mtu_info.ip6m_addr.sin6_scope_id; sin->sin6_addr = mtu_info.ip6m_addr.sin6_addr; + *addr_len = sizeof(*sin); } put_cmsg(msg, SOL_IPV6, IPV6_PATHMTU, sizeof(mtu_info), &mtu_info); diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 8815e31a87f..a83243c3d65 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -57,7 +57,8 @@ static struct inet_protosw pingv6_protosw = { /* Compatibility glue so we can support IPv6 when it's compiled as a module */ -static int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) +static int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, + int *addr_len) { return -EAFNOSUPPORT; } diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index e24ff1df040..7fb4e14c467 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -466,10 +466,10 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk, return -EOPNOTSUPP; if (flags & MSG_ERRQUEUE) - return ipv6_recv_error(sk, msg, len); + return ipv6_recv_error(sk, msg, len, addr_len); if (np->rxpmtu && np->rxopt.bits.rxpmtu) - return ipv6_recv_rxpmtu(sk, msg, len); + return ipv6_recv_rxpmtu(sk, msg, len, addr_len); skb = skb_recv_datagram(sk, flags, noblock, &err); if (!skb) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 81eb8cf8389..bcd5699313c 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -393,10 +393,10 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, bool slow; if (flags & MSG_ERRQUEUE) - return ipv6_recv_error(sk, msg, len); + return ipv6_recv_error(sk, msg, len, addr_len); if (np->rxpmtu && np->rxopt.bits.rxpmtu) - return ipv6_recv_rxpmtu(sk, msg, len); + return ipv6_recv_rxpmtu(sk, msg, len, addr_len); try_again: skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index cfd65304be6..d9b437e5500 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -665,7 +665,7 @@ static int l2tp_ip6_recvmsg(struct kiocb *iocb, struct sock *sk, *addr_len = sizeof(*lsa); if (flags & MSG_ERRQUEUE) - return ipv6_recv_error(sk, msg, len); + return ipv6_recv_error(sk, msg, len, addr_len); skb = skb_recv_datagram(sk, flags, noblock, &err); if (!skb) -- cgit v1.2.3-70-g09d2 From 1fa4c710b6fe7b0aac9907240291b6fe6aafc3b8 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Sat, 23 Nov 2013 07:22:33 +0100 Subject: ipv6: fix leaking uninitialized port number of offender sockaddr Offenders don't have port numbers, so set it to 0. Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/datagram.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index d690204a027..8dfe1f4d3c1 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -378,6 +378,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) if (serr->ee.ee_origin != SO_EE_ORIGIN_LOCAL) { sin->sin6_family = AF_INET6; sin->sin6_flowinfo = 0; + sin->sin6_port = 0; if (skb->protocol == htons(ETH_P_IPV6)) { sin->sin6_addr = ipv6_hdr(skb)->saddr; if (np->rxopt.all) -- cgit v1.2.3-70-g09d2 From 4d0820cf6a55d72350cb2d24a4504f62fbde95d9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 23 Nov 2013 12:59:20 -0800 Subject: sch_tbf: handle too small burst If a too small burst is inadvertently set on TBF, we might trigger a bug in tbf_segment(), as 'skb' instead of 'segs' was used in a qdisc_reshape_fail() call. tc qdisc add dev eth0 root handle 1: tbf latency 50ms burst 1KB rate 50mbit Fix the bug, and add a warning, as such configuration is not going to work anyway for non GSO packets. (For some reason, one has to use a burst >= 1520 to get a working configuration, even with old kernels. This is a probable iproute2/tc bug) Based on a report and initial patch from Yang Yingliang Fixes: e43ac79a4bc6 ("sch_tbf: segment too big GSO packets") Signed-off-by: Eric Dumazet Reported-by: Yang Yingliang Signed-off-by: David S. Miller --- net/sched/sch_tbf.c | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 68f98595819..a6090051c5d 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -21,6 +21,7 @@ #include #include #include +#include /* Simple Token Bucket Filter. @@ -117,6 +118,22 @@ struct tbf_sched_data { }; +/* + * Return length of individual segments of a gso packet, + * including all headers (MAC, IP, TCP/UDP) + */ +static unsigned int skb_gso_seglen(const struct sk_buff *skb) +{ + unsigned int hdr_len = skb_transport_header(skb) - skb_mac_header(skb); + const struct skb_shared_info *shinfo = skb_shinfo(skb); + + if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) + hdr_len += tcp_hdrlen(skb); + else + hdr_len += sizeof(struct udphdr); + return hdr_len + shinfo->gso_size; +} + /* GSO packet is too big, segment it so that tbf can transmit * each segment in time */ @@ -136,12 +153,8 @@ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch) while (segs) { nskb = segs->next; segs->next = NULL; - if (likely(segs->len <= q->max_size)) { - qdisc_skb_cb(segs)->pkt_len = segs->len; - ret = qdisc_enqueue(segs, q->qdisc); - } else { - ret = qdisc_reshape_fail(skb, sch); - } + qdisc_skb_cb(segs)->pkt_len = segs->len; + ret = qdisc_enqueue(segs, q->qdisc); if (ret != NET_XMIT_SUCCESS) { if (net_xmit_drop_count(ret)) sch->qstats.drops++; @@ -163,7 +176,7 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch) int ret; if (qdisc_pkt_len(skb) > q->max_size) { - if (skb_is_gso(skb)) + if (skb_is_gso(skb) && skb_gso_seglen(skb) <= q->max_size) return tbf_segment(skb, sch); return qdisc_reshape_fail(skb, sch); } @@ -319,6 +332,11 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt) if (max_size < 0) goto done; + if (max_size < psched_mtu(qdisc_dev(sch))) + pr_warn_ratelimited("sch_tbf: burst %u is lower than device %s mtu (%u) !\n", + max_size, qdisc_dev(sch)->name, + psched_mtu(qdisc_dev(sch))); + if (q->qdisc != &noop_qdisc) { err = fifo_set_limit(q->qdisc, qopt->limit); if (err) -- cgit v1.2.3-70-g09d2 From 2d3db210860f1df099a35b1dd54cca35454e0361 Mon Sep 17 00:00:00 2001 From: Bob Copeland Date: Tue, 29 Oct 2013 18:11:59 -0400 Subject: Revert "mac80211: allow disable power save in mesh" This reverts commit ee1f668136b2fb6640ee2d54c2a525ea41f98211. The aformentioned commit added a check to allow 'iw wlan0 set power_save off' to work for mesh interfaces. However, this is problematic because it also allows 'iw wlan0 set power_save on', which will crash in short order because all of the subsequent code manipulates sdata->u.mgd. The power-saving states for mesh interfaces can be manipulated through the mesh config, e.g: 'iw wlan0 set mesh_param mesh_power_save=active' (which, despite the name, actualy disables power saving since the setting refers to the type of sleep the interface undergoes). Cc: stable@vger.kernel.org Fixes: ee1f668136b2 ("mac80211: allow disable power save in mesh") Signed-off-by: Bob Copeland Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 95667b088c5..0ec245120a6 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2488,8 +2488,7 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - if (sdata->vif.type != NL80211_IFTYPE_STATION && - sdata->vif.type != NL80211_IFTYPE_MESH_POINT) + if (sdata->vif.type != NL80211_IFTYPE_STATION) return -EOPNOTSUPP; if (!(local->hw.flags & IEEE80211_HW_SUPPORTS_PS)) -- cgit v1.2.3-70-g09d2 From 1fe4517cebc35ef900fa483d19c3090681f3c7bc Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Wed, 30 Oct 2013 16:09:33 +0100 Subject: cfg80211: fix ibss wext chandef creation The wext internal chandefs for ibss should be created using the cfg80211_chandef_create() functions. Initializing fields manually is error-prone. Reported-by: Dirk Gouders Signed-off-by: Simon Wunderlich Signed-off-by: Johannes Berg --- net/wireless/ibss.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index 9d797df5664..89737ee2669 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -262,7 +262,7 @@ int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev, /* try to find an IBSS channel if none requested ... */ if (!wdev->wext.ibss.chandef.chan) { - wdev->wext.ibss.chandef.width = NL80211_CHAN_WIDTH_20_NOHT; + struct ieee80211_channel *new_chan = NULL; for (band = 0; band < IEEE80211_NUM_BANDS; band++) { struct ieee80211_supported_band *sband; @@ -278,18 +278,19 @@ int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev, continue; if (chan->flags & IEEE80211_CHAN_DISABLED) continue; - wdev->wext.ibss.chandef.chan = chan; - wdev->wext.ibss.chandef.center_freq1 = - chan->center_freq; + new_chan = chan; break; } - if (wdev->wext.ibss.chandef.chan) + if (new_chan) break; } - if (!wdev->wext.ibss.chandef.chan) + if (!new_chan) return -EINVAL; + + cfg80211_chandef_create(&wdev->wext.ibss.chandef, new_chan, + NL80211_CHAN_NO_HT); } /* don't join -- SSID is not there */ @@ -363,9 +364,8 @@ int cfg80211_ibss_wext_siwfreq(struct net_device *dev, return err; if (chan) { - wdev->wext.ibss.chandef.chan = chan; - wdev->wext.ibss.chandef.width = NL80211_CHAN_WIDTH_20_NOHT; - wdev->wext.ibss.chandef.center_freq1 = freq; + cfg80211_chandef_create(&wdev->wext.ibss.chandef, chan, + NL80211_CHAN_NO_HT); wdev->wext.ibss.channel_fixed = true; } else { /* cfg80211_ibss_wext_join will pick one if needed */ -- cgit v1.2.3-70-g09d2 From 84a3d1c97d024acd1d27ebbc10cb95784b11f4e7 Mon Sep 17 00:00:00 2001 From: Janusz Dziedzic Date: Tue, 5 Nov 2013 14:48:46 +0100 Subject: mac80211: DFS setup chandef for radar_event correctly Setup chandef for radar event correctly, before we will clear this in ieee80211_dfs_cac_cancel() function. Without this patch mac80211 will report wrong channel width in case we will get radar event during active CAC. Signed-off-by: Janusz Dziedzic Reviewed-by: Luis R. Rodriguez Signed-off-by: Johannes Berg --- net/mac80211/util.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 592a18171f9..e9ce36d32ef 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -2278,17 +2278,15 @@ void ieee80211_dfs_radar_detected_work(struct work_struct *work) { struct ieee80211_local *local = container_of(work, struct ieee80211_local, radar_detected_work); - struct cfg80211_chan_def chandef; + struct cfg80211_chan_def chandef = local->hw.conf.chandef; ieee80211_dfs_cac_cancel(local); if (local->use_chanctx) /* currently not handled */ WARN_ON(1); - else { - chandef = local->hw.conf.chandef; + else cfg80211_radar_event(local->hw.wiphy, &chandef, GFP_KERNEL); - } } void ieee80211_radar_detected(struct ieee80211_hw *hw) -- cgit v1.2.3-70-g09d2 From 18db594a1005d908d995a2fc8f5a7bf4286fdca0 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 6 Nov 2013 10:34:36 +0100 Subject: mac80211: fix scheduled scan rtnl deadlock When changing cfg80211 to use RTNL locking, this caused a deadlock in mac80211 as it calls cfg80211_sched_scan_stopped() from a work item that's on a workqueue that is flushed with the RTNL held. Fix this by simply using schedule_work(), the work only needs to finish running before the wiphy is unregistered, no other synchronisation (e.g. with suspend) is really required since for suspend userspace is already blocked anyway when we flush the workqueue so will only pick up the event after resume. Cc: stable@vger.kernel.org Fixes: 5fe231e87372 ("cfg80211: vastly simplify locking") Reported-and-tested-by: Eliad Peller Signed-off-by: Johannes Berg --- net/mac80211/main.c | 1 + net/mac80211/scan.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 21d5d44444d..e765f77bb97 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -1047,6 +1047,7 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) cancel_work_sync(&local->restart_work); cancel_work_sync(&local->reconfig_filter); + flush_work(&local->sched_scan_stopped_work); ieee80211_clear_tx_pending(local); rate_control_deinitialize(local); diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 5ad66a83ef7..bcc4833d754 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -1088,6 +1088,6 @@ void ieee80211_sched_scan_stopped(struct ieee80211_hw *hw) trace_api_sched_scan_stopped(local); - ieee80211_queue_work(&local->hw, &local->sched_scan_stopped_work); + schedule_work(&local->sched_scan_stopped_work); } EXPORT_SYMBOL(ieee80211_sched_scan_stopped); -- cgit v1.2.3-70-g09d2 From ae917c9f55862691e31b84de7ec29bedcb83971c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 25 Oct 2013 11:05:22 +0200 Subject: nl80211: check nla_put_* return values Coverity pointed out that in a few functions we don't check the return value of the nla_put_*() calls. Most of these are fairly harmless because the input isn't very dynamic and controlled by the kernel, but the pattern is simply wrong, so fix this. Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 52 +++++++++++++++++++++++++++++--------------------- 1 file changed, 30 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index a1eb2107317..0ffb1837137 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -9633,8 +9633,9 @@ static int nl80211_add_scan_req(struct sk_buff *msg, nla_put(msg, NL80211_ATTR_IE, req->ie_len, req->ie)) goto nla_put_failure; - if (req->flags) - nla_put_u32(msg, NL80211_ATTR_SCAN_FLAGS, req->flags); + if (req->flags && + nla_put_u32(msg, NL80211_ATTR_SCAN_FLAGS, req->flags)) + goto nla_put_failure; return 0; nla_put_failure: @@ -11118,16 +11119,18 @@ void cfg80211_report_wowlan_wakeup(struct wireless_dev *wdev, wakeup->pattern_idx)) goto free_msg; - if (wakeup->tcp_match) - nla_put_flag(msg, NL80211_WOWLAN_TRIG_WAKEUP_TCP_MATCH); + if (wakeup->tcp_match && + nla_put_flag(msg, NL80211_WOWLAN_TRIG_WAKEUP_TCP_MATCH)) + goto free_msg; - if (wakeup->tcp_connlost) - nla_put_flag(msg, - NL80211_WOWLAN_TRIG_WAKEUP_TCP_CONNLOST); + if (wakeup->tcp_connlost && + nla_put_flag(msg, NL80211_WOWLAN_TRIG_WAKEUP_TCP_CONNLOST)) + goto free_msg; - if (wakeup->tcp_nomoretokens) - nla_put_flag(msg, - NL80211_WOWLAN_TRIG_WAKEUP_TCP_NOMORETOKENS); + if (wakeup->tcp_nomoretokens && + nla_put_flag(msg, + NL80211_WOWLAN_TRIG_WAKEUP_TCP_NOMORETOKENS)) + goto free_msg; if (wakeup->packet) { u32 pkt_attr = NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211; @@ -11263,24 +11266,29 @@ void cfg80211_ft_event(struct net_device *netdev, return; hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_FT_EVENT); - if (!hdr) { - nlmsg_free(msg); - return; - } + if (!hdr) + goto out; + + if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || + nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) || + nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, ft_event->target_ap)) + goto out; - nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx); - nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex); - nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, ft_event->target_ap); - if (ft_event->ies) - nla_put(msg, NL80211_ATTR_IE, ft_event->ies_len, ft_event->ies); - if (ft_event->ric_ies) - nla_put(msg, NL80211_ATTR_IE_RIC, ft_event->ric_ies_len, - ft_event->ric_ies); + if (ft_event->ies && + nla_put(msg, NL80211_ATTR_IE, ft_event->ies_len, ft_event->ies)) + goto out; + if (ft_event->ric_ies && + nla_put(msg, NL80211_ATTR_IE_RIC, ft_event->ric_ies_len, + ft_event->ric_ies)) + goto out; genlmsg_end(msg, hdr); genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0, NL80211_MCGRP_MLME, GFP_KERNEL); + return; + out: + nlmsg_free(msg); } EXPORT_SYMBOL(cfg80211_ft_event); -- cgit v1.2.3-70-g09d2 From 9fe271af7d4de96471c5aaee2f4d0d1576050497 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 25 Oct 2013 11:15:12 +0200 Subject: nl80211: fix error path in nl80211_get_key() Coverity pointed out that in the (practically impossible) error case we leak the message - fix this. Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 0ffb1837137..f1370ed9f49 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2687,7 +2687,7 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info) hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, NL80211_CMD_NEW_KEY); if (!hdr) - return -ENOBUFS; + goto nla_put_failure; cookie.msg = msg; cookie.idx = key_idx; -- cgit v1.2.3-70-g09d2 From 7fa322c878d70e38675f50e17acdce7fa3f5ac8c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 25 Oct 2013 11:16:58 +0200 Subject: nl80211: check nla_nest_start() return value Coverity pointed out that we might dereference NULL later if nla_nest_start() returns a failure. This isn't really true since we'd bomb out before, but we should check the return value directly, so do that. Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index f1370ed9f49..e20c27ff0f1 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -11094,6 +11094,8 @@ void cfg80211_report_wowlan_wakeup(struct wireless_dev *wdev, struct nlattr *reasons; reasons = nla_nest_start(msg, NL80211_ATTR_WOWLAN_TRIGGERS); + if (!reasons) + goto free_msg; if (wakeup->disconnect && nla_put_flag(msg, NL80211_WOWLAN_TRIG_DISCONNECT)) -- cgit v1.2.3-70-g09d2 From 57fb089f480d199e4275da086d407b978de67214 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 8 Nov 2013 17:31:37 +0100 Subject: mac80211: fix crash when using AP VLAN interfaces Commit "mac80211: implement SMPS for AP" applies to AP_VLAN as well. It assumes that sta->sdata->vif.bss_conf.bssid is present, which did not get set for AP_VLAN. Initialize it to sdata->vif.addr like for other interface types. Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/mac80211/iface.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index ff101ea1d9a..36c3a4cbcab 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1325,7 +1325,6 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, sdata->vif.bss_conf.bssid = NULL; break; case NL80211_IFTYPE_AP_VLAN: - break; case NL80211_IFTYPE_P2P_DEVICE: sdata->vif.bss_conf.bssid = sdata->vif.addr; break; -- cgit v1.2.3-70-g09d2 From 6c751ef8a1a15d633cd755eafa86ede9c32b2617 Mon Sep 17 00:00:00 2001 From: Javier Lopez Date: Wed, 6 Nov 2013 10:04:29 -0800 Subject: mac80211: fix for mesh beacon update on powersave Mesh beacon was not being rebuild after user triggered a mesh powersave change. To solve this issue use ieee80211_mbss_info_change_notify instead of ieee80211_bss_info_change_notify. This helper function forces mesh beacon to be rebuild and then notifies the driver about the beacon change. Signed-off-by: Javier Lopez Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 0ec245120a6..9e7e68d7b1a 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1368,7 +1368,7 @@ static int sta_apply_parameters(struct ieee80211_local *local, changed |= ieee80211_mps_set_sta_local_pm(sta, params->local_pm); - ieee80211_bss_info_change_notify(sdata, changed); + ieee80211_mbss_info_change_notify(sdata, changed); #endif } -- cgit v1.2.3-70-g09d2 From 351df099721e02e1a25a498268e52c0378c0e272 Mon Sep 17 00:00:00 2001 From: Karl Beldan Date: Wed, 13 Nov 2013 23:07:07 +0100 Subject: mac80211: minstrel_ht: fix rates selection When initializing rates selections starting indexes upon stats update, the minstrel_sta->max_* rates should be 'group * MCS_GROUP_RATES + i' not 'i'. This affects settings where one of the peers does not support any of the rates of the group 0 (i.e. when ht_cap.mcs.rx_mask[0] == 0). Signed-off-by: Karl Beldan Acked-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/mac80211/rc80211_minstrel_ht.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 5d60779a0c1..47aa6f81566 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -277,13 +277,15 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) if (!(mg->supported & BIT(i))) continue; + index = MCS_GROUP_RATES * group + i; + /* initialize rates selections starting indexes */ if (!mg_rates_valid) { mg->max_tp_rate = mg->max_tp_rate2 = mg->max_prob_rate = i; if (!mi_rates_valid) { mi->max_tp_rate = mi->max_tp_rate2 = - mi->max_prob_rate = i; + mi->max_prob_rate = index; mi_rates_valid = true; } mg_rates_valid = true; @@ -291,7 +293,6 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) mr = &mg->rates[i]; mr->retry_updated = false; - index = MCS_GROUP_RATES * group + i; minstrel_calc_rate_ewma(mr); minstrel_ht_calc_tp(mi, group, i); -- cgit v1.2.3-70-g09d2 From 9f16d84ad73ea04145a5dc85c8f1067915b37eea Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 17 Nov 2013 10:37:34 +0100 Subject: cfg80211: disable 5/10 MHz support for all drivers Due to nl80211 API breakage, 5/10 MHz support is broken for all drivers. Fixing it requires adding new API, but that can't be done as a bugfix commit since that would require either updating all APIs in the trees needing the bugfix or cause different kernels to have incompatible API. Therefore, just disable 5/10 MHz support for all drivers. Cc: stable@vger.kernel.org [3.12] Signed-off-by: Johannes Berg --- net/wireless/core.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/wireless/core.c b/net/wireless/core.c index aff959e5a1b..00a65ba3aea 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -451,6 +451,9 @@ int wiphy_register(struct wiphy *wiphy) int i; u16 ifmodes = wiphy->interface_modes; + /* support for 5/10 MHz is broken due to nl80211 API mess - disable */ + wiphy->flags &= ~WIPHY_FLAG_SUPPORTS_5_10_MHZ; + #ifdef CONFIG_PM if (WARN_ON(wiphy->wowlan && (wiphy->wowlan->flags & WIPHY_WOWLAN_GTK_REKEY_FAILURE) && -- cgit v1.2.3-70-g09d2 From 051a41fa4ee14f5c39668f0980973b9a195de560 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 20 Nov 2013 11:28:27 +0100 Subject: mac80211: don't attempt to reorder multicast frames Multicast frames can't be transmitted as part of an aggregation session (such a session couldn't even be set up) so don't try to reorder them. Trying to do so would cause the reorder to stop working correctly since multicast QoS frames (as transmitted by the Aruba APs this was found with) would cause sequence number confusion in the buffer. Cc: stable@vger.kernel.org Reported-by: Blaise Gassend Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index caecef870c0..2b0debb0422 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -911,7 +911,8 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx, u16 sc; u8 tid, ack_policy; - if (!ieee80211_is_data_qos(hdr->frame_control)) + if (!ieee80211_is_data_qos(hdr->frame_control) || + is_multicast_ether_addr(hdr->addr1)) goto dont_reorder; /* -- cgit v1.2.3-70-g09d2 From 3f718fd8401d7db86b9efc3ea1cdf5df41354b9f Mon Sep 17 00:00:00 2001 From: Chun-Yeow Yeoh Date: Fri, 8 Nov 2013 15:09:43 +0800 Subject: mac80211: fix the mesh channel switch support Mesh STA receiving the mesh CSA action frame is not able to trigger the mesh channel switch due to the incorrect handling and comparison of mesh channel switch parameters element (MCSP)'s TTL. Make sure the MCSP's TTL is updated accordingly before calling the ieee80211_mesh_process_chnswitch. Also, we update the beacon before forwarding the CSA action frame, so MCSP's precedence value and initiator flag need to be updated prior to this. Signed-off-by: Chun-Yeow Yeoh Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 10 +++++++++- net/mac80211/ieee80211_i.h | 1 + net/mac80211/mesh.c | 20 ++++++++++++-------- net/mac80211/spectmgmt.c | 2 ++ net/mac80211/util.c | 5 ----- 5 files changed, 24 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 9e7e68d7b1a..364ce0c5962 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -3119,9 +3119,17 @@ static int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, params->chandef.chan->band) return -EINVAL; + ifmsh->chsw_init = true; + if (!ifmsh->pre_value) + ifmsh->pre_value = 1; + else + ifmsh->pre_value++; + err = ieee80211_mesh_csa_beacon(sdata, params, true); - if (err < 0) + if (err < 0) { + ifmsh->chsw_init = false; return err; + } break; #endif default: diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 29dc505be12..4aea4e79111 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1228,6 +1228,7 @@ struct ieee80211_csa_ie { u8 mode; u8 count; u8 ttl; + u16 pre_value; }; /* Parsed Information Elements */ diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 896fe3bd599..ba105257d03 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -943,14 +943,19 @@ ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata, params.chandef.chan->center_freq); params.block_tx = csa_ie.mode & WLAN_EID_CHAN_SWITCH_PARAM_TX_RESTRICT; - if (beacon) + if (beacon) { ifmsh->chsw_ttl = csa_ie.ttl - 1; - else - ifmsh->chsw_ttl = 0; + if (ifmsh->pre_value >= csa_ie.pre_value) + return false; + ifmsh->pre_value = csa_ie.pre_value; + } - if (ifmsh->chsw_ttl > 0) + if (ifmsh->chsw_ttl < ifmsh->mshcfg.dot11MeshTTL) { if (ieee80211_mesh_csa_beacon(sdata, ¶ms, false) < 0) return false; + } else { + return false; + } sdata->csa_radar_required = params.radar_required; @@ -1163,7 +1168,6 @@ static int mesh_fwd_csa_frame(struct ieee80211_sub_if_data *sdata, offset_ttl = (len < 42) ? 7 : 10; *(pos + offset_ttl) -= 1; *(pos + offset_ttl + 1) &= ~WLAN_EID_CHAN_SWITCH_PARAM_INITIATOR; - sdata->u.mesh.chsw_ttl = *(pos + offset_ttl); memcpy(mgmt_fwd, mgmt, len); eth_broadcast_addr(mgmt_fwd->da); @@ -1182,7 +1186,7 @@ static void mesh_rx_csa_frame(struct ieee80211_sub_if_data *sdata, u16 pre_value; bool fwd_csa = true; size_t baselen; - u8 *pos, ttl; + u8 *pos; if (mgmt->u.action.u.measurement.action_code != WLAN_ACTION_SPCT_CHL_SWITCH) @@ -1193,8 +1197,8 @@ static void mesh_rx_csa_frame(struct ieee80211_sub_if_data *sdata, u.action.u.chan_switch.variable); ieee802_11_parse_elems(pos, len - baselen, false, &elems); - ttl = elems.mesh_chansw_params_ie->mesh_ttl; - if (!--ttl) + ifmsh->chsw_ttl = elems.mesh_chansw_params_ie->mesh_ttl; + if (!--ifmsh->chsw_ttl) fwd_csa = false; pre_value = le16_to_cpu(elems.mesh_chansw_params_ie->mesh_pre_value); diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c index a40da20b32e..6ab00907008 100644 --- a/net/mac80211/spectmgmt.c +++ b/net/mac80211/spectmgmt.c @@ -78,6 +78,8 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata, if (elems->mesh_chansw_params_ie) { csa_ie->ttl = elems->mesh_chansw_params_ie->mesh_ttl; csa_ie->mode = elems->mesh_chansw_params_ie->mesh_flags; + csa_ie->pre_value = le16_to_cpu( + elems->mesh_chansw_params_ie->mesh_pre_value); } new_freq = ieee80211_channel_to_frequency(new_chan_no, new_band); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index e9ce36d32ef..9f9b9bd3fd4 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -2457,14 +2457,9 @@ int ieee80211_send_action_csa(struct ieee80211_sub_if_data *sdata, WLAN_EID_CHAN_SWITCH_PARAM_TX_RESTRICT : 0x00; put_unaligned_le16(WLAN_REASON_MESH_CHAN, pos); /* Reason Cd */ pos += 2; - if (!ifmsh->pre_value) - ifmsh->pre_value = 1; - else - ifmsh->pre_value++; pre_value = cpu_to_le16(ifmsh->pre_value); memcpy(pos, &pre_value, 2); /* Precedence Value */ pos += 2; - ifmsh->chsw_init = true; } ieee80211_tx_skb(sdata, skb); -- cgit v1.2.3-70-g09d2 From 12b5f34d2d5934e998975bbae4e29f81d94052f6 Mon Sep 17 00:00:00 2001 From: Eliad Peller Date: Mon, 18 Nov 2013 19:06:46 +0200 Subject: mac80211: fix connection polling Commit 392b9ff ("mac80211: change beacon/connection polling") removed the IEEE80211_STA_BEACON_POLL flag. However, it accidentally removed the setting of IEEE80211_STA_CONNECTION_POLL, making the connection polling completely useless (the flag is always clear, so the result is never being checked). Fix it. Signed-off-by: Eliad Peller Acked-by: Stanislaw Gruszka Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index d7504ab61a3..b3a3ce31665 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1910,6 +1910,8 @@ static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata, if (ifmgd->flags & IEEE80211_STA_CONNECTION_POLL) already = true; + ifmgd->flags |= IEEE80211_STA_CONNECTION_POLL; + mutex_unlock(&sdata->local->mtx); if (already) -- cgit v1.2.3-70-g09d2 From 1b09cd82d8c479700ef6185665839d1020b02519 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 20 Nov 2013 19:40:41 +0100 Subject: cfg80211: ignore supported rates for nonexistant bands on scan Fixes wpa_supplicant p2p_find on 5GHz-only devices Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index e20c27ff0f1..138dc3bb8b6 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -5349,6 +5349,10 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) err = -EINVAL; goto out_free; } + + if (!wiphy->bands[band]) + continue; + err = ieee80211_get_ratemask(wiphy->bands[band], nla_data(attr), nla_len(attr), -- cgit v1.2.3-70-g09d2 From 5664da4429c177495256f958194c241625074ec0 Mon Sep 17 00:00:00 2001 From: Karl Beldan Date: Wed, 20 Nov 2013 19:13:35 +0100 Subject: mac80211: use capped prob when computing throughputs Commit 3e8b1eb "mac80211/minstrel_ht: improve rate selection stability" introduced a local capped prob in minstrel_ht_calc_tp but omitted to use it to compute the per rate throughput. Signed-off-by: Karl Beldan Cc: Felix Fietkau Signed-off-by: Johannes Berg --- net/mac80211/rc80211_minstrel_ht.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 47aa6f81566..4096ff6cc24 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -226,7 +226,7 @@ minstrel_ht_calc_tp(struct minstrel_ht_sta *mi, int group, int rate) nsecs = 1000 * mi->overhead / MINSTREL_TRUNC(mi->avg_ampdu_len); nsecs += minstrel_mcs_groups[group].duration[rate]; - tp = 1000000 * ((mr->probability * 1000) / nsecs); + tp = 1000000 * ((prob * 1000) / nsecs); mr->cur_tp = MINSTREL_TRUNC(tp); } -- cgit v1.2.3-70-g09d2 From 24d47300d118c5909a51b7270276d749cce150a2 Mon Sep 17 00:00:00 2001 From: Karl Beldan Date: Tue, 19 Nov 2013 17:12:05 +0100 Subject: mac80211: set hw initial idle state ATM, the first call of ieee80211_do_open will configure the hw as non-idle, even if the interface being brought up is not a monitor, and this leads to inconsistent sequences like: register_hw() do_open(sta) hw_config(non-idle) (.. sta is non-idle ..) scan(sta) hw_config(idle) (after scan finishes) do_stop(sta) do_open(sta) (.. sta is idle ..) Signed-off-by: Karl Beldan Signed-off-by: Johannes Berg --- net/mac80211/main.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/mac80211/main.c b/net/mac80211/main.c index e765f77bb97..7d1c3ac48ed 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -940,6 +940,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) wiphy_debug(local->hw.wiphy, "Failed to initialize wep: %d\n", result); + local->hw.conf.flags = IEEE80211_CONF_IDLE; + ieee80211_led_init(local); rtnl_lock(); -- cgit v1.2.3-70-g09d2 From b49faea7655ec10ade15d7d007e4218ca578a513 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Thu, 14 Nov 2013 10:41:01 -0500 Subject: netfilter: ipset: fix incorret comparison in hash_netnet4_data_equal() Both sides of the comparison are the same, looks like a cut-and-paste error. Spotted by Coverity. Signed-off-by: Dave Jones Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_hash_netnet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c index 2bc2dec20b0..6226803fc49 100644 --- a/net/netfilter/ipset/ip_set_hash_netnet.c +++ b/net/netfilter/ipset/ip_set_hash_netnet.c @@ -59,7 +59,7 @@ hash_netnet4_data_equal(const struct hash_netnet4_elem *ip1, u32 *multi) { return ip1->ipcmp == ip2->ipcmp && - ip2->ccmp == ip2->ccmp; + ip1->ccmp == ip2->ccmp; } static inline int -- cgit v1.2.3-70-g09d2 From c297c8b99b07f496ff69a719cfb8e8fe852832ed Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Wed, 20 Nov 2013 13:00:17 -0500 Subject: SUNRPC: do not fail gss proc NULL calls with EACCES Otherwise RPCSEC_GSS_DESTROY messages are not sent. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/auth_gss.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 97912b40c25..42fdfc634e5 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -1517,7 +1517,7 @@ out: static int gss_refresh_null(struct rpc_task *task) { - return -EACCES; + return 0; } static __be32 * -- cgit v1.2.3-70-g09d2 From 0f0e2159c0c101426b705d70f43b9f423d51c869 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sat, 23 Nov 2013 13:01:50 +0100 Subject: genetlink: Fix uninitialized variable in genl_validate_assign_mc_groups() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit net/netlink/genetlink.c: In function ‘genl_validate_assign_mc_groups’: net/netlink/genetlink.c:217: warning: ‘err’ may be used uninitialized in this function Commit 2a94fe48f32ccf7321450a2cc07f2b724a444e5b ("genetlink: make multicast groups const, prevent abuse") split genl_register_mc_group() in multiple functions, but dropped the initialization of err. Initialize err to zero to fix this. Signed-off-by: Geert Uytterhoeven Signed-off-by: David S. Miller --- net/netlink/genetlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 4518a57aa5f..803206e8245 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -214,7 +214,7 @@ static int genl_validate_assign_mc_groups(struct genl_family *family) { int first_id; int n_groups = family->n_mcgrps; - int err, i; + int err = 0, i; bool groups_allocated = false; if (!n_groups) -- cgit v1.2.3-70-g09d2 From 5e53e689b737526308db2b5c9f56e9d0371a1676 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 24 Nov 2013 21:09:26 +0100 Subject: genetlink/pmcraid: use proper genetlink multicast API The pmcraid driver is abusing the genetlink API and is using its family ID as the multicast group ID, which is invalid and may belong to somebody else (and likely will.) Make it use the correct API, but since this may already be used as-is by userspace, reserve a family ID for this code and also reserve that group ID to not break userspace assumptions. My previous patch broke event delivery in the driver as I missed that it wasn't using the right API and forgot to update it later in my series. While changing this, I noticed that the genetlink code could use the static group ID instead of a strcmp(), so also do that for the VFS_DQUOT family. Cc: Anil Ravindranath Cc: "James E.J. Bottomley" Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- drivers/scsi/pmcraid.c | 20 +++++++++++++++----- include/uapi/linux/genetlink.h | 1 + net/netlink/genetlink.c | 11 +++++++++-- 3 files changed, 25 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/drivers/scsi/pmcraid.c b/drivers/scsi/pmcraid.c index bd6f743d87a..892ea616137 100644 --- a/drivers/scsi/pmcraid.c +++ b/drivers/scsi/pmcraid.c @@ -1404,11 +1404,22 @@ enum { }; #define PMCRAID_AEN_CMD_MAX (__PMCRAID_AEN_CMD_MAX - 1) +static struct genl_multicast_group pmcraid_mcgrps[] = { + { .name = "events", /* not really used - see ID discussion below */ }, +}; + static struct genl_family pmcraid_event_family = { - .id = GENL_ID_GENERATE, + /* + * Due to prior multicast group abuse (the code having assumed that + * the family ID can be used as a multicast group ID) we need to + * statically allocate a family (and thus group) ID. + */ + .id = GENL_ID_PMCRAID, .name = "pmcraid", .version = 1, - .maxattr = PMCRAID_AEN_ATTR_MAX + .maxattr = PMCRAID_AEN_ATTR_MAX, + .mcgrps = pmcraid_mcgrps, + .n_mcgrps = ARRAY_SIZE(pmcraid_mcgrps), }; /** @@ -1511,9 +1522,8 @@ static int pmcraid_notify_aen( return result; } - result = - genlmsg_multicast(&pmcraid_event_family, skb, 0, - pmcraid_event_family.id, GFP_ATOMIC); + result = genlmsg_multicast(&pmcraid_event_family, skb, + 0, 0, GFP_ATOMIC); /* If there are no listeners, genlmsg_multicast may return non-zero * value. diff --git a/include/uapi/linux/genetlink.h b/include/uapi/linux/genetlink.h index 1af72d8228e..c3363ba1ae0 100644 --- a/include/uapi/linux/genetlink.h +++ b/include/uapi/linux/genetlink.h @@ -28,6 +28,7 @@ struct genlmsghdr { #define GENL_ID_GENERATE 0 #define GENL_ID_CTRL NLMSG_MIN_TYPE #define GENL_ID_VFS_DQUOT (NLMSG_MIN_TYPE + 1) +#define GENL_ID_PMCRAID (NLMSG_MIN_TYPE + 2) /************************************************************************** * Controller diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 803206e8245..713671ae45a 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -74,9 +74,12 @@ static struct list_head family_ht[GENL_FAM_TAB_SIZE]; * Bit 17 is marked as already used since the VFS quota code * also abused this API and relied on family == group ID, we * cater to that by giving it a static family and group ID. + * Bit 18 is marked as already used since the PMCRAID driver + * did the same thing as the VFS quota code (maybe copied?) */ static unsigned long mc_group_start = 0x3 | BIT(GENL_ID_CTRL) | - BIT(GENL_ID_VFS_DQUOT); + BIT(GENL_ID_VFS_DQUOT) | + BIT(GENL_ID_PMCRAID); static unsigned long *mc_groups = &mc_group_start; static unsigned long mc_groups_longs = 1; @@ -139,6 +142,7 @@ static u16 genl_generate_id(void) for (i = 0; i <= GENL_MAX_ID - GENL_MIN_ID; i++) { if (id_gen_idx != GENL_ID_VFS_DQUOT && + id_gen_idx != GENL_ID_PMCRAID && !genl_family_find_byid(id_gen_idx)) return id_gen_idx; if (++id_gen_idx > GENL_MAX_ID) @@ -236,9 +240,12 @@ static int genl_validate_assign_mc_groups(struct genl_family *family) } else if (strcmp(family->name, "NET_DM") == 0) { first_id = 1; BUG_ON(n_groups != 1); - } else if (strcmp(family->name, "VFS_DQUOT") == 0) { + } else if (family->id == GENL_ID_VFS_DQUOT) { first_id = GENL_ID_VFS_DQUOT; BUG_ON(n_groups != 1); + } else if (family->id == GENL_ID_PMCRAID) { + first_id = GENL_ID_PMCRAID; + BUG_ON(n_groups != 1); } else { groups_allocated = true; err = genl_allocate_reserve_groups(n_groups, &first_id); -- cgit v1.2.3-70-g09d2 From 6eabca54d6781f61c7318517c1463a098acb7a87 Mon Sep 17 00:00:00 2001 From: Xufeng Zhang Date: Mon, 25 Nov 2013 11:26:57 +0800 Subject: sctp: Restore 'resent' bit to avoid retransmitted chunks for RTT measurements Currently retransmitted DATA chunks could also be used for RTT measurements since there are no flag to identify whether the transmitted DATA chunk is a new one or a retransmitted one. This problem is introduced by commit ae19c5486 ("sctp: remove 'resent' bit from the chunk") which inappropriately removed the 'resent' bit completely, instead of doing this, we should set the resent bit only for the retransmitted DATA chunks. Signed-off-by: Xufeng Zhang Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 1 + net/sctp/output.c | 3 ++- net/sctp/outqueue.c | 3 +++ 3 files changed, 6 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 2174d8da077..ea0ca5f6e62 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -629,6 +629,7 @@ struct sctp_chunk { #define SCTP_NEED_FRTX 0x1 #define SCTP_DONT_FRTX 0x2 __u16 rtt_in_progress:1, /* This chunk used for RTT calc? */ + resent:1, /* Has this chunk ever been resent. */ has_tsn:1, /* Does this chunk have a TSN yet? */ has_ssn:1, /* Does this chunk have a SSN yet? */ singleton:1, /* Only chunk in the packet? */ diff --git a/net/sctp/output.c b/net/sctp/output.c index e650978daf2..0e2644d0a77 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -474,10 +474,11 @@ int sctp_packet_transmit(struct sctp_packet *packet) * for a given destination transport address. */ - if (!tp->rto_pending) { + if (!chunk->resent && !tp->rto_pending) { chunk->rtt_in_progress = 1; tp->rto_pending = 1; } + has_data = 1; } diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index abb6db008df..f51ba985a36 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -446,6 +446,8 @@ void sctp_retransmit_mark(struct sctp_outq *q, transport->rto_pending = 0; } + chunk->resent = 1; + /* Move the chunk to the retransmit queue. The chunks * on the retransmit queue are always kept in order. */ @@ -1375,6 +1377,7 @@ static void sctp_check_transmitted(struct sctp_outq *q, * instance). */ if (!tchunk->tsn_gap_acked && + !tchunk->resent && tchunk->rtt_in_progress) { tchunk->rtt_in_progress = 0; rtt = jiffies - tchunk->sent_at; -- cgit v1.2.3-70-g09d2 From 66028310aedc782e3a9a221f1a9ec12b7e587e50 Mon Sep 17 00:00:00 2001 From: Gao feng Date: Mon, 25 Nov 2013 17:21:24 +0800 Subject: sit: use kfree_skb to replace dev_kfree_skb In failure case, we should use kfree_skb not dev_kfree_skb to free skbuff, dev_kfree_skb is defined as consume_skb. Trace takes advantage of this point. Signed-off-by: Gao feng Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/sit.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 8435267836a..366fbba3359 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -951,7 +951,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, if (!new_skb) { ip_rt_put(rt); dev->stats.tx_dropped++; - dev_kfree_skb(skb); + kfree_skb(skb); return NETDEV_TX_OK; } if (skb->sk) @@ -977,7 +977,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, tx_error_icmp: dst_link_failure(skb); tx_error: - dev_kfree_skb(skb); + kfree_skb(skb); out: dev->stats.tx_errors++; return NETDEV_TX_OK; @@ -1017,7 +1017,7 @@ static netdev_tx_t sit_tunnel_xmit(struct sk_buff *skb, tx_err: dev->stats.tx_errors++; - dev_kfree_skb(skb); + kfree_skb(skb); return NETDEV_TX_OK; } -- cgit v1.2.3-70-g09d2 From 39af0c409e8c06b51caf7dc43e49ef96ae790a55 Mon Sep 17 00:00:00 2001 From: Baker Zhang Date: Tue, 26 Nov 2013 09:29:15 +0800 Subject: net: remove outdated comment for ipv4 and ipv6 protocol handler since f9242b6b28d61295f2bf7e8adfb1060b382e5381 inet: Sanitize inet{,6} protocol demux. there are not pretended hash tables for ipv4 or ipv6 protocol handler. Signed-off-by: Baker Zhang Signed-off-by: David S. Miller --- net/ipv4/protocol.c | 8 -------- net/ipv6/protocol.c | 4 ---- 2 files changed, 12 deletions(-) (limited to 'net') diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c index ce848461acb..46d6a1c923a 100644 --- a/net/ipv4/protocol.c +++ b/net/ipv4/protocol.c @@ -31,10 +31,6 @@ const struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS] __read_mostly; const struct net_offload __rcu *inet_offloads[MAX_INET_PROTOS] __read_mostly; -/* - * Add a protocol handler to the hash tables - */ - int inet_add_protocol(const struct net_protocol *prot, unsigned char protocol) { if (!prot->netns_ok) { @@ -55,10 +51,6 @@ int inet_add_offload(const struct net_offload *prot, unsigned char protocol) } EXPORT_SYMBOL(inet_add_offload); -/* - * Remove a protocol from the hash tables. - */ - int inet_del_protocol(const struct net_protocol *prot, unsigned char protocol) { int ret; diff --git a/net/ipv6/protocol.c b/net/ipv6/protocol.c index 22d1bd4670d..e048cf1bb6a 100644 --- a/net/ipv6/protocol.c +++ b/net/ipv6/protocol.c @@ -36,10 +36,6 @@ int inet6_add_protocol(const struct inet6_protocol *prot, unsigned char protocol } EXPORT_SYMBOL(inet6_add_protocol); -/* - * Remove a protocol from the hash tables. - */ - int inet6_del_protocol(const struct inet6_protocol *prot, unsigned char protocol) { int ret; -- cgit v1.2.3-70-g09d2 From ec6f809ff6f19fafba3212f6aff0dda71dfac8e8 Mon Sep 17 00:00:00 2001 From: Veaceslav Falico Date: Fri, 29 Nov 2013 09:53:23 +0100 Subject: af_packet: block BH in prb_shutdown_retire_blk_timer() Currently we're using plain spin_lock() in prb_shutdown_retire_blk_timer(), however the timer might fire right in the middle and thus try to re-aquire the same spinlock, leaving us in a endless loop. To fix that, use the spin_lock_bh() to block it. Fixes: f6fb8f100b80 ("af-packet: TPACKET_V3 flexible buffer implementation.") CC: "David S. Miller" CC: Daniel Borkmann CC: Willem de Bruijn CC: Phil Sutter CC: Eric Dumazet Reported-by: Jan Stancek Tested-by: Jan Stancek Signed-off-by: Veaceslav Falico Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/packet/af_packet.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index ac27c86ef6d..ba2548bd85b 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -439,9 +439,9 @@ static void prb_shutdown_retire_blk_timer(struct packet_sock *po, pkc = tx_ring ? &po->tx_ring.prb_bdqc : &po->rx_ring.prb_bdqc; - spin_lock(&rb_queue->lock); + spin_lock_bh(&rb_queue->lock); pkc->delete_blk_timer = 1; - spin_unlock(&rb_queue->lock); + spin_unlock_bh(&rb_queue->lock); prb_del_retire_blk_timer(pkc); } -- cgit v1.2.3-70-g09d2 From db31c55a6fb245fdbb752a2ca4aefec89afabb06 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 27 Nov 2013 15:40:21 +0300 Subject: net: clamp ->msg_namelen instead of returning an error If kmsg->msg_namelen > sizeof(struct sockaddr_storage) then in the original code that would lead to memory corruption in the kernel if you had audit configured. If you didn't have audit configured it was harmless. There are some programs such as beta versions of Ruby which use too large of a buffer and returning an error code breaks them. We should clamp the ->msg_namelen value instead. Fixes: 1661bf364ae9 ("net: heap overflow in __audit_sockaddr()") Reported-by: Eric Wong Signed-off-by: Dan Carpenter Tested-by: Eric Wong Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/compat.c | 2 +- net/socket.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/compat.c b/net/compat.c index 618c6a8a911..dd32e34c1e2 100644 --- a/net/compat.c +++ b/net/compat.c @@ -72,7 +72,7 @@ int get_compat_msghdr(struct msghdr *kmsg, struct compat_msghdr __user *umsg) __get_user(kmsg->msg_flags, &umsg->msg_flags)) return -EFAULT; if (kmsg->msg_namelen > sizeof(struct sockaddr_storage)) - return -EINVAL; + kmsg->msg_namelen = sizeof(struct sockaddr_storage); kmsg->msg_name = compat_ptr(tmp1); kmsg->msg_iov = compat_ptr(tmp2); kmsg->msg_control = compat_ptr(tmp3); diff --git a/net/socket.c b/net/socket.c index 0b18693f2be..e83c416708a 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1973,7 +1973,7 @@ static int copy_msghdr_from_user(struct msghdr *kmsg, if (copy_from_user(kmsg, umsg, sizeof(struct msghdr))) return -EFAULT; if (kmsg->msg_namelen > sizeof(struct sockaddr_storage)) - return -EINVAL; + kmsg->msg_namelen = sizeof(struct sockaddr_storage); return 0; } -- cgit v1.2.3-70-g09d2 From d3f7d56a7a4671d395e8af87071068a195257bf6 Mon Sep 17 00:00:00 2001 From: Shawn Landden Date: Sun, 24 Nov 2013 22:36:28 -0800 Subject: net: update consumers of MSG_MORE to recognize MSG_SENDPAGE_NOTLAST Commit 35f9c09fe (tcp: tcp_sendpages() should call tcp_push() once) added an internal flag MSG_SENDPAGE_NOTLAST, similar to MSG_MORE. algif_hash, algif_skcipher, and udp used MSG_MORE from tcp_sendpages() and need to see the new flag as identical to MSG_MORE. This fixes sendfile() on AF_ALG. v3: also fix udp Cc: Tom Herbert Cc: Eric Dumazet Cc: David S. Miller Cc: # 3.4.x + 3.2.x Reported-and-tested-by: Shawn Landden Original-patch: Richard Weinberger Signed-off-by: Shawn Landden Signed-off-by: David S. Miller --- crypto/algif_hash.c | 3 +++ crypto/algif_skcipher.c | 3 +++ net/ipv4/udp.c | 3 +++ 3 files changed, 9 insertions(+) (limited to 'net') diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c index ef5356cd280..850246206b1 100644 --- a/crypto/algif_hash.c +++ b/crypto/algif_hash.c @@ -114,6 +114,9 @@ static ssize_t hash_sendpage(struct socket *sock, struct page *page, struct hash_ctx *ctx = ask->private; int err; + if (flags & MSG_SENDPAGE_NOTLAST) + flags |= MSG_MORE; + lock_sock(sk); sg_init_table(ctx->sgl.sg, 1); sg_set_page(ctx->sgl.sg, page, size, offset); diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index 6a6dfc062d2..a19c027b29b 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -378,6 +378,9 @@ static ssize_t skcipher_sendpage(struct socket *sock, struct page *page, struct skcipher_sg_list *sgl; int err = -EINVAL; + if (flags & MSG_SENDPAGE_NOTLAST) + flags |= MSG_MORE; + lock_sock(sk); if (!ctx->more && ctx->used) goto unlock; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 44dfaa09b58..bf2b85b2549 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1098,6 +1098,9 @@ int udp_sendpage(struct sock *sk, struct page *page, int offset, struct udp_sock *up = udp_sk(sk); int ret; + if (flags & MSG_SENDPAGE_NOTLAST) + flags |= MSG_MORE; + if (!up->pending) { struct msghdr msg = { .msg_flags = flags|MSG_MORE }; -- cgit v1.2.3-70-g09d2 From f1d8cba61c3c4b1eb88e507249c4cb8d635d9a76 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 28 Nov 2013 09:51:22 -0800 Subject: inet: fix possible seqlock deadlocks In commit c9e9042994d3 ("ipv4: fix possible seqlock deadlock") I left another places where IP_INC_STATS_BH() were improperly used. udp_sendmsg(), ping_v4_sendmsg() and tcp_v4_connect() are called from process context, not from softirq context. This was detected by lockdep seqlock support. Reported-by: jongman heo Fixes: 584bdf8cbdf6 ("[IPV4]: Fix "ipOutNoRoutes" counter error for TCP and UDP") Fixes: c319b4d76b9e ("net: ipv4: add IPPROTO_ICMP socket kind") Signed-off-by: Eric Dumazet Cc: Hannes Frederic Sowa Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv4/ping.c | 2 +- net/ipv4/tcp_ipv4.c | 2 +- net/ipv4/udp.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 840cf1b9e6e..242e7f4ed6f 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -772,7 +772,7 @@ int ping_v4_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, err = PTR_ERR(rt); rt = NULL; if (err == -ENETUNREACH) - IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); + IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES); goto out; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 59a6f8b90cd..06721392475 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -177,7 +177,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (IS_ERR(rt)) { err = PTR_ERR(rt); if (err == -ENETUNREACH) - IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); + IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); return err; } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index bf2b85b2549..44f6a20fa29 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -999,7 +999,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, err = PTR_ERR(rt); rt = NULL; if (err == -ENETUNREACH) - IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); + IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES); goto out; } -- cgit v1.2.3-70-g09d2 From 7f88c6b23afbd31545c676dea77ba9593a1a14bf Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Fri, 29 Nov 2013 06:39:44 +0100 Subject: ipv6: fix possible seqlock deadlock in ip6_finish_output2 IPv6 stats are 64 bits and thus are protected with a seqlock. By not disabling bottom-half we could deadlock here if we don't disable bh and a softirq reentrantly updates the same mib. Cc: Eric Dumazet Signed-off-by: Hannes Frederic Sowa Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 59df872e2f4..4acdb63495d 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -116,8 +116,8 @@ static int ip6_finish_output2(struct sk_buff *skb) } rcu_read_unlock_bh(); - IP6_INC_STATS_BH(dev_net(dst->dev), - ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); + IP6_INC_STATS(dev_net(dst->dev), + ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); kfree_skb(skb); return -EINVAL; } -- cgit v1.2.3-70-g09d2 From 213e3bc723e53af0976421d2808ea3f6cc821c56 Mon Sep 17 00:00:00 2001 From: Arvid Brodin Date: Fri, 29 Nov 2013 23:37:07 +0100 Subject: net/hsr: Very small fix of comment style. Signed-off-by: Arvid Brodin Signed-off-by: David S. Miller --- net/hsr/hsr_framereg.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index 003f5bb3acd..4bdab152187 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -288,7 +288,8 @@ void hsr_addr_subst_dest(struct hsr_priv *hsr_priv, struct ethhdr *ethhdr, static bool seq_nr_after(u16 a, u16 b) { /* Remove inconsistency where - * seq_nr_after(a, b) == seq_nr_before(a, b) */ + * seq_nr_after(a, b) == seq_nr_before(a, b) + */ if ((int) b - a == 32768) return false; -- cgit v1.2.3-70-g09d2 From 98bf8362220af717862b8262b21348774890b7b4 Mon Sep 17 00:00:00 2001 From: Arvid Brodin Date: Fri, 29 Nov 2013 23:38:16 +0100 Subject: net/hsr: Support iproute print_opt ('ip -details ...') This implements the rtnl_link_ops fill_info routine for HSR. Signed-off-by: Arvid Brodin Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 4 +++- net/hsr/hsr_netlink.c | 28 ++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index b78566f59ab..6db460121f8 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -488,7 +488,9 @@ enum { IFLA_HSR_UNSPEC, IFLA_HSR_SLAVE1, IFLA_HSR_SLAVE2, - IFLA_HSR_MULTICAST_SPEC, + IFLA_HSR_MULTICAST_SPEC, /* Last byte of supervision addr */ + IFLA_HSR_SUPERVISION_ADDR, /* Supervision frame multicast addr */ + IFLA_HSR_SEQ_NR, __IFLA_HSR_MAX, }; diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c index 5325af85eea..01a5261ac7a 100644 --- a/net/hsr/hsr_netlink.c +++ b/net/hsr/hsr_netlink.c @@ -23,6 +23,8 @@ static const struct nla_policy hsr_policy[IFLA_HSR_MAX + 1] = { [IFLA_HSR_SLAVE1] = { .type = NLA_U32 }, [IFLA_HSR_SLAVE2] = { .type = NLA_U32 }, [IFLA_HSR_MULTICAST_SPEC] = { .type = NLA_U8 }, + [IFLA_HSR_SUPERVISION_ADDR] = { .type = NLA_BINARY, .len = ETH_ALEN }, + [IFLA_HSR_SEQ_NR] = { .type = NLA_U16 }, }; @@ -59,6 +61,31 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev, return hsr_dev_finalize(dev, link, multicast_spec); } +static int hsr_fill_info(struct sk_buff *skb, const struct net_device *dev) +{ + struct hsr_priv *hsr_priv; + + hsr_priv = netdev_priv(dev); + + if (hsr_priv->slave[0]) + if (nla_put_u32(skb, IFLA_HSR_SLAVE1, hsr_priv->slave[0]->ifindex)) + goto nla_put_failure; + + if (hsr_priv->slave[1]) + if (nla_put_u32(skb, IFLA_HSR_SLAVE2, hsr_priv->slave[1]->ifindex)) + goto nla_put_failure; + + if (nla_put(skb, IFLA_HSR_SUPERVISION_ADDR, ETH_ALEN, + hsr_priv->sup_multicast_addr) || + nla_put_u16(skb, IFLA_HSR_SEQ_NR, hsr_priv->sequence_nr)) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + static struct rtnl_link_ops hsr_link_ops __read_mostly = { .kind = "hsr", .maxtype = IFLA_HSR_MAX, @@ -66,6 +93,7 @@ static struct rtnl_link_ops hsr_link_ops __read_mostly = { .priv_size = sizeof(struct hsr_priv), .setup = hsr_dev_setup, .newlink = hsr_newlink, + .fill_info = hsr_fill_info, }; -- cgit v1.2.3-70-g09d2 From 7c2781fa92f5b9ca3188817a56a2ced0400355f3 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Fri, 29 Nov 2013 11:02:43 -0800 Subject: netem: missing break in ge loss generator There is a missing break statement in the Gilbert Elliot loss model generator which makes state machine behave incorrectly. Reported-by: Martin Burri Signed-off-by: David S. Miller --- net/sched/sch_netem.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 75c94e59a3b..6e91323f3da 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -268,6 +268,7 @@ static bool loss_gilb_ell(struct netem_sched_data *q) clg->state = 2; if (net_random() < clg->a4) return true; + break; case 2: if (net_random() < clg->a2) clg->state = 1; -- cgit v1.2.3-70-g09d2 From ab6c27be8178a4682446faa5aa017b948997937f Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Fri, 29 Nov 2013 11:03:35 -0800 Subject: netem: fix loss 4 state model Patch from developers of the alternative loss models, downloaded from: http://netgroup.uniroma2.it/twiki/bin/view.cgi/Main/NetemCLG "In the case 1 of the switch statement in the if conditions we need to add clg->a4 to clg->a1, according to the model." Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/sched/sch_netem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 6e91323f3da..9685624f7bc 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -215,10 +215,10 @@ static bool loss_4state(struct netem_sched_data *q) if (rnd < clg->a4) { clg->state = 4; return true; - } else if (clg->a4 < rnd && rnd < clg->a1) { + } else if (clg->a4 < rnd && rnd < clg->a1 + clg->a4) { clg->state = 3; return true; - } else if (clg->a1 < rnd) + } else if (clg->a1 + clg->a4 < rnd) clg->state = 1; break; -- cgit v1.2.3-70-g09d2 From eff7979f00b2c546f36f6829f4072c8db54763a9 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Fri, 29 Nov 2013 11:04:26 -0800 Subject: netem: fix gemodel loss generator Patch from developers of the alternative loss models, downloaded from: http://netgroup.uniroma2.it/twiki/bin/view.cgi/Main/NetemCLG "in case 2, of the switch we change the direction of the inequality to net_random()>clg->a3, because clg->a3 is h in the GE model and when h is 0 all packets will be lost." Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/sched/sch_netem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 9685624f7bc..bccd52b36e9 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -272,7 +272,7 @@ static bool loss_gilb_ell(struct netem_sched_data *q) case 2: if (net_random() < clg->a2) clg->state = 1; - if (clg->a3 > net_random()) + if (net_random() > clg->a3) return true; } -- cgit v1.2.3-70-g09d2 From 3868204d6b89ea373a273e760609cb08020beb1a Mon Sep 17 00:00:00 2001 From: "fan.du" Date: Sun, 1 Dec 2013 16:28:48 +0800 Subject: {pktgen, xfrm} Update IPv4 header total len and checksum after tranformation commit a553e4a6317b2cfc7659542c10fe43184ffe53da ("[PKTGEN]: IPSEC support") tried to support IPsec ESP transport transformation for pktgen, but acctually this doesn't work at all for two reasons(The orignal transformed packet has bad IPv4 checksum value, as well as wrong auth value, reported by wireshark) - After transpormation, IPv4 header total length needs update, because encrypted payload's length is NOT same as that of plain text. - After transformation, IPv4 checksum needs re-caculate because of payload has been changed. With this patch, armmed pktgen with below cofiguration, Wireshark is able to decrypted ESP packet generated by pktgen without any IPv4 checksum error or auth value error. pgset "flag IPSEC" pgset "flows 1" Signed-off-by: Fan Du Signed-off-by: David S. Miller --- net/core/pktgen.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'net') diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 261357a6630..a797fff7f22 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2527,6 +2527,8 @@ static int process_ipsec(struct pktgen_dev *pkt_dev, if (x) { int ret; __u8 *eth; + struct iphdr *iph; + nhead = x->props.header_len - skb_headroom(skb); if (nhead > 0) { ret = pskb_expand_head(skb, nhead, 0, GFP_ATOMIC); @@ -2548,6 +2550,11 @@ static int process_ipsec(struct pktgen_dev *pkt_dev, eth = (__u8 *) skb_push(skb, ETH_HLEN); memcpy(eth, pkt_dev->hh, 12); *(u16 *) ð[12] = protocol; + + /* Update IPv4 header len as well as checksum value */ + iph = ip_hdr(skb); + iph->tot_len = htons(skb->len - ETH_HLEN); + ip_send_check(iph); } } return 1; -- cgit v1.2.3-70-g09d2 From dda444d52496aa8ddc501561bca580f1374a96a9 Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Tue, 26 Nov 2013 16:07:26 +0100 Subject: cfg80211: disable CSA for all drivers The channel switch announcement code has some major locking problems which can cause a deadlock in worst case. A series of fixes has been proposed, but these are non-trivial and need to be tested first. Therefore disable CSA completely for 3.13. Signed-off-by: Simon Wunderlich Signed-off-by: Johannes Berg --- net/wireless/core.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/wireless/core.c b/net/wireless/core.c index 00a65ba3aea..52b865fb735 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -454,6 +454,12 @@ int wiphy_register(struct wiphy *wiphy) /* support for 5/10 MHz is broken due to nl80211 API mess - disable */ wiphy->flags &= ~WIPHY_FLAG_SUPPORTS_5_10_MHZ; + /* + * There are major locking problems in nl80211/mac80211 for CSA, + * disable for all drivers until this has been reworked. + */ + wiphy->flags &= ~WIPHY_FLAG_HAS_CHANNEL_SWITCH; + #ifdef CONFIG_PM if (WARN_ON(wiphy->wowlan && (wiphy->wowlan->flags & WIPHY_WOWLAN_GTK_REKEY_FAILURE) && -- cgit v1.2.3-70-g09d2 From 0834ae3c3af44480834cce128b6fef83006e537f Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Tue, 26 Nov 2013 16:45:18 +0100 Subject: mac80211: check csa wiphy flag in ibss before switching When external CSA IEs are received (beacons or action messages), a channel switch is triggered as well. This should only be allowed on devices which actually support channel switches, otherwise disconnect. (For the corresponding userspace invocation, the wiphy flag is checked in nl80211). Signed-off-by: Simon Wunderlich Signed-off-by: Johannes Berg --- net/mac80211/ibss.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 531be040b9a..27a39de8967 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -823,6 +823,10 @@ ieee80211_ibss_process_chanswitch(struct ieee80211_sub_if_data *sdata, if (err) return false; + /* channel switch is not supported, disconnect */ + if (!(sdata->local->hw.wiphy->flags & WIPHY_FLAG_HAS_CHANNEL_SWITCH)) + goto disconnect; + params.count = csa_ie.count; params.chandef = csa_ie.chandef; -- cgit v1.2.3-70-g09d2 From 30e56918dd1e6d64350661f186657f6a6f2646e6 Mon Sep 17 00:00:00 2001 From: Duan Jiong Date: Tue, 26 Nov 2013 15:46:56 +0800 Subject: ipv6: judge the accept_ra_defrtr before calling rt6_route_rcv when dealing with a RA message, if accept_ra_defrtr is false, the kernel will not add the default route, and then deal with the following route information options. Unfortunately, those options maybe contain default route, so let's judge the accept_ra_defrtr before calling rt6_route_rcv. Signed-off-by: Duan Jiong Signed-off-by: David S. Miller --- net/ipv6/ndisc.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 3512177deb4..30086517139 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1277,6 +1277,9 @@ skip_linkparms: ri->prefix_len == 0) continue; #endif + if (ri->prefix_len == 0 && + !in6_dev->cnf.accept_ra_defrtr) + continue; if (ri->prefix_len > in6_dev->cnf.accept_ra_rt_info_max_plen) continue; rt6_route_rcv(skb->dev, (u8*)p, (p->nd_opt_len) << 3, -- cgit v1.2.3-70-g09d2 From 57ec0afe293834f8ca9499214ed74748d89eaa44 Mon Sep 17 00:00:00 2001 From: François-Xavier Le Bail Date: Mon, 2 Dec 2013 11:28:49 +0100 Subject: ipv6: fix third arg of anycast_dst_alloc(), must be bool. Signed-off-by: Francois-Xavier Le Bail Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 12c97d8aa6b..d5fa5b8c443 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2613,7 +2613,7 @@ static void init_loopback(struct net_device *dev) if (sp_ifa->rt) continue; - sp_rt = addrconf_dst_alloc(idev, &sp_ifa->addr, 0); + sp_rt = addrconf_dst_alloc(idev, &sp_ifa->addr, false); /* Failure cases are ignored */ if (!IS_ERR(sp_rt)) { -- cgit v1.2.3-70-g09d2 From 7150aede5dd241539686e17d9592f5ebd28a2cda Mon Sep 17 00:00:00 2001 From: Kamala R Date: Mon, 2 Dec 2013 19:55:21 +0530 Subject: IPv6: Fixed support for blackhole and prohibit routes The behaviour of blackhole and prohibit routes has been corrected by setting the input and output pointers of the dst variable appropriately. For blackhole routes, they are set to dst_discard and to ip6_pkt_discard and ip6_pkt_discard_out respectively for prohibit routes. ipv6: ip6_pkt_prohibit(_out) should not depend on CONFIG_IPV6_MULTIPLE_TABLES We need ip6_pkt_prohibit(_out) available without CONFIG_IPV6_MULTIPLE_TABLES Signed-off-by: Kamala R Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/route.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 7faa9d5e150..ddb9d41c8ee 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -84,6 +84,8 @@ static int ip6_dst_gc(struct dst_ops *ops); static int ip6_pkt_discard(struct sk_buff *skb); static int ip6_pkt_discard_out(struct sk_buff *skb); +static int ip6_pkt_prohibit(struct sk_buff *skb); +static int ip6_pkt_prohibit_out(struct sk_buff *skb); static void ip6_link_failure(struct sk_buff *skb); static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu); @@ -234,9 +236,6 @@ static const struct rt6_info ip6_null_entry_template = { #ifdef CONFIG_IPV6_MULTIPLE_TABLES -static int ip6_pkt_prohibit(struct sk_buff *skb); -static int ip6_pkt_prohibit_out(struct sk_buff *skb); - static const struct rt6_info ip6_prohibit_entry_template = { .dst = { .__refcnt = ATOMIC_INIT(1), @@ -1565,21 +1564,24 @@ int ip6_route_add(struct fib6_config *cfg) goto out; } } - rt->dst.output = ip6_pkt_discard_out; - rt->dst.input = ip6_pkt_discard; rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP; switch (cfg->fc_type) { case RTN_BLACKHOLE: rt->dst.error = -EINVAL; + rt->dst.output = dst_discard; + rt->dst.input = dst_discard; break; case RTN_PROHIBIT: rt->dst.error = -EACCES; + rt->dst.output = ip6_pkt_prohibit_out; + rt->dst.input = ip6_pkt_prohibit; break; case RTN_THROW: - rt->dst.error = -EAGAIN; - break; default: - rt->dst.error = -ENETUNREACH; + rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN + : -ENETUNREACH; + rt->dst.output = ip6_pkt_discard_out; + rt->dst.input = ip6_pkt_discard; break; } goto install_route; @@ -2144,8 +2146,6 @@ static int ip6_pkt_discard_out(struct sk_buff *skb) return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES); } -#ifdef CONFIG_IPV6_MULTIPLE_TABLES - static int ip6_pkt_prohibit(struct sk_buff *skb) { return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES); @@ -2157,8 +2157,6 @@ static int ip6_pkt_prohibit_out(struct sk_buff *skb) return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES); } -#endif - /* * Allocate a dst for local (unicast / anycast) address. */ -- cgit v1.2.3-70-g09d2 From 18fc25c94eadc52a42c025125af24657a93638c0 Mon Sep 17 00:00:00 2001 From: Venkat Venkatsubra Date: Mon, 2 Dec 2013 15:41:39 -0800 Subject: rds: prevent BUG_ON triggered on congestion update to loopback After congestion update on a local connection, when rds_ib_xmit returns less bytes than that are there in the message, rds_send_xmit calls back rds_ib_xmit with an offset that causes BUG_ON(off & RDS_FRAG_SIZE) to trigger. For a 4Kb PAGE_SIZE rds_ib_xmit returns min(8240,4096)=4096 when actually the message contains 8240 bytes. rds_send_xmit thinks there is more to send and calls rds_ib_xmit again with a data offset "off" of 4096-48(rds header) =4048 bytes thus hitting the BUG_ON(off & RDS_FRAG_SIZE) [RDS_FRAG_SIZE=4k]. The commit 6094628bfd94323fc1cea05ec2c6affd98c18f7f "rds: prevent BUG_ON triggering on congestion map updates" introduced this regression. That change was addressing the triggering of a different BUG_ON in rds_send_xmit() on PowerPC architecture with 64Kbytes PAGE_SIZE: BUG_ON(ret != 0 && conn->c_xmit_sg == rm->data.op_nents); This was the sequence it was going through: (rds_ib_xmit) /* Do not send cong updates to IB loopback */ if (conn->c_loopback && rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) { rds_cong_map_updated(conn->c_fcong, ~(u64) 0); return sizeof(struct rds_header) + RDS_CONG_MAP_BYTES; } rds_ib_xmit returns 8240 rds_send_xmit: c_xmit_data_off = 0 + 8240 - 48 (rds header accounted only the first time) = 8192 c_xmit_data_off < 65536 (sg->length), so calls rds_ib_xmit again rds_ib_xmit returns 8240 rds_send_xmit: c_xmit_data_off = 8192 + 8240 = 16432, calls rds_ib_xmit again and so on (c_xmit_data_off 24672,32912,41152,49392,57632) rds_ib_xmit returns 8240 On this iteration this sequence causes the BUG_ON in rds_send_xmit: while (ret) { tmp = min_t(int, ret, sg->length - conn->c_xmit_data_off); [tmp = 65536 - 57632 = 7904] conn->c_xmit_data_off += tmp; [c_xmit_data_off = 57632 + 7904 = 65536] ret -= tmp; [ret = 8240 - 7904 = 336] if (conn->c_xmit_data_off == sg->length) { conn->c_xmit_data_off = 0; sg++; conn->c_xmit_sg++; BUG_ON(ret != 0 && conn->c_xmit_sg == rm->data.op_nents); [c_xmit_sg = 1, rm->data.op_nents = 1] What the current fix does: Since the congestion update over loopback is not actually transmitted as a message, all that rds_ib_xmit needs to do is let the caller think the full message has been transmitted and not return partial bytes. It will return 8240 (RDS_CONG_MAP_BYTES+48) when PAGE_SIZE is 4Kb. And 64Kb+48 when page size is 64Kb. Reported-by: Josh Hunt Tested-by: Honggang Li Acked-by: Bang Nguyen Signed-off-by: Venkat Venkatsubra Signed-off-by: David S. Miller --- net/rds/ib_send.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index e5909498117..37be6e226d1 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -552,9 +552,8 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, && rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) { rds_cong_map_updated(conn->c_fcong, ~(u64) 0); scat = &rm->data.op_sg[sg]; - ret = sizeof(struct rds_header) + RDS_CONG_MAP_BYTES; - ret = min_t(int, ret, scat->length - conn->c_xmit_data_off); - return ret; + ret = max_t(int, RDS_CONG_MAP_BYTES, scat->length); + return sizeof(struct rds_header) + ret; } /* FIXME we may overallocate here */ -- cgit v1.2.3-70-g09d2 From 76c82d7a3d24a4ae1f9b098287c18055546c1a47 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Wed, 4 Dec 2013 09:26:52 -0500 Subject: net_sched: Fail if missing mandatory action operation methods Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/act_api.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sched/act_api.c b/net/sched/act_api.c index fd7072827a4..618695e8419 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -270,6 +270,10 @@ int tcf_register_action(struct tc_action_ops *act) { struct tc_action_ops *a, **ap; + /* Must supply act, dump, cleanup and init */ + if (!act->act || !act->dump || !act->cleanup || !act->init) + return -EINVAL; + write_lock(&act_mod_lock); for (ap = &act_base; (a = *ap) != NULL; ap = &a->next) { if (act->type == a->type || (strcmp(act->kind, a->kind) == 0)) { @@ -381,7 +385,7 @@ int tcf_action_exec(struct sk_buff *skb, const struct tc_action *act, } while ((a = act) != NULL) { repeat: - if (a->ops && a->ops->act) { + if (a->ops) { ret = a->ops->act(skb, a, res); if (TC_MUNGED & skb->tc_verd) { /* copied already, allow trampling */ @@ -405,7 +409,7 @@ void tcf_action_destroy(struct tc_action *act, int bind) struct tc_action *a; for (a = act; a; a = act) { - if (a->ops && a->ops->cleanup) { + if (a->ops) { if (a->ops->cleanup(a, bind) == ACT_P_DELETED) module_put(a->ops->owner); act = act->next; @@ -424,7 +428,7 @@ tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { int err = -EINVAL; - if (a->ops == NULL || a->ops->dump == NULL) + if (a->ops == NULL) return err; return a->ops->dump(skb, a, bind, ref); } @@ -436,7 +440,7 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref) unsigned char *b = skb_tail_pointer(skb); struct nlattr *nest; - if (a->ops == NULL || a->ops->dump == NULL) + if (a->ops == NULL) return err; if (nla_put_string(skb, TCA_KIND, a->ops->kind)) -- cgit v1.2.3-70-g09d2 From 63ef6174654a986f263d25e957ef9d1ff243f649 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Wed, 4 Dec 2013 09:26:53 -0500 Subject: net_sched: Default action lookup method for actions Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/act_api.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 618695e8419..d1a022e441b 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -274,6 +274,9 @@ int tcf_register_action(struct tc_action_ops *act) if (!act->act || !act->dump || !act->cleanup || !act->init) return -EINVAL; + if (!act->lookup) + act->lookup = tcf_hash_search; + write_lock(&act_mod_lock); for (ap = &act_base; (a = *ap) != NULL; ap = &a->next) { if (act->type == a->type || (strcmp(act->kind, a->kind) == 0)) { @@ -727,8 +730,6 @@ tcf_action_get_1(struct nlattr *nla, struct nlmsghdr *n, u32 portid) a->ops = tc_lookup_action(tb[TCA_ACT_KIND]); if (a->ops == NULL) goto err_free; - if (a->ops->lookup == NULL) - goto err_mod; err = -ENOENT; if (a->ops->lookup(a, index) == 0) goto err_mod; -- cgit v1.2.3-70-g09d2 From 43c00dcf8888daea234226e8adf09c37b00d2245 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Wed, 4 Dec 2013 09:26:54 -0500 Subject: net_sched: Use default action lookup functions Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/act_csum.c | 1 - net/sched/act_gact.c | 1 - net/sched/act_ipt.c | 2 -- net/sched/act_mirred.c | 1 - net/sched/act_nat.c | 1 - net/sched/act_pedit.c | 1 - net/sched/act_police.c | 1 - 7 files changed, 8 deletions(-) (limited to 'net') diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 3a4c0caa1f7..4225a9382a2 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -585,7 +585,6 @@ static struct tc_action_ops act_csum_ops = { .act = tcf_csum, .dump = tcf_csum_dump, .cleanup = tcf_csum_cleanup, - .lookup = tcf_hash_search, .init = tcf_csum_init, .walk = tcf_generic_walker }; diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index fd2b3cff5fa..15851da99f3 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -206,7 +206,6 @@ static struct tc_action_ops act_gact_ops = { .act = tcf_gact, .dump = tcf_gact_dump, .cleanup = tcf_gact_cleanup, - .lookup = tcf_hash_search, .init = tcf_gact_init, .walk = tcf_generic_walker }; diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 60d88b6b956..1d3e19180c2 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -298,7 +298,6 @@ static struct tc_action_ops act_ipt_ops = { .act = tcf_ipt, .dump = tcf_ipt_dump, .cleanup = tcf_ipt_cleanup, - .lookup = tcf_hash_search, .init = tcf_ipt_init, .walk = tcf_generic_walker }; @@ -312,7 +311,6 @@ static struct tc_action_ops act_xt_ops = { .act = tcf_ipt, .dump = tcf_ipt_dump, .cleanup = tcf_ipt_cleanup, - .lookup = tcf_hash_search, .init = tcf_ipt_init, .walk = tcf_generic_walker }; diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 977c10e0631..6cb16ec3d62 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -271,7 +271,6 @@ static struct tc_action_ops act_mirred_ops = { .act = tcf_mirred, .dump = tcf_mirred_dump, .cleanup = tcf_mirred_cleanup, - .lookup = tcf_hash_search, .init = tcf_mirred_init, .walk = tcf_generic_walker }; diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 876f0ef2969..30c13dedc94 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -308,7 +308,6 @@ static struct tc_action_ops act_nat_ops = { .act = tcf_nat, .dump = tcf_nat_dump, .cleanup = tcf_nat_cleanup, - .lookup = tcf_hash_search, .init = tcf_nat_init, .walk = tcf_generic_walker }; diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 7ed78c9e505..ab4fc56f885 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -243,7 +243,6 @@ static struct tc_action_ops act_pedit_ops = { .act = tcf_pedit, .dump = tcf_pedit_dump, .cleanup = tcf_pedit_cleanup, - .lookup = tcf_hash_search, .init = tcf_pedit_init, .walk = tcf_generic_walker }; diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 272d8e924cf..16a62c36928 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -407,7 +407,6 @@ static struct tc_action_ops act_police_ops = { .act = tcf_act_police, .dump = tcf_act_police_dump, .cleanup = tcf_act_police_cleanup, - .lookup = tcf_hash_search, .init = tcf_act_police_locate, .walk = tcf_act_police_walker }; -- cgit v1.2.3-70-g09d2 From 382ca8a1ad8963c7676585f9e25f4c5ff8b28439 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Wed, 4 Dec 2013 09:26:55 -0500 Subject: net_sched: Provide default walker function for actions Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/act_api.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/sched/act_api.c b/net/sched/act_api.c index d1a022e441b..69cb848e834 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -274,8 +274,11 @@ int tcf_register_action(struct tc_action_ops *act) if (!act->act || !act->dump || !act->cleanup || !act->init) return -EINVAL; + /* Supply defaults */ if (!act->lookup) act->lookup = tcf_hash_search; + if (!act->walk) + act->walk = tcf_generic_walker; write_lock(&act_mod_lock); for (ap = &act_base; (a = *ap) != NULL; ap = &a->next) { @@ -1089,12 +1092,6 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) memset(&a, 0, sizeof(struct tc_action)); a.ops = a_o; - if (a_o->walk == NULL) { - WARN(1, "tc_dump_action: %s !capable of dumping table\n", - a_o->kind); - goto out_module_put; - } - nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, cb->nlh->nlmsg_type, sizeof(*t), 0); if (!nlh) -- cgit v1.2.3-70-g09d2 From 651a6493ae5c055c78777bb7178c23b5565631da Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Wed, 4 Dec 2013 09:26:56 -0500 Subject: net_sched: Use default action walker methods Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/act_csum.c | 1 - net/sched/act_gact.c | 1 - net/sched/act_ipt.c | 2 -- net/sched/act_mirred.c | 1 - net/sched/act_nat.c | 1 - net/sched/act_pedit.c | 1 - net/sched/act_simple.c | 1 - net/sched/act_skbedit.c | 1 - 8 files changed, 9 deletions(-) (limited to 'net') diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 4225a9382a2..5c5edf56adb 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -586,7 +586,6 @@ static struct tc_action_ops act_csum_ops = { .dump = tcf_csum_dump, .cleanup = tcf_csum_cleanup, .init = tcf_csum_init, - .walk = tcf_generic_walker }; MODULE_DESCRIPTION("Checksum updating actions"); diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index 15851da99f3..5645a4d32ab 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -207,7 +207,6 @@ static struct tc_action_ops act_gact_ops = { .dump = tcf_gact_dump, .cleanup = tcf_gact_cleanup, .init = tcf_gact_init, - .walk = tcf_generic_walker }; MODULE_AUTHOR("Jamal Hadi Salim(2002-4)"); diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 1d3e19180c2..882a89762f7 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -299,7 +299,6 @@ static struct tc_action_ops act_ipt_ops = { .dump = tcf_ipt_dump, .cleanup = tcf_ipt_cleanup, .init = tcf_ipt_init, - .walk = tcf_generic_walker }; static struct tc_action_ops act_xt_ops = { @@ -312,7 +311,6 @@ static struct tc_action_ops act_xt_ops = { .dump = tcf_ipt_dump, .cleanup = tcf_ipt_cleanup, .init = tcf_ipt_init, - .walk = tcf_generic_walker }; MODULE_AUTHOR("Jamal Hadi Salim(2002-13)"); diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 6cb16ec3d62..252378121ce 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -272,7 +272,6 @@ static struct tc_action_ops act_mirred_ops = { .dump = tcf_mirred_dump, .cleanup = tcf_mirred_cleanup, .init = tcf_mirred_init, - .walk = tcf_generic_walker }; MODULE_AUTHOR("Jamal Hadi Salim(2002)"); diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 30c13dedc94..6a15ace0024 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -309,7 +309,6 @@ static struct tc_action_ops act_nat_ops = { .dump = tcf_nat_dump, .cleanup = tcf_nat_cleanup, .init = tcf_nat_init, - .walk = tcf_generic_walker }; MODULE_DESCRIPTION("Stateless NAT actions"); diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index ab4fc56f885..03b67674169 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -244,7 +244,6 @@ static struct tc_action_ops act_pedit_ops = { .dump = tcf_pedit_dump, .cleanup = tcf_pedit_cleanup, .init = tcf_pedit_init, - .walk = tcf_generic_walker }; MODULE_AUTHOR("Jamal Hadi Salim(2002-4)"); diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 7725eb4ab75..31157d3e729 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -201,7 +201,6 @@ static struct tc_action_ops act_simp_ops = { .dump = tcf_simp_dump, .cleanup = tcf_simp_cleanup, .init = tcf_simp_init, - .walk = tcf_generic_walker, }; MODULE_AUTHOR("Jamal Hadi Salim(2005)"); diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index cb4221171f9..35ea643b432 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -203,7 +203,6 @@ static struct tc_action_ops act_skbedit_ops = { .dump = tcf_skbedit_dump, .cleanup = tcf_skbedit_cleanup, .init = tcf_skbedit_init, - .walk = tcf_generic_walker, }; MODULE_AUTHOR("Alexander Duyck, "); -- cgit v1.2.3-70-g09d2 From 78ac814f120da17053b3d52aa215c7c547c5e77d Mon Sep 17 00:00:00 2001 From: wangweidong Date: Wed, 4 Dec 2013 17:32:39 +0800 Subject: sctp: disable max_burst when the max_burst is 0 As Michael pointed out that when max_burst is 0, it just disable max_burst. It declared in rfc6458#section-8.1.24. so add the check in sctp_transport_burst_limited, when it 0, just do nothing. Reviewed-by: Daniel Borkmann Suggested-by: Vlad Yasevich Suggested-by: Michael Tuexen Signed-off-by: Wang Weidong Acked-by: Neil Horman Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/transport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/transport.c b/net/sctp/transport.c index e332efb124c..efc46ffed1f 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -573,7 +573,7 @@ void sctp_transport_burst_limited(struct sctp_transport *t) u32 old_cwnd = t->cwnd; u32 max_burst_bytes; - if (t->burst_limited) + if (t->burst_limited || asoc->max_burst == 0) return; max_burst_bytes = t->flight_size + (asoc->max_burst * asoc->pathmtu); -- cgit v1.2.3-70-g09d2 From 7f2cbdc28c034ef2c3be729681f631d5744e3cd5 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 4 Dec 2013 20:12:04 -0800 Subject: tcp_memcontrol: Cleanup/fix cg_proto->memory_pressure handling. kill memcg_tcp_enter_memory_pressure. The only function of memcg_tcp_enter_memory_pressure was to reduce deal with the unnecessary abstraction that was tcp_memcontrol. Now that struct tcp_memcontrol is gone remove this unnecessary function, the unnecessary function pointer, and modify sk_enter_memory_pressure to set this field directly, just as sk_leave_memory_pressure cleas this field directly. This fixes a small bug I intruduced when killing struct tcp_memcontrol that caused memcg_tcp_enter_memory_pressure to never be called and thus failed to ever set cg_proto->memory_pressure. Remove the cg_proto enter_memory_pressure function as it now serves no useful purpose. Don't test cg_proto->memory_presser in sk_leave_memory_pressure before clearing it. The test was originally there to ensure that the pointer was non-NULL. Now that cg_proto is not a pointer the pointer does not matter. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- include/net/sock.h | 6 ++---- net/ipv4/tcp_memcontrol.c | 7 ------- 2 files changed, 2 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/include/net/sock.h b/include/net/sock.h index e3a18ff0c38..2ef3c3eca47 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1035,7 +1035,6 @@ enum cg_proto_flags { }; struct cg_proto { - void (*enter_memory_pressure)(struct sock *sk); struct res_counter memory_allocated; /* Current allocated memory. */ struct percpu_counter sockets_allocated; /* Current number of sockets. */ int memory_pressure; @@ -1155,8 +1154,7 @@ static inline void sk_leave_memory_pressure(struct sock *sk) struct proto *prot = sk->sk_prot; for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto)) - if (cg_proto->memory_pressure) - cg_proto->memory_pressure = 0; + cg_proto->memory_pressure = 0; } } @@ -1171,7 +1169,7 @@ static inline void sk_enter_memory_pressure(struct sock *sk) struct proto *prot = sk->sk_prot; for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto)) - cg_proto->enter_memory_pressure(sk); + cg_proto->memory_pressure = 1; } sk->sk_prot->enter_memory_pressure(sk); diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c index 269a89ecd2f..f7e522c558b 100644 --- a/net/ipv4/tcp_memcontrol.c +++ b/net/ipv4/tcp_memcontrol.c @@ -6,13 +6,6 @@ #include #include -static void memcg_tcp_enter_memory_pressure(struct sock *sk) -{ - if (sk->sk_cgrp->memory_pressure) - sk->sk_cgrp->memory_pressure = 1; -} -EXPORT_SYMBOL(memcg_tcp_enter_memory_pressure); - int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss) { /* -- cgit v1.2.3-70-g09d2 From 239c78db9c41a8f524cce60507440d72229d73bc Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Thu, 5 Dec 2013 23:29:19 +0100 Subject: net: clear local_df when passing skb between namespaces We must clear local_df when passing the skb between namespaces as the packet is not local to the new namespace any more and thus may not get fragmented by local rules. Fred Templin noticed that other namespaces do fragment IPv6 packets while forwarding. Instead they should have send back a PTB. The same problem should be present when forwarding DF-IPv4 packets between namespaces. Reported-by: Templin, Fred L Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/core/skbuff.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 2718fed53d8..06e72d3cdf6 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3584,6 +3584,7 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet) skb->tstamp.tv64 = 0; skb->pkt_type = PACKET_HOST; skb->skb_iif = 0; + skb->local_df = 0; skb_dst_drop(skb); skb->mark = 0; secpath_reset(skb); -- cgit v1.2.3-70-g09d2 From 859828c0ea476b42f3a93d69d117aaba90994b6f Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 5 Dec 2013 16:27:37 +0100 Subject: br: fix use of ->rx_handler_data in code executed on non-rx_handler path br_stp_rcv() is reached by non-rx_handler path. That means there is no guarantee that dev is bridge port and therefore simple NULL check of ->rx_handler_data is not enough. There is need to check if dev is really bridge port and since only rcu read lock is held here, do it by checking ->rx_handler pointer. Note that synchronize_net() in netdev_rx_handler_unregister() ensures this approach as valid. Introduced originally by: commit f350a0a87374418635689471606454abc7beaa3a "bridge: use rx_handler_data pointer to store net_bridge_port pointer" Fixed but not in the best way by: commit b5ed54e94d324f17c97852296d61a143f01b227a "bridge: fix RCU races with bridge port" Reintroduced by: commit 716ec052d2280d511e10e90ad54a86f5b5d4dcc2 "bridge: fix NULL pointer deref of br_port_get_rcu" Please apply to stable trees as well. Thanks. RH bugzilla reference: https://bugzilla.redhat.com/show_bug.cgi?id=1025770 Reported-by: Laine Stump Debugged-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Signed-off-by: Jiri Pirko Acked-by: Michael S. Tsirkin Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/bridge/br_private.h | 10 ++++++++++ net/bridge/br_stp_bpdu.c | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 229d820bdf0..045d56eaeca 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -426,6 +426,16 @@ netdev_features_t br_features_recompute(struct net_bridge *br, int br_handle_frame_finish(struct sk_buff *skb); rx_handler_result_t br_handle_frame(struct sk_buff **pskb); +static inline bool br_rx_handler_check_rcu(const struct net_device *dev) +{ + return rcu_dereference(dev->rx_handler) == br_handle_frame; +} + +static inline struct net_bridge_port *br_port_get_check_rcu(const struct net_device *dev) +{ + return br_rx_handler_check_rcu(dev) ? br_port_get_rcu(dev) : NULL; +} + /* br_ioctl.c */ int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd, diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c index 8660ea3be70..bdb459d21ad 100644 --- a/net/bridge/br_stp_bpdu.c +++ b/net/bridge/br_stp_bpdu.c @@ -153,7 +153,7 @@ void br_stp_rcv(const struct stp_proto *proto, struct sk_buff *skb, if (buf[0] != 0 || buf[1] != 0 || buf[2] != 0) goto err; - p = br_port_get_rcu(dev); + p = br_port_get_check_rcu(dev); if (!p) goto err; -- cgit v1.2.3-70-g09d2 From b4ef4ce09308955d1aa54a289c0162607b3aa16c Mon Sep 17 00:00:00 2001 From: Sergey Popovich Date: Fri, 6 Dec 2013 10:57:19 +0200 Subject: netfilter: xt_hashlimit: fix proc entry leak in netns destroy path In (32263dd1b netfilter: xt_hashlimit: fix namespace destroy path) the hashlimit_net_exit() function is always called right before hashlimit_mt_destroy() to release netns data. If you use xt_hashlimit with IPv4 and IPv6 together, this produces the following splat via netconsole in the netns destroy path: Pid: 9499, comm: kworker/u:0 Tainted: G WC O 3.2.0-5-netctl-amd64-core2 Call Trace: [] ? warn_slowpath_common+0x78/0x8c [] ? warn_slowpath_fmt+0x45/0x4a [] ? remove_proc_entry+0xd8/0x22e [] ? kfree+0x5b/0x6c [] ? hashlimit_net_exit+0x45/0x8d [xt_hashlimit] [] ? ops_exit_list+0x1c/0x44 [] ? cleanup_net+0xf1/0x180 [] ? should_resched+0x5/0x23 [] ? process_one_work+0x161/0x269 [] ? cwq_activate_delayed_work+0x3c/0x48 [] ? worker_thread+0xc2/0x145 [] ? manage_workers.isra.25+0x15b/0x15b [] ? kthread+0x76/0x7e [] ? kernel_thread_helper+0x4/0x10 [] ? kthread_worker_fn+0x139/0x139 [] ? gs_change+0x13/0x13 ---[ end trace d8c3cc0ad163ef79 ]--- ------------[ cut here ]------------ WARNING: at /usr/src/linux-3.2.52/debian/build/source_netctl/fs/proc/generic.c:849 remove_proc_entry+0x217/0x22e() Hardware name: remove_proc_entry: removing non-empty directory 'net/ip6t_hashlimit', leaking at least 'IN-REJECT' This is due to lack of removal net/ip6t_hashlimit/* entries in hashlimit_proc_net_exit(), since only IPv4 entries are deleted. Fix it by always removing the IPv4 and IPv6 entries and their parent directories in the netns destroy path. Signed-off-by: Sergey Popovich Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_hashlimit.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 9ff035c7140..a3910fc2122 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -325,21 +325,24 @@ static void htable_gc(unsigned long htlong) add_timer(&ht->timer); } -static void htable_destroy(struct xt_hashlimit_htable *hinfo) +static void htable_remove_proc_entry(struct xt_hashlimit_htable *hinfo) { struct hashlimit_net *hashlimit_net = hashlimit_pernet(hinfo->net); struct proc_dir_entry *parent; - del_timer_sync(&hinfo->timer); - if (hinfo->family == NFPROTO_IPV4) parent = hashlimit_net->ipt_hashlimit; else parent = hashlimit_net->ip6t_hashlimit; - if(parent != NULL) + if (parent != NULL) remove_proc_entry(hinfo->name, parent); +} +static void htable_destroy(struct xt_hashlimit_htable *hinfo) +{ + del_timer_sync(&hinfo->timer); + htable_remove_proc_entry(hinfo); htable_selective_cleanup(hinfo, select_all); kfree(hinfo->name); vfree(hinfo); @@ -883,21 +886,15 @@ static int __net_init hashlimit_proc_net_init(struct net *net) static void __net_exit hashlimit_proc_net_exit(struct net *net) { struct xt_hashlimit_htable *hinfo; - struct proc_dir_entry *pde; struct hashlimit_net *hashlimit_net = hashlimit_pernet(net); - /* recent_net_exit() is called before recent_mt_destroy(). Make sure - * that the parent xt_recent proc entry is is empty before trying to - * remove it. + /* hashlimit_net_exit() is called before hashlimit_mt_destroy(). + * Make sure that the parent ipt_hashlimit and ip6t_hashlimit proc + * entries is empty before trying to remove it. */ mutex_lock(&hashlimit_mutex); - pde = hashlimit_net->ipt_hashlimit; - if (pde == NULL) - pde = hashlimit_net->ip6t_hashlimit; - hlist_for_each_entry(hinfo, &hashlimit_net->htables, node) - remove_proc_entry(hinfo->name, pde); - + htable_remove_proc_entry(hinfo); hashlimit_net->ipt_hashlimit = NULL; hashlimit_net->ip6t_hashlimit = NULL; mutex_unlock(&hashlimit_mutex); -- cgit v1.2.3-70-g09d2 From cf9dc09d0949f0b5953fb08caa10bba0dc7ee71f Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 24 Nov 2013 20:39:10 +0100 Subject: netfilter: nf_tables: fix missing rules flushing per table This patch allows you to atomically remove all rules stored in a table via the NFT_MSG_DELRULE command. You only need to indicate the specific table and no chain to flush all rules stored in that table. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 46 +++++++++++++++++++++++++++++++------------ 1 file changed, 33 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index dcddc49c0e0..f93b7d06f4b 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1717,6 +1717,19 @@ nf_tables_delrule_one(struct nft_ctx *ctx, struct nft_rule *rule) return -ENOENT; } +static int nf_table_delrule_by_chain(struct nft_ctx *ctx) +{ + struct nft_rule *rule; + int err; + + list_for_each_entry(rule, &ctx->chain->rules, list) { + err = nf_tables_delrule_one(ctx, rule); + if (err < 0) + return err; + } + return 0; +} + static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) @@ -1725,8 +1738,8 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, const struct nft_af_info *afi; struct net *net = sock_net(skb->sk); const struct nft_table *table; - struct nft_chain *chain; - struct nft_rule *rule, *tmp; + struct nft_chain *chain = NULL; + struct nft_rule *rule; int family = nfmsg->nfgen_family, err = 0; struct nft_ctx ctx; @@ -1738,22 +1751,29 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, if (IS_ERR(table)) return PTR_ERR(table); - chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]); - if (IS_ERR(chain)) - return PTR_ERR(chain); + if (nla[NFTA_RULE_CHAIN]) { + chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]); + if (IS_ERR(chain)) + return PTR_ERR(chain); + } nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla); - if (nla[NFTA_RULE_HANDLE]) { - rule = nf_tables_rule_lookup(chain, nla[NFTA_RULE_HANDLE]); - if (IS_ERR(rule)) - return PTR_ERR(rule); + if (chain) { + if (nla[NFTA_RULE_HANDLE]) { + rule = nf_tables_rule_lookup(chain, + nla[NFTA_RULE_HANDLE]); + if (IS_ERR(rule)) + return PTR_ERR(rule); - err = nf_tables_delrule_one(&ctx, rule); - } else { - /* Remove all rules in this chain */ - list_for_each_entry_safe(rule, tmp, &chain->rules, list) { err = nf_tables_delrule_one(&ctx, rule); + } else { + err = nf_table_delrule_by_chain(&ctx); + } + } else { + list_for_each_entry(chain, &table->chains, list) { + ctx.chain = chain; + err = nf_table_delrule_by_chain(&ctx); if (err < 0) break; } -- cgit v1.2.3-70-g09d2 From 66e56cd46b93ef407c60adcac62cf33b06119d50 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 6 Dec 2013 11:36:15 +0100 Subject: packet: fix send path when running with proto == 0 Commit e40526cb20b5 introduced a cached dev pointer, that gets hooked into register_prot_hook(), __unregister_prot_hook() to update the device used for the send path. We need to fix this up, as otherwise this will not work with sockets created with protocol = 0, plus with sll_protocol = 0 passed via sockaddr_ll when doing the bind. So instead, assign the pointer directly. The compiler can inline these helper functions automagically. While at it, also assume the cached dev fast-path as likely(), and document this variant of socket creation as it seems it is not widely used (seems not even the author of TX_RING was aware of that in his reference example [1]). Tested with reproducer from e40526cb20b5. [1] http://wiki.ipxwarzone.com/index.php5?title=Linux_packet_mmap#Example Fixes: e40526cb20b5 ("packet: fix use after free race in send path when dev is released") Signed-off-by: Daniel Borkmann Tested-by: Salam Noureddine Tested-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- Documentation/networking/packet_mmap.txt | 10 +++++ net/packet/af_packet.c | 65 ++++++++++++++++++++------------ 2 files changed, 50 insertions(+), 25 deletions(-) (limited to 'net') diff --git a/Documentation/networking/packet_mmap.txt b/Documentation/networking/packet_mmap.txt index c01223628a8..8e48e3b1422 100644 --- a/Documentation/networking/packet_mmap.txt +++ b/Documentation/networking/packet_mmap.txt @@ -123,6 +123,16 @@ Transmission process is similar to capture as shown below. [shutdown] close() --------> destruction of the transmission socket and deallocation of all associated resources. +Socket creation and destruction is also straight forward, and is done +the same way as in capturing described in the previous paragraph: + + int fd = socket(PF_PACKET, mode, 0); + +The protocol can optionally be 0 in case we only want to transmit +via this socket, which avoids an expensive call to packet_rcv(). +In this case, you also need to bind(2) the TX_RING with sll_protocol = 0 +set. Otherwise, htons(ETH_P_ALL) or any other protocol, for example. + Binding the socket to your network interface is mandatory (with zero copy) to know the header size of frames used in the circular buffer. diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index ba2548bd85b..88cfbc18955 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -237,6 +237,30 @@ struct packet_skb_cb { static void __fanout_unlink(struct sock *sk, struct packet_sock *po); static void __fanout_link(struct sock *sk, struct packet_sock *po); +static struct net_device *packet_cached_dev_get(struct packet_sock *po) +{ + struct net_device *dev; + + rcu_read_lock(); + dev = rcu_dereference(po->cached_dev); + if (likely(dev)) + dev_hold(dev); + rcu_read_unlock(); + + return dev; +} + +static void packet_cached_dev_assign(struct packet_sock *po, + struct net_device *dev) +{ + rcu_assign_pointer(po->cached_dev, dev); +} + +static void packet_cached_dev_reset(struct packet_sock *po) +{ + RCU_INIT_POINTER(po->cached_dev, NULL); +} + /* register_prot_hook must be invoked with the po->bind_lock held, * or from a context in which asynchronous accesses to the packet * socket is not possible (packet_create()). @@ -246,12 +270,10 @@ static void register_prot_hook(struct sock *sk) struct packet_sock *po = pkt_sk(sk); if (!po->running) { - if (po->fanout) { + if (po->fanout) __fanout_link(sk, po); - } else { + else dev_add_pack(&po->prot_hook); - rcu_assign_pointer(po->cached_dev, po->prot_hook.dev); - } sock_hold(sk); po->running = 1; @@ -270,12 +292,11 @@ static void __unregister_prot_hook(struct sock *sk, bool sync) struct packet_sock *po = pkt_sk(sk); po->running = 0; - if (po->fanout) { + + if (po->fanout) __fanout_unlink(sk, po); - } else { + else __dev_remove_pack(&po->prot_hook); - RCU_INIT_POINTER(po->cached_dev, NULL); - } __sock_put(sk); @@ -2059,19 +2080,6 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, return tp_len; } -static struct net_device *packet_cached_dev_get(struct packet_sock *po) -{ - struct net_device *dev; - - rcu_read_lock(); - dev = rcu_dereference(po->cached_dev); - if (dev) - dev_hold(dev); - rcu_read_unlock(); - - return dev; -} - static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) { struct sk_buff *skb; @@ -2088,7 +2096,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) mutex_lock(&po->pg_vec_lock); - if (saddr == NULL) { + if (likely(saddr == NULL)) { dev = packet_cached_dev_get(po); proto = po->num; addr = NULL; @@ -2242,7 +2250,7 @@ static int packet_snd(struct socket *sock, * Get and verify the address. */ - if (saddr == NULL) { + if (likely(saddr == NULL)) { dev = packet_cached_dev_get(po); proto = po->num; addr = NULL; @@ -2451,6 +2459,8 @@ static int packet_release(struct socket *sock) spin_lock(&po->bind_lock); unregister_prot_hook(sk, false); + packet_cached_dev_reset(po); + if (po->prot_hook.dev) { dev_put(po->prot_hook.dev); po->prot_hook.dev = NULL; @@ -2506,14 +2516,17 @@ static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protoc spin_lock(&po->bind_lock); unregister_prot_hook(sk, true); + po->num = protocol; po->prot_hook.type = protocol; if (po->prot_hook.dev) dev_put(po->prot_hook.dev); - po->prot_hook.dev = dev; + po->prot_hook.dev = dev; po->ifindex = dev ? dev->ifindex : 0; + packet_cached_dev_assign(po, dev); + if (protocol == 0) goto out_unlock; @@ -2626,7 +2639,8 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, po = pkt_sk(sk); sk->sk_family = PF_PACKET; po->num = proto; - RCU_INIT_POINTER(po->cached_dev, NULL); + + packet_cached_dev_reset(po); sk->sk_destruct = packet_sock_destruct; sk_refcnt_debug_inc(sk); @@ -3337,6 +3351,7 @@ static int packet_notifier(struct notifier_block *this, sk->sk_error_report(sk); } if (msg == NETDEV_UNREGISTER) { + packet_cached_dev_reset(po); po->ifindex = -1; if (po->prot_hook.dev) dev_put(po->prot_hook.dev); -- cgit v1.2.3-70-g09d2 From a3300ef4bbb1f1e33ff0400e1e6cf7733d988f4f Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Sat, 7 Dec 2013 03:33:45 +0100 Subject: ipv6: don't count addrconf generated routes against gc limit Brett Ciphery reported that new ipv6 addresses failed to get installed because the addrconf generated dsts where counted against the dst gc limit. We don't need to count those routes like we currently don't count administratively added routes. Because the max_addresses check enforces a limit on unbounded address generation first in case someone plays with router advertisments, we are still safe here. Reported-by: Brett Ciphery Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/route.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index ddb9d41c8ee..a0a48ac3403 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2166,12 +2166,10 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, bool anycast) { struct net *net = dev_net(idev->dev); - struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL); - - if (!rt) { - net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n"); + struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, + DST_NOCOUNT, NULL); + if (!rt) return ERR_PTR(-ENOMEM); - } in6_dev_hold(idev); -- cgit v1.2.3-70-g09d2 From d323e92cc3f4edd943610557c9ea1bb4bb5056e8 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Sun, 8 Dec 2013 09:36:56 -0500 Subject: net: drop_monitor: fix the value of maxattr maxattr in genl_family should be used to save the max attribute type, but not the max command type. Drop monitor doesn't support any attributes, so we should leave it as zero. Signed-off-by: David S. Miller --- net/core/drop_monitor.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 95897183226..e70301eb7a4 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -64,7 +64,6 @@ static struct genl_family net_drop_monitor_family = { .hdrsize = 0, .name = "NET_DM", .version = 2, - .maxattr = NET_DM_CMD_MAX, }; static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_cpu_data); -- cgit v1.2.3-70-g09d2 From 673498b8ed4c4d4b7221c5309d891c5eac2b7528 Mon Sep 17 00:00:00 2001 From: Stefan Tomanek Date: Tue, 10 Dec 2013 23:21:25 +0100 Subject: inet: fix NULL pointer Oops in fib(6)_rule_suppress This changes ensures that the routing entry investigated by the suppress function actually does point to a device struct before following that pointer, fixing a possible kernel oops situation when verifying the interface group associated with a routing table entry. According to Daniel Golle, this Oops can be triggered by a user process trying to establish an outgoing IPv6 connection while having no real IPv6 connectivity set up (only autoassigned link-local addresses). Fixes: 6ef94cfafba15 ("fib_rules: add route suppression based on ifgroup") Reported-by: Daniel Golle Tested-by: Daniel Golle Signed-off-by: Stefan Tomanek Signed-off-by: David S. Miller --- net/ipv4/fib_rules.c | 5 ++++- net/ipv6/fib6_rules.c | 6 +++++- 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 523be38e37d..f2e15738534 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -104,7 +104,10 @@ errout: static bool fib4_rule_suppress(struct fib_rule *rule, struct fib_lookup_arg *arg) { struct fib_result *result = (struct fib_result *) arg->result; - struct net_device *dev = result->fi->fib_dev; + struct net_device *dev = NULL; + + if (result->fi) + dev = result->fi->fib_dev; /* do not accept result if the route does * not meet the required prefix length diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index e27591635f9..3fd0a578329 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -122,7 +122,11 @@ out: static bool fib6_rule_suppress(struct fib_rule *rule, struct fib_lookup_arg *arg) { struct rt6_info *rt = (struct rt6_info *) arg->result; - struct net_device *dev = rt->rt6i_idev->dev; + struct net_device *dev = NULL; + + if (rt->rt6i_idev) + dev = rt->rt6i_idev->dev; + /* do not accept result if the route does * not meet the required prefix length */ -- cgit v1.2.3-70-g09d2 From 12663bfc97c8b3fdb292428105dd92d563164050 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Sat, 7 Dec 2013 17:26:27 -0500 Subject: net: unix: allow set_peek_off to fail unix_dgram_recvmsg() will hold the readlock of the socket until recv is complete. In the same time, we may try to setsockopt(SO_PEEK_OFF) which will hang until unix_dgram_recvmsg() will complete (which can take a while) without allowing us to break out of it, triggering a hung task spew. Instead, allow set_peek_off to fail, this way userspace will not hang. Signed-off-by: Sasha Levin Acked-by: Pavel Emelyanov Signed-off-by: David S. Miller --- include/linux/net.h | 2 +- net/core/sock.c | 2 +- net/unix/af_unix.c | 8 ++++++-- 3 files changed, 8 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/include/linux/net.h b/include/linux/net.h index 4bcee94cef9..69be3e6079c 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -181,7 +181,7 @@ struct proto_ops { int offset, size_t size, int flags); ssize_t (*splice_read)(struct socket *sock, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags); - void (*set_peek_off)(struct sock *sk, int val); + int (*set_peek_off)(struct sock *sk, int val); }; #define DECLARE_SOCKADDR(type, dst, src) \ diff --git a/net/core/sock.c b/net/core/sock.c index ab20ed9b0f3..5393b4b719d 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -882,7 +882,7 @@ set_rcvbuf: case SO_PEEK_OFF: if (sock->ops->set_peek_off) - sock->ops->set_peek_off(sk, val); + ret = sock->ops->set_peek_off(sk, val); else ret = -EOPNOTSUPP; break; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 01625ccc3ae..a0ca162e5bd 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -530,13 +530,17 @@ static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *, static int unix_seqpacket_recvmsg(struct kiocb *, struct socket *, struct msghdr *, size_t, int); -static void unix_set_peek_off(struct sock *sk, int val) +static int unix_set_peek_off(struct sock *sk, int val) { struct unix_sock *u = unix_sk(sk); - mutex_lock(&u->readlock); + if (mutex_lock_interruptible(&u->readlock)) + return -EINTR; + sk->sk_peek_off = val; mutex_unlock(&u->readlock); + + return 0; } -- cgit v1.2.3-70-g09d2 From 993b858e37b3120ee76d9957a901cca22312ffaa Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Mon, 9 Dec 2013 22:54:46 -0800 Subject: tipc: correct the order of stopping services at rmmod The 'signal handler' service in TIPC is a mechanism that makes it possible to postpone execution of functions, by launcing them into a job queue for execution in a separate tasklet, independent of the launching execution thread. When we do rmmod on the tipc module, this service is stopped after the network service. At the same time, the stopping of the network service may itself launch jobs for execution, with the risk that these functions may be scheduled for execution after the data structures meant to be accessed by the job have already been deleted. We have seen this happen, most often resulting in an oops. This commit ensures that the signal handler is the very first to be stopped when TIPC is shut down, so there are no surprises during the cleanup of the other services. Signed-off-by: Jon Maloy Reviewed-by: Paul Gortmaker Signed-off-by: David S. Miller --- net/tipc/core.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/tipc/core.c b/net/tipc/core.c index fd4eeeaa972..c6d3f75a9e1 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -113,7 +113,6 @@ err: static void tipc_core_stop(void) { tipc_netlink_stop(); - tipc_handler_stop(); tipc_cfg_stop(); tipc_subscr_stop(); tipc_nametbl_stop(); @@ -146,9 +145,10 @@ static int tipc_core_start(void) res = tipc_subscr_start(); if (!res) res = tipc_cfg_init(); - if (res) + if (res) { + tipc_handler_stop(); tipc_core_stop(); - + } return res; } @@ -178,6 +178,7 @@ static int __init tipc_init(void) static void __exit tipc_exit(void) { + tipc_handler_stop(); tipc_core_stop_net(); tipc_core_stop(); pr_info("Deactivated\n"); -- cgit v1.2.3-70-g09d2 From 00ede977098be3296d42d05a4265ec5ec4a28419 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 9 Dec 2013 22:54:47 -0800 Subject: tipc: protect handler_enabled variable with qitem_lock spin lock 'handler_enabled' is a global flag indicating whether the TIPC signal handling service is enabled or not. The lack of lock protection for this flag incurs a risk for contention, so that a tipc_k_signal() call might queue a signal handler to a destroyed signal queue, with unpredictable results. To correct this, we let the already existing 'qitem_lock' protect the flag, as it already does with the queue itself. This way, we ensure that the flag always is consistent across all cores. Signed-off-by: Ying Xue Reviewed-by: Paul Gortmaker Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/handler.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/tipc/handler.c b/net/tipc/handler.c index b36f0fcd9bd..e4bc8a29674 100644 --- a/net/tipc/handler.c +++ b/net/tipc/handler.c @@ -56,12 +56,13 @@ unsigned int tipc_k_signal(Handler routine, unsigned long argument) { struct queue_item *item; + spin_lock_bh(&qitem_lock); if (!handler_enabled) { pr_err("Signal request ignored by handler\n"); + spin_unlock_bh(&qitem_lock); return -ENOPROTOOPT; } - spin_lock_bh(&qitem_lock); item = kmem_cache_alloc(tipc_queue_item_cache, GFP_ATOMIC); if (!item) { pr_err("Signal queue out of memory\n"); @@ -112,10 +113,14 @@ void tipc_handler_stop(void) struct list_head *l, *n; struct queue_item *item; - if (!handler_enabled) + spin_lock_bh(&qitem_lock); + if (!handler_enabled) { + spin_unlock_bh(&qitem_lock); return; - + } handler_enabled = 0; + spin_unlock_bh(&qitem_lock); + tasklet_kill(&tipc_tasklet); spin_lock_bh(&qitem_lock); -- cgit v1.2.3-70-g09d2 From 9f70f46bd4c7267d48ef461a1d613ec9ec0d520c Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Tue, 10 Dec 2013 06:48:15 -0500 Subject: sctp: properly latch and use autoclose value from sock to association Currently, sctp associations latch a sockets autoclose value to an association at association init time, subject to capping constraints from the max_autoclose sysctl value. This leads to an odd situation where an application may set a socket level autoclose timeout, but sliently sctp will limit the autoclose timeout to something less than that. Fix this by modifying the autoclose setsockopt function to check the limit, cap it and warn the user via syslog that the timeout is capped. This will allow getsockopt to return valid autoclose timeout values that reflect what subsequent associations actually use. While were at it, also elimintate the assoc->autoclose variable, it duplicates whats in the timeout array, which leads to multiple sources for the same information, that may differ (as the former isn't subject to any capping). This gives us the timeout information in a canonical place and saves some space in the association structure as well. Signed-off-by: Neil Horman Acked-by: Vlad Yasevich CC: Wang Weidong CC: David Miller CC: Vlad Yasevich CC: netdev@vger.kernel.org Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 6 ------ net/sctp/associola.c | 5 +---- net/sctp/output.c | 3 ++- net/sctp/sm_statefuns.c | 12 ++++++------ net/sctp/socket.c | 4 ++++ 5 files changed, 13 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index ea0ca5f6e62..67b5d006827 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1726,12 +1726,6 @@ struct sctp_association { /* How many duplicated TSNs have we seen? */ int numduptsns; - /* Number of seconds of idle time before an association is closed. - * In the association context, this is really used as a boolean - * since the real timeout is stored in the timeouts array - */ - __u32 autoclose; - /* These are to support * "SCTP Extensions for Dynamic Reconfiguration of IP Addresses * and Enforcement of Flow and Message Limits" diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 68a27f9796d..31ed008c8e1 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -154,8 +154,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a asoc->timeouts[SCTP_EVENT_TIMEOUT_HEARTBEAT] = 0; asoc->timeouts[SCTP_EVENT_TIMEOUT_SACK] = asoc->sackdelay; - asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE] = - min_t(unsigned long, sp->autoclose, net->sctp.max_autoclose) * HZ; + asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE] = sp->autoclose * HZ; /* Initializes the timers */ for (i = SCTP_EVENT_TIMEOUT_NONE; i < SCTP_NUM_TIMEOUT_TYPES; ++i) @@ -291,8 +290,6 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a asoc->peer.ipv6_address = 1; INIT_LIST_HEAD(&asoc->asocs); - asoc->autoclose = sp->autoclose; - asoc->default_stream = sp->default_stream; asoc->default_ppid = sp->default_ppid; asoc->default_flags = sp->default_flags; diff --git a/net/sctp/output.c b/net/sctp/output.c index 0e2644d0a77..0fb140f8f08 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -581,7 +581,8 @@ int sctp_packet_transmit(struct sctp_packet *packet) unsigned long timeout; /* Restart the AUTOCLOSE timer when sending data. */ - if (sctp_state(asoc, ESTABLISHED) && asoc->autoclose) { + if (sctp_state(asoc, ESTABLISHED) && + asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]) { timer = &asoc->timers[SCTP_EVENT_TIMEOUT_AUTOCLOSE]; timeout = asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]; diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index dfe3f36ff2a..a26065be728 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -820,7 +820,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net, SCTP_INC_STATS(net, SCTP_MIB_PASSIVEESTABS); sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL()); - if (new_asoc->autoclose) + if (new_asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]) sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START, SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE)); @@ -908,7 +908,7 @@ sctp_disposition_t sctp_sf_do_5_1E_ca(struct net *net, SCTP_INC_STATS(net, SCTP_MIB_CURRESTAB); SCTP_INC_STATS(net, SCTP_MIB_ACTIVEESTABS); sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL()); - if (asoc->autoclose) + if (asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]) sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START, SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE)); @@ -2970,7 +2970,7 @@ sctp_disposition_t sctp_sf_eat_data_6_2(struct net *net, if (chunk->chunk_hdr->flags & SCTP_DATA_SACK_IMM) force = SCTP_FORCE(); - if (asoc->autoclose) { + if (asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]) { sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART, SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE)); } @@ -3878,7 +3878,7 @@ sctp_disposition_t sctp_sf_eat_fwd_tsn(struct net *net, SCTP_CHUNK(chunk)); /* Count this as receiving DATA. */ - if (asoc->autoclose) { + if (asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]) { sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART, SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE)); } @@ -5267,7 +5267,7 @@ sctp_disposition_t sctp_sf_do_9_2_start_shutdown( sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART, SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD)); - if (asoc->autoclose) + if (asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]) sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE)); @@ -5346,7 +5346,7 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown_ack( sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART, SCTP_TO(SCTP_EVENT_TIMEOUT_T2_SHUTDOWN)); - if (asoc->autoclose) + if (asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]) sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE)); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 72046b9729a..5455043f449 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2196,6 +2196,7 @@ static int sctp_setsockopt_autoclose(struct sock *sk, char __user *optval, unsigned int optlen) { struct sctp_sock *sp = sctp_sk(sk); + struct net *net = sock_net(sk); /* Applicable to UDP-style socket only */ if (sctp_style(sk, TCP)) @@ -2205,6 +2206,9 @@ static int sctp_setsockopt_autoclose(struct sock *sk, char __user *optval, if (copy_from_user(&sp->autoclose, optval, optlen)) return -EFAULT; + if (sp->autoclose > net->sctp.max_autoclose) + sp->autoclose = net->sctp.max_autoclose; + return 0; } -- cgit v1.2.3-70-g09d2 From ce7a3bdf18a8dbcba1409f5d335c56fde432ca89 Mon Sep 17 00:00:00 2001 From: Florent Fourcot Date: Tue, 10 Dec 2013 15:15:46 +0100 Subject: ipv6: do not erase dst address with flow label destination This patch is following b579035ff766c9412e2b92abf5cab794bff102b6 "ipv6: remove old conditions on flow label sharing" Since there is no reason to restrict a label to a destination, we should not erase the destination value of a socket with the value contained in the flow label storage. This patch allows to really have the same flow label to more than one destination. Signed-off-by: Florent Fourcot Reviewed-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/dccp/ipv6.c | 1 - net/ipv6/datagram.c | 1 - net/ipv6/raw.c | 1 - net/ipv6/tcp_ipv6.c | 1 - net/ipv6/udp.c | 1 - net/l2tp/l2tp_ip6.c | 1 - 6 files changed, 6 deletions(-) (limited to 'net') diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 4ac71ff7c2e..2b90a786e47 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -851,7 +851,6 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); if (flowlabel == NULL) return -EINVAL; - usin->sin6_addr = flowlabel->dst; fl6_sock_release(flowlabel); } } diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 8dfe1f4d3c1..93b1aa34c43 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -73,7 +73,6 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); if (flowlabel == NULL) return -EINVAL; - usin->sin6_addr = flowlabel->dst; } } diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 7fb4e14c467..b6bb87e5580 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -792,7 +792,6 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); if (flowlabel == NULL) return -EINVAL; - daddr = &flowlabel->dst; } } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 0740f93a114..f67033b4bb6 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -156,7 +156,6 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); if (flowlabel == NULL) return -EINVAL; - usin->sin6_addr = flowlabel->dst; fl6_sock_release(flowlabel); } } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index bcd5699313c..089c741a399 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1140,7 +1140,6 @@ do_udp_sendmsg: flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); if (flowlabel == NULL) return -EINVAL; - daddr = &flowlabel->dst; } } diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index d9b437e5500..bb6e206ea70 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -528,7 +528,6 @@ static int l2tp_ip6_sendmsg(struct kiocb *iocb, struct sock *sk, flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); if (flowlabel == NULL) return -EINVAL; - daddr = &flowlabel->dst; } } -- cgit v1.2.3-70-g09d2 From 85f935d41af097a87067367be66de52896b964e1 Mon Sep 17 00:00:00 2001 From: wangweidong Date: Wed, 11 Dec 2013 09:50:38 +0800 Subject: sctp: check the rto_min and rto_max in setsockopt When we set 0 to rto_min or rto_max, just not change the value. Also we should check the rto_min > rto_max. Suggested-by: Vlad Yasevich Signed-off-by: Wang Weidong Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/socket.c | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 5455043f449..42b709c95cf 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2815,6 +2815,8 @@ static int sctp_setsockopt_rtoinfo(struct sock *sk, char __user *optval, unsigne { struct sctp_rtoinfo rtoinfo; struct sctp_association *asoc; + unsigned long rto_min, rto_max; + struct sctp_sock *sp = sctp_sk(sk); if (optlen != sizeof (struct sctp_rtoinfo)) return -EINVAL; @@ -2828,26 +2830,36 @@ static int sctp_setsockopt_rtoinfo(struct sock *sk, char __user *optval, unsigne if (!asoc && rtoinfo.srto_assoc_id && sctp_style(sk, UDP)) return -EINVAL; + rto_max = rtoinfo.srto_max; + rto_min = rtoinfo.srto_min; + + if (rto_max) + rto_max = asoc ? msecs_to_jiffies(rto_max) : rto_max; + else + rto_max = asoc ? asoc->rto_max : sp->rtoinfo.srto_max; + + if (rto_min) + rto_min = asoc ? msecs_to_jiffies(rto_min) : rto_min; + else + rto_min = asoc ? asoc->rto_min : sp->rtoinfo.srto_min; + + if (rto_min > rto_max) + return -EINVAL; + if (asoc) { if (rtoinfo.srto_initial != 0) asoc->rto_initial = msecs_to_jiffies(rtoinfo.srto_initial); - if (rtoinfo.srto_max != 0) - asoc->rto_max = msecs_to_jiffies(rtoinfo.srto_max); - if (rtoinfo.srto_min != 0) - asoc->rto_min = msecs_to_jiffies(rtoinfo.srto_min); + asoc->rto_max = rto_max; + asoc->rto_min = rto_min; } else { /* If there is no association or the association-id = 0 * set the values to the endpoint. */ - struct sctp_sock *sp = sctp_sk(sk); - if (rtoinfo.srto_initial != 0) sp->rtoinfo.srto_initial = rtoinfo.srto_initial; - if (rtoinfo.srto_max != 0) - sp->rtoinfo.srto_max = rtoinfo.srto_max; - if (rtoinfo.srto_min != 0) - sp->rtoinfo.srto_min = rtoinfo.srto_min; + sp->rtoinfo.srto_max = rto_max; + sp->rtoinfo.srto_min = rto_min; } return 0; -- cgit v1.2.3-70-g09d2 From 4f3fdf3bc59cafd14c3bc2c2369efad34c7aa8b5 Mon Sep 17 00:00:00 2001 From: wangweidong Date: Wed, 11 Dec 2013 09:50:39 +0800 Subject: sctp: add check rto_min and rto_max in sysctl rto_min should be smaller than rto_max while rto_max should be larger than rto_min. Add two proc_handler for the checking. Suggested-by: Vlad Yasevich Signed-off-by: Wang Weidong Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/sysctl.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 65 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index 6b36561a1b3..43b5e324387 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -61,6 +61,13 @@ static int proc_sctp_do_hmac_alg(struct ctl_table *ctl, void __user *buffer, size_t *lenp, loff_t *ppos); +static int proc_sctp_do_rto_min(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos); +static int proc_sctp_do_rto_max(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos); + static struct ctl_table sctp_table[] = { { .procname = "sctp_mem", @@ -102,17 +109,17 @@ static struct ctl_table sctp_net_table[] = { .data = &init_net.sctp.rto_min, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_sctp_do_rto_min, .extra1 = &one, - .extra2 = &timer_max + .extra2 = &init_net.sctp.rto_max }, { .procname = "rto_max", .data = &init_net.sctp.rto_max, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &one, + .proc_handler = proc_sctp_do_rto_max, + .extra1 = &init_net.sctp.rto_min, .extra2 = &timer_max }, { @@ -342,6 +349,60 @@ static int proc_sctp_do_hmac_alg(struct ctl_table *ctl, return ret; } +static int proc_sctp_do_rto_min(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + struct net *net = current->nsproxy->net_ns; + int new_value; + struct ctl_table tbl; + unsigned int min = *(unsigned int *) ctl->extra1; + unsigned int max = *(unsigned int *) ctl->extra2; + int ret; + + memset(&tbl, 0, sizeof(struct ctl_table)); + tbl.maxlen = sizeof(unsigned int); + + if (write) + tbl.data = &new_value; + else + tbl.data = &net->sctp.rto_min; + ret = proc_dointvec(&tbl, write, buffer, lenp, ppos); + if (write) { + if (ret || new_value > max || new_value < min) + return -EINVAL; + net->sctp.rto_min = new_value; + } + return ret; +} + +static int proc_sctp_do_rto_max(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + struct net *net = current->nsproxy->net_ns; + int new_value; + struct ctl_table tbl; + unsigned int min = *(unsigned int *) ctl->extra1; + unsigned int max = *(unsigned int *) ctl->extra2; + int ret; + + memset(&tbl, 0, sizeof(struct ctl_table)); + tbl.maxlen = sizeof(unsigned int); + + if (write) + tbl.data = &new_value; + else + tbl.data = &net->sctp.rto_max; + ret = proc_dointvec(&tbl, write, buffer, lenp, ppos); + if (write) { + if (ret || new_value > max || new_value < min) + return -EINVAL; + net->sctp.rto_max = new_value; + } + return ret; +} + int sctp_sysctl_net_register(struct net *net) { struct ctl_table *table; -- cgit v1.2.3-70-g09d2 From b486b2289e40797e386a18048a66b535206a463b Mon Sep 17 00:00:00 2001 From: wangweidong Date: Wed, 11 Dec 2013 09:50:40 +0800 Subject: sctp: fix up a spacing fix up spacing of proc_sctp_do_hmac_alg for according to the proc_sctp_do_rto_min[max] in sysctl.c Suggested-by: Daniel Borkmann Signed-off-by: Wang Weidong Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/sysctl.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index 43b5e324387..b0565afb61c 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -56,10 +56,8 @@ extern long sysctl_sctp_mem[3]; extern int sysctl_sctp_rmem[3]; extern int sysctl_sctp_wmem[3]; -static int proc_sctp_do_hmac_alg(struct ctl_table *ctl, - int write, +static int proc_sctp_do_hmac_alg(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, - loff_t *ppos); static int proc_sctp_do_rto_min(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, @@ -301,8 +299,7 @@ static struct ctl_table sctp_net_table[] = { { /* sentinel */ } }; -static int proc_sctp_do_hmac_alg(struct ctl_table *ctl, - int write, +static int proc_sctp_do_hmac_alg(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { -- cgit v1.2.3-70-g09d2 From 8afdd99a1315e759de04ad6e2344f0c5f17ecb1b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 10 Dec 2013 18:07:23 -0800 Subject: udp: ipv4: fix an use after free in __udp4_lib_rcv() Dave Jones reported a use after free in UDP stack : [ 5059.434216] ========================= [ 5059.434314] [ BUG: held lock freed! ] [ 5059.434420] 3.13.0-rc3+ #9 Not tainted [ 5059.434520] ------------------------- [ 5059.434620] named/863 is freeing memory ffff88005e960000-ffff88005e96061f, with a lock still held there! [ 5059.434815] (slock-AF_INET){+.-...}, at: [] udp_queue_rcv_skb+0xd1/0x4b0 [ 5059.435012] 3 locks held by named/863: [ 5059.435086] #0: (rcu_read_lock){.+.+..}, at: [] __netif_receive_skb_core+0x11d/0x940 [ 5059.435295] #1: (rcu_read_lock){.+.+..}, at: [] ip_local_deliver_finish+0x3e/0x410 [ 5059.435500] #2: (slock-AF_INET){+.-...}, at: [] udp_queue_rcv_skb+0xd1/0x4b0 [ 5059.435734] stack backtrace: [ 5059.435858] CPU: 0 PID: 863 Comm: named Not tainted 3.13.0-rc3+ #9 [loadavg: 0.21 0.06 0.06 1/115 1365] [ 5059.436052] Hardware name: /D510MO, BIOS MOPNV10J.86A.0175.2010.0308.0620 03/08/2010 [ 5059.436223] 0000000000000002 ffff88007e203ad8 ffffffff8153a372 ffff8800677130e0 [ 5059.436390] ffff88007e203b10 ffffffff8108cafa ffff88005e960000 ffff88007b00cfc0 [ 5059.436554] ffffea00017a5800 ffffffff8141c490 0000000000000246 ffff88007e203b48 [ 5059.436718] Call Trace: [ 5059.436769] [] dump_stack+0x4d/0x66 [ 5059.436904] [] debug_check_no_locks_freed+0x15a/0x160 [ 5059.437037] [] ? __sk_free+0x110/0x230 [ 5059.437147] [] kmem_cache_free+0x6a/0x150 [ 5059.437260] [] __sk_free+0x110/0x230 [ 5059.437364] [] sk_free+0x19/0x20 [ 5059.437463] [] sock_edemux+0x25/0x40 [ 5059.437567] [] sock_queue_rcv_skb+0x81/0x280 [ 5059.437685] [] ? udp_queue_rcv_skb+0xd1/0x4b0 [ 5059.437805] [] __udp_queue_rcv_skb+0x42/0x240 [ 5059.437925] [] ? _raw_spin_lock+0x65/0x70 [ 5059.438038] [] udp_queue_rcv_skb+0x26b/0x4b0 [ 5059.438155] [] __udp4_lib_rcv+0x152/0xb00 [ 5059.438269] [] udp_rcv+0x15/0x20 [ 5059.438367] [] ip_local_deliver_finish+0x10f/0x410 [ 5059.438492] [] ? ip_local_deliver_finish+0x3e/0x410 [ 5059.438621] [] ip_local_deliver+0x43/0x80 [ 5059.438733] [] ip_rcv_finish+0x140/0x5a0 [ 5059.438843] [] ip_rcv+0x296/0x3f0 [ 5059.438945] [] __netif_receive_skb_core+0x742/0x940 [ 5059.439074] [] ? __netif_receive_skb_core+0x11d/0x940 [ 5059.442231] [] ? trace_hardirqs_on+0xd/0x10 [ 5059.442231] [] __netif_receive_skb+0x13/0x60 [ 5059.442231] [] netif_receive_skb+0x1e/0x1f0 [ 5059.442231] [] napi_gro_receive+0x70/0xa0 [ 5059.442231] [] rtl8169_poll+0x166/0x700 [r8169] [ 5059.442231] [] net_rx_action+0x129/0x1e0 [ 5059.442231] [] __do_softirq+0xed/0x240 [ 5059.442231] [] irq_exit+0x125/0x140 [ 5059.442231] [] do_IRQ+0x51/0xc0 [ 5059.442231] [] common_interrupt+0x6f/0x6f We need to keep a reference on the socket, by using skb_steal_sock() at the right place. Note that another patch is needed to fix a race in udp_sk_rx_dst_set(), as we hold no lock protecting the dst. Fixes: 421b3885bf6d ("udp: ipv4: Add udp early demux") Reported-by: Dave Jones Signed-off-by: Eric Dumazet Cc: Shawn Bohrer Signed-off-by: David S. Miller --- net/ipv4/udp.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 44f6a20fa29..2e2aecbe22c 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -560,15 +560,11 @@ static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb, __be16 sport, __be16 dport, struct udp_table *udptable) { - struct sock *sk; const struct iphdr *iph = ip_hdr(skb); - if (unlikely(sk = skb_steal_sock(skb))) - return sk; - else - return __udp4_lib_lookup(dev_net(skb_dst(skb)->dev), iph->saddr, sport, - iph->daddr, dport, inet_iif(skb), - udptable); + return __udp4_lib_lookup(dev_net(skb_dst(skb)->dev), iph->saddr, sport, + iph->daddr, dport, inet_iif(skb), + udptable); } struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, @@ -1739,15 +1735,15 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, if (udp4_csum_init(skb, uh, proto)) goto csum_error; - if (skb->sk) { + sk = skb_steal_sock(skb); + if (sk) { int ret; - sk = skb->sk; if (unlikely(sk->sk_rx_dst == NULL)) udp_sk_rx_dst_set(sk, skb); ret = udp_queue_rcv_skb(sk, skb); - + sock_put(sk); /* a return value > 0 means to resubmit the input, but * it wants the return to be -protocol, or 0 */ -- cgit v1.2.3-70-g09d2 From cc106e441a63bec3b1cb72948df82ea15945c449 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Tue, 10 Dec 2013 14:59:27 +0800 Subject: net: sched: tbf: fix the calculation of max_size Current max_size is caluated from rate table. Now, the rate table has been replaced and it's wrong to caculate max_size based on this rate table. It can lead wrong calculation of max_size. The burst in kernel may be lower than user asked, because burst may gets some loss when transform it to buffer(E.g. "burst 40kb rate 30mbit/s") and it seems we cannot avoid this loss. Burst's value(max_size) based on rate table may be equal user asked. If a packet's length is max_size, this packet will be stalled in tbf_dequeue() because its length is above the burst in kernel so that it cannot get enough tokens. The max_size guards against enqueuing packet sizes above q->buffer "time" in tbf_enqueue(). To make consistent with the calculation of tokens, this patch add a helper psched_ns_t2l() to calculate burst(max_size) directly to fix this problem. After this fix, we can support to using 64bit rates to calculate burst as well. Signed-off-by: Yang Yingliang Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_tbf.c | 115 ++++++++++++++++++++++++++++++++-------------------- 1 file changed, 70 insertions(+), 45 deletions(-) (limited to 'net') diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index a6090051c5d..a44928c6ba2 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -118,6 +118,30 @@ struct tbf_sched_data { }; +/* Time to Length, convert time in ns to length in bytes + * to determinate how many bytes can be sent in given time. + */ +static u64 psched_ns_t2l(const struct psched_ratecfg *r, + u64 time_in_ns) +{ + /* The formula is : + * len = (time_in_ns * r->rate_bytes_ps) / NSEC_PER_SEC + */ + u64 len = time_in_ns * r->rate_bytes_ps; + + do_div(len, NSEC_PER_SEC); + + if (unlikely(r->linklayer == TC_LINKLAYER_ATM)) + len = (len / 53) * 48; + + if (len > r->overhead) + len -= r->overhead; + else + len = 0; + + return len; +} + /* * Return length of individual segments of a gso packet, * including all headers (MAC, IP, TCP/UDP) @@ -289,10 +313,11 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt) struct tbf_sched_data *q = qdisc_priv(sch); struct nlattr *tb[TCA_TBF_MAX + 1]; struct tc_tbf_qopt *qopt; - struct qdisc_rate_table *rtab = NULL; - struct qdisc_rate_table *ptab = NULL; struct Qdisc *child = NULL; - int max_size, n; + struct psched_ratecfg rate; + struct psched_ratecfg peak; + u64 max_size; + s64 buffer, mtu; u64 rate64 = 0, prate64 = 0; err = nla_parse_nested(tb, TCA_TBF_MAX, opt, tbf_policy); @@ -304,38 +329,13 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt) goto done; qopt = nla_data(tb[TCA_TBF_PARMS]); - rtab = qdisc_get_rtab(&qopt->rate, tb[TCA_TBF_RTAB]); - if (rtab == NULL) - goto done; - - if (qopt->peakrate.rate) { - if (qopt->peakrate.rate > qopt->rate.rate) - ptab = qdisc_get_rtab(&qopt->peakrate, tb[TCA_TBF_PTAB]); - if (ptab == NULL) - goto done; - } - - for (n = 0; n < 256; n++) - if (rtab->data[n] > qopt->buffer) - break; - max_size = (n << qopt->rate.cell_log) - 1; - if (ptab) { - int size; - - for (n = 0; n < 256; n++) - if (ptab->data[n] > qopt->mtu) - break; - size = (n << qopt->peakrate.cell_log) - 1; - if (size < max_size) - max_size = size; - } - if (max_size < 0) - goto done; + if (qopt->rate.linklayer == TC_LINKLAYER_UNAWARE) + qdisc_put_rtab(qdisc_get_rtab(&qopt->rate, + tb[TCA_TBF_RTAB])); - if (max_size < psched_mtu(qdisc_dev(sch))) - pr_warn_ratelimited("sch_tbf: burst %u is lower than device %s mtu (%u) !\n", - max_size, qdisc_dev(sch)->name, - psched_mtu(qdisc_dev(sch))); + if (qopt->peakrate.linklayer == TC_LINKLAYER_UNAWARE) + qdisc_put_rtab(qdisc_get_rtab(&qopt->peakrate, + tb[TCA_TBF_PTAB])); if (q->qdisc != &noop_qdisc) { err = fifo_set_limit(q->qdisc, qopt->limit); @@ -349,6 +349,39 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt) } } + buffer = min_t(u64, PSCHED_TICKS2NS(qopt->buffer), ~0U); + mtu = min_t(u64, PSCHED_TICKS2NS(qopt->mtu), ~0U); + + if (tb[TCA_TBF_RATE64]) + rate64 = nla_get_u64(tb[TCA_TBF_RATE64]); + psched_ratecfg_precompute(&rate, &qopt->rate, rate64); + + max_size = min_t(u64, psched_ns_t2l(&rate, buffer), ~0U); + + if (qopt->peakrate.rate) { + if (tb[TCA_TBF_PRATE64]) + prate64 = nla_get_u64(tb[TCA_TBF_PRATE64]); + psched_ratecfg_precompute(&peak, &qopt->peakrate, prate64); + if (peak.rate_bytes_ps <= rate.rate_bytes_ps) { + pr_warn_ratelimited("sch_tbf: peakrate %llu is lower than or equals to rate %llu !\n", + peak.rate_bytes_ps, rate.rate_bytes_ps); + err = -EINVAL; + goto done; + } + + max_size = min_t(u64, max_size, psched_ns_t2l(&peak, mtu)); + } + + if (max_size < psched_mtu(qdisc_dev(sch))) + pr_warn_ratelimited("sch_tbf: burst %llu is lower than device %s mtu (%u) !\n", + max_size, qdisc_dev(sch)->name, + psched_mtu(qdisc_dev(sch))); + + if (!max_size) { + err = -EINVAL; + goto done; + } + sch_tree_lock(sch); if (child) { qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen); @@ -362,13 +395,9 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt) q->tokens = q->buffer; q->ptokens = q->mtu; - if (tb[TCA_TBF_RATE64]) - rate64 = nla_get_u64(tb[TCA_TBF_RATE64]); - psched_ratecfg_precompute(&q->rate, &rtab->rate, rate64); - if (ptab) { - if (tb[TCA_TBF_PRATE64]) - prate64 = nla_get_u64(tb[TCA_TBF_PRATE64]); - psched_ratecfg_precompute(&q->peak, &ptab->rate, prate64); + memcpy(&q->rate, &rate, sizeof(struct psched_ratecfg)); + if (qopt->peakrate.rate) { + memcpy(&q->peak, &peak, sizeof(struct psched_ratecfg)); q->peak_present = true; } else { q->peak_present = false; @@ -377,10 +406,6 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt) sch_tree_unlock(sch); err = 0; done: - if (rtab) - qdisc_put_rtab(rtab); - if (ptab) - qdisc_put_rtab(ptab); return err; } -- cgit v1.2.3-70-g09d2 From 1598f7cb4745c40467decc91ee44ec615773eb45 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Tue, 10 Dec 2013 14:59:28 +0800 Subject: net: sched: htb: fix the calculation of quantum Now, 32bit rates may be not the true rate. So use rate_bytes_ps which is from max(rate32, rate64) to calcualte quantum. Signed-off-by: Yang Yingliang Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_htb.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 0e1e38b4002..717b2108f85 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1477,11 +1477,22 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, sch_tree_lock(sch); } + rate64 = tb[TCA_HTB_RATE64] ? nla_get_u64(tb[TCA_HTB_RATE64]) : 0; + + ceil64 = tb[TCA_HTB_CEIL64] ? nla_get_u64(tb[TCA_HTB_CEIL64]) : 0; + + psched_ratecfg_precompute(&cl->rate, &hopt->rate, rate64); + psched_ratecfg_precompute(&cl->ceil, &hopt->ceil, ceil64); + /* it used to be a nasty bug here, we have to check that node * is really leaf before changing cl->un.leaf ! */ if (!cl->level) { - cl->quantum = hopt->rate.rate / q->rate2quantum; + u64 quantum = cl->rate.rate_bytes_ps; + + do_div(quantum, q->rate2quantum); + cl->quantum = min_t(u64, quantum, INT_MAX); + if (!hopt->quantum && cl->quantum < 1000) { pr_warning( "HTB: quantum of class %X is small. Consider r2q change.\n", @@ -1500,13 +1511,6 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, cl->prio = TC_HTB_NUMPRIO - 1; } - rate64 = tb[TCA_HTB_RATE64] ? nla_get_u64(tb[TCA_HTB_RATE64]) : 0; - - ceil64 = tb[TCA_HTB_CEIL64] ? nla_get_u64(tb[TCA_HTB_CEIL64]) : 0; - - psched_ratecfg_precompute(&cl->rate, &hopt->rate, rate64); - psched_ratecfg_precompute(&cl->ceil, &hopt->ceil, ceil64); - cl->buffer = PSCHED_TICKS2NS(hopt->buffer); cl->cbuffer = PSCHED_TICKS2NS(hopt->cbuffer); -- cgit v1.2.3-70-g09d2 From 610438b74496b2986a9025f8e23c134cb638e338 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 11 Dec 2013 08:10:05 -0800 Subject: udp: ipv4: fix potential use after free in udp_v4_early_demux() pskb_may_pull() can reallocate skb->head, we need to move the initialization of iph and uh pointers after its call. Fixes: 421b3885bf6d ("udp: ipv4: Add udp early demux") Signed-off-by: Eric Dumazet Cc: Shawn Bohrer Signed-off-by: David S. Miller --- net/ipv4/udp.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 2e2aecbe22c..16d246a51a0 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1909,17 +1909,20 @@ static struct sock *__udp4_lib_demux_lookup(struct net *net, void udp_v4_early_demux(struct sk_buff *skb) { - const struct iphdr *iph = ip_hdr(skb); - const struct udphdr *uh = udp_hdr(skb); + struct net *net = dev_net(skb->dev); + const struct iphdr *iph; + const struct udphdr *uh; struct sock *sk; struct dst_entry *dst; - struct net *net = dev_net(skb->dev); int dif = skb->dev->ifindex; /* validate the packet */ if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct udphdr))) return; + iph = ip_hdr(skb); + uh = udp_hdr(skb); + if (skb->pkt_type == PACKET_BROADCAST || skb->pkt_type == PACKET_MULTICAST) sk = __udp4_lib_mcast_demux_lookup(net, uh->dest, iph->daddr, -- cgit v1.2.3-70-g09d2 From 975022310233fb0f0193873d79a7b8438070fa82 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 11 Dec 2013 14:46:51 -0800 Subject: udp: ipv4: must add synchronization in udp_sk_rx_dst_set() Unlike TCP, UDP input path does not hold the socket lock. Before messing with sk->sk_rx_dst, we must use a spinlock, otherwise multiple cpus could leak a refcount. This patch also takes care of renewing a stale dst entry. (When the sk->sk_rx_dst would not be used by IP early demux) Fixes: 421b3885bf6d ("udp: ipv4: Add udp early demux") Signed-off-by: Eric Dumazet Cc: Shawn Bohrer Signed-off-by: David S. Miller --- net/ipv4/udp.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 16d246a51a0..62c19fdd102 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1599,12 +1599,21 @@ static void flush_stack(struct sock **stack, unsigned int count, kfree_skb(skb1); } -static void udp_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) +/* For TCP sockets, sk_rx_dst is protected by socket lock + * For UDP, we use sk_dst_lock to guard against concurrent changes. + */ +static void udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst) { - struct dst_entry *dst = skb_dst(skb); + struct dst_entry *old; - dst_hold(dst); - sk->sk_rx_dst = dst; + spin_lock(&sk->sk_dst_lock); + old = sk->sk_rx_dst; + if (likely(old != dst)) { + dst_hold(dst); + sk->sk_rx_dst = dst; + dst_release(old); + } + spin_unlock(&sk->sk_dst_lock); } /* @@ -1737,10 +1746,11 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, sk = skb_steal_sock(skb); if (sk) { + struct dst_entry *dst = skb_dst(skb); int ret; - if (unlikely(sk->sk_rx_dst == NULL)) - udp_sk_rx_dst_set(sk, skb); + if (unlikely(sk->sk_rx_dst != dst)) + udp_sk_rx_dst_set(sk, dst); ret = udp_queue_rcv_skb(sk, skb); sock_put(sk); -- cgit v1.2.3-70-g09d2 From d55d282e6af88120ad90e93a88f70e3116dc0e3d Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Thu, 12 Dec 2013 10:57:22 +0800 Subject: sch_tbf: use do_div() for 64-bit divide It's doing a 64-bit divide which is not supported on 32-bit architectures in psched_ns_t2l(). The correct way to do this is to use do_div(). It's introduced by commit cc106e441a63 ("net: sched: tbf: fix the calculation of max_size") Reported-by: kbuild test robot Signed-off-by: Yang Yingliang Signed-off-by: David S. Miller --- net/sched/sch_tbf.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index a44928c6ba2..887e672f9d7 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -131,8 +131,10 @@ static u64 psched_ns_t2l(const struct psched_ratecfg *r, do_div(len, NSEC_PER_SEC); - if (unlikely(r->linklayer == TC_LINKLAYER_ATM)) - len = (len / 53) * 48; + if (unlikely(r->linklayer == TC_LINKLAYER_ATM)) { + do_div(len, 53); + len = len * 48; + } if (len > r->overhead) len -= r->overhead; -- cgit v1.2.3-70-g09d2