From 013dbb325be702d777118d5e4ffefff3dad2b153 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Wed, 19 Jun 2013 14:32:33 -0400 Subject: net: delete __cpuinit usage from all net files The __cpuinit type of throwaway sections might have made sense some time ago when RAM was more constrained, but now the savings do not offset the cost and complications. For example, the fix in commit 5e427ec2d0 ("x86: Fix bit corruption at CPU resume time") is a good example of the nasty type of bugs that can be created with improper use of the various __init prefixes. After a discussion on LKML[1] it was decided that cpuinit should go the way of devinit and be phased out. Once all the users are gone, we can then finally remove the macros themselves from linux/init.h. This removes all the net/* uses of the __cpuinit macros from all C files. [1] https://lkml.org/lkml/2013/5/20/589 Cc: "David S. Miller" Cc: netdev@vger.kernel.org Signed-off-by: Paul Gortmaker --- net/core/flow.c | 4 ++-- net/iucv/iucv.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/flow.c b/net/core/flow.c index 7102f166482..dfa602ceb8c 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -403,7 +403,7 @@ void flow_cache_flush_deferred(void) schedule_work(&flow_cache_flush_work); } -static int __cpuinit flow_cache_cpu_prepare(struct flow_cache *fc, int cpu) +static int flow_cache_cpu_prepare(struct flow_cache *fc, int cpu) { struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu); size_t sz = sizeof(struct hlist_head) * flow_cache_hash_size(fc); @@ -421,7 +421,7 @@ static int __cpuinit flow_cache_cpu_prepare(struct flow_cache *fc, int cpu) return 0; } -static int __cpuinit flow_cache_cpu(struct notifier_block *nfb, +static int flow_cache_cpu(struct notifier_block *nfb, unsigned long action, void *hcpu) { diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index 4fe76ff214c..cd5b8ec9be0 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -621,7 +621,7 @@ static void iucv_disable(void) put_online_cpus(); } -static int __cpuinit iucv_cpu_notify(struct notifier_block *self, +static int iucv_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { cpumask_t cpumask; -- cgit v1.2.3-18-g5258 From f09eca8db0184aeb6b9718a987cfb3653ad7c4ae Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 9 Jul 2013 20:16:39 +0200 Subject: netfilter: ctnetlink: fix incorrect NAT expectation dumping nf_ct_expect_alloc leaves unset the expectation NAT fields. However, ctnetlink_exp_dump_expect expects them to be zeroed in case they are not used, which may not be the case. This results in dumping the NAT tuple of the expectation when it should not. Fix it by zeroing the NAT fields of the expectation. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_expect.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index c63b618cd61..4fd1ca94fd4 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -293,6 +293,11 @@ void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class, sizeof(exp->tuple.dst.u3) - len); exp->tuple.dst.u.all = *dst; + +#ifdef CONFIG_NF_NAT_NEEDED + memset(&exp->saved_addr, 0, sizeof(exp->saved_addr)); + memset(&exp->saved_proto, 0, sizeof(exp->saved_proto)); +#endif } EXPORT_SYMBOL_GPL(nf_ct_expect_init); -- cgit v1.2.3-18-g5258 From baf60efa585c78b269f0097288868a51ccc61f55 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 11 Jul 2013 19:22:19 -0700 Subject: netfilter: xt_socket: fix broken v0 support commit 681f130f39e10 ("netfilter: xt_socket: add XT_SOCKET_NOWILDCARD flag") added a potential NULL dereference if an old iptables package uses v0 of the match. Fix this by removing the test on @info in fast path. IPv6 can remove the test as well, as it uses v1 or v2. Reported-by: Neal Cardwell Signed-off-by: Eric Dumazet Cc: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_socket.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index f8b71911037..20b15916f40 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -172,7 +172,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, /* Ignore non-transparent sockets, if XT_SOCKET_TRANSPARENT is used */ - if (info && info->flags & XT_SOCKET_TRANSPARENT) + if (info->flags & XT_SOCKET_TRANSPARENT) transparent = ((sk->sk_state != TCP_TIME_WAIT && inet_sk(sk)->transparent) || (sk->sk_state == TCP_TIME_WAIT && @@ -196,7 +196,11 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, static bool socket_mt4_v0(const struct sk_buff *skb, struct xt_action_param *par) { - return socket_match(skb, par, NULL); + static struct xt_socket_mtinfo1 xt_info_v0 = { + .flags = 0, + }; + + return socket_match(skb, par, &xt_info_v0); } static bool @@ -314,7 +318,7 @@ socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par) /* Ignore non-transparent sockets, if XT_SOCKET_TRANSPARENT is used */ - if (info && info->flags & XT_SOCKET_TRANSPARENT) + if (info->flags & XT_SOCKET_TRANSPARENT) transparent = ((sk->sk_state != TCP_TIME_WAIT && inet_sk(sk)->transparent) || (sk->sk_state == TCP_TIME_WAIT && -- cgit v1.2.3-18-g5258 From 1540c5d3cbf7670eb68a0d02611ec73e5604a91a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 14 Jul 2013 22:57:50 -0400 Subject: SUNRPC: Fix another issue with rpc_client_register() Fix the error pathway if rpcauth_create() fails. Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 9963584605c..74f6a704e37 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -309,6 +309,7 @@ static int rpc_client_register(const struct rpc_create_args *args, return 0; err_auth: pipefs_sb = rpc_get_sb_net(net); + rpc_unregister_client(clnt); __rpc_clnt_remove_pipedir(clnt); out: if (pipefs_sb) -- cgit v1.2.3-18-g5258 From b2781e1021525649c0b33fffd005ef219da33926 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 12 Jul 2013 09:39:03 +0300 Subject: svcrdma: underflow issue in decode_write_list() My static checker marks everything from ntohl() as untrusted and it complains we could have an underflow problem doing: return (u32 *)&ary->wc_array[nchunks]; Also on 32 bit systems the upper bound check could overflow. Cc: stable@vger.kernel.org Signed-off-by: Dan Carpenter Signed-off-by: J. Bruce Fields --- net/sunrpc/xprtrdma/svc_rdma_marshal.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/svc_rdma_marshal.c b/net/sunrpc/xprtrdma/svc_rdma_marshal.c index 8d2edddf48c..65b146297f5 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_marshal.c +++ b/net/sunrpc/xprtrdma/svc_rdma_marshal.c @@ -98,6 +98,7 @@ void svc_rdma_rcl_chunk_counts(struct rpcrdma_read_chunk *ch, */ static u32 *decode_write_list(u32 *va, u32 *vaend) { + unsigned long start, end; int nchunks; struct rpcrdma_write_array *ary = @@ -113,9 +114,12 @@ static u32 *decode_write_list(u32 *va, u32 *vaend) return NULL; } nchunks = ntohl(ary->wc_nchunks); - if (((unsigned long)&ary->wc_array[0] + - (sizeof(struct rpcrdma_write_chunk) * nchunks)) > - (unsigned long)vaend) { + + start = (unsigned long)&ary->wc_array[0]; + end = (unsigned long)vaend; + if (nchunks < 0 || + nchunks > (SIZE_MAX - start) / sizeof(struct rpcrdma_write_chunk) || + (start + (sizeof(struct rpcrdma_write_chunk) * nchunks)) > end) { dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n", ary, nchunks, vaend); return NULL; @@ -129,6 +133,7 @@ static u32 *decode_write_list(u32 *va, u32 *vaend) static u32 *decode_reply_array(u32 *va, u32 *vaend) { + unsigned long start, end; int nchunks; struct rpcrdma_write_array *ary = (struct rpcrdma_write_array *)va; @@ -143,9 +148,12 @@ static u32 *decode_reply_array(u32 *va, u32 *vaend) return NULL; } nchunks = ntohl(ary->wc_nchunks); - if (((unsigned long)&ary->wc_array[0] + - (sizeof(struct rpcrdma_write_chunk) * nchunks)) > - (unsigned long)vaend) { + + start = (unsigned long)&ary->wc_array[0]; + end = (unsigned long)vaend; + if (nchunks < 0 || + nchunks > (SIZE_MAX - start) / sizeof(struct rpcrdma_write_chunk) || + (start + (sizeof(struct rpcrdma_write_chunk) * nchunks)) > end) { dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n", ary, nchunks, vaend); return NULL; -- cgit v1.2.3-18-g5258 From a0ec570f4f69c4cb700d743a915096c2c8f56a99 Mon Sep 17 00:00:00 2001 From: Michal Kazior Date: Tue, 25 Jun 2013 09:17:17 +0200 Subject: nl80211: fix mgmt tx status and testmode reporting for netns These two events were sent to the default network namespace. This caused AP mode in a non-default netns to not work correctly. Mgmt tx status was multicasted to a different (default) netns instead of the one the AP was in. Cc: stable@vger.kernel.org Signed-off-by: Michal Kazior Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 1cc47aca7f0..9fb8820b75c 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -6613,12 +6613,14 @@ EXPORT_SYMBOL(cfg80211_testmode_alloc_event_skb); void cfg80211_testmode_event(struct sk_buff *skb, gfp_t gfp) { + struct cfg80211_registered_device *rdev = ((void **)skb->cb)[0]; void *hdr = ((void **)skb->cb)[1]; struct nlattr *data = ((void **)skb->cb)[2]; nla_nest_end(skb, data); genlmsg_end(skb, hdr); - genlmsg_multicast(skb, 0, nl80211_testmode_mcgrp.id, gfp); + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), skb, 0, + nl80211_testmode_mcgrp.id, gfp); } EXPORT_SYMBOL(cfg80211_testmode_event); #endif @@ -10064,7 +10066,8 @@ void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie, genlmsg_end(msg, hdr); - genlmsg_multicast(msg, 0, nl80211_mlme_mcgrp.id, gfp); + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, + nl80211_mlme_mcgrp.id, gfp); return; nla_put_failure: -- cgit v1.2.3-18-g5258 From 923a0e7dee8c436108279568cf34444749ac796f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 28 Jun 2013 11:38:54 +0200 Subject: cfg80211: fix bugs in new SME implementation When splitting the SME implementation from the MLME code, I introduced a few bugs: * association failures no longer sent a connect-failure event * getting disassociated from the AP caused deauth to be sent but state wasn't cleaned up, leading to warnings * authentication failures weren't cleaned up properly, causing new connection attempts to warn and fail Fix these bugs. Signed-off-by: Johannes Berg --- net/wireless/sme.c | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 1d3cfb1a3f2..81c8a10d743 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -34,8 +34,10 @@ struct cfg80211_conn { CFG80211_CONN_SCAN_AGAIN, CFG80211_CONN_AUTHENTICATE_NEXT, CFG80211_CONN_AUTHENTICATING, + CFG80211_CONN_AUTH_FAILED, CFG80211_CONN_ASSOCIATE_NEXT, CFG80211_CONN_ASSOCIATING, + CFG80211_CONN_ASSOC_FAILED, CFG80211_CONN_DEAUTH, CFG80211_CONN_CONNECTED, } state; @@ -164,6 +166,8 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev) NULL, 0, params->key, params->key_len, params->key_idx, NULL, 0); + case CFG80211_CONN_AUTH_FAILED: + return -ENOTCONN; case CFG80211_CONN_ASSOCIATE_NEXT: BUG_ON(!rdev->ops->assoc); wdev->conn->state = CFG80211_CONN_ASSOCIATING; @@ -188,10 +192,17 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev) WLAN_REASON_DEAUTH_LEAVING, false); return err; + case CFG80211_CONN_ASSOC_FAILED: + cfg80211_mlme_deauth(rdev, wdev->netdev, params->bssid, + NULL, 0, + WLAN_REASON_DEAUTH_LEAVING, false); + return -ENOTCONN; case CFG80211_CONN_DEAUTH: cfg80211_mlme_deauth(rdev, wdev->netdev, params->bssid, NULL, 0, WLAN_REASON_DEAUTH_LEAVING, false); + /* free directly, disconnected event already sent */ + cfg80211_sme_free(wdev); return 0; default: return 0; @@ -371,7 +382,7 @@ bool cfg80211_sme_rx_assoc_resp(struct wireless_dev *wdev, u16 status) return true; } - wdev->conn->state = CFG80211_CONN_DEAUTH; + wdev->conn->state = CFG80211_CONN_ASSOC_FAILED; schedule_work(&rdev->conn_work); return false; } @@ -383,7 +394,13 @@ void cfg80211_sme_deauth(struct wireless_dev *wdev) void cfg80211_sme_auth_timeout(struct wireless_dev *wdev) { - cfg80211_sme_free(wdev); + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + + if (!wdev->conn) + return; + + wdev->conn->state = CFG80211_CONN_AUTH_FAILED; + schedule_work(&rdev->conn_work); } void cfg80211_sme_disassoc(struct wireless_dev *wdev) @@ -399,7 +416,13 @@ void cfg80211_sme_disassoc(struct wireless_dev *wdev) void cfg80211_sme_assoc_timeout(struct wireless_dev *wdev) { - cfg80211_sme_disassoc(wdev); + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + + if (!wdev->conn) + return; + + wdev->conn->state = CFG80211_CONN_ASSOC_FAILED; + schedule_work(&rdev->conn_work); } static int cfg80211_sme_connect(struct wireless_dev *wdev, -- cgit v1.2.3-18-g5258 From f77b86d7d3acf9dfcb5ee834628d12207584b2cb Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 24 Jun 2013 15:43:38 +0200 Subject: regulatory: add missing rtnl locking restore_regulatory_settings() requires the RTNL to be held, add the missing locking in reg_timeout_work(). Signed-off-by: Johannes Berg --- net/wireless/reg.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 5a24c986f34..5a950f36bae 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -2279,7 +2279,9 @@ void wiphy_regulatory_deregister(struct wiphy *wiphy) static void reg_timeout_work(struct work_struct *work) { REG_DBG_PRINT("Timeout while waiting for CRDA to reply, restoring regulatory settings\n"); + rtnl_lock(); restore_regulatory_settings(true); + rtnl_unlock(); } int __init regulatory_init(void) -- cgit v1.2.3-18-g5258 From 1cd158573951f737fbc878a35cb5eb47bf9af3d5 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 28 Jun 2013 21:04:35 +0200 Subject: mac80211/minstrel_ht: fix cck rate sampling The CCK group needs special treatment to set the right flags and rate index. Add this missing check to prevent setting broken rates for tx packets. Cc: stable@vger.kernel.org # 3.10 Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/mac80211/rc80211_minstrel_ht.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 5b2d3012b98..f5aed963b22 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -804,10 +804,18 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta, sample_group = &minstrel_mcs_groups[sample_idx / MCS_GROUP_RATES]; info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE; + rate->count = 1; + + if (sample_idx / MCS_GROUP_RATES == MINSTREL_CCK_GROUP) { + int idx = sample_idx % ARRAY_SIZE(mp->cck_rates); + rate->idx = mp->cck_rates[idx]; + rate->flags = 0; + return; + } + rate->idx = sample_idx % MCS_GROUP_RATES + (sample_group->streams - 1) * MCS_GROUP_RATES; rate->flags = IEEE80211_TX_RC_MCS | sample_group->flags; - rate->count = 1; } static void -- cgit v1.2.3-18-g5258 From e13bae4f807401729b3f27c7e882a96b8b292809 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 8 Jul 2013 10:43:31 +0200 Subject: mac80211: fix ethtool stats for non-station interfaces As reported in https://bugzilla.kernel.org/show_bug.cgi?id=60514, the station loop never initialises 'sinfo' and therefore adds up a stack values, leaking stack information (the number of times it adds values is easily obtained another way.) Fix this by initialising the sinfo for each station to add. Cc: stable@vger.kernel.org Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 8184d121ff0..43dd7525bfc 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -666,6 +666,8 @@ static void ieee80211_get_et_stats(struct wiphy *wiphy, if (sta->sdata->dev != dev) continue; + sinfo.filled = 0; + sta_set_sinfo(sta, &sinfo); i = 0; ADD_STA_STATS(sta); } -- cgit v1.2.3-18-g5258 From 83374fe9de455e37c2a039603d2538eb77e8ec4d Mon Sep 17 00:00:00 2001 From: Chun-Yeow Yeoh Date: Thu, 11 Jul 2013 18:24:03 +0800 Subject: nl80211: fix the setting of RSSI threshold value for mesh RSSI threshold value used for mesh peering should be in negative value. After range checks to mesh parameters is introduced, this is not allowed. Fix this. Signed-off-by: Chun-Yeow Yeoh Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 9fb8820b75c..25d217d9080 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -4770,9 +4770,9 @@ do { \ FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshForwarding, 0, 1, mask, NL80211_MESHCONF_FORWARDING, nla_get_u8); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, rssi_threshold, 1, 255, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, rssi_threshold, -255, 0, mask, NL80211_MESHCONF_RSSI_THRESHOLD, - nla_get_u32); + nla_get_s32); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, ht_opmode, 0, 16, mask, NL80211_MESHCONF_HT_OPMODE, nla_get_u16); -- cgit v1.2.3-18-g5258 From 6b0f32745dcfba01d7be33acd1b40306c7a914c6 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 11 Jul 2013 22:33:26 +0200 Subject: mac80211: fix duplicate retransmission detection The duplicate retransmission detection code in mac80211 erroneously attempts to do the check for every frame, even frames that don't have a sequence control field or that don't use it (QoS-Null frames.) This is problematic because it causes the code to access data beyond the end of the SKB and depending on the data there will drop packets erroneously. Correct the code to not do duplicate detection for such frames. I found this error while testing AP powersave, it lead to retransmitted PS-Poll frames being dropped entirely as the data beyond the end of the SKB was always zero. Cc: stable@vger.kernel.org [all versions] Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 23dbcfc69b3..2c5a79bd377 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -936,8 +936,14 @@ ieee80211_rx_h_check(struct ieee80211_rx_data *rx) struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb); - /* Drop duplicate 802.11 retransmissions (IEEE 802.11 Chap. 9.2.9) */ - if (rx->sta && !is_multicast_ether_addr(hdr->addr1)) { + /* + * Drop duplicate 802.11 retransmissions + * (IEEE 802.11-2012: 9.3.2.10 "Duplicate detection and recovery") + */ + if (rx->skb->len >= 24 && rx->sta && + !ieee80211_is_ctl(hdr->frame_control) && + !ieee80211_is_qos_nullfunc(hdr->frame_control) && + !is_multicast_ether_addr(hdr->addr1)) { if (unlikely(ieee80211_has_retry(hdr->frame_control) && rx->sta->last_seq_ctrl[rx->seqno_idx] == hdr->seq_ctrl)) { -- cgit v1.2.3-18-g5258 From 5c9fc93bc9bc417418fc1b6366833ae6a07b804d Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 15 Jul 2013 14:35:06 +0200 Subject: mac80211/minstrel: fix NULL pointer dereference issue When priv_sta == NULL, mi->prev_sample is dereferenced too early. Move the assignment further down, after the rate_control_send_low call. Reported-by: Krzysztof Mazur Cc: stable@vger.kernel.org # 3.10 Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/mac80211/rc80211_minstrel.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c index ac7ef5414bd..e6512e2ffd2 100644 --- a/net/mac80211/rc80211_minstrel.c +++ b/net/mac80211/rc80211_minstrel.c @@ -290,7 +290,7 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta, struct minstrel_rate *msr, *mr; unsigned int ndx; bool mrr_capable; - bool prev_sample = mi->prev_sample; + bool prev_sample; int delta; int sampling_ratio; @@ -314,6 +314,7 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta, (mi->sample_count + mi->sample_deferred / 2); /* delta < 0: no sampling required */ + prev_sample = mi->prev_sample; mi->prev_sample = false; if (delta < 0 || (!mrr_capable && prev_sample)) return; -- cgit v1.2.3-18-g5258 From f2f79cca13e36972978ffd1fb6483c8a1141b510 Mon Sep 17 00:00:00 2001 From: Daniel Baluta Date: Sat, 13 Jul 2013 11:26:51 +0300 Subject: ndisc: bool initializations should use true and false Signed-off-by: Daniel Baluta Signed-off-by: David S. Miller --- net/ipv6/ndisc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index b3b5730b48c..24c03396e00 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -479,7 +479,7 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh, if (ifp) { src_addr = solicited_addr; if (ifp->flags & IFA_F_OPTIMISTIC) - override = 0; + override = false; inc_opt |= ifp->idev->cnf.force_tllao; in6_ifa_put(ifp); } else { @@ -557,7 +557,7 @@ void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh, } if (ipv6_addr_any(saddr)) - inc_opt = 0; + inc_opt = false; if (inc_opt) optlen += ndisc_opt_addr_space(dev); @@ -790,7 +790,7 @@ static void ndisc_recv_ns(struct sk_buff *skb) (is_router = pndisc_is_router(&msg->target, dev)) >= 0)) { if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) && skb->pkt_type != PACKET_HOST && - inc != 0 && + inc && idev->nd_parms->proxy_delay != 0) { /* * for anycast or proxy, -- cgit v1.2.3-18-g5258 From 8a73125c36809527236e1d8d367a09a3f0e9f9e1 Mon Sep 17 00:00:00 2001 From: Dragos Foianu Date: Sat, 13 Jul 2013 14:43:00 +0100 Subject: ethtool: fixed trailing statements in ethtool Applied fixes suggested by checkpatch.pl. Signed-off-by: Dragos Foianu Signed-off-by: David S. Miller --- net/core/ethtool.c | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/core/ethtool.c b/net/core/ethtool.c index ab5fa6336c8..78e9d9223e4 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -279,11 +279,16 @@ static u32 __ethtool_get_flags(struct net_device *dev) { u32 flags = 0; - if (dev->features & NETIF_F_LRO) flags |= ETH_FLAG_LRO; - if (dev->features & NETIF_F_HW_VLAN_CTAG_RX) flags |= ETH_FLAG_RXVLAN; - if (dev->features & NETIF_F_HW_VLAN_CTAG_TX) flags |= ETH_FLAG_TXVLAN; - if (dev->features & NETIF_F_NTUPLE) flags |= ETH_FLAG_NTUPLE; - if (dev->features & NETIF_F_RXHASH) flags |= ETH_FLAG_RXHASH; + if (dev->features & NETIF_F_LRO) + flags |= ETH_FLAG_LRO; + if (dev->features & NETIF_F_HW_VLAN_CTAG_RX) + flags |= ETH_FLAG_RXVLAN; + if (dev->features & NETIF_F_HW_VLAN_CTAG_TX) + flags |= ETH_FLAG_TXVLAN; + if (dev->features & NETIF_F_NTUPLE) + flags |= ETH_FLAG_NTUPLE; + if (dev->features & NETIF_F_RXHASH) + flags |= ETH_FLAG_RXHASH; return flags; } @@ -295,11 +300,16 @@ static int __ethtool_set_flags(struct net_device *dev, u32 data) if (data & ~ETH_ALL_FLAGS) return -EINVAL; - if (data & ETH_FLAG_LRO) features |= NETIF_F_LRO; - if (data & ETH_FLAG_RXVLAN) features |= NETIF_F_HW_VLAN_CTAG_RX; - if (data & ETH_FLAG_TXVLAN) features |= NETIF_F_HW_VLAN_CTAG_TX; - if (data & ETH_FLAG_NTUPLE) features |= NETIF_F_NTUPLE; - if (data & ETH_FLAG_RXHASH) features |= NETIF_F_RXHASH; + if (data & ETH_FLAG_LRO) + features |= NETIF_F_LRO; + if (data & ETH_FLAG_RXVLAN) + features |= NETIF_F_HW_VLAN_CTAG_RX; + if (data & ETH_FLAG_TXVLAN) + features |= NETIF_F_HW_VLAN_CTAG_TX; + if (data & ETH_FLAG_NTUPLE) + features |= NETIF_F_NTUPLE; + if (data & ETH_FLAG_RXHASH) + features |= NETIF_F_RXHASH; /* allow changing only bits set in hw_features */ changed = (features ^ dev->features) & ETH_ALL_FEATURES; -- cgit v1.2.3-18-g5258 From b6a82dd233cabcc1517c0744d7a8f0b61f559caf Mon Sep 17 00:00:00 2001 From: Dragos Foianu Date: Sat, 13 Jul 2013 15:03:55 +0100 Subject: net/irda: fixed style issues in irlan_eth Applied fixes suggested by checkpatch.pl Signed-off-by: Dragos Foianu Signed-off-by: David S. Miller --- net/irda/irlan/irlan_eth.c | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c index d14152e866d..ffcec225b5d 100644 --- a/net/irda/irlan/irlan_eth.c +++ b/net/irda/irlan/irlan_eth.c @@ -44,12 +44,12 @@ static int irlan_eth_open(struct net_device *dev); static int irlan_eth_close(struct net_device *dev); static netdev_tx_t irlan_eth_xmit(struct sk_buff *skb, struct net_device *dev); -static void irlan_eth_set_multicast_list( struct net_device *dev); +static void irlan_eth_set_multicast_list(struct net_device *dev); static const struct net_device_ops irlan_eth_netdev_ops = { - .ndo_open = irlan_eth_open, - .ndo_stop = irlan_eth_close, - .ndo_start_xmit = irlan_eth_xmit, + .ndo_open = irlan_eth_open, + .ndo_stop = irlan_eth_close, + .ndo_start_xmit = irlan_eth_xmit, .ndo_set_rx_mode = irlan_eth_set_multicast_list, .ndo_change_mtu = eth_change_mtu, .ndo_validate_addr = eth_validate_addr, @@ -110,7 +110,7 @@ static int irlan_eth_open(struct net_device *dev) { struct irlan_cb *self = netdev_priv(dev); - IRDA_DEBUG(2, "%s()\n", __func__ ); + IRDA_DEBUG(2, "%s()\n", __func__); /* Ready to play! */ netif_stop_queue(dev); /* Wait until data link is ready */ @@ -137,7 +137,7 @@ static int irlan_eth_close(struct net_device *dev) { struct irlan_cb *self = netdev_priv(dev); - IRDA_DEBUG(2, "%s()\n", __func__ ); + IRDA_DEBUG(2, "%s()\n", __func__); /* Stop device */ netif_stop_queue(dev); @@ -310,35 +310,32 @@ static void irlan_eth_set_multicast_list(struct net_device *dev) { struct irlan_cb *self = netdev_priv(dev); - IRDA_DEBUG(2, "%s()\n", __func__ ); + IRDA_DEBUG(2, "%s()\n", __func__); /* Check if data channel has been connected yet */ if (self->client.state != IRLAN_DATA) { - IRDA_DEBUG(1, "%s(), delaying!\n", __func__ ); + IRDA_DEBUG(1, "%s(), delaying!\n", __func__); return; } if (dev->flags & IFF_PROMISC) { /* Enable promiscuous mode */ IRDA_WARNING("Promiscuous mode not implemented by IrLAN!\n"); - } - else if ((dev->flags & IFF_ALLMULTI) || + } else if ((dev->flags & IFF_ALLMULTI) || netdev_mc_count(dev) > HW_MAX_ADDRS) { /* Disable promiscuous mode, use normal mode. */ - IRDA_DEBUG(4, "%s(), Setting multicast filter\n", __func__ ); + IRDA_DEBUG(4, "%s(), Setting multicast filter\n", __func__); /* hardware_set_filter(NULL); */ irlan_set_multicast_filter(self, TRUE); - } - else if (!netdev_mc_empty(dev)) { - IRDA_DEBUG(4, "%s(), Setting multicast filter\n", __func__ ); + } else if (!netdev_mc_empty(dev)) { + IRDA_DEBUG(4, "%s(), Setting multicast filter\n", __func__); /* Walk the address list, and load the filter */ /* hardware_set_filter(dev->mc_list); */ irlan_set_multicast_filter(self, TRUE); - } - else { - IRDA_DEBUG(4, "%s(), Clearing multicast filter\n", __func__ ); + } else { + IRDA_DEBUG(4, "%s(), Clearing multicast filter\n", __func__); irlan_set_multicast_filter(self, FALSE); } -- cgit v1.2.3-18-g5258 From 21d1196a35f5686c4323e42a62fdb4b23b0ab4a3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 15 Jul 2013 20:03:19 -0700 Subject: ipv4: set transport header earlier commit 45f00f99d6e ("ipv4: tcp: clean up tcp_v4_early_demux()") added a performance regression for non GRO traffic, basically disabling IP early demux. IPv6 stack resets transport header in ip6_rcv() before calling IP early demux in ip6_rcv_finish(), while IPv4 does this only in ip_local_deliver_finish(), _after_ IP early demux. GRO traffic happened to enable IP early demux because transport header is also set in inet_gro_receive() Instead of reverting the faulty commit, we can make IPv4/IPv6 behave the same : transport_header should be set in ip_rcv() instead of ip_local_deliver_finish() ip_local_deliver_finish() can also use skb_network_header_len() which is faster than ip_hdrlen() Signed-off-by: Eric Dumazet Cc: Neal Cardwell Cc: Tom Herbert Signed-off-by: David S. Miller --- net/ipv4/ip_input.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 3da817b89e9..15e3e683ade 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -190,10 +190,7 @@ static int ip_local_deliver_finish(struct sk_buff *skb) { struct net *net = dev_net(skb->dev); - __skb_pull(skb, ip_hdrlen(skb)); - - /* Point into the IP datagram, just past the header. */ - skb_reset_transport_header(skb); + __skb_pull(skb, skb_network_header_len(skb)); rcu_read_lock(); { @@ -437,6 +434,8 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, goto drop; } + skb->transport_header = skb->network_header + iph->ihl*4; + /* Remove any debris in the socket control block */ memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); -- cgit v1.2.3-18-g5258 From ae8e9c5a1a7889315229a741fd48a5dd0bc2964c Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 16 Jul 2013 17:09:15 -0700 Subject: net: Fix sysfs_format_mac() code duplication. It's just a duplicate implementation of "%*phC". Thanks to Joe Perches for showing that we had exactly this support in the lib/vsprintf.c code already. Signed-off-by: David S. Miller --- net/ethernet/eth.c | 21 +-------------------- 1 file changed, 1 insertion(+), 20 deletions(-) (limited to 'net') diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 5359560926b..be1f64d3535 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -401,27 +401,8 @@ struct net_device *alloc_etherdev_mqs(int sizeof_priv, unsigned int txqs, } EXPORT_SYMBOL(alloc_etherdev_mqs); -static size_t _format_mac_addr(char *buf, int buflen, - const unsigned char *addr, int len) -{ - int i; - char *cp = buf; - - for (i = 0; i < len; i++) { - cp += scnprintf(cp, buflen - (cp - buf), "%02x", addr[i]); - if (i == len - 1) - break; - cp += scnprintf(cp, buflen - (cp - buf), ":"); - } - return cp - buf; -} - ssize_t sysfs_format_mac(char *buf, const unsigned char *addr, int len) { - size_t l; - - l = _format_mac_addr(buf, PAGE_SIZE, addr, len); - l += scnprintf(buf + l, PAGE_SIZE - l, "\n"); - return (ssize_t)l; + return scnprintf(buf, PAGE_SIZE, "%*phC\n", len, addr); } EXPORT_SYMBOL(sysfs_format_mac); -- cgit v1.2.3-18-g5258 From 87f40dd6ce7042caca0b3b557e8923127f51f902 Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Tue, 16 Jul 2013 08:52:30 +0200 Subject: pkt_sched: sch_qfq: remove a source of high packet delay/jitter QFQ+ inherits from QFQ a design choice that may cause a high packet delay/jitter and a severe short-term unfairness. As QFQ, QFQ+ uses a special quantity, the system virtual time, to track the service provided by the ideal system it approximates. When a packet is dequeued, this quantity must be incremented by the size of the packet, divided by the sum of the weights of the aggregates waiting to be served. Tracking this sum correctly is a non-trivial task, because, to preserve tight service guarantees, the decrement of this sum must be delayed in a special way [1]: this sum can be decremented only after that its value would decrease also in the ideal system approximated by QFQ+. For efficiency, QFQ+ keeps track only of the 'instantaneous' weight sum, increased and decreased immediately as the weight of an aggregate changes, and as an aggregate is created or destroyed (which, in its turn, happens as a consequence of some class being created/destroyed/changed). However, to avoid the problems caused to service guarantees by these immediate decreases, QFQ+ increments the system virtual time using the maximum value allowed for the weight sum, 2^10, in place of the dynamic, instantaneous value. The instantaneous value of the weight sum is used only to check whether a request of weight increase or a class creation can be satisfied. Unfortunately, the problems caused by this choice are worse than the temporary degradation of the service guarantees that may occur, when a class is changed or destroyed, if the instantaneous value of the weight sum was used to update the system virtual time. In fact, the fraction of the link bandwidth guaranteed by QFQ+ to each aggregate is equal to the ratio between the weight of the aggregate and the sum of the weights of the competing aggregates. The packet delay guaranteed to the aggregate is instead inversely proportional to the guaranteed bandwidth. By using the maximum possible value, and not the actual value of the weight sum, QFQ+ provides each aggregate with the worst possible service guarantees, and not with service guarantees related to the actual set of competing aggregates. To see the consequences of this fact, consider the following simple example. Suppose that only the following aggregates are backlogged, i.e., that only the classes in the following aggregates have packets to transmit: one aggregate with weight 10, say A, and ten aggregates with weight 1, say B1, B2, ..., B10. In particular, suppose that these aggregates are always backlogged. Given the weight distribution, the smoothest and fairest service order would be: A B1 A B2 A B3 A B4 A B5 A B6 A B7 A B8 A B9 A B10 A B1 A B2 ... QFQ+ would provide exactly this optimal service if it used the actual value for the weight sum instead of the maximum possible value, i.e., 11 instead of 2^10. In contrast, since QFQ+ uses the latter value, it serves aggregates as follows (easy to prove and to reproduce experimentally): A B1 B2 B3 B4 B5 B6 B7 B8 B9 B10 A A A A A A A A A A B1 B2 ... B10 A A ... By replacing 10 with N in the above example, and by increasing N, one can increase at will the maximum packet delay and the jitter experienced by the classes in aggregate A. This patch addresses this issue by just using the above 'instantaneous' value of the weight sum, instead of the maximum possible value, when updating the system virtual time. After the instantaneous weight sum is decreased, QFQ+ may deviate from the ideal service for a time interval in the order of the time to serve one maximum-size packet for each backlogged class. The worst-case extent of the deviation exhibited by QFQ+ during this time interval [1] is basically the same as of the deviation described above (but, without this patch, QFQ+ suffers from such a deviation all the time). Finally, this patch modifies the comment to the function qfq_slot_insert, to make it coherent with the fact that the weight sum used by QFQ+ can now be lower than the maximum possible value. [1] P. Valente, "Extending WF2Q+ to support a dynamic traffic mix", Proceedings of AAA-IDEA'05, June 2005. Signed-off-by: Paolo Valente Signed-off-by: David S. Miller --- net/sched/sch_qfq.c | 85 +++++++++++++++++++++++++++++++++++------------------ 1 file changed, 56 insertions(+), 29 deletions(-) (limited to 'net') diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index a7ab323849b..8056fb4e618 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -113,7 +113,6 @@ #define FRAC_BITS 30 /* fixed point arithmetic */ #define ONE_FP (1UL << FRAC_BITS) -#define IWSUM (ONE_FP/QFQ_MAX_WSUM) #define QFQ_MTU_SHIFT 16 /* to support TSO/GSO */ #define QFQ_MIN_LMAX 512 /* see qfq_slot_insert */ @@ -189,6 +188,7 @@ struct qfq_sched { struct qfq_aggregate *in_serv_agg; /* Aggregate being served. */ u32 num_active_agg; /* Num. of active aggregates */ u32 wsum; /* weight sum */ + u32 iwsum; /* inverse weight sum */ unsigned long bitmaps[QFQ_MAX_STATE]; /* Group bitmaps. */ struct qfq_group groups[QFQ_MAX_INDEX + 1]; /* The groups. */ @@ -314,6 +314,7 @@ static void qfq_update_agg(struct qfq_sched *q, struct qfq_aggregate *agg, q->wsum += (int) agg->class_weight * (new_num_classes - agg->num_classes); + q->iwsum = ONE_FP / q->wsum; agg->num_classes = new_num_classes; } @@ -340,6 +341,10 @@ static void qfq_destroy_agg(struct qfq_sched *q, struct qfq_aggregate *agg) { if (!hlist_unhashed(&agg->nonfull_next)) hlist_del_init(&agg->nonfull_next); + q->wsum -= agg->class_weight; + if (q->wsum != 0) + q->iwsum = ONE_FP / q->wsum; + if (q->in_serv_agg == agg) q->in_serv_agg = qfq_choose_next_agg(q); kfree(agg); @@ -834,38 +839,60 @@ static void qfq_make_eligible(struct qfq_sched *q) } } - /* - * The index of the slot in which the aggregate is to be inserted must - * not be higher than QFQ_MAX_SLOTS-2. There is a '-2' and not a '-1' - * because the start time of the group may be moved backward by one - * slot after the aggregate has been inserted, and this would cause - * non-empty slots to be right-shifted by one position. + * The index of the slot in which the input aggregate agg is to be + * inserted must not be higher than QFQ_MAX_SLOTS-2. There is a '-2' + * and not a '-1' because the start time of the group may be moved + * backward by one slot after the aggregate has been inserted, and + * this would cause non-empty slots to be right-shifted by one + * position. + * + * QFQ+ fully satisfies this bound to the slot index if the parameters + * of the classes are not changed dynamically, and if QFQ+ never + * happens to postpone the service of agg unjustly, i.e., it never + * happens that the aggregate becomes backlogged and eligible, or just + * eligible, while an aggregate with a higher approximated finish time + * is being served. In particular, in this case QFQ+ guarantees that + * the timestamps of agg are low enough that the slot index is never + * higher than 2. Unfortunately, QFQ+ cannot provide the same + * guarantee if it happens to unjustly postpone the service of agg, or + * if the parameters of some class are changed. + * + * As for the first event, i.e., an out-of-order service, the + * upper bound to the slot index guaranteed by QFQ+ grows to + * 2 + + * QFQ_MAX_AGG_CLASSES * ((1<budget -= len; - q->V += (u64)len * IWSUM; + q->V += (u64)len * q->iwsum; pr_debug("qfq dequeue: len %u F %lld now %lld\n", len, (unsigned long long) in_serv_agg->F, (unsigned long long) q->V); -- cgit v1.2.3-18-g5258 From d4b812dea4a236f729526facf97df1a9d18e191c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 18 Jul 2013 07:19:26 -0700 Subject: vlan: mask vlan prio bits In commit 48cc32d38a52d0b68f91a171a8d00531edc6a46e ("vlan: don't deliver frames for unknown vlans to protocols") Florian made sure we set pkt_type to PACKET_OTHERHOST if the vlan id is set and we could find a vlan device for this particular id. But we also have a problem if prio bits are set. Steinar reported an issue on a router receiving IPv6 frames with a vlan tag of 4000 (id 0, prio 2), and tunneled into a sit device, because skb->vlan_tci is set. Forwarded frame is completely corrupted : We can see (8100:4000) being inserted in the middle of IPv6 source address : 16:48:00.780413 IP6 2001:16d8:8100:4000:ee1c:0:9d9:bc87 > 9f94:4d95:2001:67c:29f4::: ICMP6, unknown icmp6 type (0), length 64 0x0000: 0000 0029 8000 c7c3 7103 0001 a0ae e651 0x0010: 0000 0000 ccce 0b00 0000 0000 1011 1213 0x0020: 1415 1617 1819 1a1b 1c1d 1e1f 2021 2223 0x0030: 2425 2627 2829 2a2b 2c2d 2e2f 3031 3233 It seems we are not really ready to properly cope with this right now. We can probably do better in future kernels : vlan_get_ingress_priority() should be a netdev property instead of a per vlan_dev one. For stable kernels, lets clear vlan_tci to fix the bugs. Reported-by: Steinar H. Gunderson Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/8021q/vlan_core.c | 2 +- net/core/dev.c | 11 +++++++++-- 2 files changed, 10 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 8a15eaadc4b..4a78c4de9f2 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -9,7 +9,7 @@ bool vlan_do_receive(struct sk_buff **skbp) { struct sk_buff *skb = *skbp; __be16 vlan_proto = skb->vlan_proto; - u16 vlan_id = skb->vlan_tci & VLAN_VID_MASK; + u16 vlan_id = vlan_tx_tag_get_id(skb); struct net_device *vlan_dev; struct vlan_pcpu_stats *rx_stats; diff --git a/net/core/dev.c b/net/core/dev.c index a3d8d44cb7f..26755dd40da 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3580,8 +3580,15 @@ ncls: } } - if (vlan_tx_nonzero_tag_present(skb)) - skb->pkt_type = PACKET_OTHERHOST; + if (unlikely(vlan_tx_tag_present(skb))) { + if (vlan_tx_tag_get_id(skb)) + skb->pkt_type = PACKET_OTHERHOST; + /* Note: we might in the future use prio bits + * and set skb->priority like in vlan_do_receive() + * For the time being, just ignore Priority Code Point + */ + skb->vlan_tci = 0; + } /* deliver only exact match when indicated */ null_or_dev = deliver_exact ? skb->dev : NULL; -- cgit v1.2.3-18-g5258 From 3e3aac497513c669e1c62c71e1d552ea85c1d974 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 18 Jul 2013 09:35:10 -0700 Subject: vlan: fix a race in egress prio management egress_priority_map[] hash table updates are protected by rtnl, and we never remove elements until device is dismantled. We have to make sure that before inserting an new element in hash table, all its fields are committed to memory or else another cpu could find corrupt values and crash. Signed-off-by: Eric Dumazet Cc: Patrick McHardy Signed-off-by: David S. Miller --- net/8021q/vlan_dev.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'net') diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 3a8c8fd63c8..1cd3d2a406f 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -73,6 +73,8 @@ vlan_dev_get_egress_qos_mask(struct net_device *dev, struct sk_buff *skb) { struct vlan_priority_tci_mapping *mp; + smp_rmb(); /* coupled with smp_wmb() in vlan_dev_set_egress_priority() */ + mp = vlan_dev_priv(dev)->egress_priority_map[(skb->priority & 0xF)]; while (mp) { if (mp->priority == skb->priority) { @@ -249,6 +251,11 @@ int vlan_dev_set_egress_priority(const struct net_device *dev, np->next = mp; np->priority = skb_prio; np->vlan_qos = vlan_qos; + /* Before inserting this element in hash table, make sure all its fields + * are committed to memory. + * coupled with smp_rmb() in vlan_dev_get_egress_qos_mask() + */ + smp_wmb(); vlan->egress_priority_map[skb_prio & 0xF] = np; if (vlan_qos) vlan->nr_egress_mappings++; -- cgit v1.2.3-18-g5258 From 7427b370e0aa6226c763af94fc5c4e3433383543 Mon Sep 17 00:00:00 2001 From: Frederic Danis Date: Thu, 20 Jun 2013 11:11:04 +0200 Subject: NFC: Fix NCI over SPI build kbuild test robot found following error: net/built-in.o: In function `nci_spi_send': >> spi.c:(.text+0x19a76f): undefined reference to `crc_ccitt' Add CRC_CCITT module to Kconfig to fix it Reported-by: kbuild test robot. Signed-off-by: Frederic Danis Signed-off-by: Samuel Ortiz --- net/nfc/nci/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/nfc/nci/Kconfig b/net/nfc/nci/Kconfig index 2a2416080b4..a4f1e42e348 100644 --- a/net/nfc/nci/Kconfig +++ b/net/nfc/nci/Kconfig @@ -11,6 +11,7 @@ config NFC_NCI config NFC_NCI_SPI depends on NFC_NCI && SPI + select CRC_CCITT bool "NCI over SPI protocol support" default n help -- cgit v1.2.3-18-g5258 From 651e92716aaae60fc41b9652f54cb6803896e0da Mon Sep 17 00:00:00 2001 From: Michal Tesar Date: Fri, 19 Jul 2013 14:09:01 +0200 Subject: sysctl net: Keep tcp_syn_retries inside the boundary Limit the min/max value passed to the /proc/sys/net/ipv4/tcp_syn_retries. Signed-off-by: Michal Tesar Signed-off-by: David S. Miller --- net/ipv4/sysctl_net_ipv4.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index b2c123c44d6..610e324348d 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -36,6 +36,8 @@ static int tcp_adv_win_scale_min = -31; static int tcp_adv_win_scale_max = 31; static int ip_ttl_min = 1; static int ip_ttl_max = 255; +static int tcp_syn_retries_min = 1; +static int tcp_syn_retries_max = MAX_TCP_SYNCNT; static int ip_ping_group_range_min[] = { 0, 0 }; static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX }; @@ -332,7 +334,9 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_syn_retries, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &tcp_syn_retries_min, + .extra2 = &tcp_syn_retries_max }, { .procname = "tcp_synack_retries", -- cgit v1.2.3-18-g5258 From 1faabf2aab1fdaa1ace4e8c829d1b9cf7bfec2f1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 19 Jul 2013 20:07:16 -0700 Subject: bridge: do not call setup_timer() multiple times commit 9f00b2e7cf24 ("bridge: only expire the mdb entry when query is received") added a nasty bug as an active timer can be reinitialized. setup_timer() must be done once, no matter how many time mod_timer() is called. br_multicast_new_group() is the right place to do this. Reported-by: Srivatsa S. Bhat Diagnosed-by: Thomas Gleixner Signed-off-by: Eric Dumazet Tested-by: Srivatsa S. Bhat Cc: Cong Wang Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 69af490cce4..4b99c9a2704 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -619,6 +619,9 @@ rehash: mp->br = br; mp->addr = *group; + setup_timer(&mp->timer, br_multicast_group_expired, + (unsigned long)mp); + hlist_add_head_rcu(&mp->hlist[mdb->ver], &mdb->mhash[hash]); mdb->size++; @@ -1126,7 +1129,6 @@ static int br_ip4_multicast_query(struct net_bridge *br, if (!mp) goto out; - setup_timer(&mp->timer, br_multicast_group_expired, (unsigned long)mp); mod_timer(&mp->timer, now + br->multicast_membership_interval); mp->timer_armed = true; @@ -1204,7 +1206,6 @@ static int br_ip6_multicast_query(struct net_bridge *br, if (!mp) goto out; - setup_timer(&mp->timer, br_multicast_group_expired, (unsigned long)mp); mod_timer(&mp->timer, now + br->multicast_membership_interval); mp->timer_armed = true; -- cgit v1.2.3-18-g5258 From 40d18ff959fe8b847be4f7b03f84644a7c18211e Mon Sep 17 00:00:00 2001 From: Chun-Yeow Yeoh Date: Fri, 19 Jul 2013 17:37:39 +0800 Subject: mac80211: prevent the buffering or frame transmission to non-assoc mesh STA This patch is intended to avoid the buffering to non-assoc mesh STA and also to avoid the triggering of frame to non-assoc mesh STA which could cause kernel panic in specific hw. One of the examples, is kernel panic happens to ath9k if user space inserts the mesh STA and not proceed with the SAE and AMPE, and later the same mesh STA is detected again. The sta_state of the mesh STA remains at IEEE80211_STA_NONE and if the ieee80211_sta_ps_deliver_wakeup is called and subsequently the ath_tx_aggr_wakeup, the kernel panic due to ath_tx_node_init is not called before to initialize the require data structures. This issue is reported by Cedric Voncken before. http://www.spinics.net/lists/linux-wireless/msg106342.html [<831ea6b4>] ath_tx_aggr_wakeup+0x44/0xcc [ath9k] [<83084214>] ieee80211_sta_ps_deliver_wakeup+0xb8/0x208 [mac80211] [<830b9824>] ieee80211_mps_sta_status_update+0x94/0x108 [mac80211] [<83099398>] ieee80211_sta_ps_transition+0xc94/0x34d8 [mac80211] [<8022399c>] nf_iterate+0x98/0x104 [<8309bb60>] ieee80211_sta_ps_transition+0x345c/0x34d8 [mac80211] Signed-off-by: Chun-Yeow Yeoh Signed-off-by: Johannes Berg --- net/mac80211/mesh_ps.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/mac80211/mesh_ps.c b/net/mac80211/mesh_ps.c index 3b7bfc01ee3..22290a929b9 100644 --- a/net/mac80211/mesh_ps.c +++ b/net/mac80211/mesh_ps.c @@ -229,6 +229,10 @@ void ieee80211_mps_sta_status_update(struct sta_info *sta) enum nl80211_mesh_power_mode pm; bool do_buffer; + /* For non-assoc STA, prevent buffering or frame transmission */ + if (sta->sta_state < IEEE80211_STA_ASSOC) + return; + /* * use peer-specific power mode if peering is established and the * peer's power mode is known -- cgit v1.2.3-18-g5258 From cd34f647a78e7f2296fcb72392b9e5c832793e65 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Tue, 23 Jul 2013 13:56:50 +0200 Subject: mac80211: fix monitor interface suspend crash regression MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit My commit: commit 12e7f517029dad819c45eca9ca01fdb9ba57616b Author: Stanislaw Gruszka Date: Thu Feb 28 10:55:26 2013 +0100 mac80211: cleanup generic suspend/resume procedures removed check for deleting MONITOR and AP_VLAN when suspend. That can cause a crash (i.e. in iwlagn_mac_remove_interface()) since we remove interface in the driver that we did not add before. Reference: http://marc.info/?l=linux-kernel&m=137391815113860&w=2 Bisected-by: Ortwin Glück Reported-and-tested-by: Ortwin Glück Cc: stable@vger.kernel.org # 3.10 Signed-off-by: Stanislaw Gruszka Signed-off-by: Johannes Berg --- net/mac80211/pm.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index 7fc5d0d8149..34012620434 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -99,10 +99,13 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) } mutex_unlock(&local->sta_mtx); - /* remove all interfaces */ + /* remove all interfaces that were created in the driver */ list_for_each_entry(sdata, &local->interfaces, list) { - if (!ieee80211_sdata_running(sdata)) + if (!ieee80211_sdata_running(sdata) || + sdata->vif.type == NL80211_IFTYPE_AP_VLAN || + sdata->vif.type == NL80211_IFTYPE_MONITOR) continue; + drv_remove_interface(local, sdata); } -- cgit v1.2.3-18-g5258 From f585a991e1d1612265f0d4e812f77e40dd54975b Mon Sep 17 00:00:00 2001 From: Jerry Snitselaar Date: Mon, 22 Jul 2013 12:01:58 -0700 Subject: fib_trie: potential out of bounds access in trie_show_stats() With the <= max condition in the for loop, it will be always go 1 element further than needed. If the condition for the while loop is never met, then max is MAX_STAT_DEPTH, and for loop will walk off the end of nodesizes[]. Signed-off-by: Jerry Snitselaar Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 49616fed934..108a1e9c9ea 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -2133,7 +2133,7 @@ static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat) max--; pointers = 0; - for (i = 1; i <= max; i++) + for (i = 1; i < max; i++) if (stat->nodesizes[i] != 0) { seq_printf(seq, " %u: %u", i, stat->nodesizes[i]); pointers += (1<nodesizes[i]; -- cgit v1.2.3-18-g5258 From 905a6f96a1b18e490a75f810d733ced93c39b0e5 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Mon, 22 Jul 2013 23:45:53 +0200 Subject: ipv6: take rtnl_lock and mark mrt6 table as freed on namespace cleanup Otherwise we end up dereferencing the already freed net->ipv6.mrt pointer which leads to a panic (from Srivatsa S. Bhat): BUG: unable to handle kernel paging request at ffff882018552020 IP: [] ip6mr_sk_done+0x32/0xb0 [ipv6] PGD 290a067 PUD 207ffe0067 PMD 207ff1d067 PTE 8000002018552060 Oops: 0000 [#1] SMP DEBUG_PAGEALLOC Modules linked in: ebtable_nat ebtables nfs fscache nf_conntrack_ipv4 nf_defrag_ipv4 ipt_REJECT xt_CHECKSUM iptable_mangle iptable_filter ip_tables nfsd lockd nfs_acl exportfs auth_rpcgss autofs4 sunrpc 8021q garp bridge stp llc ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 xt_state nf_conntrack ip6table_filter +ip6_tables ipv6 vfat fat vhost_net macvtap macvlan vhost tun kvm_intel kvm uinput iTCO_wdt iTCO_vendor_support cdc_ether usbnet mii microcode i2c_i801 i2c_core lpc_ich mfd_core shpchp ioatdma dca mlx4_core be2net wmi acpi_cpufreq mperf ext4 jbd2 mbcache dm_mirror dm_region_hash dm_log dm_mod CPU: 0 PID: 7 Comm: kworker/u33:0 Not tainted 3.11.0-rc1-ea45e-a #4 Hardware name: IBM -[8737R2A]-/00Y2738, BIOS -[B2E120RUS-1.20]- 11/30/2012 Workqueue: netns cleanup_net task: ffff8810393641c0 ti: ffff881039366000 task.ti: ffff881039366000 RIP: 0010:[] [] ip6mr_sk_done+0x32/0xb0 [ipv6] RSP: 0018:ffff881039367bd8 EFLAGS: 00010286 RAX: ffff881039367fd8 RBX: ffff882018552000 RCX: dead000000200200 RDX: 0000000000000000 RSI: ffff881039367b68 RDI: ffff881039367b68 RBP: ffff881039367bf8 R08: ffff881039367b68 R09: 2222222222222222 R10: 2222222222222222 R11: 2222222222222222 R12: ffff882015a7a040 R13: ffff882014eb89c0 R14: ffff8820289e2800 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff88103fc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffff882018552020 CR3: 0000000001c0b000 CR4: 00000000000407f0 Stack: ffff881039367c18 ffff882014eb89c0 ffff882015e28c00 0000000000000000 ffff881039367c18 ffffffffa034d9d1 ffff8820289e2800 ffff882014eb89c0 ffff881039367c58 ffffffff815bdecb ffffffff815bddf2 ffff882014eb89c0 Call Trace: [] rawv6_close+0x21/0x40 [ipv6] [] inet_release+0xfb/0x220 [] ? inet_release+0x22/0x220 [] inet6_release+0x3f/0x50 [ipv6] [] sock_release+0x29/0xa0 [] sk_release_kernel+0x30/0x70 [] icmpv6_sk_exit+0x3b/0x80 [ipv6] [] ops_exit_list+0x39/0x60 [] cleanup_net+0xfb/0x1a0 [] process_one_work+0x1da/0x610 [] ? process_one_work+0x169/0x610 [] worker_thread+0x120/0x3a0 [] ? process_one_work+0x610/0x610 [] kthread+0xee/0x100 [] ? __init_kthread_worker+0x70/0x70 [] ret_from_fork+0x7c/0xb0 [] ? __init_kthread_worker+0x70/0x70 Code: 20 48 89 5d e8 4c 89 65 f0 4c 89 6d f8 66 66 66 66 90 4c 8b 67 30 49 89 fd e8 db 3c 1e e1 49 8b 9c 24 90 08 00 00 48 85 db 74 06 <4c> 39 6b 20 74 20 bb f3 ff ff ff e8 8e 3c 1e e1 89 d8 4c 8b 65 RIP [] ip6mr_sk_done+0x32/0xb0 [ipv6] RSP CR2: ffff882018552020 Reported-by: Srivatsa S. Bhat Tested-by: Srivatsa S. Bhat Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/ip6mr.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 583e8d435f9..03986d31fa4 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -259,10 +259,12 @@ static void __net_exit ip6mr_rules_exit(struct net *net) { struct mr6_table *mrt, *next; + rtnl_lock(); list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) { list_del(&mrt->list); ip6mr_free_table(mrt); } + rtnl_unlock(); fib_rules_unregister(net->ipv6.mr6_rules_ops); } #else @@ -289,7 +291,10 @@ static int __net_init ip6mr_rules_init(struct net *net) static void __net_exit ip6mr_rules_exit(struct net *net) { + rtnl_lock(); ip6mr_free_table(net->ipv6.mrt6); + net->ipv6.mrt6 = NULL; + rtnl_unlock(); } #endif -- cgit v1.2.3-18-g5258 From deceb4c062a8dd63fe554c3be2b4bf9151a5cedf Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 23 Jul 2013 20:22:39 +0100 Subject: net: fix comment above build_skb() build_skb() specifies that the data parameter must come from a kmalloc'd area, this is only true if frag_size equals 0, because then build_skb() will use kzsize(data) to figure out the actual data size. Update the comment to reflect that special condition. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- net/core/skbuff.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 20e02d2605e..3df4d4ccf44 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -309,7 +309,8 @@ EXPORT_SYMBOL(__alloc_skb); * @frag_size: size of fragment, or 0 if head was kmalloced * * Allocate a new &sk_buff. Caller provides space holding head and - * skb_shared_info. @data must have been allocated by kmalloc() + * skb_shared_info. @data must have been allocated by kmalloc() only if + * @frag_size is 0, otherwise data should come from the page allocator. * The return is the new skb buffer. * On a failure the return is %NULL, and @data is not freed. * Notes : -- cgit v1.2.3-18-g5258 From 23df0b731954502a9391e739b92927cee4360343 Mon Sep 17 00:00:00 2001 From: Arik Nemtsov Date: Sun, 21 Jul 2013 16:36:48 +0300 Subject: regulatory: use correct regulatory initiator on wiphy register The current regdomain was not always set by the core. This causes cards with a custom regulatory domain to ignore user initiated changes if done before the card was registered. Signed-off-by: Arik Nemtsov Acked-by: Luis R. Rodriguez Signed-off-by: Johannes Berg --- net/wireless/reg.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 5a950f36bae..de06d5d1287 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -2247,10 +2247,13 @@ int reg_device_uevent(struct device *dev, struct kobj_uevent_env *env) void wiphy_regulatory_register(struct wiphy *wiphy) { + struct regulatory_request *lr; + if (!reg_dev_ignore_cell_hint(wiphy)) reg_num_devs_support_basehint++; - wiphy_update_regulatory(wiphy, NL80211_REGDOM_SET_BY_CORE); + lr = get_last_request(); + wiphy_update_regulatory(wiphy, lr->initiator); } void wiphy_regulatory_deregister(struct wiphy *wiphy) -- cgit v1.2.3-18-g5258 From da9910ac4a816b4340944c78d94c02a35527db46 Mon Sep 17 00:00:00 2001 From: Jaganath Kanakkassery Date: Fri, 21 Jun 2013 19:55:11 +0530 Subject: Bluetooth: Fix invalid length check in l2cap_information_rsp() The length check is invalid since the length varies with type of info response. This was introduced by the commit cb3b3152b2f5939d67005cff841a1ca748b19888 Because of this, l2cap info rsp is not handled and command reject is sent. > ACL data: handle 11 flags 0x02 dlen 16 L2CAP(s): Info rsp: type 2 result 0 Extended feature mask 0x00b8 Enhanced Retransmission mode Streaming mode FCS Option Fixed Channels < ACL data: handle 11 flags 0x00 dlen 10 L2CAP(s): Command rej: reason 0 Command not understood Cc: stable@vger.kernel.org Signed-off-by: Jaganath Kanakkassery Signed-off-by: Chan-Yeol Park Acked-by: Johan Hedberg Signed-off-by: Gustavo Padovan --- net/bluetooth/l2cap_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 4be6a264b47..68843a28a7a 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -4333,7 +4333,7 @@ static inline int l2cap_information_rsp(struct l2cap_conn *conn, struct l2cap_info_rsp *rsp = (struct l2cap_info_rsp *) data; u16 type, result; - if (cmd_len != sizeof(*rsp)) + if (cmd_len < sizeof(*rsp)) return -EPROTO; type = __le16_to_cpu(rsp->type); -- cgit v1.2.3-18-g5258 From fcee337704d76446e0d4714cc5eff53e896f7c6f Mon Sep 17 00:00:00 2001 From: Gustavo Padovan Date: Thu, 11 Jul 2013 11:34:28 +0100 Subject: Bluetooth: Fix race between hci_register_dev() and hci_dev_open() If hci_dev_open() is called after hci_register_dev() added the device to the hci_dev_list but before the workqueue are created we could run into a NULL pointer dereference (see below). This bug is very unlikely to happen, systems using bluetoothd to manage their bluetooth devices will never see this happen. BUG: unable to handle kernel NULL pointer dereference 0100 IP: [] __queue_work+0x32/0x3d0 (...) Call Trace: [] queue_work_on+0x45/0x50 [] hci_req_run+0xbf/0xf0 [bluetooth] [] ? hci_init2_req+0x720/0x720 [bluetooth] [] __hci_req_sync+0xd6/0x1c0 [bluetooth] [] ? try_to_wake_up+0x2b0/0x2b0 [] ? usb_autopm_put_interface+0x30/0x40 [] hci_dev_open+0x275/0x2e0 [bluetooth] [] hci_sock_ioctl+0x1f2/0x3f0 [bluetooth] [] sock_do_ioctl+0x30/0x70 [] sock_ioctl+0x79/0x2f0 [] do_vfs_ioctl+0x96/0x560 [] SyS_ioctl+0x91/0xb0 [] system_call_fastpath+0x1a/0x1f Reported-by: Sedat Dilek Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_core.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index ace5e55fe5a..64d33d1e14c 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2207,10 +2207,6 @@ int hci_register_dev(struct hci_dev *hdev) BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus); - write_lock(&hci_dev_list_lock); - list_add(&hdev->list, &hci_dev_list); - write_unlock(&hci_dev_list_lock); - hdev->workqueue = alloc_workqueue(hdev->name, WQ_HIGHPRI | WQ_UNBOUND | WQ_MEM_RECLAIM, 1); if (!hdev->workqueue) { @@ -2246,6 +2242,10 @@ int hci_register_dev(struct hci_dev *hdev) if (hdev->dev_type != HCI_AMP) set_bit(HCI_AUTO_OFF, &hdev->dev_flags); + write_lock(&hci_dev_list_lock); + list_add(&hdev->list, &hci_dev_list); + write_unlock(&hci_dev_list_lock); + hci_notify(hdev, HCI_DEV_REG); hci_dev_hold(hdev); @@ -2258,9 +2258,6 @@ err_wqueue: destroy_workqueue(hdev->req_workqueue); err: ida_simple_remove(&hci_index_ida, hdev->id); - write_lock(&hci_dev_list_lock); - list_del(&hdev->list); - write_unlock(&hci_dev_list_lock); return error; } -- cgit v1.2.3-18-g5258 From 555445cd11803c6bc93b2be31968f3949ef7708b Mon Sep 17 00:00:00 2001 From: Francesco Fusco Date: Wed, 24 Jul 2013 10:39:06 +0200 Subject: neigh: prevent overflowing params in /proc/sys/net/ipv4/neigh/ Without this patch, the fields app_solicit, gc_thresh1, gc_thresh2, gc_thresh3, proxy_qlen, ucast_solicit, mcast_solicit could have assumed negative values when setting large numbers. Signed-off-by: Francesco Fusco Signed-off-by: David S. Miller --- net/core/neighbour.c | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/core/neighbour.c b/net/core/neighbour.c index b7de821f98d..9232c68941a 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2767,6 +2767,7 @@ EXPORT_SYMBOL(neigh_app_ns); #ifdef CONFIG_SYSCTL static int zero; +static int int_max = INT_MAX; static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN); static int proc_unres_qlen(struct ctl_table *ctl, int write, @@ -2819,19 +2820,25 @@ static struct neigh_sysctl_table { .procname = "mcast_solicit", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .extra1 = &zero, + .extra2 = &int_max, + .proc_handler = proc_dointvec_minmax, }, [NEIGH_VAR_UCAST_PROBE] = { .procname = "ucast_solicit", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .extra1 = &zero, + .extra2 = &int_max, + .proc_handler = proc_dointvec_minmax, }, [NEIGH_VAR_APP_PROBE] = { .procname = "app_solicit", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .extra1 = &zero, + .extra2 = &int_max, + .proc_handler = proc_dointvec_minmax, }, [NEIGH_VAR_RETRANS_TIME] = { .procname = "retrans_time", @@ -2874,7 +2881,9 @@ static struct neigh_sysctl_table { .procname = "proxy_qlen", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .extra1 = &zero, + .extra2 = &int_max, + .proc_handler = proc_dointvec_minmax, }, [NEIGH_VAR_ANYCAST_DELAY] = { .procname = "anycast_delay", @@ -2916,19 +2925,25 @@ static struct neigh_sysctl_table { .procname = "gc_thresh1", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .extra1 = &zero, + .extra2 = &int_max, + .proc_handler = proc_dointvec_minmax, }, [NEIGH_VAR_GC_THRESH2] = { .procname = "gc_thresh2", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .extra1 = &zero, + .extra2 = &int_max, + .proc_handler = proc_dointvec_minmax, }, [NEIGH_VAR_GC_THRESH3] = { .procname = "gc_thresh3", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .extra1 = &zero, + .extra2 = &int_max, + .proc_handler = proc_dointvec_minmax, }, {}, }, -- cgit v1.2.3-18-g5258 From c74f2b2678f40b80265dd53556f1f778c8e1823f Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Fri, 26 Jul 2013 11:00:10 +0200 Subject: genetlink: release cb_lock before requesting additional module Requesting external module with cb_lock taken can result in the deadlock like showed below: [ 2458.111347] Showing all locks held in the system: [ 2458.111347] 1 lock held by NetworkManager/582: [ 2458.111347] #0: (cb_lock){++++++}, at: [] genl_rcv+0x19/0x40 [ 2458.111347] 1 lock held by modprobe/603: [ 2458.111347] #0: (cb_lock){++++++}, at: [] genl_lock_all+0x15/0x30 [ 2461.579457] SysRq : Show Blocked State [ 2461.580103] task PC stack pid father [ 2461.580103] NetworkManager D ffff880034b84500 4040 582 1 0x00000080 [ 2461.580103] ffff8800197ff720 0000000000000046 00000000001d5340 ffff8800197fffd8 [ 2461.580103] ffff8800197fffd8 00000000001d5340 ffff880019631700 7fffffffffffffff [ 2461.580103] ffff8800197ff880 ffff8800197ff878 ffff880019631700 ffff880019631700 [ 2461.580103] Call Trace: [ 2461.580103] [] schedule+0x29/0x70 [ 2461.580103] [] schedule_timeout+0x1c1/0x360 [ 2461.580103] [] ? mark_held_locks+0xbb/0x140 [ 2461.580103] [] ? _raw_spin_unlock_irq+0x2c/0x50 [ 2461.580103] [] ? trace_hardirqs_on_caller+0xfd/0x1c0 [ 2461.580103] [] wait_for_completion_killable+0xe8/0x170 [ 2461.580103] [] ? wake_up_state+0x20/0x20 [ 2461.580103] [] call_usermodehelper_exec+0x1a5/0x210 [ 2461.580103] [] ? wait_for_completion_killable+0x3d/0x170 [ 2461.580103] [] __request_module+0x1b3/0x370 [ 2461.580103] [] ? trace_hardirqs_on_caller+0xfd/0x1c0 [ 2461.580103] [] ctrl_getfamily+0x159/0x190 [ 2461.580103] [] genl_family_rcv_msg+0x1f4/0x2e0 [ 2461.580103] [] ? genl_family_rcv_msg+0x2e0/0x2e0 [ 2461.580103] [] genl_rcv_msg+0x8e/0xd0 [ 2461.580103] [] netlink_rcv_skb+0xa9/0xc0 [ 2461.580103] [] genl_rcv+0x28/0x40 [ 2461.580103] [] netlink_unicast+0xdd/0x190 [ 2461.580103] [] netlink_sendmsg+0x329/0x750 [ 2461.580103] [] sock_sendmsg+0x99/0xd0 [ 2461.580103] [] ? local_clock+0x5f/0x70 [ 2461.580103] [] ? lock_release_non_nested+0x308/0x350 [ 2461.580103] [] ___sys_sendmsg+0x39e/0x3b0 [ 2461.580103] [] ? kvm_clock_read+0x2f/0x50 [ 2461.580103] [] ? sched_clock+0x9/0x10 [ 2461.580103] [] ? sched_clock_local+0x1d/0x80 [ 2461.580103] [] ? sched_clock_cpu+0xa8/0x100 [ 2461.580103] [] ? trace_hardirqs_off+0xd/0x10 [ 2461.580103] [] ? local_clock+0x5f/0x70 [ 2461.580103] [] ? lock_release_holdtime.part.28+0xf/0x1a0 [ 2461.580103] [] ? fget_light+0xf9/0x510 [ 2461.580103] [] ? fget_light+0x3c/0x510 [ 2461.580103] [] __sys_sendmsg+0x42/0x80 [ 2461.580103] [] SyS_sendmsg+0x12/0x20 [ 2461.580103] [] system_call_fastpath+0x16/0x1b [ 2461.580103] modprobe D ffff88000f2c8000 4632 603 602 0x00000080 [ 2461.580103] ffff88000f04fba8 0000000000000046 00000000001d5340 ffff88000f04ffd8 [ 2461.580103] ffff88000f04ffd8 00000000001d5340 ffff8800377d4500 ffff8800377d4500 [ 2461.580103] ffffffff81d0b260 ffffffff81d0b268 ffffffff00000000 ffffffff81d0b2b0 [ 2461.580103] Call Trace: [ 2461.580103] [] schedule+0x29/0x70 [ 2461.580103] [] rwsem_down_write_failed+0xed/0x1a0 [ 2461.580103] [] ? update_cpu_load_active+0x10/0xb0 [ 2461.580103] [] call_rwsem_down_write_failed+0x13/0x20 [ 2461.580103] [] ? down_write+0x9d/0xb2 [ 2461.580103] [] ? genl_lock_all+0x15/0x30 [ 2461.580103] [] genl_lock_all+0x15/0x30 [ 2461.580103] [] genl_register_family+0x53/0x1f0 [ 2461.580103] [] ? 0xffffffffa01dbfff [ 2461.580103] [] genl_register_family_with_ops+0x20/0x80 [ 2461.580103] [] ? 0xffffffffa01dbfff [ 2461.580103] [] nl80211_init+0x24/0xf0 [cfg80211] [ 2461.580103] [] ? 0xffffffffa01dbfff [ 2461.580103] [] cfg80211_init+0x43/0xdb [cfg80211] [ 2461.580103] [] do_one_initcall+0xfa/0x1b0 [ 2461.580103] [] ? set_memory_nx+0x43/0x50 [ 2461.580103] [] load_module+0x1c6f/0x27f0 [ 2461.580103] [] ? store_uevent+0x40/0x40 [ 2461.580103] [] SyS_finit_module+0x86/0xb0 [ 2461.580103] [] system_call_fastpath+0x16/0x1b [ 2461.580103] Sched Debug Version: v0.10, 3.11.0-0.rc1.git4.1.fc20.x86_64 #1 Problem start to happen after adding net-pf-16-proto-16-family-nl80211 alias name to cfg80211 module by below commit (though that commit itself is perfectly fine): commit fb4e156886ce6e8309e912d8b370d192330d19d3 Author: Marcel Holtmann Date: Sun Apr 28 16:22:06 2013 -0700 nl80211: Add generic netlink module alias for cfg80211/nl80211 Reported-and-tested-by: Jeff Layton Reported-by: Richard W.M. Jones Signed-off-by: Stanislaw Gruszka Reviewed-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/netlink/genetlink.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 2fd6dbea327..1076fe16b12 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -877,8 +877,10 @@ static int ctrl_getfamily(struct sk_buff *skb, struct genl_info *info) #ifdef CONFIG_MODULES if (res == NULL) { genl_unlock(); + up_read(&cb_lock); request_module("net-pf-%d-proto-%d-family-%s", PF_NETLINK, NETLINK_GENERIC, name); + down_read(&cb_lock); genl_lock(); res = genl_family_find_byname(name); } -- cgit v1.2.3-18-g5258 From 3f8e2d75c14660abc8b69206f30190ab93304379 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 24 Jul 2013 02:32:46 +0300 Subject: Bluetooth: Fix HCI init for BlueFRITZ! devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit None of the BlueFRITZ! devices with manufacurer ID 31 (AVM Berlin) support HCI_Read_Local_Supported_Commands. It is safe to use the manufacturer ID (instead of e.g. a USB ID specific quirk) because the company never created any newer controllers. < HCI Command: Read Local Supported Comm.. (0x04|0x0002) plen 0 [hci0] 0.210014 > HCI Event: Command Status (0x0f) plen 4 [hci0] 0.217361 Read Local Supported Commands (0x04|0x0002) ncmd 1 Status: Unknown HCI Command (0x01) Reported-by: Jörg Esser Signed-off-by: Johan Hedberg Tested-by: Jörg Esser Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_core.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 64d33d1e14c..0176f200ccb 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -513,7 +513,10 @@ static void hci_init2_req(struct hci_request *req, unsigned long opt) hci_setup_event_mask(req); - if (hdev->hci_ver > BLUETOOTH_VER_1_1) + /* AVM Berlin (31), aka "BlueFRITZ!", doesn't support the read + * local supported commands HCI command. + */ + if (hdev->manufacturer != 31 && hdev->hci_ver > BLUETOOTH_VER_1_1) hci_req_add(req, HCI_OP_READ_LOCAL_COMMANDS, 0, NULL); if (lmp_ssp_capable(hdev)) { -- cgit v1.2.3-18-g5258 From 53e21fbc288218a423959f878c86471a0e323a9a Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Sat, 27 Jul 2013 14:11:14 -0500 Subject: Bluetooth: Fix calling request callback more than once In certain circumstances, such as an HCI driver using __hci_cmd_sync_ev with HCI_EV_CMD_COMPLETE as the expected completion event there is the chance that hci_event_packet will call hci_req_cmd_complete twice (once for the explicitly looked after event and another time in the actual handler of cmd_complete). In the case of __hci_cmd_sync_ev this introduces a race where the first call wakes up the blocking __hci_cmd_sync_ev and lets it complete. However, by the time that a second __hci_cmd_sync_ev call is already in progress the second hci_req_cmd_complete call (from the previous operation) will wake up the blocking function prematurely and cause it to fail, as witnessed by the following log: [ 639.232195] hci_rx_work: hci0 Event packet [ 639.232201] hci_req_cmd_complete: opcode 0xfc8e status 0x00 [ 639.232205] hci_sent_cmd_data: hci0 opcode 0xfc8e [ 639.232210] hci_req_sync_complete: hci0 result 0x00 [ 639.232220] hci_cmd_complete_evt: hci0 opcode 0xfc8e [ 639.232225] hci_req_cmd_complete: opcode 0xfc8e status 0x00 [ 639.232228] __hci_cmd_sync_ev: hci0 end: err 0 [ 639.232234] __hci_cmd_sync_ev: hci0 [ 639.232238] hci_req_add_ev: hci0 opcode 0xfc8e plen 250 [ 639.232242] hci_prepare_cmd: skb len 253 [ 639.232246] hci_req_run: length 1 [ 639.232250] hci_sent_cmd_data: hci0 opcode 0xfc8e [ 639.232255] hci_req_sync_complete: hci0 result 0x00 [ 639.232266] hci_cmd_work: hci0 cmd_cnt 1 cmd queued 1 [ 639.232271] __hci_cmd_sync_ev: hci0 end: err 0 [ 639.232276] Bluetooth: hci0 sending Intel patch command (0xfc8e) failed (-61) Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_core.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 0176f200ccb..48e1e0438f3 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -3442,8 +3442,16 @@ void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status) */ if (hdev->sent_cmd) { req_complete = bt_cb(hdev->sent_cmd)->req.complete; - if (req_complete) + + if (req_complete) { + /* We must set the complete callback to NULL to + * avoid calling the callback more than once if + * this function gets called again. + */ + bt_cb(hdev->sent_cmd)->req.complete = NULL; + goto call_complete; + } } /* Remove all pending commands belonging to this request */ -- cgit v1.2.3-18-g5258 From a0db856a95a29efb1c23db55c02d9f0ff4f0db48 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 30 Jul 2013 00:16:21 -0700 Subject: net_sched: Fix stack info leak in cbq_dump_wrr(). Make sure the reserved fields, and padding (if any), are fully initialized. Based upon a patch by Dan Carpenter and feedback from Joe Perches. Signed-off-by: David S. Miller --- net/sched/sch_cbq.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 71a56886255..7a42c81a19e 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1465,6 +1465,7 @@ static int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl) unsigned char *b = skb_tail_pointer(skb); struct tc_cbq_wrropt opt; + memset(&opt, 0, sizeof(opt)); opt.flags = 0; opt.allot = cl->allot; opt.priority = cl->priority + 1; -- cgit v1.2.3-18-g5258 From c319d50bfcf678c2857038276d9fab3c6646f3bf Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 30 Jul 2013 22:34:28 +0200 Subject: nl80211: fix another nl80211_fam.attrbuf race This is similar to the race Linus had reported, but in this case it's an older bug: nl80211_prepare_wdev_dump() uses the wiphy index in cb->args[0] as it is and thus parses the message over and over again instead of just once because 0 is the first valid wiphy index. Similar code in nl80211_testmode_dump() correctly offsets the wiphy_index by 1, do that here as well. Cc: stable@vger.kernel.org Reported-by: Ben Hutchings Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 25d217d9080..3fcba69817e 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -441,10 +441,12 @@ static int nl80211_prepare_wdev_dump(struct sk_buff *skb, goto out_unlock; } *rdev = wiphy_to_dev((*wdev)->wiphy); - cb->args[0] = (*rdev)->wiphy_idx; + /* 0 is the first index - add 1 to parse only once */ + cb->args[0] = (*rdev)->wiphy_idx + 1; cb->args[1] = (*wdev)->identifier; } else { - struct wiphy *wiphy = wiphy_idx_to_wiphy(cb->args[0]); + /* subtract the 1 again here */ + struct wiphy *wiphy = wiphy_idx_to_wiphy(cb->args[0] - 1); struct wireless_dev *tmp; if (!wiphy) { -- cgit v1.2.3-18-g5258 From 9ea7187c53f63e31f2d1b2b1e474e31808565009 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Wed, 31 Jul 2013 01:19:43 +0200 Subject: NFC: netlink: Rename CMD_FW_UPLOAD to CMD_FW_DOWNLOAD Loading a firmware into a target is typically called firmware download, not firmware upload. So we rename the netlink API to NFC_CMD_FW_DOWNLOAD in order to avoid any terminology confusion from userspace. Signed-off-by: Samuel Ortiz --- net/nfc/core.c | 20 ++++++++++---------- net/nfc/hci/core.c | 8 ++++---- net/nfc/netlink.c | 12 ++++++------ net/nfc/nfc.h | 6 +++--- 4 files changed, 23 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/net/nfc/core.c b/net/nfc/core.c index dc96a83aa6a..1d074dd1650 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -44,7 +44,7 @@ DEFINE_MUTEX(nfc_devlist_mutex); /* NFC device ID bitmap */ static DEFINE_IDA(nfc_index_ida); -int nfc_fw_upload(struct nfc_dev *dev, const char *firmware_name) +int nfc_fw_download(struct nfc_dev *dev, const char *firmware_name) { int rc = 0; @@ -62,28 +62,28 @@ int nfc_fw_upload(struct nfc_dev *dev, const char *firmware_name) goto error; } - if (!dev->ops->fw_upload) { + if (!dev->ops->fw_download) { rc = -EOPNOTSUPP; goto error; } - dev->fw_upload_in_progress = true; - rc = dev->ops->fw_upload(dev, firmware_name); + dev->fw_download_in_progress = true; + rc = dev->ops->fw_download(dev, firmware_name); if (rc) - dev->fw_upload_in_progress = false; + dev->fw_download_in_progress = false; error: device_unlock(&dev->dev); return rc; } -int nfc_fw_upload_done(struct nfc_dev *dev, const char *firmware_name) +int nfc_fw_download_done(struct nfc_dev *dev, const char *firmware_name) { - dev->fw_upload_in_progress = false; + dev->fw_download_in_progress = false; - return nfc_genl_fw_upload_done(dev, firmware_name); + return nfc_genl_fw_download_done(dev, firmware_name); } -EXPORT_SYMBOL(nfc_fw_upload_done); +EXPORT_SYMBOL(nfc_fw_download_done); /** * nfc_dev_up - turn on the NFC device @@ -110,7 +110,7 @@ int nfc_dev_up(struct nfc_dev *dev) goto error; } - if (dev->fw_upload_in_progress) { + if (dev->fw_download_in_progress) { rc = -EBUSY; goto error; } diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index 7b1c186736e..fe66908401f 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -809,14 +809,14 @@ static void nfc_hci_recv_from_llc(struct nfc_hci_dev *hdev, struct sk_buff *skb) } } -static int hci_fw_upload(struct nfc_dev *nfc_dev, const char *firmware_name) +static int hci_fw_download(struct nfc_dev *nfc_dev, const char *firmware_name) { struct nfc_hci_dev *hdev = nfc_get_drvdata(nfc_dev); - if (!hdev->ops->fw_upload) + if (!hdev->ops->fw_download) return -ENOTSUPP; - return hdev->ops->fw_upload(hdev, firmware_name); + return hdev->ops->fw_download(hdev, firmware_name); } static struct nfc_ops hci_nfc_ops = { @@ -831,7 +831,7 @@ static struct nfc_ops hci_nfc_ops = { .im_transceive = hci_transceive, .tm_send = hci_tm_send, .check_presence = hci_check_presence, - .fw_upload = hci_fw_upload, + .fw_download = hci_fw_download, .discover_se = hci_discover_se, .enable_se = hci_enable_se, .disable_se = hci_disable_se, diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index b05ad909778..f16fd59d416 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -1089,7 +1089,7 @@ exit: return rc; } -static int nfc_genl_fw_upload(struct sk_buff *skb, struct genl_info *info) +static int nfc_genl_fw_download(struct sk_buff *skb, struct genl_info *info) { struct nfc_dev *dev; int rc; @@ -1108,13 +1108,13 @@ static int nfc_genl_fw_upload(struct sk_buff *skb, struct genl_info *info) nla_strlcpy(firmware_name, info->attrs[NFC_ATTR_FIRMWARE_NAME], sizeof(firmware_name)); - rc = nfc_fw_upload(dev, firmware_name); + rc = nfc_fw_download(dev, firmware_name); nfc_put_device(dev); return rc; } -int nfc_genl_fw_upload_done(struct nfc_dev *dev, const char *firmware_name) +int nfc_genl_fw_download_done(struct nfc_dev *dev, const char *firmware_name) { struct sk_buff *msg; void *hdr; @@ -1124,7 +1124,7 @@ int nfc_genl_fw_upload_done(struct nfc_dev *dev, const char *firmware_name) return -ENOMEM; hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0, - NFC_CMD_FW_UPLOAD); + NFC_CMD_FW_DOWNLOAD); if (!hdr) goto free_msg; @@ -1251,8 +1251,8 @@ static struct genl_ops nfc_genl_ops[] = { .policy = nfc_genl_policy, }, { - .cmd = NFC_CMD_FW_UPLOAD, - .doit = nfc_genl_fw_upload, + .cmd = NFC_CMD_FW_DOWNLOAD, + .doit = nfc_genl_fw_download, .policy = nfc_genl_policy, }, { diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h index ee85a1fc1b2..820a7850c36 100644 --- a/net/nfc/nfc.h +++ b/net/nfc/nfc.h @@ -123,10 +123,10 @@ static inline void nfc_device_iter_exit(struct class_dev_iter *iter) class_dev_iter_exit(iter); } -int nfc_fw_upload(struct nfc_dev *dev, const char *firmware_name); -int nfc_genl_fw_upload_done(struct nfc_dev *dev, const char *firmware_name); +int nfc_fw_download(struct nfc_dev *dev, const char *firmware_name); +int nfc_genl_fw_download_done(struct nfc_dev *dev, const char *firmware_name); -int nfc_fw_upload_done(struct nfc_dev *dev, const char *firmware_name); +int nfc_fw_download_done(struct nfc_dev *dev, const char *firmware_name); int nfc_dev_up(struct nfc_dev *dev); -- cgit v1.2.3-18-g5258 From ff862a4668dd6dba962b1d2d8bd344afa6375683 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sun, 28 Jul 2013 23:04:45 +0300 Subject: af_key: more info leaks in pfkey messages This is inspired by a5cc68f3d6 "af_key: fix info leaks in notify messages". There are some struct members which don't get initialized and could disclose small amounts of private information. Acked-by: Mathias Krause Signed-off-by: Dan Carpenter Acked-by: Steffen Klassert Signed-off-by: David S. Miller --- net/key/af_key.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/key/af_key.c b/net/key/af_key.c index 9da862070dd..ab8bd2cabfa 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -2081,6 +2081,7 @@ static int pfkey_xfrm_policy2msg(struct sk_buff *skb, const struct xfrm_policy * pol->sadb_x_policy_type = IPSEC_POLICY_NONE; } pol->sadb_x_policy_dir = dir+1; + pol->sadb_x_policy_reserved = 0; pol->sadb_x_policy_id = xp->index; pol->sadb_x_policy_priority = xp->priority; @@ -3137,7 +3138,9 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct pol->sadb_x_policy_exttype = SADB_X_EXT_POLICY; pol->sadb_x_policy_type = IPSEC_POLICY_IPSEC; pol->sadb_x_policy_dir = XFRM_POLICY_OUT + 1; + pol->sadb_x_policy_reserved = 0; pol->sadb_x_policy_id = xp->index; + pol->sadb_x_policy_priority = xp->priority; /* Set sadb_comb's. */ if (x->id.proto == IPPROTO_AH) @@ -3525,6 +3528,7 @@ static int pfkey_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, pol->sadb_x_policy_exttype = SADB_X_EXT_POLICY; pol->sadb_x_policy_type = IPSEC_POLICY_IPSEC; pol->sadb_x_policy_dir = dir + 1; + pol->sadb_x_policy_reserved = 0; pol->sadb_x_policy_id = 0; pol->sadb_x_policy_priority = 0; -- cgit v1.2.3-18-g5258 From e1ee3673a83cc02b6b5e43c9e647d8dd5e1c4e26 Mon Sep 17 00:00:00 2001 From: Pablo Neira Date: Mon, 29 Jul 2013 12:30:04 +0200 Subject: genetlink: fix usage of NLM_F_EXCL or NLM_F_REPLACE Currently, it is not possible to use neither NLM_F_EXCL nor NLM_F_REPLACE from genetlink. This is due to this checking in genl_family_rcv_msg: if (nlh->nlmsg_flags & NLM_F_DUMP) NLM_F_DUMP is NLM_F_MATCH|NLM_F_ROOT. Thus, if NLM_F_EXCL or NLM_F_REPLACE flag is set, genetlink believes that you're requesting a dump and it calls the .dumpit callback. The solution that I propose is to refine this checking to make it stricter: if ((nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) And given the combination NLM_F_REPLACE and NLM_F_EXCL does not make sense to me, it removes the ambiguity. There was a patch that tried to fix this some time ago (0ab03c2 netlink: test for all flags of the NLM_F_DUMP composite) but it tried to resolve this ambiguity in *all* existing netlink subsystems, not only genetlink. That patch was reverted since it broke iproute2, which is using NLM_F_ROOT to request the dump of the routing cache. Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- net/netlink/genetlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 1076fe16b12..512718adb0d 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -571,7 +571,7 @@ static int genl_family_rcv_msg(struct genl_family *family, !capable(CAP_NET_ADMIN)) return -EPERM; - if (nlh->nlmsg_flags & NLM_F_DUMP) { + if ((nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) { struct netlink_dump_control c = { .dump = ops->dumpit, .done = ops->done, -- cgit v1.2.3-18-g5258 From b56e4b857c5210e848bfb80e074e5756a36cd523 Mon Sep 17 00:00:00 2001 From: Chris Wright Date: Wed, 31 Jul 2013 12:12:24 -0700 Subject: mac80211: fix infinite loop in ieee80211_determine_chantype Commit "3d9646d mac80211: fix channel selection bug" introduced a possible infinite loop by moving the out target above the chandef_downgrade while loop. When we downgrade to NL80211_CHAN_WIDTH_20_NOHT, we jump back up to re-run the while loop...indefinitely. Replace goto with break and carry on. This may not be sufficient to connect to the AP, but will at least keep the cpu from livelocking. Thanks to Derek Atkins as an extra pair of debugging eyes. Cc: stable@kernel.org Signed-off-by: Chris Wright Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index ae31968d42d..e3e7d2be9e4 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -338,7 +338,7 @@ out: if (WARN_ON(chandef->width == NL80211_CHAN_WIDTH_20_NOHT)) { ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT; - goto out; + break; } ret |= chandef_downgrade(chandef); -- cgit v1.2.3-18-g5258 From cb236d2d713cff83d024a82b836757d9e2b50715 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Jul 2013 23:07:43 +0200 Subject: mac80211: don't wait for TX status forever TX status notification can get lost, or the frames could get stuck on the queue, so don't wait for the callback from the driver forever and instead time out after half a second. Cc: stable@vger.kernel.org Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index e3e7d2be9e4..e5c3cf40506 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -31,10 +31,12 @@ #include "led.h" #define IEEE80211_AUTH_TIMEOUT (HZ / 5) +#define IEEE80211_AUTH_TIMEOUT_LONG (HZ / 2) #define IEEE80211_AUTH_TIMEOUT_SHORT (HZ / 10) #define IEEE80211_AUTH_MAX_TRIES 3 #define IEEE80211_AUTH_WAIT_ASSOC (HZ * 5) #define IEEE80211_ASSOC_TIMEOUT (HZ / 5) +#define IEEE80211_ASSOC_TIMEOUT_LONG (HZ / 2) #define IEEE80211_ASSOC_TIMEOUT_SHORT (HZ / 10) #define IEEE80211_ASSOC_MAX_TRIES 3 @@ -3394,10 +3396,13 @@ static int ieee80211_probe_auth(struct ieee80211_sub_if_data *sdata) if (tx_flags == 0) { auth_data->timeout = jiffies + IEEE80211_AUTH_TIMEOUT; - ifmgd->auth_data->timeout_started = true; + auth_data->timeout_started = true; run_again(sdata, auth_data->timeout); } else { - auth_data->timeout_started = false; + auth_data->timeout = + round_jiffies_up(jiffies + IEEE80211_AUTH_TIMEOUT_LONG); + auth_data->timeout_started = true; + run_again(sdata, auth_data->timeout); } return 0; @@ -3434,7 +3439,11 @@ static int ieee80211_do_assoc(struct ieee80211_sub_if_data *sdata) assoc_data->timeout_started = true; run_again(sdata, assoc_data->timeout); } else { - assoc_data->timeout_started = false; + assoc_data->timeout = + round_jiffies_up(jiffies + + IEEE80211_ASSOC_TIMEOUT_LONG); + assoc_data->timeout_started = true; + run_again(sdata, assoc_data->timeout); } return 0; -- cgit v1.2.3-18-g5258 From 5cdaed1e878d723d56d04ae0be1738124acf9f46 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 31 Jul 2013 11:23:06 +0200 Subject: mac80211: ignore HT primary channel while connected While we're connected, the AP shouldn't change the primary channel in the HT information. We checked this, and dropped the connection if it did change it. Unfortunately, this is causing problems on some APs, e.g. on the Netgear WRT610NL: the beacons seem to always contain a bad channel and if we made a connection using a probe response (correct data) we drop the connection immediately and can basically not connect properly at all. Work around this by ignoring the HT primary channel information in beacons if we're already connected. Also print out more verbose messages in the other situations to help diagnose similar bugs quicker in the future. Cc: stable@vger.kernel.org [3.10] Acked-by: Andy Isaacson Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index e5c3cf40506..077a9536083 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -211,8 +211,9 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, struct ieee80211_channel *channel, const struct ieee80211_ht_operation *ht_oper, const struct ieee80211_vht_operation *vht_oper, - struct cfg80211_chan_def *chandef, bool verbose) + struct cfg80211_chan_def *chandef, bool tracking) { + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct cfg80211_chan_def vht_chandef; u32 ht_cfreq, ret; @@ -231,7 +232,7 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, ht_cfreq = ieee80211_channel_to_frequency(ht_oper->primary_chan, channel->band); /* check that channel matches the right operating channel */ - if (channel->center_freq != ht_cfreq) { + if (!tracking && channel->center_freq != ht_cfreq) { /* * It's possible that some APs are confused here; * Netgear WNDR3700 sometimes reports 4 higher than @@ -239,11 +240,10 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, * since we look at probe response/beacon data here * it should be OK. */ - if (verbose) - sdata_info(sdata, - "Wrong control channel: center-freq: %d ht-cfreq: %d ht->primary_chan: %d band: %d - Disabling HT\n", - channel->center_freq, ht_cfreq, - ht_oper->primary_chan, channel->band); + sdata_info(sdata, + "Wrong control channel: center-freq: %d ht-cfreq: %d ht->primary_chan: %d band: %d - Disabling HT\n", + channel->center_freq, ht_cfreq, + ht_oper->primary_chan, channel->band); ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT; goto out; } @@ -297,7 +297,7 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, channel->band); break; default: - if (verbose) + if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT)) sdata_info(sdata, "AP VHT operation IE has invalid channel width (%d), disable VHT\n", vht_oper->chan_width); @@ -306,7 +306,7 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, } if (!cfg80211_chandef_valid(&vht_chandef)) { - if (verbose) + if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT)) sdata_info(sdata, "AP VHT information is invalid, disable VHT\n"); ret = IEEE80211_STA_DISABLE_VHT; @@ -319,7 +319,7 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, } if (!cfg80211_chandef_compatible(chandef, &vht_chandef)) { - if (verbose) + if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT)) sdata_info(sdata, "AP VHT information doesn't match HT, disable VHT\n"); ret = IEEE80211_STA_DISABLE_VHT; @@ -346,7 +346,7 @@ out: ret |= chandef_downgrade(chandef); } - if (chandef->width != vht_chandef.width && verbose) + if (chandef->width != vht_chandef.width && !tracking) sdata_info(sdata, "capabilities/regulatory prevented using AP HT/VHT configuration, downgraded\n"); @@ -386,7 +386,7 @@ static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata, /* calculate new channel (type) based on HT/VHT operation IEs */ flags = ieee80211_determine_chantype(sdata, sband, chan, ht_oper, - vht_oper, &chandef, false); + vht_oper, &chandef, true); /* * Downgrade the new channel if we associated with restricted @@ -3838,7 +3838,7 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, ifmgd->flags |= ieee80211_determine_chantype(sdata, sband, cbss->channel, ht_oper, vht_oper, - &chandef, true); + &chandef, false); sdata->needed_rx_chains = min(ieee80211_ht_vht_rx_chains(sdata, cbss), local->rx_chains); -- cgit v1.2.3-18-g5258 From 74418edec915d0f446debebde08d170c7b8ba0ee Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 30 Jul 2013 10:11:25 +0200 Subject: cfg80211: fix P2P GO interface teardown When a P2P GO interface goes down, cfg80211 doesn't properly tear it down, leading to warnings later. Add the GO interface type to the enumeration to tear it down like AP interfaces. Otherwise, we leave it pending and mac80211's state can get very confused, leading to warnings later. Cc: stable@vger.kernel.org Reported-by: Ilan Peer Tested-by: Ilan Peer Reviewed-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- net/wireless/core.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/wireless/core.c b/net/wireless/core.c index 4f9f216665e..a8c29fa4f1b 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -765,6 +765,7 @@ void cfg80211_leave(struct cfg80211_registered_device *rdev, cfg80211_leave_mesh(rdev, dev); break; case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_P2P_GO: cfg80211_stop_ap(rdev, dev); break; default: -- cgit v1.2.3-18-g5258 From ddfe49b42d8ad4bfdf92d63d4a74f162660d878d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 31 Jul 2013 20:52:03 +0200 Subject: mac80211: continue using disabled channels while connected In case the AP has different regulatory information than we do, it can happen that we connect to an AP based on e.g. the world roaming regulatory data, and then update our database with the AP's country information disables the channel the AP is using. If this happens on an HT AP, the bandwidth tracking code will hit the WARN_ON() and disconnect. Since that's not very useful, ignore the channel-disable flag in bandwidth tracking. Cc: stable@vger.kernel.org Reported-by: Chris Wright Tested-by: Chris Wright Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 077a9536083..cc9e02d79b5 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -335,8 +335,17 @@ out: if (ret & IEEE80211_STA_DISABLE_VHT) vht_chandef = *chandef; + /* + * Ignore the DISABLED flag when we're already connected and only + * tracking the APs beacon for bandwidth changes - otherwise we + * might get disconnected here if we connect to an AP, update our + * regulatory information based on the AP's country IE and the + * information we have is wrong/outdated and disables the channel + * that we're actually using for the connection to the AP. + */ while (!cfg80211_chandef_usable(sdata->local->hw.wiphy, chandef, - IEEE80211_CHAN_DISABLED)) { + tracking ? 0 : + IEEE80211_CHAN_DISABLED)) { if (WARN_ON(chandef->width == NL80211_CHAN_WIDTH_20_NOHT)) { ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT; -- cgit v1.2.3-18-g5258 From 8cb3b9c3642c0263d48f31d525bcee7170eedc20 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 30 Jul 2013 13:23:39 +0300 Subject: net_sched: info leak in atm_tc_dump_class() The "pvc" struct has a hole after pvc.sap_family which is not cleared. Signed-off-by: Dan Carpenter Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/sch_atm.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index ca8e0a57d94..1f9c31411f1 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -605,6 +605,7 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl, struct sockaddr_atmpvc pvc; int state; + memset(&pvc, 0, sizeof(pvc)); pvc.sap_family = AF_ATMPVC; pvc.sap_addr.itf = flow->vcc->dev ? flow->vcc->dev->number : -1; pvc.sap_addr.vpi = flow->vcc->vpi; -- cgit v1.2.3-18-g5258 From b00589af3b04736376f24625ab0b394642e89e29 Mon Sep 17 00:00:00 2001 From: Linus Lüssing Date: Thu, 1 Aug 2013 01:06:20 +0200 Subject: bridge: disable snooping if there is no querier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If there is no querier on a link then we won't get periodic reports and therefore won't be able to learn about multicast listeners behind ports, potentially leading to lost multicast packets, especially for multicast listeners that joined before the creation of the bridge. These lost multicast packets can appear since c5c23260594 ("bridge: Add multicast_querier toggle and disable queries by default") in particular. With this patch we are flooding multicast packets if our querier is disabled and if we didn't detect any other querier. A grace period of the Maximum Response Delay of the querier is added to give multicast responses enough time to arrive and to be learned from before disabling the flooding behaviour again. Signed-off-by: Linus Lüssing Signed-off-by: David S. Miller --- net/bridge/br_device.c | 3 ++- net/bridge/br_input.c | 3 ++- net/bridge/br_multicast.c | 39 ++++++++++++++++++++++++++++++--------- net/bridge/br_private.h | 12 ++++++++++++ 4 files changed, 46 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 2ef66781fed..69363bd37f6 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -70,7 +70,8 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) } mdst = br_mdb_get(br, skb, vid); - if (mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) + if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) && + br_multicast_querier_exists(br)) br_multicast_deliver(mdst, skb); else br_flood_deliver(br, skb, false); diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 1b8b8b824cd..8c561c0aa63 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -101,7 +101,8 @@ int br_handle_frame_finish(struct sk_buff *skb) unicast = false; } else if (is_multicast_ether_addr(dest)) { mdst = br_mdb_get(br, skb, vid); - if (mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) { + if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) && + br_multicast_querier_exists(br)) { if ((mdst && mdst->mglist) || br_multicast_is_router(br)) skb2 = skb; diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 4b99c9a2704..61c5e819380 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1014,6 +1014,16 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, } #endif +static void br_multicast_update_querier_timer(struct net_bridge *br, + unsigned long max_delay) +{ + if (!timer_pending(&br->multicast_querier_timer)) + br->multicast_querier_delay_time = jiffies + max_delay; + + mod_timer(&br->multicast_querier_timer, + jiffies + br->multicast_querier_interval); +} + /* * Add port to router_list * list is maintained ordered by pointer value @@ -1064,11 +1074,11 @@ timer: static void br_multicast_query_received(struct net_bridge *br, struct net_bridge_port *port, - int saddr) + int saddr, + unsigned long max_delay) { if (saddr) - mod_timer(&br->multicast_querier_timer, - jiffies + br->multicast_querier_interval); + br_multicast_update_querier_timer(br, max_delay); else if (timer_pending(&br->multicast_querier_timer)) return; @@ -1096,8 +1106,6 @@ static int br_ip4_multicast_query(struct net_bridge *br, (port && port->state == BR_STATE_DISABLED)) goto out; - br_multicast_query_received(br, port, !!iph->saddr); - group = ih->group; if (skb->len == sizeof(*ih)) { @@ -1121,6 +1129,8 @@ static int br_ip4_multicast_query(struct net_bridge *br, IGMPV3_MRC(ih3->code) * (HZ / IGMP_TIMER_SCALE) : 1; } + br_multicast_query_received(br, port, !!iph->saddr, max_delay); + if (!group) goto out; @@ -1176,8 +1186,6 @@ static int br_ip6_multicast_query(struct net_bridge *br, (port && port->state == BR_STATE_DISABLED)) goto out; - br_multicast_query_received(br, port, !ipv6_addr_any(&ip6h->saddr)); - if (skb->len == sizeof(*mld)) { if (!pskb_may_pull(skb, sizeof(*mld))) { err = -EINVAL; @@ -1198,6 +1206,9 @@ static int br_ip6_multicast_query(struct net_bridge *br, max_delay = mld2q->mld2q_mrc ? MLDV2_MRC(ntohs(mld2q->mld2q_mrc)) : 1; } + br_multicast_query_received(br, port, !ipv6_addr_any(&ip6h->saddr), + max_delay); + if (!group) goto out; @@ -1643,6 +1654,8 @@ void br_multicast_init(struct net_bridge *br) br->multicast_querier_interval = 255 * HZ; br->multicast_membership_interval = 260 * HZ; + br->multicast_querier_delay_time = 0; + spin_lock_init(&br->multicast_lock); setup_timer(&br->multicast_router_timer, br_multicast_local_router_expired, 0); @@ -1831,6 +1844,8 @@ unlock: int br_multicast_set_querier(struct net_bridge *br, unsigned long val) { + unsigned long max_delay; + val = !!val; spin_lock_bh(&br->multicast_lock); @@ -1838,8 +1853,14 @@ int br_multicast_set_querier(struct net_bridge *br, unsigned long val) goto unlock; br->multicast_querier = val; - if (val) - br_multicast_start_querier(br); + if (!val) + goto unlock; + + max_delay = br->multicast_query_response_interval; + if (!timer_pending(&br->multicast_querier_timer)) + br->multicast_querier_delay_time = jiffies + max_delay; + + br_multicast_start_querier(br); unlock: spin_unlock_bh(&br->multicast_lock); diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 3be89b3ce17..2f7da41851b 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -267,6 +267,7 @@ struct net_bridge unsigned long multicast_query_interval; unsigned long multicast_query_response_interval; unsigned long multicast_startup_query_interval; + unsigned long multicast_querier_delay_time; spinlock_t multicast_lock; struct net_bridge_mdb_htable __rcu *mdb; @@ -501,6 +502,13 @@ static inline bool br_multicast_is_router(struct net_bridge *br) (br->multicast_router == 1 && timer_pending(&br->multicast_router_timer)); } + +static inline bool br_multicast_querier_exists(struct net_bridge *br) +{ + return time_is_before_jiffies(br->multicast_querier_delay_time) && + (br->multicast_querier || + timer_pending(&br->multicast_querier_timer)); +} #else static inline int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port, @@ -557,6 +565,10 @@ static inline bool br_multicast_is_router(struct net_bridge *br) { return 0; } +static inline bool br_multicast_querier_exists(struct net_bridge *br) +{ + return false; +} static inline void br_mdb_init(void) { } -- cgit v1.2.3-18-g5258 From 71ffe9c77dd7a2b62207953091efa8dafec958dd Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 25 Jul 2013 10:37:49 +0200 Subject: netfilter: xt_TCPMSS: fix handling of malformed TCP header and options Make sure the packet has enough room for the TCP header and that it is not malformed. While at it, store tcph->doff*4 in a variable, as it is used several times. This patch also fixes a possible off by one in case of malformed TCP options. Reported-by: Julian Anastasov Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_TCPMSS.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index 7011c71646f..6113cc7efff 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -52,7 +52,8 @@ tcpmss_mangle_packet(struct sk_buff *skb, { const struct xt_tcpmss_info *info = par->targinfo; struct tcphdr *tcph; - unsigned int tcplen, i; + int len, tcp_hdrlen; + unsigned int i; __be16 oldval; u16 newmss; u8 *opt; @@ -64,11 +65,14 @@ tcpmss_mangle_packet(struct sk_buff *skb, if (!skb_make_writable(skb, skb->len)) return -1; - tcplen = skb->len - tcphoff; + len = skb->len - tcphoff; + if (len < (int)sizeof(struct tcphdr)) + return -1; + tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff); + tcp_hdrlen = tcph->doff * 4; - /* Header cannot be larger than the packet */ - if (tcplen < tcph->doff*4) + if (len < tcp_hdrlen) return -1; if (info->mss == XT_TCPMSS_CLAMP_PMTU) { @@ -87,9 +91,8 @@ tcpmss_mangle_packet(struct sk_buff *skb, newmss = info->mss; opt = (u_int8_t *)tcph; - for (i = sizeof(struct tcphdr); i < tcph->doff*4; i += optlen(opt, i)) { - if (opt[i] == TCPOPT_MSS && tcph->doff*4 - i >= TCPOLEN_MSS && - opt[i+1] == TCPOLEN_MSS) { + for (i = sizeof(struct tcphdr); i <= tcp_hdrlen - TCPOLEN_MSS; i += optlen(opt, i)) { + if (opt[i] == TCPOPT_MSS && opt[i+1] == TCPOLEN_MSS) { u_int16_t oldmss; oldmss = (opt[i+2] << 8) | opt[i+3]; @@ -112,9 +115,10 @@ tcpmss_mangle_packet(struct sk_buff *skb, } /* There is data after the header so the option can't be added - without moving it, and doing so may make the SYN packet - itself too large. Accept the packet unmodified instead. */ - if (tcplen > tcph->doff*4) + * without moving it, and doing so may make the SYN packet + * itself too large. Accept the packet unmodified instead. + */ + if (len > tcp_hdrlen) return 0; /* @@ -143,10 +147,10 @@ tcpmss_mangle_packet(struct sk_buff *skb, newmss = min(newmss, (u16)1220); opt = (u_int8_t *)tcph + sizeof(struct tcphdr); - memmove(opt + TCPOLEN_MSS, opt, tcplen - sizeof(struct tcphdr)); + memmove(opt + TCPOLEN_MSS, opt, len - sizeof(struct tcphdr)); inet_proto_csum_replace2(&tcph->check, skb, - htons(tcplen), htons(tcplen + TCPOLEN_MSS), 1); + htons(len), htons(len + TCPOLEN_MSS), 1); opt[0] = TCPOPT_MSS; opt[1] = TCPOLEN_MSS; opt[2] = (newmss & 0xff00) >> 8; -- cgit v1.2.3-18-g5258 From a206bcb3b02025b23137f3228109d72e0f835c05 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 25 Jul 2013 10:46:46 +0200 Subject: netfilter: xt_TCPOPTSTRIP: fix possible off by one access Fix a possible off by one access since optlen() touches opt[offset+1] unsafely when i == tcp_hdrlen(skb) - 1. This patch replaces tcp_hdrlen() by the local variable tcp_hdrlen that stores the TCP header length, to save some cycles. Reported-by: Julian Anastasov Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_TCPOPTSTRIP.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_TCPOPTSTRIP.c b/net/netfilter/xt_TCPOPTSTRIP.c index b68fa191710..625fa1d636a 100644 --- a/net/netfilter/xt_TCPOPTSTRIP.c +++ b/net/netfilter/xt_TCPOPTSTRIP.c @@ -38,7 +38,7 @@ tcpoptstrip_mangle_packet(struct sk_buff *skb, struct tcphdr *tcph; u_int16_t n, o; u_int8_t *opt; - int len; + int len, tcp_hdrlen; /* This is a fragment, no TCP header is available */ if (par->fragoff != 0) @@ -52,7 +52,9 @@ tcpoptstrip_mangle_packet(struct sk_buff *skb, return NF_DROP; tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff); - if (tcph->doff * 4 > len) + tcp_hdrlen = tcph->doff * 4; + + if (len < tcp_hdrlen) return NF_DROP; opt = (u_int8_t *)tcph; @@ -61,10 +63,10 @@ tcpoptstrip_mangle_packet(struct sk_buff *skb, * Walk through all TCP options - if we find some option to remove, * set all octets to %TCPOPT_NOP and adjust checksum. */ - for (i = sizeof(struct tcphdr); i < tcp_hdrlen(skb); i += optl) { + for (i = sizeof(struct tcphdr); i < tcp_hdrlen - 1; i += optl) { optl = optlen(opt, i); - if (i + optl > tcp_hdrlen(skb)) + if (i + optl > tcp_hdrlen) break; if (!tcpoptstrip_test_bit(info->strip_bmap, opt[i])) -- cgit v1.2.3-18-g5258 From 447383d2ba6061bb069da45f95f223a01bba61dd Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 25 Jul 2013 11:30:23 +1000 Subject: NFSD/sunrpc: avoid deadlock on TCP connection due to memory pressure. Since we enabled auto-tuning for sunrpc TCP connections we do not guarantee that there is enough write-space on each connection to queue a reply. If memory pressure causes the window to shrink too small, the request throttling in sunrpc/svc will not accept any requests so no more requests will be handled. Even when pressure decreases the window will not grow again until data is sent on the connection. This means we get a deadlock: no requests will be handled until there is more space, and no space will be allocated until a request is handled. This can be simulated by modifying svc_tcp_has_wspace to inflate the number of byte required and removing the 'svc_sock_setbufsize' calls in svc_setup_socket. I found that multiplying by 16 was enough to make the requirement exceed the default allocation. With this modification in place: mount -o vers=3,proto=tcp 127.0.0.1:/home /mnt would block and eventually time out because the nfs server could not accept any requests. This patch relaxes the request throttling to always allow at least one request through per connection. It does this by checking both sk_stream_min_wspace() and xprt->xpt_reserved are zero. The first is zero when the TCP transmit queue is empty. The second is zero when there are no RPC requests being processed. When both of these are zero the socket is idle and so one more request can safely be allowed through. Applying this patch allows the above mount command to succeed cleanly. Tracing shows that the allocated write buffer space quickly grows and after a few requests are handled, the extra tests are no longer needed to permit further requests to be processed. The main purpose of request throttling is to handle the case when one client is slow at collecting replies and the send queue gets full of replies that the client hasn't acknowledged (at the TCP level) yet. As we only change behaviour when the send queue is empty this main purpose is still preserved. Reported-by: Ben Myers Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields --- net/sunrpc/svcsock.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 305374d4fb9..7762b9f8a8b 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1193,7 +1193,9 @@ static int svc_tcp_has_wspace(struct svc_xprt *xprt) if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) return 1; required = atomic_read(&xprt->xpt_reserved) + serv->sv_max_mesg; - if (sk_stream_wspace(svsk->sk_sk) >= required) + if (sk_stream_wspace(svsk->sk_sk) >= required || + (sk_stream_min_wspace(svsk->sk_sk) == 0 && + atomic_read(&xprt->xpt_reserved) == 0)) return 1; set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); return 0; -- cgit v1.2.3-18-g5258 From 9f96392b0ae6aefc02a9b900c3f4889dfafc8402 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 10 Jun 2013 16:06:44 -0400 Subject: svcrpc: fix gss_rpc_upcall create error Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields --- net/sunrpc/auth_gss/gss_rpc_upcall.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c index d304f41260f..1e1ccf539fa 100644 --- a/net/sunrpc/auth_gss/gss_rpc_upcall.c +++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c @@ -120,7 +120,7 @@ static int gssp_rpc_create(struct net *net, struct rpc_clnt **_clnt) if (IS_ERR(clnt)) { dprintk("RPC: failed to create AF_LOCAL gssproxy " "client (errno %ld).\n", PTR_ERR(clnt)); - result = -PTR_ERR(clnt); + result = PTR_ERR(clnt); *_clnt = NULL; goto out; } -- cgit v1.2.3-18-g5258 From dc43376c26cef74226174a2394f37f2a3f8a8639 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 7 Jun 2013 10:11:19 -0400 Subject: svcrpc: fix gss-proxy xdr decoding oops Uninitialized stack data was being used as the destination for memcpy's. Longer term we'll just delete some of this code; all we're doing is skipping over xdr that we don't care about. Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields --- net/sunrpc/auth_gss/gss_rpc_xdr.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c index 357f613df7f..3c85d1c8a02 100644 --- a/net/sunrpc/auth_gss/gss_rpc_xdr.c +++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c @@ -430,7 +430,7 @@ static int dummy_enc_nameattr_array(struct xdr_stream *xdr, static int dummy_dec_nameattr_array(struct xdr_stream *xdr, struct gssx_name_attr_array *naa) { - struct gssx_name_attr dummy; + struct gssx_name_attr dummy = { .attr = {.len = 0} }; u32 count, i; __be32 *p; @@ -493,12 +493,13 @@ static int gssx_enc_name(struct xdr_stream *xdr, return err; } + static int gssx_dec_name(struct xdr_stream *xdr, struct gssx_name *name) { - struct xdr_netobj dummy_netobj; - struct gssx_name_attr_array dummy_name_attr_array; - struct gssx_option_array dummy_option_array; + struct xdr_netobj dummy_netobj = { .len = 0 }; + struct gssx_name_attr_array dummy_name_attr_array = { .count = 0 }; + struct gssx_option_array dummy_option_array = { .count = 0 }; int err; /* name->display_name */ -- cgit v1.2.3-18-g5258 From 743e217129f69aab074abe520a464fd0c6b1cca1 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 31 Jul 2013 14:11:14 -0400 Subject: svcrpc: fix kfree oops in gss-proxy code mech_oid.data is an array, not kmalloc()'d memory. Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields --- net/sunrpc/auth_gss/gss_rpc_upcall.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c index 1e1ccf539fa..af7ffd447fe 100644 --- a/net/sunrpc/auth_gss/gss_rpc_upcall.c +++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c @@ -328,7 +328,6 @@ void gssp_free_upcall_data(struct gssp_upcall_data *data) kfree(data->in_handle.data); kfree(data->out_handle.data); kfree(data->out_token.data); - kfree(data->mech_oid.data); free_svc_cred(&data->creds); } -- cgit v1.2.3-18-g5258 From 7193bd17ea92c4c89016c304362c9be93ce50050 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 31 Jul 2013 17:51:42 -0400 Subject: svcrpc: set cr_gss_mech from gss-proxy as well as legacy upcall The change made to rsc_parse() in 0dc1531aca7fd1440918bd55844a054e9c29acad "svcrpc: store gss mech in svc_cred" should also have been propagated to the gss-proxy codepath. This fixes a crash in the gss-proxy case. Signed-off-by: J. Bruce Fields --- net/sunrpc/auth_gss/svcauth_gss.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index d0347d148b3..09fb638bcaa 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -1180,6 +1180,7 @@ static int gss_proxy_save_rsc(struct cache_detail *cd, gm = gss_mech_get_by_OID(&ud->mech_oid); if (!gm) goto out; + rsci.cred.cr_gss_mech = gm; status = -EINVAL; /* mech-specific data: */ @@ -1195,7 +1196,6 @@ static int gss_proxy_save_rsc(struct cache_detail *cd, rscp = rsc_update(cd, &rsci, rscp); status = 0; out: - gss_mech_put(gm); rsc_free(&rsci); if (rscp) cache_put(&rscp->h, cd); -- cgit v1.2.3-18-g5258 From 2ac3ac8f86f2fe065d746d9a9abaca867adec577 Mon Sep 17 00:00:00 2001 From: Michal Kubeček Date: Thu, 1 Aug 2013 10:04:14 +0200 Subject: ipv6: prevent fib6_run_gc() contention On a high-traffic router with many processors and many IPv6 dst entries, soft lockup in fib6_run_gc() can occur when number of entries reaches gc_thresh. This happens because fib6_run_gc() uses fib6_gc_lock to allow only one thread to run the garbage collector but ip6_dst_gc() doesn't update net->ipv6.ip6_rt_last_gc until fib6_run_gc() returns. On a system with many entries, this can take some time so that in the meantime, other threads pass the tests in ip6_dst_gc() (ip6_rt_last_gc is still not updated) and wait for the lock. They then have to run the garbage collector one after another which blocks them for quite long. Resolve this by replacing special value ~0UL of expire parameter to fib6_run_gc() by explicit "force" parameter to choose between spin_lock_bh() and spin_trylock_bh() and call fib6_run_gc() with force=false if gc_thresh is reached but not max_size. Signed-off-by: Michal Kubecek Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 19 ++++++++----------- net/ipv6/ndisc.c | 4 ++-- net/ipv6/route.c | 4 ++-- 3 files changed, 12 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 5fc9c7a68d8..d872553ca93 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1632,19 +1632,16 @@ static int fib6_age(struct rt6_info *rt, void *arg) static DEFINE_SPINLOCK(fib6_gc_lock); -void fib6_run_gc(unsigned long expires, struct net *net) +void fib6_run_gc(unsigned long expires, struct net *net, bool force) { - if (expires != ~0UL) { + if (force) { spin_lock_bh(&fib6_gc_lock); - gc_args.timeout = expires ? (int)expires : - net->ipv6.sysctl.ip6_rt_gc_interval; - } else { - if (!spin_trylock_bh(&fib6_gc_lock)) { - mod_timer(&net->ipv6.ip6_fib_timer, jiffies + HZ); - return; - } - gc_args.timeout = net->ipv6.sysctl.ip6_rt_gc_interval; + } else if (!spin_trylock_bh(&fib6_gc_lock)) { + mod_timer(&net->ipv6.ip6_fib_timer, jiffies + HZ); + return; } + gc_args.timeout = expires ? (int)expires : + net->ipv6.sysctl.ip6_rt_gc_interval; gc_args.more = icmp6_dst_gc(); @@ -1661,7 +1658,7 @@ void fib6_run_gc(unsigned long expires, struct net *net) static void fib6_gc_timer_cb(unsigned long arg) { - fib6_run_gc(0, (struct net *)arg); + fib6_run_gc(0, (struct net *)arg, true); } static int __net_init fib6_net_init(struct net *net) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 24c03396e00..79aa9652ed8 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1576,7 +1576,7 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, switch (event) { case NETDEV_CHANGEADDR: neigh_changeaddr(&nd_tbl, dev); - fib6_run_gc(~0UL, net); + fib6_run_gc(0, net, false); idev = in6_dev_get(dev); if (!idev) break; @@ -1586,7 +1586,7 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, break; case NETDEV_DOWN: neigh_ifdown(&nd_tbl, dev); - fib6_run_gc(~0UL, net); + fib6_run_gc(0, net, false); break; case NETDEV_NOTIFY_PEERS: ndisc_send_unsol_na(dev); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index a8c891aa246..824c424f964 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1326,7 +1326,7 @@ static int ip6_dst_gc(struct dst_ops *ops) goto out; net->ipv6.ip6_rt_gc_expire++; - fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net); + fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, entries > rt_max_size); net->ipv6.ip6_rt_last_gc = now; entries = dst_entries_get_slow(ops); if (entries < ops->gc_thresh) @@ -2827,7 +2827,7 @@ int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write, net = (struct net *)ctl->extra1; delay = net->ipv6.sysctl.flush_delay; proc_dointvec(ctl, write, buffer, lenp, ppos); - fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net); + fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0); return 0; } -- cgit v1.2.3-18-g5258 From 49a18d86f66d33a20144ecb5a34bba0d1856b260 Mon Sep 17 00:00:00 2001 From: Michal Kubeček Date: Thu, 1 Aug 2013 10:04:24 +0200 Subject: ipv6: update ip6_rt_last_gc every time GC is run As pointed out by Eric Dumazet, net->ipv6.ip6_rt_last_gc should hold the last time garbage collector was run so that we should update it whenever fib6_run_gc() calls fib6_clean_all(), not only if we got there from ip6_dst_gc(). Signed-off-by: Michal Kubecek Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 6 +++++- net/ipv6/route.c | 4 +--- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index d872553ca93..bff3d821c7e 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1634,6 +1634,8 @@ static DEFINE_SPINLOCK(fib6_gc_lock); void fib6_run_gc(unsigned long expires, struct net *net, bool force) { + unsigned long now; + if (force) { spin_lock_bh(&fib6_gc_lock); } else if (!spin_trylock_bh(&fib6_gc_lock)) { @@ -1646,10 +1648,12 @@ void fib6_run_gc(unsigned long expires, struct net *net, bool force) gc_args.more = icmp6_dst_gc(); fib6_clean_all(net, fib6_age, 0, NULL); + now = jiffies; + net->ipv6.ip6_rt_last_gc = now; if (gc_args.more) mod_timer(&net->ipv6.ip6_fib_timer, - round_jiffies(jiffies + round_jiffies(now + net->ipv6.sysctl.ip6_rt_gc_interval)); else del_timer(&net->ipv6.ip6_fib_timer); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 824c424f964..b70f8979003 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1311,7 +1311,6 @@ static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg), static int ip6_dst_gc(struct dst_ops *ops) { - unsigned long now = jiffies; struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops); int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval; int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size; @@ -1321,13 +1320,12 @@ static int ip6_dst_gc(struct dst_ops *ops) int entries; entries = dst_entries_get_fast(ops); - if (time_after(rt_last_gc + rt_min_interval, now) && + if (time_after(rt_last_gc + rt_min_interval, jiffies) && entries <= rt_max_size) goto out; net->ipv6.ip6_rt_gc_expire++; fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, entries > rt_max_size); - net->ipv6.ip6_rt_last_gc = now; entries = dst_entries_get_slow(ops); if (entries < ops->gc_thresh) net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1; -- cgit v1.2.3-18-g5258 From 3f8f52982ad020f0704548c46de66bf464d3b967 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 1 Aug 2013 10:41:27 +0200 Subject: ipv6: move peer_addr init into ipv6_add_addr() Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index cfdcf7b2daf..a0ce957fb67 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -813,7 +813,8 @@ static u32 inet6_addr_hash(const struct in6_addr *addr) /* On success it returns ifp with increased reference count */ static struct inet6_ifaddr * -ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, +ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, + const struct in6_addr *peer_addr, int pfxlen, int scope, u32 flags) { struct inet6_ifaddr *ifa = NULL; @@ -863,6 +864,8 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, } ifa->addr = *addr; + if (peer_addr) + ifa->peer_addr = *peer_addr; spin_lock_init(&ifa->lock); spin_lock_init(&ifa->state_lock); @@ -1123,8 +1126,8 @@ retry: ift = !max_addresses || ipv6_count_addresses(idev) < max_addresses ? - ipv6_add_addr(idev, &addr, tmp_plen, ipv6_addr_scope(&addr), - addr_flags) : NULL; + ipv6_add_addr(idev, &addr, NULL, tmp_plen, + ipv6_addr_scope(&addr), addr_flags) : NULL; if (IS_ERR_OR_NULL(ift)) { in6_ifa_put(ifp); in6_dev_put(idev); @@ -2179,7 +2182,8 @@ ok: */ if (!max_addresses || ipv6_count_addresses(in6_dev) < max_addresses) - ifp = ipv6_add_addr(in6_dev, &addr, pinfo->prefix_len, + ifp = ipv6_add_addr(in6_dev, &addr, NULL, + pinfo->prefix_len, addr_type&IPV6_ADDR_SCOPE_MASK, addr_flags); @@ -2455,15 +2459,13 @@ static int inet6_addr_add(struct net *net, int ifindex, const struct in6_addr *p prefered_lft = timeout; } - ifp = ipv6_add_addr(idev, pfx, plen, scope, ifa_flags); + ifp = ipv6_add_addr(idev, pfx, peer_pfx, plen, scope, ifa_flags); if (!IS_ERR(ifp)) { spin_lock_bh(&ifp->lock); ifp->valid_lft = valid_lft; ifp->prefered_lft = prefered_lft; ifp->tstamp = jiffies; - if (peer_pfx) - ifp->peer_addr = *peer_pfx; spin_unlock_bh(&ifp->lock); addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev, @@ -2557,7 +2559,7 @@ static void add_addr(struct inet6_dev *idev, const struct in6_addr *addr, { struct inet6_ifaddr *ifp; - ifp = ipv6_add_addr(idev, addr, plen, scope, IFA_F_PERMANENT); + ifp = ipv6_add_addr(idev, addr, NULL, plen, scope, IFA_F_PERMANENT); if (!IS_ERR(ifp)) { spin_lock_bh(&ifp->lock); ifp->flags &= ~IFA_F_TENTATIVE; @@ -2683,7 +2685,7 @@ static void addrconf_add_linklocal(struct inet6_dev *idev, const struct in6_addr #endif - ifp = ipv6_add_addr(idev, addr, 64, IFA_LINK, addr_flags); + ifp = ipv6_add_addr(idev, addr, NULL, 64, IFA_LINK, addr_flags); if (!IS_ERR(ifp)) { addrconf_prefix_route(&ifp->addr, ifp->prefix_len, idev->dev, 0, 0); addrconf_dad_start(ifp); -- cgit v1.2.3-18-g5258 From 8a226b2cfa776db6011fc84b71578513161cd3d3 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Thu, 1 Aug 2013 10:41:28 +0200 Subject: ipv6: prevent race between address creation and removal There's a race in IPv6 automatic addess assignment. The address is created with zero lifetime when it's added to various address lists. Before it gets assigned the correct lifetime, there's a window where a new address may be configured. This causes the semi-initiated address to be deleted in addrconf_verify. This was discovered as a reference leak caused by concurrent run of __ipv6_ifa_notify for both RTM_NEWADDR and RTM_DELADDR with the same address. Fix this by setting the lifetime before the address is added to inet6_addr_lst. A few notes: 1. In addrconf_prefix_rcv, by setting update_lft to zero, the if (update_lft) { ... } condition is no longer executed for newly created addresses. This is okay, as the ifp fields are set in ipv6_add_addr now and ipv6_ifa_notify is called (and has been called) through addrconf_dad_start. 2. The removal of the whole block under ifp->lock in inet6_addr_add is okay, too, as tstamp is initialized to jiffies in ipv6_add_addr. Signed-off-by: Jiri Benc Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index a0ce957fb67..da4241c8c7d 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -815,7 +815,7 @@ static u32 inet6_addr_hash(const struct in6_addr *addr) static struct inet6_ifaddr * ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, const struct in6_addr *peer_addr, int pfxlen, - int scope, u32 flags) + int scope, u32 flags, u32 valid_lft, u32 prefered_lft) { struct inet6_ifaddr *ifa = NULL; struct rt6_info *rt; @@ -875,6 +875,8 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, ifa->scope = scope; ifa->prefix_len = pfxlen; ifa->flags = flags | IFA_F_TENTATIVE; + ifa->valid_lft = valid_lft; + ifa->prefered_lft = prefered_lft; ifa->cstamp = ifa->tstamp = jiffies; ifa->tokenized = false; @@ -1127,7 +1129,8 @@ retry: ift = !max_addresses || ipv6_count_addresses(idev) < max_addresses ? ipv6_add_addr(idev, &addr, NULL, tmp_plen, - ipv6_addr_scope(&addr), addr_flags) : NULL; + ipv6_addr_scope(&addr), addr_flags, + tmp_valid_lft, tmp_prefered_lft) : NULL; if (IS_ERR_OR_NULL(ift)) { in6_ifa_put(ifp); in6_dev_put(idev); @@ -1139,8 +1142,6 @@ retry: spin_lock_bh(&ift->lock); ift->ifpub = ifp; - ift->valid_lft = tmp_valid_lft; - ift->prefered_lft = tmp_prefered_lft; ift->cstamp = now; ift->tstamp = tmp_tstamp; spin_unlock_bh(&ift->lock); @@ -2185,14 +2186,16 @@ ok: ifp = ipv6_add_addr(in6_dev, &addr, NULL, pinfo->prefix_len, addr_type&IPV6_ADDR_SCOPE_MASK, - addr_flags); + addr_flags, valid_lft, + prefered_lft); if (IS_ERR_OR_NULL(ifp)) { in6_dev_put(in6_dev); return; } - update_lft = create = 1; + update_lft = 0; + create = 1; ifp->cstamp = jiffies; ifp->tokenized = tokenized; addrconf_dad_start(ifp); @@ -2213,7 +2216,7 @@ ok: stored_lft = ifp->valid_lft - (now - ifp->tstamp) / HZ; else stored_lft = 0; - if (!update_lft && stored_lft) { + if (!update_lft && !create && stored_lft) { if (valid_lft > MIN_VALID_LIFETIME || valid_lft > stored_lft) update_lft = 1; @@ -2459,15 +2462,10 @@ static int inet6_addr_add(struct net *net, int ifindex, const struct in6_addr *p prefered_lft = timeout; } - ifp = ipv6_add_addr(idev, pfx, peer_pfx, plen, scope, ifa_flags); + ifp = ipv6_add_addr(idev, pfx, peer_pfx, plen, scope, ifa_flags, + valid_lft, prefered_lft); if (!IS_ERR(ifp)) { - spin_lock_bh(&ifp->lock); - ifp->valid_lft = valid_lft; - ifp->prefered_lft = prefered_lft; - ifp->tstamp = jiffies; - spin_unlock_bh(&ifp->lock); - addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev, expires, flags); /* @@ -2559,7 +2557,8 @@ static void add_addr(struct inet6_dev *idev, const struct in6_addr *addr, { struct inet6_ifaddr *ifp; - ifp = ipv6_add_addr(idev, addr, NULL, plen, scope, IFA_F_PERMANENT); + ifp = ipv6_add_addr(idev, addr, NULL, plen, + scope, IFA_F_PERMANENT, 0, 0); if (!IS_ERR(ifp)) { spin_lock_bh(&ifp->lock); ifp->flags &= ~IFA_F_TENTATIVE; @@ -2685,7 +2684,7 @@ static void addrconf_add_linklocal(struct inet6_dev *idev, const struct in6_addr #endif - ifp = ipv6_add_addr(idev, addr, NULL, 64, IFA_LINK, addr_flags); + ifp = ipv6_add_addr(idev, addr, NULL, 64, IFA_LINK, addr_flags, 0, 0); if (!IS_ERR(ifp)) { addrconf_prefix_route(&ifp->addr, ifp->prefix_len, idev->dev, 0, 0); addrconf_dad_start(ifp); -- cgit v1.2.3-18-g5258 From e0d1095ae3405404d247afb00233ef837d58da83 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 1 Aug 2013 11:10:25 +0800 Subject: net: rename CONFIG_NET_LL_RX_POLL to CONFIG_NET_RX_BUSY_POLL Eliezer renames several *ll_poll to *busy_poll, but forgets CONFIG_NET_LL_RX_POLL, so in case of confusion, rename it too. Cc: Eliezer Tamir Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/Kconfig | 2 +- net/core/skbuff.c | 2 +- net/core/sock.c | 6 +++--- net/core/sysctl_net_core.c | 2 +- net/socket.c | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/Kconfig b/net/Kconfig index 37702491abe..2b406608a1a 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -244,7 +244,7 @@ config NETPRIO_CGROUP Cgroup subsystem for use in assigning processes to network priorities on a per-interface basis -config NET_LL_RX_POLL +config NET_RX_BUSY_POLL boolean default y diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 3df4d4ccf44..2c3d0f53d19 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -740,7 +740,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) skb_copy_secmark(new, old); -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL new->napi_id = old->napi_id; #endif } diff --git a/net/core/sock.c b/net/core/sock.c index 548d716c5f6..2c097c5a35d 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -900,7 +900,7 @@ set_rcvbuf: sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool); break; -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL case SO_BUSY_POLL: /* allow unprivileged users to decrease the value */ if ((val > sk->sk_ll_usec) && !capable(CAP_NET_ADMIN)) @@ -1170,7 +1170,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE); break; -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL case SO_BUSY_POLL: v.val = sk->sk_ll_usec; break; @@ -2292,7 +2292,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_stamp = ktime_set(-1L, 0); -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL sk->sk_napi_id = 0; sk->sk_ll_usec = sysctl_net_busy_read; #endif diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 66096861663..b59b6804fd9 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -298,7 +298,7 @@ static struct ctl_table net_core_table[] = { .proc_handler = flow_limit_table_len_sysctl }, #endif /* CONFIG_NET_FLOW_LIMIT */ -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL { .procname = "busy_poll", .data = &sysctl_net_busy_poll, diff --git a/net/socket.c b/net/socket.c index 829b460acb8..b2d7c629eeb 100644 --- a/net/socket.c +++ b/net/socket.c @@ -106,7 +106,7 @@ #include #include -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL unsigned int sysctl_net_busy_read __read_mostly; unsigned int sysctl_net_busy_poll __read_mostly; #endif -- cgit v1.2.3-18-g5258 From c756891a4e1c08c43780e17aca1d2b849ef31d1a Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Thu, 1 Aug 2013 08:29:18 -0400 Subject: tipc: fix oops when creating server socket fails When creation of TIPC internal server socket fails, we get an oops with the following dump: BUG: unable to handle kernel NULL pointer dereference at 0000000000000020 IP: [] tipc_close_conn+0x59/0xb0 [tipc] PGD 13719067 PUD 12008067 PMD 0 Oops: 0000 [#1] SMP DEBUG_PAGEALLOC Modules linked in: tipc(+) CPU: 4 PID: 4340 Comm: insmod Not tainted 3.10.0+ #1 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007 task: ffff880014360000 ti: ffff88001374c000 task.ti: ffff88001374c000 RIP: 0010:[] [] tipc_close_conn+0x59/0xb0 [tipc] RSP: 0018:ffff88001374dc98 EFLAGS: 00010292 RAX: 0000000000000000 RBX: ffff880012ac09d8 RCX: 0000000000000000 RDX: 0000000000000046 RSI: 0000000000000001 RDI: ffff880014360000 RBP: ffff88001374dcb8 R08: 0000000000000001 R09: 0000000000000001 R10: 0000000000000000 R11: 0000000000000000 R12: ffffffffa0016fa0 R13: ffffffffa0017010 R14: ffffffffa0017010 R15: ffff880012ac09d8 FS: 0000000000000000(0000) GS:ffff880016600000(0063) knlGS:00000000f76668d0 CS: 0010 DS: 002b ES: 002b CR0: 000000008005003b CR2: 0000000000000020 CR3: 0000000012227000 CR4: 00000000000006e0 Stack: ffff88001374dcb8 ffffffffa0016fa0 0000000000000000 0000000000000001 ffff88001374dcf8 ffffffffa0012922 ffff88001374dce8 00000000ffffffea ffffffffa0017100 0000000000000000 ffff8800134241a8 ffffffffa0017150 Call Trace: [] tipc_server_stop+0xa2/0x1b0 [tipc] [] tipc_subscr_stop+0x15/0x20 [tipc] [] tipc_core_stop+0x1d/0x33 [tipc] [] tipc_init+0xd4/0xf8 [tipc] [] ? 0xffffffffa001efff [] do_one_initcall+0x3f/0x150 [] ? __blocking_notifier_call_chain+0x7d/0xd0 [] load_module+0x11aa/0x19c0 [] ? show_initstate+0x50/0x50 [] ? retint_restore_args+0xe/0xe [] SyS_init_module+0xd9/0x110 [] sysenter_dispatch+0x7/0x1f Code: 6c 24 70 4c 89 ef e8 b7 04 8f e1 8b 73 04 4c 89 e7 e8 7c 9e 32 e1 41 83 ac 24 b8 00 00 00 01 4c 89 ef e8 eb 0a 8f e1 48 8b 43 08 <4c> 8b 68 20 4d 8d a5 48 03 00 00 4c 89 e7 e8 04 05 8f e1 4c 89 RIP [] tipc_close_conn+0x59/0xb0 [tipc] RSP CR2: 0000000000000020 ---[ end trace b02321f40e4269a3 ]--- We have the following call chain: tipc_core_start() ret = tipc_subscr_start() ret = tipc_server_start(){ server->enabled = 1; ret = tipc_open_listening_sock() } I.e., the server->enabled flag is unconditionally set to 1, whatever the return value of tipc_open_listening_sock(). This causes a crash when tipc_core_start() tries to clean up resources after a failed initialization: if (ret == failed) tipc_subscr_stop() tipc_server_stop(){ if (server->enabled) tipc_close_conn(){ NULL reference of con->sock-sk OOPS! } } To avoid this, tipc_server_start() should only set server->enabled to 1 in case of a succesful socket creation. In case of failure, it should release all allocated resources before returning. Problem introduced in commit c5fa7b3cf3cb22e4ac60485fc2dc187fe012910f ("tipc: introduce new TIPC server infrastructure") in v3.11-rc1. Note that it won't be seen often; it takes a module load under memory constrained conditions in order to trigger the failure condition. Signed-off-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: Paul Gortmaker Signed-off-by: David S. Miller --- net/tipc/server.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/tipc/server.c b/net/tipc/server.c index 19da5abe0fa..fd3fa57a410 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -355,8 +355,12 @@ static int tipc_open_listening_sock(struct tipc_server *s) return PTR_ERR(con); sock = tipc_create_listen_sock(con); - if (!sock) + if (!sock) { + idr_remove(&s->conn_idr, con->conid); + s->idr_in_use--; + kfree(con); return -EINVAL; + } tipc_register_callbacks(sock, con); return 0; @@ -563,9 +567,14 @@ int tipc_server_start(struct tipc_server *s) kmem_cache_destroy(s->rcvbuf_cache); return ret; } + ret = tipc_open_listening_sock(s); + if (ret < 0) { + tipc_work_stop(s); + kmem_cache_destroy(s->rcvbuf_cache); + return ret; + } s->enabled = 1; - - return tipc_open_listening_sock(s); + return ret; } void tipc_server_stop(struct tipc_server *s) -- cgit v1.2.3-18-g5258 From cbd375567f7e4811b1c721f75ec519828ac6583f Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Thu, 1 Aug 2013 22:32:07 -0700 Subject: htb: fix sign extension bug When userspace passes a large priority value the assignment of the unsigned value hopt->prio to signed int cl->prio causes cl->prio to become negative and the comparison is with TC_HTB_NUMPRIO is always false. The result is that HTB crashes by referencing outside the array when processing packets. With this patch the large value wraps around like other values outside the normal range. See: https://bugzilla.kernel.org/show_bug.cgi?id=60669 Signed-off-by: Stephen Hemminger Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_htb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index c2124ea29f4..45e751527df 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -100,7 +100,7 @@ struct htb_class { struct psched_ratecfg ceil; s64 buffer, cbuffer;/* token bucket depth/rate */ s64 mbuffer; /* max wait time */ - int prio; /* these two are used only by leaves... */ + u32 prio; /* these two are used only by leaves... */ int quantum; /* but stored for parent-to-leaf return */ struct tcf_proto *filter_list; /* class attached filters */ -- cgit v1.2.3-18-g5258 From 446266b0c742a2c9ee8f0dce759a0117bce58a86 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 2 Aug 2013 11:32:43 +0200 Subject: net: rtm_to_ifaddr: free ifa if ifa_cacheinfo processing fails Commit 5c766d642 ("ipv4: introduce address lifetime") leaves the ifa resource that was allocated via inet_alloc_ifa() unfreed when returning the function with -EINVAL. Thus, free it first via inet_free_ifa(). Signed-off-by: Daniel Borkmann Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/ipv4/devinet.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 8d48c392adc..34ca6d5a3a4 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -772,7 +772,7 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh, ci = nla_data(tb[IFA_CACHEINFO]); if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) { err = -EINVAL; - goto errout; + goto errout_free; } *pvalid_lft = ci->ifa_valid; *pprefered_lft = ci->ifa_prefered; @@ -780,6 +780,8 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh, return ifa; +errout_free: + inet_free_ifa(ifa); errout: return ERR_PTR(err); } -- cgit v1.2.3-18-g5258 From 5f671d6b4ec3e6d66c2a868738af2cdea09e7509 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Fri, 2 Aug 2013 18:36:40 +0400 Subject: net: check net.core.somaxconn sysctl values It's possible to assign an invalid value to the net.core.somaxconn sysctl variable, because there is no checks at all. The sk_max_ack_backlog field of the sock structure is defined as unsigned short. Therefore, the backlog argument in inet_listen() shouldn't exceed USHRT_MAX. The backlog argument in the listen() syscall is truncated to the somaxconn value. So, the somaxconn value shouldn't exceed 65535 (USHRT_MAX). Also, negative values of somaxconn are meaningless. before: $ sysctl -w net.core.somaxconn=256 net.core.somaxconn = 256 $ sysctl -w net.core.somaxconn=65536 net.core.somaxconn = 65536 $ sysctl -w net.core.somaxconn=-100 net.core.somaxconn = -100 after: $ sysctl -w net.core.somaxconn=256 net.core.somaxconn = 256 $ sysctl -w net.core.somaxconn=65536 error: "Invalid argument" setting key "net.core.somaxconn" $ sysctl -w net.core.somaxconn=-100 error: "Invalid argument" setting key "net.core.somaxconn" Based on a prior patch from Changli Gao. Signed-off-by: Roman Gushchin Reported-by: Changli Gao Suggested-by: Eric Dumazet Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/sysctl_net_core.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index b59b6804fd9..31107abd278 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -21,7 +21,9 @@ #include #include +static int zero = 0; static int one = 1; +static int ushort_max = USHRT_MAX; #ifdef CONFIG_RPS static int rps_sock_flow_sysctl(struct ctl_table *table, int write, @@ -339,7 +341,9 @@ static struct ctl_table netns_core_table[] = { .data = &init_net.core.sysctl_somaxconn, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec + .extra1 = &zero, + .extra2 = &ushort_max, + .proc_handler = proc_dointvec_minmax }, { } }; -- cgit v1.2.3-18-g5258 From 6a8b7f0c85f1f42eb8b6e68ef3d5ba8020d8e272 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Fri, 2 Aug 2013 14:45:08 -0400 Subject: netlabel: use domain based selectors when address based selectors are not available NetLabel has the ability to selectively assign network security labels to outbound traffic based on either the LSM's "domain" (different for each LSM), the network destination, or a combination of both. Depending on the type of traffic, local or forwarded, and the type of traffic selector, domain or address based, different hooks are used to label the traffic; the goal being minimal overhead. Unfortunately, there is a bug such that a system using NetLabel domain based traffic selectors does not correctly label outbound local traffic that is not assigned to a socket. The issue is that in these cases the associated NetLabel hook only looks at the address based selectors and not the domain based selectors. This patch corrects this by checking both the domain and address based selectors so that the correct labeling is applied, regardless of the configuration type. In order to acomplish this fix, this patch also simplifies some of the NetLabel domainhash structures to use a more common outbound traffic mapping type: struct netlbl_dommap_def. This simplifies some of the code in this patch and paves the way for further simplifications in the future. Signed-off-by: Paul Moore Signed-off-by: David S. Miller --- net/netlabel/netlabel_cipso_v4.c | 4 +- net/netlabel/netlabel_domainhash.c | 104 +++++++++++++++++-------------------- net/netlabel/netlabel_domainhash.h | 46 ++++++++-------- net/netlabel/netlabel_kapi.c | 88 +++++++++++++------------------ net/netlabel/netlabel_mgmt.c | 44 ++++++++-------- net/netlabel/netlabel_unlabeled.c | 2 +- 6 files changed, 130 insertions(+), 158 deletions(-) (limited to 'net') diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index c15042f987b..a1100640495 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -691,8 +691,8 @@ static int netlbl_cipsov4_remove_cb(struct netlbl_dom_map *entry, void *arg) { struct netlbl_domhsh_walk_arg *cb_arg = arg; - if (entry->type == NETLBL_NLTYPE_CIPSOV4 && - entry->type_def.cipsov4->doi == cb_arg->doi) + if (entry->def.type == NETLBL_NLTYPE_CIPSOV4 && + entry->def.cipso->doi == cb_arg->doi) return netlbl_domhsh_remove_entry(entry, cb_arg->audit_info); return 0; diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c index 6bb1d42f0fa..85d842e6e43 100644 --- a/net/netlabel/netlabel_domainhash.c +++ b/net/netlabel/netlabel_domainhash.c @@ -84,15 +84,15 @@ static void netlbl_domhsh_free_entry(struct rcu_head *entry) #endif /* IPv6 */ ptr = container_of(entry, struct netlbl_dom_map, rcu); - if (ptr->type == NETLBL_NLTYPE_ADDRSELECT) { + if (ptr->def.type == NETLBL_NLTYPE_ADDRSELECT) { netlbl_af4list_foreach_safe(iter4, tmp4, - &ptr->type_def.addrsel->list4) { + &ptr->def.addrsel->list4) { netlbl_af4list_remove_entry(iter4); kfree(netlbl_domhsh_addr4_entry(iter4)); } #if IS_ENABLED(CONFIG_IPV6) netlbl_af6list_foreach_safe(iter6, tmp6, - &ptr->type_def.addrsel->list6) { + &ptr->def.addrsel->list6) { netlbl_af6list_remove_entry(iter6); kfree(netlbl_domhsh_addr6_entry(iter6)); } @@ -213,21 +213,21 @@ static void netlbl_domhsh_audit_add(struct netlbl_dom_map *entry, if (addr4 != NULL) { struct netlbl_domaddr4_map *map4; map4 = netlbl_domhsh_addr4_entry(addr4); - type = map4->type; - cipsov4 = map4->type_def.cipsov4; + type = map4->def.type; + cipsov4 = map4->def.cipso; netlbl_af4list_audit_addr(audit_buf, 0, NULL, addr4->addr, addr4->mask); #if IS_ENABLED(CONFIG_IPV6) } else if (addr6 != NULL) { struct netlbl_domaddr6_map *map6; map6 = netlbl_domhsh_addr6_entry(addr6); - type = map6->type; + type = map6->def.type; netlbl_af6list_audit_addr(audit_buf, 0, NULL, &addr6->addr, &addr6->mask); #endif /* IPv6 */ } else { - type = entry->type; - cipsov4 = entry->type_def.cipsov4; + type = entry->def.type; + cipsov4 = entry->def.cipso; } switch (type) { case NETLBL_NLTYPE_UNLABELED: @@ -265,26 +265,25 @@ static int netlbl_domhsh_validate(const struct netlbl_dom_map *entry) if (entry == NULL) return -EINVAL; - switch (entry->type) { + switch (entry->def.type) { case NETLBL_NLTYPE_UNLABELED: - if (entry->type_def.cipsov4 != NULL || - entry->type_def.addrsel != NULL) + if (entry->def.cipso != NULL || entry->def.addrsel != NULL) return -EINVAL; break; case NETLBL_NLTYPE_CIPSOV4: - if (entry->type_def.cipsov4 == NULL) + if (entry->def.cipso == NULL) return -EINVAL; break; case NETLBL_NLTYPE_ADDRSELECT: - netlbl_af4list_foreach(iter4, &entry->type_def.addrsel->list4) { + netlbl_af4list_foreach(iter4, &entry->def.addrsel->list4) { map4 = netlbl_domhsh_addr4_entry(iter4); - switch (map4->type) { + switch (map4->def.type) { case NETLBL_NLTYPE_UNLABELED: - if (map4->type_def.cipsov4 != NULL) + if (map4->def.cipso != NULL) return -EINVAL; break; case NETLBL_NLTYPE_CIPSOV4: - if (map4->type_def.cipsov4 == NULL) + if (map4->def.cipso == NULL) return -EINVAL; break; default: @@ -292,9 +291,9 @@ static int netlbl_domhsh_validate(const struct netlbl_dom_map *entry) } } #if IS_ENABLED(CONFIG_IPV6) - netlbl_af6list_foreach(iter6, &entry->type_def.addrsel->list6) { + netlbl_af6list_foreach(iter6, &entry->def.addrsel->list6) { map6 = netlbl_domhsh_addr6_entry(iter6); - switch (map6->type) { + switch (map6->def.type) { case NETLBL_NLTYPE_UNLABELED: break; default: @@ -402,32 +401,31 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry, rcu_assign_pointer(netlbl_domhsh_def, entry); } - if (entry->type == NETLBL_NLTYPE_ADDRSELECT) { + if (entry->def.type == NETLBL_NLTYPE_ADDRSELECT) { netlbl_af4list_foreach_rcu(iter4, - &entry->type_def.addrsel->list4) + &entry->def.addrsel->list4) netlbl_domhsh_audit_add(entry, iter4, NULL, ret_val, audit_info); #if IS_ENABLED(CONFIG_IPV6) netlbl_af6list_foreach_rcu(iter6, - &entry->type_def.addrsel->list6) + &entry->def.addrsel->list6) netlbl_domhsh_audit_add(entry, NULL, iter6, ret_val, audit_info); #endif /* IPv6 */ } else netlbl_domhsh_audit_add(entry, NULL, NULL, ret_val, audit_info); - } else if (entry_old->type == NETLBL_NLTYPE_ADDRSELECT && - entry->type == NETLBL_NLTYPE_ADDRSELECT) { + } else if (entry_old->def.type == NETLBL_NLTYPE_ADDRSELECT && + entry->def.type == NETLBL_NLTYPE_ADDRSELECT) { struct list_head *old_list4; struct list_head *old_list6; - old_list4 = &entry_old->type_def.addrsel->list4; - old_list6 = &entry_old->type_def.addrsel->list6; + old_list4 = &entry_old->def.addrsel->list4; + old_list6 = &entry_old->def.addrsel->list6; /* we only allow the addition of address selectors if all of * the selectors do not exist in the existing domain map */ - netlbl_af4list_foreach_rcu(iter4, - &entry->type_def.addrsel->list4) + netlbl_af4list_foreach_rcu(iter4, &entry->def.addrsel->list4) if (netlbl_af4list_search_exact(iter4->addr, iter4->mask, old_list4)) { @@ -435,8 +433,7 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry, goto add_return; } #if IS_ENABLED(CONFIG_IPV6) - netlbl_af6list_foreach_rcu(iter6, - &entry->type_def.addrsel->list6) + netlbl_af6list_foreach_rcu(iter6, &entry->def.addrsel->list6) if (netlbl_af6list_search_exact(&iter6->addr, &iter6->mask, old_list6)) { @@ -446,7 +443,7 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry, #endif /* IPv6 */ netlbl_af4list_foreach_safe(iter4, tmp4, - &entry->type_def.addrsel->list4) { + &entry->def.addrsel->list4) { netlbl_af4list_remove_entry(iter4); iter4->valid = 1; ret_val = netlbl_af4list_add(iter4, old_list4); @@ -457,7 +454,7 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry, } #if IS_ENABLED(CONFIG_IPV6) netlbl_af6list_foreach_safe(iter6, tmp6, - &entry->type_def.addrsel->list6) { + &entry->def.addrsel->list6) { netlbl_af6list_remove_entry(iter6); iter6->valid = 1; ret_val = netlbl_af6list_add(iter6, old_list6); @@ -538,18 +535,18 @@ int netlbl_domhsh_remove_entry(struct netlbl_dom_map *entry, struct netlbl_af4list *iter4; struct netlbl_domaddr4_map *map4; - switch (entry->type) { + switch (entry->def.type) { case NETLBL_NLTYPE_ADDRSELECT: netlbl_af4list_foreach_rcu(iter4, - &entry->type_def.addrsel->list4) { + &entry->def.addrsel->list4) { map4 = netlbl_domhsh_addr4_entry(iter4); - cipso_v4_doi_putdef(map4->type_def.cipsov4); + cipso_v4_doi_putdef(map4->def.cipso); } /* no need to check the IPv6 list since we currently * support only unlabeled protocols for IPv6 */ break; case NETLBL_NLTYPE_CIPSOV4: - cipso_v4_doi_putdef(entry->type_def.cipsov4); + cipso_v4_doi_putdef(entry->def.cipso); break; } call_rcu(&entry->rcu, netlbl_domhsh_free_entry); @@ -590,20 +587,21 @@ int netlbl_domhsh_remove_af4(const char *domain, entry_map = netlbl_domhsh_search(domain); else entry_map = netlbl_domhsh_search_def(domain); - if (entry_map == NULL || entry_map->type != NETLBL_NLTYPE_ADDRSELECT) + if (entry_map == NULL || + entry_map->def.type != NETLBL_NLTYPE_ADDRSELECT) goto remove_af4_failure; spin_lock(&netlbl_domhsh_lock); entry_addr = netlbl_af4list_remove(addr->s_addr, mask->s_addr, - &entry_map->type_def.addrsel->list4); + &entry_map->def.addrsel->list4); spin_unlock(&netlbl_domhsh_lock); if (entry_addr == NULL) goto remove_af4_failure; - netlbl_af4list_foreach_rcu(iter4, &entry_map->type_def.addrsel->list4) + netlbl_af4list_foreach_rcu(iter4, &entry_map->def.addrsel->list4) goto remove_af4_single_addr; #if IS_ENABLED(CONFIG_IPV6) - netlbl_af6list_foreach_rcu(iter6, &entry_map->type_def.addrsel->list6) + netlbl_af6list_foreach_rcu(iter6, &entry_map->def.addrsel->list6) goto remove_af4_single_addr; #endif /* IPv6 */ /* the domain mapping is empty so remove it from the mapping table */ @@ -616,7 +614,7 @@ remove_af4_single_addr: * shouldn't be a problem */ synchronize_rcu(); entry = netlbl_domhsh_addr4_entry(entry_addr); - cipso_v4_doi_putdef(entry->type_def.cipsov4); + cipso_v4_doi_putdef(entry->def.cipso); kfree(entry); return 0; @@ -693,8 +691,8 @@ struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain) * responsible for ensuring that rcu_read_[un]lock() is called. * */ -struct netlbl_domaddr4_map *netlbl_domhsh_getentry_af4(const char *domain, - __be32 addr) +struct netlbl_dommap_def *netlbl_domhsh_getentry_af4(const char *domain, + __be32 addr) { struct netlbl_dom_map *dom_iter; struct netlbl_af4list *addr_iter; @@ -702,15 +700,13 @@ struct netlbl_domaddr4_map *netlbl_domhsh_getentry_af4(const char *domain, dom_iter = netlbl_domhsh_search_def(domain); if (dom_iter == NULL) return NULL; - if (dom_iter->type != NETLBL_NLTYPE_ADDRSELECT) - return NULL; - addr_iter = netlbl_af4list_search(addr, - &dom_iter->type_def.addrsel->list4); + if (dom_iter->def.type != NETLBL_NLTYPE_ADDRSELECT) + return &dom_iter->def; + addr_iter = netlbl_af4list_search(addr, &dom_iter->def.addrsel->list4); if (addr_iter == NULL) return NULL; - - return netlbl_domhsh_addr4_entry(addr_iter); + return &(netlbl_domhsh_addr4_entry(addr_iter)->def); } #if IS_ENABLED(CONFIG_IPV6) @@ -725,7 +721,7 @@ struct netlbl_domaddr4_map *netlbl_domhsh_getentry_af4(const char *domain, * responsible for ensuring that rcu_read_[un]lock() is called. * */ -struct netlbl_domaddr6_map *netlbl_domhsh_getentry_af6(const char *domain, +struct netlbl_dommap_def *netlbl_domhsh_getentry_af6(const char *domain, const struct in6_addr *addr) { struct netlbl_dom_map *dom_iter; @@ -734,15 +730,13 @@ struct netlbl_domaddr6_map *netlbl_domhsh_getentry_af6(const char *domain, dom_iter = netlbl_domhsh_search_def(domain); if (dom_iter == NULL) return NULL; - if (dom_iter->type != NETLBL_NLTYPE_ADDRSELECT) - return NULL; - addr_iter = netlbl_af6list_search(addr, - &dom_iter->type_def.addrsel->list6); + if (dom_iter->def.type != NETLBL_NLTYPE_ADDRSELECT) + return &dom_iter->def; + addr_iter = netlbl_af6list_search(addr, &dom_iter->def.addrsel->list6); if (addr_iter == NULL) return NULL; - - return netlbl_domhsh_addr6_entry(addr_iter); + return &(netlbl_domhsh_addr6_entry(addr_iter)->def); } #endif /* IPv6 */ diff --git a/net/netlabel/netlabel_domainhash.h b/net/netlabel/netlabel_domainhash.h index 90872c4ca30..b9be0eed898 100644 --- a/net/netlabel/netlabel_domainhash.h +++ b/net/netlabel/netlabel_domainhash.h @@ -43,37 +43,35 @@ #define NETLBL_DOMHSH_BITSIZE 7 /* Domain mapping definition structures */ +struct netlbl_domaddr_map { + struct list_head list4; + struct list_head list6; +}; +struct netlbl_dommap_def { + u32 type; + union { + struct netlbl_domaddr_map *addrsel; + struct cipso_v4_doi *cipso; + }; +}; #define netlbl_domhsh_addr4_entry(iter) \ container_of(iter, struct netlbl_domaddr4_map, list) struct netlbl_domaddr4_map { - u32 type; - union { - struct cipso_v4_doi *cipsov4; - } type_def; + struct netlbl_dommap_def def; struct netlbl_af4list list; }; #define netlbl_domhsh_addr6_entry(iter) \ container_of(iter, struct netlbl_domaddr6_map, list) struct netlbl_domaddr6_map { - u32 type; - - /* NOTE: no 'type_def' union needed at present since we don't currently - * support any IPv6 labeling protocols */ + struct netlbl_dommap_def def; struct netlbl_af6list list; }; -struct netlbl_domaddr_map { - struct list_head list4; - struct list_head list6; -}; + struct netlbl_dom_map { char *domain; - u32 type; - union { - struct cipso_v4_doi *cipsov4; - struct netlbl_domaddr_map *addrsel; - } type_def; + struct netlbl_dommap_def def; u32 valid; struct list_head list; @@ -97,16 +95,16 @@ int netlbl_domhsh_remove_af4(const char *domain, int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info); int netlbl_domhsh_remove_default(struct netlbl_audit *audit_info); struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain); -struct netlbl_domaddr4_map *netlbl_domhsh_getentry_af4(const char *domain, - __be32 addr); +struct netlbl_dommap_def *netlbl_domhsh_getentry_af4(const char *domain, + __be32 addr); +#if IS_ENABLED(CONFIG_IPV6) +struct netlbl_dommap_def *netlbl_domhsh_getentry_af6(const char *domain, + const struct in6_addr *addr); +#endif /* IPv6 */ + int netlbl_domhsh_walk(u32 *skip_bkt, u32 *skip_chain, int (*callback) (struct netlbl_dom_map *entry, void *arg), void *cb_arg); -#if IS_ENABLED(CONFIG_IPV6) -struct netlbl_domaddr6_map *netlbl_domhsh_getentry_af6(const char *domain, - const struct in6_addr *addr); -#endif /* IPv6 */ - #endif diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index 7c94aedd091..96a458e12f6 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -122,7 +122,7 @@ int netlbl_cfg_unlbl_map_add(const char *domain, } if (addr == NULL && mask == NULL) - entry->type = NETLBL_NLTYPE_UNLABELED; + entry->def.type = NETLBL_NLTYPE_UNLABELED; else if (addr != NULL && mask != NULL) { addrmap = kzalloc(sizeof(*addrmap), GFP_ATOMIC); if (addrmap == NULL) @@ -137,7 +137,7 @@ int netlbl_cfg_unlbl_map_add(const char *domain, map4 = kzalloc(sizeof(*map4), GFP_ATOMIC); if (map4 == NULL) goto cfg_unlbl_map_add_failure; - map4->type = NETLBL_NLTYPE_UNLABELED; + map4->def.type = NETLBL_NLTYPE_UNLABELED; map4->list.addr = addr4->s_addr & mask4->s_addr; map4->list.mask = mask4->s_addr; map4->list.valid = 1; @@ -154,7 +154,7 @@ int netlbl_cfg_unlbl_map_add(const char *domain, map6 = kzalloc(sizeof(*map6), GFP_ATOMIC); if (map6 == NULL) goto cfg_unlbl_map_add_failure; - map6->type = NETLBL_NLTYPE_UNLABELED; + map6->def.type = NETLBL_NLTYPE_UNLABELED; map6->list.addr = *addr6; map6->list.addr.s6_addr32[0] &= mask6->s6_addr32[0]; map6->list.addr.s6_addr32[1] &= mask6->s6_addr32[1]; @@ -174,8 +174,8 @@ int netlbl_cfg_unlbl_map_add(const char *domain, break; } - entry->type_def.addrsel = addrmap; - entry->type = NETLBL_NLTYPE_ADDRSELECT; + entry->def.addrsel = addrmap; + entry->def.type = NETLBL_NLTYPE_ADDRSELECT; } else { ret_val = -EINVAL; goto cfg_unlbl_map_add_failure; @@ -355,8 +355,8 @@ int netlbl_cfg_cipsov4_map_add(u32 doi, } if (addr == NULL && mask == NULL) { - entry->type_def.cipsov4 = doi_def; - entry->type = NETLBL_NLTYPE_CIPSOV4; + entry->def.cipso = doi_def; + entry->def.type = NETLBL_NLTYPE_CIPSOV4; } else if (addr != NULL && mask != NULL) { addrmap = kzalloc(sizeof(*addrmap), GFP_ATOMIC); if (addrmap == NULL) @@ -367,8 +367,8 @@ int netlbl_cfg_cipsov4_map_add(u32 doi, addrinfo = kzalloc(sizeof(*addrinfo), GFP_ATOMIC); if (addrinfo == NULL) goto out_addrinfo; - addrinfo->type_def.cipsov4 = doi_def; - addrinfo->type = NETLBL_NLTYPE_CIPSOV4; + addrinfo->def.cipso = doi_def; + addrinfo->def.type = NETLBL_NLTYPE_CIPSOV4; addrinfo->list.addr = addr->s_addr & mask->s_addr; addrinfo->list.mask = mask->s_addr; addrinfo->list.valid = 1; @@ -376,8 +376,8 @@ int netlbl_cfg_cipsov4_map_add(u32 doi, if (ret_val != 0) goto cfg_cipsov4_map_add_failure; - entry->type_def.addrsel = addrmap; - entry->type = NETLBL_NLTYPE_ADDRSELECT; + entry->def.addrsel = addrmap; + entry->def.type = NETLBL_NLTYPE_ADDRSELECT; } else { ret_val = -EINVAL; goto out_addrmap; @@ -657,14 +657,14 @@ int netlbl_sock_setattr(struct sock *sk, } switch (family) { case AF_INET: - switch (dom_entry->type) { + switch (dom_entry->def.type) { case NETLBL_NLTYPE_ADDRSELECT: ret_val = -EDESTADDRREQ; break; case NETLBL_NLTYPE_CIPSOV4: ret_val = cipso_v4_sock_setattr(sk, - dom_entry->type_def.cipsov4, - secattr); + dom_entry->def.cipso, + secattr); break; case NETLBL_NLTYPE_UNLABELED: ret_val = 0; @@ -754,23 +754,22 @@ int netlbl_conn_setattr(struct sock *sk, { int ret_val; struct sockaddr_in *addr4; - struct netlbl_domaddr4_map *af4_entry; + struct netlbl_dommap_def *entry; rcu_read_lock(); switch (addr->sa_family) { case AF_INET: addr4 = (struct sockaddr_in *)addr; - af4_entry = netlbl_domhsh_getentry_af4(secattr->domain, - addr4->sin_addr.s_addr); - if (af4_entry == NULL) { + entry = netlbl_domhsh_getentry_af4(secattr->domain, + addr4->sin_addr.s_addr); + if (entry == NULL) { ret_val = -ENOENT; goto conn_setattr_return; } - switch (af4_entry->type) { + switch (entry->type) { case NETLBL_NLTYPE_CIPSOV4: ret_val = cipso_v4_sock_setattr(sk, - af4_entry->type_def.cipsov4, - secattr); + entry->cipso, secattr); break; case NETLBL_NLTYPE_UNLABELED: /* just delete the protocols we support for right now @@ -812,36 +811,21 @@ int netlbl_req_setattr(struct request_sock *req, const struct netlbl_lsm_secattr *secattr) { int ret_val; - struct netlbl_dom_map *dom_entry; - struct netlbl_domaddr4_map *af4_entry; - u32 proto_type; - struct cipso_v4_doi *proto_cv4; + struct netlbl_dommap_def *entry; rcu_read_lock(); - dom_entry = netlbl_domhsh_getentry(secattr->domain); - if (dom_entry == NULL) { - ret_val = -ENOENT; - goto req_setattr_return; - } switch (req->rsk_ops->family) { case AF_INET: - if (dom_entry->type == NETLBL_NLTYPE_ADDRSELECT) { - struct inet_request_sock *req_inet = inet_rsk(req); - af4_entry = netlbl_domhsh_getentry_af4(secattr->domain, - req_inet->rmt_addr); - if (af4_entry == NULL) { - ret_val = -ENOENT; - goto req_setattr_return; - } - proto_type = af4_entry->type; - proto_cv4 = af4_entry->type_def.cipsov4; - } else { - proto_type = dom_entry->type; - proto_cv4 = dom_entry->type_def.cipsov4; + entry = netlbl_domhsh_getentry_af4(secattr->domain, + inet_rsk(req)->rmt_addr); + if (entry == NULL) { + ret_val = -ENOENT; + goto req_setattr_return; } - switch (proto_type) { + switch (entry->type) { case NETLBL_NLTYPE_CIPSOV4: - ret_val = cipso_v4_req_setattr(req, proto_cv4, secattr); + ret_val = cipso_v4_req_setattr(req, + entry->cipso, secattr); break; case NETLBL_NLTYPE_UNLABELED: /* just delete the protocols we support for right now @@ -899,23 +883,21 @@ int netlbl_skbuff_setattr(struct sk_buff *skb, { int ret_val; struct iphdr *hdr4; - struct netlbl_domaddr4_map *af4_entry; + struct netlbl_dommap_def *entry; rcu_read_lock(); switch (family) { case AF_INET: hdr4 = ip_hdr(skb); - af4_entry = netlbl_domhsh_getentry_af4(secattr->domain, - hdr4->daddr); - if (af4_entry == NULL) { + entry = netlbl_domhsh_getentry_af4(secattr->domain,hdr4->daddr); + if (entry == NULL) { ret_val = -ENOENT; goto skbuff_setattr_return; } - switch (af4_entry->type) { + switch (entry->type) { case NETLBL_NLTYPE_CIPSOV4: - ret_val = cipso_v4_skbuff_setattr(skb, - af4_entry->type_def.cipsov4, - secattr); + ret_val = cipso_v4_skbuff_setattr(skb, entry->cipso, + secattr); break; case NETLBL_NLTYPE_UNLABELED: /* just delete the protocols we support for right now diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index c5384ffc614..dd1c37d7acb 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -104,7 +104,7 @@ static int netlbl_mgmt_add_common(struct genl_info *info, ret_val = -ENOMEM; goto add_failure; } - entry->type = nla_get_u32(info->attrs[NLBL_MGMT_A_PROTOCOL]); + entry->def.type = nla_get_u32(info->attrs[NLBL_MGMT_A_PROTOCOL]); if (info->attrs[NLBL_MGMT_A_DOMAIN]) { size_t tmp_size = nla_len(info->attrs[NLBL_MGMT_A_DOMAIN]); entry->domain = kmalloc(tmp_size, GFP_KERNEL); @@ -116,12 +116,12 @@ static int netlbl_mgmt_add_common(struct genl_info *info, info->attrs[NLBL_MGMT_A_DOMAIN], tmp_size); } - /* NOTE: internally we allow/use a entry->type value of + /* NOTE: internally we allow/use a entry->def.type value of * NETLBL_NLTYPE_ADDRSELECT but we don't currently allow users * to pass that as a protocol value because we need to know the * "real" protocol */ - switch (entry->type) { + switch (entry->def.type) { case NETLBL_NLTYPE_UNLABELED: break; case NETLBL_NLTYPE_CIPSOV4: @@ -132,7 +132,7 @@ static int netlbl_mgmt_add_common(struct genl_info *info, cipsov4 = cipso_v4_doi_getdef(tmp_val); if (cipsov4 == NULL) goto add_failure; - entry->type_def.cipsov4 = cipsov4; + entry->def.cipso = cipsov4; break; default: goto add_failure; @@ -172,9 +172,9 @@ static int netlbl_mgmt_add_common(struct genl_info *info, map->list.addr = addr->s_addr & mask->s_addr; map->list.mask = mask->s_addr; map->list.valid = 1; - map->type = entry->type; + map->def.type = entry->def.type; if (cipsov4) - map->type_def.cipsov4 = cipsov4; + map->def.cipso = cipsov4; ret_val = netlbl_af4list_add(&map->list, &addrmap->list4); if (ret_val != 0) { @@ -182,8 +182,8 @@ static int netlbl_mgmt_add_common(struct genl_info *info, goto add_failure; } - entry->type = NETLBL_NLTYPE_ADDRSELECT; - entry->type_def.addrsel = addrmap; + entry->def.type = NETLBL_NLTYPE_ADDRSELECT; + entry->def.addrsel = addrmap; #if IS_ENABLED(CONFIG_IPV6) } else if (info->attrs[NLBL_MGMT_A_IPV6ADDR]) { struct in6_addr *addr; @@ -223,7 +223,7 @@ static int netlbl_mgmt_add_common(struct genl_info *info, map->list.addr.s6_addr32[3] &= mask->s6_addr32[3]; map->list.mask = *mask; map->list.valid = 1; - map->type = entry->type; + map->def.type = entry->def.type; ret_val = netlbl_af6list_add(&map->list, &addrmap->list6); if (ret_val != 0) { @@ -231,8 +231,8 @@ static int netlbl_mgmt_add_common(struct genl_info *info, goto add_failure; } - entry->type = NETLBL_NLTYPE_ADDRSELECT; - entry->type_def.addrsel = addrmap; + entry->def.type = NETLBL_NLTYPE_ADDRSELECT; + entry->def.addrsel = addrmap; #endif /* IPv6 */ } @@ -281,14 +281,13 @@ static int netlbl_mgmt_listentry(struct sk_buff *skb, return ret_val; } - switch (entry->type) { + switch (entry->def.type) { case NETLBL_NLTYPE_ADDRSELECT: nla_a = nla_nest_start(skb, NLBL_MGMT_A_SELECTORLIST); if (nla_a == NULL) return -ENOMEM; - netlbl_af4list_foreach_rcu(iter4, - &entry->type_def.addrsel->list4) { + netlbl_af4list_foreach_rcu(iter4, &entry->def.addrsel->list4) { struct netlbl_domaddr4_map *map4; struct in_addr addr_struct; @@ -310,13 +309,13 @@ static int netlbl_mgmt_listentry(struct sk_buff *skb, return ret_val; map4 = netlbl_domhsh_addr4_entry(iter4); ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL, - map4->type); + map4->def.type); if (ret_val != 0) return ret_val; - switch (map4->type) { + switch (map4->def.type) { case NETLBL_NLTYPE_CIPSOV4: ret_val = nla_put_u32(skb, NLBL_MGMT_A_CV4DOI, - map4->type_def.cipsov4->doi); + map4->def.cipso->doi); if (ret_val != 0) return ret_val; break; @@ -325,8 +324,7 @@ static int netlbl_mgmt_listentry(struct sk_buff *skb, nla_nest_end(skb, nla_b); } #if IS_ENABLED(CONFIG_IPV6) - netlbl_af6list_foreach_rcu(iter6, - &entry->type_def.addrsel->list6) { + netlbl_af6list_foreach_rcu(iter6, &entry->def.addrsel->list6) { struct netlbl_domaddr6_map *map6; nla_b = nla_nest_start(skb, NLBL_MGMT_A_ADDRSELECTOR); @@ -345,7 +343,7 @@ static int netlbl_mgmt_listentry(struct sk_buff *skb, return ret_val; map6 = netlbl_domhsh_addr6_entry(iter6); ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL, - map6->type); + map6->def.type); if (ret_val != 0) return ret_val; @@ -356,14 +354,14 @@ static int netlbl_mgmt_listentry(struct sk_buff *skb, nla_nest_end(skb, nla_a); break; case NETLBL_NLTYPE_UNLABELED: - ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL, entry->type); + ret_val = nla_put_u32(skb,NLBL_MGMT_A_PROTOCOL,entry->def.type); break; case NETLBL_NLTYPE_CIPSOV4: - ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL, entry->type); + ret_val = nla_put_u32(skb,NLBL_MGMT_A_PROTOCOL,entry->def.type); if (ret_val != 0) return ret_val; ret_val = nla_put_u32(skb, NLBL_MGMT_A_CV4DOI, - entry->type_def.cipsov4->doi); + entry->def.cipso->doi); break; } diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index af3531926ee..8f0897407a2 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -1541,7 +1541,7 @@ int __init netlbl_unlabel_defconf(void) entry = kzalloc(sizeof(*entry), GFP_KERNEL); if (entry == NULL) return -ENOMEM; - entry->type = NETLBL_NLTYPE_UNLABELED; + entry->def.type = NETLBL_NLTYPE_UNLABELED; ret_val = netlbl_domhsh_add_default(entry, &audit_info); if (ret_val != 0) return ret_val; -- cgit v1.2.3-18-g5258 From e4d091d7bf787cd303383725b8071d0bae76f981 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 1 Aug 2013 12:36:57 +0300 Subject: netfilter: nfnetlink_{log,queue}: fix information leaks in netlink message These structs have a "_pad" member. Also the "phw" structs have an 8 byte "hw_addr[]" array but sometimes only the first 6 bytes are initialized. Signed-off-by: Dan Carpenter Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_log.c | 6 +++++- net/netfilter/nfnetlink_queue_core.c | 5 ++++- 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 962e9792e31..d92cc317bf8 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -419,6 +419,7 @@ __build_packet_message(struct nfnl_log_net *log, nfmsg->version = NFNETLINK_V0; nfmsg->res_id = htons(inst->group_num); + memset(&pmsg, 0, sizeof(pmsg)); pmsg.hw_protocol = skb->protocol; pmsg.hook = hooknum; @@ -498,7 +499,10 @@ __build_packet_message(struct nfnl_log_net *log, if (indev && skb->dev && skb->mac_header != skb->network_header) { struct nfulnl_msg_packet_hw phw; - int len = dev_parse_header(skb, phw.hw_addr); + int len; + + memset(&phw, 0, sizeof(phw)); + len = dev_parse_header(skb, phw.hw_addr); if (len > 0) { phw.hw_addrlen = htons(len); if (nla_put(inst->skb, NFULA_HWADDR, sizeof(phw), &phw)) diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index 971ea145ab3..8a703c3dd31 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -463,7 +463,10 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, if (indev && entskb->dev && entskb->mac_header != entskb->network_header) { struct nfqnl_msg_packet_hw phw; - int len = dev_parse_header(entskb, phw.hw_addr); + int len; + + memset(&phw, 0, sizeof(phw)); + len = dev_parse_header(entskb, phw.hw_addr); if (len) { phw.hw_addrlen = htons(len); if (nla_put(skb, NFQA_HWADDR, sizeof(phw), &phw)) -- cgit v1.2.3-18-g5258 From d9af2d67e490b48f0d36f448d34e7bab9425f142 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Mon, 5 Aug 2013 16:47:38 +0200 Subject: net/vmw_vsock/af_vsock.c: drop unneeded semicolon Drop the semicolon at the end of the list_for_each_entry loop header. Signed-off-by: Julia Lawall Signed-off-by: David S. Miller --- net/vmw_vsock/af_vsock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 593071dabd1..4d9334683f8 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -347,7 +347,7 @@ void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)) for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++) { struct vsock_sock *vsk; list_for_each_entry(vsk, &vsock_connected_table[i], - connected_table); + connected_table) fn(sk_vsock(vsk)); } -- cgit v1.2.3-18-g5258 From 0369722f024cd374f74eac6d261014403aa27ea2 Mon Sep 17 00:00:00 2001 From: "nikolay@redhat.com" Date: Sat, 3 Aug 2013 22:07:46 +0200 Subject: vlan: make vlan_dev_real_dev work over stacked vlans Sometimes we might have stacked vlans on top of each other, and we're interested in the first non-vlan real device on the path, so transform vlan_dev_real_dev to go over the stacked vlans and extract the first non-vlan device. Signed-off-by: Nikolay Aleksandrov Signed-off-by: Veaceslav Falico Signed-off-by: David S. Miller --- net/8021q/vlan_core.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 4a78c4de9f2..6ee48aac776 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -91,7 +91,12 @@ EXPORT_SYMBOL(__vlan_find_dev_deep); struct net_device *vlan_dev_real_dev(const struct net_device *dev) { - return vlan_dev_priv(dev)->real_dev; + struct net_device *ret = vlan_dev_priv(dev)->real_dev; + + while (is_vlan_dev(ret)) + ret = vlan_dev_priv(ret)->real_dev; + + return ret; } EXPORT_SYMBOL(vlan_dev_real_dev); -- cgit v1.2.3-18-g5258 From 07ce76aa9bcf8bc106a53c67548c5602f1598595 Mon Sep 17 00:00:00 2001 From: "nikolay@redhat.com" Date: Sat, 3 Aug 2013 22:07:47 +0200 Subject: net_sched: make dev_trans_start return vlan's real dev trans_start Vlan devices are LLTX and don't update their own trans_start, so if dev_trans_start has to be called with a vlan device then 0 or a stale value will be returned. Currently the bonding is the only such user, and it's needed for proper arp monitoring when the slaves are vlans. Fix this by extracting the vlan's real device trans_start. Suggested-by: David Miller Signed-off-by: Nikolay Aleksandrov Acked-by: Veaceslav Falico Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 4626cef4b76..eeb8276d7a8 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -207,15 +208,19 @@ void __qdisc_run(struct Qdisc *q) unsigned long dev_trans_start(struct net_device *dev) { - unsigned long val, res = dev->trans_start; + unsigned long val, res; unsigned int i; + if (is_vlan_dev(dev)) + dev = vlan_dev_real_dev(dev); + res = dev->trans_start; for (i = 0; i < dev->num_tx_queues; i++) { val = netdev_get_tx_queue(dev, i)->trans_start; if (val && time_after(val, res)) res = val; } dev->trans_start = res; + return res; } EXPORT_SYMBOL(dev_trans_start); -- cgit v1.2.3-18-g5258 From 7921895a5e852fc99de347bc0600659997de9298 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 5 Aug 2013 12:49:35 +0200 Subject: net: esp{4,6}: fix potential MTU calculation overflows Commit 91657eafb ("xfrm: take net hdr len into account for esp payload size calculation") introduced a possible interger overflow in esp{4,6}_get_mtu() handlers in case of x->props.mode equals XFRM_MODE_TUNNEL. Thus, the following expression will overflow unsigned int net_adj; ... net_adj = 0; ... return ((mtu - x->props.header_len - crypto_aead_authsize(esp->aead) - net_adj) & ~(align - 1)) + (net_adj - 2); where (net_adj - 2) would be evaluated as + (0 - 2) in an unsigned context. Fix it by simply removing brackets as those operations here do not need to have special precedence. Signed-off-by: Daniel Borkmann Cc: Benjamin Poirier Cc: Steffen Klassert Acked-by: Benjamin Poirier Signed-off-by: David S. Miller --- net/ipv4/esp4.c | 2 +- net/ipv6/esp6.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index ab3d814bc80..109ee89f123 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -477,7 +477,7 @@ static u32 esp4_get_mtu(struct xfrm_state *x, int mtu) } return ((mtu - x->props.header_len - crypto_aead_authsize(esp->aead) - - net_adj) & ~(align - 1)) + (net_adj - 2); + net_adj) & ~(align - 1)) + net_adj - 2; } static void esp4_err(struct sk_buff *skb, u32 info) diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 40ffd72243a..aeac0dc3635 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -425,7 +425,7 @@ static u32 esp6_get_mtu(struct xfrm_state *x, int mtu) net_adj = 0; return ((mtu - x->props.header_len - crypto_aead_authsize(esp->aead) - - net_adj) & ~(align - 1)) + (net_adj - 2); + net_adj) & ~(align - 1)) + net_adj - 2; } static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, -- cgit v1.2.3-18-g5258 From fc7f8f5c53fdb82d4689e24df3da1a88bc3859f7 Mon Sep 17 00:00:00 2001 From: Veaceslav Falico Date: Fri, 2 Aug 2013 19:07:38 +0200 Subject: neighbour: populate neigh_parms on alloc before calling ndo_neigh_setup dev->ndo_neigh_setup() might need some of the values of neigh_parms, so populate them before calling it. Signed-off-by: Veaceslav Falico Signed-off-by: David S. Miller --- net/core/neighbour.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 9232c68941a..60533db8b72 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1441,16 +1441,18 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev, atomic_set(&p->refcnt, 1); p->reachable_time = neigh_rand_reach_time(p->base_reachable_time); + dev_hold(dev); + p->dev = dev; + write_pnet(&p->net, hold_net(net)); + p->sysctl_table = NULL; if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) { + release_net(net); + dev_put(dev); kfree(p); return NULL; } - dev_hold(dev); - p->dev = dev; - write_pnet(&p->net, hold_net(net)); - p->sysctl_table = NULL; write_lock_bh(&tbl->lock); p->next = tbl->parms.next; tbl->parms.next = p; -- cgit v1.2.3-18-g5258 From aab515d7c32a34300312416c50314e755ea6f765 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 5 Aug 2013 11:18:49 -0700 Subject: fib_trie: remove potential out of bound access AddressSanitizer [1] dynamic checker pointed a potential out of bound access in leaf_walk_rcu() We could allocate one more slot in tnode_new() to leave the prefetch() in-place but it looks not worth the pain. Bug added in commit 82cfbb008572b ("[IPV4] fib_trie: iterator recode") [1] : https://code.google.com/p/address-sanitizer/wiki/AddressSanitizerForKernel Reported-by: Andrey Konovalov Signed-off-by: Eric Dumazet Cc: Dmitry Vyukov Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 108a1e9c9ea..3df6d3edb2a 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -71,7 +71,6 @@ #include #include #include -#include #include #include #include @@ -1761,10 +1760,8 @@ static struct leaf *leaf_walk_rcu(struct tnode *p, struct rt_trie_node *c) if (!c) continue; - if (IS_LEAF(c)) { - prefetch(rcu_dereference_rtnl(p->child[idx])); + if (IS_LEAF(c)) return (struct leaf *) c; - } /* Rescan start scanning in new node */ p = (struct tnode *) c; -- cgit v1.2.3-18-g5258 From 248ba8ec05a2c3b118c2224e57eb10c128176ab1 Mon Sep 17 00:00:00 2001 From: Linus Lüssing Date: Tue, 6 Aug 2013 00:32:05 +0200 Subject: bridge: don't try to update timers in case of broken MLD queries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently we are reading an uninitialized value for the max_delay variable when snooping an MLD query message of invalid length and would update our timers with that. Fixing this by simply ignoring such broken MLD queries (just like we do for IGMP already). This is a regression introduced by: "bridge: disable snooping if there is no querier" (b00589af3b04) Reported-by: Paul Bolle Signed-off-by: Linus Lüssing Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 61c5e819380..08e576ada0b 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1195,7 +1195,7 @@ static int br_ip6_multicast_query(struct net_bridge *br, max_delay = msecs_to_jiffies(ntohs(mld->mld_maxdelay)); if (max_delay) group = &mld->mld_mca; - } else if (skb->len >= sizeof(*mld2q)) { + } else { if (!pskb_may_pull(skb, sizeof(*mld2q))) { err = -EINVAL; goto out; -- cgit v1.2.3-18-g5258 From 00326ed6442c66021cd4b5e19e80f3e2027d5d42 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 5 Aug 2013 14:10:43 -0400 Subject: SUNRPC: Don't auto-disconnect from the local rpcbind socket There is no need for the kernel to time out the AF_LOCAL connection to the rpcbind socket, and doing so is problematic because when it is time to reconnect, our process may no longer be using the same mount namespace. Reported-by: Nix Signed-off-by: Trond Myklebust Cc: Jeff Layton Cc: stable@vger.kernel.org # 3.9.x --- net/sunrpc/rpcb_clnt.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'net') diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 3df764dc330..b0f72322715 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -238,6 +238,14 @@ static int rpcb_create_local_unix(struct net *net) .program = &rpcb_program, .version = RPCBVERS_2, .authflavor = RPC_AUTH_NULL, + /* + * We turn off the idle timeout to prevent the kernel + * from automatically disconnecting the socket. + * Otherwise, we'd have to cache the mount namespace + * of the caller and somehow pass that to the socket + * reconnect code. + */ + .flags = RPC_CLNT_CREATE_NO_IDLE_TIMEOUT, }; struct rpc_clnt *clnt, *clnt4; int result = 0; -- cgit v1.2.3-18-g5258 From 2ed0edf9090bf4afa2c6fc4f38575a85a80d4b20 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 5 Aug 2013 17:10:15 -0700 Subject: tcp: cubic: fix overflow error in bictcp_update() commit 17a6e9f1aa9 ("tcp_cubic: fix clock dependency") added an overflow error in bictcp_update() in following code : /* change the unit from HZ to bictcp_HZ */ t = ((tcp_time_stamp + msecs_to_jiffies(ca->delay_min>>3) - ca->epoch_start) << BICTCP_HZ) / HZ; Because msecs_to_jiffies() being unsigned long, compiler does implicit type promotion. We really want to constrain (tcp_time_stamp - ca->epoch_start) to a signed 32bit value, or else 't' has unexpected high values. This bugs triggers an increase of retransmit rates ~24 days after boot [1], as the high order bit of tcp_time_stamp flips. [1] for hosts with HZ=1000 Big thanks to Van Jacobson for spotting this problem. Diagnosed-by: Van Jacobson Signed-off-by: Eric Dumazet Cc: Neal Cardwell Cc: Yuchung Cheng Cc: Stephen Hemminger Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_cubic.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index a9077f441cb..b6b591f0a78 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -206,8 +206,8 @@ static u32 cubic_root(u64 a) */ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) { - u64 offs; - u32 delta, t, bic_target, max_cnt; + u32 delta, bic_target, max_cnt; + u64 offs, t; ca->ack_cnt++; /* count the number of ACKs */ @@ -250,9 +250,11 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) * if the cwnd < 1 million packets !!! */ + t = (s32)(tcp_time_stamp - ca->epoch_start); + t += msecs_to_jiffies(ca->delay_min >> 3); /* change the unit from HZ to bictcp_HZ */ - t = ((tcp_time_stamp + msecs_to_jiffies(ca->delay_min>>3) - - ca->epoch_start) << BICTCP_HZ) / HZ; + t <<= BICTCP_HZ; + do_div(t, HZ); if (t < ca->bic_K) /* t - K */ offs = ca->bic_K - t; -- cgit v1.2.3-18-g5258 From 15401946f9b720efdd20bda3ae79725e9c586897 Mon Sep 17 00:00:00 2001 From: Wang Sheng-Hui Date: Tue, 6 Aug 2013 08:44:46 +0800 Subject: bridge: correct the comment for file br_sysfs_br.c br_sysfs_if.c is for sysfs attributes of bridge ports, while br_sysfs_br.c is for sysfs attributes of bridge itself. Correct the comment here. Signed-off-by: Wang Sheng-Hui Signed-off-by: David S. Miller --- net/bridge/br_sysfs_br.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index 394bb96b608..3b9637fb793 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -1,5 +1,5 @@ /* - * Sysfs attributes of bridge ports + * Sysfs attributes of bridge * Linux ethernet bridge * * Authors: -- cgit v1.2.3-18-g5258 From cd6b423afd3c08b27e1fed52db828ade0addbc6b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 5 Aug 2013 20:05:12 -0700 Subject: tcp: cubic: fix bug in bictcp_acked() While investigating about strange increase of retransmit rates on hosts ~24 days after boot, Van found hystart was disabled if ca->epoch_start was 0, as following condition is true when tcp_time_stamp high order bit is set. (s32)(tcp_time_stamp - ca->epoch_start) < HZ Quoting Van : At initialization & after every loss ca->epoch_start is set to zero so I believe that the above line will turn off hystart as soon as the 2^31 bit is set in tcp_time_stamp & hystart will stay off for 24 days. I think we've observed that cubic's restart is too aggressive without hystart so this might account for the higher drop rate we observe. Diagnosed-by: Van Jacobson Signed-off-by: Eric Dumazet Cc: Neal Cardwell Cc: Yuchung Cheng Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_cubic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index b6b591f0a78..b6ae92a51f5 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -416,7 +416,7 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us) return; /* Discard delay samples right after fast recovery */ - if ((s32)(tcp_time_stamp - ca->epoch_start) < HZ) + if (ca->epoch_start && (s32)(tcp_time_stamp - ca->epoch_start) < HZ) return; delay = (rtt_us << 3) / USEC_PER_MSEC; -- cgit v1.2.3-18-g5258 From 786615bc1ce84150ded80daea6bd9f6297f48e73 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 5 Aug 2013 16:04:47 -0400 Subject: SUNRPC: If the rpcbind channel is disconnected, fail the call to unregister If rpcbind causes our connection to the AF_LOCAL socket to close after we've registered a service, then we want to be careful about reconnecting since the mount namespace may have changed. By simply refusing to reconnect the AF_LOCAL socket in the case of unregister, we avoid the need to somehow save the mount namespace. While this may lead to some services not unregistering properly, it should be safe. Signed-off-by: Trond Myklebust Cc: Nix Cc: Jeff Layton Cc: stable@vger.kernel.org # 3.9.x --- net/sunrpc/clnt.c | 4 ++++ net/sunrpc/netns.h | 1 + net/sunrpc/rpcb_clnt.c | 40 +++++++++++++++++++++++++++------------- 3 files changed, 32 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 74f6a704e37..ecbc4e3d83a 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1660,6 +1660,10 @@ call_connect(struct rpc_task *task) task->tk_action = call_connect_status; if (task->tk_status < 0) return; + if (task->tk_flags & RPC_TASK_NOCONNECT) { + rpc_exit(task, -ENOTCONN); + return; + } xprt_connect(task); } } diff --git a/net/sunrpc/netns.h b/net/sunrpc/netns.h index 74d948f5d5a..779742cfc1f 100644 --- a/net/sunrpc/netns.h +++ b/net/sunrpc/netns.h @@ -23,6 +23,7 @@ struct sunrpc_net { struct rpc_clnt *rpcb_local_clnt4; spinlock_t rpcb_clnt_lock; unsigned int rpcb_users; + unsigned int rpcb_is_af_local : 1; struct mutex gssp_lock; wait_queue_head_t gssp_wq; diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index b0f72322715..1891a1022c1 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -204,13 +204,15 @@ void rpcb_put_local(struct net *net) } static void rpcb_set_local(struct net *net, struct rpc_clnt *clnt, - struct rpc_clnt *clnt4) + struct rpc_clnt *clnt4, + bool is_af_local) { struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); /* Protected by rpcb_create_local_mutex */ sn->rpcb_local_clnt = clnt; sn->rpcb_local_clnt4 = clnt4; + sn->rpcb_is_af_local = is_af_local ? 1 : 0; smp_wmb(); sn->rpcb_users = 1; dprintk("RPC: created new rpcb local clients (rpcb_local_clnt: " @@ -271,7 +273,7 @@ static int rpcb_create_local_unix(struct net *net) clnt4 = NULL; } - rpcb_set_local(net, clnt, clnt4); + rpcb_set_local(net, clnt, clnt4, true); out: return result; @@ -323,7 +325,7 @@ static int rpcb_create_local_net(struct net *net) clnt4 = NULL; } - rpcb_set_local(net, clnt, clnt4); + rpcb_set_local(net, clnt, clnt4, false); out: return result; @@ -384,13 +386,16 @@ static struct rpc_clnt *rpcb_create(struct net *net, const char *hostname, return rpc_create(&args); } -static int rpcb_register_call(struct rpc_clnt *clnt, struct rpc_message *msg) +static int rpcb_register_call(struct sunrpc_net *sn, struct rpc_clnt *clnt, struct rpc_message *msg, bool is_set) { - int result, error = 0; + int flags = RPC_TASK_NOCONNECT; + int error, result = 0; + if (is_set || !sn->rpcb_is_af_local) + flags = RPC_TASK_SOFTCONN; msg->rpc_resp = &result; - error = rpc_call_sync(clnt, msg, RPC_TASK_SOFTCONN); + error = rpc_call_sync(clnt, msg, flags); if (error < 0) { dprintk("RPC: failed to contact local rpcbind " "server (errno %d).\n", -error); @@ -447,16 +452,19 @@ int rpcb_register(struct net *net, u32 prog, u32 vers, int prot, unsigned short .rpc_argp = &map, }; struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); + bool is_set = false; dprintk("RPC: %sregistering (%u, %u, %d, %u) with local " "rpcbind\n", (port ? "" : "un"), prog, vers, prot, port); msg.rpc_proc = &rpcb_procedures2[RPCBPROC_UNSET]; - if (port) + if (port != 0) { msg.rpc_proc = &rpcb_procedures2[RPCBPROC_SET]; + is_set = true; + } - return rpcb_register_call(sn->rpcb_local_clnt, &msg); + return rpcb_register_call(sn, sn->rpcb_local_clnt, &msg, is_set); } /* @@ -469,6 +477,7 @@ static int rpcb_register_inet4(struct sunrpc_net *sn, const struct sockaddr_in *sin = (const struct sockaddr_in *)sap; struct rpcbind_args *map = msg->rpc_argp; unsigned short port = ntohs(sin->sin_port); + bool is_set = false; int result; map->r_addr = rpc_sockaddr2uaddr(sap, GFP_KERNEL); @@ -479,10 +488,12 @@ static int rpcb_register_inet4(struct sunrpc_net *sn, map->r_addr, map->r_netid); msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET]; - if (port) + if (port != 0) { msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; + is_set = true; + } - result = rpcb_register_call(sn->rpcb_local_clnt4, msg); + result = rpcb_register_call(sn, sn->rpcb_local_clnt4, msg, is_set); kfree(map->r_addr); return result; } @@ -497,6 +508,7 @@ static int rpcb_register_inet6(struct sunrpc_net *sn, const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)sap; struct rpcbind_args *map = msg->rpc_argp; unsigned short port = ntohs(sin6->sin6_port); + bool is_set = false; int result; map->r_addr = rpc_sockaddr2uaddr(sap, GFP_KERNEL); @@ -507,10 +519,12 @@ static int rpcb_register_inet6(struct sunrpc_net *sn, map->r_addr, map->r_netid); msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET]; - if (port) + if (port != 0) { msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; + is_set = true; + } - result = rpcb_register_call(sn->rpcb_local_clnt4, msg); + result = rpcb_register_call(sn, sn->rpcb_local_clnt4, msg, is_set); kfree(map->r_addr); return result; } @@ -527,7 +541,7 @@ static int rpcb_unregister_all_protofamilies(struct sunrpc_net *sn, map->r_addr = ""; msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET]; - return rpcb_register_call(sn->rpcb_local_clnt4, msg); + return rpcb_register_call(sn, sn->rpcb_local_clnt4, msg, false); } /** -- cgit v1.2.3-18-g5258 From 3e3be275851bc6fc90bfdcd732cd95563acd982b Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Wed, 7 Aug 2013 02:34:31 +0200 Subject: ipv6: don't stop backtracking in fib6_lookup_1 if subtree does not match In case a subtree did not match we currently stop backtracking and return NULL (root table from fib_lookup). This could yield in invalid routing table lookups when using subtrees. Instead continue to backtrack until a valid subtree or node is found and return this match. Also remove unneeded NULL check. Reported-by: Teco Boot Cc: YOSHIFUJI Hideaki Cc: David Lamparter Cc: Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index bff3d821c7e..c4ff5bbb45c 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -993,14 +993,22 @@ static struct fib6_node * fib6_lookup_1(struct fib6_node *root, if (ipv6_prefix_equal(&key->addr, args->addr, key->plen)) { #ifdef CONFIG_IPV6_SUBTREES - if (fn->subtree) - fn = fib6_lookup_1(fn->subtree, args + 1); + if (fn->subtree) { + struct fib6_node *sfn; + sfn = fib6_lookup_1(fn->subtree, + args + 1); + if (!sfn) + goto backtrack; + fn = sfn; + } #endif - if (!fn || fn->fn_flags & RTN_RTINFO) + if (fn->fn_flags & RTN_RTINFO) return fn; } } - +#ifdef CONFIG_IPV6_SUBTREES +backtrack: +#endif if (fn->fn_flags & RTN_ROOT) break; -- cgit v1.2.3-18-g5258 From 77a482bdb2e68d13fae87541b341905ba70d572b Mon Sep 17 00:00:00 2001 From: Timo Teräs Date: Tue, 6 Aug 2013 13:45:43 +0300 Subject: ip_gre: fix ipgre_header to return correct offset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix ipgre_header() (header_ops->create) to return the correct amount of bytes pushed. Most callers of dev_hard_header() seem to care only if it was success, but af_packet.c uses it as offset to the skb to copy from userspace only once. In practice this fixes packet socket sendto()/sendmsg() to gre tunnels. Regression introduced in c54419321455631079c7d6e60bc732dd0c5914c5 ("GRE: Refactor GRE tunneling code.") Cc: Pravin B Shelar Signed-off-by: Timo Teräs Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 1f6eab66f7c..8d6939eeb49 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -383,7 +383,7 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev, if (daddr) memcpy(&iph->daddr, daddr, 4); if (iph->daddr) - return t->hlen; + return t->hlen + sizeof(*iph); return -(t->hlen + sizeof(*iph)); } -- cgit v1.2.3-18-g5258 From e11aada32b39a060e26fa4091cb968bd42e3bcbf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 6 Aug 2013 04:35:06 -0700 Subject: net: flow_dissector: add 802.1ad support Same behavior than 802.1q : finds the encapsulated protocol and skip 32bit header. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/flow_dissector.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 00ee068efc1..b84a1b155bc 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -65,6 +65,7 @@ ipv6: nhoff += sizeof(struct ipv6hdr); break; } + case __constant_htons(ETH_P_8021AD): case __constant_htons(ETH_P_8021Q): { const struct vlan_hdr *vlan; struct vlan_hdr _vlan; -- cgit v1.2.3-18-g5258 From 288a9376371d425edeeea41a0310922c5fb2092d Mon Sep 17 00:00:00 2001 From: Eliezer Tamir Date: Wed, 7 Aug 2013 11:33:25 +0300 Subject: net: rename busy poll MIB counter Rename mib counter from "low latency" to "busy poll" v1 also moved the counter to the ip MIB (suggested by Shawn Bohrer) Eric Dumazet suggested that the current location is better. So v2 just renames the counter to fit the new naming convention. Signed-off-by: Eliezer Tamir Signed-off-by: David S. Miller --- net/ipv4/proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 6577a1149a4..463bd127334 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -273,7 +273,7 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPFastOpenListenOverflow", LINUX_MIB_TCPFASTOPENLISTENOVERFLOW), SNMP_MIB_ITEM("TCPFastOpenCookieReqd", LINUX_MIB_TCPFASTOPENCOOKIEREQD), SNMP_MIB_ITEM("TCPSpuriousRtxHostQueues", LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES), - SNMP_MIB_ITEM("LowLatencyRxPackets", LINUX_MIB_LOWLATENCYRXPACKETS), + SNMP_MIB_ITEM("BusyPollRxPackets", LINUX_MIB_BUSYPOLLRXPACKETS), SNMP_MIB_SENTINEL }; -- cgit v1.2.3-18-g5258 From 645359930231d5e78fd3296a38b98c1a658a7ade Mon Sep 17 00:00:00 2001 From: Sridhar Samudrala Date: Thu, 8 Aug 2013 15:19:48 -0700 Subject: rtnetlink: Fix inverted check in ndo_dflt_fdb_del() Fix inverted check when deleting an fdb entry. Signed-off-by: Sridhar Samudrala Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 3de740834d1..82d96852712 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2156,7 +2156,7 @@ int ndo_dflt_fdb_del(struct ndmsg *ndm, /* If aging addresses are supported device will need to * implement its own handler for this. */ - if (ndm->ndm_state & NUD_PERMANENT) { + if (!(ndm->ndm_state & NUD_PERMANENT)) { pr_info("%s: FDB only supports static addresses\n", dev->name); return -EINVAL; } -- cgit v1.2.3-18-g5258 From 356d7d88e088687b6578ca64601b0a2c9d145296 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Fri, 9 Aug 2013 17:21:27 -0700 Subject: netfilter: nf_conntrack: fix tcp_in_window for Fast Open Currently the conntrack checks if the ending sequence of a packet falls within the observed receive window. However it does so even if it has not observe any packet from the remote yet and uses an uninitialized receive window (td_maxwin). If a connection uses Fast Open to send a SYN-data packet which is dropped afterward in the network. The subsequent SYNs retransmits will all fail this check and be discarded, leading to a connection timeout. This is because the SYN retransmit does not contain data payload so end == initial sequence number (isn) + 1 sender->td_end == isn + syn_data_len receiver->td_maxwin == 0 The fix is to only apply this check after td_maxwin is initialized. Reported-by: Michael Chan Signed-off-by: Yuchung Cheng Acked-by: Eric Dumazet Acked-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_proto_tcp.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 7dcc376eea5..2f8010707d0 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -526,7 +526,7 @@ static bool tcp_in_window(const struct nf_conn *ct, const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple; __u32 seq, ack, sack, end, win, swin; s16 receiver_offset; - bool res; + bool res, in_recv_win; /* * Get the required data from the packet. @@ -649,14 +649,18 @@ static bool tcp_in_window(const struct nf_conn *ct, receiver->td_end, receiver->td_maxend, receiver->td_maxwin, receiver->td_scale); + /* Is the ending sequence in the receive window (if available)? */ + in_recv_win = !receiver->td_maxwin || + after(end, sender->td_end - receiver->td_maxwin - 1); + pr_debug("tcp_in_window: I=%i II=%i III=%i IV=%i\n", before(seq, sender->td_maxend + 1), - after(end, sender->td_end - receiver->td_maxwin - 1), + (in_recv_win ? 1 : 0), before(sack, receiver->td_end + 1), after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)); if (before(seq, sender->td_maxend + 1) && - after(end, sender->td_end - receiver->td_maxwin - 1) && + in_recv_win && before(sack, receiver->td_end + 1) && after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)) { /* @@ -725,7 +729,7 @@ static bool tcp_in_window(const struct nf_conn *ct, nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: %s ", before(seq, sender->td_maxend + 1) ? - after(end, sender->td_end - receiver->td_maxwin - 1) ? + in_recv_win ? before(sack, receiver->td_end + 1) ? after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1) ? "BUG" : "ACK is under the lower bound (possible overly delayed ACK)" -- cgit v1.2.3-18-g5258 From 9d2c9488cedb666bc8206fbdcdc1575e0fbc5929 Mon Sep 17 00:00:00 2001 From: Linus Lüssing Date: Tue, 6 Aug 2013 20:21:15 +0200 Subject: batman-adv: fix potential kernel paging errors for unicast transmissions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are several functions which might reallocate skb data. Currently some places keep reusing their old ethhdr pointer regardless of whether they became invalid after such a reallocation or not. This potentially leads to kernel paging errors. This patch fixes these by refetching the ethdr pointer after the potential reallocations. Signed-off-by: Linus Lüssing Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/bridge_loop_avoidance.c | 2 ++ net/batman-adv/gateway_client.c | 13 ++++++++++++- net/batman-adv/gateway_client.h | 3 +-- net/batman-adv/soft-interface.c | 9 ++++++++- net/batman-adv/unicast.c | 13 ++++++++++--- 5 files changed, 33 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index e14531f1ce1..264de88db32 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -1529,6 +1529,8 @@ out: * in these cases, the skb is further handled by this function and * returns 1, otherwise it returns 0 and the caller shall further * process the skb. + * + * This call might reallocate skb data. */ int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb, unsigned short vid) diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index f105219f4a4..7614af31daf 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -508,6 +508,7 @@ out: return 0; } +/* this call might reallocate skb data */ static bool batadv_is_type_dhcprequest(struct sk_buff *skb, int header_len) { int ret = false; @@ -568,6 +569,7 @@ out: return ret; } +/* this call might reallocate skb data */ bool batadv_gw_is_dhcp_target(struct sk_buff *skb, unsigned int *header_len) { struct ethhdr *ethhdr; @@ -619,6 +621,12 @@ bool batadv_gw_is_dhcp_target(struct sk_buff *skb, unsigned int *header_len) if (!pskb_may_pull(skb, *header_len + sizeof(*udphdr))) return false; + + /* skb->data might have been reallocated by pskb_may_pull() */ + ethhdr = (struct ethhdr *)skb->data; + if (ntohs(ethhdr->h_proto) == ETH_P_8021Q) + ethhdr = (struct ethhdr *)(skb->data + VLAN_HLEN); + udphdr = (struct udphdr *)(skb->data + *header_len); *header_len += sizeof(*udphdr); @@ -634,12 +642,14 @@ bool batadv_gw_is_dhcp_target(struct sk_buff *skb, unsigned int *header_len) return true; } +/* this call might reallocate skb data */ bool batadv_gw_out_of_range(struct batadv_priv *bat_priv, - struct sk_buff *skb, struct ethhdr *ethhdr) + struct sk_buff *skb) { struct batadv_neigh_node *neigh_curr = NULL, *neigh_old = NULL; struct batadv_orig_node *orig_dst_node = NULL; struct batadv_gw_node *curr_gw = NULL; + struct ethhdr *ethhdr; bool ret, out_of_range = false; unsigned int header_len = 0; uint8_t curr_tq_avg; @@ -648,6 +658,7 @@ bool batadv_gw_out_of_range(struct batadv_priv *bat_priv, if (!ret) goto out; + ethhdr = (struct ethhdr *)skb->data; orig_dst_node = batadv_transtable_search(bat_priv, ethhdr->h_source, ethhdr->h_dest); if (!orig_dst_node) diff --git a/net/batman-adv/gateway_client.h b/net/batman-adv/gateway_client.h index 039902dca4a..1037d75da51 100644 --- a/net/batman-adv/gateway_client.h +++ b/net/batman-adv/gateway_client.h @@ -34,7 +34,6 @@ void batadv_gw_node_delete(struct batadv_priv *bat_priv, void batadv_gw_node_purge(struct batadv_priv *bat_priv); int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset); bool batadv_gw_is_dhcp_target(struct sk_buff *skb, unsigned int *header_len); -bool batadv_gw_out_of_range(struct batadv_priv *bat_priv, - struct sk_buff *skb, struct ethhdr *ethhdr); +bool batadv_gw_out_of_range(struct batadv_priv *bat_priv, struct sk_buff *skb); #endif /* _NET_BATMAN_ADV_GATEWAY_CLIENT_H_ */ diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 700d0b49742..0f04e1c302b 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -180,6 +180,9 @@ static int batadv_interface_tx(struct sk_buff *skb, if (batadv_bla_tx(bat_priv, skb, vid)) goto dropped; + /* skb->data might have been reallocated by batadv_bla_tx() */ + ethhdr = (struct ethhdr *)skb->data; + /* Register the client MAC in the transtable */ if (!is_multicast_ether_addr(ethhdr->h_source)) batadv_tt_local_add(soft_iface, ethhdr->h_source, skb->skb_iif); @@ -220,6 +223,10 @@ static int batadv_interface_tx(struct sk_buff *skb, default: break; } + + /* reminder: ethhdr might have become unusable from here on + * (batadv_gw_is_dhcp_target() might have reallocated skb data) + */ } /* ethernet packet should be broadcasted */ @@ -266,7 +273,7 @@ static int batadv_interface_tx(struct sk_buff *skb, /* unicast packet */ } else { if (atomic_read(&bat_priv->gw_mode) != BATADV_GW_MODE_OFF) { - ret = batadv_gw_out_of_range(bat_priv, skb, ethhdr); + ret = batadv_gw_out_of_range(bat_priv, skb); if (ret) goto dropped; } diff --git a/net/batman-adv/unicast.c b/net/batman-adv/unicast.c index dc8b5d4dd63..688a0419756 100644 --- a/net/batman-adv/unicast.c +++ b/net/batman-adv/unicast.c @@ -326,7 +326,9 @@ static bool batadv_unicast_push_and_fill_skb(struct sk_buff *skb, int hdr_size, * @skb: the skb containing the payload to encapsulate * @orig_node: the destination node * - * Returns false if the payload could not be encapsulated or true otherwise + * Returns false if the payload could not be encapsulated or true otherwise. + * + * This call might reallocate skb data. */ static bool batadv_unicast_prepare_skb(struct sk_buff *skb, struct batadv_orig_node *orig_node) @@ -343,7 +345,9 @@ static bool batadv_unicast_prepare_skb(struct sk_buff *skb, * @orig_node: the destination node * @packet_subtype: the batman 4addr packet subtype to use * - * Returns false if the payload could not be encapsulated or true otherwise + * Returns false if the payload could not be encapsulated or true otherwise. + * + * This call might reallocate skb data. */ bool batadv_unicast_4addr_prepare_skb(struct batadv_priv *bat_priv, struct sk_buff *skb, @@ -401,7 +405,7 @@ int batadv_unicast_generic_send_skb(struct batadv_priv *bat_priv, struct batadv_neigh_node *neigh_node; int data_len = skb->len; int ret = NET_RX_DROP; - unsigned int dev_mtu; + unsigned int dev_mtu, header_len; /* get routing information */ if (is_multicast_ether_addr(ethhdr->h_dest)) { @@ -429,10 +433,12 @@ find_router: switch (packet_type) { case BATADV_UNICAST: batadv_unicast_prepare_skb(skb, orig_node); + header_len = sizeof(struct batadv_unicast_packet); break; case BATADV_UNICAST_4ADDR: batadv_unicast_4addr_prepare_skb(bat_priv, skb, orig_node, packet_subtype); + header_len = sizeof(struct batadv_unicast_4addr_packet); break; default: /* this function supports UNICAST and UNICAST_4ADDR only. It @@ -441,6 +447,7 @@ find_router: goto out; } + ethhdr = (struct ethhdr *)(skb->data + header_len); unicast_packet = (struct batadv_unicast_packet *)skb->data; /* inform the destination node that we are still missing a correct route -- cgit v1.2.3-18-g5258 From d4cca39d90fca21c04315095de5d0e734e839a8b Mon Sep 17 00:00:00 2001 From: dingtianhong Date: Fri, 9 Aug 2013 17:12:58 +0800 Subject: tipc: avoid possible deadlock while enable and disable bearer We met lockdep warning when enable and disable the bearer for commands such as: tipc-config -netid=1234 -addr=1.1.3 -be=eth:eth0 tipc-config -netid=1234 -addr=1.1.3 -bd=eth:eth0 --------------------------------------------------- [ 327.693595] ====================================================== [ 327.693994] [ INFO: possible circular locking dependency detected ] [ 327.694519] 3.11.0-rc3-wwd-default #4 Tainted: G O [ 327.694882] ------------------------------------------------------- [ 327.695385] tipc-config/5825 is trying to acquire lock: [ 327.695754] (((timer))#2){+.-...}, at: [] del_timer_sync+0x0/0xd0 [ 327.696018] [ 327.696018] but task is already holding lock: [ 327.696018] (&(&b_ptr->lock)->rlock){+.-...}, at: [] bearer_disable+ 0xdd/0x120 [tipc] [ 327.696018] [ 327.696018] which lock already depends on the new lock. [ 327.696018] [ 327.696018] [ 327.696018] the existing dependency chain (in reverse order) is: [ 327.696018] [ 327.696018] -> #1 (&(&b_ptr->lock)->rlock){+.-...}: [ 327.696018] [] validate_chain+0x6dd/0x870 [ 327.696018] [] __lock_acquire+0x3db/0x670 [ 327.696018] [] lock_acquire+0x103/0x130 [ 327.696018] [] _raw_spin_lock_bh+0x41/0x80 [ 327.696018] [] disc_timeout+0x18/0xd0 [tipc] [ 327.696018] [] call_timer_fn+0xda/0x1e0 [ 327.696018] [] run_timer_softirq+0x2a7/0x2d0 [ 327.696018] [] __do_softirq+0x16a/0x2e0 [ 327.696018] [] irq_exit+0xd5/0xe0 [ 327.696018] [] smp_apic_timer_interrupt+0x45/0x60 [ 327.696018] [] apic_timer_interrupt+0x6f/0x80 [ 327.696018] [] arch_cpu_idle+0x1e/0x30 [ 327.696018] [] cpu_idle_loop+0x1fd/0x280 [ 327.696018] [] cpu_startup_entry+0x1e/0x20 [ 327.696018] [] start_secondary+0x89/0x90 [ 327.696018] [ 327.696018] -> #0 (((timer))#2){+.-...}: [ 327.696018] [] check_prev_add+0x43e/0x4b0 [ 327.696018] [] validate_chain+0x6dd/0x870 [ 327.696018] [] __lock_acquire+0x3db/0x670 [ 327.696018] [] lock_acquire+0x103/0x130 [ 327.696018] [] del_timer_sync+0x3d/0xd0 [ 327.696018] [] tipc_disc_delete+0x15/0x30 [tipc] [ 327.696018] [] bearer_disable+0xef/0x120 [tipc] [ 327.696018] [] tipc_disable_bearer+0x2f/0x60 [tipc] [ 327.696018] [] tipc_cfg_do_cmd+0x2e2/0x550 [tipc] [ 327.696018] [] handle_cmd+0x49/0xe0 [tipc] [ 327.696018] [] genl_family_rcv_msg+0x268/0x340 [ 327.696018] [] genl_rcv_msg+0x70/0xd0 [ 327.696018] [] netlink_rcv_skb+0x89/0xb0 [ 327.696018] [] genl_rcv+0x27/0x40 [ 327.696018] [] netlink_unicast+0x15e/0x1b0 [ 327.696018] [] netlink_sendmsg+0x22f/0x400 [ 327.696018] [] __sock_sendmsg+0x66/0x80 [ 327.696018] [] sock_aio_write+0x107/0x120 [ 327.696018] [] do_sync_write+0x7d/0xc0 [ 327.696018] [] vfs_write+0x186/0x190 [ 327.696018] [] SyS_write+0x60/0xb0 [ 327.696018] [] system_call_fastpath+0x16/0x1b [ 327.696018] [ 327.696018] other info that might help us debug this: [ 327.696018] [ 327.696018] Possible unsafe locking scenario: [ 327.696018] [ 327.696018] CPU0 CPU1 [ 327.696018] ---- ---- [ 327.696018] lock(&(&b_ptr->lock)->rlock); [ 327.696018] lock(((timer))#2); [ 327.696018] lock(&(&b_ptr->lock)->rlock); [ 327.696018] lock(((timer))#2); [ 327.696018] [ 327.696018] *** DEADLOCK *** [ 327.696018] [ 327.696018] 5 locks held by tipc-config/5825: [ 327.696018] #0: (cb_lock){++++++}, at: [] genl_rcv+0x18/0x40 [ 327.696018] #1: (genl_mutex){+.+.+.}, at: [] genl_rcv_msg+0xa6/0xd0 [ 327.696018] #2: (config_mutex){+.+.+.}, at: [] tipc_cfg_do_cmd+0x39/ 0x550 [tipc] [ 327.696018] #3: (tipc_net_lock){++.-..}, at: [] tipc_disable_bearer+ 0x18/0x60 [tipc] [ 327.696018] #4: (&(&b_ptr->lock)->rlock){+.-...}, at: [] bearer_disable+0xdd/0x120 [tipc] [ 327.696018] [ 327.696018] stack backtrace: [ 327.696018] CPU: 2 PID: 5825 Comm: tipc-config Tainted: G O 3.11.0-rc3-wwd- default #4 [ 327.696018] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007 [ 327.696018] 00000000ffffffff ffff880037fa77a8 ffffffff814d03dd 0000000000000000 [ 327.696018] ffff880037fa7808 ffff880037fa77e8 ffffffff810b1c4f 0000000037fa77e8 [ 327.696018] ffff880037fa7808 ffff880037e4db40 0000000000000000 ffff880037e4e318 [ 327.696018] Call Trace: [ 327.696018] [] dump_stack+0x4d/0xa0 [ 327.696018] [] print_circular_bug+0x10f/0x120 [ 327.696018] [] check_prev_add+0x43e/0x4b0 [ 327.696018] [] validate_chain+0x6dd/0x870 [ 327.696018] [] ? sched_clock_cpu+0xd8/0x110 [ 327.696018] [] __lock_acquire+0x3db/0x670 [ 327.696018] [] lock_acquire+0x103/0x130 [ 327.696018] [] ? try_to_del_timer_sync+0x70/0x70 [ 327.696018] [] del_timer_sync+0x3d/0xd0 [ 327.696018] [] ? try_to_del_timer_sync+0x70/0x70 [ 327.696018] [] tipc_disc_delete+0x15/0x30 [tipc] [ 327.696018] [] bearer_disable+0xef/0x120 [tipc] [ 327.696018] [] tipc_disable_bearer+0x2f/0x60 [tipc] [ 327.696018] [] tipc_cfg_do_cmd+0x2e2/0x550 [tipc] [ 327.696018] [] ? security_capable+0x13/0x20 [ 327.696018] [] handle_cmd+0x49/0xe0 [tipc] [ 327.696018] [] genl_family_rcv_msg+0x268/0x340 [ 327.696018] [] genl_rcv_msg+0x70/0xd0 [ 327.696018] [] ? genl_lock+0x20/0x20 [ 327.696018] [] netlink_rcv_skb+0x89/0xb0 [ 327.696018] [] ? genl_rcv+0x18/0x40 [ 327.696018] [] genl_rcv+0x27/0x40 [ 327.696018] [] netlink_unicast+0x15e/0x1b0 [ 327.696018] [] ? memcpy_fromiovec+0x6c/0x90 [ 327.696018] [] netlink_sendmsg+0x22f/0x400 [ 327.696018] [] __sock_sendmsg+0x66/0x80 [ 327.696018] [] sock_aio_write+0x107/0x120 [ 327.696018] [] ? release_sock+0x8c/0xa0 [ 327.696018] [] do_sync_write+0x7d/0xc0 [ 327.696018] [] ? rw_verify_area+0x54/0x100 [ 327.696018] [] vfs_write+0x186/0x190 [ 327.696018] [] SyS_write+0x60/0xb0 [ 327.696018] [] system_call_fastpath+0x16/0x1b ----------------------------------------------------------------------- The problem is that the tipc_link_delete() will cancel the timer disc_timeout() when the b_ptr->lock is hold, but the disc_timeout() still call b_ptr->lock to finish the work, so the dead lock occurs. We should unlock the b_ptr->lock when del the disc_timeout(). Remove link_timeout() still met the same problem, the patch: http://article.gmane.org/gmane.network.tipc.general/4380 fix the problem, so no need to send patch for fix link_timeout() deadlock warming. Signed-off-by: Wang Weidong Signed-off-by: Ding Tianhong Acked-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/bearer.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index cb29ef7ba2f..609c30c8081 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -460,6 +460,7 @@ static void bearer_disable(struct tipc_bearer *b_ptr) { struct tipc_link *l_ptr; struct tipc_link *temp_l_ptr; + struct tipc_link_req *temp_req; pr_info("Disabling bearer <%s>\n", b_ptr->name); spin_lock_bh(&b_ptr->lock); @@ -468,9 +469,13 @@ static void bearer_disable(struct tipc_bearer *b_ptr) list_for_each_entry_safe(l_ptr, temp_l_ptr, &b_ptr->links, link_list) { tipc_link_delete(l_ptr); } - if (b_ptr->link_req) - tipc_disc_delete(b_ptr->link_req); + temp_req = b_ptr->link_req; + b_ptr->link_req = NULL; spin_unlock_bh(&b_ptr->lock); + + if (temp_req) + tipc_disc_delete(temp_req); + memset(b_ptr, 0, sizeof(struct tipc_bearer)); } -- cgit v1.2.3-18-g5258 From ac4f9599362475662efb6efbb334cbcec98d4778 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 9 Aug 2013 15:09:08 +0200 Subject: net: sctp: sctp_assoc_control_transport: fix MTU size in SCTP_PF state The SCTP Quick failover draft [1] section 5.1, point 5 says that the cwnd should be 1 MTU. So, instead of 1, set it to 1 MTU. [1] https://tools.ietf.org/html/draft-nishida-tsvwg-sctp-failover-05 Reported-by: Karl Heiss Signed-off-by: Daniel Borkmann Cc: Neil Horman Acked-by: Vlad Yasevich Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/associola.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sctp/associola.c b/net/sctp/associola.c index bce5b79662a..ab67efc64b2 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -846,12 +846,12 @@ void sctp_assoc_control_transport(struct sctp_association *asoc, else spc_state = SCTP_ADDR_AVAILABLE; /* Don't inform ULP about transition from PF to - * active state and set cwnd to 1, see SCTP + * active state and set cwnd to 1 MTU, see SCTP * Quick failover draft section 5.1, point 5 */ if (transport->state == SCTP_PF) { ulp_notify = false; - transport->cwnd = 1; + transport->cwnd = asoc->pathmtu; } transport->state = SCTP_ACTIVE; break; -- cgit v1.2.3-18-g5258 From 771085d6bf3c52de29fc213e5bad07a82e57c23e Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 9 Aug 2013 16:25:21 +0200 Subject: net: sctp: sctp_transport_destroy{, _rcu}: fix potential pointer corruption Probably this one is quite unlikely to be triggered, but it's more safe to do the call_rcu() at the end after we have dropped the reference on the asoc and freed sctp packet chunks. The reason why is because in sctp_transport_destroy_rcu() the transport is being kfree()'d, and if we're unlucky enough we could run into corrupted pointers. Probably that's more of theoretical nature, but it's safer to have this simple fix. Introduced by commit 8c98653f ("sctp: sctp_close: fix release of bindings for deferred call_rcu's"). I also did the 8c98653f regression test and it's fine that way. Signed-off-by: Daniel Borkmann Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/transport.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sctp/transport.c b/net/sctp/transport.c index bdbbc3fd7c1..8fdd16046d6 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -181,12 +181,12 @@ static void sctp_transport_destroy(struct sctp_transport *transport) return; } - call_rcu(&transport->rcu, sctp_transport_destroy_rcu); - sctp_packet_free(&transport->packet); if (transport->asoc) sctp_association_put(transport->asoc); + + call_rcu(&transport->rcu, sctp_transport_destroy_rcu); } /* Start T3_rtx timer if it is not already running and update the heartbeat -- cgit v1.2.3-18-g5258 From 58ad436fcf49810aa006016107f494c9ac9013db Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 13 Aug 2013 09:04:05 +0200 Subject: genetlink: fix family dump race When dumping generic netlink families, only the first dump call is locked with genl_lock(), which protects the list of families, and thus subsequent calls can access the data without locking, racing against family addition/removal. This can cause a crash. Fix it - the locking needs to be conditional because the first time around it's already locked. A similar bug was reported to me on an old kernel (3.4.47) but the exact scenario that happened there is no longer possible, on those kernels the first round wasn't locked either. Looking at the current code I found the race described above, which had also existed on the old kernel. Cc: stable@vger.kernel.org Reported-by: Andrei Otcheretianski Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- net/netlink/genetlink.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'net') diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 512718adb0d..f85f8a2ad6c 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -789,6 +789,10 @@ static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb) struct net *net = sock_net(skb->sk); int chains_to_skip = cb->args[0]; int fams_to_skip = cb->args[1]; + bool need_locking = chains_to_skip || fams_to_skip; + + if (need_locking) + genl_lock(); for (i = chains_to_skip; i < GENL_FAM_TAB_SIZE; i++) { n = 0; @@ -810,6 +814,9 @@ errout: cb->args[0] = i; cb->args[1] = n; + if (need_locking) + genl_unlock(); + return skb->len; } -- cgit v1.2.3-18-g5258 From 4221f40513233fa8edeef7fc82e44163fde03b9b Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Tue, 13 Aug 2013 01:41:06 -0700 Subject: ip_tunnel: Do not use inner ip-header-id for tunnel ip-header-id. Using inner-id for tunnel id is not safe in some rare cases. E.g. packets coming from multiple sources entering same tunnel can have same id. Therefore on tunnel packet receive we could have packets from two different stream but with same source and dst IP with same ip-id which could confuse ip packet reassembly. Following patch reverts optimization from commit 490ab08127 (IP_GRE: Fix IP-Identification.) CC: Jarno Rajahalme CC: Ansis Atteka Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/ipv4/ip_tunnel_core.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 7167b08977d..850525b3489 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -76,9 +76,7 @@ int iptunnel_xmit(struct net *net, struct rtable *rt, iph->daddr = dst; iph->saddr = src; iph->ttl = ttl; - tunnel_ip_select_ident(skb, - (const struct iphdr *)skb_inner_network_header(skb), - &rt->dst); + __ip_select_ident(iph, &rt->dst, (skb_shinfo(skb)->gso_segs ?: 1) - 1); err = ip_local_out(skb); if (unlikely(net_xmit_eval(err))) -- cgit v1.2.3-18-g5258 From 3e805ad288c524bb65aad3f1e004402223d3d504 Mon Sep 17 00:00:00 2001 From: Asbjoern Sloth Toennesen Date: Mon, 12 Aug 2013 16:30:09 +0000 Subject: rtnetlink: rtnl_bridge_getlink: Call nlmsg_find_attr() with ifinfomsg header Fix the iproute2 command `bridge vlan show`, after switching from rtgenmsg to ifinfomsg. Let's start with a little history: Feb 20: Vlad Yasevich got his VLAN-aware bridge patchset included in the 3.9 merge window. In the kernel commit 6cbdceeb, he added attribute support to bridge GETLINK requests sent with rtgenmsg. Mar 6th: Vlad got this iproute2 reference implementation of the bridge vlan netlink interface accepted (iproute2 9eff0e5c) Apr 25th: iproute2 switched from using rtgenmsg to ifinfomsg (63338dca) http://patchwork.ozlabs.org/patch/239602/ http://marc.info/?t=136680900700007 Apr 28th: Linus released 3.9 Apr 30th: Stephen released iproute2 3.9.0 The `bridge vlan show` command haven't been working since the switch to ifinfomsg, or in a released version of iproute2. Since the kernel side only supports rtgenmsg, which iproute2 switched away from just prior to the iproute2 3.9.0 release. I haven't been able to find any documentation, about neither rtgenmsg nor ifinfomsg, and in which situation to use which, but kernel commit 88c5b5ce seams to suggest that ifinfomsg should be used. Fixing this in kernel will break compatibility, but I doubt that anybody have been using it due to this bug in the user space reference implementation, at least not without noticing this bug. That said the functionality is still fully functional in 3.9, when reversing iproute2 commit 63338dca. This could also be fixed in iproute2, but thats an ugly patch that would reintroduce rtgenmsg in iproute2, and from searching in netdev it seams like rtgenmsg usage is discouraged. I'm assuming that the only reason that Vlad implemented the kernel side to use rtgenmsg, was because iproute2 was using it at the time. Signed-off-by: Asbjoern Sloth Toennesen Reviewed-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 82d96852712..ca198c1d1d3 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2384,7 +2384,7 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb) struct nlattr *extfilt; u32 filter_mask = 0; - extfilt = nlmsg_find_attr(cb->nlh, sizeof(struct rtgenmsg), + extfilt = nlmsg_find_attr(cb->nlh, sizeof(struct ifinfomsg), IFLA_EXT_MASK); if (extfilt) filter_mask = nla_get_u32(extfilt); -- cgit v1.2.3-18-g5258 From 30444e981ba28e892c439017fbc011d867f02a7d Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Mon, 13 May 2013 08:41:06 -0700 Subject: openvswitch: Fix bad merge resolution. git silently included an extra hunk in vport_cmd_set() during automatic merging. This code is unreachable so it does not actually introduce a problem but it is clearly incorrect. Signed-off-by: Jesse Gross --- net/openvswitch/datapath.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'net') diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index f7e3a0d84c4..f2ed7600084 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -2076,9 +2076,6 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info) ovs_notify(reply, info, &ovs_dp_vport_multicast_group); return 0; - rtnl_unlock(); - return 0; - exit_free: kfree_skb(reply); exit_unlock: -- cgit v1.2.3-18-g5258 From 42415c90ceaf50c792e29823e359463bc6d4ee05 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Tue, 30 Jul 2013 15:44:14 -0700 Subject: openvswitch: Use correct type while allocating flex array. Flex array is used to allocate hash buckets which is type struct hlist_head, but we use `struct hlist_head *` to calculate array size. Since hlist_head is of size pointer it works fine. Following patch use correct type. Signed-off-by: Pravin B Shelar Signed-off-by: Jesse Gross --- net/openvswitch/flow.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 5c519b121e1..1aa84dc5877 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -240,7 +240,7 @@ static struct flex_array *alloc_buckets(unsigned int n_buckets) struct flex_array *buckets; int i, err; - buckets = flex_array_alloc(sizeof(struct hlist_head *), + buckets = flex_array_alloc(sizeof(struct hlist_head), n_buckets, GFP_KERNEL); if (!buckets) return NULL; -- cgit v1.2.3-18-g5258 From 36bf5cc66d60868bcc10aff209defed5a7b71c1d Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Wed, 14 Aug 2013 15:50:36 -0700 Subject: openvswitch: Reset tunnel key between input and output. It doesn't make sense to output a tunnel packet using the same parameters that it was received with since that will generally just result in the packet going back to us. As a result, userspace assumes that the tunnel key is cleared when transitioning through the switch. In the majority of cases this doesn't matter since a packet is either going to a tunnel port (in which the key is overwritten with new values) or to a non-tunnel port (in which case the key is ignored). However, it's theoreticaly possible that userspace could rely on the documented behavior, so this corrects it. Signed-off-by: Jesse Gross --- net/openvswitch/actions.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 22c5f399f1c..ab101f71544 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -535,6 +535,7 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb) { struct sw_flow_actions *acts = rcu_dereference(OVS_CB(skb)->flow->sf_acts); + OVS_CB(skb)->tun_key = NULL; return do_execute_actions(dp, skb, acts->actions, acts->actions_len, false); } -- cgit v1.2.3-18-g5258 From 8a8e3d84b1719a56f9151909e80ea6ebc5b8e318 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Wed, 14 Aug 2013 23:47:11 +0200 Subject: net_sched: restore "linklayer atm" handling commit 56b765b79 ("htb: improved accuracy at high rates") broke the "linklayer atm" handling. tc class add ... htb rate X ceil Y linklayer atm The linklayer setting is implemented by modifying the rate table which is send to the kernel. No direct parameter were transferred to the kernel indicating the linklayer setting. The commit 56b765b79 ("htb: improved accuracy at high rates") removed the use of the rate table system. To keep compatible with older iproute2 utils, this patch detects the linklayer by parsing the rate table. It also supports future versions of iproute2 to send this linklayer parameter to the kernel directly. This is done by using the __reserved field in struct tc_ratespec, to convey the choosen linklayer option, but only using the lower 4 bits of this field. Linklayer detection is limited to speeds below 100Mbit/s, because at high rates the rtab is gets too inaccurate, so bad that several fields contain the same values, this resembling the ATM detect. Fields even start to contain "0" time to send, e.g. at 1000Mbit/s sending a 96 bytes packet cost "0", thus the rtab have been more broken than we first realized. Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- net/sched/sch_api.c | 41 +++++++++++++++++++++++++++++++++++++++++ net/sched/sch_generic.c | 1 + net/sched/sch_htb.c | 13 +++++++++++++ 3 files changed, 55 insertions(+) (limited to 'net') diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 281c1bded1f..51b968d3feb 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -285,6 +285,45 @@ static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind) return q; } +/* The linklayer setting were not transferred from iproute2, in older + * versions, and the rate tables lookup systems have been dropped in + * the kernel. To keep backward compatible with older iproute2 tc + * utils, we detect the linklayer setting by detecting if the rate + * table were modified. + * + * For linklayer ATM table entries, the rate table will be aligned to + * 48 bytes, thus some table entries will contain the same value. The + * mpu (min packet unit) is also encoded into the old rate table, thus + * starting from the mpu, we find low and high table entries for + * mapping this cell. If these entries contain the same value, when + * the rate tables have been modified for linklayer ATM. + * + * This is done by rounding mpu to the nearest 48 bytes cell/entry, + * and then roundup to the next cell, calc the table entry one below, + * and compare. + */ +static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab) +{ + int low = roundup(r->mpu, 48); + int high = roundup(low+1, 48); + int cell_low = low >> r->cell_log; + int cell_high = (high >> r->cell_log) - 1; + + /* rtab is too inaccurate at rates > 100Mbit/s */ + if ((r->rate > (100000000/8)) || (rtab[0] == 0)) { + pr_debug("TC linklayer: Giving up ATM detection\n"); + return TC_LINKLAYER_ETHERNET; + } + + if ((cell_high > cell_low) && (cell_high < 256) + && (rtab[cell_low] == rtab[cell_high])) { + pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n", + cell_low, cell_high, rtab[cell_high]); + return TC_LINKLAYER_ATM; + } + return TC_LINKLAYER_ETHERNET; +} + static struct qdisc_rate_table *qdisc_rtab_list; struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *tab) @@ -308,6 +347,8 @@ struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *ta rtab->rate = *r; rtab->refcnt = 1; memcpy(rtab->data, nla_data(tab), 1024); + if (r->linklayer == TC_LINKLAYER_UNAWARE) + r->linklayer = __detect_linklayer(r, rtab->data); rtab->next = qdisc_rtab_list; qdisc_rtab_list = rtab; } diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index eeb8276d7a8..48be3d5c0d9 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -909,6 +909,7 @@ void psched_ratecfg_precompute(struct psched_ratecfg *r, memset(r, 0, sizeof(*r)); r->overhead = conf->overhead; r->rate_bytes_ps = conf->rate; + r->linklayer = (conf->linklayer & TC_LINKLAYER_MASK); r->mult = 1; /* * The deal here is to replace a divide by a reciprocal one diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 45e751527df..c2178b15ca6 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1329,6 +1329,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl = (struct htb_class *)*arg, *parent; struct nlattr *opt = tca[TCA_OPTIONS]; + struct qdisc_rate_table *rtab = NULL, *ctab = NULL; struct nlattr *tb[TCA_HTB_MAX + 1]; struct tc_htb_opt *hopt; @@ -1350,6 +1351,18 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, if (!hopt->rate.rate || !hopt->ceil.rate) goto failure; + /* Keeping backward compatible with rate_table based iproute2 tc */ + if (hopt->rate.linklayer == TC_LINKLAYER_UNAWARE) { + rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB]); + if (rtab) + qdisc_put_rtab(rtab); + } + if (hopt->ceil.linklayer == TC_LINKLAYER_UNAWARE) { + ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB]); + if (ctab) + qdisc_put_rtab(ctab); + } + if (!cl) { /* new class */ struct Qdisc *new_q; int prio; -- cgit v1.2.3-18-g5258