From 40b9397a1a61a37917b93e7d57e6f2faf3a086b4 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Wed, 14 May 2014 17:53:35 +0200 Subject: Bluetooth: Fix L2CAP LE debugfs entries permissions 0466 was probably meant to be 0644, there's no reason why everyone except root could write there. Signed-off-by: Samuel Ortiz Signed-off-by: Marcel Holtmann Cc: stable@vger.kernel.org --- net/bluetooth/l2cap_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index a1e5bb7d06e..dc4d301d3a7 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -7519,9 +7519,9 @@ int __init l2cap_init(void) l2cap_debugfs = debugfs_create_file("l2cap", 0444, bt_debugfs, NULL, &l2cap_debugfs_fops); - debugfs_create_u16("l2cap_le_max_credits", 0466, bt_debugfs, + debugfs_create_u16("l2cap_le_max_credits", 0644, bt_debugfs, &le_max_credits); - debugfs_create_u16("l2cap_le_default_mps", 0466, bt_debugfs, + debugfs_create_u16("l2cap_le_default_mps", 0644, bt_debugfs, &le_default_mps); bt_6lowpan_init(); -- cgit v1.2.3-70-g09d2 From f44a5f45f544561302e855e7bd104e5f506ec01b Mon Sep 17 00:00:00 2001 From: Peter Christensen Date: Sat, 24 May 2014 21:40:12 +0200 Subject: ipvs: Fix panic due to non-linear skb Receiving a ICMP response to an IPIP packet in a non-linear skb could cause a kernel panic in __skb_pull. The problem was introduced in commit f2edb9f7706dcb2c0d9a362b2ba849efe3a97f5e ("ipvs: implement passive PMTUD for IPIP packets"). Signed-off-by: Peter Christensen Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_core.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 4f26ee46b51..3d2d2c8108c 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1392,15 +1392,19 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) if (ipip) { __be32 info = ic->un.gateway; + __u8 type = ic->type; + __u8 code = ic->code; /* Update the MTU */ if (ic->type == ICMP_DEST_UNREACH && ic->code == ICMP_FRAG_NEEDED) { struct ip_vs_dest *dest = cp->dest; u32 mtu = ntohs(ic->un.frag.mtu); + __be16 frag_off = cih->frag_off; /* Strip outer IP and ICMP, go to IPIP header */ - __skb_pull(skb, ihl + sizeof(_icmph)); + if (pskb_pull(skb, ihl + sizeof(_icmph)) == NULL) + goto ignore_ipip; offset2 -= ihl + sizeof(_icmph); skb_reset_network_header(skb); IP_VS_DBG(12, "ICMP for IPIP %pI4->%pI4: mtu=%u\n", @@ -1408,7 +1412,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) ipv4_update_pmtu(skb, dev_net(skb->dev), mtu, 0, 0, 0, 0); /* Client uses PMTUD? */ - if (!(cih->frag_off & htons(IP_DF))) + if (!(frag_off & htons(IP_DF))) goto ignore_ipip; /* Prefer the resulting PMTU */ if (dest) { @@ -1427,12 +1431,13 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) /* Strip outer IP, ICMP and IPIP, go to IP header of * original request. */ - __skb_pull(skb, offset2); + if (pskb_pull(skb, offset2) == NULL) + goto ignore_ipip; skb_reset_network_header(skb); IP_VS_DBG(12, "Sending ICMP for %pI4->%pI4: t=%u, c=%u, i=%u\n", &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr, - ic->type, ic->code, ntohl(info)); - icmp_send(skb, ic->type, ic->code, info); + type, code, ntohl(info)); + icmp_send(skb, type, code, info); /* ICMP can be shorter but anyways, account it */ ip_vs_out_stats(cp, skb); -- cgit v1.2.3-70-g09d2 From af0a171c07174661db71f92e442d4e6e90984b77 Mon Sep 17 00:00:00 2001 From: Marek Lindner Date: Thu, 24 Apr 2014 03:41:26 +0800 Subject: batman-adv: fix NULL pointer dereferences MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Was introduced with 4c8755d69cbde2ec464a39c932aed0a83f9ff89f ("batman-adv: Send multicast packets to nodes with a WANT_ALL flag") Reported-by: Sven Eckelmann Signed-off-by: Marek Lindner Acked-by: Antonio Quartulli Signed-off-by: Linus Lüssing Signed-off-by: Antonio Quartulli --- net/batman-adv/multicast.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index 8c7ca811de6..96b66fd30f9 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -415,7 +415,7 @@ batadv_mcast_forw_ipv4_node_get(struct batadv_priv *bat_priv) hlist_for_each_entry_rcu(tmp_orig_node, &bat_priv->mcast.want_all_ipv4_list, mcast_want_all_ipv4_node) { - if (!atomic_inc_not_zero(&orig_node->refcount)) + if (!atomic_inc_not_zero(&tmp_orig_node->refcount)) continue; orig_node = tmp_orig_node; @@ -442,7 +442,7 @@ batadv_mcast_forw_ipv6_node_get(struct batadv_priv *bat_priv) hlist_for_each_entry_rcu(tmp_orig_node, &bat_priv->mcast.want_all_ipv6_list, mcast_want_all_ipv6_node) { - if (!atomic_inc_not_zero(&orig_node->refcount)) + if (!atomic_inc_not_zero(&tmp_orig_node->refcount)) continue; orig_node = tmp_orig_node; @@ -493,7 +493,7 @@ batadv_mcast_forw_unsnoop_node_get(struct batadv_priv *bat_priv) hlist_for_each_entry_rcu(tmp_orig_node, &bat_priv->mcast.want_all_unsnoopables_list, mcast_want_all_unsnoopables_node) { - if (!atomic_inc_not_zero(&orig_node->refcount)) + if (!atomic_inc_not_zero(&tmp_orig_node->refcount)) continue; orig_node = tmp_orig_node; -- cgit v1.2.3-70-g09d2 From 4b9b1cdf83c4facba89e0646aeac8ead679851b8 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 28 May 2014 18:03:48 +0200 Subject: net: fix wrong mac_len calculation for vlans After 1e785f48d29a ("net: Start with correct mac_len in skb_network_protocol") skb->mac_len is used as a start of the calculation in skb_network_protocol() but that is not always correct. If skb->protocol == 8021Q/AD, usually the vlan header is already inserted in the skb (i.e. vlan reorder hdr == 0). Usually when the packet enters dev_hard_xmit it has mac_len == 0 so we take 2 bytes from the destination mac address (skb->data + VLAN_HLEN) as a type in skb_network_protocol() and return vlan_depth == 4. In the case where TSO is off, then the mac_len is set but it's == 18 (ETH_HLEN + VLAN_HLEN), so skb_network_protocol() returns a type from inside the packet and offset == 22. Also make vlan_depth unsigned as suggested before. As suggested by Eric Dumazet, move the while() loop in the if() so we can avoid additional testing in fast path. Here are few netperf tests + debug printk's to illustrate: cat netperf.tso-on.reorder-on.bugged - Vlan -> device (reorder on, default, this case is okay) MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 192.168.3.1 () port 0 AF_INET Recv Send Send Socket Socket Message Elapsed Size Size Size Time Throughput bytes bytes bytes secs. 10^6bits/sec 87380 16384 16384 10.00 7111.54 [ 81.605435] skb->len 65226 skb->gso_size 1448 skb->proto 0x800 skb->mac_len 0 vlan_depth 0 type 0x800 - Vlan -> device (reorder off, bad) cat netperf.tso-on.reorder-off.bugged MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 192.168.3.1 () port 0 AF_INET Recv Send Send Socket Socket Message Elapsed Size Size Size Time Throughput bytes bytes bytes secs. 10^6bits/sec 87380 16384 16384 10.00 241.35 [ 204.578332] skb->len 1518 skb->gso_size 0 skb->proto 0x8100 skb->mac_len 0 vlan_depth 4 type 0x5301 0x5301 are the last two bytes of the destination mac. And if we stop TSO, we may get even the following: [ 83.343156] skb->len 2966 skb->gso_size 1448 skb->proto 0x8100 skb->mac_len 18 vlan_depth 22 type 0xb84 Because mac_len already accounts for VLAN_HLEN. After the fix: cat netperf.tso-on.reorder-off.fixed MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 192.168.3.1 () port 0 AF_INET Recv Send Send Socket Socket Message Elapsed Size Size Size Time Throughput bytes bytes bytes secs. 10^6bits/sec 87380 16384 16384 10.01 5001.46 [ 81.888489] skb->len 65230 skb->gso_size 1448 skb->proto 0x8100 skb->mac_len 0 vlan_depth 18 type 0x800 CC: Vlad Yasevich CC: Eric Dumazet CC: Daniel Borkman CC: David S. Miller Fixes:1e785f48d29a ("net: Start with correct mac_len in skb_network_protocol") Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/core/dev.c | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 9abc503b19b..fb8b0546485 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2283,8 +2283,8 @@ EXPORT_SYMBOL(skb_checksum_help); __be16 skb_network_protocol(struct sk_buff *skb, int *depth) { + unsigned int vlan_depth = skb->mac_len; __be16 type = skb->protocol; - int vlan_depth = skb->mac_len; /* Tunnel gso handlers can set protocol to ethernet. */ if (type == htons(ETH_P_TEB)) { @@ -2297,15 +2297,30 @@ __be16 skb_network_protocol(struct sk_buff *skb, int *depth) type = eth->h_proto; } - while (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) { - struct vlan_hdr *vh; - - if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN))) - return 0; - - vh = (struct vlan_hdr *)(skb->data + vlan_depth); - type = vh->h_vlan_encapsulated_proto; - vlan_depth += VLAN_HLEN; + /* if skb->protocol is 802.1Q/AD then the header should already be + * present at mac_len - VLAN_HLEN (if mac_len > 0), or at + * ETH_HLEN otherwise + */ + if (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) { + if (vlan_depth) { + if (unlikely(WARN_ON(vlan_depth < VLAN_HLEN))) + return 0; + vlan_depth -= VLAN_HLEN; + } else { + vlan_depth = ETH_HLEN; + } + do { + struct vlan_hdr *vh; + + if (unlikely(!pskb_may_pull(skb, + vlan_depth + VLAN_HLEN))) + return 0; + + vh = (struct vlan_hdr *)(skb->data + vlan_depth); + type = vh->h_vlan_encapsulated_proto; + vlan_depth += VLAN_HLEN; + } while (type == htons(ETH_P_8021Q) || + type == htons(ETH_P_8021AD)); } *depth = vlan_depth; -- cgit v1.2.3-70-g09d2 From c65c7a306610ee7c13669a8f5601b472c19dc6f1 Mon Sep 17 00:00:00 2001 From: Jon Maxwell Date: Thu, 29 May 2014 17:27:16 +1000 Subject: bridge: notify user space after fdb update There has been a number incidents recently where customers running KVM have reported that VM hosts on different Hypervisors are unreachable. Based on pcap traces we found that the bridge was broadcasting the ARP request out onto the network. However some NICs have an inbuilt switch which on occasions were broadcasting the VMs ARP request back through the physical NIC on the Hypervisor. This resulted in the bridge changing ports and incorrectly learning that the VMs mac address was external. As a result the ARP reply was directed back onto the external network and VM never updated it's ARP cache. This patch will notify the bridge command, after a fdb has been updated to identify such port toggling. Signed-off-by: Jon Maxwell Reviewed-by: Jiri Pirko Acked-by: Toshiaki Makita Acked-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/bridge/br_fdb.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 9203d5a1943..474d36f9334 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -487,6 +487,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, { struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)]; struct net_bridge_fdb_entry *fdb; + bool fdb_modified = false; /* some users want to always flood. */ if (hold_time(br) == 0) @@ -507,10 +508,15 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, source->dev->name); } else { /* fastpath: update of existing entry */ - fdb->dst = source; + if (unlikely(source != fdb->dst)) { + fdb->dst = source; + fdb_modified = true; + } fdb->updated = jiffies; if (unlikely(added_by_user)) fdb->added_by_user = 1; + if (unlikely(fdb_modified)) + fdb_notify(br, fdb, RTM_NEWNEIGH); } } else { spin_lock(&br->hash_lock); -- cgit v1.2.3-70-g09d2 From e0d7968ab6c8bce2437b36fa7f04117e333f196d Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Mon, 26 May 2014 15:15:53 +0900 Subject: bridge: Prevent insertion of FDB entry with disallowed vlan br_handle_local_finish() is allowing us to insert an FDB entry with disallowed vlan. For example, when port 1 and 2 are communicating in vlan 10, and even if vlan 10 is disallowed on port 3, port 3 can interfere with their communication by spoofed src mac address with vlan id 10. Note: Even if it is judged that a frame should not be learned, it should not be dropped because it is destined for not forwarding layer but higher layer. See IEEE 802.1Q-2011 8.13.10. Signed-off-by: Toshiaki Makita Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/bridge/br_input.c | 4 ++-- net/bridge/br_private.h | 7 +++++++ net/bridge/br_vlan.c | 28 ++++++++++++++++++++++++++++ 3 files changed, 37 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 7985deaff52..04d6348fd53 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -147,8 +147,8 @@ static int br_handle_local_finish(struct sk_buff *skb) struct net_bridge_port *p = br_port_get_rcu(skb->dev); u16 vid = 0; - br_vlan_get_tag(skb, &vid); - if (p->flags & BR_LEARNING) + /* check if vlan is allowed, to avoid spoofing */ + if (p->flags & BR_LEARNING && br_should_learn(p, skb, &vid)) br_fdb_update(p->br, p, eth_hdr(skb)->h_source, vid, false); return 0; /* process further */ } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 06811d79f89..59d3a85c587 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -581,6 +581,7 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, struct sk_buff *skb, u16 *vid); bool br_allowed_egress(struct net_bridge *br, const struct net_port_vlans *v, const struct sk_buff *skb); +bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid); struct sk_buff *br_handle_vlan(struct net_bridge *br, const struct net_port_vlans *v, struct sk_buff *skb); @@ -648,6 +649,12 @@ static inline bool br_allowed_egress(struct net_bridge *br, return true; } +static inline bool br_should_learn(struct net_bridge_port *p, + struct sk_buff *skb, u16 *vid) +{ + return true; +} + static inline struct sk_buff *br_handle_vlan(struct net_bridge *br, const struct net_port_vlans *v, struct sk_buff *skb) diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 4a371610278..5fee2feaf29 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -241,6 +241,34 @@ bool br_allowed_egress(struct net_bridge *br, return false; } +/* Called under RCU */ +bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid) +{ + struct net_bridge *br = p->br; + struct net_port_vlans *v; + + if (!br->vlan_enabled) + return true; + + v = rcu_dereference(p->vlan_info); + if (!v) + return false; + + br_vlan_get_tag(skb, vid); + if (!*vid) { + *vid = br_get_pvid(v); + if (*vid == VLAN_N_VID) + return false; + + return true; + } + + if (test_bit(*vid, v->vlan_bitmap)) + return true; + + return false; +} + /* Must be protected by RTNL. * Must be called with vid in range from 1 to 4094 inclusive. */ -- cgit v1.2.3-70-g09d2 From 39c36094d78c39e038c1e499b2364e13bce36f54 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 29 May 2014 08:45:14 -0700 Subject: net: fix inet_getid() and ipv6_select_ident() bugs I noticed we were sending wrong IPv4 ID in TCP flows when MTU discovery is disabled. Note how GSO/TSO packets do not have monotonically incrementing ID. 06:37:41.575531 IP (id 14227, proto: TCP (6), length: 4396) 06:37:41.575534 IP (id 14272, proto: TCP (6), length: 65212) 06:37:41.575544 IP (id 14312, proto: TCP (6), length: 57972) 06:37:41.575678 IP (id 14317, proto: TCP (6), length: 7292) 06:37:41.575683 IP (id 14361, proto: TCP (6), length: 63764) It appears I introduced this bug in linux-3.1. inet_getid() must return the old value of peer->ip_id_count, not the new one. Lets revert this part, and remove the prevention of a null identification field in IPv6 Fragment Extension Header, which is dubious and not even done properly. Fixes: 87c48fa3b463 ("ipv6: make fragment identifications less predictable") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inetpeer.h | 9 +-------- net/ipv6/output_core.c | 11 +++-------- 2 files changed, 4 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index 6efe73c79c5..058271bde27 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -177,16 +177,9 @@ static inline void inet_peer_refcheck(const struct inet_peer *p) /* can be called with or without local BH being disabled */ static inline int inet_getid(struct inet_peer *p, int more) { - int old, new; more++; inet_peer_refcheck(p); - do { - old = atomic_read(&p->ip_id_count); - new = old + more; - if (!new) - new = 1; - } while (atomic_cmpxchg(&p->ip_id_count, old, new) != old); - return new; + return atomic_add_return(more, &p->ip_id_count) - more; } #endif /* _NET_INETPEER_H */ diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index 6313abd53c9..56596ce390a 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -12,7 +12,7 @@ void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt) { static atomic_t ipv6_fragmentation_id; struct in6_addr addr; - int old, new; + int ident; #if IS_ENABLED(CONFIG_IPV6) struct inet_peer *peer; @@ -26,15 +26,10 @@ void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt) return; } #endif - do { - old = atomic_read(&ipv6_fragmentation_id); - new = old + 1; - if (!new) - new = 1; - } while (atomic_cmpxchg(&ipv6_fragmentation_id, old, new) != old); + ident = atomic_inc_return(&ipv6_fragmentation_id); addr = rt->rt6i_dst.addr; - addr.s6_addr32[0] ^= (__force __be32)new; + addr.s6_addr32[0] ^= (__force __be32)ident; fhdr->identification = htonl(secure_ipv6_id(addr.s6_addr32)); } EXPORT_SYMBOL(ipv6_select_ident); -- cgit v1.2.3-70-g09d2 From 2d7a85f4b06e9c27ff629f07a524c48074f07f81 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 30 May 2014 11:04:00 -0700 Subject: netlink: Only check file credentials for implicit destinations It was possible to get a setuid root or setcap executable to write to it's stdout or stderr (which has been set made a netlink socket) and inadvertently reconfigure the networking stack. To prevent this we check that both the creator of the socket and the currentl applications has permission to reconfigure the network stack. Unfortunately this breaks Zebra which always uses sendto/sendmsg and creates it's socket without any privileges. To keep Zebra working don't bother checking if the creator of the socket has privilege when a destination address is specified. Instead rely exclusively on the privileges of the sender of the socket. Note from Andy: This is exactly Eric's code except for some comment clarifications and formatting fixes. Neither I nor, I think, anyone else is thrilled with this approach, but I'm hesitant to wait on a better fix since 3.15 is almost here. Note to stable maintainers: This is a mess. An earlier series of patches in 3.15 fix a rather serious security issue (CVE-2014-0181), but they did so in a way that breaks Zebra. The offending series includes: commit aa4cf9452f469f16cea8c96283b641b4576d4a7b Author: Eric W. Biederman Date: Wed Apr 23 14:28:03 2014 -0700 net: Add variants of capable for use on netlink messages If a given kernel version is missing that series of fixes, it's probably worth backporting it and this patch. if that series is present, then this fix is critical if you care about Zebra. Cc: stable@vger.kernel.org Signed-off-by: "Eric W. Biederman" Signed-off-by: Andy Lutomirski Signed-off-by: David S. Miller --- include/linux/netlink.h | 7 ++++--- net/netlink/af_netlink.c | 7 ++++++- 2 files changed, 10 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index f64b01787dd..034cda789a1 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -16,9 +16,10 @@ static inline struct nlmsghdr *nlmsg_hdr(const struct sk_buff *skb) } enum netlink_skb_flags { - NETLINK_SKB_MMAPED = 0x1, /* Packet data is mmaped */ - NETLINK_SKB_TX = 0x2, /* Packet was sent by userspace */ - NETLINK_SKB_DELIVERED = 0x4, /* Packet was delivered */ + NETLINK_SKB_MMAPED = 0x1, /* Packet data is mmaped */ + NETLINK_SKB_TX = 0x2, /* Packet was sent by userspace */ + NETLINK_SKB_DELIVERED = 0x4, /* Packet was delivered */ + NETLINK_SKB_DST = 0x8, /* Dst set in sendto or sendmsg */ }; struct netlink_skb_parms { diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 81dca96d2be..f22757a29cd 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1373,7 +1373,9 @@ retry: bool __netlink_ns_capable(const struct netlink_skb_parms *nsp, struct user_namespace *user_ns, int cap) { - return sk_ns_capable(nsp->sk, user_ns, cap); + return ((nsp->flags & NETLINK_SKB_DST) || + file_ns_capable(nsp->sk->sk_socket->file, user_ns, cap)) && + ns_capable(user_ns, cap); } EXPORT_SYMBOL(__netlink_ns_capable); @@ -2293,6 +2295,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, struct sk_buff *skb; int err; struct scm_cookie scm; + u32 netlink_skb_flags = 0; if (msg->msg_flags&MSG_OOB) return -EOPNOTSUPP; @@ -2314,6 +2317,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, if ((dst_group || dst_portid) && !netlink_allowed(sock, NL_CFG_F_NONROOT_SEND)) goto out; + netlink_skb_flags |= NETLINK_SKB_DST; } else { dst_portid = nlk->dst_portid; dst_group = nlk->dst_group; @@ -2343,6 +2347,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, NETLINK_CB(skb).portid = nlk->portid; NETLINK_CB(skb).dst_group = dst_group; NETLINK_CB(skb).creds = siocb->scm->creds; + NETLINK_CB(skb).flags = netlink_skb_flags; err = -EFAULT; if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) { -- cgit v1.2.3-70-g09d2 From 0cfa5c07d6d1d7f8e710fc671c5ba1ce85e09fa4 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Fri, 30 May 2014 15:25:59 -0700 Subject: tcp: fix cwnd undo on DSACK in F-RTO This bug is discovered by an recent F-RTO issue on tcpm list https://www.ietf.org/mail-archive/web/tcpm/current/msg08794.html The bug is that currently F-RTO does not use DSACK to undo cwnd in certain cases: upon receiving an ACK after the RTO retransmission in F-RTO, and the ACK has DSACK indicating the retransmission is spurious, the sender only calls tcp_try_undo_loss() if some never retransmisted data is sacked (FLAG_ORIG_DATA_SACKED). The correct behavior is to unconditionally call tcp_try_undo_loss so the DSACK information is used properly to undo the cwnd reduction. Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index d6b46eb2f94..3a26b3b23f1 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2684,13 +2684,12 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack) bool recovered = !before(tp->snd_una, tp->high_seq); if (tp->frto) { /* F-RTO RFC5682 sec 3.1 (sack enhanced version). */ - if (flag & FLAG_ORIG_SACK_ACKED) { - /* Step 3.b. A timeout is spurious if not all data are - * lost, i.e., never-retransmitted data are (s)acked. - */ - tcp_try_undo_loss(sk, true); + /* Step 3.b. A timeout is spurious if not all data are + * lost, i.e., never-retransmitted data are (s)acked. + */ + if (tcp_try_undo_loss(sk, flag & FLAG_ORIG_SACK_ACKED)) return; - } + if (after(tp->snd_nxt, tp->high_seq) && (flag & FLAG_DATA_SACKED || is_dupack)) { tp->frto = 0; /* Loss was real: 2nd part of step 3.a */ -- cgit v1.2.3-70-g09d2 From 418c96ac151a16a5094a95d14252c92c1d47ec67 Mon Sep 17 00:00:00 2001 From: Leon Yu Date: Sun, 1 Jun 2014 05:37:25 +0000 Subject: net: filter: fix possible memory leak in __sk_prepare_filter() __sk_prepare_filter() was reworked in commit bd4cf0ed3 (net: filter: rework/optimize internal BPF interpreter's instruction set) so that it should have uncharged memory once things went wrong. However that work isn't complete. Error is handled only in __sk_migrate_filter() while memory can still leak in the error path right after sk_chk_filter(). Fixes: bd4cf0ed331a ("net: filter: rework/optimize internal BPF interpreter's instruction set") Signed-off-by: Leon Yu Acked-by: Alexei Starovoitov Tested-by: Alexei Starovoitov Signed-off-by: David S. Miller --- net/core/filter.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/filter.c b/net/core/filter.c index 9d79ca0a6e8..4aec7b93f1a 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1559,8 +1559,13 @@ static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp, fp->jited = 0; err = sk_chk_filter(fp->insns, fp->len); - if (err) + if (err) { + if (sk != NULL) + sk_filter_uncharge(sk, fp); + else + kfree(fp); return ERR_PTR(err); + } /* Probe if we can JIT compile the filter and if so, do * the compilation of the filter. -- cgit v1.2.3-70-g09d2