diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-01-25 11:17:34 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-01-25 11:17:34 -0800 |
commit | 4ba9920e5e9c0e16b5ed24292d45322907bb9035 (patch) | |
tree | 7d023baea59ed0886ded1f0b6d1c6385690b88f7 /net | |
parent | 82c477669a4665eb4e52030792051e0559ee2a36 (diff) | |
parent | 8b662fe70c68282f78482dc272df0c4f355e49f5 (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:
1) BPF debugger and asm tool by Daniel Borkmann.
2) Speed up create/bind in AF_PACKET, also from Daniel Borkmann.
3) Correct reciprocal_divide and update users, from Hannes Frederic
Sowa and Daniel Borkmann.
4) Currently we only have a "set" operation for the hw timestamp socket
ioctl; add a "get" operation to match. From Ben Hutchings.
5) Add better trace events for debugging driver datapath problems, also
from Ben Hutchings.
6) Implement auto corking in TCP, from Eric Dumazet. Basically, if we
have a small send and a previous packet is already in the qdisc or
device queue, defer until TX completion or we get more data.
7) Allow userspace to manage ipv6 temporary addresses, from Jiri Pirko.
8) Add a qdisc bypass option for AF_PACKET sockets, from Daniel
Borkmann.
9) Share IP header compression code between Bluetooth and IEEE802154
layers, from Jukka Rissanen.
10) Fix ipv6 router reachability probing, from Jiri Benc.
11) Allow packets to be captured on macvtap devices, from Vlad Yasevich.
12) Support tunneling in GRO layer, from Jerry Chu.
13) Allow bonding to be configured fully using netlink, from Scott
Feldman.
14) Allow AF_PACKET users to obtain the VLAN TPID, just like they can
already get the TCI. From Atzm Watanabe.
15) New "Heavy Hitter" qdisc, from Terry Lam.
16) Significantly improve the IPSEC support in pktgen, from Fan Du.
17) Allow ipv4 tunnels to cache routes, just like sockets. From Tom
Herbert.
18) Add Proportional Integral Enhanced packet scheduler, from Vijay
Subramanian.
19) Allow openvswitch to use mmap'd netlink, from Thomas Graf.
20) Key TCP metrics blobs also by source address, not just destination
address. From Christoph Paasch.
21) Support 10G in generic phylib. From Andy Fleming.
22) Try to short-circuit GRO flow compares using the device-provided RX
hash, if available. From Tom Herbert.
The wireless and netfilter folks have been busy little bees too.
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (2064 commits)
net/cxgb4: Fix referencing freed adapter
ipv6: reallocate addrconf router for ipv6 address when lo device up
fib_frontend: fix possible NULL pointer dereference
rtnetlink: remove IFLA_BOND_SLAVE definition
rtnetlink: remove check for fill_slave_info in rtnl_have_link_slave_info
qlcnic: update version to 5.3.55
qlcnic: Enhance logic to calculate msix vectors.
qlcnic: Refactor interrupt coalescing code for all adapters.
qlcnic: Update poll controller code path
qlcnic: Interrupt code cleanup
qlcnic: Enhance Tx timeout debugging.
qlcnic: Use bool for rx_mac_learn.
bonding: fix u64 division
rtnetlink: add missing IFLA_BOND_AD_INFO_UNSPEC
sfc: Use the correct maximum TX DMA ring size for SFC9100
Add Shradha Shah as the sfc driver maintainer.
net/vxlan: Share RX skb de-marking and checksum checks with ovs
tulip: cleanup by using ARRAY_SIZE()
ip_tunnel: clear IPCB in ip_tunnel_xmit() in case dst_link_failure() is called
net/cxgb4: Don't retrieve stats during recovery
...
Diffstat (limited to 'net')
511 files changed, 18338 insertions, 10268 deletions
diff --git a/net/802/garp.c b/net/802/garp.c index 5d9630a0eb9..b38ee6dcba4 100644 --- a/net/802/garp.c +++ b/net/802/garp.c @@ -397,7 +397,7 @@ static void garp_join_timer_arm(struct garp_applicant *app) { unsigned long delay; - delay = (u64)msecs_to_jiffies(garp_join_time) * net_random() >> 32; + delay = (u64)msecs_to_jiffies(garp_join_time) * prandom_u32() >> 32; mod_timer(&app->join_timer, jiffies + delay); } diff --git a/net/802/hippi.c b/net/802/hippi.c index 51a1f530417..5ff2a718ddc 100644 --- a/net/802/hippi.c +++ b/net/802/hippi.c @@ -172,14 +172,14 @@ EXPORT_SYMBOL(hippi_mac_addr); int hippi_neigh_setup_dev(struct net_device *dev, struct neigh_parms *p) { /* Never send broadcast/multicast ARP messages */ - p->mcast_probes = 0; + NEIGH_VAR_INIT(p, MCAST_PROBES, 0); /* In IPv6 unicast probes are valid even on NBMA, * because they are encapsulated in normal IPv6 protocol. * Should be a generic flag. */ if (p->tbl->family != AF_INET6) - p->ucast_probes = 0; + NEIGH_VAR_INIT(p, UCAST_PROBES, 0); return 0; } EXPORT_SYMBOL(hippi_neigh_setup_dev); diff --git a/net/802/mrp.c b/net/802/mrp.c index 3ed61621587..72db2785ef2 100644 --- a/net/802/mrp.c +++ b/net/802/mrp.c @@ -583,7 +583,7 @@ static void mrp_join_timer_arm(struct mrp_applicant *app) { unsigned long delay; - delay = (u64)msecs_to_jiffies(mrp_join_time) * net_random() >> 32; + delay = (u64)msecs_to_jiffies(mrp_join_time) * prandom_u32() >> 32; mod_timer(&app->join_timer, jiffies + delay); } diff --git a/net/8021q/Kconfig b/net/8021q/Kconfig index b85a91fa61f..42320180967 100644 --- a/net/8021q/Kconfig +++ b/net/8021q/Kconfig @@ -6,11 +6,11 @@ config VLAN_8021Q tristate "802.1Q/802.1ad VLAN Support" ---help--- Select this and you will be able to create 802.1Q VLAN interfaces - on your ethernet interfaces. 802.1Q VLAN supports almost - everything a regular ethernet interface does, including - firewalling, bridging, and of course IP traffic. 
You will need - the 'vconfig' tool from the VLAN project in order to effectively - use VLANs. See the VLAN web page for more information: + on your Ethernet interfaces. 802.1Q VLAN supports almost + everything a regular Ethernet interface does, including + firewalling, bridging, and of course IP traffic. You will need + the 'ip' utility in order to effectively use VLANs. + See the VLAN web page for more information: <http://www.candelatech.com/~greear/vlan.html> To compile this code as a module, choose M here: the module diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index b3d17d1c49c..ec9909935fb 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -301,7 +301,7 @@ static void vlan_sync_address(struct net_device *dev, !ether_addr_equal(vlandev->dev_addr, dev->dev_addr)) dev_uc_add(dev, vlandev->dev_addr); - memcpy(vlan->real_dev_addr, dev->dev_addr, ETH_ALEN); + ether_addr_copy(vlan->real_dev_addr, dev->dev_addr); } static void vlan_transfer_features(struct net_device *dev, diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 47c908f1f62..de51c48c439 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -61,7 +61,7 @@ static int vlan_dev_rebuild_header(struct sk_buff *skb) pr_debug("%s: unable to resolve type %X addresses\n", dev->name, ntohs(veth->h_vlan_encapsulated_proto)); - memcpy(veth->h_source, dev->dev_addr, ETH_ALEN); + ether_addr_copy(veth->h_source, dev->dev_addr); break; } @@ -303,7 +303,7 @@ static int vlan_dev_open(struct net_device *dev) goto clear_allmulti; } - memcpy(vlan->real_dev_addr, real_dev->dev_addr, ETH_ALEN); + ether_addr_copy(vlan->real_dev_addr, real_dev->dev_addr); if (vlan->flags & VLAN_FLAG_GVRP) vlan_gvrp_request_join(dev); @@ -367,7 +367,7 @@ static int vlan_dev_set_mac_address(struct net_device *dev, void *p) dev_uc_del(real_dev, dev->dev_addr); out: - memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN); + ether_addr_copy(dev->dev_addr, addr->sa_data); return 0; } diff --git a/net/Kconfig b/net/Kconfig index 
d334678c0bd..e411046a62e 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -238,12 +238,19 @@ config XPS depends on SMP default y -config NETPRIO_CGROUP +config CGROUP_NET_PRIO tristate "Network priority cgroup" depends on CGROUPS ---help--- Cgroup subsystem for use in assigning processes to network priorities on - a per-interface basis + a per-interface basis. + +config CGROUP_NET_CLASSID + boolean "Network classid cgroup" + depends on CGROUPS + ---help--- + Cgroup subsystem for use as general purpose socket classid marker that is + being used in cls_cgroup and for netfilter matching. config NET_RX_BUSY_POLL boolean diff --git a/net/Makefile b/net/Makefile index 8fa2f91517f..cbbbe6d657c 100644 --- a/net/Makefile +++ b/net/Makefile @@ -57,7 +57,7 @@ obj-$(CONFIG_CAIF) += caif/ ifneq ($(CONFIG_DCB),) obj-y += dcb/ endif -obj-$(CONFIG_IEEE802154) += ieee802154/ +obj-y += ieee802154/ obj-$(CONFIG_MAC802154) += mac802154/ ifeq ($(CONFIG_NET),y) diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c index 690356fa52b..d27b86dfb0e 100644 --- a/net/appletalk/aarp.c +++ b/net/appletalk/aarp.c @@ -40,6 +40,7 @@ #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/export.h> +#include <linux/etherdevice.h> int sysctl_aarp_expiry_time = AARP_EXPIRY_TIME; int sysctl_aarp_tick_time = AARP_TICK_TIME; @@ -67,7 +68,7 @@ struct aarp_entry { unsigned long expires_at; struct atalk_addr target_addr; struct net_device *dev; - char hwaddr[6]; + char hwaddr[ETH_ALEN]; unsigned short xmit_count; struct aarp_entry *next; }; @@ -134,7 +135,7 @@ static void __aarp_send_query(struct aarp_entry *a) eah->pa_len = AARP_PA_ALEN; eah->function = htons(AARP_REQUEST); - memcpy(eah->hw_src, dev->dev_addr, ETH_ALEN); + ether_addr_copy(eah->hw_src, dev->dev_addr); eah->pa_src_zero = 0; eah->pa_src_net = sat->s_net; @@ -181,7 +182,7 @@ static void aarp_send_reply(struct net_device *dev, struct atalk_addr *us, eah->pa_len = AARP_PA_ALEN; eah->function = htons(AARP_REPLY); - 
memcpy(eah->hw_src, dev->dev_addr, ETH_ALEN); + ether_addr_copy(eah->hw_src, dev->dev_addr); eah->pa_src_zero = 0; eah->pa_src_net = us->s_net; @@ -190,7 +191,7 @@ static void aarp_send_reply(struct net_device *dev, struct atalk_addr *us, if (!sha) memset(eah->hw_dst, '\0', ETH_ALEN); else - memcpy(eah->hw_dst, sha, ETH_ALEN); + ether_addr_copy(eah->hw_dst, sha); eah->pa_dst_zero = 0; eah->pa_dst_net = them->s_net; @@ -232,7 +233,7 @@ static void aarp_send_probe(struct net_device *dev, struct atalk_addr *us) eah->pa_len = AARP_PA_ALEN; eah->function = htons(AARP_PROBE); - memcpy(eah->hw_src, dev->dev_addr, ETH_ALEN); + ether_addr_copy(eah->hw_src, dev->dev_addr); eah->pa_src_zero = 0; eah->pa_src_net = us->s_net; @@ -790,7 +791,7 @@ static int aarp_rcv(struct sk_buff *skb, struct net_device *dev, break; /* We can fill one in - this is good. */ - memcpy(a->hwaddr, ea->hw_src, ETH_ALEN); + ether_addr_copy(a->hwaddr, ea->hw_src); __aarp_resolved(&unresolved[hash], a, hash); if (!unresolved_count) mod_timer(&aarp_timer, diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 7d424ac6e76..02806c6b2ff 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1566,7 +1566,7 @@ static int atalk_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr { struct sock *sk = sock->sk; struct atalk_sock *at = at_sk(sk); - struct sockaddr_at *usat = (struct sockaddr_at *)msg->msg_name; + DECLARE_SOCKADDR(struct sockaddr_at *, usat, msg->msg_name); int flags = msg->msg_flags; int loopback = 0; struct sockaddr_at local_satalk, gsat; @@ -1764,7 +1764,7 @@ static int atalk_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr err = skb_copy_datagram_iovec(skb, offset, msg->msg_iov, copied); if (!err && msg->msg_name) { - struct sockaddr_at *sat = msg->msg_name; + DECLARE_SOCKADDR(struct sockaddr_at *, sat, msg->msg_name); sat->sat_family = AF_APPLETALK; sat->sat_port = ddp->deh_sport; sat->sat_addr.s_node = ddp->deh_snode; diff --git a/net/atm/lec.c 
b/net/atm/lec.c index f23916be18f..5a2f602d07e 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -521,7 +521,7 @@ send_to_lecd(struct lec_priv *priv, atmlec_msg_type type, if (data != NULL) mesg->sizeoftlvs = data->len; if (mac_addr) - memcpy(&mesg->content.normal.mac_addr, mac_addr, ETH_ALEN); + ether_addr_copy(mesg->content.normal.mac_addr, mac_addr); else mesg->content.normal.targetless_le_arp = 1; if (atm_addr) @@ -1565,7 +1565,7 @@ static struct lec_arp_table *make_entry(struct lec_priv *priv, pr_info("LEC: Arp entry kmalloc failed\n"); return NULL; } - memcpy(to_return->mac_addr, mac_addr, ETH_ALEN); + ether_addr_copy(to_return->mac_addr, mac_addr); INIT_HLIST_NODE(&to_return->next); setup_timer(&to_return->timer, lec_arp_expire_arp, (unsigned long)to_return); @@ -1887,7 +1887,8 @@ lec_arp_update(struct lec_priv *priv, const unsigned char *mac_addr, entry = tmp; } else { entry->status = ESI_FORWARD_DIRECT; - memcpy(entry->mac_addr, mac_addr, ETH_ALEN); + ether_addr_copy(entry->mac_addr, + mac_addr); entry->last_used = jiffies; lec_arp_add(priv, entry); } @@ -2263,7 +2264,7 @@ lec_arp_check_empties(struct lec_priv *priv, &priv->lec_arp_empty_ones, next) { if (vcc == entry->vcc) { del_timer(&entry->timer); - memcpy(entry->mac_addr, src, ETH_ALEN); + ether_addr_copy(entry->mac_addr, src); entry->status = ESI_FORWARD_DIRECT; entry->last_used = jiffies; /* We might have got an entry */ diff --git a/net/atm/mpc.c b/net/atm/mpc.c index 3af12755cd0..b71ff6b234f 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c @@ -478,7 +478,7 @@ static const uint8_t *copy_macs(struct mpoa_client *mpc, return NULL; } } - memcpy(mpc->mps_macs, router_mac, ETH_ALEN); + ether_addr_copy(mpc->mps_macs, router_mac); tlvs += 20; if (device_type == MPS_AND_MPC) tlvs += 20; if (mps_macs > 0) memcpy(mpc->mps_macs, tlvs, mps_macs*ETH_ALEN); diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 7bb1605bdfd..c35c3f48fc0 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1435,7 +1435,7 
@@ out: static int ax25_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len) { - struct sockaddr_ax25 *usax = (struct sockaddr_ax25 *)msg->msg_name; + DECLARE_SOCKADDR(struct sockaddr_ax25 *, usax, msg->msg_name); struct sock *sk = sock->sk; struct sockaddr_ax25 sax; struct sk_buff *skb; @@ -1640,7 +1640,7 @@ static int ax25_recvmsg(struct kiocb *iocb, struct socket *sock, ax25_digi digi; ax25_address src; const unsigned char *mac = skb_mac_header(skb); - struct sockaddr_ax25 *sax = msg->msg_name; + DECLARE_SOCKADDR(struct sockaddr_ax25 *, sax, msg->msg_name); memset(sax, 0, sizeof(struct full_sockaddr_ax25)); ax25_addr_parse(mac + 1, skb->data - mac - 1, &src, NULL, diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile index 4f4aabbd8ea..42df18f877e 100644 --- a/net/batman-adv/Makefile +++ b/net/batman-adv/Makefile @@ -1,5 +1,5 @@ # -# Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: +# Copyright (C) 2007-2014 B.A.T.M.A.N. contributors: # # Marek Lindner, Simon Wunderlich # @@ -13,9 +13,7 @@ # General Public License for more details. # # You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA -# 02110-1301, USA +# along with this program; if not, see <http://www.gnu.org/licenses/>. # obj-$(CONFIG_BATMAN_ADV) += batman-adv.o diff --git a/net/batman-adv/bat_algo.h b/net/batman-adv/bat_algo.h index a4808c29ea3..4e49666f8c6 100644 --- a/net/batman-adv/bat_algo.h +++ b/net/batman-adv/bat_algo.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2011-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2011-2014 B.A.T.M.A.N. contributors: * * Marek Lindner * @@ -12,9 +12,7 @@ * General Public License for more details. 
* * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #ifndef _NET_BATMAN_ADV_BAT_ALGO_H_ diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index b9c8a6eedf4..512159bf607 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -12,9 +12,7 @@ * General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
*/ #include "main.h" @@ -274,7 +272,14 @@ batadv_iv_ogm_neigh_new(struct batadv_hard_iface *hard_iface, if (!neigh_node) goto out; - spin_lock_init(&neigh_node->bat_iv.lq_update_lock); + if (!atomic_inc_not_zero(&hard_iface->refcount)) { + kfree(neigh_node); + neigh_node = NULL; + goto out; + } + + neigh_node->orig_node = orig_neigh; + neigh_node->if_incoming = hard_iface; batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Creating new neighbor %pM for orig_node %pM on interface %s\n", @@ -461,17 +466,9 @@ static void batadv_iv_ogm_send_to_if(struct batadv_forw_packet *forw_packet, /* send a batman ogm packet */ static void batadv_iv_ogm_emit(struct batadv_forw_packet *forw_packet) { - struct batadv_hard_iface *hard_iface; struct net_device *soft_iface; struct batadv_priv *bat_priv; struct batadv_hard_iface *primary_if = NULL; - struct batadv_ogm_packet *batadv_ogm_packet; - unsigned char directlink; - uint8_t *packet_pos; - - packet_pos = forw_packet->skb->data; - batadv_ogm_packet = (struct batadv_ogm_packet *)packet_pos; - directlink = (batadv_ogm_packet->flags & BATADV_DIRECTLINK ? 1 : 0); if (!forw_packet->if_incoming) { pr_err("Error - can't forward packet: incoming iface not specified\n"); @@ -481,59 +478,48 @@ static void batadv_iv_ogm_emit(struct batadv_forw_packet *forw_packet) soft_iface = forw_packet->if_incoming->soft_iface; bat_priv = netdev_priv(soft_iface); - if (forw_packet->if_incoming->if_status != BATADV_IF_ACTIVE) + if (WARN_ON(!forw_packet->if_outgoing)) goto out; - primary_if = batadv_primary_if_get_selected(bat_priv); - if (!primary_if) + if (WARN_ON(forw_packet->if_outgoing->soft_iface != soft_iface)) goto out; - /* multihomed peer assumed - * non-primary OGMs are only broadcasted on their interface - */ - if ((directlink && (batadv_ogm_packet->ttl == 1)) || - (forw_packet->own && (forw_packet->if_incoming != primary_if))) { - /* FIXME: what about aggregated packets ? 
*/ - batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "%s packet (originator %pM, seqno %u, TTL %d) on interface %s [%pM]\n", - (forw_packet->own ? "Sending own" : "Forwarding"), - batadv_ogm_packet->orig, - ntohl(batadv_ogm_packet->seqno), - batadv_ogm_packet->ttl, - forw_packet->if_incoming->net_dev->name, - forw_packet->if_incoming->net_dev->dev_addr); - - /* skb is only used once and than forw_packet is free'd */ - batadv_send_skb_packet(forw_packet->skb, - forw_packet->if_incoming, - batadv_broadcast_addr); - forw_packet->skb = NULL; - + if (forw_packet->if_incoming->if_status != BATADV_IF_ACTIVE) goto out; - } - /* broadcast on every interface */ - rcu_read_lock(); - list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { - if (hard_iface->soft_iface != soft_iface) - continue; + primary_if = batadv_primary_if_get_selected(bat_priv); + if (!primary_if) + goto out; - batadv_iv_ogm_send_to_if(forw_packet, hard_iface); - } - rcu_read_unlock(); + /* only for one specific outgoing interface */ + batadv_iv_ogm_send_to_if(forw_packet, forw_packet->if_outgoing); out: if (primary_if) batadv_hardif_free_ref(primary_if); } -/* return true if new_packet can be aggregated with forw_packet */ +/** + * batadv_iv_ogm_can_aggregate - find out if an OGM can be aggregated on an + * existing forward packet + * @new_bat_ogm_packet: OGM packet to be aggregated + * @bat_priv: the bat priv with all the soft interface information + * @packet_len: (total) length of the OGM + * @send_time: timestamp (jiffies) when the packet is to be sent + * @direktlink: true if this is a direct link packet + * @if_incoming: interface where the packet was received + * @if_outgoing: interface for which the retransmission should be considered + * @forw_packet: the forwarded packet which should be checked + * + * Returns true if new_packet can be aggregated with forw_packet + */ static bool batadv_iv_ogm_can_aggregate(const struct batadv_ogm_packet *new_bat_ogm_packet, struct batadv_priv *bat_priv, int 
packet_len, unsigned long send_time, bool directlink, const struct batadv_hard_iface *if_incoming, + const struct batadv_hard_iface *if_outgoing, const struct batadv_forw_packet *forw_packet) { struct batadv_ogm_packet *batadv_ogm_packet; @@ -567,6 +553,10 @@ batadv_iv_ogm_can_aggregate(const struct batadv_ogm_packet *new_bat_ogm_packet, if (!primary_if) goto out; + /* packet is not leaving on the same interface. */ + if (forw_packet->if_outgoing != if_outgoing) + goto out; + /* packets without direct link flag and high TTL * are flooded through the net */ @@ -608,11 +598,22 @@ out: return res; } -/* create a new aggregated packet and add this packet to it */ +/** + * batadv_iv_ogm_aggregate_new - create a new aggregated packet and add this + * packet to it. + * @packet_buff: pointer to the OGM + * @packet_len: (total) length of the OGM + * @send_time: timestamp (jiffies) when the packet is to be sent + * @direct_link: whether this OGM has direct link status + * @if_incoming: interface where the packet was received + * @if_outgoing: interface for which the retransmission should be considered + * @own_packet: true if it is a self-generated ogm + */ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff, int packet_len, unsigned long send_time, bool direct_link, struct batadv_hard_iface *if_incoming, + struct batadv_hard_iface *if_outgoing, int own_packet) { struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); @@ -623,6 +624,9 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff, if (!atomic_inc_not_zero(&if_incoming->refcount)) return; + if (!atomic_inc_not_zero(&if_outgoing->refcount)) + goto out_free_incoming; + /* own packet should always be scheduled */ if (!own_packet) { if (!batadv_atomic_dec_not_zero(&bat_priv->batman_queue_left)) { @@ -663,6 +667,7 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff, forw_packet_aggr->own = own_packet; forw_packet_aggr->if_incoming = 
if_incoming; + forw_packet_aggr->if_outgoing = if_outgoing; forw_packet_aggr->num_packets = 0; forw_packet_aggr->direct_link_flags = BATADV_NO_FLAGS; forw_packet_aggr->send_time = send_time; @@ -685,6 +690,8 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff, return; out: + batadv_hardif_free_ref(if_outgoing); +out_free_incoming: batadv_hardif_free_ref(if_incoming); } @@ -708,10 +715,21 @@ static void batadv_iv_ogm_aggregate(struct batadv_forw_packet *forw_packet_aggr, } } +/** + * batadv_iv_ogm_queue_add - queue up an OGM for transmission + * @bat_priv: the bat priv with all the soft interface information + * @packet_buff: pointer to the OGM + * @packet_len: (total) length of the OGM + * @if_incoming: interface where the packet was received + * @if_outgoing: interface for which the retransmission should be considered + * @own_packet: true if it is a self-generated ogm + * @send_time: timestamp (jiffies) when the packet is to be sent + */ static void batadv_iv_ogm_queue_add(struct batadv_priv *bat_priv, unsigned char *packet_buff, int packet_len, struct batadv_hard_iface *if_incoming, + struct batadv_hard_iface *if_outgoing, int own_packet, unsigned long send_time) { /* _aggr -> pointer to the packet we want to aggregate with @@ -737,6 +755,7 @@ static void batadv_iv_ogm_queue_add(struct batadv_priv *bat_priv, bat_priv, packet_len, send_time, direct_link, if_incoming, + if_outgoing, forw_packet_pos)) { forw_packet_aggr = forw_packet_pos; break; @@ -760,7 +779,8 @@ static void batadv_iv_ogm_queue_add(struct batadv_priv *bat_priv, batadv_iv_ogm_aggregate_new(packet_buff, packet_len, send_time, direct_link, - if_incoming, own_packet); + if_incoming, if_outgoing, + own_packet); } else { batadv_iv_ogm_aggregate(forw_packet_aggr, packet_buff, packet_len, direct_link); @@ -773,7 +793,8 @@ static void batadv_iv_ogm_forward(struct batadv_orig_node *orig_node, struct batadv_ogm_packet *batadv_ogm_packet, bool is_single_hop_neigh, bool 
is_from_best_next_hop, - struct batadv_hard_iface *if_incoming) + struct batadv_hard_iface *if_incoming, + struct batadv_hard_iface *if_outgoing) { struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); uint16_t tvlv_len; @@ -818,7 +839,8 @@ static void batadv_iv_ogm_forward(struct batadv_orig_node *orig_node, batadv_iv_ogm_queue_add(bat_priv, (unsigned char *)batadv_ogm_packet, BATADV_OGM_HLEN + tvlv_len, - if_incoming, 0, batadv_iv_ogm_fwd_send_time()); + if_incoming, if_outgoing, 0, + batadv_iv_ogm_fwd_send_time()); } /** @@ -863,10 +885,11 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface) struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); unsigned char **ogm_buff = &hard_iface->bat_iv.ogm_buff; struct batadv_ogm_packet *batadv_ogm_packet; - struct batadv_hard_iface *primary_if; + struct batadv_hard_iface *primary_if, *tmp_hard_iface; int *ogm_buff_len = &hard_iface->bat_iv.ogm_buff_len; uint32_t seqno; uint16_t tvlv_len = 0; + unsigned long send_time; primary_if = batadv_primary_if_get_selected(bat_priv); @@ -889,23 +912,60 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface) atomic_inc(&hard_iface->bat_iv.ogm_seqno); batadv_iv_ogm_slide_own_bcast_window(hard_iface); - batadv_iv_ogm_queue_add(bat_priv, hard_iface->bat_iv.ogm_buff, - hard_iface->bat_iv.ogm_buff_len, hard_iface, 1, - batadv_iv_ogm_emit_send_time(bat_priv)); + send_time = batadv_iv_ogm_emit_send_time(bat_priv); + + if (hard_iface != primary_if) { + /* OGMs from secondary interfaces are only scheduled on their + * respective interfaces. + */ + batadv_iv_ogm_queue_add(bat_priv, *ogm_buff, *ogm_buff_len, + hard_iface, hard_iface, 1, send_time); + goto out; + } + + /* OGMs from primary interfaces are scheduled on all + * interfaces. 
+ */ + rcu_read_lock(); + list_for_each_entry_rcu(tmp_hard_iface, &batadv_hardif_list, list) { + if (tmp_hard_iface->soft_iface != hard_iface->soft_iface) + continue; + batadv_iv_ogm_queue_add(bat_priv, *ogm_buff, + *ogm_buff_len, hard_iface, + tmp_hard_iface, 1, send_time); + } + rcu_read_unlock(); + +out: if (primary_if) batadv_hardif_free_ref(primary_if); } +/** + * batadv_iv_ogm_orig_update - use OGM to update corresponding data in an + * originator + * @bat_priv: the bat priv with all the soft interface information + * @orig_node: the orig node who originally emitted the ogm packet + * @orig_ifinfo: ifinfo for the outgoing interface of the orig_node + * @ethhdr: Ethernet header of the OGM + * @batadv_ogm_packet: the ogm packet + * @if_incoming: interface where the packet was received + * @if_outgoing: interface for which the retransmission should be considered + * @dup_status: the duplicate status of this ogm packet. + */ static void batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, + struct batadv_orig_ifinfo *orig_ifinfo, const struct ethhdr *ethhdr, const struct batadv_ogm_packet *batadv_ogm_packet, struct batadv_hard_iface *if_incoming, - const unsigned char *tt_buff, + struct batadv_hard_iface *if_outgoing, enum batadv_dup_status dup_status) { + struct batadv_neigh_ifinfo *neigh_ifinfo = NULL; + struct batadv_neigh_ifinfo *router_ifinfo = NULL; struct batadv_neigh_node *neigh_node = NULL, *tmp_neigh_node = NULL; struct batadv_neigh_node *router = NULL; struct batadv_orig_node *orig_node_tmp; @@ -933,12 +993,21 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv, if (dup_status != BATADV_NO_DUP) continue; - spin_lock_bh(&tmp_neigh_node->bat_iv.lq_update_lock); - batadv_ring_buffer_set(tmp_neigh_node->bat_iv.tq_recv, - &tmp_neigh_node->bat_iv.tq_index, 0); - tq_avg = batadv_ring_buffer_avg(tmp_neigh_node->bat_iv.tq_recv); - tmp_neigh_node->bat_iv.tq_avg = tq_avg; - 
spin_unlock_bh(&tmp_neigh_node->bat_iv.lq_update_lock); + /* only update the entry for this outgoing interface */ + neigh_ifinfo = batadv_neigh_ifinfo_get(tmp_neigh_node, + if_outgoing); + if (!neigh_ifinfo) + continue; + + spin_lock_bh(&tmp_neigh_node->ifinfo_lock); + batadv_ring_buffer_set(neigh_ifinfo->bat_iv.tq_recv, + &neigh_ifinfo->bat_iv.tq_index, 0); + tq_avg = batadv_ring_buffer_avg(neigh_ifinfo->bat_iv.tq_recv); + neigh_ifinfo->bat_iv.tq_avg = tq_avg; + spin_unlock_bh(&tmp_neigh_node->ifinfo_lock); + + batadv_neigh_ifinfo_free_ref(neigh_ifinfo); + neigh_ifinfo = NULL; } if (!neigh_node) { @@ -960,39 +1029,49 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv, "Updating existing last-hop neighbor of originator\n"); rcu_read_unlock(); + neigh_ifinfo = batadv_neigh_ifinfo_new(neigh_node, if_outgoing); + if (!neigh_ifinfo) + goto out; neigh_node->last_seen = jiffies; - spin_lock_bh(&neigh_node->bat_iv.lq_update_lock); - batadv_ring_buffer_set(neigh_node->bat_iv.tq_recv, - &neigh_node->bat_iv.tq_index, + spin_lock_bh(&neigh_node->ifinfo_lock); + batadv_ring_buffer_set(neigh_ifinfo->bat_iv.tq_recv, + &neigh_ifinfo->bat_iv.tq_index, batadv_ogm_packet->tq); - tq_avg = batadv_ring_buffer_avg(neigh_node->bat_iv.tq_recv); - neigh_node->bat_iv.tq_avg = tq_avg; - spin_unlock_bh(&neigh_node->bat_iv.lq_update_lock); + tq_avg = batadv_ring_buffer_avg(neigh_ifinfo->bat_iv.tq_recv); + neigh_ifinfo->bat_iv.tq_avg = tq_avg; + spin_unlock_bh(&neigh_node->ifinfo_lock); if (dup_status == BATADV_NO_DUP) { - orig_node->last_ttl = batadv_ogm_packet->ttl; - neigh_node->last_ttl = batadv_ogm_packet->ttl; + orig_ifinfo->last_ttl = batadv_ogm_packet->ttl; + neigh_ifinfo->last_ttl = batadv_ogm_packet->ttl; } - batadv_bonding_candidate_add(bat_priv, orig_node, neigh_node); - /* if this neighbor already is our next hop there is nothing * to change */ - router = batadv_orig_node_get_router(orig_node); + router = batadv_orig_router_get(orig_node, if_outgoing); if (router == 
neigh_node) goto out; - /* if this neighbor does not offer a better TQ we won't consider it */ - if (router && (router->bat_iv.tq_avg > neigh_node->bat_iv.tq_avg)) - goto out; + if (router) { + router_ifinfo = batadv_neigh_ifinfo_get(router, if_outgoing); + if (!router_ifinfo) + goto out; + + /* if this neighbor does not offer a better TQ we won't + * consider it + */ + if (router_ifinfo->bat_iv.tq_avg > neigh_ifinfo->bat_iv.tq_avg) + goto out; + } /* if the TQ is the same and the link not more symmetric we * won't consider it either */ - if (router && (neigh_node->bat_iv.tq_avg == router->bat_iv.tq_avg)) { + if (router_ifinfo && + (neigh_ifinfo->bat_iv.tq_avg == router_ifinfo->bat_iv.tq_avg)) { orig_node_tmp = router->orig_node; spin_lock_bh(&orig_node_tmp->bat_iv.ogm_cnt_lock); if_num = router->if_incoming->if_num; @@ -1009,7 +1088,7 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv, goto out; } - batadv_update_route(bat_priv, orig_node, neigh_node); + batadv_update_route(bat_priv, orig_node, if_outgoing, neigh_node); goto out; unlock: @@ -1019,20 +1098,37 @@ out: batadv_neigh_node_free_ref(neigh_node); if (router) batadv_neigh_node_free_ref(router); + if (neigh_ifinfo) + batadv_neigh_ifinfo_free_ref(neigh_ifinfo); + if (router_ifinfo) + batadv_neigh_ifinfo_free_ref(router_ifinfo); } +/** + * batadv_iv_ogm_calc_tq - calculate tq for current received ogm packet + * @orig_node: the orig node who originally emitted the ogm packet + * @orig_neigh_node: the orig node struct of the neighbor who sent the packet + * @batadv_ogm_packet: the ogm packet + * @if_incoming: interface where the packet was received + * @if_outgoing: interface for which the retransmission should be considered + * + * Returns 1 if the link can be considered bidirectional, 0 otherwise + */ static int batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node, struct batadv_orig_node *orig_neigh_node, struct batadv_ogm_packet *batadv_ogm_packet, - struct batadv_hard_iface *if_incoming) + 
struct batadv_hard_iface *if_incoming, + struct batadv_hard_iface *if_outgoing) { struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); struct batadv_neigh_node *neigh_node = NULL, *tmp_neigh_node; + struct batadv_neigh_ifinfo *neigh_ifinfo; uint8_t total_count; uint8_t orig_eq_count, neigh_rq_count, neigh_rq_inv, tq_own; unsigned int neigh_rq_inv_cube, neigh_rq_max_cube; int tq_asym_penalty, inv_asym_penalty, if_num, ret = 0; unsigned int combined_tq; + int tq_iface_penalty; /* find corresponding one hop neighbor */ rcu_read_lock(); @@ -1072,7 +1168,13 @@ static int batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node, spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock); if_num = if_incoming->if_num; orig_eq_count = orig_neigh_node->bat_iv.bcast_own_sum[if_num]; - neigh_rq_count = neigh_node->bat_iv.real_packet_count; + neigh_ifinfo = batadv_neigh_ifinfo_new(neigh_node, if_outgoing); + if (neigh_ifinfo) { + neigh_rq_count = neigh_ifinfo->bat_iv.real_packet_count; + batadv_neigh_ifinfo_free_ref(neigh_ifinfo); + } else { + neigh_rq_count = 0; + } spin_unlock_bh(&orig_node->bat_iv.ogm_cnt_lock); /* pay attention to not get a value bigger than 100 % */ @@ -1108,15 +1210,31 @@ static int batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node, inv_asym_penalty /= neigh_rq_max_cube; tq_asym_penalty = BATADV_TQ_MAX_VALUE - inv_asym_penalty; - combined_tq = batadv_ogm_packet->tq * tq_own * tq_asym_penalty; - combined_tq /= BATADV_TQ_MAX_VALUE * BATADV_TQ_MAX_VALUE; + /* penalize if the OGM is forwarded on the same interface. WiFi + * interfaces and other half duplex devices suffer from throughput + * drops as they can't send and receive at the same time. 
+ */ + tq_iface_penalty = BATADV_TQ_MAX_VALUE; + if (if_outgoing && (if_incoming == if_outgoing) && + batadv_is_wifi_netdev(if_outgoing->net_dev)) + tq_iface_penalty = batadv_hop_penalty(BATADV_TQ_MAX_VALUE, + bat_priv); + + combined_tq = batadv_ogm_packet->tq * + tq_own * + tq_asym_penalty * + tq_iface_penalty; + combined_tq /= BATADV_TQ_MAX_VALUE * + BATADV_TQ_MAX_VALUE * + BATADV_TQ_MAX_VALUE; batadv_ogm_packet->tq = combined_tq; batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "bidirectional: orig = %-15pM neigh = %-15pM => own_bcast = %2i, real recv = %2i, local tq: %3i, asym_penalty: %3i, total tq: %3i\n", + "bidirectional: orig = %-15pM neigh = %-15pM => own_bcast = %2i, real recv = %2i, local tq: %3i, asym_penalty: %3i, iface_penalty: %3i, total tq: %3i, if_incoming = %s, if_outgoing = %s\n", orig_node->orig, orig_neigh_node->orig, total_count, - neigh_rq_count, tq_own, - tq_asym_penalty, batadv_ogm_packet->tq); + neigh_rq_count, tq_own, tq_asym_penalty, tq_iface_penalty, + batadv_ogm_packet->tq, if_incoming->net_dev->name, + if_outgoing ? 
if_outgoing->net_dev->name : "DEFAULT"); /* if link has the minimum required transmission quality * consider it bidirectional @@ -1136,17 +1254,21 @@ out: * @ethhdr: ethernet header of the packet * @batadv_ogm_packet: OGM packet to be considered * @if_incoming: interface on which the OGM packet was received + * @if_outgoing: interface for which the retransmission should be considered * * Returns duplicate status as enum batadv_dup_status */ static enum batadv_dup_status batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr, const struct batadv_ogm_packet *batadv_ogm_packet, - const struct batadv_hard_iface *if_incoming) + const struct batadv_hard_iface *if_incoming, + struct batadv_hard_iface *if_outgoing) { struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); struct batadv_orig_node *orig_node; - struct batadv_neigh_node *tmp_neigh_node; + struct batadv_orig_ifinfo *orig_ifinfo = NULL; + struct batadv_neigh_node *neigh_node; + struct batadv_neigh_ifinfo *neigh_ifinfo; int is_dup; int32_t seq_diff; int need_update = 0; @@ -1161,27 +1283,37 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr, if (!orig_node) return BATADV_NO_DUP; + orig_ifinfo = batadv_orig_ifinfo_new(orig_node, if_outgoing); + if (WARN_ON(!orig_ifinfo)) { + batadv_orig_node_free_ref(orig_node); + return 0; + } + spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock); - seq_diff = seqno - orig_node->last_real_seqno; + seq_diff = seqno - orig_ifinfo->last_real_seqno; /* signalize caller that the packet is to be dropped. 
*/ if (!hlist_empty(&orig_node->neigh_list) && batadv_window_protected(bat_priv, seq_diff, - &orig_node->batman_seqno_reset)) { + &orig_ifinfo->batman_seqno_reset)) { ret = BATADV_PROTECTED; goto out; } rcu_read_lock(); - hlist_for_each_entry_rcu(tmp_neigh_node, - &orig_node->neigh_list, list) { - neigh_addr = tmp_neigh_node->addr; - is_dup = batadv_test_bit(tmp_neigh_node->bat_iv.real_bits, - orig_node->last_real_seqno, + hlist_for_each_entry_rcu(neigh_node, &orig_node->neigh_list, list) { + neigh_ifinfo = batadv_neigh_ifinfo_new(neigh_node, + if_outgoing); + if (!neigh_ifinfo) + continue; + + neigh_addr = neigh_node->addr; + is_dup = batadv_test_bit(neigh_ifinfo->bat_iv.real_bits, + orig_ifinfo->last_real_seqno, seqno); if (batadv_compare_eth(neigh_addr, ethhdr->h_source) && - tmp_neigh_node->if_incoming == if_incoming) { + neigh_node->if_incoming == if_incoming) { set_mark = 1; if (is_dup) ret = BATADV_NEIGH_DUP; @@ -1192,173 +1324,78 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr, } /* if the window moved, set the update flag. */ - bitmap = tmp_neigh_node->bat_iv.real_bits; + bitmap = neigh_ifinfo->bat_iv.real_bits; need_update |= batadv_bit_get_packet(bat_priv, bitmap, seq_diff, set_mark); - packet_count = bitmap_weight(tmp_neigh_node->bat_iv.real_bits, + packet_count = bitmap_weight(bitmap, BATADV_TQ_LOCAL_WINDOW_SIZE); - tmp_neigh_node->bat_iv.real_packet_count = packet_count; + neigh_ifinfo->bat_iv.real_packet_count = packet_count; + batadv_neigh_ifinfo_free_ref(neigh_ifinfo); } rcu_read_unlock(); if (need_update) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "updating last_seqno: old %u, new %u\n", - orig_node->last_real_seqno, seqno); - orig_node->last_real_seqno = seqno; + "%s updating last_seqno: old %u, new %u\n", + if_outgoing ? 
if_outgoing->net_dev->name : "DEFAULT", + orig_ifinfo->last_real_seqno, seqno); + orig_ifinfo->last_real_seqno = seqno; } out: spin_unlock_bh(&orig_node->bat_iv.ogm_cnt_lock); batadv_orig_node_free_ref(orig_node); + if (orig_ifinfo) + batadv_orig_ifinfo_free_ref(orig_ifinfo); return ret; } -static void batadv_iv_ogm_process(const struct ethhdr *ethhdr, - struct batadv_ogm_packet *batadv_ogm_packet, - const unsigned char *tt_buff, - struct batadv_hard_iface *if_incoming) + +/** + * batadv_iv_ogm_process_per_outif - process a batman iv OGM for an outgoing if + * @skb: the skb containing the OGM + * @orig_node: the (cached) orig node for the originator of this OGM + * @if_incoming: the interface where this packet was received + * @if_outgoing: the interface for which the packet should be considered + */ +static void +batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, int ogm_offset, + struct batadv_orig_node *orig_node, + struct batadv_hard_iface *if_incoming, + struct batadv_hard_iface *if_outgoing) { struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); - struct batadv_hard_iface *hard_iface; - struct batadv_orig_node *orig_neigh_node, *orig_node, *orig_node_tmp; struct batadv_neigh_node *router = NULL, *router_router = NULL; + struct batadv_orig_node *orig_neigh_node; + struct batadv_orig_ifinfo *orig_ifinfo; struct batadv_neigh_node *orig_neigh_router = NULL; - int has_directlink_flag; - int is_my_addr = 0, is_my_orig = 0, is_my_oldorig = 0; - int is_bidirect; - bool is_single_hop_neigh = false; - bool is_from_best_next_hop = false; - int sameseq, similar_ttl; + struct batadv_neigh_ifinfo *router_ifinfo = NULL; + struct batadv_ogm_packet *ogm_packet; enum batadv_dup_status dup_status; - uint32_t if_incoming_seqno; + bool is_from_best_next_hop = false; + bool is_single_hop_neigh = false; + bool sameseq, similar_ttl; + struct sk_buff *skb_priv; + struct ethhdr *ethhdr; uint8_t *prev_sender; + int is_bidirect; - /* Silently drop when the batman 
packet is actually not a - * correct packet. - * - * This might happen if a packet is padded (e.g. Ethernet has a - * minimum frame length of 64 byte) and the aggregation interprets - * it as an additional length. - * - * TODO: A more sane solution would be to have a bit in the - * batadv_ogm_packet to detect whether the packet is the last - * packet in an aggregation. Here we expect that the padding - * is always zero (or not 0x01) + /* create a private copy of the skb, as some functions change tq value + * and/or flags. */ - if (batadv_ogm_packet->packet_type != BATADV_IV_OGM) + skb_priv = skb_copy(skb, GFP_ATOMIC); + if (!skb_priv) return; - /* could be changed by schedule_own_packet() */ - if_incoming_seqno = atomic_read(&if_incoming->bat_iv.ogm_seqno); - - if (batadv_ogm_packet->flags & BATADV_DIRECTLINK) - has_directlink_flag = 1; - else - has_directlink_flag = 0; + ethhdr = eth_hdr(skb_priv); + ogm_packet = (struct batadv_ogm_packet *)(skb_priv->data + ogm_offset); - if (batadv_compare_eth(ethhdr->h_source, batadv_ogm_packet->orig)) + dup_status = batadv_iv_ogm_update_seqnos(ethhdr, ogm_packet, + if_incoming, if_outgoing); + if (batadv_compare_eth(ethhdr->h_source, ogm_packet->orig)) is_single_hop_neigh = true; - batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "Received BATMAN packet via NB: %pM, IF: %s [%pM] (from OG: %pM, via prev OG: %pM, seqno %u, tq %d, TTL %d, V %d, IDF %d)\n", - ethhdr->h_source, if_incoming->net_dev->name, - if_incoming->net_dev->dev_addr, batadv_ogm_packet->orig, - batadv_ogm_packet->prev_sender, - ntohl(batadv_ogm_packet->seqno), batadv_ogm_packet->tq, - batadv_ogm_packet->ttl, - batadv_ogm_packet->version, has_directlink_flag); - - rcu_read_lock(); - list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { - if (hard_iface->if_status != BATADV_IF_ACTIVE) - continue; - - if (hard_iface->soft_iface != if_incoming->soft_iface) - continue; - - if (batadv_compare_eth(ethhdr->h_source, - hard_iface->net_dev->dev_addr)) - is_my_addr = 
1; - - if (batadv_compare_eth(batadv_ogm_packet->orig, - hard_iface->net_dev->dev_addr)) - is_my_orig = 1; - - if (batadv_compare_eth(batadv_ogm_packet->prev_sender, - hard_iface->net_dev->dev_addr)) - is_my_oldorig = 1; - } - rcu_read_unlock(); - - if (is_my_addr) { - batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "Drop packet: received my own broadcast (sender: %pM)\n", - ethhdr->h_source); - return; - } - - if (is_my_orig) { - unsigned long *word; - int offset; - int32_t bit_pos; - int16_t if_num; - uint8_t *weight; - - orig_neigh_node = batadv_iv_ogm_orig_get(bat_priv, - ethhdr->h_source); - if (!orig_neigh_node) - return; - - /* neighbor has to indicate direct link and it has to - * come via the corresponding interface - * save packet seqno for bidirectional check - */ - if (has_directlink_flag && - batadv_compare_eth(if_incoming->net_dev->dev_addr, - batadv_ogm_packet->orig)) { - if_num = if_incoming->if_num; - offset = if_num * BATADV_NUM_WORDS; - - spin_lock_bh(&orig_neigh_node->bat_iv.ogm_cnt_lock); - word = &(orig_neigh_node->bat_iv.bcast_own[offset]); - bit_pos = if_incoming_seqno - 2; - bit_pos -= ntohl(batadv_ogm_packet->seqno); - batadv_set_bit(word, bit_pos); - weight = &orig_neigh_node->bat_iv.bcast_own_sum[if_num]; - *weight = bitmap_weight(word, - BATADV_TQ_LOCAL_WINDOW_SIZE); - spin_unlock_bh(&orig_neigh_node->bat_iv.ogm_cnt_lock); - } - - batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "Drop packet: originator packet from myself (via neighbor)\n"); - batadv_orig_node_free_ref(orig_neigh_node); - return; - } - - if (is_my_oldorig) { - batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "Drop packet: ignoring all rebroadcast echos (sender: %pM)\n", - ethhdr->h_source); - return; - } - - if (batadv_ogm_packet->flags & BATADV_NOT_BEST_NEXT_HOP) { - batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "Drop packet: ignoring all packets not forwarded from the best next hop (sender: %pM)\n", - ethhdr->h_source); - return; - } - - orig_node = batadv_iv_ogm_orig_get(bat_priv, 
batadv_ogm_packet->orig); - if (!orig_node) - return; - - dup_status = batadv_iv_ogm_update_seqnos(ethhdr, batadv_ogm_packet, - if_incoming); - if (dup_status == BATADV_PROTECTED) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Drop packet: packet within seqno protection time (sender: %pM)\n", @@ -1366,27 +1403,28 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr, goto out; } - if (batadv_ogm_packet->tq == 0) { + if (ogm_packet->tq == 0) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Drop packet: originator packet with tq equal 0\n"); goto out; } - router = batadv_orig_node_get_router(orig_node); + router = batadv_orig_router_get(orig_node, if_outgoing); if (router) { - orig_node_tmp = router->orig_node; - router_router = batadv_orig_node_get_router(orig_node_tmp); + router_router = batadv_orig_router_get(router->orig_node, + if_outgoing); + router_ifinfo = batadv_neigh_ifinfo_get(router, if_outgoing); } - if ((router && router->bat_iv.tq_avg != 0) && + if ((router_ifinfo && router_ifinfo->bat_iv.tq_avg != 0) && (batadv_compare_eth(router->addr, ethhdr->h_source))) is_from_best_next_hop = true; - prev_sender = batadv_ogm_packet->prev_sender; + prev_sender = ogm_packet->prev_sender; /* avoid temporary routing loops */ if (router && router_router && (batadv_compare_eth(router->addr, prev_sender)) && - !(batadv_compare_eth(batadv_ogm_packet->orig, prev_sender)) && + !(batadv_compare_eth(ogm_packet->orig, prev_sender)) && (batadv_compare_eth(router->addr, router_router->addr))) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Drop packet: ignoring all rebroadcast packets that may make me loop (sender: %pM)\n", @@ -1394,7 +1432,8 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr, goto out; } - batadv_tvlv_ogm_receive(bat_priv, batadv_ogm_packet, orig_node); + if (if_outgoing == BATADV_IF_DEFAULT) + batadv_tvlv_ogm_receive(bat_priv, ogm_packet, orig_node); /* if sender is a direct neighbor the sender mac equals * originator mac @@ -1410,9 +1449,10 @@ 
static void batadv_iv_ogm_process(const struct ethhdr *ethhdr, /* Update nc_nodes of the originator */ batadv_nc_update_nc_node(bat_priv, orig_node, orig_neigh_node, - batadv_ogm_packet, is_single_hop_neigh); + ogm_packet, is_single_hop_neigh); - orig_neigh_router = batadv_orig_node_get_router(orig_neigh_node); + orig_neigh_router = batadv_orig_router_get(orig_neigh_node, + if_outgoing); /* drop packet if sender is not a direct neighbor and if we * don't route towards it @@ -1424,28 +1464,48 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr, } is_bidirect = batadv_iv_ogm_calc_tq(orig_node, orig_neigh_node, - batadv_ogm_packet, if_incoming); - - batadv_bonding_save_primary(orig_node, orig_neigh_node, - batadv_ogm_packet); + ogm_packet, if_incoming, + if_outgoing); /* update ranking if it is not a duplicate or has the same * seqno and similar ttl as the non-duplicate */ - sameseq = orig_node->last_real_seqno == ntohl(batadv_ogm_packet->seqno); - similar_ttl = orig_node->last_ttl - 3 <= batadv_ogm_packet->ttl; + orig_ifinfo = batadv_orig_ifinfo_new(orig_node, if_outgoing); + if (!orig_ifinfo) + goto out_neigh; + + sameseq = orig_ifinfo->last_real_seqno == ntohl(ogm_packet->seqno); + similar_ttl = (orig_ifinfo->last_ttl - 3) <= ogm_packet->ttl; + if (is_bidirect && ((dup_status == BATADV_NO_DUP) || - (sameseq && similar_ttl))) - batadv_iv_ogm_orig_update(bat_priv, orig_node, ethhdr, - batadv_ogm_packet, if_incoming, - tt_buff, dup_status); + (sameseq && similar_ttl))) { + batadv_iv_ogm_orig_update(bat_priv, orig_node, + orig_ifinfo, ethhdr, + ogm_packet, if_incoming, + if_outgoing, dup_status); + } + batadv_orig_ifinfo_free_ref(orig_ifinfo); + + /* only forward for specific interface, not for the default one. 
*/ + if (if_outgoing == BATADV_IF_DEFAULT) + goto out_neigh; /* is single hop (direct) neighbor */ if (is_single_hop_neigh) { + /* OGMs from secondary interfaces should only scheduled once + * per interface where it has been received, not multiple times + */ + if ((ogm_packet->ttl <= 2) && + (if_incoming != if_outgoing)) { + batadv_dbg(BATADV_DBG_BATMAN, bat_priv, + "Drop packet: OGM from secondary interface and wrong outgoing interface\n"); + goto out_neigh; + } /* mark direct link on incoming interface */ - batadv_iv_ogm_forward(orig_node, ethhdr, batadv_ogm_packet, + batadv_iv_ogm_forward(orig_node, ethhdr, ogm_packet, is_single_hop_neigh, - is_from_best_next_hop, if_incoming); + is_from_best_next_hop, if_incoming, + if_outgoing); batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Forwarding packet: rebroadcast neighbor packet with direct link flag\n"); @@ -1467,9 +1527,9 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr, batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Forwarding packet: rebroadcast originator packet\n"); - batadv_iv_ogm_forward(orig_node, ethhdr, batadv_ogm_packet, + batadv_iv_ogm_forward(orig_node, ethhdr, ogm_packet, is_single_hop_neigh, is_from_best_next_hop, - if_incoming); + if_incoming, if_outgoing); out_neigh: if ((orig_neigh_node) && (!is_single_hop_neigh)) @@ -1482,6 +1542,165 @@ out: if (orig_neigh_router) batadv_neigh_node_free_ref(orig_neigh_router); + kfree_skb(skb_priv); +} + +/** + * batadv_iv_ogm_process - process an incoming batman iv OGM + * @skb: the skb containing the OGM + * @ogm_offset: offset to the OGM which should be processed (for aggregates) + * @if_incoming: the interface where this packet was receved + */ +static void batadv_iv_ogm_process(const struct sk_buff *skb, int ogm_offset, + struct batadv_hard_iface *if_incoming) +{ + struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); + struct batadv_orig_node *orig_neigh_node, *orig_node; + struct batadv_hard_iface *hard_iface; + struct batadv_ogm_packet 
*ogm_packet; + uint32_t if_incoming_seqno; + bool has_directlink_flag; + struct ethhdr *ethhdr; + bool is_my_oldorig = false; + bool is_my_addr = false; + bool is_my_orig = false; + + ogm_packet = (struct batadv_ogm_packet *)(skb->data + ogm_offset); + ethhdr = eth_hdr(skb); + + /* Silently drop when the batman packet is actually not a + * correct packet. + * + * This might happen if a packet is padded (e.g. Ethernet has a + * minimum frame length of 64 byte) and the aggregation interprets + * it as an additional length. + * + * TODO: A more sane solution would be to have a bit in the + * batadv_ogm_packet to detect whether the packet is the last + * packet in an aggregation. Here we expect that the padding + * is always zero (or not 0x01) + */ + if (ogm_packet->packet_type != BATADV_IV_OGM) + return; + + /* could be changed by schedule_own_packet() */ + if_incoming_seqno = atomic_read(&if_incoming->bat_iv.ogm_seqno); + + if (ogm_packet->flags & BATADV_DIRECTLINK) + has_directlink_flag = true; + else + has_directlink_flag = false; + + batadv_dbg(BATADV_DBG_BATMAN, bat_priv, + "Received BATMAN packet via NB: %pM, IF: %s [%pM] (from OG: %pM, via prev OG: %pM, seqno %u, tq %d, TTL %d, V %d, IDF %d)\n", + ethhdr->h_source, if_incoming->net_dev->name, + if_incoming->net_dev->dev_addr, ogm_packet->orig, + ogm_packet->prev_sender, ntohl(ogm_packet->seqno), + ogm_packet->tq, ogm_packet->ttl, + ogm_packet->version, has_directlink_flag); + + rcu_read_lock(); + list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { + if (hard_iface->if_status != BATADV_IF_ACTIVE) + continue; + + if (hard_iface->soft_iface != if_incoming->soft_iface) + continue; + + if (batadv_compare_eth(ethhdr->h_source, + hard_iface->net_dev->dev_addr)) + is_my_addr = true; + + if (batadv_compare_eth(ogm_packet->orig, + hard_iface->net_dev->dev_addr)) + is_my_orig = true; + + if (batadv_compare_eth(ogm_packet->prev_sender, + hard_iface->net_dev->dev_addr)) + is_my_oldorig = true; + } + 
rcu_read_unlock(); + + if (is_my_addr) { + batadv_dbg(BATADV_DBG_BATMAN, bat_priv, + "Drop packet: received my own broadcast (sender: %pM)\n", + ethhdr->h_source); + return; + } + + if (is_my_orig) { + unsigned long *word; + int offset; + int32_t bit_pos; + int16_t if_num; + uint8_t *weight; + + orig_neigh_node = batadv_iv_ogm_orig_get(bat_priv, + ethhdr->h_source); + if (!orig_neigh_node) + return; + + /* neighbor has to indicate direct link and it has to + * come via the corresponding interface + * save packet seqno for bidirectional check + */ + if (has_directlink_flag && + batadv_compare_eth(if_incoming->net_dev->dev_addr, + ogm_packet->orig)) { + if_num = if_incoming->if_num; + offset = if_num * BATADV_NUM_WORDS; + + spin_lock_bh(&orig_neigh_node->bat_iv.ogm_cnt_lock); + word = &(orig_neigh_node->bat_iv.bcast_own[offset]); + bit_pos = if_incoming_seqno - 2; + bit_pos -= ntohl(ogm_packet->seqno); + batadv_set_bit(word, bit_pos); + weight = &orig_neigh_node->bat_iv.bcast_own_sum[if_num]; + *weight = bitmap_weight(word, + BATADV_TQ_LOCAL_WINDOW_SIZE); + spin_unlock_bh(&orig_neigh_node->bat_iv.ogm_cnt_lock); + } + + batadv_dbg(BATADV_DBG_BATMAN, bat_priv, + "Drop packet: originator packet from myself (via neighbor)\n"); + batadv_orig_node_free_ref(orig_neigh_node); + return; + } + + if (is_my_oldorig) { + batadv_dbg(BATADV_DBG_BATMAN, bat_priv, + "Drop packet: ignoring all rebroadcast echos (sender: %pM)\n", + ethhdr->h_source); + return; + } + + if (ogm_packet->flags & BATADV_NOT_BEST_NEXT_HOP) { + batadv_dbg(BATADV_DBG_BATMAN, bat_priv, + "Drop packet: ignoring all packets not forwarded from the best next hop (sender: %pM)\n", + ethhdr->h_source); + return; + } + + orig_node = batadv_iv_ogm_orig_get(bat_priv, ogm_packet->orig); + if (!orig_node) + return; + + batadv_iv_ogm_process_per_outif(skb, ogm_offset, orig_node, + if_incoming, BATADV_IF_DEFAULT); + + rcu_read_lock(); + list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { + if 
(hard_iface->if_status != BATADV_IF_ACTIVE) + continue; + + if (hard_iface->soft_iface != bat_priv->soft_iface) + continue; + + batadv_iv_ogm_process_per_outif(skb, ogm_offset, orig_node, + if_incoming, hard_iface); + } + rcu_read_unlock(); + batadv_orig_node_free_ref(orig_node); } @@ -1489,11 +1708,9 @@ static int batadv_iv_ogm_receive(struct sk_buff *skb, struct batadv_hard_iface *if_incoming) { struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); - struct batadv_ogm_packet *batadv_ogm_packet; - struct ethhdr *ethhdr; - int buff_pos = 0, packet_len; - unsigned char *tvlv_buff, *packet_buff; + struct batadv_ogm_packet *ogm_packet; uint8_t *packet_pos; + int ogm_offset; bool ret; ret = batadv_check_management_packet(skb, if_incoming, BATADV_OGM_HLEN); @@ -1510,24 +1727,19 @@ static int batadv_iv_ogm_receive(struct sk_buff *skb, batadv_add_counter(bat_priv, BATADV_CNT_MGMT_RX_BYTES, skb->len + ETH_HLEN); - packet_len = skb_headlen(skb); - ethhdr = eth_hdr(skb); - packet_buff = skb->data; - batadv_ogm_packet = (struct batadv_ogm_packet *)packet_buff; + ogm_offset = 0; + ogm_packet = (struct batadv_ogm_packet *)skb->data; /* unpack the aggregated packets and process them one by one */ - while (batadv_iv_ogm_aggr_packet(buff_pos, packet_len, - batadv_ogm_packet->tvlv_len)) { - tvlv_buff = packet_buff + buff_pos + BATADV_OGM_HLEN; - - batadv_iv_ogm_process(ethhdr, batadv_ogm_packet, - tvlv_buff, if_incoming); + while (batadv_iv_ogm_aggr_packet(ogm_offset, skb_headlen(skb), + ogm_packet->tvlv_len)) { + batadv_iv_ogm_process(skb, ogm_offset, if_incoming); - buff_pos += BATADV_OGM_HLEN; - buff_pos += ntohs(batadv_ogm_packet->tvlv_len); + ogm_offset += BATADV_OGM_HLEN; + ogm_offset += ntohs(ogm_packet->tvlv_len); - packet_pos = packet_buff + buff_pos; - batadv_ogm_packet = (struct batadv_ogm_packet *)packet_pos; + packet_pos = skb->data + ogm_offset; + ogm_packet = (struct batadv_ogm_packet *)packet_pos; } kfree_skb(skb); @@ -1535,17 +1747,49 @@ static int 
batadv_iv_ogm_receive(struct sk_buff *skb, } /** + * batadv_iv_ogm_orig_print_neigh - print neighbors for the originator table + * @orig_node: the orig_node for which the neighbors are printed + * @if_outgoing: outgoing interface for these entries + * @seq: debugfs table seq_file struct + * + * Must be called while holding an rcu lock. + */ +static void +batadv_iv_ogm_orig_print_neigh(struct batadv_orig_node *orig_node, + struct batadv_hard_iface *if_outgoing, + struct seq_file *seq) +{ + struct batadv_neigh_node *neigh_node; + struct batadv_neigh_ifinfo *n_ifinfo; + + hlist_for_each_entry_rcu(neigh_node, &orig_node->neigh_list, list) { + n_ifinfo = batadv_neigh_ifinfo_get(neigh_node, if_outgoing); + if (!n_ifinfo) + continue; + + seq_printf(seq, " %pM (%3i)", + neigh_node->addr, + n_ifinfo->bat_iv.tq_avg); + + batadv_neigh_ifinfo_free_ref(n_ifinfo); + } +} + +/** * batadv_iv_ogm_orig_print - print the originator table * @bat_priv: the bat priv with all the soft interface information * @seq: debugfs table seq_file struct + * @if_outgoing: the outgoing interface for which this should be printed */ static void batadv_iv_ogm_orig_print(struct batadv_priv *bat_priv, - struct seq_file *seq) + struct seq_file *seq, + struct batadv_hard_iface *if_outgoing) { - struct batadv_neigh_node *neigh_node, *neigh_node_tmp; + struct batadv_neigh_node *neigh_node; struct batadv_hashtable *hash = bat_priv->orig_hash; int last_seen_msecs, last_seen_secs; struct batadv_orig_node *orig_node; + struct batadv_neigh_ifinfo *n_ifinfo; unsigned long last_seen_jiffies; struct hlist_head *head; int batman_count = 0; @@ -1560,11 +1804,17 @@ static void batadv_iv_ogm_orig_print(struct batadv_priv *bat_priv, rcu_read_lock(); hlist_for_each_entry_rcu(orig_node, head, hash_entry) { - neigh_node = batadv_orig_node_get_router(orig_node); + neigh_node = batadv_orig_router_get(orig_node, + if_outgoing); if (!neigh_node) continue; - if (neigh_node->bat_iv.tq_avg == 0) + n_ifinfo = 
batadv_neigh_ifinfo_get(neigh_node, + if_outgoing); + if (!n_ifinfo) + goto next; + + if (n_ifinfo->bat_iv.tq_avg == 0) goto next; last_seen_jiffies = jiffies - orig_node->last_seen; @@ -1574,22 +1824,19 @@ static void batadv_iv_ogm_orig_print(struct batadv_priv *bat_priv, seq_printf(seq, "%pM %4i.%03is (%3i) %pM [%10s]:", orig_node->orig, last_seen_secs, - last_seen_msecs, neigh_node->bat_iv.tq_avg, + last_seen_msecs, n_ifinfo->bat_iv.tq_avg, neigh_node->addr, neigh_node->if_incoming->net_dev->name); - hlist_for_each_entry_rcu(neigh_node_tmp, - &orig_node->neigh_list, list) { - seq_printf(seq, " %pM (%3i)", - neigh_node_tmp->addr, - neigh_node_tmp->bat_iv.tq_avg); - } - + batadv_iv_ogm_orig_print_neigh(orig_node, if_outgoing, + seq); seq_puts(seq, "\n"); batman_count++; next: batadv_neigh_node_free_ref(neigh_node); + if (n_ifinfo) + batadv_neigh_ifinfo_free_ref(n_ifinfo); } rcu_read_unlock(); } @@ -1601,37 +1848,84 @@ next: /** * batadv_iv_ogm_neigh_cmp - compare the metrics of two neighbors * @neigh1: the first neighbor object of the comparison + * @if_outgoing1: outgoing interface for the first neighbor * @neigh2: the second neighbor object of the comparison + * @if_outgoing2: outgoing interface for the second neighbor * * Returns a value less, equal to or greater than 0 if the metric via neigh1 is * lower, the same as or higher than the metric via neigh2 */ static int batadv_iv_ogm_neigh_cmp(struct batadv_neigh_node *neigh1, - struct batadv_neigh_node *neigh2) + struct batadv_hard_iface *if_outgoing1, + struct batadv_neigh_node *neigh2, + struct batadv_hard_iface *if_outgoing2) { + struct batadv_neigh_ifinfo *neigh1_ifinfo, *neigh2_ifinfo; uint8_t tq1, tq2; + int diff; + + neigh1_ifinfo = batadv_neigh_ifinfo_get(neigh1, if_outgoing1); + neigh2_ifinfo = batadv_neigh_ifinfo_get(neigh2, if_outgoing2); + + if (!neigh1_ifinfo || !neigh2_ifinfo) { + diff = 0; + goto out; + } + + tq1 = neigh1_ifinfo->bat_iv.tq_avg; + tq2 = neigh2_ifinfo->bat_iv.tq_avg; + diff = tq1 - 
tq2; - tq1 = neigh1->bat_iv.tq_avg; - tq2 = neigh2->bat_iv.tq_avg; +out: + if (neigh1_ifinfo) + batadv_neigh_ifinfo_free_ref(neigh1_ifinfo); + if (neigh2_ifinfo) + batadv_neigh_ifinfo_free_ref(neigh2_ifinfo); - return tq1 - tq2; + return diff; } /** * batadv_iv_ogm_neigh_is_eob - check if neigh1 is equally good or better than * neigh2 from the metric prospective * @neigh1: the first neighbor object of the comparison + * @if_outgoing: outgoing interface for the first neighbor * @neigh2: the second neighbor object of the comparison - * - * Returns true if the metric via neigh1 is equally good or better than the - * metric via neigh2, false otherwise. + * @if_outgoing2: outgoing interface for the second neighbor + + * Returns true if the metric via neigh1 is equally good or better than + * the metric via neigh2, false otherwise. */ -static bool batadv_iv_ogm_neigh_is_eob(struct batadv_neigh_node *neigh1, - struct batadv_neigh_node *neigh2) +static bool +batadv_iv_ogm_neigh_is_eob(struct batadv_neigh_node *neigh1, + struct batadv_hard_iface *if_outgoing1, + struct batadv_neigh_node *neigh2, + struct batadv_hard_iface *if_outgoing2) { - int diff = batadv_iv_ogm_neigh_cmp(neigh1, neigh2); + struct batadv_neigh_ifinfo *neigh1_ifinfo, *neigh2_ifinfo; + uint8_t tq1, tq2; + bool ret; - return diff > -BATADV_TQ_SIMILARITY_THRESHOLD; + neigh1_ifinfo = batadv_neigh_ifinfo_get(neigh1, if_outgoing1); + neigh2_ifinfo = batadv_neigh_ifinfo_get(neigh2, if_outgoing2); + + /* we can't say that the metric is better */ + if (!neigh1_ifinfo || !neigh2_ifinfo) { + ret = false; + goto out; + } + + tq1 = neigh1_ifinfo->bat_iv.tq_avg; + tq2 = neigh2_ifinfo->bat_iv.tq_avg; + ret = (tq1 - tq2) > -BATADV_TQ_SIMILARITY_THRESHOLD; + +out: + if (neigh1_ifinfo) + batadv_neigh_ifinfo_free_ref(neigh1_ifinfo); + if (neigh2_ifinfo) + batadv_neigh_ifinfo_free_ref(neigh2_ifinfo); + + return ret; } static struct batadv_algo_ops batadv_batman_iv __read_mostly = { diff --git a/net/batman-adv/bitarray.c 
b/net/batman-adv/bitarray.c index 973982414d5..9586750022f 100644 --- a/net/batman-adv/bitarray.c +++ b/net/batman-adv/bitarray.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2006-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2006-2014 B.A.T.M.A.N. contributors: * * Simon Wunderlich, Marek Lindner * @@ -12,9 +12,7 @@ * General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #include "main.h" diff --git a/net/batman-adv/bitarray.h b/net/batman-adv/bitarray.h index a81b9322e38..cc2407351d3 100644 --- a/net/batman-adv/bitarray.h +++ b/net/batman-adv/bitarray.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2006-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2006-2014 B.A.T.M.A.N. contributors: * * Simon Wunderlich, Marek Lindner * @@ -12,9 +12,7 @@ * General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #ifndef _NET_BATMAN_ADV_BITARRAY_H_ diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 28eb5e6d0a0..05f0712be5e 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2011-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2011-2014 B.A.T.M.A.N. contributors: * * Simon Wunderlich * @@ -12,9 +12,7 @@ * General Public License for more details. 
* * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #include "main.h" @@ -132,7 +130,9 @@ static void batadv_claim_free_ref(struct batadv_bla_claim *claim) call_rcu(&claim->rcu, batadv_claim_free_rcu); } -/* @bat_priv: the bat priv with all the soft interface information +/** + * batadv_claim_hash_find + * @bat_priv: the bat priv with all the soft interface information * @data: search data (may be local/static data) * * looks for a claim in the hash, and returns it if found @@ -451,7 +451,9 @@ batadv_bla_update_own_backbone_gw(struct batadv_priv *bat_priv, batadv_backbone_gw_free_ref(backbone_gw); } -/* @bat_priv: the bat priv with all the soft interface information +/** + * batadv_bla_answer_request - answer a bla request by sending own claims + * @bat_priv: the bat priv with all the soft interface information * @vid: the vid where the request came on * * Repeat all of our own claims, and finally send an ANNOUNCE frame @@ -497,7 +499,9 @@ static void batadv_bla_answer_request(struct batadv_priv *bat_priv, batadv_backbone_gw_free_ref(backbone_gw); } -/* @backbone_gw: the backbone gateway from whom we are out of sync +/** + * batadv_bla_send_request - send a request to repeat claims + * @backbone_gw: the backbone gateway from whom we are out of sync * * When the crc is wrong, ask the backbone gateway for a full table update. 
* After the request, it will repeat all of his own claims and finally @@ -522,7 +526,9 @@ static void batadv_bla_send_request(struct batadv_bla_backbone_gw *backbone_gw) } } -/* @bat_priv: the bat priv with all the soft interface information +/** + * batadv_bla_send_announce + * @bat_priv: the bat priv with all the soft interface information * @backbone_gw: our backbone gateway which should be announced * * This function sends an announcement. It is called from multiple @@ -846,7 +852,9 @@ static int batadv_check_claim_group(struct batadv_priv *bat_priv, } -/* @bat_priv: the bat priv with all the soft interface information +/** + * batadv_bla_process_claim + * @bat_priv: the bat priv with all the soft interface information * @skb: the frame to be checked * * Check if this is a claim frame, and process it accordingly. @@ -1313,7 +1321,9 @@ out: -/* @bat_priv: the bat priv with all the soft interface information +/** + * batadv_bla_is_backbone_gw_orig + * @bat_priv: the bat priv with all the soft interface information * @orig: originator mac address * @vid: VLAN identifier * diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h index da173e760e7..43c985d92c3 100644 --- a/net/batman-adv/bridge_loop_avoidance.h +++ b/net/batman-adv/bridge_loop_avoidance.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2011-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2011-2014 B.A.T.M.A.N. contributors: * * Simon Wunderlich * @@ -12,9 +12,7 @@ * General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
*/ #ifndef _NET_BATMAN_ADV_BLA_H_ diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c index 049a7a2ac5b..b758881be10 100644 --- a/net/batman-adv/debugfs.c +++ b/net/batman-adv/debugfs.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2010-2014 B.A.T.M.A.N. contributors: * * Marek Lindner * @@ -12,9 +12,7 @@ * General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #include "main.h" @@ -250,6 +248,19 @@ static int batadv_originators_open(struct inode *inode, struct file *file) return single_open(file, batadv_orig_seq_print_text, net_dev); } +/** + * batadv_originators_hardif_open - handles debugfs output for the + * originator table of an hard interface + * @inode: inode pointer to debugfs file + * @file: pointer to the seq_file + */ +static int batadv_originators_hardif_open(struct inode *inode, + struct file *file) +{ + struct net_device *net_dev = (struct net_device *)inode->i_private; + return single_open(file, batadv_orig_hardif_seq_print_text, net_dev); +} + static int batadv_gateways_open(struct inode *inode, struct file *file) { struct net_device *net_dev = (struct net_device *)inode->i_private; @@ -371,6 +382,28 @@ static struct batadv_debuginfo *batadv_mesh_debuginfos[] = { NULL, }; +#define BATADV_HARDIF_DEBUGINFO(_name, _mode, _open) \ +struct batadv_debuginfo batadv_hardif_debuginfo_##_name = { \ + .attr = { \ + .name = __stringify(_name), \ + .mode = _mode, \ + }, \ + .fops = { \ + .owner = THIS_MODULE, \ + .open = _open, \ + .read = seq_read, \ + .llseek = seq_lseek, \ + .release = single_release, \ + }, \ +}; +static BATADV_HARDIF_DEBUGINFO(originators, S_IRUGO, + batadv_originators_hardif_open); + +static struct 
batadv_debuginfo *batadv_hardif_debuginfos[] = { + &batadv_hardif_debuginfo_originators, + NULL, +}; + void batadv_debugfs_init(void) { struct batadv_debuginfo **bat_debug; @@ -398,6 +431,7 @@ void batadv_debugfs_init(void) return; err: debugfs_remove_recursive(batadv_debugfs); + batadv_debugfs = NULL; } void batadv_debugfs_destroy(void) @@ -406,6 +440,59 @@ void batadv_debugfs_destroy(void) batadv_debugfs = NULL; } +/** + * batadv_debugfs_add_hardif - creates the base directory for a hard interface + * in debugfs. + * @hard_iface: hard interface which should be added. + */ +int batadv_debugfs_add_hardif(struct batadv_hard_iface *hard_iface) +{ + struct batadv_debuginfo **bat_debug; + struct dentry *file; + + if (!batadv_debugfs) + goto out; + + hard_iface->debug_dir = debugfs_create_dir(hard_iface->net_dev->name, + batadv_debugfs); + if (!hard_iface->debug_dir) + goto out; + + for (bat_debug = batadv_hardif_debuginfos; *bat_debug; ++bat_debug) { + file = debugfs_create_file(((*bat_debug)->attr).name, + S_IFREG | ((*bat_debug)->attr).mode, + hard_iface->debug_dir, + hard_iface->net_dev, + &(*bat_debug)->fops); + if (!file) + goto rem_attr; + } + + return 0; +rem_attr: + debugfs_remove_recursive(hard_iface->debug_dir); + hard_iface->debug_dir = NULL; +out: +#ifdef CONFIG_DEBUG_FS + return -ENOMEM; +#else + return 0; +#endif /* CONFIG_DEBUG_FS */ +} + +/** + * batadv_debugfs_del_hardif - delete the base directory for a hard interface + * in debugfs. + * @hard_iface: hard interface which is deleted. 
+ */ +void batadv_debugfs_del_hardif(struct batadv_hard_iface *hard_iface) +{ + if (batadv_debugfs) { + debugfs_remove_recursive(hard_iface->debug_dir); + hard_iface->debug_dir = NULL; + } +} + int batadv_debugfs_add_meshif(struct net_device *dev) { struct batadv_priv *bat_priv = netdev_priv(dev); diff --git a/net/batman-adv/debugfs.h b/net/batman-adv/debugfs.h index f8c3849edff..37c4d6ddd04 100644 --- a/net/batman-adv/debugfs.h +++ b/net/batman-adv/debugfs.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2010-2014 B.A.T.M.A.N. contributors: * * Marek Lindner * @@ -12,9 +12,7 @@ * General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #ifndef _NET_BATMAN_ADV_DEBUGFS_H_ @@ -26,5 +24,7 @@ void batadv_debugfs_init(void); void batadv_debugfs_destroy(void); int batadv_debugfs_add_meshif(struct net_device *dev); void batadv_debugfs_del_meshif(struct net_device *dev); +int batadv_debugfs_add_hardif(struct batadv_hard_iface *hard_iface); +void batadv_debugfs_del_hardif(struct batadv_hard_iface *hard_iface); #endif /* _NET_BATMAN_ADV_DEBUGFS_H_ */ diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index b316a4cb6f1..edee5041189 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2011-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2011-2014 B.A.T.M.A.N. contributors: * * Antonio Quartulli * @@ -12,9 +12,7 @@ * General Public License for more details. 
* * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #include <linux/if_ether.h> @@ -141,7 +139,7 @@ static int batadv_compare_dat(const struct hlist_node *node, const void *data2) const void *data1 = container_of(node, struct batadv_dat_entry, hash_entry); - return (memcmp(data1, data2, sizeof(__be32)) == 0 ? 1 : 0); + return memcmp(data1, data2, sizeof(__be32)) == 0 ? 1 : 0; } /** @@ -591,7 +589,8 @@ static bool batadv_dat_send_data(struct batadv_priv *bat_priv, if (cand[i].type == BATADV_DAT_CANDIDATE_NOT_FOUND) continue; - neigh_node = batadv_orig_node_get_router(cand[i].orig_node); + neigh_node = batadv_orig_router_get(cand[i].orig_node, + BATADV_IF_DEFAULT); if (!neigh_node) goto free_orig; @@ -1039,9 +1038,9 @@ bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv, if (hdr_size == sizeof(struct batadv_unicast_4addr_packet)) err = batadv_send_skb_via_tt_4addr(bat_priv, skb_new, BATADV_P_DAT_CACHE_REPLY, - vid); + NULL, vid); else - err = batadv_send_skb_via_tt(bat_priv, skb_new, vid); + err = batadv_send_skb_via_tt(bat_priv, skb_new, NULL, vid); if (err != NET_XMIT_DROP) { batadv_inc_counter(bat_priv, BATADV_CNT_DAT_CACHED_REPLY_TX); diff --git a/net/batman-adv/distributed-arp-table.h b/net/batman-adv/distributed-arp-table.h index 60d853beb8d..ac9be9b67a2 100644 --- a/net/batman-adv/distributed-arp-table.h +++ b/net/batman-adv/distributed-arp-table.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2011-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2011-2014 B.A.T.M.A.N. contributors: * * Antonio Quartulli * @@ -12,13 +12,11 @@ * General Public License for more details. 
* * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef _NET_BATMAN_ADV_ARP_H_ -#define _NET_BATMAN_ADV_ARP_H_ +#ifndef _NET_BATMAN_ADV_DISTRIBUTED_ARP_TABLE_H_ +#define _NET_BATMAN_ADV_DISTRIBUTED_ARP_TABLE_H_ #ifdef CONFIG_BATMAN_ADV_DAT @@ -169,4 +167,4 @@ static inline void batadv_dat_inc_counter(struct batadv_priv *bat_priv, #endif /* CONFIG_BATMAN_ADV_DAT */ -#endif /* _NET_BATMAN_ADV_ARP_H_ */ +#endif /* _NET_BATMAN_ADV_DISTRIBUTED_ARP_TABLE_H_ */ diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index 6ddb6145ffb..88df9b1d552 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2013-2014 B.A.T.M.A.N. contributors: * * Martin Hundebøll <martin@hundeboll.net> * @@ -12,9 +12,7 @@ * General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #include "main.h" diff --git a/net/batman-adv/fragmentation.h b/net/batman-adv/fragmentation.h index ca029e2676e..5d7a0e66a22 100644 --- a/net/batman-adv/fragmentation.h +++ b/net/batman-adv/fragmentation.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2013-2014 B.A.T.M.A.N. contributors: * * Martin Hundebøll <martin@hundeboll.net> * @@ -12,9 +12,7 @@ * General Public License for more details. 
* * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #ifndef _NET_BATMAN_ADV_FRAGMENTATION_H_ diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index 2449afaa763..55cf2260d29 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2009-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2009-2014 B.A.T.M.A.N. contributors: * * Marek Lindner * @@ -12,9 +12,7 @@ * General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
*/ #include "main.h" @@ -30,11 +28,17 @@ #include <linux/udp.h> #include <linux/if_vlan.h> -/* This is the offset of the options field in a dhcp packet starting at - * the beginning of the dhcp header +/* These are the offsets of the "hw type" and "hw address length" in the dhcp + * packet starting at the beginning of the dhcp header */ -#define BATADV_DHCP_OPTIONS_OFFSET 240 -#define BATADV_DHCP_REQUEST 3 +#define BATADV_DHCP_HTYPE_OFFSET 1 +#define BATADV_DHCP_HLEN_OFFSET 2 +/* Value of htype representing Ethernet */ +#define BATADV_DHCP_HTYPE_ETHERNET 0x01 +/* This is the offset of the "chaddr" field in the dhcp packet starting at the + * beginning of the dhcp header + */ +#define BATADV_DHCP_CHADDR_OFFSET 28 static void batadv_gw_node_free_ref(struct batadv_gw_node *gw_node) { @@ -105,7 +109,18 @@ static void batadv_gw_select(struct batadv_priv *bat_priv, spin_unlock_bh(&bat_priv->gw.list_lock); } -void batadv_gw_deselect(struct batadv_priv *bat_priv) +/** + * batadv_gw_reselect - force a gateway reselection + * @bat_priv: the bat priv with all the soft interface information + * + * Set a flag to remind the GW component to perform a new gateway reselection. + * However this function does not ensure that the current gateway is going to be + * deselected. The reselection mechanism may elect the same gateway once again. + * + * This means that invoking batadv_gw_reselect() does not guarantee a gateway + * change and therefore a uevent is not necessarily expected. 
+ */ +void batadv_gw_reselect(struct batadv_priv *bat_priv) { atomic_set(&bat_priv->gw.reselect, 1); } @@ -114,6 +129,7 @@ static struct batadv_gw_node * batadv_gw_get_best_gw_node(struct batadv_priv *bat_priv) { struct batadv_neigh_node *router; + struct batadv_neigh_ifinfo *router_ifinfo; struct batadv_gw_node *gw_node, *curr_gw = NULL; uint32_t max_gw_factor = 0, tmp_gw_factor = 0; uint32_t gw_divisor; @@ -130,14 +146,19 @@ batadv_gw_get_best_gw_node(struct batadv_priv *bat_priv) continue; orig_node = gw_node->orig_node; - router = batadv_orig_node_get_router(orig_node); + router = batadv_orig_router_get(orig_node, BATADV_IF_DEFAULT); if (!router) continue; + router_ifinfo = batadv_neigh_ifinfo_get(router, + BATADV_IF_DEFAULT); + if (!router_ifinfo) + goto next; + if (!atomic_inc_not_zero(&gw_node->refcount)) goto next; - tq_avg = router->bat_iv.tq_avg; + tq_avg = router_ifinfo->bat_iv.tq_avg; switch (atomic_read(&bat_priv->gw_sel_class)) { case 1: /* fast connection */ @@ -182,6 +203,8 @@ batadv_gw_get_best_gw_node(struct batadv_priv *bat_priv) next: batadv_neigh_node_free_ref(router); + if (router_ifinfo) + batadv_neigh_ifinfo_free_ref(router_ifinfo); } rcu_read_unlock(); @@ -207,6 +230,11 @@ void batadv_gw_check_client_stop(struct batadv_priv *bat_priv) if (!curr_gw) return; + /* deselect the current gateway so that next time that client mode is + * enabled a proper GW_ADD event can be sent + */ + batadv_gw_select(bat_priv, NULL); + /* if batman-adv is switching the gw client mode off and a gateway was * already selected, send a DEL uevent */ @@ -219,6 +247,7 @@ void batadv_gw_election(struct batadv_priv *bat_priv) { struct batadv_gw_node *curr_gw = NULL, *next_gw = NULL; struct batadv_neigh_node *router = NULL; + struct batadv_neigh_ifinfo *router_ifinfo = NULL; char gw_addr[18] = { '\0' }; if (atomic_read(&bat_priv->gw_mode) != BATADV_GW_MODE_CLIENT) @@ -237,9 +266,17 @@ void batadv_gw_election(struct batadv_priv *bat_priv) if (next_gw) { sprintf(gw_addr, 
"%pM", next_gw->orig_node->orig); - router = batadv_orig_node_get_router(next_gw->orig_node); + router = batadv_orig_router_get(next_gw->orig_node, + BATADV_IF_DEFAULT); if (!router) { - batadv_gw_deselect(bat_priv); + batadv_gw_reselect(bat_priv); + goto out; + } + + router_ifinfo = batadv_neigh_ifinfo_get(router, + BATADV_IF_DEFAULT); + if (!router_ifinfo) { + batadv_gw_reselect(bat_priv); goto out; } } @@ -256,7 +293,8 @@ void batadv_gw_election(struct batadv_priv *bat_priv) next_gw->bandwidth_down / 10, next_gw->bandwidth_down % 10, next_gw->bandwidth_up / 10, - next_gw->bandwidth_up % 10, router->bat_iv.tq_avg); + next_gw->bandwidth_up % 10, + router_ifinfo->bat_iv.tq_avg); batadv_throw_uevent(bat_priv, BATADV_UEV_GW, BATADV_UEV_ADD, gw_addr); } else { @@ -266,7 +304,8 @@ void batadv_gw_election(struct batadv_priv *bat_priv) next_gw->bandwidth_down / 10, next_gw->bandwidth_down % 10, next_gw->bandwidth_up / 10, - next_gw->bandwidth_up % 10, router->bat_iv.tq_avg); + next_gw->bandwidth_up % 10, + router_ifinfo->bat_iv.tq_avg); batadv_throw_uevent(bat_priv, BATADV_UEV_GW, BATADV_UEV_CHANGE, gw_addr); } @@ -280,33 +319,47 @@ out: batadv_gw_node_free_ref(next_gw); if (router) batadv_neigh_node_free_ref(router); + if (router_ifinfo) + batadv_neigh_ifinfo_free_ref(router_ifinfo); } void batadv_gw_check_election(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node) { + struct batadv_neigh_ifinfo *router_orig_tq = NULL; + struct batadv_neigh_ifinfo *router_gw_tq = NULL; struct batadv_orig_node *curr_gw_orig; struct batadv_neigh_node *router_gw = NULL, *router_orig = NULL; uint8_t gw_tq_avg, orig_tq_avg; curr_gw_orig = batadv_gw_get_selected_orig(bat_priv); if (!curr_gw_orig) - goto deselect; + goto reselect; - router_gw = batadv_orig_node_get_router(curr_gw_orig); + router_gw = batadv_orig_router_get(curr_gw_orig, BATADV_IF_DEFAULT); if (!router_gw) - goto deselect; + goto reselect; + + router_gw_tq = batadv_neigh_ifinfo_get(router_gw, + 
BATADV_IF_DEFAULT); + if (!router_gw_tq) + goto reselect; /* this node already is the gateway */ if (curr_gw_orig == orig_node) goto out; - router_orig = batadv_orig_node_get_router(orig_node); + router_orig = batadv_orig_router_get(orig_node, BATADV_IF_DEFAULT); if (!router_orig) goto out; - gw_tq_avg = router_gw->bat_iv.tq_avg; - orig_tq_avg = router_orig->bat_iv.tq_avg; + router_orig_tq = batadv_neigh_ifinfo_get(router_orig, + BATADV_IF_DEFAULT); + if (!router_orig_tq) + goto out; + + gw_tq_avg = router_gw_tq->bat_iv.tq_avg; + orig_tq_avg = router_orig_tq->bat_iv.tq_avg; /* the TQ value has to be better */ if (orig_tq_avg < gw_tq_avg) @@ -323,8 +376,8 @@ void batadv_gw_check_election(struct batadv_priv *bat_priv, "Restarting gateway selection: better gateway found (tq curr: %i, tq new: %i)\n", gw_tq_avg, orig_tq_avg); -deselect: - batadv_gw_deselect(bat_priv); +reselect: + batadv_gw_reselect(bat_priv); out: if (curr_gw_orig) batadv_orig_node_free_ref(curr_gw_orig); @@ -332,6 +385,10 @@ out: batadv_neigh_node_free_ref(router_gw); if (router_orig) batadv_neigh_node_free_ref(router_orig); + if (router_gw_tq) + batadv_neigh_ifinfo_free_ref(router_gw_tq); + if (router_orig_tq) + batadv_neigh_ifinfo_free_ref(router_orig_tq); return; } @@ -454,7 +511,7 @@ void batadv_gw_node_update(struct batadv_priv *bat_priv, */ curr_gw = batadv_gw_get_selected_gw_node(bat_priv); if (gw_node == curr_gw) - batadv_gw_deselect(bat_priv); + batadv_gw_reselect(bat_priv); } out: @@ -480,7 +537,7 @@ void batadv_gw_node_purge(struct batadv_priv *bat_priv) struct batadv_gw_node *gw_node, *curr_gw; struct hlist_node *node_tmp; unsigned long timeout = msecs_to_jiffies(2 * BATADV_PURGE_TIMEOUT); - int do_deselect = 0; + int do_reselect = 0; curr_gw = batadv_gw_get_selected_gw_node(bat_priv); @@ -494,7 +551,7 @@ void batadv_gw_node_purge(struct batadv_priv *bat_priv) continue; if (curr_gw == gw_node) - do_deselect = 1; + do_reselect = 1; hlist_del_rcu(&gw_node->list); 
batadv_gw_node_free_ref(gw_node); @@ -502,9 +559,9 @@ void batadv_gw_node_purge(struct batadv_priv *bat_priv) spin_unlock_bh(&bat_priv->gw.list_lock); - /* gw_deselect() needs to acquire the gw_list_lock */ - if (do_deselect) - batadv_gw_deselect(bat_priv); + /* gw_reselect() needs to acquire the gw_list_lock */ + if (do_reselect) + batadv_gw_reselect(bat_priv); if (curr_gw) batadv_gw_node_free_ref(curr_gw); @@ -517,28 +574,36 @@ static int batadv_write_buffer_text(struct batadv_priv *bat_priv, { struct batadv_gw_node *curr_gw; struct batadv_neigh_node *router; + struct batadv_neigh_ifinfo *router_ifinfo = NULL; int ret = -1; - router = batadv_orig_node_get_router(gw_node->orig_node); + router = batadv_orig_router_get(gw_node->orig_node, BATADV_IF_DEFAULT); if (!router) goto out; + router_ifinfo = batadv_neigh_ifinfo_get(router, BATADV_IF_DEFAULT); + if (!router_ifinfo) + goto out; + curr_gw = batadv_gw_get_selected_gw_node(bat_priv); ret = seq_printf(seq, "%s %pM (%3i) %pM [%10s]: %u.%u/%u.%u MBit\n", (curr_gw == gw_node ? 
"=>" : " "), gw_node->orig_node->orig, - router->bat_iv.tq_avg, router->addr, + router_ifinfo->bat_iv.tq_avg, router->addr, router->if_incoming->net_dev->name, gw_node->bandwidth_down / 10, gw_node->bandwidth_down % 10, gw_node->bandwidth_up / 10, gw_node->bandwidth_up % 10); - batadv_neigh_node_free_ref(router); if (curr_gw) batadv_gw_node_free_ref(curr_gw); out: + if (router_ifinfo) + batadv_neigh_ifinfo_free_ref(router_ifinfo); + if (router) + batadv_neigh_node_free_ref(router); return ret; } @@ -582,80 +647,39 @@ out: return 0; } -/* this call might reallocate skb data */ -static bool batadv_is_type_dhcprequest(struct sk_buff *skb, int header_len) -{ - int ret = false; - unsigned char *p; - int pkt_len; - - if (skb_linearize(skb) < 0) - goto out; - - pkt_len = skb_headlen(skb); - - if (pkt_len < header_len + BATADV_DHCP_OPTIONS_OFFSET + 1) - goto out; - - p = skb->data + header_len + BATADV_DHCP_OPTIONS_OFFSET; - pkt_len -= header_len + BATADV_DHCP_OPTIONS_OFFSET + 1; - - /* Access the dhcp option lists. Each entry is made up by: - * - octet 1: option type - * - octet 2: option data len (only if type != 255 and 0) - * - octet 3: option data - */ - while (*p != 255 && !ret) { - /* p now points to the first octet: option type */ - if (*p == 53) { - /* type 53 is the message type option. - * Jump the len octet and go to the data octet - */ - if (pkt_len < 2) - goto out; - p += 2; - - /* check if the message type is what we need */ - if (*p == BATADV_DHCP_REQUEST) - ret = true; - break; - } else if (*p == 0) { - /* option type 0 (padding), just go forward */ - if (pkt_len < 1) - goto out; - pkt_len--; - p++; - } else { - /* This is any other option. So we get the length... 
*/ - if (pkt_len < 1) - goto out; - pkt_len--; - p++; - - /* ...and then we jump over the data */ - if (pkt_len < 1 + (*p)) - goto out; - pkt_len -= 1 + (*p); - p += 1 + (*p); - } - } -out: - return ret; -} - -/* this call might reallocate skb data */ -bool batadv_gw_is_dhcp_target(struct sk_buff *skb, unsigned int *header_len) +/** + * batadv_gw_dhcp_recipient_get - check if a packet is a DHCP message + * @skb: the packet to check + * @header_len: a pointer to the batman-adv header size + * @chaddr: buffer where the client address will be stored. Valid + * only if the function returns BATADV_DHCP_TO_CLIENT + * + * Returns: + * - BATADV_DHCP_NO if the packet is not a dhcp message or if there was an error + * while parsing it + * - BATADV_DHCP_TO_SERVER if this is a message going to the DHCP server + * - BATADV_DHCP_TO_CLIENT if this is a message going to a DHCP client + * + * This function may re-allocate the data buffer of the skb passed as argument. + */ +enum batadv_dhcp_recipient +batadv_gw_dhcp_recipient_get(struct sk_buff *skb, unsigned int *header_len, + uint8_t *chaddr) { + enum batadv_dhcp_recipient ret = BATADV_DHCP_NO; struct ethhdr *ethhdr; struct iphdr *iphdr; struct ipv6hdr *ipv6hdr; struct udphdr *udphdr; struct vlan_ethhdr *vhdr; + int chaddr_offset; __be16 proto; + uint8_t *p; /* check for ethernet header */ if (!pskb_may_pull(skb, *header_len + ETH_HLEN)) - return false; + return BATADV_DHCP_NO; + ethhdr = (struct ethhdr *)skb->data; proto = ethhdr->h_proto; *header_len += ETH_HLEN; @@ -663,7 +687,7 @@ bool batadv_gw_is_dhcp_target(struct sk_buff *skb, unsigned int *header_len) /* check for initial vlan header */ if (proto == htons(ETH_P_8021Q)) { if (!pskb_may_pull(skb, *header_len + VLAN_HLEN)) - return false; + return BATADV_DHCP_NO; vhdr = (struct vlan_ethhdr *)skb->data; proto = vhdr->h_vlan_encapsulated_proto; @@ -674,32 +698,34 @@ bool batadv_gw_is_dhcp_target(struct sk_buff *skb, unsigned int *header_len) switch (proto) { case 
htons(ETH_P_IP): if (!pskb_may_pull(skb, *header_len + sizeof(*iphdr))) - return false; + return BATADV_DHCP_NO; + iphdr = (struct iphdr *)(skb->data + *header_len); *header_len += iphdr->ihl * 4; /* check for udp header */ if (iphdr->protocol != IPPROTO_UDP) - return false; + return BATADV_DHCP_NO; break; case htons(ETH_P_IPV6): if (!pskb_may_pull(skb, *header_len + sizeof(*ipv6hdr))) - return false; + return BATADV_DHCP_NO; + ipv6hdr = (struct ipv6hdr *)(skb->data + *header_len); *header_len += sizeof(*ipv6hdr); /* check for udp header */ if (ipv6hdr->nexthdr != IPPROTO_UDP) - return false; + return BATADV_DHCP_NO; break; default: - return false; + return BATADV_DHCP_NO; } if (!pskb_may_pull(skb, *header_len + sizeof(*udphdr))) - return false; + return BATADV_DHCP_NO; /* skb->data might have been reallocated by pskb_may_pull() */ ethhdr = (struct ethhdr *)skb->data; @@ -710,17 +736,40 @@ bool batadv_gw_is_dhcp_target(struct sk_buff *skb, unsigned int *header_len) *header_len += sizeof(*udphdr); /* check for bootp port */ - if ((proto == htons(ETH_P_IP)) && - (udphdr->dest != htons(67))) - return false; + switch (proto) { + case htons(ETH_P_IP): + if (udphdr->dest == htons(67)) + ret = BATADV_DHCP_TO_SERVER; + else if (udphdr->source == htons(67)) + ret = BATADV_DHCP_TO_CLIENT; + break; + case htons(ETH_P_IPV6): + if (udphdr->dest == htons(547)) + ret = BATADV_DHCP_TO_SERVER; + else if (udphdr->source == htons(547)) + ret = BATADV_DHCP_TO_CLIENT; + break; + } - if ((proto == htons(ETH_P_IPV6)) && - (udphdr->dest != htons(547))) - return false; + chaddr_offset = *header_len + BATADV_DHCP_CHADDR_OFFSET; + /* store the client address if the message is going to a client */ + if (ret == BATADV_DHCP_TO_CLIENT && + pskb_may_pull(skb, chaddr_offset + ETH_ALEN)) { + /* check if the DHCP packet carries an Ethernet DHCP */ + p = skb->data + *header_len + BATADV_DHCP_HTYPE_OFFSET; + if (*p != BATADV_DHCP_HTYPE_ETHERNET) + return BATADV_DHCP_NO; + + /* check if the DHCP packet 
carries a valid Ethernet address */ + p = skb->data + *header_len + BATADV_DHCP_HLEN_OFFSET; + if (*p != ETH_ALEN) + return BATADV_DHCP_NO; + + memcpy(chaddr, skb->data + chaddr_offset, ETH_ALEN); + } - return true; + return ret; } - /** * batadv_gw_out_of_range - check if the dhcp request destination is the best gw * @bat_priv: the bat priv with all the soft interface information @@ -734,6 +783,7 @@ bool batadv_gw_is_dhcp_target(struct sk_buff *skb, unsigned int *header_len) * false otherwise. * * This call might reallocate skb data. + * Must be invoked only when the DHCP packet is going TO a DHCP SERVER. */ bool batadv_gw_out_of_range(struct batadv_priv *bat_priv, struct sk_buff *skb) @@ -741,19 +791,14 @@ bool batadv_gw_out_of_range(struct batadv_priv *bat_priv, struct batadv_neigh_node *neigh_curr = NULL, *neigh_old = NULL; struct batadv_orig_node *orig_dst_node = NULL; struct batadv_gw_node *gw_node = NULL, *curr_gw = NULL; - struct ethhdr *ethhdr; - bool ret, out_of_range = false; - unsigned int header_len = 0; + struct batadv_neigh_ifinfo *curr_ifinfo, *old_ifinfo; + struct ethhdr *ethhdr = (struct ethhdr *)skb->data; + bool out_of_range = false; uint8_t curr_tq_avg; unsigned short vid; vid = batadv_get_vid(skb, 0); - ret = batadv_gw_is_dhcp_target(skb, &header_len); - if (!ret) - goto out; - - ethhdr = (struct ethhdr *)skb->data; orig_dst_node = batadv_transtable_search(bat_priv, ethhdr->h_source, ethhdr->h_dest, vid); if (!orig_dst_node) @@ -763,10 +808,6 @@ bool batadv_gw_out_of_range(struct batadv_priv *bat_priv, if (!gw_node->bandwidth_down == 0) goto out; - ret = batadv_is_type_dhcprequest(skb, header_len); - if (!ret) - goto out; - switch (atomic_read(&bat_priv->gw_mode)) { case BATADV_GW_MODE_SERVER: /* If we are a GW then we are our best GW. 
We can artificially @@ -792,7 +833,14 @@ bool batadv_gw_out_of_range(struct batadv_priv *bat_priv, if (!neigh_curr) goto out; - curr_tq_avg = neigh_curr->bat_iv.tq_avg; + curr_ifinfo = batadv_neigh_ifinfo_get(neigh_curr, + BATADV_IF_DEFAULT); + if (!curr_ifinfo) + goto out; + + curr_tq_avg = curr_ifinfo->bat_iv.tq_avg; + batadv_neigh_ifinfo_free_ref(curr_ifinfo); + break; case BATADV_GW_MODE_OFF: default: @@ -803,8 +851,13 @@ bool batadv_gw_out_of_range(struct batadv_priv *bat_priv, if (!neigh_old) goto out; - if (curr_tq_avg - neigh_old->bat_iv.tq_avg > BATADV_GW_THRESHOLD) + old_ifinfo = batadv_neigh_ifinfo_get(neigh_old, BATADV_IF_DEFAULT); + if (!old_ifinfo) + goto out; + + if ((curr_tq_avg - old_ifinfo->bat_iv.tq_avg) > BATADV_GW_THRESHOLD) out_of_range = true; + batadv_neigh_ifinfo_free_ref(old_ifinfo); out: if (orig_dst_node) diff --git a/net/batman-adv/gateway_client.h b/net/batman-adv/gateway_client.h index d95c2d23195..7ee53bb7d50 100644 --- a/net/batman-adv/gateway_client.h +++ b/net/batman-adv/gateway_client.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2009-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2009-2014 B.A.T.M.A.N. contributors: * * Marek Lindner * @@ -12,16 +12,14 @@ * General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
*/ #ifndef _NET_BATMAN_ADV_GATEWAY_CLIENT_H_ #define _NET_BATMAN_ADV_GATEWAY_CLIENT_H_ void batadv_gw_check_client_stop(struct batadv_priv *bat_priv); -void batadv_gw_deselect(struct batadv_priv *bat_priv); +void batadv_gw_reselect(struct batadv_priv *bat_priv); void batadv_gw_election(struct batadv_priv *bat_priv); struct batadv_orig_node * batadv_gw_get_selected_orig(struct batadv_priv *bat_priv); @@ -34,7 +32,9 @@ void batadv_gw_node_delete(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node); void batadv_gw_node_purge(struct batadv_priv *bat_priv); int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset); -bool batadv_gw_is_dhcp_target(struct sk_buff *skb, unsigned int *header_len); bool batadv_gw_out_of_range(struct batadv_priv *bat_priv, struct sk_buff *skb); +enum batadv_dhcp_recipient +batadv_gw_dhcp_recipient_get(struct sk_buff *skb, unsigned int *header_len, + uint8_t *chaddr); #endif /* _NET_BATMAN_ADV_GATEWAY_CLIENT_H_ */ diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c index b211b0f9cb7..6f5e621f220 100644 --- a/net/batman-adv/gateway_common.c +++ b/net/batman-adv/gateway_common.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2009-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2009-2014 B.A.T.M.A.N. contributors: * * Marek Lindner * @@ -12,9 +12,7 @@ * General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
*/ #include "main.h" @@ -164,7 +162,7 @@ ssize_t batadv_gw_bandwidth_set(struct net_device *net_dev, char *buff, if ((down_curr == down_new) && (up_curr == up_new)) return count; - batadv_gw_deselect(bat_priv); + batadv_gw_reselect(bat_priv); batadv_info(net_dev, "Changing gateway bandwidth from: '%u.%u/%u.%u MBit' to: '%u.%u/%u.%u MBit'\n", down_curr / 10, down_curr % 10, up_curr / 10, up_curr % 10, diff --git a/net/batman-adv/gateway_common.h b/net/batman-adv/gateway_common.h index 56384a4cd18..aa511656194 100644 --- a/net/batman-adv/gateway_common.h +++ b/net/batman-adv/gateway_common.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2009-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2009-2014 B.A.T.M.A.N. contributors: * * Marek Lindner * @@ -12,9 +12,7 @@ * General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #ifndef _NET_BATMAN_ADV_GATEWAY_COMMON_H_ diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index 57c2a19dcb5..3d417d3641c 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -12,9 +12,7 @@ * General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
*/ #include "main.h" @@ -25,6 +23,7 @@ #include "translation-table.h" #include "routing.h" #include "sysfs.h" +#include "debugfs.h" #include "originator.h" #include "hash.h" #include "bridge_loop_avoidance.h" @@ -88,15 +87,13 @@ static bool batadv_is_on_batman_iface(const struct net_device *net_dev) return false; /* recurse over the parent device */ - parent_dev = dev_get_by_index(&init_net, net_dev->iflink); + parent_dev = __dev_get_by_index(&init_net, net_dev->iflink); /* if we got a NULL parent_dev there is something broken.. */ if (WARN(!parent_dev, "Cannot find parent device")) return false; ret = batadv_is_on_batman_iface(parent_dev); - if (parent_dev) - dev_put(parent_dev); return ret; } @@ -541,6 +538,7 @@ static void batadv_hardif_remove_interface_finish(struct work_struct *work) hard_iface = container_of(work, struct batadv_hard_iface, cleanup_work); + batadv_debugfs_del_hardif(hard_iface); batadv_sysfs_del_hardif(&hard_iface->hardif_obj); batadv_hardif_free_ref(hard_iface); } @@ -571,6 +569,11 @@ batadv_hardif_add_interface(struct net_device *net_dev) hard_iface->net_dev = net_dev; hard_iface->soft_iface = NULL; hard_iface->if_status = BATADV_IF_NOT_IN_USE; + + ret = batadv_debugfs_add_hardif(hard_iface); + if (ret) + goto free_sysfs; + INIT_LIST_HEAD(&hard_iface->list); INIT_WORK(&hard_iface->cleanup_work, batadv_hardif_remove_interface_finish); @@ -587,6 +590,8 @@ batadv_hardif_add_interface(struct net_device *net_dev) return hard_iface; +free_sysfs: + batadv_sysfs_del_hardif(&hard_iface->hardif_obj); free_if: kfree(hard_iface); release_dev: diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h index df4c8bd45c4..1918cd50b62 100644 --- a/net/batman-adv/hard-interface.h +++ b/net/batman-adv/hard-interface.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -12,9 +12,7 @@ * General Public License for more details. 
* * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #ifndef _NET_BATMAN_ADV_HARD_INTERFACE_H_ @@ -42,6 +40,7 @@ enum batadv_hard_if_cleanup { extern struct notifier_block batadv_hard_if_notifier; bool batadv_is_wifi_netdev(struct net_device *net_device); +bool batadv_is_wifi_iface(int ifindex); struct batadv_hard_iface* batadv_hardif_get_by_netdev(const struct net_device *net_dev); int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, @@ -53,6 +52,11 @@ int batadv_hardif_min_mtu(struct net_device *soft_iface); void batadv_update_min_mtu(struct net_device *soft_iface); void batadv_hardif_free_rcu(struct rcu_head *rcu); +/** + * batadv_hardif_free_ref - decrement the hard interface refcounter and + * possibly free it + * @hard_iface: the hard interface to free + */ static inline void batadv_hardif_free_ref(struct batadv_hard_iface *hard_iface) { @@ -60,6 +64,18 @@ batadv_hardif_free_ref(struct batadv_hard_iface *hard_iface) call_rcu(&hard_iface->rcu, batadv_hardif_free_rcu); } +/** + * batadv_hardif_free_ref_now - decrement the hard interface refcounter and + * possibly free it (without rcu callback) + * @hard_iface: the hard interface to free + */ +static inline void +batadv_hardif_free_ref_now(struct batadv_hard_iface *hard_iface) +{ + if (atomic_dec_and_test(&hard_iface->refcount)) + batadv_hardif_free_rcu(&hard_iface->rcu); +} + static inline struct batadv_hard_iface * batadv_primary_if_get_selected(struct batadv_priv *bat_priv) { diff --git a/net/batman-adv/hash.c b/net/batman-adv/hash.c index 7198dafd3bf..63bdf7e94f1 100644 --- a/net/batman-adv/hash.c +++ b/net/batman-adv/hash.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2006-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2006-2014 B.A.T.M.A.N. 
contributors: * * Simon Wunderlich, Marek Lindner * @@ -12,9 +12,7 @@ * General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #include "main.h" diff --git a/net/batman-adv/hash.h b/net/batman-adv/hash.h index 1b4da72f209..539fc126679 100644 --- a/net/batman-adv/hash.h +++ b/net/batman-adv/hash.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2006-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2006-2014 B.A.T.M.A.N. contributors: * * Simon Wunderlich, Marek Lindner * @@ -12,9 +12,7 @@ * General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #ifndef _NET_BATMAN_ADV_HASH_H_ diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c index 130cc3217e2..abb9d6e0388 100644 --- a/net/batman-adv/icmp_socket.c +++ b/net/batman-adv/icmp_socket.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors: * * Marek Lindner * @@ -12,9 +12,7 @@ * General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
*/ #include "main.h" @@ -217,7 +215,8 @@ static ssize_t batadv_socket_write(struct file *file, const char __user *buff, if (!orig_node) goto dst_unreach; - neigh_node = batadv_orig_node_get_router(orig_node); + neigh_node = batadv_orig_router_get(orig_node, + BATADV_IF_DEFAULT); if (!neigh_node) goto dst_unreach; diff --git a/net/batman-adv/icmp_socket.h b/net/batman-adv/icmp_socket.h index 6665080dff7..0c33950aa4a 100644 --- a/net/batman-adv/icmp_socket.h +++ b/net/batman-adv/icmp_socket.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors: * * Marek Lindner * @@ -12,9 +12,7 @@ * General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #ifndef _NET_BATMAN_ADV_ICMP_SOCKET_H_ diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index faba0f61ad5..66ae135b9f2 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -12,9 +12,7 @@ * General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
*/ #include <linux/crc32c.h> @@ -421,13 +419,23 @@ static void batadv_recv_handler_init(void) for (i = BATADV_UNICAST_MIN; i <= BATADV_UNICAST_MAX; i++) batadv_rx_handler[i] = batadv_recv_unhandled_unicast_packet; - /* compile time checks for struct member offsets */ - BUILD_BUG_ON(offsetof(struct batadv_unicast_4addr_packet, src) != 10); - BUILD_BUG_ON(offsetof(struct batadv_unicast_packet, dest) != 4); - BUILD_BUG_ON(offsetof(struct batadv_unicast_tvlv_packet, dst) != 4); - BUILD_BUG_ON(offsetof(struct batadv_frag_packet, dest) != 4); - BUILD_BUG_ON(offsetof(struct batadv_icmp_packet, dst) != 4); - BUILD_BUG_ON(offsetof(struct batadv_icmp_packet_rr, dst) != 4); + /* compile time checks for sizes */ + BUILD_BUG_ON(sizeof(struct batadv_bla_claim_dst) != 6); + BUILD_BUG_ON(sizeof(struct batadv_ogm_packet) != 24); + BUILD_BUG_ON(sizeof(struct batadv_icmp_header) != 20); + BUILD_BUG_ON(sizeof(struct batadv_icmp_packet) != 20); + BUILD_BUG_ON(sizeof(struct batadv_icmp_packet_rr) != 116); + BUILD_BUG_ON(sizeof(struct batadv_unicast_packet) != 10); + BUILD_BUG_ON(sizeof(struct batadv_unicast_4addr_packet) != 18); + BUILD_BUG_ON(sizeof(struct batadv_frag_packet) != 20); + BUILD_BUG_ON(sizeof(struct batadv_bcast_packet) != 14); + BUILD_BUG_ON(sizeof(struct batadv_coded_packet) != 46); + BUILD_BUG_ON(sizeof(struct batadv_unicast_tvlv_packet) != 20); + BUILD_BUG_ON(sizeof(struct batadv_tvlv_hdr) != 4); + BUILD_BUG_ON(sizeof(struct batadv_tvlv_gateway_data) != 8); + BUILD_BUG_ON(sizeof(struct batadv_tvlv_tt_vlan_data) != 8); + BUILD_BUG_ON(sizeof(struct batadv_tvlv_tt_change) != 12); + BUILD_BUG_ON(sizeof(struct batadv_tvlv_roam_adv) != 8); /* broadcast packet */ batadv_rx_handler[BATADV_BCAST] = batadv_recv_bcast_packet; @@ -1173,6 +1181,32 @@ unsigned short batadv_get_vid(struct sk_buff *skb, size_t header_len) return vid; } +/** + * batadv_vlan_ap_isola_get - return the AP isolation status for the given vlan + * @bat_priv: the bat priv with all the soft interface 
information + * @vid: the VLAN identifier for which the AP isolation attribute has to be + * looked up + * + * Returns true if AP isolation is on for the VLAN identified by vid, false + * otherwise + */ +bool batadv_vlan_ap_isola_get(struct batadv_priv *bat_priv, unsigned short vid) +{ + bool ap_isolation_enabled = false; + struct batadv_softif_vlan *vlan; + + /* if the AP isolation is requested on a VLAN, then check for its + * setting in the proper VLAN private data structure + */ + vlan = batadv_softif_vlan_get(bat_priv, vid); + if (vlan) { + ap_isolation_enabled = atomic_read(&vlan->ap_isolation); + batadv_softif_vlan_free_ref(vlan); + } + + return ap_isolation_enabled; +} + static int batadv_param_set_ra(const char *val, const struct kernel_param *kp) { struct batadv_algo_ops *bat_algo_ops; diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index f94f287b867..9374f1a5134 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -12,9 +12,7 @@ * General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #ifndef _NET_BATMAN_ADV_MAIN_H_ @@ -26,7 +24,7 @@ #define BATADV_DRIVER_DEVICE "batman-adv" #ifndef BATADV_SOURCE_VERSION -#define BATADV_SOURCE_VERSION "2013.5.0" +#define BATADV_SOURCE_VERSION "2014.1.0" #endif /* B.A.T.M.A.N. parameters */ @@ -72,6 +70,14 @@ #define BATADV_NULL_IFINDEX 0 /* dummy ifindex used to avoid iface checks */ +#define BATADV_NO_MARK 0 + +/* default interface for multi interface operation. The default interface is + * used for communication which originated locally (i.e.
is not forwarded) + * or where special forwarding is not desired/necessary. + */ +#define BATADV_IF_DEFAULT ((struct batadv_hard_iface *)NULL) + #define BATADV_NUM_WORDS BITS_TO_LONGS(BATADV_TQ_LOCAL_WINDOW_SIZE) #define BATADV_LOG_BUF_LEN 8192 /* has to be a power of 2 */ @@ -266,7 +272,7 @@ static inline void batadv_dbg(int type __always_unused, */ static inline int batadv_compare_eth(const void *data1, const void *data2) { - return (memcmp(data1, data2, ETH_ALEN) == 0 ? 1 : 0); + return ether_addr_equal_unaligned(data1, data2); } /** @@ -369,5 +375,6 @@ void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, uint8_t *src, uint8_t *dst, uint8_t type, uint8_t version, void *tvlv_value, uint16_t tvlv_value_len); unsigned short batadv_get_vid(struct sk_buff *skb, size_t header_len); +bool batadv_vlan_ap_isola_get(struct batadv_priv *bat_priv, unsigned short vid); #endif /* _NET_BATMAN_ADV_MAIN_H_ */ diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index 511d7e1eea3..f1b604d88dc 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2012-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2012-2014 B.A.T.M.A.N. contributors: * * Martin Hundebøll, Jeppe Ledet-Pedersen * @@ -12,9 +12,7 @@ * General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
*/ #include <linux/debugfs.h> @@ -720,9 +718,21 @@ static bool batadv_can_nc_with_orig(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, struct batadv_ogm_packet *ogm_packet) { - if (orig_node->last_real_seqno != ntohl(ogm_packet->seqno)) + struct batadv_orig_ifinfo *orig_ifinfo; + uint32_t last_real_seqno; + uint8_t last_ttl; + + orig_ifinfo = batadv_orig_ifinfo_get(orig_node, BATADV_IF_DEFAULT); + if (!orig_ifinfo) return false; - if (orig_node->last_ttl != ogm_packet->ttl + 1) + + last_ttl = orig_ifinfo->last_ttl; + last_real_seqno = orig_ifinfo->last_real_seqno; + batadv_orig_ifinfo_free_ref(orig_ifinfo); + + if (last_real_seqno != ntohl(ogm_packet->seqno)) + return false; + if (last_ttl != ogm_packet->ttl + 1) return false; if (!batadv_compare_eth(ogm_packet->orig, ogm_packet->prev_sender)) return false; @@ -1010,6 +1020,8 @@ static bool batadv_nc_code_packets(struct batadv_priv *bat_priv, struct batadv_coded_packet *coded_packet; struct batadv_neigh_node *neigh_tmp, *router_neigh; struct batadv_neigh_node *router_coding = NULL; + struct batadv_neigh_ifinfo *router_neigh_ifinfo = NULL; + struct batadv_neigh_ifinfo *router_coding_ifinfo = NULL; uint8_t *first_source, *first_dest, *second_source, *second_dest; __be32 packet_id1, packet_id2; size_t count; @@ -1019,19 +1031,34 @@ static bool batadv_nc_code_packets(struct batadv_priv *bat_priv, int coded_size = sizeof(*coded_packet); int header_add = coded_size - unicast_size; - router_neigh = batadv_orig_node_get_router(neigh_node->orig_node); + /* TODO: do we need to consider the outgoing interface for + * coded packets? 
+ */ + router_neigh = batadv_orig_router_get(neigh_node->orig_node, + BATADV_IF_DEFAULT); if (!router_neigh) goto out; + router_neigh_ifinfo = batadv_neigh_ifinfo_get(router_neigh, + BATADV_IF_DEFAULT); + if (!router_neigh_ifinfo) + goto out; + neigh_tmp = nc_packet->neigh_node; - router_coding = batadv_orig_node_get_router(neigh_tmp->orig_node); + router_coding = batadv_orig_router_get(neigh_tmp->orig_node, + BATADV_IF_DEFAULT); if (!router_coding) goto out; - tq_tmp = batadv_nc_random_weight_tq(router_neigh->bat_iv.tq_avg); - tq_weighted_neigh = tq_tmp; - tq_tmp = batadv_nc_random_weight_tq(router_coding->bat_iv.tq_avg); - tq_weighted_coding = tq_tmp; + router_coding_ifinfo = batadv_neigh_ifinfo_get(router_coding, + BATADV_IF_DEFAULT); + if (!router_coding_ifinfo) + goto out; + + tq_tmp = router_neigh_ifinfo->bat_iv.tq_avg; + tq_weighted_neigh = batadv_nc_random_weight_tq(tq_tmp); + tq_tmp = router_coding_ifinfo->bat_iv.tq_avg; + tq_weighted_coding = batadv_nc_random_weight_tq(tq_tmp); /* Select one destination for the MAC-header dst-field based on * weighted TQ-values. @@ -1155,6 +1182,10 @@ out: batadv_neigh_node_free_ref(router_neigh); if (router_coding) batadv_neigh_node_free_ref(router_coding); + if (router_neigh_ifinfo) + batadv_neigh_ifinfo_free_ref(router_neigh_ifinfo); + if (router_coding_ifinfo) + batadv_neigh_ifinfo_free_ref(router_coding_ifinfo); return res; } diff --git a/net/batman-adv/network-coding.h b/net/batman-adv/network-coding.h index d4fd315b526..358c0d686ab 100644 --- a/net/batman-adv/network-coding.h +++ b/net/batman-adv/network-coding.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2012-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2012-2014 B.A.T.M.A.N. contributors: * * Martin Hundebøll, Jeppe Ledet-Pedersen * @@ -12,9 +12,7 @@ * General Public License for more details. 
* * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #ifndef _NET_BATMAN_ADV_NETWORK_CODING_H_ @@ -64,7 +62,6 @@ static inline int batadv_nc_mesh_init(struct batadv_priv *bat_priv) static inline void batadv_nc_mesh_free(struct batadv_priv *bat_priv) { - return; } static inline void @@ -74,7 +71,6 @@ batadv_nc_update_nc_node(struct batadv_priv *bat_priv, struct batadv_ogm_packet *ogm_packet, int is_single_hop_neigh) { - return; } static inline void @@ -83,17 +79,14 @@ batadv_nc_purge_orig(struct batadv_priv *bat_priv, bool (*to_purge)(struct batadv_priv *, struct batadv_nc_node *)) { - return; } static inline void batadv_nc_init_bat_priv(struct batadv_priv *bat_priv) { - return; } static inline void batadv_nc_init_orig(struct batadv_orig_node *orig_node) { - return; } static inline bool batadv_nc_skb_forward(struct sk_buff *skb, @@ -106,14 +99,12 @@ static inline void batadv_nc_skb_store_for_decoding(struct batadv_priv *bat_priv, struct sk_buff *skb) { - return; } static inline void batadv_nc_skb_store_sniffed_unicast(struct batadv_priv *bat_priv, struct sk_buff *skb) { - return; } static inline int batadv_nc_nodes_seq_print_text(struct seq_file *seq, diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 8ab14340d10..6df12a2e360 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2009-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2009-2014 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -12,9 +12,7 @@ * General Public License for more details. 
* * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #include "main.h" @@ -41,7 +39,7 @@ int batadv_compare_orig(const struct hlist_node *node, const void *data2) const void *data1 = container_of(node, struct batadv_orig_node, hash_entry); - return (memcmp(data1, data2, ETH_ALEN) == 0 ? 1 : 0); + return batadv_compare_eth(data1, data2); } /** @@ -150,20 +148,114 @@ err: return -ENOMEM; } +/** + * batadv_neigh_ifinfo_free_rcu - free the neigh_ifinfo object + * @rcu: rcu pointer of the neigh_ifinfo object + */ +static void batadv_neigh_ifinfo_free_rcu(struct rcu_head *rcu) +{ + struct batadv_neigh_ifinfo *neigh_ifinfo; + + neigh_ifinfo = container_of(rcu, struct batadv_neigh_ifinfo, rcu); + + if (neigh_ifinfo->if_outgoing != BATADV_IF_DEFAULT) + batadv_hardif_free_ref_now(neigh_ifinfo->if_outgoing); + + kfree(neigh_ifinfo); +} + +/** + * batadv_neigh_ifinfo_free_ref_now - decrement the refcounter and possibly free + * the neigh_ifinfo (without rcu callback) + * @neigh_ifinfo: the neigh_ifinfo object to release + */ +static void +batadv_neigh_ifinfo_free_ref_now(struct batadv_neigh_ifinfo *neigh_ifinfo) +{ + if (atomic_dec_and_test(&neigh_ifinfo->refcount)) + batadv_neigh_ifinfo_free_rcu(&neigh_ifinfo->rcu); +} + +/** + * batadv_neigh_ifinfo_free_ref - decrement the refcounter and possibly free + * the neigh_ifinfo + * @neigh_ifinfo: the neigh_ifinfo object to release + */ +void batadv_neigh_ifinfo_free_ref(struct batadv_neigh_ifinfo *neigh_ifinfo) +{ + if (atomic_dec_and_test(&neigh_ifinfo->refcount)) + call_rcu(&neigh_ifinfo->rcu, batadv_neigh_ifinfo_free_rcu); +} + +/** + * batadv_neigh_node_free_rcu - free the neigh_node + * @rcu: rcu pointer of the neigh_node + */ +static void batadv_neigh_node_free_rcu(struct rcu_head *rcu) +{
+ struct hlist_node *node_tmp; + struct batadv_neigh_node *neigh_node; + struct batadv_neigh_ifinfo *neigh_ifinfo; + + neigh_node = container_of(rcu, struct batadv_neigh_node, rcu); + + hlist_for_each_entry_safe(neigh_ifinfo, node_tmp, + &neigh_node->ifinfo_list, list) { + batadv_neigh_ifinfo_free_ref_now(neigh_ifinfo); + } + batadv_hardif_free_ref_now(neigh_node->if_incoming); + + kfree(neigh_node); +} + +/** + * batadv_neigh_node_free_ref_now - decrement the neighbors refcounter + * and possibly free it (without rcu callback) + * @neigh_node: neigh neighbor to free + */ +static void +batadv_neigh_node_free_ref_now(struct batadv_neigh_node *neigh_node) +{ + if (atomic_dec_and_test(&neigh_node->refcount)) + batadv_neigh_node_free_rcu(&neigh_node->rcu); +} + +/** + * batadv_neigh_node_free_ref - decrement the neighbors refcounter + * and possibly free it + * @neigh_node: neigh neighbor to free + */ void batadv_neigh_node_free_ref(struct batadv_neigh_node *neigh_node) { if (atomic_dec_and_test(&neigh_node->refcount)) - kfree_rcu(neigh_node, rcu); + call_rcu(&neigh_node->rcu, batadv_neigh_node_free_rcu); } -/* increases the refcounter of a found router */ +/** + * batadv_orig_node_get_router - router to the originator depending on iface + * @orig_node: the orig node for the router + * @if_outgoing: the interface where the payload packet has been received or + * the OGM should be sent to + * + * Returns the neighbor which should be router for this orig_node/iface. + * + * The object is returned with refcounter increased by 1. 
+ */ struct batadv_neigh_node * -batadv_orig_node_get_router(struct batadv_orig_node *orig_node) +batadv_orig_router_get(struct batadv_orig_node *orig_node, + const struct batadv_hard_iface *if_outgoing) { - struct batadv_neigh_node *router; + struct batadv_orig_ifinfo *orig_ifinfo; + struct batadv_neigh_node *router = NULL; rcu_read_lock(); - router = rcu_dereference(orig_node->router); + hlist_for_each_entry_rcu(orig_ifinfo, &orig_node->ifinfo_list, list) { + if (orig_ifinfo->if_outgoing != if_outgoing) + continue; + + router = rcu_dereference(orig_ifinfo->router); + break; + } if (router && !atomic_inc_not_zero(&router->refcount)) router = NULL; @@ -173,6 +265,164 @@ batadv_orig_node_get_router(struct batadv_orig_node *orig_node) } /** + * batadv_orig_ifinfo_get - find the ifinfo from an orig_node + * @orig_node: the orig node to be queried + * @if_outgoing: the interface for which the ifinfo should be acquired + * + * Returns the requested orig_ifinfo or NULL if not found. + * + * The object is returned with refcounter increased by 1. + */ +struct batadv_orig_ifinfo * +batadv_orig_ifinfo_get(struct batadv_orig_node *orig_node, + struct batadv_hard_iface *if_outgoing) +{ + struct batadv_orig_ifinfo *tmp, *orig_ifinfo = NULL; + + rcu_read_lock(); + hlist_for_each_entry_rcu(tmp, &orig_node->ifinfo_list, + list) { + if (tmp->if_outgoing != if_outgoing) + continue; + + if (!atomic_inc_not_zero(&tmp->refcount)) + continue; + + orig_ifinfo = tmp; + break; + } + rcu_read_unlock(); + + return orig_ifinfo; +} + +/** + * batadv_orig_ifinfo_new - search and possibly create an orig_ifinfo object + * @orig_node: the orig node to be queried + * @if_outgoing: the interface for which the ifinfo should be acquired + * + * Returns NULL in case of failure or the orig_ifinfo object for the if_outgoing + * interface otherwise. The object is created and added to the list + * if it does not exist. + * + * The object is returned with refcounter increased by 1. 
+ */ +struct batadv_orig_ifinfo * +batadv_orig_ifinfo_new(struct batadv_orig_node *orig_node, + struct batadv_hard_iface *if_outgoing) +{ + struct batadv_orig_ifinfo *orig_ifinfo = NULL; + unsigned long reset_time; + + spin_lock_bh(&orig_node->neigh_list_lock); + + orig_ifinfo = batadv_orig_ifinfo_get(orig_node, if_outgoing); + if (orig_ifinfo) + goto out; + + orig_ifinfo = kzalloc(sizeof(*orig_ifinfo), GFP_ATOMIC); + if (!orig_ifinfo) + goto out; + + if (if_outgoing != BATADV_IF_DEFAULT && + !atomic_inc_not_zero(&if_outgoing->refcount)) { + kfree(orig_ifinfo); + orig_ifinfo = NULL; + goto out; + } + + reset_time = jiffies - 1; + reset_time -= msecs_to_jiffies(BATADV_RESET_PROTECTION_MS); + orig_ifinfo->batman_seqno_reset = reset_time; + orig_ifinfo->if_outgoing = if_outgoing; + INIT_HLIST_NODE(&orig_ifinfo->list); + atomic_set(&orig_ifinfo->refcount, 2); + hlist_add_head_rcu(&orig_ifinfo->list, + &orig_node->ifinfo_list); +out: + spin_unlock_bh(&orig_node->neigh_list_lock); + return orig_ifinfo; +} + +/** + * batadv_neigh_ifinfo_get - find the ifinfo from an neigh_node + * @neigh_node: the neigh node to be queried + * @if_outgoing: the interface for which the ifinfo should be acquired + * + * The object is returned with refcounter increased by 1. 
+ * + * Returns the requested neigh_ifinfo or NULL if not found + */ +struct batadv_neigh_ifinfo * +batadv_neigh_ifinfo_get(struct batadv_neigh_node *neigh, + struct batadv_hard_iface *if_outgoing) +{ + struct batadv_neigh_ifinfo *neigh_ifinfo = NULL, + *tmp_neigh_ifinfo; + + rcu_read_lock(); + hlist_for_each_entry_rcu(tmp_neigh_ifinfo, &neigh->ifinfo_list, + list) { + if (tmp_neigh_ifinfo->if_outgoing != if_outgoing) + continue; + + if (!atomic_inc_not_zero(&tmp_neigh_ifinfo->refcount)) + continue; + + neigh_ifinfo = tmp_neigh_ifinfo; + break; + } + rcu_read_unlock(); + + return neigh_ifinfo; +} + +/** + * batadv_neigh_ifinfo_new - search and possibly create a neigh_ifinfo object + * @neigh: the neigh node to be queried + * @if_outgoing: the interface for which the ifinfo should be acquired + * + * Returns NULL in case of failure or the neigh_ifinfo object for the + * if_outgoing interface otherwise. The object is created and added to the list + * if it does not exist. + * + * The object is returned with refcounter increased by 1.
+ */ +struct batadv_neigh_ifinfo * +batadv_neigh_ifinfo_new(struct batadv_neigh_node *neigh, + struct batadv_hard_iface *if_outgoing) +{ + struct batadv_neigh_ifinfo *neigh_ifinfo; + + spin_lock_bh(&neigh->ifinfo_lock); + + neigh_ifinfo = batadv_neigh_ifinfo_get(neigh, if_outgoing); + if (neigh_ifinfo) + goto out; + + neigh_ifinfo = kzalloc(sizeof(*neigh_ifinfo), GFP_ATOMIC); + if (!neigh_ifinfo) + goto out; + + if (if_outgoing && !atomic_inc_not_zero(&if_outgoing->refcount)) { + kfree(neigh_ifinfo); + neigh_ifinfo = NULL; + goto out; + } + + INIT_HLIST_NODE(&neigh_ifinfo->list); + atomic_set(&neigh_ifinfo->refcount, 2); + neigh_ifinfo->if_outgoing = if_outgoing; + + hlist_add_head_rcu(&neigh_ifinfo->list, &neigh->ifinfo_list); + +out: + spin_unlock_bh(&neigh->ifinfo_lock); + + return neigh_ifinfo; +} + +/** * batadv_neigh_node_new - create and init a new neigh_node object * @hard_iface: the interface where the neighbour is connected to * @neigh_addr: the mac address of the neighbour interface @@ -193,13 +443,13 @@ batadv_neigh_node_new(struct batadv_hard_iface *hard_iface, goto out; INIT_HLIST_NODE(&neigh_node->list); + INIT_HLIST_HEAD(&neigh_node->ifinfo_list); + spin_lock_init(&neigh_node->ifinfo_lock); memcpy(neigh_node->addr, neigh_addr, ETH_ALEN); neigh_node->if_incoming = hard_iface; neigh_node->orig_node = orig_node; - INIT_LIST_HEAD(&neigh_node->bonding_list); - /* extra reference for return */ atomic_set(&neigh_node->refcount, 2); @@ -207,30 +457,68 @@ out: return neigh_node; } +/** + * batadv_orig_ifinfo_free_rcu - free the orig_ifinfo object + * @rcu: rcu pointer of the orig_ifinfo object + */ +static void batadv_orig_ifinfo_free_rcu(struct rcu_head *rcu) +{ + struct batadv_orig_ifinfo *orig_ifinfo; + + orig_ifinfo = container_of(rcu, struct batadv_orig_ifinfo, rcu); + + if (orig_ifinfo->if_outgoing != BATADV_IF_DEFAULT) + batadv_hardif_free_ref_now(orig_ifinfo->if_outgoing); + + kfree(orig_ifinfo); +} + +/** + * batadv_orig_ifinfo_free_ref_now - decrement
the refcounter and possibly free + * the orig_ifinfo (without rcu callback) + * @orig_ifinfo: the orig_ifinfo object to release + */ +static void +batadv_orig_ifinfo_free_ref_now(struct batadv_orig_ifinfo *orig_ifinfo) +{ + if (atomic_dec_and_test(&orig_ifinfo->refcount)) + batadv_orig_ifinfo_free_rcu(&orig_ifinfo->rcu); +} + +/** + * batadv_orig_ifinfo_free_ref - decrement the refcounter and possibly free + * the orig_ifinfo + * @orig_ifinfo: the orig_ifinfo object to release + */ +void batadv_orig_ifinfo_free_ref(struct batadv_orig_ifinfo *orig_ifinfo) +{ + if (atomic_dec_and_test(&orig_ifinfo->refcount)) + call_rcu(&orig_ifinfo->rcu, batadv_orig_ifinfo_free_rcu); +} + static void batadv_orig_node_free_rcu(struct rcu_head *rcu) { struct hlist_node *node_tmp; - struct batadv_neigh_node *neigh_node, *tmp_neigh_node; + struct batadv_neigh_node *neigh_node; struct batadv_orig_node *orig_node; + struct batadv_orig_ifinfo *orig_ifinfo; orig_node = container_of(rcu, struct batadv_orig_node, rcu); spin_lock_bh(&orig_node->neigh_list_lock); - /* for all bonding members ... */ - list_for_each_entry_safe(neigh_node, tmp_neigh_node, - &orig_node->bond_list, bonding_list) { - list_del_rcu(&neigh_node->bonding_list); - batadv_neigh_node_free_ref(neigh_node); - } - /* for all neighbors towards this originator ... 
*/ hlist_for_each_entry_safe(neigh_node, node_tmp, &orig_node->neigh_list, list) { hlist_del_rcu(&neigh_node->list); - batadv_neigh_node_free_ref(neigh_node); + batadv_neigh_node_free_ref_now(neigh_node); } + hlist_for_each_entry_safe(orig_ifinfo, node_tmp, + &orig_node->ifinfo_list, list) { + hlist_del_rcu(&orig_ifinfo->list); + batadv_orig_ifinfo_free_ref_now(orig_ifinfo); + } spin_unlock_bh(&orig_node->neigh_list_lock); /* Free nc_nodes */ @@ -327,8 +615,8 @@ struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv, return NULL; INIT_HLIST_HEAD(&orig_node->neigh_list); - INIT_LIST_HEAD(&orig_node->bond_list); INIT_LIST_HEAD(&orig_node->vlan_list); + INIT_HLIST_HEAD(&orig_node->ifinfo_list); spin_lock_init(&orig_node->bcast_seqno_lock); spin_lock_init(&orig_node->neigh_list_lock); spin_lock_init(&orig_node->tt_buff_lock); @@ -344,15 +632,11 @@ struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv, orig_node->bat_priv = bat_priv; memcpy(orig_node->orig, addr, ETH_ALEN); batadv_dat_init_orig_node_addr(orig_node); - orig_node->router = NULL; atomic_set(&orig_node->last_ttvn, 0); orig_node->tt_buff = NULL; orig_node->tt_buff_len = 0; reset_time = jiffies - 1 - msecs_to_jiffies(BATADV_RESET_PROTECTION_MS); orig_node->bcast_seqno_reset = reset_time; - orig_node->batman_seqno_reset = reset_time; - - atomic_set(&orig_node->bond_candidates, 0); /* create a vlan object for the "untagged" LAN */ vlan = batadv_orig_node_vlan_new(orig_node, BATADV_NO_FLAGS); @@ -376,20 +660,76 @@ free_orig_node: return NULL; } +/** + * batadv_purge_orig_ifinfo - purge obsolete ifinfo entries from originator + * @bat_priv: the bat priv with all the soft interface information + * @orig_node: orig node which is to be checked + * + * Returns true if any ifinfo entry was purged, false otherwise. 
+ */ +static bool +batadv_purge_orig_ifinfo(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig_node) +{ + struct batadv_orig_ifinfo *orig_ifinfo; + struct batadv_hard_iface *if_outgoing; + struct hlist_node *node_tmp; + bool ifinfo_purged = false; + + spin_lock_bh(&orig_node->neigh_list_lock); + + /* for all ifinfo objects for this originator */ + hlist_for_each_entry_safe(orig_ifinfo, node_tmp, + &orig_node->ifinfo_list, list) { + if_outgoing = orig_ifinfo->if_outgoing; + + /* always keep the default interface */ + if (if_outgoing == BATADV_IF_DEFAULT) + continue; + + /* don't purge if the interface is not (going) down */ + if ((if_outgoing->if_status != BATADV_IF_INACTIVE) && + (if_outgoing->if_status != BATADV_IF_NOT_IN_USE) && + (if_outgoing->if_status != BATADV_IF_TO_BE_REMOVED)) + continue; + + batadv_dbg(BATADV_DBG_BATMAN, bat_priv, + "router/ifinfo purge: originator %pM, iface: %s\n", + orig_node->orig, if_outgoing->net_dev->name); + + ifinfo_purged = true; + + hlist_del_rcu(&orig_ifinfo->list); + batadv_orig_ifinfo_free_ref(orig_ifinfo); + if (orig_node->last_bonding_candidate == orig_ifinfo) { + orig_node->last_bonding_candidate = NULL; + batadv_orig_ifinfo_free_ref(orig_ifinfo); + } + } + + spin_unlock_bh(&orig_node->neigh_list_lock); + + return ifinfo_purged; +} + + +/** + * batadv_purge_orig_neighbors - purges neighbors from originator + * @bat_priv: the bat priv with all the soft interface information + * @orig_node: orig node which is to be checked + * + * Returns true if any neighbor was purged, false otherwise + */ static bool batadv_purge_orig_neighbors(struct batadv_priv *bat_priv, - struct batadv_orig_node *orig_node, - struct batadv_neigh_node **best_neigh) + struct batadv_orig_node *orig_node) { - struct batadv_algo_ops *bao = bat_priv->bat_algo_ops; struct hlist_node *node_tmp; struct batadv_neigh_node *neigh_node; bool neigh_purged = false; unsigned long last_seen; struct batadv_hard_iface *if_incoming; - *best_neigh = NULL; - 
spin_lock_bh(&orig_node->neigh_list_lock); /* for all neighbors towards this originator ... */ @@ -418,15 +758,7 @@ batadv_purge_orig_neighbors(struct batadv_priv *bat_priv, neigh_purged = true; hlist_del_rcu(&neigh_node->list); - batadv_bonding_candidate_del(orig_node, neigh_node); batadv_neigh_node_free_ref(neigh_node); - } else { - /* store the best_neighbour if this is the first - * iteration or if a better neighbor has been found - */ - if (!*best_neigh || - bao->bat_neigh_cmp(neigh_node, *best_neigh) > 0) - *best_neigh = neigh_node; } } @@ -434,10 +766,57 @@ batadv_purge_orig_neighbors(struct batadv_priv *bat_priv, return neigh_purged; } +/** + * batadv_find_best_neighbor - finds the best neighbor after purging + * @bat_priv: the bat priv with all the soft interface information + * @orig_node: orig node which is to be checked + * @if_outgoing: the interface for which the metric should be compared + * + * Returns the current best neighbor, with refcount increased. + */ +static struct batadv_neigh_node * +batadv_find_best_neighbor(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig_node, + struct batadv_hard_iface *if_outgoing) +{ + struct batadv_neigh_node *best = NULL, *neigh; + struct batadv_algo_ops *bao = bat_priv->bat_algo_ops; + + rcu_read_lock(); + hlist_for_each_entry_rcu(neigh, &orig_node->neigh_list, list) { + if (best && (bao->bat_neigh_cmp(neigh, if_outgoing, + best, if_outgoing) <= 0)) + continue; + + if (!atomic_inc_not_zero(&neigh->refcount)) + continue; + + if (best) + batadv_neigh_node_free_ref(best); + + best = neigh; + } + rcu_read_unlock(); + + return best; +} + +/** + * batadv_purge_orig_node - purges obsolete information from an orig_node + * @bat_priv: the bat priv with all the soft interface information + * @orig_node: orig node which is to be checked + * + * This function checks if the orig_node or substructures of it have become + * obsolete, and purges this information if that's the case. 
+ * + * Returns true if the orig_node is to be removed, false otherwise. + */ static bool batadv_purge_orig_node(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node) { struct batadv_neigh_node *best_neigh_node; + struct batadv_hard_iface *hard_iface; + bool changed; if (batadv_has_timed_out(orig_node->last_seen, 2 * BATADV_PURGE_TIMEOUT)) { @@ -446,12 +825,39 @@ static bool batadv_purge_orig_node(struct batadv_priv *bat_priv, orig_node->orig, jiffies_to_msecs(orig_node->last_seen)); return true; - } else { - if (batadv_purge_orig_neighbors(bat_priv, orig_node, - &best_neigh_node)) - batadv_update_route(bat_priv, orig_node, - best_neigh_node); } + changed = batadv_purge_orig_ifinfo(bat_priv, orig_node); + changed = changed || batadv_purge_orig_neighbors(bat_priv, orig_node); + + if (!changed) + return false; + + /* first for NULL ... */ + best_neigh_node = batadv_find_best_neighbor(bat_priv, orig_node, + BATADV_IF_DEFAULT); + batadv_update_route(bat_priv, orig_node, BATADV_IF_DEFAULT, + best_neigh_node); + if (best_neigh_node) + batadv_neigh_node_free_ref(best_neigh_node); + + /* ... then for all other interfaces. 
*/ + rcu_read_lock(); + list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { + if (hard_iface->if_status != BATADV_IF_ACTIVE) + continue; + + if (hard_iface->soft_iface != bat_priv->soft_iface) + continue; + + best_neigh_node = batadv_find_best_neighbor(bat_priv, + orig_node, + hard_iface); + batadv_update_route(bat_priv, orig_node, hard_iface, + best_neigh_node); + if (best_neigh_node) + batadv_neigh_node_free_ref(best_neigh_node); + } + rcu_read_unlock(); return false; } @@ -534,8 +940,54 @@ int batadv_orig_seq_print_text(struct seq_file *seq, void *offset) return 0; } - bat_priv->bat_algo_ops->bat_orig_print(bat_priv, seq); + bat_priv->bat_algo_ops->bat_orig_print(bat_priv, seq, + BATADV_IF_DEFAULT); + + return 0; +} + +/** + * batadv_orig_hardif_seq_print_text - writes originator infos for a specific + * outgoing interface + * @seq: debugfs table seq_file struct + * @offset: not used + * + * Returns 0 + */ +int batadv_orig_hardif_seq_print_text(struct seq_file *seq, void *offset) +{ + struct net_device *net_dev = (struct net_device *)seq->private; + struct batadv_hard_iface *hard_iface; + struct batadv_priv *bat_priv; + + hard_iface = batadv_hardif_get_by_netdev(net_dev); + + if (!hard_iface || !hard_iface->soft_iface) { + seq_puts(seq, "Interface not known to B.A.T.M.A.N.\n"); + goto out; + } + + bat_priv = netdev_priv(hard_iface->soft_iface); + if (!bat_priv->bat_algo_ops->bat_orig_print) { + seq_puts(seq, + "No printing function for this routing protocol\n"); + goto out; + } + + if (hard_iface->if_status != BATADV_IF_ACTIVE) { + seq_puts(seq, "Interface not active\n"); + goto out; + } + seq_printf(seq, "[B.A.T.M.A.N. 
adv %s, IF/MAC: %s/%pM (%s %s)]\n", + BATADV_SOURCE_VERSION, hard_iface->net_dev->name, + hard_iface->net_dev->dev_addr, + hard_iface->soft_iface->name, bat_priv->bat_algo_ops->name); + + bat_priv->bat_algo_ops->bat_orig_print(bat_priv, seq, hard_iface); + +out: + batadv_hardif_free_ref(hard_iface); return 0; } diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h index 6f77d808a91..37be290f63f 100644 --- a/net/batman-adv/originator.h +++ b/net/batman-adv/originator.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -12,9 +12,7 @@ * General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
*/ #ifndef _NET_BATMAN_ADV_ORIGINATOR_H_ @@ -36,8 +34,26 @@ batadv_neigh_node_new(struct batadv_hard_iface *hard_iface, struct batadv_orig_node *orig_node); void batadv_neigh_node_free_ref(struct batadv_neigh_node *neigh_node); struct batadv_neigh_node * -batadv_orig_node_get_router(struct batadv_orig_node *orig_node); +batadv_orig_router_get(struct batadv_orig_node *orig_node, + const struct batadv_hard_iface *if_outgoing); +struct batadv_neigh_ifinfo * +batadv_neigh_ifinfo_new(struct batadv_neigh_node *neigh, + struct batadv_hard_iface *if_outgoing); +struct batadv_neigh_ifinfo * +batadv_neigh_ifinfo_get(struct batadv_neigh_node *neigh, + struct batadv_hard_iface *if_outgoing); +void batadv_neigh_ifinfo_free_ref(struct batadv_neigh_ifinfo *neigh_ifinfo); + +struct batadv_orig_ifinfo * +batadv_orig_ifinfo_get(struct batadv_orig_node *orig_node, + struct batadv_hard_iface *if_outgoing); +struct batadv_orig_ifinfo * +batadv_orig_ifinfo_new(struct batadv_orig_node *orig_node, + struct batadv_hard_iface *if_outgoing); +void batadv_orig_ifinfo_free_ref(struct batadv_orig_ifinfo *orig_ifinfo); + int batadv_orig_seq_print_text(struct seq_file *seq, void *offset); +int batadv_orig_hardif_seq_print_text(struct seq_file *seq, void *offset); int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface, int max_if_num); int batadv_orig_hash_del_if(struct batadv_hard_iface *hard_iface, diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h index 2dd8f242255..0a381d1174c 100644 --- a/net/batman-adv/packet.h +++ b/net/batman-adv/packet.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -12,9 +12,7 @@ * General Public License for more details. 
* * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #ifndef _NET_BATMAN_ADV_PACKET_H_ @@ -117,6 +115,7 @@ enum batadv_tt_client_flags { BATADV_TT_CLIENT_DEL = BIT(0), BATADV_TT_CLIENT_ROAM = BIT(1), BATADV_TT_CLIENT_WIFI = BIT(4), + BATADV_TT_CLIENT_ISOLA = BIT(5), BATADV_TT_CLIENT_NOPURGE = BIT(8), BATADV_TT_CLIENT_NEW = BIT(9), BATADV_TT_CLIENT_PENDING = BIT(10), diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 46278bfb8fd..1ed9f7c9ece 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -12,9 +12,7 @@ * General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
*/ #include "main.h" @@ -35,13 +33,32 @@ static int batadv_route_unicast_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if); +/** + * _batadv_update_route - set the router for this originator + * @bat_priv: the bat priv with all the soft interface information + * @orig_node: orig node which is to be configured + * @recv_if: the receive interface for which this route is set + * @neigh_node: neighbor which should be the next router + * + * This function does not perform any error checks + */ static void _batadv_update_route(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, + struct batadv_hard_iface *recv_if, struct batadv_neigh_node *neigh_node) { + struct batadv_orig_ifinfo *orig_ifinfo; struct batadv_neigh_node *curr_router; - curr_router = batadv_orig_node_get_router(orig_node); + orig_ifinfo = batadv_orig_ifinfo_get(orig_node, recv_if); + if (!orig_ifinfo) + return; + + rcu_read_lock(); + curr_router = rcu_dereference(orig_ifinfo->router); + if (curr_router && !atomic_inc_not_zero(&curr_router->refcount)) + curr_router = NULL; + rcu_read_unlock(); /* route deleted */ if ((curr_router) && (!neigh_node)) { @@ -71,16 +88,25 @@ static void _batadv_update_route(struct batadv_priv *bat_priv, neigh_node = NULL; spin_lock_bh(&orig_node->neigh_list_lock); - rcu_assign_pointer(orig_node->router, neigh_node); + rcu_assign_pointer(orig_ifinfo->router, neigh_node); spin_unlock_bh(&orig_node->neigh_list_lock); + batadv_orig_ifinfo_free_ref(orig_ifinfo); /* decrease refcount of previous best neighbor */ if (curr_router) batadv_neigh_node_free_ref(curr_router); } +/** + * batadv_update_route - set the router for this originator + * @bat_priv: the bat priv with all the soft interface information + * @orig_node: orig node which is to be configured + * @recv_if: the receive interface for which this route is set + * @neigh_node: neighbor which should be the next router + */ void batadv_update_route(struct batadv_priv *bat_priv, struct batadv_orig_node 
*orig_node, + struct batadv_hard_iface *recv_if, struct batadv_neigh_node *neigh_node) { struct batadv_neigh_node *router = NULL; @@ -88,125 +114,16 @@ void batadv_update_route(struct batadv_priv *bat_priv, if (!orig_node) goto out; - router = batadv_orig_node_get_router(orig_node); + router = batadv_orig_router_get(orig_node, recv_if); if (router != neigh_node) - _batadv_update_route(bat_priv, orig_node, neigh_node); + _batadv_update_route(bat_priv, orig_node, recv_if, neigh_node); out: if (router) batadv_neigh_node_free_ref(router); } -/* caller must hold the neigh_list_lock */ -void batadv_bonding_candidate_del(struct batadv_orig_node *orig_node, - struct batadv_neigh_node *neigh_node) -{ - /* this neighbor is not part of our candidate list */ - if (list_empty(&neigh_node->bonding_list)) - goto out; - - list_del_rcu(&neigh_node->bonding_list); - INIT_LIST_HEAD(&neigh_node->bonding_list); - batadv_neigh_node_free_ref(neigh_node); - atomic_dec(&orig_node->bond_candidates); - -out: - return; -} - -/** - * batadv_bonding_candidate_add - consider a new link for bonding mode towards - * the given originator - * @bat_priv: the bat priv with all the soft interface information - * @orig_node: the target node - * @neigh_node: the neighbor representing the new link to consider for bonding - * mode - */ -void batadv_bonding_candidate_add(struct batadv_priv *bat_priv, - struct batadv_orig_node *orig_node, - struct batadv_neigh_node *neigh_node) -{ - struct batadv_algo_ops *bao = bat_priv->bat_algo_ops; - struct batadv_neigh_node *tmp_neigh_node, *router = NULL; - uint8_t interference_candidate = 0; - - spin_lock_bh(&orig_node->neigh_list_lock); - - /* only consider if it has the same primary address ... */ - if (!batadv_compare_eth(orig_node->orig, - neigh_node->orig_node->primary_addr)) - goto candidate_del; - - router = batadv_orig_node_get_router(orig_node); - if (!router) - goto candidate_del; - - - /* ... 
and is good enough to be considered */ - if (bao->bat_neigh_is_equiv_or_better(neigh_node, router)) - goto candidate_del; - - /* check if we have another candidate with the same mac address or - * interface. If we do, we won't select this candidate because of - * possible interference. - */ - hlist_for_each_entry_rcu(tmp_neigh_node, - &orig_node->neigh_list, list) { - if (tmp_neigh_node == neigh_node) - continue; - - /* we only care if the other candidate is even - * considered as candidate. - */ - if (list_empty(&tmp_neigh_node->bonding_list)) - continue; - - if ((neigh_node->if_incoming == tmp_neigh_node->if_incoming) || - (batadv_compare_eth(neigh_node->addr, - tmp_neigh_node->addr))) { - interference_candidate = 1; - break; - } - } - - /* don't care further if it is an interference candidate */ - if (interference_candidate) - goto candidate_del; - - /* this neighbor already is part of our candidate list */ - if (!list_empty(&neigh_node->bonding_list)) - goto out; - - if (!atomic_inc_not_zero(&neigh_node->refcount)) - goto out; - - list_add_rcu(&neigh_node->bonding_list, &orig_node->bond_list); - atomic_inc(&orig_node->bond_candidates); - goto out; - -candidate_del: - batadv_bonding_candidate_del(orig_node, neigh_node); - -out: - spin_unlock_bh(&orig_node->neigh_list_lock); - - if (router) - batadv_neigh_node_free_ref(router); -} - -/* copy primary address for bonding */ -void -batadv_bonding_save_primary(const struct batadv_orig_node *orig_node, - struct batadv_orig_node *orig_neigh_node, - const struct batadv_ogm_packet *batman_ogm_packet) -{ - if (!(batman_ogm_packet->flags & BATADV_PRIMARIES_FIRST_HOP)) - return; - - memcpy(orig_neigh_node->primary_addr, orig_node->orig, ETH_ALEN); -} - /* checks whether the host restarted and is in the protection time. * returns: * 0 if the packet is to be accepted @@ -461,114 +378,6 @@ out: return ret; } -/* In the bonding case, send the packets in a round - * robin fashion over the remaining interfaces. 
- * - * This method rotates the bonding list and increases the - * returned router's refcount. - */ -static struct batadv_neigh_node * -batadv_find_bond_router(struct batadv_orig_node *primary_orig, - const struct batadv_hard_iface *recv_if) -{ - struct batadv_neigh_node *tmp_neigh_node; - struct batadv_neigh_node *router = NULL, *first_candidate = NULL; - - rcu_read_lock(); - list_for_each_entry_rcu(tmp_neigh_node, &primary_orig->bond_list, - bonding_list) { - if (!first_candidate) - first_candidate = tmp_neigh_node; - - /* recv_if == NULL on the first node. */ - if (tmp_neigh_node->if_incoming == recv_if) - continue; - - if (!atomic_inc_not_zero(&tmp_neigh_node->refcount)) - continue; - - router = tmp_neigh_node; - break; - } - - /* use the first candidate if nothing was found. */ - if (!router && first_candidate && - atomic_inc_not_zero(&first_candidate->refcount)) - router = first_candidate; - - if (!router) - goto out; - - /* selected should point to the next element - * after the current router - */ - spin_lock_bh(&primary_orig->neigh_list_lock); - /* this is a list_move(), which unfortunately - * does not exist as rcu version - */ - list_del_rcu(&primary_orig->bond_list); - list_add_rcu(&primary_orig->bond_list, - &router->bonding_list); - spin_unlock_bh(&primary_orig->neigh_list_lock); - -out: - rcu_read_unlock(); - return router; -} - -/** - * batadv_find_ifalter_router - find the best of the remaining candidates which - * are not using this interface - * @bat_priv: the bat priv with all the soft interface information - * @primary_orig: the destination - * @recv_if: the interface that the router returned by this function has to not - * use - * - * Returns the best candidate towards primary_orig that is not using recv_if. 
- * Increases the returned neighbor's refcount - */ -static struct batadv_neigh_node * -batadv_find_ifalter_router(struct batadv_priv *bat_priv, - struct batadv_orig_node *primary_orig, - const struct batadv_hard_iface *recv_if) -{ - struct batadv_neigh_node *router = NULL, *first_candidate = NULL; - struct batadv_algo_ops *bao = bat_priv->bat_algo_ops; - struct batadv_neigh_node *tmp_neigh_node; - - rcu_read_lock(); - list_for_each_entry_rcu(tmp_neigh_node, &primary_orig->bond_list, - bonding_list) { - if (!first_candidate) - first_candidate = tmp_neigh_node; - - /* recv_if == NULL on the first node. */ - if (tmp_neigh_node->if_incoming == recv_if) - continue; - - if (router && bao->bat_neigh_cmp(tmp_neigh_node, router)) - continue; - - if (!atomic_inc_not_zero(&tmp_neigh_node->refcount)) - continue; - - /* decrement refcount of previously selected router */ - if (router) - batadv_neigh_node_free_ref(router); - - /* we found a better router (or at least one valid router) */ - router = tmp_neigh_node; - } - - /* use the first candidate if nothing was found. */ - if (!router && first_candidate && - atomic_inc_not_zero(&first_candidate->refcount)) - router = first_candidate; - - rcu_read_unlock(); - return router; -} - /** * batadv_check_unicast_packet - Check for malformed unicast packets * @bat_priv: the bat priv with all the soft interface information @@ -606,95 +415,141 @@ static int batadv_check_unicast_packet(struct batadv_priv *bat_priv, return 0; } -/* find a suitable router for this originator, and use - * bonding if possible. increases the found neighbors - * refcount. +/** + * batadv_find_router - find a suitable router for this originator + * @bat_priv: the bat priv with all the soft interface information + * @orig_node: the destination node + * @recv_if: pointer to interface this packet was received on + * + * Returns the router which should be used for this orig_node on + * this interface, or NULL if not available. 
*/ struct batadv_neigh_node * batadv_find_router(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, - const struct batadv_hard_iface *recv_if) + struct batadv_hard_iface *recv_if) { - struct batadv_orig_node *primary_orig_node; - struct batadv_orig_node *router_orig; - struct batadv_neigh_node *router; - static uint8_t zero_mac[ETH_ALEN] = {0, 0, 0, 0, 0, 0}; - int bonding_enabled; - uint8_t *primary_addr; + struct batadv_algo_ops *bao = bat_priv->bat_algo_ops; + struct batadv_neigh_node *first_candidate_router = NULL; + struct batadv_neigh_node *next_candidate_router = NULL; + struct batadv_neigh_node *router, *cand_router = NULL; + struct batadv_neigh_node *last_cand_router = NULL; + struct batadv_orig_ifinfo *cand, *first_candidate = NULL; + struct batadv_orig_ifinfo *next_candidate = NULL; + struct batadv_orig_ifinfo *last_candidate; + bool last_candidate_found = false; if (!orig_node) return NULL; - router = batadv_orig_node_get_router(orig_node); - if (!router) - goto err; + router = batadv_orig_router_get(orig_node, recv_if); - /* without bonding, the first node should - * always choose the default router. + /* only consider bonding for recv_if == BATADV_IF_DEFAULT (first hop) + * and if activated. + */ + if (recv_if == BATADV_IF_DEFAULT || !atomic_read(&bat_priv->bonding) || + !router) + return router; + + /* bonding: loop through the list of possible routers found + * for the various outgoing interfaces and find a candidate after + * the last chosen bonding candidate (next_candidate). If no such + * router is found, use the first candidate found (the previously + * chosen bonding candidate might have been the last one in the list). + * If this can't be found either, return the previously choosen + * router - obviously there are no other candidates. 
*/ - bonding_enabled = atomic_read(&bat_priv->bonding); - rcu_read_lock(); - /* select default router to output */ - router_orig = router->orig_node; - if (!router_orig) - goto err_unlock; + last_candidate = orig_node->last_bonding_candidate; + if (last_candidate) + last_cand_router = rcu_dereference(last_candidate->router); - if ((!recv_if) && (!bonding_enabled)) - goto return_router; + hlist_for_each_entry_rcu(cand, &orig_node->ifinfo_list, list) { + /* acquire some structures and references ... */ + if (!atomic_inc_not_zero(&cand->refcount)) + continue; - primary_addr = router_orig->primary_addr; + cand_router = rcu_dereference(cand->router); + if (!cand_router) + goto next; - /* if we have something in the primary_addr, we can search - * for a potential bonding candidate. - */ - if (batadv_compare_eth(primary_addr, zero_mac)) - goto return_router; + if (!atomic_inc_not_zero(&cand_router->refcount)) { + cand_router = NULL; + goto next; + } - /* find the orig_node which has the primary interface. might - * even be the same as our router_orig in many cases - */ - if (batadv_compare_eth(primary_addr, router_orig->orig)) { - primary_orig_node = router_orig; - } else { - primary_orig_node = batadv_orig_hash_find(bat_priv, - primary_addr); - if (!primary_orig_node) - goto return_router; + /* alternative candidate should be good enough to be + * considered + */ + if (!bao->bat_neigh_is_equiv_or_better(cand_router, + cand->if_outgoing, + router, recv_if)) + goto next; + + /* don't use the same router twice */ + if (last_cand_router == cand_router) + goto next; + + /* mark the first possible candidate */ + if (!first_candidate) { + atomic_inc(&cand_router->refcount); + atomic_inc(&cand->refcount); + first_candidate = cand; + first_candidate_router = cand_router; + } + + /* check if the loop has already passed the previously selected + * candidate ... this function should select the next candidate + * AFTER the previously used bonding candidate. 
+ */ + if (!last_candidate || last_candidate_found) { + next_candidate = cand; + next_candidate_router = cand_router; + break; + } - batadv_orig_node_free_ref(primary_orig_node); + if (last_candidate == cand) + last_candidate_found = true; +next: + /* free references */ + if (cand_router) { + batadv_neigh_node_free_ref(cand_router); + cand_router = NULL; + } + batadv_orig_ifinfo_free_ref(cand); } + rcu_read_unlock(); - /* with less than 2 candidates, we can't do any - * bonding and prefer the original router. - */ - if (atomic_read(&primary_orig_node->bond_candidates) < 2) - goto return_router; + /* last_bonding_candidate is reset below, remove the old reference. */ + if (orig_node->last_bonding_candidate) + batadv_orig_ifinfo_free_ref(orig_node->last_bonding_candidate); - /* all nodes between should choose a candidate which - * is is not on the interface where the packet came - * in. + /* After finding candidates, handle the three cases: + * 1) there is a next candidate, use that + * 2) there is no next candidate, use the first of the list + * 3) there is no candidate at all, return the default router */ - batadv_neigh_node_free_ref(router); + if (next_candidate) { + batadv_neigh_node_free_ref(router); - if (bonding_enabled) - router = batadv_find_bond_router(primary_orig_node, recv_if); - else - router = batadv_find_ifalter_router(bat_priv, primary_orig_node, - recv_if); + /* remove references to first candidate, we don't need it. */ + if (first_candidate) { + batadv_neigh_node_free_ref(first_candidate_router); + batadv_orig_ifinfo_free_ref(first_candidate); + } + router = next_candidate_router; + orig_node->last_bonding_candidate = next_candidate; + } else if (first_candidate) { + batadv_neigh_node_free_ref(router); -return_router: - if (router && router->if_incoming->if_status != BATADV_IF_ACTIVE) - goto err_unlock; + /* refcounting has already been done in the loop above. 
*/ + router = first_candidate_router; + orig_node->last_bonding_candidate = first_candidate; + } else { + orig_node->last_bonding_candidate = NULL; + } - rcu_read_unlock(); return router; -err_unlock: - rcu_read_unlock(); -err: - if (router) - batadv_neigh_node_free_ref(router); - return NULL; } static int batadv_route_unicast_packet(struct sk_buff *skb, @@ -1135,6 +990,7 @@ int batadv_recv_bcast_packet(struct sk_buff *skb, int hdr_size = sizeof(*bcast_packet); int ret = NET_RX_DROP; int32_t seq_diff; + uint32_t seqno; /* drop packet if it has not necessary minimum size */ if (unlikely(!pskb_may_pull(skb, hdr_size))) @@ -1170,12 +1026,13 @@ int batadv_recv_bcast_packet(struct sk_buff *skb, spin_lock_bh(&orig_node->bcast_seqno_lock); + seqno = ntohl(bcast_packet->seqno); /* check whether the packet is a duplicate */ if (batadv_test_bit(orig_node->bcast_bits, orig_node->last_bcast_seqno, - ntohl(bcast_packet->seqno))) + seqno)) goto spin_unlock; - seq_diff = ntohl(bcast_packet->seqno) - orig_node->last_bcast_seqno; + seq_diff = seqno - orig_node->last_bcast_seqno; /* check whether the packet is old and the host just restarted. */ if (batadv_window_protected(bat_priv, seq_diff, @@ -1186,7 +1043,7 @@ int batadv_recv_bcast_packet(struct sk_buff *skb, * if required. */ if (batadv_bit_get_packet(bat_priv, orig_node->bcast_bits, seq_diff, 1)) - orig_node->last_bcast_seqno = ntohl(bcast_packet->seqno); + orig_node->last_bcast_seqno = seqno; spin_unlock_bh(&orig_node->bcast_seqno_lock); diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h index 19544ddb81b..557d3d12a9a 100644 --- a/net/batman-adv/routing.h +++ b/net/batman-adv/routing.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -12,9 +12,7 @@ * General Public License for more details. 
* * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #ifndef _NET_BATMAN_ADV_ROUTING_H_ @@ -25,6 +23,7 @@ bool batadv_check_management_packet(struct sk_buff *skb, int header_len); void batadv_update_route(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, + struct batadv_hard_iface *recv_if, struct batadv_neigh_node *neigh_node); int batadv_recv_icmp_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if); @@ -45,16 +44,7 @@ int batadv_recv_unhandled_unicast_packet(struct sk_buff *skb, struct batadv_neigh_node * batadv_find_router(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, - const struct batadv_hard_iface *recv_if); -void batadv_bonding_candidate_del(struct batadv_orig_node *orig_node, - struct batadv_neigh_node *neigh_node); -void batadv_bonding_candidate_add(struct batadv_priv *bat_priv, - struct batadv_orig_node *orig_node, - struct batadv_neigh_node *neigh_node); -void batadv_bonding_save_primary(const struct batadv_orig_node *orig_node, - struct batadv_orig_node *orig_neigh_node, - const struct batadv_ogm_packet - *batman_ogm_packet); + struct batadv_hard_iface *recv_if); int batadv_window_protected(struct batadv_priv *bat_priv, int32_t seq_num_diff, unsigned long *last_reset); diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index fba4dcfcfac..579f5f00a38 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -12,9 +12,7 @@ * General Public License for more details. 
* * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #include "main.h" @@ -321,13 +319,23 @@ out: */ int batadv_send_skb_via_tt_generic(struct batadv_priv *bat_priv, struct sk_buff *skb, int packet_type, - int packet_subtype, unsigned short vid) + int packet_subtype, uint8_t *dst_hint, + unsigned short vid) { struct ethhdr *ethhdr = (struct ethhdr *)skb->data; struct batadv_orig_node *orig_node; + uint8_t *src, *dst; + + src = ethhdr->h_source; + dst = ethhdr->h_dest; + + /* if we got an hint! let's send the packet to this client (if any) */ + if (dst_hint) { + src = NULL; + dst = dst_hint; + } + orig_node = batadv_transtable_search(bat_priv, src, dst, vid); - orig_node = batadv_transtable_search(bat_priv, ethhdr->h_source, - ethhdr->h_dest, vid); return batadv_send_skb_unicast(bat_priv, skb, packet_type, packet_subtype, orig_node, vid); } @@ -379,6 +387,8 @@ static void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet) kfree_skb(forw_packet->skb); if (forw_packet->if_incoming) batadv_hardif_free_ref(forw_packet->if_incoming); + if (forw_packet->if_outgoing) + batadv_hardif_free_ref(forw_packet->if_outgoing); kfree(forw_packet); } @@ -442,6 +452,7 @@ int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv, forw_packet->skb = newskb; forw_packet->if_incoming = primary_if; + forw_packet->if_outgoing = NULL; /* how often did we send the bcast packet ? 
*/ forw_packet->num_packets = 0; @@ -537,11 +548,16 @@ void batadv_send_outstanding_bat_ogm_packet(struct work_struct *work) bat_priv->bat_algo_ops->bat_ogm_emit(forw_packet); - /* we have to have at least one packet in the queue - * to determine the queues wake up time unless we are - * shutting down + /* we have to have at least one packet in the queue to determine the + * queues wake up time unless we are shutting down. + * + * only re-schedule if this is the "original" copy, e.g. the OGM of the + * primary interface should only be rescheduled once per period, but + * this function will be called for the forw_packet instances of the + * other secondary interfaces as well. */ - if (forw_packet->own) + if (forw_packet->own && + forw_packet->if_incoming == forw_packet->if_outgoing) batadv_schedule_bat_ogm(forw_packet->if_incoming); out: @@ -602,7 +618,8 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv, * we delete only packets belonging to the given interface */ if ((hard_iface) && - (forw_packet->if_incoming != hard_iface)) + (forw_packet->if_incoming != hard_iface) && + (forw_packet->if_outgoing != hard_iface)) continue; spin_unlock_bh(&bat_priv->forw_bat_list_lock); diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h index aa2e2537a73..aaddaa9661c 100644 --- a/net/batman-adv/send.h +++ b/net/batman-adv/send.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -12,9 +12,7 @@ * General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
*/ #ifndef _NET_BATMAN_ADV_SEND_H_ @@ -40,7 +38,8 @@ bool batadv_send_skb_prepare_unicast_4addr(struct batadv_priv *bat_priv, int packet_subtype); int batadv_send_skb_via_tt_generic(struct batadv_priv *bat_priv, struct sk_buff *skb, int packet_type, - int packet_subtype, unsigned short vid); + int packet_subtype, uint8_t *dst_hint, + unsigned short vid); int batadv_send_skb_via_gw(struct batadv_priv *bat_priv, struct sk_buff *skb, unsigned short vid); @@ -57,11 +56,11 @@ int batadv_send_skb_via_gw(struct batadv_priv *bat_priv, struct sk_buff *skb, * Returns NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise. */ static inline int batadv_send_skb_via_tt(struct batadv_priv *bat_priv, - struct sk_buff *skb, + struct sk_buff *skb, uint8_t *dst_hint, unsigned short vid) { return batadv_send_skb_via_tt_generic(bat_priv, skb, BATADV_UNICAST, 0, - vid); + dst_hint, vid); } /** @@ -81,11 +80,12 @@ static inline int batadv_send_skb_via_tt(struct batadv_priv *bat_priv, static inline int batadv_send_skb_via_tt_4addr(struct batadv_priv *bat_priv, struct sk_buff *skb, int packet_subtype, + uint8_t *dst_hint, unsigned short vid) { return batadv_send_skb_via_tt_generic(bat_priv, skb, BATADV_UNICAST_4ADDR, - packet_subtype, vid); + packet_subtype, dst_hint, vid); } #endif /* _NET_BATMAN_ADV_SEND_H_ */ diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index a8f99d1486c..f82c267e188 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -12,9 +12,7 @@ * General Public License for more details. 
* * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #include "main.h" @@ -121,7 +119,7 @@ static int batadv_interface_set_mac_addr(struct net_device *dev, void *p) batadv_tt_local_remove(bat_priv, old_addr, BATADV_NO_FLAGS, "mac address changed", false); batadv_tt_local_add(dev, addr->sa_data, BATADV_NO_FLAGS, - BATADV_NULL_IFINDEX); + BATADV_NULL_IFINDEX, BATADV_NO_MARK); } return 0; @@ -162,6 +160,8 @@ static int batadv_interface_tx(struct sk_buff *skb, 0x00, 0x00}; static const uint8_t ectp_addr[ETH_ALEN] = {0xCF, 0x00, 0x00, 0x00, 0x00, 0x00}; + enum batadv_dhcp_recipient dhcp_rcp = BATADV_DHCP_NO; + uint8_t *dst_hint = NULL, chaddr[ETH_ALEN]; struct vlan_ethhdr *vhdr; unsigned int header_len = 0; int data_len = skb->len, ret; @@ -169,6 +169,7 @@ static int batadv_interface_tx(struct sk_buff *skb, bool do_bcast = false, client_added; unsigned short vid; uint32_t seqno; + int gw_mode; if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE) goto dropped; @@ -198,7 +199,8 @@ static int batadv_interface_tx(struct sk_buff *skb, /* Register the client MAC in the transtable */ if (!is_multicast_ether_addr(ethhdr->h_source)) { client_added = batadv_tt_local_add(soft_iface, ethhdr->h_source, - vid, skb->skb_iif); + vid, skb->skb_iif, + skb->mark); if (!client_added) goto dropped; } @@ -215,36 +217,39 @@ static int batadv_interface_tx(struct sk_buff *skb, if (batadv_compare_eth(ethhdr->h_dest, ectp_addr)) goto dropped; + gw_mode = atomic_read(&bat_priv->gw_mode); if (is_multicast_ether_addr(ethhdr->h_dest)) { - do_bcast = true; - - switch (atomic_read(&bat_priv->gw_mode)) { - case BATADV_GW_MODE_SERVER: - /* gateway servers should not send dhcp - * requests into the mesh - */ - ret = batadv_gw_is_dhcp_target(skb, 
&header_len); - if (ret) - goto dropped; - break; - case BATADV_GW_MODE_CLIENT: - /* gateway clients should send dhcp requests - * via unicast to their gateway - */ - ret = batadv_gw_is_dhcp_target(skb, &header_len); - if (ret) - do_bcast = false; - break; - case BATADV_GW_MODE_OFF: - default: - break; + /* if gw mode is off, broadcast every packet */ + if (gw_mode == BATADV_GW_MODE_OFF) { + do_bcast = true; + goto send; } - /* reminder: ethhdr might have become unusable from here on - * (batadv_gw_is_dhcp_target() might have reallocated skb data) + dhcp_rcp = batadv_gw_dhcp_recipient_get(skb, &header_len, + chaddr); + /* skb->data may have been modified by + * batadv_gw_dhcp_recipient_get() */ + ethhdr = (struct ethhdr *)skb->data; + /* if gw_mode is on, broadcast any non-DHCP message. + * All the DHCP packets are going to be sent as unicast + */ + if (dhcp_rcp == BATADV_DHCP_NO) { + do_bcast = true; + goto send; + } + + if (dhcp_rcp == BATADV_DHCP_TO_CLIENT) + dst_hint = chaddr; + else if ((gw_mode == BATADV_GW_MODE_SERVER) && + (dhcp_rcp == BATADV_DHCP_TO_SERVER)) + /* gateways should not forward any DHCP message if + * directed to a DHCP server + */ + goto dropped; } +send: batadv_skb_set_priority(skb, 0); /* ethernet packet should be broadcasted */ @@ -290,22 +295,22 @@ static int batadv_interface_tx(struct sk_buff *skb, /* unicast packet */ } else { - if (atomic_read(&bat_priv->gw_mode) != BATADV_GW_MODE_OFF) { + /* DHCP packets going to a server will use the GW feature */ + if (dhcp_rcp == BATADV_DHCP_TO_SERVER) { ret = batadv_gw_out_of_range(bat_priv, skb); if (ret) goto dropped; - } - - if (batadv_dat_snoop_outgoing_arp_request(bat_priv, skb)) - goto dropped; - - batadv_dat_snoop_outgoing_arp_reply(bat_priv, skb); - - if (is_multicast_ether_addr(ethhdr->h_dest)) ret = batadv_send_skb_via_gw(bat_priv, skb, vid); - else - ret = batadv_send_skb_via_tt(bat_priv, skb, vid); + } else { + if (batadv_dat_snoop_outgoing_arp_request(bat_priv, + skb)) + goto dropped; 
+ batadv_dat_snoop_outgoing_arp_reply(bat_priv, skb); + + ret = batadv_send_skb_via_tt(bat_priv, skb, dst_hint, + vid); + } if (ret == NET_XMIT_DROP) goto dropped_freed; } @@ -394,9 +399,23 @@ void batadv_interface_rx(struct net_device *soft_iface, batadv_tt_add_temporary_global_entry(bat_priv, orig_node, ethhdr->h_source, vid); - if (batadv_is_ap_isolated(bat_priv, ethhdr->h_source, ethhdr->h_dest, - vid)) + if (is_multicast_ether_addr(ethhdr->h_dest)) { + /* set the mark on broadcast packets if AP isolation is ON and + * the packet is coming from an "isolated" client + */ + if (batadv_vlan_ap_isola_get(bat_priv, vid) && + batadv_tt_global_is_isolated(bat_priv, ethhdr->h_source, + vid)) { + /* save bits in skb->mark not covered by the mask and + * apply the mark on the rest + */ + skb->mark &= ~bat_priv->isolation_mark_mask; + skb->mark |= bat_priv->isolation_mark; + } + } else if (batadv_is_ap_isolated(bat_priv, ethhdr->h_source, + ethhdr->h_dest, vid)) { goto dropped; + } netif_rx(skb); goto out; @@ -485,7 +504,7 @@ int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid) */ batadv_tt_local_add(bat_priv->soft_iface, bat_priv->soft_iface->dev_addr, vid, - BATADV_NULL_IFINDEX); + BATADV_NULL_IFINDEX, BATADV_NO_MARK); spin_lock_bh(&bat_priv->softif_vlan_list_lock); hlist_add_head_rcu(&vlan->list, &bat_priv->softif_vlan_list); @@ -678,7 +697,7 @@ static int batadv_softif_init_late(struct net_device *dev) atomic_set(&bat_priv->gw.bandwidth_down, 100); atomic_set(&bat_priv->gw.bandwidth_up, 20); atomic_set(&bat_priv->orig_interval, 1000); - atomic_set(&bat_priv->hop_penalty, 30); + atomic_set(&bat_priv->hop_penalty, 15); #ifdef CONFIG_BATMAN_ADV_DEBUG atomic_set(&bat_priv->log_level, 0); #endif @@ -697,6 +716,8 @@ static int batadv_softif_init_late(struct net_device *dev) #endif bat_priv->tt.last_changeset = NULL; bat_priv->tt.last_changeset_len = 0; + bat_priv->isolation_mark = 0; + bat_priv->isolation_mark_mask = 0; /* randomize initial seqno 
to avoid collision */ get_random_bytes(&random_seqno, sizeof(random_seqno)); diff --git a/net/batman-adv/soft-interface.h b/net/batman-adv/soft-interface.h index 06fc91ff5a0..dbab22fd89a 100644 --- a/net/batman-adv/soft-interface.h +++ b/net/batman-adv/soft-interface.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors: * * Marek Lindner * @@ -12,9 +12,7 @@ * General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #ifndef _NET_BATMAN_ADV_SOFT_INTERFACE_H_ diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c index 6335433310a..e456bf6bb28 100644 --- a/net/batman-adv/sysfs.c +++ b/net/batman-adv/sysfs.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2010-2014 B.A.T.M.A.N. contributors: * * Marek Lindner * @@ -12,9 +12,7 @@ * General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
*/ #include "main.h" @@ -329,10 +327,10 @@ static ssize_t batadv_show_bat_algo(struct kobject *kobj, return sprintf(buff, "%s\n", bat_priv->bat_algo_ops->name); } -static void batadv_post_gw_deselect(struct net_device *net_dev) +static void batadv_post_gw_reselect(struct net_device *net_dev) { struct batadv_priv *bat_priv = netdev_priv(net_dev); - batadv_gw_deselect(bat_priv); + batadv_gw_reselect(bat_priv); } static ssize_t batadv_show_gw_mode(struct kobject *kobj, struct attribute *attr, @@ -408,7 +406,16 @@ static ssize_t batadv_store_gw_mode(struct kobject *kobj, batadv_info(net_dev, "Changing gw mode from: %s to: %s\n", curr_gw_mode_str, buff); - batadv_gw_deselect(bat_priv); + /* Invoking batadv_gw_reselect() is not enough to really de-select the + * current GW. It will only instruct the gateway client code to perform + * a re-election the next time that this is needed. + * + * When gw client mode is being switched off the current GW must be + * de-selected explicitly otherwise no GW_ADD uevent is thrown on + * client mode re-activation. This operation is performed in + * batadv_gw_check_client_stop(). 
+ */ + batadv_gw_reselect(bat_priv); /* always call batadv_gw_check_client_stop() before changing the gateway * state */ @@ -443,6 +450,74 @@ static ssize_t batadv_store_gw_bwidth(struct kobject *kobj, return batadv_gw_bandwidth_set(net_dev, buff, count); } +/** + * batadv_show_isolation_mark - print the current isolation mark/mask + * @kobj: kobject representing the private mesh sysfs directory + * @attr: the batman-adv attribute the user is interacting with + * @buff: the buffer that will contain the data to send back to the user + * + * Returns the number of bytes written into 'buff' on success or a negative + * error code in case of failure + */ +static ssize_t batadv_show_isolation_mark(struct kobject *kobj, + struct attribute *attr, char *buff) +{ + struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj); + + return sprintf(buff, "%#.8x/%#.8x\n", bat_priv->isolation_mark, + bat_priv->isolation_mark_mask); +} + +/** + * batadv_store_isolation_mark - parse and store the isolation mark/mask entered + * by the user + * @kobj: kobject representing the private mesh sysfs directory + * @attr: the batman-adv attribute the user is interacting with + * @buff: the buffer containing the user data + * @count: number of bytes in the buffer + * + * Returns 'count' on success or a negative error code in case of failure + */ +static ssize_t batadv_store_isolation_mark(struct kobject *kobj, + struct attribute *attr, char *buff, + size_t count) +{ + struct net_device *net_dev = batadv_kobj_to_netdev(kobj); + struct batadv_priv *bat_priv = netdev_priv(net_dev); + uint32_t mark, mask; + char *mask_ptr; + + /* parse the mask if it has been specified, otherwise assume the mask is + * the biggest possible + */ + mask = 0xFFFFFFFF; + mask_ptr = strchr(buff, '/'); + if (mask_ptr) { + *mask_ptr = '\0'; + mask_ptr++; + + /* the mask must be entered in hex base as it is going to be a + * bitmask and not a prefix length + */ + if (kstrtou32(mask_ptr, 16, &mask) < 0) + return -EINVAL; 
+ } + + /* the mark can be entered in any base */ + if (kstrtou32(buff, 0, &mark) < 0) + return -EINVAL; + + bat_priv->isolation_mark_mask = mask; + /* erase bits not covered by the mask */ + bat_priv->isolation_mark = mark & bat_priv->isolation_mark_mask; + + batadv_info(net_dev, + "New skb mark for extended isolation: %#.8x/%#.8x\n", + bat_priv->isolation_mark, bat_priv->isolation_mark_mask); + + return count; +} + BATADV_ATTR_SIF_BOOL(aggregated_ogms, S_IRUGO | S_IWUSR, NULL); BATADV_ATTR_SIF_BOOL(bonding, S_IRUGO | S_IWUSR, NULL); #ifdef CONFIG_BATMAN_ADV_BLA @@ -461,7 +536,7 @@ BATADV_ATTR_SIF_UINT(orig_interval, S_IRUGO | S_IWUSR, 2 * BATADV_JITTER, BATADV_ATTR_SIF_UINT(hop_penalty, S_IRUGO | S_IWUSR, 0, BATADV_TQ_MAX_VALUE, NULL); BATADV_ATTR_SIF_UINT(gw_sel_class, S_IRUGO | S_IWUSR, 1, BATADV_TQ_MAX_VALUE, - batadv_post_gw_deselect); + batadv_post_gw_reselect); static BATADV_ATTR(gw_bandwidth, S_IRUGO | S_IWUSR, batadv_show_gw_bwidth, batadv_store_gw_bwidth); #ifdef CONFIG_BATMAN_ADV_DEBUG @@ -471,6 +546,8 @@ BATADV_ATTR_SIF_UINT(log_level, S_IRUGO | S_IWUSR, 0, BATADV_DBG_ALL, NULL); BATADV_ATTR_SIF_BOOL(network_coding, S_IRUGO | S_IWUSR, batadv_nc_status_update); #endif +static BATADV_ATTR(isolation_mark, S_IRUGO | S_IWUSR, + batadv_show_isolation_mark, batadv_store_isolation_mark); static struct batadv_attribute *batadv_mesh_attrs[] = { &batadv_attr_aggregated_ogms, @@ -494,6 +571,7 @@ static struct batadv_attribute *batadv_mesh_attrs[] = { #ifdef CONFIG_BATMAN_ADV_NC &batadv_attr_network_coding, #endif + &batadv_attr_isolation_mark, NULL, }; diff --git a/net/batman-adv/sysfs.h b/net/batman-adv/sysfs.h index c7d725de50a..b715b60db7c 100644 --- a/net/batman-adv/sysfs.h +++ b/net/batman-adv/sysfs.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2010-2014 B.A.T.M.A.N. contributors: * * Marek Lindner * @@ -12,9 +12,7 @@ * General Public License for more details. 
* * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #ifndef _NET_BATMAN_ADV_SYSFS_H_ diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index ff625fedbc5..b6071f675a3 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich, Antonio Quartulli * @@ -12,9 +12,7 @@ * General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #include "main.h" @@ -51,7 +49,7 @@ static int batadv_compare_tt(const struct hlist_node *node, const void *data2) const void *data1 = container_of(node, struct batadv_tt_common_entry, hash_entry); - return (memcmp(data1, data2, ETH_ALEN) == 0 ? 1 : 0); + return batadv_compare_eth(data1, data2); } /** @@ -476,11 +474,13 @@ static void batadv_tt_global_free(struct batadv_priv *bat_priv, * @vid: VLAN identifier * @ifindex: index of the interface where the client is connected to (useful to * identify wireless clients) + * @mark: the value contained in the skb->mark field of the received packet (if + * any) * * Returns true if the client was successfully added, false otherwise. 
*/ bool batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr, - unsigned short vid, int ifindex) + unsigned short vid, int ifindex, uint32_t mark) { struct batadv_priv *bat_priv = netdev_priv(soft_iface); struct batadv_tt_local_entry *tt_local; @@ -491,6 +491,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr, int hash_added, table_size, packet_size_max; bool ret = false, roamed_back = false; uint8_t remote_flags; + uint32_t match_mark; if (ifindex != BATADV_NULL_IFINDEX) in_dev = dev_get_by_index(&init_net, ifindex); @@ -615,6 +616,17 @@ check_roaming: else tt_local->common.flags &= ~BATADV_TT_CLIENT_WIFI; + /* check the mark in the skb: if it's equal to the configured + * isolation_mark, it means the packet is coming from an isolated + * non-mesh client + */ + match_mark = (mark & bat_priv->isolation_mark_mask); + if (bat_priv->isolation_mark_mask && + match_mark == bat_priv->isolation_mark) + tt_local->common.flags |= BATADV_TT_CLIENT_ISOLA; + else + tt_local->common.flags &= ~BATADV_TT_CLIENT_ISOLA; + /* if any "dynamic" flag has been modified, resend an ADD event for this * entry so that all the nodes can get the new flags */ @@ -875,7 +887,7 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset) seq_printf(seq, "Locally retrieved addresses (from %s) announced via TT (TTVN: %u):\n", net_dev->name, (uint8_t)atomic_read(&bat_priv->tt.vn)); - seq_printf(seq, " %-13s %s %-7s %-9s (%-10s)\n", "Client", "VID", + seq_printf(seq, " %-13s %s %-8s %-9s (%-10s)\n", "Client", "VID", "Flags", "Last seen", "CRC"); for (i = 0; i < hash->size; i++) { @@ -903,7 +915,7 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset) } seq_printf(seq, - " * %pM %4i [%c%c%c%c%c] %3u.%03u (%#.8x)\n", + " * %pM %4i [%c%c%c%c%c%c] %3u.%03u (%#.8x)\n", tt_common_entry->addr, BATADV_PRINT_VID(tt_common_entry->vid), (tt_common_entry->flags & @@ -915,6 +927,8 @@ int batadv_tt_local_seq_print_text(struct seq_file 
*seq, void *offset) BATADV_TT_CLIENT_PENDING ? 'X' : '.'), (tt_common_entry->flags & BATADV_TT_CLIENT_WIFI ? 'W' : '.'), + (tt_common_entry->flags & + BATADV_TT_CLIENT_ISOLA ? 'I' : '.'), no_purge ? 0 : last_seen_secs, no_purge ? 0 : last_seen_msecs, vlan->tt.crc); @@ -1368,7 +1382,8 @@ out: return ret; } -/* batadv_transtable_best_orig - Get best originator list entry from tt entry +/** + * batadv_transtable_best_orig - Get best originator list entry from tt entry * @bat_priv: the bat priv with all the soft interface information * @tt_global_entry: global translation table entry to be analyzed * @@ -1386,12 +1401,14 @@ batadv_transtable_best_orig(struct batadv_priv *bat_priv, head = &tt_global_entry->orig_list; hlist_for_each_entry_rcu(orig_entry, head, list) { - router = batadv_orig_node_get_router(orig_entry->orig_node); + router = batadv_orig_router_get(orig_entry->orig_node, + BATADV_IF_DEFAULT); if (!router) continue; if (best_router && - bao->bat_neigh_cmp(router, best_router) <= 0) { + bao->bat_neigh_cmp(router, BATADV_IF_DEFAULT, + best_router, BATADV_IF_DEFAULT) <= 0) { batadv_neigh_node_free_ref(router); continue; } @@ -1410,8 +1427,9 @@ batadv_transtable_best_orig(struct batadv_priv *bat_priv, return best_entry; } -/* batadv_tt_global_print_entry - print all orig nodes who announce the address - * for this global entry +/** + * batadv_tt_global_print_entry - print all orig nodes who announce the address + * for this global entry * @bat_priv: the bat priv with all the soft interface information * @tt_global_entry: global translation table entry to be printed * @seq: debugfs table seq_file struct @@ -1447,13 +1465,14 @@ batadv_tt_global_print_entry(struct batadv_priv *bat_priv, last_ttvn = atomic_read(&best_entry->orig_node->last_ttvn); seq_printf(seq, - " %c %pM %4i (%3u) via %pM (%3u) (%#.8x) [%c%c%c]\n", + " %c %pM %4i (%3u) via %pM (%3u) (%#.8x) [%c%c%c%c]\n", '*', tt_global_entry->common.addr, BATADV_PRINT_VID(tt_global_entry->common.vid), 
best_entry->ttvn, best_entry->orig_node->orig, last_ttvn, vlan->tt.crc, (flags & BATADV_TT_CLIENT_ROAM ? 'R' : '.'), (flags & BATADV_TT_CLIENT_WIFI ? 'W' : '.'), + (flags & BATADV_TT_CLIENT_ISOLA ? 'I' : '.'), (flags & BATADV_TT_CLIENT_TEMP ? 'T' : '.')); batadv_orig_node_vlan_free_ref(vlan); @@ -1478,13 +1497,14 @@ print_list: last_ttvn = atomic_read(&orig_entry->orig_node->last_ttvn); seq_printf(seq, - " %c %pM %4d (%3u) via %pM (%3u) (%#.8x) [%c%c%c]\n", + " %c %pM %4d (%3u) via %pM (%3u) (%#.8x) [%c%c%c%c]\n", '+', tt_global_entry->common.addr, BATADV_PRINT_VID(tt_global_entry->common.vid), orig_entry->ttvn, orig_entry->orig_node->orig, last_ttvn, vlan->tt.crc, (flags & BATADV_TT_CLIENT_ROAM ? 'R' : '.'), (flags & BATADV_TT_CLIENT_WIFI ? 'W' : '.'), + (flags & BATADV_TT_CLIENT_ISOLA ? 'I' : '.'), (flags & BATADV_TT_CLIENT_TEMP ? 'T' : '.')); batadv_orig_node_vlan_free_ref(vlan); @@ -1853,6 +1873,11 @@ _batadv_is_ap_isolated(struct batadv_tt_local_entry *tt_local_entry, tt_global_entry->common.flags & BATADV_TT_CLIENT_WIFI) ret = true; + /* check if the two clients are marked as isolated */ + if (tt_local_entry->common.flags & BATADV_TT_CLIENT_ISOLA && + tt_global_entry->common.flags & BATADV_TT_CLIENT_ISOLA) + ret = true; + return ret; } @@ -1879,19 +1904,8 @@ struct batadv_orig_node *batadv_transtable_search(struct batadv_priv *bat_priv, struct batadv_tt_global_entry *tt_global_entry = NULL; struct batadv_orig_node *orig_node = NULL; struct batadv_tt_orig_list_entry *best_entry; - bool ap_isolation_enabled = false; - struct batadv_softif_vlan *vlan; - /* if the AP isolation is requested on a VLAN, then check for its - * setting in the proper VLAN private data structure - */ - vlan = batadv_softif_vlan_get(bat_priv, vid); - if (vlan) { - ap_isolation_enabled = atomic_read(&vlan->ap_isolation); - batadv_softif_vlan_free_ref(vlan); - } - - if (src && ap_isolation_enabled) { + if (src && batadv_vlan_ap_isola_get(bat_priv, vid)) { tt_local_entry = 
batadv_tt_local_hash_find(bat_priv, src, vid); if (!tt_local_entry || (tt_local_entry->common.flags & BATADV_TT_CLIENT_PENDING)) @@ -3567,3 +3581,29 @@ int batadv_tt_init(struct batadv_priv *bat_priv) return 1; } + +/** + * batadv_tt_global_is_isolated - check if a client is marked as isolated + * @bat_priv: the bat priv with all the soft interface information + * @addr: the mac address of the client + * @vid: the identifier of the VLAN where this client is connected + * + * Returns true if the client is marked with the TT_CLIENT_ISOLA flag, false + * otherwise + */ +bool batadv_tt_global_is_isolated(struct batadv_priv *bat_priv, + const uint8_t *addr, unsigned short vid) +{ + struct batadv_tt_global_entry *tt; + bool ret; + + tt = batadv_tt_global_hash_find(bat_priv, addr, vid); + if (!tt) + return false; + + ret = tt->common.flags & BATADV_TT_CLIENT_ISOLA; + + batadv_tt_global_entry_free_ref(tt); + + return ret; +} diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h index 026b1ffa674..20a1d7861de 100644 --- a/net/batman-adv/translation-table.h +++ b/net/batman-adv/translation-table.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich, Antonio Quartulli * @@ -12,9 +12,7 @@ * General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
*/ #ifndef _NET_BATMAN_ADV_TRANSLATION_TABLE_H_ @@ -22,7 +20,7 @@ int batadv_tt_init(struct batadv_priv *bat_priv); bool batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr, - unsigned short vid, int ifindex); + unsigned short vid, int ifindex, uint32_t mark); uint16_t batadv_tt_local_remove(struct batadv_priv *bat_priv, const uint8_t *addr, unsigned short vid, const char *message, bool roaming); @@ -50,5 +48,7 @@ bool batadv_tt_add_temporary_global_entry(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, const unsigned char *addr, unsigned short vid); +bool batadv_tt_global_is_isolated(struct batadv_priv *bat_priv, + const uint8_t *addr, unsigned short vid); #endif /* _NET_BATMAN_ADV_TRANSLATION_TABLE_H_ */ diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 91dd369b0ff..78370ab31f9 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2014 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -12,9 +12,7 @@ * General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
*/ #ifndef _NET_BATMAN_ADV_TYPES_H_ @@ -36,6 +34,18 @@ #endif /* CONFIG_BATMAN_ADV_DAT */ /** + * enum batadv_dhcp_recipient - dhcp destination + * @BATADV_DHCP_NO: packet is not a dhcp message + * @BATADV_DHCP_TO_SERVER: dhcp message is directed to a server + * @BATADV_DHCP_TO_CLIENT: dhcp message is directed to a client + */ +enum batadv_dhcp_recipient { + BATADV_DHCP_NO = 0, + BATADV_DHCP_TO_SERVER, + BATADV_DHCP_TO_CLIENT, +}; + +/** * BATADV_TT_REMOTE_MASK - bitmask selecting the flags that are sent over the * wire only */ @@ -74,6 +84,7 @@ struct batadv_hard_iface_bat_iv { * @rcu: struct used for freeing in an RCU-safe manner * @bat_iv: BATMAN IV specific per hard interface data * @cleanup_work: work queue callback item for hard interface deinit + * @debug_dir: dentry for nc subdir in batman-adv directory in debugfs */ struct batadv_hard_iface { struct list_head list; @@ -88,6 +99,29 @@ struct batadv_hard_iface { struct rcu_head rcu; struct batadv_hard_iface_bat_iv bat_iv; struct work_struct cleanup_work; + struct dentry *debug_dir; +}; + +/** + * struct batadv_orig_ifinfo - originator info per outgoing interface + * @list: list node for orig_node::ifinfo_list + * @if_outgoing: pointer to outgoing hard interface + * @router: router that should be used to reach this originator + * @last_real_seqno: last and best known sequence number + * @last_ttl: ttl of last received packet + * @batman_seqno_reset: time when the batman seqno window was reset + * @refcount: number of contexts the object is used + * @rcu: struct used for freeing in an RCU-safe manner + */ +struct batadv_orig_ifinfo { + struct hlist_node list; + struct batadv_hard_iface *if_outgoing; + struct batadv_neigh_node __rcu *router; /* rcu protected pointer */ + uint32_t last_real_seqno; + uint8_t last_ttl; + unsigned long batman_seqno_reset; + atomic_t refcount; + struct rcu_head rcu; }; /** @@ -165,11 +199,11 @@ struct batadv_orig_bat_iv { * struct batadv_orig_node - structure for orig_list 
maintaining nodes of mesh * @orig: originator ethernet address * @primary_addr: hosts primary interface address - * @router: router that should be used to reach this originator + * @ifinfo_list: list for routers per outgoing interface + * @last_bonding_candidate: pointer to last ifinfo of last used router * @batadv_dat_addr_t: address of the orig node in the distributed hash * @last_seen: time when last packet from this node was received * @bcast_seqno_reset: time when the broadcast seqno window was reset - * @batman_seqno_reset: time when the batman seqno window was reset * @capabilities: announced capabilities of this originator * @last_ttvn: last seen translation table version number * @tt_buff: last tt changeset this node received from the orig node @@ -182,19 +216,15 @@ struct batadv_orig_bat_iv { * made up by two operations (data structure update and metdata -CRC/TTVN- * recalculation) and they have to be executed atomically in order to avoid * another thread to read the table/metadata between those. 
- * @last_real_seqno: last and best known sequence number - * @last_ttl: ttl of last received packet * @bcast_bits: bitfield containing the info which payload broadcast originated * from this orig node this host already has seen (relative to * last_bcast_seqno) * @last_bcast_seqno: last broadcast sequence number received by this host * @neigh_list: list of potential next hop neighbor towards this orig node - * @neigh_list_lock: lock protecting neigh_list, router and bonding_list + * @neigh_list_lock: lock protecting neigh_list and router * @hash_entry: hlist node for batadv_priv::orig_hash * @bat_priv: pointer to soft_iface this orig node belongs to * @bcast_seqno_lock: lock protecting bcast_bits & last_bcast_seqno - * @bond_candidates: how many candidates are available - * @bond_list: list of bonding candidates * @refcount: number of contexts the object is used * @rcu: struct used for freeing in an RCU-safe manner * @in_coding_list: list of nodes this orig can hear @@ -210,13 +240,13 @@ struct batadv_orig_bat_iv { struct batadv_orig_node { uint8_t orig[ETH_ALEN]; uint8_t primary_addr[ETH_ALEN]; - struct batadv_neigh_node __rcu *router; /* rcu protected pointer */ + struct hlist_head ifinfo_list; + struct batadv_orig_ifinfo *last_bonding_candidate; #ifdef CONFIG_BATMAN_ADV_DAT batadv_dat_addr_t dat_addr; #endif unsigned long last_seen; unsigned long bcast_seqno_reset; - unsigned long batman_seqno_reset; uint8_t capabilities; atomic_t last_ttvn; unsigned char *tt_buff; @@ -225,19 +255,15 @@ struct batadv_orig_node { bool tt_initialised; /* prevents from changing the table while reading it */ spinlock_t tt_lock; - uint32_t last_real_seqno; - uint8_t last_ttl; DECLARE_BITMAP(bcast_bits, BATADV_TQ_LOCAL_WINDOW_SIZE); uint32_t last_bcast_seqno; struct hlist_head neigh_list; - /* neigh_list_lock protects: neigh_list, router & bonding_list */ + /* neigh_list_lock protects: neigh_list and router */ spinlock_t neigh_list_lock; struct hlist_node hash_entry; struct 
batadv_priv *bat_priv; /* bcast_seqno_lock protects: bcast_bits & last_bcast_seqno */ spinlock_t bcast_seqno_lock; - atomic_t bond_candidates; - struct list_head bond_list; atomic_t refcount; struct rcu_head rcu; #ifdef CONFIG_BATMAN_ADV_NC @@ -283,49 +309,64 @@ struct batadv_gw_node { }; /** - * struct batadv_neigh_bat_iv - B.A.T.M.A.N. IV specific structure for single - * hop neighbors + * struct batadv_neigh_node - structure for single hops neighbors + * @list: list node for batadv_orig_node::neigh_list + * @orig_node: pointer to corresponding orig_node + * @addr: the MAC address of the neighboring interface + * @ifinfo_list: list for routing metrics per outgoing interface + * @ifinfo_lock: lock protecting private ifinfo members and list + * @if_incoming: pointer to incoming hard interface + * @last_seen: when last packet via this neighbor was received + * @last_ttl: last received ttl from this neigh node + * @rcu: struct used for freeing in an RCU-safe manner + * @bat_iv: B.A.T.M.A.N. 
IV private structure + */ +struct batadv_neigh_node { + struct hlist_node list; + struct batadv_orig_node *orig_node; + uint8_t addr[ETH_ALEN]; + struct hlist_head ifinfo_list; + spinlock_t ifinfo_lock; /* protects ifinfo_list and its members */ + struct batadv_hard_iface *if_incoming; + unsigned long last_seen; + atomic_t refcount; + struct rcu_head rcu; +}; + +/** + * struct batadv_neigh_node_bat_iv - neighbor information per outgoing + * interface for BATMAN IV * @tq_recv: ring buffer of received TQ values from this neigh node * @tq_index: ring buffer index * @tq_avg: averaged tq of all tq values in the ring buffer (tq_recv) * @real_bits: bitfield containing the number of OGMs received from this neigh * node (relative to orig_node->last_real_seqno) * @real_packet_count: counted result of real_bits - * @lq_update_lock: lock protecting tq_recv & tq_index */ -struct batadv_neigh_bat_iv { +struct batadv_neigh_ifinfo_bat_iv { uint8_t tq_recv[BATADV_TQ_GLOBAL_WINDOW_SIZE]; uint8_t tq_index; uint8_t tq_avg; DECLARE_BITMAP(real_bits, BATADV_TQ_LOCAL_WINDOW_SIZE); uint8_t real_packet_count; - spinlock_t lq_update_lock; /* protects tq_recv & tq_index */ }; /** - * struct batadv_neigh_node - structure for single hops neighbors - * @list: list node for batadv_orig_node::neigh_list - * @orig_node: pointer to corresponding orig_node - * @addr: the MAC address of the neighboring interface - * @if_incoming: pointer to incoming hard interface - * @last_seen: when last packet via this neighbor was received + * struct batadv_neigh_ifinfo - neighbor information per outgoing interface + * @list: list node for batadv_neigh_node::ifinfo_list + * @if_outgoing: pointer to outgoing hard interface + * @bat_iv: B.A.T.M.A.N. IV private structure * @last_ttl: last received ttl from this neigh node - * @bonding_list: list node for batadv_orig_node::bond_list * @refcount: number of contexts the object is used - * @rcu: struct used for freeing in an RCU-safe manner - * @bat_iv: B.A.T.M.A.N. 
IV private structure + * @rcu: struct used for freeing in a RCU-safe manner */ -struct batadv_neigh_node { +struct batadv_neigh_ifinfo { struct hlist_node list; - struct batadv_orig_node *orig_node; - uint8_t addr[ETH_ALEN]; - struct batadv_hard_iface *if_incoming; - unsigned long last_seen; + struct batadv_hard_iface *if_outgoing; + struct batadv_neigh_ifinfo_bat_iv bat_iv; uint8_t last_ttl; - struct list_head bonding_list; atomic_t refcount; struct rcu_head rcu; - struct batadv_neigh_bat_iv bat_iv; }; /** @@ -687,6 +728,8 @@ struct batadv_priv { #ifdef CONFIG_BATMAN_ADV_DEBUG atomic_t log_level; #endif + uint32_t isolation_mark; + uint32_t isolation_mark_mask; atomic_t bcast_seqno; atomic_t bcast_queue_left; atomic_t batman_queue_left; @@ -981,8 +1024,10 @@ struct batadv_skb_cb { * @direct_link_flags: direct link flags for aggregated OGM packets * @num_packets: counter for bcast packet retransmission * @delayed_work: work queue callback item for packet sending - * @if_incoming: pointer incoming hard-iface or primary iface if locally - * generated packet + * @if_incoming: pointer to incoming hard-iface or primary iface if + * locally generated packet + * @if_outgoing: packet where the packet should be sent to, or NULL if + * unspecified */ struct batadv_forw_packet { struct hlist_node list; @@ -994,6 +1039,7 @@ struct batadv_forw_packet { uint8_t num_packets; struct delayed_work delayed_work; struct batadv_hard_iface *if_incoming; + struct batadv_hard_iface *if_outgoing; }; /** @@ -1007,9 +1053,11 @@ struct batadv_forw_packet { * @bat_primary_iface_set: called when primary interface is selected / changed * @bat_ogm_schedule: prepare a new outgoing OGM for the send queue * @bat_ogm_emit: send scheduled OGM - * @bat_neigh_cmp: compare the metrics of two neighbors - * @bat_neigh_is_equiv_or_better: check if neigh1 is equally good or - * better than neigh2 from the metric prospective + * @bat_neigh_cmp: compare the metrics of two neighbors for their respective + * 
outgoing interfaces + * @bat_neigh_is_equiv_or_better: check if neigh1 is equally good or better + * than neigh2 for their respective outgoing interface from the metric + * prospective * @bat_orig_print: print the originator table (optional) * @bat_orig_free: free the resources allocated by the routing algorithm for an * orig_node object @@ -1028,11 +1076,17 @@ struct batadv_algo_ops { void (*bat_ogm_schedule)(struct batadv_hard_iface *hard_iface); void (*bat_ogm_emit)(struct batadv_forw_packet *forw_packet); int (*bat_neigh_cmp)(struct batadv_neigh_node *neigh1, - struct batadv_neigh_node *neigh2); - bool (*bat_neigh_is_equiv_or_better)(struct batadv_neigh_node *neigh1, - struct batadv_neigh_node *neigh2); + struct batadv_hard_iface *if_outgoing1, + struct batadv_neigh_node *neigh2, + struct batadv_hard_iface *if_outgoing2); + bool (*bat_neigh_is_equiv_or_better) + (struct batadv_neigh_node *neigh1, + struct batadv_hard_iface *if_outgoing1, + struct batadv_neigh_node *neigh2, + struct batadv_hard_iface *if_outgoing2); /* orig_node handling API */ - void (*bat_orig_print)(struct batadv_priv *priv, struct seq_file *seq); + void (*bat_orig_print)(struct batadv_priv *priv, struct seq_file *seq, + struct batadv_hard_iface *hard_iface); void (*bat_orig_free)(struct batadv_orig_node *orig_node); int (*bat_orig_add_if)(struct batadv_orig_node *orig_node, int max_if_num); diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c new file mode 100644 index 00000000000..adb3ea04ada --- /dev/null +++ b/net/bluetooth/6lowpan.c @@ -0,0 +1,860 @@ +/* + Copyright (c) 2013 Intel Corp. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License version 2 and + only version 2 as published by the Free Software Foundation. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. +*/ + +#include <linux/if_arp.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> + +#include <net/ipv6.h> +#include <net/ip6_route.h> +#include <net/addrconf.h> + +#include <net/af_ieee802154.h> /* to get the address type */ + +#include <net/bluetooth/bluetooth.h> +#include <net/bluetooth/hci_core.h> +#include <net/bluetooth/l2cap.h> + +#include "6lowpan.h" + +#include "../ieee802154/6lowpan.h" /* for the compression support */ + +#define IFACE_NAME_TEMPLATE "bt%d" +#define EUI64_ADDR_LEN 8 + +struct skb_cb { + struct in6_addr addr; + struct l2cap_conn *conn; +}; +#define lowpan_cb(skb) ((struct skb_cb *)((skb)->cb)) + +/* The devices list contains those devices that we are acting + * as a proxy. The BT 6LoWPAN device is a virtual device that + * connects to the Bluetooth LE device. The real connection to + * BT device is done via l2cap layer. There exists one + * virtual device / one BT 6LoWPAN network (=hciX device). + * The list contains struct lowpan_dev elements. 
+ */ +static LIST_HEAD(bt_6lowpan_devices); +static DEFINE_RWLOCK(devices_lock); + +struct lowpan_peer { + struct list_head list; + struct l2cap_conn *conn; + + /* peer addresses in various formats */ + unsigned char eui64_addr[EUI64_ADDR_LEN]; + struct in6_addr peer_addr; +}; + +struct lowpan_dev { + struct list_head list; + + struct hci_dev *hdev; + struct net_device *netdev; + struct list_head peers; + atomic_t peer_count; /* number of items in peers list */ + + struct work_struct delete_netdev; + struct delayed_work notify_peers; +}; + +static inline struct lowpan_dev *lowpan_dev(const struct net_device *netdev) +{ + return netdev_priv(netdev); +} + +static inline void peer_add(struct lowpan_dev *dev, struct lowpan_peer *peer) +{ + list_add(&peer->list, &dev->peers); + atomic_inc(&dev->peer_count); +} + +static inline bool peer_del(struct lowpan_dev *dev, struct lowpan_peer *peer) +{ + list_del(&peer->list); + + if (atomic_dec_and_test(&dev->peer_count)) { + BT_DBG("last peer"); + return true; + } + + return false; +} + +static inline struct lowpan_peer *peer_lookup_ba(struct lowpan_dev *dev, + bdaddr_t *ba, __u8 type) +{ + struct lowpan_peer *peer, *tmp; + + BT_DBG("peers %d addr %pMR type %d", atomic_read(&dev->peer_count), + ba, type); + + list_for_each_entry_safe(peer, tmp, &dev->peers, list) { + BT_DBG("addr %pMR type %d", + &peer->conn->hcon->dst, peer->conn->hcon->dst_type); + + if (bacmp(&peer->conn->hcon->dst, ba)) + continue; + + if (type == peer->conn->hcon->dst_type) + return peer; + } + + return NULL; +} + +static inline struct lowpan_peer *peer_lookup_conn(struct lowpan_dev *dev, + struct l2cap_conn *conn) +{ + struct lowpan_peer *peer, *tmp; + + list_for_each_entry_safe(peer, tmp, &dev->peers, list) { + if (peer->conn == conn) + return peer; + } + + return NULL; +} + +static struct lowpan_peer *lookup_peer(struct l2cap_conn *conn) +{ + struct lowpan_dev *entry, *tmp; + struct lowpan_peer *peer = NULL; + unsigned long flags; + + 
read_lock_irqsave(&devices_lock, flags); + + list_for_each_entry_safe(entry, tmp, &bt_6lowpan_devices, list) { + peer = peer_lookup_conn(entry, conn); + if (peer) + break; + } + + read_unlock_irqrestore(&devices_lock, flags); + + return peer; +} + +static struct lowpan_dev *lookup_dev(struct l2cap_conn *conn) +{ + struct lowpan_dev *entry, *tmp; + struct lowpan_dev *dev = NULL; + unsigned long flags; + + read_lock_irqsave(&devices_lock, flags); + + list_for_each_entry_safe(entry, tmp, &bt_6lowpan_devices, list) { + if (conn->hcon->hdev == entry->hdev) { + dev = entry; + break; + } + } + + read_unlock_irqrestore(&devices_lock, flags); + + return dev; +} + +static int give_skb_to_upper(struct sk_buff *skb, struct net_device *dev) +{ + struct sk_buff *skb_cp; + int ret; + + skb_cp = skb_copy(skb, GFP_ATOMIC); + if (!skb_cp) + return -ENOMEM; + + ret = netif_rx(skb_cp); + + BT_DBG("receive skb %d", ret); + if (ret < 0) + return NET_RX_DROP; + + return ret; +} + +static int process_data(struct sk_buff *skb, struct net_device *netdev, + struct l2cap_conn *conn) +{ + const u8 *saddr, *daddr; + u8 iphc0, iphc1; + struct lowpan_dev *dev; + struct lowpan_peer *peer; + unsigned long flags; + + dev = lowpan_dev(netdev); + + read_lock_irqsave(&devices_lock, flags); + peer = peer_lookup_conn(dev, conn); + read_unlock_irqrestore(&devices_lock, flags); + if (!peer) + goto drop; + + saddr = peer->eui64_addr; + daddr = dev->netdev->dev_addr; + + /* at least two bytes will be used for the encoding */ + if (skb->len < 2) + goto drop; + + if (lowpan_fetch_skb_u8(skb, &iphc0)) + goto drop; + + if (lowpan_fetch_skb_u8(skb, &iphc1)) + goto drop; + + return lowpan_process_data(skb, netdev, + saddr, IEEE802154_ADDR_LONG, EUI64_ADDR_LEN, + daddr, IEEE802154_ADDR_LONG, EUI64_ADDR_LEN, + iphc0, iphc1, give_skb_to_upper); + +drop: + kfree_skb(skb); + return -EINVAL; +} + +static int recv_pkt(struct sk_buff *skb, struct net_device *dev, + struct l2cap_conn *conn) +{ + struct sk_buff *local_skb; 
+ int ret; + + if (!netif_running(dev)) + goto drop; + + if (dev->type != ARPHRD_6LOWPAN) + goto drop; + + /* check that it's our buffer */ + if (skb->data[0] == LOWPAN_DISPATCH_IPV6) { + /* Copy the packet so that the IPv6 header is + * properly aligned. + */ + local_skb = skb_copy_expand(skb, NET_SKB_PAD - 1, + skb_tailroom(skb), GFP_ATOMIC); + if (!local_skb) + goto drop; + + local_skb->protocol = htons(ETH_P_IPV6); + local_skb->pkt_type = PACKET_HOST; + + skb_reset_network_header(local_skb); + skb_set_transport_header(local_skb, sizeof(struct ipv6hdr)); + + if (give_skb_to_upper(local_skb, dev) != NET_RX_SUCCESS) { + kfree_skb(local_skb); + goto drop; + } + + dev->stats.rx_bytes += skb->len; + dev->stats.rx_packets++; + + kfree_skb(local_skb); + kfree_skb(skb); + } else { + switch (skb->data[0] & 0xe0) { + case LOWPAN_DISPATCH_IPHC: /* ipv6 datagram */ + local_skb = skb_clone(skb, GFP_ATOMIC); + if (!local_skb) + goto drop; + + ret = process_data(local_skb, dev, conn); + if (ret != NET_RX_SUCCESS) + goto drop; + + dev->stats.rx_bytes += skb->len; + dev->stats.rx_packets++; + + kfree_skb(skb); + break; + default: + break; + } + } + + return NET_RX_SUCCESS; + +drop: + kfree_skb(skb); + return NET_RX_DROP; +} + +/* Packet from BT LE device */ +int bt_6lowpan_recv(struct l2cap_conn *conn, struct sk_buff *skb) +{ + struct lowpan_dev *dev; + struct lowpan_peer *peer; + int err; + + peer = lookup_peer(conn); + if (!peer) + return -ENOENT; + + dev = lookup_dev(conn); + if (!dev || !dev->netdev) + return -ENOENT; + + err = recv_pkt(skb, dev->netdev, conn); + BT_DBG("recv pkt %d", err); + + return err; +} + +static inline int skbuff_copy(void *msg, int len, int count, int mtu, + struct sk_buff *skb, struct net_device *dev) +{ + struct sk_buff **frag; + int sent = 0; + + memcpy(skb_put(skb, count), msg, count); + + sent += count; + msg += count; + len -= count; + + dev->stats.tx_bytes += count; + dev->stats.tx_packets++; + + raw_dump_table(__func__, "Sending", skb->data, 
skb->len); + + /* Continuation fragments (no L2CAP header) */ + frag = &skb_shinfo(skb)->frag_list; + while (len > 0) { + struct sk_buff *tmp; + + count = min_t(unsigned int, mtu, len); + + tmp = bt_skb_alloc(count, GFP_ATOMIC); + if (!tmp) + return -ENOMEM; + + *frag = tmp; + + memcpy(skb_put(*frag, count), msg, count); + + raw_dump_table(__func__, "Sending fragment", + (*frag)->data, count); + + (*frag)->priority = skb->priority; + + sent += count; + msg += count; + len -= count; + + skb->len += (*frag)->len; + skb->data_len += (*frag)->len; + + frag = &(*frag)->next; + + dev->stats.tx_bytes += count; + dev->stats.tx_packets++; + } + + return sent; +} + +static struct sk_buff *create_pdu(struct l2cap_conn *conn, void *msg, + size_t len, u32 priority, + struct net_device *dev) +{ + struct sk_buff *skb; + int err, count; + struct l2cap_hdr *lh; + + /* FIXME: This mtu check should be not needed and atm is only used for + * testing purposes + */ + if (conn->mtu > (L2CAP_LE_MIN_MTU + L2CAP_HDR_SIZE)) + conn->mtu = L2CAP_LE_MIN_MTU + L2CAP_HDR_SIZE; + + count = min_t(unsigned int, (conn->mtu - L2CAP_HDR_SIZE), len); + + BT_DBG("conn %p len %zu mtu %d count %d", conn, len, conn->mtu, count); + + skb = bt_skb_alloc(count + L2CAP_HDR_SIZE, GFP_ATOMIC); + if (!skb) + return ERR_PTR(-ENOMEM); + + skb->priority = priority; + + lh = (struct l2cap_hdr *)skb_put(skb, L2CAP_HDR_SIZE); + lh->cid = cpu_to_le16(L2CAP_FC_6LOWPAN); + lh->len = cpu_to_le16(len); + + err = skbuff_copy(msg, len, count, conn->mtu, skb, dev); + if (unlikely(err < 0)) { + kfree_skb(skb); + BT_DBG("skbuff copy %d failed", err); + return ERR_PTR(err); + } + + return skb; +} + +static int conn_send(struct l2cap_conn *conn, + void *msg, size_t len, u32 priority, + struct net_device *dev) +{ + struct sk_buff *skb; + + skb = create_pdu(conn, msg, len, priority, dev); + if (IS_ERR(skb)) + return -EINVAL; + + BT_DBG("conn %p skb %p len %d priority %u", conn, skb, skb->len, + skb->priority); + + 
hci_send_acl(conn->hchan, skb, ACL_START); + + return 0; +} + +static void get_dest_bdaddr(struct in6_addr *ip6_daddr, + bdaddr_t *addr, u8 *addr_type) +{ + u8 *eui64; + + eui64 = ip6_daddr->s6_addr + 8; + + addr->b[0] = eui64[7]; + addr->b[1] = eui64[6]; + addr->b[2] = eui64[5]; + addr->b[3] = eui64[2]; + addr->b[4] = eui64[1]; + addr->b[5] = eui64[0]; + + addr->b[5] ^= 2; + + /* Set universal/local bit to 0 */ + if (addr->b[5] & 1) { + addr->b[5] &= ~1; + *addr_type = ADDR_LE_DEV_PUBLIC; + } else { + *addr_type = ADDR_LE_DEV_RANDOM; + } +} + +static int header_create(struct sk_buff *skb, struct net_device *netdev, + unsigned short type, const void *_daddr, + const void *_saddr, unsigned int len) +{ + struct ipv6hdr *hdr; + struct lowpan_dev *dev; + struct lowpan_peer *peer; + bdaddr_t addr, *any = BDADDR_ANY; + u8 *saddr, *daddr = any->b; + u8 addr_type; + + if (type != ETH_P_IPV6) + return -EINVAL; + + hdr = ipv6_hdr(skb); + + dev = lowpan_dev(netdev); + + if (ipv6_addr_is_multicast(&hdr->daddr)) { + memcpy(&lowpan_cb(skb)->addr, &hdr->daddr, + sizeof(struct in6_addr)); + lowpan_cb(skb)->conn = NULL; + } else { + unsigned long flags; + + /* Get destination BT device from skb. + * If there is no such peer then discard the packet. 
+ */ + get_dest_bdaddr(&hdr->daddr, &addr, &addr_type); + + BT_DBG("dest addr %pMR type %d", &addr, addr_type); + + read_lock_irqsave(&devices_lock, flags); + peer = peer_lookup_ba(dev, &addr, addr_type); + read_unlock_irqrestore(&devices_lock, flags); + + if (!peer) { + BT_DBG("no such peer %pMR found", &addr); + return -ENOENT; + } + + daddr = peer->eui64_addr; + + memcpy(&lowpan_cb(skb)->addr, &hdr->daddr, + sizeof(struct in6_addr)); + lowpan_cb(skb)->conn = peer->conn; + } + + saddr = dev->netdev->dev_addr; + + return lowpan_header_compress(skb, netdev, type, daddr, saddr, len); +} + +/* Packet to BT LE device */ +static int send_pkt(struct l2cap_conn *conn, const void *saddr, + const void *daddr, struct sk_buff *skb, + struct net_device *netdev) +{ + raw_dump_table(__func__, "raw skb data dump before fragmentation", + skb->data, skb->len); + + return conn_send(conn, skb->data, skb->len, 0, netdev); +} + +static void send_mcast_pkt(struct sk_buff *skb, struct net_device *netdev) +{ + struct sk_buff *local_skb; + struct lowpan_dev *entry, *tmp; + unsigned long flags; + + read_lock_irqsave(&devices_lock, flags); + + list_for_each_entry_safe(entry, tmp, &bt_6lowpan_devices, list) { + struct lowpan_peer *pentry, *ptmp; + struct lowpan_dev *dev; + + if (entry->netdev != netdev) + continue; + + dev = lowpan_dev(entry->netdev); + + list_for_each_entry_safe(pentry, ptmp, &dev->peers, list) { + local_skb = skb_clone(skb, GFP_ATOMIC); + + send_pkt(pentry->conn, netdev->dev_addr, + pentry->eui64_addr, local_skb, netdev); + + kfree_skb(local_skb); + } + } + + read_unlock_irqrestore(&devices_lock, flags); +} + +static netdev_tx_t bt_xmit(struct sk_buff *skb, struct net_device *netdev) +{ + int err = 0; + unsigned char *eui64_addr; + struct lowpan_dev *dev; + struct lowpan_peer *peer; + bdaddr_t addr; + u8 addr_type; + + if (ipv6_addr_is_multicast(&lowpan_cb(skb)->addr)) { + /* We need to send the packet to every device + * behind this interface. 
+ */ + send_mcast_pkt(skb, netdev); + } else { + unsigned long flags; + + get_dest_bdaddr(&lowpan_cb(skb)->addr, &addr, &addr_type); + eui64_addr = lowpan_cb(skb)->addr.s6_addr + 8; + dev = lowpan_dev(netdev); + + read_lock_irqsave(&devices_lock, flags); + peer = peer_lookup_ba(dev, &addr, addr_type); + read_unlock_irqrestore(&devices_lock, flags); + + BT_DBG("xmit from %s to %pMR (%pI6c) peer %p", netdev->name, + &addr, &lowpan_cb(skb)->addr, peer); + + if (peer && peer->conn) + err = send_pkt(peer->conn, netdev->dev_addr, + eui64_addr, skb, netdev); + } + dev_kfree_skb(skb); + + if (err) + BT_DBG("ERROR: xmit failed (%d)", err); + + return (err < 0) ? NET_XMIT_DROP : err; +} + +static const struct net_device_ops netdev_ops = { + .ndo_start_xmit = bt_xmit, +}; + +static struct header_ops header_ops = { + .create = header_create, +}; + +static void netdev_setup(struct net_device *dev) +{ + dev->addr_len = EUI64_ADDR_LEN; + dev->type = ARPHRD_6LOWPAN; + + dev->hard_header_len = 0; + dev->needed_tailroom = 0; + dev->mtu = IPV6_MIN_MTU; + dev->tx_queue_len = 0; + dev->flags = IFF_RUNNING | IFF_POINTOPOINT; + dev->watchdog_timeo = 0; + + dev->netdev_ops = &netdev_ops; + dev->header_ops = &header_ops; + dev->destructor = free_netdev; +} + +static struct device_type bt_type = { + .name = "bluetooth", +}; + +static void set_addr(u8 *eui, u8 *addr, u8 addr_type) +{ + /* addr is the BT address in little-endian format */ + eui[0] = addr[5]; + eui[1] = addr[4]; + eui[2] = addr[3]; + eui[3] = 0xFF; + eui[4] = 0xFE; + eui[5] = addr[2]; + eui[6] = addr[1]; + eui[7] = addr[0]; + + eui[0] ^= 2; + + /* Universal/local bit set, RFC 4291 */ + if (addr_type == ADDR_LE_DEV_PUBLIC) + eui[0] |= 1; + else + eui[0] &= ~1; +} + +static void set_dev_addr(struct net_device *netdev, bdaddr_t *addr, + u8 addr_type) +{ + netdev->addr_assign_type = NET_ADDR_PERM; + set_addr(netdev->dev_addr, addr->b, addr_type); + netdev->dev_addr[0] ^= 2; +} + +static void ifup(struct net_device *netdev) +{ + 
int err; + + rtnl_lock(); + err = dev_open(netdev); + if (err < 0) + BT_INFO("iface %s cannot be opened (%d)", netdev->name, err); + rtnl_unlock(); +} + +static void do_notify_peers(struct work_struct *work) +{ + struct lowpan_dev *dev = container_of(work, struct lowpan_dev, + notify_peers.work); + + netdev_notify_peers(dev->netdev); /* send neighbour adv at startup */ +} + +static bool is_bt_6lowpan(struct hci_conn *hcon) +{ + if (hcon->type != LE_LINK) + return false; + + return test_bit(HCI_CONN_6LOWPAN, &hcon->flags); +} + +static int add_peer_conn(struct l2cap_conn *conn, struct lowpan_dev *dev) +{ + struct lowpan_peer *peer; + unsigned long flags; + + peer = kzalloc(sizeof(*peer), GFP_ATOMIC); + if (!peer) + return -ENOMEM; + + peer->conn = conn; + memset(&peer->peer_addr, 0, sizeof(struct in6_addr)); + + /* RFC 2464 ch. 5 */ + peer->peer_addr.s6_addr[0] = 0xFE; + peer->peer_addr.s6_addr[1] = 0x80; + set_addr((u8 *)&peer->peer_addr.s6_addr + 8, conn->hcon->dst.b, + conn->hcon->dst_type); + + memcpy(&peer->eui64_addr, (u8 *)&peer->peer_addr.s6_addr + 8, + EUI64_ADDR_LEN); + peer->eui64_addr[0] ^= 2; /* second bit-flip (Universe/Local) + * is done according RFC2464 + */ + + raw_dump_inline(__func__, "peer IPv6 address", + (unsigned char *)&peer->peer_addr, 16); + raw_dump_inline(__func__, "peer EUI64 address", peer->eui64_addr, 8); + + write_lock_irqsave(&devices_lock, flags); + INIT_LIST_HEAD(&peer->list); + peer_add(dev, peer); + write_unlock_irqrestore(&devices_lock, flags); + + /* Notifying peers about us needs to be done without locks held */ + INIT_DELAYED_WORK(&dev->notify_peers, do_notify_peers); + schedule_delayed_work(&dev->notify_peers, msecs_to_jiffies(100)); + + return 0; +} + +/* This gets called when BT LE 6LoWPAN device is connected. We then + * create network device that acts as a proxy between BT LE device + * and kernel network stack. 
+ */ +int bt_6lowpan_add_conn(struct l2cap_conn *conn) +{ + struct lowpan_peer *peer = NULL; + struct lowpan_dev *dev; + struct net_device *netdev; + int err = 0; + unsigned long flags; + + if (!is_bt_6lowpan(conn->hcon)) + return 0; + + peer = lookup_peer(conn); + if (peer) + return -EEXIST; + + dev = lookup_dev(conn); + if (dev) + return add_peer_conn(conn, dev); + + netdev = alloc_netdev(sizeof(*dev), IFACE_NAME_TEMPLATE, netdev_setup); + if (!netdev) + return -ENOMEM; + + set_dev_addr(netdev, &conn->hcon->src, conn->hcon->src_type); + + netdev->netdev_ops = &netdev_ops; + SET_NETDEV_DEV(netdev, &conn->hcon->dev); + SET_NETDEV_DEVTYPE(netdev, &bt_type); + + err = register_netdev(netdev); + if (err < 0) { + BT_INFO("register_netdev failed %d", err); + free_netdev(netdev); + goto out; + } + + BT_DBG("ifindex %d peer bdaddr %pMR my addr %pMR", + netdev->ifindex, &conn->hcon->dst, &conn->hcon->src); + set_bit(__LINK_STATE_PRESENT, &netdev->state); + + dev = netdev_priv(netdev); + dev->netdev = netdev; + dev->hdev = conn->hcon->hdev; + INIT_LIST_HEAD(&dev->peers); + + write_lock_irqsave(&devices_lock, flags); + INIT_LIST_HEAD(&dev->list); + list_add(&dev->list, &bt_6lowpan_devices); + write_unlock_irqrestore(&devices_lock, flags); + + ifup(netdev); + + return add_peer_conn(conn, dev); + +out: + return err; +} + +static void delete_netdev(struct work_struct *work) +{ + struct lowpan_dev *entry = container_of(work, struct lowpan_dev, + delete_netdev); + + unregister_netdev(entry->netdev); + + /* The entry pointer is deleted in device_event() */ +} + +int bt_6lowpan_del_conn(struct l2cap_conn *conn) +{ + struct lowpan_dev *entry, *tmp; + struct lowpan_dev *dev = NULL; + struct lowpan_peer *peer; + int err = -ENOENT; + unsigned long flags; + bool last = false; + + if (!conn || !is_bt_6lowpan(conn->hcon)) + return 0; + + write_lock_irqsave(&devices_lock, flags); + + list_for_each_entry_safe(entry, tmp, &bt_6lowpan_devices, list) { + dev = lowpan_dev(entry->netdev); + peer 
= peer_lookup_conn(dev, conn); + if (peer) { + last = peer_del(dev, peer); + err = 0; + break; + } + } + + if (!err && last && dev && !atomic_read(&dev->peer_count)) { + write_unlock_irqrestore(&devices_lock, flags); + + cancel_delayed_work_sync(&dev->notify_peers); + + /* bt_6lowpan_del_conn() is called with hci dev lock held which + * means that we must delete the netdevice in worker thread. + */ + INIT_WORK(&entry->delete_netdev, delete_netdev); + schedule_work(&entry->delete_netdev); + } else { + write_unlock_irqrestore(&devices_lock, flags); + } + + return err; +} + +static int device_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *netdev = netdev_notifier_info_to_dev(ptr); + struct lowpan_dev *entry, *tmp; + unsigned long flags; + + if (netdev->type != ARPHRD_6LOWPAN) + return NOTIFY_DONE; + + switch (event) { + case NETDEV_UNREGISTER: + write_lock_irqsave(&devices_lock, flags); + list_for_each_entry_safe(entry, tmp, &bt_6lowpan_devices, + list) { + if (entry->netdev == netdev) { + list_del(&entry->list); + kfree(entry); + break; + } + } + write_unlock_irqrestore(&devices_lock, flags); + break; + } + + return NOTIFY_DONE; +} + +static struct notifier_block bt_6lowpan_dev_notifier = { + .notifier_call = device_event, +}; + +int bt_6lowpan_init(void) +{ + return register_netdevice_notifier(&bt_6lowpan_dev_notifier); +} + +void bt_6lowpan_cleanup(void) +{ + unregister_netdevice_notifier(&bt_6lowpan_dev_notifier); +} diff --git a/net/bluetooth/6lowpan.h b/net/bluetooth/6lowpan.h new file mode 100644 index 00000000000..680eac808d7 --- /dev/null +++ b/net/bluetooth/6lowpan.h @@ -0,0 +1,26 @@ +/* + Copyright (c) 2013 Intel Corp. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License version 2 and + only version 2 as published by the Free Software Foundation. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. +*/ + +#ifndef __6LOWPAN_H +#define __6LOWPAN_H + +#include <linux/skbuff.h> +#include <net/bluetooth/l2cap.h> + +int bt_6lowpan_recv(struct l2cap_conn *conn, struct sk_buff *skb); +int bt_6lowpan_add_conn(struct l2cap_conn *conn); +int bt_6lowpan_del_conn(struct l2cap_conn *conn); +int bt_6lowpan_init(void); +void bt_6lowpan_cleanup(void); + +#endif /* __6LOWPAN_H */ diff --git a/net/bluetooth/Kconfig b/net/bluetooth/Kconfig index d3f3f7b1d32..985b56070d2 100644 --- a/net/bluetooth/Kconfig +++ b/net/bluetooth/Kconfig @@ -12,6 +12,7 @@ menuconfig BT select CRYPTO_AES select CRYPTO_ECB select CRYPTO_SHA256 + select 6LOWPAN_IPHC help Bluetooth is low-cost, low-power, short-range wireless technology. It was designed as a replacement for cables and other short-range diff --git a/net/bluetooth/Makefile b/net/bluetooth/Makefile index 6a791e73e39..80cb215826e 100644 --- a/net/bluetooth/Makefile +++ b/net/bluetooth/Makefile @@ -10,6 +10,6 @@ obj-$(CONFIG_BT_HIDP) += hidp/ bluetooth-y := af_bluetooth.o hci_core.o hci_conn.o hci_event.o mgmt.o \ hci_sock.o hci_sysfs.o l2cap_core.o l2cap_sock.o smp.o sco.o lib.o \ - a2mp.o amp.o + a2mp.o amp.o 6lowpan.o subdir-ccflags-y += -D__CHECK_ENDIAN__ diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 56ca494621c..0c5866bb49b 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -31,7 +31,7 @@ #include <net/bluetooth/bluetooth.h> #include <linux/proc_fs.h> -#define VERSION "2.17" +#define VERSION "2.18" /* Bluetooth sockets */ #define BT_MAX_PROTO 8 diff --git a/net/bluetooth/bnep/bnep.h b/net/bluetooth/bnep/bnep.h index e7ee5314f39..5a5b16f365e 100644 --- a/net/bluetooth/bnep/bnep.h +++ b/net/bluetooth/bnep/bnep.h @@ -12,8 +12,7 @@ GNU General 
Public License for more details. You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + along with this program; if not, see <http://www.gnu.org/licenses/>. */ #ifndef _BNEP_H diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 6ccc4eb9e55..5e8663c194c 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -636,6 +636,49 @@ static int conn_max_interval_get(void *data, u64 *val) DEFINE_SIMPLE_ATTRIBUTE(conn_max_interval_fops, conn_max_interval_get, conn_max_interval_set, "%llu\n"); +static ssize_t lowpan_read(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct hci_dev *hdev = file->private_data; + char buf[3]; + + buf[0] = test_bit(HCI_6LOWPAN_ENABLED, &hdev->dev_flags) ? 'Y' : 'N'; + buf[1] = '\n'; + buf[2] = '\0'; + return simple_read_from_buffer(user_buf, count, ppos, buf, 2); +} + +static ssize_t lowpan_write(struct file *fp, const char __user *user_buffer, + size_t count, loff_t *position) +{ + struct hci_dev *hdev = fp->private_data; + bool enable; + char buf[32]; + size_t buf_size = min(count, (sizeof(buf)-1)); + + if (copy_from_user(buf, user_buffer, buf_size)) + return -EFAULT; + + buf[buf_size] = '\0'; + + if (strtobool(buf, &enable) < 0) + return -EINVAL; + + if (enable == test_bit(HCI_6LOWPAN_ENABLED, &hdev->dev_flags)) + return -EALREADY; + + change_bit(HCI_6LOWPAN_ENABLED, &hdev->dev_flags); + + return count; +} + +static const struct file_operations lowpan_debugfs_fops = { + .open = simple_open, + .read = lowpan_read, + .write = lowpan_write, + .llseek = default_llseek, +}; + /* ---- HCI requests ---- */ static void hci_req_sync_complete(struct hci_dev *hdev, u8 result) @@ -1228,7 +1271,7 @@ static void hci_set_event_mask_page_2(struct hci_request *req) /* If Connectionless Slave Broadcast master role is supported * enable all necessary 
events for it. */ - if (hdev->features[2][0] & 0x01) { + if (lmp_csb_master_capable(hdev)) { events[1] |= 0x40; /* Triggered Clock Capture */ events[1] |= 0x80; /* Synchronization Train Complete */ events[2] |= 0x10; /* Slave Page Response Timeout */ @@ -1238,7 +1281,7 @@ static void hci_set_event_mask_page_2(struct hci_request *req) /* If Connectionless Slave Broadcast slave role is supported * enable all necessary events for it. */ - if (hdev->features[2][0] & 0x02) { + if (lmp_csb_slave_capable(hdev)) { events[2] |= 0x01; /* Synchronization Train Received */ events[2] |= 0x02; /* CSB Receive */ events[2] |= 0x04; /* CSB Timeout */ @@ -1261,8 +1304,13 @@ static void hci_init3_req(struct hci_request *req, unsigned long opt) * as supported send it. If not supported assume that the controller * does not have actual support for stored link keys which makes this * command redundant anyway. + * + * Some controllers indicate that they support handling deleting + * stored link keys, but they don't. The quirk lets a driver + * just disable this command. */ - if (hdev->commands[6] & 0x80) { + if (hdev->commands[6] & 0x80 && + !test_bit(HCI_QUIRK_BROKEN_STORED_LINK_KEY, &hdev->quirks)) { struct hci_cp_delete_stored_link_key cp; bacpy(&cp.bdaddr, BDADDR_ANY); @@ -1275,15 +1323,17 @@ static void hci_init3_req(struct hci_request *req, unsigned long opt) hci_setup_link_policy(req); if (lmp_le_capable(hdev)) { - /* If the controller has a public BD_ADDR, then by - * default use that one. If this is a LE only - * controller without one, default to the random - * address. - */ - if (bacmp(&hdev->bdaddr, BDADDR_ANY)) - hdev->own_addr_type = ADDR_LE_DEV_PUBLIC; - else - hdev->own_addr_type = ADDR_LE_DEV_RANDOM; + if (test_bit(HCI_SETUP, &hdev->dev_flags)) { + /* If the controller has a public BD_ADDR, then + * by default use that one. If this is a LE only + * controller without a public address, default + * to the random address. 
+ */ + if (bacmp(&hdev->bdaddr, BDADDR_ANY)) + hdev->own_addr_type = ADDR_LE_DEV_PUBLIC; + else + hdev->own_addr_type = ADDR_LE_DEV_RANDOM; + } hci_set_le_support(req); } @@ -1307,7 +1357,7 @@ static void hci_init4_req(struct hci_request *req, unsigned long opt) hci_set_event_mask_page_2(req); /* Check for Synchronization Train support */ - if (hdev->features[2][0] & 0x04) + if (lmp_sync_train_capable(hdev)) hci_req_add(req, HCI_OP_READ_SYNC_TRAIN_PARAMS, 0, NULL); } @@ -1404,6 +1454,8 @@ static int __hci_init(struct hci_dev *hdev) hdev, &conn_min_interval_fops); debugfs_create_file("conn_max_interval", 0644, hdev->debugfs, hdev, &conn_max_interval_fops); + debugfs_create_file("6lowpan", 0644, hdev->debugfs, hdev, + &lowpan_debugfs_fops); } return 0; diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 5935f748c0f..5f812455a45 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -486,7 +486,10 @@ static void hci_cc_read_local_commands(struct hci_dev *hdev, BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); - if (!rp->status) + if (rp->status) + return; + + if (test_bit(HCI_SETUP, &hdev->dev_flags)) memcpy(hdev->commands, rp->commands, sizeof(hdev->commands)); } @@ -538,12 +541,6 @@ static void hci_cc_read_local_features(struct hci_dev *hdev, if (hdev->features[0][5] & LMP_EDR_3S_ESCO) hdev->esco_type |= (ESCO_2EV5 | ESCO_3EV5); - - BT_DBG("%s features 0x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x", hdev->name, - hdev->features[0][0], hdev->features[0][1], - hdev->features[0][2], hdev->features[0][3], - hdev->features[0][4], hdev->features[0][5], - hdev->features[0][6], hdev->features[0][7]); } static void hci_cc_read_local_ext_features(struct hci_dev *hdev, @@ -1782,7 +1779,9 @@ static u8 hci_to_mgmt_reason(u8 err) static void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_ev_disconn_complete *ev = (void *) skb->data; + u8 reason = hci_to_mgmt_reason(ev->reason); struct hci_conn *conn; + u8 type; 
BT_DBG("%s status 0x%2.2x", hdev->name, ev->status); @@ -1792,43 +1791,38 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) if (!conn) goto unlock; - if (ev->status == 0) - conn->state = BT_CLOSED; + if (ev->status) { + mgmt_disconnect_failed(hdev, &conn->dst, conn->type, + conn->dst_type, ev->status); + goto unlock; + } - if (test_and_clear_bit(HCI_CONN_MGMT_CONNECTED, &conn->flags) && - (conn->type == ACL_LINK || conn->type == LE_LINK)) { - if (ev->status) { - mgmt_disconnect_failed(hdev, &conn->dst, conn->type, - conn->dst_type, ev->status); - } else { - u8 reason = hci_to_mgmt_reason(ev->reason); + conn->state = BT_CLOSED; - mgmt_device_disconnected(hdev, &conn->dst, conn->type, - conn->dst_type, reason); - } - } + if (test_and_clear_bit(HCI_CONN_MGMT_CONNECTED, &conn->flags)) + mgmt_device_disconnected(hdev, &conn->dst, conn->type, + conn->dst_type, reason); - if (ev->status == 0) { - u8 type = conn->type; + if (conn->type == ACL_LINK && conn->flush_key) + hci_remove_link_key(hdev, &conn->dst); - if (type == ACL_LINK && conn->flush_key) - hci_remove_link_key(hdev, &conn->dst); - hci_proto_disconn_cfm(conn, ev->reason); - hci_conn_del(conn); + type = conn->type; - /* Re-enable advertising if necessary, since it might - * have been disabled by the connection. From the - * HCI_LE_Set_Advertise_Enable command description in - * the core specification (v4.0): - * "The Controller shall continue advertising until the Host - * issues an LE_Set_Advertise_Enable command with - * Advertising_Enable set to 0x00 (Advertising is disabled) - * or until a connection is created or until the Advertising - * is timed out due to Directed Advertising." - */ - if (type == LE_LINK) - mgmt_reenable_advertising(hdev); - } + hci_proto_disconn_cfm(conn, ev->reason); + hci_conn_del(conn); + + /* Re-enable advertising if necessary, since it might + * have been disabled by the connection. 
From the + * HCI_LE_Set_Advertise_Enable command description in + * the core specification (v4.0): + * "The Controller shall continue advertising until the Host + * issues an LE_Set_Advertise_Enable command with + * Advertising_Enable set to 0x00 (Advertising is disabled) + * or until a connection is created or until the Advertising + * is timed out due to Directed Advertising." + */ + if (type == LE_LINK) + mgmt_reenable_advertising(hdev); unlock: hci_dev_unlock(hdev); @@ -3539,6 +3533,9 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) conn->handle = __le16_to_cpu(ev->handle); conn->state = BT_CONNECTED; + if (test_bit(HCI_6LOWPAN_ENABLED, &hdev->dev_flags)) + set_bit(HCI_CONN_6LOWPAN, &conn->flags); + hci_conn_add_sysfs(conn); hci_proto_connect_cfm(conn, ev->status); diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 4af3821df88..b0ad2c752d7 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -40,6 +40,7 @@ #include "smp.h" #include "a2mp.h" #include "amp.h" +#include "6lowpan.h" bool disable_ertm; @@ -49,6 +50,9 @@ static u8 l2cap_fixed_chan[8] = { L2CAP_FC_L2CAP | L2CAP_FC_CONNLESS, }; static LIST_HEAD(chan_list); static DEFINE_RWLOCK(chan_list_lock); +static u16 le_max_credits = L2CAP_LE_MAX_CREDITS; +static u16 le_default_mps = L2CAP_LE_DEFAULT_MPS; + static struct sk_buff *l2cap_build_cmd(struct l2cap_conn *conn, u8 code, u8 ident, u16 dlen, void *data); static void l2cap_send_cmd(struct l2cap_conn *conn, u8 ident, u8 code, u16 len, @@ -213,9 +217,14 @@ int l2cap_add_scid(struct l2cap_chan *chan, __u16 scid) static u16 l2cap_alloc_cid(struct l2cap_conn *conn) { - u16 cid = L2CAP_CID_DYN_START; + u16 cid, dyn_end; + + if (conn->hcon->type == LE_LINK) + dyn_end = L2CAP_CID_LE_DYN_END; + else + dyn_end = L2CAP_CID_DYN_END; - for (; cid < L2CAP_CID_DYN_END; cid++) { + for (cid = L2CAP_CID_DYN_START; cid < dyn_end; cid++) { if (!__l2cap_get_chan_by_scid(conn, cid)) return cid; } @@ 
-490,6 +499,18 @@ void l2cap_chan_set_defaults(struct l2cap_chan *chan) set_bit(FLAG_FORCE_ACTIVE, &chan->flags); } +static void l2cap_le_flowctl_init(struct l2cap_chan *chan) +{ + chan->sdu = NULL; + chan->sdu_last_frag = NULL; + chan->sdu_len = 0; + chan->tx_credits = 0; + chan->rx_credits = le_max_credits; + chan->mps = min_t(u16, chan->imtu, L2CAP_LE_DEFAULT_MPS); + + skb_queue_head_init(&chan->tx_q); +} + void __l2cap_chan_add(struct l2cap_conn *conn, struct l2cap_chan *chan) { BT_DBG("conn %p, psm 0x%2.2x, dcid 0x%4.4x", conn, @@ -502,12 +523,12 @@ void __l2cap_chan_add(struct l2cap_conn *conn, struct l2cap_chan *chan) switch (chan->chan_type) { case L2CAP_CHAN_CONN_ORIENTED: if (conn->hcon->type == LE_LINK) { - /* LE connection */ - chan->omtu = L2CAP_DEFAULT_MTU; - if (chan->dcid == L2CAP_CID_ATT) + if (chan->dcid == L2CAP_CID_ATT) { + chan->omtu = L2CAP_DEFAULT_MTU; chan->scid = L2CAP_CID_ATT; - else + } else { chan->scid = l2cap_alloc_cid(conn); + } } else { /* Alloc CID for connection-oriented socket */ chan->scid = l2cap_alloc_cid(conn); @@ -597,6 +618,10 @@ void l2cap_chan_del(struct l2cap_chan *chan, int err) case L2CAP_MODE_BASIC: break; + case L2CAP_MODE_LE_FLOWCTL: + skb_queue_purge(&chan->tx_q); + break; + case L2CAP_MODE_ERTM: __clear_retrans_timer(chan); __clear_monitor_timer(chan); @@ -617,6 +642,50 @@ void l2cap_chan_del(struct l2cap_chan *chan, int err) return; } +static void l2cap_chan_le_connect_reject(struct l2cap_chan *chan) +{ + struct l2cap_conn *conn = chan->conn; + struct l2cap_le_conn_rsp rsp; + u16 result; + + if (test_bit(FLAG_DEFER_SETUP, &chan->flags)) + result = L2CAP_CR_AUTHORIZATION; + else + result = L2CAP_CR_BAD_PSM; + + l2cap_state_change(chan, BT_DISCONN); + + rsp.dcid = cpu_to_le16(chan->scid); + rsp.mtu = cpu_to_le16(chan->imtu); + rsp.mps = cpu_to_le16(chan->mps); + rsp.credits = cpu_to_le16(chan->rx_credits); + rsp.result = cpu_to_le16(result); + + l2cap_send_cmd(conn, chan->ident, L2CAP_LE_CONN_RSP, sizeof(rsp), + 
&rsp); +} + +static void l2cap_chan_connect_reject(struct l2cap_chan *chan) +{ + struct l2cap_conn *conn = chan->conn; + struct l2cap_conn_rsp rsp; + u16 result; + + if (test_bit(FLAG_DEFER_SETUP, &chan->flags)) + result = L2CAP_CR_SEC_BLOCK; + else + result = L2CAP_CR_BAD_PSM; + + l2cap_state_change(chan, BT_DISCONN); + + rsp.scid = cpu_to_le16(chan->dcid); + rsp.dcid = cpu_to_le16(chan->scid); + rsp.result = cpu_to_le16(result); + rsp.status = __constant_cpu_to_le16(L2CAP_CS_NO_INFO); + + l2cap_send_cmd(conn, chan->ident, L2CAP_CONN_RSP, sizeof(rsp), &rsp); +} + void l2cap_chan_close(struct l2cap_chan *chan, int reason) { struct l2cap_conn *conn = chan->conn; @@ -630,8 +699,10 @@ void l2cap_chan_close(struct l2cap_chan *chan, int reason) case BT_CONNECTED: case BT_CONFIG: - if (chan->chan_type == L2CAP_CHAN_CONN_ORIENTED && - conn->hcon->type == ACL_LINK) { + /* ATT uses L2CAP_CHAN_CONN_ORIENTED so we must also + * check for chan->psm. + */ + if (chan->chan_type == L2CAP_CHAN_CONN_ORIENTED && chan->psm) { __set_chan_timer(chan, chan->ops->get_sndtimeo(chan)); l2cap_send_disconn_req(chan, reason); } else @@ -639,24 +710,11 @@ void l2cap_chan_close(struct l2cap_chan *chan, int reason) break; case BT_CONNECT2: - if (chan->chan_type == L2CAP_CHAN_CONN_ORIENTED && - conn->hcon->type == ACL_LINK) { - struct l2cap_conn_rsp rsp; - __u16 result; - - if (test_bit(FLAG_DEFER_SETUP, &chan->flags)) - result = L2CAP_CR_SEC_BLOCK; - else - result = L2CAP_CR_BAD_PSM; - - l2cap_state_change(chan, BT_DISCONN); - - rsp.scid = cpu_to_le16(chan->dcid); - rsp.dcid = cpu_to_le16(chan->scid); - rsp.result = cpu_to_le16(result); - rsp.status = __constant_cpu_to_le16(L2CAP_CS_NO_INFO); - l2cap_send_cmd(conn, chan->ident, L2CAP_CONN_RSP, - sizeof(rsp), &rsp); + if (chan->chan_type == L2CAP_CHAN_CONN_ORIENTED) { + if (conn->hcon->type == ACL_LINK) + l2cap_chan_connect_reject(chan); + else if (conn->hcon->type == LE_LINK) + l2cap_chan_le_connect_reject(chan); } l2cap_chan_del(chan, reason); 
@@ -726,6 +784,9 @@ int l2cap_chan_check_security(struct l2cap_chan *chan) struct l2cap_conn *conn = chan->conn; __u8 auth_type; + if (conn->hcon->type == LE_LINK) + return smp_conn_security(conn->hcon, chan->sec_level); + auth_type = l2cap_get_auth_type(chan); return hci_conn_security(conn->hcon, chan->sec_level, auth_type); @@ -1152,16 +1213,57 @@ static void l2cap_chan_ready(struct l2cap_chan *chan) chan->conf_state = 0; __clear_chan_timer(chan); + if (chan->mode == L2CAP_MODE_LE_FLOWCTL && !chan->tx_credits) + chan->ops->suspend(chan); + chan->state = BT_CONNECTED; chan->ops->ready(chan); } +static void l2cap_le_connect(struct l2cap_chan *chan) +{ + struct l2cap_conn *conn = chan->conn; + struct l2cap_le_conn_req req; + + if (test_and_set_bit(FLAG_LE_CONN_REQ_SENT, &chan->flags)) + return; + + req.psm = chan->psm; + req.scid = cpu_to_le16(chan->scid); + req.mtu = cpu_to_le16(chan->imtu); + req.mps = cpu_to_le16(chan->mps); + req.credits = cpu_to_le16(chan->rx_credits); + + chan->ident = l2cap_get_ident(conn); + + l2cap_send_cmd(conn, chan->ident, L2CAP_LE_CONN_REQ, + sizeof(req), &req); +} + +static void l2cap_le_start(struct l2cap_chan *chan) +{ + struct l2cap_conn *conn = chan->conn; + + if (!smp_conn_security(conn->hcon, chan->sec_level)) + return; + + if (!chan->psm) { + l2cap_chan_ready(chan); + return; + } + + if (chan->state == BT_CONNECT) + l2cap_le_connect(chan); +} + static void l2cap_start_connection(struct l2cap_chan *chan) { if (__amp_capable(chan)) { BT_DBG("chan %p AMP capable: discover AMPs", chan); a2mp_discover_amp(chan); + } else if (chan->conn->hcon->type == LE_LINK) { + l2cap_le_start(chan); } else { l2cap_send_conn_req(chan); } @@ -1172,7 +1274,7 @@ static void l2cap_do_start(struct l2cap_chan *chan) struct l2cap_conn *conn = chan->conn; if (conn->hcon->type == LE_LINK) { - l2cap_chan_ready(chan); + l2cap_le_start(chan); return; } @@ -1367,6 +1469,8 @@ static void l2cap_le_conn_ready(struct l2cap_conn *conn) BT_DBG(""); + 
bt_6lowpan_add_conn(conn); + /* Check if we have socket listening on cid */ pchan = l2cap_global_chan_by_scid(BT_LISTEN, L2CAP_CID_ATT, &hcon->src, &hcon->dst); @@ -1430,9 +1534,7 @@ static void l2cap_conn_ready(struct l2cap_conn *conn) } if (hcon->type == LE_LINK) { - if (smp_conn_security(hcon, chan->sec_level)) - l2cap_chan_ready(chan); - + l2cap_le_start(chan); } else if (chan->chan_type != L2CAP_CHAN_CONN_ORIENTED) { l2cap_chan_ready(chan); @@ -1703,7 +1805,8 @@ EXPORT_SYMBOL(l2cap_conn_put); */ static struct l2cap_chan *l2cap_global_chan_by_psm(int state, __le16 psm, bdaddr_t *src, - bdaddr_t *dst) + bdaddr_t *dst, + u8 link_type) { struct l2cap_chan *c, *c1 = NULL; @@ -1713,6 +1816,12 @@ static struct l2cap_chan *l2cap_global_chan_by_psm(int state, __le16 psm, if (state && c->state != state) continue; + if (link_type == ACL_LINK && c->src_type != BDADDR_BREDR) + continue; + + if (link_type == LE_LINK && c->src_type == BDADDR_BREDR) + continue; + if (c->psm == psm) { int src_match, dst_match; int src_any, dst_any; @@ -1739,6 +1848,18 @@ static struct l2cap_chan *l2cap_global_chan_by_psm(int state, __le16 psm, return c1; } +static bool is_valid_psm(u16 psm, u8 dst_type) +{ + if (!psm) + return false; + + if (bdaddr_type_is_le(dst_type)) + return (psm <= 0x00ff); + + /* PSM must be odd and lsb of upper byte must be 0 */ + return ((psm & 0x0101) == 0x0001); +} + int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, bdaddr_t *dst, u8 dst_type) { @@ -1759,8 +1880,7 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, l2cap_chan_lock(chan); - /* PSM must be odd and lsb of upper byte must be 0 */ - if ((__le16_to_cpu(psm) & 0x0101) != 0x0001 && !cid && + if (!is_valid_psm(__le16_to_cpu(psm), dst_type) && !cid && chan->chan_type != L2CAP_CHAN_RAW) { err = -EINVAL; goto done; @@ -1774,6 +1894,9 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, switch (chan->mode) { case L2CAP_MODE_BASIC: break; + case 
L2CAP_MODE_LE_FLOWCTL: + l2cap_le_flowctl_init(chan); + break; case L2CAP_MODE_ERTM: case L2CAP_MODE_STREAMING: if (!disable_ertm) @@ -2432,6 +2555,89 @@ static int l2cap_segment_sdu(struct l2cap_chan *chan, return 0; } +static struct sk_buff *l2cap_create_le_flowctl_pdu(struct l2cap_chan *chan, + struct msghdr *msg, + size_t len, u16 sdulen) +{ + struct l2cap_conn *conn = chan->conn; + struct sk_buff *skb; + int err, count, hlen; + struct l2cap_hdr *lh; + + BT_DBG("chan %p len %zu", chan, len); + + if (!conn) + return ERR_PTR(-ENOTCONN); + + hlen = L2CAP_HDR_SIZE; + + if (sdulen) + hlen += L2CAP_SDULEN_SIZE; + + count = min_t(unsigned int, (conn->mtu - hlen), len); + + skb = chan->ops->alloc_skb(chan, count + hlen, + msg->msg_flags & MSG_DONTWAIT); + if (IS_ERR(skb)) + return skb; + + /* Create L2CAP header */ + lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE); + lh->cid = cpu_to_le16(chan->dcid); + lh->len = cpu_to_le16(len + (hlen - L2CAP_HDR_SIZE)); + + if (sdulen) + put_unaligned_le16(sdulen, skb_put(skb, L2CAP_SDULEN_SIZE)); + + err = l2cap_skbuff_fromiovec(chan, msg, len, count, skb); + if (unlikely(err < 0)) { + kfree_skb(skb); + return ERR_PTR(err); + } + + return skb; +} + +static int l2cap_segment_le_sdu(struct l2cap_chan *chan, + struct sk_buff_head *seg_queue, + struct msghdr *msg, size_t len) +{ + struct sk_buff *skb; + size_t pdu_len; + u16 sdu_len; + + BT_DBG("chan %p, msg %p, len %zu", chan, msg, len); + + pdu_len = chan->conn->mtu - L2CAP_HDR_SIZE; + + pdu_len = min_t(size_t, pdu_len, chan->remote_mps); + + sdu_len = len; + pdu_len -= L2CAP_SDULEN_SIZE; + + while (len > 0) { + if (len <= pdu_len) + pdu_len = len; + + skb = l2cap_create_le_flowctl_pdu(chan, msg, pdu_len, sdu_len); + if (IS_ERR(skb)) { + __skb_queue_purge(seg_queue); + return PTR_ERR(skb); + } + + __skb_queue_tail(seg_queue, skb); + + len -= pdu_len; + + if (sdu_len) { + sdu_len = 0; + pdu_len += L2CAP_SDULEN_SIZE; + } + } + + return 0; +} + int l2cap_chan_send(struct 
l2cap_chan *chan, struct msghdr *msg, size_t len, u32 priority) { @@ -2453,6 +2659,40 @@ int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len, } switch (chan->mode) { + case L2CAP_MODE_LE_FLOWCTL: + /* Check outgoing MTU */ + if (len > chan->omtu) + return -EMSGSIZE; + + if (!chan->tx_credits) + return -EAGAIN; + + __skb_queue_head_init(&seg_queue); + + err = l2cap_segment_le_sdu(chan, &seg_queue, msg, len); + + if (chan->state != BT_CONNECTED) { + __skb_queue_purge(&seg_queue); + err = -ENOTCONN; + } + + if (err) + return err; + + skb_queue_splice_tail_init(&seg_queue, &chan->tx_q); + + while (chan->tx_credits && !skb_queue_empty(&chan->tx_q)) { + l2cap_do_send(chan, skb_dequeue(&chan->tx_q)); + chan->tx_credits--; + } + + if (!chan->tx_credits) + chan->ops->suspend(chan); + + err = len; + + break; + case L2CAP_MODE_BASIC: /* Check outgoing MTU */ if (len > chan->omtu) @@ -3592,6 +3832,23 @@ static int l2cap_build_conf_rsp(struct l2cap_chan *chan, void *data, return ptr - data; } +void __l2cap_le_connect_rsp_defer(struct l2cap_chan *chan) +{ + struct l2cap_le_conn_rsp rsp; + struct l2cap_conn *conn = chan->conn; + + BT_DBG("chan %p", chan); + + rsp.dcid = cpu_to_le16(chan->scid); + rsp.mtu = cpu_to_le16(chan->imtu); + rsp.mps = cpu_to_le16(chan->mps); + rsp.credits = cpu_to_le16(chan->rx_credits); + rsp.result = __constant_cpu_to_le16(L2CAP_CR_SUCCESS); + + l2cap_send_cmd(conn, chan->ident, L2CAP_LE_CONN_RSP, sizeof(rsp), + &rsp); +} + void __l2cap_connect_rsp_defer(struct l2cap_chan *chan) { struct l2cap_conn_rsp rsp; @@ -3713,7 +3970,7 @@ static struct l2cap_chan *l2cap_connect(struct l2cap_conn *conn, /* Check if we have socket listening on psm */ pchan = l2cap_global_chan_by_psm(BT_LISTEN, psm, &conn->hcon->src, - &conn->hcon->dst); + &conn->hcon->dst, ACL_LINK); if (!pchan) { result = L2CAP_CR_BAD_PSM; goto sendresp; @@ -5155,18 +5412,17 @@ static inline int l2cap_check_conn_param(u16 min, u16 max, u16 latency, static inline int 
l2cap_conn_param_update_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, - u8 *data) + u16 cmd_len, u8 *data) { struct hci_conn *hcon = conn->hcon; struct l2cap_conn_param_update_req *req; struct l2cap_conn_param_update_rsp rsp; - u16 min, max, latency, to_multiplier, cmd_len; + u16 min, max, latency, to_multiplier; int err; if (!(hcon->link_mode & HCI_LM_MASTER)) return -EINVAL; - cmd_len = __le16_to_cpu(cmd->len); if (cmd_len != sizeof(struct l2cap_conn_param_update_req)) return -EPROTO; @@ -5196,6 +5452,65 @@ static inline int l2cap_conn_param_update_req(struct l2cap_conn *conn, return 0; } +static int l2cap_le_connect_rsp(struct l2cap_conn *conn, + struct l2cap_cmd_hdr *cmd, u16 cmd_len, + u8 *data) +{ + struct l2cap_le_conn_rsp *rsp = (struct l2cap_le_conn_rsp *) data; + u16 dcid, mtu, mps, credits, result; + struct l2cap_chan *chan; + int err; + + if (cmd_len < sizeof(*rsp)) + return -EPROTO; + + dcid = __le16_to_cpu(rsp->dcid); + mtu = __le16_to_cpu(rsp->mtu); + mps = __le16_to_cpu(rsp->mps); + credits = __le16_to_cpu(rsp->credits); + result = __le16_to_cpu(rsp->result); + + if (result == L2CAP_CR_SUCCESS && (mtu < 23 || mps < 23)) + return -EPROTO; + + BT_DBG("dcid 0x%4.4x mtu %u mps %u credits %u result 0x%2.2x", + dcid, mtu, mps, credits, result); + + mutex_lock(&conn->chan_lock); + + chan = __l2cap_get_chan_by_ident(conn, cmd->ident); + if (!chan) { + err = -EBADSLT; + goto unlock; + } + + err = 0; + + l2cap_chan_lock(chan); + + switch (result) { + case L2CAP_CR_SUCCESS: + chan->ident = 0; + chan->dcid = dcid; + chan->omtu = mtu; + chan->remote_mps = mps; + chan->tx_credits = credits; + l2cap_chan_ready(chan); + break; + + default: + l2cap_chan_del(chan, ECONNREFUSED); + break; + } + + l2cap_chan_unlock(chan); + +unlock: + mutex_unlock(&conn->chan_lock); + + return err; +} + static inline int l2cap_bredr_sig_cmd(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u16 cmd_len, u8 *data) @@ -5276,23 +5591,235 @@ static inline int 
l2cap_bredr_sig_cmd(struct l2cap_conn *conn, return err; } +static int l2cap_le_connect_req(struct l2cap_conn *conn, + struct l2cap_cmd_hdr *cmd, u16 cmd_len, + u8 *data) +{ + struct l2cap_le_conn_req *req = (struct l2cap_le_conn_req *) data; + struct l2cap_le_conn_rsp rsp; + struct l2cap_chan *chan, *pchan; + u16 dcid, scid, credits, mtu, mps; + __le16 psm; + u8 result; + + if (cmd_len != sizeof(*req)) + return -EPROTO; + + scid = __le16_to_cpu(req->scid); + mtu = __le16_to_cpu(req->mtu); + mps = __le16_to_cpu(req->mps); + psm = req->psm; + dcid = 0; + credits = 0; + + if (mtu < 23 || mps < 23) + return -EPROTO; + + BT_DBG("psm 0x%2.2x scid 0x%4.4x mtu %u mps %u", __le16_to_cpu(psm), + scid, mtu, mps); + + /* Check if we have socket listening on psm */ + pchan = l2cap_global_chan_by_psm(BT_LISTEN, psm, &conn->hcon->src, + &conn->hcon->dst, LE_LINK); + if (!pchan) { + result = L2CAP_CR_BAD_PSM; + chan = NULL; + goto response; + } + + mutex_lock(&conn->chan_lock); + l2cap_chan_lock(pchan); + + if (!smp_sufficient_security(conn->hcon, pchan->sec_level)) { + result = L2CAP_CR_AUTHENTICATION; + chan = NULL; + goto response_unlock; + } + + /* Check if we already have channel with that dcid */ + if (__l2cap_get_chan_by_dcid(conn, scid)) { + result = L2CAP_CR_NO_MEM; + chan = NULL; + goto response_unlock; + } + + chan = pchan->ops->new_connection(pchan); + if (!chan) { + result = L2CAP_CR_NO_MEM; + goto response_unlock; + } + + l2cap_le_flowctl_init(chan); + + bacpy(&chan->src, &conn->hcon->src); + bacpy(&chan->dst, &conn->hcon->dst); + chan->src_type = bdaddr_type(conn->hcon, conn->hcon->src_type); + chan->dst_type = bdaddr_type(conn->hcon, conn->hcon->dst_type); + chan->psm = psm; + chan->dcid = scid; + chan->omtu = mtu; + chan->remote_mps = mps; + chan->tx_credits = __le16_to_cpu(req->credits); + + __l2cap_chan_add(conn, chan); + dcid = chan->scid; + credits = chan->rx_credits; + + __set_chan_timer(chan, chan->ops->get_sndtimeo(chan)); + + chan->ident = cmd->ident; + + 
if (test_bit(FLAG_DEFER_SETUP, &chan->flags)) { + l2cap_state_change(chan, BT_CONNECT2); + result = L2CAP_CR_PEND; + chan->ops->defer(chan); + } else { + l2cap_chan_ready(chan); + result = L2CAP_CR_SUCCESS; + } + +response_unlock: + l2cap_chan_unlock(pchan); + mutex_unlock(&conn->chan_lock); + + if (result == L2CAP_CR_PEND) + return 0; + +response: + if (chan) { + rsp.mtu = cpu_to_le16(chan->imtu); + rsp.mps = cpu_to_le16(chan->mps); + } else { + rsp.mtu = 0; + rsp.mps = 0; + } + + rsp.dcid = cpu_to_le16(dcid); + rsp.credits = cpu_to_le16(credits); + rsp.result = cpu_to_le16(result); + + l2cap_send_cmd(conn, cmd->ident, L2CAP_LE_CONN_RSP, sizeof(rsp), &rsp); + + return 0; +} + +static inline int l2cap_le_credits(struct l2cap_conn *conn, + struct l2cap_cmd_hdr *cmd, u16 cmd_len, + u8 *data) +{ + struct l2cap_le_credits *pkt; + struct l2cap_chan *chan; + u16 cid, credits; + + if (cmd_len != sizeof(*pkt)) + return -EPROTO; + + pkt = (struct l2cap_le_credits *) data; + cid = __le16_to_cpu(pkt->cid); + credits = __le16_to_cpu(pkt->credits); + + BT_DBG("cid 0x%4.4x credits 0x%4.4x", cid, credits); + + chan = l2cap_get_chan_by_dcid(conn, cid); + if (!chan) + return -EBADSLT; + + chan->tx_credits += credits; + + while (chan->tx_credits && !skb_queue_empty(&chan->tx_q)) { + l2cap_do_send(chan, skb_dequeue(&chan->tx_q)); + chan->tx_credits--; + } + + if (chan->tx_credits) + chan->ops->resume(chan); + + l2cap_chan_unlock(chan); + + return 0; +} + +static inline int l2cap_le_command_rej(struct l2cap_conn *conn, + struct l2cap_cmd_hdr *cmd, u16 cmd_len, + u8 *data) +{ + struct l2cap_cmd_rej_unk *rej = (struct l2cap_cmd_rej_unk *) data; + struct l2cap_chan *chan; + + if (cmd_len < sizeof(*rej)) + return -EPROTO; + + mutex_lock(&conn->chan_lock); + + chan = __l2cap_get_chan_by_ident(conn, cmd->ident); + if (!chan) + goto done; + + l2cap_chan_lock(chan); + l2cap_chan_del(chan, ECONNREFUSED); + l2cap_chan_unlock(chan); + +done: + mutex_unlock(&conn->chan_lock); + return 0; +} + 
static inline int l2cap_le_sig_cmd(struct l2cap_conn *conn, - struct l2cap_cmd_hdr *cmd, u8 *data) + struct l2cap_cmd_hdr *cmd, u16 cmd_len, + u8 *data) { + int err = 0; + + if (!enable_lecoc) { + switch (cmd->code) { + case L2CAP_LE_CONN_REQ: + case L2CAP_LE_CONN_RSP: + case L2CAP_LE_CREDITS: + case L2CAP_DISCONN_REQ: + case L2CAP_DISCONN_RSP: + return -EINVAL; + } + } + switch (cmd->code) { case L2CAP_COMMAND_REJ: - return 0; + l2cap_le_command_rej(conn, cmd, cmd_len, data); + break; case L2CAP_CONN_PARAM_UPDATE_REQ: - return l2cap_conn_param_update_req(conn, cmd, data); + err = l2cap_conn_param_update_req(conn, cmd, cmd_len, data); + break; case L2CAP_CONN_PARAM_UPDATE_RSP: - return 0; + break; + + case L2CAP_LE_CONN_RSP: + l2cap_le_connect_rsp(conn, cmd, cmd_len, data); + break; + + case L2CAP_LE_CONN_REQ: + err = l2cap_le_connect_req(conn, cmd, cmd_len, data); + break; + + case L2CAP_LE_CREDITS: + err = l2cap_le_credits(conn, cmd, cmd_len, data); + break; + + case L2CAP_DISCONN_REQ: + err = l2cap_disconnect_req(conn, cmd, cmd_len, data); + break; + + case L2CAP_DISCONN_RSP: + l2cap_disconnect_rsp(conn, cmd, cmd_len, data); + break; default: BT_ERR("Unknown LE signaling command 0x%2.2x", cmd->code); - return -EINVAL; + err = -EINVAL; + break; } + + return err; } static inline void l2cap_le_sig_channel(struct l2cap_conn *conn, @@ -5321,7 +5848,7 @@ static inline void l2cap_le_sig_channel(struct l2cap_conn *conn, goto drop; } - err = l2cap_le_sig_cmd(conn, cmd, skb->data); + err = l2cap_le_sig_cmd(conn, cmd, len, skb->data); if (err) { struct l2cap_cmd_rej_unk rej; @@ -6312,6 +6839,121 @@ drop: return 0; } +static void l2cap_chan_le_send_credits(struct l2cap_chan *chan) +{ + struct l2cap_conn *conn = chan->conn; + struct l2cap_le_credits pkt; + u16 return_credits; + + /* We return more credits to the sender only after the amount of + * credits falls below half of the initial amount. 
+ */ + if (chan->rx_credits >= (le_max_credits + 1) / 2) + return; + + return_credits = le_max_credits - chan->rx_credits; + + BT_DBG("chan %p returning %u credits to sender", chan, return_credits); + + chan->rx_credits += return_credits; + + pkt.cid = cpu_to_le16(chan->scid); + pkt.credits = cpu_to_le16(return_credits); + + chan->ident = l2cap_get_ident(conn); + + l2cap_send_cmd(conn, chan->ident, L2CAP_LE_CREDITS, sizeof(pkt), &pkt); +} + +static int l2cap_le_data_rcv(struct l2cap_chan *chan, struct sk_buff *skb) +{ + int err; + + if (!chan->rx_credits) { + BT_ERR("No credits to receive LE L2CAP data"); + return -ENOBUFS; + } + + if (chan->imtu < skb->len) { + BT_ERR("Too big LE L2CAP PDU"); + return -ENOBUFS; + } + + chan->rx_credits--; + BT_DBG("rx_credits %u -> %u", chan->rx_credits + 1, chan->rx_credits); + + l2cap_chan_le_send_credits(chan); + + err = 0; + + if (!chan->sdu) { + u16 sdu_len; + + sdu_len = get_unaligned_le16(skb->data); + skb_pull(skb, L2CAP_SDULEN_SIZE); + + BT_DBG("Start of new SDU. sdu_len %u skb->len %u imtu %u", + sdu_len, skb->len, chan->imtu); + + if (sdu_len > chan->imtu) { + BT_ERR("Too big LE L2CAP SDU length received"); + err = -EMSGSIZE; + goto failed; + } + + if (skb->len > sdu_len) { + BT_ERR("Too much LE L2CAP data received"); + err = -EINVAL; + goto failed; + } + + if (skb->len == sdu_len) + return chan->ops->recv(chan, skb); + + chan->sdu = skb; + chan->sdu_len = sdu_len; + chan->sdu_last_frag = skb; + + return 0; + } + + BT_DBG("SDU fragment. 
chan->sdu->len %u skb->len %u chan->sdu_len %u", + chan->sdu->len, skb->len, chan->sdu_len); + + if (chan->sdu->len + skb->len > chan->sdu_len) { + BT_ERR("Too much LE L2CAP data received"); + err = -EINVAL; + goto failed; + } + + append_skb_frag(chan->sdu, skb, &chan->sdu_last_frag); + skb = NULL; + + if (chan->sdu->len == chan->sdu_len) { + err = chan->ops->recv(chan, chan->sdu); + if (!err) { + chan->sdu = NULL; + chan->sdu_last_frag = NULL; + chan->sdu_len = 0; + } + } + +failed: + if (err) { + kfree_skb(skb); + kfree_skb(chan->sdu); + chan->sdu = NULL; + chan->sdu_last_frag = NULL; + chan->sdu_len = 0; + } + + /* We can't return an error here since we took care of the skb + * freeing internally. An error return would cause the caller to + * do a double-free of the skb. + */ + return 0; +} + static void l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk_buff *skb) { @@ -6341,6 +6983,12 @@ static void l2cap_data_channel(struct l2cap_conn *conn, u16 cid, goto drop; switch (chan->mode) { + case L2CAP_MODE_LE_FLOWCTL: + if (l2cap_le_data_rcv(chan, skb) < 0) + goto drop; + + goto done; + case L2CAP_MODE_BASIC: /* If socket recv buffers overflows we drop data here * which is *bad* because L2CAP has to be reliable. 
@@ -6380,7 +7028,8 @@ static void l2cap_conless_channel(struct l2cap_conn *conn, __le16 psm, if (hcon->type != ACL_LINK) goto drop; - chan = l2cap_global_chan_by_psm(0, psm, &hcon->src, &hcon->dst); + chan = l2cap_global_chan_by_psm(0, psm, &hcon->src, &hcon->dst, + ACL_LINK); if (!chan) goto drop; @@ -6473,6 +7122,10 @@ static void l2cap_recv_frame(struct l2cap_conn *conn, struct sk_buff *skb) l2cap_conn_del(conn->hcon, EACCES); break; + case L2CAP_FC_6LOWPAN: + bt_6lowpan_recv(conn, skb); + break; + default: l2cap_data_channel(conn, cid, skb); break; @@ -6540,6 +7193,8 @@ void l2cap_disconn_cfm(struct hci_conn *hcon, u8 reason) { BT_DBG("hcon %p reason %d", hcon, reason); + bt_6lowpan_del_conn(hcon->l2cap_data); + l2cap_conn_del(hcon, bt_to_errno(reason)); } @@ -6612,11 +7267,10 @@ int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) } if (chan->state == BT_CONNECT) { - if (!status) { + if (!status) l2cap_start_connection(chan); - } else { + else __set_chan_timer(chan, L2CAP_DISC_TIMEOUT); - } } else if (chan->state == BT_CONNECT2) { struct l2cap_conn_rsp rsp; __u16 res, stat; @@ -6817,11 +7471,19 @@ int __init l2cap_init(void) l2cap_debugfs = debugfs_create_file("l2cap", 0444, bt_debugfs, NULL, &l2cap_debugfs_fops); + debugfs_create_u16("l2cap_le_max_credits", 0466, bt_debugfs, + &le_max_credits); + debugfs_create_u16("l2cap_le_default_mps", 0466, bt_debugfs, + &le_default_mps); + + bt_6lowpan_init(); + return 0; } void l2cap_exit(void) { + bt_6lowpan_cleanup(); debugfs_remove(l2cap_debugfs); l2cap_cleanup_sockets(); } diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 7cc24d263ca..d58f76bcebd 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -27,6 +27,7 @@ /* Bluetooth L2CAP sockets. 
*/ +#include <linux/module.h> #include <linux/export.h> #include <net/bluetooth/bluetooth.h> @@ -35,6 +36,8 @@ #include "smp.h" +bool enable_lecoc; + static struct bt_sock_list l2cap_sk_list = { .lock = __RW_LOCK_UNLOCKED(l2cap_sk_list.lock) }; @@ -50,6 +53,32 @@ bool l2cap_is_socket(struct socket *sock) } EXPORT_SYMBOL(l2cap_is_socket); +static int l2cap_validate_bredr_psm(u16 psm) +{ + /* PSM must be odd and lsb of upper byte must be 0 */ + if ((psm & 0x0101) != 0x0001) + return -EINVAL; + + /* Restrict usage of well-known PSMs */ + if (psm < 0x1001 && !capable(CAP_NET_BIND_SERVICE)) + return -EACCES; + + return 0; +} + +static int l2cap_validate_le_psm(u16 psm) +{ + /* Valid LE_PSM ranges are defined only until 0x00ff */ + if (psm > 0x00ff) + return -EINVAL; + + /* Restrict fixed, SIG assigned PSM values to CAP_NET_BIND_SERVICE */ + if (psm <= 0x007f && !capable(CAP_NET_BIND_SERVICE)) + return -EACCES; + + return 0; +} + static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen) { struct sock *sk = sock->sk; @@ -73,11 +102,11 @@ static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen) return -EINVAL; if (bdaddr_type_is_le(la.l2_bdaddr_type)) { - /* Connection oriented channels are not supported on LE */ - if (la.l2_psm) + if (!enable_lecoc && la.l2_psm) return -EINVAL; /* We only allow ATT user space socket */ - if (la.l2_cid != __constant_cpu_to_le16(L2CAP_CID_ATT)) + if (la.l2_cid && + la.l2_cid != __constant_cpu_to_le16(L2CAP_CID_ATT)) return -EINVAL; } @@ -91,17 +120,13 @@ static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen) if (la.l2_psm) { __u16 psm = __le16_to_cpu(la.l2_psm); - /* PSM must be odd and lsb of upper byte must be 0 */ - if ((psm & 0x0101) != 0x0001) { - err = -EINVAL; - goto done; - } + if (la.l2_bdaddr_type == BDADDR_BREDR) + err = l2cap_validate_bredr_psm(psm); + else + err = l2cap_validate_le_psm(psm); - /* Restrict usage of well-known PSMs */ - if (psm < 0x1001 && 
!capable(CAP_NET_BIND_SERVICE)) { - err = -EACCES; + if (err) goto done; - } } if (la.l2_cid) @@ -122,11 +147,17 @@ static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen) __le16_to_cpu(la.l2_psm) == L2CAP_PSM_RFCOMM) chan->sec_level = BT_SECURITY_SDP; break; + case L2CAP_CHAN_RAW: + chan->sec_level = BT_SECURITY_SDP; + break; } bacpy(&chan->src, &la.l2_bdaddr); chan->src_type = la.l2_bdaddr_type; + if (chan->psm && bdaddr_type_is_le(chan->src_type)) + chan->mode = L2CAP_MODE_LE_FLOWCTL; + chan->state = BT_BOUND; sk->sk_state = BT_BOUND; @@ -189,14 +220,17 @@ static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr, return -EINVAL; if (bdaddr_type_is_le(la.l2_bdaddr_type)) { - /* Connection oriented channels are not supported on LE */ - if (la.l2_psm) + if (!enable_lecoc && la.l2_psm) return -EINVAL; /* We only allow ATT user space socket */ - if (la.l2_cid != __constant_cpu_to_le16(L2CAP_CID_ATT)) + if (la.l2_cid && + la.l2_cid != __constant_cpu_to_le16(L2CAP_CID_ATT)) return -EINVAL; } + if (chan->psm && bdaddr_type_is_le(chan->src_type)) + chan->mode = L2CAP_MODE_LE_FLOWCTL; + err = l2cap_chan_connect(chan, la.l2_psm, __le16_to_cpu(la.l2_cid), &la.l2_bdaddr, la.l2_bdaddr_type); if (err) @@ -234,6 +268,7 @@ static int l2cap_sock_listen(struct socket *sock, int backlog) switch (chan->mode) { case L2CAP_MODE_BASIC: + case L2CAP_MODE_LE_FLOWCTL: break; case L2CAP_MODE_ERTM: case L2CAP_MODE_STREAMING: @@ -360,6 +395,16 @@ static int l2cap_sock_getsockopt_old(struct socket *sock, int optname, switch (optname) { case L2CAP_OPTIONS: + /* LE sockets should use BT_SNDMTU/BT_RCVMTU, but since + * legacy ATT code depends on getsockopt for + * L2CAP_OPTIONS we need to let this pass. 
+ */ + if (bdaddr_type_is_le(chan->src_type) && + chan->scid != L2CAP_CID_ATT) { + err = -EINVAL; + break; + } + memset(&opts, 0, sizeof(opts)); opts.imtu = chan->imtu; opts.omtu = chan->omtu; @@ -514,6 +559,41 @@ static int l2cap_sock_getsockopt(struct socket *sock, int level, int optname, err = -EFAULT; break; + case BT_SNDMTU: + if (!enable_lecoc) { + err = -EPROTONOSUPPORT; + break; + } + + if (!bdaddr_type_is_le(chan->src_type)) { + err = -EINVAL; + break; + } + + if (sk->sk_state != BT_CONNECTED) { + err = -ENOTCONN; + break; + } + + if (put_user(chan->omtu, (u16 __user *) optval)) + err = -EFAULT; + break; + + case BT_RCVMTU: + if (!enable_lecoc) { + err = -EPROTONOSUPPORT; + break; + } + + if (!bdaddr_type_is_le(chan->src_type)) { + err = -EINVAL; + break; + } + + if (put_user(chan->imtu, (u16 __user *) optval)) + err = -EFAULT; + break; + default: err = -ENOPROTOOPT; break; @@ -554,6 +634,11 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, switch (optname) { case L2CAP_OPTIONS: + if (bdaddr_type_is_le(chan->src_type)) { + err = -EINVAL; + break; + } + if (sk->sk_state == BT_CONNECTED) { err = -EINVAL; break; @@ -585,6 +670,8 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, chan->mode = opts.mode; switch (chan->mode) { + case L2CAP_MODE_LE_FLOWCTL: + break; case L2CAP_MODE_BASIC: clear_bit(CONF_STATE2_DEVICE, &chan->conf_state); break; @@ -807,6 +894,47 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, break; + case BT_SNDMTU: + if (!enable_lecoc) { + err = -EPROTONOSUPPORT; + break; + } + + if (!bdaddr_type_is_le(chan->src_type)) { + err = -EINVAL; + break; + } + + /* Setting is not supported as it's the remote side that + * decides this. 
+ */ + err = -EPERM; + break; + + case BT_RCVMTU: + if (!enable_lecoc) { + err = -EPROTONOSUPPORT; + break; + } + + if (!bdaddr_type_is_le(chan->src_type)) { + err = -EINVAL; + break; + } + + if (sk->sk_state == BT_CONNECTED) { + err = -EISCONN; + break; + } + + if (get_user(opt, (u32 __user *) optval)) { + err = -EFAULT; + break; + } + + chan->imtu = opt; + break; + default: err = -ENOPROTOOPT; break; @@ -859,10 +987,16 @@ static int l2cap_sock_recvmsg(struct kiocb *iocb, struct socket *sock, if (sk->sk_state == BT_CONNECT2 && test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags)) { - sk->sk_state = BT_CONFIG; - pi->chan->state = BT_CONFIG; + if (bdaddr_type_is_le(pi->chan->src_type)) { + sk->sk_state = BT_CONNECTED; + pi->chan->state = BT_CONNECTED; + __l2cap_le_connect_rsp_defer(pi->chan); + } else { + sk->sk_state = BT_CONFIG; + pi->chan->state = BT_CONFIG; + __l2cap_connect_rsp_defer(pi->chan); + } - __l2cap_connect_rsp_defer(pi->chan); err = 0; goto done; } @@ -1236,6 +1370,14 @@ static long l2cap_sock_get_sndtimeo_cb(struct l2cap_chan *chan) return sk->sk_sndtimeo; } +static void l2cap_sock_suspend_cb(struct l2cap_chan *chan) +{ + struct sock *sk = chan->data; + + set_bit(BT_SK_SUSPEND, &bt_sk(sk)->flags); + sk->sk_state_change(sk); +} + static struct l2cap_ops l2cap_chan_ops = { .name = "L2CAP Socket Interface", .new_connection = l2cap_sock_new_connection_cb, @@ -1246,6 +1388,7 @@ static struct l2cap_ops l2cap_chan_ops = { .ready = l2cap_sock_ready_cb, .defer = l2cap_sock_defer_cb, .resume = l2cap_sock_resume_cb, + .suspend = l2cap_sock_suspend_cb, .set_shutdown = l2cap_sock_set_shutdown_cb, .get_sndtimeo = l2cap_sock_get_sndtimeo_cb, .alloc_skb = l2cap_sock_alloc_skb_cb, @@ -1270,7 +1413,7 @@ static void l2cap_sock_destruct(struct sock *sk) static void l2cap_skb_msg_name(struct sk_buff *skb, void *msg_name, int *msg_namelen) { - struct sockaddr_l2 *la = (struct sockaddr_l2 *) msg_name; + DECLARE_SOCKADDR(struct sockaddr_l2 *, la, msg_name); memset(la, 0, 
sizeof(struct sockaddr_l2)); la->l2_family = AF_BLUETOOTH; @@ -1303,6 +1446,8 @@ static void l2cap_sock_init(struct sock *sk, struct sock *parent) chan->tx_win_max = pchan->tx_win_max; chan->sec_level = pchan->sec_level; chan->flags = pchan->flags; + chan->tx_credits = pchan->tx_credits; + chan->rx_credits = pchan->rx_credits; security_sk_clone(parent, sk); } else { @@ -1469,3 +1614,6 @@ void l2cap_cleanup_sockets(void) bt_sock_unregister(BTPROTO_L2CAP); proto_unregister(&l2cap_proto); } + +module_param(enable_lecoc, bool, 0644); +MODULE_PARM_DESC(enable_lecoc, "Enable support for LE CoC"); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 074d83690a4..a03ca3ca91b 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1264,7 +1264,7 @@ static int set_discoverable(struct sock *sk, struct hci_dev *hdev, void *data, if (cp->val == 0x02) { /* Limited discoverable mode */ - hci_cp.num_iac = 2; + hci_cp.num_iac = min_t(u8, hdev->num_iac, 2); hci_cp.iac_lap[0] = 0x00; /* LIAC */ hci_cp.iac_lap[1] = 0x8b; hci_cp.iac_lap[2] = 0x9e; @@ -4595,6 +4595,9 @@ void mgmt_device_disconnected(struct hci_dev *hdev, bdaddr_t *bdaddr, struct mgmt_ev_device_disconnected ev; struct sock *sk = NULL; + if (link_type != ACL_LINK && link_type != LE_LINK) + return; + mgmt_pending_foreach(MGMT_OP_DISCONNECT, hdev, disconnect_rsp, &sk); bacpy(&ev.addr.bdaddr, bdaddr); @@ -4613,6 +4616,8 @@ void mgmt_device_disconnected(struct hci_dev *hdev, bdaddr_t *bdaddr, void mgmt_disconnect_failed(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, u8 status) { + u8 bdaddr_type = link_to_bdaddr(link_type, addr_type); + struct mgmt_cp_disconnect *cp; struct mgmt_rp_disconnect rp; struct pending_cmd *cmd; @@ -4623,8 +4628,16 @@ void mgmt_disconnect_failed(struct hci_dev *hdev, bdaddr_t *bdaddr, if (!cmd) return; + cp = cmd->param; + + if (bacmp(bdaddr, &cp->addr.bdaddr)) + return; + + if (cp->addr.type != bdaddr_type) + return; + bacpy(&rp.addr.bdaddr, bdaddr); - 
rp.addr.type = link_to_bdaddr(link_type, addr_type); + rp.addr.type = bdaddr_type; cmd_complete(cmd->sk, cmd->index, MGMT_OP_DISCONNECT, mgmt_status(status), &rp, sizeof(rp)); diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index 84fcf9fff3e..f9c0980abee 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -58,6 +58,7 @@ struct rfcomm_dev { uint modem_status; struct rfcomm_dlc *dlc; + wait_queue_head_t conn_wait; struct device *tty_dev; @@ -103,20 +104,60 @@ static void rfcomm_dev_destruct(struct tty_port *port) module_put(THIS_MODULE); } -/* device-specific initialization: open the dlc */ -static int rfcomm_dev_activate(struct tty_port *port, struct tty_struct *tty) +static struct device *rfcomm_get_device(struct rfcomm_dev *dev) { - struct rfcomm_dev *dev = container_of(port, struct rfcomm_dev, port); + struct hci_dev *hdev; + struct hci_conn *conn; - return rfcomm_dlc_open(dev->dlc, &dev->src, &dev->dst, dev->channel); + hdev = hci_get_route(&dev->dst, &dev->src); + if (!hdev) + return NULL; + + conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &dev->dst); + + hci_dev_put(hdev); + + return conn ? 
&conn->dev : NULL; } -/* we block the open until the dlc->state becomes BT_CONNECTED */ -static int rfcomm_dev_carrier_raised(struct tty_port *port) +/* device-specific initialization: open the dlc */ +static int rfcomm_dev_activate(struct tty_port *port, struct tty_struct *tty) { struct rfcomm_dev *dev = container_of(port, struct rfcomm_dev, port); + DEFINE_WAIT(wait); + int err; + + err = rfcomm_dlc_open(dev->dlc, &dev->src, &dev->dst, dev->channel); + if (err) + return err; + + while (1) { + prepare_to_wait(&dev->conn_wait, &wait, TASK_INTERRUPTIBLE); + + if (dev->dlc->state == BT_CLOSED) { + err = -dev->err; + break; + } + + if (dev->dlc->state == BT_CONNECTED) + break; + + if (signal_pending(current)) { + err = -ERESTARTSYS; + break; + } + + tty_unlock(tty); + schedule(); + tty_lock(tty); + } + finish_wait(&dev->conn_wait, &wait); + + if (!err) + device_move(dev->tty_dev, rfcomm_get_device(dev), + DPM_ORDER_DEV_AFTER_PARENT); - return (dev->dlc->state == BT_CONNECTED); + return err; } /* device-specific cleanup: close the dlc */ @@ -135,7 +176,6 @@ static const struct tty_port_operations rfcomm_port_ops = { .destruct = rfcomm_dev_destruct, .activate = rfcomm_dev_activate, .shutdown = rfcomm_dev_shutdown, - .carrier_raised = rfcomm_dev_carrier_raised, }; static struct rfcomm_dev *__rfcomm_dev_get(int id) @@ -169,22 +209,6 @@ static struct rfcomm_dev *rfcomm_dev_get(int id) return dev; } -static struct device *rfcomm_get_device(struct rfcomm_dev *dev) -{ - struct hci_dev *hdev; - struct hci_conn *conn; - - hdev = hci_get_route(&dev->dst, &dev->src); - if (!hdev) - return NULL; - - conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &dev->dst); - - hci_dev_put(hdev); - - return conn ? 
&conn->dev : NULL; -} - static ssize_t show_address(struct device *tty_dev, struct device_attribute *attr, char *buf) { struct rfcomm_dev *dev = dev_get_drvdata(tty_dev); @@ -258,6 +282,7 @@ static int rfcomm_dev_add(struct rfcomm_dev_req *req, struct rfcomm_dlc *dlc) tty_port_init(&dev->port); dev->port.ops = &rfcomm_port_ops; + init_waitqueue_head(&dev->conn_wait); skb_queue_head_init(&dev->pending); @@ -437,7 +462,8 @@ static int rfcomm_release_dev(void __user *arg) tty_kref_put(tty); } - if (!test_and_set_bit(RFCOMM_TTY_RELEASED, &dev->flags)) + if (!test_bit(RFCOMM_RELEASE_ONHUP, &dev->flags) && + !test_and_set_bit(RFCOMM_TTY_RELEASED, &dev->flags)) tty_port_put(&dev->port); tty_port_put(&dev->port); @@ -575,12 +601,9 @@ static void rfcomm_dev_state_change(struct rfcomm_dlc *dlc, int err) BT_DBG("dlc %p dev %p err %d", dlc, dev, err); dev->err = err; - if (dlc->state == BT_CONNECTED) { - device_move(dev->tty_dev, rfcomm_get_device(dev), - DPM_ORDER_DEV_AFTER_PARENT); + wake_up_interruptible(&dev->conn_wait); - wake_up_interruptible(&dev->port.open_wait); - } else if (dlc->state == BT_CLOSED) + if (dlc->state == BT_CLOSED) tty_port_tty_hangup(&dev->port, false); } @@ -670,10 +693,20 @@ static int rfcomm_tty_install(struct tty_driver *driver, struct tty_struct *tty) /* install the tty_port */ err = tty_port_install(&dev->port, driver, tty); - if (err) + if (err) { rfcomm_tty_cleanup(tty); + return err; + } - return err; + /* take over the tty_port reference if the port was created with the + * flag RFCOMM_RELEASE_ONHUP. This will force the release of the port + * when the last process closes the tty. The behaviour is expected by + * userspace. 
+ */ + if (test_bit(RFCOMM_RELEASE_ONHUP, &dev->flags)) + tty_port_put(&dev->port); + + return 0; } static int rfcomm_tty_open(struct tty_struct *tty, struct file *filp) @@ -1010,10 +1043,6 @@ static void rfcomm_tty_hangup(struct tty_struct *tty) BT_DBG("tty %p dev %p", tty, dev); tty_port_hangup(&dev->port); - - if (test_bit(RFCOMM_RELEASE_ONHUP, &dev->flags) && - !test_and_set_bit(RFCOMM_TTY_RELEASED, &dev->flags)) - tty_port_put(&dev->port); } static int rfcomm_tty_tiocmget(struct tty_struct *tty) @@ -1096,7 +1125,7 @@ int __init rfcomm_init_ttys(void) rfcomm_tty_driver->subtype = SERIAL_TYPE_NORMAL; rfcomm_tty_driver->flags = TTY_DRIVER_REAL_RAW | TTY_DRIVER_DYNAMIC_DEV; rfcomm_tty_driver->init_termios = tty_std_termios; - rfcomm_tty_driver->init_termios.c_cflag = B9600 | CS8 | CREAD | HUPCL; + rfcomm_tty_driver->init_termios.c_cflag = B9600 | CS8 | CREAD | HUPCL | CLOCAL; rfcomm_tty_driver->init_termios.c_lflag &= ~ICANON; tty_set_operations(rfcomm_tty_driver, &rfcomm_ops); diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 4b07acb8293..45007362683 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -53,8 +53,7 @@ static int smp_e(struct crypto_blkcipher *tfm, const u8 *k, u8 *r) { struct blkcipher_desc desc; struct scatterlist sg; - int err, iv_len; - unsigned char iv[128]; + int err; if (tfm == NULL) { BT_ERR("tfm %p", tfm); @@ -72,12 +71,6 @@ static int smp_e(struct crypto_blkcipher *tfm, const u8 *k, u8 *r) sg_init_one(&sg, r, 16); - iv_len = crypto_blkcipher_ivsize(tfm); - if (iv_len) { - memset(&iv, 0xff, iv_len); - crypto_blkcipher_set_iv(tfm, iv, iv_len); - } - err = crypto_blkcipher_encrypt(&desc, &sg, &sg, 16); if (err) BT_ERR("Encrypt data error %d", err); @@ -143,13 +136,6 @@ static int smp_s1(struct crypto_blkcipher *tfm, u8 k[16], u8 r1[16], return err; } -static int smp_rand(u8 *buf) -{ - get_random_bytes(buf, 16); - - return 0; -} - static struct sk_buff *smp_build_cmd(struct l2cap_conn *conn, u8 code, u16 dlen, void *data) 
{ @@ -257,11 +243,11 @@ static u8 check_enc_key_size(struct l2cap_conn *conn, __u8 max_key_size) return 0; } -static void smp_failure(struct l2cap_conn *conn, u8 reason, u8 send) +static void smp_failure(struct l2cap_conn *conn, u8 reason) { struct hci_conn *hcon = conn->hcon; - if (send) + if (reason) smp_send_cmd(conn, SMP_CMD_PAIRING_FAIL, sizeof(reason), &reason); @@ -406,7 +392,7 @@ static void confirm_work(struct work_struct *work) return; error: - smp_failure(conn, reason, 1); + smp_failure(conn, reason); } static void random_work(struct work_struct *work) @@ -490,7 +476,7 @@ static void random_work(struct work_struct *work) return; error: - smp_failure(conn, reason, 1); + smp_failure(conn, reason); } static struct smp_chan *smp_chan_create(struct l2cap_conn *conn) @@ -555,10 +541,10 @@ int smp_user_confirm_reply(struct hci_conn *hcon, u16 mgmt_op, __le32 passkey) break; case MGMT_OP_USER_PASSKEY_NEG_REPLY: case MGMT_OP_USER_CONFIRM_NEG_REPLY: - smp_failure(conn, SMP_PASSKEY_ENTRY_FAILED, 1); + smp_failure(conn, SMP_PASSKEY_ENTRY_FAILED); return 0; default: - smp_failure(conn, SMP_PASSKEY_ENTRY_FAILED, 1); + smp_failure(conn, SMP_PASSKEY_ENTRY_FAILED); return -EOPNOTSUPP; } @@ -606,9 +592,7 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb) if (check_enc_key_size(conn, key_size)) return SMP_ENC_KEY_SIZE; - ret = smp_rand(smp->prnd); - if (ret) - return SMP_UNSPECIFIED; + get_random_bytes(smp->prnd, sizeof(smp->prnd)); smp->prsp[0] = SMP_CMD_PAIRING_RSP; memcpy(&smp->prsp[1], &rsp, sizeof(rsp)); @@ -644,9 +628,7 @@ static u8 smp_cmd_pairing_rsp(struct l2cap_conn *conn, struct sk_buff *skb) if (check_enc_key_size(conn, key_size)) return SMP_ENC_KEY_SIZE; - ret = smp_rand(smp->prnd); - if (ret) - return SMP_UNSPECIFIED; + get_random_bytes(smp->prnd, sizeof(smp->prnd)); smp->prsp[0] = SMP_CMD_PAIRING_RSP; memcpy(&smp->prsp[1], rsp, sizeof(*rsp)); @@ -768,6 +750,17 @@ static u8 smp_cmd_security_req(struct l2cap_conn *conn, struct 
sk_buff *skb) return 0; } +bool smp_sufficient_security(struct hci_conn *hcon, u8 sec_level) +{ + if (sec_level == BT_SECURITY_LOW) + return true; + + if (hcon->sec_level >= sec_level) + return true; + + return false; +} + int smp_conn_security(struct hci_conn *hcon, __u8 sec_level) { struct l2cap_conn *conn = hcon->l2cap_data; @@ -779,10 +772,7 @@ int smp_conn_security(struct hci_conn *hcon, __u8 sec_level) if (!test_bit(HCI_LE_ENABLED, &hcon->hdev->dev_flags)) return 1; - if (sec_level == BT_SECURITY_LOW) - return 1; - - if (hcon->sec_level >= sec_level) + if (smp_sufficient_security(hcon, sec_level)) return 1; if (hcon->link_mode & HCI_LM_MASTER) @@ -895,7 +885,7 @@ int smp_sig_channel(struct l2cap_conn *conn, struct sk_buff *skb) break; case SMP_CMD_PAIRING_FAIL: - smp_failure(conn, skb->data[0], 0); + smp_failure(conn, 0); reason = 0; err = -EPERM; break; @@ -941,7 +931,7 @@ int smp_sig_channel(struct l2cap_conn *conn, struct sk_buff *skb) done: if (reason) - smp_failure(conn, reason, 1); + smp_failure(conn, reason); kfree_skb(skb); return err; diff --git a/net/bluetooth/smp.h b/net/bluetooth/smp.h index f8ba07f3e5f..a700bcb490d 100644 --- a/net/bluetooth/smp.h +++ b/net/bluetooth/smp.h @@ -136,6 +136,7 @@ struct smp_chan { }; /* SMP Commands */ +bool smp_sufficient_security(struct hci_conn *hcon, u8 sec_level); int smp_conn_security(struct hci_conn *hcon, __u8 sec_level); int smp_sig_channel(struct l2cap_conn *conn, struct sk_buff *skb); int smp_distribute_keys(struct l2cap_conn *conn, __u8 force); diff --git a/net/bridge/br.c b/net/bridge/br.c index ba780cc8e51..19311aafcf5 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -22,14 +22,29 @@ #include "br_private.h" -static const struct stp_proto br_stp_proto = { - .rcv = br_stp_rcv, -}; +static void __net_exit br_net_exit(struct net *net) +{ + struct net_device *dev; + LIST_HEAD(list); + + rtnl_lock(); + for_each_netdev(net, dev) + if (dev->priv_flags & IFF_EBRIDGE) + br_dev_delete(dev, &list); + + 
unregister_netdevice_many(&list); + rtnl_unlock(); + +} static struct pernet_operations br_net_ops = { .exit = br_net_exit, }; +static const struct stp_proto br_stp_proto = { + .rcv = br_stp_rcv, +}; + static int __init br_init(void) { int err; diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index f00cfd2a014..e4401a531af 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -32,7 +32,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) const unsigned char *dest = skb->data; struct net_bridge_fdb_entry *dst; struct net_bridge_mdb_entry *mdst; - struct br_cpu_netstats *brstats = this_cpu_ptr(br->stats); + struct pcpu_sw_netstats *brstats = this_cpu_ptr(br->stats); u16 vid = 0; rcu_read_lock(); @@ -90,12 +90,12 @@ static int br_dev_init(struct net_device *dev) struct net_bridge *br = netdev_priv(dev); int i; - br->stats = alloc_percpu(struct br_cpu_netstats); + br->stats = alloc_percpu(struct pcpu_sw_netstats); if (!br->stats) return -ENOMEM; for_each_possible_cpu(i) { - struct br_cpu_netstats *br_dev_stats; + struct pcpu_sw_netstats *br_dev_stats; br_dev_stats = per_cpu_ptr(br->stats, i); u64_stats_init(&br_dev_stats->syncp); } @@ -135,12 +135,12 @@ static struct rtnl_link_stats64 *br_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) { struct net_bridge *br = netdev_priv(dev); - struct br_cpu_netstats tmp, sum = { 0 }; + struct pcpu_sw_netstats tmp, sum = { 0 }; unsigned int cpu; for_each_possible_cpu(cpu) { unsigned int start; - const struct br_cpu_netstats *bstats + const struct pcpu_sw_netstats *bstats = per_cpu_ptr(br->stats, cpu); do { start = u64_stats_fetch_begin_bh(&bstats->syncp); diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 33e8f23acdd..c5f5a4a933f 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -570,8 +570,7 @@ static void fdb_notify(struct net_bridge *br, rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC); return; errout: - if (err < 0) - 
rtnl_set_sk_err(net, RTNLGRP_NEIGH, err); + rtnl_set_sk_err(net, RTNLGRP_NEIGH, err); } /* Dump information about entries, in response to GETNEIGH */ diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 4b81b147178..d3409e6b545 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -26,13 +26,13 @@ static int deliver_clone(const struct net_bridge_port *prev, void (*__packet_hook)(const struct net_bridge_port *p, struct sk_buff *skb)); -/* Don't forward packets to originating port or forwarding diasabled */ +/* Don't forward packets to originating port or forwarding disabled */ static inline int should_deliver(const struct net_bridge_port *p, const struct sk_buff *skb) { - return (((p->flags & BR_HAIRPIN_MODE) || skb->dev != p->dev) && + return ((p->flags & BR_HAIRPIN_MODE) || skb->dev != p->dev) && br_allowed_egress(p->br, nbp_get_vlan_info(p), skb) && - p->state == BR_STATE_FORWARDING); + p->state == BR_STATE_FORWARDING; } static inline unsigned int packet_length(const struct sk_buff *skb) diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 4bf02adb5dc..cffe1d666ba 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -61,7 +61,7 @@ static int port_cost(struct net_device *dev) } -/* Check for port carrier transistions. */ +/* Check for port carrier transitions. 
*/ void br_port_carrier_check(struct net_bridge_port *p) { struct net_device *dev = p->dev; @@ -455,18 +455,3 @@ int br_del_if(struct net_bridge *br, struct net_device *dev) return 0; } - -void __net_exit br_net_exit(struct net *net) -{ - struct net_device *dev; - LIST_HEAD(list); - - rtnl_lock(); - for_each_netdev(net, dev) - if (dev->priv_flags & IFF_EBRIDGE) - br_dev_delete(dev, &list); - - unregister_netdevice_many(&list); - rtnl_unlock(); - -} diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 7e73c32e205..bf8dc7d308d 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -28,7 +28,7 @@ static int br_pass_frame_up(struct sk_buff *skb) { struct net_device *indev, *brdev = BR_INPUT_SKB_CB(skb)->brdev; struct net_bridge *br = netdev_priv(brdev); - struct br_cpu_netstats *brstats = this_cpu_ptr(br->stats); + struct pcpu_sw_netstats *brstats = this_cpu_ptr(br->stats); u64_stats_update_begin(&brstats->syncp); brstats->rx_packets++; diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c index cd8c3a44ab7..a9a4a1b7863 100644 --- a/net/bridge/br_ioctl.c +++ b/net/bridge/br_ioctl.c @@ -381,7 +381,7 @@ int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) { struct net_bridge *br = netdev_priv(dev); - switch(cmd) { + switch (cmd) { case SIOCDEVPRIVATE: return old_dev_ioctl(dev, rq, cmd); diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 80cad2cf02a..b008c59a92c 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -1001,7 +1001,7 @@ static struct nf_hook_ops br_nf_ops[] __read_mostly = { #ifdef CONFIG_SYSCTL static int brnf_sysctl_call_tables(struct ctl_table *ctl, int write, - void __user * buffer, size_t * lenp, loff_t * ppos) + void __user *buffer, size_t *lenp, loff_t *ppos) { int ret; diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index f75d92e4f96..e74b6d530cb 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -195,8 +195,7 @@ void 
br_ifinfo_notify(int event, struct net_bridge_port *port) rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); return; errout: - if (err < 0) - rtnl_set_sk_err(net, RTNLGRP_LINK, err); + rtnl_set_sk_err(net, RTNLGRP_LINK, err); } @@ -373,7 +372,7 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh) p = br_port_get_rtnl(dev); /* We want to accept dev as bridge itself if the AF_SPEC - * is set to see if someone is setting vlan info on the brigde + * is set to see if someone is setting vlan info on the bridge */ if (!p && !afspec) return -EINVAL; @@ -389,7 +388,7 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh) err = br_setport(p, tb); spin_unlock_bh(&p->br->lock); } else { - /* Binary compatability with old RSTP */ + /* Binary compatibility with old RSTP */ if (nla_len(protinfo) < sizeof(u8)) return -EINVAL; @@ -482,9 +481,7 @@ int __init br_netlink_init(void) int err; br_mdb_init(); - err = rtnl_af_register(&br_af_ops); - if (err) - goto out; + rtnl_af_register(&br_af_ops); err = rtnl_link_register(&br_link_ops); if (err) @@ -494,7 +491,6 @@ int __init br_netlink_init(void) out_af: rtnl_af_unregister(&br_af_ops); -out: br_mdb_uninit(); return err; } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 045d56eaeca..fcd12333c59 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -210,21 +210,13 @@ static inline struct net_bridge_port *br_port_get_rtnl(const struct net_device * rtnl_dereference(dev->rx_handler_data) : NULL; } -struct br_cpu_netstats { - u64 rx_packets; - u64 rx_bytes; - u64 tx_packets; - u64 tx_bytes; - struct u64_stats_sync syncp; -}; - struct net_bridge { spinlock_t lock; struct list_head port_list; struct net_device *dev; - struct br_cpu_netstats __percpu *stats; + struct pcpu_sw_netstats __percpu *stats; spinlock_t hash_lock; struct hlist_head hash[BR_HASH_SIZE]; #ifdef CONFIG_BRIDGE_NETFILTER @@ -415,7 +407,6 @@ void br_flood_forward(struct net_bridge *br, struct sk_buff *skb, void 
br_port_carrier_check(struct net_bridge_port *p); int br_add_bridge(struct net *net, const char *name); int br_del_bridge(struct net *net, const char *name); -void br_net_exit(struct net *net); int br_add_if(struct net_bridge *br, struct net_device *dev); int br_del_if(struct net_bridge *br, struct net_device *dev); int br_min_mtu(const struct net_bridge *br); @@ -721,7 +712,7 @@ void br_netfilter_fini(void); void br_netfilter_rtable_init(struct net_bridge *); #else #define br_netfilter_init() (0) -#define br_netfilter_fini() do { } while(0) +#define br_netfilter_fini() do { } while (0) #define br_netfilter_rtable_init(x) #endif diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c index 950663d4d33..558c46d19e0 100644 --- a/net/bridge/br_stp_timer.c +++ b/net/bridge/br_stp_timer.c @@ -110,7 +110,7 @@ static void br_tcn_timer_expired(unsigned long arg) if (!br_is_root_bridge(br) && (br->dev->flags & IFF_UP)) { br_transmit_tcn(br); - mod_timer(&br->tcn_timer,jiffies + br->bridge_hello_time); + mod_timer(&br->tcn_timer, jiffies + br->bridge_hello_time); } spin_unlock(&br->lock); } diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index 3b9637fb793..8dac65552f1 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -49,53 +49,51 @@ static ssize_t store_bridge_parm(struct device *d, } -static ssize_t show_forward_delay(struct device *d, +static ssize_t forward_delay_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%lu\n", jiffies_to_clock_t(br->forward_delay)); } -static ssize_t store_forward_delay(struct device *d, +static ssize_t forward_delay_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, br_set_forward_delay); } -static DEVICE_ATTR(forward_delay, S_IRUGO | S_IWUSR, - show_forward_delay, store_forward_delay); +static DEVICE_ATTR_RW(forward_delay); -static ssize_t 
show_hello_time(struct device *d, struct device_attribute *attr, +static ssize_t hello_time_show(struct device *d, struct device_attribute *attr, char *buf) { return sprintf(buf, "%lu\n", jiffies_to_clock_t(to_bridge(d)->hello_time)); } -static ssize_t store_hello_time(struct device *d, +static ssize_t hello_time_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, br_set_hello_time); } -static DEVICE_ATTR(hello_time, S_IRUGO | S_IWUSR, show_hello_time, - store_hello_time); +static DEVICE_ATTR_RW(hello_time); -static ssize_t show_max_age(struct device *d, struct device_attribute *attr, +static ssize_t max_age_show(struct device *d, struct device_attribute *attr, char *buf) { return sprintf(buf, "%lu\n", jiffies_to_clock_t(to_bridge(d)->max_age)); } -static ssize_t store_max_age(struct device *d, struct device_attribute *attr, +static ssize_t max_age_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, br_set_max_age); } -static DEVICE_ATTR(max_age, S_IRUGO | S_IWUSR, show_max_age, store_max_age); +static DEVICE_ATTR_RW(max_age); -static ssize_t show_ageing_time(struct device *d, +static ssize_t ageing_time_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); @@ -108,16 +106,15 @@ static int set_ageing_time(struct net_bridge *br, unsigned long val) return 0; } -static ssize_t store_ageing_time(struct device *d, +static ssize_t ageing_time_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_ageing_time); } -static DEVICE_ATTR(ageing_time, S_IRUGO | S_IWUSR, show_ageing_time, - store_ageing_time); +static DEVICE_ATTR_RW(ageing_time); -static ssize_t show_stp_state(struct device *d, +static ssize_t stp_state_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = 
to_bridge(d); @@ -125,7 +122,7 @@ static ssize_t show_stp_state(struct device *d, } -static ssize_t store_stp_state(struct device *d, +static ssize_t stp_state_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { @@ -147,20 +144,21 @@ static ssize_t store_stp_state(struct device *d, return len; } -static DEVICE_ATTR(stp_state, S_IRUGO | S_IWUSR, show_stp_state, - store_stp_state); +static DEVICE_ATTR_RW(stp_state); -static ssize_t show_group_fwd_mask(struct device *d, - struct device_attribute *attr, char *buf) +static ssize_t group_fwd_mask_show(struct device *d, + struct device_attribute *attr, + char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%#x\n", br->group_fwd_mask); } -static ssize_t store_group_fwd_mask(struct device *d, - struct device_attribute *attr, const char *buf, - size_t len) +static ssize_t group_fwd_mask_store(struct device *d, + struct device_attribute *attr, + const char *buf, + size_t len) { struct net_bridge *br = to_bridge(d); char *endp; @@ -180,10 +178,9 @@ static ssize_t store_group_fwd_mask(struct device *d, return len; } -static DEVICE_ATTR(group_fwd_mask, S_IRUGO | S_IWUSR, show_group_fwd_mask, - store_group_fwd_mask); +static DEVICE_ATTR_RW(group_fwd_mask); -static ssize_t show_priority(struct device *d, struct device_attribute *attr, +static ssize_t priority_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); @@ -197,93 +194,91 @@ static int set_priority(struct net_bridge *br, unsigned long val) return 0; } -static ssize_t store_priority(struct device *d, struct device_attribute *attr, - const char *buf, size_t len) +static ssize_t priority_store(struct device *d, struct device_attribute *attr, + const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_priority); } -static DEVICE_ATTR(priority, S_IRUGO | S_IWUSR, show_priority, store_priority); +static DEVICE_ATTR_RW(priority); -static ssize_t 
show_root_id(struct device *d, struct device_attribute *attr, +static ssize_t root_id_show(struct device *d, struct device_attribute *attr, char *buf) { return br_show_bridge_id(buf, &to_bridge(d)->designated_root); } -static DEVICE_ATTR(root_id, S_IRUGO, show_root_id, NULL); +static DEVICE_ATTR_RO(root_id); -static ssize_t show_bridge_id(struct device *d, struct device_attribute *attr, +static ssize_t bridge_id_show(struct device *d, struct device_attribute *attr, char *buf) { return br_show_bridge_id(buf, &to_bridge(d)->bridge_id); } -static DEVICE_ATTR(bridge_id, S_IRUGO, show_bridge_id, NULL); +static DEVICE_ATTR_RO(bridge_id); -static ssize_t show_root_port(struct device *d, struct device_attribute *attr, +static ssize_t root_port_show(struct device *d, struct device_attribute *attr, char *buf) { return sprintf(buf, "%d\n", to_bridge(d)->root_port); } -static DEVICE_ATTR(root_port, S_IRUGO, show_root_port, NULL); +static DEVICE_ATTR_RO(root_port); -static ssize_t show_root_path_cost(struct device *d, +static ssize_t root_path_cost_show(struct device *d, struct device_attribute *attr, char *buf) { return sprintf(buf, "%d\n", to_bridge(d)->root_path_cost); } -static DEVICE_ATTR(root_path_cost, S_IRUGO, show_root_path_cost, NULL); +static DEVICE_ATTR_RO(root_path_cost); -static ssize_t show_topology_change(struct device *d, +static ssize_t topology_change_show(struct device *d, struct device_attribute *attr, char *buf) { return sprintf(buf, "%d\n", to_bridge(d)->topology_change); } -static DEVICE_ATTR(topology_change, S_IRUGO, show_topology_change, NULL); +static DEVICE_ATTR_RO(topology_change); -static ssize_t show_topology_change_detected(struct device *d, +static ssize_t topology_change_detected_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%d\n", br->topology_change_detected); } -static DEVICE_ATTR(topology_change_detected, S_IRUGO, - show_topology_change_detected, NULL); +static 
DEVICE_ATTR_RO(topology_change_detected); -static ssize_t show_hello_timer(struct device *d, +static ssize_t hello_timer_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%ld\n", br_timer_value(&br->hello_timer)); } -static DEVICE_ATTR(hello_timer, S_IRUGO, show_hello_timer, NULL); +static DEVICE_ATTR_RO(hello_timer); -static ssize_t show_tcn_timer(struct device *d, struct device_attribute *attr, +static ssize_t tcn_timer_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%ld\n", br_timer_value(&br->tcn_timer)); } -static DEVICE_ATTR(tcn_timer, S_IRUGO, show_tcn_timer, NULL); +static DEVICE_ATTR_RO(tcn_timer); -static ssize_t show_topology_change_timer(struct device *d, +static ssize_t topology_change_timer_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%ld\n", br_timer_value(&br->topology_change_timer)); } -static DEVICE_ATTR(topology_change_timer, S_IRUGO, show_topology_change_timer, - NULL); +static DEVICE_ATTR_RO(topology_change_timer); -static ssize_t show_gc_timer(struct device *d, struct device_attribute *attr, +static ssize_t gc_timer_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%ld\n", br_timer_value(&br->gc_timer)); } -static DEVICE_ATTR(gc_timer, S_IRUGO, show_gc_timer, NULL); +static DEVICE_ATTR_RO(gc_timer); -static ssize_t show_group_addr(struct device *d, +static ssize_t group_addr_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); @@ -293,7 +288,7 @@ static ssize_t show_group_addr(struct device *d, br->group_addr[4], br->group_addr[5]); } -static ssize_t store_group_addr(struct device *d, +static ssize_t group_addr_store(struct device *d, struct device_attribute *attr, const char *buf, 
size_t len) { @@ -324,10 +319,9 @@ static ssize_t store_group_addr(struct device *d, return len; } -static DEVICE_ATTR(group_addr, S_IRUGO | S_IWUSR, - show_group_addr, store_group_addr); +static DEVICE_ATTR_RW(group_addr); -static ssize_t store_flush(struct device *d, +static ssize_t flush_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { @@ -339,26 +333,25 @@ static ssize_t store_flush(struct device *d, br_fdb_flush(br); return len; } -static DEVICE_ATTR(flush, S_IWUSR, NULL, store_flush); +static DEVICE_ATTR_WO(flush); #ifdef CONFIG_BRIDGE_IGMP_SNOOPING -static ssize_t show_multicast_router(struct device *d, +static ssize_t multicast_router_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%d\n", br->multicast_router); } -static ssize_t store_multicast_router(struct device *d, +static ssize_t multicast_router_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, br_multicast_set_router); } -static DEVICE_ATTR(multicast_router, S_IRUGO | S_IWUSR, show_multicast_router, - store_multicast_router); +static DEVICE_ATTR_RW(multicast_router); -static ssize_t show_multicast_snooping(struct device *d, +static ssize_t multicast_snooping_show(struct device *d, struct device_attribute *attr, char *buf) { @@ -366,18 +359,17 @@ static ssize_t show_multicast_snooping(struct device *d, return sprintf(buf, "%d\n", !br->multicast_disabled); } -static ssize_t store_multicast_snooping(struct device *d, +static ssize_t multicast_snooping_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, br_multicast_toggle); } -static DEVICE_ATTR(multicast_snooping, S_IRUGO | S_IWUSR, - show_multicast_snooping, store_multicast_snooping); +static DEVICE_ATTR_RW(multicast_snooping); -static ssize_t show_multicast_query_use_ifaddr(struct device *d, - 
struct device_attribute *attr, - char *buf) +static ssize_t multicast_query_use_ifaddr_show(struct device *d, + struct device_attribute *attr, + char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%d\n", br->multicast_query_use_ifaddr); @@ -390,17 +382,15 @@ static int set_query_use_ifaddr(struct net_bridge *br, unsigned long val) } static ssize_t -store_multicast_query_use_ifaddr(struct device *d, +multicast_query_use_ifaddr_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_query_use_ifaddr); } -static DEVICE_ATTR(multicast_query_use_ifaddr, S_IRUGO | S_IWUSR, - show_multicast_query_use_ifaddr, - store_multicast_query_use_ifaddr); +static DEVICE_ATTR_RW(multicast_query_use_ifaddr); -static ssize_t show_multicast_querier(struct device *d, +static ssize_t multicast_querier_show(struct device *d, struct device_attribute *attr, char *buf) { @@ -408,16 +398,15 @@ static ssize_t show_multicast_querier(struct device *d, return sprintf(buf, "%d\n", br->multicast_querier); } -static ssize_t store_multicast_querier(struct device *d, +static ssize_t multicast_querier_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, br_multicast_set_querier); } -static DEVICE_ATTR(multicast_querier, S_IRUGO | S_IWUSR, - show_multicast_querier, store_multicast_querier); +static DEVICE_ATTR_RW(multicast_querier); -static ssize_t show_hash_elasticity(struct device *d, +static ssize_t hash_elasticity_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); @@ -430,31 +419,29 @@ static int set_elasticity(struct net_bridge *br, unsigned long val) return 0; } -static ssize_t store_hash_elasticity(struct device *d, +static ssize_t hash_elasticity_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, 
set_elasticity); } -static DEVICE_ATTR(hash_elasticity, S_IRUGO | S_IWUSR, show_hash_elasticity, - store_hash_elasticity); +static DEVICE_ATTR_RW(hash_elasticity); -static ssize_t show_hash_max(struct device *d, struct device_attribute *attr, +static ssize_t hash_max_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%u\n", br->hash_max); } -static ssize_t store_hash_max(struct device *d, struct device_attribute *attr, +static ssize_t hash_max_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, br_multicast_set_hash_max); } -static DEVICE_ATTR(hash_max, S_IRUGO | S_IWUSR, show_hash_max, - store_hash_max); +static DEVICE_ATTR_RW(hash_max); -static ssize_t show_multicast_last_member_count(struct device *d, +static ssize_t multicast_last_member_count_show(struct device *d, struct device_attribute *attr, char *buf) { @@ -468,17 +455,15 @@ static int set_last_member_count(struct net_bridge *br, unsigned long val) return 0; } -static ssize_t store_multicast_last_member_count(struct device *d, +static ssize_t multicast_last_member_count_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_last_member_count); } -static DEVICE_ATTR(multicast_last_member_count, S_IRUGO | S_IWUSR, - show_multicast_last_member_count, - store_multicast_last_member_count); +static DEVICE_ATTR_RW(multicast_last_member_count); -static ssize_t show_multicast_startup_query_count( +static ssize_t multicast_startup_query_count_show( struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); @@ -491,17 +476,15 @@ static int set_startup_query_count(struct net_bridge *br, unsigned long val) return 0; } -static ssize_t store_multicast_startup_query_count( +static ssize_t multicast_startup_query_count_store( struct device *d, struct 
device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_startup_query_count); } -static DEVICE_ATTR(multicast_startup_query_count, S_IRUGO | S_IWUSR, - show_multicast_startup_query_count, - store_multicast_startup_query_count); +static DEVICE_ATTR_RW(multicast_startup_query_count); -static ssize_t show_multicast_last_member_interval( +static ssize_t multicast_last_member_interval_show( struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); @@ -515,17 +498,15 @@ static int set_last_member_interval(struct net_bridge *br, unsigned long val) return 0; } -static ssize_t store_multicast_last_member_interval( +static ssize_t multicast_last_member_interval_store( struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_last_member_interval); } -static DEVICE_ATTR(multicast_last_member_interval, S_IRUGO | S_IWUSR, - show_multicast_last_member_interval, - store_multicast_last_member_interval); +static DEVICE_ATTR_RW(multicast_last_member_interval); -static ssize_t show_multicast_membership_interval( +static ssize_t multicast_membership_interval_show( struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); @@ -539,17 +520,15 @@ static int set_membership_interval(struct net_bridge *br, unsigned long val) return 0; } -static ssize_t store_multicast_membership_interval( +static ssize_t multicast_membership_interval_store( struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_membership_interval); } -static DEVICE_ATTR(multicast_membership_interval, S_IRUGO | S_IWUSR, - show_multicast_membership_interval, - store_multicast_membership_interval); +static DEVICE_ATTR_RW(multicast_membership_interval); -static ssize_t show_multicast_querier_interval(struct device *d, +static ssize_t multicast_querier_interval_show(struct 
device *d, struct device_attribute *attr, char *buf) { @@ -564,17 +543,15 @@ static int set_querier_interval(struct net_bridge *br, unsigned long val) return 0; } -static ssize_t store_multicast_querier_interval(struct device *d, +static ssize_t multicast_querier_interval_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_querier_interval); } -static DEVICE_ATTR(multicast_querier_interval, S_IRUGO | S_IWUSR, - show_multicast_querier_interval, - store_multicast_querier_interval); +static DEVICE_ATTR_RW(multicast_querier_interval); -static ssize_t show_multicast_query_interval(struct device *d, +static ssize_t multicast_query_interval_show(struct device *d, struct device_attribute *attr, char *buf) { @@ -589,17 +566,15 @@ static int set_query_interval(struct net_bridge *br, unsigned long val) return 0; } -static ssize_t store_multicast_query_interval(struct device *d, +static ssize_t multicast_query_interval_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_query_interval); } -static DEVICE_ATTR(multicast_query_interval, S_IRUGO | S_IWUSR, - show_multicast_query_interval, - store_multicast_query_interval); +static DEVICE_ATTR_RW(multicast_query_interval); -static ssize_t show_multicast_query_response_interval( +static ssize_t multicast_query_response_interval_show( struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); @@ -614,17 +589,15 @@ static int set_query_response_interval(struct net_bridge *br, unsigned long val) return 0; } -static ssize_t store_multicast_query_response_interval( +static ssize_t multicast_query_response_interval_store( struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_query_response_interval); } -static DEVICE_ATTR(multicast_query_response_interval, S_IRUGO | S_IWUSR, - 
show_multicast_query_response_interval, - store_multicast_query_response_interval); +static DEVICE_ATTR_RW(multicast_query_response_interval); -static ssize_t show_multicast_startup_query_interval( +static ssize_t multicast_startup_query_interval_show( struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); @@ -639,18 +612,16 @@ static int set_startup_query_interval(struct net_bridge *br, unsigned long val) return 0; } -static ssize_t store_multicast_startup_query_interval( +static ssize_t multicast_startup_query_interval_store( struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_startup_query_interval); } -static DEVICE_ATTR(multicast_startup_query_interval, S_IRUGO | S_IWUSR, - show_multicast_startup_query_interval, - store_multicast_startup_query_interval); +static DEVICE_ATTR_RW(multicast_startup_query_interval); #endif #ifdef CONFIG_BRIDGE_NETFILTER -static ssize_t show_nf_call_iptables( +static ssize_t nf_call_iptables_show( struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); @@ -663,16 +634,15 @@ static int set_nf_call_iptables(struct net_bridge *br, unsigned long val) return 0; } -static ssize_t store_nf_call_iptables( +static ssize_t nf_call_iptables_store( struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_nf_call_iptables); } -static DEVICE_ATTR(nf_call_iptables, S_IRUGO | S_IWUSR, - show_nf_call_iptables, store_nf_call_iptables); +static DEVICE_ATTR_RW(nf_call_iptables); -static ssize_t show_nf_call_ip6tables( +static ssize_t nf_call_ip6tables_show( struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); @@ -685,16 +655,15 @@ static int set_nf_call_ip6tables(struct net_bridge *br, unsigned long val) return 0; } -static ssize_t store_nf_call_ip6tables( +static ssize_t 
nf_call_ip6tables_store( struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_nf_call_ip6tables); } -static DEVICE_ATTR(nf_call_ip6tables, S_IRUGO | S_IWUSR, - show_nf_call_ip6tables, store_nf_call_ip6tables); +static DEVICE_ATTR_RW(nf_call_ip6tables); -static ssize_t show_nf_call_arptables( +static ssize_t nf_call_arptables_show( struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); @@ -707,17 +676,16 @@ static int set_nf_call_arptables(struct net_bridge *br, unsigned long val) return 0; } -static ssize_t store_nf_call_arptables( +static ssize_t nf_call_arptables_store( struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_nf_call_arptables); } -static DEVICE_ATTR(nf_call_arptables, S_IRUGO | S_IWUSR, - show_nf_call_arptables, store_nf_call_arptables); +static DEVICE_ATTR_RW(nf_call_arptables); #endif #ifdef CONFIG_BRIDGE_VLAN_FILTERING -static ssize_t show_vlan_filtering(struct device *d, +static ssize_t vlan_filtering_show(struct device *d, struct device_attribute *attr, char *buf) { @@ -725,14 +693,13 @@ static ssize_t show_vlan_filtering(struct device *d, return sprintf(buf, "%d\n", br->vlan_enabled); } -static ssize_t store_vlan_filtering(struct device *d, +static ssize_t vlan_filtering_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, br_vlan_filter_toggle); } -static DEVICE_ATTR(vlan_filtering, S_IRUGO | S_IWUSR, - show_vlan_filtering, store_vlan_filtering); +static DEVICE_ATTR_RW(vlan_filtering); #endif static struct attribute *bridge_attrs[] = { diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index 2a2cdb756d5..dd595bd7fa8 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -26,7 +26,7 @@ struct brport_attribute { int (*store)(struct net_bridge_port *, unsigned 
long); }; -#define BRPORT_ATTR(_name,_mode,_show,_store) \ +#define BRPORT_ATTR(_name, _mode, _show, _store) \ const struct brport_attribute brport_attr_##_name = { \ .attr = {.name = __stringify(_name), \ .mode = _mode }, \ @@ -209,21 +209,21 @@ static const struct brport_attribute *brport_attrs[] = { #define to_brport_attr(_at) container_of(_at, struct brport_attribute, attr) #define to_brport(obj) container_of(obj, struct net_bridge_port, kobj) -static ssize_t brport_show(struct kobject * kobj, - struct attribute * attr, char * buf) +static ssize_t brport_show(struct kobject *kobj, + struct attribute *attr, char *buf) { - struct brport_attribute * brport_attr = to_brport_attr(attr); - struct net_bridge_port * p = to_brport(kobj); + struct brport_attribute *brport_attr = to_brport_attr(attr); + struct net_bridge_port *p = to_brport(kobj); return brport_attr->show(p, buf); } -static ssize_t brport_store(struct kobject * kobj, - struct attribute * attr, - const char * buf, size_t count) +static ssize_t brport_store(struct kobject *kobj, + struct attribute *attr, + const char *buf, size_t count) { - struct brport_attribute * brport_attr = to_brport_attr(attr); - struct net_bridge_port * p = to_brport(kobj); + struct brport_attribute *brport_attr = to_brport_attr(attr); + struct net_bridge_port *p = to_brport(kobj); ssize_t ret = -EINVAL; char *endp; unsigned long val; diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index af5ebd18d70..4ca4d0a0151 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -146,32 +146,11 @@ struct sk_buff *br_handle_vlan(struct net_bridge *br, /* At this point, we know that the frame was filtered and contains * a valid vlan id. If the vlan id is set in the untagged bitmap, - * send untagged; otherwise, send taged. + * send untagged; otherwise, send tagged. */ br_vlan_get_tag(skb, &vid); if (test_bit(vid, pv->untagged_bitmap)) skb = br_vlan_untag(skb); - else { - /* Egress policy says "send tagged". 
If output device - * is the bridge, we need to add the VLAN header - * ourselves since we'll be going through the RX path. - * Sending to ports puts the frame on the TX path and - * we let dev_hard_start_xmit() add the header. - */ - if (skb->protocol != htons(ETH_P_8021Q) && - pv->port_idx == 0) { - /* vlan_put_tag expects skb->data to point to - * mac header. - */ - skb_push(skb, ETH_HLEN); - skb = __vlan_put_tag(skb, skb->vlan_proto, skb->vlan_tci); - if (!skb) - goto out; - /* put skb->data back to where it was */ - skb_pull(skb, ETH_HLEN); - skb->vlan_tci = 0; - } - } out: return skb; diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c index 19c37a4929b..5322a36867a 100644 --- a/net/bridge/netfilter/ebt_log.c +++ b/net/bridge/netfilter/ebt_log.c @@ -96,7 +96,7 @@ ebt_log_packet(struct net *net, u_int8_t pf, unsigned int hooknum, bitmask = NF_LOG_MASK; if ((bitmask & EBT_LOG_IP) && eth_hdr(skb)->h_proto == - htons(ETH_P_IP)){ + htons(ETH_P_IP)) { const struct iphdr *ih; struct iphdr _iph; diff --git a/net/bridge/netfilter/ebt_snat.c b/net/bridge/netfilter/ebt_snat.c index f8f0bd1a1d5..0f6b118d6cb 100644 --- a/net/bridge/netfilter/ebt_snat.c +++ b/net/bridge/netfilter/ebt_snat.c @@ -35,7 +35,7 @@ ebt_snat_tg(struct sk_buff *skb, const struct xt_action_param *par) return EBT_DROP; if (ap->ar_hln != ETH_ALEN) goto out; - if (skb_store_bits(skb, sizeof(_ah), info->mac,ETH_ALEN)) + if (skb_store_bits(skb, sizeof(_ah), info->mac, ETH_ALEN)) return EBT_DROP; } out: diff --git a/net/bridge/netfilter/ebt_vlan.c b/net/bridge/netfilter/ebt_vlan.c index eae67bf0446..8d3f8c7651f 100644 --- a/net/bridge/netfilter/ebt_vlan.c +++ b/net/bridge/netfilter/ebt_vlan.c @@ -14,8 +14,7 @@ * GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #include <linux/if_ether.h> diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index dbd1c783431..d2cdf5d6e98 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -23,8 +23,7 @@ static struct ebt_entries initial_chain = { .policy = EBT_ACCEPT, }; -static struct ebt_replace_kernel initial_table = -{ +static struct ebt_replace_kernel initial_table = { .name = "broute", .valid_hooks = 1 << NF_BR_BROUTING, .entries_size = sizeof(struct ebt_entries), @@ -41,8 +40,7 @@ static int check(const struct ebt_table_info *info, unsigned int valid_hooks) return 0; } -static const struct ebt_table broute_table = -{ +static const struct ebt_table broute_table = { .name = "broute", .table = &initial_table, .valid_hooks = 1 << NF_BR_BROUTING, diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index bb2da7b706e..ce205aabf9c 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -14,8 +14,7 @@ #define FILTER_VALID_HOOKS ((1 << NF_BR_LOCAL_IN) | (1 << NF_BR_FORWARD) | \ (1 << NF_BR_LOCAL_OUT)) -static struct ebt_entries initial_chains[] = -{ +static struct ebt_entries initial_chains[] = { { .name = "INPUT", .policy = EBT_ACCEPT, @@ -30,8 +29,7 @@ static struct ebt_entries initial_chains[] = }, }; -static struct ebt_replace_kernel initial_table = -{ +static struct ebt_replace_kernel initial_table = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, .entries_size = 3 * sizeof(struct ebt_entries), @@ -50,8 +48,7 @@ static int check(const struct ebt_table_info *info, unsigned int valid_hooks) return 0; } -static const struct ebt_table 
frame_filter = -{ +static const struct ebt_table frame_filter = { .name = "filter", .table = &initial_table, .valid_hooks = FILTER_VALID_HOOKS, diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c index bd238f1f105..a0ac2984fb6 100644 --- a/net/bridge/netfilter/ebtable_nat.c +++ b/net/bridge/netfilter/ebtable_nat.c @@ -14,8 +14,7 @@ #define NAT_VALID_HOOKS ((1 << NF_BR_PRE_ROUTING) | (1 << NF_BR_LOCAL_OUT) | \ (1 << NF_BR_POST_ROUTING)) -static struct ebt_entries initial_chains[] = -{ +static struct ebt_entries initial_chains[] = { { .name = "PREROUTING", .policy = EBT_ACCEPT, @@ -30,8 +29,7 @@ static struct ebt_entries initial_chains[] = } }; -static struct ebt_replace_kernel initial_table = -{ +static struct ebt_replace_kernel initial_table = { .name = "nat", .valid_hooks = NAT_VALID_HOOKS, .entries_size = 3 * sizeof(struct ebt_entries), @@ -50,8 +48,7 @@ static int check(const struct ebt_table_info *info, unsigned int valid_hooks) return 0; } -static struct ebt_table frame_nat = -{ +static struct ebt_table frame_nat = { .name = "nat", .table = &initial_table, .valid_hooks = NAT_VALID_HOOKS, diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index ac780242838..0e474b13463 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -118,10 +118,10 @@ ebt_dev_check(const char *entry, const struct net_device *device) /* 1 is the wildcard token */ while (entry[i] != '\0' && entry[i] != 1 && entry[i] == devname[i]) i++; - return (devname[i] != entry[i] && entry[i] != 1); + return devname[i] != entry[i] && entry[i] != 1; } -#define FWINV2(bool,invflg) ((bool) ^ !!(e->invflags & invflg)) +#define FWINV2(bool, invflg) ((bool) ^ !!(e->invflags & invflg)) /* process standard matches */ static inline int ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb, @@ -1441,7 +1441,7 @@ static int copy_everything_to_user(struct ebt_table *t, void __user *user, return -EFAULT; if 
(*len != sizeof(struct ebt_replace) + entries_size + - (tmp.num_counters? nentries * sizeof(struct ebt_counter): 0)) + (tmp.num_counters ? nentries * sizeof(struct ebt_counter) : 0)) return -EINVAL; if (tmp.nentries != nentries) { @@ -1477,7 +1477,7 @@ static int do_ebt_set_ctl(struct sock *sk, if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; - switch(cmd) { + switch (cmd) { case EBT_SO_SET_ENTRIES: ret = do_replace(net, user, len); break; @@ -1507,10 +1507,10 @@ static int do_ebt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) if (!t) return ret; - switch(cmd) { + switch (cmd) { case EBT_SO_GET_INFO: case EBT_SO_GET_INIT_INFO: - if (*len != sizeof(struct ebt_replace)){ + if (*len != sizeof(struct ebt_replace)) { ret = -EINVAL; mutex_unlock(&ebt_mutex); break; @@ -1525,7 +1525,7 @@ static int do_ebt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) tmp.valid_hooks = t->table->valid_hooks; } mutex_unlock(&ebt_mutex); - if (copy_to_user(user, &tmp, *len) != 0){ + if (copy_to_user(user, &tmp, *len) != 0) { BUGPRINT("c2u Didn't work\n"); ret = -EFAULT; break; @@ -2375,8 +2375,7 @@ static int compat_do_ebt_get_ctl(struct sock *sk, int cmd, } #endif -static struct nf_sockopt_ops ebt_sockopts = -{ +static struct nf_sockopt_ops ebt_sockopts = { .pf = PF_INET, .set_optmin = EBT_BASE_CTL, .set_optmax = EBT_SO_SET_MAX + 1, diff --git a/net/bridge/netfilter/nf_tables_bridge.c b/net/bridge/netfilter/nf_tables_bridge.c index cf54b22818c..5bcc0d8b31f 100644 --- a/net/bridge/netfilter/nf_tables_bridge.c +++ b/net/bridge/netfilter/nf_tables_bridge.c @@ -14,10 +14,30 @@ #include <linux/netfilter_bridge.h> #include <net/netfilter/nf_tables.h> +static unsigned int +nft_do_chain_bridge(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct nft_pktinfo pkt; + + nft_set_pktinfo(&pkt, ops, skb, in, out); + + return nft_do_chain(&pkt, ops); +} + 
static struct nft_af_info nft_af_bridge __read_mostly = { .family = NFPROTO_BRIDGE, .nhooks = NF_BR_NUMHOOKS, .owner = THIS_MODULE, + .nops = 1, + .hooks = { + [NF_BR_LOCAL_IN] = nft_do_chain_bridge, + [NF_BR_FORWARD] = nft_do_chain_bridge, + [NF_BR_LOCAL_OUT] = nft_do_chain_bridge, + }, }; static int nf_tables_bridge_init_net(struct net *net) @@ -48,32 +68,14 @@ static struct pernet_operations nf_tables_bridge_net_ops = { .exit = nf_tables_bridge_exit_net, }; -static unsigned int -nft_do_chain_bridge(const struct nf_hook_ops *ops, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - struct nft_pktinfo pkt; - - nft_set_pktinfo(&pkt, ops, skb, in, out); - - return nft_do_chain_pktinfo(&pkt, ops); -} - -static struct nf_chain_type filter_bridge = { - .family = NFPROTO_BRIDGE, +static const struct nf_chain_type filter_bridge = { .name = "filter", .type = NFT_CHAIN_T_DEFAULT, + .family = NFPROTO_BRIDGE, + .owner = THIS_MODULE, .hook_mask = (1 << NF_BR_LOCAL_IN) | (1 << NF_BR_FORWARD) | (1 << NF_BR_LOCAL_OUT), - .fn = { - [NF_BR_LOCAL_IN] = nft_do_chain_bridge, - [NF_BR_FORWARD] = nft_do_chain_bridge, - [NF_BR_LOCAL_OUT] = nft_do_chain_bridge, - }, }; static int __init nf_tables_bridge_init(void) diff --git a/net/caif/caif_usb.c b/net/caif/caif_usb.c index 75ed04b78fa..ba02db02290 100644 --- a/net/caif/caif_usb.c +++ b/net/caif/caif_usb.c @@ -14,6 +14,7 @@ #include <linux/mii.h> #include <linux/usb.h> #include <linux/usb/usbnet.h> +#include <linux/etherdevice.h> #include <net/netns/generic.h> #include <net/caif/caif_dev.h> #include <net/caif/caif_layer.h> @@ -105,8 +106,8 @@ static struct cflayer *cfusbl_create(int phyid, u8 ethaddr[ETH_ALEN], * 5-11 source address * 12-13 protocol type */ - memcpy(&this->tx_eth_hdr[ETH_ALEN], braddr, ETH_ALEN); - memcpy(&this->tx_eth_hdr[ETH_ALEN], ethaddr, ETH_ALEN); + ether_addr_copy(&this->tx_eth_hdr[ETH_ALEN], braddr); + 
ether_addr_copy(&this->tx_eth_hdr[ETH_ALEN], ethaddr); this->tx_eth_hdr[12] = cpu_to_be16(ETH_P_802_EX1) & 0xff; this->tx_eth_hdr[13] = (cpu_to_be16(ETH_P_802_EX1) >> 8) & 0xff; pr_debug("caif ethernet TX-header dst:%pM src:%pM type:%02x%02x\n", diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c index 7344a8fa1bb..4589ff67bfa 100644 --- a/net/caif/chnl_net.c +++ b/net/caif/chnl_net.c @@ -285,7 +285,7 @@ static int chnl_net_open(struct net_device *dev) goto error; } - lldev = dev_get_by_index(dev_net(dev), llifindex); + lldev = __dev_get_by_index(dev_net(dev), llifindex); if (lldev == NULL) { pr_debug("no interface?\n"); @@ -307,7 +307,6 @@ static int chnl_net_open(struct net_device *dev) mtu = min_t(int, dev->mtu, lldev->mtu - (headroom + tailroom)); mtu = min_t(int, GPRS_PDP_MTU, mtu); dev_set_mtu(dev, mtu); - dev_put(lldev); if (mtu < 100) { pr_warn("CAIF Interface MTU too small (%d)\n", mtu); diff --git a/net/can/bcm.c b/net/can/bcm.c index 46f20bfafc0..3fc737b214c 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -1256,8 +1256,7 @@ static int bcm_sendmsg(struct kiocb *iocb, struct socket *sock, if (!ifindex && msg->msg_name) { /* no bound device as default => check msg_name */ - struct sockaddr_can *addr = - (struct sockaddr_can *)msg->msg_name; + DECLARE_SOCKADDR(struct sockaddr_can *, addr, msg->msg_name); if (msg->msg_namelen < sizeof(*addr)) return -EINVAL; @@ -1568,6 +1567,7 @@ static int bcm_recvmsg(struct kiocb *iocb, struct socket *sock, sock_recv_ts_and_drops(msg, sk, skb); if (msg->msg_name) { + __sockaddr_check_size(sizeof(struct sockaddr_can)); msg->msg_namelen = sizeof(struct sockaddr_can); memcpy(msg->msg_name, skb->cb, msg->msg_namelen); } diff --git a/net/can/gw.c b/net/can/gw.c index 3f9b0f3a281..ac31891967d 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -839,23 +839,21 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh) if (!gwj->ccgw.src_idx || !gwj->ccgw.dst_idx) goto out; - gwj->src.dev = dev_get_by_index(&init_net, 
gwj->ccgw.src_idx); + gwj->src.dev = __dev_get_by_index(&init_net, gwj->ccgw.src_idx); if (!gwj->src.dev) goto out; - /* check for CAN netdev not using header_ops - see gw_rcv() */ - if (gwj->src.dev->type != ARPHRD_CAN || gwj->src.dev->header_ops) - goto put_src_out; + if (gwj->src.dev->type != ARPHRD_CAN) + goto out; - gwj->dst.dev = dev_get_by_index(&init_net, gwj->ccgw.dst_idx); + gwj->dst.dev = __dev_get_by_index(&init_net, gwj->ccgw.dst_idx); if (!gwj->dst.dev) - goto put_src_out; + goto out; - /* check for CAN netdev not using header_ops - see gw_rcv() */ - if (gwj->dst.dev->type != ARPHRD_CAN || gwj->dst.dev->header_ops) - goto put_src_dst_out; + if (gwj->dst.dev->type != ARPHRD_CAN) + goto out; gwj->limit_hops = limhops; @@ -864,11 +862,6 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh) err = cgw_register_filter(gwj); if (!err) hlist_add_head_rcu(&gwj->list, &cgw_list); - -put_src_dst_out: - dev_put(gwj->dst.dev); -put_src_out: - dev_put(gwj->src.dev); out: if (err) kmem_cache_free(cgw_cache, gwj); diff --git a/net/can/raw.c b/net/can/raw.c index 641e1c89512..07d72d85232 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -675,8 +675,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct socket *sock, int err; if (msg->msg_name) { - struct sockaddr_can *addr = - (struct sockaddr_can *)msg->msg_name; + DECLARE_SOCKADDR(struct sockaddr_can *, addr, msg->msg_name); if (msg->msg_namelen < sizeof(*addr)) return -EINVAL; @@ -775,6 +774,7 @@ static int raw_recvmsg(struct kiocb *iocb, struct socket *sock, sock_recv_ts_and_drops(msg, sk, skb); if (msg->msg_name) { + __sockaddr_check_size(sizeof(struct sockaddr_can)); msg->msg_namelen = sizeof(struct sockaddr_can); memcpy(msg->msg_name, skb->cb, msg->msg_namelen); } diff --git a/net/core/Makefile b/net/core/Makefile index b33b996f5dd..9628c20acff 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -21,4 +21,5 @@ obj-$(CONFIG_FIB_RULES) += fib_rules.o obj-$(CONFIG_TRACEPOINTS) += 
net-traces.o obj-$(CONFIG_NET_DROP_MONITOR) += drop_monitor.o obj-$(CONFIG_NETWORK_PHY_TIMESTAMPING) += timestamping.o -obj-$(CONFIG_NETPRIO_CGROUP) += netprio_cgroup.o +obj-$(CONFIG_CGROUP_NET_PRIO) += netprio_cgroup.o +obj-$(CONFIG_CGROUP_NET_CLASSID) += netclassid_cgroup.o diff --git a/net/core/dev.c b/net/core/dev.c index 2e0c6a90f6f..3721db71635 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -147,6 +147,8 @@ struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly; struct list_head ptype_all __read_mostly; /* Taps */ static struct list_head offload_base __read_mostly; +static int netif_rx_internal(struct sk_buff *skb); + /* * The @dev_base_head list is protected by @dev_base_lock and the rtnl * semaphore. @@ -480,7 +482,7 @@ EXPORT_SYMBOL(dev_add_offload); * and must not be freed until after all the CPU's have gone * through a quiescent state. */ -void __dev_remove_offload(struct packet_offload *po) +static void __dev_remove_offload(struct packet_offload *po) { struct list_head *head = &offload_base; struct packet_offload *po1; @@ -498,7 +500,6 @@ void __dev_remove_offload(struct packet_offload *po) out: spin_unlock(&offload_lock); } -EXPORT_SYMBOL(__dev_remove_offload); /** * dev_remove_offload - remove packet offload handler @@ -1118,6 +1119,8 @@ rollback: write_seqcount_end(&devnet_rename_seq); + netdev_adjacent_rename_links(dev, oldname); + write_lock_bh(&dev_base_lock); hlist_del_rcu(&dev->name_hlist); write_unlock_bh(&dev_base_lock); @@ -1137,6 +1140,7 @@ rollback: err = ret; write_seqcount_begin(&devnet_rename_seq); memcpy(dev->name, oldname, IFNAMSIZ); + memcpy(oldname, newname, IFNAMSIZ); goto rollback; } else { pr_err("%s: name change rollback failed: %d\n", @@ -1566,14 +1570,14 @@ EXPORT_SYMBOL(unregister_netdevice_notifier); * are as for raw_notifier_call_chain(). 
*/ -int call_netdevice_notifiers_info(unsigned long val, struct net_device *dev, - struct netdev_notifier_info *info) +static int call_netdevice_notifiers_info(unsigned long val, + struct net_device *dev, + struct netdev_notifier_info *info) { ASSERT_RTNL(); netdev_notifier_info_init(info, dev); return raw_notifier_call_chain(&netdev_chain, val, info); } -EXPORT_SYMBOL(call_netdevice_notifiers_info); /** * call_netdevice_notifiers - call all network notifier blocks @@ -1699,7 +1703,7 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) skb_scrub_packet(skb, true); skb->protocol = eth_type_trans(skb, dev); - return netif_rx(skb); + return netif_rx_internal(skb); } EXPORT_SYMBOL_GPL(dev_forward_skb); @@ -2079,7 +2083,7 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) } EXPORT_SYMBOL(netif_set_real_num_tx_queues); -#ifdef CONFIG_RPS +#ifdef CONFIG_SYSFS /** * netif_set_real_num_rx_queues - set actual number of RX queues used * @dev: Network device @@ -2145,30 +2149,42 @@ void __netif_schedule(struct Qdisc *q) } EXPORT_SYMBOL(__netif_schedule); -void dev_kfree_skb_irq(struct sk_buff *skb) +struct dev_kfree_skb_cb { + enum skb_free_reason reason; +}; + +static struct dev_kfree_skb_cb *get_kfree_skb_cb(const struct sk_buff *skb) { - if (atomic_dec_and_test(&skb->users)) { - struct softnet_data *sd; - unsigned long flags; + return (struct dev_kfree_skb_cb *)skb->cb; +} + +void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason) +{ + unsigned long flags; - local_irq_save(flags); - sd = &__get_cpu_var(softnet_data); - skb->next = sd->completion_queue; - sd->completion_queue = skb; - raise_softirq_irqoff(NET_TX_SOFTIRQ); - local_irq_restore(flags); + if (likely(atomic_read(&skb->users) == 1)) { + smp_rmb(); + atomic_set(&skb->users, 0); + } else if (likely(!atomic_dec_and_test(&skb->users))) { + return; } + get_kfree_skb_cb(skb)->reason = reason; + local_irq_save(flags); + skb->next = 
__this_cpu_read(softnet_data.completion_queue); + __this_cpu_write(softnet_data.completion_queue, skb); + raise_softirq_irqoff(NET_TX_SOFTIRQ); + local_irq_restore(flags); } -EXPORT_SYMBOL(dev_kfree_skb_irq); +EXPORT_SYMBOL(__dev_kfree_skb_irq); -void dev_kfree_skb_any(struct sk_buff *skb) +void __dev_kfree_skb_any(struct sk_buff *skb, enum skb_free_reason reason) { if (in_irq() || irqs_disabled()) - dev_kfree_skb_irq(skb); + __dev_kfree_skb_irq(skb, reason); else dev_kfree_skb(skb); } -EXPORT_SYMBOL(dev_kfree_skb_any); +EXPORT_SYMBOL(__dev_kfree_skb_any); /** @@ -2442,13 +2458,8 @@ static void dev_gso_skb_destructor(struct sk_buff *skb) { struct dev_gso_cb *cb; - do { - struct sk_buff *nskb = skb->next; - - skb->next = nskb->next; - nskb->next = NULL; - kfree_skb(nskb); - } while (skb->next); + kfree_skb_list(skb->next); + skb->next = NULL; cb = DEV_GSO_CB(skb); if (cb->destructor) @@ -2523,21 +2534,6 @@ netdev_features_t netif_skb_features(struct sk_buff *skb) } EXPORT_SYMBOL(netif_skb_features); -/* - * Returns true if either: - * 1. skb has frag_list and the device doesn't support FRAGLIST, or - * 2. skb is fragmented and the device does not support SG. 
- */ -static inline int skb_needs_linearize(struct sk_buff *skb, - netdev_features_t features) -{ - return skb_is_nonlinear(skb) && - ((skb_has_frag_list(skb) && - !(features & NETIF_F_FRAGLIST)) || - (skb_shinfo(skb)->nr_frags && - !(features & NETIF_F_SG))); -} - int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq) { @@ -2605,8 +2601,8 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, dev_queue_xmit_nit(skb, dev); skb_len = skb->len; - rc = ops->ndo_start_xmit(skb, dev); - + trace_net_dev_start_xmit(skb, dev); + rc = ops->ndo_start_xmit(skb, dev); trace_net_dev_xmit(skb, rc, dev, skb_len); if (rc == NETDEV_TX_OK) txq_trans_update(txq); @@ -2624,6 +2620,7 @@ gso: dev_queue_xmit_nit(nskb, dev); skb_len = nskb->len; + trace_net_dev_start_xmit(nskb, dev); rc = ops->ndo_start_xmit(nskb, dev); trace_net_dev_xmit(nskb, rc, dev, skb_len); if (unlikely(rc != NETDEV_TX_OK)) { @@ -2744,7 +2741,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, return rc; } -#if IS_ENABLED(CONFIG_NETPRIO_CGROUP) +#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO) static void skb_update_prio(struct sk_buff *skb) { struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap); @@ -2781,8 +2778,9 @@ int dev_loopback_xmit(struct sk_buff *skb) EXPORT_SYMBOL(dev_loopback_xmit); /** - * dev_queue_xmit - transmit a buffer + * __dev_queue_xmit - transmit a buffer * @skb: buffer to transmit + * @accel_priv: private data used for L2 forwarding offload * * Queue a buffer for transmission to a network device. 
The caller must * have set the device and priority and built the buffer before calling @@ -3014,7 +3012,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, } skb_reset_network_header(skb); - if (!skb_get_rxhash(skb)) + if (!skb_get_hash(skb)) goto done; flow_table = rcu_dereference(rxqueue->rps_flow_table); @@ -3159,7 +3157,7 @@ static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen) rcu_read_lock(); fl = rcu_dereference(sd->flow_limit); if (fl) { - new_flow = skb_get_rxhash(skb) & (fl->num_buckets - 1); + new_flow = skb_get_hash(skb) & (fl->num_buckets - 1); old_flow = fl->history[fl->history_head]; fl->history[fl->history_head] = new_flow; @@ -3227,22 +3225,7 @@ enqueue: return NET_RX_DROP; } -/** - * netif_rx - post buffer to the network code - * @skb: buffer to post - * - * This function receives a packet from a device driver and queues it for - * the upper (protocol) levels to process. It always succeeds. The buffer - * may be dropped during processing for congestion control or by the - * protocol layers. - * - * return values: - * NET_RX_SUCCESS (no congestion) - * NET_RX_DROP (packet was dropped) - * - */ - -int netif_rx(struct sk_buff *skb) +static int netif_rx_internal(struct sk_buff *skb) { int ret; @@ -3278,14 +3261,38 @@ int netif_rx(struct sk_buff *skb) } return ret; } + +/** + * netif_rx - post buffer to the network code + * @skb: buffer to post + * + * This function receives a packet from a device driver and queues it for + * the upper (protocol) levels to process. It always succeeds. The buffer + * may be dropped during processing for congestion control or by the + * protocol layers. 
+ * + * return values: + * NET_RX_SUCCESS (no congestion) + * NET_RX_DROP (packet was dropped) + * + */ + +int netif_rx(struct sk_buff *skb) +{ + trace_netif_rx_entry(skb); + + return netif_rx_internal(skb); +} EXPORT_SYMBOL(netif_rx); int netif_rx_ni(struct sk_buff *skb) { int err; + trace_netif_rx_ni_entry(skb); + preempt_disable(); - err = netif_rx(skb); + err = netif_rx_internal(skb); if (local_softirq_pending()) do_softirq(); preempt_enable(); @@ -3311,7 +3318,10 @@ static void net_tx_action(struct softirq_action *h) clist = clist->next; WARN_ON(atomic_read(&skb->users)); - trace_kfree_skb(skb, net_tx_action); + if (likely(get_kfree_skb_cb(skb)->reason == SKB_REASON_CONSUMED)) + trace_consume_skb(skb); + else + trace_kfree_skb(skb, net_tx_action); __kfree_skb(skb); } } @@ -3667,22 +3677,7 @@ static int __netif_receive_skb(struct sk_buff *skb) return ret; } -/** - * netif_receive_skb - process receive buffer from network - * @skb: buffer to process - * - * netif_receive_skb() is the main receive data processing function. - * It always succeeds. The buffer may be dropped during processing - * for congestion control or by the protocol layers. - * - * This function may only be called from softirq context and interrupts - * should be enabled. - * - * Return values (usually ignored): - * NET_RX_SUCCESS: no congestion - * NET_RX_DROP: packet was dropped - */ -int netif_receive_skb(struct sk_buff *skb) +static int netif_receive_skb_internal(struct sk_buff *skb) { net_timestamp_check(netdev_tstamp_prequeue, skb); @@ -3708,6 +3703,28 @@ int netif_receive_skb(struct sk_buff *skb) #endif return __netif_receive_skb(skb); } + +/** + * netif_receive_skb - process receive buffer from network + * @skb: buffer to process + * + * netif_receive_skb() is the main receive data processing function. + * It always succeeds. The buffer may be dropped during processing + * for congestion control or by the protocol layers. 
+ * + * This function may only be called from softirq context and interrupts + * should be enabled. + * + * Return values (usually ignored): + * NET_RX_SUCCESS: no congestion + * NET_RX_DROP: packet was dropped + */ +int netif_receive_skb(struct sk_buff *skb) +{ + trace_netif_receive_skb_entry(skb); + + return netif_receive_skb_internal(skb); +} EXPORT_SYMBOL(netif_receive_skb); /* Network device is going away, flush any packets still pending @@ -3757,7 +3774,7 @@ static int napi_gro_complete(struct sk_buff *skb) if (ptype->type != type || !ptype->callbacks.gro_complete) continue; - err = ptype->callbacks.gro_complete(skb); + err = ptype->callbacks.gro_complete(skb, 0); break; } rcu_read_unlock(); @@ -3769,7 +3786,7 @@ static int napi_gro_complete(struct sk_buff *skb) } out: - return netif_receive_skb(skb); + return netif_receive_skb_internal(skb); } /* napi->gro_list contains packets ordered by age. @@ -3805,10 +3822,18 @@ static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb) { struct sk_buff *p; unsigned int maclen = skb->dev->hard_header_len; + u32 hash = skb_get_hash_raw(skb); for (p = napi->gro_list; p; p = p->next) { unsigned long diffs; + NAPI_GRO_CB(p)->flush = 0; + + if (hash != skb_get_hash_raw(p)) { + NAPI_GRO_CB(p)->same_flow = 0; + continue; + } + diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev; diffs |= p->vlan_tci ^ skb->vlan_tci; if (maclen == ETH_HLEN) @@ -3819,7 +3844,23 @@ static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb) skb_gro_mac_header(skb), maclen); NAPI_GRO_CB(p)->same_flow = !diffs; - NAPI_GRO_CB(p)->flush = 0; + } +} + +static void skb_gro_reset_offset(struct sk_buff *skb) +{ + const struct skb_shared_info *pinfo = skb_shinfo(skb); + const skb_frag_t *frag0 = &pinfo->frags[0]; + + NAPI_GRO_CB(skb)->data_offset = 0; + NAPI_GRO_CB(skb)->frag0 = NULL; + NAPI_GRO_CB(skb)->frag0_len = 0; + + if (skb_mac_header(skb) == skb_tail_pointer(skb) && + pinfo->nr_frags && + 
!PageHighMem(skb_frag_page(frag0))) { + NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0); + NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(frag0); } } @@ -3838,7 +3879,9 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff if (skb_is_gso(skb) || skb_has_frag_list(skb)) goto normal; + skb_gro_reset_offset(skb); gro_list_prepare(napi, skb); + NAPI_GRO_CB(skb)->csum = skb->csum; /* Needed for CHECKSUM_COMPLETE */ rcu_read_lock(); list_for_each_entry_rcu(ptype, head, list) { @@ -3850,6 +3893,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff NAPI_GRO_CB(skb)->same_flow = 0; NAPI_GRO_CB(skb)->flush = 0; NAPI_GRO_CB(skb)->free = 0; + NAPI_GRO_CB(skb)->udp_mark = 0; pp = ptype->callbacks.gro_receive(&napi->gro_list, skb); break; @@ -3874,10 +3918,23 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff if (same_flow) goto ok; - if (NAPI_GRO_CB(skb)->flush || napi->gro_count >= MAX_GRO_SKBS) + if (NAPI_GRO_CB(skb)->flush) goto normal; - napi->gro_count++; + if (unlikely(napi->gro_count >= MAX_GRO_SKBS)) { + struct sk_buff *nskb = napi->gro_list; + + /* locate the end of the list to select the 'oldest' flow */ + while (nskb->next) { + pp = &nskb->next; + nskb = *pp; + } + *pp = NULL; + nskb->next = NULL; + napi_gro_complete(nskb); + } else { + napi->gro_count++; + } NAPI_GRO_CB(skb)->count = 1; NAPI_GRO_CB(skb)->age = jiffies; skb_shinfo(skb)->gso_size = skb_gro_len(skb); @@ -3915,12 +3972,39 @@ normal: goto pull; } +struct packet_offload *gro_find_receive_by_type(__be16 type) +{ + struct list_head *offload_head = &offload_base; + struct packet_offload *ptype; + + list_for_each_entry_rcu(ptype, offload_head, list) { + if (ptype->type != type || !ptype->callbacks.gro_receive) + continue; + return ptype; + } + return NULL; +} +EXPORT_SYMBOL(gro_find_receive_by_type); + +struct packet_offload *gro_find_complete_by_type(__be16 type) +{ + struct list_head *offload_head = &offload_base; + 
struct packet_offload *ptype; + + list_for_each_entry_rcu(ptype, offload_head, list) { + if (ptype->type != type || !ptype->callbacks.gro_complete) + continue; + return ptype; + } + return NULL; +} +EXPORT_SYMBOL(gro_find_complete_by_type); static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) { switch (ret) { case GRO_NORMAL: - if (netif_receive_skb(skb)) + if (netif_receive_skb_internal(skb)) ret = GRO_DROP; break; @@ -3943,26 +4027,9 @@ static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) return ret; } -static void skb_gro_reset_offset(struct sk_buff *skb) -{ - const struct skb_shared_info *pinfo = skb_shinfo(skb); - const skb_frag_t *frag0 = &pinfo->frags[0]; - - NAPI_GRO_CB(skb)->data_offset = 0; - NAPI_GRO_CB(skb)->frag0 = NULL; - NAPI_GRO_CB(skb)->frag0_len = 0; - - if (skb_mac_header(skb) == skb_tail_pointer(skb) && - pinfo->nr_frags && - !PageHighMem(skb_frag_page(frag0))) { - NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0); - NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(frag0); - } -} - gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { - skb_gro_reset_offset(skb); + trace_napi_gro_receive_entry(skb); return napi_skb_finish(dev_gro_receive(napi, skb), skb); } @@ -3986,8 +4053,7 @@ struct sk_buff *napi_get_frags(struct napi_struct *napi) if (!skb) { skb = netdev_alloc_skb_ip_align(napi->dev, GRO_MAX_HEAD); - if (skb) - napi->skb = skb; + napi->skb = skb; } return skb; } @@ -3998,12 +4064,7 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff * { switch (ret) { case GRO_NORMAL: - case GRO_HELD: - skb->protocol = eth_type_trans(skb, skb->dev); - - if (ret == GRO_HELD) - skb_gro_pull(skb, -ETH_HLEN); - else if (netif_receive_skb(skb)) + if (netif_receive_skb_internal(skb)) ret = GRO_DROP; break; @@ -4012,6 +4073,7 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff * napi_reuse_skb(napi, skb); break; + case GRO_HELD: case 
GRO_MERGED: break; } @@ -4022,36 +4084,15 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff * static struct sk_buff *napi_frags_skb(struct napi_struct *napi) { struct sk_buff *skb = napi->skb; - struct ethhdr *eth; - unsigned int hlen; - unsigned int off; napi->skb = NULL; - skb_reset_mac_header(skb); - skb_gro_reset_offset(skb); - - off = skb_gro_offset(skb); - hlen = off + sizeof(*eth); - eth = skb_gro_header_fast(skb, off); - if (skb_gro_header_hard(skb, hlen)) { - eth = skb_gro_header_slow(skb, hlen, off); - if (unlikely(!eth)) { - napi_reuse_skb(napi, skb); - skb = NULL; - goto out; - } + if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr)))) { + napi_reuse_skb(napi, skb); + return NULL; } + skb->protocol = eth_type_trans(skb, skb->dev); - skb_gro_pull(skb, sizeof(*eth)); - - /* - * This works because the only protocols we care about don't require - * special handling. We'll fix it up properly at the end. - */ - skb->protocol = eth->h_proto; - -out: return skb; } @@ -4062,12 +4103,14 @@ gro_result_t napi_gro_frags(struct napi_struct *napi) if (!skb) return GRO_DROP; + trace_napi_gro_frags_entry(skb); + return napi_frags_finish(napi, skb, dev_gro_receive(napi, skb)); } EXPORT_SYMBOL(napi_gro_frags); /* - * net_rps_action sends any pending IPI's for rps. + * net_rps_action_and_irq_enable sends any pending IPI's for rps. * Note: called with local irq disabled, but exits with local irq enabled. 
*/ static void net_rps_action_and_irq_enable(struct softnet_data *sd) @@ -4272,17 +4315,10 @@ EXPORT_SYMBOL(netif_napi_add); void netif_napi_del(struct napi_struct *napi) { - struct sk_buff *skb, *next; - list_del_init(&napi->dev_list); napi_free_frags(napi); - for (skb = napi->gro_list; skb; skb = next) { - next = skb->next; - skb->next = NULL; - kfree_skb(skb); - } - + kfree_skb_list(napi->gro_list); napi->gro_list = NULL; napi->gro_count = 0; } @@ -4399,19 +4435,6 @@ struct netdev_adjacent { struct rcu_head rcu; }; -static struct netdev_adjacent *__netdev_find_adj_rcu(struct net_device *dev, - struct net_device *adj_dev, - struct list_head *adj_list) -{ - struct netdev_adjacent *adj; - - list_for_each_entry_rcu(adj, adj_list, list) { - if (adj->dev == adj_dev) - return adj; - } - return NULL; -} - static struct netdev_adjacent *__netdev_find_adj(struct net_device *dev, struct net_device *adj_dev, struct list_head *adj_list) @@ -4450,13 +4473,12 @@ EXPORT_SYMBOL(netdev_has_upper_dev); * Find out if a device is linked to an upper device and return true in case * it is. The caller must hold the RTNL lock. */ -bool netdev_has_any_upper_dev(struct net_device *dev) +static bool netdev_has_any_upper_dev(struct net_device *dev) { ASSERT_RTNL(); return !list_empty(&dev->all_adj_list.upper); } -EXPORT_SYMBOL(netdev_has_any_upper_dev); /** * netdev_master_upper_dev_get - Get master upper device @@ -4576,6 +4598,27 @@ void *netdev_lower_get_next_private_rcu(struct net_device *dev, EXPORT_SYMBOL(netdev_lower_get_next_private_rcu); /** + * netdev_lower_get_first_private_rcu - Get the first ->private from the + * lower neighbour list, RCU + * variant + * @dev: device + * + * Gets the first netdev_adjacent->private from the dev's lower neighbour + * list. The caller must hold RCU read lock. 
+ */ +void *netdev_lower_get_first_private_rcu(struct net_device *dev) +{ + struct netdev_adjacent *lower; + + lower = list_first_or_null_rcu(&dev->adj_list.lower, + struct netdev_adjacent, list); + if (lower) + return lower->private; + return NULL; +} +EXPORT_SYMBOL(netdev_lower_get_first_private_rcu); + +/** * netdev_master_upper_dev_get_rcu - Get master upper device * @dev: device * @@ -4594,13 +4637,36 @@ struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev) } EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu); +int netdev_adjacent_sysfs_add(struct net_device *dev, + struct net_device *adj_dev, + struct list_head *dev_list) +{ + char linkname[IFNAMSIZ+7]; + sprintf(linkname, dev_list == &dev->adj_list.upper ? + "upper_%s" : "lower_%s", adj_dev->name); + return sysfs_create_link(&(dev->dev.kobj), &(adj_dev->dev.kobj), + linkname); +} +void netdev_adjacent_sysfs_del(struct net_device *dev, + char *name, + struct list_head *dev_list) +{ + char linkname[IFNAMSIZ+7]; + sprintf(linkname, dev_list == &dev->adj_list.upper ? 
+ "upper_%s" : "lower_%s", name); + sysfs_remove_link(&(dev->dev.kobj), linkname); +} + +#define netdev_adjacent_is_neigh_list(dev, dev_list) \ + (dev_list == &dev->adj_list.upper || \ + dev_list == &dev->adj_list.lower) + static int __netdev_adjacent_dev_insert(struct net_device *dev, struct net_device *adj_dev, struct list_head *dev_list, void *private, bool master) { struct netdev_adjacent *adj; - char linkname[IFNAMSIZ+7]; int ret; adj = __netdev_find_adj(dev, adj_dev, dev_list); @@ -4623,16 +4689,8 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev, pr_debug("dev_hold for %s, because of link added from %s to %s\n", adj_dev->name, dev->name, adj_dev->name); - if (dev_list == &dev->adj_list.lower) { - sprintf(linkname, "lower_%s", adj_dev->name); - ret = sysfs_create_link(&(dev->dev.kobj), - &(adj_dev->dev.kobj), linkname); - if (ret) - goto free_adj; - } else if (dev_list == &dev->adj_list.upper) { - sprintf(linkname, "upper_%s", adj_dev->name); - ret = sysfs_create_link(&(dev->dev.kobj), - &(adj_dev->dev.kobj), linkname); + if (netdev_adjacent_is_neigh_list(dev, dev_list)) { + ret = netdev_adjacent_sysfs_add(dev, adj_dev, dev_list); if (ret) goto free_adj; } @@ -4652,14 +4710,8 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev, return 0; remove_symlinks: - if (dev_list == &dev->adj_list.lower) { - sprintf(linkname, "lower_%s", adj_dev->name); - sysfs_remove_link(&(dev->dev.kobj), linkname); - } else if (dev_list == &dev->adj_list.upper) { - sprintf(linkname, "upper_%s", adj_dev->name); - sysfs_remove_link(&(dev->dev.kobj), linkname); - } - + if (netdev_adjacent_is_neigh_list(dev, dev_list)) + netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list); free_adj: kfree(adj); dev_put(adj_dev); @@ -4667,12 +4719,11 @@ free_adj: return ret; } -void __netdev_adjacent_dev_remove(struct net_device *dev, - struct net_device *adj_dev, - struct list_head *dev_list) +static void __netdev_adjacent_dev_remove(struct net_device *dev, + struct 
net_device *adj_dev, + struct list_head *dev_list) { struct netdev_adjacent *adj; - char linkname[IFNAMSIZ+7]; adj = __netdev_find_adj(dev, adj_dev, dev_list); @@ -4692,13 +4743,8 @@ void __netdev_adjacent_dev_remove(struct net_device *dev, if (adj->master) sysfs_remove_link(&(dev->dev.kobj), "master"); - if (dev_list == &dev->adj_list.lower) { - sprintf(linkname, "lower_%s", adj_dev->name); - sysfs_remove_link(&(dev->dev.kobj), linkname); - } else if (dev_list == &dev->adj_list.upper) { - sprintf(linkname, "upper_%s", adj_dev->name); - sysfs_remove_link(&(dev->dev.kobj), linkname); - } + if (netdev_adjacent_is_neigh_list(dev, dev_list)) + netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list); list_del_rcu(&adj->list); pr_debug("dev_put for %s, because link removed from %s to %s\n", @@ -4707,11 +4753,11 @@ void __netdev_adjacent_dev_remove(struct net_device *dev, kfree_rcu(adj, rcu); } -int __netdev_adjacent_dev_link_lists(struct net_device *dev, - struct net_device *upper_dev, - struct list_head *up_list, - struct list_head *down_list, - void *private, bool master) +static int __netdev_adjacent_dev_link_lists(struct net_device *dev, + struct net_device *upper_dev, + struct list_head *up_list, + struct list_head *down_list, + void *private, bool master) { int ret; @@ -4730,8 +4776,8 @@ int __netdev_adjacent_dev_link_lists(struct net_device *dev, return 0; } -int __netdev_adjacent_dev_link(struct net_device *dev, - struct net_device *upper_dev) +static int __netdev_adjacent_dev_link(struct net_device *dev, + struct net_device *upper_dev) { return __netdev_adjacent_dev_link_lists(dev, upper_dev, &dev->all_adj_list.upper, @@ -4739,26 +4785,26 @@ int __netdev_adjacent_dev_link(struct net_device *dev, NULL, false); } -void __netdev_adjacent_dev_unlink_lists(struct net_device *dev, - struct net_device *upper_dev, - struct list_head *up_list, - struct list_head *down_list) +static void __netdev_adjacent_dev_unlink_lists(struct net_device *dev, + struct net_device 
*upper_dev, + struct list_head *up_list, + struct list_head *down_list) { __netdev_adjacent_dev_remove(dev, upper_dev, up_list); __netdev_adjacent_dev_remove(upper_dev, dev, down_list); } -void __netdev_adjacent_dev_unlink(struct net_device *dev, - struct net_device *upper_dev) +static void __netdev_adjacent_dev_unlink(struct net_device *dev, + struct net_device *upper_dev) { __netdev_adjacent_dev_unlink_lists(dev, upper_dev, &dev->all_adj_list.upper, &upper_dev->all_adj_list.lower); } -int __netdev_adjacent_dev_link_neighbour(struct net_device *dev, - struct net_device *upper_dev, - void *private, bool master) +static int __netdev_adjacent_dev_link_neighbour(struct net_device *dev, + struct net_device *upper_dev, + void *private, bool master) { int ret = __netdev_adjacent_dev_link(dev, upper_dev); @@ -4777,8 +4823,8 @@ int __netdev_adjacent_dev_link_neighbour(struct net_device *dev, return 0; } -void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev, - struct net_device *upper_dev) +static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev, + struct net_device *upper_dev) { __netdev_adjacent_dev_unlink(dev, upper_dev); __netdev_adjacent_dev_unlink_lists(dev, upper_dev, @@ -4967,20 +5013,24 @@ void netdev_upper_dev_unlink(struct net_device *dev, } EXPORT_SYMBOL(netdev_upper_dev_unlink); -void *netdev_lower_dev_get_private_rcu(struct net_device *dev, - struct net_device *lower_dev) +void netdev_adjacent_rename_links(struct net_device *dev, char *oldname) { - struct netdev_adjacent *lower; + struct netdev_adjacent *iter; - if (!lower_dev) - return NULL; - lower = __netdev_find_adj_rcu(dev, lower_dev, &dev->adj_list.lower); - if (!lower) - return NULL; + list_for_each_entry(iter, &dev->adj_list.upper, list) { + netdev_adjacent_sysfs_del(iter->dev, oldname, + &iter->dev->adj_list.lower); + netdev_adjacent_sysfs_add(iter->dev, dev, + &iter->dev->adj_list.lower); + } - return lower->private; + list_for_each_entry(iter, &dev->adj_list.lower, 
list) { + netdev_adjacent_sysfs_del(iter->dev, oldname, + &iter->dev->adj_list.upper); + netdev_adjacent_sysfs_add(iter->dev, dev, + &iter->dev->adj_list.upper); + } } -EXPORT_SYMBOL(netdev_lower_dev_get_private_rcu); void *netdev_lower_dev_get_private(struct net_device *dev, struct net_device *lower_dev) @@ -5314,6 +5364,17 @@ int dev_change_flags(struct net_device *dev, unsigned int flags) } EXPORT_SYMBOL(dev_change_flags); +static int __dev_set_mtu(struct net_device *dev, int new_mtu) +{ + const struct net_device_ops *ops = dev->netdev_ops; + + if (ops->ndo_change_mtu) + return ops->ndo_change_mtu(dev, new_mtu); + + dev->mtu = new_mtu; + return 0; +} + /** * dev_set_mtu - Change maximum transfer unit * @dev: device @@ -5323,8 +5384,7 @@ EXPORT_SYMBOL(dev_change_flags); */ int dev_set_mtu(struct net_device *dev, int new_mtu) { - const struct net_device_ops *ops = dev->netdev_ops; - int err; + int err, orig_mtu; if (new_mtu == dev->mtu) return 0; @@ -5336,14 +5396,25 @@ int dev_set_mtu(struct net_device *dev, int new_mtu) if (!netif_device_present(dev)) return -ENODEV; - err = 0; - if (ops->ndo_change_mtu) - err = ops->ndo_change_mtu(dev, new_mtu); - else - dev->mtu = new_mtu; + err = call_netdevice_notifiers(NETDEV_PRECHANGEMTU, dev); + err = notifier_to_errno(err); + if (err) + return err; - if (!err) - call_netdevice_notifiers(NETDEV_CHANGEMTU, dev); + orig_mtu = dev->mtu; + err = __dev_set_mtu(dev, new_mtu); + + if (!err) { + err = call_netdevice_notifiers(NETDEV_CHANGEMTU, dev); + err = notifier_to_errno(err); + if (err) { + /* setting mtu back and notifying everyone again, + * so that they have a chance to revert changes. 
+ */ + __dev_set_mtu(dev, orig_mtu); + call_netdevice_notifiers(NETDEV_CHANGEMTU, dev); + } + } return err; } EXPORT_SYMBOL(dev_set_mtu); @@ -5697,7 +5768,7 @@ void netif_stacked_transfer_operstate(const struct net_device *rootdev, } EXPORT_SYMBOL(netif_stacked_transfer_operstate); -#ifdef CONFIG_RPS +#ifdef CONFIG_SYSFS static int netif_alloc_rx_queues(struct net_device *dev) { unsigned int i, count = dev->num_rx_queues; @@ -5836,13 +5907,8 @@ int register_netdevice(struct net_device *dev) dev->features |= NETIF_F_SOFT_FEATURES; dev->wanted_features = dev->features & dev->hw_features; - /* Turn on no cache copy if HW is doing checksum */ if (!(dev->flags & IFF_LOOPBACK)) { dev->hw_features |= NETIF_F_NOCACHE_COPY; - if (dev->features & NETIF_F_ALL_CSUM) { - dev->wanted_features |= NETIF_F_NOCACHE_COPY; - dev->features |= NETIF_F_NOCACHE_COPY; - } } /* Make NETIF_F_HIGHDMA inheritable to VLAN devices. @@ -6247,7 +6313,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, return NULL; } -#ifdef CONFIG_RPS +#ifdef CONFIG_SYSFS if (rxqs < 1) { pr_err("alloc_netdev: Unable to allocate device with zero RX queues\n"); return NULL; @@ -6303,7 +6369,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, if (netif_alloc_netdev_queues(dev)) goto free_all; -#ifdef CONFIG_RPS +#ifdef CONFIG_SYSFS dev->num_rx_queues = rxqs; dev->real_num_rx_queues = rxqs; if (netif_alloc_rx_queues(dev)) @@ -6323,7 +6389,7 @@ free_all: free_pcpu: free_percpu(dev->pcpu_refcnt); netif_free_tx_queues(dev); -#ifdef CONFIG_RPS +#ifdef CONFIG_SYSFS kfree(dev->_rx); #endif @@ -6348,7 +6414,7 @@ void free_netdev(struct net_device *dev) release_net(dev_net(dev)); netif_free_tx_queues(dev); -#ifdef CONFIG_RPS +#ifdef CONFIG_SYSFS kfree(dev->_rx); #endif @@ -6618,11 +6684,11 @@ static int dev_cpu_callback(struct notifier_block *nfb, /* Process offline CPU's input_pkt_queue */ while ((skb = __skb_dequeue(&oldsd->process_queue))) { - netif_rx(skb); + 
netif_rx_internal(skb); input_queue_head_incr(oldsd); } while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) { - netif_rx(skb); + netif_rx_internal(skb); input_queue_head_incr(oldsd); } @@ -6935,28 +7001,18 @@ static int __init net_dev_init(void) for_each_possible_cpu(i) { struct softnet_data *sd = &per_cpu(softnet_data, i); - memset(sd, 0, sizeof(*sd)); skb_queue_head_init(&sd->input_pkt_queue); skb_queue_head_init(&sd->process_queue); - sd->completion_queue = NULL; INIT_LIST_HEAD(&sd->poll_list); - sd->output_queue = NULL; sd->output_queue_tailp = &sd->output_queue; #ifdef CONFIG_RPS sd->csd.func = rps_trigger_softirq; sd->csd.info = sd; - sd->csd.flags = 0; sd->cpu = i; #endif sd->backlog.poll = process_backlog; sd->backlog.weight = weight_p; - sd->backlog.gro_list = NULL; - sd->backlog.gro_count = 0; - -#ifdef CONFIG_NET_FLOW_LIMIT - sd->flow_limit = NULL; -#endif } dev_boot_phase = 0; diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c index ec40a849fc4..329d5794e7d 100644 --- a/net/core/dev_addr_lists.c +++ b/net/core/dev_addr_lists.c @@ -38,7 +38,7 @@ static int __hw_addr_create_ex(struct netdev_hw_addr_list *list, ha->type = addr_type; ha->refcount = 1; ha->global_use = global; - ha->synced = sync; + ha->synced = sync ? 
1 : 0; ha->sync_cnt = 0; list_add_tail_rcu(&ha->list, &list->list); list->count++; @@ -48,7 +48,8 @@ static int __hw_addr_create_ex(struct netdev_hw_addr_list *list, static int __hw_addr_add_ex(struct netdev_hw_addr_list *list, const unsigned char *addr, int addr_len, - unsigned char addr_type, bool global, bool sync) + unsigned char addr_type, bool global, bool sync, + int sync_count) { struct netdev_hw_addr *ha; @@ -66,10 +67,10 @@ static int __hw_addr_add_ex(struct netdev_hw_addr_list *list, ha->global_use = true; } if (sync) { - if (ha->synced) + if (ha->synced && sync_count) return -EEXIST; else - ha->synced = true; + ha->synced++; } ha->refcount++; return 0; @@ -84,7 +85,8 @@ static int __hw_addr_add(struct netdev_hw_addr_list *list, const unsigned char *addr, int addr_len, unsigned char addr_type) { - return __hw_addr_add_ex(list, addr, addr_len, addr_type, false, false); + return __hw_addr_add_ex(list, addr, addr_len, addr_type, false, false, + 0); } static int __hw_addr_del_entry(struct netdev_hw_addr_list *list, @@ -101,7 +103,7 @@ static int __hw_addr_del_entry(struct netdev_hw_addr_list *list, ha->global_use = false; if (sync) - ha->synced = false; + ha->synced--; if (--ha->refcount) return 0; @@ -139,7 +141,7 @@ static int __hw_addr_sync_one(struct netdev_hw_addr_list *to_list, int err; err = __hw_addr_add_ex(to_list, ha->addr, addr_len, ha->type, - false, true); + false, true, ha->sync_cnt); if (err && err != -EEXIST) return err; @@ -186,47 +188,6 @@ static int __hw_addr_sync_multiple(struct netdev_hw_addr_list *to_list, return err; } -int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list, - struct netdev_hw_addr_list *from_list, - int addr_len, unsigned char addr_type) -{ - int err; - struct netdev_hw_addr *ha, *ha2; - unsigned char type; - - list_for_each_entry(ha, &from_list->list, list) { - type = addr_type ? 
addr_type : ha->type; - err = __hw_addr_add(to_list, ha->addr, addr_len, type); - if (err) - goto unroll; - } - return 0; - -unroll: - list_for_each_entry(ha2, &from_list->list, list) { - if (ha2 == ha) - break; - type = addr_type ? addr_type : ha2->type; - __hw_addr_del(to_list, ha2->addr, addr_len, type); - } - return err; -} -EXPORT_SYMBOL(__hw_addr_add_multiple); - -void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list, - struct netdev_hw_addr_list *from_list, - int addr_len, unsigned char addr_type) -{ - struct netdev_hw_addr *ha; - unsigned char type; - - list_for_each_entry(ha, &from_list->list, list) { - type = addr_type ? addr_type : ha->type; - __hw_addr_del(to_list, ha->addr, addr_len, type); - } -} -EXPORT_SYMBOL(__hw_addr_del_multiple); - /* This function only works where there is a strict 1-1 relationship * between source and destionation of they synch. If you ever need to * sync addresses to more then 1 destination, you need to use @@ -264,7 +225,7 @@ void __hw_addr_unsync(struct netdev_hw_addr_list *to_list, } EXPORT_SYMBOL(__hw_addr_unsync); -void __hw_addr_flush(struct netdev_hw_addr_list *list) +static void __hw_addr_flush(struct netdev_hw_addr_list *list) { struct netdev_hw_addr *ha, *tmp; @@ -274,7 +235,6 @@ void __hw_addr_flush(struct netdev_hw_addr_list *list) } list->count = 0; } -EXPORT_SYMBOL(__hw_addr_flush); void __hw_addr_init(struct netdev_hw_addr_list *list) { @@ -400,59 +360,6 @@ int dev_addr_del(struct net_device *dev, const unsigned char *addr, } EXPORT_SYMBOL(dev_addr_del); -/** - * dev_addr_add_multiple - Add device addresses from another device - * @to_dev: device to which addresses will be added - * @from_dev: device from which addresses will be added - * @addr_type: address type - 0 means type will be used from from_dev - * - * Add device addresses of the one device to another. - ** - * The caller must hold the rtnl_mutex. 
- */ -int dev_addr_add_multiple(struct net_device *to_dev, - struct net_device *from_dev, - unsigned char addr_type) -{ - int err; - - ASSERT_RTNL(); - - if (from_dev->addr_len != to_dev->addr_len) - return -EINVAL; - err = __hw_addr_add_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs, - to_dev->addr_len, addr_type); - if (!err) - call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev); - return err; -} -EXPORT_SYMBOL(dev_addr_add_multiple); - -/** - * dev_addr_del_multiple - Delete device addresses by another device - * @to_dev: device where the addresses will be deleted - * @from_dev: device supplying the addresses to be deleted - * @addr_type: address type - 0 means type will be used from from_dev - * - * Deletes addresses in to device by the list of addresses in from device. - * - * The caller must hold the rtnl_mutex. - */ -int dev_addr_del_multiple(struct net_device *to_dev, - struct net_device *from_dev, - unsigned char addr_type) -{ - ASSERT_RTNL(); - - if (from_dev->addr_len != to_dev->addr_len) - return -EINVAL; - __hw_addr_del_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs, - to_dev->addr_len, addr_type); - call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev); - return 0; -} -EXPORT_SYMBOL(dev_addr_del_multiple); - /* * Unicast list handling functions */ @@ -676,7 +583,7 @@ static int __dev_mc_add(struct net_device *dev, const unsigned char *addr, netif_addr_lock_bh(dev); err = __hw_addr_add_ex(&dev->mc, addr, dev->addr_len, - NETDEV_HW_ADDR_T_MULTICAST, global, false); + NETDEV_HW_ADDR_T_MULTICAST, global, false, 0); if (!err) __dev_set_rx_mode(dev); netif_addr_unlock_bh(dev); diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 5b7d0e1d066..cf999e09bcd 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -327,6 +327,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) cmd == SIOCBRADDIF || cmd == SIOCBRDELIF || cmd == SIOCSHWTSTAMP || + cmd == SIOCGHWTSTAMP || cmd == SIOCWANDEV) { err = -EOPNOTSUPP; if 
(ops->ndo_do_ioctl) { @@ -546,6 +547,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) */ default: if (cmd == SIOCWANDEV || + cmd == SIOCGHWTSTAMP || (cmd >= SIOCDEVPRIVATE && cmd <= SIOCDEVPRIVATE + 15)) { dev_load(net, ifr.ifr_name); diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 2fc5beaf578..87577d44755 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -202,12 +202,12 @@ static __always_inline u32 __flow_hash_1word(u32 a) } /* - * __skb_get_rxhash: calculate a flow hash based on src/dst addresses + * __skb_get_hash: calculate a flow hash based on src/dst addresses * and src/dst port numbers. Sets rxhash in skb to non-zero hash value * on success, zero indicates no valid hash. Also, sets l4_rxhash in skb * if hash is a canonical 4-tuple hash over transport ports. */ -void __skb_get_rxhash(struct sk_buff *skb) +void __skb_get_hash(struct sk_buff *skb) { struct flow_keys keys; u32 hash; @@ -234,7 +234,7 @@ void __skb_get_rxhash(struct sk_buff *skb) skb->rxhash = hash; } -EXPORT_SYMBOL(__skb_get_rxhash); +EXPORT_SYMBOL(__skb_get_hash); /* * Returns a Tx hash based on the given packet descriptor a Tx queues' number diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 932c6d7cf66..b9e9e0d3867 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -38,6 +38,8 @@ #include <linux/random.h> #include <linux/string.h> #include <linux/log2.h> +#include <linux/inetdevice.h> +#include <net/addrconf.h> #define DEBUG #define NEIGH_DEBUG 1 @@ -115,7 +117,7 @@ static void neigh_cleanup_and_release(struct neighbour *neigh) unsigned long neigh_rand_reach_time(unsigned long base) { - return base ? (net_random() % base) + (base >> 1) : 0; + return base ? 
(prandom_u32() % base) + (base >> 1) : 0; } EXPORT_SYMBOL(neigh_rand_reach_time); @@ -497,7 +499,7 @@ struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey, goto out_neigh_release; } - n->confirmed = jiffies - (n->parms->base_reachable_time << 1); + n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1); write_lock_bh(&tbl->lock); nht = rcu_dereference_protected(tbl->nht, @@ -776,7 +778,7 @@ static void neigh_periodic_work(struct work_struct *work) tbl->last_rand = jiffies; for (p = &tbl->parms; p; p = p->next) p->reachable_time = - neigh_rand_reach_time(p->base_reachable_time); + neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME)); } for (i = 0 ; i < (1 << nht->hash_shift); i++) { @@ -799,7 +801,7 @@ static void neigh_periodic_work(struct work_struct *work) if (atomic_read(&n->refcnt) == 1 && (state == NUD_FAILED || - time_after(jiffies, n->used + n->parms->gc_staletime))) { + time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) { *np = n->next; n->dead = 1; write_unlock(&n->lock); @@ -822,12 +824,12 @@ next_elt: lockdep_is_held(&tbl->lock)); } out: - /* Cycle through all hash buckets every base_reachable_time/2 ticks. - * ARP entry timeouts range from 1/2 base_reachable_time to 3/2 - * base_reachable_time. + /* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks. + * ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2 + * BASE_REACHABLE_TIME. */ - schedule_delayed_work(&tbl->gc_work, - tbl->parms.base_reachable_time >> 1); + queue_delayed_work(system_power_efficient_wq, &tbl->gc_work, + NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1); write_unlock_bh(&tbl->lock); } @@ -835,8 +837,9 @@ static __inline__ int neigh_max_probes(struct neighbour *n) { struct neigh_parms *p = n->parms; return (n->nud_state & NUD_PROBE) ? 
- p->ucast_probes : - p->ucast_probes + p->app_probes + p->mcast_probes; + NEIGH_VAR(p, UCAST_PROBES) : + NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) + + NEIGH_VAR(p, MCAST_PROBES); } static void neigh_invalidate(struct neighbour *neigh) @@ -901,12 +904,13 @@ static void neigh_timer_handler(unsigned long arg) neigh_dbg(2, "neigh %p is still alive\n", neigh); next = neigh->confirmed + neigh->parms->reachable_time; } else if (time_before_eq(now, - neigh->used + neigh->parms->delay_probe_time)) { + neigh->used + + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) { neigh_dbg(2, "neigh %p is delayed\n", neigh); neigh->nud_state = NUD_DELAY; neigh->updated = jiffies; neigh_suspect(neigh); - next = now + neigh->parms->delay_probe_time; + next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME); } else { neigh_dbg(2, "neigh %p is suspected\n", neigh); neigh->nud_state = NUD_STALE; @@ -916,7 +920,8 @@ static void neigh_timer_handler(unsigned long arg) } } else if (state & NUD_DELAY) { if (time_before_eq(now, - neigh->confirmed + neigh->parms->delay_probe_time)) { + neigh->confirmed + + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) { neigh_dbg(2, "neigh %p is now reachable\n", neigh); neigh->nud_state = NUD_REACHABLE; neigh->updated = jiffies; @@ -928,11 +933,11 @@ static void neigh_timer_handler(unsigned long arg) neigh->nud_state = NUD_PROBE; neigh->updated = jiffies; atomic_set(&neigh->probes, 0); - next = now + neigh->parms->retrans_time; + next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME); } } else { /* NUD_PROBE|NUD_INCOMPLETE */ - next = now + neigh->parms->retrans_time; + next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME); } if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) && @@ -973,13 +978,16 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) goto out_unlock_bh; if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) { - if (neigh->parms->mcast_probes + neigh->parms->app_probes) { + if (NEIGH_VAR(neigh->parms, MCAST_PROBES) + + 
NEIGH_VAR(neigh->parms, APP_PROBES)) { unsigned long next, now = jiffies; - atomic_set(&neigh->probes, neigh->parms->ucast_probes); + atomic_set(&neigh->probes, + NEIGH_VAR(neigh->parms, UCAST_PROBES)); neigh->nud_state = NUD_INCOMPLETE; neigh->updated = now; - next = now + max(neigh->parms->retrans_time, HZ/2); + next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME), + HZ/2); neigh_add_timer(neigh, next); immediate_probe = true; } else { @@ -994,14 +1002,14 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) neigh_dbg(2, "neigh %p is delayed\n", neigh); neigh->nud_state = NUD_DELAY; neigh->updated = jiffies; - neigh_add_timer(neigh, - jiffies + neigh->parms->delay_probe_time); + neigh_add_timer(neigh, jiffies + + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME)); } if (neigh->nud_state == NUD_INCOMPLETE) { if (skb) { while (neigh->arp_queue_len_bytes + skb->truesize > - neigh->parms->queue_len_bytes) { + NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) { struct sk_buff *buff; buff = __skb_dequeue(&neigh->arp_queue); @@ -1171,7 +1179,7 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, neigh_update_hhs(neigh); if (!(new & NUD_CONNECTED)) neigh->confirmed = jiffies - - (neigh->parms->base_reachable_time << 1); + (NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1); notify = 1; } if (new == old) @@ -1231,6 +1239,21 @@ out: } EXPORT_SYMBOL(neigh_update); +/* Update the neigh to listen temporarily for probe responses, even if it is + * in a NUD_FAILED state. The caller has to hold neigh->lock for writing. 
+ */ +void __neigh_set_probe_once(struct neighbour *neigh) +{ + neigh->updated = jiffies; + if (!(neigh->nud_state & NUD_FAILED)) + return; + neigh->nud_state = NUD_PROBE; + atomic_set(&neigh->probes, NEIGH_VAR(neigh->parms, UCAST_PROBES)); + neigh_add_timer(neigh, + jiffies + NEIGH_VAR(neigh->parms, RETRANS_TIME)); +} +EXPORT_SYMBOL(__neigh_set_probe_once); + struct neighbour *neigh_event_ns(struct neigh_table *tbl, u8 *lladdr, void *saddr, struct net_device *dev) @@ -1392,9 +1415,11 @@ void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p, struct sk_buff *skb) { unsigned long now = jiffies; - unsigned long sched_next = now + (net_random() % p->proxy_delay); - if (tbl->proxy_queue.qlen > p->proxy_qlen) { + unsigned long sched_next = now + (prandom_u32() % + NEIGH_VAR(p, PROXY_DELAY)); + + if (tbl->proxy_queue.qlen > NEIGH_VAR(p, PROXY_QLEN)) { kfree_skb(skb); return; } @@ -1441,7 +1466,7 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev, p->tbl = tbl; atomic_set(&p->refcnt, 1); p->reachable_time = - neigh_rand_reach_time(p->base_reachable_time); + neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME)); dev_hold(dev); p->dev = dev; write_pnet(&p->net, hold_net(net)); @@ -1458,6 +1483,8 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev, p->next = tbl->parms.next; tbl->parms.next = p; write_unlock_bh(&tbl->lock); + + neigh_parms_data_state_cleanall(p); } return p; } @@ -1510,7 +1537,7 @@ static void neigh_table_init_no_netlink(struct neigh_table *tbl) write_pnet(&tbl->parms.net, &init_net); atomic_set(&tbl->parms.refcnt, 1); tbl->parms.reachable_time = - neigh_rand_reach_time(tbl->parms.base_reachable_time); + neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME)); tbl->stats = alloc_percpu(struct neigh_statistics); if (!tbl->stats) @@ -1538,7 +1565,8 @@ static void neigh_table_init_no_netlink(struct neigh_table *tbl) rwlock_init(&tbl->lock); INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work); - 
schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time); + queue_delayed_work(system_power_efficient_wq, &tbl->gc_work, + tbl->parms.reachable_time); setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl); skb_queue_head_init_class(&tbl->proxy_queue, &neigh_table_proxy_queue_class); @@ -1778,24 +1806,32 @@ static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms) if ((parms->dev && nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) || nla_put_u32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)) || - nla_put_u32(skb, NDTPA_QUEUE_LENBYTES, parms->queue_len_bytes) || + nla_put_u32(skb, NDTPA_QUEUE_LENBYTES, + NEIGH_VAR(parms, QUEUE_LEN_BYTES)) || /* approximative value for deprecated QUEUE_LEN (in packets) */ nla_put_u32(skb, NDTPA_QUEUE_LEN, - parms->queue_len_bytes / SKB_TRUESIZE(ETH_FRAME_LEN)) || - nla_put_u32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen) || - nla_put_u32(skb, NDTPA_APP_PROBES, parms->app_probes) || - nla_put_u32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes) || - nla_put_u32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes) || + NEIGH_VAR(parms, QUEUE_LEN_BYTES) / SKB_TRUESIZE(ETH_FRAME_LEN)) || + nla_put_u32(skb, NDTPA_PROXY_QLEN, NEIGH_VAR(parms, PROXY_QLEN)) || + nla_put_u32(skb, NDTPA_APP_PROBES, NEIGH_VAR(parms, APP_PROBES)) || + nla_put_u32(skb, NDTPA_UCAST_PROBES, + NEIGH_VAR(parms, UCAST_PROBES)) || + nla_put_u32(skb, NDTPA_MCAST_PROBES, + NEIGH_VAR(parms, MCAST_PROBES)) || nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time) || nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME, - parms->base_reachable_time) || - nla_put_msecs(skb, NDTPA_GC_STALETIME, parms->gc_staletime) || + NEIGH_VAR(parms, BASE_REACHABLE_TIME)) || + nla_put_msecs(skb, NDTPA_GC_STALETIME, + NEIGH_VAR(parms, GC_STALETIME)) || nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME, - parms->delay_probe_time) || - nla_put_msecs(skb, NDTPA_RETRANS_TIME, parms->retrans_time) || - nla_put_msecs(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay) 
|| - nla_put_msecs(skb, NDTPA_PROXY_DELAY, parms->proxy_delay) || - nla_put_msecs(skb, NDTPA_LOCKTIME, parms->locktime)) + NEIGH_VAR(parms, DELAY_PROBE_TIME)) || + nla_put_msecs(skb, NDTPA_RETRANS_TIME, + NEIGH_VAR(parms, RETRANS_TIME)) || + nla_put_msecs(skb, NDTPA_ANYCAST_DELAY, + NEIGH_VAR(parms, ANYCAST_DELAY)) || + nla_put_msecs(skb, NDTPA_PROXY_DELAY, + NEIGH_VAR(parms, PROXY_DELAY)) || + nla_put_msecs(skb, NDTPA_LOCKTIME, + NEIGH_VAR(parms, LOCKTIME))) goto nla_put_failure; return nla_nest_end(skb, nest); @@ -2011,44 +2047,57 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh) switch (i) { case NDTPA_QUEUE_LEN: - p->queue_len_bytes = nla_get_u32(tbp[i]) * - SKB_TRUESIZE(ETH_FRAME_LEN); + NEIGH_VAR_SET(p, QUEUE_LEN_BYTES, + nla_get_u32(tbp[i]) * + SKB_TRUESIZE(ETH_FRAME_LEN)); break; case NDTPA_QUEUE_LENBYTES: - p->queue_len_bytes = nla_get_u32(tbp[i]); + NEIGH_VAR_SET(p, QUEUE_LEN_BYTES, + nla_get_u32(tbp[i])); break; case NDTPA_PROXY_QLEN: - p->proxy_qlen = nla_get_u32(tbp[i]); + NEIGH_VAR_SET(p, PROXY_QLEN, + nla_get_u32(tbp[i])); break; case NDTPA_APP_PROBES: - p->app_probes = nla_get_u32(tbp[i]); + NEIGH_VAR_SET(p, APP_PROBES, + nla_get_u32(tbp[i])); break; case NDTPA_UCAST_PROBES: - p->ucast_probes = nla_get_u32(tbp[i]); + NEIGH_VAR_SET(p, UCAST_PROBES, + nla_get_u32(tbp[i])); break; case NDTPA_MCAST_PROBES: - p->mcast_probes = nla_get_u32(tbp[i]); + NEIGH_VAR_SET(p, MCAST_PROBES, + nla_get_u32(tbp[i])); break; case NDTPA_BASE_REACHABLE_TIME: - p->base_reachable_time = nla_get_msecs(tbp[i]); + NEIGH_VAR_SET(p, BASE_REACHABLE_TIME, + nla_get_msecs(tbp[i])); break; case NDTPA_GC_STALETIME: - p->gc_staletime = nla_get_msecs(tbp[i]); + NEIGH_VAR_SET(p, GC_STALETIME, + nla_get_msecs(tbp[i])); break; case NDTPA_DELAY_PROBE_TIME: - p->delay_probe_time = nla_get_msecs(tbp[i]); + NEIGH_VAR_SET(p, DELAY_PROBE_TIME, + nla_get_msecs(tbp[i])); break; case NDTPA_RETRANS_TIME: - p->retrans_time = nla_get_msecs(tbp[i]); + NEIGH_VAR_SET(p, 
RETRANS_TIME, + nla_get_msecs(tbp[i])); break; case NDTPA_ANYCAST_DELAY: - p->anycast_delay = nla_get_msecs(tbp[i]); + NEIGH_VAR_SET(p, ANYCAST_DELAY, + nla_get_msecs(tbp[i])); break; case NDTPA_PROXY_DELAY: - p->proxy_delay = nla_get_msecs(tbp[i]); + NEIGH_VAR_SET(p, PROXY_DELAY, + nla_get_msecs(tbp[i])); break; case NDTPA_LOCKTIME: - p->locktime = nla_get_msecs(tbp[i]); + NEIGH_VAR_SET(p, LOCKTIME, + nla_get_msecs(tbp[i])); break; } } @@ -2789,133 +2838,167 @@ static int proc_unres_qlen(struct ctl_table *ctl, int write, return ret; } -enum { - NEIGH_VAR_MCAST_PROBE, - NEIGH_VAR_UCAST_PROBE, - NEIGH_VAR_APP_PROBE, - NEIGH_VAR_RETRANS_TIME, - NEIGH_VAR_BASE_REACHABLE_TIME, - NEIGH_VAR_DELAY_PROBE_TIME, - NEIGH_VAR_GC_STALETIME, - NEIGH_VAR_QUEUE_LEN, - NEIGH_VAR_QUEUE_LEN_BYTES, - NEIGH_VAR_PROXY_QLEN, - NEIGH_VAR_ANYCAST_DELAY, - NEIGH_VAR_PROXY_DELAY, - NEIGH_VAR_LOCKTIME, - NEIGH_VAR_RETRANS_TIME_MS, - NEIGH_VAR_BASE_REACHABLE_TIME_MS, - NEIGH_VAR_GC_INTERVAL, - NEIGH_VAR_GC_THRESH1, - NEIGH_VAR_GC_THRESH2, - NEIGH_VAR_GC_THRESH3, - NEIGH_VAR_MAX -}; +static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev, + int family) +{ + switch (family) { + case AF_INET: + return __in_dev_arp_parms_get_rcu(dev); + case AF_INET6: + return __in6_dev_nd_parms_get_rcu(dev); + } + return NULL; +} + +static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p, + int index) +{ + struct net_device *dev; + int family = neigh_parms_family(p); + + rcu_read_lock(); + for_each_netdev_rcu(net, dev) { + struct neigh_parms *dst_p = + neigh_get_dev_parms_rcu(dev, family); + + if (dst_p && !test_bit(index, dst_p->data_state)) + dst_p->data[index] = p->data[index]; + } + rcu_read_unlock(); +} + +static void neigh_proc_update(struct ctl_table *ctl, int write) +{ + struct net_device *dev = ctl->extra1; + struct neigh_parms *p = ctl->extra2; + struct net *net = neigh_parms_net(p); + int index = (int *) ctl->data - p->data; + + if (!write) + return; + + 
set_bit(index, p->data_state); + if (!dev) /* NULL dev means this is default value */ + neigh_copy_dflt_parms(net, p, index); +} + +static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write, + void __user *buffer, + size_t *lenp, loff_t *ppos) +{ + struct ctl_table tmp = *ctl; + int ret; + + tmp.extra1 = &zero; + tmp.extra2 = &int_max; + + ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); + neigh_proc_update(ctl, write); + return ret; +} + +int neigh_proc_dointvec(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); + + neigh_proc_update(ctl, write); + return ret; +} +EXPORT_SYMBOL(neigh_proc_dointvec); + +int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write, + void __user *buffer, + size_t *lenp, loff_t *ppos) +{ + int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos); + + neigh_proc_update(ctl, write); + return ret; +} +EXPORT_SYMBOL(neigh_proc_dointvec_jiffies); + +static int neigh_proc_dointvec_userhz_jiffies(struct ctl_table *ctl, int write, + void __user *buffer, + size_t *lenp, loff_t *ppos) +{ + int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos); + + neigh_proc_update(ctl, write); + return ret; +} + +int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write, + void __user *buffer, + size_t *lenp, loff_t *ppos) +{ + int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos); + + neigh_proc_update(ctl, write); + return ret; +} +EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies); + +static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write, + void __user *buffer, + size_t *lenp, loff_t *ppos) +{ + int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos); + + neigh_proc_update(ctl, write); + return ret; +} + +#define NEIGH_PARMS_DATA_OFFSET(index) \ + (&((struct neigh_parms *) 0)->data[index]) + +#define NEIGH_SYSCTL_ENTRY(attr, data_attr, name, mval, proc) \ + 
[NEIGH_VAR_ ## attr] = { \ + .procname = name, \ + .data = NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## data_attr), \ + .maxlen = sizeof(int), \ + .mode = mval, \ + .proc_handler = proc, \ + } + +#define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(attr, name) \ + NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_zero_intmax) + +#define NEIGH_SYSCTL_JIFFIES_ENTRY(attr, name) \ + NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_jiffies) + +#define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \ + NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies) + +#define NEIGH_SYSCTL_MS_JIFFIES_ENTRY(attr, name) \ + NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_ms_jiffies) + +#define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \ + NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies) + +#define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(attr, data_attr, name) \ + NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_unres_qlen) static struct neigh_sysctl_table { struct ctl_table_header *sysctl_header; struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1]; } neigh_sysctl_template __read_mostly = { .neigh_vars = { - [NEIGH_VAR_MCAST_PROBE] = { - .procname = "mcast_solicit", - .maxlen = sizeof(int), - .mode = 0644, - .extra1 = &zero, - .extra2 = &int_max, - .proc_handler = proc_dointvec_minmax, - }, - [NEIGH_VAR_UCAST_PROBE] = { - .procname = "ucast_solicit", - .maxlen = sizeof(int), - .mode = 0644, - .extra1 = &zero, - .extra2 = &int_max, - .proc_handler = proc_dointvec_minmax, - }, - [NEIGH_VAR_APP_PROBE] = { - .procname = "app_solicit", - .maxlen = sizeof(int), - .mode = 0644, - .extra1 = &zero, - .extra2 = &int_max, - .proc_handler = proc_dointvec_minmax, - }, - [NEIGH_VAR_RETRANS_TIME] = { - .procname = "retrans_time", - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_userhz_jiffies, - }, - [NEIGH_VAR_BASE_REACHABLE_TIME] = { - .procname = "base_reachable_time", 
- .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - [NEIGH_VAR_DELAY_PROBE_TIME] = { - .procname = "delay_first_probe_time", - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - [NEIGH_VAR_GC_STALETIME] = { - .procname = "gc_stale_time", - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - [NEIGH_VAR_QUEUE_LEN] = { - .procname = "unres_qlen", - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_unres_qlen, - }, - [NEIGH_VAR_QUEUE_LEN_BYTES] = { - .procname = "unres_qlen_bytes", - .maxlen = sizeof(int), - .mode = 0644, - .extra1 = &zero, - .proc_handler = proc_dointvec_minmax, - }, - [NEIGH_VAR_PROXY_QLEN] = { - .procname = "proxy_qlen", - .maxlen = sizeof(int), - .mode = 0644, - .extra1 = &zero, - .extra2 = &int_max, - .proc_handler = proc_dointvec_minmax, - }, - [NEIGH_VAR_ANYCAST_DELAY] = { - .procname = "anycast_delay", - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_userhz_jiffies, - }, - [NEIGH_VAR_PROXY_DELAY] = { - .procname = "proxy_delay", - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_userhz_jiffies, - }, - [NEIGH_VAR_LOCKTIME] = { - .procname = "locktime", - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_userhz_jiffies, - }, - [NEIGH_VAR_RETRANS_TIME_MS] = { - .procname = "retrans_time_ms", - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_ms_jiffies, - }, - [NEIGH_VAR_BASE_REACHABLE_TIME_MS] = { - .procname = "base_reachable_time_ms", - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_ms_jiffies, - }, + NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"), + NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"), + NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"), + NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"), + NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, 
"base_reachable_time"), + NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"), + NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"), + NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"), + NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"), + NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"), + NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"), + NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"), + NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"), + NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"), + NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"), [NEIGH_VAR_GC_INTERVAL] = { .procname = "gc_interval", .maxlen = sizeof(int), @@ -2951,31 +3034,23 @@ static struct neigh_sysctl_table { }; int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, - char *p_name, proc_handler *handler) + proc_handler *handler) { + int i; struct neigh_sysctl_table *t; - const char *dev_name_source = NULL; + const char *dev_name_source; char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ]; + char *p_name; t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL); if (!t) goto err; - t->neigh_vars[NEIGH_VAR_MCAST_PROBE].data = &p->mcast_probes; - t->neigh_vars[NEIGH_VAR_UCAST_PROBE].data = &p->ucast_probes; - t->neigh_vars[NEIGH_VAR_APP_PROBE].data = &p->app_probes; - t->neigh_vars[NEIGH_VAR_RETRANS_TIME].data = &p->retrans_time; - t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].data = &p->base_reachable_time; - t->neigh_vars[NEIGH_VAR_DELAY_PROBE_TIME].data = &p->delay_probe_time; - t->neigh_vars[NEIGH_VAR_GC_STALETIME].data = &p->gc_staletime; - t->neigh_vars[NEIGH_VAR_QUEUE_LEN].data = &p->queue_len_bytes; - t->neigh_vars[NEIGH_VAR_QUEUE_LEN_BYTES].data = &p->queue_len_bytes; - t->neigh_vars[NEIGH_VAR_PROXY_QLEN].data = &p->proxy_qlen; - 
t->neigh_vars[NEIGH_VAR_ANYCAST_DELAY].data = &p->anycast_delay; - t->neigh_vars[NEIGH_VAR_PROXY_DELAY].data = &p->proxy_delay; - t->neigh_vars[NEIGH_VAR_LOCKTIME].data = &p->locktime; - t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].data = &p->retrans_time; - t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].data = &p->base_reachable_time; + for (i = 0; i < ARRAY_SIZE(t->neigh_vars); i++) { + t->neigh_vars[i].data += (long) p; + t->neigh_vars[i].extra1 = dev; + t->neigh_vars[i].extra2 = p; + } if (dev) { dev_name_source = dev->name; @@ -2990,26 +3065,32 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = (int *)(p + 1) + 3; } - if (handler) { /* RetransTime */ t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler; - t->neigh_vars[NEIGH_VAR_RETRANS_TIME].extra1 = dev; /* ReachableTime */ t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler; - t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].extra1 = dev; /* RetransTime (in milliseconds)*/ t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler; - t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].extra1 = dev; /* ReachableTime (in milliseconds) */ t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler; - t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].extra1 = dev; } /* Don't export sysctls to unprivileged users */ if (neigh_parms_net(p)->user_ns != &init_user_ns) t->neigh_vars[0].procname = NULL; + switch (neigh_parms_family(p)) { + case AF_INET: + p_name = "ipv4"; + break; + case AF_INET6: + p_name = "ipv6"; + break; + default: + BUG(); + } + snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s", p_name, dev_name_source); t->sysctl_header = diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index f3edf9635e0..93886246a0b 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -498,17 +498,7 @@ static struct attribute_group wireless_group = { #define net_class_groups NULL #endif /* CONFIG_SYSFS */ -#ifdef 
CONFIG_RPS -/* - * RX queue sysfs structures and functions. - */ -struct rx_queue_attribute { - struct attribute attr; - ssize_t (*show)(struct netdev_rx_queue *queue, - struct rx_queue_attribute *attr, char *buf); - ssize_t (*store)(struct netdev_rx_queue *queue, - struct rx_queue_attribute *attr, const char *buf, size_t len); -}; +#ifdef CONFIG_SYSFS #define to_rx_queue_attr(_attr) container_of(_attr, \ struct rx_queue_attribute, attr) @@ -543,6 +533,7 @@ static const struct sysfs_ops rx_queue_sysfs_ops = { .store = rx_queue_attr_store, }; +#ifdef CONFIG_RPS static ssize_t show_rps_map(struct netdev_rx_queue *queue, struct rx_queue_attribute *attribute, char *buf) { @@ -676,8 +667,8 @@ static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue, while ((mask | (mask >> 1)) != mask) mask |= (mask >> 1); /* On 64 bit arches, must check mask fits in table->mask (u32), - * and on 32bit arches, must check RPS_DEV_FLOW_TABLE_SIZE(mask + 1) - * doesnt overflow. + * and on 32bit arches, must check + * RPS_DEV_FLOW_TABLE_SIZE(mask + 1) doesn't overflow. 
*/ #if BITS_PER_LONG > 32 if (mask > (unsigned long)(u32)mask) @@ -718,16 +709,20 @@ static struct rx_queue_attribute rps_cpus_attribute = static struct rx_queue_attribute rps_dev_flow_table_cnt_attribute = __ATTR(rps_flow_cnt, S_IRUGO | S_IWUSR, show_rps_dev_flow_table_cnt, store_rps_dev_flow_table_cnt); +#endif /* CONFIG_RPS */ static struct attribute *rx_queue_default_attrs[] = { +#ifdef CONFIG_RPS &rps_cpus_attribute.attr, &rps_dev_flow_table_cnt_attribute.attr, +#endif NULL }; static void rx_queue_release(struct kobject *kobj) { struct netdev_rx_queue *queue = to_rx_queue(kobj); +#ifdef CONFIG_RPS struct rps_map *map; struct rps_dev_flow_table *flow_table; @@ -743,15 +738,29 @@ static void rx_queue_release(struct kobject *kobj) RCU_INIT_POINTER(queue->rps_flow_table, NULL); call_rcu(&flow_table->rcu, rps_dev_flow_table_release); } +#endif memset(kobj, 0, sizeof(*kobj)); dev_put(queue->dev); } +static const void *rx_queue_namespace(struct kobject *kobj) +{ + struct netdev_rx_queue *queue = to_rx_queue(kobj); + struct device *dev = &queue->dev->dev; + const void *ns = NULL; + + if (dev->class && dev->class->ns_type) + ns = dev->class->namespace(dev); + + return ns; +} + static struct kobj_type rx_queue_ktype = { .sysfs_ops = &rx_queue_sysfs_ops, .release = rx_queue_release, .default_attrs = rx_queue_default_attrs, + .namespace = rx_queue_namespace }; static int rx_queue_add_kobject(struct net_device *net, int index) @@ -763,25 +772,36 @@ static int rx_queue_add_kobject(struct net_device *net, int index) kobj->kset = net->queues_kset; error = kobject_init_and_add(kobj, &rx_queue_ktype, NULL, "rx-%u", index); - if (error) { - kobject_put(kobj); - return error; + if (error) + goto exit; + + if (net->sysfs_rx_queue_group) { + error = sysfs_create_group(kobj, net->sysfs_rx_queue_group); + if (error) + goto exit; } kobject_uevent(kobj, KOBJ_ADD); dev_hold(queue->dev); return error; +exit: + kobject_put(kobj); + return error; } -#endif /* CONFIG_RPS */ +#endif /* 
CONFIG_SYFS */ int net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num) { -#ifdef CONFIG_RPS +#ifdef CONFIG_SYSFS int i; int error = 0; +#ifndef CONFIG_RPS + if (!net->sysfs_rx_queue_group) + return 0; +#endif for (i = old_num; i < new_num; i++) { error = rx_queue_add_kobject(net, i); if (error) { @@ -790,8 +810,12 @@ net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num) } } - while (--i >= new_num) + while (--i >= new_num) { + if (net->sysfs_rx_queue_group) + sysfs_remove_group(&net->_rx[i].kobj, + net->sysfs_rx_queue_group); kobject_put(&net->_rx[i].kobj); + } return error; #else @@ -1082,10 +1106,23 @@ static void netdev_queue_release(struct kobject *kobj) dev_put(queue->dev); } +static const void *netdev_queue_namespace(struct kobject *kobj) +{ + struct netdev_queue *queue = to_netdev_queue(kobj); + struct device *dev = &queue->dev->dev; + const void *ns = NULL; + + if (dev->class && dev->class->ns_type) + ns = dev->class->namespace(dev); + + return ns; +} + static struct kobj_type netdev_queue_ktype = { .sysfs_ops = &netdev_queue_sysfs_ops, .release = netdev_queue_release, .default_attrs = netdev_queue_default_attrs, + .namespace = netdev_queue_namespace, }; static int netdev_queue_add_kobject(struct net_device *net, int index) @@ -1155,9 +1192,6 @@ static int register_queue_kobjects(struct net_device *net) NULL, &net->dev.kobj); if (!net->queues_kset) return -ENOMEM; -#endif - -#ifdef CONFIG_RPS real_rx = net->real_num_rx_queues; #endif real_tx = net->real_num_tx_queues; @@ -1184,7 +1218,7 @@ static void remove_queue_kobjects(struct net_device *net) { int real_rx = 0, real_tx = 0; -#ifdef CONFIG_RPS +#ifdef CONFIG_SYSFS real_rx = net->real_num_rx_queues; #endif real_tx = net->real_num_tx_queues; @@ -1358,7 +1392,7 @@ void netdev_class_remove_file_ns(struct class_attribute *class_attr, } EXPORT_SYMBOL(netdev_class_remove_file_ns); -int netdev_kobject_init(void) +int __init netdev_kobject_init(void) { 
kobj_ns_type_register(&net_ns_type_operations); return class_register(&net_class); diff --git a/net/core/net-sysfs.h b/net/core/net-sysfs.h index bd7751ec1c4..2745a1b51e0 100644 --- a/net/core/net-sysfs.h +++ b/net/core/net-sysfs.h @@ -1,7 +1,7 @@ #ifndef __NET_SYSFS_H__ #define __NET_SYSFS_H__ -int netdev_kobject_init(void); +int __init netdev_kobject_init(void); int netdev_register_kobject(struct net_device *); void netdev_unregister_kobject(struct net_device *); int net_rx_queue_update_kobjects(struct net_device *, int old_num, int new_num); diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c new file mode 100644 index 00000000000..719efd54166 --- /dev/null +++ b/net/core/netclassid_cgroup.c @@ -0,0 +1,120 @@ +/* + * net/core/netclassid_cgroup.c Classid Cgroupfs Handling + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Authors: Thomas Graf <tgraf@suug.ch> + */ + +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/cgroup.h> +#include <linux/fdtable.h> +#include <net/cls_cgroup.h> +#include <net/sock.h> + +static inline struct cgroup_cls_state *css_cls_state(struct cgroup_subsys_state *css) +{ + return css ? 
container_of(css, struct cgroup_cls_state, css) : NULL; +} + +struct cgroup_cls_state *task_cls_state(struct task_struct *p) +{ + return css_cls_state(task_css(p, net_cls_subsys_id)); +} +EXPORT_SYMBOL_GPL(task_cls_state); + +static struct cgroup_subsys_state * +cgrp_css_alloc(struct cgroup_subsys_state *parent_css) +{ + struct cgroup_cls_state *cs; + + cs = kzalloc(sizeof(*cs), GFP_KERNEL); + if (!cs) + return ERR_PTR(-ENOMEM); + + return &cs->css; +} + +static int cgrp_css_online(struct cgroup_subsys_state *css) +{ + struct cgroup_cls_state *cs = css_cls_state(css); + struct cgroup_cls_state *parent = css_cls_state(css_parent(css)); + + if (parent) + cs->classid = parent->classid; + + return 0; +} + +static void cgrp_css_free(struct cgroup_subsys_state *css) +{ + kfree(css_cls_state(css)); +} + +static int update_classid(const void *v, struct file *file, unsigned n) +{ + int err; + struct socket *sock = sock_from_file(file, &err); + + if (sock) + sock->sk->sk_classid = (u32)(unsigned long)v; + + return 0; +} + +static void cgrp_attach(struct cgroup_subsys_state *css, + struct cgroup_taskset *tset) +{ + struct cgroup_cls_state *cs = css_cls_state(css); + void *v = (void *)(unsigned long)cs->classid; + struct task_struct *p; + + cgroup_taskset_for_each(p, css, tset) { + task_lock(p); + iterate_fd(p->files, 0, update_classid, v); + task_unlock(p); + } +} + +static u64 read_classid(struct cgroup_subsys_state *css, struct cftype *cft) +{ + return css_cls_state(css)->classid; +} + +static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft, + u64 value) +{ + css_cls_state(css)->classid = (u32) value; + + return 0; +} + +static struct cftype ss_files[] = { + { + .name = "classid", + .read_u64 = read_classid, + .write_u64 = write_classid, + }, + { } /* terminate */ +}; + +struct cgroup_subsys net_cls_subsys = { + .name = "net_cls", + .css_alloc = cgrp_css_alloc, + .css_online = cgrp_css_online, + .css_free = cgrp_css_free, + .attach = cgrp_attach, + 
.subsys_id = net_cls_subsys_id, + .base_cftypes = ss_files, + .module = THIS_MODULE, +}; + +static int __init init_netclassid_cgroup(void) +{ + return cgroup_load_subsys(&net_cls_subsys); +} +__initcall(init_netclassid_cgroup); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 19fe9c717ce..c03f3dec476 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -520,8 +520,8 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len) skb->protocol = eth->h_proto = htons(ETH_P_IP); } - memcpy(eth->h_source, np->dev->dev_addr, ETH_ALEN); - memcpy(eth->h_dest, np->remote_mac, ETH_ALEN); + ether_addr_copy(eth->h_source, np->dev->dev_addr); + ether_addr_copy(eth->h_dest, np->remote_mac); skb->dev = np->dev; diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index 56cbb69ba02..9043caedcd0 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -30,7 +30,7 @@ #define PRIOMAP_MIN_SZ 128 /* - * Extend @dev->priomap so that it's large enough to accomodate + * Extend @dev->priomap so that it's large enough to accommodate * @target_idx. @dev->priomap.priomap_len > @target_idx after successful * return. Must be called under rtnl lock. 
*/ diff --git a/net/core/pktgen.c b/net/core/pktgen.c index a797fff7f22..fdac61cac1b 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -389,6 +389,9 @@ struct pktgen_dev { #ifdef CONFIG_XFRM __u8 ipsmode; /* IPSEC mode (config) */ __u8 ipsproto; /* IPSEC type (config) */ + __u32 spi; + struct dst_entry dst; + struct dst_ops dstops; #endif char result[512]; }; @@ -654,8 +657,11 @@ static int pktgen_if_show(struct seq_file *seq, void *v) } #ifdef CONFIG_XFRM - if (pkt_dev->flags & F_IPSEC_ON) + if (pkt_dev->flags & F_IPSEC_ON) { seq_printf(seq, "IPSEC "); + if (pkt_dev->spi) + seq_printf(seq, "spi:%u", pkt_dev->spi); + } #endif if (pkt_dev->flags & F_MACSRC_RND) @@ -1434,7 +1440,7 @@ static ssize_t pktgen_if_write(struct file *file, if (!mac_pton(valstr, pkt_dev->dst_mac)) return -EINVAL; /* Set up Dest MAC */ - memcpy(&pkt_dev->hh[0], pkt_dev->dst_mac, ETH_ALEN); + ether_addr_copy(&pkt_dev->hh[0], pkt_dev->dst_mac); sprintf(pg_result, "OK: dstmac %pM", pkt_dev->dst_mac); return count; @@ -1451,7 +1457,7 @@ static ssize_t pktgen_if_write(struct file *file, if (!mac_pton(valstr, pkt_dev->src_mac)) return -EINVAL; /* Set up Src MAC */ - memcpy(&pkt_dev->hh[6], pkt_dev->src_mac, ETH_ALEN); + ether_addr_copy(&pkt_dev->hh[6], pkt_dev->src_mac); sprintf(pg_result, "OK: srcmac %pM", pkt_dev->src_mac); return count; @@ -1476,7 +1482,18 @@ static ssize_t pktgen_if_write(struct file *file, sprintf(pg_result, "OK: flows=%u", pkt_dev->cflows); return count; } +#ifdef CONFIG_XFRM + if (!strcmp(name, "spi")) { + len = num_arg(&user_buffer[i], 10, &value); + if (len < 0) + return len; + i += len; + pkt_dev->spi = value; + sprintf(pg_result, "OK: spi=%u", pkt_dev->spi); + return count; + } +#endif if (!strcmp(name, "flowlen")) { len = num_arg(&user_buffer[i], 10, &value); if (len < 0) @@ -2043,10 +2060,10 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) /* Default to the interface's mac if not explicitly set. 
*/ if (is_zero_ether_addr(pkt_dev->src_mac)) - memcpy(&(pkt_dev->hh[6]), pkt_dev->odev->dev_addr, ETH_ALEN); + ether_addr_copy(&(pkt_dev->hh[6]), pkt_dev->odev->dev_addr); /* Set up Dest MAC */ - memcpy(&(pkt_dev->hh[0]), pkt_dev->dst_mac, ETH_ALEN); + ether_addr_copy(&(pkt_dev->hh[0]), pkt_dev->dst_mac); if (pkt_dev->flags & F_IPV6) { int i, set = 0, err = 1; @@ -2233,13 +2250,21 @@ static void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow) struct xfrm_state *x = pkt_dev->flows[flow].x; struct pktgen_net *pn = net_generic(dev_net(pkt_dev->odev), pg_net_id); if (!x) { - /*slow path: we dont already have xfrm_state*/ - x = xfrm_stateonly_find(pn->net, DUMMY_MARK, - (xfrm_address_t *)&pkt_dev->cur_daddr, - (xfrm_address_t *)&pkt_dev->cur_saddr, - AF_INET, - pkt_dev->ipsmode, - pkt_dev->ipsproto, 0); + + if (pkt_dev->spi) { + /* We need as quick as possible to find the right SA + * Searching with minimum criteria to archieve this. + */ + x = xfrm_state_lookup_byspi(pn->net, htonl(pkt_dev->spi), AF_INET); + } else { + /* slow path: we dont already have xfrm_state */ + x = xfrm_stateonly_find(pn->net, DUMMY_MARK, + (xfrm_address_t *)&pkt_dev->cur_daddr, + (xfrm_address_t *)&pkt_dev->cur_saddr, + AF_INET, + pkt_dev->ipsmode, + pkt_dev->ipsproto, 0); + } if (x) { pkt_dev->flows[flow].x = x; set_pkt_overhead(pkt_dev); @@ -2475,31 +2500,47 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) #ifdef CONFIG_XFRM +static u32 pktgen_dst_metrics[RTAX_MAX + 1] = { + + [RTAX_HOPLIMIT] = 0x5, /* Set a static hoplimit */ +}; + static int pktgen_output_ipsec(struct sk_buff *skb, struct pktgen_dev *pkt_dev) { struct xfrm_state *x = pkt_dev->flows[pkt_dev->curfl].x; int err = 0; + struct net *net = dev_net(pkt_dev->odev); if (!x) return 0; /* XXX: we dont support tunnel mode for now until * we resolve the dst issue */ - if (x->props.mode != XFRM_MODE_TRANSPORT) + if ((x->props.mode != XFRM_MODE_TRANSPORT) && (pkt_dev->spi == 0)) return 0; - spin_lock(&x->lock); + /* But when 
user specify an valid SPI, transformation + * supports both transport/tunnel mode + ESP/AH type. + */ + if ((x->props.mode == XFRM_MODE_TUNNEL) && (pkt_dev->spi != 0)) + skb->_skb_refdst = (unsigned long)&pkt_dev->dst | SKB_DST_NOREF; + rcu_read_lock_bh(); err = x->outer_mode->output(x, skb); - if (err) + rcu_read_unlock_bh(); + if (err) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEMODEERROR); goto error; + } err = x->type->output(x, skb); - if (err) + if (err) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEPROTOERROR); goto error; - + } + spin_lock_bh(&x->lock); x->curlft.bytes += skb->len; x->curlft.packets++; + spin_unlock_bh(&x->lock); error: - spin_unlock(&x->lock); return err; } @@ -3542,6 +3583,17 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) #ifdef CONFIG_XFRM pkt_dev->ipsmode = XFRM_MODE_TRANSPORT; pkt_dev->ipsproto = IPPROTO_ESP; + + /* xfrm tunnel mode needs additional dst to extract outter + * ip header protocol/ttl/id field, here creat a phony one. + * instead of looking for a valid rt, which definitely hurting + * performance under such circumstance. 
+ */ + pkt_dev->dstops.family = AF_INET; + pkt_dev->dst.dev = pkt_dev->odev; + dst_init_metrics(&pkt_dev->dst, pktgen_dst_metrics, false); + pkt_dev->dst.child = &pkt_dev->dst; + pkt_dev->dst.ops = &pkt_dev->dstops; #endif return add_dev_to_thread(t, pkt_dev); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index cf67144d3e3..393b1bc9a61 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -365,6 +365,22 @@ void rtnl_link_unregister(struct rtnl_link_ops *ops) } EXPORT_SYMBOL_GPL(rtnl_link_unregister); +static size_t rtnl_link_get_slave_info_data_size(const struct net_device *dev) +{ + struct net_device *master_dev; + const struct rtnl_link_ops *ops; + + master_dev = netdev_master_upper_dev_get((struct net_device *) dev); + if (!master_dev) + return 0; + ops = master_dev->rtnl_link_ops; + if (!ops->get_slave_size) + return 0; + /* IFLA_INFO_SLAVE_DATA + nested data */ + return nla_total_size(sizeof(struct nlattr)) + + ops->get_slave_size(master_dev, dev); +} + static size_t rtnl_link_get_size(const struct net_device *dev) { const struct rtnl_link_ops *ops = dev->rtnl_link_ops; @@ -385,6 +401,8 @@ static size_t rtnl_link_get_size(const struct net_device *dev) /* IFLA_INFO_XSTATS */ size += nla_total_size(ops->get_xstats_size(dev)); + size += rtnl_link_get_slave_info_data_size(dev); + return size; } @@ -403,34 +421,16 @@ static const struct rtnl_af_ops *rtnl_af_lookup(const int family) } /** - * __rtnl_af_register - Register rtnl_af_ops with rtnetlink. - * @ops: struct rtnl_af_ops * to register - * - * The caller must hold the rtnl_mutex. - * - * Returns 0 on success or a negative error code. - */ -int __rtnl_af_register(struct rtnl_af_ops *ops) -{ - list_add_tail(&ops->list, &rtnl_af_ops); - return 0; -} -EXPORT_SYMBOL_GPL(__rtnl_af_register); - -/** * rtnl_af_register - Register rtnl_af_ops with rtnetlink. * @ops: struct rtnl_af_ops * to register * * Returns 0 on success or a negative error code. 
*/ -int rtnl_af_register(struct rtnl_af_ops *ops) +void rtnl_af_register(struct rtnl_af_ops *ops) { - int err; - rtnl_lock(); - err = __rtnl_af_register(ops); + list_add_tail(&ops->list, &rtnl_af_ops); rtnl_unlock(); - return err; } EXPORT_SYMBOL_GPL(rtnl_af_register); @@ -477,40 +477,100 @@ static size_t rtnl_link_get_af_size(const struct net_device *dev) return size; } -static int rtnl_link_fill(struct sk_buff *skb, const struct net_device *dev) +static bool rtnl_have_link_slave_info(const struct net_device *dev) { - const struct rtnl_link_ops *ops = dev->rtnl_link_ops; - struct nlattr *linkinfo, *data; - int err = -EMSGSIZE; + struct net_device *master_dev; - linkinfo = nla_nest_start(skb, IFLA_LINKINFO); - if (linkinfo == NULL) - goto out; + master_dev = netdev_master_upper_dev_get((struct net_device *) dev); + if (master_dev && master_dev->rtnl_link_ops) + return true; + return false; +} + +static int rtnl_link_slave_info_fill(struct sk_buff *skb, + const struct net_device *dev) +{ + struct net_device *master_dev; + const struct rtnl_link_ops *ops; + struct nlattr *slave_data; + int err; + + master_dev = netdev_master_upper_dev_get((struct net_device *) dev); + if (!master_dev) + return 0; + ops = master_dev->rtnl_link_ops; + if (!ops) + return 0; + if (nla_put_string(skb, IFLA_INFO_SLAVE_KIND, ops->kind) < 0) + return -EMSGSIZE; + if (ops->fill_slave_info) { + slave_data = nla_nest_start(skb, IFLA_INFO_SLAVE_DATA); + if (!slave_data) + return -EMSGSIZE; + err = ops->fill_slave_info(skb, master_dev, dev); + if (err < 0) + goto err_cancel_slave_data; + nla_nest_end(skb, slave_data); + } + return 0; + +err_cancel_slave_data: + nla_nest_cancel(skb, slave_data); + return err; +} + +static int rtnl_link_info_fill(struct sk_buff *skb, + const struct net_device *dev) +{ + const struct rtnl_link_ops *ops = dev->rtnl_link_ops; + struct nlattr *data; + int err; + if (!ops) + return 0; if (nla_put_string(skb, IFLA_INFO_KIND, ops->kind) < 0) - goto err_cancel_link; + 
return -EMSGSIZE; if (ops->fill_xstats) { err = ops->fill_xstats(skb, dev); if (err < 0) - goto err_cancel_link; + return err; } if (ops->fill_info) { data = nla_nest_start(skb, IFLA_INFO_DATA); - if (data == NULL) { - err = -EMSGSIZE; - goto err_cancel_link; - } + if (data == NULL) + return -EMSGSIZE; err = ops->fill_info(skb, dev); if (err < 0) goto err_cancel_data; nla_nest_end(skb, data); } - - nla_nest_end(skb, linkinfo); return 0; err_cancel_data: nla_nest_cancel(skb, data); + return err; +} + +static int rtnl_link_fill(struct sk_buff *skb, const struct net_device *dev) +{ + struct nlattr *linkinfo; + int err = -EMSGSIZE; + + linkinfo = nla_nest_start(skb, IFLA_LINKINFO); + if (linkinfo == NULL) + goto out; + + err = rtnl_link_info_fill(skb, dev); + if (err < 0) + goto err_cancel_link; + + err = rtnl_link_slave_info_fill(skb, dev); + if (err < 0) + goto err_cancel_link; + + nla_nest_end(skb, linkinfo); + return 0; + err_cancel_link: nla_nest_cancel(skb, linkinfo); out: @@ -1019,7 +1079,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, if (rtnl_port_fill(skb, dev)) goto nla_put_failure; - if (dev->rtnl_link_ops) { + if (dev->rtnl_link_ops || rtnl_have_link_slave_info(dev)) { if (rtnl_link_fill(skb, dev) < 0) goto nla_put_failure; } @@ -1142,6 +1202,8 @@ EXPORT_SYMBOL(ifla_policy); static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { [IFLA_INFO_KIND] = { .type = NLA_STRING }, [IFLA_INFO_DATA] = { .type = NLA_NESTED }, + [IFLA_INFO_SLAVE_KIND] = { .type = NLA_STRING }, + [IFLA_INFO_SLAVE_DATA] = { .type = NLA_NESTED }, }; static const struct nla_policy ifla_vfinfo_policy[IFLA_VF_INFO_MAX+1] = { @@ -1729,7 +1791,9 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); const struct rtnl_link_ops *ops; + const struct rtnl_link_ops *m_ops = NULL; struct net_device *dev; + struct net_device *master_dev = NULL; struct ifinfomsg *ifm; char kind[MODULE_NAME_LEN]; char 
ifname[IFNAMSIZ]; @@ -1759,6 +1823,12 @@ replay: dev = NULL; } + if (dev) { + master_dev = netdev_master_upper_dev_get(dev); + if (master_dev) + m_ops = master_dev->rtnl_link_ops; + } + err = validate_linkmsg(dev, tb); if (err < 0) return err; @@ -1780,7 +1850,10 @@ replay: } if (1) { - struct nlattr *attr[ops ? ops->maxtype + 1 : 0], **data = NULL; + struct nlattr *attr[ops ? ops->maxtype + 1 : 0]; + struct nlattr *slave_attr[m_ops ? m_ops->slave_maxtype + 1 : 0]; + struct nlattr **data = NULL; + struct nlattr **slave_data = NULL; struct net *dest_net; if (ops) { @@ -1799,6 +1872,24 @@ replay: } } + if (m_ops) { + if (m_ops->slave_maxtype && + linkinfo[IFLA_INFO_SLAVE_DATA]) { + err = nla_parse_nested(slave_attr, + m_ops->slave_maxtype, + linkinfo[IFLA_INFO_SLAVE_DATA], + m_ops->slave_policy); + if (err < 0) + return err; + slave_data = slave_attr; + } + if (m_ops->slave_validate) { + err = m_ops->slave_validate(tb, slave_data); + if (err < 0) + return err; + } + } + if (dev) { int modified = 0; @@ -1818,6 +1909,17 @@ replay: modified = 1; } + if (linkinfo[IFLA_INFO_SLAVE_DATA]) { + if (!m_ops || !m_ops->slave_changelink) + return -EOPNOTSUPP; + + err = m_ops->slave_changelink(master_dev, dev, + tb, slave_data); + if (err < 0) + return err; + modified = 1; + } + return do_setlink(dev, ifm, tb, ifname, modified); } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 0b5149c5bc4..8f519dbb358 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -65,6 +65,7 @@ #include <net/dst.h> #include <net/sock.h> #include <net/checksum.h> +#include <net/ip6_checksum.h> #include <net/xfrm.h> #include <asm/uaccess.h> @@ -682,9 +683,8 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->inner_network_header = old->inner_network_header; new->inner_mac_header = old->inner_mac_header; skb_dst_copy(new, old); - new->rxhash = old->rxhash; + skb_copy_hash(new, old); new->ooo_okay = old->ooo_okay; - new->l4_rxhash = old->l4_rxhash; new->no_fcs = 
old->no_fcs; new->encapsulation = old->encapsulation; #ifdef CONFIG_XFRM @@ -2092,6 +2092,91 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, } EXPORT_SYMBOL(skb_copy_and_csum_bits); + /** + * skb_zerocopy_headlen - Calculate headroom needed for skb_zerocopy() + * @from: source buffer + * + * Calculates the amount of linear headroom needed in the 'to' skb passed + * into skb_zerocopy(). + */ +unsigned int +skb_zerocopy_headlen(const struct sk_buff *from) +{ + unsigned int hlen = 0; + + if (!from->head_frag || + skb_headlen(from) < L1_CACHE_BYTES || + skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS) + hlen = skb_headlen(from); + + if (skb_has_frag_list(from)) + hlen = from->len; + + return hlen; +} +EXPORT_SYMBOL_GPL(skb_zerocopy_headlen); + +/** + * skb_zerocopy - Zero copy skb to skb + * @to: destination buffer + * @source: source buffer + * @len: number of bytes to copy from source buffer + * @hlen: size of linear headroom in destination buffer + * + * Copies up to `len` bytes from `from` to `to` by creating references + * to the frags in the source buffer. + * + * The `hlen` as calculated by skb_zerocopy_headlen() specifies the + * headroom in the `to` buffer. 
+ */ +void +skb_zerocopy(struct sk_buff *to, const struct sk_buff *from, int len, int hlen) +{ + int i, j = 0; + int plen = 0; /* length of skb->head fragment */ + struct page *page; + unsigned int offset; + + BUG_ON(!from->head_frag && !hlen); + + /* dont bother with small payloads */ + if (len <= skb_tailroom(to)) { + skb_copy_bits(from, 0, skb_put(to, len), len); + return; + } + + if (hlen) { + skb_copy_bits(from, 0, skb_put(to, hlen), hlen); + len -= hlen; + } else { + plen = min_t(int, skb_headlen(from), len); + if (plen) { + page = virt_to_head_page(from->head); + offset = from->data - (unsigned char *)page_address(page); + __skb_fill_page_desc(to, 0, page, offset, plen); + get_page(page); + j = 1; + len -= plen; + } + } + + to->truesize += len + plen; + to->len += len + plen; + to->data_len += len + plen; + + for (i = 0; i < skb_shinfo(from)->nr_frags; i++) { + if (!len) + break; + skb_shinfo(to)->frags[j] = skb_shinfo(from)->frags[i]; + skb_shinfo(to)->frags[j].size = min_t(int, skb_shinfo(to)->frags[j].size, len); + len -= skb_shinfo(to)->frags[j].size; + skb_frag_ref(to, j); + j++; + } + skb_shinfo(to)->nr_frags = j; +} +EXPORT_SYMBOL_GPL(skb_zerocopy); + void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to) { __wsum csum; @@ -2952,10 +3037,7 @@ perform_csum_check: return segs; err: - while ((skb = segs)) { - segs = skb->next; - kfree_skb(skb); - } + kfree_skb_list(segs); return ERR_PTR(err); } EXPORT_SYMBOL_GPL(skb_segment); @@ -3438,6 +3520,278 @@ bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off) } EXPORT_SYMBOL_GPL(skb_partial_csum_set); +static int skb_maybe_pull_tail(struct sk_buff *skb, unsigned int len, + unsigned int max) +{ + if (skb_headlen(skb) >= len) + return 0; + + /* If we need to pullup then pullup to the max, so we + * won't need to do it again. 
+ */ + if (max > skb->len) + max = skb->len; + + if (__pskb_pull_tail(skb, max - skb_headlen(skb)) == NULL) + return -ENOMEM; + + if (skb_headlen(skb) < len) + return -EPROTO; + + return 0; +} + +/* This value should be large enough to cover a tagged ethernet header plus + * maximally sized IP and TCP or UDP headers. + */ +#define MAX_IP_HDR_LEN 128 + +static int skb_checksum_setup_ip(struct sk_buff *skb, bool recalculate) +{ + unsigned int off; + bool fragment; + int err; + + fragment = false; + + err = skb_maybe_pull_tail(skb, + sizeof(struct iphdr), + MAX_IP_HDR_LEN); + if (err < 0) + goto out; + + if (ip_hdr(skb)->frag_off & htons(IP_OFFSET | IP_MF)) + fragment = true; + + off = ip_hdrlen(skb); + + err = -EPROTO; + + if (fragment) + goto out; + + switch (ip_hdr(skb)->protocol) { + case IPPROTO_TCP: + err = skb_maybe_pull_tail(skb, + off + sizeof(struct tcphdr), + MAX_IP_HDR_LEN); + if (err < 0) + goto out; + + if (!skb_partial_csum_set(skb, off, + offsetof(struct tcphdr, check))) { + err = -EPROTO; + goto out; + } + + if (recalculate) + tcp_hdr(skb)->check = + ~csum_tcpudp_magic(ip_hdr(skb)->saddr, + ip_hdr(skb)->daddr, + skb->len - off, + IPPROTO_TCP, 0); + break; + case IPPROTO_UDP: + err = skb_maybe_pull_tail(skb, + off + sizeof(struct udphdr), + MAX_IP_HDR_LEN); + if (err < 0) + goto out; + + if (!skb_partial_csum_set(skb, off, + offsetof(struct udphdr, check))) { + err = -EPROTO; + goto out; + } + + if (recalculate) + udp_hdr(skb)->check = + ~csum_tcpudp_magic(ip_hdr(skb)->saddr, + ip_hdr(skb)->daddr, + skb->len - off, + IPPROTO_UDP, 0); + break; + default: + goto out; + } + + err = 0; + +out: + return err; +} + +/* This value should be large enough to cover a tagged ethernet header plus + * an IPv6 header, all options, and a maximal TCP or UDP header. 
+ */ +#define MAX_IPV6_HDR_LEN 256 + +#define OPT_HDR(type, skb, off) \ + (type *)(skb_network_header(skb) + (off)) + +static int skb_checksum_setup_ipv6(struct sk_buff *skb, bool recalculate) +{ + int err; + u8 nexthdr; + unsigned int off; + unsigned int len; + bool fragment; + bool done; + + fragment = false; + done = false; + + off = sizeof(struct ipv6hdr); + + err = skb_maybe_pull_tail(skb, off, MAX_IPV6_HDR_LEN); + if (err < 0) + goto out; + + nexthdr = ipv6_hdr(skb)->nexthdr; + + len = sizeof(struct ipv6hdr) + ntohs(ipv6_hdr(skb)->payload_len); + while (off <= len && !done) { + switch (nexthdr) { + case IPPROTO_DSTOPTS: + case IPPROTO_HOPOPTS: + case IPPROTO_ROUTING: { + struct ipv6_opt_hdr *hp; + + err = skb_maybe_pull_tail(skb, + off + + sizeof(struct ipv6_opt_hdr), + MAX_IPV6_HDR_LEN); + if (err < 0) + goto out; + + hp = OPT_HDR(struct ipv6_opt_hdr, skb, off); + nexthdr = hp->nexthdr; + off += ipv6_optlen(hp); + break; + } + case IPPROTO_AH: { + struct ip_auth_hdr *hp; + + err = skb_maybe_pull_tail(skb, + off + + sizeof(struct ip_auth_hdr), + MAX_IPV6_HDR_LEN); + if (err < 0) + goto out; + + hp = OPT_HDR(struct ip_auth_hdr, skb, off); + nexthdr = hp->nexthdr; + off += ipv6_authlen(hp); + break; + } + case IPPROTO_FRAGMENT: { + struct frag_hdr *hp; + + err = skb_maybe_pull_tail(skb, + off + + sizeof(struct frag_hdr), + MAX_IPV6_HDR_LEN); + if (err < 0) + goto out; + + hp = OPT_HDR(struct frag_hdr, skb, off); + + if (hp->frag_off & htons(IP6_OFFSET | IP6_MF)) + fragment = true; + + nexthdr = hp->nexthdr; + off += sizeof(struct frag_hdr); + break; + } + default: + done = true; + break; + } + } + + err = -EPROTO; + + if (!done || fragment) + goto out; + + switch (nexthdr) { + case IPPROTO_TCP: + err = skb_maybe_pull_tail(skb, + off + sizeof(struct tcphdr), + MAX_IPV6_HDR_LEN); + if (err < 0) + goto out; + + if (!skb_partial_csum_set(skb, off, + offsetof(struct tcphdr, check))) { + err = -EPROTO; + goto out; + } + + if (recalculate) + tcp_hdr(skb)->check = + 
~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, + &ipv6_hdr(skb)->daddr, + skb->len - off, + IPPROTO_TCP, 0); + break; + case IPPROTO_UDP: + err = skb_maybe_pull_tail(skb, + off + sizeof(struct udphdr), + MAX_IPV6_HDR_LEN); + if (err < 0) + goto out; + + if (!skb_partial_csum_set(skb, off, + offsetof(struct udphdr, check))) { + err = -EPROTO; + goto out; + } + + if (recalculate) + udp_hdr(skb)->check = + ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, + &ipv6_hdr(skb)->daddr, + skb->len - off, + IPPROTO_UDP, 0); + break; + default: + goto out; + } + + err = 0; + +out: + return err; +} + +/** + * skb_checksum_setup - set up partial checksum offset + * @skb: the skb to set up + * @recalculate: if true the pseudo-header checksum will be recalculated + */ +int skb_checksum_setup(struct sk_buff *skb, bool recalculate) +{ + int err; + + switch (skb->protocol) { + case htons(ETH_P_IP): + err = skb_checksum_setup_ip(skb, recalculate); + break; + + case htons(ETH_P_IPV6): + err = skb_checksum_setup_ipv6(skb, recalculate); + break; + + default: + err = -EPROTO; + break; + } + + return err; +} +EXPORT_SYMBOL(skb_checksum_setup); + void __skb_warn_lro_forwarding(const struct sk_buff *skb) { net_warn_ratelimited("%s: received packets cannot be forwarded while LRO is enabled\n", diff --git a/net/core/sock.c b/net/core/sock.c index 5393b4b719d..0c127dcdf6a 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -925,8 +925,8 @@ set_rcvbuf: EXPORT_SYMBOL(sock_setsockopt); -void cred_to_ucred(struct pid *pid, const struct cred *cred, - struct ucred *ucred) +static void cred_to_ucred(struct pid *pid, const struct cred *cred, + struct ucred *ucred) { ucred->pid = pid_vnr(pid); ucred->uid = ucred->gid = -1; @@ -937,7 +937,6 @@ void cred_to_ucred(struct pid *pid, const struct cred *cred, ucred->gid = from_kgid_munged(current_ns, cred->egid); } } -EXPORT_SYMBOL_GPL(cred_to_ucred); int sock_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) @@ -1168,6 +1167,10 
@@ int sock_getsockopt(struct socket *sock, int level, int optname, v.val = sock_flag(sk, SOCK_FILTER_LOCKED); break; + case SO_BPF_EXTENSIONS: + v.val = bpf_tell_extensions(); + break; + case SO_SELECT_ERR_QUEUE: v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE); break; @@ -1308,19 +1311,7 @@ static void sk_prot_free(struct proto *prot, struct sock *sk) module_put(owner); } -#if IS_ENABLED(CONFIG_NET_CLS_CGROUP) -void sock_update_classid(struct sock *sk) -{ - u32 classid; - - classid = task_cls_classid(current); - if (classid != sk->sk_classid) - sk->sk_classid = classid; -} -EXPORT_SYMBOL(sock_update_classid); -#endif - -#if IS_ENABLED(CONFIG_NETPRIO_CGROUP) +#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO) void sock_update_netprioidx(struct sock *sk) { if (in_interrupt()) @@ -1666,22 +1657,6 @@ struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, EXPORT_SYMBOL(sock_wmalloc); /* - * Allocate a skb from the socket's receive buffer. - */ -struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, - gfp_t priority) -{ - if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) { - struct sk_buff *skb = alloc_skb(size, priority); - if (skb) { - skb_set_owner_r(skb, sk); - return skb; - } - } - return NULL; -} - -/* * Allocate a memory block from the socket's option memory buffer. */ void *sock_kmalloc(struct sock *sk, int size, gfp_t priority) @@ -1865,9 +1840,7 @@ bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t prio) put_page(pfrag->page); } - /* We restrict high order allocations to users that can afford to wait */ - order = (prio & __GFP_WAIT) ? 
SKB_FRAG_PAGE_ORDER : 0; - + order = SKB_FRAG_PAGE_ORDER; do { gfp_t gfp = prio; diff --git a/net/core/stream.c b/net/core/stream.c index 512f0a24269..301c05f2606 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -122,7 +122,7 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p) DEFINE_WAIT(wait); if (sk_stream_memory_free(sk)) - current_timeo = vm_wait = (net_random() % (HZ / 5)) + 2; + current_timeo = vm_wait = (prandom_u32() % (HZ / 5)) + 2; while (1) { set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index cca44419090..cf9cd13509a 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -122,7 +122,8 @@ static int flow_limit_cpu_sysctl(struct ctl_table *table, int write, synchronize_rcu(); kfree(cur); } else if (!cur && cpumask_test_cpu(i, mask)) { - cur = kzalloc(len, GFP_KERNEL); + cur = kzalloc_node(len, GFP_KERNEL, + cpu_to_node(i)); if (!cur) { /* not unwinding previous changes */ ret = -ENOMEM; diff --git a/net/dcb/dcbevent.c b/net/dcb/dcbevent.c index 4f72fc40bf0..a520d8004d8 100644 --- a/net/dcb/dcbevent.c +++ b/net/dcb/dcbevent.c @@ -11,8 +11,7 @@ * more details. * * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. + * this program; if not, see <http://www.gnu.org/licenses/>. * * Author: John Fastabend <john.r.fastabend@intel.com> */ diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index 40d5829ed36..55364440267 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -11,8 +11,7 @@ * more details. * * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. + * this program; if not, see <http://www.gnu.org/licenses/>. 
* * Author: Lucy Liu <lucy.liu@intel.com> */ @@ -1689,21 +1688,17 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh) if (!tb[DCB_ATTR_IFNAME]) return -EINVAL; - netdev = dev_get_by_name(net, nla_data(tb[DCB_ATTR_IFNAME])); + netdev = __dev_get_by_name(net, nla_data(tb[DCB_ATTR_IFNAME])); if (!netdev) return -ENODEV; - if (!netdev->dcbnl_ops) { - ret = -EOPNOTSUPP; - goto out; - } + if (!netdev->dcbnl_ops) + return -EOPNOTSUPP; reply_skb = dcbnl_newmsg(fn->type, dcb->cmd, portid, nlh->nlmsg_seq, nlh->nlmsg_flags, &reply_nlh); - if (!reply_skb) { - ret = -ENOBUFS; - goto out; - } + if (!reply_skb) + return -ENOBUFS; ret = fn->cb(netdev, nlh, nlh->nlmsg_seq, tb, reply_skb); if (ret < 0) { @@ -1715,7 +1710,6 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh) ret = rtnl_unicast(reply_skb, net, portid); out: - dev_put(netdev); return ret; } diff --git a/net/dccp/ccids/lib/tfrc.c b/net/dccp/ccids/lib/tfrc.c index 62b5828acde..c073b81a1f3 100644 --- a/net/dccp/ccids/lib/tfrc.c +++ b/net/dccp/ccids/lib/tfrc.c @@ -8,7 +8,7 @@ #include "tfrc.h" #ifdef CONFIG_IP_DCCP_TFRC_DEBUG -bool tfrc_debug; +static bool tfrc_debug; module_param(tfrc_debug, bool, 0644); MODULE_PARM_DESC(tfrc_debug, "Enable TFRC debug messages"); #endif diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h index 40ee7d62b65..a3d8f7c76ae 100644 --- a/net/dccp/ccids/lib/tfrc.h +++ b/net/dccp/ccids/lib/tfrc.h @@ -21,7 +21,6 @@ #include "packet_history.h" #ifdef CONFIG_IP_DCCP_TFRC_DEBUG -extern bool tfrc_debug; #define tfrc_pr_debug(format, a...) DCCP_PR_DEBUG(tfrc_debug, format, ##a) #else #define tfrc_pr_debug(format, a...) 
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 30948784dd5..c67816647cc 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -479,7 +479,6 @@ void dccp_feat_list_purge(struct list_head *fn_list); int dccp_insert_options(struct sock *sk, struct sk_buff *skb); int dccp_insert_options_rsk(struct dccp_request_sock *, struct sk_buff *); -int dccp_insert_option_elapsed_time(struct sk_buff *skb, u32 elapsed); u32 dccp_timestamp(void); void dccp_timestamping_init(void); int dccp_insert_option(struct sk_buff *skb, unsigned char option, diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index d9f65fc66db..22b5d818b20 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -75,7 +75,7 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) rt = ip_route_connect(fl4, nexthop, inet->inet_saddr, RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, IPPROTO_DCCP, - orig_sport, orig_dport, sk, true); + orig_sport, orig_dport, sk); if (IS_ERR(rt)) return PTR_ERR(rt); @@ -989,6 +989,7 @@ static const struct net_protocol dccp_v4_protocol = { .err_handler = dccp_v4_err, .no_policy = 1, .netns_ok = 1, + .icmp_strict_tag_validation = 1, }; static const struct proto_ops inet_dccp_ops = { diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 2b90a786e47..4db3c2a1679 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -141,6 +141,9 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (type == ICMPV6_PKT_TOOBIG) { struct dst_entry *dst = NULL; + if (!ip6_sk_accept_pmtu(sk)) + goto out; + if (sock_owned_by_user(sk)) goto out; if ((1 << sk->sk_state) & (DCCPF_LISTEN | DCCPF_CLOSED)) @@ -237,7 +240,7 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req) final_p = fl6_update_dst(&fl6, np->opt, &final); - dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); + dst = ip6_dst_lookup_flow(sk, &fl6, final_p); if (IS_ERR(dst)) { err = PTR_ERR(dst); dst = NULL; @@ -301,7 +304,7 @@ static void dccp_v6_ctl_send_reset(struct sock *sk, struct 
sk_buff *rxskb) security_skb_classify_flow(rxskb, flowi6_to_flowi(&fl6)); /* sk = NULL, but it is safe for now. RST socket required. */ - dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL, false); + dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL); if (!IS_ERR(dst)) { skb_dst_set(skb, dst); ip6_xmit(ctl_sk, skb, &fl6, NULL, 0); @@ -512,7 +515,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, fl6.fl6_sport = htons(ireq->ir_num); security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); + dst = ip6_dst_lookup_flow(sk, &fl6, final_p); if (IS_ERR(dst)) goto out; } @@ -931,7 +934,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, final_p = fl6_update_dst(&fl6, np->opt, &final); - dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true); + dst = ip6_dst_lookup_flow(sk, &fl6, final_p); if (IS_ERR(dst)) { err = PTR_ERR(dst); goto failure; diff --git a/net/dccp/options.c b/net/dccp/options.c index a58e0b63405..9bce31886bd 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -343,38 +343,6 @@ static inline int dccp_elapsed_time_len(const u32 elapsed_time) return elapsed_time == 0 ? 0 : elapsed_time <= 0xFFFF ? 
2 : 4; } -/* FIXME: This function is currently not used anywhere */ -int dccp_insert_option_elapsed_time(struct sk_buff *skb, u32 elapsed_time) -{ - const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time); - const int len = 2 + elapsed_time_len; - unsigned char *to; - - if (elapsed_time_len == 0) - return 0; - - if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) - return -1; - - DCCP_SKB_CB(skb)->dccpd_opt_len += len; - - to = skb_push(skb, len); - *to++ = DCCPO_ELAPSED_TIME; - *to++ = len; - - if (elapsed_time_len == 2) { - const __be16 var16 = htons((u16)elapsed_time); - memcpy(to, &var16, 2); - } else { - const __be32 var32 = htonl(elapsed_time); - memcpy(to, &var32, 4); - } - - return 0; -} - -EXPORT_SYMBOL_GPL(dccp_insert_option_elapsed_time); - static int dccp_insert_option_timestamp(struct sk_buff *skb) { __be32 now = htonl(dccp_timestamp()); diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index dd4d506ef92..2954dcbca83 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -1808,6 +1808,7 @@ out: rv = (flags & MSG_PEEK) ? 
-sk->sk_err : sock_error(sk); if ((rv >= 0) && msg->msg_name) { + __sockaddr_check_size(sizeof(struct sockaddr_dn)); memcpy(msg->msg_name, &scp->peer, sizeof(struct sockaddr_dn)); msg->msg_namelen = sizeof(struct sockaddr_dn); } @@ -1914,7 +1915,7 @@ static int dn_sendmsg(struct kiocb *iocb, struct socket *sock, int err = 0; size_t sent = 0; int addr_len = msg->msg_namelen; - struct sockaddr_dn *addr = (struct sockaddr_dn *)msg->msg_name; + DECLARE_SOCKADDR(struct sockaddr_dn *, addr, msg->msg_name); struct sk_buff *skb = NULL; struct dn_skb_cb *cb; size_t len; diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index dd0dfb25f4b..a603823a3e2 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -561,6 +561,7 @@ static const struct nla_policy dn_ifa_policy[IFA_MAX+1] = { [IFA_LOCAL] = { .type = NLA_U16 }, [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, + [IFA_FLAGS] = { .type = NLA_U32 }, }; static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh) @@ -648,7 +649,8 @@ static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) ifa->ifa_local = nla_get_le16(tb[IFA_LOCAL]); ifa->ifa_address = nla_get_le16(tb[IFA_ADDRESS]); - ifa->ifa_flags = ifm->ifa_flags; + ifa->ifa_flags = tb[IFA_FLAGS] ? 
nla_get_u32(tb[IFA_FLAGS]) : + ifm->ifa_flags; ifa->ifa_scope = ifm->ifa_scope; ifa->ifa_dev = dn_db; @@ -669,7 +671,8 @@ static inline size_t dn_ifaddr_nlmsg_size(void) return NLMSG_ALIGN(sizeof(struct ifaddrmsg)) + nla_total_size(IFNAMSIZ) /* IFA_LABEL */ + nla_total_size(2) /* IFA_ADDRESS */ - + nla_total_size(2); /* IFA_LOCAL */ + + nla_total_size(2) /* IFA_LOCAL */ + + nla_total_size(4); /* IFA_FLAGS */ } static int dn_nl_fill_ifaddr(struct sk_buff *skb, struct dn_ifaddr *ifa, @@ -677,6 +680,7 @@ static int dn_nl_fill_ifaddr(struct sk_buff *skb, struct dn_ifaddr *ifa, { struct ifaddrmsg *ifm; struct nlmsghdr *nlh; + u32 ifa_flags = ifa->ifa_flags | IFA_F_PERMANENT; nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags); if (nlh == NULL) @@ -685,7 +689,7 @@ static int dn_nl_fill_ifaddr(struct sk_buff *skb, struct dn_ifaddr *ifa, ifm = nlmsg_data(nlh); ifm->ifa_family = AF_DECnet; ifm->ifa_prefixlen = 16; - ifm->ifa_flags = ifa->ifa_flags | IFA_F_PERMANENT; + ifm->ifa_flags = ifa_flags; ifm->ifa_scope = ifa->ifa_scope; ifm->ifa_index = ifa->ifa_dev->dev->ifindex; @@ -694,7 +698,8 @@ static int dn_nl_fill_ifaddr(struct sk_buff *skb, struct dn_ifaddr *ifa, (ifa->ifa_local && nla_put_le16(skb, IFA_LOCAL, ifa->ifa_local)) || (ifa->ifa_label[0] && - nla_put_string(skb, IFA_LABEL, ifa->ifa_label))) + nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) || + nla_put_u32(skb, IFA_FLAGS, ifa_flags)) goto nla_put_failure; return nlmsg_end(skb, nlh); diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c index f8637f93d31..c8121ceddb9 100644 --- a/net/decnet/dn_neigh.c +++ b/net/decnet/dn_neigh.c @@ -102,19 +102,21 @@ struct neigh_table dn_neigh_table = { .id = "dn_neigh_cache", .parms ={ .tbl = &dn_neigh_table, - .base_reachable_time = 30 * HZ, - .retrans_time = 1 * HZ, - .gc_staletime = 60 * HZ, - .reachable_time = 30 * HZ, - .delay_probe_time = 5 * HZ, - .queue_len_bytes = 64*1024, - .ucast_probes = 0, - .app_probes = 0, - .mcast_probes = 0, - .anycast_delay = 0, 
- .proxy_delay = 0, - .proxy_qlen = 0, - .locktime = 1 * HZ, + .reachable_time = 30 * HZ, + .data = { + [NEIGH_VAR_MCAST_PROBES] = 0, + [NEIGH_VAR_UCAST_PROBES] = 0, + [NEIGH_VAR_APP_PROBES] = 0, + [NEIGH_VAR_RETRANS_TIME] = 1 * HZ, + [NEIGH_VAR_BASE_REACHABLE_TIME] = 30 * HZ, + [NEIGH_VAR_DELAY_PROBE_TIME] = 5 * HZ, + [NEIGH_VAR_GC_STALETIME] = 60 * HZ, + [NEIGH_VAR_QUEUE_LEN_BYTES] = 64*1024, + [NEIGH_VAR_PROXY_QLEN] = 0, + [NEIGH_VAR_ANYCAST_DELAY] = 0, + [NEIGH_VAR_PROXY_DELAY] = 0, + [NEIGH_VAR_LOCKTIME] = 1 * HZ, + }, }, .gc_interval = 30 * HZ, .gc_thresh1 = 128, diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index fe32388ea24..ce0cbbfe0f4 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1288,8 +1288,6 @@ int dn_route_output_sock(struct dst_entry __rcu **pprt, struct flowidn *fl, stru err = __dn_route_output_key(pprt, fl, flags & MSG_TRYHARD); if (err == 0 && fl->flowidn_proto) { - if (!(flags & MSG_DONTWAIT)) - fl->flowidn_flags |= FLOWI_FLAG_CAN_SLEEP; *pprt = xfrm_lookup(&init_net, *pprt, flowidn_to_flowi(fl), sk, 0); if (IS_ERR(*pprt)) { @@ -1668,12 +1666,8 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) if (fld.flowidn_iif) { struct net_device *dev; - if ((dev = dev_get_by_index(&init_net, fld.flowidn_iif)) == NULL) { - kfree_skb(skb); - return -ENODEV; - } - if (!dev->dn_ptr) { - dev_put(dev); + dev = __dev_get_by_index(&init_net, fld.flowidn_iif); + if (!dev || !dev->dn_ptr) { kfree_skb(skb); return -ENODEV; } @@ -1695,8 +1689,6 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) err = dn_route_output_key((struct dst_entry **)&rt, &fld, 0); } - if (skb->dev) - dev_put(skb->dev); skb->dev = NULL; if (err) goto out_free; diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c index f347a2ca7d7..bf858433904 100644 --- a/net/dns_resolver/dns_key.c +++ b/net/dns_resolver/dns_key.c @@ -19,8 +19,7 @@ * the GNU Lesser General Public License for more details. 
* * You should have received a copy of the GNU Lesser General Public License - * along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this library; if not, see <http://www.gnu.org/licenses/>. */ #include <linux/module.h> #include <linux/moduleparam.h> diff --git a/net/dns_resolver/dns_query.c b/net/dns_resolver/dns_query.c index c32be292c7e..e7b6d53eef8 100644 --- a/net/dns_resolver/dns_query.c +++ b/net/dns_resolver/dns_query.c @@ -32,8 +32,7 @@ * the GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License - * along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this library; if not, see <http://www.gnu.org/licenses/>. */ #include <linux/module.h> diff --git a/net/dns_resolver/internal.h b/net/dns_resolver/internal.h index 17c7886b5b3..7af1ed39c00 100644 --- a/net/dns_resolver/internal.h +++ b/net/dns_resolver/internal.h @@ -15,8 +15,7 @@ * the GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License - * along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this library; if not, see <http://www.gnu.org/licenses/>. 
*/ #include <linux/compiler.h> diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 29d684ebca6..02c0e1716f6 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -156,7 +156,7 @@ static int dsa_slave_set_mac_address(struct net_device *dev, void *a) dev_uc_del(master, dev->dev_addr); out: - memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN); + ether_addr_copy(dev->dev_addr, addr->sa_data); return 0; } diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 8f032bae60a..5dc638cad2e 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -156,7 +156,9 @@ EXPORT_SYMBOL(eth_rebuild_header); */ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) { - struct ethhdr *eth; + unsigned short _service_access_point; + const unsigned short *sap; + const struct ethhdr *eth; skb->dev = dev; skb_reset_mac_header(skb); @@ -194,7 +196,8 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) * layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This * won't work for fault tolerant netware but does for the rest. */ - if (unlikely(skb->len >= 2 && *(unsigned short *)(skb->data) == 0xFFFF)) + sap = skb_header_pointer(skb, 0, sizeof(*sap), &_service_access_point); + if (sap && *sap == 0xFFFF) return htons(ETH_P_802_3); /* diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index 4bdab152187..327060c6c87 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -127,11 +127,6 @@ int hsr_create_self_node(struct list_head *self_node_db, return 0; } -static void node_entry_reclaim(struct rcu_head *rh) -{ - kfree(container_of(rh, struct node_entry, rcu_head)); -} - /* Add/merge node to the database of nodes. 'skb' must contain an HSR * supervision frame. 
@@ -175,7 +170,7 @@ struct node_entry *hsr_merge_node(struct hsr_priv *hsr_priv, if (node && !ether_addr_equal(node->MacAddressA, hsr_sp->MacAddressA)) { /* Node has changed its AddrA, frame was received from SlaveB */ list_del_rcu(&node->mac_list); - call_rcu(&node->rcu_head, node_entry_reclaim); + kfree_rcu(node, rcu_head); node = NULL; } @@ -183,7 +178,7 @@ struct node_entry *hsr_merge_node(struct hsr_priv *hsr_priv, !ether_addr_equal(node->MacAddressB, hsr_ethsup->ethhdr.h_source)) { /* Cables have been swapped */ list_del_rcu(&node->mac_list); - call_rcu(&node->rcu_head, node_entry_reclaim); + kfree_rcu(node, rcu_head); node = NULL; } @@ -192,7 +187,7 @@ struct node_entry *hsr_merge_node(struct hsr_priv *hsr_priv, !ether_addr_equal(node->MacAddressA, hsr_ethsup->ethhdr.h_source)) { /* Cables have been swapped */ list_del_rcu(&node->mac_list); - call_rcu(&node->rcu_head, node_entry_reclaim); + kfree_rcu(node, rcu_head); node = NULL; } @@ -417,7 +412,7 @@ void hsr_prune_nodes(struct hsr_priv *hsr_priv) hsr_nl_nodedown(hsr_priv, node->MacAddressA); list_del_rcu(&node->mac_list); /* Note that we need to free this entry later: */ - call_rcu(&node->rcu_head, node_entry_reclaim); + kfree_rcu(node, rcu_head); } } rcu_read_unlock(); diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c index a2d2456a557..48b25c0af4d 100644 --- a/net/ieee802154/6lowpan.c +++ b/net/ieee802154/6lowpan.c @@ -62,9 +62,6 @@ #include "6lowpan.h" -/* TTL uncompression values */ -static const u8 lowpan_ttl_values[] = {0, 1, 64, 255}; - static LIST_HEAD(lowpan_devices); /* private device info */ @@ -104,378 +101,14 @@ static inline void lowpan_address_flip(u8 *src, u8 *dest) (dest)[IEEE802154_ADDR_LEN - i - 1] = (src)[i]; } -/* list of all 6lowpan devices, uses for package delivering */ -/* print data in line */ -static inline void lowpan_raw_dump_inline(const char *caller, char *msg, - unsigned char *buf, int len) -{ -#ifdef DEBUG - if (msg) - pr_debug("(%s) %s: ", caller, msg); - 
print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_NONE, - 16, 1, buf, len, false); -#endif /* DEBUG */ -} - -/* - * print data in a table format: - * - * addr: xx xx xx xx xx xx - * addr: xx xx xx xx xx xx - * ... - */ -static inline void lowpan_raw_dump_table(const char *caller, char *msg, - unsigned char *buf, int len) -{ -#ifdef DEBUG - if (msg) - pr_debug("(%s) %s:\n", caller, msg); - print_hex_dump(KERN_DEBUG, "\t", DUMP_PREFIX_OFFSET, - 16, 1, buf, len, false); -#endif /* DEBUG */ -} - -static u8 -lowpan_compress_addr_64(u8 **hc06_ptr, u8 shift, const struct in6_addr *ipaddr, - const unsigned char *lladdr) -{ - u8 val = 0; - - if (is_addr_mac_addr_based(ipaddr, lladdr)) - val = 3; /* 0-bits */ - else if (lowpan_is_iid_16_bit_compressable(ipaddr)) { - /* compress IID to 16 bits xxxx::XXXX */ - memcpy(*hc06_ptr, &ipaddr->s6_addr16[7], 2); - *hc06_ptr += 2; - val = 2; /* 16-bits */ - } else { - /* do not compress IID => xxxx::IID */ - memcpy(*hc06_ptr, &ipaddr->s6_addr16[4], 8); - *hc06_ptr += 8; - val = 1; /* 64-bits */ - } - - return rol8(val, shift); -} - -/* - * Uncompress address function for source and - * destination address(non-multicast). - * - * address_mode is sam value or dam value. 
- */ -static int -lowpan_uncompress_addr(struct sk_buff *skb, - struct in6_addr *ipaddr, - const u8 address_mode, - const struct ieee802154_addr *lladdr) -{ - bool fail; - - switch (address_mode) { - case LOWPAN_IPHC_ADDR_00: - /* for global link addresses */ - fail = lowpan_fetch_skb(skb, ipaddr->s6_addr, 16); - break; - case LOWPAN_IPHC_ADDR_01: - /* fe:80::XXXX:XXXX:XXXX:XXXX */ - ipaddr->s6_addr[0] = 0xFE; - ipaddr->s6_addr[1] = 0x80; - fail = lowpan_fetch_skb(skb, &ipaddr->s6_addr[8], 8); - break; - case LOWPAN_IPHC_ADDR_02: - /* fe:80::ff:fe00:XXXX */ - ipaddr->s6_addr[0] = 0xFE; - ipaddr->s6_addr[1] = 0x80; - ipaddr->s6_addr[11] = 0xFF; - ipaddr->s6_addr[12] = 0xFE; - fail = lowpan_fetch_skb(skb, &ipaddr->s6_addr[14], 2); - break; - case LOWPAN_IPHC_ADDR_03: - fail = false; - switch (lladdr->addr_type) { - case IEEE802154_ADDR_LONG: - /* fe:80::XXXX:XXXX:XXXX:XXXX - * \_________________/ - * hwaddr - */ - ipaddr->s6_addr[0] = 0xFE; - ipaddr->s6_addr[1] = 0x80; - memcpy(&ipaddr->s6_addr[8], lladdr->hwaddr, - IEEE802154_ADDR_LEN); - /* second bit-flip (Universe/Local) - * is done according RFC2464 - */ - ipaddr->s6_addr[8] ^= 0x02; - break; - case IEEE802154_ADDR_SHORT: - /* fe:80::ff:fe00:XXXX - * \__/ - * short_addr - * - * Universe/Local bit is zero. - */ - ipaddr->s6_addr[0] = 0xFE; - ipaddr->s6_addr[1] = 0x80; - ipaddr->s6_addr[11] = 0xFF; - ipaddr->s6_addr[12] = 0xFE; - ipaddr->s6_addr16[7] = htons(lladdr->short_addr); - break; - default: - pr_debug("Invalid addr_type set\n"); - return -EINVAL; - } - break; - default: - pr_debug("Invalid address mode value: 0x%x\n", address_mode); - return -EINVAL; - } - - if (fail) { - pr_debug("Failed to fetch skb data\n"); - return -EIO; - } - - lowpan_raw_dump_inline(NULL, "Reconstructed ipv6 addr is:\n", - ipaddr->s6_addr, 16); - - return 0; -} - -/* Uncompress address function for source context - * based address(non-multicast). 
- */ -static int -lowpan_uncompress_context_based_src_addr(struct sk_buff *skb, - struct in6_addr *ipaddr, - const u8 sam) -{ - switch (sam) { - case LOWPAN_IPHC_ADDR_00: - /* unspec address :: - * Do nothing, address is already :: - */ - break; - case LOWPAN_IPHC_ADDR_01: - /* TODO */ - case LOWPAN_IPHC_ADDR_02: - /* TODO */ - case LOWPAN_IPHC_ADDR_03: - /* TODO */ - netdev_warn(skb->dev, "SAM value 0x%x not supported\n", sam); - return -EINVAL; - default: - pr_debug("Invalid sam value: 0x%x\n", sam); - return -EINVAL; - } - - lowpan_raw_dump_inline(NULL, - "Reconstructed context based ipv6 src addr is:\n", - ipaddr->s6_addr, 16); - - return 0; -} - -/* Uncompress function for multicast destination address, - * when M bit is set. - */ -static int -lowpan_uncompress_multicast_daddr(struct sk_buff *skb, - struct in6_addr *ipaddr, - const u8 dam) -{ - bool fail; - - switch (dam) { - case LOWPAN_IPHC_DAM_00: - /* 00: 128 bits. The full address - * is carried in-line. - */ - fail = lowpan_fetch_skb(skb, ipaddr->s6_addr, 16); - break; - case LOWPAN_IPHC_DAM_01: - /* 01: 48 bits. The address takes - * the form ffXX::00XX:XXXX:XXXX. - */ - ipaddr->s6_addr[0] = 0xFF; - fail = lowpan_fetch_skb(skb, &ipaddr->s6_addr[1], 1); - fail |= lowpan_fetch_skb(skb, &ipaddr->s6_addr[11], 5); - break; - case LOWPAN_IPHC_DAM_10: - /* 10: 32 bits. The address takes - * the form ffXX::00XX:XXXX. - */ - ipaddr->s6_addr[0] = 0xFF; - fail = lowpan_fetch_skb(skb, &ipaddr->s6_addr[1], 1); - fail |= lowpan_fetch_skb(skb, &ipaddr->s6_addr[13], 3); - break; - case LOWPAN_IPHC_DAM_11: - /* 11: 8 bits. The address takes - * the form ff02::00XX. 
- */ - ipaddr->s6_addr[0] = 0xFF; - ipaddr->s6_addr[1] = 0x02; - fail = lowpan_fetch_skb(skb, &ipaddr->s6_addr[15], 1); - break; - default: - pr_debug("DAM value has a wrong value: 0x%x\n", dam); - return -EINVAL; - } - - if (fail) { - pr_debug("Failed to fetch skb data\n"); - return -EIO; - } - - lowpan_raw_dump_inline(NULL, "Reconstructed ipv6 multicast addr is:\n", - ipaddr->s6_addr, 16); - - return 0; -} - -static void -lowpan_compress_udp_header(u8 **hc06_ptr, struct sk_buff *skb) -{ - struct udphdr *uh = udp_hdr(skb); - - if (((uh->source & LOWPAN_NHC_UDP_4BIT_MASK) == - LOWPAN_NHC_UDP_4BIT_PORT) && - ((uh->dest & LOWPAN_NHC_UDP_4BIT_MASK) == - LOWPAN_NHC_UDP_4BIT_PORT)) { - pr_debug("UDP header: both ports compression to 4 bits\n"); - **hc06_ptr = LOWPAN_NHC_UDP_CS_P_11; - **(hc06_ptr + 1) = /* subtraction is faster */ - (u8)((uh->dest - LOWPAN_NHC_UDP_4BIT_PORT) + - ((uh->source & LOWPAN_NHC_UDP_4BIT_PORT) << 4)); - *hc06_ptr += 2; - } else if ((uh->dest & LOWPAN_NHC_UDP_8BIT_MASK) == - LOWPAN_NHC_UDP_8BIT_PORT) { - pr_debug("UDP header: remove 8 bits of dest\n"); - **hc06_ptr = LOWPAN_NHC_UDP_CS_P_01; - memcpy(*hc06_ptr + 1, &uh->source, 2); - **(hc06_ptr + 3) = (u8)(uh->dest - LOWPAN_NHC_UDP_8BIT_PORT); - *hc06_ptr += 4; - } else if ((uh->source & LOWPAN_NHC_UDP_8BIT_MASK) == - LOWPAN_NHC_UDP_8BIT_PORT) { - pr_debug("UDP header: remove 8 bits of source\n"); - **hc06_ptr = LOWPAN_NHC_UDP_CS_P_10; - memcpy(*hc06_ptr + 1, &uh->dest, 2); - **(hc06_ptr + 3) = (u8)(uh->source - LOWPAN_NHC_UDP_8BIT_PORT); - *hc06_ptr += 4; - } else { - pr_debug("UDP header: can't compress\n"); - **hc06_ptr = LOWPAN_NHC_UDP_CS_P_00; - memcpy(*hc06_ptr + 1, &uh->source, 2); - memcpy(*hc06_ptr + 3, &uh->dest, 2); - *hc06_ptr += 5; - } - - /* checksum is always inline */ - memcpy(*hc06_ptr, &uh->check, 2); - *hc06_ptr += 2; - - /* skip the UDP header */ - skb_pull(skb, sizeof(struct udphdr)); -} - -static inline int lowpan_fetch_skb_u8(struct sk_buff *skb, u8 *val) -{ - if 
(unlikely(!pskb_may_pull(skb, 1))) - return -EINVAL; - - *val = skb->data[0]; - skb_pull(skb, 1); - - return 0; -} - -static inline int lowpan_fetch_skb_u16(struct sk_buff *skb, u16 *val) -{ - if (unlikely(!pskb_may_pull(skb, 2))) - return -EINVAL; - - *val = (skb->data[0] << 8) | skb->data[1]; - skb_pull(skb, 2); - - return 0; -} - -static int -lowpan_uncompress_udp_header(struct sk_buff *skb, struct udphdr *uh) -{ - u8 tmp; - - if (!uh) - goto err; - - if (lowpan_fetch_skb_u8(skb, &tmp)) - goto err; - - if ((tmp & LOWPAN_NHC_UDP_MASK) == LOWPAN_NHC_UDP_ID) { - pr_debug("UDP header uncompression\n"); - switch (tmp & LOWPAN_NHC_UDP_CS_P_11) { - case LOWPAN_NHC_UDP_CS_P_00: - memcpy(&uh->source, &skb->data[0], 2); - memcpy(&uh->dest, &skb->data[2], 2); - skb_pull(skb, 4); - break; - case LOWPAN_NHC_UDP_CS_P_01: - memcpy(&uh->source, &skb->data[0], 2); - uh->dest = - skb->data[2] + LOWPAN_NHC_UDP_8BIT_PORT; - skb_pull(skb, 3); - break; - case LOWPAN_NHC_UDP_CS_P_10: - uh->source = skb->data[0] + LOWPAN_NHC_UDP_8BIT_PORT; - memcpy(&uh->dest, &skb->data[1], 2); - skb_pull(skb, 3); - break; - case LOWPAN_NHC_UDP_CS_P_11: - uh->source = - LOWPAN_NHC_UDP_4BIT_PORT + (skb->data[0] >> 4); - uh->dest = - LOWPAN_NHC_UDP_4BIT_PORT + (skb->data[0] & 0x0f); - skb_pull(skb, 1); - break; - default: - pr_debug("ERROR: unknown UDP format\n"); - goto err; - } - - pr_debug("uncompressed UDP ports: src = %d, dst = %d\n", - uh->source, uh->dest); - - /* copy checksum */ - memcpy(&uh->check, &skb->data[0], 2); - skb_pull(skb, 2); - - /* - * UDP lenght needs to be infered from the lower layers - * here, we obtain the hint from the remaining size of the - * frame - */ - uh->len = htons(skb->len + sizeof(struct udphdr)); - pr_debug("uncompressed UDP length: src = %d", uh->len); - } else { - pr_debug("ERROR: unsupported NH format\n"); - goto err; - } - - return 0; -err: - return -EINVAL; -} - static int lowpan_header_create(struct sk_buff *skb, struct net_device *dev, unsigned short type, 
const void *_daddr, const void *_saddr, unsigned int len) { - u8 tmp, iphc0, iphc1, *hc06_ptr; struct ipv6hdr *hdr; const u8 *saddr = _saddr; const u8 *daddr = _daddr; - u8 head[100]; struct ieee802154_addr sa, da; /* TODO: @@ -485,181 +118,14 @@ static int lowpan_header_create(struct sk_buff *skb, return 0; hdr = ipv6_hdr(skb); - hc06_ptr = head + 2; - - pr_debug("IPv6 header dump:\n\tversion = %d\n\tlength = %d\n" - "\tnexthdr = 0x%02x\n\thop_lim = %d\n", hdr->version, - ntohs(hdr->payload_len), hdr->nexthdr, hdr->hop_limit); - - lowpan_raw_dump_table(__func__, "raw skb network header dump", - skb_network_header(skb), sizeof(struct ipv6hdr)); if (!saddr) saddr = dev->dev_addr; - lowpan_raw_dump_inline(__func__, "saddr", (unsigned char *)saddr, 8); - - /* - * As we copy some bit-length fields, in the IPHC encoding bytes, - * we sometimes use |= - * If the field is 0, and the current bit value in memory is 1, - * this does not work. We therefore reset the IPHC encoding here - */ - iphc0 = LOWPAN_DISPATCH_IPHC; - iphc1 = 0; - - /* TODO: context lookup */ + raw_dump_inline(__func__, "saddr", (unsigned char *)saddr, 8); + raw_dump_inline(__func__, "daddr", (unsigned char *)daddr, 8); - lowpan_raw_dump_inline(__func__, "daddr", (unsigned char *)daddr, 8); - - /* - * Traffic class, flow label - * If flow label is 0, compress it. 
If traffic class is 0, compress it - * We have to process both in the same time as the offset of traffic - * class depends on the presence of version and flow label - */ - - /* hc06 format of TC is ECN | DSCP , original one is DSCP | ECN */ - tmp = (hdr->priority << 4) | (hdr->flow_lbl[0] >> 4); - tmp = ((tmp & 0x03) << 6) | (tmp >> 2); - - if (((hdr->flow_lbl[0] & 0x0F) == 0) && - (hdr->flow_lbl[1] == 0) && (hdr->flow_lbl[2] == 0)) { - /* flow label can be compressed */ - iphc0 |= LOWPAN_IPHC_FL_C; - if ((hdr->priority == 0) && - ((hdr->flow_lbl[0] & 0xF0) == 0)) { - /* compress (elide) all */ - iphc0 |= LOWPAN_IPHC_TC_C; - } else { - /* compress only the flow label */ - *hc06_ptr = tmp; - hc06_ptr += 1; - } - } else { - /* Flow label cannot be compressed */ - if ((hdr->priority == 0) && - ((hdr->flow_lbl[0] & 0xF0) == 0)) { - /* compress only traffic class */ - iphc0 |= LOWPAN_IPHC_TC_C; - *hc06_ptr = (tmp & 0xc0) | (hdr->flow_lbl[0] & 0x0F); - memcpy(hc06_ptr + 1, &hdr->flow_lbl[1], 2); - hc06_ptr += 3; - } else { - /* compress nothing */ - memcpy(hc06_ptr, hdr, 4); - /* replace the top byte with new ECN | DSCP format */ - *hc06_ptr = tmp; - hc06_ptr += 4; - } - } - - /* NOTE: payload length is always compressed */ - - /* Next Header is compress if UDP */ - if (hdr->nexthdr == UIP_PROTO_UDP) - iphc0 |= LOWPAN_IPHC_NH_C; - - if ((iphc0 & LOWPAN_IPHC_NH_C) == 0) { - *hc06_ptr = hdr->nexthdr; - hc06_ptr += 1; - } - - /* - * Hop limit - * if 1: compress, encoding is 01 - * if 64: compress, encoding is 10 - * if 255: compress, encoding is 11 - * else do not compress - */ - switch (hdr->hop_limit) { - case 1: - iphc0 |= LOWPAN_IPHC_TTL_1; - break; - case 64: - iphc0 |= LOWPAN_IPHC_TTL_64; - break; - case 255: - iphc0 |= LOWPAN_IPHC_TTL_255; - break; - default: - *hc06_ptr = hdr->hop_limit; - hc06_ptr += 1; - break; - } - - /* source address compression */ - if (is_addr_unspecified(&hdr->saddr)) { - pr_debug("source address is unspecified, setting SAC\n"); - iphc1 |= 
LOWPAN_IPHC_SAC; - /* TODO: context lookup */ - } else if (is_addr_link_local(&hdr->saddr)) { - pr_debug("source address is link-local\n"); - iphc1 |= lowpan_compress_addr_64(&hc06_ptr, - LOWPAN_IPHC_SAM_BIT, &hdr->saddr, saddr); - } else { - pr_debug("send the full source address\n"); - memcpy(hc06_ptr, &hdr->saddr.s6_addr16[0], 16); - hc06_ptr += 16; - } - - /* destination address compression */ - if (is_addr_mcast(&hdr->daddr)) { - pr_debug("destination address is multicast: "); - iphc1 |= LOWPAN_IPHC_M; - if (lowpan_is_mcast_addr_compressable8(&hdr->daddr)) { - pr_debug("compressed to 1 octet\n"); - iphc1 |= LOWPAN_IPHC_DAM_11; - /* use last byte */ - *hc06_ptr = hdr->daddr.s6_addr[15]; - hc06_ptr += 1; - } else if (lowpan_is_mcast_addr_compressable32(&hdr->daddr)) { - pr_debug("compressed to 4 octets\n"); - iphc1 |= LOWPAN_IPHC_DAM_10; - /* second byte + the last three */ - *hc06_ptr = hdr->daddr.s6_addr[1]; - memcpy(hc06_ptr + 1, &hdr->daddr.s6_addr[13], 3); - hc06_ptr += 4; - } else if (lowpan_is_mcast_addr_compressable48(&hdr->daddr)) { - pr_debug("compressed to 6 octets\n"); - iphc1 |= LOWPAN_IPHC_DAM_01; - /* second byte + the last five */ - *hc06_ptr = hdr->daddr.s6_addr[1]; - memcpy(hc06_ptr + 1, &hdr->daddr.s6_addr[11], 5); - hc06_ptr += 6; - } else { - pr_debug("using full address\n"); - iphc1 |= LOWPAN_IPHC_DAM_00; - memcpy(hc06_ptr, &hdr->daddr.s6_addr[0], 16); - hc06_ptr += 16; - } - } else { - /* TODO: context lookup */ - if (is_addr_link_local(&hdr->daddr)) { - pr_debug("dest address is unicast and link-local\n"); - iphc1 |= lowpan_compress_addr_64(&hc06_ptr, - LOWPAN_IPHC_DAM_BIT, &hdr->daddr, daddr); - } else { - pr_debug("dest address is unicast: using full one\n"); - memcpy(hc06_ptr, &hdr->daddr.s6_addr16[0], 16); - hc06_ptr += 16; - } - } - - /* UDP header compression */ - if (hdr->nexthdr == UIP_PROTO_UDP) - lowpan_compress_udp_header(&hc06_ptr, skb); - - head[0] = iphc0; - head[1] = iphc1; - - skb_pull(skb, sizeof(struct ipv6hdr)); - 
skb_reset_transport_header(skb); - memcpy(skb_push(skb, hc06_ptr - head), head, hc06_ptr - head); - skb_reset_network_header(skb); - - lowpan_raw_dump_table(__func__, "raw skb data dump", skb->data, - skb->len); + lowpan_header_compress(skb, dev, type, daddr, saddr, len); /* * NOTE1: I'm still unsure about the fact that compression and WPAN @@ -671,39 +137,38 @@ static int lowpan_header_create(struct sk_buff *skb, * from MAC subif of the 'dev' and 'real_dev' network devices, but * this isn't implemented in mainline yet, so currently we assign 0xff */ - { - mac_cb(skb)->flags = IEEE802154_FC_TYPE_DATA; - mac_cb(skb)->seq = ieee802154_mlme_ops(dev)->get_dsn(dev); + mac_cb(skb)->flags = IEEE802154_FC_TYPE_DATA; + mac_cb(skb)->seq = ieee802154_mlme_ops(dev)->get_dsn(dev); - /* prepare wpan address data */ - sa.addr_type = IEEE802154_ADDR_LONG; - sa.pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev); + /* prepare wpan address data */ + sa.addr_type = IEEE802154_ADDR_LONG; + sa.pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev); - memcpy(&(sa.hwaddr), saddr, 8); - /* intra-PAN communications */ - da.pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev); + memcpy(&(sa.hwaddr), saddr, 8); + /* intra-PAN communications */ + da.pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev); - /* - * if the destination address is the broadcast address, use the - * corresponding short address - */ - if (lowpan_is_addr_broadcast(daddr)) { - da.addr_type = IEEE802154_ADDR_SHORT; - da.short_addr = IEEE802154_ADDR_BROADCAST; - } else { - da.addr_type = IEEE802154_ADDR_LONG; - memcpy(&(da.hwaddr), daddr, IEEE802154_ADDR_LEN); - - /* request acknowledgment */ - mac_cb(skb)->flags |= MAC_CB_FLAG_ACKREQ; - } + /* + * if the destination address is the broadcast address, use the + * corresponding short address + */ + if (lowpan_is_addr_broadcast(daddr)) { + da.addr_type = IEEE802154_ADDR_SHORT; + da.short_addr = IEEE802154_ADDR_BROADCAST; + } else { + da.addr_type = IEEE802154_ADDR_LONG; + 
memcpy(&(da.hwaddr), daddr, IEEE802154_ADDR_LEN); - return dev_hard_header(skb, lowpan_dev_info(dev)->real_dev, - type, (void *)&da, (void *)&sa, skb->len); + /* request acknowledgment */ + mac_cb(skb)->flags |= MAC_CB_FLAG_ACKREQ; } + + return dev_hard_header(skb, lowpan_dev_info(dev)->real_dev, + type, (void *)&da, (void *)&sa, skb->len); } -static int lowpan_give_skb_to_devices(struct sk_buff *skb) +static int lowpan_give_skb_to_devices(struct sk_buff *skb, + struct net_device *dev) { struct lowpan_dev_record *entry; struct sk_buff *skb_cp; @@ -726,31 +191,6 @@ static int lowpan_give_skb_to_devices(struct sk_buff *skb) return stat; } -static int lowpan_skb_deliver(struct sk_buff *skb, struct ipv6hdr *hdr) -{ - struct sk_buff *new; - int stat = NET_RX_SUCCESS; - - new = skb_copy_expand(skb, sizeof(struct ipv6hdr), skb_tailroom(skb), - GFP_ATOMIC); - kfree_skb(skb); - - if (!new) - return -ENOMEM; - - skb_push(new, sizeof(struct ipv6hdr)); - skb_copy_to_linear_data(new, hdr, sizeof(struct ipv6hdr)); - - new->protocol = htons(ETH_P_IPV6); - new->pkt_type = PACKET_HOST; - - stat = lowpan_give_skb_to_devices(new); - - kfree_skb(new); - - return stat; -} - static void lowpan_fragment_timer_expired(unsigned long entry_addr) { struct lowpan_fragment *entry = (struct lowpan_fragment *)entry_addr; @@ -814,16 +254,12 @@ frame_err: return NULL; } -static int -lowpan_process_data(struct sk_buff *skb) +static int process_data(struct sk_buff *skb) { - struct ipv6hdr hdr = {}; - u8 tmp, iphc0, iphc1, num_context = 0; + u8 iphc0, iphc1; const struct ieee802154_addr *_saddr, *_daddr; - int err; - lowpan_raw_dump_table(__func__, "raw skb data dump", skb->data, - skb->len); + raw_dump_table(__func__, "raw skb data dump", skb->data, skb->len); /* at least two bytes will be used for the encoding */ if (skb->len < 2) goto drop; @@ -925,162 +361,11 @@ lowpan_process_data(struct sk_buff *skb) _saddr = &mac_cb(skb)->sa; _daddr = &mac_cb(skb)->da; - pr_debug("iphc0 = %02x, iphc1 = 
%02x\n", iphc0, iphc1); - - /* another if the CID flag is set */ - if (iphc1 & LOWPAN_IPHC_CID) { - pr_debug("CID flag is set, increase header with one\n"); - if (lowpan_fetch_skb_u8(skb, &num_context)) - goto drop; - } - - hdr.version = 6; - - /* Traffic Class and Flow Label */ - switch ((iphc0 & LOWPAN_IPHC_TF) >> 3) { - /* - * Traffic Class and FLow Label carried in-line - * ECN + DSCP + 4-bit Pad + Flow Label (4 bytes) - */ - case 0: /* 00b */ - if (lowpan_fetch_skb_u8(skb, &tmp)) - goto drop; - - memcpy(&hdr.flow_lbl, &skb->data[0], 3); - skb_pull(skb, 3); - hdr.priority = ((tmp >> 2) & 0x0f); - hdr.flow_lbl[0] = ((tmp >> 2) & 0x30) | (tmp << 6) | - (hdr.flow_lbl[0] & 0x0f); - break; - /* - * Traffic class carried in-line - * ECN + DSCP (1 byte), Flow Label is elided - */ - case 2: /* 10b */ - if (lowpan_fetch_skb_u8(skb, &tmp)) - goto drop; - - hdr.priority = ((tmp >> 2) & 0x0f); - hdr.flow_lbl[0] = ((tmp << 6) & 0xC0) | ((tmp >> 2) & 0x30); - break; - /* - * Flow Label carried in-line - * ECN + 2-bit Pad + Flow Label (3 bytes), DSCP is elided - */ - case 1: /* 01b */ - if (lowpan_fetch_skb_u8(skb, &tmp)) - goto drop; - - hdr.flow_lbl[0] = (skb->data[0] & 0x0F) | ((tmp >> 2) & 0x30); - memcpy(&hdr.flow_lbl[1], &skb->data[0], 2); - skb_pull(skb, 2); - break; - /* Traffic Class and Flow Label are elided */ - case 3: /* 11b */ - break; - default: - break; - } - - /* Next Header */ - if ((iphc0 & LOWPAN_IPHC_NH_C) == 0) { - /* Next header is carried inline */ - if (lowpan_fetch_skb_u8(skb, &(hdr.nexthdr))) - goto drop; - - pr_debug("NH flag is set, next header carried inline: %02x\n", - hdr.nexthdr); - } - - /* Hop Limit */ - if ((iphc0 & 0x03) != LOWPAN_IPHC_TTL_I) - hdr.hop_limit = lowpan_ttl_values[iphc0 & 0x03]; - else { - if (lowpan_fetch_skb_u8(skb, &(hdr.hop_limit))) - goto drop; - } - - /* Extract SAM to the tmp variable */ - tmp = ((iphc1 & LOWPAN_IPHC_SAM) >> LOWPAN_IPHC_SAM_BIT) & 0x03; - - if (iphc1 & LOWPAN_IPHC_SAC) { - /* Source address context 
based uncompression */ - pr_debug("SAC bit is set. Handle context based source address.\n"); - err = lowpan_uncompress_context_based_src_addr( - skb, &hdr.saddr, tmp); - } else { - /* Source address uncompression */ - pr_debug("source address stateless compression\n"); - err = lowpan_uncompress_addr(skb, &hdr.saddr, tmp, _saddr); - } - - /* Check on error of previous branch */ - if (err) - goto drop; - - /* Extract DAM to the tmp variable */ - tmp = ((iphc1 & LOWPAN_IPHC_DAM_11) >> LOWPAN_IPHC_DAM_BIT) & 0x03; - - /* check for Multicast Compression */ - if (iphc1 & LOWPAN_IPHC_M) { - if (iphc1 & LOWPAN_IPHC_DAC) { - pr_debug("dest: context-based mcast compression\n"); - /* TODO: implement this */ - } else { - err = lowpan_uncompress_multicast_daddr( - skb, &hdr.daddr, tmp); - if (err) - goto drop; - } - } else { - pr_debug("dest: stateless compression\n"); - err = lowpan_uncompress_addr(skb, &hdr.daddr, tmp, _daddr); - if (err) - goto drop; - } - - /* UDP data uncompression */ - if (iphc0 & LOWPAN_IPHC_NH_C) { - struct udphdr uh; - struct sk_buff *new; - if (lowpan_uncompress_udp_header(skb, &uh)) - goto drop; - - /* - * replace the compressed UDP head by the uncompressed UDP - * header - */ - new = skb_copy_expand(skb, sizeof(struct udphdr), - skb_tailroom(skb), GFP_ATOMIC); - kfree_skb(skb); - - if (!new) - return -ENOMEM; - - skb = new; - - skb_push(skb, sizeof(struct udphdr)); - skb_copy_to_linear_data(skb, &uh, sizeof(struct udphdr)); - - lowpan_raw_dump_table(__func__, "raw UDP header dump", - (u8 *)&uh, sizeof(uh)); - - hdr.nexthdr = UIP_PROTO_UDP; - } - - /* Not fragmented package */ - hdr.payload_len = htons(skb->len); - - pr_debug("skb headroom size = %d, data length = %d\n", - skb_headroom(skb), skb->len); - - pr_debug("IPv6 header dump:\n\tversion = %d\n\tlength = %d\n\t" - "nexthdr = 0x%02x\n\thop_lim = %d\n", hdr.version, - ntohs(hdr.payload_len), hdr.nexthdr, hdr.hop_limit); - - lowpan_raw_dump_table(__func__, "raw header dump", (u8 *)&hdr, - 
sizeof(hdr)); - return lowpan_skb_deliver(skb, &hdr); + return lowpan_process_data(skb, skb->dev, (u8 *)_saddr->hwaddr, + _saddr->addr_type, IEEE802154_ADDR_LEN, + (u8 *)_daddr->hwaddr, _daddr->addr_type, + IEEE802154_ADDR_LEN, iphc0, iphc1, + lowpan_give_skb_to_devices); unlock_and_drop: spin_unlock_bh(&flist_lock); @@ -1112,7 +397,7 @@ lowpan_fragment_xmit(struct sk_buff *skb, u8 *head, hlen = (type == LOWPAN_DISPATCH_FRAG1) ? LOWPAN_FRAG1_HEAD_SIZE : LOWPAN_FRAGN_HEAD_SIZE; - lowpan_raw_dump_inline(__func__, "6lowpan fragment header", head, hlen); + raw_dump_inline(__func__, "6lowpan fragment header", head, hlen); frag = netdev_alloc_skb(skb->dev, hlen + mlen + plen + IEEE802154_MFR_SIZE); @@ -1132,8 +417,7 @@ lowpan_fragment_xmit(struct sk_buff *skb, u8 *head, skb_copy_to_linear_data_offset(frag, mlen + hlen, skb_network_header(skb) + offset, plen); - lowpan_raw_dump_table(__func__, " raw fragment dump", frag->data, - frag->len); + raw_dump_table(__func__, " raw fragment dump", frag->data, frag->len); return dev_queue_xmit(frag); } @@ -1316,7 +600,7 @@ static int lowpan_rcv(struct sk_buff *skb, struct net_device *dev, /* Pull off the 1-byte of 6lowpan header. 
*/ skb_pull(local_skb, 1); - lowpan_give_skb_to_devices(local_skb); + lowpan_give_skb_to_devices(local_skb, NULL); kfree_skb(local_skb); kfree_skb(skb); @@ -1328,7 +612,7 @@ static int lowpan_rcv(struct sk_buff *skb, struct net_device *dev, local_skb = skb_clone(skb, GFP_ATOMIC); if (!local_skb) goto drop; - lowpan_process_data(local_skb); + process_data(local_skb); kfree_skb(skb); break; diff --git a/net/ieee802154/6lowpan.h b/net/ieee802154/6lowpan.h index 2869c0526da..2b835db3bda 100644 --- a/net/ieee802154/6lowpan.h +++ b/net/ieee802154/6lowpan.h @@ -231,6 +231,61 @@ #define LOWPAN_NHC_UDP_CS_P_10 0xF2 /* source = 0xF0 + 8bit inline, dest = 16 bit inline */ #define LOWPAN_NHC_UDP_CS_P_11 0xF3 /* source & dest = 0xF0B + 4bit inline */ +#define LOWPAN_NHC_UDP_CS_C 0x04 /* checksum elided */ + +#ifdef DEBUG +/* print data in line */ +static inline void raw_dump_inline(const char *caller, char *msg, + unsigned char *buf, int len) +{ + if (msg) + pr_debug("%s():%s: ", caller, msg); + + print_hex_dump_debug("", DUMP_PREFIX_NONE, 16, 1, buf, len, false); +} + +/* print data in a table format: + * + * addr: xx xx xx xx xx xx + * addr: xx xx xx xx xx xx + * ... 
+ */ +static inline void raw_dump_table(const char *caller, char *msg, + unsigned char *buf, int len) +{ + if (msg) + pr_debug("%s():%s:\n", caller, msg); + + print_hex_dump_debug("\t", DUMP_PREFIX_OFFSET, 16, 1, buf, len, false); +} +#else +static inline void raw_dump_table(const char *caller, char *msg, + unsigned char *buf, int len) { } +static inline void raw_dump_inline(const char *caller, char *msg, + unsigned char *buf, int len) { } +#endif + +static inline int lowpan_fetch_skb_u8(struct sk_buff *skb, u8 *val) +{ + if (unlikely(!pskb_may_pull(skb, 1))) + return -EINVAL; + + *val = skb->data[0]; + skb_pull(skb, 1); + + return 0; +} + +static inline int lowpan_fetch_skb_u16(struct sk_buff *skb, u16 *val) +{ + if (unlikely(!pskb_may_pull(skb, 2))) + return -EINVAL; + + *val = (skb->data[0] << 8) | skb->data[1]; + skb_pull(skb, 2); + + return 0; +} static inline bool lowpan_fetch_skb(struct sk_buff *skb, void *data, const unsigned int len) @@ -244,4 +299,21 @@ static inline bool lowpan_fetch_skb(struct sk_buff *skb, return false; } +static inline void lowpan_push_hc_data(u8 **hc_ptr, const void *data, + const size_t len) +{ + memcpy(*hc_ptr, data, len); + *hc_ptr += len; +} + +typedef int (*skb_delivery_cb)(struct sk_buff *skb, struct net_device *dev); + +int lowpan_process_data(struct sk_buff *skb, struct net_device *dev, + const u8 *saddr, const u8 saddr_type, const u8 saddr_len, + const u8 *daddr, const u8 daddr_type, const u8 daddr_len, + u8 iphc0, u8 iphc1, skb_delivery_cb skb_deliver); +int lowpan_header_compress(struct sk_buff *skb, struct net_device *dev, + unsigned short type, const void *_daddr, + const void *_saddr, unsigned int len); + #endif /* __6LOWPAN_H__ */ diff --git a/net/ieee802154/6lowpan_iphc.c b/net/ieee802154/6lowpan_iphc.c new file mode 100644 index 00000000000..083f905bf10 --- /dev/null +++ b/net/ieee802154/6lowpan_iphc.c @@ -0,0 +1,802 @@ +/* + * Copyright 2011, Siemens AG + * written by Alexander Smirnov 
<alex.bluesman.smirnov@gmail.com> + */ + +/* + * Based on patches from Jon Smirl <jonsmirl@gmail.com> + * Copyright (c) 2011 Jon Smirl <jonsmirl@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +/* Jon's code is based on 6lowpan implementation for Contiki which is: + * Copyright (c) 2008, Swedish Institute of Computer Science. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the Institute nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <linux/bitops.h> +#include <linux/if_arp.h> +#include <linux/module.h> +#include <linux/netdevice.h> +#include <net/ipv6.h> +#include <net/af_ieee802154.h> + +#include "6lowpan.h" + +/* + * Uncompress address function for source and + * destination address(non-multicast). + * + * address_mode is sam value or dam value. + */ +static int uncompress_addr(struct sk_buff *skb, + struct in6_addr *ipaddr, const u8 address_mode, + const u8 *lladdr, const u8 addr_type, + const u8 addr_len) +{ + bool fail; + + switch (address_mode) { + case LOWPAN_IPHC_ADDR_00: + /* for global link addresses */ + fail = lowpan_fetch_skb(skb, ipaddr->s6_addr, 16); + break; + case LOWPAN_IPHC_ADDR_01: + /* fe:80::XXXX:XXXX:XXXX:XXXX */ + ipaddr->s6_addr[0] = 0xFE; + ipaddr->s6_addr[1] = 0x80; + fail = lowpan_fetch_skb(skb, &ipaddr->s6_addr[8], 8); + break; + case LOWPAN_IPHC_ADDR_02: + /* fe:80::ff:fe00:XXXX */ + ipaddr->s6_addr[0] = 0xFE; + ipaddr->s6_addr[1] = 0x80; + ipaddr->s6_addr[11] = 0xFF; + ipaddr->s6_addr[12] = 0xFE; + fail = lowpan_fetch_skb(skb, &ipaddr->s6_addr[14], 2); + break; + case LOWPAN_IPHC_ADDR_03: + fail = false; + switch (addr_type) { + case IEEE802154_ADDR_LONG: + /* fe:80::XXXX:XXXX:XXXX:XXXX + * \_________________/ + * hwaddr + */ + ipaddr->s6_addr[0] = 0xFE; + ipaddr->s6_addr[1] = 0x80; + memcpy(&ipaddr->s6_addr[8], lladdr, addr_len); + /* second bit-flip (Universe/Local) + * is done according RFC2464 + */ + 
ipaddr->s6_addr[8] ^= 0x02; + break; + case IEEE802154_ADDR_SHORT: + /* fe:80::ff:fe00:XXXX + * \__/ + * short_addr + * + * Universe/Local bit is zero. + */ + ipaddr->s6_addr[0] = 0xFE; + ipaddr->s6_addr[1] = 0x80; + ipaddr->s6_addr[11] = 0xFF; + ipaddr->s6_addr[12] = 0xFE; + ipaddr->s6_addr16[7] = htons(*((u16 *)lladdr)); + break; + default: + pr_debug("Invalid addr_type set\n"); + return -EINVAL; + } + break; + default: + pr_debug("Invalid address mode value: 0x%x\n", address_mode); + return -EINVAL; + } + + if (fail) { + pr_debug("Failed to fetch skb data\n"); + return -EIO; + } + + raw_dump_inline(NULL, "Reconstructed ipv6 addr is", + ipaddr->s6_addr, 16); + + return 0; +} + +/* + * Uncompress address function for source context + * based address(non-multicast). + */ +static int uncompress_context_based_src_addr(struct sk_buff *skb, + struct in6_addr *ipaddr, + const u8 sam) +{ + switch (sam) { + case LOWPAN_IPHC_ADDR_00: + /* unspec address :: + * Do nothing, address is already :: + */ + break; + case LOWPAN_IPHC_ADDR_01: + /* TODO */ + case LOWPAN_IPHC_ADDR_02: + /* TODO */ + case LOWPAN_IPHC_ADDR_03: + /* TODO */ + netdev_warn(skb->dev, "SAM value 0x%x not supported\n", sam); + return -EINVAL; + default: + pr_debug("Invalid sam value: 0x%x\n", sam); + return -EINVAL; + } + + raw_dump_inline(NULL, + "Reconstructed context based ipv6 src addr is", + ipaddr->s6_addr, 16); + + return 0; +} + +static int skb_deliver(struct sk_buff *skb, struct ipv6hdr *hdr, + struct net_device *dev, skb_delivery_cb deliver_skb) +{ + struct sk_buff *new; + int stat; + + new = skb_copy_expand(skb, sizeof(struct ipv6hdr), skb_tailroom(skb), + GFP_ATOMIC); + kfree_skb(skb); + + if (!new) + return -ENOMEM; + + skb_push(new, sizeof(struct ipv6hdr)); + skb_reset_network_header(new); + skb_copy_to_linear_data(new, hdr, sizeof(struct ipv6hdr)); + + new->protocol = htons(ETH_P_IPV6); + new->pkt_type = PACKET_HOST; + new->dev = dev; + + raw_dump_table(__func__, "raw skb data dump before 
receiving", + new->data, new->len); + + stat = deliver_skb(new, dev); + + kfree_skb(new); + + return stat; +} + +/* Uncompress function for multicast destination address, + * when M bit is set. + */ +static int +lowpan_uncompress_multicast_daddr(struct sk_buff *skb, + struct in6_addr *ipaddr, + const u8 dam) +{ + bool fail; + + switch (dam) { + case LOWPAN_IPHC_DAM_00: + /* 00: 128 bits. The full address + * is carried in-line. + */ + fail = lowpan_fetch_skb(skb, ipaddr->s6_addr, 16); + break; + case LOWPAN_IPHC_DAM_01: + /* 01: 48 bits. The address takes + * the form ffXX::00XX:XXXX:XXXX. + */ + ipaddr->s6_addr[0] = 0xFF; + fail = lowpan_fetch_skb(skb, &ipaddr->s6_addr[1], 1); + fail |= lowpan_fetch_skb(skb, &ipaddr->s6_addr[11], 5); + break; + case LOWPAN_IPHC_DAM_10: + /* 10: 32 bits. The address takes + * the form ffXX::00XX:XXXX. + */ + ipaddr->s6_addr[0] = 0xFF; + fail = lowpan_fetch_skb(skb, &ipaddr->s6_addr[1], 1); + fail |= lowpan_fetch_skb(skb, &ipaddr->s6_addr[13], 3); + break; + case LOWPAN_IPHC_DAM_11: + /* 11: 8 bits. The address takes + * the form ff02::00XX. 
+ */ + ipaddr->s6_addr[0] = 0xFF; + ipaddr->s6_addr[1] = 0x02; + fail = lowpan_fetch_skb(skb, &ipaddr->s6_addr[15], 1); + break; + default: + pr_debug("DAM value has a wrong value: 0x%x\n", dam); + return -EINVAL; + } + + if (fail) { + pr_debug("Failed to fetch skb data\n"); + return -EIO; + } + + raw_dump_inline(NULL, "Reconstructed ipv6 multicast addr is", + ipaddr->s6_addr, 16); + + return 0; +} + +static int +uncompress_udp_header(struct sk_buff *skb, struct udphdr *uh) +{ + bool fail; + u8 tmp = 0, val = 0; + + if (!uh) + goto err; + + fail = lowpan_fetch_skb(skb, &tmp, 1); + + if ((tmp & LOWPAN_NHC_UDP_MASK) == LOWPAN_NHC_UDP_ID) { + pr_debug("UDP header uncompression\n"); + switch (tmp & LOWPAN_NHC_UDP_CS_P_11) { + case LOWPAN_NHC_UDP_CS_P_00: + fail |= lowpan_fetch_skb(skb, &uh->source, 2); + fail |= lowpan_fetch_skb(skb, &uh->dest, 2); + break; + case LOWPAN_NHC_UDP_CS_P_01: + fail |= lowpan_fetch_skb(skb, &uh->source, 2); + fail |= lowpan_fetch_skb(skb, &val, 1); + uh->dest = htons(val + LOWPAN_NHC_UDP_8BIT_PORT); + break; + case LOWPAN_NHC_UDP_CS_P_10: + fail |= lowpan_fetch_skb(skb, &val, 1); + uh->source = htons(val + LOWPAN_NHC_UDP_8BIT_PORT); + fail |= lowpan_fetch_skb(skb, &uh->dest, 2); + break; + case LOWPAN_NHC_UDP_CS_P_11: + fail |= lowpan_fetch_skb(skb, &val, 1); + uh->source = htons(LOWPAN_NHC_UDP_4BIT_PORT + + (val >> 4)); + uh->dest = htons(LOWPAN_NHC_UDP_4BIT_PORT + + (val & 0x0f)); + break; + default: + pr_debug("ERROR: unknown UDP format\n"); + goto err; + break; + } + + pr_debug("uncompressed UDP ports: src = %d, dst = %d\n", + ntohs(uh->source), ntohs(uh->dest)); + + /* checksum */ + if (tmp & LOWPAN_NHC_UDP_CS_C) { + pr_debug_ratelimited("checksum elided currently not supported\n"); + goto err; + } else { + fail |= lowpan_fetch_skb(skb, &uh->check, 2); + } + + /* + * UDP lenght needs to be infered from the lower layers + * here, we obtain the hint from the remaining size of the + * frame + */ + uh->len = htons(skb->len + sizeof(struct 
udphdr)); + pr_debug("uncompressed UDP length: src = %d", ntohs(uh->len)); + } else { + pr_debug("ERROR: unsupported NH format\n"); + goto err; + } + + if (fail) + goto err; + + return 0; +err: + return -EINVAL; +} + +/* TTL uncompression values */ +static const u8 lowpan_ttl_values[] = { 0, 1, 64, 255 }; + +int lowpan_process_data(struct sk_buff *skb, struct net_device *dev, + const u8 *saddr, const u8 saddr_type, const u8 saddr_len, + const u8 *daddr, const u8 daddr_type, const u8 daddr_len, + u8 iphc0, u8 iphc1, skb_delivery_cb deliver_skb) +{ + struct ipv6hdr hdr = {}; + u8 tmp, num_context = 0; + int err; + + raw_dump_table(__func__, "raw skb data dump uncompressed", + skb->data, skb->len); + + /* another if the CID flag is set */ + if (iphc1 & LOWPAN_IPHC_CID) { + pr_debug("CID flag is set, increase header with one\n"); + if (lowpan_fetch_skb_u8(skb, &num_context)) + goto drop; + } + + hdr.version = 6; + + /* Traffic Class and Flow Label */ + switch ((iphc0 & LOWPAN_IPHC_TF) >> 3) { + /* + * Traffic Class and FLow Label carried in-line + * ECN + DSCP + 4-bit Pad + Flow Label (4 bytes) + */ + case 0: /* 00b */ + if (lowpan_fetch_skb_u8(skb, &tmp)) + goto drop; + + memcpy(&hdr.flow_lbl, &skb->data[0], 3); + skb_pull(skb, 3); + hdr.priority = ((tmp >> 2) & 0x0f); + hdr.flow_lbl[0] = ((tmp >> 2) & 0x30) | (tmp << 6) | + (hdr.flow_lbl[0] & 0x0f); + break; + /* + * Traffic class carried in-line + * ECN + DSCP (1 byte), Flow Label is elided + */ + case 2: /* 10b */ + if (lowpan_fetch_skb_u8(skb, &tmp)) + goto drop; + + hdr.priority = ((tmp >> 2) & 0x0f); + hdr.flow_lbl[0] = ((tmp << 6) & 0xC0) | ((tmp >> 2) & 0x30); + break; + /* + * Flow Label carried in-line + * ECN + 2-bit Pad + Flow Label (3 bytes), DSCP is elided + */ + case 1: /* 01b */ + if (lowpan_fetch_skb_u8(skb, &tmp)) + goto drop; + + hdr.flow_lbl[0] = (skb->data[0] & 0x0F) | ((tmp >> 2) & 0x30); + memcpy(&hdr.flow_lbl[1], &skb->data[0], 2); + skb_pull(skb, 2); + break; + /* Traffic Class and Flow Label 
are elided */ + case 3: /* 11b */ + break; + default: + break; + } + + /* Next Header */ + if ((iphc0 & LOWPAN_IPHC_NH_C) == 0) { + /* Next header is carried inline */ + if (lowpan_fetch_skb_u8(skb, &(hdr.nexthdr))) + goto drop; + + pr_debug("NH flag is set, next header carried inline: %02x\n", + hdr.nexthdr); + } + + /* Hop Limit */ + if ((iphc0 & 0x03) != LOWPAN_IPHC_TTL_I) + hdr.hop_limit = lowpan_ttl_values[iphc0 & 0x03]; + else { + if (lowpan_fetch_skb_u8(skb, &(hdr.hop_limit))) + goto drop; + } + + /* Extract SAM to the tmp variable */ + tmp = ((iphc1 & LOWPAN_IPHC_SAM) >> LOWPAN_IPHC_SAM_BIT) & 0x03; + + if (iphc1 & LOWPAN_IPHC_SAC) { + /* Source address context based uncompression */ + pr_debug("SAC bit is set. Handle context based source address.\n"); + err = uncompress_context_based_src_addr( + skb, &hdr.saddr, tmp); + } else { + /* Source address uncompression */ + pr_debug("source address stateless compression\n"); + err = uncompress_addr(skb, &hdr.saddr, tmp, saddr, + saddr_type, saddr_len); + } + + /* Check on error of previous branch */ + if (err) + goto drop; + + /* Extract DAM to the tmp variable */ + tmp = ((iphc1 & LOWPAN_IPHC_DAM_11) >> LOWPAN_IPHC_DAM_BIT) & 0x03; + + /* check for Multicast Compression */ + if (iphc1 & LOWPAN_IPHC_M) { + if (iphc1 & LOWPAN_IPHC_DAC) { + pr_debug("dest: context-based mcast compression\n"); + /* TODO: implement this */ + } else { + err = lowpan_uncompress_multicast_daddr( + skb, &hdr.daddr, tmp); + if (err) + goto drop; + } + } else { + err = uncompress_addr(skb, &hdr.daddr, tmp, daddr, + daddr_type, daddr_len); + pr_debug("dest: stateless compression mode %d dest %pI6c\n", + tmp, &hdr.daddr); + if (err) + goto drop; + } + + /* UDP data uncompression */ + if (iphc0 & LOWPAN_IPHC_NH_C) { + struct udphdr uh; + struct sk_buff *new; + if (uncompress_udp_header(skb, &uh)) + goto drop; + + /* + * replace the compressed UDP head by the uncompressed UDP + * header + */ + new = skb_copy_expand(skb, sizeof(struct udphdr), 
+ skb_tailroom(skb), GFP_ATOMIC); + kfree_skb(skb); + + if (!new) + return -ENOMEM; + + skb = new; + + skb_push(skb, sizeof(struct udphdr)); + skb_reset_transport_header(skb); + skb_copy_to_linear_data(skb, &uh, sizeof(struct udphdr)); + + raw_dump_table(__func__, "raw UDP header dump", + (u8 *)&uh, sizeof(uh)); + + hdr.nexthdr = UIP_PROTO_UDP; + } + + hdr.payload_len = htons(skb->len); + + pr_debug("skb headroom size = %d, data length = %d\n", + skb_headroom(skb), skb->len); + + pr_debug("IPv6 header dump:\n\tversion = %d\n\tlength = %d\n\t" + "nexthdr = 0x%02x\n\thop_lim = %d\n\tdest = %pI6c\n", + hdr.version, ntohs(hdr.payload_len), hdr.nexthdr, + hdr.hop_limit, &hdr.daddr); + + raw_dump_table(__func__, "raw header dump", (u8 *)&hdr, + sizeof(hdr)); + + return skb_deliver(skb, &hdr, dev, deliver_skb); + +drop: + kfree_skb(skb); + return -EINVAL; +} +EXPORT_SYMBOL_GPL(lowpan_process_data); + +static u8 lowpan_compress_addr_64(u8 **hc06_ptr, u8 shift, + const struct in6_addr *ipaddr, + const unsigned char *lladdr) +{ + u8 val = 0; + + if (is_addr_mac_addr_based(ipaddr, lladdr)) { + val = 3; /* 0-bits */ + pr_debug("address compression 0 bits\n"); + } else if (lowpan_is_iid_16_bit_compressable(ipaddr)) { + /* compress IID to 16 bits xxxx::XXXX */ + memcpy(*hc06_ptr, &ipaddr->s6_addr16[7], 2); + *hc06_ptr += 2; + val = 2; /* 16-bits */ + raw_dump_inline(NULL, "Compressed ipv6 addr is (16 bits)", + *hc06_ptr - 2, 2); + } else { + /* do not compress IID => xxxx::IID */ + memcpy(*hc06_ptr, &ipaddr->s6_addr16[4], 8); + *hc06_ptr += 8; + val = 1; /* 64-bits */ + raw_dump_inline(NULL, "Compressed ipv6 addr is (64 bits)", + *hc06_ptr - 8, 8); + } + + return rol8(val, shift); +} + +static void compress_udp_header(u8 **hc06_ptr, struct sk_buff *skb) +{ + struct udphdr *uh = udp_hdr(skb); + u8 tmp; + + if (((ntohs(uh->source) & LOWPAN_NHC_UDP_4BIT_MASK) == + LOWPAN_NHC_UDP_4BIT_PORT) && + ((ntohs(uh->dest) & LOWPAN_NHC_UDP_4BIT_MASK) == + LOWPAN_NHC_UDP_4BIT_PORT)) { + 
pr_debug("UDP header: both ports compression to 4 bits\n"); + /* compression value */ + tmp = LOWPAN_NHC_UDP_CS_P_11; + lowpan_push_hc_data(hc06_ptr, &tmp, sizeof(tmp)); + /* source and destination port */ + tmp = ntohs(uh->dest) - LOWPAN_NHC_UDP_4BIT_PORT + + ((ntohs(uh->source) - LOWPAN_NHC_UDP_4BIT_PORT) << 4); + lowpan_push_hc_data(hc06_ptr, &tmp, sizeof(tmp)); + } else if ((ntohs(uh->dest) & LOWPAN_NHC_UDP_8BIT_MASK) == + LOWPAN_NHC_UDP_8BIT_PORT) { + pr_debug("UDP header: remove 8 bits of dest\n"); + /* compression value */ + tmp = LOWPAN_NHC_UDP_CS_P_01; + lowpan_push_hc_data(hc06_ptr, &tmp, sizeof(tmp)); + /* source port */ + lowpan_push_hc_data(hc06_ptr, &uh->source, sizeof(uh->source)); + /* destination port */ + tmp = ntohs(uh->dest) - LOWPAN_NHC_UDP_8BIT_PORT; + lowpan_push_hc_data(hc06_ptr, &tmp, sizeof(tmp)); + } else if ((ntohs(uh->source) & LOWPAN_NHC_UDP_8BIT_MASK) == + LOWPAN_NHC_UDP_8BIT_PORT) { + pr_debug("UDP header: remove 8 bits of source\n"); + /* compression value */ + tmp = LOWPAN_NHC_UDP_CS_P_10; + lowpan_push_hc_data(hc06_ptr, &tmp, sizeof(tmp)); + /* source port */ + tmp = ntohs(uh->source) - LOWPAN_NHC_UDP_8BIT_PORT; + lowpan_push_hc_data(hc06_ptr, &tmp, sizeof(tmp)); + /* destination port */ + lowpan_push_hc_data(hc06_ptr, &uh->dest, sizeof(uh->dest)); + } else { + pr_debug("UDP header: can't compress\n"); + /* compression value */ + tmp = LOWPAN_NHC_UDP_CS_P_00; + lowpan_push_hc_data(hc06_ptr, &tmp, sizeof(tmp)); + /* source port */ + lowpan_push_hc_data(hc06_ptr, &uh->source, sizeof(uh->source)); + /* destination port */ + lowpan_push_hc_data(hc06_ptr, &uh->dest, sizeof(uh->dest)); + } + + /* checksum is always inline */ + lowpan_push_hc_data(hc06_ptr, &uh->check, sizeof(uh->check)); + + /* skip the UDP header */ + skb_pull(skb, sizeof(struct udphdr)); +} + +int lowpan_header_compress(struct sk_buff *skb, struct net_device *dev, + unsigned short type, const void *_daddr, + const void *_saddr, unsigned int len) +{ + u8 tmp, iphc0, 
iphc1, *hc06_ptr; + struct ipv6hdr *hdr; + u8 head[100] = {}; + + if (type != ETH_P_IPV6) + return -EINVAL; + + hdr = ipv6_hdr(skb); + hc06_ptr = head + 2; + + pr_debug("IPv6 header dump:\n\tversion = %d\n\tlength = %d\n" + "\tnexthdr = 0x%02x\n\thop_lim = %d\n\tdest = %pI6c\n", + hdr->version, ntohs(hdr->payload_len), hdr->nexthdr, + hdr->hop_limit, &hdr->daddr); + + raw_dump_table(__func__, "raw skb network header dump", + skb_network_header(skb), sizeof(struct ipv6hdr)); + + /* + * As we copy some bit-length fields, in the IPHC encoding bytes, + * we sometimes use |= + * If the field is 0, and the current bit value in memory is 1, + * this does not work. We therefore reset the IPHC encoding here + */ + iphc0 = LOWPAN_DISPATCH_IPHC; + iphc1 = 0; + + /* TODO: context lookup */ + + raw_dump_inline(__func__, "saddr", + (unsigned char *)_saddr, IEEE802154_ADDR_LEN); + raw_dump_inline(__func__, "daddr", + (unsigned char *)_daddr, IEEE802154_ADDR_LEN); + + raw_dump_table(__func__, + "sending raw skb network uncompressed packet", + skb->data, skb->len); + + /* + * Traffic class, flow label + * If flow label is 0, compress it. 
If traffic class is 0, compress it + * We have to process both in the same time as the offset of traffic + * class depends on the presence of version and flow label + */ + + /* hc06 format of TC is ECN | DSCP , original one is DSCP | ECN */ + tmp = (hdr->priority << 4) | (hdr->flow_lbl[0] >> 4); + tmp = ((tmp & 0x03) << 6) | (tmp >> 2); + + if (((hdr->flow_lbl[0] & 0x0F) == 0) && + (hdr->flow_lbl[1] == 0) && (hdr->flow_lbl[2] == 0)) { + /* flow label can be compressed */ + iphc0 |= LOWPAN_IPHC_FL_C; + if ((hdr->priority == 0) && + ((hdr->flow_lbl[0] & 0xF0) == 0)) { + /* compress (elide) all */ + iphc0 |= LOWPAN_IPHC_TC_C; + } else { + /* compress only the flow label */ + *hc06_ptr = tmp; + hc06_ptr += 1; + } + } else { + /* Flow label cannot be compressed */ + if ((hdr->priority == 0) && + ((hdr->flow_lbl[0] & 0xF0) == 0)) { + /* compress only traffic class */ + iphc0 |= LOWPAN_IPHC_TC_C; + *hc06_ptr = (tmp & 0xc0) | (hdr->flow_lbl[0] & 0x0F); + memcpy(hc06_ptr + 1, &hdr->flow_lbl[1], 2); + hc06_ptr += 3; + } else { + /* compress nothing */ + memcpy(hc06_ptr, &hdr, 4); + /* replace the top byte with new ECN | DSCP format */ + *hc06_ptr = tmp; + hc06_ptr += 4; + } + } + + /* NOTE: payload length is always compressed */ + + /* Next Header is compress if UDP */ + if (hdr->nexthdr == UIP_PROTO_UDP) + iphc0 |= LOWPAN_IPHC_NH_C; + + if ((iphc0 & LOWPAN_IPHC_NH_C) == 0) { + *hc06_ptr = hdr->nexthdr; + hc06_ptr += 1; + } + + /* + * Hop limit + * if 1: compress, encoding is 01 + * if 64: compress, encoding is 10 + * if 255: compress, encoding is 11 + * else do not compress + */ + switch (hdr->hop_limit) { + case 1: + iphc0 |= LOWPAN_IPHC_TTL_1; + break; + case 64: + iphc0 |= LOWPAN_IPHC_TTL_64; + break; + case 255: + iphc0 |= LOWPAN_IPHC_TTL_255; + break; + default: + *hc06_ptr = hdr->hop_limit; + hc06_ptr += 1; + break; + } + + /* source address compression */ + if (is_addr_unspecified(&hdr->saddr)) { + pr_debug("source address is unspecified, setting SAC\n"); + iphc1 |= 
LOWPAN_IPHC_SAC; + /* TODO: context lookup */ + } else if (is_addr_link_local(&hdr->saddr)) { + iphc1 |= lowpan_compress_addr_64(&hc06_ptr, + LOWPAN_IPHC_SAM_BIT, &hdr->saddr, _saddr); + pr_debug("source address unicast link-local %pI6c " + "iphc1 0x%02x\n", &hdr->saddr, iphc1); + } else { + pr_debug("send the full source address\n"); + memcpy(hc06_ptr, &hdr->saddr.s6_addr16[0], 16); + hc06_ptr += 16; + } + + /* destination address compression */ + if (is_addr_mcast(&hdr->daddr)) { + pr_debug("destination address is multicast: "); + iphc1 |= LOWPAN_IPHC_M; + if (lowpan_is_mcast_addr_compressable8(&hdr->daddr)) { + pr_debug("compressed to 1 octet\n"); + iphc1 |= LOWPAN_IPHC_DAM_11; + /* use last byte */ + *hc06_ptr = hdr->daddr.s6_addr[15]; + hc06_ptr += 1; + } else if (lowpan_is_mcast_addr_compressable32(&hdr->daddr)) { + pr_debug("compressed to 4 octets\n"); + iphc1 |= LOWPAN_IPHC_DAM_10; + /* second byte + the last three */ + *hc06_ptr = hdr->daddr.s6_addr[1]; + memcpy(hc06_ptr + 1, &hdr->daddr.s6_addr[13], 3); + hc06_ptr += 4; + } else if (lowpan_is_mcast_addr_compressable48(&hdr->daddr)) { + pr_debug("compressed to 6 octets\n"); + iphc1 |= LOWPAN_IPHC_DAM_01; + /* second byte + the last five */ + *hc06_ptr = hdr->daddr.s6_addr[1]; + memcpy(hc06_ptr + 1, &hdr->daddr.s6_addr[11], 5); + hc06_ptr += 6; + } else { + pr_debug("using full address\n"); + iphc1 |= LOWPAN_IPHC_DAM_00; + memcpy(hc06_ptr, &hdr->daddr.s6_addr[0], 16); + hc06_ptr += 16; + } + } else { + /* TODO: context lookup */ + if (is_addr_link_local(&hdr->daddr)) { + iphc1 |= lowpan_compress_addr_64(&hc06_ptr, + LOWPAN_IPHC_DAM_BIT, &hdr->daddr, _daddr); + pr_debug("dest address unicast link-local %pI6c " + "iphc1 0x%02x\n", &hdr->daddr, iphc1); + } else { + pr_debug("dest address unicast %pI6c\n", &hdr->daddr); + memcpy(hc06_ptr, &hdr->daddr.s6_addr16[0], 16); + hc06_ptr += 16; + } + } + + /* UDP header compression */ + if (hdr->nexthdr == UIP_PROTO_UDP) + compress_udp_header(&hc06_ptr, skb); + + 
head[0] = iphc0; + head[1] = iphc1; + + skb_pull(skb, sizeof(struct ipv6hdr)); + skb_reset_transport_header(skb); + memcpy(skb_push(skb, hc06_ptr - head), head, hc06_ptr - head); + skb_reset_network_header(skb); + + pr_debug("header len %d skb %u\n", (int)(hc06_ptr - head), skb->len); + + raw_dump_table(__func__, "raw skb data dump compressed", + skb->data, skb->len); + return 0; +} +EXPORT_SYMBOL_GPL(lowpan_header_compress); + +MODULE_LICENSE("GPL"); diff --git a/net/ieee802154/Kconfig b/net/ieee802154/Kconfig index b2e06df0076..9c9879d5ea6 100644 --- a/net/ieee802154/Kconfig +++ b/net/ieee802154/Kconfig @@ -13,5 +13,12 @@ config IEEE802154 config IEEE802154_6LOWPAN tristate "6lowpan support over IEEE 802.15.4" depends on IEEE802154 && IPV6 + select 6LOWPAN_IPHC ---help--- IPv6 compression over IEEE 802.15.4. + +config 6LOWPAN_IPHC + tristate + ---help--- + 6lowpan compression code which is shared between IEEE 802.15.4 and Bluetooth + stacks. diff --git a/net/ieee802154/Makefile b/net/ieee802154/Makefile index d7716d64c6b..e8f05885ced 100644 --- a/net/ieee802154/Makefile +++ b/net/ieee802154/Makefile @@ -1,5 +1,6 @@ obj-$(CONFIG_IEEE802154) += ieee802154.o af_802154.o obj-$(CONFIG_IEEE802154_6LOWPAN) += 6lowpan.o +obj-$(CONFIG_6LOWPAN_IPHC) += 6lowpan_iphc.o ieee802154-y := netlink.o nl-mac.o nl-phy.o nl_policy.o wpan-class.o af_802154-y := af_ieee802154.o raw.o dgram.o diff --git a/net/ieee802154/dgram.c b/net/ieee802154/dgram.c index 1865fdf5a5a..1846c1fe0d0 100644 --- a/net/ieee802154/dgram.c +++ b/net/ieee802154/dgram.c @@ -291,9 +291,7 @@ static int dgram_recvmsg(struct kiocb *iocb, struct sock *sk, size_t copied = 0; int err = -EOPNOTSUPP; struct sk_buff *skb; - struct sockaddr_ieee802154 *saddr; - - saddr = (struct sockaddr_ieee802154 *)msg->msg_name; + DECLARE_SOCKADDR(struct sockaddr_ieee802154 *, saddr, msg->msg_name); skb = skb_recv_datagram(sk, flags, noblock, &err); if (!skb) diff --git a/net/ieee802154/wpan-class.c b/net/ieee802154/wpan-class.c index 
ef56ab5b35f..4dd37615a74 100644 --- a/net/ieee802154/wpan-class.c +++ b/net/ieee802154/wpan-class.c @@ -46,7 +46,7 @@ MASTER_SHOW(current_channel, "%d"); MASTER_SHOW(current_page, "%d"); MASTER_SHOW_COMPLEX(transmit_power, "%d +- %d dB", ((signed char) (phy->transmit_power << 2)) >> 2, - (phy->transmit_power >> 6) ? (phy->transmit_power >> 6) * 3 : 1 ); + (phy->transmit_power >> 6) ? (phy->transmit_power >> 6) * 3 : 1); MASTER_SHOW(cca_mode, "%d"); static ssize_t channels_supported_show(struct device *dev, diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 4b81e91c80f..f8c49ce5b28 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -11,7 +11,7 @@ obj-y := route.o inetpeer.o protocol.o \ tcp_offload.o datagram.o raw.o udp.o udplite.o \ udp_offload.o arp.o icmp.o devinet.o af_inet.o igmp.o \ fib_frontend.o fib_semantics.o fib_trie.o \ - inet_fragment.o ping.o ip_tunnel_core.o + inet_fragment.o ping.o ip_tunnel_core.o gre_offload.o obj-$(CONFIG_NET_IP_TUNNEL) += ip_tunnel.o obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o @@ -19,7 +19,7 @@ obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o obj-$(CONFIG_IP_MROUTE) += ipmr.o obj-$(CONFIG_NET_IPIP) += ipip.o -gre-y := gre_demux.o gre_offload.o +gre-y := gre_demux.o obj-$(CONFIG_NET_IPGRE_DEMUX) += gre.o obj-$(CONFIG_NET_IPGRE) += ip_gre.o obj-$(CONFIG_NET_IPVTI) += ip_vti.o diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 70011e029ac..ecd2c3f245c 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -126,9 +126,6 @@ static struct list_head inetsw[SOCK_MAX]; static DEFINE_SPINLOCK(inetsw_lock); -struct ipv4_config ipv4_config; -EXPORT_SYMBOL(ipv4_config); - /* New destruction routine */ void inet_sock_destruct(struct sock *sk) @@ -342,7 +339,7 @@ lookup_protocol: inet->hdrincl = 1; } - if (ipv4_config.no_pmtu_disc) + if (net->ipv4.sysctl_ip_no_pmtu_disc) inet->pmtudisc = IP_PMTUDISC_DONT; else inet->pmtudisc = IP_PMTUDISC_WANT; @@ -1133,7 +1130,7 @@ static int 
inet_sk_reselect_saddr(struct sock *sk) fl4 = &inet->cork.fl.u.ip4; rt = ip_route_connect(fl4, daddr, 0, RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, sk->sk_protocol, - inet->inet_sport, inet->inet_dport, sk, false); + inet->inet_sport, inet->inet_dport, sk); if (IS_ERR(rt)) return PTR_ERR(rt); @@ -1377,8 +1374,12 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, if (!NAPI_GRO_CB(p)->same_flow) continue; - iph2 = ip_hdr(p); - + iph2 = (struct iphdr *)(p->data + off); + /* The above works because, with the exception of the top + * (inner most) layer, we only aggregate pkts with the same + * hdr length so all the hdrs we'll need to verify will start + * at the same offset. + */ if ((iph->protocol ^ iph2->protocol) | ((__force u32)iph->saddr ^ (__force u32)iph2->saddr) | ((__force u32)iph->daddr ^ (__force u32)iph2->daddr)) { @@ -1390,13 +1391,24 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, NAPI_GRO_CB(p)->flush |= (iph->ttl ^ iph2->ttl) | (iph->tos ^ iph2->tos) | - (__force int)((iph->frag_off ^ iph2->frag_off) & htons(IP_DF)) | - ((u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) ^ id); + ((iph->frag_off ^ iph2->frag_off) & htons(IP_DF)); + /* Save the IP ID check to be included later when we get to + * the transport layer so only the inner most IP ID is checked. + * This is because some GSO/TSO implementations do not + * correctly increment the IP ID for the outer hdrs. + */ + NAPI_GRO_CB(p)->flush_id = + ((u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) ^ id); NAPI_GRO_CB(p)->flush |= flush; } NAPI_GRO_CB(skb)->flush |= flush; + skb_set_network_header(skb, off); + /* The above will be needed by the transport layer if there is one + * immediately following this IP hdr. 
+ */ + skb_gro_pull(skb, sizeof(*iph)); skb_set_transport_header(skb, skb_gro_offset(skb)); @@ -1411,10 +1423,10 @@ out: return pp; } -static int inet_gro_complete(struct sk_buff *skb) +static int inet_gro_complete(struct sk_buff *skb, int nhoff) { - __be16 newlen = htons(skb->len - skb_network_offset(skb)); - struct iphdr *iph = ip_hdr(skb); + __be16 newlen = htons(skb->len - nhoff); + struct iphdr *iph = (struct iphdr *)(skb->data + nhoff); const struct net_offload *ops; int proto = iph->protocol; int err = -ENOSYS; @@ -1427,7 +1439,11 @@ static int inet_gro_complete(struct sk_buff *skb) if (WARN_ON(!ops || !ops->callbacks.gro_complete)) goto out_unlock; - err = ops->callbacks.gro_complete(skb); + /* Only need to add sizeof(*iph) to get to the next hdr below + * because any hdr with option will have been flushed in + * inet_gro_receive(). + */ + err = ops->callbacks.gro_complete(skb, nhoff + sizeof(*iph)); out_unlock: rcu_read_unlock(); @@ -1529,6 +1545,7 @@ static const struct net_protocol tcp_protocol = { .err_handler = tcp_v4_err, .no_policy = 1, .netns_ok = 1, + .icmp_strict_tag_validation = 1, }; static const struct net_protocol udp_protocol = { diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 7808093cede..1a9b99e0446 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -166,18 +166,20 @@ struct neigh_table arp_tbl = { .id = "arp_cache", .parms = { .tbl = &arp_tbl, - .base_reachable_time = 30 * HZ, - .retrans_time = 1 * HZ, - .gc_staletime = 60 * HZ, .reachable_time = 30 * HZ, - .delay_probe_time = 5 * HZ, - .queue_len_bytes = 64*1024, - .ucast_probes = 3, - .mcast_probes = 3, - .anycast_delay = 1 * HZ, - .proxy_delay = (8 * HZ) / 10, - .proxy_qlen = 64, - .locktime = 1 * HZ, + .data = { + [NEIGH_VAR_MCAST_PROBES] = 3, + [NEIGH_VAR_UCAST_PROBES] = 3, + [NEIGH_VAR_RETRANS_TIME] = 1 * HZ, + [NEIGH_VAR_BASE_REACHABLE_TIME] = 30 * HZ, + [NEIGH_VAR_DELAY_PROBE_TIME] = 5 * HZ, + [NEIGH_VAR_GC_STALETIME] = 60 * HZ, + [NEIGH_VAR_QUEUE_LEN_BYTES] = 64 * 1024, + 
[NEIGH_VAR_PROXY_QLEN] = 64, + [NEIGH_VAR_ANYCAST_DELAY] = 1 * HZ, + [NEIGH_VAR_PROXY_DELAY] = (8 * HZ) / 10, + [NEIGH_VAR_LOCKTIME] = 1 * HZ, + }, }, .gc_interval = 30 * HZ, .gc_thresh1 = 128, @@ -359,14 +361,14 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb) if (!saddr) saddr = inet_select_addr(dev, target, RT_SCOPE_LINK); - probes -= neigh->parms->ucast_probes; + probes -= NEIGH_VAR(neigh->parms, UCAST_PROBES); if (probes < 0) { if (!(neigh->nud_state & NUD_VALID)) pr_debug("trying to ucast probe in NUD_INVALID\n"); neigh_ha_snapshot(dst_ha, neigh, dev); dst_hw = dst_ha; } else { - probes -= neigh->parms->app_probes; + probes -= NEIGH_VAR(neigh->parms, APP_PROBES); if (probes < 0) { neigh_app_ns(neigh); return; @@ -379,6 +381,7 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb) static int arp_ignore(struct in_device *in_dev, __be32 sip, __be32 tip) { + struct net *net = dev_net(in_dev->dev); int scope; switch (IN_DEV_ARP_IGNORE(in_dev)) { @@ -397,6 +400,7 @@ static int arp_ignore(struct in_device *in_dev, __be32 sip, __be32 tip) case 3: /* Do not reply for scope host addresses */ sip = 0; scope = RT_SCOPE_LINK; + in_dev = NULL; break; case 4: /* Reserved */ case 5: @@ -408,7 +412,7 @@ static int arp_ignore(struct in_device *in_dev, __be32 sip, __be32 tip) default: return 0; } - return !inet_confirm_addr(in_dev, sip, tip, scope); + return !inet_confirm_addr(net, in_dev, sip, tip, scope); } static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev) @@ -728,6 +732,7 @@ static int arp_process(struct sk_buff *skb) int addr_type; struct neighbour *n; struct net *net = dev_net(dev); + bool is_garp = false; /* arp_rcv below verifies the ARP header and verifies the device * is ARP'able. 
@@ -871,7 +876,7 @@ static int arp_process(struct sk_buff *skb) if (NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED || skb->pkt_type == PACKET_HOST || - in_dev->arp_parms->proxy_delay == 0) { + NEIGH_VAR(in_dev->arp_parms, PROXY_DELAY) == 0) { arp_send(ARPOP_REPLY, ETH_P_ARP, sip, dev, tip, sha, dev->dev_addr, sha); @@ -894,10 +899,12 @@ static int arp_process(struct sk_buff *skb) It is possible, that this option should be enabled for some devices (strip is candidate) */ + is_garp = arp->ar_op == htons(ARPOP_REQUEST) && tip == sip && + inet_addr_type(net, sip) == RTN_UNICAST; + if (n == NULL && - (arp->ar_op == htons(ARPOP_REPLY) || - (arp->ar_op == htons(ARPOP_REQUEST) && tip == sip)) && - inet_addr_type(net, sip) == RTN_UNICAST) + ((arp->ar_op == htons(ARPOP_REPLY) && + inet_addr_type(net, sip) == RTN_UNICAST) || is_garp)) n = __neigh_lookup(&arp_tbl, &sip, dev, 1); } @@ -910,7 +917,10 @@ static int arp_process(struct sk_buff *skb) agents are active. Taking the first reply prevents arp trashing and chooses the fastest router. */ - override = time_after(jiffies, n->updated + n->parms->locktime); + override = time_after(jiffies, + n->updated + + NEIGH_VAR(n->parms, LOCKTIME)) || + is_garp; /* Broadcast replies and request packets do not assert neighbour reachability. 
@@ -1107,7 +1117,7 @@ static int arp_req_get(struct arpreq *r, struct net_device *dev) return err; } -int arp_invalidate(struct net_device *dev, __be32 ip) +static int arp_invalidate(struct net_device *dev, __be32 ip) { struct neighbour *neigh = neigh_lookup(&arp_tbl, &ip, dev); int err = -ENXIO; @@ -1122,7 +1132,6 @@ int arp_invalidate(struct net_device *dev, __be32 ip) return err; } -EXPORT_SYMBOL(arp_invalidate); static int arp_req_delete_public(struct net *net, struct arpreq *r, struct net_device *dev) @@ -1284,7 +1293,7 @@ void __init arp_init(void) dev_add_pack(&arp_packet_type); arp_proc_init(); #ifdef CONFIG_SYSCTL - neigh_sysctl_register(NULL, &arp_tbl.parms, "ipv4", NULL); + neigh_sysctl_register(NULL, &arp_tbl.parms, NULL); #endif register_netdevice_notifier(&arp_netdev_notifier); } diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 667c1d4ca98..69e77c8ff28 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -31,8 +31,7 @@ * the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
* */ @@ -1336,8 +1335,7 @@ static int cipso_v4_parsetag_rbm(const struct cipso_v4_doi *doi_def, secattr->flags |= NETLBL_SECATTR_MLS_LVL; if (tag_len > 4) { - secattr->attr.mls.cat = - netlbl_secattr_catmap_alloc(GFP_ATOMIC); + secattr->attr.mls.cat = netlbl_secattr_catmap_alloc(GFP_ATOMIC); if (secattr->attr.mls.cat == NULL) return -ENOMEM; @@ -1432,8 +1430,7 @@ static int cipso_v4_parsetag_enum(const struct cipso_v4_doi *doi_def, secattr->flags |= NETLBL_SECATTR_MLS_LVL; if (tag_len > 4) { - secattr->attr.mls.cat = - netlbl_secattr_catmap_alloc(GFP_ATOMIC); + secattr->attr.mls.cat = netlbl_secattr_catmap_alloc(GFP_ATOMIC); if (secattr->attr.mls.cat == NULL) return -ENOMEM; @@ -1527,8 +1524,7 @@ static int cipso_v4_parsetag_rng(const struct cipso_v4_doi *doi_def, secattr->flags |= NETLBL_SECATTR_MLS_LVL; if (tag_len > 4) { - secattr->attr.mls.cat = - netlbl_secattr_catmap_alloc(GFP_ATOMIC); + secattr->attr.mls.cat = netlbl_secattr_catmap_alloc(GFP_ATOMIC); if (secattr->attr.mls.cat == NULL) return -ENOMEM; diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index 19e36376d2a..8b5134c582f 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -53,7 +53,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) rt = ip_route_connect(fl4, usin->sin_addr.s_addr, saddr, RT_CONN_FLAGS(sk), oif, sk->sk_protocol, - inet->inet_sport, usin->sin_port, sk, true); + inet->inet_sport, usin->sin_port, sk); if (IS_ERR(rt)) { err = PTR_ERR(rt); if (err == -ENETUNREACH) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index a1b5bcbd04a..ac2dff3c2c1 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -99,6 +99,7 @@ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = { [IFA_BROADCAST] = { .type = NLA_U32 }, [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, [IFA_CACHEINFO] = { .len = sizeof(struct ifa_cacheinfo) }, + [IFA_FLAGS] = { .type = NLA_U32 }, }; #define IN4_ADDR_HSIZE_SHIFT 8 @@ -463,7 +464,7 @@ static int 
__inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, } if (!(ifa->ifa_flags & IFA_F_SECONDARY)) { - net_srandom(ifa->ifa_local); + prandom_seed((__force u32) ifa->ifa_local); ifap = last_primary; } @@ -473,7 +474,7 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, inet_hash_insert(dev_net(in_dev->dev), ifa); cancel_delayed_work(&check_lifetime_work); - schedule_delayed_work(&check_lifetime_work, 0); + queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0); /* Send message first, then call notifier. Notifier will trigger FIB update, so that @@ -500,6 +501,7 @@ static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa) return -ENOBUFS; } ipv4_devconf_setall(in_dev); + neigh_parms_data_state_setall(in_dev->arp_parms); if (ifa->ifa_dev != in_dev) { WARN_ON(ifa->ifa_dev); in_dev_hold(in_dev); @@ -682,7 +684,8 @@ static void check_lifetime(struct work_struct *work) if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX)) next_sched = now + ADDRCONF_TIMER_FUZZ_MAX; - schedule_delayed_work(&check_lifetime_work, next_sched - now); + queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, + next_sched - now); } static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft, @@ -747,6 +750,7 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh, goto errout; ipv4_devconf_setall(in_dev); + neigh_parms_data_state_setall(in_dev->arp_parms); in_dev_hold(in_dev); if (tb[IFA_ADDRESS] == NULL) @@ -755,7 +759,8 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh, INIT_HLIST_NODE(&ifa->hash); ifa->ifa_prefixlen = ifm->ifa_prefixlen; ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen); - ifa->ifa_flags = ifm->ifa_flags; + ifa->ifa_flags = tb[IFA_FLAGS] ? 
nla_get_u32(tb[IFA_FLAGS]) : + ifm->ifa_flags; ifa->ifa_scope = ifm->ifa_scope; ifa->ifa_dev = in_dev; @@ -838,7 +843,8 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) ifa = ifa_existing; set_ifa_lifetime(ifa, valid_lft, prefered_lft); cancel_delayed_work(&check_lifetime_work); - schedule_delayed_work(&check_lifetime_work, 0); + queue_delayed_work(system_power_efficient_wq, + &check_lifetime_work, 0); rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid); blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa); } @@ -1236,22 +1242,21 @@ static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst, /* * Confirm that local IP address exists using wildcards: - * - in_dev: only on this interface, 0=any interface + * - net: netns to check, cannot be NULL + * - in_dev: only on this interface, NULL=any interface * - dst: only in the same subnet as dst, 0=any dst * - local: address, 0=autoselect the local address * - scope: maximum allowed scope value for the local address */ -__be32 inet_confirm_addr(struct in_device *in_dev, +__be32 inet_confirm_addr(struct net *net, struct in_device *in_dev, __be32 dst, __be32 local, int scope) { __be32 addr = 0; struct net_device *dev; - struct net *net; - if (scope != RT_SCOPE_LINK) + if (in_dev != NULL) return confirm_addr_indev(in_dev, dst, local, scope); - net = dev_net(in_dev->dev); rcu_read_lock(); for_each_netdev_rcu(net, dev) { in_dev = __in_dev_get_rcu(dev); @@ -1382,6 +1387,8 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME); + ipv4_devconf_setall(in_dev); + neigh_parms_data_state_setall(in_dev->arp_parms); inet_insert_ifa(ifa); } } @@ -1435,7 +1442,8 @@ static size_t inet_nlmsg_size(void) + nla_total_size(4) /* IFA_ADDRESS */ + nla_total_size(4) /* IFA_LOCAL */ + nla_total_size(4) /* IFA_BROADCAST */ - + nla_total_size(IFNAMSIZ); /* IFA_LABEL */ + + 
nla_total_size(IFNAMSIZ) /* IFA_LABEL */ + + nla_total_size(4); /* IFA_FLAGS */ } static inline u32 cstamp_delta(unsigned long cstamp) @@ -1503,6 +1511,7 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) || (ifa->ifa_label[0] && nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) || + nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) || put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp, preferred, valid)) goto nla_put_failure; @@ -1691,6 +1700,8 @@ static int inet_netconf_msgsize_devconf(int type) size += nla_total_size(4); if (type == -1 || type == NETCONFA_MC_FORWARDING) size += nla_total_size(4); + if (type == -1 || type == NETCONFA_PROXY_NEIGH) + size += nla_total_size(4); return size; } @@ -1727,6 +1738,10 @@ static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex, nla_put_s32(skb, NETCONFA_MC_FORWARDING, IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0) goto nla_put_failure; + if ((type == -1 || type == NETCONFA_PROXY_NEIGH) && + nla_put_s32(skb, NETCONFA_PROXY_NEIGH, + IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0) + goto nla_put_failure; return nlmsg_end(skb, nlh); @@ -1764,6 +1779,7 @@ static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = { [NETCONFA_IFINDEX] = { .len = sizeof(int) }, [NETCONFA_FORWARDING] = { .len = sizeof(int) }, [NETCONFA_RP_FILTER] = { .len = sizeof(int) }, + [NETCONFA_PROXY_NEIGH] = { .len = sizeof(int) }, }; static int inet_netconf_get_devconf(struct sk_buff *in_skb, @@ -1945,6 +1961,19 @@ static void inet_forward_change(struct net *net) } } +static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf) +{ + if (cnf == net->ipv4.devconf_dflt) + return NETCONFA_IFINDEX_DEFAULT; + else if (cnf == net->ipv4.devconf_all) + return NETCONFA_IFINDEX_ALL; + else { + struct in_device *idev + = container_of(cnf, struct in_device, cnf); + return idev->dev->ifindex; + } +} + static int devinet_conf_proc(struct ctl_table *ctl, int write, void 
__user *buffer, size_t *lenp, loff_t *ppos) @@ -1957,6 +1986,7 @@ static int devinet_conf_proc(struct ctl_table *ctl, int write, struct ipv4_devconf *cnf = ctl->extra1; struct net *net = ctl->extra2; int i = (int *)ctl->data - cnf->data; + int ifindex; set_bit(i, cnf->state); @@ -1966,23 +1996,19 @@ static int devinet_conf_proc(struct ctl_table *ctl, int write, i == IPV4_DEVCONF_ROUTE_LOCALNET - 1) if ((new_value == 0) && (old_value != 0)) rt_cache_flush(net); + if (i == IPV4_DEVCONF_RP_FILTER - 1 && new_value != old_value) { - int ifindex; - - if (cnf == net->ipv4.devconf_dflt) - ifindex = NETCONFA_IFINDEX_DEFAULT; - else if (cnf == net->ipv4.devconf_all) - ifindex = NETCONFA_IFINDEX_ALL; - else { - struct in_device *idev = - container_of(cnf, struct in_device, - cnf); - ifindex = idev->dev->ifindex; - } + ifindex = devinet_conf_ifindex(net, cnf); inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER, ifindex, cnf); } + if (i == IPV4_DEVCONF_PROXY_ARP - 1 && + new_value != old_value) { + ifindex = devinet_conf_ifindex(net, cnf); + inet_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH, + ifindex, cnf); + } } return ret; @@ -2160,7 +2186,7 @@ static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf) static void devinet_sysctl_register(struct in_device *idev) { - neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL); + neigh_sysctl_register(idev->dev, idev->arp_parms, NULL); __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name, &idev->cnf); } @@ -2298,7 +2324,7 @@ void __init devinet_init(void) register_gifconf(PF_INET, inet_gifconf); register_netdevice_notifier(&ip_netdev_notifier); - schedule_delayed_work(&check_lifetime_work, 0); + queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0); rtnl_af_register(&inet_af_ops); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index d846304b7b8..c7539e22868 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1047,6 +1047,8 @@ static int 
fib_netdev_event(struct notifier_block *this, unsigned long event, vo } in_dev = __in_dev_get_rtnl(dev); + if (!in_dev) + return NOTIFY_DONE; switch (event) { case NETDEV_UP: diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h index 388d113fd28..1e4f6600b31 100644 --- a/net/ipv4/fib_lookup.h +++ b/net/ipv4/fib_lookup.h @@ -33,8 +33,6 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, u32 tb_id, void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, int dst_len, u32 tb_id, const struct nl_info *info, unsigned int nlm_flags); struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio); -int fib_detect_death(struct fib_info *fi, int order, - struct fib_info **last_resort, int *last_idx, int dflt); static inline void fib_result_assign(struct fib_result *res, struct fib_info *fi) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index e63f47a4e65..b53f0bf84dc 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -426,8 +426,9 @@ struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio) return NULL; } -int fib_detect_death(struct fib_info *fi, int order, - struct fib_info **last_resort, int *last_idx, int dflt) +static int fib_detect_death(struct fib_info *fi, int order, + struct fib_info **last_resort, int *last_idx, + int dflt) { struct neighbour *n; int state = NUD_NONE; diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c index 5893e99e829..1863422fb7d 100644 --- a/net/ipv4/gre_demux.c +++ b/net/ipv4/gre_demux.c @@ -355,14 +355,7 @@ static int __init gre_init(void) goto err_gre; } - if (gre_offload_init()) { - pr_err("can't add protocol offload\n"); - goto err_gso; - } - return 0; -err_gso: - gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO); err_gre: inet_del_protocol(&net_gre_protocol, IPPROTO_GRE); err: @@ -371,8 +364,6 @@ err: static void __exit gre_exit(void) { - gre_offload_exit(); - gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO); 
inet_del_protocol(&net_gre_protocol, IPPROTO_GRE); } diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index 2cd02f32f99..f1d32280cb5 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -11,6 +11,7 @@ */ #include <linux/skbuff.h> +#include <linux/init.h> #include <net/protocol.h> #include <net/gre.h> @@ -26,7 +27,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, { struct sk_buff *segs = ERR_PTR(-EINVAL); netdev_features_t enc_features; - int ghl = GRE_HEADER_SECTION; + int ghl; struct gre_base_hdr *greh; u16 mac_offset = skb->mac_header; int mac_len = skb->mac_len; @@ -49,15 +50,11 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, greh = (struct gre_base_hdr *)skb_transport_header(skb); - if (greh->flags & GRE_KEY) - ghl += GRE_HEADER_SECTION; - if (greh->flags & GRE_SEQ) - ghl += GRE_HEADER_SECTION; - if (greh->flags & GRE_CSUM) { - ghl += GRE_HEADER_SECTION; - csum = true; - } else - csum = false; + ghl = skb_inner_network_header(skb) - skb_transport_header(skb); + if (unlikely(ghl < sizeof(*greh))) + goto out; + + csum = !!(greh->flags & GRE_CSUM); if (unlikely(!pskb_may_pull(skb, ghl))) goto out; @@ -116,19 +113,175 @@ out: return segs; } +/* Compute the whole skb csum in s/w and store it, then verify GRO csum + * starting from gro_offset. 
+ */ +static __sum16 gro_skb_checksum(struct sk_buff *skb) +{ + __sum16 sum; + + skb->csum = skb_checksum(skb, 0, skb->len, 0); + NAPI_GRO_CB(skb)->csum = csum_sub(skb->csum, + csum_partial(skb->data, skb_gro_offset(skb), 0)); + sum = csum_fold(NAPI_GRO_CB(skb)->csum); + if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE)) { + if (unlikely(!sum)) + netdev_rx_csum_fault(skb->dev); + } else + skb->ip_summed = CHECKSUM_COMPLETE; + + return sum; +} + +static struct sk_buff **gre_gro_receive(struct sk_buff **head, + struct sk_buff *skb) +{ + struct sk_buff **pp = NULL; + struct sk_buff *p; + const struct gre_base_hdr *greh; + unsigned int hlen, grehlen; + unsigned int off; + int flush = 1; + struct packet_offload *ptype; + __be16 type; + + off = skb_gro_offset(skb); + hlen = off + sizeof(*greh); + greh = skb_gro_header_fast(skb, off); + if (skb_gro_header_hard(skb, hlen)) { + greh = skb_gro_header_slow(skb, hlen, off); + if (unlikely(!greh)) + goto out; + } + + /* Only support version 0 and K (key), C (csum) flags. Note that + * although the support for the S (seq#) flag can be added easily + * for GRO, this is problematic for GSO hence can not be enabled + * here because a GRO pkt may end up in the forwarding path, thus + * requiring GSO support to break it up correctly. 
+ */ + if ((greh->flags & ~(GRE_KEY|GRE_CSUM)) != 0) + goto out; + + type = greh->protocol; + + rcu_read_lock(); + ptype = gro_find_receive_by_type(type); + if (ptype == NULL) + goto out_unlock; + + grehlen = GRE_HEADER_SECTION; + + if (greh->flags & GRE_KEY) + grehlen += GRE_HEADER_SECTION; + + if (greh->flags & GRE_CSUM) + grehlen += GRE_HEADER_SECTION; + + hlen = off + grehlen; + if (skb_gro_header_hard(skb, hlen)) { + greh = skb_gro_header_slow(skb, hlen, off); + if (unlikely(!greh)) + goto out_unlock; + } + if (greh->flags & GRE_CSUM) { /* Need to verify GRE csum first */ + __sum16 csum = 0; + + if (skb->ip_summed == CHECKSUM_COMPLETE) + csum = csum_fold(NAPI_GRO_CB(skb)->csum); + /* Don't trust csum error calculated/reported by h/w */ + if (skb->ip_summed == CHECKSUM_NONE || csum != 0) + csum = gro_skb_checksum(skb); + + /* GRE CSUM is the 1's complement of the 1's complement sum + * of the GRE hdr plus payload so it should add up to 0xffff + * (and 0 after csum_fold()) just like the IPv4 hdr csum. + */ + if (csum) + goto out_unlock; + } + flush = 0; + + for (p = *head; p; p = p->next) { + const struct gre_base_hdr *greh2; + + if (!NAPI_GRO_CB(p)->same_flow) + continue; + + /* The following checks are needed to ensure only pkts + * from the same tunnel are considered for aggregation. + * The criteria for "the same tunnel" includes: + * 1) same version (we only support version 0 here) + * 2) same protocol (we only support ETH_P_IP for now) + * 3) same set of flags + * 4) same key if the key field is present. 
+ */ + greh2 = (struct gre_base_hdr *)(p->data + off); + + if (greh2->flags != greh->flags || + greh2->protocol != greh->protocol) { + NAPI_GRO_CB(p)->same_flow = 0; + continue; + } + if (greh->flags & GRE_KEY) { + /* compare keys */ + if (*(__be32 *)(greh2+1) != *(__be32 *)(greh+1)) { + NAPI_GRO_CB(p)->same_flow = 0; + continue; + } + } + } + + skb_gro_pull(skb, grehlen); + + /* Adjusted NAPI_GRO_CB(skb)->csum after skb_gro_pull()*/ + skb_gro_postpull_rcsum(skb, greh, grehlen); + + pp = ptype->callbacks.gro_receive(head, skb); + +out_unlock: + rcu_read_unlock(); +out: + NAPI_GRO_CB(skb)->flush |= flush; + + return pp; +} + +static int gre_gro_complete(struct sk_buff *skb, int nhoff) +{ + struct gre_base_hdr *greh = (struct gre_base_hdr *)(skb->data + nhoff); + struct packet_offload *ptype; + unsigned int grehlen = sizeof(*greh); + int err = -ENOENT; + __be16 type; + + type = greh->protocol; + if (greh->flags & GRE_KEY) + grehlen += GRE_HEADER_SECTION; + + if (greh->flags & GRE_CSUM) + grehlen += GRE_HEADER_SECTION; + + rcu_read_lock(); + ptype = gro_find_complete_by_type(type); + if (ptype != NULL) + err = ptype->callbacks.gro_complete(skb, nhoff + grehlen); + + rcu_read_unlock(); + return err; +} + static const struct net_offload gre_offload = { .callbacks = { .gso_send_check = gre_gso_send_check, .gso_segment = gre_gso_segment, + .gro_receive = gre_gro_receive, + .gro_complete = gre_gro_complete, }, }; -int __init gre_offload_init(void) +static int __init gre_offload_init(void) { return inet_add_offload(&gre_offload, IPPROTO_GRE); } - -void __exit gre_offload_exit(void) -{ - inet_del_offload(&gre_offload, IPPROTO_GRE); -} +device_initcall(gre_offload_init); diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 5c0e8bc6e5b..0134663fdbc 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -668,6 +668,16 @@ static void icmp_socket_deliver(struct sk_buff *skb, u32 info) rcu_read_unlock(); } +static bool icmp_tag_validation(int proto) +{ + bool ok; + + 
rcu_read_lock(); + ok = rcu_dereference(inet_protos[proto])->icmp_strict_tag_validation; + rcu_read_unlock(); + return ok; +} + /* * Handle ICMP_DEST_UNREACH, ICMP_TIME_EXCEED, ICMP_QUENCH, and * ICMP_PARAMETERPROB. @@ -705,10 +715,22 @@ static void icmp_unreach(struct sk_buff *skb) case ICMP_PORT_UNREACH: break; case ICMP_FRAG_NEEDED: - if (ipv4_config.no_pmtu_disc) { + /* for documentation of the ip_no_pmtu_disc + * values please see + * Documentation/networking/ip-sysctl.txt + */ + switch (net->ipv4.sysctl_ip_no_pmtu_disc) { + default: LIMIT_NETDEBUG(KERN_INFO pr_fmt("%pI4: fragmentation needed and DF set\n"), &iph->daddr); - } else { + break; + case 2: + goto out; + case 3: + if (!icmp_tag_validation(iph->protocol)) + goto out; + /* fall through */ + case 0: info = ntohs(icmph->un.frag.mtu); if (!info) goto out; diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 7defdc9ba16..97e4d1655d2 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -211,7 +211,7 @@ static void igmp_stop_timer(struct ip_mc_list *im) /* It must be called with locked im->lock */ static void igmp_start_timer(struct ip_mc_list *im, int max_delay) { - int tv = net_random() % max_delay; + int tv = prandom_u32() % max_delay; im->tm_running = 1; if (!mod_timer(&im->timer, jiffies+tv+2)) @@ -220,7 +220,7 @@ static void igmp_start_timer(struct ip_mc_list *im, int max_delay) static void igmp_gq_start_timer(struct in_device *in_dev) { - int tv = net_random() % in_dev->mr_maxdelay; + int tv = prandom_u32() % in_dev->mr_maxdelay; in_dev->mr_gq_running = 1; if (!mod_timer(&in_dev->mr_gq_timer, jiffies+tv+2)) @@ -229,7 +229,7 @@ static void igmp_gq_start_timer(struct in_device *in_dev) static void igmp_ifc_start_timer(struct in_device *in_dev, int delay) { - int tv = net_random() % delay; + int tv = prandom_u32() % delay; if (!mod_timer(&in_dev->mr_ifc_timer, jiffies+tv+2)) in_dev_hold(in_dev); @@ -310,7 +310,7 @@ igmp_scount(struct ip_mc_list *pmc, int type, int gdeleted, int sdeleted) struct 
ip_sf_list *psf; int scount = 0; - for (psf=pmc->sources; psf; psf=psf->sf_next) { + for (psf = pmc->sources; psf; psf = psf->sf_next) { if (!is_in(pmc, psf, type, gdeleted, sdeleted)) continue; scount++; @@ -463,7 +463,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc, } first = 1; psf_prev = NULL; - for (psf=*psf_list; psf; psf=psf_next) { + for (psf = *psf_list; psf; psf = psf_next) { __be32 *psrc; psf_next = psf->sf_next; @@ -520,7 +520,7 @@ empty_source: return skb; if (pmc->crcount || isquery) { /* make sure we have room for group header */ - if (skb && AVAILABLE(skb)<sizeof(struct igmpv3_grec)) { + if (skb && AVAILABLE(skb) < sizeof(struct igmpv3_grec)) { igmpv3_sendpack(skb); skb = NULL; /* add_grhead will get a new one */ } @@ -576,7 +576,7 @@ static void igmpv3_clear_zeros(struct ip_sf_list **ppsf) struct ip_sf_list *psf_prev, *psf_next, *psf; psf_prev = NULL; - for (psf=*ppsf; psf; psf = psf_next) { + for (psf = *ppsf; psf; psf = psf_next) { psf_next = psf->sf_next; if (psf->sf_crcount == 0) { if (psf_prev) @@ -600,7 +600,7 @@ static void igmpv3_send_cr(struct in_device *in_dev) /* deleted MCA's */ pmc_prev = NULL; - for (pmc=in_dev->mc_tomb; pmc; pmc=pmc_next) { + for (pmc = in_dev->mc_tomb; pmc; pmc = pmc_next) { pmc_next = pmc->next; if (pmc->sfmode == MCAST_INCLUDE) { type = IGMPV3_BLOCK_OLD_SOURCES; @@ -764,7 +764,7 @@ static void igmp_ifc_event(struct in_device *in_dev) static void igmp_timer_expire(unsigned long data) { - struct ip_mc_list *im=(struct ip_mc_list *)data; + struct ip_mc_list *im = (struct ip_mc_list *)data; struct in_device *in_dev = im->interface; spin_lock(&im->lock); @@ -794,10 +794,10 @@ static int igmp_xmarksources(struct ip_mc_list *pmc, int nsrcs, __be32 *srcs) int i, scount; scount = 0; - for (psf=pmc->sources; psf; psf=psf->sf_next) { + for (psf = pmc->sources; psf; psf = psf->sf_next) { if (scount == nsrcs) break; - for (i=0; i<nsrcs; i++) { + for (i = 0; i < nsrcs; i++) { /* skip inactive 
filters */ if (psf->sf_count[MCAST_INCLUDE] || pmc->sfcount[MCAST_EXCLUDE] != @@ -825,10 +825,10 @@ static int igmp_marksources(struct ip_mc_list *pmc, int nsrcs, __be32 *srcs) /* mark INCLUDE-mode sources */ scount = 0; - for (psf=pmc->sources; psf; psf=psf->sf_next) { + for (psf = pmc->sources; psf; psf = psf->sf_next) { if (scount == nsrcs) break; - for (i=0; i<nsrcs; i++) + for (i = 0; i < nsrcs; i++) if (srcs[i] == psf->sf_inaddr) { psf->sf_gsresp = 1; scount++; @@ -1103,7 +1103,7 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im) pmc->tomb = im->tomb; pmc->sources = im->sources; im->tomb = im->sources = NULL; - for (psf=pmc->sources; psf; psf=psf->sf_next) + for (psf = pmc->sources; psf; psf = psf->sf_next) psf->sf_crcount = pmc->crcount; } spin_unlock_bh(&im->lock); @@ -1121,7 +1121,7 @@ static void igmpv3_del_delrec(struct in_device *in_dev, __be32 multiaddr) spin_lock_bh(&in_dev->mc_tomb_lock); pmc_prev = NULL; - for (pmc=in_dev->mc_tomb; pmc; pmc=pmc->next) { + for (pmc = in_dev->mc_tomb; pmc; pmc = pmc->next) { if (pmc->multiaddr == multiaddr) break; pmc_prev = pmc; @@ -1134,7 +1134,7 @@ static void igmpv3_del_delrec(struct in_device *in_dev, __be32 multiaddr) } spin_unlock_bh(&in_dev->mc_tomb_lock); if (pmc) { - for (psf=pmc->tomb; psf; psf=psf_next) { + for (psf = pmc->tomb; psf; psf = psf_next) { psf_next = psf->sf_next; kfree(psf); } @@ -1167,7 +1167,7 @@ static void igmpv3_clear_delrec(struct in_device *in_dev) psf = pmc->tomb; pmc->tomb = NULL; spin_unlock_bh(&pmc->lock); - for (; psf; psf=psf_next) { + for (; psf; psf = psf_next) { psf_next = psf->sf_next; kfree(psf); } @@ -1557,7 +1557,7 @@ static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode, int rv = 0; psf_prev = NULL; - for (psf=pmc->sources; psf; psf=psf->sf_next) { + for (psf = pmc->sources; psf; psf = psf->sf_next) { if (psf->sf_inaddr == *psfsrc) break; psf_prev = psf; @@ -1630,7 +1630,7 @@ static int ip_mc_del_src(struct in_device *in_dev, __be32 
*pmca, int sfmode, pmc->sfcount[sfmode]--; } err = 0; - for (i=0; i<sfcount; i++) { + for (i = 0; i < sfcount; i++) { int rv = ip_mc_del1_src(pmc, sfmode, &psfsrc[i]); changerec |= rv > 0; @@ -1650,7 +1650,7 @@ static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode, pmc->crcount = in_dev->mr_qrv ? in_dev->mr_qrv : IGMP_Unsolicited_Report_Count; in_dev->mr_ifc_count = pmc->crcount; - for (psf=pmc->sources; psf; psf = psf->sf_next) + for (psf = pmc->sources; psf; psf = psf->sf_next) psf->sf_crcount = 0; igmp_ifc_event(pmc->interface); } else if (sf_setstate(pmc) || changerec) { @@ -1671,7 +1671,7 @@ static int ip_mc_add1_src(struct ip_mc_list *pmc, int sfmode, struct ip_sf_list *psf, *psf_prev; psf_prev = NULL; - for (psf=pmc->sources; psf; psf=psf->sf_next) { + for (psf = pmc->sources; psf; psf = psf->sf_next) { if (psf->sf_inaddr == *psfsrc) break; psf_prev = psf; @@ -1699,7 +1699,7 @@ static void sf_markstate(struct ip_mc_list *pmc) struct ip_sf_list *psf; int mca_xcount = pmc->sfcount[MCAST_EXCLUDE]; - for (psf=pmc->sources; psf; psf=psf->sf_next) + for (psf = pmc->sources; psf; psf = psf->sf_next) if (pmc->sfcount[MCAST_EXCLUDE]) { psf->sf_oldin = mca_xcount == psf->sf_count[MCAST_EXCLUDE] && @@ -1716,7 +1716,7 @@ static int sf_setstate(struct ip_mc_list *pmc) int new_in, rv; rv = 0; - for (psf=pmc->sources; psf; psf=psf->sf_next) { + for (psf = pmc->sources; psf; psf = psf->sf_next) { if (pmc->sfcount[MCAST_EXCLUDE]) { new_in = mca_xcount == psf->sf_count[MCAST_EXCLUDE] && !psf->sf_count[MCAST_INCLUDE]; @@ -1726,7 +1726,7 @@ static int sf_setstate(struct ip_mc_list *pmc) if (!psf->sf_oldin) { struct ip_sf_list *prev = NULL; - for (dpsf=pmc->tomb; dpsf; dpsf=dpsf->sf_next) { + for (dpsf = pmc->tomb; dpsf; dpsf = dpsf->sf_next) { if (dpsf->sf_inaddr == psf->sf_inaddr) break; prev = dpsf; @@ -1748,7 +1748,7 @@ static int sf_setstate(struct ip_mc_list *pmc) * add or update "delete" records if an active filter * is now inactive */ - for 
(dpsf=pmc->tomb; dpsf; dpsf=dpsf->sf_next) + for (dpsf = pmc->tomb; dpsf; dpsf = dpsf->sf_next) if (dpsf->sf_inaddr == psf->sf_inaddr) break; if (!dpsf) { @@ -1800,7 +1800,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, if (!delta) pmc->sfcount[sfmode]++; err = 0; - for (i=0; i<sfcount; i++) { + for (i = 0; i < sfcount; i++) { err = ip_mc_add1_src(pmc, sfmode, &psfsrc[i]); if (err) break; @@ -1810,7 +1810,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, if (!delta) pmc->sfcount[sfmode]--; - for (j=0; j<i; j++) + for (j = 0; j < i; j++) (void) ip_mc_del1_src(pmc, sfmode, &psfsrc[j]); } else if (isexclude != (pmc->sfcount[MCAST_EXCLUDE] != 0)) { #ifdef CONFIG_IP_MULTICAST @@ -1829,7 +1829,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, pmc->crcount = in_dev->mr_qrv ? in_dev->mr_qrv : IGMP_Unsolicited_Report_Count; in_dev->mr_ifc_count = pmc->crcount; - for (psf=pmc->sources; psf; psf = psf->sf_next) + for (psf = pmc->sources; psf; psf = psf->sf_next) psf->sf_crcount = 0; igmp_ifc_event(in_dev); } else if (sf_setstate(pmc)) { @@ -1844,12 +1844,12 @@ static void ip_mc_clear_src(struct ip_mc_list *pmc) { struct ip_sf_list *psf, *nextpsf; - for (psf=pmc->tomb; psf; psf=nextpsf) { + for (psf = pmc->tomb; psf; psf = nextpsf) { nextpsf = psf->sf_next; kfree(psf); } pmc->tomb = NULL; - for (psf=pmc->sources; psf; psf=nextpsf) { + for (psf = pmc->sources; psf; psf = nextpsf) { nextpsf = psf->sf_next; kfree(psf); } @@ -2043,7 +2043,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct if (!psl) goto done; /* err = -EADDRNOTAVAIL */ rv = !0; - for (i=0; i<psl->sl_count; i++) { + for (i = 0; i < psl->sl_count; i++) { rv = memcmp(&psl->sl_addr[i], &mreqs->imr_sourceaddr, sizeof(__be32)); if (rv == 0) @@ -2062,7 +2062,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct ip_mc_del_src(in_dev, &mreqs->imr_multiaddr, omode, 1, &mreqs->imr_sourceaddr, 1); - for 
(j=i+1; j<psl->sl_count; j++) + for (j = i+1; j < psl->sl_count; j++) psl->sl_addr[j-1] = psl->sl_addr[j]; psl->sl_count--; err = 0; @@ -2088,7 +2088,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct newpsl->sl_max = count; newpsl->sl_count = count - IP_SFBLOCK; if (psl) { - for (i=0; i<psl->sl_count; i++) + for (i = 0; i < psl->sl_count; i++) newpsl->sl_addr[i] = psl->sl_addr[i]; /* decrease mem now to avoid the memleak warning */ atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc); @@ -2098,7 +2098,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct psl = newpsl; } rv = 1; /* > 0 for insert logic below if sl_count is 0 */ - for (i=0; i<psl->sl_count; i++) { + for (i = 0; i < psl->sl_count; i++) { rv = memcmp(&psl->sl_addr[i], &mreqs->imr_sourceaddr, sizeof(__be32)); if (rv == 0) @@ -2106,7 +2106,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct } if (rv == 0) /* address already there is an error */ goto done; - for (j=psl->sl_count-1; j>=i; j--) + for (j = psl->sl_count-1; j >= i; j--) psl->sl_addr[j+1] = psl->sl_addr[j]; psl->sl_addr[i] = mreqs->imr_sourceaddr; psl->sl_count++; @@ -2305,7 +2305,7 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf, copy_to_user(optval, gsf, GROUP_FILTER_SIZE(0))) { return -EFAULT; } - for (i=0; i<copycount; i++) { + for (i = 0; i < copycount; i++) { struct sockaddr_storage ss; psin = (struct sockaddr_in *)&ss; @@ -2350,7 +2350,7 @@ int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr, int dif) if (!psl) goto unlock; - for (i=0; i<psl->sl_count; i++) { + for (i = 0; i < psl->sl_count; i++) { if (psl->sl_addr[i] == rmt_addr) break; } @@ -2423,7 +2423,7 @@ int ip_check_mc_rcu(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u rv = 1; } else if (im) { if (src_addr) { - for (psf=im->sources; psf; psf=psf->sf_next) { + for (psf = im->sources; psf; psf = psf->sf_next) { if (psf->sf_inaddr == src_addr) break; } @@ -2762,6 +2762,7 @@ static struct 
pernet_operations igmp_net_ops = { .init = igmp_net_init, .exit = igmp_net_exit, }; +#endif static int igmp_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) @@ -2785,8 +2786,9 @@ static struct notifier_block igmp_notifier = { .notifier_call = igmp_netdev_event, }; -int __init igmp_mc_proc_init(void) +int __init igmp_mc_init(void) { +#if defined(CONFIG_PROC_FS) int err; err = register_pernet_subsys(&igmp_net_ops); @@ -2800,5 +2802,7 @@ int __init igmp_mc_proc_init(void) reg_notif_fail: unregister_pernet_subsys(&igmp_net_ops); return err; -} +#else + return register_netdevice_notifier(&igmp_notifier); #endif +} diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index fc0e649cc00..0d1e2cb877e 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -109,7 +109,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) again: inet_get_local_port_range(net, &low, &high); remaining = (high - low) + 1; - smallest_rover = rover = net_random() % remaining + low; + smallest_rover = rover = prandom_u32() % remaining + low; smallest_size = -1; do { diff --git a/net/ipv4/inet_lro.c b/net/ipv4/inet_lro.c index 1975f52933c..f17ea49b28f 100644 --- a/net/ipv4/inet_lro.c +++ b/net/ipv4/inet_lro.c @@ -230,29 +230,6 @@ static void lro_add_packet(struct net_lro_desc *lro_desc, struct sk_buff *skb, lro_desc->last_skb = skb; } -static void lro_add_frags(struct net_lro_desc *lro_desc, - int len, int hlen, int truesize, - struct skb_frag_struct *skb_frags, - struct iphdr *iph, struct tcphdr *tcph) -{ - struct sk_buff *skb = lro_desc->parent; - int tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph); - - lro_add_common(lro_desc, iph, tcph, tcp_data_len); - - skb->truesize += truesize; - - skb_frags[0].page_offset += hlen; - skb_frag_size_sub(&skb_frags[0], hlen); - - while (tcp_data_len > 0) { - *(lro_desc->next_frag) = *skb_frags; - tcp_data_len -= skb_frag_size(skb_frags); - lro_desc->next_frag++; - 
skb_frags++; - skb_shinfo(skb)->nr_frags++; - } -} static int lro_check_tcp_conn(struct net_lro_desc *lro_desc, struct iphdr *iph, @@ -371,128 +348,6 @@ out: return 1; } - -static struct sk_buff *lro_gen_skb(struct net_lro_mgr *lro_mgr, - struct skb_frag_struct *frags, - int len, int true_size, - void *mac_hdr, - int hlen, __wsum sum, - u32 ip_summed) -{ - struct sk_buff *skb; - struct skb_frag_struct *skb_frags; - int data_len = len; - int hdr_len = min(len, hlen); - - skb = netdev_alloc_skb(lro_mgr->dev, hlen + lro_mgr->frag_align_pad); - if (!skb) - return NULL; - - skb_reserve(skb, lro_mgr->frag_align_pad); - skb->len = len; - skb->data_len = len - hdr_len; - skb->truesize += true_size; - skb->tail += hdr_len; - - memcpy(skb->data, mac_hdr, hdr_len); - - skb_frags = skb_shinfo(skb)->frags; - while (data_len > 0) { - *skb_frags = *frags; - data_len -= skb_frag_size(frags); - skb_frags++; - frags++; - skb_shinfo(skb)->nr_frags++; - } - - skb_shinfo(skb)->frags[0].page_offset += hdr_len; - skb_frag_size_sub(&skb_shinfo(skb)->frags[0], hdr_len); - - skb->ip_summed = ip_summed; - skb->csum = sum; - skb->protocol = eth_type_trans(skb, lro_mgr->dev); - return skb; -} - -static struct sk_buff *__lro_proc_segment(struct net_lro_mgr *lro_mgr, - struct skb_frag_struct *frags, - int len, int true_size, - void *priv, __wsum sum) -{ - struct net_lro_desc *lro_desc; - struct iphdr *iph; - struct tcphdr *tcph; - struct sk_buff *skb; - u64 flags; - void *mac_hdr; - int mac_hdr_len; - int hdr_len = LRO_MAX_PG_HLEN; - int vlan_hdr_len = 0; - - if (!lro_mgr->get_frag_header || - lro_mgr->get_frag_header(frags, (void *)&mac_hdr, (void *)&iph, - (void *)&tcph, &flags, priv)) { - mac_hdr = skb_frag_address(frags); - goto out1; - } - - if (!(flags & LRO_IPV4) || !(flags & LRO_TCP)) - goto out1; - - hdr_len = (int)((void *)(tcph) + TCP_HDR_LEN(tcph) - mac_hdr); - mac_hdr_len = (int)((void *)(iph) - mac_hdr); - - lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph); - if 
(!lro_desc) - goto out1; - - if (!lro_desc->active) { /* start new lro session */ - if (lro_tcp_ip_check(iph, tcph, len - mac_hdr_len, NULL)) - goto out1; - - skb = lro_gen_skb(lro_mgr, frags, len, true_size, mac_hdr, - hdr_len, 0, lro_mgr->ip_summed_aggr); - if (!skb) - goto out; - - if ((skb->protocol == htons(ETH_P_8021Q)) && - !(lro_mgr->features & LRO_F_EXTRACT_VLAN_ID)) - vlan_hdr_len = VLAN_HLEN; - - iph = (void *)(skb->data + vlan_hdr_len); - tcph = (void *)((u8 *)skb->data + vlan_hdr_len - + IP_HDR_LEN(iph)); - - lro_init_desc(lro_desc, skb, iph, tcph); - LRO_INC_STATS(lro_mgr, aggregated); - return NULL; - } - - if (lro_desc->tcp_next_seq != ntohl(tcph->seq)) - goto out2; - - if (lro_tcp_ip_check(iph, tcph, len - mac_hdr_len, lro_desc)) - goto out2; - - lro_add_frags(lro_desc, len, hdr_len, true_size, frags, iph, tcph); - LRO_INC_STATS(lro_mgr, aggregated); - - if ((skb_shinfo(lro_desc->parent)->nr_frags >= lro_mgr->max_aggr) || - lro_desc->parent->len > (0xFFFF - lro_mgr->dev->mtu)) - lro_flush(lro_mgr, lro_desc); - - return NULL; - -out2: /* send aggregated packets to the stack */ - lro_flush(lro_mgr, lro_desc); - -out1: /* Original packet has to be posted to the stack */ - skb = lro_gen_skb(lro_mgr, frags, len, true_size, mac_hdr, - hdr_len, sum, lro_mgr->ip_summed); -out: - return skb; -} - void lro_receive_skb(struct net_lro_mgr *lro_mgr, struct sk_buff *skb, void *priv) @@ -506,23 +361,6 @@ void lro_receive_skb(struct net_lro_mgr *lro_mgr, } EXPORT_SYMBOL(lro_receive_skb); -void lro_receive_frags(struct net_lro_mgr *lro_mgr, - struct skb_frag_struct *frags, - int len, int true_size, void *priv, __wsum sum) -{ - struct sk_buff *skb; - - skb = __lro_proc_segment(lro_mgr, frags, len, true_size, priv, sum); - if (!skb) - return; - - if (lro_mgr->features & LRO_F_NAPI) - netif_receive_skb(skb); - else - netif_rx(skb); -} -EXPORT_SYMBOL(lro_receive_frags); - void lro_flush_all(struct net_lro_mgr *lro_mgr) { int i; @@ -534,14 +372,3 @@ void 
lro_flush_all(struct net_lro_mgr *lro_mgr) } } EXPORT_SYMBOL(lro_flush_all); - -void lro_flush_pkt(struct net_lro_mgr *lro_mgr, - struct iphdr *iph, struct tcphdr *tcph) -{ - struct net_lro_desc *lro_desc; - - lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph); - if (lro_desc->active) - lro_flush(lro_mgr, lro_desc); -} -EXPORT_SYMBOL(lro_flush_pkt); diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 33d5537881e..48f42446511 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -109,13 +109,6 @@ static inline void flush_check(struct inet_peer_base *base, int family) } } -void inetpeer_invalidate_family(int family) -{ - atomic_t *fp = inetpeer_seq_ptr(family); - - atomic_inc(fp); -} - #define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */ /* Exported for sysctl_net_ipv4. */ @@ -227,7 +220,7 @@ static int addr_compare(const struct inetpeer_addr *a, stackptr = _stack; \ *stackptr++ = &_base->root; \ for (u = rcu_deref_locked(_base->root, _base); \ - u != peer_avl_empty; ) { \ + u != peer_avl_empty;) { \ int cmp = addr_compare(_daddr, &u->daddr); \ if (cmp == 0) \ break; \ @@ -282,7 +275,7 @@ static struct inet_peer *lookup_rcu(const struct inetpeer_addr *daddr, *stackptr++ = &start->avl_left; \ v = &start->avl_left; \ for (u = rcu_deref_locked(*v, base); \ - u->avl_right != peer_avl_empty_rcu; ) { \ + u->avl_right != peer_avl_empty_rcu;) { \ v = &u->avl_right; \ *stackptr++ = v; \ u = rcu_deref_locked(*v, base); \ diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 694de3b7aeb..e9f1217a8af 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -54,6 +54,7 @@ static int ip_forward_finish(struct sk_buff *skb) int ip_forward(struct sk_buff *skb) { + u32 mtu; struct iphdr *iph; /* Our header */ struct rtable *rt; /* Route we use */ struct ip_options *opt = &(IPCB(skb)->opt); @@ -88,11 +89,13 @@ int ip_forward(struct sk_buff *skb) if (opt->is_strictroute && rt->rt_uses_gateway) goto sr_failed; - if 
(unlikely(skb->len > dst_mtu(&rt->dst) && !skb_is_gso(skb) && + IPCB(skb)->flags |= IPSKB_FORWARDED; + mtu = ip_dst_mtu_maybe_forward(&rt->dst, true); + if (unlikely(skb->len > mtu && !skb_is_gso(skb) && (ip_hdr(skb)->frag_off & htons(IP_DF))) && !skb->local_df) { IP_INC_STATS(dev_net(rt->dst.dev), IPSTATS_MIB_FRAGFAILS); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, - htonl(dst_mtu(&rt->dst))); + htonl(mtu)); goto drop; } diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 2481993a497..c10a3ce5cbf 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -704,7 +704,7 @@ struct sk_buff *ip_check_defrag(struct sk_buff *skb, u32 user) memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); if (ip_defrag(skb, user)) return NULL; - skb->rxhash = 0; + skb_clear_hash(skb); } } return skb; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index e560ef34cf4..e7a92fdb36f 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -278,7 +278,7 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb, return NETDEV_TX_OK; free_skb: - dev_kfree_skb(skb); + kfree_skb(skb); out: dev->stats.tx_dropped++; return NETDEV_TX_OK; @@ -301,7 +301,7 @@ static netdev_tx_t gre_tap_xmit(struct sk_buff *skb, return NETDEV_TX_OK; free_skb: - dev_kfree_skb(skb); + kfree_skb(skb); out: dev->stats.tx_dropped++; return NETDEV_TX_OK; diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index ec7264514a8..f4ab72e19af 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -167,7 +167,7 @@ int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb) soffset -= 4; if (soffset > 3) { memcpy(&faddr, &start[soffset-1], 4); - for (soffset-=4, doffset=4; soffset > 3; soffset-=4, doffset+=4) + for (soffset -= 4, doffset = 4; soffset > 3; soffset -= 4, doffset += 4) memcpy(&dptr[doffset-1], &start[soffset-1], 4); /* * RFC1812 requires to fix illegal source routes. 
@@ -227,7 +227,7 @@ void ip_options_fragment(struct sk_buff *skb) continue; } optlen = optptr[1]; - if (optlen<2 || optlen>l) + if (optlen < 2 || optlen > l) return; if (!IPOPT_COPIED(*optptr)) memset(optptr, IPOPT_NOOP, optlen); @@ -275,27 +275,27 @@ int ip_options_compile(struct net *net, for (l = opt->optlen; l > 0; ) { switch (*optptr) { - case IPOPT_END: - for (optptr++, l--; l>0; optptr++, l--) { + case IPOPT_END: + for (optptr++, l--; l > 0; optptr++, l--) { if (*optptr != IPOPT_END) { *optptr = IPOPT_END; opt->is_changed = 1; } } goto eol; - case IPOPT_NOOP: + case IPOPT_NOOP: l--; optptr++; continue; } optlen = optptr[1]; - if (optlen<2 || optlen>l) { + if (optlen < 2 || optlen > l) { pp_ptr = optptr; goto error; } switch (*optptr) { - case IPOPT_SSRR: - case IPOPT_LSRR: + case IPOPT_SSRR: + case IPOPT_LSRR: if (optlen < 3) { pp_ptr = optptr + 1; goto error; @@ -321,7 +321,7 @@ int ip_options_compile(struct net *net, opt->is_strictroute = (optptr[0] == IPOPT_SSRR); opt->srr = optptr - iph; break; - case IPOPT_RR: + case IPOPT_RR: if (opt->rr) { pp_ptr = optptr; goto error; @@ -349,7 +349,7 @@ int ip_options_compile(struct net *net, } opt->rr = optptr - iph; break; - case IPOPT_TIMESTAMP: + case IPOPT_TIMESTAMP: if (opt->ts) { pp_ptr = optptr; goto error; @@ -369,13 +369,13 @@ int ip_options_compile(struct net *net, goto error; } switch (optptr[3]&0xF) { - case IPOPT_TS_TSONLY: + case IPOPT_TS_TSONLY: if (skb) timeptr = &optptr[optptr[2]-1]; opt->ts_needtime = 1; optptr[2] += 4; break; - case IPOPT_TS_TSANDADDR: + case IPOPT_TS_TSANDADDR: if (optptr[2]+7 > optptr[1]) { pp_ptr = optptr + 2; goto error; @@ -389,7 +389,7 @@ int ip_options_compile(struct net *net, opt->ts_needtime = 1; optptr[2] += 8; break; - case IPOPT_TS_PRESPEC: + case IPOPT_TS_PRESPEC: if (optptr[2]+7 > optptr[1]) { pp_ptr = optptr + 2; goto error; @@ -405,7 +405,7 @@ int ip_options_compile(struct net *net, opt->ts_needtime = 1; optptr[2] += 8; break; - default: + default: if (!skb && 
!ns_capable(net->user_ns, CAP_NET_RAW)) { pp_ptr = optptr + 3; goto error; @@ -433,7 +433,7 @@ int ip_options_compile(struct net *net, } opt->ts = optptr - iph; break; - case IPOPT_RA: + case IPOPT_RA: if (optlen < 4) { pp_ptr = optptr + 1; goto error; @@ -441,7 +441,7 @@ int ip_options_compile(struct net *net, if (optptr[2] == 0 && optptr[3] == 0) opt->router_alert = optptr - iph; break; - case IPOPT_CIPSO: + case IPOPT_CIPSO: if ((!skb && !ns_capable(net->user_ns, CAP_NET_RAW)) || opt->cipso) { pp_ptr = optptr; goto error; @@ -452,9 +452,9 @@ int ip_options_compile(struct net *net, goto error; } break; - case IPOPT_SEC: - case IPOPT_SID: - default: + case IPOPT_SEC: + case IPOPT_SID: + default: if (!skb && !ns_capable(net->user_ns, CAP_NET_RAW)) { pp_ptr = optptr; goto error; @@ -572,7 +572,7 @@ void ip_forward_options(struct sk_buff *skb) optptr = raw + opt->srr; - for ( srrptr=optptr[2], srrspace = optptr[1]; + for ( srrptr = optptr[2], srrspace = optptr[1]; srrptr <= srrspace; srrptr += 4 ) { @@ -628,7 +628,7 @@ int ip_options_rcv_srr(struct sk_buff *skb) if (rt->rt_type != RTN_LOCAL) return -EINVAL; - for (srrptr=optptr[2], srrspace = optptr[1]; srrptr <= srrspace; srrptr += 4) { + for (srrptr = optptr[2], srrspace = optptr[1]; srrptr <= srrspace; srrptr += 4) { if (srrptr + 3 > srrspace) { icmp_send(skb, ICMP_PARAMETERPROB, 0, htonl((opt->srr+2)<<24)); return -EINVAL; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index df184616493..8971780aec7 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -449,6 +449,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) __be16 not_last_frag; struct rtable *rt = skb_rtable(skb); int err = 0; + bool forwarding = IPCB(skb)->flags & IPSKB_FORWARDED; dev = rt->dst.dev; @@ -458,12 +459,13 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) iph = ip_hdr(skb); + mtu = ip_dst_mtu_maybe_forward(&rt->dst, forwarding); if (unlikely(((iph->frag_off & htons(IP_DF)) && 
!skb->local_df) || (IPCB(skb)->frag_max_size && - IPCB(skb)->frag_max_size > dst_mtu(&rt->dst)))) { + IPCB(skb)->frag_max_size > mtu))) { IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, - htonl(ip_skb_dst_mtu(skb))); + htonl(mtu)); kfree_skb(skb); return -EMSGSIZE; } @@ -473,7 +475,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) */ hlen = iph->ihl * 4; - mtu = dst_mtu(&rt->dst) - hlen; /* Size of data space */ + mtu = mtu - hlen; /* Size of data space */ #ifdef CONFIG_BRIDGE_NETFILTER if (skb->nf_bridge) mtu -= nf_bridge_mtu_reduction(skb); @@ -1551,7 +1553,7 @@ void __init ip_init(void) ip_rt_init(); inet_initpeers(); -#if defined(CONFIG_IP_MULTICAST) && defined(CONFIG_PROC_FS) - igmp_mc_proc_init(); +#if defined(CONFIG_IP_MULTICAST) + igmp_mc_init(); #endif } diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index ddf32a6bc41..580dd96666e 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -56,7 +56,6 @@ /* * SOL_IP control messages. */ -#define PKTINFO_SKB_CB(__skb) ((struct in_pktinfo *)((__skb)->cb)) static void ip_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb) { @@ -390,7 +389,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) { struct sock_exterr_skb *serr; struct sk_buff *skb, *skb2; - struct sockaddr_in *sin; + DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name); struct { struct sock_extended_err ee; struct sockaddr_in offender; @@ -416,7 +415,6 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) serr = SKB_EXT_ERR(skb); - sin = (struct sockaddr_in *)msg->msg_name; if (sin) { sin->sin_family = AF_INET; sin->sin_addr.s_addr = *(__be32 *)(skb_network_header(skb) + @@ -1051,14 +1049,15 @@ e_inval: * * To support IP_CMSG_PKTINFO option, we store rt_iif and specific * destination in skb->cb[] before dst drop. 
- * This way, receiver doesnt make cache line misses to read rtable. + * This way, receiver doesn't make cache line misses to read rtable. */ void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb) { struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(skb); + bool prepare = (inet_sk(sk)->cmsg_flags & IP_CMSG_PKTINFO) || + ipv6_sk_rxinfo(sk); - if ((inet_sk(sk)->cmsg_flags & IP_CMSG_PKTINFO) && - skb_rtable(skb)) { + if (prepare && skb_rtable(skb)) { pktinfo->ipi_ifindex = inet_iif(skb); pktinfo->ipi_spec_dst.s_addr = fib_compute_spec_dst(skb); } else { diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 90ff9570d7d..c0e3cb72ad7 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -61,13 +61,69 @@ #include <net/ip6_route.h> #endif -static unsigned int ip_tunnel_hash(struct ip_tunnel_net *itn, - __be32 key, __be32 remote) +static unsigned int ip_tunnel_hash(__be32 key, __be32 remote) { return hash_32((__force u32)key ^ (__force u32)remote, IP_TNL_HASH_BITS); } +static void __tunnel_dst_set(struct ip_tunnel_dst *idst, + struct dst_entry *dst) +{ + struct dst_entry *old_dst; + + if (dst) { + if (dst->flags & DST_NOCACHE) + dst = NULL; + else + dst_clone(dst); + } + old_dst = xchg((__force struct dst_entry **)&idst->dst, dst); + dst_release(old_dst); +} + +static void tunnel_dst_set(struct ip_tunnel *t, struct dst_entry *dst) +{ + __tunnel_dst_set(this_cpu_ptr(t->dst_cache), dst); +} + +static void tunnel_dst_reset(struct ip_tunnel *t) +{ + tunnel_dst_set(t, NULL); +} + +static void tunnel_dst_reset_all(struct ip_tunnel *t) +{ + int i; + + for_each_possible_cpu(i) + __tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL); +} + +static struct dst_entry *tunnel_dst_get(struct ip_tunnel *t) +{ + struct dst_entry *dst; + + rcu_read_lock(); + dst = rcu_dereference(this_cpu_ptr(t->dst_cache)->dst); + if (dst) + dst_hold(dst); + rcu_read_unlock(); + return dst; +} + +static struct dst_entry *tunnel_dst_check(struct ip_tunnel *t, u32 cookie) +{ + struct 
dst_entry *dst = tunnel_dst_get(t); + + if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) { + tunnel_dst_reset(t); + return NULL; + } + + return dst; +} + /* Often modified stats are per cpu, other are shared (netdev->stats) */ struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *tot) @@ -75,7 +131,8 @@ struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev, int i; for_each_possible_cpu(i) { - const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); + const struct pcpu_sw_netstats *tstats = + per_cpu_ptr(dev->tstats, i); u64 rx_packets, rx_bytes, tx_packets, tx_bytes; unsigned int start; @@ -146,7 +203,7 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, struct ip_tunnel *t, *cand = NULL; struct hlist_head *head; - hash = ip_tunnel_hash(itn, key, remote); + hash = ip_tunnel_hash(key, remote); head = &itn->tunnels[hash]; hlist_for_each_entry_rcu(t, head, hash_node) { @@ -178,7 +235,7 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, cand = t; } - hash = ip_tunnel_hash(itn, key, 0); + hash = ip_tunnel_hash(key, 0); head = &itn->tunnels[hash]; hlist_for_each_entry_rcu(t, head, hash_node) { @@ -234,7 +291,7 @@ static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn, else remote = 0; - h = ip_tunnel_hash(itn, parms->i_key, remote); + h = ip_tunnel_hash(parms->i_key, remote); return &itn->tunnels[h]; } @@ -318,11 +375,10 @@ failed: return ERR_PTR(err); } -static inline struct rtable *ip_route_output_tunnel(struct net *net, - struct flowi4 *fl4, - int proto, - __be32 daddr, __be32 saddr, - __be32 key, __u8 tos, int oif) +static inline void init_tunnel_flow(struct flowi4 *fl4, + int proto, + __be32 daddr, __be32 saddr, + __be32 key, __u8 tos, int oif) { memset(fl4, 0, sizeof(*fl4)); fl4->flowi4_oif = oif; @@ -331,7 +387,6 @@ static inline struct rtable *ip_route_output_tunnel(struct net *net, fl4->flowi4_tos = tos; fl4->flowi4_proto = proto; 
fl4->fl4_gre_key = key; - return ip_route_output_key(net, fl4); } static int ip_tunnel_bind_dev(struct net_device *dev) @@ -350,14 +405,14 @@ static int ip_tunnel_bind_dev(struct net_device *dev) struct flowi4 fl4; struct rtable *rt; - rt = ip_route_output_tunnel(tunnel->net, &fl4, - tunnel->parms.iph.protocol, - iph->daddr, iph->saddr, - tunnel->parms.o_key, - RT_TOS(iph->tos), - tunnel->parms.link); + init_tunnel_flow(&fl4, iph->protocol, iph->daddr, + iph->saddr, tunnel->parms.o_key, + RT_TOS(iph->tos), tunnel->parms.link); + rt = ip_route_output_key(tunnel->net, &fl4); + if (!IS_ERR(rt)) { tdev = rt->dst.dev; + tunnel_dst_set(tunnel, &rt->dst); ip_rt_put(rt); } if (dev->type != ARPHRD_ETHER) @@ -405,7 +460,7 @@ static struct ip_tunnel *ip_tunnel_create(struct net *net, int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, const struct tnl_ptk_info *tpi, bool log_ecn_error) { - struct pcpu_tstats *tstats; + struct pcpu_sw_netstats *tstats; const struct iphdr *iph = ip_hdr(skb); int err; @@ -528,10 +583,11 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi4 fl4; u8 tos, ttl; __be16 df; - struct rtable *rt; /* Route to the other host */ + struct rtable *rt = NULL; /* Route to the other host */ unsigned int max_headroom; /* The extra header space needed */ __be32 dst; int err; + bool connected = true; inner_iph = (const struct iphdr *)skb_inner_network_header(skb); @@ -581,27 +637,39 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, #endif else goto tx_error; + + connected = false; } tos = tnl_params->tos; if (tos & 0x1) { tos &= ~0x1; - if (skb->protocol == htons(ETH_P_IP)) + if (skb->protocol == htons(ETH_P_IP)) { tos = inner_iph->tos; - else if (skb->protocol == htons(ETH_P_IPV6)) + connected = false; + } else if (skb->protocol == htons(ETH_P_IPV6)) { tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph); + connected = false; + } } - rt = ip_route_output_tunnel(tunnel->net, &fl4, - protocol, - dst, 
tnl_params->saddr, - tunnel->parms.o_key, - RT_TOS(tos), - tunnel->parms.link); - if (IS_ERR(rt)) { - dev->stats.tx_carrier_errors++; - goto tx_error; + init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr, + tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link); + + if (connected) + rt = (struct rtable *)tunnel_dst_check(tunnel, 0); + + if (!rt) { + rt = ip_route_output_key(tunnel->net, &fl4); + + if (IS_ERR(rt)) { + dev->stats.tx_carrier_errors++; + goto tx_error; + } + if (connected) + tunnel_dst_set(tunnel, &rt->dst); } + if (rt->dst.dev == dev) { ip_rt_put(rt); dev->stats.collisions++; @@ -618,6 +686,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, tunnel->err_time + IPTUNNEL_ERR_TIMEO)) { tunnel->err_count--; + memset(IPCB(skb), 0, sizeof(*IPCB(skb))); dst_link_failure(skb); } else tunnel->err_count = 0; @@ -647,7 +716,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, if (skb_cow_head(skb, dev->needed_headroom)) { dev->stats.tx_dropped++; - dev_kfree_skb(skb); + kfree_skb(skb); return; } @@ -663,7 +732,7 @@ tx_error_icmp: #endif tx_error: dev->stats.tx_errors++; - dev_kfree_skb(skb); + kfree_skb(skb); } EXPORT_SYMBOL_GPL(ip_tunnel_xmit); @@ -696,6 +765,7 @@ static void ip_tunnel_update(struct ip_tunnel_net *itn, if (set_mtu) dev->mtu = mtu; } + tunnel_dst_reset_all(t); netdev_state_change(dev); } @@ -811,6 +881,7 @@ static void ip_tunnel_dev_free(struct net_device *dev) struct ip_tunnel *tunnel = netdev_priv(dev); gro_cells_destroy(&tunnel->gro_cells); + free_percpu(tunnel->dst_cache); free_percpu(dev->tstats); free_netdev(dev); } @@ -979,18 +1050,25 @@ int ip_tunnel_init(struct net_device *dev) int i, err; dev->destructor = ip_tunnel_dev_free; - dev->tstats = alloc_percpu(struct pcpu_tstats); + dev->tstats = alloc_percpu(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; for_each_possible_cpu(i) { - struct pcpu_tstats *ipt_stats; + struct pcpu_sw_netstats *ipt_stats; ipt_stats = per_cpu_ptr(dev->tstats, 
i); u64_stats_init(&ipt_stats->syncp); } + tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst); + if (!tunnel->dst_cache) { + free_percpu(dev->tstats); + return -ENOMEM; + } + err = gro_cells_init(&tunnel->gro_cells, dev); if (err) { + free_percpu(tunnel->dst_cache); free_percpu(dev->tstats); return err; } @@ -1015,6 +1093,8 @@ void ip_tunnel_uninit(struct net_device *dev) /* fb_tunnel_dev will be unregisted in net-exit call. */ if (itn->fb_tunnel_dev != dev) ip_tunnel_del(netdev_priv(dev)); + + tunnel_dst_reset_all(tunnel); } EXPORT_SYMBOL_GPL(ip_tunnel_uninit); diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 42ffbc8d65c..6156f4ef5e9 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -56,7 +56,7 @@ int iptunnel_xmit(struct rtable *rt, struct sk_buff *skb, skb_scrub_packet(skb, xnet); - skb->rxhash = 0; + skb_clear_hash(skb); skb_dst_set(skb, &rt->dst); memset(IPCB(skb), 0, sizeof(*IPCB(skb))); @@ -107,8 +107,7 @@ int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto) nf_reset(skb); secpath_reset(skb); - if (!skb->l4_rxhash) - skb->rxhash = 0; + skb_clear_hash_if_not_l4(skb); skb_dst_drop(skb); skb->vlan_tci = 0; skb_set_queue_mapping(skb, 0); diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 52b802a0cd8..48eafae5176 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -60,7 +60,7 @@ static int vti_rcv(struct sk_buff *skb) tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, iph->saddr, iph->daddr, 0); if (tunnel != NULL) { - struct pcpu_tstats *tstats; + struct pcpu_sw_netstats *tstats; u32 oldmark = skb->mark; int ret; @@ -162,7 +162,7 @@ tx_error_icmp: dst_link_failure(skb); tx_error: dev->stats.tx_errors++; - dev_kfree_skb(skb); + kfree_skb(skb); return NETDEV_TX_OK; } diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index fe3e9f7f1f0..812b1835146 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -228,7 +228,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct 
sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; tx_error: - dev_kfree_skb(skb); + kfree_skb(skb); out: dev->stats.tx_errors++; return NETDEV_TX_OK; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 1672409f5ba..b9b3472975b 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -428,6 +428,7 @@ struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v) goto failure; ipv4_devconf_setall(in_dev); + neigh_parms_data_state_setall(in_dev->arp_parms); IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0; if (dev_open(dev)) @@ -520,6 +521,7 @@ static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt) } ipv4_devconf_setall(in_dev); + neigh_parms_data_state_setall(in_dev->arp_parms); IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0; rcu_read_unlock(); diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 40d56073cd1..81c6910cfa9 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -39,23 +39,33 @@ config NF_CONNTRACK_PROC_COMPAT config NF_TABLES_IPV4 depends on NF_TABLES tristate "IPv4 nf_tables support" - -config NFT_REJECT_IPV4 - depends on NF_TABLES_IPV4 - tristate "nf_tables IPv4 reject support" + help + This option enables the IPv4 support for nf_tables. config NFT_CHAIN_ROUTE_IPV4 depends on NF_TABLES_IPV4 tristate "IPv4 nf_tables route chain support" + help + This option enables the "route" chain for IPv4 in nf_tables. This + chain type is used to force packet re-routing after mangling header + fields such as the source, destination, type of service and + the packet mark. config NFT_CHAIN_NAT_IPV4 depends on NF_TABLES_IPV4 depends on NF_NAT_IPV4 && NFT_NAT tristate "IPv4 nf_tables nat chain support" + help + This option enables the "nat" chain for IPv4 in nf_tables. This + chain type is used to perform Network Address Translation (NAT) + packet transformations such as the source, destination address and + source and destination ports. 
config NF_TABLES_ARP depends on NF_TABLES tristate "ARP nf_tables support" + help + This option enables the ARP support for nf_tables. config IP_NF_IPTABLES tristate "IP tables support (required for filtering/masq/NAT)" diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 19df72b7ba8..c16be9d5842 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -28,7 +28,6 @@ obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o obj-$(CONFIG_NF_TABLES_IPV4) += nf_tables_ipv4.o -obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV4) += nft_chain_route_ipv4.o obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o obj-$(CONFIG_NF_TABLES_ARP) += nf_tables_arp.o diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index b969131ad1c..5b6e0df4ccf 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -17,10 +17,6 @@ #include <linux/udp.h> #include <linux/icmp.h> #include <net/icmp.h> -#include <net/ip.h> -#include <net/tcp.h> -#include <net/route.h> -#include <net/dst.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv4/ip_tables.h> #include <linux/netfilter_ipv4/ipt_REJECT.h> @@ -28,128 +24,12 @@ #include <linux/netfilter_bridge.h> #endif +#include <net/netfilter/ipv4/nf_reject.h> + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); MODULE_DESCRIPTION("Xtables: packet \"rejection\" target for IPv4"); -/* Send RST reply */ -static void send_reset(struct sk_buff *oldskb, int hook) -{ - struct sk_buff *nskb; - const struct iphdr *oiph; - struct iphdr *niph; - const struct tcphdr *oth; - struct tcphdr _otcph, *tcph; - - /* IP header checks: fragment. 
*/ - if (ip_hdr(oldskb)->frag_off & htons(IP_OFFSET)) - return; - - oth = skb_header_pointer(oldskb, ip_hdrlen(oldskb), - sizeof(_otcph), &_otcph); - if (oth == NULL) - return; - - /* No RST for RST. */ - if (oth->rst) - return; - - if (skb_rtable(oldskb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) - return; - - /* Check checksum */ - if (nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), IPPROTO_TCP)) - return; - oiph = ip_hdr(oldskb); - - nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct tcphdr) + - LL_MAX_HEADER, GFP_ATOMIC); - if (!nskb) - return; - - skb_reserve(nskb, LL_MAX_HEADER); - - skb_reset_network_header(nskb); - niph = (struct iphdr *)skb_put(nskb, sizeof(struct iphdr)); - niph->version = 4; - niph->ihl = sizeof(struct iphdr) / 4; - niph->tos = 0; - niph->id = 0; - niph->frag_off = htons(IP_DF); - niph->protocol = IPPROTO_TCP; - niph->check = 0; - niph->saddr = oiph->daddr; - niph->daddr = oiph->saddr; - - skb_reset_transport_header(nskb); - tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr)); - memset(tcph, 0, sizeof(*tcph)); - tcph->source = oth->dest; - tcph->dest = oth->source; - tcph->doff = sizeof(struct tcphdr) / 4; - - if (oth->ack) - tcph->seq = oth->ack_seq; - else { - tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin + - oldskb->len - ip_hdrlen(oldskb) - - (oth->doff << 2)); - tcph->ack = 1; - } - - tcph->rst = 1; - tcph->check = ~tcp_v4_check(sizeof(struct tcphdr), niph->saddr, - niph->daddr, 0); - nskb->ip_summed = CHECKSUM_PARTIAL; - nskb->csum_start = (unsigned char *)tcph - nskb->head; - nskb->csum_offset = offsetof(struct tcphdr, check); - - /* ip_route_me_harder expects skb->dst to be set */ - skb_dst_set_noref(nskb, skb_dst(oldskb)); - - nskb->protocol = htons(ETH_P_IP); - if (ip_route_me_harder(nskb, RTN_UNSPEC)) - goto free_nskb; - - niph->ttl = ip4_dst_hoplimit(skb_dst(nskb)); - - /* "Never happens" */ - if (nskb->len > dst_mtu(skb_dst(nskb))) - goto free_nskb; - - nf_ct_attach(nskb, oldskb); - -#ifdef 
CONFIG_BRIDGE_NETFILTER - /* If we use ip_local_out for bridged traffic, the MAC source on - * the RST will be ours, instead of the destination's. This confuses - * some routers/firewalls, and they drop the packet. So we need to - * build the eth header using the original destination's MAC as the - * source, and send the RST packet directly. - */ - if (oldskb->nf_bridge) { - struct ethhdr *oeth = eth_hdr(oldskb); - nskb->dev = oldskb->nf_bridge->physindev; - niph->tot_len = htons(nskb->len); - ip_send_check(niph); - if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol), - oeth->h_source, oeth->h_dest, nskb->len) < 0) - goto free_nskb; - dev_queue_xmit(nskb); - } else -#endif - ip_local_out(nskb); - - return; - - free_nskb: - kfree_skb(nskb); -} - -static inline void send_unreach(struct sk_buff *skb_in, int code) -{ - icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0); -} - static unsigned int reject_tg(struct sk_buff *skb, const struct xt_action_param *par) { @@ -157,28 +37,28 @@ reject_tg(struct sk_buff *skb, const struct xt_action_param *par) switch (reject->with) { case IPT_ICMP_NET_UNREACHABLE: - send_unreach(skb, ICMP_NET_UNREACH); + nf_send_unreach(skb, ICMP_NET_UNREACH); break; case IPT_ICMP_HOST_UNREACHABLE: - send_unreach(skb, ICMP_HOST_UNREACH); + nf_send_unreach(skb, ICMP_HOST_UNREACH); break; case IPT_ICMP_PROT_UNREACHABLE: - send_unreach(skb, ICMP_PROT_UNREACH); + nf_send_unreach(skb, ICMP_PROT_UNREACH); break; case IPT_ICMP_PORT_UNREACHABLE: - send_unreach(skb, ICMP_PORT_UNREACH); + nf_send_unreach(skb, ICMP_PORT_UNREACH); break; case IPT_ICMP_NET_PROHIBITED: - send_unreach(skb, ICMP_NET_ANO); + nf_send_unreach(skb, ICMP_NET_ANO); break; case IPT_ICMP_HOST_PROHIBITED: - send_unreach(skb, ICMP_HOST_ANO); + nf_send_unreach(skb, ICMP_HOST_ANO); break; case IPT_ICMP_ADMIN_PROHIBITED: - send_unreach(skb, ICMP_PKT_FILTERED); + nf_send_unreach(skb, ICMP_PKT_FILTERED); break; case IPT_TCP_RESET: - send_reset(skb, par->hooknum); + nf_send_reset(skb, 
par->hooknum); case IPT_ICMP_ECHOREPLY: /* Doesn't happen. */ break; diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index ecd8bec411c..8127dc80286 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -548,9 +548,3 @@ static void __exit nf_conntrack_l3proto_ipv4_fini(void) module_init(nf_conntrack_l3proto_ipv4_init); module_exit(nf_conntrack_l3proto_ipv4_fini); - -void need_ipv4_conntrack(void) -{ - return; -} -EXPORT_SYMBOL_GPL(need_ipv4_conntrack); diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index 5f011cc89cd..d551e31b416 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c @@ -34,8 +34,7 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
* * Author: James Morris <jmorris@intercode.com.au> * @@ -462,14 +461,14 @@ static unsigned char asn1_oid_decode(struct asn1_ctx *ctx, } if (subid < 40) { - optr [0] = 0; - optr [1] = subid; + optr[0] = 0; + optr[1] = subid; } else if (subid < 80) { - optr [0] = 1; - optr [1] = subid - 40; + optr[0] = 1; + optr[1] = subid - 40; } else { - optr [0] = 2; - optr [1] = subid - 80; + optr[0] = 2; + optr[1] = subid - 80; } *len = 2; diff --git a/net/ipv4/netfilter/nf_tables_arp.c b/net/ipv4/netfilter/nf_tables_arp.c index 3e67ef1c676..19412a4063f 100644 --- a/net/ipv4/netfilter/nf_tables_arp.c +++ b/net/ipv4/netfilter/nf_tables_arp.c @@ -14,10 +14,30 @@ #include <linux/netfilter_arp.h> #include <net/netfilter/nf_tables.h> +static unsigned int +nft_do_chain_arp(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct nft_pktinfo pkt; + + nft_set_pktinfo(&pkt, ops, skb, in, out); + + return nft_do_chain(&pkt, ops); +} + static struct nft_af_info nft_af_arp __read_mostly = { .family = NFPROTO_ARP, .nhooks = NF_ARP_NUMHOOKS, .owner = THIS_MODULE, + .nops = 1, + .hooks = { + [NF_ARP_IN] = nft_do_chain_arp, + [NF_ARP_OUT] = nft_do_chain_arp, + [NF_ARP_FORWARD] = nft_do_chain_arp, + }, }; static int nf_tables_arp_init_net(struct net *net) @@ -48,32 +68,14 @@ static struct pernet_operations nf_tables_arp_net_ops = { .exit = nf_tables_arp_exit_net, }; -static unsigned int -nft_do_chain_arp(const struct nf_hook_ops *ops, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - struct nft_pktinfo pkt; - - nft_set_pktinfo(&pkt, ops, skb, in, out); - - return nft_do_chain_pktinfo(&pkt, ops); -} - -static struct nf_chain_type filter_arp = { - .family = NFPROTO_ARP, +static const struct nf_chain_type filter_arp = { .name = "filter", .type = NFT_CHAIN_T_DEFAULT, + .family = NFPROTO_ARP, + .owner = THIS_MODULE, 
.hook_mask = (1 << NF_ARP_IN) | (1 << NF_ARP_OUT) | (1 << NF_ARP_FORWARD), - .fn = { - [NF_ARP_IN] = nft_do_chain_arp, - [NF_ARP_OUT] = nft_do_chain_arp, - [NF_ARP_FORWARD] = nft_do_chain_arp, - }, }; static int __init nf_tables_arp_init(void) diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c index 0f4cbfeb19b..6820c8c4084 100644 --- a/net/ipv4/netfilter/nf_tables_ipv4.c +++ b/net/ipv4/netfilter/nf_tables_ipv4.c @@ -18,14 +18,25 @@ #include <net/ip.h> #include <net/netfilter/nf_tables_ipv4.h> +static unsigned int nft_do_chain_ipv4(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct nft_pktinfo pkt; + + nft_set_pktinfo_ipv4(&pkt, ops, skb, in, out); + + return nft_do_chain(&pkt, ops); +} + static unsigned int nft_ipv4_output(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - struct nft_pktinfo pkt; - if (unlikely(skb->len < sizeof(struct iphdr) || ip_hdr(skb)->ihl < sizeof(struct iphdr) / 4)) { if (net_ratelimit()) @@ -33,19 +44,24 @@ static unsigned int nft_ipv4_output(const struct nf_hook_ops *ops, "packet\n"); return NF_ACCEPT; } - nft_set_pktinfo_ipv4(&pkt, ops, skb, in, out); - return nft_do_chain_pktinfo(&pkt, ops); + return nft_do_chain_ipv4(ops, skb, in, out, okfn); } -static struct nft_af_info nft_af_ipv4 __read_mostly = { +struct nft_af_info nft_af_ipv4 __read_mostly = { .family = NFPROTO_IPV4, .nhooks = NF_INET_NUMHOOKS, .owner = THIS_MODULE, + .nops = 1, .hooks = { + [NF_INET_LOCAL_IN] = nft_do_chain_ipv4, [NF_INET_LOCAL_OUT] = nft_ipv4_output, + [NF_INET_FORWARD] = nft_do_chain_ipv4, + [NF_INET_PRE_ROUTING] = nft_do_chain_ipv4, + [NF_INET_POST_ROUTING] = nft_do_chain_ipv4, }, }; +EXPORT_SYMBOL_GPL(nft_af_ipv4); static int nf_tables_ipv4_init_net(struct net *net) { @@ -75,42 +91,28 @@ static struct pernet_operations 
nf_tables_ipv4_net_ops = { .exit = nf_tables_ipv4_exit_net, }; -static unsigned int -nft_do_chain_ipv4(const struct nf_hook_ops *ops, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - struct nft_pktinfo pkt; - - nft_set_pktinfo_ipv4(&pkt, ops, skb, in, out); - - return nft_do_chain_pktinfo(&pkt, ops); -} - -static struct nf_chain_type filter_ipv4 = { - .family = NFPROTO_IPV4, +static const struct nf_chain_type filter_ipv4 = { .name = "filter", .type = NFT_CHAIN_T_DEFAULT, + .family = NFPROTO_IPV4, + .owner = THIS_MODULE, .hook_mask = (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_FORWARD) | (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_POST_ROUTING), - .fn = { - [NF_INET_LOCAL_IN] = nft_do_chain_ipv4, - [NF_INET_LOCAL_OUT] = nft_ipv4_output, - [NF_INET_FORWARD] = nft_do_chain_ipv4, - [NF_INET_PRE_ROUTING] = nft_do_chain_ipv4, - [NF_INET_POST_ROUTING] = nft_do_chain_ipv4, - }, }; static int __init nf_tables_ipv4_init(void) { + int ret; + nft_register_chain_type(&filter_ipv4); - return register_pernet_subsys(&nf_tables_ipv4_net_ops); + ret = register_pernet_subsys(&nf_tables_ipv4_net_ops); + if (ret < 0) + nft_unregister_chain_type(&filter_ipv4); + + return ret; } static void __exit nf_tables_ipv4_exit(void) diff --git a/net/ipv4/netfilter/nft_chain_nat_ipv4.c b/net/ipv4/netfilter/nft_chain_nat_ipv4.c index cf2c792cd97..b5b256d45e6 100644 --- a/net/ipv4/netfilter/nft_chain_nat_ipv4.c +++ b/net/ipv4/netfilter/nft_chain_nat_ipv4.c @@ -75,7 +75,7 @@ static unsigned int nf_nat_fn(const struct nf_hook_ops *ops, nft_set_pktinfo_ipv4(&pkt, ops, skb, in, out); - ret = nft_do_chain_pktinfo(&pkt, ops); + ret = nft_do_chain(&pkt, ops); if (ret != NF_ACCEPT) return ret; if (!nf_nat_initialized(ct, maniptype)) { @@ -164,21 +164,21 @@ static unsigned int nf_nat_output(const struct nf_hook_ops *ops, return ret; } -static struct nf_chain_type nft_chain_nat_ipv4 = { - .family = NFPROTO_IPV4, 
+static const struct nf_chain_type nft_chain_nat_ipv4 = { .name = "nat", .type = NFT_CHAIN_T_NAT, + .family = NFPROTO_IPV4, + .owner = THIS_MODULE, .hook_mask = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_POST_ROUTING) | (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_LOCAL_IN), - .fn = { + .hooks = { [NF_INET_PRE_ROUTING] = nf_nat_prerouting, [NF_INET_POST_ROUTING] = nf_nat_postrouting, [NF_INET_LOCAL_OUT] = nf_nat_output, [NF_INET_LOCAL_IN] = nf_nat_fn, }, - .me = THIS_MODULE, }; static int __init nft_chain_nat_init(void) diff --git a/net/ipv4/netfilter/nft_chain_route_ipv4.c b/net/ipv4/netfilter/nft_chain_route_ipv4.c index 4e6bf9a3d7a..125b66766c0 100644 --- a/net/ipv4/netfilter/nft_chain_route_ipv4.c +++ b/net/ipv4/netfilter/nft_chain_route_ipv4.c @@ -47,7 +47,7 @@ static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, daddr = iph->daddr; tos = iph->tos; - ret = nft_do_chain_pktinfo(&pkt, ops); + ret = nft_do_chain(&pkt, ops); if (ret != NF_DROP && ret != NF_QUEUE) { iph = ip_hdr(skb); @@ -61,15 +61,15 @@ static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, return ret; } -static struct nf_chain_type nft_chain_route_ipv4 = { - .family = NFPROTO_IPV4, +static const struct nf_chain_type nft_chain_route_ipv4 = { .name = "route", .type = NFT_CHAIN_T_ROUTE, + .family = NFPROTO_IPV4, + .owner = THIS_MODULE, .hook_mask = (1 << NF_INET_LOCAL_OUT), - .fn = { + .hooks = { [NF_INET_LOCAL_OUT] = nf_route_table_hook, }, - .me = THIS_MODULE, }; static int __init nft_chain_route_init(void) diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 242e7f4ed6f..2d11c094296 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -53,8 +53,12 @@ #include <net/transp_v6.h> #endif +struct ping_table { + struct hlist_nulls_head hash[PING_HTABLE_SIZE]; + rwlock_t lock; +}; -struct ping_table ping_table; +static struct ping_table ping_table; struct pingv6_ops pingv6_ops; EXPORT_SYMBOL_GPL(pingv6_ops); @@ -316,6 +320,9 @@ static int ping_check_bind_addr(struct sock 
*sk, struct inet_sock *isk, if (addr_len < sizeof(*addr)) return -EINVAL; + if (addr->sin6_family != AF_INET6) + return -EINVAL; + pr_debug("ping_check_bind_addr(sk=%p,addr=%pI6c,port=%d)\n", sk, addr->sin6_addr.s6_addr, ntohs(addr->sin6_port)); @@ -668,8 +675,8 @@ int ping_common_sendmsg(int family, struct msghdr *msg, size_t len, } EXPORT_SYMBOL_GPL(ping_common_sendmsg); -int ping_v4_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len) +static int ping_v4_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t len) { struct net *net = sock_net(sk); struct flowi4 fl4; @@ -696,7 +703,7 @@ int ping_v4_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, */ if (msg->msg_name) { - struct sockaddr_in *usin = (struct sockaddr_in *)msg->msg_name; + DECLARE_SOCKADDR(struct sockaddr_in *, usin, msg->msg_name); if (msg->msg_namelen < sizeof(*usin)) return -EINVAL; if (usin->sin_family != AF_INET) @@ -869,7 +876,7 @@ int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, /* Copy the address and add cmsg data. 
*/ if (family == AF_INET) { - struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; + DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name); if (sin) { sin->sin_family = AF_INET; @@ -886,8 +893,7 @@ int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, } else if (family == AF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6hdr *ip6 = ipv6_hdr(skb); - struct sockaddr_in6 *sin6 = - (struct sockaddr_in6 *)msg->msg_name; + DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name); if (sin6) { sin6->sin6_family = AF_INET6; @@ -903,7 +909,12 @@ int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, } if (inet6_sk(sk)->rxopt.all) - pingv6_ops.ip6_datagram_recv_ctl(sk, msg, skb); + pingv6_ops.ip6_datagram_recv_common_ctl(sk, msg, skb); + if (skb->protocol == htons(ETH_P_IPV6) && + inet6_sk(sk)->rxopt.all) + pingv6_ops.ip6_datagram_recv_specific_ctl(sk, msg, skb); + else if (skb->protocol == htons(ETH_P_IP) && isk->cmsg_flags) + ip_cmsg_recv(msg, skb); #endif } else { BUG(); diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 4a0335854b8..a6c8a80ec9d 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -279,6 +279,7 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPFastOpenCookieReqd", LINUX_MIB_TCPFASTOPENCOOKIEREQD), SNMP_MIB_ITEM("TCPSpuriousRtxHostQueues", LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES), SNMP_MIB_ITEM("BusyPollRxPackets", LINUX_MIB_BUSYPOLLRXPACKETS), + SNMP_MIB_ITEM("TCPAutoCorking", LINUX_MIB_TCPAUTOCORKING), SNMP_MIB_SENTINEL }; @@ -332,22 +333,22 @@ static void icmp_put(struct seq_file *seq) atomic_long_t *ptr = net->mib.icmpmsg_statistics->mibs; seq_puts(seq, "\nIcmp: InMsgs InErrors InCsumErrors"); - for (i=0; icmpmibmap[i].name != NULL; i++) + for (i = 0; icmpmibmap[i].name != NULL; i++) seq_printf(seq, " In%s", icmpmibmap[i].name); seq_printf(seq, " OutMsgs OutErrors"); - for (i=0; icmpmibmap[i].name != NULL; i++) + for (i = 0; icmpmibmap[i].name != NULL; i++) 
seq_printf(seq, " Out%s", icmpmibmap[i].name); seq_printf(seq, "\nIcmp: %lu %lu %lu", snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_INMSGS), snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_INERRORS), snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_CSUMERRORS)); - for (i=0; icmpmibmap[i].name != NULL; i++) + for (i = 0; icmpmibmap[i].name != NULL; i++) seq_printf(seq, " %lu", atomic_long_read(ptr + icmpmibmap[i].index)); seq_printf(seq, " %lu %lu", snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_OUTMSGS), snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_OUTERRORS)); - for (i=0; icmpmibmap[i].name != NULL; i++) + for (i = 0; icmpmibmap[i].name != NULL; i++) seq_printf(seq, " %lu", atomic_long_read(ptr + (icmpmibmap[i].index | 0x100))); } diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 23c3e5b5bb5..c04518f4850 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -493,7 +493,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, */ if (msg->msg_namelen) { - struct sockaddr_in *usin = (struct sockaddr_in *)msg->msg_name; + DECLARE_SOCKADDR(struct sockaddr_in *, usin, msg->msg_name); err = -EINVAL; if (msg->msg_namelen < sizeof(*usin)) goto out; @@ -575,7 +575,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, - inet_sk_flowi_flags(sk) | FLOWI_FLAG_CAN_SLEEP | + inet_sk_flowi_flags(sk) | (inet->hdrincl ? 
FLOWI_FLAG_KNOWN_NH : 0), daddr, saddr, 0, 0); @@ -690,7 +690,7 @@ static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, struct inet_sock *inet = inet_sk(sk); size_t copied = 0; int err = -EOPNOTSUPP; - struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; + DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name); struct sk_buff *skb; if (flags & MSG_OOB) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index f8da2827801..25071b48921 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -112,9 +112,6 @@ #define RT_FL_TOS(oldflp4) \ ((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK)) -/* IPv4 datagram length is stored into 16bit field (tot_len) */ -#define IP_MAX_MTU 0xFFFF - #define RT_GC_TIMEOUT (300*HZ) static int ip_rt_max_size; diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index b95331e6c07..f2ed13c2125 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -121,7 +121,7 @@ static __u32 check_tcp_syn_cookie(__u32 cookie, __be32 saddr, __be32 daddr, cookie -= cookie_hash(saddr, daddr, sport, dport, 0, 0) + sseq; /* Cookie is now reduced to (count * 2^24) ^ (hash % 2^24) */ - diff = (count - (cookie >> COOKIEBITS)) & ((__u32) - 1 >> COOKIEBITS); + diff = (count - (cookie >> COOKIEBITS)) & ((__u32) -1 >> COOKIEBITS); if (diff >= MAX_SYNCOOKIE_AGE) return (__u32)-1; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 3d69ec8dac5..44eba052b43 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -286,13 +286,6 @@ static struct ctl_table ipv4_table[] = { .extra2 = &ip_ttl_max, }, { - .procname = "ip_no_pmtu_disc", - .data = &ipv4_config.no_pmtu_disc, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { .procname = "ip_nonlocal_bind", .data = &sysctl_ip_nonlocal_bind, .maxlen = sizeof(int), @@ -707,7 +700,7 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, - { + { .procname = 
"tcp_thin_dupack", .data = &sysctl_tcp_thin_dupack, .maxlen = sizeof(int), @@ -733,6 +726,15 @@ static struct ctl_table ipv4_table[] = { .extra2 = &gso_max_segs, }, { + .procname = "tcp_autocorking", + .data = &sysctl_tcp_autocorking, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, + }, + { .procname = "udp_mem", .data = &sysctl_udp_mem, .maxlen = sizeof(sysctl_udp_mem), @@ -822,6 +824,20 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = ipv4_local_port_range, }, + { + .procname = "ip_no_pmtu_disc", + .data = &init_net.ipv4.sysctl_ip_no_pmtu_disc, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .procname = "ip_forward_use_pmtu", + .data = &init_net.ipv4.sysctl_ip_fwd_use_pmtu, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { } }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 82de7860368..4475b3bb494 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -285,6 +285,8 @@ int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT; int sysctl_tcp_min_tso_segs __read_mostly = 2; +int sysctl_tcp_autocorking __read_mostly = 1; + struct percpu_counter tcp_orphan_count; EXPORT_SYMBOL_GPL(tcp_orphan_count); @@ -379,7 +381,7 @@ void tcp_init_sock(struct sock *sk) struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); - skb_queue_head_init(&tp->out_of_order_queue); + __skb_queue_head_init(&tp->out_of_order_queue); tcp_init_xmit_timers(sk); tcp_prequeue_init(tp); INIT_LIST_HEAD(&tp->tsq_node); @@ -619,19 +621,58 @@ static inline void tcp_mark_urg(struct tcp_sock *tp, int flags) tp->snd_up = tp->write_seq; } -static inline void tcp_push(struct sock *sk, int flags, int mss_now, - int nonagle) +/* If a not yet filled skb is pushed, do not send it if + * we have data packets in Qdisc or NIC queues : + * Because TX completion will happen shortly, it gives a chance + * to coalesce future 
sendmsg() payload into this skb, without + * need for a timer, and with no latency trade off. + * As packets containing data payload have a bigger truesize + * than pure acks (dataless) packets, the last checks prevent + * autocorking if we only have an ACK in Qdisc/NIC queues, + * or if TX completion was delayed after we processed ACK packet. + */ +static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb, + int size_goal) { - if (tcp_send_head(sk)) { - struct tcp_sock *tp = tcp_sk(sk); + return skb->len < size_goal && + sysctl_tcp_autocorking && + skb != tcp_write_queue_head(sk) && + atomic_read(&sk->sk_wmem_alloc) > skb->truesize; +} + +static void tcp_push(struct sock *sk, int flags, int mss_now, + int nonagle, int size_goal) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct sk_buff *skb; - if (!(flags & MSG_MORE) || forced_push(tp)) - tcp_mark_push(tp, tcp_write_queue_tail(sk)); + if (!tcp_send_head(sk)) + return; + + skb = tcp_write_queue_tail(sk); + if (!(flags & MSG_MORE) || forced_push(tp)) + tcp_mark_push(tp, skb); + + tcp_mark_urg(tp, flags); + + if (tcp_should_autocork(sk, skb, size_goal)) { - tcp_mark_urg(tp, flags); - __tcp_push_pending_frames(sk, mss_now, - (flags & MSG_MORE) ? TCP_NAGLE_CORK : nonagle); + /* avoid atomic op if TSQ_THROTTLED bit is already set */ + if (!test_bit(TSQ_THROTTLED, &tp->tsq_flags)) { + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAUTOCORKING); + set_bit(TSQ_THROTTLED, &tp->tsq_flags); + } + /* It is possible TX completion already happened + * before we set TSQ_THROTTLED. 
+ */ + if (atomic_read(&sk->sk_wmem_alloc) > skb->truesize) + return; } + + if (flags & MSG_MORE) + nonagle = TCP_NAGLE_CORK; + + __tcp_push_pending_frames(sk, mss_now, nonagle); } static int tcp_splice_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, @@ -934,7 +975,8 @@ new_segment: wait_for_sndbuf: set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); wait_for_memory: - tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH); + tcp_push(sk, flags & ~MSG_MORE, mss_now, + TCP_NAGLE_PUSH, size_goal); if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) goto do_error; @@ -944,7 +986,7 @@ wait_for_memory: out: if (copied && !(flags & MSG_SENDPAGE_NOTLAST)) - tcp_push(sk, flags, mss_now, tp->nonagle); + tcp_push(sk, flags, mss_now, tp->nonagle, size_goal); return copied; do_error: @@ -1225,7 +1267,8 @@ wait_for_sndbuf: set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); wait_for_memory: if (copied) - tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH); + tcp_push(sk, flags & ~MSG_MORE, mss_now, + TCP_NAGLE_PUSH, size_goal); if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) goto do_error; @@ -1236,7 +1279,7 @@ wait_for_memory: out: if (copied) - tcp_push(sk, flags, mss_now, tp->nonagle); + tcp_push(sk, flags, mss_now, tp->nonagle, size_goal); release_sock(sk); return copied + copied_syn; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c53b7f35c51..65cf90e063d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -766,7 +766,7 @@ static void tcp_update_pacing_rate(struct sock *sk) /* Calculate rto without backoff. This is the second half of Van Jacobson's * routine referred to above. */ -void tcp_set_rto(struct sock *sk) +static void tcp_set_rto(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); /* Old crap is replaced with new one. 
8) @@ -3686,7 +3686,7 @@ const u8 *tcp_parse_md5sig_option(const struct tcphdr *th) int opcode = *ptr++; int opsize; - switch(opcode) { + switch (opcode) { case TCPOPT_EOL: return NULL; case TCPOPT_NOP: @@ -4046,7 +4046,7 @@ static void tcp_sack_remove(struct tcp_sock *tp) WARN_ON(before(tp->rcv_nxt, sp->end_seq)); /* Zap this SACK, by moving forward any other SACKS. */ - for (i=this_sack+1; i < num_sacks; i++) + for (i = this_sack+1; i < num_sacks; i++) tp->selective_acks[i-1] = tp->selective_acks[i]; num_sacks--; continue; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 06721392475..3cf97651049 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -173,7 +173,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) rt = ip_route_connect(fl4, nexthop, inet->inet_saddr, RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, IPPROTO_TCP, - orig_sport, orig_dport, sk, true); + orig_sport, orig_dport, sk); if (IS_ERR(rt)) { err = PTR_ERR(rt); if (err == -ENETUNREACH) @@ -827,7 +827,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, const struct inet_request_sock *ireq = inet_rsk(req); struct flowi4 fl4; int err = -1; - struct sk_buff * skb; + struct sk_buff *skb; /* First, grab a route. 
*/ if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL) @@ -1668,7 +1668,6 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, } sk_setup_caps(newsk, dst); - tcp_mtup_init(newsk); tcp_sync_mss(newsk, dst_mtu(dst)); newtp->advmss = dst_metric_advmss(dst); if (tcp_sk(sk)->rx_opt.user_mss && diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 098b3a29f6f..d547075d830 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -22,7 +22,8 @@ int sysctl_tcp_nometrics_save __read_mostly; -static struct tcp_metrics_block *__tcp_get_metrics(const struct inetpeer_addr *addr, +static struct tcp_metrics_block *__tcp_get_metrics(const struct inetpeer_addr *saddr, + const struct inetpeer_addr *daddr, struct net *net, unsigned int hash); struct tcp_fastopen_metrics { @@ -34,7 +35,8 @@ struct tcp_fastopen_metrics { struct tcp_metrics_block { struct tcp_metrics_block __rcu *tcpm_next; - struct inetpeer_addr tcpm_addr; + struct inetpeer_addr tcpm_saddr; + struct inetpeer_addr tcpm_daddr; unsigned long tcpm_stamp; u32 tcpm_ts; u32 tcpm_ts_stamp; @@ -145,7 +147,8 @@ static void tcpm_check_stamp(struct tcp_metrics_block *tm, struct dst_entry *dst #define TCP_METRICS_RECLAIM_PTR (struct tcp_metrics_block *) 0x1UL static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst, - struct inetpeer_addr *addr, + struct inetpeer_addr *saddr, + struct inetpeer_addr *daddr, unsigned int hash) { struct tcp_metrics_block *tm; @@ -158,7 +161,7 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst, /* While waiting for the spin-lock the cache might have been populated * with this entry and so we have to check again. 
*/ - tm = __tcp_get_metrics(addr, net, hash); + tm = __tcp_get_metrics(saddr, daddr, net, hash); if (tm == TCP_METRICS_RECLAIM_PTR) { reclaim = true; tm = NULL; @@ -183,7 +186,8 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst, if (!tm) goto out_unlock; } - tm->tcpm_addr = *addr; + tm->tcpm_saddr = *saddr; + tm->tcpm_daddr = *daddr; tcpm_suck_dst(tm, dst, true); @@ -206,7 +210,8 @@ static struct tcp_metrics_block *tcp_get_encode(struct tcp_metrics_block *tm, in return NULL; } -static struct tcp_metrics_block *__tcp_get_metrics(const struct inetpeer_addr *addr, +static struct tcp_metrics_block *__tcp_get_metrics(const struct inetpeer_addr *saddr, + const struct inetpeer_addr *daddr, struct net *net, unsigned int hash) { struct tcp_metrics_block *tm; @@ -214,7 +219,8 @@ static struct tcp_metrics_block *__tcp_get_metrics(const struct inetpeer_addr *a for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm; tm = rcu_dereference(tm->tcpm_next)) { - if (addr_same(&tm->tcpm_addr, addr)) + if (addr_same(&tm->tcpm_saddr, saddr) && + addr_same(&tm->tcpm_daddr, daddr)) break; depth++; } @@ -225,19 +231,22 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req, struct dst_entry *dst) { struct tcp_metrics_block *tm; - struct inetpeer_addr addr; + struct inetpeer_addr saddr, daddr; unsigned int hash; struct net *net; - addr.family = req->rsk_ops->family; - switch (addr.family) { + saddr.family = req->rsk_ops->family; + daddr.family = req->rsk_ops->family; + switch (daddr.family) { case AF_INET: - addr.addr.a4 = inet_rsk(req)->ir_rmt_addr; - hash = (__force unsigned int) addr.addr.a4; + saddr.addr.a4 = inet_rsk(req)->ir_loc_addr; + daddr.addr.a4 = inet_rsk(req)->ir_rmt_addr; + hash = (__force unsigned int) daddr.addr.a4; break; #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: - *(struct in6_addr *)addr.addr.a6 = inet_rsk(req)->ir_v6_rmt_addr; + *(struct in6_addr *)saddr.addr.a6 = inet_rsk(req)->ir_v6_loc_addr; + *(struct 
in6_addr *)daddr.addr.a6 = inet_rsk(req)->ir_v6_rmt_addr; hash = ipv6_addr_hash(&inet_rsk(req)->ir_v6_rmt_addr); break; #endif @@ -250,7 +259,8 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req, for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm; tm = rcu_dereference(tm->tcpm_next)) { - if (addr_same(&tm->tcpm_addr, &addr)) + if (addr_same(&tm->tcpm_saddr, &saddr) && + addr_same(&tm->tcpm_daddr, &daddr)) break; } tcpm_check_stamp(tm, dst); @@ -260,32 +270,44 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req, static struct tcp_metrics_block *__tcp_get_metrics_tw(struct inet_timewait_sock *tw) { struct tcp_metrics_block *tm; - struct inetpeer_addr addr; + struct inetpeer_addr saddr, daddr; unsigned int hash; struct net *net; - addr.family = tw->tw_family; - switch (addr.family) { - case AF_INET: - addr.addr.a4 = tw->tw_daddr; - hash = (__force unsigned int) addr.addr.a4; - break; + if (tw->tw_family == AF_INET) { + saddr.family = AF_INET; + saddr.addr.a4 = tw->tw_rcv_saddr; + daddr.family = AF_INET; + daddr.addr.a4 = tw->tw_daddr; + hash = (__force unsigned int) daddr.addr.a4; + } #if IS_ENABLED(CONFIG_IPV6) - case AF_INET6: - *(struct in6_addr *)addr.addr.a6 = tw->tw_v6_daddr; - hash = ipv6_addr_hash(&tw->tw_v6_daddr); - break; + else if (tw->tw_family == AF_INET6) { + if (ipv6_addr_v4mapped(&tw->tw_v6_daddr)) { + saddr.family = AF_INET; + saddr.addr.a4 = tw->tw_rcv_saddr; + daddr.family = AF_INET; + daddr.addr.a4 = tw->tw_daddr; + hash = (__force unsigned int) daddr.addr.a4; + } else { + saddr.family = AF_INET6; + *(struct in6_addr *)saddr.addr.a6 = tw->tw_v6_rcv_saddr; + daddr.family = AF_INET6; + *(struct in6_addr *)daddr.addr.a6 = tw->tw_v6_daddr; + hash = ipv6_addr_hash(&tw->tw_v6_daddr); + } + } #endif - default: + else return NULL; - } net = twsk_net(tw); hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log); for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); 
tm; tm = rcu_dereference(tm->tcpm_next)) { - if (addr_same(&tm->tcpm_addr, &addr)) + if (addr_same(&tm->tcpm_saddr, &saddr) && + addr_same(&tm->tcpm_daddr, &daddr)) break; } return tm; @@ -296,34 +318,45 @@ static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk, bool create) { struct tcp_metrics_block *tm; - struct inetpeer_addr addr; + struct inetpeer_addr saddr, daddr; unsigned int hash; struct net *net; - addr.family = sk->sk_family; - switch (addr.family) { - case AF_INET: - addr.addr.a4 = inet_sk(sk)->inet_daddr; - hash = (__force unsigned int) addr.addr.a4; - break; + if (sk->sk_family == AF_INET) { + saddr.family = AF_INET; + saddr.addr.a4 = inet_sk(sk)->inet_saddr; + daddr.family = AF_INET; + daddr.addr.a4 = inet_sk(sk)->inet_daddr; + hash = (__force unsigned int) daddr.addr.a4; + } #if IS_ENABLED(CONFIG_IPV6) - case AF_INET6: - *(struct in6_addr *)addr.addr.a6 = sk->sk_v6_daddr; - hash = ipv6_addr_hash(&sk->sk_v6_daddr); - break; + else if (sk->sk_family == AF_INET6) { + if (ipv6_addr_v4mapped(&sk->sk_v6_daddr)) { + saddr.family = AF_INET; + saddr.addr.a4 = inet_sk(sk)->inet_saddr; + daddr.family = AF_INET; + daddr.addr.a4 = inet_sk(sk)->inet_daddr; + hash = (__force unsigned int) daddr.addr.a4; + } else { + saddr.family = AF_INET6; + *(struct in6_addr *)saddr.addr.a6 = sk->sk_v6_rcv_saddr; + daddr.family = AF_INET6; + *(struct in6_addr *)daddr.addr.a6 = sk->sk_v6_daddr; + hash = ipv6_addr_hash(&sk->sk_v6_daddr); + } + } #endif - default: + else return NULL; - } net = dev_net(dst->dev); hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log); - tm = __tcp_get_metrics(&addr, net, hash); + tm = __tcp_get_metrics(&saddr, &daddr, net, hash); if (tm == TCP_METRICS_RECLAIM_PTR) tm = NULL; if (!tm && create) - tm = tcpm_new(dst, &addr, hash); + tm = tcpm_new(dst, &saddr, &daddr, hash); else tcpm_check_stamp(tm, dst); @@ -737,15 +770,21 @@ static int tcp_metrics_fill_info(struct sk_buff *msg, struct nlattr *nest; int i; - switch (tm->tcpm_addr.family) { + 
switch (tm->tcpm_daddr.family) { case AF_INET: if (nla_put_be32(msg, TCP_METRICS_ATTR_ADDR_IPV4, - tm->tcpm_addr.addr.a4) < 0) + tm->tcpm_daddr.addr.a4) < 0) + goto nla_put_failure; + if (nla_put_be32(msg, TCP_METRICS_ATTR_SADDR_IPV4, + tm->tcpm_saddr.addr.a4) < 0) goto nla_put_failure; break; case AF_INET6: if (nla_put(msg, TCP_METRICS_ATTR_ADDR_IPV6, 16, - tm->tcpm_addr.addr.a6) < 0) + tm->tcpm_daddr.addr.a6) < 0) + goto nla_put_failure; + if (nla_put(msg, TCP_METRICS_ATTR_SADDR_IPV6, 16, + tm->tcpm_saddr.addr.a6) < 0) goto nla_put_failure; break; default: @@ -868,44 +907,66 @@ done: return skb->len; } -static int parse_nl_addr(struct genl_info *info, struct inetpeer_addr *addr, - unsigned int *hash, int optional) +static int __parse_nl_addr(struct genl_info *info, struct inetpeer_addr *addr, + unsigned int *hash, int optional, int v4, int v6) { struct nlattr *a; - a = info->attrs[TCP_METRICS_ATTR_ADDR_IPV4]; + a = info->attrs[v4]; if (a) { addr->family = AF_INET; addr->addr.a4 = nla_get_be32(a); - *hash = (__force unsigned int) addr->addr.a4; + if (hash) + *hash = (__force unsigned int) addr->addr.a4; return 0; } - a = info->attrs[TCP_METRICS_ATTR_ADDR_IPV6]; + a = info->attrs[v6]; if (a) { if (nla_len(a) != sizeof(struct in6_addr)) return -EINVAL; addr->family = AF_INET6; memcpy(addr->addr.a6, nla_data(a), sizeof(addr->addr.a6)); - *hash = ipv6_addr_hash((struct in6_addr *) addr->addr.a6); + if (hash) + *hash = ipv6_addr_hash((struct in6_addr *) addr->addr.a6); return 0; } return optional ? 
1 : -EAFNOSUPPORT; } +static int parse_nl_addr(struct genl_info *info, struct inetpeer_addr *addr, + unsigned int *hash, int optional) +{ + return __parse_nl_addr(info, addr, hash, optional, + TCP_METRICS_ATTR_ADDR_IPV4, + TCP_METRICS_ATTR_ADDR_IPV6); +} + +static int parse_nl_saddr(struct genl_info *info, struct inetpeer_addr *addr) +{ + return __parse_nl_addr(info, addr, NULL, 0, + TCP_METRICS_ATTR_SADDR_IPV4, + TCP_METRICS_ATTR_SADDR_IPV6); +} + static int tcp_metrics_nl_cmd_get(struct sk_buff *skb, struct genl_info *info) { struct tcp_metrics_block *tm; - struct inetpeer_addr addr; + struct inetpeer_addr saddr, daddr; unsigned int hash; struct sk_buff *msg; struct net *net = genl_info_net(info); void *reply; int ret; + bool src = true; - ret = parse_nl_addr(info, &addr, &hash, 0); + ret = parse_nl_addr(info, &daddr, &hash, 0); if (ret < 0) return ret; + ret = parse_nl_saddr(info, &saddr); + if (ret < 0) + src = false; + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; @@ -920,7 +981,8 @@ static int tcp_metrics_nl_cmd_get(struct sk_buff *skb, struct genl_info *info) rcu_read_lock(); for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm; tm = rcu_dereference(tm->tcpm_next)) { - if (addr_same(&tm->tcpm_addr, &addr)) { + if (addr_same(&tm->tcpm_daddr, &daddr) && + (!src || addr_same(&tm->tcpm_saddr, &saddr))) { ret = tcp_metrics_fill_info(msg, tm); break; } @@ -975,32 +1037,38 @@ static int tcp_metrics_nl_cmd_del(struct sk_buff *skb, struct genl_info *info) struct tcpm_hash_bucket *hb; struct tcp_metrics_block *tm; struct tcp_metrics_block __rcu **pp; - struct inetpeer_addr addr; + struct inetpeer_addr saddr, daddr; unsigned int hash; struct net *net = genl_info_net(info); int ret; + bool src = true, found = false; - ret = parse_nl_addr(info, &addr, &hash, 1); + ret = parse_nl_addr(info, &daddr, &hash, 1); if (ret < 0) return ret; if (ret > 0) return tcp_metrics_flush_all(net); + ret = parse_nl_saddr(info, &saddr); + if 
(ret < 0) + src = false; hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log); hb = net->ipv4.tcp_metrics_hash + hash; pp = &hb->chain; spin_lock_bh(&tcp_metrics_lock); - for (tm = deref_locked_genl(*pp); tm; - pp = &tm->tcpm_next, tm = deref_locked_genl(*pp)) { - if (addr_same(&tm->tcpm_addr, &addr)) { + for (tm = deref_locked_genl(*pp); tm; tm = deref_locked_genl(*pp)) { + if (addr_same(&tm->tcpm_daddr, &daddr) && + (!src || addr_same(&tm->tcpm_saddr, &saddr))) { *pp = tm->tcpm_next; - break; + kfree_rcu(tm, rcu_head); + found = true; + } else { + pp = &tm->tcpm_next; } } spin_unlock_bh(&tcp_metrics_lock); - if (!tm) + if (!found) return -ESRCH; - kfree_rcu(tm, rcu_head); return 0; } diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 97b68415986..7a436c517e4 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -297,6 +297,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) tw->tw_v6_daddr = sk->sk_v6_daddr; tw->tw_v6_rcv_saddr = sk->sk_v6_rcv_saddr; tw->tw_tclass = np->tclass; + tw->tw_flowlabel = np->flow_label >> 12; tw->tw_ipv6only = np->ipv6only; } #endif @@ -425,7 +426,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, tcp_set_ca_state(newsk, TCP_CA_Open); tcp_init_xmit_timers(newsk); - skb_queue_head_init(&newtp->out_of_order_queue); + __skb_queue_head_init(&newtp->out_of_order_queue); newtp->write_seq = newtp->pushed_seq = treq->snt_isn + 1; newtp->rx_opt.saw_tstamp = 0; diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index 05606353c7e..b92b81718ca 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -138,7 +138,6 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, out: return segs; } -EXPORT_SYMBOL(tcp_gso_segment); struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb) { @@ -197,7 +196,8 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb) goto out_check_final; found: - flush = 
NAPI_GRO_CB(p)->flush; + /* Include the IP ID check below from the inner most IP hdr */ + flush = NAPI_GRO_CB(p)->flush | NAPI_GRO_CB(p)->flush_id; flush |= (__force int)(flags & TCP_FLAG_CWR); flush |= (__force int)((flags ^ tcp_flag_word(th2)) & ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH)); @@ -230,17 +230,16 @@ out_check_final: pp = head; out: - NAPI_GRO_CB(skb)->flush |= flush; + NAPI_GRO_CB(skb)->flush |= (flush != 0); return pp; } -EXPORT_SYMBOL(tcp_gro_receive); int tcp_gro_complete(struct sk_buff *skb) { struct tcphdr *th = tcp_hdr(skb); - skb->csum_start = skb_transport_header(skb) - skb->head; + skb->csum_start = (unsigned char *)th - skb->head; skb->csum_offset = offsetof(struct tcphdr, check); skb->ip_summed = CHECKSUM_PARTIAL; @@ -272,6 +271,7 @@ static int tcp_v4_gso_send_check(struct sk_buff *skb) static struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb) { + /* Use the IP hdr immediately proceeding for this transport */ const struct iphdr *iph = skb_gro_network_header(skb); __wsum wsum; @@ -279,7 +279,7 @@ static struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff * if (NAPI_GRO_CB(skb)->flush) goto skip_csum; - wsum = skb->csum; + wsum = NAPI_GRO_CB(skb)->csum; switch (skb->ip_summed) { case CHECKSUM_NONE: @@ -303,13 +303,13 @@ skip_csum: return tcp_gro_receive(head, skb); } -static int tcp4_gro_complete(struct sk_buff *skb) +static int tcp4_gro_complete(struct sk_buff *skb, int thoff) { const struct iphdr *iph = ip_hdr(skb); struct tcphdr *th = tcp_hdr(skb); - th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb), - iph->saddr, iph->daddr, 0); + th->check = ~tcp_v4_check(skb->len - thoff, iph->saddr, + iph->daddr, 0); skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; return tcp_gro_complete(skb); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 7820f3a7dd7..03d26b85eab 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -363,15 +363,17 @@ static inline void 
TCP_ECN_send(struct sock *sk, struct sk_buff *skb, */ static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags) { + struct skb_shared_info *shinfo = skb_shinfo(skb); + skb->ip_summed = CHECKSUM_PARTIAL; skb->csum = 0; TCP_SKB_CB(skb)->tcp_flags = flags; TCP_SKB_CB(skb)->sacked = 0; - skb_shinfo(skb)->gso_segs = 1; - skb_shinfo(skb)->gso_size = 0; - skb_shinfo(skb)->gso_type = 0; + shinfo->gso_segs = 1; + shinfo->gso_size = 0; + shinfo->gso_type = 0; TCP_SKB_CB(skb)->seq = seq; if (flags & (TCPHDR_SYN | TCPHDR_FIN)) @@ -406,7 +408,7 @@ struct tcp_out_options { * Beware: Something in the Internet is very sensitive to the ordering of * TCP options, we learned this through the hard way, so be careful here. * Luckily we can at least blame others for their non-compliance but from - * inter-operatibility perspective it seems that we're somewhat stuck with + * inter-operability perspective it seems that we're somewhat stuck with * the ordering which we have been using if we want to keep working with * those broken things (not that it currently hurts anybody as there isn't * particular reason why the ordering would need to be changed). @@ -679,7 +681,7 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb * * Its important tcp_wfree() can be replaced by sock_wfree() in the event skb * needs to be reallocated in a driver. - * The invariant being skb->truesize substracted from sk->sk_wmem_alloc + * The invariant being skb->truesize subtracted from sk->sk_wmem_alloc * * Since transmit from skb destructor is forbidden, we use a tasklet * to process all sockets that eventually need to send more skbs. @@ -699,9 +701,9 @@ static void tcp_tsq_handler(struct sock *sk) tcp_write_xmit(sk, tcp_current_mss(sk), 0, 0, GFP_ATOMIC); } /* - * One tasklest per cpu tries to send more skbs. + * One tasklet per cpu tries to send more skbs. 
* We run in tasklet context but need to disable irqs when - * transfering tsq->head because tcp_wfree() might + * transferring tsq->head because tcp_wfree() might * interrupt us (non NAPI drivers) */ static void tcp_tasklet_func(unsigned long data) @@ -795,7 +797,7 @@ void __init tcp_tasklet_init(void) /* * Write buffer destructor automatically called from kfree_skb. - * We cant xmit new skbs from this context, as we might already + * We can't xmit new skbs from this context, as we might already * hold qdisc lock. */ void tcp_wfree(struct sk_buff *skb) @@ -986,6 +988,8 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb, unsigned int mss_now) { + struct skb_shared_info *shinfo = skb_shinfo(skb); + /* Make sure we own this skb before messing gso_size/gso_segs */ WARN_ON_ONCE(skb_cloned(skb)); @@ -993,13 +997,13 @@ static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb, /* Avoid the costly divide in the normal * non-TSO case. 
*/ - skb_shinfo(skb)->gso_segs = 1; - skb_shinfo(skb)->gso_size = 0; - skb_shinfo(skb)->gso_type = 0; + shinfo->gso_segs = 1; + shinfo->gso_size = 0; + shinfo->gso_type = 0; } else { - skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss_now); - skb_shinfo(skb)->gso_size = mss_now; - skb_shinfo(skb)->gso_type = sk->sk_gso_type; + shinfo->gso_segs = DIV_ROUND_UP(skb->len, mss_now); + shinfo->gso_size = mss_now; + shinfo->gso_type = sk->sk_gso_type; } } @@ -1146,6 +1150,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, */ static void __pskb_trim_head(struct sk_buff *skb, int len) { + struct skb_shared_info *shinfo; int i, k, eat; eat = min_t(int, len, skb_headlen(skb)); @@ -1157,23 +1162,24 @@ static void __pskb_trim_head(struct sk_buff *skb, int len) } eat = len; k = 0; - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - int size = skb_frag_size(&skb_shinfo(skb)->frags[i]); + shinfo = skb_shinfo(skb); + for (i = 0; i < shinfo->nr_frags; i++) { + int size = skb_frag_size(&shinfo->frags[i]); if (size <= eat) { skb_frag_unref(skb, i); eat -= size; } else { - skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i]; + shinfo->frags[k] = shinfo->frags[i]; if (eat) { - skb_shinfo(skb)->frags[k].page_offset += eat; - skb_frag_size_sub(&skb_shinfo(skb)->frags[k], eat); + shinfo->frags[k].page_offset += eat; + skb_frag_size_sub(&shinfo->frags[k], eat); eat = 0; } k++; } } - skb_shinfo(skb)->nr_frags = k; + shinfo->nr_frags = k; skb_reset_tail_pointer(skb); skb->data_len -= len; @@ -1378,23 +1384,51 @@ static void tcp_cwnd_validate(struct sock *sk) } } -/* Returns the portion of skb which can be sent right away without - * introducing MSS oddities to segment boundaries. In rare cases where - * mss_now != mss_cache, we will request caller to create a small skb - * per input skb which could be mostly avoided here (if desired). 
- * - * We explicitly want to create a request for splitting write queue tail - * to a small skb for Nagle purposes while avoiding unnecessary modulos, - * thus all the complexity (cwnd_len is always MSS multiple which we - * return whenever allowed by the other factors). Basically we need the - * modulo only when the receiver window alone is the limiting factor or - * when we would be allowed to send the split-due-to-Nagle skb fully. +/* Minshall's variant of the Nagle send check. */ +static bool tcp_minshall_check(const struct tcp_sock *tp) +{ + return after(tp->snd_sml, tp->snd_una) && + !after(tp->snd_sml, tp->snd_nxt); +} + +/* Update snd_sml if this skb is under mss + * Note that a TSO packet might end with a sub-mss segment + * The test is really : + * if ((skb->len % mss) != 0) + * tp->snd_sml = TCP_SKB_CB(skb)->end_seq; + * But we can avoid doing the divide again given we already have + * skb_pcount = skb->len / mss_now + */ +static void tcp_minshall_update(struct tcp_sock *tp, unsigned int mss_now, + const struct sk_buff *skb) +{ + if (skb->len < tcp_skb_pcount(skb) * mss_now) + tp->snd_sml = TCP_SKB_CB(skb)->end_seq; +} + +/* Return false, if packet can be sent now without violation Nagle's rules: + * 1. It is full sized. (provided by caller in %partial bool) + * 2. Or it contains FIN. (already checked by caller) + * 3. Or TCP_CORK is not set, and TCP_NODELAY is set. + * 4. Or TCP_CORK is not set, and all sent packets are ACKed. + * With Minshall's modification: all sent small packets are ACKed. 
*/ -static unsigned int tcp_mss_split_point(const struct sock *sk, const struct sk_buff *skb, - unsigned int mss_now, unsigned int max_segs) +static bool tcp_nagle_check(bool partial, const struct tcp_sock *tp, + unsigned int mss_now, int nonagle) +{ + return partial && + ((nonagle & TCP_NAGLE_CORK) || + (!nonagle && tp->packets_out && tcp_minshall_check(tp))); +} +/* Returns the portion of skb which can be sent right away */ +static unsigned int tcp_mss_split_point(const struct sock *sk, + const struct sk_buff *skb, + unsigned int mss_now, + unsigned int max_segs, + int nonagle) { const struct tcp_sock *tp = tcp_sk(sk); - u32 needed, window, max_len; + u32 partial, needed, window, max_len; window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq; max_len = mss_now * max_segs; @@ -1407,7 +1441,15 @@ static unsigned int tcp_mss_split_point(const struct sock *sk, const struct sk_b if (max_len <= needed) return max_len; - return needed - needed % mss_now; + partial = needed % mss_now; + /* If last segment is not a full MSS, check if Nagle rules allow us + * to include this last segment in this skb. + * Otherwise, we'll split the skb at last MSS boundary + */ + if (tcp_nagle_check(partial != 0, tp, mss_now, nonagle)) + return needed - partial; + + return needed; } /* Can at least one segment of SKB be sent right now, according to the @@ -1447,28 +1489,6 @@ static int tcp_init_tso_segs(const struct sock *sk, struct sk_buff *skb, return tso_segs; } -/* Minshall's variant of the Nagle send check. */ -static inline bool tcp_minshall_check(const struct tcp_sock *tp) -{ - return after(tp->snd_sml, tp->snd_una) && - !after(tp->snd_sml, tp->snd_nxt); -} - -/* Return false, if packet can be sent now without violation Nagle's rules: - * 1. It is full sized. - * 2. Or it contains FIN. (already checked by caller) - * 3. Or TCP_CORK is not set, and TCP_NODELAY is set. - * 4. Or TCP_CORK is not set, and all sent packets are ACKed. 
- * With Minshall's modification: all sent small packets are ACKed. - */ -static inline bool tcp_nagle_check(const struct tcp_sock *tp, - const struct sk_buff *skb, - unsigned int mss_now, int nonagle) -{ - return skb->len < mss_now && - ((nonagle & TCP_NAGLE_CORK) || - (!nonagle && tp->packets_out && tcp_minshall_check(tp))); -} /* Return true if the Nagle test allows this packet to be * sent now. @@ -1489,7 +1509,7 @@ static inline bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buf if (tcp_urg_mode(tp) || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)) return true; - if (!tcp_nagle_check(tp, skb, cur_mss, nonagle)) + if (!tcp_nagle_check(skb->len < cur_mss, tp, cur_mss, nonagle)) return true; return false; @@ -1892,7 +1912,8 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, limit = tcp_mss_split_point(sk, skb, mss_now, min_t(unsigned int, cwnd_quota, - sk->sk_gso_max_segs)); + sk->sk_gso_max_segs), + nonagle); if (skb->len > limit && unlikely(tso_fragment(sk, skb, limit, mss_now, gfp))) @@ -2756,7 +2777,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, EXPORT_SYMBOL(tcp_make_synack); /* Do all connect socket setups that can be done AF independent. 
*/ -void tcp_connect_init(struct sock *sk) +static void tcp_connect_init(struct sock *sk) { const struct dst_entry *dst = __sk_dst_get(sk); struct tcp_sock *tp = tcp_sk(sk); diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index 8b97d71e193..1f2d37613c9 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c @@ -38,7 +38,7 @@ MODULE_DESCRIPTION("TCP cwnd snooper"); MODULE_LICENSE("GPL"); MODULE_VERSION("1.1"); -static int port __read_mostly = 0; +static int port __read_mostly; MODULE_PARM_DESC(port, "Port to match (0=all)"); module_param(port, int, 0); @@ -46,7 +46,7 @@ static unsigned int bufsize __read_mostly = 4096; MODULE_PARM_DESC(bufsize, "Log buffer size in packets (4096)"); module_param(bufsize, uint, 0); -static unsigned int fwmark __read_mostly = 0; +static unsigned int fwmark __read_mostly; MODULE_PARM_DESC(fwmark, "skb mark to match (0=no mark)"); module_param(fwmark, uint, 0); diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c index a347a078ee0..1a8d271f994 100644 --- a/net/ipv4/tcp_yeah.c +++ b/net/ipv4/tcp_yeah.c @@ -3,7 +3,7 @@ * YeAH TCP * * For further details look at: - * http://wil.cs.caltech.edu/pfldnet2007/paper/YeAH_TCP.pdf + * https://web.archive.org/web/20080316215752/http://wil.cs.caltech.edu/pfldnet2007/paper/YeAH_TCP.pdf * */ #include <linux/mm.h> @@ -15,13 +15,13 @@ #include "tcp_vegas.h" -#define TCP_YEAH_ALPHA 80 //lin number of packets queued at the bottleneck -#define TCP_YEAH_GAMMA 1 //lin fraction of queue to be removed per rtt -#define TCP_YEAH_DELTA 3 //log minimum fraction of cwnd to be removed on loss -#define TCP_YEAH_EPSILON 1 //log maximum fraction to be removed on early decongestion -#define TCP_YEAH_PHY 8 //lin maximum delta from base -#define TCP_YEAH_RHO 16 //lin minimum number of consecutive rtt to consider competition on loss -#define TCP_YEAH_ZETA 50 //lin minimum number of state switchs to reset reno_count +#define TCP_YEAH_ALPHA 80 /* number of packets queued at the bottleneck */ +#define 
TCP_YEAH_GAMMA 1 /* fraction of queue to be removed per rtt */ +#define TCP_YEAH_DELTA 3 /* log minimum fraction of cwnd to be removed on loss */ +#define TCP_YEAH_EPSILON 1 /* log maximum fraction to be removed on early decongestion */ +#define TCP_YEAH_PHY 8 /* maximum delta from base */ +#define TCP_YEAH_RHO 16 /* minimum number of consecutive rtt to consider competition on loss */ +#define TCP_YEAH_ZETA 50 /* minimum number of state switches to reset reno_count */ #define TCP_SCALABLE_AI_CNT 100U @@ -214,9 +214,9 @@ static u32 tcp_yeah_ssthresh(struct sock *sk) { if (yeah->doing_reno_now < TCP_YEAH_RHO) { reduction = yeah->lastQ; - reduction = min( reduction, max(tp->snd_cwnd>>1, 2U) ); + reduction = min(reduction, max(tp->snd_cwnd>>1, 2U)); - reduction = max( reduction, tp->snd_cwnd >> TCP_YEAH_DELTA); + reduction = max(reduction, tp->snd_cwnd >> TCP_YEAH_DELTA); } else reduction = max(tp->snd_cwnd>>1, 2U); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index a7e4729e974..77bd16fa9f3 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -223,7 +223,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, inet_get_local_port_range(net, &low, &high); remaining = (high - low) + 1; - rand = net_random(); + rand = prandom_u32(); first = (((u64)rand * remaining) >> 32) + low; /* * force rand to be an odd multiple of UDP_HTABLE_SIZE @@ -902,7 +902,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, * Get and verify the address. 
*/ if (msg->msg_name) { - struct sockaddr_in *usin = (struct sockaddr_in *)msg->msg_name; + DECLARE_SOCKADDR(struct sockaddr_in *, usin, msg->msg_name); if (msg->msg_namelen < sizeof(*usin)) return -EINVAL; if (usin->sin_family != AF_INET) { @@ -986,7 +986,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, fl4 = &fl4_stack; flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, sk->sk_protocol, - inet_sk_flowi_flags(sk)|FLOWI_FLAG_CAN_SLEEP, + inet_sk_flowi_flags(sk), faddr, saddr, dport, inet->inet_sport); security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); @@ -1226,7 +1226,7 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len, int noblock, int flags, int *addr_len) { struct inet_sock *inet = inet_sk(sk); - struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; + DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name); struct sk_buff *skb; unsigned int ulen, copied; int peeked, off = 0; diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 79c62bdcd3c..25f5cee3a08 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -14,6 +14,15 @@ #include <net/udp.h> #include <net/protocol.h> +static DEFINE_SPINLOCK(udp_offload_lock); +static struct udp_offload_priv __rcu *udp_offload_base __read_mostly; + +struct udp_offload_priv { + struct udp_offload *offload; + struct rcu_head rcu; + struct udp_offload_priv __rcu *next; +}; + static int udp4_ufo_send_check(struct sk_buff *skb) { if (!pskb_may_pull(skb, sizeof(struct udphdr))) @@ -89,10 +98,144 @@ out: return segs; } +int udp_add_offload(struct udp_offload *uo) +{ + struct udp_offload_priv __rcu **head = &udp_offload_base; + struct udp_offload_priv *new_offload = kzalloc(sizeof(*new_offload), GFP_KERNEL); + + if (!new_offload) + return -ENOMEM; + + new_offload->offload = uo; + + spin_lock(&udp_offload_lock); + rcu_assign_pointer(new_offload->next, rcu_dereference(*head)); + rcu_assign_pointer(*head, 
new_offload); + spin_unlock(&udp_offload_lock); + + return 0; +} +EXPORT_SYMBOL(udp_add_offload); + +static void udp_offload_free_routine(struct rcu_head *head) +{ + struct udp_offload_priv *ou_priv = container_of(head, struct udp_offload_priv, rcu); + kfree(ou_priv); +} + +void udp_del_offload(struct udp_offload *uo) +{ + struct udp_offload_priv __rcu **head = &udp_offload_base; + struct udp_offload_priv *uo_priv; + + spin_lock(&udp_offload_lock); + + uo_priv = rcu_dereference(*head); + for (; uo_priv != NULL; + uo_priv = rcu_dereference(*head)) { + + if (uo_priv->offload == uo) { + rcu_assign_pointer(*head, rcu_dereference(uo_priv->next)); + goto unlock; + } + head = &uo_priv->next; + } + pr_warn("udp_del_offload: didn't find offload for port %d\n", ntohs(uo->port)); +unlock: + spin_unlock(&udp_offload_lock); + if (uo_priv != NULL) + call_rcu(&uo_priv->rcu, udp_offload_free_routine); +} +EXPORT_SYMBOL(udp_del_offload); + +static struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb) +{ + struct udp_offload_priv *uo_priv; + struct sk_buff *p, **pp = NULL; + struct udphdr *uh, *uh2; + unsigned int hlen, off; + int flush = 1; + + if (NAPI_GRO_CB(skb)->udp_mark || + (!skb->encapsulation && skb->ip_summed != CHECKSUM_COMPLETE)) + goto out; + + /* mark that this skb passed once through the udp gro layer */ + NAPI_GRO_CB(skb)->udp_mark = 1; + + off = skb_gro_offset(skb); + hlen = off + sizeof(*uh); + uh = skb_gro_header_fast(skb, off); + if (skb_gro_header_hard(skb, hlen)) { + uh = skb_gro_header_slow(skb, hlen, off); + if (unlikely(!uh)) + goto out; + } + + rcu_read_lock(); + uo_priv = rcu_dereference(udp_offload_base); + for (; uo_priv != NULL; uo_priv = rcu_dereference(uo_priv->next)) { + if (uo_priv->offload->port == uh->dest && + uo_priv->offload->callbacks.gro_receive) + goto unflush; + } + goto out_unlock; + +unflush: + flush = 0; + + for (p = *head; p; p = p->next) { + if (!NAPI_GRO_CB(p)->same_flow) + continue; + + uh2 = (struct udphdr 
*)(p->data + off); + if ((*(u32 *)&uh->source != *(u32 *)&uh2->source)) { + NAPI_GRO_CB(p)->same_flow = 0; + continue; + } + } + + skb_gro_pull(skb, sizeof(struct udphdr)); /* pull encapsulating udp header */ + pp = uo_priv->offload->callbacks.gro_receive(head, skb); + +out_unlock: + rcu_read_unlock(); +out: + NAPI_GRO_CB(skb)->flush |= flush; + return pp; +} + +static int udp_gro_complete(struct sk_buff *skb, int nhoff) +{ + struct udp_offload_priv *uo_priv; + __be16 newlen = htons(skb->len - nhoff); + struct udphdr *uh = (struct udphdr *)(skb->data + nhoff); + int err = -ENOSYS; + + uh->len = newlen; + + rcu_read_lock(); + + uo_priv = rcu_dereference(udp_offload_base); + for (; uo_priv != NULL; uo_priv = rcu_dereference(uo_priv->next)) { + if (uo_priv->offload->port == uh->dest && + uo_priv->offload->callbacks.gro_complete) + break; + } + + if (uo_priv != NULL) + err = uo_priv->offload->callbacks.gro_complete(skb, nhoff + sizeof(struct udphdr)); + + rcu_read_unlock(); + return err; +} + static const struct net_offload udpv4_offload = { .callbacks = { .gso_send_check = udp4_ufo_send_check, .gso_segment = udp4_ufo_fragment, + .gro_receive = udp_gro_receive, + .gro_complete = udp_gro_complete, }, }; diff --git a/net/ipv4/xfrm4_mode_beet.c b/net/ipv4/xfrm4_mode_beet.c index e3db3f91511..71acd0014f2 100644 --- a/net/ipv4/xfrm4_mode_beet.c +++ b/net/ipv4/xfrm4_mode_beet.c @@ -48,7 +48,7 @@ static int xfrm4_beet_output(struct xfrm_state *x, struct sk_buff *skb) hdrlen += IPV4_BEET_PHMAXLEN - (optlen & 4); skb_set_network_header(skb, -x->props.header_len - - hdrlen + (XFRM_MODE_SKB_CB(skb)->ihl - sizeof(*top_iph))); + hdrlen + (XFRM_MODE_SKB_CB(skb)->ihl - sizeof(*top_iph))); if (x->sel.family != AF_INET6) skb->network_header += IPV4_BEET_PHMAXLEN; skb->mac_header = skb->network_header + diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index 0b2a0641526..542074c00c7 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -16,7 +16,7 @@ static int 
xfrm4_init_flags(struct xfrm_state *x) { - if (ipv4_config.no_pmtu_disc) + if (xs_net(x)->ipv4.sysctl_ip_no_pmtu_disc) x->props.flags |= XFRM_STATE_NOPMTUDISC; return 0; } diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 4b6b720971b..ad235690684 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -442,6 +442,8 @@ static int inet6_netconf_msgsize_devconf(int type) if (type == -1 || type == NETCONFA_MC_FORWARDING) size += nla_total_size(4); #endif + if (type == -1 || type == NETCONFA_PROXY_NEIGH) + size += nla_total_size(4); return size; } @@ -475,6 +477,10 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex, devconf->mc_forwarding) < 0) goto nla_put_failure; #endif + if ((type == -1 || type == NETCONFA_PROXY_NEIGH) && + nla_put_s32(skb, NETCONFA_PROXY_NEIGH, devconf->proxy_ndp) < 0) + goto nla_put_failure; + return nlmsg_end(skb, nlh); nla_put_failure: @@ -509,6 +515,7 @@ errout: static const struct nla_policy devconf_ipv6_policy[NETCONFA_MAX+1] = { [NETCONFA_IFINDEX] = { .len = sizeof(int) }, [NETCONFA_FORWARDING] = { .len = sizeof(int) }, + [NETCONFA_PROXY_NEIGH] = { .len = sizeof(int) }, }; static int inet6_netconf_get_devconf(struct sk_buff *in_skb, @@ -834,6 +841,8 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, goto out; } + neigh_parms_data_state_setall(idev->nd_parms); + ifa->addr = *addr; if (peer_addr) ifa->peer_addr = *peer_addr; @@ -891,15 +900,95 @@ out: goto out2; } +enum cleanup_prefix_rt_t { + CLEANUP_PREFIX_RT_NOP, /* no cleanup action for prefix route */ + CLEANUP_PREFIX_RT_DEL, /* delete the prefix route */ + CLEANUP_PREFIX_RT_EXPIRE, /* update the lifetime of the prefix route */ +}; + +/* + * Check, whether the prefix for ifp would still need a prefix route + * after deleting ifp. The function returns one of the CLEANUP_PREFIX_RT_* + * constants. + * + * 1) we don't purge prefix if address was not permanent. + * prefix is managed by its own lifetime. 
+ * 2) we also don't purge, if the address was IFA_F_NOPREFIXROUTE. + * 3) if there are no addresses, delete prefix. + * 4) if there are still other permanent address(es), + * corresponding prefix is still permanent. + * 5) if there are still other addresses with IFA_F_NOPREFIXROUTE, + * don't purge the prefix, assume user space is managing it. + * 6) otherwise, update prefix lifetime to the + * longest valid lifetime among the corresponding + * addresses on the device. + * Note: subsequent RA will update lifetime. + **/ +static enum cleanup_prefix_rt_t +check_cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long *expires) +{ + struct inet6_ifaddr *ifa; + struct inet6_dev *idev = ifp->idev; + unsigned long lifetime; + enum cleanup_prefix_rt_t action = CLEANUP_PREFIX_RT_DEL; + + *expires = jiffies; + + list_for_each_entry(ifa, &idev->addr_list, if_list) { + if (ifa == ifp) + continue; + if (!ipv6_prefix_equal(&ifa->addr, &ifp->addr, + ifp->prefix_len)) + continue; + if (ifa->flags & (IFA_F_PERMANENT | IFA_F_NOPREFIXROUTE)) + return CLEANUP_PREFIX_RT_NOP; + + action = CLEANUP_PREFIX_RT_EXPIRE; + + spin_lock(&ifa->lock); + + lifetime = addrconf_timeout_fixup(ifa->valid_lft, HZ); + /* + * Note: Because this address is + * not permanent, lifetime < + * LONG_MAX / HZ here. 
+ */ + if (time_before(*expires, ifa->tstamp + lifetime * HZ)) + *expires = ifa->tstamp + lifetime * HZ; + spin_unlock(&ifa->lock); + } + + return action; +} + +static void +cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires, bool del_rt) +{ + struct rt6_info *rt; + + rt = addrconf_get_prefix_route(&ifp->addr, + ifp->prefix_len, + ifp->idev->dev, + 0, RTF_GATEWAY | RTF_DEFAULT); + if (rt) { + if (del_rt) + ip6_del_rt(rt); + else { + if (!(rt->rt6i_flags & RTF_EXPIRES)) + rt6_set_expires(rt, expires); + ip6_rt_put(rt); + } + } +} + + /* This function wants to get referenced ifp and releases it before return */ static void ipv6_del_addr(struct inet6_ifaddr *ifp) { - struct inet6_ifaddr *ifa, *ifn; - struct inet6_dev *idev = ifp->idev; int state; - int deleted = 0, onlink = 0; - unsigned long expires = jiffies; + enum cleanup_prefix_rt_t action = CLEANUP_PREFIX_RT_NOP; + unsigned long expires; spin_lock_bh(&ifp->state_lock); state = ifp->state; @@ -913,7 +1002,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) hlist_del_init_rcu(&ifp->addr_lst); spin_unlock_bh(&addrconf_hash_lock); - write_lock_bh(&idev->lock); + write_lock_bh(&ifp->idev->lock); if (ifp->flags&IFA_F_TEMPORARY) { list_del(&ifp->tmp_list); @@ -924,45 +1013,13 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) __in6_ifa_put(ifp); } - list_for_each_entry_safe(ifa, ifn, &idev->addr_list, if_list) { - if (ifa == ifp) { - list_del_init(&ifp->if_list); - __in6_ifa_put(ifp); + if (ifp->flags & IFA_F_PERMANENT && !(ifp->flags & IFA_F_NOPREFIXROUTE)) + action = check_cleanup_prefix_route(ifp, &expires); - if (!(ifp->flags & IFA_F_PERMANENT) || onlink > 0) - break; - deleted = 1; - continue; - } else if (ifp->flags & IFA_F_PERMANENT) { - if (ipv6_prefix_equal(&ifa->addr, &ifp->addr, - ifp->prefix_len)) { - if (ifa->flags & IFA_F_PERMANENT) { - onlink = 1; - if (deleted) - break; - } else { - unsigned long lifetime; - - if (!onlink) - onlink = -1; - - spin_lock(&ifa->lock); - - lifetime 
= addrconf_timeout_fixup(ifa->valid_lft, HZ); - /* - * Note: Because this address is - * not permanent, lifetime < - * LONG_MAX / HZ here. - */ - if (time_before(expires, - ifa->tstamp + lifetime * HZ)) - expires = ifa->tstamp + lifetime * HZ; - spin_unlock(&ifa->lock); - } - } - } - } - write_unlock_bh(&idev->lock); + list_del_init(&ifp->if_list); + __in6_ifa_put(ifp); + + write_unlock_bh(&ifp->idev->lock); addrconf_del_dad_timer(ifp); @@ -970,41 +1027,9 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) inet6addr_notifier_call_chain(NETDEV_DOWN, ifp); - /* - * Purge or update corresponding prefix - * - * 1) we don't purge prefix here if address was not permanent. - * prefix is managed by its own lifetime. - * 2) if there're no addresses, delete prefix. - * 3) if there're still other permanent address(es), - * corresponding prefix is still permanent. - * 4) otherwise, update prefix lifetime to the - * longest valid lifetime among the corresponding - * addresses on the device. - * Note: subsequent RA will update lifetime. 
- * - * --yoshfuji - */ - if ((ifp->flags & IFA_F_PERMANENT) && onlink < 1) { - struct in6_addr prefix; - struct rt6_info *rt; - - ipv6_addr_prefix(&prefix, &ifp->addr, ifp->prefix_len); - - rt = addrconf_get_prefix_route(&prefix, - ifp->prefix_len, - ifp->idev->dev, - 0, RTF_GATEWAY | RTF_DEFAULT); - - if (rt) { - if (onlink == 0) { - ip6_del_rt(rt); - rt = NULL; - } else if (!(rt->rt6i_flags & RTF_EXPIRES)) { - rt6_set_expires(rt, expires); - } - } - ip6_rt_put(rt); + if (action != CLEANUP_PREFIX_RT_NOP) { + cleanup_prefix_route(ifp, expires, + action == CLEANUP_PREFIX_RT_DEL); } /* clean up prefsrc entries */ @@ -1024,7 +1049,7 @@ static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *i u32 addr_flags; unsigned long now = jiffies; - write_lock(&idev->lock); + write_lock_bh(&idev->lock); if (ift) { spin_lock_bh(&ift->lock); memcpy(&addr.s6_addr[8], &ift->addr.s6_addr[8], 8); @@ -1036,7 +1061,7 @@ static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *i retry: in6_dev_hold(idev); if (idev->cnf.use_tempaddr <= 0) { - write_unlock(&idev->lock); + write_unlock_bh(&idev->lock); pr_info("%s: use_tempaddr is disabled\n", __func__); in6_dev_put(idev); ret = -1; @@ -1046,7 +1071,7 @@ retry: if (ifp->regen_count++ >= idev->cnf.regen_max_retry) { idev->cnf.use_tempaddr = -1; /*XXX*/ spin_unlock_bh(&ifp->lock); - write_unlock(&idev->lock); + write_unlock_bh(&idev->lock); pr_warn("%s: regeneration time exceeded - disabled temporary address support\n", __func__); in6_dev_put(idev); @@ -1071,8 +1096,8 @@ retry: regen_advance = idev->cnf.regen_max_retry * idev->cnf.dad_transmits * - idev->nd_parms->retrans_time / HZ; - write_unlock(&idev->lock); + NEIGH_VAR(idev->nd_parms, RETRANS_TIME) / HZ; + write_unlock_bh(&idev->lock); /* A temporary address is created only if this calculated Preferred * Lifetime is greater than REGEN_ADVANCE time units. 
In particular, @@ -1099,7 +1124,7 @@ retry: in6_dev_put(idev); pr_info("%s: retry temporary address regeneration\n", __func__); tmpaddr = &addr; - write_lock(&idev->lock); + write_lock_bh(&idev->lock); goto retry; } @@ -1200,7 +1225,7 @@ static int ipv6_get_saddr_eval(struct net *net, * | d is scope of the destination. * B-d | \ * | \ <- smaller scope is better if - * B-15 | \ if scope is enough for destinaion. + * B-15 | \ if scope is enough for destination. * | ret = B - scope (-1 <= scope >= d <= 15). * d-C-1 | / * |/ <- greater is better @@ -1407,12 +1432,14 @@ try_nextdev: EXPORT_SYMBOL(ipv6_dev_get_saddr); int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr, - unsigned char banned_flags) + u32 banned_flags) { struct inet6_ifaddr *ifp; int err = -EADDRNOTAVAIL; - list_for_each_entry(ifp, &idev->addr_list, if_list) { + list_for_each_entry_reverse(ifp, &idev->addr_list, if_list) { + if (ifp->scope > IFA_LINK) + break; if (ifp->scope == IFA_LINK && !(ifp->flags & banned_flags)) { *addr = ifp->addr; @@ -1424,7 +1451,7 @@ int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr, } int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr, - unsigned char banned_flags) + u32 banned_flags) { struct inet6_dev *idev; int err = -EADDRNOTAVAIL; @@ -1816,6 +1843,7 @@ static int ipv6_generate_eui64(u8 *eui, struct net_device *dev) return addrconf_ifid_sit(eui, dev); case ARPHRD_IPGRE: return addrconf_ifid_gre(eui, dev); + case ARPHRD_6LOWPAN: case ARPHRD_IEEE802154: return addrconf_ifid_eui64(eui, dev); case ARPHRD_IEEE1394: @@ -1832,7 +1860,9 @@ static int ipv6_inherit_eui64(u8 *eui, struct inet6_dev *idev) struct inet6_ifaddr *ifp; read_lock_bh(&idev->lock); - list_for_each_entry(ifp, &idev->addr_list, if_list) { + list_for_each_entry_reverse(ifp, &idev->addr_list, if_list) { + if (ifp->scope > IFA_LINK) + break; if (ifp->scope == IFA_LINK && !(ifp->flags&IFA_F_TENTATIVE)) { memcpy(eui, ifp->addr.s6_addr+8, 8); err = 0; @@ -1888,7 
+1918,8 @@ static void ipv6_regen_rndid(unsigned long data) expires = jiffies + idev->cnf.temp_prefered_lft * HZ - - idev->cnf.regen_max_retry * idev->cnf.dad_transmits * idev->nd_parms->retrans_time - + idev->cnf.regen_max_retry * idev->cnf.dad_transmits * + NEIGH_VAR(idev->nd_parms, RETRANS_TIME) - idev->cnf.max_desync_factor * HZ; if (time_before(expires, jiffies)) { pr_warn("%s: too short regeneration interval; timer disabled for %s\n", @@ -2016,6 +2047,73 @@ static struct inet6_dev *addrconf_add_dev(struct net_device *dev) return idev; } +static void manage_tempaddrs(struct inet6_dev *idev, + struct inet6_ifaddr *ifp, + __u32 valid_lft, __u32 prefered_lft, + bool create, unsigned long now) +{ + u32 flags; + struct inet6_ifaddr *ift; + + read_lock_bh(&idev->lock); + /* update all temporary addresses in the list */ + list_for_each_entry(ift, &idev->tempaddr_list, tmp_list) { + int age, max_valid, max_prefered; + + if (ifp != ift->ifpub) + continue; + + /* RFC 4941 section 3.3: + * If a received option will extend the lifetime of a public + * address, the lifetimes of temporary addresses should + * be extended, subject to the overall constraint that no + * temporary addresses should ever remain "valid" or "preferred" + * for a time longer than (TEMP_VALID_LIFETIME) or + * (TEMP_PREFERRED_LIFETIME - DESYNC_FACTOR), respectively. 
+ */ + age = (now - ift->cstamp) / HZ; + max_valid = idev->cnf.temp_valid_lft - age; + if (max_valid < 0) + max_valid = 0; + + max_prefered = idev->cnf.temp_prefered_lft - + idev->cnf.max_desync_factor - age; + if (max_prefered < 0) + max_prefered = 0; + + if (valid_lft > max_valid) + valid_lft = max_valid; + + if (prefered_lft > max_prefered) + prefered_lft = max_prefered; + + spin_lock(&ift->lock); + flags = ift->flags; + ift->valid_lft = valid_lft; + ift->prefered_lft = prefered_lft; + ift->tstamp = now; + if (prefered_lft > 0) + ift->flags &= ~IFA_F_DEPRECATED; + + spin_unlock(&ift->lock); + if (!(flags&IFA_F_TENTATIVE)) + ipv6_ifa_notify(0, ift); + } + + if ((create || list_empty(&idev->tempaddr_list)) && + idev->cnf.use_tempaddr > 0) { + /* When a new public address is created as described + * in [ADDRCONF], also create a new temporary address. + * Also create a temporary address if it's enabled but + * no temporary address currently exists. + */ + read_unlock_bh(&idev->lock); + ipv6_create_tempaddr(ifp, NULL); + } else { + read_unlock_bh(&idev->lock); + } +} + void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) { struct prefix_info *pinfo; @@ -2170,6 +2268,7 @@ ok: return; } + ifp->flags |= IFA_F_MANAGETEMPADDR; update_lft = 0; create = 1; ifp->cstamp = jiffies; @@ -2178,9 +2277,8 @@ ok: } if (ifp) { - int flags; + u32 flags; unsigned long now; - struct inet6_ifaddr *ift; u32 stored_lft; /* update lifetime (RFC2462 5.5.3 e) */ @@ -2221,70 +2319,8 @@ ok: } else spin_unlock(&ifp->lock); - read_lock_bh(&in6_dev->lock); - /* update all temporary addresses in the list */ - list_for_each_entry(ift, &in6_dev->tempaddr_list, - tmp_list) { - int age, max_valid, max_prefered; - - if (ifp != ift->ifpub) - continue; - - /* - * RFC 4941 section 3.3: - * If a received option will extend the lifetime - * of a public address, the lifetimes of - * temporary addresses should be extended, - * subject to the overall constraint that no - * temporary 
addresses should ever remain - * "valid" or "preferred" for a time longer than - * (TEMP_VALID_LIFETIME) or - * (TEMP_PREFERRED_LIFETIME - DESYNC_FACTOR), - * respectively. - */ - age = (now - ift->cstamp) / HZ; - max_valid = in6_dev->cnf.temp_valid_lft - age; - if (max_valid < 0) - max_valid = 0; - - max_prefered = in6_dev->cnf.temp_prefered_lft - - in6_dev->cnf.max_desync_factor - - age; - if (max_prefered < 0) - max_prefered = 0; - - if (valid_lft > max_valid) - valid_lft = max_valid; - - if (prefered_lft > max_prefered) - prefered_lft = max_prefered; - - spin_lock(&ift->lock); - flags = ift->flags; - ift->valid_lft = valid_lft; - ift->prefered_lft = prefered_lft; - ift->tstamp = now; - if (prefered_lft > 0) - ift->flags &= ~IFA_F_DEPRECATED; - - spin_unlock(&ift->lock); - if (!(flags&IFA_F_TENTATIVE)) - ipv6_ifa_notify(0, ift); - } - - if ((create || list_empty(&in6_dev->tempaddr_list)) && in6_dev->cnf.use_tempaddr > 0) { - /* - * When a new public address is created as - * described in [ADDRCONF], also create a new - * temporary address. Also create a temporary - * address if it's enabled but no temporary - * address currently exists. 
- */ - read_unlock_bh(&in6_dev->lock); - ipv6_create_tempaddr(ifp, NULL); - } else { - read_unlock_bh(&in6_dev->lock); - } + manage_tempaddrs(in6_dev, ifp, valid_lft, prefered_lft, + create, now); in6_ifa_put(ifp); addrconf_verify(0); @@ -2363,10 +2399,11 @@ err_exit: /* * Manual configuration of address on an interface */ -static int inet6_addr_add(struct net *net, int ifindex, const struct in6_addr *pfx, +static int inet6_addr_add(struct net *net, int ifindex, + const struct in6_addr *pfx, const struct in6_addr *peer_pfx, - unsigned int plen, __u8 ifa_flags, __u32 prefered_lft, - __u32 valid_lft) + unsigned int plen, __u32 ifa_flags, + __u32 prefered_lft, __u32 valid_lft) { struct inet6_ifaddr *ifp; struct inet6_dev *idev; @@ -2385,6 +2422,9 @@ static int inet6_addr_add(struct net *net, int ifindex, const struct in6_addr *p if (!valid_lft || prefered_lft > valid_lft) return -EINVAL; + if (ifa_flags & IFA_F_MANAGETEMPADDR && plen != 64) + return -EINVAL; + dev = __dev_get_by_index(net, ifindex); if (!dev) return -ENODEV; @@ -2417,14 +2457,20 @@ static int inet6_addr_add(struct net *net, int ifindex, const struct in6_addr *p valid_lft, prefered_lft); if (!IS_ERR(ifp)) { - addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev, - expires, flags); + if (!(ifa_flags & IFA_F_NOPREFIXROUTE)) { + addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev, + expires, flags); + } + /* * Note that section 3.1 of RFC 4429 indicates * that the Optimistic flag should not be set for * manually configured addresses */ addrconf_dad_start(ifp); + if (ifa_flags & IFA_F_MANAGETEMPADDR) + manage_tempaddrs(idev, ifp, valid_lft, prefered_lft, + true, jiffies); in6_ifa_put(ifp); addrconf_verify(0); return 0; @@ -2611,8 +2657,18 @@ static void init_loopback(struct net_device *dev) if (sp_ifa->flags & (IFA_F_DADFAILED | IFA_F_TENTATIVE)) continue; - if (sp_ifa->rt) - continue; + if (sp_ifa->rt) { + /* This dst has been added to garbage list when + * lo device down, release this obsolete dst 
and + * reallocate a new router for ifa. + */ + if (sp_ifa->rt->dst.obsolete > 0) { + ip6_rt_put(sp_ifa->rt); + sp_ifa->rt = NULL; + } else { + continue; + } + } sp_rt = addrconf_dst_alloc(idev, &sp_ifa->addr, false); @@ -2660,7 +2716,8 @@ static void addrconf_dev_config(struct net_device *dev) (dev->type != ARPHRD_INFINIBAND) && (dev->type != ARPHRD_IEEE802154) && (dev->type != ARPHRD_IEEE1394) && - (dev->type != ARPHRD_TUNNEL6)) { + (dev->type != ARPHRD_TUNNEL6) && + (dev->type != ARPHRD_6LOWPAN)) { /* Alas, we support only Ethernet autoconfiguration. */ return; } @@ -2857,7 +2914,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, } /* - * MTU falled under IPV6_MIN_MTU. + * if MTU under IPV6_MIN_MTU. * Stop IPv6 on this interface. */ @@ -3083,7 +3140,7 @@ static void addrconf_dad_kick(struct inet6_ifaddr *ifp) if (ifp->flags & IFA_F_OPTIMISTIC) rand_num = 0; else - rand_num = net_random() % (idev->cnf.rtr_solicit_delay ? : 1); + rand_num = prandom_u32() % (idev->cnf.rtr_solicit_delay ? 
: 1); ifp->dad_probes = idev->cnf.dad_transmits; addrconf_mod_dad_timer(ifp, rand_num); @@ -3096,7 +3153,7 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp) addrconf_join_solict(dev, &ifp->addr); - net_srandom(ifp->addr.s6_addr32[3]); + prandom_seed((__force u32) ifp->addr.s6_addr32[3]); read_lock_bh(&idev->lock); spin_lock(&ifp->lock); @@ -3178,7 +3235,8 @@ static void addrconf_dad_timer(unsigned long data) } ifp->dad_probes--; - addrconf_mod_dad_timer(ifp, ifp->idev->nd_parms->retrans_time); + addrconf_mod_dad_timer(ifp, + NEIGH_VAR(ifp->idev->nd_parms, RETRANS_TIME)); spin_unlock(&ifp->lock); write_unlock(&idev->lock); @@ -3195,7 +3253,9 @@ static bool ipv6_lonely_lladdr(struct inet6_ifaddr *ifp) struct inet6_ifaddr *ifpiter; struct inet6_dev *idev = ifp->idev; - list_for_each_entry(ifpiter, &idev->addr_list, if_list) { + list_for_each_entry_reverse(ifpiter, &idev->addr_list, if_list) { + if (ifpiter->scope > IFA_LINK) + break; if (ifp != ifpiter && ifpiter->scope == IFA_LINK && (ifpiter->flags & (IFA_F_PERMANENT|IFA_F_TENTATIVE| IFA_F_OPTIMISTIC|IFA_F_DADFAILED)) == @@ -3371,7 +3431,7 @@ static int if6_seq_show(struct seq_file *seq, void *v) ifp->idev->dev->ifindex, ifp->prefix_len, ifp->scope, - ifp->flags, + (u8) ifp->flags, ifp->idev->dev->name); return 0; } @@ -3518,7 +3578,7 @@ restart: !(ifp->flags&IFA_F_TENTATIVE)) { unsigned long regen_advance = ifp->idev->cnf.regen_max_retry * ifp->idev->cnf.dad_transmits * - ifp->idev->nd_parms->retrans_time / HZ; + NEIGH_VAR(ifp->idev->nd_parms, RETRANS_TIME) / HZ; if (age >= ifp->prefered_lft - regen_advance) { struct inet6_ifaddr *ifpub = ifp->ifpub; @@ -3593,6 +3653,7 @@ static const struct nla_policy ifa_ipv6_policy[IFA_MAX+1] = { [IFA_ADDRESS] = { .len = sizeof(struct in6_addr) }, [IFA_LOCAL] = { .len = sizeof(struct in6_addr) }, [IFA_CACHEINFO] = { .len = sizeof(struct ifa_cacheinfo) }, + [IFA_FLAGS] = { .len = sizeof(u32) }, }; static int @@ -3616,16 +3677,22 @@ inet6_rtm_deladdr(struct sk_buff *skb, 
struct nlmsghdr *nlh) return inet6_addr_del(net, ifm->ifa_index, pfx, ifm->ifa_prefixlen); } -static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags, +static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags, u32 prefered_lft, u32 valid_lft) { u32 flags; clock_t expires; unsigned long timeout; + bool was_managetempaddr; + bool had_prefixroute; if (!valid_lft || (prefered_lft > valid_lft)) return -EINVAL; + if (ifa_flags & IFA_F_MANAGETEMPADDR && + (ifp->flags & IFA_F_TEMPORARY || ifp->prefix_len != 64)) + return -EINVAL; + timeout = addrconf_timeout_fixup(valid_lft, HZ); if (addrconf_finite_timeout(timeout)) { expires = jiffies_to_clock_t(timeout * HZ); @@ -3645,7 +3712,13 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags, } spin_lock_bh(&ifp->lock); - ifp->flags = (ifp->flags & ~(IFA_F_DEPRECATED | IFA_F_PERMANENT | IFA_F_NODAD | IFA_F_HOMEADDRESS)) | ifa_flags; + was_managetempaddr = ifp->flags & IFA_F_MANAGETEMPADDR; + had_prefixroute = ifp->flags & IFA_F_PERMANENT && + !(ifp->flags & IFA_F_NOPREFIXROUTE); + ifp->flags &= ~(IFA_F_DEPRECATED | IFA_F_PERMANENT | IFA_F_NODAD | + IFA_F_HOMEADDRESS | IFA_F_MANAGETEMPADDR | + IFA_F_NOPREFIXROUTE); + ifp->flags |= ifa_flags; ifp->tstamp = jiffies; ifp->valid_lft = valid_lft; ifp->prefered_lft = prefered_lft; @@ -3654,8 +3727,30 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags, if (!(ifp->flags&IFA_F_TENTATIVE)) ipv6_ifa_notify(0, ifp); - addrconf_prefix_route(&ifp->addr, ifp->prefix_len, ifp->idev->dev, - expires, flags); + if (!(ifa_flags & IFA_F_NOPREFIXROUTE)) { + addrconf_prefix_route(&ifp->addr, ifp->prefix_len, ifp->idev->dev, + expires, flags); + } else if (had_prefixroute) { + enum cleanup_prefix_rt_t action; + unsigned long rt_expires; + + write_lock_bh(&ifp->idev->lock); + action = check_cleanup_prefix_route(ifp, &rt_expires); + write_unlock_bh(&ifp->idev->lock); + + if (action != CLEANUP_PREFIX_RT_NOP) { + cleanup_prefix_route(ifp, 
rt_expires, + action == CLEANUP_PREFIX_RT_DEL); + } + } + + if (was_managetempaddr || ifp->flags & IFA_F_MANAGETEMPADDR) { + if (was_managetempaddr && !(ifp->flags & IFA_F_MANAGETEMPADDR)) + valid_lft = prefered_lft = 0; + manage_tempaddrs(ifp->idev, ifp, valid_lft, prefered_lft, + !was_managetempaddr, jiffies); + } + addrconf_verify(0); return 0; @@ -3671,7 +3766,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) struct inet6_ifaddr *ifa; struct net_device *dev; u32 valid_lft = INFINITY_LIFE_TIME, preferred_lft = INFINITY_LIFE_TIME; - u8 ifa_flags; + u32 ifa_flags; int err; err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); @@ -3698,14 +3793,17 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) if (dev == NULL) return -ENODEV; + ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) : ifm->ifa_flags; + /* We ignore other flags so far. */ - ifa_flags = ifm->ifa_flags & (IFA_F_NODAD | IFA_F_HOMEADDRESS); + ifa_flags &= IFA_F_NODAD | IFA_F_HOMEADDRESS | IFA_F_MANAGETEMPADDR | + IFA_F_NOPREFIXROUTE; ifa = ipv6_get_ifaddr(net, pfx, dev, 1); if (ifa == NULL) { /* * It would be best to check for !NLM_F_CREATE here but - * userspace alreay relies on not having to provide this. + * userspace already relies on not having to provide this. 
*/ return inet6_addr_add(net, ifm->ifa_index, pfx, peer_pfx, ifm->ifa_prefixlen, ifa_flags, @@ -3723,7 +3821,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) return err; } -static void put_ifaddrmsg(struct nlmsghdr *nlh, u8 prefixlen, u8 flags, +static void put_ifaddrmsg(struct nlmsghdr *nlh, u8 prefixlen, u32 flags, u8 scope, int ifindex) { struct ifaddrmsg *ifm; @@ -3766,7 +3864,8 @@ static inline int inet6_ifaddr_msgsize(void) return NLMSG_ALIGN(sizeof(struct ifaddrmsg)) + nla_total_size(16) /* IFA_LOCAL */ + nla_total_size(16) /* IFA_ADDRESS */ - + nla_total_size(sizeof(struct ifa_cacheinfo)); + + nla_total_size(sizeof(struct ifa_cacheinfo)) + + nla_total_size(4) /* IFA_FLAGS */; } static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, @@ -3815,6 +3914,9 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, if (put_cacheinfo(skb, ifa->cstamp, ifa->tstamp, preferred, valid) < 0) goto error; + if (nla_put_u32(skb, IFA_FLAGS, ifa->flags) < 0) + goto error; + return nlmsg_end(skb, nlh); error: @@ -4218,7 +4320,7 @@ static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev) ci.max_reasm_len = IPV6_MAXPLEN; ci.tstamp = cstamp_delta(idev->tstamp); ci.reachable_time = jiffies_to_msecs(idev->nd_parms->reachable_time); - ci.retrans_time = jiffies_to_msecs(idev->nd_parms->retrans_time); + ci.retrans_time = jiffies_to_msecs(NEIGH_VAR(idev->nd_parms, RETRANS_TIME)); if (nla_put(skb, IFLA_INET6_CACHEINFO, sizeof(ci), &ci)) goto nla_put_failure; nla = nla_reserve(skb, IFLA_INET6_CONF, DEVCONF_MAX * sizeof(s32)); @@ -4694,6 +4796,46 @@ int addrconf_sysctl_disable(struct ctl_table *ctl, int write, return ret; } +static +int addrconf_sysctl_proxy_ndp(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int *valp = ctl->data; + int ret; + int old, new; + + old = *valp; + ret = proc_dointvec(ctl, write, buffer, lenp, ppos); + new = *valp; + + if (write && old != 
new) { + struct net *net = ctl->extra2; + + if (!rtnl_trylock()) + return restart_syscall(); + + if (valp == &net->ipv6.devconf_dflt->proxy_ndp) + inet6_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH, + NETCONFA_IFINDEX_DEFAULT, + net->ipv6.devconf_dflt); + else if (valp == &net->ipv6.devconf_all->proxy_ndp) + inet6_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH, + NETCONFA_IFINDEX_ALL, + net->ipv6.devconf_all); + else { + struct inet6_dev *idev = ctl->extra1; + + inet6_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH, + idev->dev->ifindex, + &idev->cnf); + } + rtnl_unlock(); + } + + return ret; +} + + static struct addrconf_sysctl_table { struct ctl_table_header *sysctl_header; @@ -4880,7 +5022,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.proxy_ndp, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = addrconf_sysctl_proxy_ndp, }, { .procname = "accept_source_route", @@ -4996,7 +5138,7 @@ static void __addrconf_sysctl_unregister(struct ipv6_devconf *p) static void addrconf_sysctl_register(struct inet6_dev *idev) { - neigh_sysctl_register(idev->dev, idev->nd_parms, "ipv6", + neigh_sysctl_register(idev->dev, idev->nd_parms, &ndisc_ifinfo_sysctl_change); __addrconf_sysctl_register(dev_net(idev->dev), idev->dev->name, idev, &idev->cnf); @@ -5129,9 +5271,7 @@ int __init addrconf_init(void) addrconf_verify(0); - err = rtnl_af_register(&inet6_ops); - if (err < 0) - goto errout_af; + rtnl_af_register(&inet6_ops); err = __rtnl_register(PF_INET6, RTM_GETLINK, NULL, inet6_dump_ifinfo, NULL); @@ -5155,7 +5295,6 @@ int __init addrconf_init(void) return 0; errout: rtnl_af_unregister(&inet6_ops); -errout_af: unregister_netdevice_notifier(&ipv6_dev_notf); errlo: unregister_pernet_subsys(&addrconf_ops); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 4fbdb7046d2..d935889f100 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -213,7 +213,7 @@ lookup_protocol: inet->mc_list = NULL; inet->rcv_tos = 0; - 
if (ipv4_config.no_pmtu_disc) + if (net->ipv4.sysctl_ip_no_pmtu_disc) inet->pmtudisc = IP_PMTUDISC_DONT; else inet->pmtudisc = IP_PMTUDISC_WANT; @@ -661,7 +661,7 @@ int inet6_sk_rebuild_header(struct sock *sk) final_p = fl6_update_dst(&fl6, np->opt, &final); - dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); + dst = ip6_dst_lookup_flow(sk, &fl6, final_p); if (IS_ERR(dst)) { sk->sk_route_caps = 0; sk->sk_err_soft = -PTR_ERR(dst); @@ -683,8 +683,7 @@ bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb) if (np->rxopt.all) { if ((opt->hop && (np->rxopt.bits.hopopts || np->rxopt.bits.ohopopts)) || - ((IPV6_FLOWINFO_MASK & - *(__be32 *)skb_network_header(skb)) && + (ip6_flowinfo((struct ipv6hdr *) skb_network_header(skb)) && np->rxopt.bits.rxflow) || (opt->srcrt && (np->rxopt.bits.srcrt || np->rxopt.bits.osrcrt)) || @@ -776,6 +775,7 @@ static int __net_init inet6_net_init(struct net *net) net->ipv6.sysctl.bindv6only = 0; net->ipv6.sysctl.icmpv6_time = 1*HZ; + net->ipv6.sysctl.flowlabel_consistency = 1; atomic_set(&net->ipv6.rt_genid, 0); err = ipv6_init_mibs(net); diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 82e1da3a40b..81e496a2e00 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -12,8 +12,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
* * Authors * diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index 5a80f15a9de..21018324468 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -383,6 +383,17 @@ bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev, return found; } +/* check if this anycast address is link-local on given interface or + * is global + */ +bool ipv6_chk_acast_addr_src(struct net *net, struct net_device *dev, + const struct in6_addr *addr) +{ + return ipv6_chk_acast_addr(net, + (ipv6_addr_type(addr) & IPV6_ADDR_LINKLOCAL ? + dev : NULL), + addr); +} #ifdef CONFIG_PROC_FS struct ac6_iter_state { diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 93b1aa34c43..c3bf2d2e519 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -170,7 +170,7 @@ ipv4_connected: opt = flowlabel ? flowlabel->opt : np->opt; final_p = fl6_update_dst(&fl6, opt, &final); - dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true); + dst = ip6_dst_lookup_flow(sk, &fl6, final_p); err = 0; if (IS_ERR(dst)) { err = PTR_ERR(dst); @@ -205,6 +205,16 @@ out: } EXPORT_SYMBOL_GPL(ip6_datagram_connect); +int ip6_datagram_connect_v6_only(struct sock *sk, struct sockaddr *uaddr, + int addr_len) +{ + DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, uaddr); + if (sin6->sin6_family != AF_INET6) + return -EAFNOSUPPORT; + return ip6_datagram_connect(sk, uaddr, addr_len); +} +EXPORT_SYMBOL_GPL(ip6_datagram_connect_v6_only); + void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err, __be16 port, u32 info, u8 *payload) { @@ -322,7 +332,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) struct ipv6_pinfo *np = inet6_sk(sk); struct sock_exterr_skb *serr; struct sk_buff *skb, *skb2; - struct sockaddr_in6 *sin; + DECLARE_SOCKADDR(struct sockaddr_in6 *, sin, msg->msg_name); struct { struct sock_extended_err ee; struct sockaddr_in6 offender; @@ -348,7 +358,6 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) serr = 
SKB_EXT_ERR(skb); - sin = (struct sockaddr_in6 *)msg->msg_name; if (sin) { const unsigned char *nh = skb_network_header(skb); sin->sin6_family = AF_INET6; @@ -378,10 +387,12 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) sin->sin6_family = AF_INET6; sin->sin6_flowinfo = 0; sin->sin6_port = 0; + if (np->rxopt.all) + ip6_datagram_recv_common_ctl(sk, msg, skb); if (skb->protocol == htons(ETH_P_IPV6)) { sin->sin6_addr = ipv6_hdr(skb)->saddr; if (np->rxopt.all) - ip6_datagram_recv_ctl(sk, msg, skb); + ip6_datagram_recv_specific_ctl(sk, msg, skb); sin->sin6_scope_id = ipv6_iface_scope_id(&sin->sin6_addr, IP6CB(skb)->iif); @@ -429,8 +440,8 @@ int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len, { struct ipv6_pinfo *np = inet6_sk(sk); struct sk_buff *skb; - struct sockaddr_in6 *sin; struct ip6_mtuinfo mtu_info; + DECLARE_SOCKADDR(struct sockaddr_in6 *, sin, msg->msg_name); int err; int copied; @@ -452,7 +463,6 @@ int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len, memcpy(&mtu_info, IP6CBMTU(skb), sizeof(mtu_info)); - sin = (struct sockaddr_in6 *)msg->msg_name; if (sin) { sin->sin6_family = AF_INET6; sin->sin6_flowinfo = 0; @@ -473,20 +483,34 @@ out: } -int ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg, - struct sk_buff *skb) +void ip6_datagram_recv_common_ctl(struct sock *sk, struct msghdr *msg, + struct sk_buff *skb) { struct ipv6_pinfo *np = inet6_sk(sk); - struct inet6_skb_parm *opt = IP6CB(skb); - unsigned char *nh = skb_network_header(skb); + bool is_ipv6 = skb->protocol == htons(ETH_P_IPV6); if (np->rxopt.bits.rxinfo) { struct in6_pktinfo src_info; - src_info.ipi6_ifindex = opt->iif; - src_info.ipi6_addr = ipv6_hdr(skb)->daddr; + if (is_ipv6) { + src_info.ipi6_ifindex = IP6CB(skb)->iif; + src_info.ipi6_addr = ipv6_hdr(skb)->daddr; + } else { + src_info.ipi6_ifindex = + PKTINFO_SKB_CB(skb)->ipi_ifindex; + ipv6_addr_set_v4mapped(ip_hdr(skb)->daddr, + &src_info.ipi6_addr); + } put_cmsg(msg, 
SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info); } +} + +void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg, + struct sk_buff *skb) +{ + struct ipv6_pinfo *np = inet6_sk(sk); + struct inet6_skb_parm *opt = IP6CB(skb); + unsigned char *nh = skb_network_header(skb); if (np->rxopt.bits.rxhlim) { int hlim = ipv6_hdr(skb)->hop_limit; @@ -604,7 +628,13 @@ int ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg, put_cmsg(msg, SOL_IPV6, IPV6_ORIGDSTADDR, sizeof(sin6), &sin6); } } - return 0; +} + +void ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg, + struct sk_buff *skb) +{ + ip6_datagram_recv_common_ctl(sk, msg, skb); + ip6_datagram_recv_specific_ctl(sk, msg, skb); } EXPORT_SYMBOL_GPL(ip6_datagram_recv_ctl); @@ -669,7 +699,9 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk, int strict = __ipv6_addr_src_scope(addr_type) <= IPV6_ADDR_SCOPE_LINKLOCAL; if (!(inet_sk(sk)->freebind || inet_sk(sk)->transparent) && !ipv6_chk_addr(net, &src_info->ipi6_addr, - strict ? dev : NULL, 0)) + strict ? dev : NULL, 0) && + !ipv6_chk_acast_addr_src(net, dev, + &src_info->ipi6_addr)) err = -EINVAL; else fl6->saddr = src_info->ipi6_addr; diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index b8719df0366..6eef8a7e35f 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -12,8 +12,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
* * Authors * diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 3fd0a578329..b4d5e1d97c1 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -169,7 +169,7 @@ static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) return 0; } - if (r->tclass && r->tclass != ((ntohl(fl6->flowlabel) >> 20) & 0xff)) + if (r->tclass && r->tclass != ip6_tclass(fl6->flowlabel)) return 0; return 1; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index eef8d945b36..f81f59686f2 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -67,6 +67,7 @@ #include <net/icmp.h> #include <net/xfrm.h> #include <net/inet_common.h> +#include <net/dsfield.h> #include <asm/uaccess.h> @@ -315,8 +316,10 @@ static void mip6_addr_swap(struct sk_buff *skb) static inline void mip6_addr_swap(struct sk_buff *skb) {} #endif -struct dst_entry *icmpv6_route_lookup(struct net *net, struct sk_buff *skb, - struct sock *sk, struct flowi6 *fl6) +static struct dst_entry *icmpv6_route_lookup(struct net *net, + struct sk_buff *skb, + struct sock *sk, + struct flowi6 *fl6) { struct dst_entry *dst, *dst2; struct flowi6 fl2; @@ -410,7 +413,8 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) */ addr_type = ipv6_addr_type(&hdr->daddr); - if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0)) + if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) || + ipv6_anycast_destination(skb)) saddr = &hdr->daddr; /* @@ -551,10 +555,13 @@ static void icmpv6_echo_reply(struct sk_buff *skb) struct dst_entry *dst; int err = 0; int hlimit; + u8 tclass; saddr = &ipv6_hdr(skb)->daddr; - if (!ipv6_unicast_destination(skb)) + if (!ipv6_unicast_destination(skb) && + !(net->ipv6.sysctl.anycast_src_echo_reply && + ipv6_anycast_destination(skb))) saddr = NULL; memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr)); @@ -599,8 +606,9 @@ static void icmpv6_echo_reply(struct sk_buff *skb) msg.offset = 0; msg.type = ICMPV6_ECHO_REPLY; + tclass = ipv6_get_dsfield(ipv6_hdr(skb)); err = ip6_append_data(sk, 
icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr), - sizeof(struct icmp6hdr), hlimit, np->tclass, NULL, &fl6, + sizeof(struct icmp6hdr), hlimit, tclass, NULL, &fl6, (struct rt6_info *)dst, MSG_DONTWAIT, np->dontfrag); @@ -984,7 +992,7 @@ int icmpv6_err_convert(u8 type, u8 code, int *err) EXPORT_SYMBOL(icmpv6_err_convert); #ifdef CONFIG_SYSCTL -struct ctl_table ipv6_icmp_table_template[] = { +static struct ctl_table ipv6_icmp_table_template[] = { { .procname = "ratelimit", .data = &init_net.ipv6.sysctl.icmpv6_time, diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 77bb8afb141..c9138189415 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -86,7 +86,7 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk, fl6->fl6_sport = htons(ireq->ir_num); security_req_classify_flow(req, flowi6_to_flowi(fl6)); - dst = ip6_dst_lookup_flow(sk, fl6, final_p, false); + dst = ip6_dst_lookup_flow(sk, fl6, final_p); if (IS_ERR(dst)) return NULL; @@ -216,7 +216,7 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk, dst = __inet6_csk_dst_check(sk, np->dst_cookie); if (!dst) { - dst = ip6_dst_lookup_flow(sk, fl6, final_p, false); + dst = ip6_dst_lookup_flow(sk, fl6, final_p); if (!IS_ERR(dst)) __inet6_csk_dst_store(sk, dst, NULL, NULL); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 5550a8113a6..075602fc6b6 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1530,7 +1530,7 @@ static void fib6_clean_tree(struct net *net, struct fib6_node *root, } void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg), - int prune, void *arg) + void *arg) { struct fib6_table *table; struct hlist_head *head; @@ -1542,7 +1542,7 @@ void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg), hlist_for_each_entry_rcu(table, head, tb6_hlist) { write_lock_bh(&table->tb6_lock); fib6_clean_tree(net, &table->tb6_root, - func, prune, arg); + func, 0, arg); 
write_unlock_bh(&table->tb6_lock); } } @@ -1636,7 +1636,7 @@ void fib6_run_gc(unsigned long expires, struct net *net, bool force) gc_args.more = icmp6_dst_gc(); - fib6_clean_all(net, fib6_age, 0, NULL); + fib6_clean_all(net, fib6_age, NULL); now = jiffies; net->ipv6.ip6_rt_last_gc = now; diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index e7fb7106550..dfa41bb4e0d 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -210,7 +210,7 @@ static struct ip6_flowlabel *fl_intern(struct net *net, spin_lock_bh(&ip6_fl_lock); if (label == 0) { for (;;) { - fl->label = htonl(net_random())&IPV6_FLOWLABEL_MASK; + fl->label = htonl(prandom_u32())&IPV6_FLOWLABEL_MASK; if (fl->label) { lfl = __fl_lookup(net, fl->label); if (lfl == NULL) @@ -481,11 +481,22 @@ static inline void fl_link(struct ipv6_pinfo *np, struct ipv6_fl_socklist *sfl, spin_unlock_bh(&ip6_sk_fl_lock); } -int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq) +int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq, + int flags) { struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6_fl_socklist *sfl; + if (flags & IPV6_FL_F_REMOTE) { + freq->flr_label = np->rcv_flowinfo & IPV6_FLOWLABEL_MASK; + return 0; + } + + if (np->repflow) { + freq->flr_label = np->flow_label; + return 0; + } + rcu_read_lock_bh(); for_each_sk_fl_rcu(np, sfl) { @@ -527,6 +538,15 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) switch (freq.flr_action) { case IPV6_FL_A_PUT: + if (freq.flr_flags & IPV6_FL_F_REFLECT) { + if (sk->sk_protocol != IPPROTO_TCP) + return -ENOPROTOOPT; + if (!np->repflow) + return -ESRCH; + np->flow_label = 0; + np->repflow = 0; + return 0; + } spin_lock_bh(&ip6_sk_fl_lock); for (sflp = &np->ipv6_fl_list; (sfl = rcu_dereference(*sflp))!=NULL; @@ -567,6 +587,20 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) return -ESRCH; case IPV6_FL_A_GET: + if (freq.flr_flags & IPV6_FL_F_REFLECT) { + struct 
net *net = sock_net(sk); + if (net->ipv6.sysctl.flowlabel_consistency) { + net_info_ratelimited("Can not set IPV6_FL_F_REFLECT if flowlabel_consistency sysctl is enable\n"); + return -EPERM; + } + + if (sk->sk_protocol != IPPROTO_TCP) + return -ENOPROTOOPT; + + np->repflow = 1; + return 0; + } + if (freq.flr_label & ~IPV6_FLOWLABEL_MASK) return -EINVAL; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 8acb28621f9..f3ffb43f59c 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -61,9 +61,6 @@ static bool log_ecn_error = true; module_param(log_ecn_error, bool, 0644); MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); -#define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK) -#define IPV6_TCLASS_SHIFT 20 - #define HASH_SIZE_SHIFT 5 #define HASH_SIZE (1 << HASH_SIZE_SHIFT) @@ -499,7 +496,7 @@ static int ip6gre_rcv(struct sk_buff *skb) &ipv6h->saddr, &ipv6h->daddr, key, gre_proto); if (tunnel) { - struct pcpu_tstats *tstats; + struct pcpu_sw_netstats *tstats; if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) goto drop; @@ -846,7 +843,7 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev) if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK); if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL) - fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_FLOWLABEL_MASK); + fl6.flowlabel |= ip6_flowlabel(ipv6h); if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) fl6.flowi6_mark = skb->mark; @@ -1266,12 +1263,12 @@ static int ip6gre_tunnel_init(struct net_device *dev) if (ipv6_addr_any(&tunnel->parms.raddr)) dev->header_ops = &ip6gre_header_ops; - dev->tstats = alloc_percpu(struct pcpu_tstats); + dev->tstats = alloc_percpu(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; for_each_possible_cpu(i) { - struct pcpu_tstats *ip6gre_tunnel_stats; + struct pcpu_sw_netstats *ip6gre_tunnel_stats; ip6gre_tunnel_stats = per_cpu_ptr(dev->tstats, i); 
u64_stats_init(&ip6gre_tunnel_stats->syncp); } @@ -1467,12 +1464,12 @@ static int ip6gre_tap_init(struct net_device *dev) ip6gre_tnl_link_config(tunnel, 1); - dev->tstats = alloc_percpu(struct pcpu_tstats); + dev->tstats = alloc_percpu(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; for_each_possible_cpu(i) { - struct pcpu_tstats *ip6gre_tap_stats; + struct pcpu_sw_netstats *ip6gre_tap_stats; ip6gre_tap_stats = per_cpu_ptr(dev->tstats, i); u64_stats_init(&ip6gre_tap_stats->syncp); } diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 4b851692b1f..1e8683b135b 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -154,6 +154,32 @@ out: return segs; } +/* Return the total length of all the extension hdrs, following the same + * logic in ipv6_gso_pull_exthdrs() when parsing ext-hdrs. + */ +static int ipv6_exthdrs_len(struct ipv6hdr *iph, + const struct net_offload **opps) +{ + struct ipv6_opt_hdr *opth = (void *)iph; + int len = 0, proto, optlen = sizeof(*iph); + + proto = iph->nexthdr; + for (;;) { + if (proto != NEXTHDR_HOP) { + *opps = rcu_dereference(inet6_offloads[proto]); + if (unlikely(!(*opps))) + break; + if (!((*opps)->flags & INET6_PROTO_GSO_EXTHDR)) + break; + } + opth = (void *)opth + optlen; + optlen = ipv6_optlen(opth); + len += optlen; + proto = opth->nexthdr; + } + return len; +} + static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, struct sk_buff *skb) { @@ -164,7 +190,7 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, unsigned int nlen; unsigned int hlen; unsigned int off; - int flush = 1; + u16 flush = 1; int proto; __wsum csum; @@ -177,6 +203,7 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, goto out; } + skb_set_network_header(skb, off); skb_gro_pull(skb, sizeof(*iph)); skb_set_transport_header(skb, skb_gro_offset(skb)); @@ -211,12 +238,16 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, if (!NAPI_GRO_CB(p)->same_flow) continue; - iph2 = 
ipv6_hdr(p); + iph2 = (struct ipv6hdr *)(p->data + off); first_word = *(__be32 *)iph ^ *(__be32 *)iph2 ; - /* All fields must match except length and Traffic Class. */ - if (nlen != skb_network_header_len(p) || - (first_word & htonl(0xF00FFFFF)) || + /* All fields must match except length and Traffic Class. + * XXX skbs on the gro_list have all been parsed and pulled + * already so we don't need to compare nlen + * (nlen != (sizeof(*iph2) + ipv6_exthdrs_len(iph2, &ops))) + * memcmp() alone below is suffcient, right? + */ + if ((first_word & htonl(0xF00FFFFF)) || memcmp(&iph->nexthdr, &iph2->nexthdr, nlen - offsetof(struct ipv6hdr, nexthdr))) { NAPI_GRO_CB(p)->same_flow = 0; @@ -245,21 +276,21 @@ out: return pp; } -static int ipv6_gro_complete(struct sk_buff *skb) +static int ipv6_gro_complete(struct sk_buff *skb, int nhoff) { const struct net_offload *ops; - struct ipv6hdr *iph = ipv6_hdr(skb); + struct ipv6hdr *iph = (struct ipv6hdr *)(skb->data + nhoff); int err = -ENOSYS; - iph->payload_len = htons(skb->len - skb_network_offset(skb) - - sizeof(*iph)); + iph->payload_len = htons(skb->len - nhoff - sizeof(*iph)); rcu_read_lock(); - ops = rcu_dereference(inet6_offloads[NAPI_GRO_CB(skb)->proto]); + + nhoff += sizeof(*iph) + ipv6_exthdrs_len(iph, &ops); if (WARN_ON(!ops || !ops->callbacks.gro_complete)) goto out_unlock; - err = ops->callbacks.gro_complete(skb); + err = ops->callbacks.gro_complete(skb, nhoff); out_unlock: rcu_read_unlock(); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index e6f93199799..ef02b26ccf8 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -321,6 +321,27 @@ static inline int ip6_forward_finish(struct sk_buff *skb) return dst_output(skb); } +static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst) +{ + unsigned int mtu; + struct inet6_dev *idev; + + if (dst_metric_locked(dst, RTAX_MTU)) { + mtu = dst_metric_raw(dst, RTAX_MTU); + if (mtu) + return mtu; + } + + mtu = IPV6_MIN_MTU; + rcu_read_lock(); + 
idev = __in6_dev_get(dst->dev); + if (idev) + mtu = idev->cnf.mtu6; + rcu_read_unlock(); + + return mtu; +} + int ip6_forward(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); @@ -336,7 +357,8 @@ int ip6_forward(struct sk_buff *skb) goto drop; if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) { - IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS); + IP6_INC_STATS_BH(net, ip6_dst_idev(dst), + IPSTATS_MIB_INDISCARDS); goto drop; } @@ -370,8 +392,8 @@ int ip6_forward(struct sk_buff *skb) /* Force OUTPUT device used as source address */ skb->dev = dst->dev; icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0); - IP6_INC_STATS_BH(net, - ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS); + IP6_INC_STATS_BH(net, ip6_dst_idev(dst), + IPSTATS_MIB_INHDRERRORS); kfree_skb(skb); return -ETIMEDOUT; @@ -384,14 +406,15 @@ int ip6_forward(struct sk_buff *skb) if (proxied > 0) return ip6_input(skb); else if (proxied < 0) { - IP6_INC_STATS(net, ip6_dst_idev(dst), - IPSTATS_MIB_INDISCARDS); + IP6_INC_STATS_BH(net, ip6_dst_idev(dst), + IPSTATS_MIB_INDISCARDS); goto drop; } } if (!xfrm6_route_forward(skb)) { - IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS); + IP6_INC_STATS_BH(net, ip6_dst_idev(dst), + IPSTATS_MIB_INDISCARDS); goto drop; } dst = skb_dst(skb); @@ -439,7 +462,7 @@ int ip6_forward(struct sk_buff *skb) } } - mtu = dst_mtu(dst); + mtu = ip6_dst_mtu_forward(dst); if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; @@ -448,16 +471,17 @@ int ip6_forward(struct sk_buff *skb) /* Again, force OUTPUT device used as source address */ skb->dev = dst->dev; icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); - IP6_INC_STATS_BH(net, - ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS); - IP6_INC_STATS_BH(net, - ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS); + IP6_INC_STATS_BH(net, ip6_dst_idev(dst), + IPSTATS_MIB_INTOOBIGERRORS); + IP6_INC_STATS_BH(net, ip6_dst_idev(dst), + IPSTATS_MIB_FRAGFAILS); kfree_skb(skb); return -EMSGSIZE; } if (skb_cow(skb, 
dst->dev->hard_header_len)) { - IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS); + IP6_INC_STATS_BH(net, ip6_dst_idev(dst), + IPSTATS_MIB_OUTDISCARDS); goto drop; } @@ -938,7 +962,6 @@ EXPORT_SYMBOL_GPL(ip6_dst_lookup); * @sk: socket which provides route info * @fl6: flow to lookup * @final_dst: final destination address for ipsec lookup - * @can_sleep: we are in a sleepable context * * This function performs a route lookup on the given flow. * @@ -946,8 +969,7 @@ EXPORT_SYMBOL_GPL(ip6_dst_lookup); * error code. */ struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, - const struct in6_addr *final_dst, - bool can_sleep) + const struct in6_addr *final_dst) { struct dst_entry *dst = NULL; int err; @@ -957,8 +979,6 @@ struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, return ERR_PTR(err); if (final_dst) fl6->daddr = *final_dst; - if (can_sleep) - fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP; return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); } @@ -969,7 +989,6 @@ EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow); * @sk: socket which provides the dst cache and route info * @fl6: flow to lookup * @final_dst: final destination address for ipsec lookup - * @can_sleep: we are in a sleepable context * * This function performs a route lookup on the given flow with the * possibility of using the cached route in the socket if it is valid. @@ -980,8 +999,7 @@ EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow); * error code. 
*/ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, - const struct in6_addr *final_dst, - bool can_sleep) + const struct in6_addr *final_dst) { struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie); int err; @@ -993,8 +1011,6 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, return ERR_PTR(err); if (final_dst) fl6->daddr = *final_dst; - if (can_sleep) - fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP; return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); } @@ -1162,10 +1178,10 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, np->cork.hop_limit = hlimit; np->cork.tclass = tclass; if (rt->dst.flags & DST_XFRM_TUNNEL) - mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ? + mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? rt->dst.dev->mtu : dst_mtu(&rt->dst); else - mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ? + mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? rt->dst.dev->mtu : dst_mtu(rt->dst.path); if (np->frag_size < mtu) { if (np->frag_size) @@ -1285,7 +1301,7 @@ alloc_new_skb: if (skb == NULL || skb_prev == NULL) ip6_append_data_mtu(&mtu, &maxfraglen, fragheaderlen, skb, rt, - np->pmtudisc == + np->pmtudisc >= IPV6_PMTUDISC_PROBE); skb_prev = skb; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 7881965a824..5db8d310f9c 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -29,7 +29,6 @@ #include <linux/if.h> #include <linux/in.h> #include <linux/ip.h> -#include <linux/if_tunnel.h> #include <linux/net.h> #include <linux/in6.h> #include <linux/netdevice.h> @@ -70,9 +69,6 @@ MODULE_ALIAS_NETDEV("ip6tnl0"); #define IP6_TNL_TRACE(x...) 
do {;} while(0) #endif -#define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK) -#define IPV6_TCLASS_SHIFT 20 - #define HASH_SIZE_SHIFT 5 #define HASH_SIZE (1 << HASH_SIZE_SHIFT) @@ -103,12 +99,13 @@ struct ip6_tnl_net { static struct net_device_stats *ip6_get_stats(struct net_device *dev) { - struct pcpu_tstats tmp, sum = { 0 }; + struct pcpu_sw_netstats tmp, sum = { 0 }; int i; for_each_possible_cpu(i) { unsigned int start; - const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); + const struct pcpu_sw_netstats *tstats = + per_cpu_ptr(dev->tstats, i); do { start = u64_stats_fetch_begin_bh(&tstats->syncp); @@ -794,7 +791,7 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol, if ((t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr)) != NULL) { - struct pcpu_tstats *tstats; + struct pcpu_sw_netstats *tstats; if (t->parms.proto != ipproto && t->parms.proto != 0) { rcu_read_unlock(); @@ -1142,7 +1139,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK); if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL) - fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_FLOWLABEL_MASK); + fl6.flowlabel |= ip6_flowlabel(ipv6h); if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) fl6.flowi6_mark = skb->mark; @@ -1509,12 +1506,12 @@ ip6_tnl_dev_init_gen(struct net_device *dev) t->dev = dev; t->net = dev_net(dev); - dev->tstats = alloc_percpu(struct pcpu_tstats); + dev->tstats = alloc_percpu(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; for_each_possible_cpu(i) { - struct pcpu_tstats *ip6_tnl_stats; + struct pcpu_sw_netstats *ip6_tnl_stats; ip6_tnl_stats = per_cpu_ptr(dev->tstats, i); u64_stats_init(&ip6_tnl_stats->syncp); } diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 7b42d5ef868..2d19272b8ce 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -24,7 +24,6 @@ #include <linux/if.h> #include 
<linux/in.h> #include <linux/ip.h> -#include <linux/if_tunnel.h> #include <linux/net.h> #include <linux/in6.h> #include <linux/netdevice.h> @@ -292,7 +291,7 @@ static int vti6_rcv(struct sk_buff *skb) if ((t = vti6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr)) != NULL) { - struct pcpu_tstats *tstats; + struct pcpu_sw_netstats *tstats; if (t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) { rcu_read_unlock(); @@ -736,11 +735,11 @@ static inline int vti6_dev_init_gen(struct net_device *dev) t->dev = dev; t->net = dev_net(dev); - dev->tstats = alloc_percpu(struct pcpu_tstats); + dev->tstats = alloc_percpu(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; for_each_possible_cpu(i) { - struct pcpu_tstats *stats; + struct pcpu_sw_netstats *stats; stats = per_cpu_ptr(dev->tstats, i); u64_stats_init(&stats->syncp); } diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index ce507d9e1c9..da9becb42e8 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -16,8 +16,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
*/ /* * [Memo] diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 1c6ce3119ff..0a00f449de5 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -722,7 +722,7 @@ done: case IPV6_MTU_DISCOVER: if (optlen < sizeof(int)) goto e_inval; - if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_PROBE) + if (val < IPV6_PMTUDISC_DONT || val > IPV6_PMTUDISC_INTERFACE) goto e_inval; np->pmtudisc = val; retv = 0; @@ -1002,10 +1002,8 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, release_sock(sk); if (skb) { - int err = ip6_datagram_recv_ctl(sk, &msg, skb); + ip6_datagram_recv_ctl(sk, &msg, skb); kfree_skb(skb); - if (err) - return err; } else { if (np->rxopt.bits.rxinfo) { struct in6_pktinfo src_info; @@ -1019,7 +1017,8 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, put_cmsg(&msg, SOL_IPV6, IPV6_HOPLIMIT, sizeof(hlim), &hlim); } if (np->rxopt.bits.rxtclass) { - int tclass = np->rcv_tclass; + int tclass = (int)ip6_tclass(np->rcv_flowinfo); + put_cmsg(&msg, SOL_IPV6, IPV6_TCLASS, sizeof(tclass), &tclass); } if (np->rxopt.bits.rxoinfo) { @@ -1034,6 +1033,11 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, int hlim = np->mcast_hops; put_cmsg(&msg, SOL_IPV6, IPV6_2292HOPLIMIT, sizeof(hlim), &hlim); } + if (np->rxopt.bits.rxflow) { + __be32 flowinfo = np->rcv_flowinfo; + + put_cmsg(&msg, SOL_IPV6, IPV6_FLOWINFO, sizeof(flowinfo), &flowinfo); + } } len -= msg.msg_controllen; return put_user(len, optlen); @@ -1215,6 +1219,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, case IPV6_FLOWLABEL_MGR: { struct in6_flowlabel_req freq; + int flags; if (len < sizeof(freq)) return -EINVAL; @@ -1226,9 +1231,11 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, return -EINVAL; len = sizeof(freq); + flags = freq.flr_flags; + memset(&freq, 0, sizeof(freq)); - val = ipv6_flowlabel_opt_get(sk, &freq); + val = ipv6_flowlabel_opt_get(sk, &freq, 
flags); if (val < 0) return val; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index d18f9f903db..e1e47350784 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -999,7 +999,7 @@ bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group, static void mld_gq_start_timer(struct inet6_dev *idev) { - unsigned long tv = net_random() % idev->mc_maxdelay; + unsigned long tv = prandom_u32() % idev->mc_maxdelay; idev->mc_gq_running = 1; if (!mod_timer(&idev->mc_gq_timer, jiffies+tv+2)) @@ -1015,7 +1015,7 @@ static void mld_gq_stop_timer(struct inet6_dev *idev) static void mld_ifc_start_timer(struct inet6_dev *idev, unsigned long delay) { - unsigned long tv = net_random() % delay; + unsigned long tv = prandom_u32() % delay; if (!mod_timer(&idev->mc_ifc_timer, jiffies+tv+2)) in6_dev_hold(idev); @@ -1030,7 +1030,7 @@ static void mld_ifc_stop_timer(struct inet6_dev *idev) static void mld_dad_start_timer(struct inet6_dev *idev, unsigned long delay) { - unsigned long tv = net_random() % delay; + unsigned long tv = prandom_u32() % delay; if (!mod_timer(&idev->mc_dad_timer, jiffies+tv+2)) in6_dev_hold(idev); @@ -1061,7 +1061,7 @@ static void igmp6_group_queried(struct ifmcaddr6 *ma, unsigned long resptime) } if (delay >= resptime) - delay = net_random() % resptime; + delay = prandom_u32() % resptime; ma->mca_timer.expires = jiffies + delay; if (!mod_timer(&ma->mca_timer, jiffies + delay)) @@ -1665,7 +1665,7 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc, skb_tailroom(skb)) : 0) static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, - int type, int gdeleted, int sdeleted) + int type, int gdeleted, int sdeleted, int crsend) { struct inet6_dev *idev = pmc->idev; struct net_device *dev = idev->dev; @@ -1757,7 +1757,7 @@ empty_source: if (type == MLD2_ALLOW_NEW_SOURCES || type == MLD2_BLOCK_OLD_SOURCES) return skb; - if (pmc->mca_crcount || isquery) { + if (pmc->mca_crcount || isquery || crsend) { /* make 
sure we have room for group header */ if (skb && AVAILABLE(skb) < sizeof(struct mld2_grec)) { mld_sendpack(skb); @@ -1789,7 +1789,7 @@ static void mld_send_report(struct inet6_dev *idev, struct ifmcaddr6 *pmc) type = MLD2_MODE_IS_EXCLUDE; else type = MLD2_MODE_IS_INCLUDE; - skb = add_grec(skb, pmc, type, 0, 0); + skb = add_grec(skb, pmc, type, 0, 0, 0); spin_unlock_bh(&pmc->mca_lock); } } else { @@ -1798,7 +1798,7 @@ static void mld_send_report(struct inet6_dev *idev, struct ifmcaddr6 *pmc) type = MLD2_MODE_IS_EXCLUDE; else type = MLD2_MODE_IS_INCLUDE; - skb = add_grec(skb, pmc, type, 0, 0); + skb = add_grec(skb, pmc, type, 0, 0, 0); spin_unlock_bh(&pmc->mca_lock); } read_unlock_bh(&idev->lock); @@ -1843,13 +1843,13 @@ static void mld_send_cr(struct inet6_dev *idev) if (pmc->mca_sfmode == MCAST_INCLUDE) { type = MLD2_BLOCK_OLD_SOURCES; dtype = MLD2_BLOCK_OLD_SOURCES; - skb = add_grec(skb, pmc, type, 1, 0); - skb = add_grec(skb, pmc, dtype, 1, 1); + skb = add_grec(skb, pmc, type, 1, 0, 0); + skb = add_grec(skb, pmc, dtype, 1, 1, 0); } if (pmc->mca_crcount) { if (pmc->mca_sfmode == MCAST_EXCLUDE) { type = MLD2_CHANGE_TO_INCLUDE; - skb = add_grec(skb, pmc, type, 1, 0); + skb = add_grec(skb, pmc, type, 1, 0, 0); } pmc->mca_crcount--; if (pmc->mca_crcount == 0) { @@ -1880,8 +1880,8 @@ static void mld_send_cr(struct inet6_dev *idev) type = MLD2_ALLOW_NEW_SOURCES; dtype = MLD2_BLOCK_OLD_SOURCES; } - skb = add_grec(skb, pmc, type, 0, 0); - skb = add_grec(skb, pmc, dtype, 0, 1); /* deleted sources */ + skb = add_grec(skb, pmc, type, 0, 0, 0); + skb = add_grec(skb, pmc, dtype, 0, 1, 0); /* deleted sources */ /* filter mode changes */ if (pmc->mca_crcount) { @@ -1889,7 +1889,7 @@ static void mld_send_cr(struct inet6_dev *idev) type = MLD2_CHANGE_TO_EXCLUDE; else type = MLD2_CHANGE_TO_INCLUDE; - skb = add_grec(skb, pmc, type, 0, 0); + skb = add_grec(skb, pmc, type, 0, 0, 0); pmc->mca_crcount--; } spin_unlock_bh(&pmc->mca_lock); @@ -1997,27 +1997,36 @@ err_out: goto out; } 
-static void mld_resend_report(struct inet6_dev *idev) +static void mld_send_initial_cr(struct inet6_dev *idev) { - if (mld_in_v1_mode(idev)) { - struct ifmcaddr6 *mcaddr; - read_lock_bh(&idev->lock); - for (mcaddr = idev->mc_list; mcaddr; mcaddr = mcaddr->next) { - if (!(mcaddr->mca_flags & MAF_NOREPORT)) - igmp6_send(&mcaddr->mca_addr, idev->dev, - ICMPV6_MGM_REPORT); - } - read_unlock_bh(&idev->lock); - } else { - mld_send_report(idev, NULL); + struct sk_buff *skb; + struct ifmcaddr6 *pmc; + int type; + + if (mld_in_v1_mode(idev)) + return; + + skb = NULL; + read_lock_bh(&idev->lock); + for (pmc=idev->mc_list; pmc; pmc=pmc->next) { + spin_lock_bh(&pmc->mca_lock); + if (pmc->mca_sfcount[MCAST_EXCLUDE]) + type = MLD2_CHANGE_TO_EXCLUDE; + else + type = MLD2_CHANGE_TO_INCLUDE; + skb = add_grec(skb, pmc, type, 0, 0, 1); + spin_unlock_bh(&pmc->mca_lock); } + read_unlock_bh(&idev->lock); + if (skb) + mld_sendpack(skb); } void ipv6_mc_dad_complete(struct inet6_dev *idev) { idev->mc_dad_count = idev->mc_qrv; if (idev->mc_dad_count) { - mld_resend_report(idev); + mld_send_initial_cr(idev); idev->mc_dad_count--; if (idev->mc_dad_count) mld_dad_start_timer(idev, idev->mc_maxdelay); @@ -2028,7 +2037,7 @@ static void mld_dad_timer_expire(unsigned long data) { struct inet6_dev *idev = (struct inet6_dev *)data; - mld_resend_report(idev); + mld_send_initial_cr(idev); if (idev->mc_dad_count) { idev->mc_dad_count--; if (idev->mc_dad_count) @@ -2328,7 +2337,7 @@ static void igmp6_join_group(struct ifmcaddr6 *ma) igmp6_send(&ma->mca_addr, ma->idev->dev, ICMPV6_MGM_REPORT); - delay = net_random() % unsolicited_report_interval(ma->idev); + delay = prandom_u32() % unsolicited_report_interval(ma->idev); spin_lock_bh(&ma->mca_lock); if (del_timer(&ma->mca_timer)) { diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index 9ac01dc9402..db9b6cbc9db 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -13,8 +13,7 @@ * GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ /* * Authors: diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 30086517139..09a22f4f36c 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -125,17 +125,19 @@ struct neigh_table nd_tbl = { .id = "ndisc_cache", .parms = { .tbl = &nd_tbl, - .base_reachable_time = ND_REACHABLE_TIME, - .retrans_time = ND_RETRANS_TIMER, - .gc_staletime = 60 * HZ, .reachable_time = ND_REACHABLE_TIME, - .delay_probe_time = 5 * HZ, - .queue_len_bytes = 64*1024, - .ucast_probes = 3, - .mcast_probes = 3, - .anycast_delay = 1 * HZ, - .proxy_delay = (8 * HZ) / 10, - .proxy_qlen = 64, + .data = { + [NEIGH_VAR_MCAST_PROBES] = 3, + [NEIGH_VAR_UCAST_PROBES] = 3, + [NEIGH_VAR_RETRANS_TIME] = ND_RETRANS_TIMER, + [NEIGH_VAR_BASE_REACHABLE_TIME] = ND_REACHABLE_TIME, + [NEIGH_VAR_DELAY_PROBE_TIME] = 5 * HZ, + [NEIGH_VAR_GC_STALETIME] = 60 * HZ, + [NEIGH_VAR_QUEUE_LEN_BYTES] = 64 * 1024, + [NEIGH_VAR_PROXY_QLEN] = 64, + [NEIGH_VAR_ANYCAST_DELAY] = 1 * HZ, + [NEIGH_VAR_PROXY_DELAY] = (8 * HZ) / 10, + }, }, .gc_interval = 30 * HZ, .gc_thresh1 = 128, @@ -656,14 +658,14 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb) if (skb && ipv6_chk_addr(dev_net(dev), &ipv6_hdr(skb)->saddr, dev, 1)) saddr = &ipv6_hdr(skb)->saddr; - if ((probes -= neigh->parms->ucast_probes) < 0) { + if ((probes -= NEIGH_VAR(neigh->parms, UCAST_PROBES)) < 0) { if (!(neigh->nud_state & NUD_VALID)) { ND_PRINTK(1, dbg, "%s: trying to ucast probe in NUD_INVALID: %pI6\n", __func__, target); } ndisc_send_ns(dev, neigh, target, target, saddr); - } else if ((probes -= neigh->parms->app_probes) < 0) { + } else if ((probes -= NEIGH_VAR(neigh->parms, APP_PROBES)) < 0) { neigh_app_ns(neigh); } else { 
addrconf_addr_solict_mult(target, &mcaddr); @@ -790,7 +792,7 @@ static void ndisc_recv_ns(struct sk_buff *skb) if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) && skb->pkt_type != PACKET_HOST && inc && - idev->nd_parms->proxy_delay != 0) { + NEIGH_VAR(idev->nd_parms, PROXY_DELAY) != 0) { /* * for anycast or proxy, * sender should delay its response @@ -1210,7 +1212,7 @@ skip_defrtr: rtime = (rtime*HZ)/1000; if (rtime < HZ/10) rtime = HZ/10; - in6_dev->nd_parms->retrans_time = rtime; + NEIGH_VAR_SET(in6_dev->nd_parms, RETRANS_TIME, rtime); in6_dev->tstamp = jiffies; inet6_ifinfo_notify(RTM_NEWLINK, in6_dev); } @@ -1222,9 +1224,11 @@ skip_defrtr: if (rtime < HZ/10) rtime = HZ/10; - if (rtime != in6_dev->nd_parms->base_reachable_time) { - in6_dev->nd_parms->base_reachable_time = rtime; - in6_dev->nd_parms->gc_staletime = 3 * rtime; + if (rtime != NEIGH_VAR(in6_dev->nd_parms, BASE_REACHABLE_TIME)) { + NEIGH_VAR_SET(in6_dev->nd_parms, + BASE_REACHABLE_TIME, rtime); + NEIGH_VAR_SET(in6_dev->nd_parms, + GC_STALETIME, 3 * rtime); in6_dev->nd_parms->reachable_time = neigh_rand_reach_time(rtime); in6_dev->tstamp = jiffies; inet6_ifinfo_notify(RTM_NEWLINK, in6_dev); @@ -1651,22 +1655,23 @@ int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, void __user *bu ndisc_warn_deprecated_sysctl(ctl, "syscall", dev ? 
dev->name : "default"); if (strcmp(ctl->procname, "retrans_time") == 0) - ret = proc_dointvec(ctl, write, buffer, lenp, ppos); + ret = neigh_proc_dointvec(ctl, write, buffer, lenp, ppos); else if (strcmp(ctl->procname, "base_reachable_time") == 0) - ret = proc_dointvec_jiffies(ctl, write, - buffer, lenp, ppos); + ret = neigh_proc_dointvec_jiffies(ctl, write, + buffer, lenp, ppos); else if ((strcmp(ctl->procname, "retrans_time_ms") == 0) || (strcmp(ctl->procname, "base_reachable_time_ms") == 0)) - ret = proc_dointvec_ms_jiffies(ctl, write, - buffer, lenp, ppos); + ret = neigh_proc_dointvec_ms_jiffies(ctl, write, + buffer, lenp, ppos); else ret = -1; if (write && ret == 0 && dev && (idev = in6_dev_get(dev)) != NULL) { - if (ctl->data == &idev->nd_parms->base_reachable_time) - idev->nd_parms->reachable_time = neigh_rand_reach_time(idev->nd_parms->base_reachable_time); + if (ctl->data == &NEIGH_VAR(idev->nd_parms, BASE_REACHABLE_TIME)) + idev->nd_parms->reachable_time = + neigh_rand_reach_time(NEIGH_VAR(idev->nd_parms, BASE_REACHABLE_TIME)); idev->tstamp = jiffies; inet6_ifinfo_notify(RTM_NEWLINK, idev); in6_dev_put(idev); @@ -1725,7 +1730,7 @@ int __init ndisc_init(void) neigh_table_init(&nd_tbl); #ifdef CONFIG_SYSCTL - err = neigh_sysctl_register(NULL, &nd_tbl.parms, "ipv6", + err = neigh_sysctl_register(NULL, &nd_tbl.parms, &ndisc_ifinfo_sysctl_change); if (err) goto out_unregister_pernet; diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 7702f9e90a0..35750df744d 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -28,15 +28,27 @@ config NF_CONNTRACK_IPV6 config NF_TABLES_IPV6 depends on NF_TABLES tristate "IPv6 nf_tables support" + help + This option enables the IPv6 support for nf_tables. config NFT_CHAIN_ROUTE_IPV6 depends on NF_TABLES_IPV6 tristate "IPv6 nf_tables route chain support" + help + This option enables the "route" chain for IPv6 in nf_tables. 
This + chain type is used to force packet re-routing after mangling header + fields such as the source, destination, flowlabel, hop-limit and + the packet mark. config NFT_CHAIN_NAT_IPV6 depends on NF_TABLES_IPV6 depends on NF_NAT_IPV6 && NFT_NAT tristate "IPv6 nf_tables nat chain support" + help + This option enables the "nat" chain for IPv6 in nf_tables. This + chain type is used to perform Network Address Translation (NAT) + packet transformations such as the source, destination address and + source and destination ports. config IP6_NF_IPTABLES tristate "IP6 tables support (required for filtering)" diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index da00a2ecde5..544b0a9da1b 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -23,181 +23,18 @@ #include <linux/skbuff.h> #include <linux/icmpv6.h> #include <linux/netdevice.h> -#include <net/ipv6.h> -#include <net/tcp.h> #include <net/icmp.h> -#include <net/ip6_checksum.h> -#include <net/ip6_fib.h> -#include <net/ip6_route.h> #include <net/flow.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv6/ip6_tables.h> #include <linux/netfilter_ipv6/ip6t_REJECT.h> +#include <net/netfilter/ipv6/nf_reject.h> + MODULE_AUTHOR("Yasuyuki KOZAKAI <yasuyuki.kozakai@toshiba.co.jp>"); MODULE_DESCRIPTION("Xtables: packet \"rejection\" target for IPv6"); MODULE_LICENSE("GPL"); -/* Send RST reply */ -static void send_reset(struct net *net, struct sk_buff *oldskb, int hook) -{ - struct sk_buff *nskb; - struct tcphdr otcph, *tcph; - unsigned int otcplen, hh_len; - int tcphoff, needs_ack; - const struct ipv6hdr *oip6h = ipv6_hdr(oldskb); - struct ipv6hdr *ip6h; -#define DEFAULT_TOS_VALUE 0x0U - const __u8 tclass = DEFAULT_TOS_VALUE; - struct dst_entry *dst = NULL; - u8 proto; - __be16 frag_off; - struct flowi6 fl6; - - if ((!(ipv6_addr_type(&oip6h->saddr) & IPV6_ADDR_UNICAST)) || - (!(ipv6_addr_type(&oip6h->daddr) & IPV6_ADDR_UNICAST))) { - 
pr_debug("addr is not unicast.\n"); - return; - } - - proto = oip6h->nexthdr; - tcphoff = ipv6_skip_exthdr(oldskb, ((u8*)(oip6h+1) - oldskb->data), &proto, &frag_off); - - if ((tcphoff < 0) || (tcphoff > oldskb->len)) { - pr_debug("Cannot get TCP header.\n"); - return; - } - - otcplen = oldskb->len - tcphoff; - - /* IP header checks: fragment, too short. */ - if (proto != IPPROTO_TCP || otcplen < sizeof(struct tcphdr)) { - pr_debug("proto(%d) != IPPROTO_TCP, " - "or too short. otcplen = %d\n", - proto, otcplen); - return; - } - - if (skb_copy_bits(oldskb, tcphoff, &otcph, sizeof(struct tcphdr))) - BUG(); - - /* No RST for RST. */ - if (otcph.rst) { - pr_debug("RST is set\n"); - return; - } - - /* Check checksum. */ - if (nf_ip6_checksum(oldskb, hook, tcphoff, IPPROTO_TCP)) { - pr_debug("TCP checksum is invalid\n"); - return; - } - - memset(&fl6, 0, sizeof(fl6)); - fl6.flowi6_proto = IPPROTO_TCP; - fl6.saddr = oip6h->daddr; - fl6.daddr = oip6h->saddr; - fl6.fl6_sport = otcph.dest; - fl6.fl6_dport = otcph.source; - security_skb_classify_flow(oldskb, flowi6_to_flowi(&fl6)); - dst = ip6_route_output(net, NULL, &fl6); - if (dst == NULL || dst->error) { - dst_release(dst); - return; - } - dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0); - if (IS_ERR(dst)) - return; - - hh_len = (dst->dev->hard_header_len + 15)&~15; - nskb = alloc_skb(hh_len + 15 + dst->header_len + sizeof(struct ipv6hdr) - + sizeof(struct tcphdr) + dst->trailer_len, - GFP_ATOMIC); - - if (!nskb) { - net_dbg_ratelimited("cannot alloc skb\n"); - dst_release(dst); - return; - } - - skb_dst_set(nskb, dst); - - skb_reserve(nskb, hh_len + dst->header_len); - - skb_put(nskb, sizeof(struct ipv6hdr)); - skb_reset_network_header(nskb); - ip6h = ipv6_hdr(nskb); - ip6_flow_hdr(ip6h, tclass, 0); - ip6h->hop_limit = ip6_dst_hoplimit(dst); - ip6h->nexthdr = IPPROTO_TCP; - ip6h->saddr = oip6h->daddr; - ip6h->daddr = oip6h->saddr; - - skb_reset_transport_header(nskb); - tcph = (struct tcphdr *)skb_put(nskb, 
sizeof(struct tcphdr)); - /* Truncate to length (no data) */ - tcph->doff = sizeof(struct tcphdr)/4; - tcph->source = otcph.dest; - tcph->dest = otcph.source; - - if (otcph.ack) { - needs_ack = 0; - tcph->seq = otcph.ack_seq; - tcph->ack_seq = 0; - } else { - needs_ack = 1; - tcph->ack_seq = htonl(ntohl(otcph.seq) + otcph.syn + otcph.fin - + otcplen - (otcph.doff<<2)); - tcph->seq = 0; - } - - /* Reset flags */ - ((u_int8_t *)tcph)[13] = 0; - tcph->rst = 1; - tcph->ack = needs_ack; - tcph->window = 0; - tcph->urg_ptr = 0; - tcph->check = 0; - - /* Adjust TCP checksum */ - tcph->check = csum_ipv6_magic(&ipv6_hdr(nskb)->saddr, - &ipv6_hdr(nskb)->daddr, - sizeof(struct tcphdr), IPPROTO_TCP, - csum_partial(tcph, - sizeof(struct tcphdr), 0)); - - nf_ct_attach(nskb, oldskb); - -#ifdef CONFIG_BRIDGE_NETFILTER - /* If we use ip6_local_out for bridged traffic, the MAC source on - * the RST will be ours, instead of the destination's. This confuses - * some routers/firewalls, and they drop the packet. So we need to - * build the eth header using the original destination's MAC as the - * source, and send the RST packet directly. 
- */ - if (oldskb->nf_bridge) { - struct ethhdr *oeth = eth_hdr(oldskb); - nskb->dev = oldskb->nf_bridge->physindev; - nskb->protocol = htons(ETH_P_IPV6); - ip6h->payload_len = htons(sizeof(struct tcphdr)); - if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol), - oeth->h_source, oeth->h_dest, nskb->len) < 0) - return; - dev_queue_xmit(nskb); - } else -#endif - ip6_local_out(nskb); -} - -static inline void -send_unreach(struct net *net, struct sk_buff *skb_in, unsigned char code, - unsigned int hooknum) -{ - if (hooknum == NF_INET_LOCAL_OUT && skb_in->dev == NULL) - skb_in->dev = net->loopback_dev; - - icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0); -} static unsigned int reject_tg6(struct sk_buff *skb, const struct xt_action_param *par) @@ -208,25 +45,25 @@ reject_tg6(struct sk_buff *skb, const struct xt_action_param *par) pr_debug("%s: medium point\n", __func__); switch (reject->with) { case IP6T_ICMP6_NO_ROUTE: - send_unreach(net, skb, ICMPV6_NOROUTE, par->hooknum); + nf_send_unreach6(net, skb, ICMPV6_NOROUTE, par->hooknum); break; case IP6T_ICMP6_ADM_PROHIBITED: - send_unreach(net, skb, ICMPV6_ADM_PROHIBITED, par->hooknum); + nf_send_unreach6(net, skb, ICMPV6_ADM_PROHIBITED, par->hooknum); break; case IP6T_ICMP6_NOT_NEIGHBOUR: - send_unreach(net, skb, ICMPV6_NOT_NEIGHBOUR, par->hooknum); + nf_send_unreach6(net, skb, ICMPV6_NOT_NEIGHBOUR, par->hooknum); break; case IP6T_ICMP6_ADDR_UNREACH: - send_unreach(net, skb, ICMPV6_ADDR_UNREACH, par->hooknum); + nf_send_unreach6(net, skb, ICMPV6_ADDR_UNREACH, par->hooknum); break; case IP6T_ICMP6_PORT_UNREACH: - send_unreach(net, skb, ICMPV6_PORT_UNREACH, par->hooknum); + nf_send_unreach6(net, skb, ICMPV6_PORT_UNREACH, par->hooknum); break; case IP6T_ICMP6_ECHOREPLY: /* Do nothing */ break; case IP6T_TCP_RESET: - send_reset(net, skb, par->hooknum); + nf_send_reset6(net, skb, par->hooknum); break; default: net_info_ratelimited("case %u not handled yet\n", reject->with); diff --git 
a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c index d77db8a1350..0d812b31277 100644 --- a/net/ipv6/netfilter/nf_tables_ipv6.c +++ b/net/ipv6/netfilter/nf_tables_ipv6.c @@ -16,34 +16,51 @@ #include <net/netfilter/nf_tables.h> #include <net/netfilter/nf_tables_ipv6.h> +static unsigned int nft_do_chain_ipv6(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct nft_pktinfo pkt; + + /* malformed packet, drop it */ + if (nft_set_pktinfo_ipv6(&pkt, ops, skb, in, out) < 0) + return NF_DROP; + + return nft_do_chain(&pkt, ops); +} + static unsigned int nft_ipv6_output(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - struct nft_pktinfo pkt; - if (unlikely(skb->len < sizeof(struct ipv6hdr))) { if (net_ratelimit()) pr_info("nf_tables_ipv6: ignoring short SOCK_RAW " "packet\n"); return NF_ACCEPT; } - if (nft_set_pktinfo_ipv6(&pkt, ops, skb, in, out) < 0) - return NF_DROP; - return nft_do_chain_pktinfo(&pkt, ops); + return nft_do_chain_ipv6(ops, skb, in, out, okfn); } -static struct nft_af_info nft_af_ipv6 __read_mostly = { +struct nft_af_info nft_af_ipv6 __read_mostly = { .family = NFPROTO_IPV6, .nhooks = NF_INET_NUMHOOKS, .owner = THIS_MODULE, + .nops = 1, .hooks = { + [NF_INET_LOCAL_IN] = nft_do_chain_ipv6, [NF_INET_LOCAL_OUT] = nft_ipv6_output, + [NF_INET_FORWARD] = nft_do_chain_ipv6, + [NF_INET_PRE_ROUTING] = nft_do_chain_ipv6, + [NF_INET_POST_ROUTING] = nft_do_chain_ipv6, }, }; +EXPORT_SYMBOL_GPL(nft_af_ipv6); static int nf_tables_ipv6_init_net(struct net *net) { @@ -73,44 +90,28 @@ static struct pernet_operations nf_tables_ipv6_net_ops = { .exit = nf_tables_ipv6_exit_net, }; -static unsigned int -nft_do_chain_ipv6(const struct nf_hook_ops *ops, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int 
(*okfn)(struct sk_buff *)) -{ - struct nft_pktinfo pkt; - - /* malformed packet, drop it */ - if (nft_set_pktinfo_ipv6(&pkt, ops, skb, in, out) < 0) - return NF_DROP; - - return nft_do_chain_pktinfo(&pkt, ops); -} - -static struct nf_chain_type filter_ipv6 = { - .family = NFPROTO_IPV6, +static const struct nf_chain_type filter_ipv6 = { .name = "filter", .type = NFT_CHAIN_T_DEFAULT, + .family = NFPROTO_IPV6, + .owner = THIS_MODULE, .hook_mask = (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_FORWARD) | (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_POST_ROUTING), - .fn = { - [NF_INET_LOCAL_IN] = nft_do_chain_ipv6, - [NF_INET_LOCAL_OUT] = nft_ipv6_output, - [NF_INET_FORWARD] = nft_do_chain_ipv6, - [NF_INET_PRE_ROUTING] = nft_do_chain_ipv6, - [NF_INET_POST_ROUTING] = nft_do_chain_ipv6, - }, }; static int __init nf_tables_ipv6_init(void) { + int ret; + nft_register_chain_type(&filter_ipv6); - return register_pernet_subsys(&nf_tables_ipv6_net_ops); + ret = register_pernet_subsys(&nf_tables_ipv6_net_ops); + if (ret < 0) + nft_unregister_chain_type(&filter_ipv6); + + return ret; } static void __exit nf_tables_ipv6_exit(void) diff --git a/net/ipv6/netfilter/nft_chain_nat_ipv6.c b/net/ipv6/netfilter/nft_chain_nat_ipv6.c index e86dcd70dc7..9c3297a768f 100644 --- a/net/ipv6/netfilter/nft_chain_nat_ipv6.c +++ b/net/ipv6/netfilter/nft_chain_nat_ipv6.c @@ -79,7 +79,7 @@ static unsigned int nf_nat_ipv6_fn(const struct nf_hook_ops *ops, nft_set_pktinfo_ipv6(&pkt, ops, skb, in, out); - ret = nft_do_chain_pktinfo(&pkt, ops); + ret = nft_do_chain(&pkt, ops); if (ret != NF_ACCEPT) return ret; if (!nf_nat_initialized(ct, maniptype)) { @@ -170,21 +170,21 @@ static unsigned int nf_nat_ipv6_output(const struct nf_hook_ops *ops, return ret; } -static struct nf_chain_type nft_chain_nat_ipv6 = { - .family = NFPROTO_IPV6, +static const struct nf_chain_type nft_chain_nat_ipv6 = { .name = "nat", .type = NFT_CHAIN_T_NAT, + .family = NFPROTO_IPV6, + .owner = THIS_MODULE, .hook_mask 
= (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_POST_ROUTING) | (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_LOCAL_IN), - .fn = { + .hooks = { [NF_INET_PRE_ROUTING] = nf_nat_ipv6_prerouting, [NF_INET_POST_ROUTING] = nf_nat_ipv6_postrouting, [NF_INET_LOCAL_OUT] = nf_nat_ipv6_output, [NF_INET_LOCAL_IN] = nf_nat_ipv6_fn, }, - .me = THIS_MODULE, }; static int __init nft_chain_nat_ipv6_init(void) diff --git a/net/ipv6/netfilter/nft_chain_route_ipv6.c b/net/ipv6/netfilter/nft_chain_route_ipv6.c index 3fe40f0456a..42031299585 100644 --- a/net/ipv6/netfilter/nft_chain_route_ipv6.c +++ b/net/ipv6/netfilter/nft_chain_route_ipv6.c @@ -47,7 +47,7 @@ static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, /* flowlabel and prio (includes version, which shouldn't change either */ flowlabel = *((u32 *)ipv6_hdr(skb)); - ret = nft_do_chain_pktinfo(&pkt, ops); + ret = nft_do_chain(&pkt, ops); if (ret != NF_DROP && ret != NF_QUEUE && (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) || memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr)) || @@ -59,15 +59,15 @@ static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, return ret; } -static struct nf_chain_type nft_chain_route_ipv6 = { - .family = NFPROTO_IPV6, +static const struct nf_chain_type nft_chain_route_ipv6 = { .name = "route", .type = NFT_CHAIN_T_ROUTE, + .family = NFPROTO_IPV6, + .owner = THIS_MODULE, .hook_mask = (1 << NF_INET_LOCAL_OUT), - .fn = { + .hooks = { [NF_INET_LOCAL_OUT] = nf_route_table_hook, }, - .me = THIS_MODULE, }; static int __init nft_chain_route_init(void) diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index a83243c3d65..fb9beb78f00 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -31,7 +31,7 @@ struct proto pingv6_prot = { .owner = THIS_MODULE, .init = ping_init_sock, .close = ping_close, - .connect = ip6_datagram_connect, + .connect = ip6_datagram_connect_v6_only, .disconnect = udp_disconnect, .setsockopt = ipv6_setsockopt, .getsockopt = ipv6_getsockopt, @@ -62,10 +62,9 @@ 
static int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, { return -EAFNOSUPPORT; } -static int dummy_ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg, +static void dummy_ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb) { - return -EAFNOSUPPORT; } static int dummy_icmpv6_err_convert(u8 type, u8 code, int *err) { @@ -103,7 +102,7 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, return err; if (msg->msg_name) { - struct sockaddr_in6 *u = (struct sockaddr_in6 *) msg->msg_name; + DECLARE_SOCKADDR(struct sockaddr_in6 *, u, msg->msg_name); if (msg->msg_namelen < sizeof(struct sockaddr_in6) || u->sin6_family != AF_INET6) { return -EINVAL; @@ -145,7 +144,7 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, else if (!fl6.flowi6_oif) fl6.flowi6_oif = np->ucast_oif; - dst = ip6_sk_dst_lookup_flow(sk, &fl6, daddr, 1); + dst = ip6_sk_dst_lookup_flow(sk, &fl6, daddr); if (IS_ERR(dst)) return PTR_ERR(dst); rt = (struct rt6_info *) dst; @@ -254,7 +253,9 @@ int __init pingv6_init(void) return ret; #endif pingv6_ops.ipv6_recv_error = ipv6_recv_error; - pingv6_ops.ip6_datagram_recv_ctl = ip6_datagram_recv_ctl; + pingv6_ops.ip6_datagram_recv_common_ctl = ip6_datagram_recv_common_ctl; + pingv6_ops.ip6_datagram_recv_specific_ctl = + ip6_datagram_recv_specific_ctl; pingv6_ops.icmpv6_err_convert = icmpv6_err_convert; pingv6_ops.ipv6_icmp_error = ipv6_icmp_error; pingv6_ops.ipv6_chk_addr = ipv6_chk_addr; @@ -267,7 +268,8 @@ int __init pingv6_init(void) void pingv6_exit(void) { pingv6_ops.ipv6_recv_error = dummy_ipv6_recv_error; - pingv6_ops.ip6_datagram_recv_ctl = dummy_ip6_datagram_recv_ctl; + pingv6_ops.ip6_datagram_recv_common_ctl = dummy_ip6_datagram_recv_ctl; + pingv6_ops.ip6_datagram_recv_specific_ctl = dummy_ip6_datagram_recv_ctl; pingv6_ops.icmpv6_err_convert = dummy_icmpv6_err_convert; pingv6_ops.ipv6_icmp_error = dummy_ipv6_icmp_error; pingv6_ops.ipv6_chk_addr 
= dummy_ipv6_chk_addr; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index b6bb87e5580..1f29996e368 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -250,6 +250,10 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (addr_len < SIN6_LEN_RFC2133) return -EINVAL; + + if (addr->sin6_family != AF_INET6) + return -EINVAL; + addr_type = ipv6_addr_type(&addr->sin6_addr); /* Raw sockets are IPv6 only */ @@ -457,7 +461,7 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk, int noblock, int flags, int *addr_len) { struct ipv6_pinfo *np = inet6_sk(sk); - struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)msg->msg_name; + DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name); struct sk_buff *skb; size_t copied; int err; @@ -734,7 +738,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len) { struct ipv6_txoptions opt_space; - struct sockaddr_in6 * sin6 = (struct sockaddr_in6 *) msg->msg_name; + DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name); struct in6_addr *daddr, *final_p, final; struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); @@ -864,7 +868,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, fl6.flowi6_oif = np->ucast_oif; security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true); + dst = ip6_dst_lookup_flow(sk, &fl6, final_p); if (IS_ERR(dst)) { err = PTR_ERR(dst); goto out; @@ -1209,7 +1213,7 @@ struct proto rawv6_prot = { .owner = THIS_MODULE, .close = rawv6_close, .destroy = raw6_destroy, - .connect = ip6_datagram_connect, + .connect = ip6_datagram_connect_v6_only, .disconnect = udp_disconnect, .ioctl = rawv6_ioctl, .init = rawv6_init_sk, diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 4b4944c3e4c..11dac21e658 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -66,8 +66,9 @@ #endif enum rt6_nud_state { - RT6_NUD_FAIL_HARD = -2, - RT6_NUD_FAIL_SOFT = -1, + 
RT6_NUD_FAIL_HARD = -3, + RT6_NUD_FAIL_PROBE = -2, + RT6_NUD_FAIL_DO_RR = -1, RT6_NUD_SUCCEED = 1 }; @@ -103,6 +104,36 @@ static struct rt6_info *rt6_get_route_info(struct net *net, const struct in6_addr *gwaddr, int ifindex); #endif +static void rt6_bind_peer(struct rt6_info *rt, int create) +{ + struct inet_peer_base *base; + struct inet_peer *peer; + + base = inetpeer_base_ptr(rt->_rt6i_peer); + if (!base) + return; + + peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create); + if (peer) { + if (!rt6_set_peer(rt, peer)) + inet_putpeer(peer); + } +} + +static struct inet_peer *__rt6_get_peer(struct rt6_info *rt, int create) +{ + if (rt6_has_peer(rt)) + return rt6_peer_ptr(rt); + + rt6_bind_peer(rt, create); + return (rt6_has_peer(rt) ? rt6_peer_ptr(rt) : NULL); +} + +static struct inet_peer *rt6_get_peer_create(struct rt6_info *rt) +{ + return __rt6_get_peer(rt, 1); +} + static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old) { struct rt6_info *rt = (struct rt6_info *) dst; @@ -311,22 +342,6 @@ static void ip6_dst_destroy(struct dst_entry *dst) } } -void rt6_bind_peer(struct rt6_info *rt, int create) -{ - struct inet_peer_base *base; - struct inet_peer *peer; - - base = inetpeer_base_ptr(rt->_rt6i_peer); - if (!base) - return; - - peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create); - if (peer) { - if (!rt6_set_peer(rt, peer)) - inet_putpeer(peer); - } -} - static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, int how) { @@ -521,7 +536,7 @@ static void rt6_probe(struct rt6_info *rt) work = kmalloc(sizeof(*work), GFP_ATOMIC); if (neigh && work) - neigh->updated = jiffies; + __neigh_set_probe_once(neigh); if (neigh) write_unlock(&neigh->lock); @@ -577,11 +592,13 @@ static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt) #ifdef CONFIG_IPV6_ROUTER_PREF else if (!(neigh->nud_state & NUD_FAILED)) ret = RT6_NUD_SUCCEED; + else + ret = RT6_NUD_FAIL_PROBE; #endif read_unlock(&neigh->lock); } else { ret = 
IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ? - RT6_NUD_SUCCEED : RT6_NUD_FAIL_SOFT; + RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR; } rcu_read_unlock_bh(); @@ -618,16 +635,17 @@ static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict, goto out; m = rt6_score_route(rt, oif, strict); - if (m == RT6_NUD_FAIL_SOFT) { + if (m == RT6_NUD_FAIL_DO_RR) { match_do_rr = true; m = 0; /* lowest valid score */ - } else if (m < 0) { + } else if (m == RT6_NUD_FAIL_HARD) { goto out; } if (strict & RT6_LOOKUP_F_REACHABLE) rt6_probe(rt); + /* note that m can be RT6_NUD_FAIL_PROBE at this point */ if (m > *mpri) { *do_rr = match_do_rr; *mpri = m; @@ -2238,7 +2256,7 @@ void rt6_remove_prefsrc(struct inet6_ifaddr *ifp) .net = net, .addr = &ifp->addr, }; - fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni); + fib6_clean_all(net, fib6_remove_prefsrc, &adni); } struct arg_dev_net { @@ -2265,7 +2283,7 @@ void rt6_ifdown(struct net *net, struct net_device *dev) .net = net, }; - fib6_clean_all(net, fib6_ifdown, 0, &adn); + fib6_clean_all(net, fib6_ifdown, &adn); icmp6_clean_all(fib6_ifdown, &adn); } @@ -2320,7 +2338,7 @@ void rt6_mtu_change(struct net_device *dev, unsigned int mtu) .mtu = mtu, }; - fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg); + fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg); } static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index d3005b34476..3dfbcf1dcb1 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -671,7 +671,7 @@ static int ipip6_rcv(struct sk_buff *skb) tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev, iph->saddr, iph->daddr); if (tunnel != NULL) { - struct pcpu_tstats *tstats; + struct pcpu_sw_netstats *tstats; if (tunnel->parms.iph.protocol != IPPROTO_IPV6 && tunnel->parms.iph.protocol != 0) @@ -1365,12 +1365,12 @@ static int ipip6_tunnel_init(struct net_device *dev) memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); ipip6_tunnel_bind_dev(dev); - dev->tstats = 
alloc_percpu(struct pcpu_tstats); + dev->tstats = alloc_percpu(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; for_each_possible_cpu(i) { - struct pcpu_tstats *ipip6_tunnel_stats; + struct pcpu_sw_netstats *ipip6_tunnel_stats; ipip6_tunnel_stats = per_cpu_ptr(dev->tstats, i); u64_stats_init(&ipip6_tunnel_stats->syncp); } @@ -1395,12 +1395,12 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev) iph->ihl = 5; iph->ttl = 64; - dev->tstats = alloc_percpu(struct pcpu_tstats); + dev->tstats = alloc_percpu(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; for_each_possible_cpu(i) { - struct pcpu_tstats *ipip6_fb_stats; + struct pcpu_sw_netstats *ipip6_fb_stats; ipip6_fb_stats = per_cpu_ptr(dev->tstats, i); u64_stats_init(&ipip6_fb_stats->syncp); } diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 535a3ad262f..bb53a5e73c1 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -247,7 +247,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) fl6.fl6_sport = inet_sk(sk)->inet_sport; security_req_classify_flow(req, flowi6_to_flowi(&fl6)); - dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); + dst = ip6_dst_lookup_flow(sk, &fl6, final_p); if (IS_ERR(dst)) goto out_free; } diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 107b2f1d90a..7f405a16882 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -24,6 +24,20 @@ static struct ctl_table ipv6_table_template[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "anycast_src_echo_reply", + .data = &init_net.ipv6.sysctl.anycast_src_echo_reply, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .procname = "flowlabel_consistency", + .data = &init_net.ipv6.sysctl.flowlabel_consistency, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { } }; @@ -51,6 +65,8 @@ static int __net_init ipv6_sysctl_net_init(struct net *net) if 
(!ipv6_table) goto out; ipv6_table[0].data = &net->ipv6.sysctl.bindv6only; + ipv6_table[1].data = &net->ipv6.sysctl.anycast_src_echo_reply; + ipv6_table[2].data = &net->ipv6.sysctl.flowlabel_consistency; ipv6_route_table = ipv6_route_sysctl_init(net); if (!ipv6_route_table) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index f67033b4bb6..889079b2ea8 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -164,12 +164,12 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, * connect() to INADDR_ANY means loopback (BSD'ism). */ - if(ipv6_addr_any(&usin->sin6_addr)) + if (ipv6_addr_any(&usin->sin6_addr)) usin->sin6_addr.s6_addr[15] = 0x1; addr_type = ipv6_addr_type(&usin->sin6_addr); - if(addr_type & IPV6_ADDR_MULTICAST) + if (addr_type & IPV6_ADDR_MULTICAST) return -ENETUNREACH; if (addr_type&IPV6_ADDR_LINKLOCAL) { @@ -257,7 +257,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true); + dst = ip6_dst_lookup_flow(sk, &fl6, final_p); if (IS_ERR(dst)) { err = PTR_ERR(dst); goto failure; @@ -336,7 +336,7 @@ static void tcp_v6_mtu_reduced(struct sock *sk) static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { - const struct ipv6hdr *hdr = (const struct ipv6hdr*)skb->data; + const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data; const struct tcphdr *th = (struct tcphdr *)(skb->data+offset); struct ipv6_pinfo *np; struct sock *sk; @@ -397,6 +397,9 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (sk->sk_state == TCP_LISTEN) goto out; + if (!ip6_sk_accept_pmtu(sk)) + goto out; + tp->mtu_info = ntohl(info); if (!sock_owned_by_user(sk)) tcp_v6_mtu_reduced(sk); @@ -466,7 +469,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst, { struct inet_request_sock *ireq = inet_rsk(req); struct ipv6_pinfo *np = 
inet6_sk(sk); - struct sk_buff * skb; + struct sk_buff *skb; int err = -ENOMEM; /* First, grab a route. */ @@ -480,6 +483,9 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst, &ireq->ir_v6_rmt_addr); fl6->daddr = ireq->ir_v6_rmt_addr; + if (np->repflow && (ireq->pktopts != NULL)) + fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts)); + skb_set_queue_mapping(skb, queue_mapping); err = ip6_xmit(sk, skb, fl6, np->opt, np->tclass); err = net_xmit_eval(err); @@ -721,7 +727,8 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 tsval, u32 tsecr, - struct tcp_md5sig_key *key, int rst, u8 tclass) + struct tcp_md5sig_key *key, int rst, u8 tclass, + u32 label) { const struct tcphdr *th = tcp_hdr(skb); struct tcphdr *t1; @@ -783,6 +790,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, memset(&fl6, 0, sizeof(fl6)); fl6.daddr = ipv6_hdr(skb)->saddr; fl6.saddr = ipv6_hdr(skb)->daddr; + fl6.flowlabel = label; buff->ip_summed = CHECKSUM_PARTIAL; buff->csum = 0; @@ -800,7 +808,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, * Underlying function will use this to retrieve the network * namespace */ - dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL, false); + dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL); if (!IS_ERR(dst)) { skb_dst_set(buff, dst); ip6_xmit(ctl_sk, buff, &fl6, NULL, tclass); @@ -868,7 +876,7 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len - (th->doff << 2); - tcp_v6_send_response(skb, seq, ack_seq, 0, 0, 0, key, 1, 0); + tcp_v6_send_response(skb, seq, ack_seq, 0, 0, 0, key, 1, 0, 0); #ifdef CONFIG_TCP_MD5SIG release_sk1: @@ -881,9 +889,11 @@ release_sk1: static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 tsval, u32 tsecr, - struct tcp_md5sig_key *key, u8 tclass) 
+ struct tcp_md5sig_key *key, u8 tclass, + u32 label) { - tcp_v6_send_response(skb, seq, ack, win, tsval, tsecr, key, 0, tclass); + tcp_v6_send_response(skb, seq, ack, win, tsval, tsecr, key, 0, tclass, + label); } static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) @@ -895,7 +905,7 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, tcp_time_stamp + tcptw->tw_ts_offset, tcptw->tw_ts_recent, tcp_twsk_md5_key(tcptw), - tw->tw_tclass); + tw->tw_tclass, (tw->tw_flowlabel << 12)); inet_twsk_put(tw); } @@ -905,11 +915,12 @@ static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, { tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, tcp_time_stamp, req->ts_recent, - tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr), 0); + tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr), + 0, 0); } -static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) +static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb) { struct request_sock *req, **prev; const struct tcphdr *th = tcp_hdr(skb); @@ -1010,7 +1021,8 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) if (!isn) { if (ipv6_opt_accepted(sk, skb) || np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || - np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) { + np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim || + np->repflow) { atomic_inc(&skb->users); ireq->pktopts = skb; } @@ -1082,9 +1094,9 @@ drop: return 0; /* don't send reset */ } -static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, - struct request_sock *req, - struct dst_entry *dst) +static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, + struct request_sock *req, + struct dst_entry *dst) { struct inet_request_sock *ireq; struct ipv6_pinfo *newnp, *np = inet6_sk(sk); @@ -1134,7 +1146,9 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct 
sk_buff *skb, newnp->opt = NULL; newnp->mcast_oif = inet6_iif(skb); newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; - newnp->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(skb)); + newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb)); + if (np->repflow) + newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb)); /* * No need to charge this sock to the relevant IPv6 refcnt debug socks count @@ -1214,7 +1228,9 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newnp->opt = NULL; newnp->mcast_oif = inet6_iif(skb); newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; - newnp->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(skb)); + newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb)); + if (np->repflow) + newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb)); /* Clone native IPv6 options from listening socket (if any) @@ -1230,7 +1246,6 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen + newnp->opt->opt_flen); - tcp_mtup_init(newsk); tcp_sync_mss(newsk, dst_mtu(dst)); newtp->advmss = dst_metric_advmss(dst); if (tcp_sk(sk)->rx_opt.user_mss && @@ -1379,7 +1394,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) * otherwise we just shortcircuit this and continue with * the new socket.. 
*/ - if(nsk != sk) { + if (nsk != sk) { sock_rps_save_rxhash(nsk, skb); if (tcp_child_process(sk, nsk, skb)) goto reset; @@ -1424,8 +1439,10 @@ ipv6_pktoptions: np->mcast_oif = inet6_iif(opt_skb); if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit; - if (np->rxopt.bits.rxtclass) - np->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(opt_skb)); + if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass) + np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb)); + if (np->repflow) + np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb)); if (ipv6_opt_accepted(sk, opt_skb)) { skb_set_owner_r(opt_skb, sk); opt_skb = xchg(&np->pktoptions, opt_skb); @@ -1739,7 +1756,7 @@ static void get_openreq6(struct seq_file *seq, dest->s6_addr32[2], dest->s6_addr32[3], ntohs(inet_rsk(req)->ir_rmt_port), TCP_SYN_RECV, - 0,0, /* could print option size, but that is af dependent. */ + 0, 0, /* could print option size, but that is af dependent. */ 1, /* timers active (only the expire timer) */ jiffies_to_clock_t(ttd), req->num_timeout, @@ -1798,7 +1815,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) atomic_read(&sp->sk_refcnt), sp, jiffies_to_clock_t(icsk->icsk_rto), jiffies_to_clock_t(icsk->icsk_ack.ato), - (icsk->icsk_ack.quick << 1 ) | icsk->icsk_ack.pingpong, + (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong, tp->snd_cwnd, tcp_in_initial_slowstart(tp) ? 
-1 : tp->snd_ssthresh ); diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c index 6d18157dc32..0d78132ff18 100644 --- a/net/ipv6/tcpv6_offload.c +++ b/net/ipv6/tcpv6_offload.c @@ -66,13 +66,13 @@ skip_csum: return tcp_gro_receive(head, skb); } -static int tcp6_gro_complete(struct sk_buff *skb) +static int tcp6_gro_complete(struct sk_buff *skb, int thoff) { const struct ipv6hdr *iph = ipv6_hdr(skb); struct tcphdr *th = tcp_hdr(skb); - th->check = ~tcp_v6_check(skb->len - skb_transport_offset(skb), - &iph->saddr, &iph->daddr, 0); + th->check = ~tcp_v6_check(skb->len - thoff, &iph->saddr, + &iph->daddr, 0); skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; return tcp_gro_complete(skb); diff --git a/net/ipv6/tunnel6.c b/net/ipv6/tunnel6.c index 4b0f50d9a96..2c4e4c5c761 100644 --- a/net/ipv6/tunnel6.c +++ b/net/ipv6/tunnel6.c @@ -12,8 +12,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. * * Authors Mitsuru KANDA <mk@linux-ipv6.org> * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org> diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 089c741a399..1e586d92260 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -460,9 +460,7 @@ try_again: /* Copy the address. 
*/ if (msg->msg_name) { - struct sockaddr_in6 *sin6; - - sin6 = (struct sockaddr_in6 *) msg->msg_name; + DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name); sin6->sin6_family = AF_INET6; sin6->sin6_port = udp_hdr(skb)->source; sin6->sin6_flowinfo = 0; @@ -479,12 +477,16 @@ try_again: } *addr_len = sizeof(*sin6); } + + if (np->rxopt.all) + ip6_datagram_recv_common_ctl(sk, msg, skb); + if (is_udp4) { if (inet->cmsg_flags) ip_cmsg_recv(msg, skb); } else { if (np->rxopt.all) - ip6_datagram_recv_ctl(sk, msg, skb); + ip6_datagram_recv_specific_ctl(sk, msg, skb); } err = copied; @@ -538,8 +540,11 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (sk == NULL) return; - if (type == ICMPV6_PKT_TOOBIG) + if (type == ICMPV6_PKT_TOOBIG) { + if (!ip6_sk_accept_pmtu(sk)) + goto out; ip6_sk_update_pmtu(skb, sk, info); + } if (type == NDISC_REDIRECT) { ip6_sk_redirect(skb, sk); goto out; @@ -1038,7 +1043,7 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct udp_sock *up = udp_sk(sk); struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); - struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) msg->msg_name; + DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name); struct in6_addr *daddr, *final_p, final; struct ipv6_txoptions *opt = NULL; struct ip6_flowlabel *flowlabel = NULL; @@ -1220,7 +1225,7 @@ do_udp_sendmsg: security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - dst = ip6_sk_dst_lookup_flow(sk, &fl6, final_p, true); + dst = ip6_sk_dst_lookup_flow(sk, &fl6, final_p); if (IS_ERR(dst)) { err = PTR_ERR(dst); dst = NULL; diff --git a/net/ipv6/xfrm6_mode_ro.c b/net/ipv6/xfrm6_mode_ro.c index 63d5d493098..0e015906f9c 100644 --- a/net/ipv6/xfrm6_mode_ro.c +++ b/net/ipv6/xfrm6_mode_ro.c @@ -15,8 +15,7 @@ * GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ /* * Authors: diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index de2bcfaaf75..1c66465a42d 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -12,8 +12,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. * * Authors Mitsuru KANDA <mk@linux-ipv6.org> * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org> diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index e096025b477..994e28bfb32 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -1707,7 +1707,7 @@ static int ipx_sendmsg(struct kiocb *iocb, struct socket *sock, { struct sock *sk = sock->sk; struct ipx_sock *ipxs = ipx_sk(sk); - struct sockaddr_ipx *usipx = (struct sockaddr_ipx *)msg->msg_name; + DECLARE_SOCKADDR(struct sockaddr_ipx *, usipx, msg->msg_name); struct sockaddr_ipx local_sipx; int rc = -EINVAL; int flags = msg->msg_flags; @@ -1774,7 +1774,7 @@ static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock, { struct sock *sk = sock->sk; struct ipx_sock *ipxs = ipx_sk(sk); - struct sockaddr_ipx *sipx = (struct sockaddr_ipx *)msg->msg_name; + DECLARE_SOCKADDR(struct sockaddr_ipx *, sipx, msg->msg_name); struct ipxhdr *ipx = NULL; struct sk_buff *skb; int copied, rc; diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index de7db23049f..54747c25c86 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -25,9 +25,7 @@ * GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, - * MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. * * Linux-IrDA now supports four different types of IrDA sockets: * @@ -1654,7 +1652,7 @@ static int irda_sendmsg_ultra(struct kiocb *iocb, struct socket *sock, /* Check if an address was specified with sendto. Jean II */ if (msg->msg_name) { - struct sockaddr_irda *addr = (struct sockaddr_irda *) msg->msg_name; + DECLARE_SOCKADDR(struct sockaddr_irda *, addr, msg->msg_name); err = -EINVAL; /* Check address, extract pid. Jean II */ if (msg->msg_namelen < sizeof(*addr)) diff --git a/net/irda/discovery.c b/net/irda/discovery.c index b0b56a339a8..6786e7f193d 100644 --- a/net/irda/discovery.c +++ b/net/irda/discovery.c @@ -24,9 +24,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, - * MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. * ********************************************************************/ diff --git a/net/irda/ircomm/ircomm_core.c b/net/irda/ircomm/ircomm_core.c index b797daac063..4490a675b1b 100644 --- a/net/irda/ircomm/ircomm_core.c +++ b/net/irda/ircomm/ircomm_core.c @@ -23,9 +23,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, - * MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
* ********************************************************************/ diff --git a/net/irda/ircomm/ircomm_event.c b/net/irda/ircomm/ircomm_event.c index d78554fedba..b172c652232 100644 --- a/net/irda/ircomm/ircomm_event.c +++ b/net/irda/ircomm/ircomm_event.c @@ -22,9 +22,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, - * MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. * ********************************************************************/ diff --git a/net/irda/ircomm/ircomm_lmp.c b/net/irda/ircomm/ircomm_lmp.c index 3b8095c771d..6536114adf3 100644 --- a/net/irda/ircomm/ircomm_lmp.c +++ b/net/irda/ircomm/ircomm_lmp.c @@ -24,9 +24,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, - * MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. * ********************************************************************/ diff --git a/net/irda/ircomm/ircomm_param.c b/net/irda/ircomm/ircomm_param.c index 30893912835..f80b1a6a244 100644 --- a/net/irda/ircomm/ircomm_param.c +++ b/net/irda/ircomm/ircomm_param.c @@ -22,9 +22,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, - * MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
* ********************************************************************/ diff --git a/net/irda/ircomm/ircomm_ttp.c b/net/irda/ircomm/ircomm_ttp.c index 6e6509f22f6..d362d711b79 100644 --- a/net/irda/ircomm/ircomm_ttp.c +++ b/net/irda/ircomm/ircomm_ttp.c @@ -23,9 +23,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, - * MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. * ********************************************************************/ diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c index 41ac7938268..2ba8b9705bb 100644 --- a/net/irda/ircomm/ircomm_tty.c +++ b/net/irda/ircomm/ircomm_tty.c @@ -24,9 +24,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, - * MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. * ********************************************************************/ diff --git a/net/irda/ircomm/ircomm_tty_attach.c b/net/irda/ircomm/ircomm_tty_attach.c index a2a508f5f26..2ee87bf387c 100644 --- a/net/irda/ircomm/ircomm_tty_attach.c +++ b/net/irda/ircomm/ircomm_tty_attach.c @@ -23,9 +23,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, - * MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
* ********************************************************************/ diff --git a/net/irda/ircomm/ircomm_tty_ioctl.c b/net/irda/ircomm/ircomm_tty_ioctl.c index b343f50dc8d..ce943853c38 100644 --- a/net/irda/ircomm/ircomm_tty_ioctl.c +++ b/net/irda/ircomm/ircomm_tty_ioctl.c @@ -22,9 +22,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, - * MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. * ********************************************************************/ diff --git a/net/irda/irda_device.c b/net/irda/irda_device.c index 14653b8d664..365b895da84 100644 --- a/net/irda/irda_device.c +++ b/net/irda/irda_device.c @@ -23,9 +23,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, - * MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. * ********************************************************************/ diff --git a/net/irda/irlap.c b/net/irda/irlap.c index 005b424494a..a778df55f5d 100644 --- a/net/irda/irlap.c +++ b/net/irda/irlap.c @@ -23,9 +23,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, - * MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
* ********************************************************************/ diff --git a/net/irda/parameters.c b/net/irda/parameters.c index 71cd38c1a67..6d0869716bf 100644 --- a/net/irda/parameters.c +++ b/net/irda/parameters.c @@ -22,9 +22,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, - * MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. * ********************************************************************/ diff --git a/net/irda/qos.c b/net/irda/qos.c index 798ffd9a705..11a7cc0cbc2 100644 --- a/net/irda/qos.c +++ b/net/irda/qos.c @@ -24,9 +24,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, - * MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. * ********************************************************************/ diff --git a/net/key/af_key.c b/net/key/af_key.c index 545f047868a..1a04c132936 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1340,6 +1340,12 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, const struct sadb_ max_spi = range->sadb_spirange_max; } + err = verify_spi_info(x->id.proto, min_spi, max_spi); + if (err) { + xfrm_state_put(x); + return err; + } + err = xfrm_alloc_spi(x, min_spi, max_spi); resp_skb = err ? 
ERR_PTR(err) : pfkey_xfrm_state2msg(x); @@ -1380,10 +1386,9 @@ static int pfkey_acquire(struct sock *sk, struct sk_buff *skb, const struct sadb return 0; spin_lock_bh(&x->lock); - if (x->km.state == XFRM_STATE_ACQ) { + if (x->km.state == XFRM_STATE_ACQ) x->km.state = XFRM_STATE_ERROR; - wake_up(&net->xfrm.km_waitq); - } + spin_unlock_bh(&x->lock); xfrm_state_put(x); return 0; @@ -1785,7 +1790,9 @@ static int pfkey_dump_sa(struct pfkey_sock *pfk) static void pfkey_dump_sa_done(struct pfkey_sock *pfk) { - xfrm_state_walk_done(&pfk->dump.u.state); + struct net *net = sock_net(&pfk->sk); + + xfrm_state_walk_done(&pfk->dump.u.state, net); } static int pfkey_dump(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) @@ -1861,7 +1868,7 @@ static u32 gen_reqid(struct net *net) reqid = IPSEC_MANUAL_REQID_MAX+1; xfrm_policy_walk_init(&walk, XFRM_POLICY_TYPE_MAIN); rc = xfrm_policy_walk(net, &walk, check_reqid, (void*)&reqid); - xfrm_policy_walk_done(&walk); + xfrm_policy_walk_done(&walk, net); if (rc != -EEXIST) return reqid; } while (reqid != start); @@ -2485,6 +2492,7 @@ static int pfkey_migrate(struct sock *sk, struct sk_buff *skb, struct xfrm_selector sel; struct xfrm_migrate m[XFRM_MAX_DEPTH]; struct xfrm_kmaddress k; + struct net *net = sock_net(sk); if (!present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC - 1], ext_hdrs[SADB_EXT_ADDRESS_DST - 1]) || @@ -2558,7 +2566,7 @@ static int pfkey_migrate(struct sock *sk, struct sk_buff *skb, } return xfrm_migrate(&sel, dir, XFRM_POLICY_TYPE_MAIN, m, i, - kma ? &k : NULL); + kma ? 
&k : NULL, net); out: return err; @@ -2659,7 +2667,9 @@ static int pfkey_dump_sp(struct pfkey_sock *pfk) static void pfkey_dump_sp_done(struct pfkey_sock *pfk) { - xfrm_policy_walk_done(&pfk->dump.u.policy); + struct net *net = sock_net((struct sock *)pfk); + + xfrm_policy_walk_done(&pfk->dump.u.policy, net); } static int pfkey_spddump(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) @@ -3569,6 +3579,7 @@ static int pfkey_sendmsg(struct kiocb *kiocb, struct sk_buff *skb = NULL; struct sadb_msg *hdr = NULL; int err; + struct net *net = sock_net(sk); err = -EOPNOTSUPP; if (msg->msg_flags & MSG_OOB) @@ -3591,9 +3602,9 @@ static int pfkey_sendmsg(struct kiocb *kiocb, if (!hdr) goto out; - mutex_lock(&xfrm_cfg_mutex); + mutex_lock(&net->xfrm.xfrm_cfg_mutex); err = pfkey_process(sk, skb, hdr); - mutex_unlock(&xfrm_cfg_mutex); + mutex_unlock(&net->xfrm.xfrm_cfg_mutex); out: if (err && hdr && pfkey_error(hdr, err, sk) == 0) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 9af77d9c0ec..735d0f60c83 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -176,7 +176,7 @@ l2tp_session_id_hash_2(struct l2tp_net *pn, u32 session_id) * owned by userspace. A struct sock returned from this function must be * released using l2tp_tunnel_sock_put once you're done with it. */ -struct sock *l2tp_tunnel_sock_lookup(struct l2tp_tunnel *tunnel) +static struct sock *l2tp_tunnel_sock_lookup(struct l2tp_tunnel *tunnel) { int err = 0; struct socket *sock = NULL; @@ -202,10 +202,9 @@ struct sock *l2tp_tunnel_sock_lookup(struct l2tp_tunnel *tunnel) out: return sk; } -EXPORT_SYMBOL_GPL(l2tp_tunnel_sock_lookup); /* Drop a reference to a tunnel socket obtained via. 
l2tp_tunnel_sock_put */ -void l2tp_tunnel_sock_put(struct sock *sk) +static void l2tp_tunnel_sock_put(struct sock *sk) { struct l2tp_tunnel *tunnel = l2tp_sock_to_tunnel(sk); if (tunnel) { @@ -217,7 +216,6 @@ void l2tp_tunnel_sock_put(struct sock *sk) } sock_put(sk); } -EXPORT_SYMBOL_GPL(l2tp_tunnel_sock_put); /* Lookup a session by id in the global session list */ diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 1ee9f6965d6..1f01ba3435b 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -238,8 +238,6 @@ out: return tunnel; } -struct sock *l2tp_tunnel_sock_lookup(struct l2tp_tunnel *tunnel); -void l2tp_tunnel_sock_put(struct sock *sk); struct l2tp_session *l2tp_session_find(struct net *net, struct l2tp_tunnel *tunnel, u32 session_id); diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index da1a1cee1a0..0b44d855269 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -403,7 +403,7 @@ static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m /* Get and verify the address. 
*/ if (msg->msg_name) { - struct sockaddr_l2tpip *lip = (struct sockaddr_l2tpip *) msg->msg_name; + DECLARE_SOCKADDR(struct sockaddr_l2tpip *, lip, msg->msg_name); rc = -EINVAL; if (msg->msg_namelen < sizeof(*lip)) goto out; @@ -512,7 +512,7 @@ static int l2tp_ip_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m struct inet_sock *inet = inet_sk(sk); size_t copied = 0; int err = -EOPNOTSUPP; - struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; + DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name); struct sk_buff *skb; if (flags & MSG_OOB) diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index bb6e206ea70..7704ea9502f 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -371,6 +371,9 @@ static int l2tp_ip6_connect(struct sock *sk, struct sockaddr *uaddr, if (addr_len < sizeof(*lsa)) return -EINVAL; + if (usin->sin6_family != AF_INET6) + return -EINVAL; + addr_type = ipv6_addr_type(&usin->sin6_addr); if (addr_type & IPV6_ADDR_MULTICAST) return -EINVAL; @@ -481,8 +484,7 @@ static int l2tp_ip6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len) { struct ipv6_txoptions opt_space; - struct sockaddr_l2tpip6 *lsa = - (struct sockaddr_l2tpip6 *) msg->msg_name; + DECLARE_SOCKADDR(struct sockaddr_l2tpip6 *, lsa, msg->msg_name); struct in6_addr *daddr, *final_p, final; struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6_txoptions *opt = NULL; @@ -597,7 +599,7 @@ static int l2tp_ip6_sendmsg(struct kiocb *iocb, struct sock *sk, security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true); + dst = ip6_dst_lookup_flow(sk, &fl6, final_p); if (IS_ERR(dst)) { err = PTR_ERR(dst); goto out; @@ -652,7 +654,7 @@ static int l2tp_ip6_recvmsg(struct kiocb *iocb, struct sock *sk, int flags, int *addr_len) { struct ipv6_pinfo *np = inet6_sk(sk); - struct sockaddr_l2tpip6 *lsa = (struct sockaddr_l2tpip6 *)msg->msg_name; + DECLARE_SOCKADDR(struct sockaddr_l2tpip6 *, lsa, 
msg->msg_name); size_t copied = 0; int err = -EOPNOTSUPP; struct sk_buff *skb; diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index c71b699eb55..0080d2b0a8a 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -707,7 +707,7 @@ out: static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len, int flags) { - struct sockaddr_llc *uaddr = (struct sockaddr_llc *)msg->msg_name; + DECLARE_SOCKADDR(struct sockaddr_llc *, uaddr, msg->msg_name); const int nonblock = flags & MSG_DONTWAIT; struct sk_buff *skb = NULL; struct sock *sk = sock->sk; @@ -884,7 +884,7 @@ static int llc_ui_sendmsg(struct kiocb *iocb, struct socket *sock, { struct sock *sk = sock->sk; struct llc_sock *llc = llc_sk(sk); - struct sockaddr_llc *addr = (struct sockaddr_llc *)msg->msg_name; + DECLARE_SOCKADDR(struct sockaddr_llc *, addr, msg->msg_name); int flags = msg->msg_flags; int noblock = flags & MSG_DONTWAIT; struct sk_buff *skb; diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index cd872417796..42dc2e45c92 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -753,7 +753,7 @@ void llc_sap_remove_socket(struct llc_sap *sap, struct sock *sk) * * Sends received pdus to the connection state machine. */ -static int llc_conn_rcv(struct sock* sk, struct sk_buff *skb) +static int llc_conn_rcv(struct sock *sk, struct sk_buff *skb) { struct llc_conn_state_ev *ev = llc_conn_ev(skb); @@ -891,7 +891,7 @@ out_kfree_skb: * * Initializes a socket with default llc values. */ -static void llc_sk_init(struct sock* sk) +static void llc_sk_init(struct sock *sk) { struct llc_sock *llc = llc_sk(sk); diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c index 2bb0ddff8c0..842851cef69 100644 --- a/net/llc/llc_core.c +++ b/net/llc/llc_core.c @@ -23,7 +23,7 @@ #include <net/llc.h> LIST_HEAD(llc_sap_list); -DEFINE_SPINLOCK(llc_sap_list_lock); +static DEFINE_SPINLOCK(llc_sap_list_lock); /** * llc_sap_alloc - allocates and initializes sap. 
@@ -48,7 +48,7 @@ static struct llc_sap *llc_sap_alloc(void) static struct llc_sap *__llc_sap_find(unsigned char sap_value) { - struct llc_sap* sap; + struct llc_sap *sap; list_for_each_entry(sap, &llc_sap_list, node) if (sap->laddr.lsap == sap_value) @@ -159,7 +159,6 @@ module_init(llc_init); module_exit(llc_exit); EXPORT_SYMBOL(llc_sap_list); -EXPORT_SYMBOL(llc_sap_list_lock); EXPORT_SYMBOL(llc_sap_find); EXPORT_SYMBOL(llc_sap_open); EXPORT_SYMBOL(llc_sap_close); diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c index e5850699098..06033f6c845 100644 --- a/net/llc/llc_sap.c +++ b/net/llc/llc_sap.c @@ -66,7 +66,7 @@ struct sk_buff *llc_alloc_frame(struct sock *sk, struct net_device *dev, return skb; } -void llc_save_primitive(struct sock *sk, struct sk_buff* skb, u8 prim) +void llc_save_primitive(struct sock *sk, struct sk_buff *skb, u8 prim) { struct sockaddr_llc *addr; @@ -114,7 +114,7 @@ void llc_sap_rtn_pdu(struct llc_sap *sap, struct sk_buff *skb) * failure. */ static struct llc_sap_state_trans *llc_find_sap_trans(struct llc_sap *sap, - struct sk_buff* skb) + struct sk_buff *skb) { int i = 0; struct llc_sap_state_trans *rc = NULL; diff --git a/net/mac80211/aes_cmac.c b/net/mac80211/aes_cmac.c index 537488cbf94..9b9009f9955 100644 --- a/net/mac80211/aes_cmac.c +++ b/net/mac80211/aes_cmac.c @@ -111,7 +111,7 @@ void ieee80211_aes_cmac(struct crypto_cipher *tfm, const u8 *aad, } -struct crypto_cipher * ieee80211_aes_cmac_key_setup(const u8 key[]) +struct crypto_cipher *ieee80211_aes_cmac_key_setup(const u8 key[]) { struct crypto_cipher *tfm; diff --git a/net/mac80211/aes_cmac.h b/net/mac80211/aes_cmac.h index 20785a64725..0ce6487af79 100644 --- a/net/mac80211/aes_cmac.h +++ b/net/mac80211/aes_cmac.h @@ -11,7 +11,7 @@ #include <linux/crypto.h> -struct crypto_cipher * ieee80211_aes_cmac_key_setup(const u8 key[]); +struct crypto_cipher *ieee80211_aes_cmac_key_setup(const u8 key[]); void ieee80211_aes_cmac(struct crypto_cipher *tfm, const u8 *aad, const u8 *data, 
size_t data_len, u8 *mic); void ieee80211_aes_cmac_key_free(struct crypto_cipher *tfm); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 364ce0c5962..f9ae9b85d4c 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -133,7 +133,9 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, struct key_params *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; struct sta_info *sta = NULL; + const struct ieee80211_cipher_scheme *cs = NULL; struct ieee80211_key *key; int err; @@ -145,22 +147,28 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, case WLAN_CIPHER_SUITE_WEP40: case WLAN_CIPHER_SUITE_TKIP: case WLAN_CIPHER_SUITE_WEP104: - if (IS_ERR(sdata->local->wep_tx_tfm)) + if (IS_ERR(local->wep_tx_tfm)) return -EINVAL; break; + case WLAN_CIPHER_SUITE_CCMP: + case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_GCMP: + break; default: + cs = ieee80211_cs_get(local, params->cipher, sdata->vif.type); break; } key = ieee80211_key_alloc(params->cipher, key_idx, params->key_len, - params->key, params->seq_len, params->seq); + params->key, params->seq_len, params->seq, + cs); if (IS_ERR(key)) return PTR_ERR(key); if (pairwise) key->conf.flags |= IEEE80211_KEY_FLAG_PAIRWISE; - mutex_lock(&sdata->local->sta_mtx); + mutex_lock(&local->sta_mtx); if (mac_addr) { if (ieee80211_vif_is_mesh(&sdata->vif)) @@ -216,10 +224,13 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, break; } + if (sta) + sta->cipher_scheme = cs; + err = ieee80211_key_link(key, sdata, sta); out_unlock: - mutex_unlock(&sdata->local->sta_mtx); + mutex_unlock(&local->sta_mtx); return err; } @@ -244,7 +255,7 @@ static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev, goto out_unlock; if (pairwise) - key = key_mtx_dereference(local, sta->ptk); + key = key_mtx_dereference(local, sta->ptk[key_idx]); else key = key_mtx_dereference(local, 
sta->gtk[key_idx]); } else @@ -290,9 +301,10 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, if (!sta) goto out; - if (pairwise) - key = rcu_dereference(sta->ptk); - else if (key_idx < NUM_DEFAULT_KEYS) + if (pairwise && key_idx < NUM_DEFAULT_KEYS) + key = rcu_dereference(sta->ptk[key_idx]); + else if (!pairwise && + key_idx < NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS) key = rcu_dereference(sta->gtk[key_idx]); } else key = rcu_dereference(sdata->keys[key_idx]); @@ -521,8 +533,8 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) STATION_INFO_PEER_PM | STATION_INFO_NONPEER_PM; - sinfo->llid = le16_to_cpu(sta->llid); - sinfo->plid = le16_to_cpu(sta->plid); + sinfo->llid = sta->llid; + sinfo->plid = sta->plid; sinfo->plink_state = sta->plink_state; if (test_sta_flag(sta, WLAN_STA_TOFFSET_KNOWN)) { sinfo->filled |= STATION_INFO_T_OFFSET; @@ -816,6 +828,7 @@ static int ieee80211_set_monitor_channel(struct wiphy *wiphy, if (cfg80211_chandef_identical(&local->monitor_chandef, chandef)) return 0; + mutex_lock(&local->mtx); mutex_lock(&local->iflist_mtx); if (local->use_chanctx) { sdata = rcu_dereference_protected( @@ -834,6 +847,7 @@ static int ieee80211_set_monitor_channel(struct wiphy *wiphy, if (ret == 0) local->monitor_chandef = *chandef; mutex_unlock(&local->iflist_mtx); + mutex_unlock(&local->mtx); return ret; } @@ -846,7 +860,7 @@ static int ieee80211_set_probe_resp(struct ieee80211_sub_if_data *sdata, if (!resp || !resp_len) return 1; - old = rtnl_dereference(sdata->u.ap.probe_resp); + old = sdata_dereference(sdata->u.ap.probe_resp, sdata); new = kzalloc(sizeof(struct probe_resp) + resp_len, GFP_KERNEL); if (!new) @@ -862,15 +876,16 @@ static int ieee80211_set_probe_resp(struct ieee80211_sub_if_data *sdata, return 0; } -int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata, - struct cfg80211_beacon_data *params) +static int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata, + struct 
cfg80211_beacon_data *params) { struct beacon_data *new, *old; int new_head_len, new_tail_len; int size, err; u32 changed = BSS_CHANGED_BEACON; - old = rtnl_dereference(sdata->u.ap.beacon); + old = sdata_dereference(sdata->u.ap.beacon, sdata); + /* Need to have a beacon head if we don't have one yet */ if (!params->head && !old) @@ -938,6 +953,7 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_ap_settings *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; struct beacon_data *old; struct ieee80211_sub_if_data *vlan; u32 changed = BSS_CHANGED_BEACON_INT | @@ -947,7 +963,7 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, BSS_CHANGED_P2P_PS; int err; - old = rtnl_dereference(sdata->u.ap.beacon); + old = sdata_dereference(sdata->u.ap.beacon, sdata); if (old) return -EALREADY; @@ -956,8 +972,10 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, sdata->needed_rx_chains = sdata->local->rx_chains; sdata->radar_required = params->radar_required; + mutex_lock(&local->mtx); err = ieee80211_vif_use_channel(sdata, ¶ms->chandef, IEEE80211_CHANCTX_SHARED); + mutex_unlock(&local->mtx); if (err) return err; ieee80211_vif_copy_chanctx_to_vlans(sdata, false); @@ -968,11 +986,19 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, */ sdata->control_port_protocol = params->crypto.control_port_ethertype; sdata->control_port_no_encrypt = params->crypto.control_port_no_encrypt; + sdata->encrypt_headroom = ieee80211_cs_headroom(sdata->local, + ¶ms->crypto, + sdata->vif.type); + list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) { vlan->control_port_protocol = params->crypto.control_port_ethertype; vlan->control_port_no_encrypt = params->crypto.control_port_no_encrypt; + vlan->encrypt_headroom = + ieee80211_cs_headroom(sdata->local, + ¶ms->crypto, + vlan->vif.type); } 
sdata->vif.bss_conf.beacon_int = params->beacon_interval; @@ -1001,13 +1027,15 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, err = drv_start_ap(sdata->local, sdata); if (err) { - old = rtnl_dereference(sdata->u.ap.beacon); + old = sdata_dereference(sdata->u.ap.beacon, sdata); + if (old) kfree_rcu(old, rcu_head); RCU_INIT_POINTER(sdata->u.ap.beacon, NULL); return err; } + ieee80211_recalc_dtim(local, sdata); ieee80211_bss_info_change_notify(sdata, changed); netif_carrier_on(dev); @@ -1032,7 +1060,7 @@ static int ieee80211_change_beacon(struct wiphy *wiphy, struct net_device *dev, if (sdata->vif.csa_active) return -EBUSY; - old = rtnl_dereference(sdata->u.ap.beacon); + old = sdata_dereference(sdata->u.ap.beacon, sdata); if (!old) return -ENOENT; @@ -1050,15 +1078,18 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev) struct ieee80211_local *local = sdata->local; struct beacon_data *old_beacon; struct probe_resp *old_probe_resp; + struct cfg80211_chan_def chandef; - old_beacon = rtnl_dereference(sdata->u.ap.beacon); + old_beacon = sdata_dereference(sdata->u.ap.beacon, sdata); if (!old_beacon) return -ENOENT; - old_probe_resp = rtnl_dereference(sdata->u.ap.probe_resp); + old_probe_resp = sdata_dereference(sdata->u.ap.probe_resp, sdata); /* abort any running channel switch */ sdata->vif.csa_active = false; - cancel_work_sync(&sdata->csa_finalize_work); + kfree(sdata->u.ap.next_beacon); + sdata->u.ap.next_beacon = NULL; + cancel_work_sync(&sdata->u.ap.request_smps_work); /* turn off carrier for this interface and dependent VLANs */ @@ -1073,17 +1104,8 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev) if (old_probe_resp) kfree_rcu(old_probe_resp, rcu_head); - list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) - sta_info_flush_defer(vlan); - sta_info_flush_defer(sdata); - synchronize_net(); - rcu_barrier(); - list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) { - 
sta_info_flush_cleanup(vlan); - ieee80211_free_keys(vlan); - } - sta_info_flush_cleanup(sdata); - ieee80211_free_keys(sdata); + __sta_info_flush(sdata, true); + ieee80211_free_keys(sdata, true); sdata->vif.bss_conf.enable_beacon = false; sdata->vif.bss_conf.ssid_len = 0; @@ -1091,8 +1113,10 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev) ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED); if (sdata->wdev.cac_started) { + chandef = sdata->vif.bss_conf.chandef; cancel_delayed_work_sync(&sdata->dfs_cac_timer_work); - cfg80211_cac_event(sdata->dev, NL80211_RADAR_CAC_ABORTED, + cfg80211_cac_event(sdata->dev, &chandef, + NL80211_RADAR_CAC_ABORTED, GFP_KERNEL); } @@ -1103,7 +1127,9 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev) skb_queue_purge(&sdata->u.ap.ps.bc_buf); ieee80211_vif_copy_chanctx_to_vlans(sdata, true); + mutex_lock(&local->mtx); ieee80211_vif_release_channel(sdata); + mutex_unlock(&local->mtx); return 0; } @@ -1926,8 +1952,10 @@ static int ieee80211_join_mesh(struct wiphy *wiphy, struct net_device *dev, sdata->smps_mode = IEEE80211_SMPS_OFF; sdata->needed_rx_chains = sdata->local->rx_chains; + mutex_lock(&sdata->local->mtx); err = ieee80211_vif_use_channel(sdata, &setup->chandef, IEEE80211_CHANCTX_SHARED); + mutex_unlock(&sdata->local->mtx); if (err) return err; @@ -1939,7 +1967,9 @@ static int ieee80211_leave_mesh(struct wiphy *wiphy, struct net_device *dev) struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); ieee80211_stop_mesh(sdata); + mutex_lock(&sdata->local->mtx); ieee80211_vif_release_channel(sdata); + mutex_unlock(&sdata->local->mtx); return 0; } @@ -1953,7 +1983,7 @@ static int ieee80211_change_bss(struct wiphy *wiphy, enum ieee80211_band band; u32 changed = 0; - if (!rtnl_dereference(sdata->u.ap.beacon)) + if (!sdata_dereference(sdata->u.ap.beacon, sdata)) return -ENOENT; band = ieee80211_get_sdata_band(sdata); @@ -2561,8 +2591,8 @@ static int 
ieee80211_set_bitrate_mask(struct wiphy *wiphy, int j; sdata->rc_rateidx_mask[i] = mask->control[i].legacy; - memcpy(sdata->rc_rateidx_mcs_mask[i], mask->control[i].mcs, - sizeof(mask->control[i].mcs)); + memcpy(sdata->rc_rateidx_mcs_mask[i], mask->control[i].ht_mcs, + sizeof(mask->control[i].ht_mcs)); sdata->rc_has_mcs_mask[i] = false; if (!sband) @@ -2877,26 +2907,29 @@ static int ieee80211_start_radar_detection(struct wiphy *wiphy, unsigned long timeout; int err; - if (!list_empty(&local->roc_list) || local->scanning) - return -EBUSY; + mutex_lock(&local->mtx); + if (!list_empty(&local->roc_list) || local->scanning) { + err = -EBUSY; + goto out_unlock; + } /* whatever, but channel contexts should not complain about that one */ sdata->smps_mode = IEEE80211_SMPS_OFF; sdata->needed_rx_chains = local->rx_chains; sdata->radar_required = true; - mutex_lock(&local->iflist_mtx); err = ieee80211_vif_use_channel(sdata, chandef, IEEE80211_CHANCTX_SHARED); - mutex_unlock(&local->iflist_mtx); if (err) - return err; + goto out_unlock; timeout = msecs_to_jiffies(IEEE80211_DFS_MIN_CAC_TIME_MS); ieee80211_queue_delayed_work(&sdata->local->hw, &sdata->dfs_cac_timer_work, timeout); - return 0; + out_unlock: + mutex_unlock(&local->mtx); + return err; } static struct cfg80211_beacon_data * @@ -2963,27 +2996,35 @@ void ieee80211_csa_finalize_work(struct work_struct *work) struct ieee80211_local *local = sdata->local; int err, changed = 0; + sdata_lock(sdata); + /* AP might have been stopped while waiting for the lock. 
*/ + if (!sdata->vif.csa_active) + goto unlock; + if (!ieee80211_sdata_running(sdata)) - return; + goto unlock; sdata->radar_required = sdata->csa_radar_required; - err = ieee80211_vif_change_channel(sdata, &local->csa_chandef, - &changed); + mutex_lock(&local->mtx); + err = ieee80211_vif_change_channel(sdata, &changed); + mutex_unlock(&local->mtx); if (WARN_ON(err < 0)) - return; + goto unlock; if (!local->use_chanctx) { - local->_oper_chandef = local->csa_chandef; + local->_oper_chandef = sdata->csa_chandef; ieee80211_hw_config(local, 0); } ieee80211_bss_info_change_notify(sdata, changed); + sdata->vif.csa_active = false; switch (sdata->vif.type) { case NL80211_IFTYPE_AP: err = ieee80211_assign_beacon(sdata, sdata->u.ap.next_beacon); if (err < 0) - return; + goto unlock; + changed |= err; kfree(sdata->u.ap.next_beacon); sdata->u.ap.next_beacon = NULL; @@ -2997,24 +3038,26 @@ void ieee80211_csa_finalize_work(struct work_struct *work) case NL80211_IFTYPE_MESH_POINT: err = ieee80211_mesh_finish_csa(sdata); if (err < 0) - return; + goto unlock; break; #endif default: WARN_ON(1); - return; + goto unlock; } - sdata->vif.csa_active = false; ieee80211_wake_queues_by_reason(&sdata->local->hw, IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_CSA); - cfg80211_ch_switch_notify(sdata->dev, &local->csa_chandef); + cfg80211_ch_switch_notify(sdata->dev, &sdata->csa_chandef); + +unlock: + sdata_unlock(sdata); } -static int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, - struct cfg80211_csa_settings *params) +int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, + struct cfg80211_csa_settings *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; @@ -3023,6 +3066,8 @@ static int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_if_mesh __maybe_unused *ifmsh; int err, num_chanctx; + lockdep_assert_held(&sdata->wdev.mtx); + if 
(!list_empty(&local->roc_list) || local->scanning) return -EBUSY; @@ -3143,7 +3188,7 @@ static int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_CSA); - local->csa_chandef = params->chandef; + sdata->csa_chandef = params->chandef; sdata->vif.csa_active = true; ieee80211_bss_info_change_notify(sdata, err); @@ -3153,26 +3198,25 @@ static int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, } static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, - struct ieee80211_channel *chan, bool offchan, - unsigned int wait, const u8 *buf, size_t len, - bool no_cck, bool dont_wait_for_ack, u64 *cookie) + struct cfg80211_mgmt_tx_params *params, + u64 *cookie) { struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); struct ieee80211_local *local = sdata->local; struct sk_buff *skb; struct sta_info *sta; - const struct ieee80211_mgmt *mgmt = (void *)buf; + const struct ieee80211_mgmt *mgmt = (void *)params->buf; bool need_offchan = false; u32 flags; int ret; - if (dont_wait_for_ack) + if (params->dont_wait_for_ack) flags = IEEE80211_TX_CTL_NO_ACK; else flags = IEEE80211_TX_INTFL_NL80211_FRAME_TX | IEEE80211_TX_CTL_REQ_TX_STATUS; - if (no_cck) + if (params->no_cck) flags |= IEEE80211_TX_CTL_NO_CCK_RATE; switch (sdata->vif.type) { @@ -3220,7 +3264,7 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, /* configurations requiring offchan cannot work if no channel has been * specified */ - if (need_offchan && !chan) + if (need_offchan && !params->chan) return -EINVAL; mutex_lock(&local->mtx); @@ -3233,8 +3277,10 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); if (chanctx_conf) { - need_offchan = chan && (chan != chanctx_conf->def.chan); - } else if (!chan) { + need_offchan = params->chan && + (params->chan != + chanctx_conf->def.chan); + } 
else if (!params->chan) { ret = -EINVAL; rcu_read_unlock(); goto out_unlock; @@ -3244,19 +3290,19 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, rcu_read_unlock(); } - if (need_offchan && !offchan) { + if (need_offchan && !params->offchan) { ret = -EBUSY; goto out_unlock; } - skb = dev_alloc_skb(local->hw.extra_tx_headroom + len); + skb = dev_alloc_skb(local->hw.extra_tx_headroom + params->len); if (!skb) { ret = -ENOMEM; goto out_unlock; } skb_reserve(skb, local->hw.extra_tx_headroom); - memcpy(skb_put(skb, len), buf, len); + memcpy(skb_put(skb, params->len), params->buf, params->len); IEEE80211_SKB_CB(skb)->flags = flags; @@ -3276,8 +3322,8 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, local->hw.offchannel_tx_hw_queue; /* This will handle all kinds of coalescing and immediate TX */ - ret = ieee80211_start_roc_work(local, sdata, chan, - wait, cookie, skb, + ret = ieee80211_start_roc_work(local, sdata, params->chan, + params->wait, cookie, skb, IEEE80211_ROC_TYPE_MGMT_TX); if (ret) kfree_skb(skb); @@ -3792,6 +3838,31 @@ static void ieee80211_set_wakeup(struct wiphy *wiphy, bool enabled) } #endif +static int ieee80211_set_qos_map(struct wiphy *wiphy, + struct net_device *dev, + struct cfg80211_qos_map *qos_map) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct mac80211_qos_map *new_qos_map, *old_qos_map; + + if (qos_map) { + new_qos_map = kzalloc(sizeof(*new_qos_map), GFP_KERNEL); + if (!new_qos_map) + return -ENOMEM; + memcpy(&new_qos_map->qos_map, qos_map, sizeof(*qos_map)); + } else { + /* A NULL qos_map was passed to disable QoS mapping */ + new_qos_map = NULL; + } + + old_qos_map = sdata_dereference(sdata->qos_map, sdata); + rcu_assign_pointer(sdata->qos_map, new_qos_map); + if (old_qos_map) + kfree_rcu(old_qos_map, rcu_head); + + return 0; +} + struct cfg80211_ops mac80211_config_ops = { .add_virtual_intf = ieee80211_add_iface, .del_virtual_intf = 
ieee80211_del_iface, @@ -3871,4 +3942,5 @@ struct cfg80211_ops mac80211_config_ops = { .get_channel = ieee80211_cfg_get_channel, .start_radar_detection = ieee80211_start_radar_detection, .channel_switch = ieee80211_channel_switch, + .set_qos_map = ieee80211_set_qos_map, }; diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 03ba6b5c537..f43613a97dd 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -9,6 +9,140 @@ #include "ieee80211_i.h" #include "driver-ops.h" +static enum nl80211_chan_width ieee80211_get_sta_bw(struct ieee80211_sta *sta) +{ + switch (sta->bandwidth) { + case IEEE80211_STA_RX_BW_20: + if (sta->ht_cap.ht_supported) + return NL80211_CHAN_WIDTH_20; + else + return NL80211_CHAN_WIDTH_20_NOHT; + case IEEE80211_STA_RX_BW_40: + return NL80211_CHAN_WIDTH_40; + case IEEE80211_STA_RX_BW_80: + return NL80211_CHAN_WIDTH_80; + case IEEE80211_STA_RX_BW_160: + /* + * This applied for both 160 and 80+80. since we use + * the returned value to consider degradation of + * ctx->conf.min_def, we have to make sure to take + * the bigger one (NL80211_CHAN_WIDTH_160). + * Otherwise we might try degrading even when not + * needed, as the max required sta_bw returned (80+80) + * might be smaller than the configured bw (160). 
+ */ + return NL80211_CHAN_WIDTH_160; + default: + WARN_ON(1); + return NL80211_CHAN_WIDTH_20; + } +} + +static enum nl80211_chan_width +ieee80211_get_max_required_bw(struct ieee80211_sub_if_data *sdata) +{ + enum nl80211_chan_width max_bw = NL80211_CHAN_WIDTH_20_NOHT; + struct sta_info *sta; + + rcu_read_lock(); + list_for_each_entry_rcu(sta, &sdata->local->sta_list, list) { + if (sdata != sta->sdata && + !(sta->sdata->bss && sta->sdata->bss == sdata->bss)) + continue; + + if (!sta->uploaded) + continue; + + max_bw = max(max_bw, ieee80211_get_sta_bw(&sta->sta)); + } + rcu_read_unlock(); + + return max_bw; +} + +static enum nl80211_chan_width +ieee80211_get_chanctx_max_required_bw(struct ieee80211_local *local, + struct ieee80211_chanctx_conf *conf) +{ + struct ieee80211_sub_if_data *sdata; + enum nl80211_chan_width max_bw = NL80211_CHAN_WIDTH_20_NOHT; + + rcu_read_lock(); + list_for_each_entry_rcu(sdata, &local->interfaces, list) { + struct ieee80211_vif *vif = &sdata->vif; + enum nl80211_chan_width width = NL80211_CHAN_WIDTH_20_NOHT; + + if (!ieee80211_sdata_running(sdata)) + continue; + + if (rcu_access_pointer(sdata->vif.chanctx_conf) != conf) + continue; + + switch (vif->type) { + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_AP_VLAN: + width = ieee80211_get_max_required_bw(sdata); + break; + case NL80211_IFTYPE_P2P_DEVICE: + continue; + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_ADHOC: + case NL80211_IFTYPE_WDS: + case NL80211_IFTYPE_MESH_POINT: + width = vif->bss_conf.chandef.width; + break; + case NL80211_IFTYPE_UNSPECIFIED: + case NUM_NL80211_IFTYPES: + case NL80211_IFTYPE_MONITOR: + case NL80211_IFTYPE_P2P_CLIENT: + case NL80211_IFTYPE_P2P_GO: + WARN_ON_ONCE(1); + } + max_bw = max(max_bw, width); + } + rcu_read_unlock(); + + return max_bw; +} + +/* + * recalc the min required chan width of the channel context, which is + * the max of min required widths of all the interfaces bound to this + * channel context. 
+ */ +void ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local, + struct ieee80211_chanctx *ctx) +{ + enum nl80211_chan_width max_bw; + struct cfg80211_chan_def min_def; + + lockdep_assert_held(&local->chanctx_mtx); + + /* don't optimize 5MHz, 10MHz, and radar_enabled confs */ + if (ctx->conf.def.width == NL80211_CHAN_WIDTH_5 || + ctx->conf.def.width == NL80211_CHAN_WIDTH_10 || + ctx->conf.radar_enabled) { + ctx->conf.min_def = ctx->conf.def; + return; + } + + max_bw = ieee80211_get_chanctx_max_required_bw(local, &ctx->conf); + + /* downgrade chandef up to max_bw */ + min_def = ctx->conf.def; + while (min_def.width > max_bw) + ieee80211_chandef_downgrade(&min_def); + + if (cfg80211_chandef_identical(&ctx->conf.min_def, &min_def)) + return; + + ctx->conf.min_def = min_def; + if (!ctx->driver_present) + return; + + drv_change_chanctx(local, ctx, IEEE80211_CHANCTX_CHANGE_MIN_WIDTH); +} + static void ieee80211_change_chanctx(struct ieee80211_local *local, struct ieee80211_chanctx *ctx, const struct cfg80211_chan_def *chandef) @@ -20,6 +154,7 @@ static void ieee80211_change_chanctx(struct ieee80211_local *local, ctx->conf.def = *chandef; drv_change_chanctx(local, ctx, IEEE80211_CHANCTX_CHANGE_WIDTH); + ieee80211_recalc_chanctx_min_def(local, ctx); if (!local->use_chanctx) { local->_oper_chandef = *chandef; @@ -93,11 +228,12 @@ ieee80211_new_chanctx(struct ieee80211_local *local, ctx->conf.rx_chains_dynamic = 1; ctx->mode = mode; ctx->conf.radar_enabled = ieee80211_is_radar_required(local); + ieee80211_recalc_chanctx_min_def(local, ctx); if (!local->use_chanctx) local->hw.conf.radar_enabled = ctx->conf.radar_enabled; - /* acquire mutex to prevent idle from changing */ - mutex_lock(&local->mtx); + /* we hold the mutex to prevent idle from changing */ + lockdep_assert_held(&local->mtx); /* turn idle off *before* setting channel -- some drivers need that */ changed = ieee80211_idle_off(local); if (changed) @@ -110,19 +246,14 @@ ieee80211_new_chanctx(struct 
ieee80211_local *local, err = drv_add_chanctx(local, ctx); if (err) { kfree(ctx); - ctx = ERR_PTR(err); - ieee80211_recalc_idle(local); - goto out; + return ERR_PTR(err); } } /* and keep the mutex held until the new chanctx is on the list */ list_add_rcu(&ctx->list, &local->chanctx_list); - out: - mutex_unlock(&local->mtx); - return ctx; } @@ -158,9 +289,7 @@ static void ieee80211_free_chanctx(struct ieee80211_local *local, /* throw a warning if this wasn't the only channel context. */ WARN_ON(check_single_channel && !list_empty(&local->chanctx_list)); - mutex_lock(&local->mtx); ieee80211_recalc_idle(local); - mutex_unlock(&local->mtx); } static int ieee80211_assign_vif_chanctx(struct ieee80211_sub_if_data *sdata, @@ -179,6 +308,7 @@ static int ieee80211_assign_vif_chanctx(struct ieee80211_sub_if_data *sdata, ctx->refcount++; ieee80211_recalc_txpower(sdata); + ieee80211_recalc_chanctx_min_def(local, ctx); sdata->vif.bss_conf.idle = false; if (sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE && @@ -221,6 +351,31 @@ static void ieee80211_recalc_chanctx_chantype(struct ieee80211_local *local, ieee80211_change_chanctx(local, ctx, compat); } +static void ieee80211_recalc_radar_chanctx(struct ieee80211_local *local, + struct ieee80211_chanctx *chanctx) +{ + bool radar_enabled; + + lockdep_assert_held(&local->chanctx_mtx); + /* for setting local->radar_detect_enabled */ + lockdep_assert_held(&local->mtx); + + radar_enabled = ieee80211_is_radar_required(local); + + if (radar_enabled == chanctx->conf.radar_enabled) + return; + + chanctx->conf.radar_enabled = radar_enabled; + local->radar_detect_enabled = chanctx->conf.radar_enabled; + + if (!local->use_chanctx) { + local->hw.conf.radar_enabled = chanctx->conf.radar_enabled; + ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); + } + + drv_change_chanctx(local, chanctx, IEEE80211_CHANCTX_CHANGE_RADAR); +} + static void ieee80211_unassign_vif_chanctx(struct ieee80211_sub_if_data *sdata, struct ieee80211_chanctx *ctx) { @@ 
-243,6 +398,7 @@ static void ieee80211_unassign_vif_chanctx(struct ieee80211_sub_if_data *sdata, ieee80211_recalc_chanctx_chantype(sdata->local, ctx); ieee80211_recalc_smps_chanctx(local, ctx); ieee80211_recalc_radar_chanctx(local, ctx); + ieee80211_recalc_chanctx_min_def(local, ctx); } } @@ -266,29 +422,6 @@ static void __ieee80211_vif_release_channel(struct ieee80211_sub_if_data *sdata) ieee80211_free_chanctx(local, ctx); } -void ieee80211_recalc_radar_chanctx(struct ieee80211_local *local, - struct ieee80211_chanctx *chanctx) -{ - bool radar_enabled; - - lockdep_assert_held(&local->chanctx_mtx); - - radar_enabled = ieee80211_is_radar_required(local); - - if (radar_enabled == chanctx->conf.radar_enabled) - return; - - chanctx->conf.radar_enabled = radar_enabled; - local->radar_detect_enabled = chanctx->conf.radar_enabled; - - if (!local->use_chanctx) { - local->hw.conf.radar_enabled = chanctx->conf.radar_enabled; - ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); - } - - drv_change_chanctx(local, chanctx, IEEE80211_CHANCTX_CHANGE_RADAR); -} - void ieee80211_recalc_smps_chanctx(struct ieee80211_local *local, struct ieee80211_chanctx *chanctx) { @@ -380,6 +513,8 @@ int ieee80211_vif_use_channel(struct ieee80211_sub_if_data *sdata, struct ieee80211_chanctx *ctx; int ret; + lockdep_assert_held(&local->mtx); + WARN_ON(sdata->dev && netif_carrier_ok(sdata->dev)); mutex_lock(&local->chanctx_mtx); @@ -411,15 +546,17 @@ int ieee80211_vif_use_channel(struct ieee80211_sub_if_data *sdata, } int ieee80211_vif_change_channel(struct ieee80211_sub_if_data *sdata, - const struct cfg80211_chan_def *chandef, u32 *changed) { struct ieee80211_local *local = sdata->local; struct ieee80211_chanctx_conf *conf; struct ieee80211_chanctx *ctx; + const struct cfg80211_chan_def *chandef = &sdata->csa_chandef; int ret; u32 chanctx_changed = 0; + lockdep_assert_held(&local->mtx); + /* should never be called if not performing a channel switch. 
*/ if (WARN_ON(!sdata->vif.csa_active)) return -EINVAL; @@ -456,6 +593,7 @@ int ieee80211_vif_change_channel(struct ieee80211_sub_if_data *sdata, ieee80211_recalc_chanctx_chantype(local, ctx); ieee80211_recalc_smps_chanctx(local, ctx); ieee80211_recalc_radar_chanctx(local, ctx); + ieee80211_recalc_chanctx_min_def(local, ctx); ret = 0; out: @@ -516,6 +654,8 @@ void ieee80211_vif_release_channel(struct ieee80211_sub_if_data *sdata) { WARN_ON(sdata->dev && netif_carrier_ok(sdata->dev)); + lockdep_assert_held(&sdata->local->mtx); + mutex_lock(&sdata->local->chanctx_mtx); __ieee80211_vif_release_channel(sdata); mutex_unlock(&sdata->local->chanctx_mtx); diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 5c090e41d9b..fa16e54980a 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -17,6 +17,172 @@ #define DEBUGFS_FORMAT_BUFFER_SIZE 100 +#define TX_LATENCY_BIN_DELIMTER_C ',' +#define TX_LATENCY_BIN_DELIMTER_S "," +#define TX_LATENCY_BINS_DISABLED "enable(bins disabled)\n" +#define TX_LATENCY_DISABLED "disable\n" + + +/* + * Display if Tx latency statistics & bins are enabled/disabled + */ +static ssize_t sta_tx_latency_stat_read(struct file *file, + char __user *userbuf, + size_t count, loff_t *ppos) +{ + struct ieee80211_local *local = file->private_data; + struct ieee80211_tx_latency_bin_ranges *tx_latency; + char *buf; + int bufsz, i, ret; + int pos = 0; + + rcu_read_lock(); + + tx_latency = rcu_dereference(local->tx_latency); + + if (tx_latency && tx_latency->n_ranges) { + bufsz = tx_latency->n_ranges * 15; + buf = kzalloc(bufsz, GFP_ATOMIC); + if (!buf) + goto err; + + for (i = 0; i < tx_latency->n_ranges; i++) + pos += scnprintf(buf + pos, bufsz - pos, "%d,", + tx_latency->ranges[i]); + pos += scnprintf(buf + pos, bufsz - pos, "\n"); + } else if (tx_latency) { + bufsz = sizeof(TX_LATENCY_BINS_DISABLED) + 1; + buf = kzalloc(bufsz, GFP_ATOMIC); + if (!buf) + goto err; + + pos += scnprintf(buf + pos, bufsz - pos, "%s\n", + 
TX_LATENCY_BINS_DISABLED); + } else { + bufsz = sizeof(TX_LATENCY_DISABLED) + 1; + buf = kzalloc(bufsz, GFP_ATOMIC); + if (!buf) + goto err; + + pos += scnprintf(buf + pos, bufsz - pos, "%s\n", + TX_LATENCY_DISABLED); + } + + rcu_read_unlock(); + + ret = simple_read_from_buffer(userbuf, count, ppos, buf, pos); + kfree(buf); + + return ret; +err: + rcu_read_unlock(); + return -ENOMEM; +} + +/* + * Receive input from user regarding Tx latency statistics + * The input should indicate if Tx latency statistics and bins are + * enabled/disabled. + * If bins are enabled input should indicate the amount of different bins and + * their ranges. Each bin will count how many Tx frames transmitted within the + * appropriate latency. + * Legal input is: + * a) "enable(bins disabled)" - to enable only general statistics + * b) "a,b,c,d,...z" - to enable general statistics and bins, where all are + * numbers and a < b < c < d.. < z + * c) "disable" - disable all statistics + * NOTE: must configure Tx latency statistics bins before stations connected. 
+ */ + +static ssize_t sta_tx_latency_stat_write(struct file *file, + const char __user *userbuf, + size_t count, loff_t *ppos) +{ + struct ieee80211_local *local = file->private_data; + char buf[128] = {}; + char *bins = buf; + char *token; + int buf_size, i, alloc_size; + int prev_bin = 0; + int n_ranges = 0; + int ret = count; + struct ieee80211_tx_latency_bin_ranges *tx_latency; + + if (sizeof(buf) <= count) + return -EINVAL; + buf_size = count; + if (copy_from_user(buf, userbuf, buf_size)) + return -EFAULT; + + mutex_lock(&local->sta_mtx); + + /* cannot change config once we have stations */ + if (local->num_sta) + goto unlock; + + tx_latency = + rcu_dereference_protected(local->tx_latency, + lockdep_is_held(&local->sta_mtx)); + + /* disable Tx statistics */ + if (!strcmp(buf, TX_LATENCY_DISABLED)) { + if (!tx_latency) + goto unlock; + rcu_assign_pointer(local->tx_latency, NULL); + synchronize_rcu(); + kfree(tx_latency); + goto unlock; + } + + /* Tx latency already enabled */ + if (tx_latency) + goto unlock; + + if (strcmp(TX_LATENCY_BINS_DISABLED, buf)) { + /* check how many bins and between what ranges user requested */ + token = buf; + while (*token != '\0') { + if (*token == TX_LATENCY_BIN_DELIMTER_C) + n_ranges++; + token++; + } + n_ranges++; + } + + alloc_size = sizeof(struct ieee80211_tx_latency_bin_ranges) + + n_ranges * sizeof(u32); + tx_latency = kzalloc(alloc_size, GFP_ATOMIC); + if (!tx_latency) { + ret = -ENOMEM; + goto unlock; + } + tx_latency->n_ranges = n_ranges; + for (i = 0; i < n_ranges; i++) { /* setting bin ranges */ + token = strsep(&bins, TX_LATENCY_BIN_DELIMTER_S); + sscanf(token, "%d", &tx_latency->ranges[i]); + /* bins values should be in ascending order */ + if (prev_bin >= tx_latency->ranges[i]) { + ret = -EINVAL; + kfree(tx_latency); + goto unlock; + } + prev_bin = tx_latency->ranges[i]; + } + rcu_assign_pointer(local->tx_latency, tx_latency); + +unlock: + mutex_unlock(&local->sta_mtx); + + return ret; +} + +static const struct 
file_operations stats_tx_latency_ops = { + .write = sta_tx_latency_stat_write, + .read = sta_tx_latency_stat_read, + .open = simple_open, + .llseek = generic_file_llseek, +}; + int mac80211_format_buffer(char __user *userbuf, size_t count, loff_t *ppos, char *fmt, ...) { @@ -315,4 +481,6 @@ void debugfs_hw_add(struct ieee80211_local *local) DEBUGFS_DEVSTATS_ADD(dot11RTSFailureCount); DEBUGFS_DEVSTATS_ADD(dot11FCSErrorCount); DEBUGFS_DEVSTATS_ADD(dot11RTSSuccessCount); + + DEBUGFS_DEVSTATS_ADD(tx_latency); } diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 04b5a14c8a0..ebf80f3abd8 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -133,7 +133,15 @@ static ssize_t ieee80211_if_fmt_##name( \ jiffies_to_msecs(sdata->field)); \ } -#define __IEEE80211_IF_FILE(name, _write) \ +#define _IEEE80211_IF_FILE_OPS(name, _read, _write) \ +static const struct file_operations name##_ops = { \ + .read = (_read), \ + .write = (_write), \ + .open = simple_open, \ + .llseek = generic_file_llseek, \ +} + +#define _IEEE80211_IF_FILE_R_FN(name) \ static ssize_t ieee80211_if_read_##name(struct file *file, \ char __user *userbuf, \ size_t count, loff_t *ppos) \ @@ -141,28 +149,34 @@ static ssize_t ieee80211_if_read_##name(struct file *file, \ return ieee80211_if_read(file->private_data, \ userbuf, count, ppos, \ ieee80211_if_fmt_##name); \ -} \ -static const struct file_operations name##_ops = { \ - .read = ieee80211_if_read_##name, \ - .write = (_write), \ - .open = simple_open, \ - .llseek = generic_file_llseek, \ } -#define __IEEE80211_IF_FILE_W(name) \ +#define _IEEE80211_IF_FILE_W_FN(name) \ static ssize_t ieee80211_if_write_##name(struct file *file, \ const char __user *userbuf, \ size_t count, loff_t *ppos) \ { \ return ieee80211_if_write(file->private_data, userbuf, count, \ ppos, ieee80211_if_parse_##name); \ -} \ -__IEEE80211_IF_FILE(name, ieee80211_if_write_##name) +} + +#define IEEE80211_IF_FILE_R(name) \ + 
_IEEE80211_IF_FILE_R_FN(name) \ + _IEEE80211_IF_FILE_OPS(name, ieee80211_if_read_##name, NULL) + +#define IEEE80211_IF_FILE_W(name) \ + _IEEE80211_IF_FILE_W_FN(name) \ + _IEEE80211_IF_FILE_OPS(name, NULL, ieee80211_if_write_##name) +#define IEEE80211_IF_FILE_RW(name) \ + _IEEE80211_IF_FILE_R_FN(name) \ + _IEEE80211_IF_FILE_W_FN(name) \ + _IEEE80211_IF_FILE_OPS(name, ieee80211_if_read_##name, \ + ieee80211_if_write_##name) #define IEEE80211_IF_FILE(name, field, format) \ - IEEE80211_IF_FMT_##format(name, field) \ - __IEEE80211_IF_FILE(name, NULL) + IEEE80211_IF_FMT_##format(name, field) \ + IEEE80211_IF_FILE_R(name) /* common attributes */ IEEE80211_IF_FILE(drop_unencrypted, drop_unencrypted, DEC); @@ -199,7 +213,7 @@ ieee80211_if_fmt_hw_queues(const struct ieee80211_sub_if_data *sdata, return len; } -__IEEE80211_IF_FILE(hw_queues, NULL); +IEEE80211_IF_FILE_R(hw_queues); /* STA attributes */ IEEE80211_IF_FILE(bssid, u.mgd.bssid, MAC); @@ -275,14 +289,7 @@ static ssize_t ieee80211_if_parse_smps(struct ieee80211_sub_if_data *sdata, return -EINVAL; } - -__IEEE80211_IF_FILE_W(smps); - -static ssize_t ieee80211_if_fmt_tkip_mic_test( - const struct ieee80211_sub_if_data *sdata, char *buf, int buflen) -{ - return -EOPNOTSUPP; -} +IEEE80211_IF_FILE_RW(smps); static ssize_t ieee80211_if_parse_tkip_mic_test( struct ieee80211_sub_if_data *sdata, const char *buf, int buflen) @@ -349,8 +356,7 @@ static ssize_t ieee80211_if_parse_tkip_mic_test( return buflen; } - -__IEEE80211_IF_FILE_W(tkip_mic_test); +IEEE80211_IF_FILE_W(tkip_mic_test); static ssize_t ieee80211_if_fmt_uapsd_queues( const struct ieee80211_sub_if_data *sdata, char *buf, int buflen) @@ -378,7 +384,7 @@ static ssize_t ieee80211_if_parse_uapsd_queues( return buflen; } -__IEEE80211_IF_FILE_W(uapsd_queues); +IEEE80211_IF_FILE_RW(uapsd_queues); static ssize_t ieee80211_if_fmt_uapsd_max_sp_len( const struct ieee80211_sub_if_data *sdata, char *buf, int buflen) @@ -406,7 +412,7 @@ static ssize_t 
ieee80211_if_parse_uapsd_max_sp_len( return buflen; } -__IEEE80211_IF_FILE_W(uapsd_max_sp_len); +IEEE80211_IF_FILE_RW(uapsd_max_sp_len); /* AP attributes */ IEEE80211_IF_FILE(num_mcast_sta, u.ap.num_mcast_sta, ATOMIC); @@ -419,7 +425,7 @@ static ssize_t ieee80211_if_fmt_num_buffered_multicast( return scnprintf(buf, buflen, "%u\n", skb_queue_len(&sdata->u.ap.ps.bc_buf)); } -__IEEE80211_IF_FILE(num_buffered_multicast, NULL); +IEEE80211_IF_FILE_R(num_buffered_multicast); /* IBSS attributes */ static ssize_t ieee80211_if_fmt_tsf( @@ -468,9 +474,10 @@ static ssize_t ieee80211_if_parse_tsf( } } + ieee80211_recalc_dtim(local, sdata); return buflen; } -__IEEE80211_IF_FILE_W(tsf); +IEEE80211_IF_FILE_RW(tsf); /* WDS attributes */ diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index 19c54a44ed4..80194b557a0 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -38,6 +38,13 @@ static const struct file_operations sta_ ##name## _ops = { \ .llseek = generic_file_llseek, \ } +#define STA_OPS_W(name) \ +static const struct file_operations sta_ ##name## _ops = { \ + .write = sta_##name##_write, \ + .open = simple_open, \ + .llseek = generic_file_llseek, \ +} + #define STA_OPS_RW(name) \ static const struct file_operations sta_ ##name## _ops = { \ .read = sta_##name##_read, \ @@ -388,6 +395,131 @@ static ssize_t sta_last_rx_rate_read(struct file *file, char __user *userbuf, } STA_OPS(last_rx_rate); +static int +sta_tx_latency_stat_header(struct ieee80211_tx_latency_bin_ranges *tx_latency, + char *buf, int pos, int bufsz) +{ + int i; + int range_count = tx_latency->n_ranges; + u32 *bin_ranges = tx_latency->ranges; + + pos += scnprintf(buf + pos, bufsz - pos, + "Station\t\t\tTID\tMax\tAvg"); + if (range_count) { + pos += scnprintf(buf + pos, bufsz - pos, + "\t<=%d", bin_ranges[0]); + for (i = 0; i < range_count - 1; i++) + pos += scnprintf(buf + pos, bufsz - pos, "\t%d-%d", + bin_ranges[i], bin_ranges[i+1]); + pos += scnprintf(buf + pos, 
bufsz - pos, + "\t%d<", bin_ranges[range_count - 1]); + } + + pos += scnprintf(buf + pos, bufsz - pos, "\n"); + + return pos; +} + +static int +sta_tx_latency_stat_table(struct ieee80211_tx_latency_bin_ranges *tx_lat_range, + struct ieee80211_tx_latency_stat *tx_lat, + char *buf, int pos, int bufsz, int tid) +{ + u32 avg = 0; + int j; + int bin_count = tx_lat->bin_count; + + pos += scnprintf(buf + pos, bufsz - pos, "\t\t\t%d", tid); + /* make sure you don't divide in 0 */ + if (tx_lat->counter) + avg = tx_lat->sum / tx_lat->counter; + + pos += scnprintf(buf + pos, bufsz - pos, "\t%d\t%d", + tx_lat->max, avg); + + if (tx_lat_range->n_ranges && tx_lat->bins) + for (j = 0; j < bin_count; j++) + pos += scnprintf(buf + pos, bufsz - pos, + "\t%d", tx_lat->bins[j]); + pos += scnprintf(buf + pos, bufsz - pos, "\n"); + + return pos; +} + +/* + * Output Tx latency statistics station && restart all statistics information + */ +static ssize_t sta_tx_latency_stat_read(struct file *file, + char __user *userbuf, + size_t count, loff_t *ppos) +{ + struct sta_info *sta = file->private_data; + struct ieee80211_local *local = sta->local; + struct ieee80211_tx_latency_bin_ranges *tx_latency; + char *buf; + int bufsz, ret, i; + int pos = 0; + + bufsz = 20 * IEEE80211_NUM_TIDS * + sizeof(struct ieee80211_tx_latency_stat); + buf = kzalloc(bufsz, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + rcu_read_lock(); + + tx_latency = rcu_dereference(local->tx_latency); + + if (!sta->tx_lat) { + pos += scnprintf(buf + pos, bufsz - pos, + "Tx latency statistics are not enabled\n"); + goto unlock; + } + + pos = sta_tx_latency_stat_header(tx_latency, buf, pos, bufsz); + + pos += scnprintf(buf + pos, bufsz - pos, "%pM\n", sta->sta.addr); + for (i = 0; i < IEEE80211_NUM_TIDS; i++) + pos = sta_tx_latency_stat_table(tx_latency, &sta->tx_lat[i], + buf, pos, bufsz, i); +unlock: + rcu_read_unlock(); + + ret = simple_read_from_buffer(userbuf, count, ppos, buf, pos); + kfree(buf); + + return ret; +} 
+STA_OPS(tx_latency_stat); + +static ssize_t sta_tx_latency_stat_reset_write(struct file *file, + const char __user *userbuf, + size_t count, loff_t *ppos) +{ + u32 *bins; + int bin_count; + struct sta_info *sta = file->private_data; + int i; + + if (!sta->tx_lat) + return -EINVAL; + + for (i = 0; i < IEEE80211_NUM_TIDS; i++) { + bins = sta->tx_lat[i].bins; + bin_count = sta->tx_lat[i].bin_count; + + sta->tx_lat[i].max = 0; + sta->tx_lat[i].sum = 0; + sta->tx_lat[i].counter = 0; + + if (bin_count) + memset(bins, 0, bin_count * sizeof(u32)); + } + + return count; +} +STA_OPS_W(tx_latency_stat_reset); + #define DEBUGFS_ADD(name) \ debugfs_create_file(#name, 0400, \ sta->debugfs.dir, sta, &sta_ ##name## _ops); @@ -441,6 +573,8 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta) DEBUGFS_ADD(last_ack_signal); DEBUGFS_ADD(current_tx_rate); DEBUGFS_ADD(last_rx_rate); + DEBUGFS_ADD(tx_latency_stat); + DEBUGFS_ADD(tx_latency_stat_reset); DEBUGFS_ADD_COUNTER(rx_packets, rx_packets); DEBUGFS_ADD_COUNTER(tx_packets, tx_packets); diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 5d03c47c0a4..ef8b385eff0 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -242,22 +242,6 @@ static inline u64 drv_prepare_multicast(struct ieee80211_local *local, return ret; } -static inline void drv_set_multicast_list(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata, - struct netdev_hw_addr_list *mc_list) -{ - bool allmulti = sdata->flags & IEEE80211_SDATA_ALLMULTI; - - trace_drv_set_multicast_list(local, sdata, mc_list->count); - - check_sdata_in_driver(sdata); - - if (local->ops->set_multicast_list) - local->ops->set_multicast_list(&local->hw, &sdata->vif, - allmulti, mc_list); - trace_drv_return_void(local); -} - static inline void drv_configure_filter(struct ieee80211_local *local, unsigned int changed_flags, unsigned int *total_flags, @@ -550,6 +534,22 @@ static inline void drv_sta_remove_debugfs(struct ieee80211_local 
*local, } #endif +static inline void drv_sta_pre_rcu_remove(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct sta_info *sta) +{ + might_sleep(); + + sdata = get_bss_sdata(sdata); + check_sdata_in_driver(sdata); + + trace_drv_sta_pre_rcu_remove(local, sdata, &sta->sta); + if (local->ops->sta_pre_rcu_remove) + local->ops->sta_pre_rcu_remove(&local->hw, &sdata->vif, + &sta->sta); + trace_drv_return_void(local); +} + static inline __must_check int drv_sta_state(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 9a8be8f6922..fab7b91923e 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -479,10 +479,9 @@ void ieee80211_request_smps(struct ieee80211_vif *vif, vif->type != NL80211_IFTYPE_AP)) return; - if (WARN_ON(smps_mode == IEEE80211_SMPS_OFF)) - smps_mode = IEEE80211_SMPS_AUTOMATIC; - if (vif->type == NL80211_IFTYPE_STATION) { + if (WARN_ON(smps_mode == IEEE80211_SMPS_OFF)) + smps_mode = IEEE80211_SMPS_AUTOMATIC; if (sdata->u.mgd.driver_smps_mode == smps_mode) return; sdata->u.mgd.driver_smps_mode = smps_mode; diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 27a39de8967..771080ec721 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -293,14 +293,17 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, radar_required = true; } + mutex_lock(&local->mtx); ieee80211_vif_release_channel(sdata); if (ieee80211_vif_use_channel(sdata, &chandef, ifibss->fixed_channel ? 
IEEE80211_CHANCTX_SHARED : IEEE80211_CHANCTX_EXCLUSIVE)) { sdata_info(sdata, "Failed to join IBSS, no channel context\n"); + mutex_unlock(&local->mtx); return; } + mutex_unlock(&local->mtx); memcpy(ifibss->bssid, bssid, ETH_ALEN); @@ -363,7 +366,9 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, sdata->vif.bss_conf.ssid_len = 0; RCU_INIT_POINTER(ifibss->presp, NULL); kfree_rcu(presp, rcu_head); + mutex_lock(&local->mtx); ieee80211_vif_release_channel(sdata); + mutex_unlock(&local->mtx); sdata_info(sdata, "Failed to join IBSS, driver failure: %d\n", err); return; @@ -522,7 +527,7 @@ int ieee80211_ibss_csa_beacon(struct ieee80211_sub_if_data *sdata, if (csa_settings) ieee80211_send_action_csa(sdata, csa_settings); - ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON); + return BSS_CHANGED_BEACON; out: return ret; } @@ -534,7 +539,8 @@ int ieee80211_ibss_finish_csa(struct ieee80211_sub_if_data *sdata) int err; u16 capability; - sdata_lock(sdata); + sdata_assert_lock(sdata); + /* update cfg80211 bss information with the new channel */ if (!is_zero_ether_addr(ifibss->bssid)) { capability = WLAN_CAPABILITY_IBSS; @@ -550,19 +556,21 @@ int ieee80211_ibss_finish_csa(struct ieee80211_sub_if_data *sdata) capability); /* XXX: should not really modify cfg80211 data */ if (cbss) { - cbss->channel = sdata->local->csa_chandef.chan; + cbss->channel = sdata->csa_chandef.chan; cfg80211_put_bss(sdata->local->hw.wiphy, cbss); } } - ifibss->chandef = sdata->local->csa_chandef; + ifibss->chandef = sdata->csa_chandef; /* generate the beacon */ err = ieee80211_ibss_csa_beacon(sdata, NULL); - sdata_unlock(sdata); if (err < 0) return err; + if (err) + ieee80211_bss_info_change_notify(sdata, err); + return 0; } @@ -744,7 +752,9 @@ static void ieee80211_ibss_disconnect(struct ieee80211_sub_if_data *sdata) ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED | BSS_CHANGED_IBSS); drv_leave_ibss(local, sdata); + mutex_lock(&local->mtx); 
ieee80211_vif_release_channel(sdata); + mutex_unlock(&local->mtx); } static void ieee80211_csa_connection_drop_work(struct work_struct *work) @@ -753,12 +763,16 @@ static void ieee80211_csa_connection_drop_work(struct work_struct *work) container_of(work, struct ieee80211_sub_if_data, u.ibss.csa_connection_drop_work); + sdata_lock(sdata); + ieee80211_ibss_disconnect(sdata); synchronize_rcu(); skb_queue_purge(&sdata->skb_queue); /* trigger a scan to find another IBSS network to join */ ieee80211_queue_work(&sdata->local->hw, &sdata->work); + + sdata_unlock(sdata); } static void ieee80211_ibss_csa_mark_radar(struct ieee80211_sub_if_data *sdata) @@ -784,18 +798,10 @@ ieee80211_ibss_process_chanswitch(struct ieee80211_sub_if_data *sdata, struct cfg80211_csa_settings params; struct ieee80211_csa_ie csa_ie; struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; - struct ieee80211_chanctx_conf *chanctx_conf; - struct ieee80211_chanctx *chanctx; enum nl80211_channel_type ch_type; - int err, num_chanctx; + int err; u32 sta_flags; - if (sdata->vif.csa_active) - return true; - - if (!sdata->vif.bss_conf.ibss_joined) - return false; - sta_flags = IEEE80211_STA_DISABLE_VHT; switch (ifibss->chandef.width) { case NL80211_CHAN_WIDTH_5: @@ -830,9 +836,6 @@ ieee80211_ibss_process_chanswitch(struct ieee80211_sub_if_data *sdata, params.count = csa_ie.count; params.chandef = csa_ie.chandef; - if (ifibss->chandef.chan->band != params.chandef.chan->band) - goto disconnect; - switch (ifibss->chandef.width) { case NL80211_CHAN_WIDTH_20_NOHT: case NL80211_CHAN_WIDTH_20: @@ -888,28 +891,12 @@ ieee80211_ibss_process_chanswitch(struct ieee80211_sub_if_data *sdata, params.radar_required = true; } - rcu_read_lock(); - chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); - if (!chanctx_conf) { - rcu_read_unlock(); - goto disconnect; - } - - /* don't handle for multi-VIF cases */ - chanctx = container_of(chanctx_conf, struct ieee80211_chanctx, conf); - if (chanctx->refcount > 1) { - 
rcu_read_unlock(); - goto disconnect; - } - num_chanctx = 0; - list_for_each_entry_rcu(chanctx, &sdata->local->chanctx_list, list) - num_chanctx++; - - if (num_chanctx > 1) { - rcu_read_unlock(); - goto disconnect; + if (cfg80211_chandef_identical(&params.chandef, + &sdata->vif.bss_conf.chandef)) { + ibss_dbg(sdata, + "received csa with an identical chandef, ignoring\n"); + return true; } - rcu_read_unlock(); /* all checks done, now perform the channel switch. */ ibss_dbg(sdata, @@ -918,19 +905,9 @@ ieee80211_ibss_process_chanswitch(struct ieee80211_sub_if_data *sdata, params.block_tx = !!csa_ie.mode; - ieee80211_ibss_csa_beacon(sdata, &params); - sdata->csa_radar_required = params.radar_required; - - if (params.block_tx) - ieee80211_stop_queues_by_reason(&sdata->local->hw, - IEEE80211_MAX_QUEUE_MAP, - IEEE80211_QUEUE_STOP_REASON_CSA); - - sdata->local->csa_chandef = params.chandef; - sdata->vif.csa_active = true; - - ieee80211_bss_info_change_notify(sdata, err); - drv_channel_switch_beacon(sdata, &params.chandef); + if (ieee80211_channel_switch(sdata->local->hw.wiphy, sdata->dev, + &params)) + goto disconnect; ieee80211_ibss_csa_mark_radar(sdata); @@ -966,7 +943,8 @@ ieee80211_rx_mgmt_spectrum_mgmt(struct ieee80211_sub_if_data *sdata, if (len < required_len) return; - ieee80211_ibss_process_chanswitch(sdata, elems, false); + if (!sdata->vif.csa_active) + ieee80211_ibss_process_chanswitch(sdata, elems, false); } static void ieee80211_rx_mgmt_deauth_ibss(struct ieee80211_sub_if_data *sdata, @@ -1147,7 +1125,8 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, goto put_bss; /* process channel switch */ - if (ieee80211_ibss_process_chanswitch(sdata, elems, true)) + if (sdata->vif.csa_active || + ieee80211_ibss_process_chanswitch(sdata, elems, true)) goto put_bss; /* same BSSID */ diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 4aea4e79111..3701930c664 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -232,6 
+232,7 @@ struct ieee80211_rx_data { struct beacon_data { u8 *head, *tail; int head_len, tail_len; + struct ieee80211_meshconf_ie *meshconf; struct rcu_head rcu_head; }; @@ -245,7 +246,8 @@ struct ps_data { /* yes, this looks ugly, but guarantees that we can later use * bitmap_empty :) * NB: don't touch this bitmap, use sta_info_{set,clear}_tim_bit */ - u8 tim[sizeof(unsigned long) * BITS_TO_LONGS(IEEE80211_MAX_AID + 1)]; + u8 tim[sizeof(unsigned long) * BITS_TO_LONGS(IEEE80211_MAX_AID + 1)] + __aligned(__alignof__(unsigned long)); struct sk_buff_head bc_buf; atomic_t num_sta_ps; /* number of stations in PS mode */ int dtim_count; @@ -540,7 +542,10 @@ struct ieee80211_mesh_sync_ops { struct ieee80211_mgmt *mgmt, struct ieee802_11_elems *elems, struct ieee80211_rx_status *rx_status); - void (*adjust_tbtt)(struct ieee80211_sub_if_data *sdata); + + /* should be called with beacon_data under RCU read lock */ + void (*adjust_tbtt)(struct ieee80211_sub_if_data *sdata, + struct beacon_data *beacon); /* add other framework functions here */ }; @@ -614,6 +619,9 @@ struct ieee80211_if_mesh { bool chsw_init; u8 chsw_ttl; u16 pre_value; + + /* offset from skb->data while building IE */ + int meshconf_offset; }; #ifdef CONFIG_MAC80211_MESH @@ -686,6 +694,11 @@ struct ieee80211_chanctx { struct ieee80211_chanctx_conf conf; }; +struct mac80211_qos_map { + struct cfg80211_qos_map qos_map; + struct rcu_head rcu_head; +}; + struct ieee80211_sub_if_data { struct list_head list; @@ -728,13 +741,16 @@ struct ieee80211_sub_if_data { u16 sequence_number; __be16 control_port_protocol; bool control_port_no_encrypt; + int encrypt_headroom; struct ieee80211_tx_queue_params tx_conf[IEEE80211_NUM_ACS]; + struct mac80211_qos_map __rcu *qos_map; struct work_struct csa_finalize_work; int csa_counter_offset_beacon; int csa_counter_offset_presp; bool csa_radar_required; + struct cfg80211_chan_def csa_chandef; /* used to reconfigure hardware SM PS */ struct work_struct recalc_smps; @@ -774,10 +790,6 
@@ struct ieee80211_sub_if_data { u32 mntr_flags; } u; - spinlock_t cleanup_stations_lock; - struct list_head cleanup_stations; - struct work_struct cleanup_stations_wk; - #ifdef CONFIG_MAC80211_DEBUGFS struct { struct dentry *subdir_stations; @@ -811,6 +823,9 @@ static inline void sdata_unlock(struct ieee80211_sub_if_data *sdata) __release(&sdata->wdev.mtx); } +#define sdata_dereference(p, sdata) \ + rcu_dereference_protected(p, lockdep_is_held(&sdata->wdev.mtx)) + static inline void sdata_assert_lock(struct ieee80211_sub_if_data *sdata) { @@ -896,6 +911,24 @@ struct tpt_led_trigger { }; #endif +/* + * struct ieee80211_tx_latency_bin_ranges - Tx latency statistics bins ranges + * + * Measuring Tx latency statistics. Counts how many Tx frames transmitted in a + * certain latency range (in Milliseconds). Each station that uses these + * ranges will have bins to count the amount of frames received in that range. + * The user can configure the ranges via debugfs. + * If ranges is NULL then Tx latency statistics bins are disabled for all + * stations. + * + * @n_ranges: number of ranges that are taken in account + * @ranges: the ranges that the user requested or NULL if disabled. + */ +struct ieee80211_tx_latency_bin_ranges { + int n_ranges; + u32 ranges[]; +}; + /** * mac80211 scan flags - currently active scan mode * @@ -1048,6 +1081,12 @@ struct ieee80211_local { struct timer_list sta_cleanup; int sta_generation; + /* + * Tx latency statistics parameters for all stations. + * Can enable via debugfs (NULL when disabled). 
+ */ + struct ieee80211_tx_latency_bin_ranges __rcu *tx_latency; + struct sk_buff_head pending[IEEE80211_MAX_QUEUES]; struct tasklet_struct tx_pending_tasklet; @@ -1088,12 +1127,12 @@ struct ieee80211_local { struct work_struct sched_scan_stopped_work; struct ieee80211_sub_if_data __rcu *sched_scan_sdata; + struct cfg80211_sched_scan_request *sched_scan_req; unsigned long leave_oper_channel_time; enum mac80211_scan_state next_scan_state; struct delayed_work scan_work; struct ieee80211_sub_if_data __rcu *scan_sdata; - struct cfg80211_chan_def csa_chandef; /* For backward compatibility only -- do not use */ struct cfg80211_chan_def _oper_chandef; @@ -1397,6 +1436,9 @@ void ieee80211_rx_bss_put(struct ieee80211_local *local, struct ieee80211_bss *bss); /* scheduled scan handling */ +int +__ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata, + struct cfg80211_sched_scan_request *req); int ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata, struct cfg80211_sched_scan_request *req); int ieee80211_request_sched_scan_stop(struct ieee80211_sub_if_data *sdata); @@ -1415,6 +1457,8 @@ void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc); /* channel switch handling */ void ieee80211_csa_finalize_work(struct work_struct *work); +int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, + struct cfg80211_csa_settings *params); /* interface handling */ int ieee80211_iface_init(void); @@ -1437,8 +1481,6 @@ void ieee80211_del_virtual_monitor(struct ieee80211_local *local); bool __ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata); void ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata); -int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata, - struct cfg80211_beacon_data *params); static inline bool ieee80211_sdata_running(struct ieee80211_sub_if_data *sdata) { @@ -1693,6 +1735,7 @@ int __ieee80211_request_smps_mgd(struct ieee80211_sub_if_data *sdata, int 
__ieee80211_request_smps_ap(struct ieee80211_sub_if_data *sdata, enum ieee80211_smps_mode smps_mode); void ieee80211_recalc_smps(struct ieee80211_sub_if_data *sdata); +void ieee80211_recalc_min_chandef(struct ieee80211_sub_if_data *sdata); size_t ieee80211_ie_split(const u8 *ies, size_t ielen, const u8 *ids, int n_ids, size_t offset); @@ -1731,7 +1774,6 @@ ieee80211_vif_change_bandwidth(struct ieee80211_sub_if_data *sdata, /* NOTE: only use ieee80211_vif_change_channel() for channel switch */ int __must_check ieee80211_vif_change_channel(struct ieee80211_sub_if_data *sdata, - const struct cfg80211_chan_def *chandef, u32 *changed); void ieee80211_vif_release_channel(struct ieee80211_sub_if_data *sdata); void ieee80211_vif_vlan_copy_chanctx(struct ieee80211_sub_if_data *sdata); @@ -1740,8 +1782,8 @@ void ieee80211_vif_copy_chanctx_to_vlans(struct ieee80211_sub_if_data *sdata, void ieee80211_recalc_smps_chanctx(struct ieee80211_local *local, struct ieee80211_chanctx *chanctx); -void ieee80211_recalc_radar_chanctx(struct ieee80211_local *local, - struct ieee80211_chanctx *chanctx); +void ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local, + struct ieee80211_chanctx *ctx); void ieee80211_dfs_cac_timer(unsigned long data); void ieee80211_dfs_cac_timer_work(struct work_struct *work); @@ -1750,6 +1792,17 @@ void ieee80211_dfs_radar_detected_work(struct work_struct *work); int ieee80211_send_action_csa(struct ieee80211_sub_if_data *sdata, struct cfg80211_csa_settings *csa_settings); +bool ieee80211_cs_valid(const struct ieee80211_cipher_scheme *cs); +bool ieee80211_cs_list_valid(const struct ieee80211_cipher_scheme *cs, int n); +const struct ieee80211_cipher_scheme * +ieee80211_cs_get(struct ieee80211_local *local, u32 cipher, + enum nl80211_iftype iftype); +int ieee80211_cs_headroom(struct ieee80211_local *local, + struct cfg80211_crypto_settings *crypto, + enum nl80211_iftype iftype); +void ieee80211_recalc_dtim(struct ieee80211_local *local, + struct 
ieee80211_sub_if_data *sdata); + #ifdef CONFIG_MAC80211_NOINLINE #define debug_noinline noinline #else diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index a0757913046..3dfd20a453a 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -401,6 +401,8 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local) snprintf(sdata->name, IFNAMSIZ, "%s-monitor", wiphy_name(local->hw.wiphy)); + sdata->encrypt_headroom = IEEE80211_ENCRYPT_HEADROOM; + ieee80211_set_default_queues(sdata); ret = drv_add_interface(local, sdata); @@ -416,8 +418,10 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local) return ret; } + mutex_lock(&local->mtx); ret = ieee80211_vif_use_channel(sdata, &local->monitor_chandef, IEEE80211_CHANCTX_EXCLUSIVE); + mutex_unlock(&local->mtx); if (ret) { drv_remove_interface(local, sdata); kfree(sdata); @@ -454,7 +458,9 @@ void ieee80211_del_virtual_monitor(struct ieee80211_local *local) synchronize_net(); + mutex_lock(&local->mtx); ieee80211_vif_release_channel(sdata); + mutex_unlock(&local->mtx); drv_remove_interface(local, sdata); @@ -749,6 +755,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, u32 hw_reconf_flags = 0; int i, flushed; struct ps_data *ps; + struct cfg80211_chan_def chandef; clear_bit(SDATA_STATE_RUNNING, &sdata->state); @@ -783,10 +790,8 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, * This is relevant only in WDS mode, in all other modes we've * already removed all stations when disconnecting or similar, * so warn otherwise. - * - * We call sta_info_flush_cleanup() later, to combine RCU waits. 
*/ - flushed = sta_info_flush_defer(sdata); + flushed = sta_info_flush(sdata); WARN_ON_ONCE((sdata->vif.type != NL80211_IFTYPE_WDS && flushed > 0) || (sdata->vif.type == NL80211_IFTYPE_WDS && flushed != 1)); @@ -823,11 +828,13 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, cancel_delayed_work_sync(&sdata->dfs_cac_timer_work); if (sdata->wdev.cac_started) { + chandef = sdata->vif.bss_conf.chandef; WARN_ON(local->suspended); - mutex_lock(&local->iflist_mtx); + mutex_lock(&local->mtx); ieee80211_vif_release_channel(sdata); - mutex_unlock(&local->iflist_mtx); - cfg80211_cac_event(sdata->dev, NL80211_RADAR_CAC_ABORTED, + mutex_unlock(&local->mtx); + cfg80211_cac_event(sdata->dev, &chandef, + NL80211_RADAR_CAC_ABORTED, GFP_KERNEL); } @@ -886,23 +893,15 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, cancel_work_sync(&sdata->work); /* * When we get here, the interface is marked down. + * Free the remaining keys, if there are any + * (shouldn't be, except maybe in WDS mode?) * - * sta_info_flush_cleanup() requires rcu_barrier() - * first to wait for the station call_rcu() calls - * to complete, and we also need synchronize_rcu() - * to wait for the RX path in case it is using the - * interface and enqueuing frames at this very time on + * Force the key freeing to always synchronize_net() + * to wait for the RX path in case it is using this + * interface enqueuing frames * at this very time on * another CPU. */ - synchronize_rcu(); - rcu_barrier(); - sta_info_flush_cleanup(sdata); - - /* - * Free all remaining keys, there shouldn't be any, - * except maybe in WDS mode? 
- */ - ieee80211_free_keys(sdata); + ieee80211_free_keys(sdata, true); /* fall through */ case NL80211_IFTYPE_AP: @@ -1013,17 +1012,6 @@ static void ieee80211_set_multicast_list(struct net_device *dev) atomic_dec(&local->iff_promiscs); sdata->flags ^= IEEE80211_SDATA_PROMISC; } - - /* - * TODO: If somebody needs this on AP interfaces, - * it can be enabled easily but multicast - * addresses from VLANs need to be synced. - */ - if (sdata->vif.type != NL80211_IFTYPE_MONITOR && - sdata->vif.type != NL80211_IFTYPE_AP_VLAN && - sdata->vif.type != NL80211_IFTYPE_AP) - drv_set_multicast_list(local, sdata, &dev->mc); - spin_lock_bh(&local->filter_lock); __hw_addr_sync(&local->mc_list, &dev->mc, dev->addr_len); spin_unlock_bh(&local->filter_lock); @@ -1036,11 +1024,10 @@ static void ieee80211_set_multicast_list(struct net_device *dev) */ static void ieee80211_teardown_sdata(struct ieee80211_sub_if_data *sdata) { - int flushed; int i; /* free extra data */ - ieee80211_free_keys(sdata); + ieee80211_free_keys(sdata, false); ieee80211_debugfs_remove_netdev(sdata); @@ -1050,9 +1037,6 @@ static void ieee80211_teardown_sdata(struct ieee80211_sub_if_data *sdata) if (ieee80211_vif_is_mesh(&sdata->vif)) mesh_rmc_free(sdata); - - flushed = sta_info_flush(sdata); - WARN_ON(flushed); } static void ieee80211_uninit(struct net_device *dev) @@ -1272,6 +1256,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, sdata->control_port_protocol = cpu_to_be16(ETH_P_PAE); sdata->control_port_no_encrypt = false; + sdata->encrypt_headroom = IEEE80211_ENCRYPT_HEADROOM; sdata->noack_map = 0; @@ -1497,8 +1482,8 @@ static void ieee80211_assign_perm_addr(struct ieee80211_local *local, bool used = false; list_for_each_entry(sdata, &local->interfaces, list) { - if (memcmp(local->hw.wiphy->addresses[i].addr, - sdata->vif.addr, ETH_ALEN) == 0) { + if (ether_addr_equal(local->hw.wiphy->addresses[i].addr, + sdata->vif.addr)) { used = true; break; } @@ -1558,8 +1543,7 @@ static void 
ieee80211_assign_perm_addr(struct ieee80211_local *local, val += inc; list_for_each_entry(sdata, &local->interfaces, list) { - if (memcmp(tmp_addr, sdata->vif.addr, - ETH_ALEN) == 0) { + if (ether_addr_equal(tmp_addr, sdata->vif.addr)) { used = true; break; } @@ -1579,15 +1563,6 @@ static void ieee80211_assign_perm_addr(struct ieee80211_local *local, mutex_unlock(&local->iflist_mtx); } -static void ieee80211_cleanup_sdata_stas_wk(struct work_struct *wk) -{ - struct ieee80211_sub_if_data *sdata; - - sdata = container_of(wk, struct ieee80211_sub_if_data, cleanup_stations_wk); - - ieee80211_cleanup_sdata_stas(sdata); -} - int ieee80211_if_add(struct ieee80211_local *local, const char *name, struct wireless_dev **new_wdev, enum nl80211_iftype type, struct vif_params *params) @@ -1660,9 +1635,6 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, INIT_LIST_HEAD(&sdata->key_list); - spin_lock_init(&sdata->cleanup_stations_lock); - INIT_LIST_HEAD(&sdata->cleanup_stations); - INIT_WORK(&sdata->cleanup_stations_wk, ieee80211_cleanup_sdata_stas_wk); INIT_DELAYED_WORK(&sdata->dfs_cac_timer_work, ieee80211_dfs_cac_timer_work); INIT_DELAYED_WORK(&sdata->dec_tailroom_needed_wk, @@ -1687,6 +1659,8 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, sdata->ap_power_level = IEEE80211_UNSET_POWER_LEVEL; sdata->user_power_level = local->user_power_level; + sdata->encrypt_headroom = IEEE80211_ENCRYPT_HEADROOM; + /* setup type-dependent data */ ieee80211_setup_sdata(sdata, type); diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 3e51dd7d98b..6ff65a1ebaa 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -260,25 +260,29 @@ static void ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, int idx; bool defunikey, defmultikey, defmgmtkey; + /* caller must provide at least one old/new */ + if (WARN_ON(!new && !old)) + return; + if (new) list_add_tail(&new->list, &sdata->key_list); - if (sta && pairwise) { - 
rcu_assign_pointer(sta->ptk, new); - } else if (sta) { - if (old) - idx = old->conf.keyidx; - else - idx = new->conf.keyidx; - rcu_assign_pointer(sta->gtk[idx], new); - } else { - WARN_ON(new && old && new->conf.keyidx != old->conf.keyidx); + WARN_ON(new && old && new->conf.keyidx != old->conf.keyidx); - if (old) - idx = old->conf.keyidx; - else - idx = new->conf.keyidx; + if (old) + idx = old->conf.keyidx; + else + idx = new->conf.keyidx; + if (sta) { + if (pairwise) { + rcu_assign_pointer(sta->ptk[idx], new); + sta->ptk_idx = idx; + } else { + rcu_assign_pointer(sta->gtk[idx], new); + sta->gtk_idx = idx; + } + } else { defunikey = old && old == key_mtx_dereference(sdata->local, sdata->default_unicast_key); @@ -312,9 +316,11 @@ static void ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, list_del(&old->list); } -struct ieee80211_key *ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, - const u8 *key_data, - size_t seq_len, const u8 *seq) +struct ieee80211_key * +ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, + const u8 *key_data, + size_t seq_len, const u8 *seq, + const struct ieee80211_cipher_scheme *cs) { struct ieee80211_key *key; int i, j, err; @@ -393,6 +399,18 @@ struct ieee80211_key *ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, return ERR_PTR(err); } break; + default: + if (cs) { + size_t len = (seq_len > MAX_PN_LEN) ? 
+ MAX_PN_LEN : seq_len; + + key->conf.iv_len = cs->hdr_len; + key->conf.icv_len = cs->mic_len; + for (i = 0; i < IEEE80211_NUM_TIDS + 1; i++) + for (j = 0; j < len; j++) + key->u.gen.rx_pn[i][j] = + seq[len - j - 1]; + } } memcpy(key->conf.key, key_data, key_len); INIT_LIST_HEAD(&key->list); @@ -475,7 +493,7 @@ int ieee80211_key_link(struct ieee80211_key *key, mutex_lock(&sdata->local->key_mtx); if (sta && pairwise) - old_key = key_mtx_dereference(sdata->local, sta->ptk); + old_key = key_mtx_dereference(sdata->local, sta->ptk[idx]); else if (sta) old_key = key_mtx_dereference(sdata->local, sta->gtk[idx]); else @@ -571,14 +589,10 @@ void ieee80211_iter_keys(struct ieee80211_hw *hw, } EXPORT_SYMBOL(ieee80211_iter_keys); -void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata) +static void ieee80211_free_keys_iface(struct ieee80211_sub_if_data *sdata, + struct list_head *keys) { struct ieee80211_key *key, *tmp; - LIST_HEAD(keys); - - cancel_delayed_work_sync(&sdata->dec_tailroom_needed_wk); - - mutex_lock(&sdata->local->key_mtx); sdata->crypto_tx_tailroom_needed_cnt -= sdata->crypto_tx_tailroom_pending_dec; @@ -590,28 +604,51 @@ void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata) ieee80211_key_replace(key->sdata, key->sta, key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE, key, NULL); - list_add_tail(&key->list, &keys); + list_add_tail(&key->list, keys); } ieee80211_debugfs_key_update_default(sdata); +} - if (!list_empty(&keys)) { - synchronize_net(); - list_for_each_entry_safe(key, tmp, &keys, list) - __ieee80211_key_destroy(key, false); +void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata, + bool force_synchronize) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_sub_if_data *vlan; + struct ieee80211_key *key, *tmp; + LIST_HEAD(keys); + + cancel_delayed_work_sync(&sdata->dec_tailroom_needed_wk); + + mutex_lock(&local->key_mtx); + + ieee80211_free_keys_iface(sdata, &keys); + + if (sdata->vif.type == NL80211_IFTYPE_AP) { + 
list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) + ieee80211_free_keys_iface(vlan, &keys); } + if (!list_empty(&keys) || force_synchronize) + synchronize_net(); + list_for_each_entry_safe(key, tmp, &keys, list) + __ieee80211_key_destroy(key, false); + WARN_ON_ONCE(sdata->crypto_tx_tailroom_needed_cnt || sdata->crypto_tx_tailroom_pending_dec); + if (sdata->vif.type == NL80211_IFTYPE_AP) { + list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) + WARN_ON_ONCE(vlan->crypto_tx_tailroom_needed_cnt || + vlan->crypto_tx_tailroom_pending_dec); + } - mutex_unlock(&sdata->local->key_mtx); + mutex_unlock(&local->key_mtx); } void ieee80211_free_sta_keys(struct ieee80211_local *local, struct sta_info *sta) { - struct ieee80211_key *key, *tmp; - LIST_HEAD(keys); + struct ieee80211_key *key; int i; mutex_lock(&local->key_mtx); @@ -622,25 +659,18 @@ void ieee80211_free_sta_keys(struct ieee80211_local *local, ieee80211_key_replace(key->sdata, key->sta, key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE, key, NULL); - list_add(&key->list, &keys); + __ieee80211_key_destroy(key, true); } - key = key_mtx_dereference(local, sta->ptk); - if (key) { + for (i = 0; i < NUM_DEFAULT_KEYS; i++) { + key = key_mtx_dereference(local, sta->ptk[i]); + if (!key) + continue; ieee80211_key_replace(key->sdata, key->sta, key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE, key, NULL); - list_add(&key->list, &keys); - } - - /* - * NB: the station code relies on this being - * done even if there aren't any keys - */ - synchronize_net(); - - list_for_each_entry_safe(key, tmp, &keys, list) __ieee80211_key_destroy(key, true); + } mutex_unlock(&local->key_mtx); } @@ -877,7 +907,7 @@ ieee80211_gtk_rekey_add(struct ieee80211_vif *vif, key = ieee80211_key_alloc(keyconf->cipher, keyconf->keyidx, keyconf->keylen, keyconf->key, - 0, NULL); + 0, NULL, NULL); if (IS_ERR(key)) return ERR_CAST(key); diff --git a/net/mac80211/key.h b/net/mac80211/key.h index aaae0ed3700..19db68663d7 100644 --- a/net/mac80211/key.h 
+++ b/net/mac80211/key.h @@ -18,6 +18,7 @@ #define NUM_DEFAULT_KEYS 4 #define NUM_DEFAULT_MGMT_KEYS 2 +#define MAX_PN_LEN 16 struct ieee80211_local; struct ieee80211_sub_if_data; @@ -93,6 +94,10 @@ struct ieee80211_key { u32 replays; /* dot11RSNAStatsCMACReplays */ u32 icverrors; /* dot11RSNAStatsCMACICVErrors */ } aes_cmac; + struct { + /* generic cipher scheme */ + u8 rx_pn[IEEE80211_NUM_TIDS + 1][MAX_PN_LEN]; + } gen; } u; /* number of times this key has been used */ @@ -113,9 +118,11 @@ struct ieee80211_key { struct ieee80211_key_conf conf; }; -struct ieee80211_key *ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, - const u8 *key_data, - size_t seq_len, const u8 *seq); +struct ieee80211_key * +ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, + const u8 *key_data, + size_t seq_len, const u8 *seq, + const struct ieee80211_cipher_scheme *cs); /* * Insert a key into data structures (sdata, sta if necessary) * to make it used, free old key. On failure, also free the new key. 
@@ -129,7 +136,8 @@ void ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, int idx, bool uni, bool multi); void ieee80211_set_default_mgmt_key(struct ieee80211_sub_if_data *sdata, int idx); -void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata); +void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata, + bool force_synchronize); void ieee80211_free_sta_keys(struct ieee80211_local *local, struct sta_info *sta); void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata); diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 7d1c3ac48ed..d767cfb9b45 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -250,12 +250,8 @@ static void ieee80211_restart_work(struct work_struct *work) /* wait for scan work complete */ flush_workqueue(local->workqueue); - mutex_lock(&local->mtx); - WARN(test_bit(SCAN_HW_SCANNING, &local->scanning) || - rcu_dereference_protected(local->sched_scan_sdata, - lockdep_is_held(&local->mtx)), - "%s called with hardware scan in progress\n", __func__); - mutex_unlock(&local->mtx); + WARN(test_bit(SCAN_HW_SCANNING, &local->scanning), + "%s called with hardware scan in progress\n", __func__); rtnl_lock(); ieee80211_scan_cancel(local); @@ -651,15 +647,14 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, } EXPORT_SYMBOL(ieee80211_alloc_hw); -int ieee80211_register_hw(struct ieee80211_hw *hw) +static int ieee80211_init_cipher_suites(struct ieee80211_local *local) { - struct ieee80211_local *local = hw_to_local(hw); - int result, i; - enum ieee80211_band band; - int channels, max_bitrates; - bool supp_ht, supp_vht; - netdev_features_t feature_whitelist; - struct cfg80211_chan_def dflt_chandef = {}; + bool have_wep = !(IS_ERR(local->wep_tx_tfm) || + IS_ERR(local->wep_rx_tfm)); + bool have_mfp = local->hw.flags & IEEE80211_HW_MFP_CAPABLE; + const struct ieee80211_cipher_scheme *cs = local->hw.cipher_schemes; + int n_suites = 0, r = 0, w = 0; + u32 *suites; static const u32 cipher_suites[] = { /* 
keep WEP first, it may be removed below */ WLAN_CIPHER_SUITE_WEP40, @@ -671,6 +666,93 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) WLAN_CIPHER_SUITE_AES_CMAC }; + /* Driver specifies the ciphers, we have nothing to do... */ + if (local->hw.wiphy->cipher_suites && have_wep) + return 0; + + /* Set up cipher suites if driver relies on mac80211 cipher defs */ + if (!local->hw.wiphy->cipher_suites && !cs) { + local->hw.wiphy->cipher_suites = cipher_suites; + local->hw.wiphy->n_cipher_suites = ARRAY_SIZE(cipher_suites); + + if (!have_mfp) + local->hw.wiphy->n_cipher_suites--; + + if (!have_wep) { + local->hw.wiphy->cipher_suites += 2; + local->hw.wiphy->n_cipher_suites -= 2; + } + + return 0; + } + + if (!local->hw.wiphy->cipher_suites) { + /* + * Driver specifies cipher schemes only + * We start counting ciphers defined by schemes, TKIP and CCMP + */ + n_suites = local->hw.n_cipher_schemes + 2; + + /* check if we have WEP40 and WEP104 */ + if (have_wep) + n_suites += 2; + + /* check if we have AES_CMAC */ + if (have_mfp) + n_suites++; + + suites = kmalloc(sizeof(u32) * n_suites, GFP_KERNEL); + if (!suites) + return -ENOMEM; + + suites[w++] = WLAN_CIPHER_SUITE_CCMP; + suites[w++] = WLAN_CIPHER_SUITE_TKIP; + + if (have_wep) { + suites[w++] = WLAN_CIPHER_SUITE_WEP40; + suites[w++] = WLAN_CIPHER_SUITE_WEP104; + } + + if (have_mfp) + suites[w++] = WLAN_CIPHER_SUITE_AES_CMAC; + + for (r = 0; r < local->hw.n_cipher_schemes; r++) + suites[w++] = cs[r].cipher; + } else { + /* Driver provides cipher suites, but we need to exclude WEP */ + suites = kmemdup(local->hw.wiphy->cipher_suites, + sizeof(u32) * local->hw.wiphy->n_cipher_suites, + GFP_KERNEL); + if (!suites) + return -ENOMEM; + + for (r = 0; r < local->hw.wiphy->n_cipher_suites; r++) { + u32 suite = local->hw.wiphy->cipher_suites[r]; + + if (suite == WLAN_CIPHER_SUITE_WEP40 || + suite == WLAN_CIPHER_SUITE_WEP104) + continue; + suites[w++] = suite; + } + } + + local->hw.wiphy->cipher_suites = suites; + 
local->hw.wiphy->n_cipher_suites = w; + local->wiphy_ciphers_allocated = true; + + return 0; +} + +int ieee80211_register_hw(struct ieee80211_hw *hw) +{ + struct ieee80211_local *local = hw_to_local(hw); + int result, i; + enum ieee80211_band band; + int channels, max_bitrates; + bool supp_ht, supp_vht; + netdev_features_t feature_whitelist; + struct cfg80211_chan_def dflt_chandef = {}; + if (hw->flags & IEEE80211_HW_QUEUE_CONTROL && (local->hw.offchannel_tx_hw_queue == IEEE80211_INVAL_HW_QUEUE || local->hw.offchannel_tx_hw_queue >= local->hw.queues)) @@ -764,17 +846,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) /* TODO: consider VHT for RX chains, hopefully it's the same */ } - local->int_scan_req = kzalloc(sizeof(*local->int_scan_req) + - sizeof(void *) * channels, GFP_KERNEL); - if (!local->int_scan_req) - return -ENOMEM; - - for (band = 0; band < IEEE80211_NUM_BANDS; band++) { - if (!local->hw.wiphy->bands[band]) - continue; - local->int_scan_req->rates[band] = (u32) -1; - } - /* if low-level driver supports AP, we also support VLAN */ if (local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_AP)) { hw->wiphy->interface_modes |= BIT(NL80211_IFTYPE_AP_VLAN); @@ -798,6 +869,17 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) return -EINVAL; } + local->int_scan_req = kzalloc(sizeof(*local->int_scan_req) + + sizeof(void *) * channels, GFP_KERNEL); + if (!local->int_scan_req) + return -ENOMEM; + + for (band = 0; band < IEEE80211_NUM_BANDS; band++) { + if (!local->hw.wiphy->bands[band]) + continue; + local->int_scan_req->rates[band] = (u32) -1; + } + #ifndef CONFIG_MAC80211_MESH /* mesh depends on Kconfig, but drivers should set it if they want */ local->hw.wiphy->interface_modes &= ~BIT(NL80211_IFTYPE_MESH_POINT); @@ -851,43 +933,12 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) if (local->hw.wiphy->max_scan_ie_len) local->hw.wiphy->max_scan_ie_len -= local->scan_ies_len; - /* Set up cipher suites unless driver already did */ - if 
(!local->hw.wiphy->cipher_suites) { - local->hw.wiphy->cipher_suites = cipher_suites; - local->hw.wiphy->n_cipher_suites = ARRAY_SIZE(cipher_suites); - if (!(local->hw.flags & IEEE80211_HW_MFP_CAPABLE)) - local->hw.wiphy->n_cipher_suites--; - } - if (IS_ERR(local->wep_tx_tfm) || IS_ERR(local->wep_rx_tfm)) { - if (local->hw.wiphy->cipher_suites == cipher_suites) { - local->hw.wiphy->cipher_suites += 2; - local->hw.wiphy->n_cipher_suites -= 2; - } else { - u32 *suites; - int r, w = 0; - - /* Filter out WEP */ - - suites = kmemdup( - local->hw.wiphy->cipher_suites, - sizeof(u32) * local->hw.wiphy->n_cipher_suites, - GFP_KERNEL); - if (!suites) { - result = -ENOMEM; - goto fail_wiphy_register; - } - for (r = 0; r < local->hw.wiphy->n_cipher_suites; r++) { - u32 suite = local->hw.wiphy->cipher_suites[r]; - if (suite == WLAN_CIPHER_SUITE_WEP40 || - suite == WLAN_CIPHER_SUITE_WEP104) - continue; - suites[w++] = suite; - } - local->hw.wiphy->cipher_suites = suites; - local->hw.wiphy->n_cipher_suites = w; - local->wiphy_ciphers_allocated = true; - } - } + WARN_ON(!ieee80211_cs_list_valid(local->hw.cipher_schemes, + local->hw.n_cipher_schemes)); + + result = ieee80211_init_cipher_suites(local); + if (result < 0) + goto fail_wiphy_register; if (!local->ops->remain_on_channel) local->hw.wiphy->max_remain_on_channel_duration = 5000; @@ -1090,6 +1141,8 @@ void ieee80211_free_hw(struct ieee80211_hw *hw) ieee80211_free_ack_frame, NULL); idr_destroy(&local->ack_status_frames); + kfree(rcu_access_pointer(local->tx_latency)); + wiphy_free(local->hw.wiphy); } EXPORT_SYMBOL(ieee80211_free_hw); diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index ba105257d03..5b919cab1de 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -259,6 +259,9 @@ int mesh_add_meshconf_ie(struct ieee80211_sub_if_data *sdata, *pos++ = WLAN_EID_MESH_CONFIG; *pos++ = meshconf_len; + /* save a pointer for quick updates in pre-tbtt */ + ifmsh->meshconf_offset = pos - skb->data; + /* Active path 
selection protocol ID */ *pos++ = ifmsh->mesh_pp_id; /* Active path selection metric ID */ @@ -674,8 +677,6 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh) rcu_read_lock(); csa = rcu_dereference(ifmsh->csa); if (csa) { - __le16 pre_value; - pos = skb_put(skb, 13); memset(pos, 0, 13); *pos++ = WLAN_EID_CHANNEL_SWITCH; @@ -697,8 +698,7 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh) WLAN_EID_CHAN_SWITCH_PARAM_TX_RESTRICT : 0x00; put_unaligned_le16(WLAN_REASON_MESH_CHAN, pos); pos += 2; - pre_value = cpu_to_le16(ifmsh->pre_value); - memcpy(pos, &pre_value, 2); + put_unaligned_le16(ifmsh->pre_value, pos); pos += 2; } rcu_read_unlock(); @@ -726,6 +726,8 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh) bcn->tail_len = skb->len; memcpy(bcn->tail, skb->data, bcn->tail_len); + bcn->meshconf = (struct ieee80211_meshconf_ie *) + (bcn->tail + ifmsh->meshconf_offset); dev_kfree_skb(skb); rcu_assign_pointer(ifmsh->beacon, bcn); @@ -805,6 +807,7 @@ int ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata) return -ENOMEM; } + ieee80211_recalc_dtim(local, sdata); ieee80211_bss_info_change_notify(sdata, changed); netif_carrier_on(sdata->dev); @@ -964,7 +967,7 @@ ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata, IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_CSA); - sdata->local->csa_chandef = params.chandef; + sdata->csa_chandef = params.chandef; sdata->vif.csa_active = true; ieee80211_bss_info_change_notify(sdata, err); diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index 2bc7fd2f787..f39a19f9090 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -215,8 +215,6 @@ int mesh_rmc_check(struct ieee80211_sub_if_data *sdata, bool mesh_matches_local(struct ieee80211_sub_if_data *sdata, struct ieee802_11_elems *ie); void mesh_ids_set_default(struct ieee80211_if_mesh *mesh); -void mesh_mgmt_ies_add(struct ieee80211_sub_if_data *sdata, - struct sk_buff *skb); int mesh_add_meshconf_ie(struct 
ieee80211_sub_if_data *sdata, struct sk_buff *skb); int mesh_add_meshid_ie(struct ieee80211_sub_if_data *sdata, @@ -303,8 +301,8 @@ void mesh_mpath_table_grow(void); void mesh_mpp_table_grow(void); /* Mesh paths */ int mesh_path_error_tx(struct ieee80211_sub_if_data *sdata, - u8 ttl, const u8 *target, __le32 target_sn, - __le16 target_rcode, const u8 *ra); + u8 ttl, const u8 *target, u32 target_sn, + u16 target_rcode, const u8 *ra); void mesh_path_assign_nexthop(struct mesh_path *mpath, struct sta_info *sta); void mesh_path_flush_pending(struct mesh_path *mpath); void mesh_path_tx_pending(struct mesh_path *mpath); diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 486819cd02c..f9514685d45 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -102,12 +102,11 @@ enum mpath_frame_type { static const u8 broadcast_addr[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags, - const u8 *orig_addr, __le32 orig_sn, + const u8 *orig_addr, u32 orig_sn, u8 target_flags, const u8 *target, - __le32 target_sn, const u8 *da, + u32 target_sn, const u8 *da, u8 hop_count, u8 ttl, - __le32 lifetime, __le32 metric, - __le32 preq_id, + u32 lifetime, u32 metric, u32 preq_id, struct ieee80211_sub_if_data *sdata) { struct ieee80211_local *local = sdata->local; @@ -167,33 +166,33 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags, if (action == MPATH_PREP) { memcpy(pos, target, ETH_ALEN); pos += ETH_ALEN; - memcpy(pos, &target_sn, 4); + put_unaligned_le32(target_sn, pos); pos += 4; } else { if (action == MPATH_PREQ) { - memcpy(pos, &preq_id, 4); + put_unaligned_le32(preq_id, pos); pos += 4; } memcpy(pos, orig_addr, ETH_ALEN); pos += ETH_ALEN; - memcpy(pos, &orig_sn, 4); + put_unaligned_le32(orig_sn, pos); pos += 4; } - memcpy(pos, &lifetime, 4); /* interval for RANN */ + put_unaligned_le32(lifetime, pos); /* interval for RANN */ pos += 4; - memcpy(pos, &metric, 
4); + put_unaligned_le32(metric, pos); pos += 4; if (action == MPATH_PREQ) { *pos++ = 1; /* destination count */ *pos++ = target_flags; memcpy(pos, target, ETH_ALEN); pos += ETH_ALEN; - memcpy(pos, &target_sn, 4); + put_unaligned_le32(target_sn, pos); pos += 4; } else if (action == MPATH_PREP) { memcpy(pos, orig_addr, ETH_ALEN); pos += ETH_ALEN; - memcpy(pos, &orig_sn, 4); + put_unaligned_le32(orig_sn, pos); pos += 4; } @@ -239,8 +238,8 @@ static void prepare_frame_for_deferred_tx(struct ieee80211_sub_if_data *sdata, * frame directly but add it to the pending queue instead. */ int mesh_path_error_tx(struct ieee80211_sub_if_data *sdata, - u8 ttl, const u8 *target, __le32 target_sn, - __le16 target_rcode, const u8 *ra) + u8 ttl, const u8 *target, u32 target_sn, + u16 target_rcode, const u8 *ra) { struct ieee80211_local *local = sdata->local; struct sk_buff *skb; @@ -254,13 +253,13 @@ int mesh_path_error_tx(struct ieee80211_sub_if_data *sdata, return -EAGAIN; skb = dev_alloc_skb(local->tx_headroom + - IEEE80211_ENCRYPT_HEADROOM + + sdata->encrypt_headroom + IEEE80211_ENCRYPT_TAILROOM + hdr_len + 2 + 15 /* PERR IE */); if (!skb) return -1; - skb_reserve(skb, local->tx_headroom + IEEE80211_ENCRYPT_HEADROOM); + skb_reserve(skb, local->tx_headroom + sdata->encrypt_headroom); mgmt = (struct ieee80211_mgmt *) skb_put(skb, hdr_len); memset(mgmt, 0, hdr_len); mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | @@ -293,9 +292,9 @@ int mesh_path_error_tx(struct ieee80211_sub_if_data *sdata, pos++; memcpy(pos, target, ETH_ALEN); pos += ETH_ALEN; - memcpy(pos, &target_sn, 4); + put_unaligned_le32(target_sn, pos); pos += 4; - memcpy(pos, &target_rcode, 2); + put_unaligned_le16(target_rcode, pos); /* see note in function header */ prepare_frame_for_deferred_tx(sdata, skb); @@ -592,10 +591,9 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata, if (ttl != 0) { mhwmp_dbg(sdata, "replying to the PREQ\n"); mesh_path_sel_frame_tx(MPATH_PREP, 0, orig_addr, - 
cpu_to_le32(orig_sn), 0, target_addr, - cpu_to_le32(target_sn), mgmt->sa, 0, ttl, - cpu_to_le32(lifetime), cpu_to_le32(metric), - 0, sdata); + orig_sn, 0, target_addr, + target_sn, mgmt->sa, 0, ttl, + lifetime, metric, 0, sdata); } else { ifmsh->mshstats.dropped_frames_ttl++; } @@ -625,11 +623,9 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata, } mesh_path_sel_frame_tx(MPATH_PREQ, flags, orig_addr, - cpu_to_le32(orig_sn), target_flags, target_addr, - cpu_to_le32(target_sn), da, - hopcount, ttl, cpu_to_le32(lifetime), - cpu_to_le32(metric), cpu_to_le32(preq_id), - sdata); + orig_sn, target_flags, target_addr, + target_sn, da, hopcount, ttl, lifetime, + metric, preq_id, sdata); if (!is_multicast_ether_addr(da)) ifmsh->mshstats.fwded_unicast++; else @@ -695,11 +691,9 @@ static void hwmp_prep_frame_process(struct ieee80211_sub_if_data *sdata, target_sn = PREP_IE_TARGET_SN(prep_elem); orig_sn = PREP_IE_ORIG_SN(prep_elem); - mesh_path_sel_frame_tx(MPATH_PREP, flags, orig_addr, - cpu_to_le32(orig_sn), 0, target_addr, - cpu_to_le32(target_sn), next_hop, hopcount, - ttl, cpu_to_le32(lifetime), cpu_to_le32(metric), - 0, sdata); + mesh_path_sel_frame_tx(MPATH_PREP, flags, orig_addr, orig_sn, 0, + target_addr, target_sn, next_hop, hopcount, + ttl, lifetime, metric, 0, sdata); rcu_read_unlock(); sdata->u.mesh.mshstats.fwded_unicast++; @@ -750,8 +744,7 @@ static void hwmp_perr_frame_process(struct ieee80211_sub_if_data *sdata, if (!ifmsh->mshcfg.dot11MeshForwarding) goto endperr; mesh_path_error_tx(sdata, ttl, target_addr, - cpu_to_le32(target_sn), - cpu_to_le16(target_rcode), + target_sn, target_rcode, broadcast_addr); } else spin_unlock_bh(&mpath->state_lock); @@ -847,11 +840,9 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, if (ifmsh->mshcfg.dot11MeshForwarding) { mesh_path_sel_frame_tx(MPATH_RANN, flags, orig_addr, - cpu_to_le32(orig_sn), - 0, NULL, 0, broadcast_addr, - hopcount, ttl, cpu_to_le32(interval), - 
cpu_to_le32(metric + metric_txsta), - 0, sdata); + orig_sn, 0, NULL, 0, broadcast_addr, + hopcount, ttl, interval, + metric + metric_txsta, 0, sdata); } rcu_read_unlock(); @@ -1049,11 +1040,9 @@ void mesh_path_start_discovery(struct ieee80211_sub_if_data *sdata) spin_unlock_bh(&mpath->state_lock); da = (mpath->is_root) ? mpath->rann_snd_addr : broadcast_addr; - mesh_path_sel_frame_tx(MPATH_PREQ, 0, sdata->vif.addr, - cpu_to_le32(ifmsh->sn), target_flags, mpath->dst, - cpu_to_le32(mpath->sn), da, 0, - ttl, cpu_to_le32(lifetime), 0, - cpu_to_le32(ifmsh->preq_id++), sdata); + mesh_path_sel_frame_tx(MPATH_PREQ, 0, sdata->vif.addr, ifmsh->sn, + target_flags, mpath->dst, mpath->sn, da, 0, + ttl, lifetime, 0, ifmsh->preq_id++, sdata); mod_timer(&mpath->timer, jiffies + mpath->discovery_timeout); enddiscovery: @@ -1212,10 +1201,9 @@ void mesh_path_tx_root_frame(struct ieee80211_sub_if_data *sdata) switch (ifmsh->mshcfg.dot11MeshHWMPRootMode) { case IEEE80211_PROACTIVE_RANN: mesh_path_sel_frame_tx(MPATH_RANN, flags, sdata->vif.addr, - cpu_to_le32(++ifmsh->sn), - 0, NULL, 0, broadcast_addr, - 0, ifmsh->mshcfg.element_ttl, - cpu_to_le32(interval), 0, 0, sdata); + ++ifmsh->sn, 0, NULL, 0, broadcast_addr, + 0, ifmsh->mshcfg.element_ttl, + interval, 0, 0, sdata); break; case IEEE80211_PROACTIVE_PREQ_WITH_PREP: flags |= IEEE80211_PREQ_PROACTIVE_PREP_FLAG; @@ -1224,11 +1212,10 @@ void mesh_path_tx_root_frame(struct ieee80211_sub_if_data *sdata) target_flags |= IEEE80211_PREQ_TO_FLAG | IEEE80211_PREQ_USN_FLAG; mesh_path_sel_frame_tx(MPATH_PREQ, flags, sdata->vif.addr, - cpu_to_le32(++ifmsh->sn), target_flags, - (u8 *) broadcast_addr, 0, broadcast_addr, - 0, ifmsh->mshcfg.element_ttl, - cpu_to_le32(interval), - 0, cpu_to_le32(ifmsh->preq_id++), sdata); + ++ifmsh->sn, target_flags, + (u8 *) broadcast_addr, 0, broadcast_addr, + 0, ifmsh->mshcfg.element_ttl, interval, + 0, ifmsh->preq_id++, sdata); break; default: mhwmp_dbg(sdata, "Proactive mechanism not supported\n"); diff --git 
a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 89aacfd2756..7d050ed6fe5 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -722,7 +722,6 @@ void mesh_plink_broken(struct sta_info *sta) struct mpath_node *node; struct ieee80211_sub_if_data *sdata = sta->sdata; int i; - __le16 reason = cpu_to_le16(WLAN_REASON_MESH_PATH_DEST_UNREACHABLE); rcu_read_lock(); tbl = rcu_dereference(mesh_paths); @@ -736,9 +735,9 @@ void mesh_plink_broken(struct sta_info *sta) ++mpath->sn; spin_unlock_bh(&mpath->state_lock); mesh_path_error_tx(sdata, - sdata->u.mesh.mshcfg.element_ttl, - mpath->dst, cpu_to_le32(mpath->sn), - reason, bcast); + sdata->u.mesh.mshcfg.element_ttl, + mpath->dst, mpath->sn, + WLAN_REASON_MESH_PATH_DEST_UNREACHABLE, bcast); } } rcu_read_unlock(); diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 4301aa5aa22..e8f60aa2e84 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -19,12 +19,6 @@ #define mod_plink_timer(s, t) (mod_timer(&s->plink_timer, \ jiffies + HZ * t / 1000)) -/* We only need a valid sta if user configured a minimum rssi_threshold. */ -#define rssi_threshold_check(sta, sdata) \ - (sdata->u.mesh.mshcfg.rssi_threshold == 0 ||\ - (sta && (s8) -ewma_read(&sta->avg_signal) > \ - sdata->u.mesh.mshcfg.rssi_threshold)) - enum plink_event { PLINK_UNDEFINED, OPN_ACPT, @@ -61,7 +55,17 @@ static const char * const mplevents[] = { static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, enum ieee80211_self_protected_actioncode action, - u8 *da, __le16 llid, __le16 plid, __le16 reason); + u8 *da, u16 llid, u16 plid, u16 reason); + + +/* We only need a valid sta if user configured a minimum rssi_threshold. 
*/ +static bool rssi_threshold_check(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta) +{ + s32 rssi_threshold = sdata->u.mesh.mshcfg.rssi_threshold; + return rssi_threshold == 0 || + (sta && (s8) -ewma_read(&sta->avg_signal) > rssi_threshold); +} /** * mesh_plink_fsm_restart - restart a mesh peer link finite state machine @@ -242,7 +246,7 @@ u32 mesh_plink_deactivate(struct sta_info *sta) spin_lock_bh(&sta->lock); changed = __mesh_plink_deactivate(sta); - sta->reason = cpu_to_le16(WLAN_REASON_MESH_PEER_CANCELED); + sta->reason = WLAN_REASON_MESH_PEER_CANCELED; mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_CLOSE, sta->sta.addr, sta->llid, sta->plid, sta->reason); @@ -253,7 +257,7 @@ u32 mesh_plink_deactivate(struct sta_info *sta) static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, enum ieee80211_self_protected_actioncode action, - u8 *da, __le16 llid, __le16 plid, __le16 reason) + u8 *da, u16 llid, u16 plid, u16 reason) { struct ieee80211_local *local = sdata->local; struct sk_buff *skb; @@ -279,7 +283,7 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, 2 + 8 + /* peering IE */ sdata->u.mesh.ie_len); if (!skb) - return -1; + return err; info = IEEE80211_SKB_CB(skb); skb_reserve(skb, local->tx_headroom); mgmt = (struct ieee80211_mgmt *) skb_put(skb, hdr_len); @@ -301,7 +305,7 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, if (action == WLAN_SP_MESH_PEERING_CONFIRM) { /* AID */ pos = skb_put(skb, 2); - memcpy(pos + 2, &plid, 2); + put_unaligned_le16(plid, pos + 2); } if (ieee80211_add_srates_ie(sdata, skb, true, band) || ieee80211_add_ext_srates_ie(sdata, skb, true, band) || @@ -343,14 +347,14 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, *pos++ = ie_len; memcpy(pos, &peering_proto, 2); pos += 2; - memcpy(pos, &llid, 2); + put_unaligned_le16(llid, pos); pos += 2; if (include_plid) { - memcpy(pos, &plid, 2); + put_unaligned_le16(plid, pos); pos += 2; } if (action == 
WLAN_SP_MESH_PEERING_CLOSE) { - memcpy(pos, &reason, 2); + put_unaligned_le16(reason, pos); pos += 2; } @@ -433,6 +437,7 @@ __mesh_sta_info_alloc(struct ieee80211_sub_if_data *sdata, u8 *hw_addr) sta_info_pre_move_state(sta, IEEE80211_STA_AUTHORIZED); set_sta_flag(sta, WLAN_STA_WME); + sta->sta.wme = true; return sta; } @@ -518,7 +523,7 @@ void mesh_neighbour_update(struct ieee80211_sub_if_data *sdata, sta->plink_state == NL80211_PLINK_LISTEN && sdata->u.mesh.accepting_plinks && sdata->u.mesh.mshcfg.auto_open_plinks && - rssi_threshold_check(sta, sdata)) + rssi_threshold_check(sdata, sta)) changed = mesh_plink_open(sta); ieee80211_mps_frame_release(sta, elems); @@ -530,9 +535,10 @@ out: static void mesh_plink_timer(unsigned long data) { struct sta_info *sta; - __le16 llid, plid, reason; + u16 reason = 0; struct ieee80211_sub_if_data *sdata; struct mesh_config *mshcfg; + enum ieee80211_self_protected_actioncode action = 0; /* * This STA is valid because sta_info_destroy() will @@ -553,9 +559,6 @@ static void mesh_plink_timer(unsigned long data) mpl_dbg(sta->sdata, "Mesh plink timer for %pM fired on state %s\n", sta->sta.addr, mplstates[sta->plink_state]); - reason = 0; - llid = sta->llid; - plid = sta->plid; sdata = sta->sdata; mshcfg = &sdata->u.mesh.mshcfg; @@ -574,33 +577,31 @@ static void mesh_plink_timer(unsigned long data) rand % sta->plink_timeout; ++sta->plink_retries; mod_plink_timer(sta, sta->plink_timeout); - spin_unlock_bh(&sta->lock); - mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_OPEN, - sta->sta.addr, llid, 0, 0); + action = WLAN_SP_MESH_PEERING_OPEN; break; } - reason = cpu_to_le16(WLAN_REASON_MESH_MAX_RETRIES); + reason = WLAN_REASON_MESH_MAX_RETRIES; /* fall through on else */ case NL80211_PLINK_CNF_RCVD: /* confirm timer */ if (!reason) - reason = cpu_to_le16(WLAN_REASON_MESH_CONFIRM_TIMEOUT); + reason = WLAN_REASON_MESH_CONFIRM_TIMEOUT; sta->plink_state = NL80211_PLINK_HOLDING; mod_plink_timer(sta, mshcfg->dot11MeshHoldingTimeout); - 
spin_unlock_bh(&sta->lock); - mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_CLOSE, - sta->sta.addr, llid, plid, reason); + action = WLAN_SP_MESH_PEERING_CLOSE; break; case NL80211_PLINK_HOLDING: /* holding timer */ del_timer(&sta->plink_timer); mesh_plink_fsm_restart(sta); - spin_unlock_bh(&sta->lock); break; default: - spin_unlock_bh(&sta->lock); break; } + spin_unlock_bh(&sta->lock); + if (action) + mesh_plink_frame_tx(sdata, action, sta->sta.addr, + sta->llid, sta->plid, reason); } static inline void mesh_plink_timer_set(struct sta_info *sta, int timeout) @@ -612,9 +613,40 @@ static inline void mesh_plink_timer_set(struct sta_info *sta, int timeout) add_timer(&sta->plink_timer); } +static bool llid_in_use(struct ieee80211_sub_if_data *sdata, + u16 llid) +{ + struct ieee80211_local *local = sdata->local; + bool in_use = false; + struct sta_info *sta; + + rcu_read_lock(); + list_for_each_entry_rcu(sta, &local->sta_list, list) { + if (!memcmp(&sta->llid, &llid, sizeof(llid))) { + in_use = true; + break; + } + } + rcu_read_unlock(); + + return in_use; +} + +static u16 mesh_get_new_llid(struct ieee80211_sub_if_data *sdata) +{ + u16 llid; + + do { + get_random_bytes(&llid, sizeof(llid)); + /* for mesh PS we still only have the AID range for TIM bits */ + llid = (llid % IEEE80211_MAX_AID) + 1; + } while (llid_in_use(sdata, llid)); + + return llid; +} + u32 mesh_plink_open(struct sta_info *sta) { - __le16 llid; struct ieee80211_sub_if_data *sdata = sta->sdata; u32 changed; @@ -622,8 +654,7 @@ u32 mesh_plink_open(struct sta_info *sta) return 0; spin_lock_bh(&sta->lock); - get_random_bytes(&llid, 2); - sta->llid = llid; + sta->llid = mesh_get_new_llid(sdata); if (sta->plink_state != NL80211_PLINK_LISTEN && sta->plink_state != NL80211_PLINK_BLOCKED) { spin_unlock_bh(&sta->lock); @@ -640,7 +671,7 @@ u32 mesh_plink_open(struct sta_info *sta) changed = ieee80211_mps_local_status_update(sdata); mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_OPEN, - sta->sta.addr, llid, 0, 
0); + sta->sta.addr, sta->llid, 0, 0); return changed; } @@ -656,390 +687,147 @@ u32 mesh_plink_block(struct sta_info *sta) return changed; } - -void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgmt *mgmt, size_t len, - struct ieee80211_rx_status *rx_status) +static void mesh_plink_close(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, + enum plink_event event) { struct mesh_config *mshcfg = &sdata->u.mesh.mshcfg; - struct ieee802_11_elems elems; - struct sta_info *sta; - enum plink_event event; - enum ieee80211_self_protected_actioncode ftype; - size_t baselen; - bool matches_local = true; - u8 ie_len; - u8 *baseaddr; - u32 changed = 0; - __le16 plid, llid, reason; - - /* need action_code, aux */ - if (len < IEEE80211_MIN_ACTION_SIZE + 3) - return; - - if (sdata->u.mesh.user_mpm) - /* userspace must register for these */ - return; - - if (is_multicast_ether_addr(mgmt->da)) { - mpl_dbg(sdata, - "Mesh plink: ignore frame from multicast address\n"); - return; - } - - baseaddr = mgmt->u.action.u.self_prot.variable; - baselen = (u8 *) mgmt->u.action.u.self_prot.variable - (u8 *) mgmt; - if (mgmt->u.action.u.self_prot.action_code == - WLAN_SP_MESH_PEERING_CONFIRM) { - baseaddr += 4; - baselen += 4; - } - ieee802_11_parse_elems(baseaddr, len - baselen, true, &elems); - - if (!elems.peering) { - mpl_dbg(sdata, - "Mesh plink: missing necessary peer link ie\n"); - return; - } - if (elems.rsn_len && - sdata->u.mesh.security == IEEE80211_MESH_SEC_NONE) { - mpl_dbg(sdata, - "Mesh plink: can't establish link with secure peer\n"); - return; - } + u16 reason = (event == CLS_ACPT) ? 
+ WLAN_REASON_MESH_CLOSE : WLAN_REASON_MESH_CONFIG; - ftype = mgmt->u.action.u.self_prot.action_code; - ie_len = elems.peering_len; - if ((ftype == WLAN_SP_MESH_PEERING_OPEN && ie_len != 4) || - (ftype == WLAN_SP_MESH_PEERING_CONFIRM && ie_len != 6) || - (ftype == WLAN_SP_MESH_PEERING_CLOSE && ie_len != 6 - && ie_len != 8)) { - mpl_dbg(sdata, - "Mesh plink: incorrect plink ie length %d %d\n", - ftype, ie_len); - return; - } - - if (ftype != WLAN_SP_MESH_PEERING_CLOSE && - (!elems.mesh_id || !elems.mesh_config)) { - mpl_dbg(sdata, "Mesh plink: missing necessary ie\n"); - return; - } - /* Note the lines below are correct, the llid in the frame is the plid - * from the point of view of this host. - */ - memcpy(&plid, PLINK_GET_LLID(elems.peering), 2); - if (ftype == WLAN_SP_MESH_PEERING_CONFIRM || - (ftype == WLAN_SP_MESH_PEERING_CLOSE && ie_len == 8)) - memcpy(&llid, PLINK_GET_PLID(elems.peering), 2); - - /* WARNING: Only for sta pointer, is dropped & re-acquired */ - rcu_read_lock(); - - sta = sta_info_get(sdata, mgmt->sa); - if (!sta && ftype != WLAN_SP_MESH_PEERING_OPEN) { - mpl_dbg(sdata, "Mesh plink: cls or cnf from unknown peer\n"); - rcu_read_unlock(); - return; - } - - if (ftype == WLAN_SP_MESH_PEERING_OPEN && - !rssi_threshold_check(sta, sdata)) { - mpl_dbg(sdata, "Mesh plink: %pM does not meet rssi threshold\n", - mgmt->sa); - rcu_read_unlock(); - return; - } - - if (sta && !test_sta_flag(sta, WLAN_STA_AUTH)) { - mpl_dbg(sdata, "Mesh plink: Action frame from non-authed peer\n"); - rcu_read_unlock(); - return; - } + sta->reason = reason; + sta->plink_state = NL80211_PLINK_HOLDING; + mod_plink_timer(sta, mshcfg->dot11MeshHoldingTimeout); +} - if (sta && sta->plink_state == NL80211_PLINK_BLOCKED) { - rcu_read_unlock(); - return; - } +static u32 mesh_plink_establish(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta) +{ + struct mesh_config *mshcfg = &sdata->u.mesh.mshcfg; + u32 changed = 0; - /* Now we will figure out the appropriate event... 
*/ - event = PLINK_UNDEFINED; - if (ftype != WLAN_SP_MESH_PEERING_CLOSE && - !mesh_matches_local(sdata, &elems)) { - matches_local = false; - switch (ftype) { - case WLAN_SP_MESH_PEERING_OPEN: - event = OPN_RJCT; - break; - case WLAN_SP_MESH_PEERING_CONFIRM: - event = CNF_RJCT; - break; - default: - break; - } - } + del_timer(&sta->plink_timer); + sta->plink_state = NL80211_PLINK_ESTAB; + changed |= mesh_plink_inc_estab_count(sdata); + changed |= mesh_set_ht_prot_mode(sdata); + changed |= mesh_set_short_slot_time(sdata); + mpl_dbg(sdata, "Mesh plink with %pM ESTABLISHED\n", sta->sta.addr); + ieee80211_mps_sta_status_update(sta); + changed |= ieee80211_mps_set_sta_local_pm(sta, mshcfg->power_mode); + return changed; +} - if (!sta && !matches_local) { - rcu_read_unlock(); - reason = cpu_to_le16(WLAN_REASON_MESH_CONFIG); - llid = 0; - mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_CLOSE, - mgmt->sa, llid, plid, reason); - return; - } else if (!sta) { - /* ftype == WLAN_SP_MESH_PEERING_OPEN */ - if (!mesh_plink_free_count(sdata)) { - mpl_dbg(sdata, "Mesh plink error: no more free plinks\n"); - rcu_read_unlock(); - return; - } - event = OPN_ACPT; - } else if (matches_local) { - switch (ftype) { - case WLAN_SP_MESH_PEERING_OPEN: - if (!mesh_plink_free_count(sdata) || - (sta->plid && sta->plid != plid)) - event = OPN_IGNR; - else - event = OPN_ACPT; - break; - case WLAN_SP_MESH_PEERING_CONFIRM: - if (!mesh_plink_free_count(sdata) || - (sta->llid != llid || sta->plid != plid)) - event = CNF_IGNR; - else - event = CNF_ACPT; - break; - case WLAN_SP_MESH_PEERING_CLOSE: - if (sta->plink_state == NL80211_PLINK_ESTAB) - /* Do not check for llid or plid. This does not - * follow the standard but since multiple plinks - * per sta are not supported, it is necessary in - * order to avoid a livelock when MP A sees an - * establish peer link to MP B but MP B does not - * see it. This can be caused by a timeout in - * B's peer link establishment or B beign - * restarted. 
- */ - event = CLS_ACPT; - else if (sta->plid != plid) - event = CLS_IGNR; - else if (ie_len == 7 && sta->llid != llid) - event = CLS_IGNR; - else - event = CLS_ACPT; - break; - default: - mpl_dbg(sdata, "Mesh plink: unknown frame subtype\n"); - rcu_read_unlock(); - return; - } - } +/** + * mesh_plink_fsm - step @sta MPM based on @event + * + * @sdata: interface + * @sta: mesh neighbor + * @event: peering event + * + * Return: changed MBSS flags + */ +static u32 mesh_plink_fsm(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, enum plink_event event) +{ + struct mesh_config *mshcfg = &sdata->u.mesh.mshcfg; + enum ieee80211_self_protected_actioncode action = 0; + u32 changed = 0; - if (event == OPN_ACPT) { - rcu_read_unlock(); - /* allocate sta entry if necessary and update info */ - sta = mesh_sta_info_get(sdata, mgmt->sa, &elems); - if (!sta) { - mpl_dbg(sdata, "Mesh plink: failed to init peer!\n"); - rcu_read_unlock(); - return; - } - } + mpl_dbg(sdata, "peer %pM in state %s got event %s\n", sta->sta.addr, + mplstates[sta->plink_state], mplevents[event]); - mpl_dbg(sdata, "peer %pM in state %s got event %s\n", mgmt->sa, - mplstates[sta->plink_state], mplevents[event]); - reason = 0; spin_lock_bh(&sta->lock); switch (sta->plink_state) { - /* spin_unlock as soon as state is updated at each case */ case NL80211_PLINK_LISTEN: switch (event) { case CLS_ACPT: mesh_plink_fsm_restart(sta); - spin_unlock_bh(&sta->lock); break; case OPN_ACPT: sta->plink_state = NL80211_PLINK_OPN_RCVD; - sta->plid = plid; - get_random_bytes(&llid, 2); - sta->llid = llid; + sta->llid = mesh_get_new_llid(sdata); mesh_plink_timer_set(sta, mshcfg->dot11MeshRetryTimeout); /* set the non-peer mode to active during peering */ changed |= ieee80211_mps_local_status_update(sdata); - - spin_unlock_bh(&sta->lock); - mesh_plink_frame_tx(sdata, - WLAN_SP_MESH_PEERING_OPEN, - sta->sta.addr, llid, 0, 0); - mesh_plink_frame_tx(sdata, - WLAN_SP_MESH_PEERING_CONFIRM, - sta->sta.addr, llid, plid, 0); 
+ action = WLAN_SP_MESH_PEERING_OPEN; break; default: - spin_unlock_bh(&sta->lock); break; } break; - case NL80211_PLINK_OPN_SNT: switch (event) { case OPN_RJCT: case CNF_RJCT: - reason = cpu_to_le16(WLAN_REASON_MESH_CONFIG); case CLS_ACPT: - if (!reason) - reason = cpu_to_le16(WLAN_REASON_MESH_CLOSE); - sta->reason = reason; - sta->plink_state = NL80211_PLINK_HOLDING; - if (!mod_plink_timer(sta, - mshcfg->dot11MeshHoldingTimeout)) - sta->ignore_plink_timer = true; - - llid = sta->llid; - spin_unlock_bh(&sta->lock); - mesh_plink_frame_tx(sdata, - WLAN_SP_MESH_PEERING_CLOSE, - sta->sta.addr, llid, plid, reason); + mesh_plink_close(sdata, sta, event); + action = WLAN_SP_MESH_PEERING_CLOSE; break; case OPN_ACPT: /* retry timer is left untouched */ sta->plink_state = NL80211_PLINK_OPN_RCVD; - sta->plid = plid; - llid = sta->llid; - spin_unlock_bh(&sta->lock); - mesh_plink_frame_tx(sdata, - WLAN_SP_MESH_PEERING_CONFIRM, - sta->sta.addr, llid, plid, 0); + action = WLAN_SP_MESH_PEERING_CONFIRM; break; case CNF_ACPT: sta->plink_state = NL80211_PLINK_CNF_RCVD; if (!mod_plink_timer(sta, mshcfg->dot11MeshConfirmTimeout)) sta->ignore_plink_timer = true; - - spin_unlock_bh(&sta->lock); break; default: - spin_unlock_bh(&sta->lock); break; } break; - case NL80211_PLINK_OPN_RCVD: switch (event) { case OPN_RJCT: case CNF_RJCT: - reason = cpu_to_le16(WLAN_REASON_MESH_CONFIG); case CLS_ACPT: - if (!reason) - reason = cpu_to_le16(WLAN_REASON_MESH_CLOSE); - sta->reason = reason; - sta->plink_state = NL80211_PLINK_HOLDING; - if (!mod_plink_timer(sta, - mshcfg->dot11MeshHoldingTimeout)) - sta->ignore_plink_timer = true; - - llid = sta->llid; - spin_unlock_bh(&sta->lock); - mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_CLOSE, - sta->sta.addr, llid, plid, reason); + mesh_plink_close(sdata, sta, event); + action = WLAN_SP_MESH_PEERING_CLOSE; break; case OPN_ACPT: - llid = sta->llid; - spin_unlock_bh(&sta->lock); - mesh_plink_frame_tx(sdata, - WLAN_SP_MESH_PEERING_CONFIRM, - sta->sta.addr, 
llid, plid, 0); + action = WLAN_SP_MESH_PEERING_CONFIRM; break; case CNF_ACPT: - del_timer(&sta->plink_timer); - sta->plink_state = NL80211_PLINK_ESTAB; - spin_unlock_bh(&sta->lock); - changed |= mesh_plink_inc_estab_count(sdata); - changed |= mesh_set_ht_prot_mode(sdata); - changed |= mesh_set_short_slot_time(sdata); - mpl_dbg(sdata, "Mesh plink with %pM ESTABLISHED\n", - sta->sta.addr); - ieee80211_mps_sta_status_update(sta); - changed |= ieee80211_mps_set_sta_local_pm(sta, - mshcfg->power_mode); + changed |= mesh_plink_establish(sdata, sta); break; default: - spin_unlock_bh(&sta->lock); break; } break; - case NL80211_PLINK_CNF_RCVD: switch (event) { case OPN_RJCT: case CNF_RJCT: - reason = cpu_to_le16(WLAN_REASON_MESH_CONFIG); case CLS_ACPT: - if (!reason) - reason = cpu_to_le16(WLAN_REASON_MESH_CLOSE); - sta->reason = reason; - sta->plink_state = NL80211_PLINK_HOLDING; - if (!mod_plink_timer(sta, - mshcfg->dot11MeshHoldingTimeout)) - sta->ignore_plink_timer = true; - - llid = sta->llid; - spin_unlock_bh(&sta->lock); - mesh_plink_frame_tx(sdata, - WLAN_SP_MESH_PEERING_CLOSE, - sta->sta.addr, llid, plid, reason); + mesh_plink_close(sdata, sta, event); + action = WLAN_SP_MESH_PEERING_CLOSE; break; case OPN_ACPT: - del_timer(&sta->plink_timer); - sta->plink_state = NL80211_PLINK_ESTAB; - spin_unlock_bh(&sta->lock); - changed |= mesh_plink_inc_estab_count(sdata); - changed |= mesh_set_ht_prot_mode(sdata); - changed |= mesh_set_short_slot_time(sdata); - mpl_dbg(sdata, "Mesh plink with %pM ESTABLISHED\n", - sta->sta.addr); - mesh_plink_frame_tx(sdata, - WLAN_SP_MESH_PEERING_CONFIRM, - sta->sta.addr, llid, plid, 0); - ieee80211_mps_sta_status_update(sta); - changed |= ieee80211_mps_set_sta_local_pm(sta, - mshcfg->power_mode); + changed |= mesh_plink_establish(sdata, sta); + action = WLAN_SP_MESH_PEERING_CONFIRM; break; default: - spin_unlock_bh(&sta->lock); break; } break; - case NL80211_PLINK_ESTAB: switch (event) { case CLS_ACPT: - reason = 
cpu_to_le16(WLAN_REASON_MESH_CLOSE); - sta->reason = reason; changed |= __mesh_plink_deactivate(sta); - sta->plink_state = NL80211_PLINK_HOLDING; - llid = sta->llid; - mod_plink_timer(sta, mshcfg->dot11MeshHoldingTimeout); - spin_unlock_bh(&sta->lock); changed |= mesh_set_ht_prot_mode(sdata); changed |= mesh_set_short_slot_time(sdata); - mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_CLOSE, - sta->sta.addr, llid, plid, reason); + mesh_plink_close(sdata, sta, event); + action = WLAN_SP_MESH_PEERING_CLOSE; break; case OPN_ACPT: - llid = sta->llid; - spin_unlock_bh(&sta->lock); - mesh_plink_frame_tx(sdata, - WLAN_SP_MESH_PEERING_CONFIRM, - sta->sta.addr, llid, plid, 0); + action = WLAN_SP_MESH_PEERING_CONFIRM; break; default: - spin_unlock_bh(&sta->lock); break; } break; @@ -1049,32 +837,271 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, if (del_timer(&sta->plink_timer)) sta->ignore_plink_timer = 1; mesh_plink_fsm_restart(sta); - spin_unlock_bh(&sta->lock); break; case OPN_ACPT: case CNF_ACPT: case OPN_RJCT: case CNF_RJCT: - llid = sta->llid; - reason = sta->reason; - spin_unlock_bh(&sta->lock); - mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_CLOSE, - sta->sta.addr, llid, plid, reason); + action = WLAN_SP_MESH_PEERING_CLOSE; break; default: - spin_unlock_bh(&sta->lock); + break; } break; default: /* should not get here, PLINK_BLOCKED is dealt with at the * beginning of the function */ - spin_unlock_bh(&sta->lock); break; } + spin_unlock_bh(&sta->lock); + if (action) { + mesh_plink_frame_tx(sdata, action, sta->sta.addr, + sta->llid, sta->plid, sta->reason); + + /* also send confirm in open case */ + if (action == WLAN_SP_MESH_PEERING_OPEN) { + mesh_plink_frame_tx(sdata, + WLAN_SP_MESH_PEERING_CONFIRM, + sta->sta.addr, sta->llid, + sta->plid, 0); + } + } + + return changed; +} + +/* + * mesh_plink_get_event - get correct MPM event + * + * @sdata: interface + * @sta: peer, leave NULL if processing a frame from a new suitable peer + * @elems: peering 
management IEs + * @ftype: frame type + * @llid: peer's peer link ID + * @plid: peer's local link ID + * + * Return: new peering event for @sta, but PLINK_UNDEFINED should be treated as + * an error. + */ +static enum plink_event +mesh_plink_get_event(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, + struct ieee802_11_elems *elems, + enum ieee80211_self_protected_actioncode ftype, + u16 llid, u16 plid) +{ + enum plink_event event = PLINK_UNDEFINED; + u8 ie_len = elems->peering_len; + bool matches_local; + + matches_local = (ftype == WLAN_SP_MESH_PEERING_CLOSE || + mesh_matches_local(sdata, elems)); + + /* deny open request from non-matching peer */ + if (!matches_local && !sta) { + event = OPN_RJCT; + goto out; + } + + if (!sta) { + if (ftype != WLAN_SP_MESH_PEERING_OPEN) { + mpl_dbg(sdata, "Mesh plink: cls or cnf from unknown peer\n"); + goto out; + } + /* ftype == WLAN_SP_MESH_PEERING_OPEN */ + if (!mesh_plink_free_count(sdata)) { + mpl_dbg(sdata, "Mesh plink error: no more free plinks\n"); + goto out; + } + } else { + if (!test_sta_flag(sta, WLAN_STA_AUTH)) { + mpl_dbg(sdata, "Mesh plink: Action frame from non-authed peer\n"); + goto out; + } + if (sta->plink_state == NL80211_PLINK_BLOCKED) + goto out; + } + + /* new matching peer */ + if (!sta) { + event = OPN_ACPT; + goto out; + } + + switch (ftype) { + case WLAN_SP_MESH_PEERING_OPEN: + if (!matches_local) + event = OPN_RJCT; + if (!mesh_plink_free_count(sdata) || + (sta->plid && sta->plid != plid)) + event = OPN_IGNR; + else + event = OPN_ACPT; + break; + case WLAN_SP_MESH_PEERING_CONFIRM: + if (!matches_local) + event = CNF_RJCT; + if (!mesh_plink_free_count(sdata) || + (sta->llid != llid || sta->plid != plid)) + event = CNF_IGNR; + else + event = CNF_ACPT; + break; + case WLAN_SP_MESH_PEERING_CLOSE: + if (sta->plink_state == NL80211_PLINK_ESTAB) + /* Do not check for llid or plid. 
This does not + * follow the standard but since multiple plinks + * per sta are not supported, it is necessary in + * order to avoid a livelock when MP A sees an + * establish peer link to MP B but MP B does not + * see it. This can be caused by a timeout in + * B's peer link establishment or B beign + * restarted. + */ + event = CLS_ACPT; + else if (sta->plid != plid) + event = CLS_IGNR; + else if (ie_len == 8 && sta->llid != llid) + event = CLS_IGNR; + else + event = CLS_ACPT; + break; + default: + mpl_dbg(sdata, "Mesh plink: unknown frame subtype\n"); + break; + } + +out: + return event; +} +static void +mesh_process_plink_frame(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, + struct ieee802_11_elems *elems) +{ + + struct sta_info *sta; + enum plink_event event; + enum ieee80211_self_protected_actioncode ftype; + u32 changed = 0; + u8 ie_len = elems->peering_len; + __le16 _plid, _llid; + u16 plid, llid = 0; + + if (!elems->peering) { + mpl_dbg(sdata, + "Mesh plink: missing necessary peer link ie\n"); + return; + } + + if (elems->rsn_len && + sdata->u.mesh.security == IEEE80211_MESH_SEC_NONE) { + mpl_dbg(sdata, + "Mesh plink: can't establish link with secure peer\n"); + return; + } + + ftype = mgmt->u.action.u.self_prot.action_code; + if ((ftype == WLAN_SP_MESH_PEERING_OPEN && ie_len != 4) || + (ftype == WLAN_SP_MESH_PEERING_CONFIRM && ie_len != 6) || + (ftype == WLAN_SP_MESH_PEERING_CLOSE && ie_len != 6 + && ie_len != 8)) { + mpl_dbg(sdata, + "Mesh plink: incorrect plink ie length %d %d\n", + ftype, ie_len); + return; + } + + if (ftype != WLAN_SP_MESH_PEERING_CLOSE && + (!elems->mesh_id || !elems->mesh_config)) { + mpl_dbg(sdata, "Mesh plink: missing necessary ie\n"); + return; + } + /* Note the lines below are correct, the llid in the frame is the plid + * from the point of view of this host. 
+ */ + memcpy(&_plid, PLINK_GET_LLID(elems->peering), sizeof(__le16)); + plid = le16_to_cpu(_plid); + if (ftype == WLAN_SP_MESH_PEERING_CONFIRM || + (ftype == WLAN_SP_MESH_PEERING_CLOSE && ie_len == 8)) { + memcpy(&_llid, PLINK_GET_PLID(elems->peering), sizeof(__le16)); + llid = le16_to_cpu(_llid); + } + + /* WARNING: Only for sta pointer, is dropped & re-acquired */ + rcu_read_lock(); + + sta = sta_info_get(sdata, mgmt->sa); + + if (ftype == WLAN_SP_MESH_PEERING_OPEN && + !rssi_threshold_check(sdata, sta)) { + mpl_dbg(sdata, "Mesh plink: %pM does not meet rssi threshold\n", + mgmt->sa); + goto unlock_rcu; + } + + /* Now we will figure out the appropriate event... */ + event = mesh_plink_get_event(sdata, sta, elems, ftype, llid, plid); + + if (event == OPN_ACPT) { + rcu_read_unlock(); + /* allocate sta entry if necessary and update info */ + sta = mesh_sta_info_get(sdata, mgmt->sa, elems); + if (!sta) { + mpl_dbg(sdata, "Mesh plink: failed to init peer!\n"); + goto unlock_rcu; + } + sta->plid = plid; + } else if (!sta && event == OPN_RJCT) { + mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_CLOSE, + mgmt->sa, 0, plid, + WLAN_REASON_MESH_CONFIG); + goto unlock_rcu; + } else if (!sta || event == PLINK_UNDEFINED) { + /* something went wrong */ + goto unlock_rcu; + } + + changed |= mesh_plink_fsm(sdata, sta, event); + +unlock_rcu: rcu_read_unlock(); if (changed) ieee80211_mbss_info_change_notify(sdata, changed); } + +void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, size_t len, + struct ieee80211_rx_status *rx_status) +{ + struct ieee802_11_elems elems; + size_t baselen; + u8 *baseaddr; + + /* need action_code, aux */ + if (len < IEEE80211_MIN_ACTION_SIZE + 3) + return; + + if (sdata->u.mesh.user_mpm) + /* userspace must register for these */ + return; + + if (is_multicast_ether_addr(mgmt->da)) { + mpl_dbg(sdata, + "Mesh plink: ignore frame from multicast address\n"); + return; + } + + baseaddr = 
mgmt->u.action.u.self_prot.variable; + baselen = (u8 *) mgmt->u.action.u.self_prot.variable - (u8 *) mgmt; + if (mgmt->u.action.u.self_prot.action_code == + WLAN_SP_MESH_PEERING_CONFIRM) { + baseaddr += 4; + baselen += 4; + } + ieee802_11_parse_elems(baseaddr, len - baselen, true, &elems); + mesh_process_plink_frame(sdata, mgmt, &elems); +} diff --git a/net/mac80211/mesh_ps.c b/net/mac80211/mesh_ps.c index 0f79b78b5e8..2802f9d9279 100644 --- a/net/mac80211/mesh_ps.c +++ b/net/mac80211/mesh_ps.c @@ -576,10 +576,9 @@ void ieee80211_mps_frame_release(struct sta_info *sta, int ac, buffer_local = 0; bool has_buffered = false; - /* TIM map only for LLID <= IEEE80211_MAX_AID */ if (sta->plink_state == NL80211_PLINK_ESTAB) has_buffered = ieee80211_check_tim(elems->tim, elems->tim_len, - le16_to_cpu(sta->llid) % IEEE80211_MAX_AID); + sta->llid); if (has_buffered) mps_dbg(sta->sdata, "%pM indicates buffered frames\n", diff --git a/net/mac80211/mesh_sync.c b/net/mac80211/mesh_sync.c index 05a256b38e2..2bc5dc25d5a 100644 --- a/net/mac80211/mesh_sync.c +++ b/net/mac80211/mesh_sync.c @@ -92,12 +92,20 @@ static void mesh_sync_offset_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, if (stype != IEEE80211_STYPE_BEACON) return; - /* The current tsf is a first approximation for the timestamp - * for the received beacon. Further down we try to get a - * better value from the rx_status->mactime field if - * available. Also we have to call drv_get_tsf() before - * entering the rcu-read section.*/ - t_r = drv_get_tsf(local, sdata); + /* + * Get time when timestamp field was received. If we don't + * have rx timestamps, then use current tsf as an approximation. + * drv_get_tsf() must be called before entering the rcu-read + * section. 
+ */ + if (ieee80211_have_rx_timestamp(rx_status)) + t_r = ieee80211_calculate_rx_timestamp(local, rx_status, + 24 + 12 + + elems->total_len + + FCS_LEN, + 24); + else + t_r = drv_get_tsf(local, sdata); rcu_read_lock(); sta = sta_info_get(sdata, mgmt->sa); @@ -117,14 +125,6 @@ static void mesh_sync_offset_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, goto no_sync; } - if (ieee80211_have_rx_timestamp(rx_status)) - /* time when timestamp field was received */ - t_r = ieee80211_calculate_rx_timestamp(local, rx_status, - 24 + 12 + - elems->total_len + - FCS_LEN, - 24); - /* Timing offset calculation (see 13.13.2.2.2) */ t_t = le64_to_cpu(mgmt->u.beacon.timestamp); sta->t_offset = t_t - t_r; @@ -164,12 +164,15 @@ no_sync: rcu_read_unlock(); } -static void mesh_sync_offset_adjust_tbtt(struct ieee80211_sub_if_data *sdata) +static void mesh_sync_offset_adjust_tbtt(struct ieee80211_sub_if_data *sdata, + struct beacon_data *beacon) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + u8 cap; WARN_ON(ifmsh->mesh_sp_id != IEEE80211_SYNC_METHOD_NEIGHBOR_OFFSET); BUG_ON(!rcu_read_lock_held()); + cap = beacon->meshconf->meshconf_cap; spin_lock_bh(&ifmsh->sync_offset_lock); @@ -194,6 +197,10 @@ static void mesh_sync_offset_adjust_tbtt(struct ieee80211_sub_if_data *sdata) ifmsh->adjusting_tbtt = false; } spin_unlock_bh(&ifmsh->sync_offset_lock); + + beacon->meshconf->meshconf_cap = ifmsh->adjusting_tbtt ? 
+ IEEE80211_MESHCONF_CAPAB_TBTT_ADJUSTING | cap : + ~IEEE80211_MESHCONF_CAPAB_TBTT_ADJUSTING & cap; } static const struct sync_method sync_methods[] = { diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index b3a3ce31665..fc1d82465b3 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -330,6 +330,16 @@ static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata, if (WARN_ON_ONCE(!sta)) return -EINVAL; + /* + * if bss configuration changed store the new one - + * this may be applicable even if channel is identical + */ + ht_opmode = le16_to_cpu(ht_oper->operation_mode); + if (sdata->vif.bss_conf.ht_operation_mode != ht_opmode) { + *changed |= BSS_CHANGED_HT; + sdata->vif.bss_conf.ht_operation_mode = ht_opmode; + } + chan = sdata->vif.bss_conf.chandef.chan; sband = local->hw.wiphy->bands[chan->band]; @@ -416,14 +426,6 @@ static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata, IEEE80211_RC_BW_CHANGED); } - ht_opmode = le16_to_cpu(ht_oper->operation_mode); - - /* if bss configuration changed store the new one */ - if (sdata->vif.bss_conf.ht_operation_mode != ht_opmode) { - *changed |= BSS_CHANGED_HT; - sdata->vif.bss_conf.ht_operation_mode = ht_opmode; - } - return 0; } @@ -714,7 +716,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) } /* if present, add any custom IEs that go before HT */ - if (assoc_data->ie_len && assoc_data->ie) { + if (assoc_data->ie_len) { static const u8 before_ht[] = { WLAN_EID_SSID, WLAN_EID_SUPP_RATES, @@ -748,7 +750,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) &assoc_data->ap_vht_cap); /* if present, add any custom non-vendor IEs that go after HT */ - if (assoc_data->ie_len && assoc_data->ie) { + if (assoc_data->ie_len) { noffset = ieee80211_ie_split_vendor(assoc_data->ie, assoc_data->ie_len, offset); @@ -779,7 +781,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) } /* add any remaining custom (i.e. 
vendor specific here) IEs */ - if (assoc_data->ie_len && assoc_data->ie) { + if (assoc_data->ie_len) { noffset = assoc_data->ie_len; pos = skb_put(skb, noffset - offset); memcpy(pos, assoc_data->ie + offset, noffset - offset); @@ -886,8 +888,9 @@ static void ieee80211_chswitch_work(struct work_struct *work) if (!ifmgd->associated) goto out; - ret = ieee80211_vif_change_channel(sdata, &local->csa_chandef, - &changed); + mutex_lock(&local->mtx); + ret = ieee80211_vif_change_channel(sdata, &changed); + mutex_unlock(&local->mtx); if (ret) { sdata_info(sdata, "vif channel switch failed, disconnecting\n"); @@ -897,7 +900,7 @@ static void ieee80211_chswitch_work(struct work_struct *work) } if (!local->use_chanctx) { - local->_oper_chandef = local->csa_chandef; + local->_oper_chandef = sdata->csa_chandef; /* Call "hw_config" only if doing sw channel switch. * Otherwise update the channel directly */ @@ -908,7 +911,7 @@ static void ieee80211_chswitch_work(struct work_struct *work) } /* XXX: shouldn't really modify cfg80211-owned data! */ - ifmgd->associated->channel = local->csa_chandef.chan; + ifmgd->associated->channel = sdata->csa_chandef.chan; /* XXX: wait for a beacon first? 
*/ ieee80211_wake_queues_by_reason(&local->hw, @@ -1035,7 +1038,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, } mutex_unlock(&local->chanctx_mtx); - local->csa_chandef = csa_ie.chandef; + sdata->csa_chandef = csa_ie.chandef; if (csa_ie.mode) ieee80211_stop_queues_by_reason(&local->hw, @@ -1398,10 +1401,16 @@ void ieee80211_dfs_cac_timer_work(struct work_struct *work) struct ieee80211_sub_if_data *sdata = container_of(delayed_work, struct ieee80211_sub_if_data, dfs_cac_timer_work); + struct cfg80211_chan_def chandef = sdata->vif.bss_conf.chandef; - ieee80211_vif_release_channel(sdata); - - cfg80211_cac_event(sdata->dev, NL80211_RADAR_CAC_FINISHED, GFP_KERNEL); + mutex_lock(&sdata->local->mtx); + if (sdata->wdev.cac_started) { + ieee80211_vif_release_channel(sdata); + cfg80211_cac_event(sdata->dev, &chandef, + NL80211_RADAR_CAC_FINISHED, + GFP_KERNEL); + } + mutex_unlock(&sdata->local->mtx); } /* MLME */ @@ -1695,7 +1704,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, memset(ifmgd->bssid, 0, ETH_ALEN); /* remove AP and TDLS peers */ - sta_info_flush_defer(sdata); + sta_info_flush(sdata); /* finally reset all BSS / config parameters */ changed |= ieee80211_reset_erp_info(sdata); @@ -1744,7 +1753,11 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, ifmgd->have_beacon = false; ifmgd->flags = 0; + mutex_lock(&local->mtx); ieee80211_vif_release_channel(sdata); + mutex_unlock(&local->mtx); + + sdata->encrypt_headroom = IEEE80211_ENCRYPT_HEADROOM; } void ieee80211_sta_rx_notify(struct ieee80211_sub_if_data *sdata, @@ -2065,7 +2078,9 @@ static void ieee80211_destroy_auth_data(struct ieee80211_sub_if_data *sdata, memset(sdata->u.mgd.bssid, 0, ETH_ALEN); ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BSSID); sdata->u.mgd.flags = 0; + mutex_lock(&sdata->local->mtx); ieee80211_vif_release_channel(sdata); + mutex_unlock(&sdata->local->mtx); } cfg80211_put_bss(sdata->local->hw.wiphy, 
auth_data->bss); @@ -2314,7 +2329,9 @@ static void ieee80211_destroy_assoc_data(struct ieee80211_sub_if_data *sdata, memset(sdata->u.mgd.bssid, 0, ETH_ALEN); ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BSSID); sdata->u.mgd.flags = 0; + mutex_lock(&sdata->local->mtx); ieee80211_vif_release_channel(sdata); + mutex_unlock(&sdata->local->mtx); } kfree(assoc_data); @@ -3665,6 +3682,7 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, /* will change later if needed */ sdata->smps_mode = IEEE80211_SMPS_OFF; + mutex_lock(&local->mtx); /* * If this fails (possibly due to channel context sharing * on incompatible channels, e.g. 80+80 and 160 sharing the @@ -3676,13 +3694,15 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, /* don't downgrade for 5 and 10 MHz channels, though. */ if (chandef.width == NL80211_CHAN_WIDTH_5 || chandef.width == NL80211_CHAN_WIDTH_10) - return ret; + goto out; while (ret && chandef.width != NL80211_CHAN_WIDTH_20_NOHT) { ifmgd->flags |= ieee80211_chandef_downgrade(&chandef); ret = ieee80211_vif_use_channel(sdata, &chandef, IEEE80211_CHANCTX_SHARED); } + out: + mutex_unlock(&local->mtx); return ret; } @@ -4191,6 +4211,8 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, sdata->control_port_protocol = req->crypto.control_port_ethertype; sdata->control_port_no_encrypt = req->crypto.control_port_no_encrypt; + sdata->encrypt_headroom = ieee80211_cs_headroom(local, &req->crypto, + sdata->vif.type); /* kick off associate process */ diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index 34012620434..af64fb8e8ad 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -37,9 +37,8 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_SUSPEND); - /* flush out all packets and station cleanup call_rcu()s */ + /* flush out all packets */ synchronize_net(); - rcu_barrier(); ieee80211_flush_queues(local, NULL); 
diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h index 505bc0dea07..b95e16c0708 100644 --- a/net/mac80211/rate.h +++ b/net/mac80211/rate.h @@ -54,6 +54,8 @@ static inline void rate_control_rate_init(struct sta_info *sta) struct ieee80211_supported_band *sband; struct ieee80211_chanctx_conf *chanctx_conf; + ieee80211_sta_set_rx_nss(sta); + if (!ref) return; @@ -67,8 +69,6 @@ static inline void rate_control_rate_init(struct sta_info *sta) sband = local->hw.wiphy->bands[chanctx_conf->def.chan->band]; - ieee80211_sta_set_rx_nss(sta); - ref->ops->rate_init(ref->priv, sband, &chanctx_conf->def, ista, priv_sta); rcu_read_unlock(); diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c index 7fa1b36e620..f3d88b0c054 100644 --- a/net/mac80211/rc80211_minstrel.c +++ b/net/mac80211/rc80211_minstrel.c @@ -135,7 +135,7 @@ minstrel_update_stats(struct minstrel_priv *mp, struct minstrel_sta_info *mi) u32 usecs; int i; - for (i=0; i < MAX_THR_RATES; i++) + for (i = 0; i < MAX_THR_RATES; i++) tmp_tp_rate[i] = 0; for (i = 0; i < mi->n_rates; i++) { @@ -190,7 +190,7 @@ minstrel_update_stats(struct minstrel_priv *mp, struct minstrel_sta_info *mi) * choose the maximum throughput rate as max_prob_rate * (2) if all success probabilities < 95%, the rate with * highest success probability is choosen as max_prob_rate */ - if (mr->probability >= MINSTREL_FRAC(95,100)) { + if (mr->probability >= MINSTREL_FRAC(95, 100)) { if (mr->cur_tp >= mi->r[tmp_prob_rate].cur_tp) tmp_prob_rate = i; } else { @@ -220,7 +220,7 @@ minstrel_update_stats(struct minstrel_priv *mp, struct minstrel_sta_info *mi) static void minstrel_tx_status(void *priv, struct ieee80211_supported_band *sband, - struct ieee80211_sta *sta, void *priv_sta, + struct ieee80211_sta *sta, void *priv_sta, struct sk_buff *skb) { struct minstrel_priv *mp = priv; @@ -260,7 +260,7 @@ minstrel_tx_status(void *priv, struct ieee80211_supported_band *sband, static inline unsigned int minstrel_get_retry_count(struct 
minstrel_rate *mr, - struct ieee80211_tx_info *info) + struct ieee80211_tx_info *info) { unsigned int retry = mr->adjusted_retry_count; @@ -422,10 +422,9 @@ init_sample_table(struct minstrel_sta_info *mi) memset(mi->sample_table, 0xff, SAMPLE_COLUMNS * mi->n_rates); for (col = 0; col < SAMPLE_COLUMNS; col++) { + prandom_bytes(rnd, sizeof(rnd)); for (i = 0; i < mi->n_rates; i++) { - get_random_bytes(rnd, sizeof(rnd)); new_idx = (i + rnd[i & 7]) % mi->n_rates; - while (SAMPLE_TBL(mi, new_idx, col) != 0xff) new_idx = (new_idx + 1) % mi->n_rates; diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 4096ff6cc24..c1b5b73c5b9 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -63,7 +63,7 @@ #define CCK_DURATION(_bitrate, _short, _len) \ (1000 * (10 /* SIFS */ + \ - (_short ? 72 + 24 : 144 + 48 ) + \ + (_short ? 72 + 24 : 144 + 48) + \ (8 * (_len + 4) * 10) / (_bitrate))) #define CCK_ACK_DURATION(_bitrate, _short) \ @@ -135,7 +135,7 @@ minstrel_ht_update_rates(struct minstrel_priv *mp, struct minstrel_ht_sta *mi); static int minstrel_ht_get_group_idx(struct ieee80211_tx_rate *rate) { - return GROUP_IDX((rate->idx / MCS_GROUP_RATES) + 1, + return GROUP_IDX((rate->idx / 8) + 1, !!(rate->flags & IEEE80211_TX_RC_SHORT_GI), !!(rate->flags & IEEE80211_TX_RC_40_MHZ_WIDTH)); } @@ -148,7 +148,7 @@ minstrel_ht_get_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, if (rate->flags & IEEE80211_TX_RC_MCS) { group = minstrel_ht_get_group_idx(rate); - idx = rate->idx % MCS_GROUP_RATES; + idx = rate->idx % 8; } else { group = MINSTREL_CCK_GROUP; @@ -637,8 +637,7 @@ minstrel_ht_set_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, idx = mp->cck_rates[index % ARRAY_SIZE(mp->cck_rates)]; flags = 0; } else { - idx = index % MCS_GROUP_RATES + - (group->streams - 1) * MCS_GROUP_RATES; + idx = index % MCS_GROUP_RATES + (group->streams - 1) * 8; flags = IEEE80211_TX_RC_MCS | group->flags; } @@ -702,12 
+701,16 @@ minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) if (!mi->sample_tries) return -1; - mg = &mi->groups[mi->sample_group]; + sample_group = mi->sample_group; + mg = &mi->groups[sample_group]; sample_idx = sample_table[mg->column][mg->index]; + minstrel_next_sample_idx(mi); + + if (!(mg->supported & BIT(sample_idx))) + return -1; + mr = &mg->rates[sample_idx]; - sample_group = mi->sample_group; sample_idx += sample_group * MCS_GROUP_RATES; - minstrel_next_sample_idx(mi); /* * Sampling might add some overhead (RTS, no aggregation) @@ -818,7 +821,7 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta, } rate->idx = sample_idx % MCS_GROUP_RATES + - (sample_group->streams - 1) * MCS_GROUP_RATES; + (sample_group->streams - 1) * 8; rate->flags = IEEE80211_TX_RC_MCS | sample_group->flags; } @@ -1053,10 +1056,9 @@ init_sample_table(void) memset(sample_table, 0xff, sizeof(sample_table)); for (col = 0; col < SAMPLE_COLUMNS; col++) { + prandom_bytes(rnd, sizeof(rnd)); for (i = 0; i < MCS_GROUP_RATES; i++) { - get_random_bytes(rnd, sizeof(rnd)); new_idx = (i + rnd[i]) % MCS_GROUP_RATES; - while (sample_table[col][new_idx] != 0xff) new_idx = (new_idx + 1) % MCS_GROUP_RATES; diff --git a/net/mac80211/rc80211_minstrel_ht_debugfs.c b/net/mac80211/rc80211_minstrel_ht_debugfs.c index df44a5ad827..3e7d793de0c 100644 --- a/net/mac80211/rc80211_minstrel_ht_debugfs.c +++ b/net/mac80211/rc80211_minstrel_ht_debugfs.c @@ -54,8 +54,7 @@ minstrel_ht_stats_dump(struct minstrel_ht_sta *mi, int i, char *p) int r = bitrates[j % 4]; p += sprintf(p, " %2u.%1uM", r / 10, r % 10); } else { - p += sprintf(p, " MCS%-2u", (mg->streams - 1) * - MCS_GROUP_RATES + j); + p += sprintf(p, " MCS%-2u", (mg->streams - 1) * 8 + j); } tp = mr->cur_tp / 10; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 2b0debb0422..c24ca0d0f46 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -638,6 +638,27 @@ static int 
ieee80211_get_mmie_keyidx(struct sk_buff *skb) return le16_to_cpu(mmie->key_id); } +static int iwl80211_get_cs_keyid(const struct ieee80211_cipher_scheme *cs, + struct sk_buff *skb) +{ + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; + __le16 fc; + int hdrlen; + u8 keyid; + + fc = hdr->frame_control; + hdrlen = ieee80211_hdrlen(fc); + + if (skb->len < hdrlen + cs->hdr_len) + return -EINVAL; + + skb_copy_bits(skb, hdrlen + cs->key_idx_off, &keyid, 1); + keyid &= cs->key_idx_mask; + keyid >>= cs->key_idx_shift; + + return keyid; +} + static ieee80211_rx_result ieee80211_rx_mesh_check(struct ieee80211_rx_data *rx) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; @@ -729,9 +750,7 @@ static void ieee80211_release_reorder_frames(struct ieee80211_sub_if_data *sdata lockdep_assert_held(&tid_agg_rx->reorder_lock); while (ieee80211_sn_less(tid_agg_rx->head_seq_num, head_seq_num)) { - index = ieee80211_sn_sub(tid_agg_rx->head_seq_num, - tid_agg_rx->ssn) % - tid_agg_rx->buf_size; + index = tid_agg_rx->head_seq_num % tid_agg_rx->buf_size; ieee80211_release_reorder_frame(sdata, tid_agg_rx, index, frames); } @@ -757,8 +776,7 @@ static void ieee80211_sta_reorder_release(struct ieee80211_sub_if_data *sdata, lockdep_assert_held(&tid_agg_rx->reorder_lock); /* release the buffer until next missing frame */ - index = ieee80211_sn_sub(tid_agg_rx->head_seq_num, - tid_agg_rx->ssn) % tid_agg_rx->buf_size; + index = tid_agg_rx->head_seq_num % tid_agg_rx->buf_size; if (!tid_agg_rx->reorder_buf[index] && tid_agg_rx->stored_mpdu_num) { /* @@ -793,15 +811,11 @@ static void ieee80211_sta_reorder_release(struct ieee80211_sub_if_data *sdata, } else while (tid_agg_rx->reorder_buf[index]) { ieee80211_release_reorder_frame(sdata, tid_agg_rx, index, frames); - index = ieee80211_sn_sub(tid_agg_rx->head_seq_num, - tid_agg_rx->ssn) % - tid_agg_rx->buf_size; + index = tid_agg_rx->head_seq_num % tid_agg_rx->buf_size; } if (tid_agg_rx->stored_mpdu_num) { - j = index = 
ieee80211_sn_sub(tid_agg_rx->head_seq_num, - tid_agg_rx->ssn) % - tid_agg_rx->buf_size; + j = index = tid_agg_rx->head_seq_num % tid_agg_rx->buf_size; for (; j != (index - 1) % tid_agg_rx->buf_size; j = (j + 1) % tid_agg_rx->buf_size) { @@ -861,8 +875,7 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_sub_if_data *sdata /* Now the new frame is always in the range of the reordering buffer */ - index = ieee80211_sn_sub(mpdu_seq_num, - tid_agg_rx->ssn) % tid_agg_rx->buf_size; + index = mpdu_seq_num % tid_agg_rx->buf_size; /* check if we already stored this frame */ if (tid_agg_rx->reorder_buf[index]) { @@ -1369,6 +1382,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) struct ieee80211_key *sta_ptk = NULL; int mmie_keyidx = -1; __le16 fc; + const struct ieee80211_cipher_scheme *cs = NULL; /* * Key selection 101 @@ -1406,11 +1420,19 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) /* start without a key */ rx->key = NULL; + fc = hdr->frame_control; - if (rx->sta) - sta_ptk = rcu_dereference(rx->sta->ptk); + if (rx->sta) { + int keyid = rx->sta->ptk_idx; - fc = hdr->frame_control; + if (ieee80211_has_protected(fc) && rx->sta->cipher_scheme) { + cs = rx->sta->cipher_scheme; + keyid = iwl80211_get_cs_keyid(cs, rx->skb); + if (unlikely(keyid < 0)) + return RX_DROP_UNUSABLE; + } + sta_ptk = rcu_dereference(rx->sta->ptk[keyid]); + } if (!ieee80211_has_protected(fc)) mmie_keyidx = ieee80211_get_mmie_keyidx(rx->skb); @@ -1472,6 +1494,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) return RX_CONTINUE; } else { u8 keyid; + /* * The device doesn't give us the IV so we won't be * able to look up the key. That's ok though, we @@ -1487,15 +1510,21 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) hdrlen = ieee80211_hdrlen(fc); - if (rx->skb->len < 8 + hdrlen) - return RX_DROP_UNUSABLE; /* TODO: count this? 
*/ + if (cs) { + keyidx = iwl80211_get_cs_keyid(cs, rx->skb); - /* - * no need to call ieee80211_wep_get_keyidx, - * it verifies a bunch of things we've done already - */ - skb_copy_bits(rx->skb, hdrlen + 3, &keyid, 1); - keyidx = keyid >> 6; + if (unlikely(keyidx < 0)) + return RX_DROP_UNUSABLE; + } else { + if (rx->skb->len < 8 + hdrlen) + return RX_DROP_UNUSABLE; /* TODO: count this? */ + /* + * no need to call ieee80211_wep_get_keyidx, + * it verifies a bunch of things we've done already + */ + skb_copy_bits(rx->skb, hdrlen + 3, &keyid, 1); + keyidx = keyid >> 6; + } /* check per-station GTK first, if multicast packet */ if (is_multicast_ether_addr(hdr->addr1) && rx->sta) @@ -1543,11 +1572,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) result = ieee80211_crypto_aes_cmac_decrypt(rx); break; default: - /* - * We can reach here only with HW-only algorithms - * but why didn't it decrypt the frame?! - */ - return RX_DROP_UNUSABLE; + result = ieee80211_crypto_hw_decrypt(rx); } /* the hdr variable is invalid after the decrypt handlers */ @@ -1938,20 +1963,17 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx) } } - if (skb) { - int align __maybe_unused; - #ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS - /* - * 'align' will only take the values 0 or 2 here - * since all frames are required to be aligned - * to 2-byte boundaries when being passed to - * mac80211; the code here works just as well if - * that isn't true, but mac80211 assumes it can - * access fields as 2-byte aligned (e.g. for - * compare_ether_addr) + if (skb) { + /* 'align' will only take the values 0 or 2 here since all + * frames are required to be aligned to 2-byte boundaries + * when being passed to mac80211; the code here works just + * as well if that isn't true, but mac80211 assumes it can + * access fields as 2-byte aligned (e.g. 
for ether_addr_equal) */ - align = ((unsigned long)(skb->data + sizeof(struct ethhdr))) & 3; + int align; + + align = (unsigned long)(skb->data + sizeof(struct ethhdr)) & 3; if (align) { if (WARN_ON(skb_headroom(skb) < 3)) { dev_kfree_skb(skb); @@ -1964,14 +1986,14 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx) skb_set_tail_pointer(skb, len); } } + } #endif - if (skb) { - /* deliver to local stack */ - skb->protocol = eth_type_trans(skb, dev); - memset(skb->cb, 0, sizeof(skb->cb)); - netif_receive_skb(skb); - } + if (skb) { + /* deliver to local stack */ + skb->protocol = eth_type_trans(skb, dev); + memset(skb->cb, 0, sizeof(skb->cb)); + netif_receive_skb(skb); } if (xmit_skb) { @@ -2057,7 +2079,6 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) struct ieee80211_sub_if_data *sdata = rx->sdata; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; - __le16 reason = cpu_to_le16(WLAN_REASON_MESH_PATH_NOFORWARD); u16 q, hdrlen; hdr = (struct ieee80211_hdr *) skb->data; @@ -2165,7 +2186,9 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) } else { /* unable to resolve next hop */ mesh_path_error_tx(sdata, ifmsh->mshcfg.element_ttl, - fwd_hdr->addr3, 0, reason, fwd_hdr->addr2); + fwd_hdr->addr3, 0, + WLAN_REASON_MESH_PATH_NOFORWARD, + fwd_hdr->addr2); IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, dropped_frames_no_route); kfree_skb(fwd_skb); return RX_DROP_MONITOR; @@ -3053,8 +3076,8 @@ void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid) /* main receive path */ -static int prepare_for_handlers(struct ieee80211_rx_data *rx, - struct ieee80211_hdr *hdr) +static bool prepare_for_handlers(struct ieee80211_rx_data *rx, + struct ieee80211_hdr *hdr) { struct ieee80211_sub_if_data *sdata = rx->sdata; struct sk_buff *skb = rx->skb; @@ -3065,29 +3088,29 @@ static int prepare_for_handlers(struct ieee80211_rx_data *rx, switch (sdata->vif.type) { case NL80211_IFTYPE_STATION: if (!bssid && 
!sdata->u.mgd.use_4addr) - return 0; + return false; if (!multicast && !ether_addr_equal(sdata->vif.addr, hdr->addr1)) { if (!(sdata->dev->flags & IFF_PROMISC) || sdata->u.mgd.use_4addr) - return 0; + return false; status->rx_flags &= ~IEEE80211_RX_RA_MATCH; } break; case NL80211_IFTYPE_ADHOC: if (!bssid) - return 0; + return false; if (ether_addr_equal(sdata->vif.addr, hdr->addr2) || ether_addr_equal(sdata->u.ibss.bssid, hdr->addr2)) - return 0; + return false; if (ieee80211_is_beacon(hdr->frame_control)) { - return 1; + return true; } else if (!ieee80211_bssid_match(bssid, sdata->u.ibss.bssid)) { - return 0; + return false; } else if (!multicast && !ether_addr_equal(sdata->vif.addr, hdr->addr1)) { if (!(sdata->dev->flags & IFF_PROMISC)) - return 0; + return false; status->rx_flags &= ~IEEE80211_RX_RA_MATCH; } else if (!rx->sta) { int rate_idx; @@ -3103,7 +3126,7 @@ static int prepare_for_handlers(struct ieee80211_rx_data *rx, if (!multicast && !ether_addr_equal(sdata->vif.addr, hdr->addr1)) { if (!(sdata->dev->flags & IFF_PROMISC)) - return 0; + return false; status->rx_flags &= ~IEEE80211_RX_RA_MATCH; } @@ -3112,7 +3135,7 @@ static int prepare_for_handlers(struct ieee80211_rx_data *rx, case NL80211_IFTYPE_AP: if (!bssid) { if (!ether_addr_equal(sdata->vif.addr, hdr->addr1)) - return 0; + return false; } else if (!ieee80211_bssid_match(bssid, sdata->vif.addr)) { /* * Accept public action frames even when the @@ -3122,26 +3145,26 @@ static int prepare_for_handlers(struct ieee80211_rx_data *rx, */ if (!multicast && !ether_addr_equal(sdata->vif.addr, hdr->addr1)) - return 0; + return false; if (ieee80211_is_public_action(hdr, skb->len)) - return 1; + return true; if (!ieee80211_is_beacon(hdr->frame_control)) - return 0; + return false; status->rx_flags &= ~IEEE80211_RX_RA_MATCH; } break; case NL80211_IFTYPE_WDS: if (bssid || !ieee80211_is_data(hdr->frame_control)) - return 0; + return false; if (!ether_addr_equal(sdata->u.wds.remote_addr, hdr->addr2)) - return 0; + 
return false; break; case NL80211_IFTYPE_P2P_DEVICE: if (!ieee80211_is_public_action(hdr, skb->len) && !ieee80211_is_probe_req(hdr->frame_control) && !ieee80211_is_probe_resp(hdr->frame_control) && !ieee80211_is_beacon(hdr->frame_control)) - return 0; + return false; if (!ether_addr_equal(sdata->vif.addr, hdr->addr1) && !multicast) status->rx_flags &= ~IEEE80211_RX_RA_MATCH; @@ -3152,7 +3175,7 @@ static int prepare_for_handlers(struct ieee80211_rx_data *rx, break; } - return 1; + return true; } /* @@ -3168,13 +3191,11 @@ static bool ieee80211_prepare_and_rx_handle(struct ieee80211_rx_data *rx, struct ieee80211_sub_if_data *sdata = rx->sdata; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); struct ieee80211_hdr *hdr = (void *)skb->data; - int prepares; rx->skb = skb; status->rx_flags |= IEEE80211_RX_RA_MATCH; - prepares = prepare_for_handlers(rx, hdr); - if (!prepares) + if (!prepare_for_handlers(rx, hdr)) return false; if (!consume) { diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index bcc4833d754..88c81616f8f 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -271,10 +271,11 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_local *local) return true; } -static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted, - bool was_hw_scan) +static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) { struct ieee80211_local *local = hw_to_local(hw); + bool hw_scan = local->ops->hw_scan; + bool was_scanning = local->scanning; lockdep_assert_held(&local->mtx); @@ -290,7 +291,7 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted, if (WARN_ON(!local->scan_req)) return; - if (was_hw_scan && !aborted && ieee80211_prep_hw_scan(local)) { + if (hw_scan && !aborted && ieee80211_prep_hw_scan(local)) { int rc; rc = drv_hw_scan(local, @@ -316,7 +317,7 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted, /* Set power back to normal operating levels. 
*/ ieee80211_hw_config(local, 0); - if (!was_hw_scan) { + if (!hw_scan) { ieee80211_configure_filter(local); drv_sw_scan_complete(local); ieee80211_offchannel_return(local); @@ -327,7 +328,8 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted, ieee80211_mlme_notify_scan_completed(local); ieee80211_ibss_notify_scan_completed(local); ieee80211_mesh_notify_scan_completed(local); - ieee80211_start_next_roc(local); + if (was_scanning) + ieee80211_start_next_roc(local); } void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) @@ -526,7 +528,7 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata, ieee80211_hw_config(local, 0); if ((req->channels[0]->flags & - IEEE80211_CHAN_PASSIVE_SCAN) || + IEEE80211_CHAN_NO_IR) || !local->scan_req->n_ssids) { next_delay = IEEE80211_PASSIVE_CHANNEL_TIME; } else { @@ -572,7 +574,7 @@ ieee80211_scan_get_channel_time(struct ieee80211_channel *chan) * TODO: channel switching also consumes quite some time, * add that delay as well to get a better estimation */ - if (chan->flags & IEEE80211_CHAN_PASSIVE_SCAN) + if (chan->flags & IEEE80211_CHAN_NO_IR) return IEEE80211_PASSIVE_CHANNEL_TIME; return IEEE80211_PROBE_DELAY + IEEE80211_CHANNEL_TIME; } @@ -696,7 +698,7 @@ static void ieee80211_scan_state_set_channel(struct ieee80211_local *local, * * In any case, it is not necessary for a passive scan. 
*/ - if (chan->flags & IEEE80211_CHAN_PASSIVE_SCAN || + if (chan->flags & IEEE80211_CHAN_NO_IR || !local->scan_req->n_ssids) { *next_delay = IEEE80211_PASSIVE_CHANNEL_TIME; local->next_scan_state = SCAN_DECISION; @@ -747,7 +749,7 @@ void ieee80211_scan_work(struct work_struct *work) container_of(work, struct ieee80211_local, scan_work.work); struct ieee80211_sub_if_data *sdata; unsigned long next_delay = 0; - bool aborted, hw_scan; + bool aborted; mutex_lock(&local->mtx); @@ -786,14 +788,6 @@ void ieee80211_scan_work(struct work_struct *work) } /* - * Avoid re-scheduling when the sdata is going away. - */ - if (!ieee80211_sdata_running(sdata)) { - aborted = true; - goto out_complete; - } - - /* * as long as no delay is required advance immediately * without scheduling a new work */ @@ -834,8 +828,7 @@ void ieee80211_scan_work(struct work_struct *work) goto out; out_complete: - hw_scan = test_bit(SCAN_HW_SCANNING, &local->scanning); - __ieee80211_scan_completed(&local->hw, aborted, hw_scan); + __ieee80211_scan_completed(&local->hw, aborted); out: mutex_unlock(&local->mtx); } @@ -881,7 +874,7 @@ int ieee80211_request_ibss_scan(struct ieee80211_sub_if_data *sdata, struct ieee80211_channel *tmp_ch = &local->hw.wiphy->bands[band]->channels[i]; - if (tmp_ch->flags & (IEEE80211_CHAN_NO_IBSS | + if (tmp_ch->flags & (IEEE80211_CHAN_NO_IR | IEEE80211_CHAN_DISABLED)) continue; @@ -895,7 +888,7 @@ int ieee80211_request_ibss_scan(struct ieee80211_sub_if_data *sdata, local->int_scan_req->n_channels = n_ch; } else { - if (WARN_ON_ONCE(chan->flags & (IEEE80211_CHAN_NO_IBSS | + if (WARN_ON_ONCE(chan->flags & (IEEE80211_CHAN_NO_IR | IEEE80211_CHAN_DISABLED))) goto unlock; @@ -973,13 +966,13 @@ void ieee80211_scan_cancel(struct ieee80211_local *local) */ cancel_delayed_work(&local->scan_work); /* and clean up */ - __ieee80211_scan_completed(&local->hw, true, false); + __ieee80211_scan_completed(&local->hw, true); out: mutex_unlock(&local->mtx); } -int 
ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata, - struct cfg80211_sched_scan_request *req) +int __ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata, + struct cfg80211_sched_scan_request *req) { struct ieee80211_local *local = sdata->local; struct ieee80211_sched_scan_ies sched_scan_ies = {}; @@ -989,17 +982,10 @@ int ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata, iebufsz = 2 + IEEE80211_MAX_SSID_LEN + local->scan_ies_len + req->ie_len; - mutex_lock(&local->mtx); - - if (rcu_access_pointer(local->sched_scan_sdata)) { - ret = -EBUSY; - goto out; - } + lockdep_assert_held(&local->mtx); - if (!local->ops->sched_scan_start) { - ret = -ENOTSUPP; - goto out; - } + if (!local->ops->sched_scan_start) + return -ENOTSUPP; for (i = 0; i < IEEE80211_NUM_BANDS; i++) { if (!local->hw.wiphy->bands[i]) @@ -1020,13 +1006,39 @@ int ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata, } ret = drv_sched_scan_start(local, sdata, req, &sched_scan_ies); - if (ret == 0) + if (ret == 0) { rcu_assign_pointer(local->sched_scan_sdata, sdata); + local->sched_scan_req = req; + } out_free: while (i > 0) kfree(sched_scan_ies.ie[--i]); -out: + + if (ret) { + /* Clean in case of failure after HW restart or upon resume. */ + rcu_assign_pointer(local->sched_scan_sdata, NULL); + local->sched_scan_req = NULL; + } + + return ret; +} + +int ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata, + struct cfg80211_sched_scan_request *req) +{ + struct ieee80211_local *local = sdata->local; + int ret; + + mutex_lock(&local->mtx); + + if (rcu_access_pointer(local->sched_scan_sdata)) { + mutex_unlock(&local->mtx); + return -EBUSY; + } + + ret = __ieee80211_request_sched_scan_start(sdata, req); + mutex_unlock(&local->mtx); return ret; } @@ -1043,6 +1055,9 @@ int ieee80211_request_sched_scan_stop(struct ieee80211_sub_if_data *sdata) goto out; } + /* We don't want to restart sched scan anymore. 
*/ + local->sched_scan_req = NULL; + if (rcu_access_pointer(local->sched_scan_sdata)) drv_sched_scan_stop(local, sdata); @@ -1077,6 +1092,9 @@ void ieee80211_sched_scan_stopped_work(struct work_struct *work) rcu_assign_pointer(local->sched_scan_sdata, NULL); + /* If sched scan was aborted by the driver. */ + local->sched_scan_req = NULL; + mutex_unlock(&local->mtx); cfg80211_sched_scan_stopped(local->hw.wiphy); diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 1eb66e26e49..decd30c1e29 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -99,23 +99,6 @@ static void cleanup_single_sta(struct sta_info *sta) struct ieee80211_local *local = sdata->local; struct ps_data *ps; - /* - * At this point, when being called as call_rcu callback, - * neither mac80211 nor the driver can reference this - * sta struct any more except by still existing timers - * associated with this station that we clean up below. - * - * Note though that this still uses the sdata and even - * calls the driver in AP and mesh mode, so interfaces - * of those types mush use call sta_info_flush_cleanup() - * (typically via sta_info_flush()) before deconfiguring - * the driver. - * - * In station mode, nothing happens here so it doesn't - * have to (and doesn't) do that, this is intentional to - * speed up roaming. 
- */ - if (test_sta_flag(sta, WLAN_STA_PS_STA)) { if (sta->sdata->vif.type == NL80211_IFTYPE_AP || sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN) @@ -160,37 +143,6 @@ static void cleanup_single_sta(struct sta_info *sta) sta_info_free(local, sta); } -void ieee80211_cleanup_sdata_stas(struct ieee80211_sub_if_data *sdata) -{ - struct sta_info *sta; - - spin_lock_bh(&sdata->cleanup_stations_lock); - while (!list_empty(&sdata->cleanup_stations)) { - sta = list_first_entry(&sdata->cleanup_stations, - struct sta_info, list); - list_del(&sta->list); - spin_unlock_bh(&sdata->cleanup_stations_lock); - - cleanup_single_sta(sta); - - spin_lock_bh(&sdata->cleanup_stations_lock); - } - - spin_unlock_bh(&sdata->cleanup_stations_lock); -} - -static void free_sta_rcu(struct rcu_head *h) -{ - struct sta_info *sta = container_of(h, struct sta_info, rcu_head); - struct ieee80211_sub_if_data *sdata = sta->sdata; - - spin_lock(&sdata->cleanup_stations_lock); - list_add_tail(&sta->list, &sdata->cleanup_stations); - spin_unlock(&sdata->cleanup_stations_lock); - - ieee80211_queue_work(&sdata->local->hw, &sdata->cleanup_stations_wk); -} - /* protected by RCU */ struct sta_info *sta_info_get(struct ieee80211_sub_if_data *sdata, const u8 *addr) @@ -266,9 +218,17 @@ struct sta_info *sta_info_get_by_idx(struct ieee80211_sub_if_data *sdata, */ void sta_info_free(struct ieee80211_local *local, struct sta_info *sta) { + int i; + if (sta->rate_ctrl) rate_control_free_sta(sta); + if (sta->tx_lat) { + for (i = 0; i < IEEE80211_NUM_TIDS; i++) + kfree(sta->tx_lat[i].bins); + kfree(sta->tx_lat); + } + sta_dbg(sta->sdata, "Destroyed STA %pM\n", sta->sta.addr); kfree(sta); @@ -333,12 +293,42 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct sta_info *sta; struct timespec uptime; + struct ieee80211_tx_latency_bin_ranges *tx_latency; int i; sta = kzalloc(sizeof(*sta) + local->hw.sta_data_size, gfp); if (!sta) return NULL; + 
rcu_read_lock(); + tx_latency = rcu_dereference(local->tx_latency); + /* init stations Tx latency statistics && TID bins */ + if (tx_latency) { + sta->tx_lat = kzalloc(IEEE80211_NUM_TIDS * + sizeof(struct ieee80211_tx_latency_stat), + GFP_ATOMIC); + if (!sta->tx_lat) { + rcu_read_unlock(); + goto free; + } + + if (tx_latency->n_ranges) { + for (i = 0; i < IEEE80211_NUM_TIDS; i++) { + /* size of bins is size of the ranges +1 */ + sta->tx_lat[i].bin_count = + tx_latency->n_ranges + 1; + sta->tx_lat[i].bins = + kcalloc(sta->tx_lat[i].bin_count, + sizeof(u32), GFP_ATOMIC); + if (!sta->tx_lat[i].bins) { + rcu_read_unlock(); + goto free; + } + } + } + } + rcu_read_unlock(); + spin_lock_init(&sta->lock); INIT_WORK(&sta->drv_unblock_wk, sta_unblock); INIT_WORK(&sta->ampdu_mlme.work, ieee80211_ba_session_work); @@ -363,10 +353,8 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, for (i = 0; i < ARRAY_SIZE(sta->chain_signal_avg); i++) ewma_init(&sta->chain_signal_avg[i], 1024, 8); - if (sta_prepare_rate_control(local, sta, gfp)) { - kfree(sta); - return NULL; - } + if (sta_prepare_rate_control(local, sta, gfp)) + goto free; for (i = 0; i < IEEE80211_NUM_TIDS; i++) { /* @@ -411,8 +399,16 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, } sta_dbg(sdata, "Allocated STA %pM\n", sta->sta.addr); - return sta; + +free: + if (sta->tx_lat) { + for (i = 0; i < IEEE80211_NUM_TIDS; i++) + kfree(sta->tx_lat[i].bins); + kfree(sta->tx_lat); + } + kfree(sta); + return NULL; } static int sta_info_insert_check(struct sta_info *sta) @@ -507,6 +503,7 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) set_sta_flag(sta, WLAN_STA_INSERTED); + ieee80211_recalc_min_chandef(sdata); ieee80211_sta_debugfs_add(sta); rate_control_add_sta_debugfs(sta); @@ -630,8 +627,8 @@ void sta_info_recalc_tim(struct sta_info *sta) #ifdef CONFIG_MAC80211_MESH } else if (ieee80211_vif_is_mesh(&sta->sdata->vif)) { ps = &sta->sdata->u.mesh.ps; - /* TIM 
map only for PLID <= IEEE80211_MAX_AID */ - id = le16_to_cpu(sta->plid) % IEEE80211_MAX_AID; + /* TIM map only for 1 <= PLID <= IEEE80211_MAX_AID */ + id = sta->plid % (IEEE80211_MAX_AID + 1); #endif } else { return; @@ -807,7 +804,7 @@ static bool sta_info_cleanup_expire_buffered(struct ieee80211_local *local, return have_buffered; } -int __must_check __sta_info_destroy(struct sta_info *sta) +static int __must_check __sta_info_destroy_part1(struct sta_info *sta) { struct ieee80211_local *local; struct ieee80211_sub_if_data *sdata; @@ -833,12 +830,35 @@ int __must_check __sta_info_destroy(struct sta_info *sta) ieee80211_sta_tear_down_BA_sessions(sta, AGG_STOP_DESTROY_STA); ret = sta_info_hash_del(local, sta); - if (ret) + if (WARN_ON(ret)) return ret; list_del_rcu(&sta->list); - /* this always calls synchronize_net() */ + drv_sta_pre_rcu_remove(local, sta->sdata, sta); + + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN && + rcu_access_pointer(sdata->u.vlan.sta) == sta) + RCU_INIT_POINTER(sdata->u.vlan.sta, NULL); + + return 0; +} + +static void __sta_info_destroy_part2(struct sta_info *sta) +{ + struct ieee80211_local *local = sta->local; + struct ieee80211_sub_if_data *sdata = sta->sdata; + int ret; + + /* + * NOTE: This assumes at least synchronize_net() was done + * after _part1 and before _part2! 
+ */ + + might_sleep(); + lockdep_assert_held(&local->sta_mtx); + + /* now keys can no longer be reached */ ieee80211_free_sta_keys(local, sta); sta->dead = true; @@ -846,9 +866,6 @@ int __must_check __sta_info_destroy(struct sta_info *sta) local->num_sta--; local->sta_generation++; - if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) - RCU_INIT_POINTER(sdata->u.vlan.sta, NULL); - while (sta->sta_state > IEEE80211_STA_NONE) { ret = sta_info_move_state(sta, sta->sta_state - 1); if (ret) { @@ -869,8 +886,21 @@ int __must_check __sta_info_destroy(struct sta_info *sta) rate_control_remove_sta_debugfs(sta); ieee80211_sta_debugfs_remove(sta); + ieee80211_recalc_min_chandef(sdata); + + cleanup_single_sta(sta); +} - call_rcu(&sta->rcu_head, free_sta_rcu); +int __must_check __sta_info_destroy(struct sta_info *sta) +{ + int err = __sta_info_destroy_part1(sta); + + if (err) + return err; + + synchronize_net(); + + __sta_info_destroy_part2(sta); return 0; } @@ -940,32 +970,38 @@ void sta_info_stop(struct ieee80211_local *local) } -int sta_info_flush_defer(struct ieee80211_sub_if_data *sdata) +int __sta_info_flush(struct ieee80211_sub_if_data *sdata, bool vlans) { struct ieee80211_local *local = sdata->local; struct sta_info *sta, *tmp; + LIST_HEAD(free_list); int ret = 0; might_sleep(); + WARN_ON(vlans && sdata->vif.type != NL80211_IFTYPE_AP); + WARN_ON(vlans && !sdata->bss); + mutex_lock(&local->sta_mtx); list_for_each_entry_safe(sta, tmp, &local->sta_list, list) { - if (sdata == sta->sdata) { - WARN_ON(__sta_info_destroy(sta)); + if (sdata == sta->sdata || + (vlans && sdata->bss == sta->sdata->bss)) { + if (!WARN_ON(__sta_info_destroy_part1(sta))) + list_add(&sta->free_list, &free_list); ret++; } } + + if (!list_empty(&free_list)) { + synchronize_net(); + list_for_each_entry_safe(sta, tmp, &free_list, free_list) + __sta_info_destroy_part2(sta); + } mutex_unlock(&local->sta_mtx); return ret; } -void sta_info_flush_cleanup(struct ieee80211_sub_if_data *sdata) -{ - 
ieee80211_cleanup_sdata_stas(sdata); - cancel_work_sync(&sdata->cleanup_stations_wk); -} - void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata, unsigned long exp_time) { @@ -1117,7 +1153,8 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta) static void ieee80211_send_null_response(struct ieee80211_sub_if_data *sdata, struct sta_info *sta, int tid, - enum ieee80211_frame_release_type reason) + enum ieee80211_frame_release_type reason, + bool call_driver) { struct ieee80211_local *local = sdata->local; struct ieee80211_qos_hdr *nullfunc; @@ -1175,7 +1212,9 @@ static void ieee80211_send_null_response(struct ieee80211_sub_if_data *sdata, IEEE80211_TX_STATUS_EOSP | IEEE80211_TX_CTL_REQ_TX_STATUS; - drv_allow_buffered_frames(local, sta, BIT(tid), 1, reason, false); + if (call_driver) + drv_allow_buffered_frames(local, sta, BIT(tid), 1, + reason, false); skb->dev = sdata->dev; @@ -1191,6 +1230,17 @@ static void ieee80211_send_null_response(struct ieee80211_sub_if_data *sdata, rcu_read_unlock(); } +static int find_highest_prio_tid(unsigned long tids) +{ + /* lower 3 TIDs aren't ordered perfectly */ + if (tids & 0xF8) + return fls(tids) - 1; + /* TID 0 is BE just like TID 3 */ + if (tids & BIT(0)) + return 0; + return fls(tids) - 1; +} + static void ieee80211_sta_ps_deliver_response(struct sta_info *sta, int n_frames, u8 ignored_acs, @@ -1198,7 +1248,6 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta, { struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; - bool found = false; bool more_data = false; int ac; unsigned long driver_release_tids = 0; @@ -1209,9 +1258,7 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta, __skb_queue_head_init(&frames); - /* - * Get response frame(s) and more data bit for it. - */ + /* Get response frame(s) and more data bit for the last one. 
*/ for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { unsigned long tids; @@ -1220,43 +1267,48 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta, tids = ieee80211_tids_for_ac(ac); - if (!found) { - driver_release_tids = sta->driver_buffered_tids & tids; - if (driver_release_tids) { - found = true; - } else { - struct sk_buff *skb; - - while (n_frames > 0) { - skb = skb_dequeue(&sta->tx_filtered[ac]); - if (!skb) { - skb = skb_dequeue( - &sta->ps_tx_buf[ac]); - if (skb) - local->total_ps_buffered--; - } - if (!skb) - break; - n_frames--; - found = true; - __skb_queue_tail(&frames, skb); - } - } + /* if we already have frames from software, then we can't also + * release from hardware queues + */ + if (skb_queue_empty(&frames)) + driver_release_tids |= sta->driver_buffered_tids & tids; - /* - * If the driver has data on more than one TID then + if (driver_release_tids) { + /* If the driver has data on more than one TID then * certainly there's more data if we release just a - * single frame now (from a single TID). + * single frame now (from a single TID). This will + * only happen for PS-Poll. */ if (reason == IEEE80211_FRAME_RELEASE_PSPOLL && hweight16(driver_release_tids) > 1) { more_data = true; driver_release_tids = - BIT(ffs(driver_release_tids) - 1); + BIT(find_highest_prio_tid( + driver_release_tids)); break; } + } else { + struct sk_buff *skb; + + while (n_frames > 0) { + skb = skb_dequeue(&sta->tx_filtered[ac]); + if (!skb) { + skb = skb_dequeue( + &sta->ps_tx_buf[ac]); + if (skb) + local->total_ps_buffered--; + } + if (!skb) + break; + n_frames--; + __skb_queue_tail(&frames, skb); + } } + /* If we have more frames buffered on this AC, then set the + * more-data bit and abort the loop since we can't send more + * data from other ACs before the buffered frames from this. 
+ */ if (!skb_queue_empty(&sta->tx_filtered[ac]) || !skb_queue_empty(&sta->ps_tx_buf[ac])) { more_data = true; @@ -1264,7 +1316,7 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta, } } - if (!found) { + if (skb_queue_empty(&frames) && !driver_release_tids) { int tid; /* @@ -1285,15 +1337,13 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta, /* This will evaluate to 1, 3, 5 or 7. */ tid = 7 - ((ffs(~ignored_acs) - 1) << 1); - ieee80211_send_null_response(sdata, sta, tid, reason); - return; - } - - if (!driver_release_tids) { + ieee80211_send_null_response(sdata, sta, tid, reason, true); + } else if (!driver_release_tids) { struct sk_buff_head pending; struct sk_buff *skb; int num = 0; u16 tids = 0; + bool need_null = false; skb_queue_head_init(&pending); @@ -1327,22 +1377,57 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta, ieee80211_is_qos_nullfunc(hdr->frame_control)) qoshdr = ieee80211_get_qos_ctl(hdr); - /* end service period after last frame */ - if (skb_queue_empty(&frames)) { - if (reason == IEEE80211_FRAME_RELEASE_UAPSD && - qoshdr) - *qoshdr |= IEEE80211_QOS_CTL_EOSP; + tids |= BIT(skb->priority); + + __skb_queue_tail(&pending, skb); + + /* end service period after last frame or add one */ + if (!skb_queue_empty(&frames)) + continue; + if (reason != IEEE80211_FRAME_RELEASE_UAPSD) { + /* for PS-Poll, there's only one frame */ info->flags |= IEEE80211_TX_STATUS_EOSP | IEEE80211_TX_CTL_REQ_TX_STATUS; + break; } - if (qoshdr) - tids |= BIT(*qoshdr & IEEE80211_QOS_CTL_TID_MASK); - else - tids |= BIT(0); + /* For uAPSD, things are a bit more complicated. If the + * last frame has a QoS header (i.e. is a QoS-data or + * QoS-nulldata frame) then just set the EOSP bit there + * and be done. + * If the frame doesn't have a QoS header (which means + * it should be a bufferable MMPDU) then we can't set + * the EOSP bit in the QoS header; add a QoS-nulldata + * frame to the list to send it after the MMPDU. 
+ * + * Note that this code is only in the mac80211-release + * code path, we assume that the driver will not buffer + * anything but QoS-data frames, or if it does, will + * create the QoS-nulldata frame by itself if needed. + * + * Cf. 802.11-2012 10.2.1.10 (c). + */ + if (qoshdr) { + *qoshdr |= IEEE80211_QOS_CTL_EOSP; - __skb_queue_tail(&pending, skb); + info->flags |= IEEE80211_TX_STATUS_EOSP | + IEEE80211_TX_CTL_REQ_TX_STATUS; + } else { + /* The standard isn't completely clear on this + * as it says the more-data bit should be set + * if there are more BUs. The QoS-Null frame + * we're about to send isn't buffered yet, we + * only create it below, but let's pretend it + * was buffered just in case some clients only + * expect more-data=0 when eosp=1. + */ + hdr->frame_control |= + cpu_to_le16(IEEE80211_FCTL_MOREDATA); + need_null = true; + num++; + } + break; } drv_allow_buffered_frames(local, sta, tids, num, @@ -1350,17 +1435,22 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta, ieee80211_add_pending_skbs(local, &pending); + if (need_null) + ieee80211_send_null_response( + sdata, sta, find_highest_prio_tid(tids), + reason, false); + sta_info_recalc_tim(sta); } else { /* * We need to release a frame that is buffered somewhere in the * driver ... it'll have to handle that. - * Note that, as per the comment above, it'll also have to see - * if there is more than just one frame on the specific TID that - * we're releasing from, and it needs to set the more-data bit - * accordingly if we tell it that there's no more data. If we do - * tell it there's more data, then of course the more-data bit - * needs to be set anyway. 
+ * Note that the driver also has to check the number of frames + * on the TIDs we're releasing from - if there are more than + * n_frames it has to set the more-data bit (if we didn't ask + * it to set it anyway due to other buffered frames); if there + * are fewer than n_frames it has to make sure to adjust that + * to allow the service period to end properly. */ drv_release_buffered_frames(local, sta, driver_release_tids, n_frames, reason, more_data); @@ -1368,9 +1458,9 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta, /* * Note that we don't recalculate the TIM bit here as it would * most likely have no effect at all unless the driver told us - * that the TID became empty before returning here from the + * that the TID(s) became empty before returning here from the * release function. - * Either way, however, when the driver tells us that the TID + * Either way, however, when the driver tells us that the TID(s) * became empty we'll do the TIM recalculation. */ } @@ -1459,6 +1549,8 @@ void ieee80211_sta_set_buffered(struct ieee80211_sta *pubsta, if (WARN_ON(tid >= IEEE80211_NUM_TIDS)) return; + trace_api_sta_set_buffered(sta->local, pubsta, tid, buffered); + if (buffered) set_bit(tid, &sta->driver_buffered_tids); else diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 3ef06a26b9c..d77ff709063 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -220,6 +220,25 @@ struct sta_ampdu_mlme { u8 dialog_token_allocator; }; +/* + * struct ieee80211_tx_latency_stat - Tx latency statistics + * + * Measures TX latency and jitter for a station per TID. + * + * @max: worst case latency + * @sum: sum of all latencies + * @counter: amount of Tx frames sent from interface + * @bins: each bin counts how many frames transmitted within a certain + * latency range. when disabled it is NULL. + * @bin_count: amount of bins. 
+ */ +struct ieee80211_tx_latency_stat { + u32 max; + u32 sum; + u32 counter; + u32 *bins; + u32 bin_count; +}; /** * struct sta_info - STA information @@ -228,11 +247,14 @@ struct sta_ampdu_mlme { * mac80211 is communicating with. * * @list: global linked list entry + * @free_list: list entry for keeping track of stations to free * @hnext: hash table linked list pointer * @local: pointer to the global information * @sdata: virtual interface this station belongs to - * @ptk: peer key negotiated with this station, if any + * @ptk: peer keys negotiated with this station, if any + * @ptk_idx: last installed peer key index * @gtk: group keys negotiated with this station, if any + * @gtk_idx: last installed group key index * @rate_ctrl: rate control algorithm reference * @rate_ctrl_priv: rate control private per-STA pointer * @last_tx_rate: rate used for last transmit, to report to userspace as @@ -274,6 +296,7 @@ struct sta_ampdu_mlme { * @tid_seq: per-TID sequence numbers for sending to this STA * @ampdu_mlme: A-MPDU state machine state * @timer_to_tid: identity mapping to ID timers + * @tx_lat: Tx latency statistics * @llid: Local link ID * @plid: Peer link ID * @reason: Cancel reason on PLINK_HOLDING state @@ -303,16 +326,19 @@ struct sta_ampdu_mlme { * @chain_signal_avg: signal average (per chain) * @known_smps_mode: the smps_mode the client thinks we are in. Relevant for * AP only. 
+ * @cipher_scheme: optional cipher scheme for this station */ struct sta_info { /* General information, mostly static */ - struct list_head list; + struct list_head list, free_list; struct rcu_head rcu_head; struct sta_info __rcu *hnext; struct ieee80211_local *local; struct ieee80211_sub_if_data *sdata; struct ieee80211_key __rcu *gtk[NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS]; - struct ieee80211_key __rcu *ptk; + struct ieee80211_key __rcu *ptk[NUM_DEFAULT_KEYS]; + u8 gtk_idx; + u8 ptk_idx; struct rate_control_ref *rate_ctrl; void *rate_ctrl_priv; spinlock_t lock; @@ -380,14 +406,16 @@ struct sta_info { struct sta_ampdu_mlme ampdu_mlme; u8 timer_to_tid[IEEE80211_NUM_TIDS]; + struct ieee80211_tx_latency_stat *tx_lat; + #ifdef CONFIG_MAC80211_MESH /* * Mesh peer link attributes * TODO: move to a sub-structure that is referenced with pointer? */ - __le16 llid; - __le16 plid; - __le16 reason; + u16 llid; + u16 plid; + u16 reason; u8 plink_retries; bool ignore_plink_timer; enum nl80211_plink_state plink_state; @@ -414,6 +442,7 @@ struct sta_info { unsigned int beacon_loss_count; enum ieee80211_smps_mode known_smps_mode; + const struct ieee80211_cipher_scheme *cipher_scheme; /* keep last! */ struct ieee80211_sta sta; @@ -577,21 +606,6 @@ void sta_info_recalc_tim(struct sta_info *sta); void sta_info_init(struct ieee80211_local *local); void sta_info_stop(struct ieee80211_local *local); -int sta_info_flush_defer(struct ieee80211_sub_if_data *sdata); - -/** - * sta_info_flush_cleanup - flush the sta_info cleanup queue - * @sdata: the interface - * - * Flushes the sta_info cleanup queue for a given interface; - * this is necessary before the interface is removed or, for - * AP/mesh interfaces, before it is deconfigured. - * - * Note an rcu_barrier() must precede the function, after all - * stations have been flushed/removed to ensure the call_rcu() - * calls that add stations to the cleanup queue have completed. 
- */ -void sta_info_flush_cleanup(struct ieee80211_sub_if_data *sdata); /** * sta_info_flush - flush matching STA entries from the STA table @@ -599,15 +613,13 @@ void sta_info_flush_cleanup(struct ieee80211_sub_if_data *sdata); * Returns the number of removed STA entries. * * @sdata: sdata to remove all stations from + * @vlans: if the given interface is an AP interface, also flush VLANs */ +int __sta_info_flush(struct ieee80211_sub_if_data *sdata, bool vlans); + static inline int sta_info_flush(struct ieee80211_sub_if_data *sdata) { - int ret = sta_info_flush_defer(sdata); - - rcu_barrier(); - sta_info_flush_cleanup(sdata); - - return ret; + return __sta_info_flush(sdata, false); } void sta_set_rate_info_tx(struct sta_info *sta, @@ -623,6 +635,4 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta); void ieee80211_sta_ps_deliver_poll_response(struct sta_info *sta); void ieee80211_sta_ps_deliver_uapsd(struct sta_info *sta); -void ieee80211_cleanup_sdata_stas(struct ieee80211_sub_if_data *sdata); - #endif /* STA_INFO_H */ diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 52a152b01b0..1ee85c40243 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -11,6 +11,7 @@ #include <linux/export.h> #include <linux/etherdevice.h> +#include <linux/time.h> #include <net/mac80211.h> #include <asm/unaligned.h> #include "ieee80211_i.h" @@ -463,6 +464,77 @@ static void ieee80211_report_used_skb(struct ieee80211_local *local, } /* + * Measure Tx frame completion and removal time for Tx latency statistics + * calculation. A single Tx frame latency should be measured from when it + * is entering the Kernel until we receive Tx complete confirmation indication + * and remove the skb. 
+ */ +static void ieee80211_tx_latency_end_msrmnt(struct ieee80211_local *local, + struct sk_buff *skb, + struct sta_info *sta, + struct ieee80211_hdr *hdr) +{ + ktime_t skb_dprt; + struct timespec dprt_time; + u32 msrmnt; + u16 tid; + u8 *qc; + int i, bin_range_count, bin_count; + u32 *bin_ranges; + __le16 fc; + struct ieee80211_tx_latency_stat *tx_lat; + struct ieee80211_tx_latency_bin_ranges *tx_latency; + ktime_t skb_arv = skb->tstamp; + + tx_latency = rcu_dereference(local->tx_latency); + + /* assert Tx latency stats are enabled & frame arrived when enabled */ + if (!tx_latency || !ktime_to_ns(skb_arv)) + return; + + fc = hdr->frame_control; + + if (!ieee80211_is_data(fc)) /* make sure it is a data frame */ + return; + + /* get frame tid */ + if (ieee80211_is_data_qos(hdr->frame_control)) { + qc = ieee80211_get_qos_ctl(hdr); + tid = qc[0] & IEEE80211_QOS_CTL_TID_MASK; + } else { + tid = 0; + } + + tx_lat = &sta->tx_lat[tid]; + + ktime_get_ts(&dprt_time); /* time stamp completion time */ + skb_dprt = ktime_set(dprt_time.tv_sec, dprt_time.tv_nsec); + msrmnt = ktime_to_ms(ktime_sub(skb_dprt, skb_arv)); + + if (tx_lat->max < msrmnt) /* update stats */ + tx_lat->max = msrmnt; + tx_lat->counter++; + tx_lat->sum += msrmnt; + + if (!tx_lat->bins) /* bins not activated */ + return; + + /* count how many Tx frames transmitted with the appropriate latency */ + bin_range_count = tx_latency->n_ranges; + bin_ranges = tx_latency->ranges; + bin_count = tx_lat->bin_count; + + for (i = 0; i < bin_range_count; i++) { + if (msrmnt <= bin_ranges[i]) { + tx_lat->bins[i]++; + break; + } + } + if (i == bin_range_count) /* msrmnt is bigger than the biggest range */ + tx_lat->bins[i]++; +} + +/* * Use a static threshold for now, best value to be determined * by testing ... 
* Should it depend on: @@ -620,6 +692,12 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) if (acked) sta->last_ack_signal = info->status.ack_signal; + + /* + * Measure frame removal for tx latency + * statistics calculation + */ + ieee80211_tx_latency_end_msrmnt(local, skb, sta, hdr); } rcu_read_unlock(); diff --git a/net/mac80211/tkip.c b/net/mac80211/tkip.c index 124b1fdc20d..0ae207771a5 100644 --- a/net/mac80211/tkip.c +++ b/net/mac80211/tkip.c @@ -186,7 +186,7 @@ void ieee80211_get_tkip_p1k_iv(struct ieee80211_key_conf *keyconf, EXPORT_SYMBOL(ieee80211_get_tkip_p1k_iv); void ieee80211_get_tkip_rx_p1k(struct ieee80211_key_conf *keyconf, - const u8 *ta, u32 iv32, u16 *p1k) + const u8 *ta, u32 iv32, u16 *p1k) { const u8 *tk = &keyconf->key[NL80211_TKIP_DATA_OFFSET_ENCR_KEY]; struct tkip_ctx ctx; diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index d4cee98533f..a0b0aea7652 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -41,14 +41,31 @@ #define CHANDEF_PR_ARG __entry->control_freq, __entry->chan_width, \ __entry->center_freq1, __entry->center_freq2 +#define MIN_CHANDEF_ENTRY \ + __field(u32, min_control_freq) \ + __field(u32, min_chan_width) \ + __field(u32, min_center_freq1) \ + __field(u32, min_center_freq2) + +#define MIN_CHANDEF_ASSIGN(c) \ + __entry->min_control_freq = (c)->chan ? 
(c)->chan->center_freq : 0; \ + __entry->min_chan_width = (c)->width; \ + __entry->min_center_freq1 = (c)->center_freq1; \ + __entry->min_center_freq2 = (c)->center_freq2; +#define MIN_CHANDEF_PR_FMT " min_control:%d MHz min_width:%d min_center: %d/%d MHz" +#define MIN_CHANDEF_PR_ARG __entry->min_control_freq, __entry->min_chan_width, \ + __entry->min_center_freq1, __entry->min_center_freq2 + #define CHANCTX_ENTRY CHANDEF_ENTRY \ + MIN_CHANDEF_ENTRY \ __field(u8, rx_chains_static) \ __field(u8, rx_chains_dynamic) #define CHANCTX_ASSIGN CHANDEF_ASSIGN(&ctx->conf.def) \ + MIN_CHANDEF_ASSIGN(&ctx->conf.min_def) \ __entry->rx_chains_static = ctx->conf.rx_chains_static; \ __entry->rx_chains_dynamic = ctx->conf.rx_chains_dynamic -#define CHANCTX_PR_FMT CHANDEF_PR_FMT " chains:%d/%d" -#define CHANCTX_PR_ARG CHANDEF_PR_ARG, \ +#define CHANCTX_PR_FMT CHANDEF_PR_FMT MIN_CHANDEF_PR_FMT " chains:%d/%d" +#define CHANCTX_PR_ARG CHANDEF_PR_ARG, MIN_CHANDEF_PR_ARG, \ __entry->rx_chains_static, __entry->rx_chains_dynamic @@ -426,30 +443,6 @@ TRACE_EVENT(drv_prepare_multicast, ) ); -TRACE_EVENT(drv_set_multicast_list, - TP_PROTO(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata, int mc_count), - - TP_ARGS(local, sdata, mc_count), - - TP_STRUCT__entry( - LOCAL_ENTRY - __field(bool, allmulti) - __field(int, mc_count) - ), - - TP_fast_assign( - LOCAL_ASSIGN; - __entry->allmulti = sdata->flags & IEEE80211_SDATA_ALLMULTI; - __entry->mc_count = mc_count; - ), - - TP_printk( - LOCAL_PR_FMT " configure mc filter, count=%d, allmulti=%d", - LOCAL_PR_ARG, __entry->mc_count, __entry->allmulti - ) -); - TRACE_EVENT(drv_configure_filter, TP_PROTO(struct ieee80211_local *local, unsigned int changed_flags, @@ -560,7 +553,7 @@ TRACE_EVENT(drv_update_tkip_key, TP_printk( LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " iv32:%#x", - LOCAL_PR_ARG,VIF_PR_ARG,STA_PR_ARG, __entry->iv32 + LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->iv32 ) ); @@ -773,7 +766,7 @@ TRACE_EVENT(drv_sta_rc_update, ) ); 
-TRACE_EVENT(drv_sta_add, +DECLARE_EVENT_CLASS(sta_event, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_sta *sta), @@ -798,29 +791,25 @@ TRACE_EVENT(drv_sta_add, ) ); -TRACE_EVENT(drv_sta_remove, +DEFINE_EVENT(sta_event, drv_sta_add, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_sta *sta), + TP_ARGS(local, sdata, sta) +); - TP_ARGS(local, sdata, sta), - - TP_STRUCT__entry( - LOCAL_ENTRY - VIF_ENTRY - STA_ENTRY - ), - - TP_fast_assign( - LOCAL_ASSIGN; - VIF_ASSIGN; - STA_ASSIGN; - ), +DEFINE_EVENT(sta_event, drv_sta_remove, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_sta *sta), + TP_ARGS(local, sdata, sta) +); - TP_printk( - LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT, - LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG - ) +DEFINE_EVENT(sta_event, drv_sta_pre_rcu_remove, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_sta *sta), + TP_ARGS(local, sdata, sta) ); TRACE_EVENT(drv_conf_tx, @@ -1846,6 +1835,33 @@ TRACE_EVENT(api_eosp, ) ); +TRACE_EVENT(api_sta_set_buffered, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sta *sta, + u8 tid, bool buffered), + + TP_ARGS(local, sta, tid, buffered), + + TP_STRUCT__entry( + LOCAL_ENTRY + STA_ENTRY + __field(u8, tid) + __field(bool, buffered) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + STA_ASSIGN; + __entry->tid = tid; + __entry->buffered = buffered; + ), + + TP_printk( + LOCAL_PR_FMT STA_PR_FMT " tid:%d buffered:%d", + LOCAL_PR_ARG, STA_PR_ARG, __entry->tid, __entry->buffered + ) +); + /* * Tracing for internal functions * (which may also be called in response to driver calls) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index ca7fa7f0613..27c990bf232 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -19,6 +19,7 @@ #include <linux/bitmap.h> #include <linux/rcupdate.h> #include <linux/export.h> +#include <linux/time.h> 
#include <net/net_namespace.h> #include <net/ieee80211_radiotap.h> #include <net/cfg80211.h> @@ -489,6 +490,7 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx) info->control.jiffies = jiffies; info->control.vif = &tx->sdata->vif; info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING; + info->flags &= ~IEEE80211_TX_TEMPORARY_FLAGS; skb_queue_tail(&sta->ps_tx_buf[ac], tx->skb); if (!timer_pending(&local->sta_cleanup)) @@ -560,7 +562,8 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) if (unlikely(info->flags & IEEE80211_TX_INTFL_DONT_ENCRYPT)) tx->key = NULL; - else if (tx->sta && (key = rcu_dereference(tx->sta->ptk))) + else if (tx->sta && + (key = rcu_dereference(tx->sta->ptk[tx->sta->ptk_idx]))) tx->key = key; else if (ieee80211_is_mgmt(hdr->frame_control) && is_multicast_ether_addr(hdr->addr1) && @@ -843,15 +846,16 @@ static int ieee80211_fragment(struct ieee80211_tx_data *tx, rem -= fraglen; tmp = dev_alloc_skb(local->tx_headroom + frag_threshold + - IEEE80211_ENCRYPT_HEADROOM + + tx->sdata->encrypt_headroom + IEEE80211_ENCRYPT_TAILROOM); if (!tmp) return -ENOMEM; __skb_queue_tail(&tx->skbs, tmp); - skb_reserve(tmp, local->tx_headroom + - IEEE80211_ENCRYPT_HEADROOM); + skb_reserve(tmp, + local->tx_headroom + tx->sdata->encrypt_headroom); + /* copy control information */ memcpy(tmp->cb, skb->cb, sizeof(tmp->cb)); @@ -1073,6 +1077,7 @@ static bool ieee80211_tx_prep_agg(struct ieee80211_tx_data *tx, queued = true; info->control.vif = &tx->sdata->vif; info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING; + info->flags &= ~IEEE80211_TX_TEMPORARY_FLAGS; __skb_queue_tail(&tid_tx->pending, skb); if (skb_queue_len(&tid_tx->pending) > STA_MAX_TX_BUFFER) purge_skb = __skb_dequeue(&tid_tx->pending); @@ -1488,7 +1493,7 @@ void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, headroom = local->tx_headroom; if (may_encrypt) - headroom += IEEE80211_ENCRYPT_HEADROOM; + headroom += sdata->encrypt_headroom; headroom -= skb_headroom(skb); 
headroom = max_t(int, 0, headroom); @@ -1727,8 +1732,7 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb, * radar detection by itself. We can do that later by adding a * monitor flag interfaces used for AP support. */ - if ((chan->flags & (IEEE80211_CHAN_NO_IBSS | IEEE80211_CHAN_RADAR | - IEEE80211_CHAN_PASSIVE_SCAN))) + if ((chan->flags & (IEEE80211_CHAN_NO_IR | IEEE80211_CHAN_RADAR))) goto fail_rcu; ieee80211_xmit(sdata, skb, chan->band); @@ -1743,6 +1747,26 @@ fail: return NETDEV_TX_OK; /* meaning, we dealt with the skb */ } +/* + * Measure Tx frame arrival time for Tx latency statistics calculation + * A single Tx frame latency should be measured from when it is entering the + * Kernel until we receive Tx complete confirmation indication and the skb is + * freed. + */ +static void ieee80211_tx_latency_start_msrmnt(struct ieee80211_local *local, + struct sk_buff *skb) +{ + struct timespec skb_arv; + struct ieee80211_tx_latency_bin_ranges *tx_latency; + + tx_latency = rcu_dereference(local->tx_latency); + if (!tx_latency) + return; + + ktime_get_ts(&skb_arv); + skb->tstamp = ktime_set(skb_arv.tv_sec, skb_arv.tv_nsec); +} + /** * ieee80211_subif_start_xmit - netif start_xmit function for Ethernet-type * subinterfaces (wlan#, WDS, and VLAN interfaces) @@ -1793,6 +1817,9 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, rcu_read_lock(); + /* Measure frame arrival for Tx latency statistics calculation */ + ieee80211_tx_latency_start_msrmnt(local, skb); + switch (sdata->vif.type) { case NL80211_IFTYPE_AP_VLAN: sta = rcu_dereference(sdata->u.vlan.sta); @@ -2112,7 +2139,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, */ if (head_need > 0 || skb_cloned(skb)) { - head_need += IEEE80211_ENCRYPT_HEADROOM; + head_need += sdata->encrypt_headroom; head_need += local->tx_headroom; head_need = max_t(int, 0, head_need); if (ieee80211_skb_resize(sdata, skb, head_need, true)) { @@ -2139,7 +2166,7 @@ netdev_tx_t 
ieee80211_subif_start_xmit(struct sk_buff *skb, if (ieee80211_is_data_qos(fc)) { __le16 *qos_control; - qos_control = (__le16*) skb_push(skb, 2); + qos_control = (__le16 *) skb_push(skb, 2); memcpy(skb_push(skb, hdrlen - 2), &hdr, hdrlen - 2); /* * Maybe we could actually set some fields here, for now just @@ -2301,7 +2328,7 @@ static void __ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata, if (atomic_read(&ps->num_sta_ps) > 0) /* in the hope that this is faster than * checking byte-for-byte */ - have_bits = !bitmap_empty((unsigned long*)ps->tim, + have_bits = !bitmap_empty((unsigned long *)ps->tim, IEEE80211_MAX_AID+1); if (ps->dtim_count == 0) @@ -2527,7 +2554,8 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, */ skb = dev_alloc_skb(local->tx_headroom + beacon->head_len + - beacon->tail_len + 256); + beacon->tail_len + 256 + + local->hw.extra_beacon_tailroom); if (!skb) goto out; @@ -2559,7 +2587,8 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, ieee80211_update_csa(sdata, presp); - skb = dev_alloc_skb(local->tx_headroom + presp->head_len); + skb = dev_alloc_skb(local->tx_headroom + presp->head_len + + local->hw.extra_beacon_tailroom); if (!skb) goto out; skb_reserve(skb, local->tx_headroom); @@ -2580,13 +2609,13 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, ieee80211_update_csa(sdata, bcn); if (ifmsh->sync_ops) - ifmsh->sync_ops->adjust_tbtt( - sdata); + ifmsh->sync_ops->adjust_tbtt(sdata, bcn); skb = dev_alloc_skb(local->tx_headroom + bcn->head_len + 256 + /* TIM IE */ - bcn->tail_len); + bcn->tail_len + + local->hw.extra_beacon_tailroom); if (!skb) goto out; skb_reserve(skb, local->tx_headroom); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 9f9b9bd3fd4..676dc0967f3 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -76,7 +76,7 @@ u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len, } if (ieee80211_is_ctl(fc)) { - if(ieee80211_is_pspoll(fc)) + if 
(ieee80211_is_pspoll(fc)) return hdr->addr1; if (ieee80211_is_back_req(fc)) { @@ -642,6 +642,17 @@ void ieee80211_iterate_active_interfaces_rtnl( } EXPORT_SYMBOL_GPL(ieee80211_iterate_active_interfaces_rtnl); +struct ieee80211_vif *wdev_to_ieee80211_vif(struct wireless_dev *wdev) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); + + if (!ieee80211_sdata_running(sdata) || + !(sdata->flags & IEEE80211_SDATA_IN_DRIVER)) + return NULL; + return &sdata->vif; +} +EXPORT_SYMBOL_GPL(wdev_to_ieee80211_vif); + /* * Nothing should have been stuffed into the workqueue during * the suspend->resume cycle. If this WARN is seen then there @@ -1451,6 +1462,8 @@ int ieee80211_reconfig(struct ieee80211_local *local) struct sta_info *sta; int res, i; bool reconfig_due_to_wowlan = false; + struct ieee80211_sub_if_data *sched_scan_sdata; + bool sched_scan_stopped = false; #ifdef CONFIG_PM if (local->suspended) @@ -1754,6 +1767,27 @@ int ieee80211_reconfig(struct ieee80211_local *local) #else WARN_ON(1); #endif + + /* + * Reconfigure sched scan if it was interrupted by FW restart or + * suspend. + */ + mutex_lock(&local->mtx); + sched_scan_sdata = rcu_dereference_protected(local->sched_scan_sdata, + lockdep_is_held(&local->mtx)); + if (sched_scan_sdata && local->sched_scan_req) + /* + * Sched scan stopped, but we don't want to report it. Instead, + * we're trying to reschedule. 
+ */ + if (__ieee80211_request_sched_scan_start(sched_scan_sdata, + local->sched_scan_req)) + sched_scan_stopped = true; + mutex_unlock(&local->mtx); + + if (sched_scan_stopped) + cfg80211_sched_scan_stopped(local->hw.wiphy); + return 0; } @@ -1804,6 +1838,26 @@ void ieee80211_recalc_smps(struct ieee80211_sub_if_data *sdata) mutex_unlock(&local->chanctx_mtx); } +void ieee80211_recalc_min_chandef(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_chanctx_conf *chanctx_conf; + struct ieee80211_chanctx *chanctx; + + mutex_lock(&local->chanctx_mtx); + + chanctx_conf = rcu_dereference_protected(sdata->vif.chanctx_conf, + lockdep_is_held(&local->chanctx_mtx)); + + if (WARN_ON_ONCE(!chanctx_conf)) + goto unlock; + + chanctx = container_of(chanctx_conf, struct ieee80211_chanctx, conf); + ieee80211_recalc_chanctx_min_def(local, chanctx); + unlock: + mutex_unlock(&local->chanctx_mtx); +} + static bool ieee80211_id_in_list(const u8 *ids, int n_ids, u8 id) { int i; @@ -2259,19 +2313,28 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local, void ieee80211_dfs_cac_cancel(struct ieee80211_local *local) { struct ieee80211_sub_if_data *sdata; + struct cfg80211_chan_def chandef; + mutex_lock(&local->mtx); mutex_lock(&local->iflist_mtx); list_for_each_entry(sdata, &local->interfaces, list) { - cancel_delayed_work_sync(&sdata->dfs_cac_timer_work); + /* it might be waiting for the local->mtx, but then + * by the time it gets it, sdata->wdev.cac_started + * will no longer be true + */ + cancel_delayed_work(&sdata->dfs_cac_timer_work); if (sdata->wdev.cac_started) { + chandef = sdata->vif.bss_conf.chandef; ieee80211_vif_release_channel(sdata); cfg80211_cac_event(sdata->dev, + &chandef, NL80211_RADAR_CAC_ABORTED, GFP_KERNEL); } } mutex_unlock(&local->iflist_mtx); + mutex_unlock(&local->mtx); } void ieee80211_dfs_radar_detected_work(struct work_struct *work) @@ -2445,7 +2508,6 @@ int ieee80211_send_action_csa(struct 
ieee80211_sub_if_data *sdata, if (ieee80211_vif_is_mesh(&sdata->vif)) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; - __le16 pre_value; skb_put(skb, 8); *pos++ = WLAN_EID_CHAN_SWITCH_PARAM; /* EID */ @@ -2457,11 +2519,259 @@ int ieee80211_send_action_csa(struct ieee80211_sub_if_data *sdata, WLAN_EID_CHAN_SWITCH_PARAM_TX_RESTRICT : 0x00; put_unaligned_le16(WLAN_REASON_MESH_CHAN, pos); /* Reason Cd */ pos += 2; - pre_value = cpu_to_le16(ifmsh->pre_value); - memcpy(pos, &pre_value, 2); /* Precedence Value */ + put_unaligned_le16(ifmsh->pre_value, pos);/* Precedence Value */ pos += 2; } ieee80211_tx_skb(sdata, skb); return 0; } + +bool ieee80211_cs_valid(const struct ieee80211_cipher_scheme *cs) +{ + return !(cs == NULL || cs->cipher == 0 || + cs->hdr_len < cs->pn_len + cs->pn_off || + cs->hdr_len <= cs->key_idx_off || + cs->key_idx_shift > 7 || + cs->key_idx_mask == 0); +} + +bool ieee80211_cs_list_valid(const struct ieee80211_cipher_scheme *cs, int n) +{ + int i; + + /* Ensure we have enough iftype bitmap space for all iftype values */ + WARN_ON((NUM_NL80211_IFTYPES / 8 + 1) > sizeof(cs[0].iftype)); + + for (i = 0; i < n; i++) + if (!ieee80211_cs_valid(&cs[i])) + return false; + + return true; +} + +const struct ieee80211_cipher_scheme * +ieee80211_cs_get(struct ieee80211_local *local, u32 cipher, + enum nl80211_iftype iftype) +{ + const struct ieee80211_cipher_scheme *l = local->hw.cipher_schemes; + int n = local->hw.n_cipher_schemes; + int i; + const struct ieee80211_cipher_scheme *cs = NULL; + + for (i = 0; i < n; i++) { + if (l[i].cipher == cipher) { + cs = &l[i]; + break; + } + } + + if (!cs || !(cs->iftype & BIT(iftype))) + return NULL; + + return cs; +} + +int ieee80211_cs_headroom(struct ieee80211_local *local, + struct cfg80211_crypto_settings *crypto, + enum nl80211_iftype iftype) +{ + const struct ieee80211_cipher_scheme *cs; + int headroom = IEEE80211_ENCRYPT_HEADROOM; + int i; + + for (i = 0; i < crypto->n_ciphers_pairwise; i++) { + cs = 
ieee80211_cs_get(local, crypto->ciphers_pairwise[i], + iftype); + + if (cs && headroom < cs->hdr_len) + headroom = cs->hdr_len; + } + + cs = ieee80211_cs_get(local, crypto->cipher_group, iftype); + if (cs && headroom < cs->hdr_len) + headroom = cs->hdr_len; + + return headroom; +} + +static bool +ieee80211_extend_noa_desc(struct ieee80211_noa_data *data, u32 tsf, int i) +{ + s32 end = data->desc[i].start + data->desc[i].duration - (tsf + 1); + int skip; + + if (end > 0) + return false; + + /* End time is in the past, check for repetitions */ + skip = DIV_ROUND_UP(-end, data->desc[i].interval); + if (data->count[i] < 255) { + if (data->count[i] <= skip) { + data->count[i] = 0; + return false; + } + + data->count[i] -= skip; + } + + data->desc[i].start += skip * data->desc[i].interval; + + return true; +} + +static bool +ieee80211_extend_absent_time(struct ieee80211_noa_data *data, u32 tsf, + s32 *offset) +{ + bool ret = false; + int i; + + for (i = 0; i < IEEE80211_P2P_NOA_DESC_MAX; i++) { + s32 cur; + + if (!data->count[i]) + continue; + + if (ieee80211_extend_noa_desc(data, tsf + *offset, i)) + ret = true; + + cur = data->desc[i].start - tsf; + if (cur > *offset) + continue; + + cur = data->desc[i].start + data->desc[i].duration - tsf; + if (cur > *offset) + *offset = cur; + } + + return ret; +} + +static u32 +ieee80211_get_noa_absent_time(struct ieee80211_noa_data *data, u32 tsf) +{ + s32 offset = 0; + int tries = 0; + /* + * arbitrary limit, used to avoid infinite loops when combined NoA + * descriptors cover the full time period. 
+ */ + int max_tries = 5; + + ieee80211_extend_absent_time(data, tsf, &offset); + do { + if (!ieee80211_extend_absent_time(data, tsf, &offset)) + break; + + tries++; + } while (tries < max_tries); + + return offset; +} + +void ieee80211_update_p2p_noa(struct ieee80211_noa_data *data, u32 tsf) +{ + u32 next_offset = BIT(31) - 1; + int i; + + data->absent = 0; + data->has_next_tsf = false; + for (i = 0; i < IEEE80211_P2P_NOA_DESC_MAX; i++) { + s32 start; + + if (!data->count[i]) + continue; + + ieee80211_extend_noa_desc(data, tsf, i); + start = data->desc[i].start - tsf; + if (start <= 0) + data->absent |= BIT(i); + + if (next_offset > start) + next_offset = start; + + data->has_next_tsf = true; + } + + if (data->absent) + next_offset = ieee80211_get_noa_absent_time(data, tsf); + + data->next_tsf = tsf + next_offset; +} +EXPORT_SYMBOL(ieee80211_update_p2p_noa); + +int ieee80211_parse_p2p_noa(const struct ieee80211_p2p_noa_attr *attr, + struct ieee80211_noa_data *data, u32 tsf) +{ + int ret = 0; + int i; + + memset(data, 0, sizeof(*data)); + + for (i = 0; i < IEEE80211_P2P_NOA_DESC_MAX; i++) { + const struct ieee80211_p2p_noa_desc *desc = &attr->desc[i]; + + if (!desc->count || !desc->duration) + continue; + + data->count[i] = desc->count; + data->desc[i].start = le32_to_cpu(desc->start_time); + data->desc[i].duration = le32_to_cpu(desc->duration); + data->desc[i].interval = le32_to_cpu(desc->interval); + + if (data->count[i] > 1 && + data->desc[i].interval < data->desc[i].duration) + continue; + + ieee80211_extend_noa_desc(data, tsf, i); + ret++; + } + + if (ret) + ieee80211_update_p2p_noa(data, tsf); + + return ret; +} +EXPORT_SYMBOL(ieee80211_parse_p2p_noa); + +void ieee80211_recalc_dtim(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata) +{ + u64 tsf = drv_get_tsf(local, sdata); + u64 dtim_count = 0; + u16 beacon_int = sdata->vif.bss_conf.beacon_int * 1024; + u8 dtim_period = sdata->vif.bss_conf.dtim_period; + struct ps_data *ps; + u8 
bcns_from_dtim; + + if (tsf == -1ULL || !beacon_int || !dtim_period) + return; + + if (sdata->vif.type == NL80211_IFTYPE_AP || + sdata->vif.type == NL80211_IFTYPE_AP_VLAN) { + if (!sdata->bss) + return; + + ps = &sdata->bss->ps; + } else if (ieee80211_vif_is_mesh(&sdata->vif)) { + ps = &sdata->u.mesh.ps; + } else { + return; + } + + /* + * actually finds last dtim_count, mac80211 will update in + * __beacon_add_tim(). + * dtim_count = dtim_period - (tsf / bcn_int) % dtim_period + */ + do_div(tsf, beacon_int); + bcns_from_dtim = do_div(tsf, dtim_period); + /* just had a DTIM */ + if (!bcns_from_dtim) + dtim_count = 0; + else + dtim_count = dtim_period - bcns_from_dtim; + + ps->dtim_count = dtim_count; +} diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index de0112785aa..d75f35c6e1a 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -182,16 +182,15 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, IEEE80211_VHT_CAP_SHORT_GI_160); /* remaining ones */ - if (own_cap.cap & IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE) { + if (own_cap.cap & IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE) vht_cap->cap |= cap_info & (IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE | - IEEE80211_VHT_CAP_SOUNDING_DIMENSIONS_MAX); - } + IEEE80211_VHT_CAP_SOUNDING_DIMENSIONS_MASK); if (own_cap.cap & IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE) vht_cap->cap |= cap_info & (IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE | - IEEE80211_VHT_CAP_BEAMFORMEE_STS_MAX); + IEEE80211_VHT_CAP_BEAMFORMEE_STS_MASK); if (own_cap.cap & IEEE80211_VHT_CAP_MU_BEAMFORMER_CAPABLE) vht_cap->cap |= cap_info & diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c index afba19cb6f8..21211c60ca9 100644 --- a/net/mac80211/wme.c +++ b/net/mac80211/wme.c @@ -106,6 +106,7 @@ u16 ieee80211_select_queue(struct ieee80211_sub_if_data *sdata, struct sta_info *sta = NULL; const u8 *ra = NULL; bool qos = false; + struct mac80211_qos_map *qos_map; if (local->hw.queues < IEEE80211_NUM_ACS || skb->len < 6) { skb->priority = 0; 
/* required for correct WPA/11i MIC */ @@ -155,7 +156,11 @@ u16 ieee80211_select_queue(struct ieee80211_sub_if_data *sdata, /* use the data classifier to determine what 802.1d tag the * data frame has */ - skb->priority = cfg80211_classify8021d(skb); + rcu_read_lock(); + qos_map = rcu_dereference(sdata->qos_map); + skb->priority = cfg80211_classify8021d(skb, qos_map ? + &qos_map->qos_map : NULL); + rcu_read_unlock(); return ieee80211_downgrade_queue(sdata, skb); } diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index d6572822076..21448d629b1 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -127,7 +127,7 @@ ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx) * APs with pairwise keys should never receive Michael MIC * errors for non-zero keyidx because these are reserved for * group keys and only the AP is sending real multicast - * frames in the BSS. ( + * frames in the BSS. */ return RX_DROP_UNUSABLE; } @@ -545,6 +545,106 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx) return RX_CONTINUE; } +static ieee80211_tx_result +ieee80211_crypto_cs_encrypt(struct ieee80211_tx_data *tx, + struct sk_buff *skb) +{ + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; + struct ieee80211_key *key = tx->key; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + const struct ieee80211_cipher_scheme *cs = key->sta->cipher_scheme; + int hdrlen; + u8 *pos; + + if (info->control.hw_key && + !(info->control.hw_key->flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE)) { + /* hwaccel has no need for preallocated head room */ + return TX_CONTINUE; + } + + if (unlikely(skb_headroom(skb) < cs->hdr_len && + pskb_expand_head(skb, cs->hdr_len, 0, GFP_ATOMIC))) + return TX_DROP; + + hdrlen = ieee80211_hdrlen(hdr->frame_control); + + pos = skb_push(skb, cs->hdr_len); + memmove(pos, pos + cs->hdr_len, hdrlen); + skb_set_network_header(skb, skb_network_offset(skb) + cs->hdr_len); + + return TX_CONTINUE; +} + +static inline int 
ieee80211_crypto_cs_pn_compare(u8 *pn1, u8 *pn2, int len) +{ + int i; + + /* pn is little endian */ + for (i = len - 1; i >= 0; i--) { + if (pn1[i] < pn2[i]) + return -1; + else if (pn1[i] > pn2[i]) + return 1; + } + + return 0; +} + +static ieee80211_rx_result +ieee80211_crypto_cs_decrypt(struct ieee80211_rx_data *rx) +{ + struct ieee80211_key *key = rx->key; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; + const struct ieee80211_cipher_scheme *cs = NULL; + int hdrlen = ieee80211_hdrlen(hdr->frame_control); + struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb); + int data_len; + u8 *rx_pn; + u8 *skb_pn; + u8 qos_tid; + + if (!rx->sta || !rx->sta->cipher_scheme || + !(status->flag & RX_FLAG_DECRYPTED)) + return RX_DROP_UNUSABLE; + + if (!ieee80211_is_data(hdr->frame_control)) + return RX_CONTINUE; + + cs = rx->sta->cipher_scheme; + + data_len = rx->skb->len - hdrlen - cs->hdr_len; + + if (data_len < 0) + return RX_DROP_UNUSABLE; + + if (ieee80211_is_data_qos(hdr->frame_control)) + qos_tid = *ieee80211_get_qos_ctl(hdr) & + IEEE80211_QOS_CTL_TID_MASK; + else + qos_tid = 0; + + if (skb_linearize(rx->skb)) + return RX_DROP_UNUSABLE; + + hdr = (struct ieee80211_hdr *)rx->skb->data; + + rx_pn = key->u.gen.rx_pn[qos_tid]; + skb_pn = rx->skb->data + hdrlen + cs->pn_off; + + if (ieee80211_crypto_cs_pn_compare(skb_pn, rx_pn, cs->pn_len) <= 0) + return RX_DROP_UNUSABLE; + + memcpy(rx_pn, skb_pn, cs->pn_len); + + /* remove security header and MIC */ + if (pskb_trim(rx->skb, rx->skb->len - cs->mic_len)) + return RX_DROP_UNUSABLE; + + memmove(rx->skb->data + cs->hdr_len, rx->skb->data, hdrlen); + skb_pull(rx->skb, cs->hdr_len); + + return RX_CONTINUE; +} static void bip_aad(struct sk_buff *skb, u8 *aad) { @@ -685,6 +785,7 @@ ieee80211_crypto_hw_encrypt(struct ieee80211_tx_data *tx) { struct sk_buff *skb; struct ieee80211_tx_info *info = NULL; + ieee80211_tx_result res; skb_queue_walk(&tx->skbs, skb) { info = IEEE80211_SKB_CB(skb); @@ -692,9 
+793,24 @@ ieee80211_crypto_hw_encrypt(struct ieee80211_tx_data *tx) /* handle hw-only algorithm */ if (!info->control.hw_key) return TX_DROP; + + if (tx->key->sta->cipher_scheme) { + res = ieee80211_crypto_cs_encrypt(tx, skb); + if (res != TX_CONTINUE) + return res; + } } ieee80211_tx_set_protected(tx); return TX_CONTINUE; } + +ieee80211_rx_result +ieee80211_crypto_hw_decrypt(struct ieee80211_rx_data *rx) +{ + if (rx->sta->cipher_scheme) + return ieee80211_crypto_cs_decrypt(rx); + + return RX_DROP_UNUSABLE; +} diff --git a/net/mac80211/wpa.h b/net/mac80211/wpa.h index 07e33f899c7..62e5a12dfe0 100644 --- a/net/mac80211/wpa.h +++ b/net/mac80211/wpa.h @@ -34,5 +34,7 @@ ieee80211_rx_result ieee80211_crypto_aes_cmac_decrypt(struct ieee80211_rx_data *rx); ieee80211_tx_result ieee80211_crypto_hw_encrypt(struct ieee80211_tx_data *tx); +ieee80211_rx_result +ieee80211_crypto_hw_decrypt(struct ieee80211_rx_data *rx); #endif /* WPA_H */ diff --git a/net/mac802154/wpan.c b/net/mac802154/wpan.c index e24bcf97729..372d8a222b9 100644 --- a/net/mac802154/wpan.c +++ b/net/mac802154/wpan.c @@ -444,8 +444,8 @@ mac802154_subif_frame(struct mac802154_sub_if_data *sdata, struct sk_buff *skb) case IEEE802154_FC_TYPE_DATA: return mac802154_process_data(sdata->dev, skb); default: - pr_warning("ieee802154: bad frame received (type = %d)\n", - mac_cb_type(skb)); + pr_warn("ieee802154: bad frame received (type = %d)\n", + mac_cb_type(skb)); kfree_skb(skb); return NET_RX_DROP; } diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index c3398cd99b9..c37467562fd 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -414,47 +414,112 @@ config NETFILTER_SYNPROXY endif # NF_CONNTRACK config NF_TABLES - depends on NETFILTER_NETLINK + select NETFILTER_NETLINK tristate "Netfilter nf_tables support" + help + nftables is the new packet classification framework that intends to + replace the existing {ip,ip6,arp,eb}_tables infrastructure. 
It + provides a pseudo-state machine with an extensible instruction-set + (also known as expressions) that the userspace 'nft' utility + (http://www.netfilter.org/projects/nftables) uses to build the + rule-set. It also comes with the generic set infrastructure that + allows you to construct mappings between matchings and actions + for performance lookups. + + To compile it as a module, choose M here. + +config NF_TABLES_INET + depends on NF_TABLES && IPV6 + select NF_TABLES_IPV4 + select NF_TABLES_IPV6 + tristate "Netfilter nf_tables mixed IPv4/IPv6 tables support" + help + This option enables support for a mixed IPv4/IPv6 "inet" table. config NFT_EXTHDR depends on NF_TABLES tristate "Netfilter nf_tables IPv6 exthdr module" + help + This option adds the "exthdr" expression that you can use to match + IPv6 extension headers. config NFT_META depends on NF_TABLES tristate "Netfilter nf_tables meta module" + help + This option adds the "meta" expression that you can use to match and + to set packet metainformation such as the packet mark. config NFT_CT depends on NF_TABLES depends on NF_CONNTRACK tristate "Netfilter nf_tables conntrack module" + help + This option adds the "ct" expression that you can use to match + connection tracking information such as the flow state. config NFT_RBTREE depends on NF_TABLES tristate "Netfilter nf_tables rbtree set module" + help + This option adds the "rbtree" set type (Red Black tree) that is used + to build interval-based sets. config NFT_HASH depends on NF_TABLES tristate "Netfilter nf_tables hash set module" + help + This option adds the "hash" set type that is used to build one-way + mappings between matchings and actions. config NFT_COUNTER depends on NF_TABLES tristate "Netfilter nf_tables counter module" + help + This option adds the "counter" expression that you can use to + include packet and byte counters in a rule. 
config NFT_LOG depends on NF_TABLES tristate "Netfilter nf_tables log module" + help + This option adds the "log" expression that you can use to log + packets matching some criteria. config NFT_LIMIT depends on NF_TABLES tristate "Netfilter nf_tables limit module" + help + This option adds the "limit" expression that you can use to + ratelimit rule matchings. config NFT_NAT depends on NF_TABLES depends on NF_CONNTRACK depends on NF_NAT tristate "Netfilter nf_tables nat module" + help + This option adds the "nat" expression that you can use to perform + typical Network Address Translation (NAT) packet transformations. + +config NFT_QUEUE + depends on NF_TABLES + depends on NETFILTER_XTABLES + depends on NETFILTER_NETLINK_QUEUE + tristate "Netfilter nf_tables queue module" + help + This is required if you intend to use the userspace queueing + infrastructure (also known as NFQUEUE) from nftables. + +config NFT_REJECT + depends on NF_TABLES + depends on NF_TABLES_IPV6 || !NF_TABLES_IPV6 + default m if NETFILTER_ADVANCED=n + tristate "Netfilter nf_tables reject support" + help + This option adds the "reject" expression that you can use to + explicitly deny and notify via TCP reset/ICMP informational errors + unallowed traffic. config NFT_COMPAT depends on NF_TABLES @@ -858,6 +923,16 @@ config NETFILTER_XT_MATCH_BPF To compile it as a module, choose M here. If unsure, say N. +config NETFILTER_XT_MATCH_CGROUP + tristate '"control group" match support' + depends on NETFILTER_ADVANCED + depends on CGROUPS + select CGROUP_NET_CLASSID + ---help--- + Socket/process control group matching allows you to match locally + generated packets based on which net_cls control group processes + belong to. + config NETFILTER_XT_MATCH_CLUSTER tristate '"cluster" match support' depends on NF_CONNTRACK @@ -1035,6 +1110,15 @@ config NETFILTER_XT_MATCH_HL in the IPv6 header, or the time-to-live field in the IPv4 header of the packet. 
+config NETFILTER_XT_MATCH_IPCOMP + tristate '"ipcomp" match support' + depends on NETFILTER_ADVANCED + help + This match extension allows you to match a range of CPIs(16 bits) + inside IPComp header of IPSec packets. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_MATCH_IPRANGE tristate '"iprange" address range match support' depends on NETFILTER_ADVANCED @@ -1055,6 +1139,16 @@ config NETFILTER_XT_MATCH_IPVS If unsure, say N. +config NETFILTER_XT_MATCH_L2TP + tristate '"l2tp" match support' + depends on NETFILTER_ADVANCED + default L2TP + ---help--- + This option adds an "L2TP" match, which allows you to match against + L2TP protocol header fields. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_MATCH_LENGTH tristate '"length" match support' depends on NETFILTER_ADVANCED diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 394483b2c19..ee9c4de5f8e 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -70,13 +70,15 @@ nf_tables-objs += nft_immediate.o nft_cmp.o nft_lookup.o nf_tables-objs += nft_bitwise.o nft_byteorder.o nft_payload.o obj-$(CONFIG_NF_TABLES) += nf_tables.o +obj-$(CONFIG_NF_TABLES_INET) += nf_tables_inet.o obj-$(CONFIG_NFT_COMPAT) += nft_compat.o obj-$(CONFIG_NFT_EXTHDR) += nft_exthdr.o obj-$(CONFIG_NFT_META) += nft_meta.o obj-$(CONFIG_NFT_CT) += nft_ct.o obj-$(CONFIG_NFT_LIMIT) += nft_limit.o obj-$(CONFIG_NFT_NAT) += nft_nat.o -#nf_tables-objs += nft_meta_target.o +obj-$(CONFIG_NFT_QUEUE) += nft_queue.o +obj-$(CONFIG_NFT_REJECT) += nft_reject.o obj-$(CONFIG_NFT_RBTREE) += nft_rbtree.o obj-$(CONFIG_NFT_HASH) += nft_hash.o obj-$(CONFIG_NFT_COUNTER) += nft_counter.o @@ -133,8 +135,10 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_ESP) += xt_esp.o obj-$(CONFIG_NETFILTER_XT_MATCH_HASHLIMIT) += xt_hashlimit.o obj-$(CONFIG_NETFILTER_XT_MATCH_HELPER) += xt_helper.o obj-$(CONFIG_NETFILTER_XT_MATCH_HL) += xt_hl.o +obj-$(CONFIG_NETFILTER_XT_MATCH_IPCOMP) += xt_ipcomp.o 
obj-$(CONFIG_NETFILTER_XT_MATCH_IPRANGE) += xt_iprange.o obj-$(CONFIG_NETFILTER_XT_MATCH_IPVS) += xt_ipvs.o +obj-$(CONFIG_NETFILTER_XT_MATCH_L2TP) += xt_l2tp.o obj-$(CONFIG_NETFILTER_XT_MATCH_LENGTH) += xt_length.o obj-$(CONFIG_NETFILTER_XT_MATCH_LIMIT) += xt_limit.o obj-$(CONFIG_NETFILTER_XT_MATCH_MAC) += xt_mac.o @@ -142,6 +146,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_MULTIPORT) += xt_multiport.o obj-$(CONFIG_NETFILTER_XT_MATCH_NFACCT) += xt_nfacct.o obj-$(CONFIG_NETFILTER_XT_MATCH_OSF) += xt_osf.o obj-$(CONFIG_NETFILTER_XT_MATCH_OWNER) += xt_owner.o +obj-$(CONFIG_NETFILTER_XT_MATCH_CGROUP) += xt_cgroup.o obj-$(CONFIG_NETFILTER_XT_MATCH_PHYSDEV) += xt_physdev.o obj-$(CONFIG_NETFILTER_XT_MATCH_PKTTYPE) += xt_pkttype.o obj-$(CONFIG_NETFILTER_XT_MATCH_POLICY) += xt_policy.o diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index bac7e01df67..de770ec39e5 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -625,34 +625,6 @@ EXPORT_SYMBOL_GPL(ip_set_name_byindex); */ /* - * Find set by name, reference it once. The reference makes sure the - * thing pointed to, does not go away under our feet. - * - * The nfnl mutex is used in the function. - */ -ip_set_id_t -ip_set_nfnl_get(struct net *net, const char *name) -{ - ip_set_id_t i, index = IPSET_INVALID_ID; - struct ip_set *s; - struct ip_set_net *inst = ip_set_pernet(net); - - nfnl_lock(NFNL_SUBSYS_IPSET); - for (i = 0; i < inst->ip_set_max; i++) { - s = nfnl_set(inst, i); - if (s != NULL && STREQ(s->name, name)) { - __ip_set_get(s); - index = i; - break; - } - } - nfnl_unlock(NFNL_SUBSYS_IPSET); - - return index; -} -EXPORT_SYMBOL_GPL(ip_set_nfnl_get); - -/* * Find set by index, reference it once. The reference makes sure the * thing pointed to, does not go away under our feet. 
* diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 4c8e5c0aa1a..59a1a85bcb3 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -1209,7 +1209,7 @@ void ip_vs_random_dropentry(struct net *net) * Randomly scan 1/32 of the whole table every second */ for (idx = 0; idx < (ip_vs_conn_tab_size>>5); idx++) { - unsigned int hash = net_random() & ip_vs_conn_tab_mask; + unsigned int hash = prandom_u32() & ip_vs_conn_tab_mask; hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) { if (cp->flags & IP_VS_CONN_F_TEMPLATE) diff --git a/net/netfilter/ipvs/ip_vs_nfct.c b/net/netfilter/ipvs/ip_vs_nfct.c index 5a355a46d1d..5882bbfd198 100644 --- a/net/netfilter/ipvs/ip_vs_nfct.c +++ b/net/netfilter/ipvs/ip_vs_nfct.c @@ -19,8 +19,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. * * * Authors: diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index f63c2388f38..db801263ee9 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -1637,7 +1637,10 @@ static int sync_thread_master(void *data) continue; } while (ip_vs_send_sync_msg(tinfo->sock, sb->mesg) < 0) { - int ret = __wait_event_interruptible(*sk_sleep(sk), + /* (Ab)use interruptible sleep to avoid increasing + * the load avg. 
+ */ + __wait_event_interruptible(*sk_sleep(sk), sock_writeable(sk) || kthread_should_stop()); if (unlikely(kthread_should_stop())) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 43549eb7a7b..8824ed0ccc9 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -60,12 +60,6 @@ int (*nfnetlink_parse_nat_setup_hook)(struct nf_conn *ct, const struct nlattr *attr) __read_mostly; EXPORT_SYMBOL_GPL(nfnetlink_parse_nat_setup_hook); -int (*nf_nat_seq_adjust_hook)(struct sk_buff *skb, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned int protoff); -EXPORT_SYMBOL_GPL(nf_nat_seq_adjust_hook); - DEFINE_SPINLOCK(nf_conntrack_lock); EXPORT_SYMBOL_GPL(nf_conntrack_lock); @@ -361,15 +355,6 @@ begin: return NULL; } -struct nf_conntrack_tuple_hash * -__nf_conntrack_find(struct net *net, u16 zone, - const struct nf_conntrack_tuple *tuple) -{ - return ____nf_conntrack_find(net, zone, tuple, - hash_conntrack_raw(tuple, zone)); -} -EXPORT_SYMBOL_GPL(__nf_conntrack_find); - /* Find a connection corresponding to a tuple. 
*/ static struct nf_conntrack_tuple_hash * __nf_conntrack_find_get(struct net *net, u16 zone, diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 08870b85904..bb322d0beb4 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -2118,8 +2118,16 @@ ctnetlink_nfqueue_parse_ct(const struct nlattr *cda[], struct nf_conn *ct) return err; } #if defined(CONFIG_NF_CONNTRACK_MARK) - if (cda[CTA_MARK]) - ct->mark = ntohl(nla_get_be32(cda[CTA_MARK])); + if (cda[CTA_MARK]) { + u32 mask = 0, mark, newmark; + if (cda[CTA_MARK_MASK]) + mask = ~ntohl(nla_get_be32(cda[CTA_MARK_MASK])); + + mark = ntohl(nla_get_be32(cda[CTA_MARK])); + newmark = (ct->mark & mask) ^ mark; + if (newmark != ct->mark) + ct->mark = newmark; + } #endif return 0; } diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index ce3004156ee..b65d5864b6d 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -92,12 +92,6 @@ nf_ct_l3proto_find_get(u_int16_t l3proto) } EXPORT_SYMBOL_GPL(nf_ct_l3proto_find_get); -void nf_ct_l3proto_put(struct nf_conntrack_l3proto *p) -{ - module_put(p->me); -} -EXPORT_SYMBOL_GPL(nf_ct_l3proto_put); - int nf_ct_l3proto_try_module_get(unsigned short l3proto) { diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index a99b6c3427b..cb372f96f10 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -428,7 +428,7 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb, const char *msg; u_int8_t state; - dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &dh); + dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &_dh); BUG_ON(dh == NULL); state = dccp_state_table[CT_DCCP_ROLE_CLIENT][dh->dccph_type][CT_DCCP_NONE]; @@ -457,7 +457,7 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb, out_invalid: if (LOG_INVALID(net, 
IPPROTO_DCCP)) nf_log_packet(net, nf_ct_l3num(ct), 0, skb, NULL, NULL, - NULL, msg); + NULL, "%s", msg); return false; } @@ -486,7 +486,7 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, u_int8_t type, old_state, new_state; enum ct_dccp_roles role; - dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &dh); + dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &_dh); BUG_ON(dh == NULL); type = dh->dccph_type; @@ -577,7 +577,7 @@ static int dccp_error(struct net *net, struct nf_conn *tmpl, unsigned int cscov; const char *msg; - dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &dh); + dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &_dh); if (dh == NULL) { msg = "nf_ct_dccp: short packet "; goto out_invalid; @@ -614,7 +614,7 @@ static int dccp_error(struct net *net, struct nf_conn *tmpl, out_invalid: if (LOG_INVALID(net, IPPROTO_DCCP)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, msg); + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "%s", msg); return -NF_ACCEPT; } diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 63a81540221..d3f5cd6dd96 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -315,7 +315,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, * manips not an issue. 
*/ if (maniptype == NF_NAT_MANIP_SRC && - !(range->flags & NF_NAT_RANGE_PROTO_RANDOM)) { + !(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) { /* try the original tuple first */ if (in_range(l3proto, l4proto, orig_tuple, range)) { if (!nf_nat_used_tuple(orig_tuple, ct)) { @@ -339,7 +339,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, */ /* Only bother mapping if it's not already in range and unique */ - if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM)) { + if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) { if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) { if (l4proto->in_range(tuple, maniptype, &range->min_proto, diff --git a/net/netfilter/nf_nat_proto_common.c b/net/netfilter/nf_nat_proto_common.c index 9baaf734c14..83a72a235ca 100644 --- a/net/netfilter/nf_nat_proto_common.c +++ b/net/netfilter/nf_nat_proto_common.c @@ -74,22 +74,24 @@ void nf_nat_l4proto_unique_tuple(const struct nf_nat_l3proto *l3proto, range_size = ntohs(range->max_proto.all) - min + 1; } - if (range->flags & NF_NAT_RANGE_PROTO_RANDOM) + if (range->flags & NF_NAT_RANGE_PROTO_RANDOM) { off = l3proto->secure_port(tuple, maniptype == NF_NAT_MANIP_SRC ? 
tuple->dst.u.all : tuple->src.u.all); - else + } else if (range->flags & NF_NAT_RANGE_PROTO_RANDOM_FULLY) { + off = prandom_u32(); + } else { off = *rover; + } for (i = 0; ; ++off) { *portptr = htons(min + off % range_size); if (++i != range_size && nf_nat_used_tuple(tuple, ct)) continue; - if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM)) + if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) *rover = off; return; } - return; } EXPORT_SYMBOL_GPL(nf_nat_l4proto_unique_tuple); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 71a9f49a768..117bbaaddde 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -124,37 +124,43 @@ static inline u64 nf_tables_alloc_handle(struct nft_table *table) return ++table->hgenerator; } -static struct nf_chain_type *chain_type[AF_MAX][NFT_CHAIN_T_MAX]; +static const struct nf_chain_type *chain_type[AF_MAX][NFT_CHAIN_T_MAX]; -static int __nf_tables_chain_type_lookup(int family, const struct nlattr *nla) +static const struct nf_chain_type * +__nf_tables_chain_type_lookup(int family, const struct nlattr *nla) { int i; - for (i=0; i<NFT_CHAIN_T_MAX; i++) { + for (i = 0; i < NFT_CHAIN_T_MAX; i++) { if (chain_type[family][i] != NULL && !nla_strcmp(nla, chain_type[family][i]->name)) - return i; + return chain_type[family][i]; } - return -1; + return NULL; } -static int nf_tables_chain_type_lookup(const struct nft_af_info *afi, - const struct nlattr *nla, - bool autoload) +static const struct nf_chain_type * +nf_tables_chain_type_lookup(const struct nft_af_info *afi, + const struct nlattr *nla, + bool autoload) { - int type; + const struct nf_chain_type *type; type = __nf_tables_chain_type_lookup(afi->family, nla); + if (type != NULL) + return type; #ifdef CONFIG_MODULES - if (type < 0 && autoload) { + if (autoload) { nfnl_unlock(NFNL_SUBSYS_NFTABLES); request_module("nft-chain-%u-%*.s", afi->family, nla_len(nla)-1, (const char *)nla_data(nla)); nfnl_lock(NFNL_SUBSYS_NFTABLES); type = 
__nf_tables_chain_type_lookup(afi->family, nla); + if (type != NULL) + return ERR_PTR(-EAGAIN); } #endif - return type; + return ERR_PTR(-ENOENT); } static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = { @@ -180,7 +186,8 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, u32 portid, u32 seq, nfmsg->res_id = 0; if (nla_put_string(skb, NFTA_TABLE_NAME, table->name) || - nla_put_be32(skb, NFTA_TABLE_FLAGS, htonl(table->flags))) + nla_put_be32(skb, NFTA_TABLE_FLAGS, htonl(table->flags)) || + nla_put_be32(skb, NFTA_TABLE_USE, htonl(table->use))) goto nla_put_failure; return nlmsg_end(skb, nlh); @@ -306,7 +313,8 @@ err: return err; } -static int nf_tables_table_enable(struct nft_table *table) +static int nf_tables_table_enable(const struct nft_af_info *afi, + struct nft_table *table) { struct nft_chain *chain; int err, i = 0; @@ -315,7 +323,7 @@ static int nf_tables_table_enable(struct nft_table *table) if (!(chain->flags & NFT_BASE_CHAIN)) continue; - err = nf_register_hook(&nft_base_chain(chain)->ops); + err = nf_register_hooks(nft_base_chain(chain)->ops, afi->nops); if (err < 0) goto err; @@ -330,18 +338,20 @@ err: if (i-- <= 0) break; - nf_unregister_hook(&nft_base_chain(chain)->ops); + nf_unregister_hooks(nft_base_chain(chain)->ops, afi->nops); } return err; } -static int nf_tables_table_disable(struct nft_table *table) +static int nf_tables_table_disable(const struct nft_af_info *afi, + struct nft_table *table) { struct nft_chain *chain; list_for_each_entry(chain, &table->chains, list) { if (chain->flags & NFT_BASE_CHAIN) - nf_unregister_hook(&nft_base_chain(chain)->ops); + nf_unregister_hooks(nft_base_chain(chain)->ops, + afi->nops); } return 0; @@ -356,7 +366,7 @@ static int nf_tables_updtable(struct sock *nlsk, struct sk_buff *skb, int family = nfmsg->nfgen_family, ret = 0; if (nla[NFTA_TABLE_FLAGS]) { - __be32 flags; + u32 flags; flags = ntohl(nla_get_be32(nla[NFTA_TABLE_FLAGS])); if (flags & ~NFT_TABLE_F_DORMANT) @@ -364,12 +374,12 
@@ static int nf_tables_updtable(struct sock *nlsk, struct sk_buff *skb, if ((flags & NFT_TABLE_F_DORMANT) && !(table->flags & NFT_TABLE_F_DORMANT)) { - ret = nf_tables_table_disable(table); + ret = nf_tables_table_disable(afi, table); if (ret >= 0) table->flags |= NFT_TABLE_F_DORMANT; } else if (!(flags & NFT_TABLE_F_DORMANT) && table->flags & NFT_TABLE_F_DORMANT) { - ret = nf_tables_table_enable(table); + ret = nf_tables_table_enable(afi, table); if (ret >= 0) table->flags &= ~NFT_TABLE_F_DORMANT; } @@ -392,6 +402,7 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb, struct nft_table *table; struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; + u32 flags = 0; afi = nf_tables_afinfo_lookup(net, family, true); if (IS_ERR(afi)) @@ -413,25 +424,25 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb, return nf_tables_updtable(nlsk, skb, nlh, nla, afi, table); } + if (nla[NFTA_TABLE_FLAGS]) { + flags = ntohl(nla_get_be32(nla[NFTA_TABLE_FLAGS])); + if (flags & ~NFT_TABLE_F_DORMANT) + return -EINVAL; + } + + if (!try_module_get(afi->owner)) + return -EAFNOSUPPORT; + table = kzalloc(sizeof(*table) + nla_len(name), GFP_KERNEL); - if (table == NULL) + if (table == NULL) { + module_put(afi->owner); return -ENOMEM; + } nla_strlcpy(table->name, name, nla_len(name)); INIT_LIST_HEAD(&table->chains); INIT_LIST_HEAD(&table->sets); - - if (nla[NFTA_TABLE_FLAGS]) { - __be32 flags; - - flags = ntohl(nla_get_be32(nla[NFTA_TABLE_FLAGS])); - if (flags & ~NFT_TABLE_F_DORMANT) { - kfree(table); - return -EINVAL; - } - - table->flags |= flags; - } + table->flags = flags; list_add_tail(&table->list, &afi->tables); nf_tables_table_notify(skb, nlh, table, NFT_MSG_NEWTABLE, family); @@ -456,16 +467,17 @@ static int nf_tables_deltable(struct sock *nlsk, struct sk_buff *skb, if (IS_ERR(table)) return PTR_ERR(table); - if (table->use) + if (!list_empty(&table->chains) || !list_empty(&table->sets)) return -EBUSY; list_del(&table->list); 
nf_tables_table_notify(skb, nlh, table, NFT_MSG_DELTABLE, family); kfree(table); + module_put(afi->owner); return 0; } -int nft_register_chain_type(struct nf_chain_type *ctype) +int nft_register_chain_type(const struct nf_chain_type *ctype) { int err = 0; @@ -474,10 +486,6 @@ int nft_register_chain_type(struct nf_chain_type *ctype) err = -EBUSY; goto out; } - - if (!try_module_get(ctype->me)) - goto out; - chain_type[ctype->family][ctype->type] = ctype; out: nfnl_unlock(NFNL_SUBSYS_NFTABLES); @@ -485,11 +493,10 @@ out: } EXPORT_SYMBOL_GPL(nft_register_chain_type); -void nft_unregister_chain_type(struct nf_chain_type *ctype) +void nft_unregister_chain_type(const struct nf_chain_type *ctype) { nfnl_lock(NFNL_SUBSYS_NFTABLES); chain_type[ctype->family][ctype->type] = NULL; - module_put(ctype->me); nfnl_unlock(NFNL_SUBSYS_NFTABLES); } EXPORT_SYMBOL_GPL(nft_unregister_chain_type); @@ -597,7 +604,7 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, u32 portid, u32 seq, if (chain->flags & NFT_BASE_CHAIN) { const struct nft_base_chain *basechain = nft_base_chain(chain); - const struct nf_hook_ops *ops = &basechain->ops; + const struct nf_hook_ops *ops = &basechain->ops[0]; struct nlattr *nest; nest = nla_nest_start(skb, NFTA_CHAIN_HOOK); @@ -613,9 +620,8 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, u32 portid, u32 seq, htonl(basechain->policy))) goto nla_put_failure; - if (nla_put_string(skb, NFTA_CHAIN_TYPE, - chain_type[ops->pf][nft_base_chain(chain)->type]->name)) - goto nla_put_failure; + if (nla_put_string(skb, NFTA_CHAIN_TYPE, basechain->type->name)) + goto nla_put_failure; if (nft_dump_stats(skb, nft_base_chain(chain)->stats)) goto nla_put_failure; @@ -756,22 +762,6 @@ err: return err; } -static int -nf_tables_chain_policy(struct nft_base_chain *chain, const struct nlattr *attr) -{ - switch (ntohl(nla_get_be32(attr))) { - case NF_DROP: - chain->policy = NF_DROP; - break; - case NF_ACCEPT: - chain->policy = NF_ACCEPT; - break; - default: - 
return -EINVAL; - } - return 0; -} - static const struct nla_policy nft_counter_policy[NFTA_COUNTER_MAX + 1] = { [NFTA_COUNTER_PACKETS] = { .type = NLA_U64 }, [NFTA_COUNTER_BYTES] = { .type = NLA_U64 }, @@ -830,7 +820,9 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, struct nlattr *ha[NFTA_HOOK_MAX + 1]; struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; + u8 policy = NF_ACCEPT; u64 handle = 0; + unsigned int i; int err; bool create; @@ -844,9 +836,6 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, if (IS_ERR(table)) return PTR_ERR(table); - if (table->use == UINT_MAX) - return -EOVERFLOW; - chain = NULL; name = nla[NFTA_CHAIN_NAME]; @@ -864,6 +853,22 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, } } + if (nla[NFTA_CHAIN_POLICY]) { + if ((chain != NULL && + !(chain->flags & NFT_BASE_CHAIN)) || + nla[NFTA_CHAIN_HOOK] == NULL) + return -EOPNOTSUPP; + + policy = ntohl(nla_get_be32(nla[NFTA_CHAIN_POLICY])); + switch (policy) { + case NF_DROP: + case NF_ACCEPT: + break; + default: + return -EINVAL; + } + } + if (chain != NULL) { if (nlh->nlmsg_flags & NLM_F_EXCL) return -EEXIST; @@ -874,16 +879,6 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, !IS_ERR(nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]))) return -EEXIST; - if (nla[NFTA_CHAIN_POLICY]) { - if (!(chain->flags & NFT_BASE_CHAIN)) - return -EOPNOTSUPP; - - err = nf_tables_chain_policy(nft_base_chain(chain), - nla[NFTA_CHAIN_POLICY]); - if (err < 0) - return err; - } - if (nla[NFTA_CHAIN_COUNTERS]) { if (!(chain->flags & NFT_BASE_CHAIN)) return -EOPNOTSUPP; @@ -894,24 +889,31 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, return err; } + if (nla[NFTA_CHAIN_POLICY]) + nft_base_chain(chain)->policy = policy; + if (nla[NFTA_CHAIN_HANDLE] && name) nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN); goto notify; } + if (table->use == UINT_MAX) + return -EOVERFLOW; + if 
(nla[NFTA_CHAIN_HOOK]) { + const struct nf_chain_type *type; struct nf_hook_ops *ops; nf_hookfn *hookfn; - u32 hooknum; - int type = NFT_CHAIN_T_DEFAULT; + u32 hooknum, priority; + type = chain_type[family][NFT_CHAIN_T_DEFAULT]; if (nla[NFTA_CHAIN_TYPE]) { type = nf_tables_chain_type_lookup(afi, nla[NFTA_CHAIN_TYPE], create); - if (type < 0) - return -ENOENT; + if (IS_ERR(type)) + return PTR_ERR(type); } err = nla_parse_nested(ha, NFTA_HOOK_MAX, nla[NFTA_CHAIN_HOOK], @@ -925,46 +927,23 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, hooknum = ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM])); if (hooknum >= afi->nhooks) return -EINVAL; + priority = ntohl(nla_get_be32(ha[NFTA_HOOK_PRIORITY])); - hookfn = chain_type[family][type]->fn[hooknum]; - if (hookfn == NULL) + if (!(type->hook_mask & (1 << hooknum))) return -EOPNOTSUPP; + if (!try_module_get(type->owner)) + return -ENOENT; + hookfn = type->hooks[hooknum]; basechain = kzalloc(sizeof(*basechain), GFP_KERNEL); if (basechain == NULL) return -ENOMEM; - basechain->type = type; - chain = &basechain->chain; - - ops = &basechain->ops; - ops->pf = family; - ops->owner = afi->owner; - ops->hooknum = ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM])); - ops->priority = ntohl(nla_get_be32(ha[NFTA_HOOK_PRIORITY])); - ops->priv = chain; - ops->hook = hookfn; - if (afi->hooks[ops->hooknum]) - ops->hook = afi->hooks[ops->hooknum]; - - chain->flags |= NFT_BASE_CHAIN; - - if (nla[NFTA_CHAIN_POLICY]) { - err = nf_tables_chain_policy(basechain, - nla[NFTA_CHAIN_POLICY]); - if (err < 0) { - free_percpu(basechain->stats); - kfree(basechain); - return err; - } - } else - basechain->policy = NF_ACCEPT; - if (nla[NFTA_CHAIN_COUNTERS]) { err = nf_tables_counters(basechain, nla[NFTA_CHAIN_COUNTERS]); if (err < 0) { - free_percpu(basechain->stats); + module_put(type->owner); kfree(basechain); return err; } @@ -972,12 +951,33 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, struct nft_stats __percpu 
*newstats; newstats = alloc_percpu(struct nft_stats); - if (newstats == NULL) + if (newstats == NULL) { + module_put(type->owner); + kfree(basechain); return -ENOMEM; + } + rcu_assign_pointer(basechain->stats, newstats); + } + + basechain->type = type; + chain = &basechain->chain; - rcu_assign_pointer(nft_base_chain(chain)->stats, - newstats); + for (i = 0; i < afi->nops; i++) { + ops = &basechain->ops[i]; + ops->pf = family; + ops->owner = afi->owner; + ops->hooknum = hooknum; + ops->priority = priority; + ops->priv = chain; + ops->hook = afi->hooks[ops->hooknum]; + if (hookfn) + ops->hook = hookfn; + if (afi->hook_ops_init) + afi->hook_ops_init(ops, i); } + + chain->flags |= NFT_BASE_CHAIN; + basechain->policy = policy; } else { chain = kzalloc(sizeof(*chain), GFP_KERNEL); if (chain == NULL) @@ -992,8 +992,9 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, if (!(table->flags & NFT_TABLE_F_DORMANT) && chain->flags & NFT_BASE_CHAIN) { - err = nf_register_hook(&nft_base_chain(chain)->ops); + err = nf_register_hooks(nft_base_chain(chain)->ops, afi->nops); if (err < 0) { + module_put(basechain->type->owner); free_percpu(basechain->stats); kfree(basechain); return err; @@ -1014,6 +1015,7 @@ static void nf_tables_rcu_chain_destroy(struct rcu_head *head) BUG_ON(chain->use > 0); if (chain->flags & NFT_BASE_CHAIN) { + module_put(nft_base_chain(chain)->type->owner); free_percpu(nft_base_chain(chain)->stats); kfree(nft_base_chain(chain)); } else @@ -1051,7 +1053,7 @@ static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb, if (!(table->flags & NFT_TABLE_F_DORMANT) && chain->flags & NFT_BASE_CHAIN) - nf_unregister_hook(&nft_base_chain(chain)->ops); + nf_unregister_hooks(nft_base_chain(chain)->ops, afi->nops); nf_tables_chain_notify(skb, nlh, table, chain, NFT_MSG_DELCHAIN, family); @@ -1931,12 +1933,14 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, { struct net *net = sock_net(skb->sk); const struct nfgenmsg *nfmsg = 
nlmsg_data(nlh); - const struct nft_af_info *afi; + const struct nft_af_info *afi = NULL; const struct nft_table *table = NULL; - afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false); - if (IS_ERR(afi)) - return PTR_ERR(afi); + if (nfmsg->nfgen_family != NFPROTO_UNSPEC) { + afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false); + if (IS_ERR(afi)) + return PTR_ERR(afi); + } if (nla[NFTA_SET_TABLE] != NULL) { table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE]); @@ -1981,11 +1985,14 @@ static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set, return -ENOMEM; list_for_each_entry(i, &ctx->table->sets, list) { - if (!sscanf(i->name, name, &n)) + int tmp; + + if (!sscanf(i->name, name, &tmp)) continue; - if (n < 0 || n > BITS_PER_LONG * PAGE_SIZE) + if (tmp < 0 || tmp > BITS_PER_LONG * PAGE_SIZE) continue; - set_bit(n, inuse); + + set_bit(tmp, inuse); } n = find_first_zero_bit(inuse, BITS_PER_LONG * PAGE_SIZE); @@ -2102,8 +2109,8 @@ done: return skb->len; } -static int nf_tables_dump_sets_all(struct nft_ctx *ctx, struct sk_buff *skb, - struct netlink_callback *cb) +static int nf_tables_dump_sets_family(struct nft_ctx *ctx, struct sk_buff *skb, + struct netlink_callback *cb) { const struct nft_set *set; unsigned int idx, s_idx = cb->args[0]; @@ -2139,6 +2146,61 @@ done: return skb->len; } +static int nf_tables_dump_sets_all(struct nft_ctx *ctx, struct sk_buff *skb, + struct netlink_callback *cb) +{ + const struct nft_set *set; + unsigned int idx, s_idx = cb->args[0]; + const struct nft_af_info *afi; + struct nft_table *table, *cur_table = (struct nft_table *)cb->args[2]; + struct net *net = sock_net(skb->sk); + int cur_family = cb->args[3]; + + if (cb->args[1]) + return skb->len; + + list_for_each_entry(afi, &net->nft.af_info, list) { + if (cur_family) { + if (afi->family != cur_family) + continue; + + cur_family = 0; + } + + list_for_each_entry(table, &afi->tables, list) { + if (cur_table) { + if (cur_table != table) + continue; 
+ + cur_table = NULL; + } + + ctx->table = table; + ctx->afi = afi; + idx = 0; + list_for_each_entry(set, &ctx->table->sets, list) { + if (idx < s_idx) + goto cont; + if (nf_tables_fill_set(skb, ctx, set, + NFT_MSG_NEWSET, + NLM_F_MULTI) < 0) { + cb->args[0] = idx; + cb->args[2] = (unsigned long) table; + cb->args[3] = afi->family; + goto done; + } +cont: + idx++; + } + if (s_idx) + s_idx = 0; + } + } + cb->args[1] = 1; +done: + return skb->len; +} + static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb) { const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); @@ -2155,9 +2217,12 @@ static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb) if (err < 0) return err; - if (ctx.table == NULL) - ret = nf_tables_dump_sets_all(&ctx, skb, cb); - else + if (ctx.table == NULL) { + if (ctx.afi == NULL) + ret = nf_tables_dump_sets_all(&ctx, skb, cb); + else + ret = nf_tables_dump_sets_family(&ctx, skb, cb); + } else ret = nf_tables_dump_sets_table(&ctx, skb, cb); return ret; @@ -2170,6 +2235,7 @@ static int nf_tables_getset(struct sock *nlsk, struct sk_buff *skb, const struct nft_set *set; struct nft_ctx ctx; struct sk_buff *skb2; + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); int err; /* Verify existance before starting dump */ @@ -2184,6 +2250,10 @@ static int nf_tables_getset(struct sock *nlsk, struct sk_buff *skb, return netlink_dump_start(nlsk, skb, nlh, &c); } + /* Only accept unspec with dump */ + if (nfmsg->nfgen_family == NFPROTO_UNSPEC) + return -EAFNOSUPPORT; + set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]); if (IS_ERR(set)) return PTR_ERR(set); @@ -2353,6 +2423,7 @@ static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) { + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); struct nft_set *set; struct nft_ctx ctx; int err; @@ -2364,6 +2435,9 @@ static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb, if (err < 0) return 
err; + if (nfmsg->nfgen_family == NFPROTO_UNSPEC) + return -EAFNOSUPPORT; + set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]); if (IS_ERR(set)) return PTR_ERR(set); @@ -2535,9 +2609,8 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) u32 portid, seq; int event, err; - nfmsg = nlmsg_data(cb->nlh); - err = nlmsg_parse(cb->nlh, sizeof(*nfmsg), nla, NFTA_SET_ELEM_LIST_MAX, - nft_set_elem_list_policy); + err = nlmsg_parse(cb->nlh, sizeof(struct nfgenmsg), nla, + NFTA_SET_ELEM_LIST_MAX, nft_set_elem_list_policy); if (err < 0) return err; diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index cb9e685caae..0d879fcb876 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -109,14 +109,14 @@ static inline void nft_trace_packet(const struct nft_pktinfo *pkt, { struct net *net = dev_net(pkt->in ? pkt->in : pkt->out); - nf_log_packet(net, pkt->xt.family, pkt->hooknum, pkt->skb, pkt->in, + nf_log_packet(net, pkt->xt.family, pkt->ops->hooknum, pkt->skb, pkt->in, pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ", chain->table->name, chain->name, comments[type], rulenum); } unsigned int -nft_do_chain_pktinfo(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops) +nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops) { const struct nft_chain *chain = ops->priv; const struct nft_rule *rule; @@ -164,7 +164,7 @@ next_rule: break; } - switch (data[NFT_REG_VERDICT].verdict) { + switch (data[NFT_REG_VERDICT].verdict & NF_VERDICT_MASK) { case NF_ACCEPT: case NF_DROP: case NF_QUEUE: @@ -172,6 +172,9 @@ next_rule: nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE); return data[NFT_REG_VERDICT].verdict; + } + + switch (data[NFT_REG_VERDICT].verdict) { case NFT_JUMP: if (unlikely(pkt->skb->nf_trace)) nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE); @@ -213,7 +216,7 @@ next_rule: return nft_base_chain(chain)->policy; } -EXPORT_SYMBOL_GPL(nft_do_chain_pktinfo); 
+EXPORT_SYMBOL_GPL(nft_do_chain); int __init nf_tables_core_module_init(void) { diff --git a/net/netfilter/nf_tables_inet.c b/net/netfilter/nf_tables_inet.c new file mode 100644 index 00000000000..9dd2d216cfc --- /dev/null +++ b/net/netfilter/nf_tables_inet.c @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2012-2014 Patrick McHardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/ip.h> +#include <linux/netfilter_ipv4.h> +#include <linux/netfilter_ipv6.h> +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_ipv4.h> +#include <net/netfilter/nf_tables_ipv6.h> +#include <net/ip.h> + +static void nft_inet_hook_ops_init(struct nf_hook_ops *ops, unsigned int n) +{ + struct nft_af_info *afi; + + if (n == 1) + afi = &nft_af_ipv4; + else + afi = &nft_af_ipv6; + + ops->pf = afi->family; + if (afi->hooks[ops->hooknum]) + ops->hook = afi->hooks[ops->hooknum]; +} + +static struct nft_af_info nft_af_inet __read_mostly = { + .family = NFPROTO_INET, + .nhooks = NF_INET_NUMHOOKS, + .owner = THIS_MODULE, + .nops = 2, + .hook_ops_init = nft_inet_hook_ops_init, +}; + +static int __net_init nf_tables_inet_init_net(struct net *net) +{ + net->nft.inet = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL); + if (net->nft.inet == NULL) + return -ENOMEM; + memcpy(net->nft.inet, &nft_af_inet, sizeof(nft_af_inet)); + + if (nft_register_afinfo(net, net->nft.inet) < 0) + goto err; + + return 0; + +err: + kfree(net->nft.inet); + return -ENOMEM; +} + +static void __net_exit nf_tables_inet_exit_net(struct net *net) +{ + nft_unregister_afinfo(net->nft.inet); + kfree(net->nft.inet); +} + +static struct pernet_operations nf_tables_inet_net_ops = { + .init = nf_tables_inet_init_net, + .exit = nf_tables_inet_exit_net, +}; + +static const struct nf_chain_type 
filter_inet = { + .name = "filter", + .type = NFT_CHAIN_T_DEFAULT, + .family = NFPROTO_INET, + .owner = THIS_MODULE, + .hook_mask = (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_FORWARD) | + (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_POST_ROUTING), +}; + +static int __init nf_tables_inet_init(void) +{ + int ret; + + nft_register_chain_type(&filter_inet); + ret = register_pernet_subsys(&nf_tables_inet_net_ops); + if (ret < 0) + nft_unregister_chain_type(&filter_inet); + + return ret; +} + +static void __exit nf_tables_inet_exit(void) +{ + unregister_pernet_subsys(&nf_tables_inet_net_ops); + nft_unregister_chain_type(&filter_inet); +} + +module_init(nf_tables_inet_init); +module_exit(nf_tables_inet_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +MODULE_ALIAS_NFT_FAMILY(1); diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index 21258cf7009..f072fe80351 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -29,6 +29,7 @@ #include <linux/netfilter/nfnetlink_queue.h> #include <linux/list.h> #include <net/sock.h> +#include <net/tcp_states.h> #include <net/netfilter/nf_queue.h> #include <net/netns/generic.h> #include <net/netfilter/nfnetlink_queue.h> @@ -235,51 +236,6 @@ nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, unsigned long data) spin_unlock_bh(&queue->lock); } -static void -nfqnl_zcopy(struct sk_buff *to, const struct sk_buff *from, int len, int hlen) -{ - int i, j = 0; - int plen = 0; /* length of skb->head fragment */ - struct page *page; - unsigned int offset; - - /* dont bother with small payloads */ - if (len <= skb_tailroom(to)) { - skb_copy_bits(from, 0, skb_put(to, len), len); - return; - } - - if (hlen) { - skb_copy_bits(from, 0, skb_put(to, hlen), hlen); - len -= hlen; - } else { - plen = min_t(int, skb_headlen(from), len); - if (plen) { - page = virt_to_head_page(from->head); - offset = 
from->data - (unsigned char *)page_address(page); - __skb_fill_page_desc(to, 0, page, offset, plen); - get_page(page); - j = 1; - len -= plen; - } - } - - to->truesize += len + plen; - to->len += len + plen; - to->data_len += len + plen; - - for (i = 0; i < skb_shinfo(from)->nr_frags; i++) { - if (!len) - break; - skb_shinfo(to)->frags[j] = skb_shinfo(from)->frags[i]; - skb_shinfo(to)->frags[j].size = min_t(int, skb_shinfo(to)->frags[j].size, len); - len -= skb_shinfo(to)->frags[j].size; - skb_frag_ref(to, j); - j++; - } - skb_shinfo(to)->nr_frags = j; -} - static int nfqnl_put_packet_info(struct sk_buff *nlskb, struct sk_buff *packet, bool csum_verify) @@ -297,6 +253,31 @@ nfqnl_put_packet_info(struct sk_buff *nlskb, struct sk_buff *packet, return flags ? nla_put_be32(nlskb, NFQA_SKB_INFO, htonl(flags)) : 0; } +static int nfqnl_put_sk_uidgid(struct sk_buff *skb, struct sock *sk) +{ + const struct cred *cred; + + if (sk->sk_state == TCP_TIME_WAIT) + return 0; + + read_lock_bh(&sk->sk_callback_lock); + if (sk->sk_socket && sk->sk_socket->file) { + cred = sk->sk_socket->file->f_cred; + if (nla_put_be32(skb, NFQA_UID, + htonl(from_kuid_munged(&init_user_ns, cred->fsuid)))) + goto nla_put_failure; + if (nla_put_be32(skb, NFQA_GID, + htonl(from_kgid_munged(&init_user_ns, cred->fsgid)))) + goto nla_put_failure; + } + read_unlock_bh(&sk->sk_callback_lock); + return 0; + +nla_put_failure: + read_unlock_bh(&sk->sk_callback_lock); + return -1; +} + static struct sk_buff * nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, struct nf_queue_entry *entry, @@ -304,7 +285,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, { size_t size; size_t data_len = 0, cap_len = 0; - int hlen = 0; + unsigned int hlen = 0; struct sk_buff *skb; struct nlattr *nla; struct nfqnl_msg_packet_hdr *pmsg; @@ -356,14 +337,8 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, if (data_len > entskb->len) data_len = entskb->len; - 
if (!entskb->head_frag || - skb_headlen(entskb) < L1_CACHE_BYTES || - skb_shinfo(entskb)->nr_frags >= MAX_SKB_FRAGS) - hlen = skb_headlen(entskb); - - if (skb_has_frag_list(entskb)) - hlen = entskb->len; - hlen = min_t(int, data_len, hlen); + hlen = skb_zerocopy_headlen(entskb); + hlen = min_t(unsigned int, hlen, data_len); size += sizeof(struct nlattr) + hlen; cap_len = entskb->len; break; @@ -372,6 +347,11 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, if (queue->flags & NFQA_CFG_F_CONNTRACK) ct = nfqnl_ct_get(entskb, &size, &ctinfo); + if (queue->flags & NFQA_CFG_F_UID_GID) { + size += (nla_total_size(sizeof(u_int32_t)) /* uid */ + + nla_total_size(sizeof(u_int32_t))); /* gid */ + } + skb = nfnetlink_alloc_skb(net, size, queue->peer_portid, GFP_ATOMIC); if (!skb) @@ -484,6 +464,10 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, goto nla_put_failure; } + if ((queue->flags & NFQA_CFG_F_UID_GID) && entskb->sk && + nfqnl_put_sk_uidgid(skb, entskb->sk) < 0) + goto nla_put_failure; + if (ct && nfqnl_ct_put(skb, ct, ctinfo) < 0) goto nla_put_failure; @@ -504,7 +488,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, nla->nla_type = NFQA_PAYLOAD; nla->nla_len = nla_attr_size(data_len); - nfqnl_zcopy(skb, entskb, data_len, hlen); + skb_zerocopy(skb, entskb, data_len, hlen); } nlh->nlmsg_len = skb->len; diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index da0c1f4ada1..82cb8236f8a 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -92,7 +92,7 @@ nft_target_set_tgchk_param(struct xt_tgchk_param *par, if (ctx->chain->flags & NFT_BASE_CHAIN) { const struct nft_base_chain *basechain = nft_base_chain(ctx->chain); - const struct nf_hook_ops *ops = &basechain->ops; + const struct nf_hook_ops *ops = &basechain->ops[0]; par->hook_mask = 1 << ops->hooknum; } @@ -253,7 +253,7 @@ static int nft_target_validate(const struct nft_ctx *ctx, if 
(ctx->chain->flags & NFT_BASE_CHAIN) { const struct nft_base_chain *basechain = nft_base_chain(ctx->chain); - const struct nf_hook_ops *ops = &basechain->ops; + const struct nf_hook_ops *ops = &basechain->ops[0]; hook_mask = 1 << ops->hooknum; if (hook_mask & target->hooks) @@ -323,7 +323,7 @@ nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx, if (ctx->chain->flags & NFT_BASE_CHAIN) { const struct nft_base_chain *basechain = nft_base_chain(ctx->chain); - const struct nf_hook_ops *ops = &basechain->ops; + const struct nf_hook_ops *ops = &basechain->ops[0]; par->hook_mask = 1 << ops->hooknum; } @@ -449,7 +449,7 @@ static int nft_match_validate(const struct nft_ctx *ctx, if (ctx->chain->flags & NFT_BASE_CHAIN) { const struct nft_base_chain *basechain = nft_base_chain(ctx->chain); - const struct nf_hook_ops *ops = &basechain->ops; + const struct nf_hook_ops *ops = &basechain->ops[0]; hook_mask = 1 << ops->hooknum; if (hook_mask & match->hooks) diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 955f4e6e708..917052e2060 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -18,17 +18,21 @@ #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_tuple.h> #include <net/netfilter/nf_conntrack_helper.h> +#include <net/netfilter/nf_conntrack_ecache.h> struct nft_ct { enum nft_ct_keys key:8; enum ip_conntrack_dir dir:8; - enum nft_registers dreg:8; + union{ + enum nft_registers dreg:8; + enum nft_registers sreg:8; + }; uint8_t family; }; -static void nft_ct_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], - const struct nft_pktinfo *pkt) +static void nft_ct_get_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) { const struct nft_ct *priv = nft_expr_priv(expr); struct nft_data *dest = &data[priv->dreg]; @@ -123,24 +127,79 @@ err: data[NFT_REG_VERDICT].verdict = NFT_BREAK; } +static void nft_ct_set_eval(const struct 
nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_ct *priv = nft_expr_priv(expr); + struct sk_buff *skb = pkt->skb; +#ifdef CONFIG_NF_CONNTRACK_MARK + u32 value = data[priv->sreg].data[0]; +#endif + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + + ct = nf_ct_get(skb, &ctinfo); + if (ct == NULL) + return; + + switch (priv->key) { +#ifdef CONFIG_NF_CONNTRACK_MARK + case NFT_CT_MARK: + if (ct->mark != value) { + ct->mark = value; + nf_conntrack_event_cache(IPCT_MARK, ct); + } + break; +#endif + } +} + static const struct nla_policy nft_ct_policy[NFTA_CT_MAX + 1] = { [NFTA_CT_DREG] = { .type = NLA_U32 }, [NFTA_CT_KEY] = { .type = NLA_U32 }, [NFTA_CT_DIRECTION] = { .type = NLA_U8 }, + [NFTA_CT_SREG] = { .type = NLA_U32 }, }; -static int nft_ct_init(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nlattr * const tb[]) +static int nft_ct_l3proto_try_module_get(uint8_t family) { - struct nft_ct *priv = nft_expr_priv(expr); int err; - if (tb[NFTA_CT_DREG] == NULL || - tb[NFTA_CT_KEY] == NULL) - return -EINVAL; + if (family == NFPROTO_INET) { + err = nf_ct_l3proto_try_module_get(NFPROTO_IPV4); + if (err < 0) + goto err1; + err = nf_ct_l3proto_try_module_get(NFPROTO_IPV6); + if (err < 0) + goto err2; + } else { + err = nf_ct_l3proto_try_module_get(family); + if (err < 0) + goto err1; + } + return 0; + +err2: + nf_ct_l3proto_module_put(NFPROTO_IPV4); +err1: + return err; +} + +static void nft_ct_l3proto_module_put(uint8_t family) +{ + if (family == NFPROTO_INET) { + nf_ct_l3proto_module_put(NFPROTO_IPV4); + nf_ct_l3proto_module_put(NFPROTO_IPV6); + } else + nf_ct_l3proto_module_put(family); +} + +static int nft_ct_init_validate_get(const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_ct *priv = nft_expr_priv(expr); - priv->key = ntohl(nla_get_be32(tb[NFTA_CT_KEY])); if (tb[NFTA_CT_DIRECTION] != NULL) { priv->dir = nla_get_u8(tb[NFTA_CT_DIRECTION]); switch 
(priv->dir) { @@ -179,34 +238,72 @@ static int nft_ct_init(const struct nft_ctx *ctx, return -EOPNOTSUPP; } - err = nf_ct_l3proto_try_module_get(ctx->afi->family); + return 0; +} + +static int nft_ct_init_validate_set(uint32_t key) +{ + switch (key) { + case NFT_CT_MARK: + break; + default: + return -EOPNOTSUPP; + } + + return 0; +} + +static int nft_ct_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_ct *priv = nft_expr_priv(expr); + int err; + + priv->key = ntohl(nla_get_be32(tb[NFTA_CT_KEY])); + + if (tb[NFTA_CT_DREG]) { + err = nft_ct_init_validate_get(expr, tb); + if (err < 0) + return err; + + priv->dreg = ntohl(nla_get_be32(tb[NFTA_CT_DREG])); + err = nft_validate_output_register(priv->dreg); + if (err < 0) + return err; + + err = nft_validate_data_load(ctx, priv->dreg, NULL, + NFT_DATA_VALUE); + if (err < 0) + return err; + } else { + err = nft_ct_init_validate_set(priv->key); + if (err < 0) + return err; + + priv->sreg = ntohl(nla_get_be32(tb[NFTA_CT_SREG])); + err = nft_validate_input_register(priv->sreg); + if (err < 0) + return err; + } + + err = nft_ct_l3proto_try_module_get(ctx->afi->family); if (err < 0) return err; - priv->family = ctx->afi->family; - priv->dreg = ntohl(nla_get_be32(tb[NFTA_CT_DREG])); - err = nft_validate_output_register(priv->dreg); - if (err < 0) - goto err1; + priv->family = ctx->afi->family; - err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE); - if (err < 0) - goto err1; return 0; - -err1: - nf_ct_l3proto_module_put(ctx->afi->family); - return err; } static void nft_ct_destroy(const struct nft_expr *expr) { struct nft_ct *priv = nft_expr_priv(expr); - nf_ct_l3proto_module_put(priv->family); + nft_ct_l3proto_module_put(priv->family); } -static int nft_ct_dump(struct sk_buff *skb, const struct nft_expr *expr) +static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr) { const struct nft_ct *priv = nft_expr_priv(expr); @@ -222,19 
+319,61 @@ nla_put_failure: return -1; } +static int nft_ct_set_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_ct *priv = nft_expr_priv(expr); + + if (nla_put_be32(skb, NFTA_CT_SREG, htonl(priv->sreg))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_CT_KEY, htonl(priv->key))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + static struct nft_expr_type nft_ct_type; -static const struct nft_expr_ops nft_ct_ops = { +static const struct nft_expr_ops nft_ct_get_ops = { .type = &nft_ct_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_ct)), - .eval = nft_ct_eval, + .eval = nft_ct_get_eval, .init = nft_ct_init, .destroy = nft_ct_destroy, - .dump = nft_ct_dump, + .dump = nft_ct_get_dump, }; +static const struct nft_expr_ops nft_ct_set_ops = { + .type = &nft_ct_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_ct)), + .eval = nft_ct_set_eval, + .init = nft_ct_init, + .destroy = nft_ct_destroy, + .dump = nft_ct_set_dump, +}; + +static const struct nft_expr_ops * +nft_ct_select_ops(const struct nft_ctx *ctx, + const struct nlattr * const tb[]) +{ + if (tb[NFTA_CT_KEY] == NULL) + return ERR_PTR(-EINVAL); + + if (tb[NFTA_CT_DREG] && tb[NFTA_CT_SREG]) + return ERR_PTR(-EINVAL); + + if (tb[NFTA_CT_DREG]) + return &nft_ct_get_ops; + + if (tb[NFTA_CT_SREG]) + return &nft_ct_set_ops; + + return ERR_PTR(-EINVAL); +} + static struct nft_expr_type nft_ct_type __read_mostly = { .name = "ct", - .ops = &nft_ct_ops, + .select_ops = &nft_ct_select_ops, .policy = nft_ct_policy, .maxattr = NFTA_CT_MAX, .owner = THIS_MODULE, diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c index 57cad072a13..5af790123ad 100644 --- a/net/netfilter/nft_log.c +++ b/net/netfilter/nft_log.c @@ -33,7 +33,7 @@ static void nft_log_eval(const struct nft_expr *expr, const struct nft_log *priv = nft_expr_priv(expr); struct net *net = dev_net(pkt->in ? 
pkt->in : pkt->out); - nf_log_packet(net, priv->family, pkt->hooknum, pkt->skb, pkt->in, + nf_log_packet(net, priv->family, pkt->ops->hooknum, pkt->skb, pkt->in, pkt->out, &priv->loginfo, "%s", priv->prefix); } diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 8c28220a90b..e8254ad2e5a 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -21,12 +21,15 @@ struct nft_meta { enum nft_meta_keys key:8; - enum nft_registers dreg:8; + union { + enum nft_registers dreg:8; + enum nft_registers sreg:8; + }; }; -static void nft_meta_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], - const struct nft_pktinfo *pkt) +static void nft_meta_get_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) { const struct nft_meta *priv = nft_expr_priv(expr); const struct sk_buff *skb = pkt->skb; @@ -40,6 +43,12 @@ static void nft_meta_eval(const struct nft_expr *expr, case NFT_META_PROTOCOL: *(__be16 *)dest->data = skb->protocol; break; + case NFT_META_NFPROTO: + dest->data[0] = pkt->ops->pf; + break; + case NFT_META_L4PROTO: + dest->data[0] = pkt->tprot; + break; case NFT_META_PRIORITY: dest->data[0] = skb->priority; break; @@ -132,25 +141,54 @@ err: data[NFT_REG_VERDICT].verdict = NFT_BREAK; } +static void nft_meta_set_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_meta *meta = nft_expr_priv(expr); + struct sk_buff *skb = pkt->skb; + u32 value = data[meta->sreg].data[0]; + + switch (meta->key) { + case NFT_META_MARK: + skb->mark = value; + break; + case NFT_META_PRIORITY: + skb->priority = value; + break; + case NFT_META_NFTRACE: + skb->nf_trace = 1; + break; + default: + WARN_ON(1); + } +} + static const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = { [NFTA_META_DREG] = { .type = NLA_U32 }, [NFTA_META_KEY] = { .type = NLA_U32 }, + [NFTA_META_SREG] = { .type = NLA_U32 }, }; -static 
int nft_meta_init(const struct nft_ctx *ctx, const struct nft_expr *expr, - const struct nlattr * const tb[]) +static int nft_meta_init_validate_set(uint32_t key) { - struct nft_meta *priv = nft_expr_priv(expr); - int err; - - if (tb[NFTA_META_DREG] == NULL || - tb[NFTA_META_KEY] == NULL) - return -EINVAL; + switch (key) { + case NFT_META_MARK: + case NFT_META_PRIORITY: + case NFT_META_NFTRACE: + return 0; + default: + return -EOPNOTSUPP; + } +} - priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY])); - switch (priv->key) { +static int nft_meta_init_validate_get(uint32_t key) +{ + switch (key) { case NFT_META_LEN: case NFT_META_PROTOCOL: + case NFT_META_NFPROTO: + case NFT_META_L4PROTO: case NFT_META_PRIORITY: case NFT_META_MARK: case NFT_META_IIF: @@ -167,26 +205,72 @@ static int nft_meta_init(const struct nft_ctx *ctx, const struct nft_expr *expr, #ifdef CONFIG_NETWORK_SECMARK case NFT_META_SECMARK: #endif - break; + return 0; default: return -EOPNOTSUPP; } - priv->dreg = ntohl(nla_get_be32(tb[NFTA_META_DREG])); - err = nft_validate_output_register(priv->dreg); +} + +static int nft_meta_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_meta *priv = nft_expr_priv(expr); + int err; + + priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY])); + + if (tb[NFTA_META_DREG]) { + err = nft_meta_init_validate_get(priv->key); + if (err < 0) + return err; + + priv->dreg = ntohl(nla_get_be32(tb[NFTA_META_DREG])); + err = nft_validate_output_register(priv->dreg); + if (err < 0) + return err; + + return nft_validate_data_load(ctx, priv->dreg, NULL, + NFT_DATA_VALUE); + } + + err = nft_meta_init_validate_set(priv->key); + if (err < 0) + return err; + + priv->sreg = ntohl(nla_get_be32(tb[NFTA_META_SREG])); + err = nft_validate_input_register(priv->sreg); if (err < 0) return err; - return nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE); + + return 0; } -static int nft_meta_dump(struct sk_buff *skb, const struct 
nft_expr *expr) +static int nft_meta_get_dump(struct sk_buff *skb, + const struct nft_expr *expr) { const struct nft_meta *priv = nft_expr_priv(expr); + if (nla_put_be32(skb, NFTA_META_KEY, htonl(priv->key))) + goto nla_put_failure; if (nla_put_be32(skb, NFTA_META_DREG, htonl(priv->dreg))) goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static int nft_meta_set_dump(struct sk_buff *skb, + const struct nft_expr *expr) +{ + const struct nft_meta *priv = nft_expr_priv(expr); + if (nla_put_be32(skb, NFTA_META_KEY, htonl(priv->key))) goto nla_put_failure; + if (nla_put_be32(skb, NFTA_META_SREG, htonl(priv->sreg))) + goto nla_put_failure; + return 0; nla_put_failure: @@ -194,17 +278,44 @@ nla_put_failure: } static struct nft_expr_type nft_meta_type; -static const struct nft_expr_ops nft_meta_ops = { +static const struct nft_expr_ops nft_meta_get_ops = { + .type = &nft_meta_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)), + .eval = nft_meta_get_eval, + .init = nft_meta_init, + .dump = nft_meta_get_dump, +}; + +static const struct nft_expr_ops nft_meta_set_ops = { .type = &nft_meta_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)), - .eval = nft_meta_eval, + .eval = nft_meta_set_eval, .init = nft_meta_init, - .dump = nft_meta_dump, + .dump = nft_meta_set_dump, }; +static const struct nft_expr_ops * +nft_meta_select_ops(const struct nft_ctx *ctx, + const struct nlattr * const tb[]) +{ + if (tb[NFTA_META_KEY] == NULL) + return ERR_PTR(-EINVAL); + + if (tb[NFTA_META_DREG] && tb[NFTA_META_SREG]) + return ERR_PTR(-EINVAL); + + if (tb[NFTA_META_DREG]) + return &nft_meta_get_ops; + + if (tb[NFTA_META_SREG]) + return &nft_meta_set_ops; + + return ERR_PTR(-EINVAL); +} + static struct nft_expr_type nft_meta_type __read_mostly = { .name = "meta", - .ops = &nft_meta_ops, + .select_ops = &nft_meta_select_ops, .policy = nft_meta_policy, .maxattr = NFTA_META_MAX, .owner = THIS_MODULE, diff --git a/net/netfilter/nft_meta_target.c 
b/net/netfilter/nft_meta_target.c deleted file mode 100644 index 71177df75ff..00000000000 --- a/net/netfilter/nft_meta_target.c +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Copyright (c) 2008 Patrick McHardy <kaber@trash.net> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Development of this code funded by Astaro AG (http://www.astaro.com/) - */ - -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/list.h> -#include <linux/rbtree.h> -#include <linux/netlink.h> -#include <linux/netfilter.h> -#include <linux/netfilter/nf_tables.h> -#include <net/netfilter/nf_tables.h> - -struct nft_meta { - enum nft_meta_keys key; -}; - -static void nft_meta_eval(const struct nft_expr *expr, - struct nft_data *nfres, - struct nft_data *data, - const struct nft_pktinfo *pkt) -{ - const struct nft_meta *meta = nft_expr_priv(expr); - struct sk_buff *skb = pkt->skb; - u32 val = data->data[0]; - - switch (meta->key) { - case NFT_META_MARK: - skb->mark = val; - break; - case NFT_META_PRIORITY: - skb->priority = val; - break; - case NFT_META_NFTRACE: - skb->nf_trace = val; - break; -#ifdef CONFIG_NETWORK_SECMARK - case NFT_META_SECMARK: - skb->secmark = val; - break; -#endif - default: - WARN_ON(1); - } -} - -static const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = { - [NFTA_META_KEY] = { .type = NLA_U32 }, -}; - -static int nft_meta_init(const struct nft_expr *expr, struct nlattr *tb[]) -{ - struct nft_meta *meta = nft_expr_priv(expr); - - if (tb[NFTA_META_KEY] == NULL) - return -EINVAL; - - meta->key = ntohl(nla_get_be32(tb[NFTA_META_KEY])); - switch (meta->key) { - case NFT_META_MARK: - case NFT_META_PRIORITY: - case NFT_META_NFTRACE: -#ifdef CONFIG_NETWORK_SECMARK - case NFT_META_SECMARK: -#endif - break; - default: - return -EINVAL; - } - - return 0; -} - -static int nft_meta_dump(struct sk_buff *skb, const 
struct nft_expr *expr) -{ - struct nft_meta *meta = nft_expr_priv(expr); - - NLA_PUT_BE32(skb, NFTA_META_KEY, htonl(meta->key)); - return 0; - -nla_put_failure: - return -1; -} - -static struct nft_expr_ops meta_target __read_mostly = { - .name = "meta", - .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)), - .owner = THIS_MODULE, - .eval = nft_meta_eval, - .init = nft_meta_init, - .dump = nft_meta_dump, - .policy = nft_meta_policy, - .maxattr = NFTA_META_MAX, -}; - -static int __init nft_meta_target_init(void) -{ - return nft_register_expr(&meta_target); -} - -static void __exit nft_meta_target_exit(void) -{ - nft_unregister_expr(&meta_target); -} - -module_init(nft_meta_target_init); -module_exit(nft_meta_target_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); -MODULE_ALIAS_NFT_EXPR("meta"); diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c new file mode 100644 index 00000000000..cbea473d69e --- /dev/null +++ b/net/netfilter/nft_queue.c @@ -0,0 +1,134 @@ +/* + * Copyright (c) 2013 Eric Leblond <eric@regit.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + * Development of this code partly funded by OISF + * (http://www.openinfosecfoundation.org/) + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/netlink.h> +#include <linux/jhash.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nf_queue.h> + +static u32 jhash_initval __read_mostly; + +struct nft_queue { + u16 queuenum; + u16 queues_total; + u16 flags; + u8 family; +}; + +static void nft_queue_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + struct nft_queue *priv = nft_expr_priv(expr); + u32 queue = priv->queuenum; + u32 ret; + + if (priv->queues_total > 1) { + if (priv->flags & NFT_QUEUE_FLAG_CPU_FANOUT) { + int cpu = smp_processor_id(); + + queue = priv->queuenum + cpu % priv->queues_total; + } else { + queue = nfqueue_hash(pkt->skb, queue, + priv->queues_total, priv->family, + jhash_initval); + } + } + + ret = NF_QUEUE_NR(queue); + if (priv->flags & NFT_QUEUE_FLAG_BYPASS) + ret |= NF_VERDICT_FLAG_QUEUE_BYPASS; + + data[NFT_REG_VERDICT].verdict = ret; +} + +static const struct nla_policy nft_queue_policy[NFTA_QUEUE_MAX + 1] = { + [NFTA_QUEUE_NUM] = { .type = NLA_U16 }, + [NFTA_QUEUE_TOTAL] = { .type = NLA_U16 }, + [NFTA_QUEUE_FLAGS] = { .type = NLA_U16 }, +}; + +static int nft_queue_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_queue *priv = nft_expr_priv(expr); + + if (tb[NFTA_QUEUE_NUM] == NULL) + return -EINVAL; + + init_hashrandom(&jhash_initval); + priv->family = ctx->afi->family; + priv->queuenum = ntohs(nla_get_be16(tb[NFTA_QUEUE_NUM])); + + if (tb[NFTA_QUEUE_TOTAL] != NULL) + priv->queues_total = ntohs(nla_get_be16(tb[NFTA_QUEUE_TOTAL])); + if (tb[NFTA_QUEUE_FLAGS] != NULL) { + priv->flags = ntohs(nla_get_be16(tb[NFTA_QUEUE_FLAGS])); + if (priv->flags & ~NFT_QUEUE_FLAG_MASK) + return 
-EINVAL; + } + return 0; +} + +static int nft_queue_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_queue *priv = nft_expr_priv(expr); + + if (nla_put_be16(skb, NFTA_QUEUE_NUM, htons(priv->queuenum)) || + nla_put_be16(skb, NFTA_QUEUE_TOTAL, htons(priv->queues_total)) || + nla_put_be16(skb, NFTA_QUEUE_FLAGS, htons(priv->flags))) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_type nft_queue_type; +static const struct nft_expr_ops nft_queue_ops = { + .type = &nft_queue_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_queue)), + .eval = nft_queue_eval, + .init = nft_queue_init, + .dump = nft_queue_dump, +}; + +static struct nft_expr_type nft_queue_type __read_mostly = { + .name = "queue", + .ops = &nft_queue_ops, + .policy = nft_queue_policy, + .maxattr = NFTA_QUEUE_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_queue_module_init(void) +{ + return nft_register_expr(&nft_queue_type); +} + +static void __exit nft_queue_module_exit(void) +{ + nft_unregister_expr(&nft_queue_type); +} + +module_init(nft_queue_module_init); +module_exit(nft_queue_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Eric Leblond <eric@regit.org>"); +MODULE_ALIAS_NFT_EXPR("queue"); diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/netfilter/nft_reject.c index 4a5e94ac314..5e204711d70 100644 --- a/net/ipv4/netfilter/nft_reject_ipv4.c +++ b/net/netfilter/nft_reject.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net> + * Copyright (c) 2013 Eric Leblond <eric@regit.org> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -16,10 +17,16 @@ #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables.h> #include <net/icmp.h> +#include <net/netfilter/ipv4/nf_reject.h> + +#if IS_ENABLED(CONFIG_NF_TABLES_IPV6) +#include <net/netfilter/ipv6/nf_reject.h> +#endif struct 
nft_reject { enum nft_reject_types type:8; u8 icmp_code; + u8 family; }; static void nft_reject_eval(const struct nft_expr *expr, @@ -27,12 +34,26 @@ static void nft_reject_eval(const struct nft_expr *expr, const struct nft_pktinfo *pkt) { struct nft_reject *priv = nft_expr_priv(expr); - +#if IS_ENABLED(CONFIG_NF_TABLES_IPV6) + struct net *net = dev_net((pkt->in != NULL) ? pkt->in : pkt->out); +#endif switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: - icmp_send(pkt->skb, ICMP_DEST_UNREACH, priv->icmp_code, 0); + if (priv->family == NFPROTO_IPV4) + nf_send_unreach(pkt->skb, priv->icmp_code); +#if IS_ENABLED(CONFIG_NF_TABLES_IPV6) + else if (priv->family == NFPROTO_IPV6) + nf_send_unreach6(net, pkt->skb, priv->icmp_code, + pkt->ops->hooknum); +#endif break; case NFT_REJECT_TCP_RST: + if (priv->family == NFPROTO_IPV4) + nf_send_reset(pkt->skb, pkt->ops->hooknum); +#if IS_ENABLED(CONFIG_NF_TABLES_IPV6) + else if (priv->family == NFPROTO_IPV6) + nf_send_reset6(net, pkt->skb, pkt->ops->hooknum); +#endif break; } @@ -53,6 +74,7 @@ static int nft_reject_init(const struct nft_ctx *ctx, if (tb[NFTA_REJECT_TYPE] == NULL) return -EINVAL; + priv->family = ctx->afi->family; priv->type = ntohl(nla_get_be32(tb[NFTA_REJECT_TYPE])); switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index da35ac06a97..5929be622c5 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -211,8 +211,10 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par, ret = 0; if ((info->ct_events || info->exp_events) && !nf_ct_ecache_ext_add(ct, info->ct_events, info->exp_events, - GFP_KERNEL)) + GFP_KERNEL)) { + ret = -EINVAL; goto err3; + } if (info->helper[0]) { ret = xt_ct_set_helper(ct, info->helper, par); diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c index ed00fef5899..8f1779ff7e3 100644 --- a/net/netfilter/xt_NFQUEUE.c +++ b/net/netfilter/xt_NFQUEUE.c @@ -11,15 +11,13 @@ #include <linux/module.h> 
#include <linux/skbuff.h> -#include <linux/ip.h> -#include <linux/ipv6.h> -#include <linux/jhash.h> - #include <linux/netfilter.h> #include <linux/netfilter_arp.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_NFQUEUE.h> +#include <net/netfilter/nf_queue.h> + MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); MODULE_DESCRIPTION("Xtables: packet forwarding to netlink"); MODULE_LICENSE("GPL"); @@ -28,7 +26,6 @@ MODULE_ALIAS("ip6t_NFQUEUE"); MODULE_ALIAS("arpt_NFQUEUE"); static u32 jhash_initval __read_mostly; -static bool rnd_inited __read_mostly; static unsigned int nfqueue_tg(struct sk_buff *skb, const struct xt_action_param *par) @@ -38,69 +35,16 @@ nfqueue_tg(struct sk_buff *skb, const struct xt_action_param *par) return NF_QUEUE_NR(tinfo->queuenum); } -static u32 hash_v4(const struct sk_buff *skb) -{ - const struct iphdr *iph = ip_hdr(skb); - - /* packets in either direction go into same queue */ - if ((__force u32)iph->saddr < (__force u32)iph->daddr) - return jhash_3words((__force u32)iph->saddr, - (__force u32)iph->daddr, iph->protocol, jhash_initval); - - return jhash_3words((__force u32)iph->daddr, - (__force u32)iph->saddr, iph->protocol, jhash_initval); -} - -#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) -static u32 hash_v6(const struct sk_buff *skb) -{ - const struct ipv6hdr *ip6h = ipv6_hdr(skb); - u32 a, b, c; - - if ((__force u32)ip6h->saddr.s6_addr32[3] < - (__force u32)ip6h->daddr.s6_addr32[3]) { - a = (__force u32) ip6h->saddr.s6_addr32[3]; - b = (__force u32) ip6h->daddr.s6_addr32[3]; - } else { - b = (__force u32) ip6h->saddr.s6_addr32[3]; - a = (__force u32) ip6h->daddr.s6_addr32[3]; - } - - if ((__force u32)ip6h->saddr.s6_addr32[1] < - (__force u32)ip6h->daddr.s6_addr32[1]) - c = (__force u32) ip6h->saddr.s6_addr32[1]; - else - c = (__force u32) ip6h->daddr.s6_addr32[1]; - - return jhash_3words(a, b, c, jhash_initval); -} -#endif - -static u32 -nfqueue_hash(const struct sk_buff *skb, const struct xt_action_param *par) -{ - 
const struct xt_NFQ_info_v1 *info = par->targinfo; - u32 queue = info->queuenum; - - if (par->family == NFPROTO_IPV4) - queue += ((u64) hash_v4(skb) * info->queues_total) >> 32; -#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) - else if (par->family == NFPROTO_IPV6) - queue += ((u64) hash_v6(skb) * info->queues_total) >> 32; -#endif - - return queue; -} - static unsigned int nfqueue_tg_v1(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_NFQ_info_v1 *info = par->targinfo; u32 queue = info->queuenum; - if (info->queues_total > 1) - queue = nfqueue_hash(skb, par); - + if (info->queues_total > 1) { + queue = nfqueue_hash(skb, queue, info->queues_total, + par->family, jhash_initval); + } return NF_QUEUE_NR(queue); } @@ -120,10 +64,8 @@ static int nfqueue_tg_check(const struct xt_tgchk_param *par) const struct xt_NFQ_info_v3 *info = par->targinfo; u32 maxid; - if (unlikely(!rnd_inited)) { - get_random_bytes(&jhash_initval, sizeof(jhash_initval)); - rnd_inited = true; - } + init_hashrandom(&jhash_initval); + if (info->queues_total == 0) { pr_err("NFQUEUE: number of total queues is 0\n"); return -EINVAL; @@ -154,8 +96,10 @@ nfqueue_tg_v3(struct sk_buff *skb, const struct xt_action_param *par) int cpu = smp_processor_id(); queue = info->queuenum + cpu % info->queues_total; - } else - queue = nfqueue_hash(skb, par); + } else { + queue = nfqueue_hash(skb, queue, info->queues_total, + par->family, jhash_initval); + } } ret = NF_QUEUE_NR(queue); diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c new file mode 100644 index 00000000000..9a8e77e7f8d --- /dev/null +++ b/net/netfilter/xt_cgroup.c @@ -0,0 +1,71 @@ +/* + * Xtables module to match the process control group. + * + * Might be used to implement individual "per-application" firewall + * policies in contrast to global policies based on control groups. + * Matching is based upon processes tagged to net_cls' classid marker. 
+ * + * (C) 2013 Daniel Borkmann <dborkman@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/skbuff.h> +#include <linux/module.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter/xt_cgroup.h> +#include <net/sock.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Daniel Borkmann <dborkman@redhat.com>"); +MODULE_DESCRIPTION("Xtables: process control group matching"); +MODULE_ALIAS("ipt_cgroup"); +MODULE_ALIAS("ip6t_cgroup"); + +static int cgroup_mt_check(const struct xt_mtchk_param *par) +{ + struct xt_cgroup_info *info = par->matchinfo; + + if (info->invert & ~1) + return -EINVAL; + + return info->id ? 0 : -EINVAL; +} + +static bool +cgroup_mt(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_cgroup_info *info = par->matchinfo; + + if (skb->sk == NULL) + return false; + + return (info->id == skb->sk->sk_classid) ^ info->invert; +} + +static struct xt_match cgroup_mt_reg __read_mostly = { + .name = "cgroup", + .revision = 0, + .family = NFPROTO_UNSPEC, + .checkentry = cgroup_mt_check, + .match = cgroup_mt, + .matchsize = sizeof(struct xt_cgroup_info), + .me = THIS_MODULE, + .hooks = (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_POST_ROUTING), +}; + +static int __init cgroup_mt_init(void) +{ + return xt_register_match(&cgroup_mt_reg); +} + +static void __exit cgroup_mt_exit(void) +{ + xt_unregister_match(&cgroup_mt_reg); +} + +module_init(cgroup_mt_init); +module_exit(cgroup_mt_exit); diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c index 7278145e6a6..69f78e96fdb 100644 --- a/net/netfilter/xt_connmark.c +++ b/net/netfilter/xt_connmark.c @@ -17,8 +17,7 @@ * GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #include <linux/module.h> diff --git a/net/netfilter/xt_ipcomp.c b/net/netfilter/xt_ipcomp.c new file mode 100644 index 00000000000..a4c7561698c --- /dev/null +++ b/net/netfilter/xt_ipcomp.c @@ -0,0 +1,111 @@ +/* Kernel module to match IPComp parameters for IPv4 and IPv6 + * + * Copyright (C) 2013 WindRiver + * + * Author: + * Fan Du <fan.du@windriver.com> + * + * Based on: + * net/netfilter/xt_esp.c + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/in.h> +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/ip.h> + +#include <linux/netfilter/xt_ipcomp.h> +#include <linux/netfilter/x_tables.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Fan Du <fan.du@windriver.com>"); +MODULE_DESCRIPTION("Xtables: IPv4/6 IPsec-IPComp SPI match"); + +/* Returns 1 if the spi is matched by the range, 0 otherwise */ +static inline bool +spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert) +{ + bool r; + pr_debug("spi_match:%c 0x%x <= 0x%x <= 0x%x\n", + invert ? '!' : ' ', min, spi, max); + r = (spi >= min && spi <= max) ^ invert; + pr_debug(" result %s\n", r ? "PASS" : "FAILED"); + return r; +} + +static bool comp_mt(const struct sk_buff *skb, struct xt_action_param *par) +{ + struct ip_comp_hdr _comphdr; + const struct ip_comp_hdr *chdr; + const struct xt_ipcomp *compinfo = par->matchinfo; + + /* Must not be a fragment. 
*/ + if (par->fragoff != 0) + return false; + + chdr = skb_header_pointer(skb, par->thoff, sizeof(_comphdr), &_comphdr); + if (chdr == NULL) { + /* We've been asked to examine this packet, and we + * can't. Hence, no choice but to drop. + */ + pr_debug("Dropping evil IPComp tinygram.\n"); + par->hotdrop = true; + return 0; + } + + return spi_match(compinfo->spis[0], compinfo->spis[1], + ntohl(chdr->cpi << 16), + !!(compinfo->invflags & XT_IPCOMP_INV_SPI)); +} + +static int comp_mt_check(const struct xt_mtchk_param *par) +{ + const struct xt_ipcomp *compinfo = par->matchinfo; + + /* Must specify no unknown invflags */ + if (compinfo->invflags & ~XT_IPCOMP_INV_MASK) { + pr_err("unknown flags %X\n", compinfo->invflags); + return -EINVAL; + } + return 0; +} + +static struct xt_match comp_mt_reg[] __read_mostly = { + { + .name = "ipcomp", + .family = NFPROTO_IPV4, + .match = comp_mt, + .matchsize = sizeof(struct xt_ipcomp), + .proto = IPPROTO_COMP, + .checkentry = comp_mt_check, + .me = THIS_MODULE, + }, + { + .name = "ipcomp", + .family = NFPROTO_IPV6, + .match = comp_mt, + .matchsize = sizeof(struct xt_ipcomp), + .proto = IPPROTO_COMP, + .checkentry = comp_mt_check, + .me = THIS_MODULE, + }, +}; + +static int __init comp_mt_init(void) +{ + return xt_register_matches(comp_mt_reg, ARRAY_SIZE(comp_mt_reg)); +} + +static void __exit comp_mt_exit(void) +{ + xt_unregister_matches(comp_mt_reg, ARRAY_SIZE(comp_mt_reg)); +} + +module_init(comp_mt_init); +module_exit(comp_mt_exit); diff --git a/net/netfilter/xt_l2tp.c b/net/netfilter/xt_l2tp.c new file mode 100644 index 00000000000..8aee572771f --- /dev/null +++ b/net/netfilter/xt_l2tp.c @@ -0,0 +1,354 @@ +/* Kernel module to match L2TP header parameters. */ + +/* (C) 2013 James Chapman <jchapman@katalix.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/if_ether.h> +#include <net/ip.h> +#include <linux/ipv6.h> +#include <net/ipv6.h> +#include <net/udp.h> +#include <linux/l2tp.h> + +#include <linux/netfilter_ipv4.h> +#include <linux/netfilter_ipv6.h> +#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netfilter_ipv6/ip6_tables.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter/xt_tcpudp.h> +#include <linux/netfilter/xt_l2tp.h> + +/* L2TP header masks */ +#define L2TP_HDR_T_BIT 0x8000 +#define L2TP_HDR_L_BIT 0x4000 +#define L2TP_HDR_VER 0x000f + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("James Chapman <jchapman@katalix.com>"); +MODULE_DESCRIPTION("Xtables: L2TP header match"); +MODULE_ALIAS("ipt_l2tp"); +MODULE_ALIAS("ip6t_l2tp"); + +/* The L2TP fields that can be matched */ +struct l2tp_data { + u32 tid; + u32 sid; + u8 type; + u8 version; +}; + +union l2tp_val { + __be16 val16[2]; + __be32 val32; +}; + +static bool l2tp_match(const struct xt_l2tp_info *info, struct l2tp_data *data) +{ + if ((info->flags & XT_L2TP_TYPE) && (info->type != data->type)) + return false; + + if ((info->flags & XT_L2TP_VERSION) && (info->version != data->version)) + return false; + + /* Check tid only for L2TPv3 control or any L2TPv2 packets */ + if ((info->flags & XT_L2TP_TID) && + ((data->type == XT_L2TP_TYPE_CONTROL) || (data->version == 2)) && + (info->tid != data->tid)) + return false; + + /* Check sid only for L2TP data packets */ + if ((info->flags & XT_L2TP_SID) && (data->type == XT_L2TP_TYPE_DATA) && + (info->sid != data->sid)) + return false; + + return true; +} + +/* Parse L2TP header fields when UDP encapsulation is used. Handles + * L2TPv2 and L2TPv3. Note the L2TPv3 control and data packets have a + * different format. 
See + * RFC2661, Section 3.1, L2TPv2 Header Format + * RFC3931, Section 3.2.1, L2TPv3 Control Message Header + * RFC3931, Section 3.2.2, L2TPv3 Data Message Header + * RFC3931, Section 4.1.2.1, L2TPv3 Session Header over UDP + */ +static bool l2tp_udp_mt(const struct sk_buff *skb, struct xt_action_param *par, u16 thoff) +{ + const struct xt_l2tp_info *info = par->matchinfo; + int uhlen = sizeof(struct udphdr); + int offs = thoff + uhlen; + union l2tp_val *lh; + union l2tp_val lhbuf; + u16 flags; + struct l2tp_data data = { 0, }; + + if (par->fragoff != 0) + return false; + + /* Extract L2TP header fields. The flags in the first 16 bits + * tell us where the other fields are. + */ + lh = skb_header_pointer(skb, offs, 2, &lhbuf); + if (lh == NULL) + return false; + + flags = ntohs(lh->val16[0]); + if (flags & L2TP_HDR_T_BIT) + data.type = XT_L2TP_TYPE_CONTROL; + else + data.type = XT_L2TP_TYPE_DATA; + data.version = (u8) flags & L2TP_HDR_VER; + + /* Now extract the L2TP tid/sid. These are in different places + * for L2TPv2 (rfc2661) and L2TPv3 (rfc3931). For L2TPv2, we + * must also check to see if the length field is present, + * since this affects the offsets into the packet of the + * tid/sid fields. + */ + if (data.version == 3) { + lh = skb_header_pointer(skb, offs + 4, 4, &lhbuf); + if (lh == NULL) + return false; + if (data.type == XT_L2TP_TYPE_CONTROL) + data.tid = ntohl(lh->val32); + else + data.sid = ntohl(lh->val32); + } else if (data.version == 2) { + if (flags & L2TP_HDR_L_BIT) + offs += 2; + lh = skb_header_pointer(skb, offs + 2, 4, &lhbuf); + if (lh == NULL) + return false; + data.tid = (u32) ntohs(lh->val16[0]); + data.sid = (u32) ntohs(lh->val16[1]); + } else + return false; + + return l2tp_match(info, &data); +} + +/* Parse L2TP header fields for IP encapsulation (no UDP header). + * L2TPv3 data packets have a different form with IP encap. See + * RC3931, Section 4.1.1.1, L2TPv3 Session Header over IP. 
+ * RC3931, Section 4.1.1.2, L2TPv3 Control and Data Traffic over IP. + */ +static bool l2tp_ip_mt(const struct sk_buff *skb, struct xt_action_param *par, u16 thoff) +{ + const struct xt_l2tp_info *info = par->matchinfo; + union l2tp_val *lh; + union l2tp_val lhbuf; + struct l2tp_data data = { 0, }; + + /* For IP encap, the L2TP sid is the first 32-bits. */ + lh = skb_header_pointer(skb, thoff, sizeof(lhbuf), &lhbuf); + if (lh == NULL) + return false; + if (lh->val32 == 0) { + /* Must be a control packet. The L2TP tid is further + * into the packet. + */ + data.type = XT_L2TP_TYPE_CONTROL; + lh = skb_header_pointer(skb, thoff + 8, sizeof(lhbuf), + &lhbuf); + if (lh == NULL) + return false; + data.tid = ntohl(lh->val32); + } else { + data.sid = ntohl(lh->val32); + data.type = XT_L2TP_TYPE_DATA; + } + + data.version = 3; + + return l2tp_match(info, &data); +} + +static bool l2tp_mt4(const struct sk_buff *skb, struct xt_action_param *par) +{ + struct iphdr *iph = ip_hdr(skb); + u8 ipproto = iph->protocol; + + /* l2tp_mt_check4 already restricts the transport protocol */ + switch (ipproto) { + case IPPROTO_UDP: + return l2tp_udp_mt(skb, par, par->thoff); + case IPPROTO_L2TP: + return l2tp_ip_mt(skb, par, par->thoff); + } + + return false; +} + +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) +static bool l2tp_mt6(const struct sk_buff *skb, struct xt_action_param *par) +{ + unsigned int thoff = 0; + unsigned short fragoff = 0; + int ipproto; + + ipproto = ipv6_find_hdr(skb, &thoff, -1, &fragoff, NULL); + if (fragoff != 0) + return false; + + /* l2tp_mt_check6 already restricts the transport protocol */ + switch (ipproto) { + case IPPROTO_UDP: + return l2tp_udp_mt(skb, par, thoff); + case IPPROTO_L2TP: + return l2tp_ip_mt(skb, par, thoff); + } + + return false; +} +#endif + +static int l2tp_mt_check(const struct xt_mtchk_param *par) +{ + const struct xt_l2tp_info *info = par->matchinfo; + + /* Check for invalid flags */ + if (info->flags & ~(XT_L2TP_TID | XT_L2TP_SID | 
XT_L2TP_VERSION | + XT_L2TP_TYPE)) { + pr_info("unknown flags: %x\n", info->flags); + return -EINVAL; + } + + /* At least one of tid, sid or type=control must be specified */ + if ((!(info->flags & XT_L2TP_TID)) && + (!(info->flags & XT_L2TP_SID)) && + ((!(info->flags & XT_L2TP_TYPE)) || + (info->type != XT_L2TP_TYPE_CONTROL))) { + pr_info("invalid flags combination: %x\n", info->flags); + return -EINVAL; + } + + /* If version 2 is specified, check that incompatible params + * are not supplied + */ + if (info->flags & XT_L2TP_VERSION) { + if ((info->version < 2) || (info->version > 3)) { + pr_info("wrong L2TP version: %u\n", info->version); + return -EINVAL; + } + + if (info->version == 2) { + if ((info->flags & XT_L2TP_TID) && + (info->tid > 0xffff)) { + pr_info("v2 tid > 0xffff: %u\n", info->tid); + return -EINVAL; + } + if ((info->flags & XT_L2TP_SID) && + (info->sid > 0xffff)) { + pr_info("v2 sid > 0xffff: %u\n", info->sid); + return -EINVAL; + } + } + } + + return 0; +} + +static int l2tp_mt_check4(const struct xt_mtchk_param *par) +{ + const struct xt_l2tp_info *info = par->matchinfo; + const struct ipt_entry *e = par->entryinfo; + const struct ipt_ip *ip = &e->ip; + int ret; + + ret = l2tp_mt_check(par); + if (ret != 0) + return ret; + + if ((ip->proto != IPPROTO_UDP) && + (ip->proto != IPPROTO_L2TP)) { + pr_info("missing protocol rule (udp|l2tpip)\n"); + return -EINVAL; + } + + if ((ip->proto == IPPROTO_L2TP) && + (info->version == 2)) { + pr_info("v2 doesn't support IP mode\n"); + return -EINVAL; + } + + return 0; +} + +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) +static int l2tp_mt_check6(const struct xt_mtchk_param *par) +{ + const struct xt_l2tp_info *info = par->matchinfo; + const struct ip6t_entry *e = par->entryinfo; + const struct ip6t_ip6 *ip = &e->ipv6; + int ret; + + ret = l2tp_mt_check(par); + if (ret != 0) + return ret; + + if ((ip->proto != IPPROTO_UDP) && + (ip->proto != IPPROTO_L2TP)) { + pr_info("missing protocol rule (udp|l2tpip)\n"); + return 
-EINVAL; + } + + if ((ip->proto == IPPROTO_L2TP) && + (info->version == 2)) { + pr_info("v2 doesn't support IP mode\n"); + return -EINVAL; + } + + return 0; +} +#endif + +static struct xt_match l2tp_mt_reg[] __read_mostly = { + { + .name = "l2tp", + .revision = 0, + .family = NFPROTO_IPV4, + .match = l2tp_mt4, + .matchsize = XT_ALIGN(sizeof(struct xt_l2tp_info)), + .checkentry = l2tp_mt_check4, + .hooks = ((1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_FORWARD)), + .me = THIS_MODULE, + }, +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) + { + .name = "l2tp", + .revision = 0, + .family = NFPROTO_IPV6, + .match = l2tp_mt6, + .matchsize = XT_ALIGN(sizeof(struct xt_l2tp_info)), + .checkentry = l2tp_mt_check6, + .hooks = ((1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_FORWARD)), + .me = THIS_MODULE, + }, +#endif +}; + +static int __init l2tp_mt_init(void) +{ + return xt_register_matches(&l2tp_mt_reg[0], ARRAY_SIZE(l2tp_mt_reg)); +} + +static void __exit l2tp_mt_exit(void) +{ + xt_unregister_matches(&l2tp_mt_reg[0], ARRAY_SIZE(l2tp_mt_reg)); +} + +module_init(l2tp_mt_init); +module_exit(l2tp_mt_exit); diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c index 647d989a01e..7174611bd67 100644 --- a/net/netfilter/xt_osf.c +++ b/net/netfilter/xt_osf.c @@ -13,8 +13,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
*/ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c index 4fe4fb4276d..11de55e7a86 100644 --- a/net/netfilter/xt_statistic.c +++ b/net/netfilter/xt_statistic.c @@ -37,7 +37,7 @@ statistic_mt(const struct sk_buff *skb, struct xt_action_param *par) switch (info->mode) { case XT_STATISTIC_MODE_RANDOM: - if ((net_random() & 0x7FFFFFFF) < info->u.random.probability) + if ((prandom_u32() & 0x7FFFFFFF) < info->u.random.probability) ret = !ret; break; case XT_STATISTIC_MODE_NTH: diff --git a/net/netlabel/netlabel_addrlist.c b/net/netlabel/netlabel_addrlist.c index 6f1701322fb..d0a3acfa574 100644 --- a/net/netlabel/netlabel_addrlist.c +++ b/net/netlabel/netlabel_addrlist.c @@ -24,8 +24,7 @@ * the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. * */ diff --git a/net/netlabel/netlabel_addrlist.h b/net/netlabel/netlabel_addrlist.h index a1287ce1813..d0f38bc9af6 100644 --- a/net/netlabel/netlabel_addrlist.h +++ b/net/netlabel/netlabel_addrlist.h @@ -24,8 +24,7 @@ * the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. * */ diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index 69345cebe3a..c2f2a53a487 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -23,8 +23,7 @@ * the GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. * */ diff --git a/net/netlabel/netlabel_cipso_v4.h b/net/netlabel/netlabel_cipso_v4.h index d24d774bfd6..875826808b0 100644 --- a/net/netlabel/netlabel_cipso_v4.h +++ b/net/netlabel/netlabel_cipso_v4.h @@ -23,8 +23,7 @@ * the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. * */ diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c index 85d842e6e43..f0cb92f3dda 100644 --- a/net/netlabel/netlabel_domainhash.c +++ b/net/netlabel/netlabel_domainhash.c @@ -24,8 +24,7 @@ * the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. * */ diff --git a/net/netlabel/netlabel_domainhash.h b/net/netlabel/netlabel_domainhash.h index b9be0eed898..680caf4dff5 100644 --- a/net/netlabel/netlabel_domainhash.h +++ b/net/netlabel/netlabel_domainhash.h @@ -24,8 +24,7 @@ * the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
* */ diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index dce1bebf7ae..3045a964f39 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -23,8 +23,7 @@ * the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. * */ diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index 8ef83ee97c6..e66e977ef2f 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -23,8 +23,7 @@ * the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. * */ diff --git a/net/netlabel/netlabel_mgmt.h b/net/netlabel/netlabel_mgmt.h index 5a9f31ce579..8b6e1ab62b4 100644 --- a/net/netlabel/netlabel_mgmt.h +++ b/net/netlabel/netlabel_mgmt.h @@ -23,8 +23,7 @@ * the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. * */ diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index 43817d73ccf..78a63c18779 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -23,8 +23,7 @@ * the GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. * */ diff --git a/net/netlabel/netlabel_unlabeled.h b/net/netlabel/netlabel_unlabeled.h index 700af49022a..3a9e5dc9511 100644 --- a/net/netlabel/netlabel_unlabeled.h +++ b/net/netlabel/netlabel_unlabeled.h @@ -23,8 +23,7 @@ * the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. * */ diff --git a/net/netlabel/netlabel_user.c b/net/netlabel/netlabel_user.c index 9650c4ad5f8..1e779bb7fa4 100644 --- a/net/netlabel/netlabel_user.c +++ b/net/netlabel/netlabel_user.c @@ -23,8 +23,7 @@ * the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. * */ diff --git a/net/netlabel/netlabel_user.h b/net/netlabel/netlabel_user.h index 81969785e27..4a397cde1a4 100644 --- a/net/netlabel/netlabel_user.h +++ b/net/netlabel/netlabel_user.h @@ -23,8 +23,7 @@ * the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
* */ diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index bca50b95c18..fdf51353cf7 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -131,7 +131,7 @@ int netlink_add_tap(struct netlink_tap *nt) } EXPORT_SYMBOL_GPL(netlink_add_tap); -int __netlink_remove_tap(struct netlink_tap *nt) +static int __netlink_remove_tap(struct netlink_tap *nt) { bool found = false; struct netlink_tap *tmp; @@ -155,7 +155,6 @@ out: return found ? 0 : -ENODEV; } -EXPORT_SYMBOL_GPL(__netlink_remove_tap); int netlink_remove_tap(struct netlink_tap *nt) { @@ -204,6 +203,8 @@ static int __netlink_deliver_tap_skb(struct sk_buff *skb, if (nskb) { nskb->dev = dev; nskb->protocol = htons((u16) sk->sk_protocol); + nskb->pkt_type = netlink_is_kernel(sk) ? + PACKET_KERNEL : PACKET_USER; ret = dev_queue_xmit(nskb); if (unlikely(ret > 0)) @@ -239,6 +240,13 @@ static void netlink_deliver_tap(struct sk_buff *skb) rcu_read_unlock(); } +static void netlink_deliver_tap_kernel(struct sock *dst, struct sock *src, + struct sk_buff *skb) +{ + if (!(netlink_is_kernel(dst) && netlink_is_kernel(src))) + netlink_deliver_tap(skb); +} + static void netlink_overrun(struct sock *sk) { struct netlink_sock *nlk = nlk_sk(sk); @@ -1697,14 +1705,10 @@ static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb, ret = -ECONNREFUSED; if (nlk->netlink_rcv != NULL) { - /* We could do a netlink_deliver_tap(skb) here as well - * but since this is intended for the kernel only, we - * should rather let it stay under the hood. 
- */ - ret = skb->len; netlink_skb_set_owner_r(skb, sk); NETLINK_CB(skb).sk = ssk; + netlink_deliver_tap_kernel(sk, ssk, skb); nlk->netlink_rcv(skb); consume_skb(skb); } else { @@ -1769,6 +1773,9 @@ struct sk_buff *netlink_alloc_skb(struct sock *ssk, unsigned int size, if (ring->pg_vec == NULL) goto out_put; + if (ring->frame_size - NL_MMAP_HDRLEN < size) + goto out_put; + skb = alloc_skb_head(gfp_mask); if (skb == NULL) goto err1; @@ -1778,6 +1785,7 @@ struct sk_buff *netlink_alloc_skb(struct sock *ssk, unsigned int size, if (ring->pg_vec == NULL) goto out_free; + /* check again under lock */ maxlen = ring->frame_size - NL_MMAP_HDRLEN; if (maxlen < size) goto out_free; @@ -2214,7 +2222,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, struct sock_iocb *siocb = kiocb_to_siocb(kiocb); struct sock *sk = sock->sk; struct netlink_sock *nlk = nlk_sk(sk); - struct sockaddr_nl *addr = msg->msg_name; + DECLARE_SOCKADDR(struct sockaddr_nl *, addr, msg->msg_name); u32 dst_portid; u32 dst_group; struct sk_buff *skb; @@ -2345,7 +2353,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, err = skb_copy_datagram_iovec(data_skb, 0, msg->msg_iov, copied); if (msg->msg_name) { - struct sockaddr_nl *addr = (struct sockaddr_nl *)msg->msg_name; + DECLARE_SOCKADDR(struct sockaddr_nl *, addr, msg->msg_name); addr->nl_family = AF_NETLINK; addr->nl_pad = 0; addr->nl_pid = NETLINK_CB(skb).portid; @@ -2535,21 +2543,6 @@ void __netlink_clear_multicast_users(struct sock *ksk, unsigned int group) netlink_update_socket_mc(nlk_sk(sk), group, 0); } -/** - * netlink_clear_multicast_users - kick off multicast listeners - * - * This function removes all listeners from the given group. - * @ksk: The kernel netlink socket, as returned by - * netlink_kernel_create(). - * @group: The multicast group to clear. 
- */ -void netlink_clear_multicast_users(struct sock *ksk, unsigned int group) -{ - netlink_table_grab(); - __netlink_clear_multicast_users(ksk, group); - netlink_table_ungrab(); -} - struct nlmsghdr * __nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int flags) { diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 713671ae45a..b1dcdb932a8 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -461,6 +461,26 @@ int genl_unregister_family(struct genl_family *family) EXPORT_SYMBOL(genl_unregister_family); /** + * genlmsg_new_unicast - Allocate generic netlink message for unicast + * @payload: size of the message payload + * @info: information on destination + * @flags: the type of memory to allocate + * + * Allocates a new sk_buff large enough to cover the specified payload + * plus required Netlink headers. Will check receiving socket for + * memory mapped i/o capability and use it if enabled. Will fall back + * to non-mapped skb if message size exceeds the frame size of the ring. 
+ */ +struct sk_buff *genlmsg_new_unicast(size_t payload, struct genl_info *info, + gfp_t flags) +{ + size_t len = nlmsg_total_size(genlmsg_total_size(payload)); + + return netlink_alloc_skb(info->dst_sk, len, info->snd_portid, flags); +} +EXPORT_SYMBOL_GPL(genlmsg_new_unicast); + +/** * genlmsg_put - Add generic netlink header to netlink message * @skb: socket buffer holding the message * @portid: netlink portid the message is addressed to @@ -600,6 +620,7 @@ static int genl_family_rcv_msg(struct genl_family *family, info.genlhdr = nlmsg_data(nlh); info.userhdr = nlmsg_data(nlh) + GENL_HDRLEN; info.attrs = attrbuf; + info.dst_sk = skb->sk; genl_info_net_set(&info, net); memset(&info.user_ptr, 0, sizeof(info.user_ptr)); diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 53c19a35fc6..b74aa075552 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -1028,7 +1028,7 @@ static int nr_sendmsg(struct kiocb *iocb, struct socket *sock, { struct sock *sk = sock->sk; struct nr_sock *nr = nr_sk(sk); - struct sockaddr_ax25 *usax = (struct sockaddr_ax25 *)msg->msg_name; + DECLARE_SOCKADDR(struct sockaddr_ax25 *, usax, msg->msg_name); int err; struct sockaddr_ax25 sax; struct sk_buff *skb; @@ -1137,7 +1137,7 @@ static int nr_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t size, int flags) { struct sock *sk = sock->sk; - struct sockaddr_ax25 *sax = (struct sockaddr_ax25 *)msg->msg_name; + DECLARE_SOCKADDR(struct sockaddr_ax25 *, sax, msg->msg_name); size_t copied; struct sk_buff *skb; int er; diff --git a/net/nfc/af_nfc.c b/net/nfc/af_nfc.c index 9d68441e2a5..2277276f52b 100644 --- a/net/nfc/af_nfc.c +++ b/net/nfc/af_nfc.c @@ -16,9 +16,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the - * Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #include <linux/nfc.h> diff --git a/net/nfc/core.c b/net/nfc/core.c index 83b9927e7d1..ca1e65f4b13 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -16,9 +16,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the - * Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #define pr_fmt(fmt) KBUILD_MODNAME ": %s: " fmt, __func__ @@ -135,11 +133,8 @@ int nfc_dev_up(struct nfc_dev *dev) dev->dev_up = true; /* We have to enable the device before discovering SEs */ - if (dev->ops->discover_se) { - rc = dev->ops->discover_se(dev); - if (rc) - pr_warn("SE discovery failed\n"); - } + if (dev->ops->discover_se && dev->ops->discover_se(dev)) + pr_err("SE discovery failed\n"); error: device_unlock(&dev->dev); diff --git a/net/nfc/digital_core.c b/net/nfc/digital_core.c index 09fc9543995..c129d1571ca 100644 --- a/net/nfc/digital_core.c +++ b/net/nfc/digital_core.c @@ -339,7 +339,6 @@ int digital_target_found(struct nfc_digital_dev *ddev, pr_debug("rf_tech=%d, protocol=%d\n", rf_tech, protocol); ddev->curr_rf_tech = rf_tech; - ddev->curr_protocol = protocol; if (DIGITAL_DRV_CAPS_IN_CRC(ddev)) { ddev->skb_add_crc = digital_skb_add_crc_none; @@ -541,8 +540,14 @@ static int digital_dep_link_up(struct nfc_dev *nfc_dev, __u8 comm_mode, __u8 *gb, size_t gb_len) { struct nfc_digital_dev *ddev = nfc_get_drvdata(nfc_dev); + int rc; + + rc = digital_in_send_atr_req(ddev, target, comm_mode, gb, gb_len); - return digital_in_send_atr_req(ddev, target, comm_mode, gb, gb_len); + if (!rc) + ddev->curr_protocol = NFC_PROTO_NFC_DEP; + + return rc; } static int digital_dep_link_down(struct nfc_dev *nfc_dev) @@ -557,6 +562,20 @@ static int digital_dep_link_down(struct nfc_dev *nfc_dev) static 
int digital_activate_target(struct nfc_dev *nfc_dev, struct nfc_target *target, __u32 protocol) { + struct nfc_digital_dev *ddev = nfc_get_drvdata(nfc_dev); + + if (ddev->poll_tech_count) { + pr_err("Can't activate a target while polling\n"); + return -EBUSY; + } + + if (ddev->curr_protocol) { + pr_err("A target is already active\n"); + return -EBUSY; + } + + ddev->curr_protocol = protocol; + return 0; } @@ -565,6 +584,11 @@ static void digital_deactivate_target(struct nfc_dev *nfc_dev, { struct nfc_digital_dev *ddev = nfc_get_drvdata(nfc_dev); + if (!ddev->curr_protocol) { + pr_err("No active target\n"); + return; + } + ddev->curr_protocol = 0; } diff --git a/net/nfc/digital_dep.c b/net/nfc/digital_dep.c index 8b362e802d2..d4ed25ff723 100644 --- a/net/nfc/digital_dep.c +++ b/net/nfc/digital_dep.c @@ -32,7 +32,6 @@ #define DIGITAL_ATR_REQ_MIN_SIZE 16 #define DIGITAL_ATR_REQ_MAX_SIZE 64 -#define DIGITAL_NFCID3_LEN ((u8)8) #define DIGITAL_LR_BITS_PAYLOAD_SIZE_254B 0x30 #define DIGITAL_GB_BIT 0x02 @@ -206,10 +205,9 @@ int digital_in_send_atr_req(struct nfc_digital_dev *ddev, atr_req->dir = DIGITAL_NFC_DEP_FRAME_DIR_OUT; atr_req->cmd = DIGITAL_CMD_ATR_REQ; if (target->nfcid2_len) - memcpy(atr_req->nfcid3, target->nfcid2, - max(target->nfcid2_len, DIGITAL_NFCID3_LEN)); + memcpy(atr_req->nfcid3, target->nfcid2, NFC_NFCID2_MAXSIZE); else - get_random_bytes(atr_req->nfcid3, DIGITAL_NFCID3_LEN); + get_random_bytes(atr_req->nfcid3, NFC_NFCID3_MAXSIZE); atr_req->did = 0; atr_req->bs = 0; @@ -382,6 +380,33 @@ int digital_in_send_dep_req(struct nfc_digital_dev *ddev, data_exch); } +static void digital_tg_set_rf_tech(struct nfc_digital_dev *ddev, u8 rf_tech) +{ + ddev->curr_rf_tech = rf_tech; + + ddev->skb_add_crc = digital_skb_add_crc_none; + ddev->skb_check_crc = digital_skb_check_crc_none; + + if (DIGITAL_DRV_CAPS_TG_CRC(ddev)) + return; + + switch (ddev->curr_rf_tech) { + case NFC_DIGITAL_RF_TECH_106A: + ddev->skb_add_crc = digital_skb_add_crc_a; + ddev->skb_check_crc = 
digital_skb_check_crc_a; + break; + + case NFC_DIGITAL_RF_TECH_212F: + case NFC_DIGITAL_RF_TECH_424F: + ddev->skb_add_crc = digital_skb_add_crc_f; + ddev->skb_check_crc = digital_skb_check_crc_f; + break; + + default: + break; + } +} + static void digital_tg_recv_dep_req(struct nfc_digital_dev *ddev, void *arg, struct sk_buff *resp) { @@ -472,11 +497,13 @@ int digital_tg_send_dep_res(struct nfc_digital_dev *ddev, struct sk_buff *skb) static void digital_tg_send_psl_res_complete(struct nfc_digital_dev *ddev, void *arg, struct sk_buff *resp) { - u8 rf_tech = PTR_ERR(arg); + u8 rf_tech = (unsigned long)arg; if (IS_ERR(resp)) return; + digital_tg_set_rf_tech(ddev, rf_tech); + digital_tg_configure_hw(ddev, NFC_DIGITAL_CONFIG_RF_TECH, rf_tech); digital_tg_listen(ddev, 1500, digital_tg_recv_dep_req, NULL); @@ -508,7 +535,7 @@ static int digital_tg_send_psl_res(struct nfc_digital_dev *ddev, u8 did, ddev->skb_add_crc(skb); rc = digital_tg_send_cmd(ddev, skb, 0, digital_tg_send_psl_res_complete, - ERR_PTR(rf_tech)); + (void *)(unsigned long)rf_tech); if (rc) kfree_skb(skb); @@ -661,16 +688,10 @@ void digital_tg_recv_atr_req(struct nfc_digital_dev *ddev, void *arg, if (resp->data[0] == DIGITAL_NFC_DEP_NFCA_SOD_SB) { min_size = DIGITAL_ATR_REQ_MIN_SIZE + 2; - - ddev->curr_rf_tech = NFC_DIGITAL_RF_TECH_106A; - ddev->skb_add_crc = digital_skb_add_crc_a; - ddev->skb_check_crc = digital_skb_check_crc_a; + digital_tg_set_rf_tech(ddev, NFC_DIGITAL_RF_TECH_106A); } else { min_size = DIGITAL_ATR_REQ_MIN_SIZE + 1; - - ddev->curr_rf_tech = NFC_DIGITAL_RF_TECH_212F; - ddev->skb_add_crc = digital_skb_add_crc_f; - ddev->skb_check_crc = digital_skb_check_crc_f; + digital_tg_set_rf_tech(ddev, NFC_DIGITAL_RF_TECH_212F); } if (resp->len < min_size) { @@ -678,10 +699,7 @@ void digital_tg_recv_atr_req(struct nfc_digital_dev *ddev, void *arg, goto exit; } - if (DIGITAL_DRV_CAPS_TG_CRC(ddev)) { - ddev->skb_add_crc = digital_skb_add_crc_none; - ddev->skb_check_crc = digital_skb_check_crc_none; - } 
+ ddev->curr_protocol = NFC_PROTO_NFC_DEP_MASK; rc = ddev->skb_check_crc(resp); if (rc) { diff --git a/net/nfc/hci/command.c b/net/nfc/hci/command.c index 64f922be928..a9f4d2e62d8 100644 --- a/net/nfc/hci/command.c +++ b/net/nfc/hci/command.c @@ -12,9 +12,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the - * Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #define pr_fmt(fmt) "hci: %s: " fmt, __func__ diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index d07ca4c5cf8..d45b638e77c 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -12,9 +12,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the - * Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #define pr_fmt(fmt) "hci: %s: " fmt, __func__ @@ -337,11 +335,8 @@ exit: kfree_skb(skb); exit_noskb: - if (r) { - /* TODO: There was an error dispatching the event, - * how to propagate up to nfc core? - */ - } + if (r) + nfc_hci_driver_failure(hdev, r); } static void nfc_hci_cmd_timeout(unsigned long data) diff --git a/net/nfc/hci/hci.h b/net/nfc/hci/hci.h index b274d12c18a..c3d2e2c1394 100644 --- a/net/nfc/hci/hci.h +++ b/net/nfc/hci/hci.h @@ -12,9 +12,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the - * Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
*/ #ifndef __LOCAL_HCI_H diff --git a/net/nfc/hci/hcp.c b/net/nfc/hci/hcp.c index b6b4109f234..e9de1514656 100644 --- a/net/nfc/hci/hcp.c +++ b/net/nfc/hci/hcp.c @@ -12,9 +12,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the - * Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #define pr_fmt(fmt) "hci: %s: " fmt, __func__ diff --git a/net/nfc/hci/llc.c b/net/nfc/hci/llc.c index fe5e966e5b8..a07d2b81848 100644 --- a/net/nfc/hci/llc.c +++ b/net/nfc/hci/llc.c @@ -13,9 +13,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the - * Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #include <net/nfc/llc.h> diff --git a/net/nfc/hci/llc.h b/net/nfc/hci/llc.h index 7be0b7f3ceb..5dad4c57ffb 100644 --- a/net/nfc/hci/llc.h +++ b/net/nfc/hci/llc.h @@ -13,9 +13,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the - * Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #ifndef __LOCAL_LLC_H_ diff --git a/net/nfc/hci/llc_nop.c b/net/nfc/hci/llc_nop.c index 87b10291b40..d0435d5a197 100644 --- a/net/nfc/hci/llc_nop.c +++ b/net/nfc/hci/llc_nop.c @@ -13,9 +13,7 @@ * GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the - * Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #include <linux/types.h> diff --git a/net/nfc/hci/llc_shdlc.c b/net/nfc/hci/llc_shdlc.c index 3e53c1e029d..401c7e25527 100644 --- a/net/nfc/hci/llc_shdlc.c +++ b/net/nfc/hci/llc_shdlc.c @@ -13,9 +13,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the - * Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #define pr_fmt(fmt) "shdlc: %s: " fmt, __func__ diff --git a/net/nfc/llcp.h b/net/nfc/llcp.h index f4d48b57ea1..de1789e3cc8 100644 --- a/net/nfc/llcp.h +++ b/net/nfc/llcp.h @@ -12,9 +12,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the - * Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ enum llcp_state { diff --git a/net/nfc/llcp_commands.c b/net/nfc/llcp_commands.c index 1017894807c..bec6ed15f50 100644 --- a/net/nfc/llcp_commands.c +++ b/net/nfc/llcp_commands.c @@ -12,9 +12,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the - * Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
*/ #define pr_fmt(fmt) "llcp: %s: " fmt, __func__ @@ -677,7 +675,7 @@ int nfc_llcp_send_i_frame(struct nfc_llcp_sock *sock, do { remote_miu = sock->remote_miu > LLCP_MAX_MIU ? - local->remote_miu : sock->remote_miu; + LLCP_DEFAULT_MIU : sock->remote_miu; frag_len = min_t(size_t, remote_miu, remaining_len); @@ -686,8 +684,10 @@ int nfc_llcp_send_i_frame(struct nfc_llcp_sock *sock, pdu = llcp_allocate_pdu(sock, LLCP_PDU_I, frag_len + LLCP_SEQUENCE_SIZE); - if (pdu == NULL) + if (pdu == NULL) { + kfree(msg_data); return -ENOMEM; + } skb_put(pdu, LLCP_SEQUENCE_SIZE); diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c index 81cd3416c7d..6184bd1fba3 100644 --- a/net/nfc/llcp_core.c +++ b/net/nfc/llcp_core.c @@ -12,9 +12,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the - * Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #define pr_fmt(fmt) "llcp: %s: " fmt, __func__ @@ -945,7 +943,6 @@ static void nfc_llcp_recv_connect(struct nfc_llcp_local *local, new_sock->local = nfc_llcp_local_get(local); new_sock->rw = sock->rw; new_sock->miux = sock->miux; - new_sock->remote_miu = local->remote_miu; new_sock->nfc_protocol = sock->nfc_protocol; new_sock->dsap = ssap; new_sock->target_idx = local->target_idx; diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index 824c6056bf8..51f077a92fa 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -12,9 +12,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the - * Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
*/ #define pr_fmt(fmt) "llcp: %s: " fmt, __func__ @@ -702,7 +700,6 @@ static int llcp_sock_connect(struct socket *sock, struct sockaddr *_addr, llcp_sock->dev = dev; llcp_sock->local = nfc_llcp_local_get(local); - llcp_sock->remote_miu = llcp_sock->local->remote_miu; llcp_sock->ssap = nfc_llcp_get_local_ssap(local); if (llcp_sock->ssap == LLCP_SAP_MAX) { ret = -ENOMEM; @@ -772,8 +769,8 @@ static int llcp_sock_sendmsg(struct kiocb *iocb, struct socket *sock, lock_sock(sk); if (sk->sk_type == SOCK_DGRAM) { - struct sockaddr_nfc_llcp *addr = - (struct sockaddr_nfc_llcp *)msg->msg_name; + DECLARE_SOCKADDR(struct sockaddr_nfc_llcp *, addr, + msg->msg_name); if (msg->msg_namelen < sizeof(*addr)) { release_sock(sk); @@ -845,8 +842,8 @@ static int llcp_sock_recvmsg(struct kiocb *iocb, struct socket *sock, if (sk->sk_type == SOCK_DGRAM && msg->msg_name) { struct nfc_llcp_ui_cb *ui_cb = nfc_llcp_ui_skb_cb(skb); - struct sockaddr_nfc_llcp *sockaddr = - (struct sockaddr_nfc_llcp *) msg->msg_name; + DECLARE_SOCKADDR(struct sockaddr_nfc_llcp *, sockaddr, + msg->msg_name); msg->msg_namelen = sizeof(struct sockaddr_nfc_llcp); diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index b943d46a164..46bda010bf1 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -20,8 +20,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
* */ @@ -302,6 +301,9 @@ static int nci_open_device(struct nci_dev *ndev) rc = __nci_request(ndev, nci_reset_req, 0, msecs_to_jiffies(NCI_RESET_TIMEOUT)); + if (ndev->ops->setup(ndev)) + ndev->ops->setup(ndev); + if (!rc) { rc = __nci_request(ndev, nci_init_req, 0, msecs_to_jiffies(NCI_INIT_TIMEOUT)); @@ -362,6 +364,8 @@ static int nci_close_device(struct nci_dev *ndev) msecs_to_jiffies(NCI_RESET_TIMEOUT)); clear_bit(NCI_INIT, &ndev->flags); + del_timer_sync(&ndev->cmd_timer); + /* Flush cmd wq */ flush_workqueue(ndev->cmd_wq); @@ -409,12 +413,26 @@ static int nci_dev_down(struct nfc_dev *nfc_dev) return nci_close_device(ndev); } +int nci_set_config(struct nci_dev *ndev, __u8 id, size_t len, __u8 *val) +{ + struct nci_set_config_param param; + + if (!val || !len) + return 0; + + param.id = id; + param.len = len; + param.val = val; + + return __nci_request(ndev, nci_set_config_req, (unsigned long)&param, + msecs_to_jiffies(NCI_SET_CONFIG_TIMEOUT)); +} +EXPORT_SYMBOL(nci_set_config); + static int nci_set_local_general_bytes(struct nfc_dev *nfc_dev) { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); struct nci_set_config_param param; - __u8 local_gb[NFC_MAX_GT_LEN]; - int i; param.val = nfc_get_local_general_bytes(nfc_dev, &param.len); if ((param.val == NULL) || (param.len == 0)) @@ -423,11 +441,7 @@ static int nci_set_local_general_bytes(struct nfc_dev *nfc_dev) if (param.len > NFC_MAX_GT_LEN) return -EINVAL; - for (i = 0; i < param.len; i++) - local_gb[param.len-1-i] = param.val[i]; - param.id = NCI_PN_ATR_REQ_GEN_BYTES; - param.val = local_gb; return nci_request(ndev, nci_set_config_req, (unsigned long)&param, msecs_to_jiffies(NCI_SET_CONFIG_TIMEOUT)); diff --git a/net/nfc/nci/data.c b/net/nfc/nci/data.c index 2a9399dd6c6..6c3aef85287 100644 --- a/net/nfc/nci/data.c +++ b/net/nfc/nci/data.c @@ -16,8 +16,7 @@ * GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. * */ diff --git a/net/nfc/nci/lib.c b/net/nfc/nci/lib.c index 6b7fd26c68d..ed774a2e989 100644 --- a/net/nfc/nci/lib.c +++ b/net/nfc/nci/lib.c @@ -20,8 +20,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. * */ diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c index b2aa98ef092..1e905097456 100644 --- a/net/nfc/nci/ntf.c +++ b/net/nfc/nci/ntf.c @@ -20,8 +20,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. * */ diff --git a/net/nfc/nci/rsp.c b/net/nfc/nci/rsp.c index dd072f38ad0..041de51ccdb 100644 --- a/net/nfc/nci/rsp.c +++ b/net/nfc/nci/rsp.c @@ -20,8 +20,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. * */ diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index a9b2342d525..ebbf6fb88b3 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -16,9 +16,7 @@ * GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the - * Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #define pr_fmt(fmt) KBUILD_MODNAME ": %s: " fmt, __func__ diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h index aaf606fc1fa..9d6e74f7e6b 100644 --- a/net/nfc/nfc.h +++ b/net/nfc/nfc.h @@ -16,9 +16,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the - * Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #ifndef __LOCAL_NFC_H diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c index 66bcd2eb577..c27a6e86cae 100644 --- a/net/nfc/rawsock.c +++ b/net/nfc/rawsock.c @@ -16,9 +16,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the - * Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
*/ #define pr_fmt(fmt) KBUILD_MODNAME ": %s: " fmt, __func__ diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 65cfaa81607..2c77e7b1a91 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -165,7 +165,7 @@ static void set_ip_addr(struct sk_buff *skb, struct iphdr *nh, } csum_replace4(&nh->check, *addr, new_addr); - skb->rxhash = 0; + skb_clear_hash(skb); *addr = new_addr; } @@ -199,7 +199,7 @@ static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto, if (recalculate_csum) update_ipv6_checksum(skb, l4_proto, addr, new_addr); - skb->rxhash = 0; + skb_clear_hash(skb); memcpy(addr, new_addr, sizeof(__be32[4])); } @@ -296,7 +296,7 @@ static void set_tp_port(struct sk_buff *skb, __be16 *port, { inet_proto_csum_replace2(check, skb, *port, new_port, 0); *port = new_port; - skb->rxhash = 0; + skb_clear_hash(skb); } static void set_udp_port(struct sk_buff *skb, __be16 *port, __be16 new_port) @@ -310,7 +310,7 @@ static void set_udp_port(struct sk_buff *skb, __be16 *port, __be16 new_port) uh->check = CSUM_MANGLED_0; } else { *port = new_port; - skb->rxhash = 0; + skb_clear_hash(skb); } } @@ -381,7 +381,7 @@ static int set_sctp(struct sk_buff *skb, /* Carry any checksum errors through. 
*/ sh->checksum = old_csum ^ old_correct_csum ^ new_csum; - skb->rxhash = 0; + skb_clear_hash(skb); } return 0; @@ -445,7 +445,7 @@ static int sample(struct datapath *dp, struct sk_buff *skb, a = nla_next(a, &rem)) { switch (nla_type(a)) { case OVS_SAMPLE_ATTR_PROBABILITY: - if (net_random() >= nla_get_u32(a)) + if (prandom_u32() >= nla_get_u32(a)) return 0; break; diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 6f5e1dd3be2..df4692826ea 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -108,10 +108,9 @@ int lockdep_ovsl_is_held(void) #endif static struct vport *new_vport(const struct vport_parms *); -static int queue_gso_packets(struct net *, int dp_ifindex, struct sk_buff *, +static int queue_gso_packets(struct datapath *dp, struct sk_buff *, const struct dp_upcall_info *); -static int queue_userspace_packet(struct net *, int dp_ifindex, - struct sk_buff *, +static int queue_userspace_packet(struct datapath *dp, struct sk_buff *, const struct dp_upcall_info *); /* Must be called with rcu_read_lock or ovs_mutex. */ @@ -133,7 +132,7 @@ static struct datapath *get_dp(struct net *net, int dp_ifindex) } /* Must be called with rcu_read_lock or ovs_mutex. */ -const char *ovs_dp_name(const struct datapath *dp) +static const char *ovs_dp_name(const struct datapath *dp) { struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL); return vport->ops->get_name(vport); @@ -234,7 +233,7 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb) } /* Look up flow. 
*/ - flow = ovs_flow_tbl_lookup(&dp->table, &key, &n_mask_hit); + flow = ovs_flow_tbl_lookup_stats(&dp->table, &key, &n_mask_hit); if (unlikely(!flow)) { struct dp_upcall_info upcall; @@ -251,9 +250,9 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb) OVS_CB(skb)->flow = flow; OVS_CB(skb)->pkt_key = &key; - stats_counter = &stats->n_hit; - ovs_flow_used(OVS_CB(skb)->flow, skb); + ovs_flow_stats_update(OVS_CB(skb)->flow, skb); ovs_execute_actions(dp, skb); + stats_counter = &stats->n_hit; out: /* Update datapath statistics. */ @@ -277,7 +276,6 @@ int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb, const struct dp_upcall_info *upcall_info) { struct dp_stats_percpu *stats; - int dp_ifindex; int err; if (upcall_info->portid == 0) { @@ -285,16 +283,10 @@ int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb, goto err; } - dp_ifindex = get_dpifindex(dp); - if (!dp_ifindex) { - err = -ENODEV; - goto err; - } - if (!skb_is_gso(skb)) - err = queue_userspace_packet(ovs_dp_get_net(dp), dp_ifindex, skb, upcall_info); + err = queue_userspace_packet(dp, skb, upcall_info); else - err = queue_gso_packets(ovs_dp_get_net(dp), dp_ifindex, skb, upcall_info); + err = queue_gso_packets(dp, skb, upcall_info); if (err) goto err; @@ -310,8 +302,7 @@ err: return err; } -static int queue_gso_packets(struct net *net, int dp_ifindex, - struct sk_buff *skb, +static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb, const struct dp_upcall_info *upcall_info) { unsigned short gso_type = skb_shinfo(skb)->gso_type; @@ -320,14 +311,14 @@ static int queue_gso_packets(struct net *net, int dp_ifindex, struct sk_buff *segs, *nskb; int err; - segs = __skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM, false); + segs = __skb_gso_segment(skb, NETIF_F_SG, false); if (IS_ERR(segs)) return PTR_ERR(segs); /* Queue all of the segments. 
*/ skb = segs; do { - err = queue_userspace_packet(net, dp_ifindex, skb, upcall_info); + err = queue_userspace_packet(dp, skb, upcall_info); if (err) break; @@ -380,11 +371,11 @@ static size_t key_attr_size(void) + nla_total_size(28); /* OVS_KEY_ATTR_ND */ } -static size_t upcall_msg_size(const struct sk_buff *skb, - const struct nlattr *userdata) +static size_t upcall_msg_size(const struct nlattr *userdata, + unsigned int hdrlen) { size_t size = NLMSG_ALIGN(sizeof(struct ovs_header)) - + nla_total_size(skb->len) /* OVS_PACKET_ATTR_PACKET */ + + nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */ + nla_total_size(key_attr_size()); /* OVS_PACKET_ATTR_KEY */ /* OVS_PACKET_ATTR_USERDATA */ @@ -394,15 +385,24 @@ static size_t upcall_msg_size(const struct sk_buff *skb, return size; } -static int queue_userspace_packet(struct net *net, int dp_ifindex, - struct sk_buff *skb, +static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, const struct dp_upcall_info *upcall_info) { struct ovs_header *upcall; struct sk_buff *nskb = NULL; struct sk_buff *user_skb; /* to be queued to userspace */ struct nlattr *nla; - int err; + struct genl_info info = { + .dst_sk = ovs_dp_get_net(dp)->genl_sock, + .snd_portid = upcall_info->portid, + }; + size_t len; + unsigned int hlen; + int err, dp_ifindex; + + dp_ifindex = get_dpifindex(dp); + if (!dp_ifindex) + return -ENODEV; if (vlan_tx_tag_present(skb)) { nskb = skb_clone(skb, GFP_ATOMIC); @@ -422,7 +422,22 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex, goto out; } - user_skb = genlmsg_new(upcall_msg_size(skb, upcall_info->userdata), GFP_ATOMIC); + /* Complete checksum if needed */ + if (skb->ip_summed == CHECKSUM_PARTIAL && + (err = skb_checksum_help(skb))) + goto out; + + /* Older versions of OVS user space enforce alignment of the last + * Netlink attribute to NLA_ALIGNTO which would require extensive + * padding logic. Only perform zerocopy if padding is not required. 
+ */ + if (dp->user_features & OVS_DP_F_UNALIGNED) + hlen = skb_zerocopy_headlen(skb); + else + hlen = skb->len; + + len = upcall_msg_size(upcall_info->userdata, hlen); + user_skb = genlmsg_new_unicast(len, &info, GFP_ATOMIC); if (!user_skb) { err = -ENOMEM; goto out; @@ -441,26 +456,24 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex, nla_len(upcall_info->userdata), nla_data(upcall_info->userdata)); - nla = __nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, skb->len); + /* Only reserve room for attribute header, packet data is added + * in skb_zerocopy() */ + if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) { + err = -ENOBUFS; + goto out; + } + nla->nla_len = nla_attr_size(skb->len); - skb_copy_and_csum_dev(skb, nla_data(nla)); + skb_zerocopy(user_skb, skb, skb->len, hlen); - genlmsg_end(user_skb, upcall); - err = genlmsg_unicast(net, user_skb, upcall_info->portid); + ((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len; + err = genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid); out: kfree_skb(nskb); return err; } -static void clear_stats(struct sw_flow *flow) -{ - flow->used = 0; - flow->tcp_flags = 0; - flow->packet_count = 0; - flow->byte_count = 0; -} - static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) { struct ovs_header *ovs_header = info->userhdr; @@ -499,7 +512,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) packet->protocol = htons(ETH_P_802_2); /* Build an sw_flow for sending this packet. 
*/ - flow = ovs_flow_alloc(); + flow = ovs_flow_alloc(false); err = PTR_ERR(flow); if (IS_ERR(flow)) goto err_kfree_skb; @@ -635,10 +648,10 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, const int skb_orig_len = skb->len; struct nlattr *start; struct ovs_flow_stats stats; + __be16 tcp_flags; + unsigned long used; struct ovs_header *ovs_header; struct nlattr *nla; - unsigned long used; - u8 tcp_flags; int err; ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, flags, cmd); @@ -667,24 +680,17 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, nla_nest_end(skb, nla); - spin_lock_bh(&flow->lock); - used = flow->used; - stats.n_packets = flow->packet_count; - stats.n_bytes = flow->byte_count; - tcp_flags = (u8)ntohs(flow->tcp_flags); - spin_unlock_bh(&flow->lock); - + ovs_flow_stats_get(flow, &stats, &used, &tcp_flags); if (used && nla_put_u64(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used))) goto nla_put_failure; if (stats.n_packets && - nla_put(skb, OVS_FLOW_ATTR_STATS, - sizeof(struct ovs_flow_stats), &stats)) + nla_put(skb, OVS_FLOW_ATTR_STATS, sizeof(struct ovs_flow_stats), &stats)) goto nla_put_failure; - if (tcp_flags && - nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, tcp_flags)) + if ((u8)ntohs(tcp_flags) && + nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, (u8)ntohs(tcp_flags))) goto nla_put_failure; /* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if @@ -701,8 +707,7 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, if (start) { const struct sw_flow_actions *sf_acts; - sf_acts = rcu_dereference_check(flow->sf_acts, - lockdep_ovsl_is_held()); + sf_acts = rcu_dereference_ovsl(flow->sf_acts); err = ovs_nla_put_actions(sf_acts->actions, sf_acts->actions_len, skb); @@ -726,39 +731,34 @@ error: return err; } -static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow) +static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow, + struct 
genl_info *info) { - const struct sw_flow_actions *sf_acts; + size_t len; - sf_acts = ovsl_dereference(flow->sf_acts); + len = ovs_flow_cmd_msg_size(ovsl_dereference(flow->sf_acts)); - return genlmsg_new(ovs_flow_cmd_msg_size(sf_acts), GFP_KERNEL); + return genlmsg_new_unicast(len, info, GFP_KERNEL); } static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow, struct datapath *dp, - u32 portid, u32 seq, u8 cmd) + struct genl_info *info, + u8 cmd) { struct sk_buff *skb; int retval; - skb = ovs_flow_cmd_alloc_info(flow); + skb = ovs_flow_cmd_alloc_info(flow, info); if (!skb) return ERR_PTR(-ENOMEM); - retval = ovs_flow_cmd_fill_info(flow, dp, skb, portid, seq, 0, cmd); + retval = ovs_flow_cmd_fill_info(flow, dp, skb, info->snd_portid, + info->snd_seq, 0, cmd); BUG_ON(retval < 0); return skb; } -static struct sw_flow *__ovs_flow_tbl_lookup(struct flow_table *tbl, - const struct sw_flow_key *key) -{ - u32 __always_unused n_mask_hit; - - return ovs_flow_tbl_lookup(tbl, key, &n_mask_hit); -} - static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) { struct nlattr **a = info->attrs; @@ -770,6 +770,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) struct datapath *dp; struct sw_flow_actions *acts = NULL; struct sw_flow_match match; + bool exact_5tuple; int error; /* Extract key. */ @@ -778,7 +779,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) goto error; ovs_match_init(&match, &key, &mask); - error = ovs_nla_get_match(&match, + error = ovs_nla_get_match(&match, &exact_5tuple, a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]); if (error) goto error; @@ -809,7 +810,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) goto err_unlock_ovs; /* Check if this is a duplicate flow */ - flow = __ovs_flow_tbl_lookup(&dp->table, &key); + flow = ovs_flow_tbl_lookup(&dp->table, &key); if (!flow) { /* Bail out if we're not allowed to create a new flow. 
*/ error = -ENOENT; @@ -817,12 +818,11 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) goto err_unlock_ovs; /* Allocate flow. */ - flow = ovs_flow_alloc(); + flow = ovs_flow_alloc(!exact_5tuple); if (IS_ERR(flow)) { error = PTR_ERR(flow); goto err_unlock_ovs; } - clear_stats(flow); flow->key = masked_key; flow->unmasked_key = key; @@ -835,8 +835,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) goto err_flow_free; } - reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, - info->snd_seq, OVS_FLOW_CMD_NEW); + reply = ovs_flow_cmd_build_info(flow, dp, info, OVS_FLOW_CMD_NEW); } else { /* We found a matching flow. */ struct sw_flow_actions *old_acts; @@ -864,15 +863,11 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) rcu_assign_pointer(flow->sf_acts, acts); ovs_nla_free_flow_actions(old_acts); - reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, - info->snd_seq, OVS_FLOW_CMD_NEW); + reply = ovs_flow_cmd_build_info(flow, dp, info, OVS_FLOW_CMD_NEW); /* Clear stats. 
*/ - if (a[OVS_FLOW_ATTR_CLEAR]) { - spin_lock_bh(&flow->lock); - clear_stats(flow); - spin_unlock_bh(&flow->lock); - } + if (a[OVS_FLOW_ATTR_CLEAR]) + ovs_flow_stats_clear(flow); } ovs_unlock(); @@ -910,7 +905,7 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) } ovs_match_init(&match, &key, NULL); - err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL); + err = ovs_nla_get_match(&match, NULL, a[OVS_FLOW_ATTR_KEY], NULL); if (err) return err; @@ -921,14 +916,13 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) goto unlock; } - flow = __ovs_flow_tbl_lookup(&dp->table, &key); + flow = ovs_flow_tbl_lookup(&dp->table, &key); if (!flow || !ovs_flow_cmp_unmasked_key(flow, &match)) { err = -ENOENT; goto unlock; } - reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, - info->snd_seq, OVS_FLOW_CMD_NEW); + reply = ovs_flow_cmd_build_info(flow, dp, info, OVS_FLOW_CMD_NEW); if (IS_ERR(reply)) { err = PTR_ERR(reply); goto unlock; @@ -965,17 +959,17 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) } ovs_match_init(&match, &key, NULL); - err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL); + err = ovs_nla_get_match(&match, NULL, a[OVS_FLOW_ATTR_KEY], NULL); if (err) goto unlock; - flow = __ovs_flow_tbl_lookup(&dp->table, &key); + flow = ovs_flow_tbl_lookup(&dp->table, &key); if (!flow || !ovs_flow_cmp_unmasked_key(flow, &match)) { err = -ENOENT; goto unlock; } - reply = ovs_flow_cmd_alloc_info(flow); + reply = ovs_flow_cmd_alloc_info(flow, info); if (!reply) { err = -ENOMEM; goto unlock; @@ -1061,6 +1055,7 @@ static const struct genl_ops dp_flow_genl_ops[] = { static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = { [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 }, + [OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 }, }; static struct genl_family dp_datapath_genl_family = { @@ -1119,6 +1114,9 @@ 
static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb, &dp_megaflow_stats)) goto nla_put_failure; + if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features)) + goto nla_put_failure; + return genlmsg_end(skb, ovs_header); nla_put_failure: @@ -1127,17 +1125,17 @@ error: return -EMSGSIZE; } -static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 portid, - u32 seq, u8 cmd) +static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, + struct genl_info *info, u8 cmd) { struct sk_buff *skb; int retval; - skb = genlmsg_new(ovs_dp_cmd_msg_size(), GFP_KERNEL); + skb = genlmsg_new_unicast(ovs_dp_cmd_msg_size(), info, GFP_KERNEL); if (!skb) return ERR_PTR(-ENOMEM); - retval = ovs_dp_cmd_fill_info(dp, skb, portid, seq, 0, cmd); + retval = ovs_dp_cmd_fill_info(dp, skb, info->snd_portid, info->snd_seq, 0, cmd); if (retval < 0) { kfree_skb(skb); return ERR_PTR(retval); @@ -1165,6 +1163,24 @@ static struct datapath *lookup_datapath(struct net *net, return dp ? 
dp : ERR_PTR(-ENODEV); } +static void ovs_dp_reset_user_features(struct sk_buff *skb, struct genl_info *info) +{ + struct datapath *dp; + + dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); + if (!dp) + return; + + WARN(dp->user_features, "Dropping previously announced user features\n"); + dp->user_features = 0; +} + +static void ovs_dp_change(struct datapath *dp, struct nlattr **a) +{ + if (a[OVS_DP_ATTR_USER_FEATURES]) + dp->user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]); +} + static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) { struct nlattr **a = info->attrs; @@ -1223,17 +1239,27 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) parms.port_no = OVSP_LOCAL; parms.upcall_portid = nla_get_u32(a[OVS_DP_ATTR_UPCALL_PID]); + ovs_dp_change(dp, a); + vport = new_vport(&parms); if (IS_ERR(vport)) { err = PTR_ERR(vport); if (err == -EBUSY) err = -EEXIST; + if (err == -EEXIST) { + /* An outdated user space instance that does not understand + * the concept of user_features has attempted to create a new + * datapath and is likely to reuse it. Drop all user features. 
+ */ + if (info->genlhdr->version < OVS_DP_VER_FEATURES) + ovs_dp_reset_user_features(skb, info); + } + goto err_destroy_ports_array; } - reply = ovs_dp_cmd_build_info(dp, info->snd_portid, - info->snd_seq, OVS_DP_CMD_NEW); + reply = ovs_dp_cmd_build_info(dp, info, OVS_DP_CMD_NEW); err = PTR_ERR(reply); if (IS_ERR(reply)) goto err_destroy_local_port; @@ -1299,8 +1325,7 @@ static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(dp)) goto unlock; - reply = ovs_dp_cmd_build_info(dp, info->snd_portid, - info->snd_seq, OVS_DP_CMD_DEL); + reply = ovs_dp_cmd_build_info(dp, info, OVS_DP_CMD_DEL); err = PTR_ERR(reply); if (IS_ERR(reply)) goto unlock; @@ -1328,8 +1353,9 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(dp)) goto unlock; - reply = ovs_dp_cmd_build_info(dp, info->snd_portid, - info->snd_seq, OVS_DP_CMD_NEW); + ovs_dp_change(dp, info->attrs); + + reply = ovs_dp_cmd_build_info(dp, info, OVS_DP_CMD_NEW); if (IS_ERR(reply)) { err = PTR_ERR(reply); genl_set_err(&dp_datapath_genl_family, sock_net(skb->sk), 0, @@ -1360,8 +1386,7 @@ static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info) goto unlock; } - reply = ovs_dp_cmd_build_info(dp, info->snd_portid, - info->snd_seq, OVS_DP_CMD_NEW); + reply = ovs_dp_cmd_build_info(dp, info, OVS_DP_CMD_NEW); if (IS_ERR(reply)) { err = PTR_ERR(reply); goto unlock; @@ -1441,7 +1466,7 @@ struct genl_family dp_vport_genl_family = { .parallel_ops = true, }; -struct genl_multicast_group ovs_dp_vport_multicast_group = { +static struct genl_multicast_group ovs_dp_vport_multicast_group = { .name = OVS_VPORT_MCGROUP }; diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 4067ea41be2..6be9fbb5e9c 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -88,6 +88,8 @@ struct datapath { /* Network namespace ref. 
*/ struct net *net; #endif + + u32 user_features; }; /** @@ -145,6 +147,8 @@ int lockdep_ovsl_is_held(void); #define ASSERT_OVSL() WARN_ON(unlikely(!lockdep_ovsl_is_held())) #define ovsl_dereference(p) \ rcu_dereference_protected(p, lockdep_ovsl_is_held()) +#define rcu_dereference_ovsl(p) \ + rcu_dereference_check(p, lockdep_ovsl_is_held()) static inline struct net *ovs_dp_get_net(struct datapath *dp) { @@ -178,14 +182,12 @@ static inline struct vport *ovs_vport_ovsl(const struct datapath *dp, int port_n extern struct notifier_block ovs_dp_device_notifier; extern struct genl_family dp_vport_genl_family; -extern struct genl_multicast_group ovs_dp_vport_multicast_group; void ovs_dp_process_received_packet(struct vport *, struct sk_buff *); void ovs_dp_detach_port(struct vport *); int ovs_dp_upcall(struct datapath *, struct sk_buff *, const struct dp_upcall_info *); -const char *ovs_dp_name(const struct datapath *dp); struct sk_buff *ovs_vport_cmd_build_info(struct vport *, u32 pid, u32 seq, u8 cmd); diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index b409f527960..16f4b46161d 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -35,6 +35,7 @@ #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/sctp.h> +#include <linux/smp.h> #include <linux/tcp.h> #include <linux/udp.h> #include <linux/icmp.h> @@ -60,10 +61,16 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies) #define TCP_FLAGS_BE16(tp) (*(__be16 *)&tcp_flag_word(tp) & htons(0x0FFF)) -void ovs_flow_used(struct sw_flow *flow, struct sk_buff *skb) +void ovs_flow_stats_update(struct sw_flow *flow, struct sk_buff *skb) { + struct flow_stats *stats; __be16 tcp_flags = 0; + if (!flow->stats.is_percpu) + stats = flow->stats.stat; + else + stats = this_cpu_ptr(flow->stats.cpu_stats); + if ((flow->key.eth.type == htons(ETH_P_IP) || flow->key.eth.type == htons(ETH_P_IPV6)) && flow->key.ip.proto == IPPROTO_TCP && @@ -71,12 +78,87 @@ void ovs_flow_used(struct sw_flow *flow, struct 
sk_buff *skb) tcp_flags = TCP_FLAGS_BE16(tcp_hdr(skb)); } - spin_lock(&flow->lock); - flow->used = jiffies; - flow->packet_count++; - flow->byte_count += skb->len; - flow->tcp_flags |= tcp_flags; - spin_unlock(&flow->lock); + spin_lock(&stats->lock); + stats->used = jiffies; + stats->packet_count++; + stats->byte_count += skb->len; + stats->tcp_flags |= tcp_flags; + spin_unlock(&stats->lock); +} + +static void stats_read(struct flow_stats *stats, + struct ovs_flow_stats *ovs_stats, + unsigned long *used, __be16 *tcp_flags) +{ + spin_lock(&stats->lock); + if (time_after(stats->used, *used)) + *used = stats->used; + *tcp_flags |= stats->tcp_flags; + ovs_stats->n_packets += stats->packet_count; + ovs_stats->n_bytes += stats->byte_count; + spin_unlock(&stats->lock); +} + +void ovs_flow_stats_get(struct sw_flow *flow, struct ovs_flow_stats *ovs_stats, + unsigned long *used, __be16 *tcp_flags) +{ + int cpu, cur_cpu; + + *used = 0; + *tcp_flags = 0; + memset(ovs_stats, 0, sizeof(*ovs_stats)); + + if (!flow->stats.is_percpu) { + stats_read(flow->stats.stat, ovs_stats, used, tcp_flags); + } else { + cur_cpu = get_cpu(); + for_each_possible_cpu(cpu) { + struct flow_stats *stats; + + if (cpu == cur_cpu) + local_bh_disable(); + + stats = per_cpu_ptr(flow->stats.cpu_stats, cpu); + stats_read(stats, ovs_stats, used, tcp_flags); + + if (cpu == cur_cpu) + local_bh_enable(); + } + put_cpu(); + } +} + +static void stats_reset(struct flow_stats *stats) +{ + spin_lock(&stats->lock); + stats->used = 0; + stats->packet_count = 0; + stats->byte_count = 0; + stats->tcp_flags = 0; + spin_unlock(&stats->lock); +} + +void ovs_flow_stats_clear(struct sw_flow *flow) +{ + int cpu, cur_cpu; + + if (!flow->stats.is_percpu) { + stats_reset(flow->stats.stat); + } else { + cur_cpu = get_cpu(); + + for_each_possible_cpu(cpu) { + + if (cpu == cur_cpu) + local_bh_disable(); + + stats_reset(per_cpu_ptr(flow->stats.cpu_stats, cpu)); + + if (cpu == cur_cpu) + local_bh_enable(); + } + put_cpu(); + } } 
static int check_header(struct sk_buff *skb, int len) diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index 1510f51dbf7..2d770e28a3a 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -19,6 +19,7 @@ #ifndef FLOW_H #define FLOW_H 1 +#include <linux/cache.h> #include <linux/kernel.h> #include <linux/netlink.h> #include <linux/openvswitch.h> @@ -122,8 +123,8 @@ struct sw_flow_key { } __aligned(BITS_PER_LONG/8); /* Ensure that we can do comparisons as longs. */ struct sw_flow_key_range { - size_t start; - size_t end; + unsigned short int start; + unsigned short int end; }; struct sw_flow_mask { @@ -146,6 +147,22 @@ struct sw_flow_actions { struct nlattr actions[]; }; +struct flow_stats { + u64 packet_count; /* Number of packets matched. */ + u64 byte_count; /* Number of bytes matched. */ + unsigned long used; /* Last used time (in jiffies). */ + spinlock_t lock; /* Lock for atomic stats update. */ + __be16 tcp_flags; /* Union of seen TCP flags. */ +}; + +struct sw_flow_stats { + bool is_percpu; + union { + struct flow_stats *stat; + struct flow_stats __percpu *cpu_stats; + }; +}; + struct sw_flow { struct rcu_head rcu; struct hlist_node hash_node[2]; @@ -155,12 +172,7 @@ struct sw_flow { struct sw_flow_key unmasked_key; struct sw_flow_mask *mask; struct sw_flow_actions __rcu *sf_acts; - - spinlock_t lock; /* Lock for values below. */ - unsigned long used; /* Last used time (in jiffies). */ - u64 packet_count; /* Number of packets matched. */ - u64 byte_count; /* Number of bytes matched. */ - __be16 tcp_flags; /* Union of seen TCP flags. 
*/ + struct sw_flow_stats stats; }; struct arp_eth_header { @@ -177,7 +189,10 @@ struct arp_eth_header { unsigned char ar_tip[4]; /* target IP address */ } __packed; -void ovs_flow_used(struct sw_flow *, struct sk_buff *); +void ovs_flow_stats_update(struct sw_flow *flow, struct sk_buff *skb); +void ovs_flow_stats_get(struct sw_flow *flow, struct ovs_flow_stats *stats, + unsigned long *used, __be16 *tcp_flags); +void ovs_flow_stats_clear(struct sw_flow *flow); u64 ovs_flow_used_time(unsigned long flow_jiffies); int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *); diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 2bc1bc1aca3..4d000acaed0 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -266,6 +266,20 @@ static bool is_all_zero(const u8 *fp, size_t size) return true; } +static bool is_all_set(const u8 *fp, size_t size) +{ + int i; + + if (!fp) + return false; + + for (i = 0; i < size; i++) + if (fp[i] != 0xff) + return false; + + return true; +} + static int __parse_flow_nlattrs(const struct nlattr *attr, const struct nlattr *a[], u64 *attrsp, bool nz) @@ -487,8 +501,9 @@ static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs, return 0; } -static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, - const struct nlattr **a, bool is_mask) +static int ovs_key_from_nlattrs(struct sw_flow_match *match, bool *exact_5tuple, + u64 attrs, const struct nlattr **a, + bool is_mask) { int err; u64 orig_attrs = attrs; @@ -545,6 +560,11 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask); } + if (is_mask && exact_5tuple) { + if (match->mask->key.eth.type != htons(0xffff)) + *exact_5tuple = false; + } + if (attrs & (1 << OVS_KEY_ATTR_IPV4)) { const struct ovs_key_ipv4 *ipv4_key; @@ -567,6 +587,13 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, 
SW_FLOW_KEY_PUT(match, ipv4.addr.dst, ipv4_key->ipv4_dst, is_mask); attrs &= ~(1 << OVS_KEY_ATTR_IPV4); + + if (is_mask && exact_5tuple && *exact_5tuple) { + if (ipv4_key->ipv4_proto != 0xff || + ipv4_key->ipv4_src != htonl(0xffffffff) || + ipv4_key->ipv4_dst != htonl(0xffffffff)) + *exact_5tuple = false; + } } if (attrs & (1 << OVS_KEY_ATTR_IPV6)) { @@ -598,6 +625,13 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, is_mask); attrs &= ~(1 << OVS_KEY_ATTR_IPV6); + + if (is_mask && exact_5tuple && *exact_5tuple) { + if (ipv6_key->ipv6_proto != 0xff || + !is_all_set((u8 *)ipv6_key->ipv6_src, sizeof(match->key->ipv6.addr.src)) || + !is_all_set((u8 *)ipv6_key->ipv6_dst, sizeof(match->key->ipv6.addr.dst))) + *exact_5tuple = false; + } } if (attrs & (1 << OVS_KEY_ATTR_ARP)) { @@ -640,6 +674,11 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, tcp_key->tcp_dst, is_mask); } attrs &= ~(1 << OVS_KEY_ATTR_TCP); + + if (is_mask && exact_5tuple && *exact_5tuple && + (tcp_key->tcp_src != htons(0xffff) || + tcp_key->tcp_dst != htons(0xffff))) + *exact_5tuple = false; } if (attrs & (1 << OVS_KEY_ATTR_TCP_FLAGS)) { @@ -671,6 +710,11 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, udp_key->udp_dst, is_mask); } attrs &= ~(1 << OVS_KEY_ATTR_UDP); + + if (is_mask && exact_5tuple && *exact_5tuple && + (udp_key->udp_src != htons(0xffff) || + udp_key->udp_dst != htons(0xffff))) + *exact_5tuple = false; } if (attrs & (1 << OVS_KEY_ATTR_SCTP)) { @@ -756,6 +800,7 @@ static void sw_flow_mask_set(struct sw_flow_mask *mask, * attribute specifies the mask field of the wildcarded flow. 
*/ int ovs_nla_get_match(struct sw_flow_match *match, + bool *exact_5tuple, const struct nlattr *key, const struct nlattr *mask) { @@ -803,10 +848,13 @@ int ovs_nla_get_match(struct sw_flow_match *match, } } - err = ovs_key_from_nlattrs(match, key_attrs, a, false); + err = ovs_key_from_nlattrs(match, NULL, key_attrs, a, false); if (err) return err; + if (exact_5tuple) + *exact_5tuple = true; + if (mask) { err = parse_flow_mask_nlattrs(mask, a, &mask_attrs); if (err) @@ -844,7 +892,7 @@ int ovs_nla_get_match(struct sw_flow_match *match, } } - err = ovs_key_from_nlattrs(match, mask_attrs, a, true); + err = ovs_key_from_nlattrs(match, exact_5tuple, mask_attrs, a, true); if (err) return err; } else { @@ -1128,19 +1176,11 @@ struct sw_flow_actions *ovs_nla_alloc_flow_actions(int size) return sfa; } -/* RCU callback used by ovs_nla_free_flow_actions. */ -static void rcu_free_acts_callback(struct rcu_head *rcu) -{ - struct sw_flow_actions *sf_acts = container_of(rcu, - struct sw_flow_actions, rcu); - kfree(sf_acts); -} - /* Schedules 'sf_acts' to be freed after the next RCU grace period. * The caller must hold rcu_read_lock for this to be sensible. 
*/ void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts) { - call_rcu(&sf_acts->rcu, rcu_free_acts_callback); + kfree_rcu(sf_acts, rcu); } static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h index 440151045d3..b31fbe28bc7 100644 --- a/net/openvswitch/flow_netlink.h +++ b/net/openvswitch/flow_netlink.h @@ -45,6 +45,7 @@ int ovs_nla_put_flow(const struct sw_flow_key *, int ovs_nla_get_flow_metadata(struct sw_flow *flow, const struct nlattr *attr); int ovs_nla_get_match(struct sw_flow_match *match, + bool *exact_5tuple, const struct nlattr *, const struct nlattr *); diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index e4254270608..c58a0fe3c88 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -25,7 +25,7 @@ #include <linux/if_vlan.h> #include <net/llc_pdu.h> #include <linux/kernel.h> -#include <linux/jhash.h> +#include <linux/hash.h> #include <linux/jiffies.h> #include <linux/llc.h> #include <linux/module.h> @@ -44,8 +44,6 @@ #include <net/ipv6.h> #include <net/ndisc.h> -#include "datapath.h" - #define TBL_MIN_BUCKETS 1024 #define REHASH_INTERVAL (10 * 60 * HZ) @@ -72,19 +70,42 @@ void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src, *d++ = *s++ & *m++; } -struct sw_flow *ovs_flow_alloc(void) +struct sw_flow *ovs_flow_alloc(bool percpu_stats) { struct sw_flow *flow; + int cpu; flow = kmem_cache_alloc(flow_cache, GFP_KERNEL); if (!flow) return ERR_PTR(-ENOMEM); - spin_lock_init(&flow->lock); flow->sf_acts = NULL; flow->mask = NULL; + flow->stats.is_percpu = percpu_stats; + + if (!percpu_stats) { + flow->stats.stat = kzalloc(sizeof(*flow->stats.stat), GFP_KERNEL); + if (!flow->stats.stat) + goto err; + + spin_lock_init(&flow->stats.stat->lock); + } else { + flow->stats.cpu_stats = alloc_percpu(struct flow_stats); + if (!flow->stats.cpu_stats) + goto err; + + for_each_possible_cpu(cpu) { + 
struct flow_stats *cpu_stats; + + cpu_stats = per_cpu_ptr(flow->stats.cpu_stats, cpu); + spin_lock_init(&cpu_stats->lock); + } + } return flow; +err: + kmem_cache_free(flow_cache, flow); + return ERR_PTR(-ENOMEM); } int ovs_flow_tbl_count(struct flow_table *table) @@ -118,6 +139,10 @@ static struct flex_array *alloc_buckets(unsigned int n_buckets) static void flow_free(struct sw_flow *flow) { kfree((struct sf_flow_acts __force *)flow->sf_acts); + if (flow->stats.is_percpu) + free_percpu(flow->stats.cpu_stats); + else + kfree(flow->stats.stat); kmem_cache_free(flow_cache, flow); } @@ -128,13 +153,6 @@ static void rcu_free_flow_callback(struct rcu_head *rcu) flow_free(flow); } -static void rcu_free_sw_flow_mask_cb(struct rcu_head *rcu) -{ - struct sw_flow_mask *mask = container_of(rcu, struct sw_flow_mask, rcu); - - kfree(mask); -} - static void flow_mask_del_ref(struct sw_flow_mask *mask, bool deferred) { if (!mask) @@ -146,7 +164,7 @@ static void flow_mask_del_ref(struct sw_flow_mask *mask, bool deferred) if (!mask->ref_count) { list_del_rcu(&mask->list); if (deferred) - call_rcu(&mask->rcu, rcu_free_sw_flow_mask_cb); + kfree_rcu(mask, rcu); else kfree(mask); } @@ -362,7 +380,7 @@ static u32 flow_hash(const struct sw_flow_key *key, int key_start, /* Make sure number of hash bytes are multiple of u32. 
*/ BUILD_BUG_ON(sizeof(long) % sizeof(u32)); - return jhash2(hash_key, hash_u32s, 0); + return arch_fast_hash2(hash_key, hash_u32s, 0); } static int flow_key_start(const struct sw_flow_key *key) @@ -429,11 +447,11 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti, return NULL; } -struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl, +struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl, const struct sw_flow_key *key, u32 *n_mask_hit) { - struct table_instance *ti = rcu_dereference(tbl->ti); + struct table_instance *ti = rcu_dereference_ovsl(tbl->ti); struct sw_flow_mask *mask; struct sw_flow *flow; @@ -447,6 +465,14 @@ struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl, return NULL; } +struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl, + const struct sw_flow_key *key) +{ + u32 __always_unused n_mask_hit; + + return ovs_flow_tbl_lookup_stats(tbl, key, &n_mask_hit); +} + int ovs_flow_tbl_num_masks(const struct flow_table *table) { struct sw_flow_mask *mask; @@ -514,11 +540,7 @@ static struct sw_flow_mask *flow_mask_find(const struct flow_table *tbl, return NULL; } -/** - * add a new mask into the mask list. - * The caller needs to make sure that 'mask' is not the same - * as any masks that are already on the list. - */ +/* Add 'mask' into the mask list, if it is not already there. 
*/ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow, struct sw_flow_mask *new) { diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h index fbe45d5ad07..1996e34c0fd 100644 --- a/net/openvswitch/flow_table.h +++ b/net/openvswitch/flow_table.h @@ -55,7 +55,7 @@ struct flow_table { int ovs_flow_init(void); void ovs_flow_exit(void); -struct sw_flow *ovs_flow_alloc(void); +struct sw_flow *ovs_flow_alloc(bool percpu_stats); void ovs_flow_free(struct sw_flow *, bool deferred); int ovs_flow_tbl_init(struct flow_table *); @@ -69,9 +69,11 @@ void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow); int ovs_flow_tbl_num_masks(const struct flow_table *table); struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *table, u32 *bucket, u32 *idx); -struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *, +struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *, const struct sw_flow_key *, u32 *n_mask_hit); +struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *, + const struct sw_flow_key *); bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow, struct sw_flow_match *match); diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index d830a95f03a..208dd9a26dd 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -33,6 +33,9 @@ #include "vport.h" #include "vport-internal_dev.h" +static void ovs_vport_record_error(struct vport *, + enum vport_err_type err_type); + /* List of statically compiled vport implementations. Don't forget to also * add yours to the list at the bottom of vport.h. 
*/ static const struct vport_ops *vport_ops_list[] = { @@ -136,14 +139,14 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops, vport->ops = ops; INIT_HLIST_NODE(&vport->dp_hash_node); - vport->percpu_stats = alloc_percpu(struct pcpu_tstats); + vport->percpu_stats = alloc_percpu(struct pcpu_sw_netstats); if (!vport->percpu_stats) { kfree(vport); return ERR_PTR(-ENOMEM); } for_each_possible_cpu(i) { - struct pcpu_tstats *vport_stats; + struct pcpu_sw_netstats *vport_stats; vport_stats = per_cpu_ptr(vport->percpu_stats, i); u64_stats_init(&vport_stats->syncp); } @@ -275,8 +278,8 @@ void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats) spin_unlock_bh(&vport->stats_lock); for_each_possible_cpu(i) { - const struct pcpu_tstats *percpu_stats; - struct pcpu_tstats local_stats; + const struct pcpu_sw_netstats *percpu_stats; + struct pcpu_sw_netstats local_stats; unsigned int start; percpu_stats = per_cpu_ptr(vport->percpu_stats, i); @@ -344,7 +347,7 @@ int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb) void ovs_vport_receive(struct vport *vport, struct sk_buff *skb, struct ovs_key_ipv4_tunnel *tun_key) { - struct pcpu_tstats *stats; + struct pcpu_sw_netstats *stats; stats = this_cpu_ptr(vport->percpu_stats); u64_stats_update_begin(&stats->syncp); @@ -370,7 +373,7 @@ int ovs_vport_send(struct vport *vport, struct sk_buff *skb) int sent = vport->ops->send(vport, skb); if (likely(sent > 0)) { - struct pcpu_tstats *stats; + struct pcpu_sw_netstats *stats; stats = this_cpu_ptr(vport->percpu_stats); @@ -396,7 +399,8 @@ int ovs_vport_send(struct vport *vport, struct sk_buff *skb) * If using the vport generic stats layer indicate that an error of the given * type has occurred. 
*/ -void ovs_vport_record_error(struct vport *vport, enum vport_err_type err_type) +static void ovs_vport_record_error(struct vport *vport, + enum vport_err_type err_type) { spin_lock(&vport->stats_lock); diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index 1a9fbcec6e1..d7e50a17396 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -87,7 +87,7 @@ struct vport { struct hlist_node dp_hash_node; const struct vport_ops *ops; - struct pcpu_tstats __percpu *percpu_stats; + struct pcpu_sw_netstats __percpu *percpu_stats; spinlock_t stats_lock; struct vport_err_stats err_stats; @@ -192,7 +192,6 @@ static inline struct vport *vport_from_priv(const void *priv) void ovs_vport_receive(struct vport *, struct sk_buff *, struct ovs_key_ipv4_tunnel *); -void ovs_vport_record_error(struct vport *, enum vport_err_type err_type); /* List of statically compiled vport implementations. Don't forget to also * add yours to the list at the top of vport.c. */ diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 88cfbc18955..6a2bb37506c 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -88,7 +88,7 @@ #include <linux/virtio_net.h> #include <linux/errqueue.h> #include <linux/net_tstamp.h> -#include <linux/reciprocal_div.h> +#include <linux/percpu.h> #ifdef CONFIG_INET #include <net/inet_common.h> #endif @@ -237,6 +237,48 @@ struct packet_skb_cb { static void __fanout_unlink(struct sock *sk, struct packet_sock *po); static void __fanout_link(struct sock *sk, struct packet_sock *po); +static int packet_direct_xmit(struct sk_buff *skb) +{ + struct net_device *dev = skb->dev; + const struct net_device_ops *ops = dev->netdev_ops; + netdev_features_t features; + struct netdev_queue *txq; + u16 queue_map; + int ret; + + if (unlikely(!netif_running(dev) || + !netif_carrier_ok(dev))) { + kfree_skb(skb); + return NET_XMIT_DROP; + } + + features = netif_skb_features(skb); + if (skb_needs_linearize(skb, features) && + __skb_linearize(skb)) 
{ + kfree_skb(skb); + return NET_XMIT_DROP; + } + + queue_map = skb_get_queue_mapping(skb); + txq = netdev_get_tx_queue(dev, queue_map); + + __netif_tx_lock_bh(txq); + if (unlikely(netif_xmit_frozen_or_stopped(txq))) { + ret = NETDEV_TX_BUSY; + kfree_skb(skb); + goto out; + } + + ret = ops->ndo_start_xmit(skb, dev); + if (likely(dev_xmit_complete(ret))) + txq_trans_update(txq); + else + kfree_skb(skb); +out: + __netif_tx_unlock_bh(txq); + return ret; +} + static struct net_device *packet_cached_dev_get(struct packet_sock *po) { struct net_device *dev; @@ -261,6 +303,16 @@ static void packet_cached_dev_reset(struct packet_sock *po) RCU_INIT_POINTER(po->cached_dev, NULL); } +static bool packet_use_direct_xmit(const struct packet_sock *po) +{ + return po->xmit == packet_direct_xmit; +} + +static u16 packet_pick_tx_queue(struct net_device *dev) +{ + return (u16) raw_smp_processor_id() % dev->real_num_tx_queues; +} + /* register_prot_hook must be invoked with the po->bind_lock held, * or from a context in which asynchronous accesses to the packet * socket is not possible (packet_create()). @@ -458,7 +510,8 @@ static void prb_shutdown_retire_blk_timer(struct packet_sock *po, { struct tpacket_kbdq_core *pkc; - pkc = tx_ring ? &po->tx_ring.prb_bdqc : &po->rx_ring.prb_bdqc; + pkc = tx_ring ? GET_PBDQC_FROM_RB(&po->tx_ring) : + GET_PBDQC_FROM_RB(&po->rx_ring); spin_lock_bh(&rb_queue->lock); pkc->delete_blk_timer = 1; @@ -484,7 +537,8 @@ static void prb_setup_retire_blk_timer(struct packet_sock *po, int tx_ring) if (tx_ring) BUG(); - pkc = tx_ring ? &po->tx_ring.prb_bdqc : &po->rx_ring.prb_bdqc; + pkc = tx_ring ? 
GET_PBDQC_FROM_RB(&po->tx_ring) : + GET_PBDQC_FROM_RB(&po->rx_ring); prb_init_blk_timer(po, pkc, prb_retire_rx_blk_timer_expired); } @@ -542,7 +596,7 @@ static void init_prb_bdqc(struct packet_sock *po, struct pgv *pg_vec, union tpacket_req_u *req_u, int tx_ring) { - struct tpacket_kbdq_core *p1 = &rb->prb_bdqc; + struct tpacket_kbdq_core *p1 = GET_PBDQC_FROM_RB(rb); struct tpacket_block_desc *pbd; memset(p1, 0x0, sizeof(*p1)); @@ -606,7 +660,7 @@ static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *pkc) static void prb_retire_rx_blk_timer_expired(unsigned long data) { struct packet_sock *po = (struct packet_sock *)data; - struct tpacket_kbdq_core *pkc = &po->rx_ring.prb_bdqc; + struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(&po->rx_ring); unsigned int frozen; struct tpacket_block_desc *pbd; @@ -909,7 +963,7 @@ static void prb_clear_blk_fill_status(struct packet_ring_buffer *rb) static void prb_fill_rxhash(struct tpacket_kbdq_core *pkc, struct tpacket3_hdr *ppd) { - ppd->hv1.tp_rxhash = skb_get_rxhash(pkc->skb); + ppd->hv1.tp_rxhash = skb_get_hash(pkc->skb); } static void prb_clear_rxhash(struct tpacket_kbdq_core *pkc, @@ -923,9 +977,11 @@ static void prb_fill_vlan_info(struct tpacket_kbdq_core *pkc, { if (vlan_tx_tag_present(pkc->skb)) { ppd->hv1.tp_vlan_tci = vlan_tx_tag_get(pkc->skb); - ppd->tp_status = TP_STATUS_VLAN_VALID; + ppd->hv1.tp_vlan_tpid = ntohs(pkc->skb->vlan_proto); + ppd->tp_status = TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID; } else { ppd->hv1.tp_vlan_tci = 0; + ppd->hv1.tp_vlan_tpid = 0; ppd->tp_status = TP_STATUS_AVAILABLE; } } @@ -933,6 +989,7 @@ static void prb_fill_vlan_info(struct tpacket_kbdq_core *pkc, static void prb_run_all_ft_ops(struct tpacket_kbdq_core *pkc, struct tpacket3_hdr *ppd) { + ppd->hv1.tp_padding = 0; prb_fill_vlan_info(pkc, ppd); if (pkc->feature_req_word & TP_FT_REQ_FILL_RXHASH) @@ -1111,6 +1168,47 @@ static void packet_increment_head(struct packet_ring_buffer *buff) buff->head = buff->head != 
buff->frame_max ? buff->head+1 : 0; } +static void packet_inc_pending(struct packet_ring_buffer *rb) +{ + this_cpu_inc(*rb->pending_refcnt); +} + +static void packet_dec_pending(struct packet_ring_buffer *rb) +{ + this_cpu_dec(*rb->pending_refcnt); +} + +static unsigned int packet_read_pending(const struct packet_ring_buffer *rb) +{ + unsigned int refcnt = 0; + int cpu; + + /* We don't use pending refcount in rx_ring. */ + if (rb->pending_refcnt == NULL) + return 0; + + for_each_possible_cpu(cpu) + refcnt += *per_cpu_ptr(rb->pending_refcnt, cpu); + + return refcnt; +} + +static int packet_alloc_pending(struct packet_sock *po) +{ + po->rx_ring.pending_refcnt = NULL; + + po->tx_ring.pending_refcnt = alloc_percpu(unsigned int); + if (unlikely(po->tx_ring.pending_refcnt == NULL)) + return -ENOBUFS; + + return 0; +} + +static void packet_free_pending(struct packet_sock *po) +{ + free_percpu(po->tx_ring.pending_refcnt); +} + static bool packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb) { struct sock *sk = &po->sk; @@ -1163,7 +1261,7 @@ static unsigned int fanout_demux_hash(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) { - return reciprocal_divide(skb->rxhash, num); + return reciprocal_scale(skb->rxhash, num); } static unsigned int fanout_demux_lb(struct packet_fanout *f, @@ -1190,7 +1288,7 @@ static unsigned int fanout_demux_rnd(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) { - return reciprocal_divide(prandom_u32(), num); + return prandom_u32_max(num); } static unsigned int fanout_demux_rollover(struct packet_fanout *f, @@ -1214,6 +1312,13 @@ static unsigned int fanout_demux_rollover(struct packet_fanout *f, return idx; } +static unsigned int fanout_demux_qm(struct packet_fanout *f, + struct sk_buff *skb, + unsigned int num) +{ + return skb_get_queue_mapping(skb) % num; +} + static bool fanout_has_flag(struct packet_fanout *f, u16 flag) { return f->flags & (flag >> 8); @@ -1241,7 +1346,7 @@ static int 
packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev, if (!skb) return 0; } - skb_get_rxhash(skb); + skb_get_hash(skb); idx = fanout_demux_hash(f, skb, num); break; case PACKET_FANOUT_LB: @@ -1253,6 +1358,9 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev, case PACKET_FANOUT_RND: idx = fanout_demux_rnd(f, skb, num); break; + case PACKET_FANOUT_QM: + idx = fanout_demux_qm(f, skb, num); + break; case PACKET_FANOUT_ROLLOVER: idx = fanout_demux_rollover(f, skb, 0, (unsigned int) -1, num); break; @@ -1299,9 +1407,9 @@ static void __fanout_unlink(struct sock *sk, struct packet_sock *po) spin_unlock(&f->lock); } -static bool match_fanout_group(struct packet_type *ptype, struct sock * sk) +static bool match_fanout_group(struct packet_type *ptype, struct sock *sk) { - if (ptype->af_packet_priv == (void*)((struct packet_sock *)sk)->fanout) + if (ptype->af_packet_priv == (void *)((struct packet_sock *)sk)->fanout) return true; return false; @@ -1323,6 +1431,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) case PACKET_FANOUT_LB: case PACKET_FANOUT_CPU: case PACKET_FANOUT_RND: + case PACKET_FANOUT_QM: break; default: return -EINVAL; @@ -1485,7 +1594,7 @@ static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; - struct sockaddr_pkt *saddr = (struct sockaddr_pkt *)msg->msg_name; + DECLARE_SOCKADDR(struct sockaddr_pkt *, saddr, msg->msg_name); struct sk_buff *skb = NULL; struct net_device *dev; __be16 proto = 0; @@ -1758,6 +1867,13 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct timespec ts; __u32 ts_status; + /* struct tpacket{2,3}_hdr is aligned to a multiple of TPACKET_ALIGNMENT. + * We may add members to them until current aligned size without forcing + * userspace to call getsockopt(..., PACKET_HDRLEN, ...). 
+ */ + BUILD_BUG_ON(TPACKET_ALIGN(sizeof(*h.h2)) != 32); + BUILD_BUG_ON(TPACKET_ALIGN(sizeof(*h.h3)) != 48); + if (skb->pkt_type == PACKET_LOOPBACK) goto drop; @@ -1864,11 +1980,13 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, h.h2->tp_nsec = ts.tv_nsec; if (vlan_tx_tag_present(skb)) { h.h2->tp_vlan_tci = vlan_tx_tag_get(skb); - status |= TP_STATUS_VLAN_VALID; + h.h2->tp_vlan_tpid = ntohs(skb->vlan_proto); + status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID; } else { h.h2->tp_vlan_tci = 0; + h.h2->tp_vlan_tpid = 0; } - h.h2->tp_padding = 0; + memset(h.h2->tp_padding, 0, sizeof(h.h2->tp_padding)); hdrlen = sizeof(*h.h2); break; case TPACKET_V3: @@ -1882,6 +2000,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, h.h3->tp_net = netoff; h.h3->tp_sec = ts.tv_sec; h.h3->tp_nsec = ts.tv_nsec; + memset(h.h3->tp_padding, 0, sizeof(h.h3->tp_padding)); hdrlen = sizeof(*h.h3); break; default: @@ -1900,19 +2019,20 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, sll->sll_ifindex = dev->ifindex; smp_mb(); + #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1 - { + if (po->tp_version <= TPACKET_V2) { u8 *start, *end; - if (po->tp_version <= TPACKET_V2) { - end = (u8 *)PAGE_ALIGN((unsigned long)h.raw - + macoff + snaplen); - for (start = h.raw; start < end; start += PAGE_SIZE) - flush_dcache_page(pgv_to_page(start)); - } - smp_wmb(); + end = (u8 *) PAGE_ALIGN((unsigned long) h.raw + + macoff + snaplen); + + for (start = h.raw; start < end; start += PAGE_SIZE) + flush_dcache_page(pgv_to_page(start)); } + smp_wmb(); #endif + if (po->tp_version <= TPACKET_V2) __packet_set_status(po, h.raw, status); else @@ -1941,14 +2061,13 @@ ring_is_full: static void tpacket_destruct_skb(struct sk_buff *skb) { struct packet_sock *po = pkt_sk(skb->sk); - void *ph; if (likely(po->tx_ring.pg_vec)) { + void *ph; __u32 ts; ph = skb_shinfo(skb)->destructor_arg; - BUG_ON(atomic_read(&po->tx_ring.pending) == 0); - 
atomic_dec(&po->tx_ring.pending); + packet_dec_pending(&po->tx_ring); ts = __packet_set_timestamp(po, ph, skb); __packet_set_status(po, ph, TP_STATUS_AVAILABLE | ts); @@ -1992,9 +2111,10 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, skb_reserve(skb, hlen); skb_reset_network_header(skb); - skb_probe_transport_header(skb, 0); - if (po->tp_tx_has_off) { + if (!packet_use_direct_xmit(po)) + skb_probe_transport_header(skb, 0); + if (unlikely(po->tp_tx_has_off)) { int off_min, off_max, off; off_min = po->tp_hdrlen - sizeof(struct sockaddr_ll); off_max = po->tx_ring.frame_size - tp_len; @@ -2087,7 +2207,8 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) __be16 proto; int err, reserve = 0; void *ph; - struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name; + DECLARE_SOCKADDR(struct sockaddr_ll *, saddr, msg->msg_name); + bool need_wait = !(msg->msg_flags & MSG_DONTWAIT); int tp_len, size_max; unsigned char *addr; int len_sum = 0; @@ -2130,10 +2251,10 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) do { ph = packet_current_frame(po, &po->tx_ring, - TP_STATUS_SEND_REQUEST); - + TP_STATUS_SEND_REQUEST); if (unlikely(ph == NULL)) { - schedule(); + if (need_wait && need_resched()) + schedule(); continue; } @@ -2164,12 +2285,13 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) } } + skb_set_queue_mapping(skb, packet_pick_tx_queue(dev)); skb->destructor = tpacket_destruct_skb; __packet_set_status(po, ph, TP_STATUS_SENDING); - atomic_inc(&po->tx_ring.pending); + packet_inc_pending(&po->tx_ring); status = TP_STATUS_SEND_REQUEST; - err = dev_queue_xmit(skb); + err = po->xmit(skb); if (unlikely(err > 0)) { err = net_xmit_errno(err); if (err && __packet_get_status(po, ph) == @@ -2187,9 +2309,13 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) packet_increment_head(&po->tx_ring); len_sum += tp_len; } while (likely((ph != NULL) || - ((!(msg->msg_flags & 
MSG_DONTWAIT)) && - (atomic_read(&po->tx_ring.pending)))) - ); + /* Note: packet_read_pending() might be slow if we have + * to call it as it's per_cpu variable, but in fast-path + * we already short-circuit the loop with the first + * condition, and luckily don't have to go that path + * anyway. + */ + (need_wait && packet_read_pending(&po->tx_ring)))); err = len_sum; goto out_put; @@ -2228,11 +2354,10 @@ static struct sk_buff *packet_alloc_skb(struct sock *sk, size_t prepad, return skb; } -static int packet_snd(struct socket *sock, - struct msghdr *msg, size_t len) +static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; - struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name; + DECLARE_SOCKADDR(struct sockaddr_ll *, saddr, msg->msg_name); struct sk_buff *skb; struct net_device *dev; __be16 proto; @@ -2374,6 +2499,7 @@ static int packet_snd(struct socket *sock, skb->dev = dev; skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; + skb_set_queue_mapping(skb, packet_pick_tx_queue(dev)); if (po->has_vnet_hdr) { if (vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { @@ -2394,16 +2520,12 @@ static int packet_snd(struct socket *sock, len += vnet_hdr_len; } - skb_probe_transport_header(skb, reserve); - + if (!packet_use_direct_xmit(po)) + skb_probe_transport_header(skb, reserve); if (unlikely(extra_len == 4)) skb->no_fcs = 1; - /* - * Now send it - */ - - err = dev_queue_xmit(skb); + err = po->xmit(skb); if (err > 0 && (err = net_xmit_errno(err)) != 0) goto out_unlock; @@ -2425,6 +2547,7 @@ static int packet_sendmsg(struct kiocb *iocb, struct socket *sock, { struct sock *sk = sock->sk; struct packet_sock *po = pkt_sk(sk); + if (po->tx_ring.pg_vec) return tpacket_snd(po, msg); else @@ -2491,6 +2614,7 @@ static int packet_release(struct socket *sock) /* Purge queues */ skb_queue_purge(&sk->sk_receive_queue); + packet_free_pending(po); sk_refcnt_debug_release(sk); sock_put(sk); @@ -2501,9 +2625,12 @@ static int 
packet_release(struct socket *sock) * Attach a packet hook. */ -static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protocol) +static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 proto) { struct packet_sock *po = pkt_sk(sk); + const struct net_device *dev_curr; + __be16 proto_curr; + bool need_rehook; if (po->fanout) { if (dev) @@ -2513,21 +2640,29 @@ static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protoc } lock_sock(sk); - spin_lock(&po->bind_lock); - unregister_prot_hook(sk, true); - po->num = protocol; - po->prot_hook.type = protocol; - if (po->prot_hook.dev) - dev_put(po->prot_hook.dev); + proto_curr = po->prot_hook.type; + dev_curr = po->prot_hook.dev; + + need_rehook = proto_curr != proto || dev_curr != dev; + + if (need_rehook) { + unregister_prot_hook(sk, true); + + po->num = proto; + po->prot_hook.type = proto; + + if (po->prot_hook.dev) + dev_put(po->prot_hook.dev); - po->prot_hook.dev = dev; - po->ifindex = dev ? dev->ifindex : 0; + po->prot_hook.dev = dev; - packet_cached_dev_assign(po, dev); + po->ifindex = dev ? dev->ifindex : 0; + packet_cached_dev_assign(po, dev); + } - if (protocol == 0) + if (proto == 0 || !need_rehook) goto out_unlock; if (!dev || (dev->flags & IFF_UP)) { @@ -2639,6 +2774,11 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, po = pkt_sk(sk); sk->sk_family = PF_PACKET; po->num = proto; + po->xmit = dev_queue_xmit; + + err = packet_alloc_pending(po); + if (err) + goto out2; packet_cached_dev_reset(po); @@ -2672,6 +2812,8 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, preempt_enable(); return 0; +out2: + sk_free(sk); out: return err; } @@ -2791,6 +2933,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, * in, we fill it in now. 
*/ if (sock->type == SOCK_PACKET) { + __sockaddr_check_size(sizeof(struct sockaddr_pkt)); msg->msg_namelen = sizeof(struct sockaddr_pkt); } else { struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll; @@ -2813,11 +2956,12 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, aux.tp_net = skb_network_offset(skb); if (vlan_tx_tag_present(skb)) { aux.tp_vlan_tci = vlan_tx_tag_get(skb); - aux.tp_status |= TP_STATUS_VLAN_VALID; + aux.tp_vlan_tpid = ntohs(skb->vlan_proto); + aux.tp_status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID; } else { aux.tp_vlan_tci = 0; + aux.tp_vlan_tpid = 0; } - aux.tp_padding = 0; put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux); } @@ -3218,6 +3362,18 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv po->tp_tx_has_off = !!val; return 0; } + case PACKET_QDISC_BYPASS: + { + int val; + + if (optlen != sizeof(val)) + return -EINVAL; + if (copy_from_user(&val, optval, sizeof(val))) + return -EFAULT; + + po->xmit = val ? 
packet_direct_xmit : dev_queue_xmit; + return 0; + } default: return -ENOPROTOOPT; } @@ -3310,6 +3466,9 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, case PACKET_TX_HAS_OFF: val = po->tp_tx_has_off; break; + case PACKET_QDISC_BYPASS: + val = packet_use_direct_xmit(po); + break; default: return -ENOPROTOOPT; } @@ -3501,34 +3660,26 @@ static void free_pg_vec(struct pgv *pg_vec, unsigned int order, static char *alloc_one_pg_vec_page(unsigned long order) { - char *buffer = NULL; + char *buffer; gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY; buffer = (char *) __get_free_pages(gfp_flags, order); - if (buffer) return buffer; - /* - * __get_free_pages failed, fall back to vmalloc - */ + /* __get_free_pages failed, fall back to vmalloc */ buffer = vzalloc((1 << order) * PAGE_SIZE); - if (buffer) return buffer; - /* - * vmalloc failed, lets dig into swap here - */ + /* vmalloc failed, lets dig into swap here */ gfp_flags &= ~__GFP_NORETRY; - buffer = (char *)__get_free_pages(gfp_flags, order); + buffer = (char *) __get_free_pages(gfp_flags, order); if (buffer) return buffer; - /* - * complete and utter failure - */ + /* complete and utter failure */ return NULL; } @@ -3583,7 +3734,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, if (!closing) { if (atomic_read(&po->mapped)) goto out; - if (atomic_read(&rb->pending)) + if (packet_read_pending(rb)) goto out; } diff --git a/net/packet/diag.c b/net/packet/diag.c index a9584a2f6d6..533ce4ff108 100644 --- a/net/packet/diag.c +++ b/net/packet/diag.c @@ -3,6 +3,7 @@ #include <linux/net.h> #include <linux/netdevice.h> #include <linux/packet_diag.h> +#include <linux/percpu.h> #include <net/net_namespace.h> #include <net/sock.h> diff --git a/net/packet/internal.h b/net/packet/internal.h index 1035fa2d909..eb9580a6b25 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -64,7 +64,7 @@ struct packet_ring_buffer { unsigned int 
pg_vec_pages; unsigned int pg_vec_len; - atomic_t pending; + unsigned int __percpu *pending_refcnt; struct tpacket_kbdq_core prb_bdqc; }; @@ -114,6 +114,7 @@ struct packet_sock { unsigned int tp_tx_has_off:1; unsigned int tp_tstamp; struct net_device __rcu *cached_dev; + int (*xmit)(struct sk_buff *skb); struct packet_type prot_hook ____cacheline_aligned_in_smp; }; diff --git a/net/phonet/datagram.c b/net/phonet/datagram.c index 38946b26e47..290352c0e6b 100644 --- a/net/phonet/datagram.c +++ b/net/phonet/datagram.c @@ -86,7 +86,7 @@ static int pn_init(struct sock *sk) static int pn_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len) { - struct sockaddr_pn *target; + DECLARE_SOCKADDR(struct sockaddr_pn *, target, msg->msg_name); struct sk_buff *skb; int err; @@ -94,13 +94,12 @@ static int pn_sendmsg(struct kiocb *iocb, struct sock *sk, MSG_CMSG_COMPAT)) return -EOPNOTSUPP; - if (msg->msg_name == NULL) + if (target == NULL) return -EDESTADDRREQ; if (msg->msg_namelen < sizeof(struct sockaddr_pn)) return -EINVAL; - target = (struct sockaddr_pn *)msg->msg_name; if (target->spn_family != AF_PHONET) return -EAFNOSUPPORT; @@ -160,6 +159,7 @@ static int pn_recvmsg(struct kiocb *iocb, struct sock *sk, rval = (flags & MSG_TRUNC) ? 
skb->len : copylen; if (msg->msg_name != NULL) { + __sockaddr_check_size(sizeof(sa)); memcpy(msg->msg_name, &sa, sizeof(sa)); *addr_len = sizeof(sa); } diff --git a/net/rds/bind.c b/net/rds/bind.c index b5ad65a0067..a2e6562da75 100644 --- a/net/rds/bind.c +++ b/net/rds/bind.c @@ -117,7 +117,7 @@ static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port) rover = be16_to_cpu(*port); last = rover; } else { - rover = max_t(u16, net_random(), 2); + rover = max_t(u16, prandom_u32(), 2); last = rover - 1; } diff --git a/net/rds/recv.c b/net/rds/recv.c index de339b24ca1..bd82522534f 100644 --- a/net/rds/recv.c +++ b/net/rds/recv.c @@ -402,7 +402,7 @@ int rds_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, struct rds_sock *rs = rds_sk_to_rs(sk); long timeo; int ret = 0, nonblock = msg_flags & MSG_DONTWAIT; - struct sockaddr_in *sin; + DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name); struct rds_incoming *inc = NULL; /* udp_recvmsg()->sock_recvtimeo() gets away without locking too.. */ @@ -479,7 +479,6 @@ int rds_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, rds_stats_inc(s_recv_delivered); - sin = (struct sockaddr_in *)msg->msg_name; if (sin) { sin->sin_family = AF_INET; sin->sin_port = inc->i_hdr.h_sport; diff --git a/net/rds/send.c b/net/rds/send.c index 88eace57dd6..a82fb660ec0 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -922,7 +922,7 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, { struct sock *sk = sock->sk; struct rds_sock *rs = rds_sk_to_rs(sk); - struct sockaddr_in *usin = (struct sockaddr_in *)msg->msg_name; + DECLARE_SOCKADDR(struct sockaddr_in *, usin, msg->msg_name); __be32 daddr; __be16 dport; struct rds_message *rm = NULL; diff --git a/net/rfkill/core.c b/net/rfkill/core.c index 1bacc107994..ed7e0b4e7f9 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -14,9 +14,7 @@ * GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the - * Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #include <linux/kernel.h> diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 62ced6516c5..c2cca2ee6ae 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -1012,7 +1012,7 @@ int rose_rx_call_request(struct sk_buff *skb, struct net_device *dev, struct ros make_rose->source_call = facilities.source_call; make_rose->source_ndigis = facilities.source_ndigis; for (n = 0 ; n < facilities.source_ndigis ; n++) - make_rose->source_digis[n]= facilities.source_digis[n]; + make_rose->source_digis[n] = facilities.source_digis[n]; make_rose->neighbour = neigh; make_rose->device = dev; make_rose->facilities = facilities; @@ -1051,7 +1051,7 @@ static int rose_sendmsg(struct kiocb *iocb, struct socket *sock, { struct sock *sk = sock->sk; struct rose_sock *rose = rose_sk(sk); - struct sockaddr_rose *usrose = (struct sockaddr_rose *)msg->msg_name; + DECLARE_SOCKADDR(struct sockaddr_rose *, usrose, msg->msg_name); int err; struct full_sockaddr_rose srose; struct sk_buff *skb; @@ -1253,7 +1253,8 @@ static int rose_recvmsg(struct kiocb *iocb, struct socket *sock, if (msg->msg_name) { struct sockaddr_rose *srose; - struct full_sockaddr_rose *full_srose = msg->msg_name; + DECLARE_SOCKADDR(struct full_sockaddr_rose *, full_srose, + msg->msg_name); memset(msg->msg_name, 0, sizeof(struct full_sockaddr_rose)); srose = msg->msg_name; diff --git a/net/rose/rose_dev.c b/net/rose/rose_dev.c index 28dbdb911b8..50005888be5 100644 --- a/net/rose/rose_dev.c +++ b/net/rose/rose_dev.c @@ -146,7 +146,7 @@ static netdev_tx_t rose_xmit(struct sk_buff *skb, struct net_device *dev) static const struct header_ops rose_header_ops = { .create = rose_header, - .rebuild= rose_rebuild_header, + .rebuild = 
rose_rebuild_header, }; static const struct net_device_ops rose_netdev_ops = { diff --git a/net/rxrpc/ar-ack.c b/net/rxrpc/ar-ack.c index e4d9cbcff40..cd97a0ce48d 100644 --- a/net/rxrpc/ar-ack.c +++ b/net/rxrpc/ar-ack.c @@ -21,10 +21,17 @@ static unsigned int rxrpc_ack_defer = 1; -static const char *const rxrpc_acks[] = { - "---", "REQ", "DUP", "OOS", "WIN", "MEM", "PNG", "PNR", "DLY", "IDL", - "-?-" -}; +static const char *rxrpc_acks(u8 reason) +{ + static const char *const str[] = { + "---", "REQ", "DUP", "OOS", "WIN", "MEM", "PNG", "PNR", "DLY", + "IDL", "-?-" + }; + + if (reason >= ARRAY_SIZE(str)) + reason = ARRAY_SIZE(str) - 1; + return str[reason]; +} static const s8 rxrpc_ack_priority[] = { [0] = 0, @@ -50,7 +57,7 @@ void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, ASSERTCMP(prior, >, 0); _enter("{%d},%s,%%%x,%u", - call->debug_id, rxrpc_acks[ack_reason], ntohl(serial), + call->debug_id, rxrpc_acks(ack_reason), ntohl(serial), immediate); if (prior < rxrpc_ack_priority[call->ackr_reason]) { @@ -637,7 +644,7 @@ process_further: hard, ntohl(ack.previousPacket), ntohl(ack.serial), - rxrpc_acks[ack.reason], + rxrpc_acks(ack.reason), ack.nAcks); rxrpc_extract_ackinfo(call, skb, latest, ack.nAcks); @@ -1180,7 +1187,7 @@ send_ACK: ntohl(ack.firstPacket), ntohl(ack.previousPacket), ntohl(ack.serial), - rxrpc_acks[ack.reason], + rxrpc_acks(ack.reason), ack.nAcks); del_timer_sync(&call->ack_timer); diff --git a/net/rxrpc/ar-output.c b/net/rxrpc/ar-output.c index e1ac183d50b..d0e8f1c1898 100644 --- a/net/rxrpc/ar-output.c +++ b/net/rxrpc/ar-output.c @@ -152,8 +152,8 @@ int rxrpc_client_sendmsg(struct kiocb *iocb, struct rxrpc_sock *rx, if (trans) { service_id = rx->service_id; if (msg->msg_name) { - struct sockaddr_rxrpc *srx = - (struct sockaddr_rxrpc *) msg->msg_name; + DECLARE_SOCKADDR(struct sockaddr_rxrpc *, srx, + msg->msg_name); service_id = htons(srx->srx_service); } key = rx->key; diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 
ad1f1d81920..a1a8e29e5fc 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -286,6 +286,28 @@ config NET_SCH_FQ If unsure, say N. +config NET_SCH_HHF + tristate "Heavy-Hitter Filter (HHF)" + help + Say Y here if you want to use the Heavy-Hitter Filter (HHF) + packet scheduling algorithm. + + To compile this driver as a module, choose M here: the module + will be called sch_hhf. + +config NET_SCH_PIE + tristate "Proportional Integral controller Enhanced (PIE) scheduler" + help + Say Y here if you want to use the Proportional Integral controller + Enhanced scheduler packet scheduling algorithm. + For more information, please see + http://tools.ietf.org/html/draft-pan-tsvwg-pie-00 + + To compile this driver as a module, choose M here: the module + will be called sch_pie. + + If unsure, say N. + config NET_SCH_INGRESS tristate "Ingress Qdisc" depends on NET_CLS_ACT @@ -435,6 +457,7 @@ config NET_CLS_FLOW config NET_CLS_CGROUP tristate "Control Group Classifier" select NET_CLS + select CGROUP_NET_CLASSID depends on CGROUPS ---help--- Say Y here if you want to classify packets based on the control diff --git a/net/sched/Makefile b/net/sched/Makefile index 35fa47a494a..0a869a11f3e 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -40,6 +40,8 @@ obj-$(CONFIG_NET_SCH_QFQ) += sch_qfq.o obj-$(CONFIG_NET_SCH_CODEL) += sch_codel.o obj-$(CONFIG_NET_SCH_FQ_CODEL) += sch_fq_codel.o obj-$(CONFIG_NET_SCH_FQ) += sch_fq.o +obj-$(CONFIG_NET_SCH_HHF) += sch_hhf.o +obj-$(CONFIG_NET_SCH_PIE) += sch_pie.o obj-$(CONFIG_NET_CLS_U32) += cls_u32.o obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 69cb848e834..72bdc716634 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -29,25 +29,16 @@ void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo) { - unsigned int h = tcf_hash(p->tcfc_index, hinfo->hmask); - struct tcf_common **p1p; - - for (p1p = &hinfo->htab[h]; *p1p; p1p = &(*p1p)->tcfc_next) { - 
if (*p1p == p) { - write_lock_bh(hinfo->lock); - *p1p = p->tcfc_next; - write_unlock_bh(hinfo->lock); - gen_kill_estimator(&p->tcfc_bstats, - &p->tcfc_rate_est); - /* - * gen_estimator est_timer() might access p->tcfc_lock - * or bstats, wait a RCU grace period before freeing p - */ - kfree_rcu(p, tcfc_rcu); - return; - } - } - WARN_ON(1); + spin_lock_bh(&hinfo->lock); + hlist_del(&p->tcfc_head); + spin_unlock_bh(&hinfo->lock); + gen_kill_estimator(&p->tcfc_bstats, + &p->tcfc_rate_est); + /* + * gen_estimator est_timer() might access p->tcfc_lock + * or bstats, wait a RCU grace period before freeing p + */ + kfree_rcu(p, tcfc_rcu); } EXPORT_SYMBOL(tcf_hash_destroy); @@ -71,20 +62,22 @@ int tcf_hash_release(struct tcf_common *p, int bind, EXPORT_SYMBOL(tcf_hash_release); static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb, - struct tc_action *a, struct tcf_hashinfo *hinfo) + struct tc_action *a) { + struct tcf_hashinfo *hinfo = a->ops->hinfo; + struct hlist_head *head; struct tcf_common *p; int err = 0, index = -1, i = 0, s_i = 0, n_i = 0; struct nlattr *nest; - read_lock_bh(hinfo->lock); + spin_lock_bh(&hinfo->lock); s_i = cb->args[0]; for (i = 0; i < (hinfo->hmask + 1); i++) { - p = hinfo->htab[tcf_hash(i, hinfo->hmask)]; + head = &hinfo->htab[tcf_hash(i, hinfo->hmask)]; - for (; p; p = p->tcfc_next) { + hlist_for_each_entry_rcu(p, head, tcfc_head) { index++; if (index < s_i) continue; @@ -107,7 +100,7 @@ static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb, } } done: - read_unlock_bh(hinfo->lock); + spin_unlock_bh(&hinfo->lock); if (n_i) cb->args[0] += n_i; return n_i; @@ -117,10 +110,12 @@ nla_put_failure: goto done; } -static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a, - struct tcf_hashinfo *hinfo) +static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a) { - struct tcf_common *p, *s_p; + struct tcf_hashinfo *hinfo = a->ops->hinfo; + struct hlist_head *head; + struct hlist_node *n; + 
struct tcf_common *p; struct nlattr *nest; int i = 0, n_i = 0; @@ -130,14 +125,12 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a, if (nla_put_string(skb, TCA_KIND, a->ops->kind)) goto nla_put_failure; for (i = 0; i < (hinfo->hmask + 1); i++) { - p = hinfo->htab[tcf_hash(i, hinfo->hmask)]; - - while (p != NULL) { - s_p = p->tcfc_next; - if (ACT_P_DELETED == tcf_hash_release(p, 0, hinfo)) + head = &hinfo->htab[tcf_hash(i, hinfo->hmask)]; + hlist_for_each_entry_safe(p, n, head, tcfc_head) { + if (ACT_P_DELETED == tcf_hash_release(p, 0, hinfo)) { module_put(a->ops->owner); - n_i++; - p = s_p; + n_i++; + } } } if (nla_put_u32(skb, TCA_FCNT, n_i)) @@ -150,48 +143,45 @@ nla_put_failure: return -EINVAL; } -int tcf_generic_walker(struct sk_buff *skb, struct netlink_callback *cb, - int type, struct tc_action *a) +static int tcf_generic_walker(struct sk_buff *skb, struct netlink_callback *cb, + int type, struct tc_action *a) { - struct tcf_hashinfo *hinfo = a->ops->hinfo; - if (type == RTM_DELACTION) { - return tcf_del_walker(skb, a, hinfo); + return tcf_del_walker(skb, a); } else if (type == RTM_GETACTION) { - return tcf_dump_walker(skb, cb, a, hinfo); + return tcf_dump_walker(skb, cb, a); } else { WARN(1, "tcf_generic_walker: unknown action %d\n", type); return -EINVAL; } } -EXPORT_SYMBOL(tcf_generic_walker); -struct tcf_common *tcf_hash_lookup(u32 index, struct tcf_hashinfo *hinfo) +static struct tcf_common *tcf_hash_lookup(u32 index, struct tcf_hashinfo *hinfo) { - struct tcf_common *p; + struct tcf_common *p = NULL; + struct hlist_head *head; - read_lock_bh(hinfo->lock); - for (p = hinfo->htab[tcf_hash(index, hinfo->hmask)]; p; - p = p->tcfc_next) { + spin_lock_bh(&hinfo->lock); + head = &hinfo->htab[tcf_hash(index, hinfo->hmask)]; + hlist_for_each_entry_rcu(p, head, tcfc_head) if (p->tcfc_index == index) break; - } - read_unlock_bh(hinfo->lock); + spin_unlock_bh(&hinfo->lock); return p; } -EXPORT_SYMBOL(tcf_hash_lookup); -u32 
tcf_hash_new_index(u32 *idx_gen, struct tcf_hashinfo *hinfo) +u32 tcf_hash_new_index(struct tcf_hashinfo *hinfo) { - u32 val = *idx_gen; + u32 val = hinfo->index; do { if (++val == 0) val = 1; } while (tcf_hash_lookup(val, hinfo)); - return (*idx_gen = val); + hinfo->index = val; + return val; } EXPORT_SYMBOL(tcf_hash_new_index); @@ -208,9 +198,9 @@ int tcf_hash_search(struct tc_action *a, u32 index) } EXPORT_SYMBOL(tcf_hash_search); -struct tcf_common *tcf_hash_check(u32 index, struct tc_action *a, int bind, - struct tcf_hashinfo *hinfo) +struct tcf_common *tcf_hash_check(u32 index, struct tc_action *a, int bind) { + struct tcf_hashinfo *hinfo = a->ops->hinfo; struct tcf_common *p = NULL; if (index && (p = tcf_hash_lookup(index, hinfo)) != NULL) { if (bind) @@ -223,9 +213,9 @@ struct tcf_common *tcf_hash_check(u32 index, struct tc_action *a, int bind, EXPORT_SYMBOL(tcf_hash_check); struct tcf_common *tcf_hash_create(u32 index, struct nlattr *est, - struct tc_action *a, int size, int bind, - u32 *idx_gen, struct tcf_hashinfo *hinfo) + struct tc_action *a, int size, int bind) { + struct tcf_hashinfo *hinfo = a->ops->hinfo; struct tcf_common *p = kzalloc(size, GFP_KERNEL); if (unlikely(!p)) @@ -235,7 +225,8 @@ struct tcf_common *tcf_hash_create(u32 index, struct nlattr *est, p->tcfc_bindcnt = 1; spin_lock_init(&p->tcfc_lock); - p->tcfc_index = index ? index : tcf_hash_new_index(idx_gen, hinfo); + INIT_HLIST_NODE(&p->tcfc_head); + p->tcfc_index = index ? 
index : tcf_hash_new_index(hinfo); p->tcfc_tm.install = jiffies; p->tcfc_tm.lastuse = jiffies; if (est) { @@ -256,19 +247,18 @@ void tcf_hash_insert(struct tcf_common *p, struct tcf_hashinfo *hinfo) { unsigned int h = tcf_hash(p->tcfc_index, hinfo->hmask); - write_lock_bh(hinfo->lock); - p->tcfc_next = hinfo->htab[h]; - hinfo->htab[h] = p; - write_unlock_bh(hinfo->lock); + spin_lock_bh(&hinfo->lock); + hlist_add_head(&p->tcfc_head, &hinfo->htab[h]); + spin_unlock_bh(&hinfo->lock); } EXPORT_SYMBOL(tcf_hash_insert); -static struct tc_action_ops *act_base = NULL; +static LIST_HEAD(act_base); static DEFINE_RWLOCK(act_mod_lock); int tcf_register_action(struct tc_action_ops *act) { - struct tc_action_ops *a, **ap; + struct tc_action_ops *a; /* Must supply act, dump, cleanup and init */ if (!act->act || !act->dump || !act->cleanup || !act->init) @@ -281,14 +271,13 @@ int tcf_register_action(struct tc_action_ops *act) act->walk = tcf_generic_walker; write_lock(&act_mod_lock); - for (ap = &act_base; (a = *ap) != NULL; ap = &a->next) { + list_for_each_entry(a, &act_base, head) { if (act->type == a->type || (strcmp(act->kind, a->kind) == 0)) { write_unlock(&act_mod_lock); return -EEXIST; } } - act->next = NULL; - *ap = act; + list_add_tail(&act->head, &act_base); write_unlock(&act_mod_lock); return 0; } @@ -296,17 +285,16 @@ EXPORT_SYMBOL(tcf_register_action); int tcf_unregister_action(struct tc_action_ops *act) { - struct tc_action_ops *a, **ap; + struct tc_action_ops *a; int err = -ENOENT; write_lock(&act_mod_lock); - for (ap = &act_base; (a = *ap) != NULL; ap = &a->next) - if (a == act) + list_for_each_entry(a, &act_base, head) { + if (a == act) { + list_del(&act->head); + err = 0; break; - if (a) { - *ap = a->next; - a->next = NULL; - err = 0; + } } write_unlock(&act_mod_lock); return err; @@ -316,69 +304,42 @@ EXPORT_SYMBOL(tcf_unregister_action); /* lookup by name */ static struct tc_action_ops *tc_lookup_action_n(char *kind) { - struct tc_action_ops *a = NULL; + struct 
tc_action_ops *a, *res = NULL; if (kind) { read_lock(&act_mod_lock); - for (a = act_base; a; a = a->next) { + list_for_each_entry(a, &act_base, head) { if (strcmp(kind, a->kind) == 0) { - if (!try_module_get(a->owner)) { - read_unlock(&act_mod_lock); - return NULL; - } + if (try_module_get(a->owner)) + res = a; break; } } read_unlock(&act_mod_lock); } - return a; + return res; } /* lookup by nlattr */ static struct tc_action_ops *tc_lookup_action(struct nlattr *kind) { - struct tc_action_ops *a = NULL; + struct tc_action_ops *a, *res = NULL; if (kind) { read_lock(&act_mod_lock); - for (a = act_base; a; a = a->next) { + list_for_each_entry(a, &act_base, head) { if (nla_strcmp(kind, a->kind) == 0) { - if (!try_module_get(a->owner)) { - read_unlock(&act_mod_lock); - return NULL; - } + if (try_module_get(a->owner)) + res = a; break; } } read_unlock(&act_mod_lock); } - return a; + return res; } -#if 0 -/* lookup by id */ -static struct tc_action_ops *tc_lookup_action_id(u32 type) -{ - struct tc_action_ops *a = NULL; - - if (type) { - read_lock(&act_mod_lock); - for (a = act_base; a; a = a->next) { - if (a->type == type) { - if (!try_module_get(a->owner)) { - read_unlock(&act_mod_lock); - return NULL; - } - break; - } - } - read_unlock(&act_mod_lock); - } - return a; -} -#endif - -int tcf_action_exec(struct sk_buff *skb, const struct tc_action *act, +int tcf_action_exec(struct sk_buff *skb, const struct list_head *actions, struct tcf_result *res) { const struct tc_action *a; @@ -389,53 +350,39 @@ int tcf_action_exec(struct sk_buff *skb, const struct tc_action *act, ret = TC_ACT_OK; goto exec_done; } - while ((a = act) != NULL) { + list_for_each_entry(a, actions, list) { repeat: - if (a->ops) { - ret = a->ops->act(skb, a, res); - if (TC_MUNGED & skb->tc_verd) { - /* copied already, allow trampling */ - skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd); - skb->tc_verd = CLR_TC_MUNGED(skb->tc_verd); - } - if (ret == TC_ACT_REPEAT) - goto repeat; /* we need a ttl - JHS */ - if 
(ret != TC_ACT_PIPE) - goto exec_done; + ret = a->ops->act(skb, a, res); + if (TC_MUNGED & skb->tc_verd) { + /* copied already, allow trampling */ + skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd); + skb->tc_verd = CLR_TC_MUNGED(skb->tc_verd); } - act = a->next; + if (ret == TC_ACT_REPEAT) + goto repeat; /* we need a ttl - JHS */ + if (ret != TC_ACT_PIPE) + goto exec_done; } exec_done: return ret; } EXPORT_SYMBOL(tcf_action_exec); -void tcf_action_destroy(struct tc_action *act, int bind) +void tcf_action_destroy(struct list_head *actions, int bind) { - struct tc_action *a; + struct tc_action *a, *tmp; - for (a = act; a; a = act) { - if (a->ops) { - if (a->ops->cleanup(a, bind) == ACT_P_DELETED) - module_put(a->ops->owner); - act = act->next; - kfree(a); - } else { - /*FIXME: Remove later - catch insertion bugs*/ - WARN(1, "tcf_action_destroy: BUG? destroying NULL ops\n"); - act = act->next; - kfree(a); - } + list_for_each_entry_safe(a, tmp, actions, list) { + if (a->ops->cleanup(a, bind) == ACT_P_DELETED) + module_put(a->ops->owner); + list_del(&a->list); + kfree(a); } } int tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { - int err = -EINVAL; - - if (a->ops == NULL) - return err; return a->ops->dump(skb, a, bind, ref); } @@ -446,9 +393,6 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref) unsigned char *b = skb_tail_pointer(skb); struct nlattr *nest; - if (a->ops == NULL) - return err; - if (nla_put_string(skb, TCA_KIND, a->ops->kind)) goto nla_put_failure; if (tcf_action_copy_stats(skb, a, 0)) @@ -469,14 +413,13 @@ nla_put_failure: EXPORT_SYMBOL(tcf_action_dump_1); int -tcf_action_dump(struct sk_buff *skb, struct tc_action *act, int bind, int ref) +tcf_action_dump(struct sk_buff *skb, struct list_head *actions, int bind, int ref) { struct tc_action *a; int err = -EINVAL; struct nlattr *nest; - while ((a = act) != NULL) { - act = a->next; + list_for_each_entry(a, actions, list) { nest = 
nla_nest_start(skb, a->order); if (nest == NULL) goto nla_put_failure; @@ -551,6 +494,8 @@ struct tc_action *tcf_action_init_1(struct net *net, struct nlattr *nla, if (a == NULL) goto err_mod; + a->ops = a_o; + INIT_LIST_HEAD(&a->list); /* backward compatibility for policer */ if (name == NULL) err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, a, ovr, bind); @@ -565,7 +510,6 @@ struct tc_action *tcf_action_init_1(struct net *net, struct nlattr *nla, */ if (err != ACT_P_CREATED) module_put(a_o->owner); - a->ops = a_o; return a; @@ -577,37 +521,33 @@ err_out: return ERR_PTR(err); } -struct tc_action *tcf_action_init(struct net *net, struct nlattr *nla, +int tcf_action_init(struct net *net, struct nlattr *nla, struct nlattr *est, char *name, int ovr, - int bind) + int bind, struct list_head *actions) { struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; - struct tc_action *head = NULL, *act, *act_prev = NULL; + struct tc_action *act; int err; int i; err = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL); if (err < 0) - return ERR_PTR(err); + return err; for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) { act = tcf_action_init_1(net, tb[i], est, name, ovr, bind); - if (IS_ERR(act)) + if (IS_ERR(act)) { + err = PTR_ERR(act); goto err; + } act->order = i; - - if (head == NULL) - head = act; - else - act_prev->next = act; - act_prev = act; + list_add_tail(&act->list, actions); } - return head; + return 0; err: - if (head != NULL) - tcf_action_destroy(head, bind); - return act; + tcf_action_destroy(actions, bind); + return err; } int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a, @@ -615,9 +555,9 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a, { int err = 0; struct gnet_dump d; - struct tcf_act_hdr *h = a->priv; + struct tcf_common *p = a->priv; - if (h == NULL) + if (p == NULL) goto errout; /* compat_mode being true specifies a call that is supposed @@ -626,24 +566,20 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a, if 
(compat_mode) { if (a->type == TCA_OLD_COMPAT) err = gnet_stats_start_copy_compat(skb, 0, - TCA_STATS, TCA_XSTATS, &h->tcf_lock, &d); + TCA_STATS, TCA_XSTATS, &p->tcfc_lock, &d); else return 0; } else err = gnet_stats_start_copy(skb, TCA_ACT_STATS, - &h->tcf_lock, &d); + &p->tcfc_lock, &d); if (err < 0) goto errout; - if (a->ops != NULL && a->ops->get_stats != NULL) - if (a->ops->get_stats(skb, a) < 0) - goto errout; - - if (gnet_stats_copy_basic(&d, &h->tcf_bstats) < 0 || - gnet_stats_copy_rate_est(&d, &h->tcf_bstats, - &h->tcf_rate_est) < 0 || - gnet_stats_copy_queue(&d, &h->tcf_qstats) < 0) + if (gnet_stats_copy_basic(&d, &p->tcfc_bstats) < 0 || + gnet_stats_copy_rate_est(&d, &p->tcfc_bstats, + &p->tcfc_rate_est) < 0 || + gnet_stats_copy_queue(&d, &p->tcfc_qstats) < 0) goto errout; if (gnet_stats_finish_copy(&d) < 0) @@ -656,7 +592,7 @@ errout: } static int -tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 portid, u32 seq, +tca_get_fill(struct sk_buff *skb, struct list_head *actions, u32 portid, u32 seq, u16 flags, int event, int bind, int ref) { struct tcamsg *t; @@ -676,7 +612,7 @@ tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 portid, u32 seq, if (nest == NULL) goto out_nlmsg_trim; - if (tcf_action_dump(skb, a, bind, ref) < 0) + if (tcf_action_dump(skb, actions, bind, ref) < 0) goto out_nlmsg_trim; nla_nest_end(skb, nest); @@ -691,14 +627,14 @@ out_nlmsg_trim: static int act_get_notify(struct net *net, u32 portid, struct nlmsghdr *n, - struct tc_action *a, int event) + struct list_head *actions, int event) { struct sk_buff *skb; skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) return -ENOBUFS; - if (tca_get_fill(skb, a, portid, n->nlmsg_seq, 0, event, 0, 0) <= 0) { + if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, 0, event, 0, 0) <= 0) { kfree_skb(skb); return -EINVAL; } @@ -729,9 +665,10 @@ tcf_action_get_1(struct nlattr *nla, struct nlmsghdr *n, u32 portid) if (a == NULL) goto err_out; + INIT_LIST_HEAD(&a->list); err = 
-EINVAL; a->ops = tc_lookup_action(tb[TCA_ACT_KIND]); - if (a->ops == NULL) + if (a->ops == NULL) /* could happen in batch of actions */ goto err_free; err = -ENOENT; if (a->ops->lookup(a, index) == 0) @@ -748,12 +685,12 @@ err_out: return ERR_PTR(err); } -static void cleanup_a(struct tc_action *act) +static void cleanup_a(struct list_head *actions) { - struct tc_action *a; + struct tc_action *a, *tmp; - for (a = act; a; a = act) { - act = a->next; + list_for_each_entry_safe(a, tmp, actions, list) { + list_del(&a->list); kfree(a); } } @@ -768,6 +705,7 @@ static struct tc_action *create_a(int i) return NULL; } act->order = i; + INIT_LIST_HEAD(&act->list); return act; } @@ -806,7 +744,7 @@ static int tca_action_flush(struct net *net, struct nlattr *nla, err = -EINVAL; kind = tb[TCA_ACT_KIND]; a->ops = tc_lookup_action(kind); - if (a->ops == NULL) + if (a->ops == NULL) /*some idjot trying to flush unknown action */ goto err_out; nlh = nlmsg_put(skb, portid, n->nlmsg_seq, RTM_DELACTION, sizeof(*t), 0); @@ -850,12 +788,40 @@ noflush_out: } static int +tcf_del_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions, + u32 portid) +{ + int ret; + struct sk_buff *skb; + + skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb) + return -ENOBUFS; + + if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, 0, RTM_DELACTION, + 0, 1) <= 0) { + kfree_skb(skb); + return -EINVAL; + } + + /* now do the delete */ + tcf_action_destroy(actions, 0); + + ret = rtnetlink_send(skb, net, portid, RTNLGRP_TC, + n->nlmsg_flags & NLM_F_ECHO); + if (ret > 0) + return 0; + return ret; +} + +static int tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, u32 portid, int event) { int i, ret; struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; - struct tc_action *head = NULL, *act, *act_prev = NULL; + struct tc_action *act; + LIST_HEAD(actions); ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL); if (ret < 0) @@ -875,117 +841,62 @@ tca_action_gd(struct net *net, struct 
nlattr *nla, struct nlmsghdr *n, goto err; } act->order = i; - - if (head == NULL) - head = act; - else - act_prev->next = act; - act_prev = act; + list_add_tail(&act->list, &actions); } if (event == RTM_GETACTION) - ret = act_get_notify(net, portid, n, head, event); + ret = act_get_notify(net, portid, n, &actions, event); else { /* delete */ - struct sk_buff *skb; - - skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); - if (!skb) { - ret = -ENOBUFS; - goto err; - } - - if (tca_get_fill(skb, head, portid, n->nlmsg_seq, 0, event, - 0, 1) <= 0) { - kfree_skb(skb); - ret = -EINVAL; + ret = tcf_del_notify(net, n, &actions, portid); + if (ret) goto err; - } - - /* now do the delete */ - tcf_action_destroy(head, 0); - ret = rtnetlink_send(skb, net, portid, RTNLGRP_TC, - n->nlmsg_flags & NLM_F_ECHO); - if (ret > 0) - return 0; return ret; } err: - cleanup_a(head); + cleanup_a(&actions); return ret; } -static int tcf_add_notify(struct net *net, struct tc_action *a, - u32 portid, u32 seq, int event, u16 flags) +static int +tcf_add_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions, + u32 portid) { - struct tcamsg *t; - struct nlmsghdr *nlh; struct sk_buff *skb; - struct nlattr *nest; - unsigned char *b; int err = 0; skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) return -ENOBUFS; - b = skb_tail_pointer(skb); - - nlh = nlmsg_put(skb, portid, seq, event, sizeof(*t), flags); - if (!nlh) - goto out_kfree_skb; - t = nlmsg_data(nlh); - t->tca_family = AF_UNSPEC; - t->tca__pad1 = 0; - t->tca__pad2 = 0; - - nest = nla_nest_start(skb, TCA_ACT_TAB); - if (nest == NULL) - goto out_kfree_skb; - - if (tcf_action_dump(skb, a, 0, 0) < 0) - goto out_kfree_skb; - - nla_nest_end(skb, nest); - - nlh->nlmsg_len = skb_tail_pointer(skb) - b; - NETLINK_CB(skb).dst_group = RTNLGRP_TC; + if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, n->nlmsg_flags, + RTM_NEWACTION, 0, 0) <= 0) { + kfree_skb(skb); + return -EINVAL; + } - err = rtnetlink_send(skb, net, portid, RTNLGRP_TC, 
flags & NLM_F_ECHO); + err = rtnetlink_send(skb, net, portid, RTNLGRP_TC, + n->nlmsg_flags & NLM_F_ECHO); if (err > 0) err = 0; return err; - -out_kfree_skb: - kfree_skb(skb); - return -1; } - static int tcf_action_add(struct net *net, struct nlattr *nla, struct nlmsghdr *n, u32 portid, int ovr) { int ret = 0; - struct tc_action *act; - struct tc_action *a; - u32 seq = n->nlmsg_seq; + LIST_HEAD(actions); - act = tcf_action_init(net, nla, NULL, NULL, ovr, 0); - if (act == NULL) - goto done; - if (IS_ERR(act)) { - ret = PTR_ERR(act); + ret = tcf_action_init(net, nla, NULL, NULL, ovr, 0, &actions); + if (ret) goto done; - } /* dump then free all the actions after update; inserted policy * stays intact */ - ret = tcf_add_notify(net, act, portid, seq, RTM_NEWACTION, n->nlmsg_flags); - for (a = act; a; a = act) { - act = a->next; - kfree(a); - } + ret = tcf_add_notify(net, n, &actions, portid); + cleanup_a(&actions); done: return ret; } diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 11fe1a41643..2210187c45c 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -37,15 +37,7 @@ #include <net/tc_act/tc_csum.h> #define CSUM_TAB_MASK 15 -static struct tcf_common *tcf_csum_ht[CSUM_TAB_MASK + 1]; -static u32 csum_idx_gen; -static DEFINE_RWLOCK(csum_lock); - -static struct tcf_hashinfo csum_hash_info = { - .htab = tcf_csum_ht, - .hmask = CSUM_TAB_MASK, - .lock = &csum_lock, -}; +static struct tcf_hashinfo csum_hash_info; static const struct nla_policy csum_policy[TCA_CSUM_MAX + 1] = { [TCA_CSUM_PARMS] = { .len = sizeof(struct tc_csum), }, @@ -71,17 +63,16 @@ static int tcf_csum_init(struct net *n, struct nlattr *nla, struct nlattr *est, return -EINVAL; parm = nla_data(tb[TCA_CSUM_PARMS]); - pc = tcf_hash_check(parm->index, a, bind, &csum_hash_info); + pc = tcf_hash_check(parm->index, a, bind); if (!pc) { - pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind, - &csum_idx_gen, &csum_hash_info); + pc = tcf_hash_create(parm->index, est, a, 
sizeof(*p), bind); if (IS_ERR(pc)) return PTR_ERR(pc); ret = ACT_P_CREATED; } else { if (bind)/* dont override defaults */ return 0; - tcf_hash_release(pc, bind, &csum_hash_info); + tcf_hash_release(pc, bind, a->ops->hinfo); if (!ovr) return -EEXIST; } @@ -93,7 +84,7 @@ static int tcf_csum_init(struct net *n, struct nlattr *nla, struct nlattr *est, spin_unlock_bh(&p->tcf_lock); if (ret == ACT_P_CREATED) - tcf_hash_insert(pc, &csum_hash_info); + tcf_hash_insert(pc, a->ops->hinfo); return ret; } @@ -580,7 +571,6 @@ static struct tc_action_ops act_csum_ops = { .kind = "csum", .hinfo = &csum_hash_info, .type = TCA_ACT_CSUM, - .capab = TCA_CAP_NONE, .owner = THIS_MODULE, .act = tcf_csum, .dump = tcf_csum_dump, @@ -593,6 +583,10 @@ MODULE_LICENSE("GPL"); static int __init csum_init_module(void) { + int err = tcf_hashinfo_init(&csum_hash_info, CSUM_TAB_MASK); + if (err) + return err; + return tcf_register_action(&act_csum_ops); } diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index eb9ba60ebab..a0eed30d581 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -24,20 +24,12 @@ #include <net/tc_act/tc_gact.h> #define GACT_TAB_MASK 15 -static struct tcf_common *tcf_gact_ht[GACT_TAB_MASK + 1]; -static u32 gact_idx_gen; -static DEFINE_RWLOCK(gact_lock); - -static struct tcf_hashinfo gact_hash_info = { - .htab = tcf_gact_ht, - .hmask = GACT_TAB_MASK, - .lock = &gact_lock, -}; +static struct tcf_hashinfo gact_hash_info; #ifdef CONFIG_GACT_PROB static int gact_net_rand(struct tcf_gact *gact) { - if (!gact->tcfg_pval || net_random() % gact->tcfg_pval) + if (!gact->tcfg_pval || prandom_u32() % gact->tcfg_pval) return gact->tcf_action; return gact->tcfg_paction; } @@ -94,17 +86,16 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla, } #endif - pc = tcf_hash_check(parm->index, a, bind, &gact_hash_info); + pc = tcf_hash_check(parm->index, a, bind); if (!pc) { - pc = tcf_hash_create(parm->index, est, a, sizeof(*gact), - bind, &gact_idx_gen, 
&gact_hash_info); + pc = tcf_hash_create(parm->index, est, a, sizeof(*gact), bind); if (IS_ERR(pc)) return PTR_ERR(pc); ret = ACT_P_CREATED; } else { if (bind)/* dont override defaults */ return 0; - tcf_hash_release(pc, bind, &gact_hash_info); + tcf_hash_release(pc, bind, a->ops->hinfo); if (!ovr) return -EEXIST; } @@ -122,7 +113,7 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla, #endif spin_unlock_bh(&gact->tcf_lock); if (ret == ACT_P_CREATED) - tcf_hash_insert(pc, &gact_hash_info); + tcf_hash_insert(pc, a->ops->hinfo); return ret; } @@ -131,7 +122,7 @@ static int tcf_gact_cleanup(struct tc_action *a, int bind) struct tcf_gact *gact = a->priv; if (gact) - return tcf_hash_release(&gact->common, bind, &gact_hash_info); + return tcf_hash_release(&gact->common, bind, a->ops->hinfo); return 0; } @@ -202,7 +193,6 @@ static struct tc_action_ops act_gact_ops = { .kind = "gact", .hinfo = &gact_hash_info, .type = TCA_ACT_GACT, - .capab = TCA_CAP_NONE, .owner = THIS_MODULE, .act = tcf_gact, .dump = tcf_gact_dump, @@ -216,6 +206,9 @@ MODULE_LICENSE("GPL"); static int __init gact_init_module(void) { + int err = tcf_hashinfo_init(&gact_hash_info, GACT_TAB_MASK); + if (err) + return err; #ifdef CONFIG_GACT_PROB pr_info("GACT probability on\n"); #else @@ -227,6 +220,7 @@ static int __init gact_init_module(void) static void __exit gact_cleanup_module(void) { tcf_unregister_action(&act_gact_ops); + tcf_hashinfo_destroy(&gact_hash_info); } module_init(gact_init_module); diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index dcbfe8ce04a..0a6d6217402 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -29,15 +29,7 @@ #define IPT_TAB_MASK 15 -static struct tcf_common *tcf_ipt_ht[IPT_TAB_MASK + 1]; -static u32 ipt_idx_gen; -static DEFINE_RWLOCK(ipt_lock); - -static struct tcf_hashinfo ipt_hash_info = { - .htab = tcf_ipt_ht, - .hmask = IPT_TAB_MASK, - .lock = &ipt_lock, -}; +static struct tcf_hashinfo ipt_hash_info; static int ipt_init_target(struct 
xt_entry_target *t, char *table, unsigned int hook) { @@ -133,10 +125,9 @@ static int tcf_ipt_init(struct net *net, struct nlattr *nla, struct nlattr *est, if (tb[TCA_IPT_INDEX] != NULL) index = nla_get_u32(tb[TCA_IPT_INDEX]); - pc = tcf_hash_check(index, a, bind, &ipt_hash_info); + pc = tcf_hash_check(index, a, bind); if (!pc) { - pc = tcf_hash_create(index, est, a, sizeof(*ipt), bind, - &ipt_idx_gen, &ipt_hash_info); + pc = tcf_hash_create(index, est, a, sizeof(*ipt), bind); if (IS_ERR(pc)) return PTR_ERR(pc); ret = ACT_P_CREATED; @@ -179,7 +170,7 @@ static int tcf_ipt_init(struct net *net, struct nlattr *nla, struct nlattr *est, ipt->tcfi_hook = hook; spin_unlock_bh(&ipt->tcf_lock); if (ret == ACT_P_CREATED) - tcf_hash_insert(pc, &ipt_hash_info); + tcf_hash_insert(pc, a->ops->hinfo); return ret; err3: @@ -295,7 +286,6 @@ static struct tc_action_ops act_ipt_ops = { .kind = "ipt", .hinfo = &ipt_hash_info, .type = TCA_ACT_IPT, - .capab = TCA_CAP_NONE, .owner = THIS_MODULE, .act = tcf_ipt, .dump = tcf_ipt_dump, @@ -306,8 +296,7 @@ static struct tc_action_ops act_ipt_ops = { static struct tc_action_ops act_xt_ops = { .kind = "xt", .hinfo = &ipt_hash_info, - .type = TCA_ACT_IPT, - .capab = TCA_CAP_NONE, + .type = TCA_ACT_XT, .owner = THIS_MODULE, .act = tcf_ipt, .dump = tcf_ipt_dump, @@ -322,7 +311,11 @@ MODULE_ALIAS("act_xt"); static int __init ipt_init_module(void) { - int ret1, ret2; + int ret1, ret2, err; + err = tcf_hashinfo_init(&ipt_hash_info, IPT_TAB_MASK); + if (err) + return err; + ret1 = tcf_register_action(&act_xt_ops); if (ret1 < 0) printk("Failed to load xt action\n"); @@ -330,9 +323,10 @@ static int __init ipt_init_module(void) if (ret2 < 0) printk("Failed to load ipt action\n"); - if (ret1 < 0 && ret2 < 0) + if (ret1 < 0 && ret2 < 0) { + tcf_hashinfo_destroy(&ipt_hash_info); return ret1; - else + } else return 0; } @@ -340,6 +334,7 @@ static void __exit ipt_cleanup_module(void) { tcf_unregister_action(&act_xt_ops); tcf_unregister_action(&act_ipt_ops); 
+ tcf_hashinfo_destroy(&ipt_hash_info); } module_init(ipt_init_module); diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 252378121ce..0b2c6d39d39 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -30,16 +30,8 @@ #include <linux/if_arp.h> #define MIRRED_TAB_MASK 7 -static struct tcf_common *tcf_mirred_ht[MIRRED_TAB_MASK + 1]; -static u32 mirred_idx_gen; -static DEFINE_RWLOCK(mirred_lock); static LIST_HEAD(mirred_list); - -static struct tcf_hashinfo mirred_hash_info = { - .htab = tcf_mirred_ht, - .hmask = MIRRED_TAB_MASK, - .lock = &mirred_lock, -}; +static struct tcf_hashinfo mirred_hash_info; static int tcf_mirred_release(struct tcf_mirred *m, int bind) { @@ -109,12 +101,11 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, dev = NULL; } - pc = tcf_hash_check(parm->index, a, bind, &mirred_hash_info); + pc = tcf_hash_check(parm->index, a, bind); if (!pc) { if (dev == NULL) return -EINVAL; - pc = tcf_hash_create(parm->index, est, a, sizeof(*m), bind, - &mirred_idx_gen, &mirred_hash_info); + pc = tcf_hash_create(parm->index, est, a, sizeof(*m), bind); if (IS_ERR(pc)) return PTR_ERR(pc); ret = ACT_P_CREATED; @@ -140,7 +131,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, spin_unlock_bh(&m->tcf_lock); if (ret == ACT_P_CREATED) { list_add(&m->tcfm_list, &mirred_list); - tcf_hash_insert(pc, &mirred_hash_info); + tcf_hash_insert(pc, a->ops->hinfo); } return ret; @@ -261,12 +252,10 @@ static struct notifier_block mirred_device_notifier = { .notifier_call = mirred_device_event, }; - static struct tc_action_ops act_mirred_ops = { .kind = "mirred", .hinfo = &mirred_hash_info, .type = TCA_ACT_MIRRED, - .capab = TCA_CAP_NONE, .owner = THIS_MODULE, .act = tcf_mirred, .dump = tcf_mirred_dump, @@ -284,14 +273,20 @@ static int __init mirred_init_module(void) if (err) return err; + err = tcf_hashinfo_init(&mirred_hash_info, MIRRED_TAB_MASK); + if (err) { + unregister_netdevice_notifier(&mirred_device_notifier); + 
return err; + } pr_info("Mirror/redirect action on\n"); return tcf_register_action(&act_mirred_ops); } static void __exit mirred_cleanup_module(void) { - unregister_netdevice_notifier(&mirred_device_notifier); tcf_unregister_action(&act_mirred_ops); + tcf_hashinfo_destroy(&mirred_hash_info); + unregister_netdevice_notifier(&mirred_device_notifier); } module_init(mirred_init_module); diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 76869538d02..81f0404bb33 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -30,15 +30,8 @@ #define NAT_TAB_MASK 15 -static struct tcf_common *tcf_nat_ht[NAT_TAB_MASK + 1]; -static u32 nat_idx_gen; -static DEFINE_RWLOCK(nat_lock); - -static struct tcf_hashinfo nat_hash_info = { - .htab = tcf_nat_ht, - .hmask = NAT_TAB_MASK, - .lock = &nat_lock, -}; + +static struct tcf_hashinfo nat_hash_info; static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = { [TCA_NAT_PARMS] = { .len = sizeof(struct tc_nat) }, @@ -64,17 +57,16 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, return -EINVAL; parm = nla_data(tb[TCA_NAT_PARMS]); - pc = tcf_hash_check(parm->index, a, bind, &nat_hash_info); + pc = tcf_hash_check(parm->index, a, bind); if (!pc) { - pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind, - &nat_idx_gen, &nat_hash_info); + pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind); if (IS_ERR(pc)) return PTR_ERR(pc); ret = ACT_P_CREATED; } else { if (bind) return 0; - tcf_hash_release(pc, bind, &nat_hash_info); + tcf_hash_release(pc, bind, a->ops->hinfo); if (!ovr) return -EEXIST; } @@ -90,7 +82,7 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, spin_unlock_bh(&p->tcf_lock); if (ret == ACT_P_CREATED) - tcf_hash_insert(pc, &nat_hash_info); + tcf_hash_insert(pc, a->ops->hinfo); return ret; } @@ -303,7 +295,6 @@ static struct tc_action_ops act_nat_ops = { .kind = "nat", .hinfo = &nat_hash_info, .type = TCA_ACT_NAT, - .capab = TCA_CAP_NONE, 
.owner = THIS_MODULE, .act = tcf_nat, .dump = tcf_nat_dump, @@ -316,12 +307,16 @@ MODULE_LICENSE("GPL"); static int __init nat_init_module(void) { + int err = tcf_hashinfo_init(&nat_hash_info, NAT_TAB_MASK); + if (err) + return err; return tcf_register_action(&act_nat_ops); } static void __exit nat_cleanup_module(void) { tcf_unregister_action(&act_nat_ops); + tcf_hashinfo_destroy(&nat_hash_info); } module_init(nat_init_module); diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 7aa2dcd989f..be3f0f6875b 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -24,15 +24,8 @@ #include <net/tc_act/tc_pedit.h> #define PEDIT_TAB_MASK 15 -static struct tcf_common *tcf_pedit_ht[PEDIT_TAB_MASK + 1]; -static u32 pedit_idx_gen; -static DEFINE_RWLOCK(pedit_lock); - -static struct tcf_hashinfo pedit_hash_info = { - .htab = tcf_pedit_ht, - .hmask = PEDIT_TAB_MASK, - .lock = &pedit_lock, -}; + +static struct tcf_hashinfo pedit_hash_info; static const struct nla_policy pedit_policy[TCA_PEDIT_MAX + 1] = { [TCA_PEDIT_PARMS] = { .len = sizeof(struct tc_pedit) }, @@ -64,12 +57,11 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, if (nla_len(tb[TCA_PEDIT_PARMS]) < sizeof(*parm) + ksize) return -EINVAL; - pc = tcf_hash_check(parm->index, a, bind, &pedit_hash_info); + pc = tcf_hash_check(parm->index, a, bind); if (!pc) { if (!parm->nkeys) return -EINVAL; - pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind, - &pedit_idx_gen, &pedit_hash_info); + pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind); if (IS_ERR(pc)) return PTR_ERR(pc); p = to_pedit(pc); @@ -84,7 +76,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, ret = ACT_P_CREATED; } else { p = to_pedit(pc); - tcf_hash_release(pc, bind, &pedit_hash_info); + tcf_hash_release(pc, bind, a->ops->hinfo); if (bind) return 0; if (!ovr) @@ -108,7 +100,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, memcpy(p->tcfp_keys, parm->keys, ksize); 
spin_unlock_bh(&p->tcf_lock); if (ret == ACT_P_CREATED) - tcf_hash_insert(pc, &pedit_hash_info); + tcf_hash_insert(pc, a->ops->hinfo); return ret; } @@ -240,7 +232,6 @@ static struct tc_action_ops act_pedit_ops = { .kind = "pedit", .hinfo = &pedit_hash_info, .type = TCA_ACT_PEDIT, - .capab = TCA_CAP_NONE, .owner = THIS_MODULE, .act = tcf_pedit, .dump = tcf_pedit_dump, @@ -254,11 +245,15 @@ MODULE_LICENSE("GPL"); static int __init pedit_init_module(void) { + int err = tcf_hashinfo_init(&pedit_hash_info, PEDIT_TAB_MASK); + if (err) + return err; return tcf_register_action(&act_pedit_ops); } static void __exit pedit_cleanup_module(void) { + tcf_hashinfo_destroy(&pedit_hash_info); tcf_unregister_action(&act_pedit_ops); } diff --git a/net/sched/act_police.c b/net/sched/act_police.c index ef246d87e68..1778209a332 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -41,15 +41,7 @@ struct tcf_police { container_of(pc, struct tcf_police, common) #define POL_TAB_MASK 15 -static struct tcf_common *tcf_police_ht[POL_TAB_MASK + 1]; -static u32 police_idx_gen; -static DEFINE_RWLOCK(police_lock); - -static struct tcf_hashinfo police_hash_info = { - .htab = tcf_police_ht, - .hmask = POL_TAB_MASK, - .lock = &police_lock, -}; +static struct tcf_hashinfo police_hash_info; /* old policer structure from before tc actions */ struct tc_police_compat { @@ -67,18 +59,20 @@ struct tc_police_compat { static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *cb, int type, struct tc_action *a) { + struct tcf_hashinfo *hinfo = a->ops->hinfo; + struct hlist_head *head; struct tcf_common *p; int err = 0, index = -1, i = 0, s_i = 0, n_i = 0; struct nlattr *nest; - read_lock_bh(&police_lock); + spin_lock_bh(&hinfo->lock); s_i = cb->args[0]; for (i = 0; i < (POL_TAB_MASK + 1); i++) { - p = tcf_police_ht[tcf_hash(i, POL_TAB_MASK)]; + head = &hinfo->htab[tcf_hash(i, POL_TAB_MASK)]; - for (; p; p = p->tcfc_next) { + hlist_for_each_entry_rcu(p, head, tcfc_head) { 
index++; if (index < s_i) continue; @@ -101,7 +95,7 @@ static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *c } } done: - read_unlock_bh(&police_lock); + spin_unlock_bh(&hinfo->lock); if (n_i) cb->args[0] += n_i; return n_i; @@ -111,29 +105,6 @@ nla_put_failure: goto done; } -static void tcf_police_destroy(struct tcf_police *p) -{ - unsigned int h = tcf_hash(p->tcf_index, POL_TAB_MASK); - struct tcf_common **p1p; - - for (p1p = &tcf_police_ht[h]; *p1p; p1p = &(*p1p)->tcfc_next) { - if (*p1p == &p->common) { - write_lock_bh(&police_lock); - *p1p = p->tcf_next; - write_unlock_bh(&police_lock); - gen_kill_estimator(&p->tcf_bstats, - &p->tcf_rate_est); - /* - * gen_estimator est_timer() might access p->tcf_lock - * or bstats, wait a RCU grace period before freeing p - */ - kfree_rcu(p, tcf_rcu); - return; - } - } - WARN_ON(1); -} - static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = { [TCA_POLICE_RATE] = { .len = TC_RTAB_SIZE }, [TCA_POLICE_PEAKRATE] = { .len = TC_RTAB_SIZE }, @@ -151,6 +122,7 @@ static int tcf_act_police_locate(struct net *net, struct nlattr *nla, struct tc_police *parm; struct tcf_police *police; struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL; + struct tcf_hashinfo *hinfo = a->ops->hinfo; int size; if (nla == NULL) @@ -168,12 +140,8 @@ static int tcf_act_police_locate(struct net *net, struct nlattr *nla, parm = nla_data(tb[TCA_POLICE_TBF]); if (parm->index) { - struct tcf_common *pc; - - pc = tcf_hash_lookup(parm->index, &police_hash_info); - if (pc != NULL) { - a->priv = pc; - police = to_police(pc); + if (tcf_hash_search(a, parm->index)) { + police = to_police(a->priv); if (bind) { police->tcf_bindcnt += 1; police->tcf_refcnt += 1; @@ -266,12 +234,11 @@ override: police->tcfp_t_c = ktime_to_ns(ktime_get()); police->tcf_index = parm->index ? 
parm->index : - tcf_hash_new_index(&police_idx_gen, &police_hash_info); + tcf_hash_new_index(a->ops->hinfo); h = tcf_hash(police->tcf_index, POL_TAB_MASK); - write_lock_bh(&police_lock); - police->tcf_next = tcf_police_ht[h]; - tcf_police_ht[h] = &police->common; - write_unlock_bh(&police_lock); + spin_lock_bh(&hinfo->lock); + hlist_add_head(&police->tcf_head, &hinfo->htab[h]); + spin_unlock_bh(&hinfo->lock); a->priv = police; return ret; @@ -279,10 +246,8 @@ override: failure_unlock: spin_unlock_bh(&police->tcf_lock); failure: - if (P_tab) - qdisc_put_rtab(P_tab); - if (R_tab) - qdisc_put_rtab(R_tab); + qdisc_put_rtab(P_tab); + qdisc_put_rtab(R_tab); if (ret == ACT_P_CREATED) kfree(police); return err; @@ -291,19 +256,9 @@ failure: static int tcf_act_police_cleanup(struct tc_action *a, int bind) { struct tcf_police *p = a->priv; - int ret = 0; - - if (p != NULL) { - if (bind) - p->tcf_bindcnt--; - - p->tcf_refcnt--; - if (p->tcf_refcnt <= 0 && !p->tcf_bindcnt) { - tcf_police_destroy(p); - ret = 1; - } - } - return ret; + if (p) + return tcf_hash_release(&p->common, bind, &police_hash_info); + return 0; } static int tcf_act_police(struct sk_buff *skb, const struct tc_action *a, @@ -404,7 +359,6 @@ static struct tc_action_ops act_police_ops = { .kind = "police", .hinfo = &police_hash_info, .type = TCA_ID_POLICE, - .capab = TCA_CAP_NONE, .owner = THIS_MODULE, .act = tcf_act_police, .dump = tcf_act_police_dump, @@ -416,12 +370,19 @@ static struct tc_action_ops act_police_ops = { static int __init police_init_module(void) { - return tcf_register_action(&act_police_ops); + int err = tcf_hashinfo_init(&police_hash_info, POL_TAB_MASK); + if (err) + return err; + err = tcf_register_action(&act_police_ops); + if (err) + tcf_hashinfo_destroy(&police_hash_info); + return err; } static void __exit police_cleanup_module(void) { + tcf_hashinfo_destroy(&police_hash_info); tcf_unregister_action(&act_police_ops); } diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 
f7b45ab8538..8ef2f1fcbfb 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -25,15 +25,7 @@ #include <net/tc_act/tc_defact.h> #define SIMP_TAB_MASK 7 -static struct tcf_common *tcf_simp_ht[SIMP_TAB_MASK + 1]; -static u32 simp_idx_gen; -static DEFINE_RWLOCK(simp_lock); - -static struct tcf_hashinfo simp_hash_info = { - .htab = tcf_simp_ht, - .hmask = SIMP_TAB_MASK, - .lock = &simp_lock, -}; +static struct tcf_hashinfo simp_hash_info; #define SIMP_MAX_DATA 32 static int tcf_simp(struct sk_buff *skb, const struct tc_action *a, @@ -122,10 +114,9 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla, parm = nla_data(tb[TCA_DEF_PARMS]); defdata = nla_data(tb[TCA_DEF_DATA]); - pc = tcf_hash_check(parm->index, a, bind, &simp_hash_info); + pc = tcf_hash_check(parm->index, a, bind); if (!pc) { - pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind, - &simp_idx_gen, &simp_hash_info); + pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind); if (IS_ERR(pc)) return PTR_ERR(pc); @@ -153,7 +144,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla, } if (ret == ACT_P_CREATED) - tcf_hash_insert(pc, &simp_hash_info); + tcf_hash_insert(pc, a->ops->hinfo); return ret; } @@ -198,7 +189,6 @@ static struct tc_action_ops act_simp_ops = { .kind = "simple", .hinfo = &simp_hash_info, .type = TCA_ACT_SIMP, - .capab = TCA_CAP_NONE, .owner = THIS_MODULE, .act = tcf_simp, .dump = tcf_simp_dump, @@ -212,14 +202,23 @@ MODULE_LICENSE("GPL"); static int __init simp_init_module(void) { - int ret = tcf_register_action(&act_simp_ops); + int err, ret; + err = tcf_hashinfo_init(&simp_hash_info, SIMP_TAB_MASK); + if (err) + return err; + + ret = tcf_register_action(&act_simp_ops); if (!ret) pr_info("Simple TC action Loaded\n"); + else + tcf_hashinfo_destroy(&simp_hash_info); + return ret; } static void __exit simp_cleanup_module(void) { + tcf_hashinfo_destroy(&simp_hash_info); tcf_unregister_action(&act_simp_ops); } diff --git a/net/sched/act_skbedit.c 
b/net/sched/act_skbedit.c index 8fe9d25c300..98725080b5a 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -11,8 +11,7 @@ * more details. * * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. + * this program; if not, see <http://www.gnu.org/licenses/>. * * Author: Alexander Duyck <alexander.h.duyck@intel.com> */ @@ -29,15 +28,7 @@ #include <net/tc_act/tc_skbedit.h> #define SKBEDIT_TAB_MASK 15 -static struct tcf_common *tcf_skbedit_ht[SKBEDIT_TAB_MASK + 1]; -static u32 skbedit_idx_gen; -static DEFINE_RWLOCK(skbedit_lock); - -static struct tcf_hashinfo skbedit_hash_info = { - .htab = tcf_skbedit_ht, - .hmask = SKBEDIT_TAB_MASK, - .lock = &skbedit_lock, -}; +static struct tcf_hashinfo skbedit_hash_info; static int tcf_skbedit(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) @@ -109,10 +100,9 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, parm = nla_data(tb[TCA_SKBEDIT_PARMS]); - pc = tcf_hash_check(parm->index, a, bind, &skbedit_hash_info); + pc = tcf_hash_check(parm->index, a, bind); if (!pc) { - pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind, - &skbedit_idx_gen, &skbedit_hash_info); + pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind); if (IS_ERR(pc)) return PTR_ERR(pc); @@ -122,7 +112,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, d = to_skbedit(pc); if (bind) return 0; - tcf_hash_release(pc, bind, &skbedit_hash_info); + tcf_hash_release(pc, bind, a->ops->hinfo); if (!ovr) return -EEXIST; } @@ -142,7 +132,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, spin_unlock_bh(&d->tcf_lock); if (ret == ACT_P_CREATED) - tcf_hash_insert(pc, &skbedit_hash_info); + tcf_hash_insert(pc, a->ops->hinfo); return ret; } @@ -198,7 +188,6 @@ static struct tc_action_ops act_skbedit_ops = { .kind = 
"skbedit", .hinfo = &skbedit_hash_info, .type = TCA_ACT_SKBEDIT, - .capab = TCA_CAP_NONE, .owner = THIS_MODULE, .act = tcf_skbedit, .dump = tcf_skbedit_dump, @@ -212,11 +201,15 @@ MODULE_LICENSE("GPL"); static int __init skbedit_init_module(void) { + int err = tcf_hashinfo_init(&skbedit_hash_info, SKBEDIT_TAB_MASK); + if (err) + return err; return tcf_register_action(&act_skbedit_ops); } static void __exit skbedit_cleanup_module(void) { + tcf_hashinfo_destroy(&skbedit_hash_info); tcf_unregister_action(&act_skbedit_ops); } diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 8e118af9097..29a30a14c31 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -31,8 +31,7 @@ #include <net/pkt_cls.h> /* The list of all installed classifier types */ - -static struct tcf_proto_ops *tcf_proto_base __read_mostly; +static LIST_HEAD(tcf_proto_base); /* Protects list of registered TC modules. It is pure SMP lock. */ static DEFINE_RWLOCK(cls_mod_lock); @@ -41,36 +40,35 @@ static DEFINE_RWLOCK(cls_mod_lock); static const struct tcf_proto_ops *tcf_proto_lookup_ops(struct nlattr *kind) { - const struct tcf_proto_ops *t = NULL; + const struct tcf_proto_ops *t, *res = NULL; if (kind) { read_lock(&cls_mod_lock); - for (t = tcf_proto_base; t; t = t->next) { + list_for_each_entry(t, &tcf_proto_base, head) { if (nla_strcmp(kind, t->kind) == 0) { - if (!try_module_get(t->owner)) - t = NULL; + if (try_module_get(t->owner)) + res = t; break; } } read_unlock(&cls_mod_lock); } - return t; + return res; } /* Register(unregister) new classifier type */ int register_tcf_proto_ops(struct tcf_proto_ops *ops) { - struct tcf_proto_ops *t, **tp; + struct tcf_proto_ops *t; int rc = -EEXIST; write_lock(&cls_mod_lock); - for (tp = &tcf_proto_base; (t = *tp) != NULL; tp = &t->next) + list_for_each_entry(t, &tcf_proto_base, head) if (!strcmp(ops->kind, t->kind)) goto out; - ops->next = NULL; - *tp = ops; + list_add_tail(&ops->head, &tcf_proto_base); rc = 0; out: write_unlock(&cls_mod_lock); @@ 
-80,19 +78,17 @@ EXPORT_SYMBOL(register_tcf_proto_ops); int unregister_tcf_proto_ops(struct tcf_proto_ops *ops) { - struct tcf_proto_ops *t, **tp; + struct tcf_proto_ops *t; int rc = -ENOENT; write_lock(&cls_mod_lock); - for (tp = &tcf_proto_base; (t = *tp) != NULL; tp = &t->next) - if (t == ops) + list_for_each_entry(t, &tcf_proto_base, head) { + if (t == ops) { + list_del(&t->head); + rc = 0; break; - - if (!t) - goto out; - *tp = t->next; - rc = 0; -out: + } + } write_unlock(&cls_mod_lock); return rc; } @@ -344,7 +340,7 @@ errout: return err; } -static int tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp, +static int tcf_fill_node(struct net *net, struct sk_buff *skb, struct tcf_proto *tp, unsigned long fh, u32 portid, u32 seq, u16 flags, int event) { struct tcmsg *tcm; @@ -366,7 +362,7 @@ static int tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp, tcm->tcm_handle = fh; if (RTM_DELTFILTER != event) { tcm->tcm_handle = 0; - if (tp->ops->dump && tp->ops->dump(tp, fh, skb, tcm) < 0) + if (tp->ops->dump && tp->ops->dump(net, tp, fh, skb, tcm) < 0) goto nla_put_failure; } nlh->nlmsg_len = skb_tail_pointer(skb) - b; @@ -389,7 +385,7 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb, if (!skb) return -ENOBUFS; - if (tcf_fill_node(skb, tp, fh, portid, n->nlmsg_seq, 0, event) <= 0) { + if (tcf_fill_node(net, skb, tp, fh, portid, n->nlmsg_seq, 0, event) <= 0) { kfree_skb(skb); return -EINVAL; } @@ -408,8 +404,9 @@ static int tcf_node_dump(struct tcf_proto *tp, unsigned long n, struct tcf_walker *arg) { struct tcf_dump_args *a = (void *)arg; + struct net *net = sock_net(a->skb->sk); - return tcf_fill_node(a->skb, tp, n, NETLINK_CB(a->cb->skb).portid, + return tcf_fill_node(net, a->skb, tp, n, NETLINK_CB(a->cb->skb).portid, a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTFILTER); } @@ -467,7 +464,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) if (t > s_t) memset(&cb->args[1], 0, 
sizeof(cb->args)-sizeof(cb->args[0])); if (cb->args[1] == 0) { - if (tcf_fill_node(skb, tp, 0, NETLINK_CB(cb->skb).portid, + if (tcf_fill_node(net, skb, tp, 0, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTFILTER) <= 0) break; @@ -500,46 +497,41 @@ out: void tcf_exts_destroy(struct tcf_proto *tp, struct tcf_exts *exts) { #ifdef CONFIG_NET_CLS_ACT - if (exts->action) { - tcf_action_destroy(exts->action, TCA_ACT_UNBIND); - exts->action = NULL; - } + tcf_action_destroy(&exts->actions, TCA_ACT_UNBIND); + INIT_LIST_HEAD(&exts->actions); #endif } EXPORT_SYMBOL(tcf_exts_destroy); int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, - struct nlattr *rate_tlv, struct tcf_exts *exts, - const struct tcf_ext_map *map) + struct nlattr *rate_tlv, struct tcf_exts *exts) { - memset(exts, 0, sizeof(*exts)); - #ifdef CONFIG_NET_CLS_ACT { struct tc_action *act; - if (map->police && tb[map->police]) { - act = tcf_action_init_1(net, tb[map->police], rate_tlv, + INIT_LIST_HEAD(&exts->actions); + if (exts->police && tb[exts->police]) { + act = tcf_action_init_1(net, tb[exts->police], rate_tlv, "police", TCA_ACT_NOREPLACE, TCA_ACT_BIND); if (IS_ERR(act)) return PTR_ERR(act); - act->type = TCA_OLD_COMPAT; - exts->action = act; - } else if (map->action && tb[map->action]) { - act = tcf_action_init(net, tb[map->action], rate_tlv, + act->type = exts->type = TCA_OLD_COMPAT; + list_add(&act->list, &exts->actions); + } else if (exts->action && tb[exts->action]) { + int err; + err = tcf_action_init(net, tb[exts->action], rate_tlv, NULL, TCA_ACT_NOREPLACE, - TCA_ACT_BIND); - if (IS_ERR(act)) - return PTR_ERR(act); - - exts->action = act; + TCA_ACT_BIND, &exts->actions); + if (err) + return err; } } #else - if ((map->action && tb[map->action]) || - (map->police && tb[map->police])) + if ((exts->action && tb[exts->action]) || + (exts->police && tb[exts->police])) return -EOPNOTSUPP; #endif @@ -551,43 +543,44 @@ void tcf_exts_change(struct tcf_proto 
*tp, struct tcf_exts *dst, struct tcf_exts *src) { #ifdef CONFIG_NET_CLS_ACT - if (src->action) { - struct tc_action *act; + if (!list_empty(&src->actions)) { + LIST_HEAD(tmp); tcf_tree_lock(tp); - act = dst->action; - dst->action = src->action; + list_splice_init(&dst->actions, &tmp); + list_splice(&src->actions, &dst->actions); tcf_tree_unlock(tp); - if (act) - tcf_action_destroy(act, TCA_ACT_UNBIND); + tcf_action_destroy(&tmp, TCA_ACT_UNBIND); } #endif } EXPORT_SYMBOL(tcf_exts_change); -int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts, - const struct tcf_ext_map *map) +#define tcf_exts_first_act(ext) \ + list_first_entry(&(exts)->actions, struct tc_action, list) + +int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts) { #ifdef CONFIG_NET_CLS_ACT - if (map->action && exts->action) { + if (exts->action && !list_empty(&exts->actions)) { /* * again for backward compatible mode - we want * to work with both old and new modes of entering * tc data even if iproute2 was newer - jhs */ struct nlattr *nest; - - if (exts->action->type != TCA_OLD_COMPAT) { - nest = nla_nest_start(skb, map->action); + if (exts->type != TCA_OLD_COMPAT) { + nest = nla_nest_start(skb, exts->action); if (nest == NULL) goto nla_put_failure; - if (tcf_action_dump(skb, exts->action, 0, 0) < 0) + if (tcf_action_dump(skb, &exts->actions, 0, 0) < 0) goto nla_put_failure; nla_nest_end(skb, nest); - } else if (map->police) { - nest = nla_nest_start(skb, map->police); - if (nest == NULL) + } else if (exts->police) { + struct tc_action *act = tcf_exts_first_act(exts); + nest = nla_nest_start(skb, exts->police); + if (nest == NULL || !act) goto nla_put_failure; - if (tcf_action_dump_old(skb, exts->action, 0, 0) < 0) + if (tcf_action_dump_old(skb, act, 0, 0) < 0) goto nla_put_failure; nla_nest_end(skb, nest); } @@ -600,17 +593,14 @@ nla_put_failure: __attribute__ ((unused)) EXPORT_SYMBOL(tcf_exts_dump); -int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts, - const struct 
tcf_ext_map *map) +int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts) { #ifdef CONFIG_NET_CLS_ACT - if (exts->action) - if (tcf_action_copy_stats(skb, exts->action, 1) < 0) - goto nla_put_failure; + struct tc_action *a = tcf_exts_first_act(exts); + if (tcf_action_copy_stats(skb, a, 1) < 0) + return -1; #endif return 0; -nla_put_failure: __attribute__ ((unused)) - return -1; } EXPORT_SYMBOL(tcf_exts_dump_stats); diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index 636d9131d87..e98ca99c202 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -34,16 +34,11 @@ struct basic_filter { struct list_head link; }; -static const struct tcf_ext_map basic_ext_map = { - .action = TCA_BASIC_ACT, - .police = TCA_BASIC_POLICE -}; - static int basic_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { int r; - struct basic_head *head = (struct basic_head *) tp->root; + struct basic_head *head = tp->root; struct basic_filter *f; list_for_each_entry(f, &head->flist, link) { @@ -61,7 +56,7 @@ static int basic_classify(struct sk_buff *skb, const struct tcf_proto *tp, static unsigned long basic_get(struct tcf_proto *tp, u32 handle) { unsigned long l = 0UL; - struct basic_head *head = (struct basic_head *) tp->root; + struct basic_head *head = tp->root; struct basic_filter *f; if (head == NULL) @@ -112,7 +107,7 @@ static void basic_destroy(struct tcf_proto *tp) static int basic_delete(struct tcf_proto *tp, unsigned long arg) { - struct basic_head *head = (struct basic_head *) tp->root; + struct basic_head *head = tp->root; struct basic_filter *t, *f = (struct basic_filter *) arg; list_for_each_entry(t, &head->flist, link) @@ -141,7 +136,8 @@ static int basic_set_parms(struct net *net, struct tcf_proto *tp, struct tcf_exts e; struct tcf_ematch_tree t; - err = tcf_exts_validate(net, tp, tb, est, &e, &basic_ext_map); + tcf_exts_init(&e, TCA_BASIC_ACT, TCA_BASIC_POLICE); + err = tcf_exts_validate(net, tp, tb, est, &e); if 
(err < 0) return err; @@ -168,7 +164,7 @@ static int basic_change(struct net *net, struct sk_buff *in_skb, struct nlattr **tca, unsigned long *arg) { int err; - struct basic_head *head = (struct basic_head *) tp->root; + struct basic_head *head = tp->root; struct nlattr *tb[TCA_BASIC_MAX + 1]; struct basic_filter *f = (struct basic_filter *) *arg; @@ -191,6 +187,7 @@ static int basic_change(struct net *net, struct sk_buff *in_skb, if (f == NULL) goto errout; + tcf_exts_init(&f->exts, TCA_BASIC_ACT, TCA_BASIC_POLICE); err = -EINVAL; if (handle) f->handle = handle; @@ -228,7 +225,7 @@ errout: static void basic_walk(struct tcf_proto *tp, struct tcf_walker *arg) { - struct basic_head *head = (struct basic_head *) tp->root; + struct basic_head *head = tp->root; struct basic_filter *f; list_for_each_entry(f, &head->flist, link) { @@ -244,7 +241,7 @@ skip: } } -static int basic_dump(struct tcf_proto *tp, unsigned long fh, +static int basic_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, struct sk_buff *skb, struct tcmsg *t) { struct basic_filter *f = (struct basic_filter *) fh; @@ -263,13 +260,13 @@ static int basic_dump(struct tcf_proto *tp, unsigned long fh, nla_put_u32(skb, TCA_BASIC_CLASSID, f->res.classid)) goto nla_put_failure; - if (tcf_exts_dump(skb, &f->exts, &basic_ext_map) < 0 || + if (tcf_exts_dump(skb, &f->exts) < 0 || tcf_em_tree_dump(skb, &f->ematches, TCA_BASIC_EMATCHES) < 0) goto nla_put_failure; nla_nest_end(skb, nest); - if (tcf_exts_dump_stats(skb, &f->exts, &basic_ext_map) < 0) + if (tcf_exts_dump_stats(skb, &f->exts) < 0) goto nla_put_failure; return skb->len; diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 1002a822628..8e3cf49118e 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -46,11 +46,6 @@ static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = { .len = sizeof(struct sock_filter) * BPF_MAXINSNS }, }; -static const struct tcf_ext_map bpf_ext_map = { - .action = TCA_BPF_ACT, - .police = TCA_BPF_POLICE, 
-}; - static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { @@ -174,7 +169,8 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, if (!tb[TCA_BPF_OPS_LEN] || !tb[TCA_BPF_OPS] || !tb[TCA_BPF_CLASSID]) return -EINVAL; - ret = tcf_exts_validate(net, tp, tb, est, &exts, &bpf_ext_map); + tcf_exts_init(&exts, TCA_BPF_ACT, TCA_BPF_POLICE); + ret = tcf_exts_validate(net, tp, tb, est, &exts); if (ret < 0) return ret; @@ -271,6 +267,7 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, if (prog == NULL) return -ENOBUFS; + tcf_exts_init(&prog->exts, TCA_BPF_ACT, TCA_BPF_POLICE); if (handle == 0) prog->handle = cls_bpf_grab_new_handle(tp, head); else @@ -298,7 +295,7 @@ errout: return ret; } -static int cls_bpf_dump(struct tcf_proto *tp, unsigned long fh, +static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, struct sk_buff *skb, struct tcmsg *tm) { struct cls_bpf_prog *prog = (struct cls_bpf_prog *) fh; @@ -323,14 +320,14 @@ static int cls_bpf_dump(struct tcf_proto *tp, unsigned long fh, if (nla == NULL) goto nla_put_failure; - memcpy(nla_data(nla), prog->bpf_ops, nla_len(nla)); + memcpy(nla_data(nla), prog->bpf_ops, nla_len(nla)); - if (tcf_exts_dump(skb, &prog->exts, &bpf_ext_map) < 0) + if (tcf_exts_dump(skb, &prog->exts) < 0) goto nla_put_failure; nla_nest_end(skb, nest); - if (tcf_exts_dump_stats(skb, &prog->exts, &bpf_ext_map) < 0) + if (tcf_exts_dump_stats(skb, &prog->exts) < 0) goto nla_put_failure; return skb->len; diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index 16006c92c3f..8e2158ab551 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -11,109 +11,13 @@ #include <linux/module.h> #include <linux/slab.h> -#include <linux/types.h> -#include <linux/string.h> -#include <linux/errno.h> #include <linux/skbuff.h> -#include <linux/cgroup.h> #include <linux/rcupdate.h> -#include <linux/fdtable.h> #include <net/rtnetlink.h> 
#include <net/pkt_cls.h> #include <net/sock.h> #include <net/cls_cgroup.h> -static inline struct cgroup_cls_state *css_cls_state(struct cgroup_subsys_state *css) -{ - return css ? container_of(css, struct cgroup_cls_state, css) : NULL; -} - -static inline struct cgroup_cls_state *task_cls_state(struct task_struct *p) -{ - return css_cls_state(task_css(p, net_cls_subsys_id)); -} - -static struct cgroup_subsys_state * -cgrp_css_alloc(struct cgroup_subsys_state *parent_css) -{ - struct cgroup_cls_state *cs; - - cs = kzalloc(sizeof(*cs), GFP_KERNEL); - if (!cs) - return ERR_PTR(-ENOMEM); - return &cs->css; -} - -static int cgrp_css_online(struct cgroup_subsys_state *css) -{ - struct cgroup_cls_state *cs = css_cls_state(css); - struct cgroup_cls_state *parent = css_cls_state(css_parent(css)); - - if (parent) - cs->classid = parent->classid; - return 0; -} - -static void cgrp_css_free(struct cgroup_subsys_state *css) -{ - kfree(css_cls_state(css)); -} - -static int update_classid(const void *v, struct file *file, unsigned n) -{ - int err; - struct socket *sock = sock_from_file(file, &err); - if (sock) - sock->sk->sk_classid = (u32)(unsigned long)v; - return 0; -} - -static void cgrp_attach(struct cgroup_subsys_state *css, - struct cgroup_taskset *tset) -{ - struct task_struct *p; - struct cgroup_cls_state *cs = css_cls_state(css); - void *v = (void *)(unsigned long)cs->classid; - - cgroup_taskset_for_each(p, css, tset) { - task_lock(p); - iterate_fd(p->files, 0, update_classid, v); - task_unlock(p); - } -} - -static u64 read_classid(struct cgroup_subsys_state *css, struct cftype *cft) -{ - return css_cls_state(css)->classid; -} - -static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft, - u64 value) -{ - css_cls_state(css)->classid = (u32) value; - return 0; -} - -static struct cftype ss_files[] = { - { - .name = "classid", - .read_u64 = read_classid, - .write_u64 = write_classid, - }, - { } /* terminate */ -}; - -struct cgroup_subsys net_cls_subsys 
= { - .name = "net_cls", - .css_alloc = cgrp_css_alloc, - .css_online = cgrp_css_online, - .css_free = cgrp_css_free, - .attach = cgrp_attach, - .subsys_id = net_cls_subsys_id, - .base_cftypes = ss_files, - .module = THIS_MODULE, -}; - struct cls_cgroup_head { u32 handle; struct tcf_exts exts; @@ -172,11 +76,6 @@ static int cls_cgroup_init(struct tcf_proto *tp) return 0; } -static const struct tcf_ext_map cgroup_ext_map = { - .action = TCA_CGROUP_ACT, - .police = TCA_CGROUP_POLICE, -}; - static const struct nla_policy cgroup_policy[TCA_CGROUP_MAX + 1] = { [TCA_CGROUP_EMATCHES] = { .type = NLA_NESTED }, }; @@ -203,6 +102,7 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, if (head == NULL) return -ENOBUFS; + tcf_exts_init(&head->exts, TCA_CGROUP_ACT, TCA_CGROUP_POLICE); head->handle = handle; tcf_tree_lock(tp); @@ -218,8 +118,8 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, if (err < 0) return err; - err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, - &cgroup_ext_map); + tcf_exts_init(&e, TCA_CGROUP_ACT, TCA_CGROUP_POLICE); + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e); if (err < 0) return err; @@ -264,7 +164,7 @@ skip: arg->count++; } -static int cls_cgroup_dump(struct tcf_proto *tp, unsigned long fh, +static int cls_cgroup_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, struct sk_buff *skb, struct tcmsg *t) { struct cls_cgroup_head *head = tp->root; @@ -277,13 +177,13 @@ static int cls_cgroup_dump(struct tcf_proto *tp, unsigned long fh, if (nest == NULL) goto nla_put_failure; - if (tcf_exts_dump(skb, &head->exts, &cgroup_ext_map) < 0 || + if (tcf_exts_dump(skb, &head->exts) < 0 || tcf_em_tree_dump(skb, &head->ematches, TCA_CGROUP_EMATCHES) < 0) goto nla_put_failure; nla_nest_end(skb, nest); - if (tcf_exts_dump_stats(skb, &head->exts, &cgroup_ext_map) < 0) + if (tcf_exts_dump_stats(skb, &head->exts) < 0) goto nla_put_failure; return skb->len; @@ -309,25 +209,12 @@ static struct 
tcf_proto_ops cls_cgroup_ops __read_mostly = { static int __init init_cgroup_cls(void) { - int ret; - - ret = cgroup_load_subsys(&net_cls_subsys); - if (ret) - goto out; - - ret = register_tcf_proto_ops(&cls_cgroup_ops); - if (ret) - cgroup_unload_subsys(&net_cls_subsys); - -out: - return ret; + return register_tcf_proto_ops(&cls_cgroup_ops); } static void __exit exit_cgroup_cls(void) { unregister_tcf_proto_ops(&cls_cgroup_ops); - - cgroup_unload_subsys(&net_cls_subsys); } module_init(init_cgroup_cls); diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 7881e2fccbc..257029c5433 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -56,11 +56,6 @@ struct flow_filter { u32 hashrnd; }; -static const struct tcf_ext_map flow_ext_map = { - .action = TCA_FLOW_ACT, - .police = TCA_FLOW_POLICE, -}; - static inline u32 addr_fold(void *addr) { unsigned long a = (unsigned long)addr; @@ -220,7 +215,7 @@ static u32 flow_get_vlan_tag(const struct sk_buff *skb) static u32 flow_get_rxhash(struct sk_buff *skb) { - return skb_get_rxhash(skb); + return skb_get_hash(skb); } static u32 flow_key_get(struct sk_buff *skb, int key, struct flow_keys *flow) @@ -397,7 +392,8 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, return -EOPNOTSUPP; } - err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, &flow_ext_map); + tcf_exts_init(&e, TCA_FLOW_ACT, TCA_FLOW_POLICE); + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e); if (err < 0) return err; @@ -455,6 +451,7 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, f->handle = handle; f->mask = ~0U; + tcf_exts_init(&f->exts, TCA_FLOW_ACT, TCA_FLOW_POLICE); get_random_bytes(&f->hashrnd, 4); f->perturb_timer.function = flow_perturbation; @@ -566,7 +563,7 @@ static void flow_put(struct tcf_proto *tp, unsigned long f) { } -static int flow_dump(struct tcf_proto *tp, unsigned long fh, +static int flow_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, struct sk_buff *skb, struct 
tcmsg *t) { struct flow_filter *f = (struct flow_filter *)fh; @@ -608,7 +605,7 @@ static int flow_dump(struct tcf_proto *tp, unsigned long fh, nla_put_u32(skb, TCA_FLOW_PERTURB, f->perturb_period / HZ)) goto nla_put_failure; - if (tcf_exts_dump(skb, &f->exts, &flow_ext_map) < 0) + if (tcf_exts_dump(skb, &f->exts) < 0) goto nla_put_failure; #ifdef CONFIG_NET_EMATCH if (f->ematches.hdr.nmatches && @@ -617,7 +614,7 @@ static int flow_dump(struct tcf_proto *tp, unsigned long fh, #endif nla_nest_end(skb, nest); - if (tcf_exts_dump_stats(skb, &f->exts, &flow_ext_map) < 0) + if (tcf_exts_dump_stats(skb, &f->exts) < 0) goto nla_put_failure; return skb->len; diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index 9b97172db84..a366537f82c 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -41,16 +41,11 @@ struct fw_filter { u32 id; struct tcf_result res; #ifdef CONFIG_NET_CLS_IND - char indev[IFNAMSIZ]; + int ifindex; #endif /* CONFIG_NET_CLS_IND */ struct tcf_exts exts; }; -static const struct tcf_ext_map fw_ext_map = { - .action = TCA_FW_ACT, - .police = TCA_FW_POLICE -}; - static inline int fw_hash(u32 handle) { if (HTSIZE == 4096) @@ -80,7 +75,7 @@ static inline int fw_hash(u32 handle) static int fw_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { - struct fw_head *head = (struct fw_head *)tp->root; + struct fw_head *head = tp->root; struct fw_filter *f; int r; u32 id = skb->mark; @@ -91,7 +86,7 @@ static int fw_classify(struct sk_buff *skb, const struct tcf_proto *tp, if (f->id == id) { *res = f->res; #ifdef CONFIG_NET_CLS_IND - if (!tcf_match_indev(skb, f->indev)) + if (!tcf_match_indev(skb, f->ifindex)) continue; #endif /* CONFIG_NET_CLS_IND */ r = tcf_exts_exec(skb, &f->exts, res); @@ -116,7 +111,7 @@ static int fw_classify(struct sk_buff *skb, const struct tcf_proto *tp, static unsigned long fw_get(struct tcf_proto *tp, u32 handle) { - struct fw_head *head = (struct fw_head *)tp->root; + struct fw_head *head = tp->root; 
struct fw_filter *f; if (head == NULL) @@ -165,7 +160,7 @@ static void fw_destroy(struct tcf_proto *tp) static int fw_delete(struct tcf_proto *tp, unsigned long arg) { - struct fw_head *head = (struct fw_head *)tp->root; + struct fw_head *head = tp->root; struct fw_filter *f = (struct fw_filter *)arg; struct fw_filter **fp; @@ -195,12 +190,13 @@ static int fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f, struct nlattr **tb, struct nlattr **tca, unsigned long base) { - struct fw_head *head = (struct fw_head *)tp->root; + struct fw_head *head = tp->root; struct tcf_exts e; u32 mask; int err; - err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, &fw_ext_map); + tcf_exts_init(&e, TCA_FW_ACT, TCA_FW_POLICE); + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e); if (err < 0) return err; @@ -211,9 +207,13 @@ fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f, #ifdef CONFIG_NET_CLS_IND if (tb[TCA_FW_INDEV]) { - err = tcf_change_indev(tp, f->indev, tb[TCA_FW_INDEV]); - if (err < 0) + int ret; + ret = tcf_change_indev(net, tb[TCA_FW_INDEV]); + if (ret < 0) { + err = ret; goto errout; + } + f->ifindex = ret; } #endif /* CONFIG_NET_CLS_IND */ @@ -239,7 +239,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, struct nlattr **tca, unsigned long *arg) { - struct fw_head *head = (struct fw_head *)tp->root; + struct fw_head *head = tp->root; struct fw_filter *f = (struct fw_filter *) *arg; struct nlattr *opt = tca[TCA_OPTIONS]; struct nlattr *tb[TCA_FW_MAX + 1]; @@ -280,6 +280,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, if (f == NULL) return -ENOBUFS; + tcf_exts_init(&f->exts, TCA_FW_ACT, TCA_FW_POLICE); f->id = handle; err = fw_change_attrs(net, tp, f, tb, tca, base); @@ -301,7 +302,7 @@ errout: static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg) { - struct fw_head *head = (struct fw_head *)tp->root; + struct fw_head *head = tp->root; int h; if (head == NULL) @@ -327,10 
+328,10 @@ static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg) } } -static int fw_dump(struct tcf_proto *tp, unsigned long fh, +static int fw_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, struct sk_buff *skb, struct tcmsg *t) { - struct fw_head *head = (struct fw_head *)tp->root; + struct fw_head *head = tp->root; struct fw_filter *f = (struct fw_filter *)fh; unsigned char *b = skb_tail_pointer(skb); struct nlattr *nest; @@ -351,20 +352,23 @@ static int fw_dump(struct tcf_proto *tp, unsigned long fh, nla_put_u32(skb, TCA_FW_CLASSID, f->res.classid)) goto nla_put_failure; #ifdef CONFIG_NET_CLS_IND - if (strlen(f->indev) && - nla_put_string(skb, TCA_FW_INDEV, f->indev)) - goto nla_put_failure; + if (f->ifindex) { + struct net_device *dev; + dev = __dev_get_by_index(net, f->ifindex); + if (dev && nla_put_string(skb, TCA_FW_INDEV, dev->name)) + goto nla_put_failure; + } #endif /* CONFIG_NET_CLS_IND */ if (head->mask != 0xFFFFFFFF && nla_put_u32(skb, TCA_FW_MASK, head->mask)) goto nla_put_failure; - if (tcf_exts_dump(skb, &f->exts, &fw_ext_map) < 0) + if (tcf_exts_dump(skb, &f->exts) < 0) goto nla_put_failure; nla_nest_end(skb, nest); - if (tcf_exts_dump_stats(skb, &f->exts, &fw_ext_map) < 0) + if (tcf_exts_dump_stats(skb, &f->exts) < 0) goto nla_put_failure; return skb->len; diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 37da567d833..1ad3068f2ce 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -59,11 +59,6 @@ struct route4_filter { #define ROUTE4_FAILURE ((struct route4_filter *)(-1L)) -static const struct tcf_ext_map route_ext_map = { - .police = TCA_ROUTE4_POLICE, - .action = TCA_ROUTE4_ACT -}; - static inline int route4_fastmap_hash(u32 id, int iif) { return id & 0xF; @@ -128,7 +123,7 @@ static inline int route4_hash_wild(void) static int route4_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { - struct route4_head *head = (struct route4_head *)tp->root; + struct 
route4_head *head = tp->root; struct dst_entry *dst; struct route4_bucket *b; struct route4_filter *f; @@ -218,7 +213,7 @@ static inline u32 from_hash(u32 id) static unsigned long route4_get(struct tcf_proto *tp, u32 handle) { - struct route4_head *head = (struct route4_head *)tp->root; + struct route4_head *head = tp->root; struct route4_bucket *b; struct route4_filter *f; unsigned int h1, h2; @@ -289,7 +284,7 @@ static void route4_destroy(struct tcf_proto *tp) static int route4_delete(struct tcf_proto *tp, unsigned long arg) { - struct route4_head *head = (struct route4_head *)tp->root; + struct route4_head *head = tp->root; struct route4_filter **fp, *f = (struct route4_filter *)arg; unsigned int h = 0; struct route4_bucket *b; @@ -347,7 +342,8 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp, struct route4_bucket *b; struct tcf_exts e; - err = tcf_exts_validate(net, tp, tb, est, &e, &route_ext_map); + tcf_exts_init(&e, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE); + err = tcf_exts_validate(net, tp, tb, est, &e); if (err < 0) return err; @@ -481,6 +477,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, if (f == NULL) goto errout; + tcf_exts_init(&f->exts, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE); err = route4_set_parms(net, tp, base, f, handle, head, tb, tca[TCA_RATE], 1); if (err < 0) @@ -554,7 +551,7 @@ static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg) } } -static int route4_dump(struct tcf_proto *tp, unsigned long fh, +static int route4_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, struct sk_buff *skb, struct tcmsg *t) { struct route4_filter *f = (struct route4_filter *)fh; @@ -589,12 +586,12 @@ static int route4_dump(struct tcf_proto *tp, unsigned long fh, nla_put_u32(skb, TCA_ROUTE4_CLASSID, f->res.classid)) goto nla_put_failure; - if (tcf_exts_dump(skb, &f->exts, &route_ext_map) < 0) + if (tcf_exts_dump(skb, &f->exts) < 0) goto nla_put_failure; nla_nest_end(skb, nest); - if 
(tcf_exts_dump_stats(skb, &f->exts, &route_ext_map) < 0) + if (tcf_exts_dump_stats(skb, &f->exts) < 0) goto nla_put_failure; return skb->len; diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index 252d8b05872..19f8e5dfa8b 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -116,11 +116,6 @@ static inline unsigned int hash_src(__be32 *src) return h & 0xF; } -static struct tcf_ext_map rsvp_ext_map = { - .police = TCA_RSVP_POLICE, - .action = TCA_RSVP_ACT -}; - #define RSVP_APPLY_RESULT() \ { \ int r = tcf_exts_exec(skb, &f->exts, res); \ @@ -440,7 +435,8 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb, if (err < 0) return err; - err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, &rsvp_ext_map); + tcf_exts_init(&e, TCA_RSVP_ACT, TCA_RSVP_POLICE); + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e); if (err < 0) return err; @@ -471,6 +467,7 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb, if (f == NULL) goto errout2; + tcf_exts_init(&f->exts, TCA_RSVP_ACT, TCA_RSVP_POLICE); h2 = 16; if (tb[TCA_RSVP_SRC]) { memcpy(f->src, nla_data(tb[TCA_RSVP_SRC]), sizeof(f->src)); @@ -597,7 +594,7 @@ static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg) } } -static int rsvp_dump(struct tcf_proto *tp, unsigned long fh, +static int rsvp_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, struct sk_buff *skb, struct tcmsg *t) { struct rsvp_filter *f = (struct rsvp_filter *)fh; @@ -633,12 +630,12 @@ static int rsvp_dump(struct tcf_proto *tp, unsigned long fh, nla_put(skb, TCA_RSVP_SRC, sizeof(f->src), f->src)) goto nla_put_failure; - if (tcf_exts_dump(skb, &f->exts, &rsvp_ext_map) < 0) + if (tcf_exts_dump(skb, &f->exts) < 0) goto nla_put_failure; nla_nest_end(skb, nest); - if (tcf_exts_dump_stats(skb, &f->exts, &rsvp_ext_map) < 0) + if (tcf_exts_dump_stats(skb, &f->exts) < 0) goto nla_put_failure; return skb->len; diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 
b86535a4016..eed8404443d 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -24,9 +24,6 @@ #define DEFAULT_HASH_SIZE 64 /* optimized for diffserv */ -#define PRIV(tp) ((struct tcindex_data *) (tp)->root) - - struct tcindex_filter_result { struct tcf_exts exts; struct tcf_result res; @@ -50,11 +47,6 @@ struct tcindex_data { int fall_through; /* 0: only classify if explicit match */ }; -static const struct tcf_ext_map tcindex_ext_map = { - .police = TCA_TCINDEX_POLICE, - .action = TCA_TCINDEX_ACT -}; - static inline int tcindex_filter_is_set(struct tcindex_filter_result *r) { @@ -82,7 +74,7 @@ tcindex_lookup(struct tcindex_data *p, u16 key) static int tcindex_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { - struct tcindex_data *p = PRIV(tp); + struct tcindex_data *p = tp->root; struct tcindex_filter_result *f; int key = (skb->tc_index & p->mask) >> p->shift; @@ -107,7 +99,7 @@ static int tcindex_classify(struct sk_buff *skb, const struct tcf_proto *tp, static unsigned long tcindex_get(struct tcf_proto *tp, u32 handle) { - struct tcindex_data *p = PRIV(tp); + struct tcindex_data *p = tp->root; struct tcindex_filter_result *r; pr_debug("tcindex_get(tp %p,handle 0x%08x)\n", tp, handle); @@ -145,7 +137,7 @@ static int tcindex_init(struct tcf_proto *tp) static int __tcindex_delete(struct tcf_proto *tp, unsigned long arg, int lock) { - struct tcindex_data *p = PRIV(tp); + struct tcindex_data *p = tp->root; struct tcindex_filter_result *r = (struct tcindex_filter_result *) arg; struct tcindex_filter *f = NULL; @@ -209,17 +201,21 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, struct tcindex_filter *f = NULL; /* make gcc behave */ struct tcf_exts e; - err = tcf_exts_validate(net, tp, tb, est, &e, &tcindex_ext_map); + tcf_exts_init(&e, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); + err = tcf_exts_validate(net, tp, tb, est, &e); if (err < 0) return err; memcpy(&cp, p, sizeof(cp)); 
memset(&new_filter_result, 0, sizeof(new_filter_result)); + tcf_exts_init(&new_filter_result.exts, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); if (old_r) memcpy(&cr, r, sizeof(cr)); - else + else { memset(&cr, 0, sizeof(cr)); + tcf_exts_init(&cr.exts, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); + } if (tb[TCA_TCINDEX_HASH]) cp.hash = nla_get_u32(tb[TCA_TCINDEX_HASH]); @@ -339,7 +335,7 @@ tcindex_change(struct net *net, struct sk_buff *in_skb, { struct nlattr *opt = tca[TCA_OPTIONS]; struct nlattr *tb[TCA_TCINDEX_MAX + 1]; - struct tcindex_data *p = PRIV(tp); + struct tcindex_data *p = tp->root; struct tcindex_filter_result *r = (struct tcindex_filter_result *) *arg; int err; @@ -361,7 +357,7 @@ tcindex_change(struct net *net, struct sk_buff *in_skb, static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker) { - struct tcindex_data *p = PRIV(tp); + struct tcindex_data *p = tp->root; struct tcindex_filter *f, *next; int i; @@ -408,7 +404,7 @@ static int tcindex_destroy_element(struct tcf_proto *tp, static void tcindex_destroy(struct tcf_proto *tp) { - struct tcindex_data *p = PRIV(tp); + struct tcindex_data *p = tp->root; struct tcf_walker walker; pr_debug("tcindex_destroy(tp %p),p %p\n", tp, p); @@ -423,10 +419,10 @@ static void tcindex_destroy(struct tcf_proto *tp) } -static int tcindex_dump(struct tcf_proto *tp, unsigned long fh, +static int tcindex_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, struct sk_buff *skb, struct tcmsg *t) { - struct tcindex_data *p = PRIV(tp); + struct tcindex_data *p = tp->root; struct tcindex_filter_result *r = (struct tcindex_filter_result *) fh; unsigned char *b = skb_tail_pointer(skb); struct nlattr *nest; @@ -468,11 +464,11 @@ static int tcindex_dump(struct tcf_proto *tp, unsigned long fh, nla_put_u32(skb, TCA_TCINDEX_CLASSID, r->res.classid)) goto nla_put_failure; - if (tcf_exts_dump(skb, &r->exts, &tcindex_ext_map) < 0) + if (tcf_exts_dump(skb, &r->exts) < 0) goto nla_put_failure; nla_nest_end(skb, nest); - if 
(tcf_exts_dump_stats(skb, &r->exts, &tcindex_ext_map) < 0) + if (tcf_exts_dump_stats(skb, &r->exts) < 0) goto nla_put_failure; } diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index eb07a1e536e..84c28daff84 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -48,7 +48,7 @@ struct tc_u_knode { struct tc_u_hnode *ht_up; struct tcf_exts exts; #ifdef CONFIG_NET_CLS_IND - char indev[IFNAMSIZ]; + int ifindex; #endif u8 fshift; struct tcf_result res; @@ -79,11 +79,6 @@ struct tc_u_common { u32 hgenerator; }; -static const struct tcf_ext_map u32_ext_map = { - .action = TCA_U32_ACT, - .police = TCA_U32_POLICE -}; - static inline unsigned int u32_hash_fold(__be32 key, const struct tc_u32_sel *sel, u8 fshift) @@ -100,7 +95,7 @@ static int u32_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct unsigned int off; } stack[TC_U32_MAXDEPTH]; - struct tc_u_hnode *ht = (struct tc_u_hnode *)tp->root; + struct tc_u_hnode *ht = tp->root; unsigned int off = skb_network_offset(skb); struct tc_u_knode *n; int sdepth = 0; @@ -157,7 +152,7 @@ check_terminal: *res = n->res; #ifdef CONFIG_NET_CLS_IND - if (!tcf_match_indev(skb, n->indev)) { + if (!tcf_match_indev(skb, n->ifindex)) { n = n->next; goto next_knode; } @@ -352,7 +347,7 @@ static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n) return 0; } -static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode* key) +static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key) { struct tc_u_knode **kp; struct tc_u_hnode *ht = key->ht_up; @@ -496,7 +491,8 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp, int err; struct tcf_exts e; - err = tcf_exts_validate(net, tp, tb, est, &e, &u32_ext_map); + tcf_exts_init(&e, TCA_U32_ACT, TCA_U32_POLICE); + err = tcf_exts_validate(net, tp, tb, est, &e); if (err < 0) return err; @@ -531,9 +527,11 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp, #ifdef CONFIG_NET_CLS_IND if (tb[TCA_U32_INDEV]) { - err = 
tcf_change_indev(tp, n->indev, tb[TCA_U32_INDEV]); - if (err < 0) + int ret; + ret = tcf_change_indev(net, tb[TCA_U32_INDEV]); + if (ret < 0) goto errout; + n->ifindex = ret; } #endif tcf_exts_change(tp, &n->exts, &e); @@ -646,6 +644,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, n->ht_up = ht; n->handle = handle; n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0; + tcf_exts_init(&n->exts, TCA_U32_ACT, TCA_U32_POLICE); #ifdef CONFIG_CLS_U32_MARK if (tb[TCA_U32_MARK]) { @@ -715,7 +714,7 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg) } } -static int u32_dump(struct tcf_proto *tp, unsigned long fh, +static int u32_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, struct sk_buff *skb, struct tcmsg *t) { struct tc_u_knode *n = (struct tc_u_knode *)fh; @@ -759,13 +758,16 @@ static int u32_dump(struct tcf_proto *tp, unsigned long fh, goto nla_put_failure; #endif - if (tcf_exts_dump(skb, &n->exts, &u32_ext_map) < 0) + if (tcf_exts_dump(skb, &n->exts) < 0) goto nla_put_failure; #ifdef CONFIG_NET_CLS_IND - if (strlen(n->indev) && - nla_put_string(skb, TCA_U32_INDEV, n->indev)) - goto nla_put_failure; + if (n->ifindex) { + struct net_device *dev; + dev = __dev_get_by_index(net, n->ifindex); + if (dev && nla_put_string(skb, TCA_U32_INDEV, dev->name)) + goto nla_put_failure; + } #endif #ifdef CONFIG_CLS_U32_PERF if (nla_put(skb, TCA_U32_PCNT, @@ -778,7 +780,7 @@ static int u32_dump(struct tcf_proto *tp, unsigned long fh, nla_nest_end(skb, nest); if (TC_U32_KEY(n->handle)) - if (tcf_exts_dump_stats(skb, &n->exts, &u32_ext_map) < 0) + if (tcf_exts_dump_stats(skb, &n->exts) < 0) goto nla_put_failure; return skb->len; diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index e5cef956722..9b8c0b0e60d 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -222,7 +222,7 @@ META_COLLECTOR(int_maclen) META_COLLECTOR(int_rxhash) { - dst->value = skb_get_rxhash(skb); + dst->value = skb_get_hash(skb); } 
/************************************************************************** @@ -271,40 +271,52 @@ META_COLLECTOR(int_rtiif) * Socket Attributes **************************************************************************/ -#define SKIP_NONLOCAL(skb) \ - if (unlikely(skb->sk == NULL)) { \ - *err = -1; \ - return; \ - } +#define skip_nonlocal(skb) \ + (unlikely(skb->sk == NULL)) META_COLLECTOR(int_sk_family) { - SKIP_NONLOCAL(skb); + if (skip_nonlocal(skb)) { + *err = -1; + return; + } dst->value = skb->sk->sk_family; } META_COLLECTOR(int_sk_state) { - SKIP_NONLOCAL(skb); + if (skip_nonlocal(skb)) { + *err = -1; + return; + } dst->value = skb->sk->sk_state; } META_COLLECTOR(int_sk_reuse) { - SKIP_NONLOCAL(skb); + if (skip_nonlocal(skb)) { + *err = -1; + return; + } dst->value = skb->sk->sk_reuse; } META_COLLECTOR(int_sk_bound_if) { - SKIP_NONLOCAL(skb); + if (skip_nonlocal(skb)) { + *err = -1; + return; + } /* No error if bound_dev_if is 0, legal userspace check */ dst->value = skb->sk->sk_bound_dev_if; } META_COLLECTOR(var_sk_bound_if) { - SKIP_NONLOCAL(skb); + if (skip_nonlocal(skb)) { + *err = -1; + return; + } if (skb->sk->sk_bound_dev_if == 0) { dst->value = (unsigned long) "any"; @@ -322,151 +334,226 @@ META_COLLECTOR(var_sk_bound_if) META_COLLECTOR(int_sk_refcnt) { - SKIP_NONLOCAL(skb); + if (skip_nonlocal(skb)) { + *err = -1; + return; + } dst->value = atomic_read(&skb->sk->sk_refcnt); } META_COLLECTOR(int_sk_rcvbuf) { - SKIP_NONLOCAL(skb); + if (skip_nonlocal(skb)) { + *err = -1; + return; + } dst->value = skb->sk->sk_rcvbuf; } META_COLLECTOR(int_sk_shutdown) { - SKIP_NONLOCAL(skb); + if (skip_nonlocal(skb)) { + *err = -1; + return; + } dst->value = skb->sk->sk_shutdown; } META_COLLECTOR(int_sk_proto) { - SKIP_NONLOCAL(skb); + if (skip_nonlocal(skb)) { + *err = -1; + return; + } dst->value = skb->sk->sk_protocol; } META_COLLECTOR(int_sk_type) { - SKIP_NONLOCAL(skb); + if (skip_nonlocal(skb)) { + *err = -1; + return; + } dst->value = skb->sk->sk_type; } 
META_COLLECTOR(int_sk_rmem_alloc) { - SKIP_NONLOCAL(skb); + if (skip_nonlocal(skb)) { + *err = -1; + return; + } dst->value = sk_rmem_alloc_get(skb->sk); } META_COLLECTOR(int_sk_wmem_alloc) { - SKIP_NONLOCAL(skb); + if (skip_nonlocal(skb)) { + *err = -1; + return; + } dst->value = sk_wmem_alloc_get(skb->sk); } META_COLLECTOR(int_sk_omem_alloc) { - SKIP_NONLOCAL(skb); + if (skip_nonlocal(skb)) { + *err = -1; + return; + } dst->value = atomic_read(&skb->sk->sk_omem_alloc); } META_COLLECTOR(int_sk_rcv_qlen) { - SKIP_NONLOCAL(skb); + if (skip_nonlocal(skb)) { + *err = -1; + return; + } dst->value = skb->sk->sk_receive_queue.qlen; } META_COLLECTOR(int_sk_snd_qlen) { - SKIP_NONLOCAL(skb); + if (skip_nonlocal(skb)) { + *err = -1; + return; + } dst->value = skb->sk->sk_write_queue.qlen; } META_COLLECTOR(int_sk_wmem_queued) { - SKIP_NONLOCAL(skb); + if (skip_nonlocal(skb)) { + *err = -1; + return; + } dst->value = skb->sk->sk_wmem_queued; } META_COLLECTOR(int_sk_fwd_alloc) { - SKIP_NONLOCAL(skb); + if (skip_nonlocal(skb)) { + *err = -1; + return; + } dst->value = skb->sk->sk_forward_alloc; } META_COLLECTOR(int_sk_sndbuf) { - SKIP_NONLOCAL(skb); + if (skip_nonlocal(skb)) { + *err = -1; + return; + } dst->value = skb->sk->sk_sndbuf; } META_COLLECTOR(int_sk_alloc) { - SKIP_NONLOCAL(skb); + if (skip_nonlocal(skb)) { + *err = -1; + return; + } dst->value = (__force int) skb->sk->sk_allocation; } META_COLLECTOR(int_sk_hash) { - SKIP_NONLOCAL(skb); + if (skip_nonlocal(skb)) { + *err = -1; + return; + } dst->value = skb->sk->sk_hash; } META_COLLECTOR(int_sk_lingertime) { - SKIP_NONLOCAL(skb); + if (skip_nonlocal(skb)) { + *err = -1; + return; + } dst->value = skb->sk->sk_lingertime / HZ; } META_COLLECTOR(int_sk_err_qlen) { - SKIP_NONLOCAL(skb); + if (skip_nonlocal(skb)) { + *err = -1; + return; + } dst->value = skb->sk->sk_error_queue.qlen; } META_COLLECTOR(int_sk_ack_bl) { - SKIP_NONLOCAL(skb); + if (skip_nonlocal(skb)) { + *err = -1; + return; + } dst->value = 
skb->sk->sk_ack_backlog; } META_COLLECTOR(int_sk_max_ack_bl) { - SKIP_NONLOCAL(skb); + if (skip_nonlocal(skb)) { + *err = -1; + return; + } dst->value = skb->sk->sk_max_ack_backlog; } META_COLLECTOR(int_sk_prio) { - SKIP_NONLOCAL(skb); + if (skip_nonlocal(skb)) { + *err = -1; + return; + } dst->value = skb->sk->sk_priority; } META_COLLECTOR(int_sk_rcvlowat) { - SKIP_NONLOCAL(skb); + if (skip_nonlocal(skb)) { + *err = -1; + return; + } dst->value = skb->sk->sk_rcvlowat; } META_COLLECTOR(int_sk_rcvtimeo) { - SKIP_NONLOCAL(skb); + if (skip_nonlocal(skb)) { + *err = -1; + return; + } dst->value = skb->sk->sk_rcvtimeo / HZ; } META_COLLECTOR(int_sk_sndtimeo) { - SKIP_NONLOCAL(skb); + if (skip_nonlocal(skb)) { + *err = -1; + return; + } dst->value = skb->sk->sk_sndtimeo / HZ; } META_COLLECTOR(int_sk_sendmsg_off) { - SKIP_NONLOCAL(skb); + if (skip_nonlocal(skb)) { + *err = -1; + return; + } dst->value = skb->sk->sk_frag.offset; } META_COLLECTOR(int_sk_write_pend) { - SKIP_NONLOCAL(skb); + if (skip_nonlocal(skb)) { + *err = -1; + return; + } dst->value = skb->sk->sk_write_pending; } diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index cd81505662b..1313145e3b8 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -135,7 +135,7 @@ static DEFINE_RWLOCK(qdisc_mod_lock); static struct Qdisc_ops *qdisc_base; -/* Register/uregister queueing discipline */ +/* Register/unregister queueing discipline */ int register_qdisc(struct Qdisc_ops *qops) { @@ -271,11 +271,15 @@ static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle) return NULL; } -static void qdisc_list_add(struct Qdisc *q) +void qdisc_list_add(struct Qdisc *q) { + struct Qdisc *root = qdisc_dev(q)->qdisc; + + WARN_ON_ONCE(root == &noop_qdisc); if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) - list_add_tail(&q->list, &qdisc_dev(q)->qdisc->list); + list_add_tail(&q->list, &root->list); } +EXPORT_SYMBOL(qdisc_list_add); void qdisc_list_del(struct Qdisc *q) { diff --git 
a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 7a42c81a19e..2f80d01d42a 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1058,9 +1058,10 @@ static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio) cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/ q->quanta[prio]; } - if (cl->quantum <= 0 || cl->quantum>32*qdisc_dev(cl->qdisc)->mtu) { - pr_warning("CBQ: class %08x has bad quantum==%ld, repaired.\n", - cl->common.classid, cl->quantum); + if (cl->quantum <= 0 || + cl->quantum > 32*qdisc_dev(cl->qdisc)->mtu) { + pr_warn("CBQ: class %08x has bad quantum==%ld, repaired.\n", + cl->common.classid, cl->quantum); cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1; } } @@ -1782,8 +1783,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t qdisc_root_sleeping_lock(sch), tca[TCA_RATE]); if (err) { - if (rtab) - qdisc_put_rtab(rtab); + qdisc_put_rtab(rtab); return err; } } diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index ddd73cb2d7b..2aee02802c2 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -14,7 +14,6 @@ #include <linux/types.h> #include <linux/kernel.h> #include <linux/skbuff.h> -#include <linux/reciprocal_div.h> #include <linux/vmalloc.h> #include <net/pkt_sched.h> #include <net/inet_ecn.h> @@ -77,12 +76,6 @@ struct choke_sched_data { struct sk_buff **tab; }; -/* deliver a random number between 0 and N - 1 */ -static u32 random_N(unsigned int N) -{ - return reciprocal_divide(prandom_u32(), N); -} - /* number of elements in queue including holes */ static unsigned int choke_len(const struct choke_sched_data *q) { @@ -233,7 +226,7 @@ static struct sk_buff *choke_peek_random(const struct choke_sched_data *q, int retrys = 3; do { - *pidx = (q->head + random_N(choke_len(q))) & q->tab_mask; + *pidx = (q->head + prandom_u32_max(choke_len(q))) & q->tab_mask; skb = q->tab[*pidx]; if (skb) return skb; diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 
3886365cc20..49d6ef338b5 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -47,7 +47,7 @@ struct dsmark_qdisc_data { static inline int dsmark_valid_index(struct dsmark_qdisc_data *p, u16 index) { - return (index <= p->indices && index > 0); + return index <= p->indices && index > 0; } /* ------------------------- Class/flow operations ------------------------- */ @@ -57,8 +57,8 @@ static int dsmark_graft(struct Qdisc *sch, unsigned long arg, { struct dsmark_qdisc_data *p = qdisc_priv(sch); - pr_debug("dsmark_graft(sch %p,[qdisc %p],new %p,old %p)\n", - sch, p, new, old); + pr_debug("%s(sch %p,[qdisc %p],new %p,old %p)\n", + __func__, sch, p, new, old); if (new == NULL) { new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, @@ -85,8 +85,8 @@ static struct Qdisc *dsmark_leaf(struct Qdisc *sch, unsigned long arg) static unsigned long dsmark_get(struct Qdisc *sch, u32 classid) { - pr_debug("dsmark_get(sch %p,[qdisc %p],classid %x)\n", - sch, qdisc_priv(sch), classid); + pr_debug("%s(sch %p,[qdisc %p],classid %x)\n", + __func__, sch, qdisc_priv(sch), classid); return TC_H_MIN(classid) + 1; } @@ -118,8 +118,8 @@ static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent, int err = -EINVAL; u8 mask = 0; - pr_debug("dsmark_change(sch %p,[qdisc %p],classid %x,parent %x)," - "arg 0x%lx\n", sch, p, classid, parent, *arg); + pr_debug("%s(sch %p,[qdisc %p],classid %x,parent %x), arg 0x%lx\n", + __func__, sch, p, classid, parent, *arg); if (!dsmark_valid_index(p, *arg)) { err = -ENOENT; @@ -166,7 +166,8 @@ static void dsmark_walk(struct Qdisc *sch, struct qdisc_walker *walker) struct dsmark_qdisc_data *p = qdisc_priv(sch); int i; - pr_debug("dsmark_walk(sch %p,[qdisc %p],walker %p)\n", sch, p, walker); + pr_debug("%s(sch %p,[qdisc %p],walker %p)\n", + __func__, sch, p, walker); if (walker->stop) return; @@ -199,7 +200,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch) struct dsmark_qdisc_data *p = qdisc_priv(sch); int err; - 
pr_debug("dsmark_enqueue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p); + pr_debug("%s(skb %p,sch %p,[qdisc %p])\n", __func__, skb, sch, p); if (p->set_tc_index) { switch (skb->protocol) { @@ -275,7 +276,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch) struct sk_buff *skb; u32 index; - pr_debug("dsmark_dequeue(sch %p,[qdisc %p])\n", sch, p); + pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p); skb = p->q->ops->dequeue(p->q); if (skb == NULL) @@ -303,8 +304,8 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch) * and don't need yet another qdisc as a bypass. */ if (p->mask[index] != 0xff || p->value[index]) - pr_warning("dsmark_dequeue: unsupported protocol %d\n", - ntohs(skb->protocol)); + pr_warn("%s: unsupported protocol %d\n", + __func__, ntohs(skb->protocol)); break; } @@ -315,7 +316,7 @@ static struct sk_buff *dsmark_peek(struct Qdisc *sch) { struct dsmark_qdisc_data *p = qdisc_priv(sch); - pr_debug("dsmark_peek(sch %p,[qdisc %p])\n", sch, p); + pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p); return p->q->ops->peek(p->q); } @@ -325,7 +326,7 @@ static unsigned int dsmark_drop(struct Qdisc *sch) struct dsmark_qdisc_data *p = qdisc_priv(sch); unsigned int len; - pr_debug("dsmark_reset(sch %p,[qdisc %p])\n", sch, p); + pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p); if (p->q->ops->drop == NULL) return 0; @@ -346,7 +347,7 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt) u16 indices; u8 *mask; - pr_debug("dsmark_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt); + pr_debug("%s(sch %p,[qdisc %p],opt %p)\n", __func__, sch, p, opt); if (!opt) goto errout; @@ -384,7 +385,7 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt) if (p->q == NULL) p->q = &noop_qdisc; - pr_debug("dsmark_init: qdisc %p\n", p->q); + pr_debug("%s: qdisc %p\n", __func__, p->q); err = 0; errout: @@ -395,7 +396,7 @@ static void dsmark_reset(struct Qdisc *sch) { struct dsmark_qdisc_data *p = qdisc_priv(sch); - pr_debug("dsmark_reset(sch 
%p,[qdisc %p])\n", sch, p); + pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p); qdisc_reset(p->q); sch->q.qlen = 0; } @@ -404,7 +405,7 @@ static void dsmark_destroy(struct Qdisc *sch) { struct dsmark_qdisc_data *p = qdisc_priv(sch); - pr_debug("dsmark_destroy(sch %p,[qdisc %p])\n", sch, p); + pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p); tcf_destroy_chain(&p->filter_list); qdisc_destroy(p->q); @@ -417,7 +418,7 @@ static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl, struct dsmark_qdisc_data *p = qdisc_priv(sch); struct nlattr *opts = NULL; - pr_debug("dsmark_dump_class(sch %p,[qdisc %p],class %ld\n", sch, p, cl); + pr_debug("%s(sch %p,[qdisc %p],class %ld\n", __func__, sch, p, cl); if (!dsmark_valid_index(p, cl)) return -EINVAL; diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 95d84396190..08ef7a42c0e 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -47,6 +47,7 @@ #include <linux/rbtree.h> #include <linux/hash.h> #include <linux/prefetch.h> +#include <linux/vmalloc.h> #include <net/netlink.h> #include <net/pkt_sched.h> #include <net/sock.h> @@ -225,7 +226,7 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q) /* By forcing low order bit to 1, we make sure to not * collide with a local flow (socket pointers are word aligned) */ - sk = (struct sock *)(skb_get_rxhash(skb) | 1L); + sk = (struct sock *)(skb_get_hash(skb) | 1L); } root = &q->fq_root[hash_32((u32)(long)sk, q->fq_trees_log)]; @@ -578,15 +579,36 @@ static void fq_rehash(struct fq_sched_data *q, q->stat_gc_flows += fcnt; } -static int fq_resize(struct fq_sched_data *q, u32 log) +static void *fq_alloc_node(size_t sz, int node) { + void *ptr; + + ptr = kmalloc_node(sz, GFP_KERNEL | __GFP_REPEAT | __GFP_NOWARN, node); + if (!ptr) + ptr = vmalloc_node(sz, node); + return ptr; +} + +static void fq_free(void *addr) +{ + if (addr && is_vmalloc_addr(addr)) + vfree(addr); + else + kfree(addr); +} + +static int fq_resize(struct Qdisc *sch, 
u32 log) +{ + struct fq_sched_data *q = qdisc_priv(sch); struct rb_root *array; u32 idx; if (q->fq_root && log == q->fq_trees_log) return 0; - array = kmalloc(sizeof(struct rb_root) << log, GFP_KERNEL); + /* If XPS was setup, we can allocate memory on right NUMA node */ + array = fq_alloc_node(sizeof(struct rb_root) << log, + netdev_queue_numa_node_read(sch->dev_queue)); if (!array) return -ENOMEM; @@ -595,7 +617,7 @@ static int fq_resize(struct fq_sched_data *q, u32 log) if (q->fq_root) { fq_rehash(q, q->fq_root, q->fq_trees_log, array, log); - kfree(q->fq_root); + fq_free(q->fq_root); } q->fq_root = array; q->fq_trees_log = log; @@ -676,7 +698,7 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt) } if (!err) - err = fq_resize(q, fq_log); + err = fq_resize(sch, fq_log); while (sch->q.qlen > sch->limit) { struct sk_buff *skb = fq_dequeue(sch); @@ -697,7 +719,7 @@ static void fq_destroy(struct Qdisc *sch) struct fq_sched_data *q = qdisc_priv(sch); fq_reset(sch); - kfree(q->fq_root); + fq_free(q->fq_root); qdisc_watchdog_cancel(&q->watchdog); } @@ -723,7 +745,7 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt) if (opt) err = fq_change(sch, opt); else - err = fq_resize(q, q->fq_trees_log); + err = fq_resize(sch, q->fq_trees_log); return err; } diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 55786283a3d..ba5bc929eac 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -390,7 +390,7 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt) sch->limit = 10*1024; q->flows_cnt = 1024; q->quantum = psched_mtu(qdisc_dev(sch)); - q->perturbation = net_random(); + q->perturbation = prandom_u32(); INIT_LIST_HEAD(&q->new_flows); INIT_LIST_HEAD(&q->old_flows); codel_params_init(&q->cparams); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 7fc899a943a..e82e43b69c3 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -338,13 +338,13 @@ EXPORT_SYMBOL(netif_carrier_off); 
cheaper. */ -static int noop_enqueue(struct sk_buff *skb, struct Qdisc * qdisc) +static int noop_enqueue(struct sk_buff *skb, struct Qdisc *qdisc) { kfree_skb(skb); return NET_XMIT_CN; } -static struct sk_buff *noop_dequeue(struct Qdisc * qdisc) +static struct sk_buff *noop_dequeue(struct Qdisc *qdisc) { return NULL; } @@ -718,8 +718,8 @@ static void attach_default_qdiscs(struct net_device *dev) } else { qdisc = qdisc_create_dflt(txq, &mq_qdisc_ops, TC_H_ROOT); if (qdisc) { - qdisc->ops->attach(qdisc); dev->qdisc = qdisc; + qdisc->ops->attach(qdisc); } } } diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index d42234c0f13..12cbc09157f 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -370,8 +370,8 @@ static inline int gred_change_table_def(struct Qdisc *sch, struct nlattr *dps) for (i = table->DPs; i < MAX_DPs; i++) { if (table->tab[i]) { - pr_warning("GRED: Warning: Destroying " - "shadowed VQ 0x%x\n", i); + pr_warn("GRED: Warning: Destroying shadowed VQ 0x%x\n", + i); gred_destroy_vq(table->tab[i]); table->tab[i] = NULL; } diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c new file mode 100644 index 00000000000..647680b1c62 --- /dev/null +++ b/net/sched/sch_hhf.c @@ -0,0 +1,745 @@ +/* net/sched/sch_hhf.c Heavy-Hitter Filter (HHF) + * + * Copyright (C) 2013 Terry Lam <vtlam@google.com> + * Copyright (C) 2013 Nandita Dukkipati <nanditad@google.com> + */ + +#include <linux/jhash.h> +#include <linux/jiffies.h> +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/vmalloc.h> +#include <net/flow_keys.h> +#include <net/pkt_sched.h> +#include <net/sock.h> + +/* Heavy-Hitter Filter (HHF) + * + * Principles : + * Flows are classified into two buckets: non-heavy-hitter and heavy-hitter + * buckets. Initially, a new flow starts as non-heavy-hitter. Once classified + * as heavy-hitter, it is immediately switched to the heavy-hitter bucket. 
+ * The buckets are dequeued by a Weighted Deficit Round Robin (WDRR) scheduler, + * in which the heavy-hitter bucket is served with less weight. + * In other words, non-heavy-hitters (e.g., short bursts of critical traffic) + * are isolated from heavy-hitters (e.g., persistent bulk traffic) and also have + * higher share of bandwidth. + * + * To capture heavy-hitters, we use the "multi-stage filter" algorithm in the + * following paper: + * [EV02] C. Estan and G. Varghese, "New Directions in Traffic Measurement and + * Accounting", in ACM SIGCOMM, 2002. + * + * Conceptually, a multi-stage filter comprises k independent hash functions + * and k counter arrays. Packets are indexed into k counter arrays by k hash + * functions, respectively. The counters are then increased by the packet sizes. + * Therefore, + * - For a heavy-hitter flow: *all* of its k array counters must be large. + * - For a non-heavy-hitter flow: some of its k array counters can be large + * due to hash collision with other small flows; however, with high + * probability, not *all* k counters are large. + * + * By the design of the multi-stage filter algorithm, the false negative rate + * (heavy-hitters getting away uncaptured) is zero. However, the algorithm is + * susceptible to false positives (non-heavy-hitters mistakenly classified as + * heavy-hitters). + * Therefore, we also implement the following optimizations to reduce false + * positives by avoiding unnecessary increment of the counter values: + * - Optimization O1: once a heavy-hitter is identified, its bytes are not + * accounted in the array counters. This technique is called "shielding" + * in Section 3.3.1 of [EV02]. + * - Optimization O2: conservative update of counters + * (Section 3.3.2 of [EV02]), + * New counter value = max {old counter value, + * smallest counter value + packet bytes} + * + * Finally, we refresh the counters periodically since otherwise the counter + * values will keep accumulating. 
+ * + * Once a flow is classified as heavy-hitter, we also save its per-flow state + * in an exact-matching flow table so that its subsequent packets can be + * dispatched to the heavy-hitter bucket accordingly. + * + * + * At a high level, this qdisc works as follows: + * Given a packet p: + * - If the flow-id of p (e.g., TCP 5-tuple) is already in the exact-matching + * heavy-hitter flow table, denoted table T, then send p to the heavy-hitter + * bucket. + * - Otherwise, forward p to the multi-stage filter, denoted filter F + * + If F decides that p belongs to a non-heavy-hitter flow, then send p + * to the non-heavy-hitter bucket. + * + Otherwise, if F decides that p belongs to a new heavy-hitter flow, + * then set up a new flow entry for the flow-id of p in the table T and + * send p to the heavy-hitter bucket. + * + * In this implementation: + * - T is a fixed-size hash-table with 1024 entries. Hash collision is + * resolved by linked-list chaining. + * - F has four counter arrays, each array containing 1024 32-bit counters. + * That means 4 * 1024 * 32 bits = 16KB of memory. + * - Since each array in F contains 1024 counters, 10 bits are sufficient to + * index into each array. + * Hence, instead of having four hash functions, we chop the 32-bit + * skb-hash into three 10-bit chunks, and the remaining 10-bit chunk is + * computed as XOR sum of those three chunks. + * - We need to clear the counter arrays periodically; however, directly + * memsetting 16KB of memory can lead to cache eviction and unwanted delay. + * So by representing each counter by a valid bit, we only need to reset + * 4K of 1 bit (i.e. 512 bytes) instead of 16KB of memory. + * - The Deficit Round Robin engine is taken from fq_codel implementation + * (net/sched/sch_fq_codel.c). Note that wdrr_bucket corresponds to + * fq_codel_flow in fq_codel implementation. 
+ * + */ + +/* Non-configurable parameters */ +#define HH_FLOWS_CNT 1024 /* number of entries in exact-matching table T */ +#define HHF_ARRAYS_CNT 4 /* number of arrays in multi-stage filter F */ +#define HHF_ARRAYS_LEN 1024 /* number of counters in each array of F */ +#define HHF_BIT_MASK_LEN 10 /* masking 10 bits */ +#define HHF_BIT_MASK 0x3FF /* bitmask of 10 bits */ + +#define WDRR_BUCKET_CNT 2 /* two buckets for Weighted DRR */ +enum wdrr_bucket_idx { + WDRR_BUCKET_FOR_HH = 0, /* bucket id for heavy-hitters */ + WDRR_BUCKET_FOR_NON_HH = 1 /* bucket id for non-heavy-hitters */ +}; + +#define hhf_time_before(a, b) \ + (typecheck(u32, a) && typecheck(u32, b) && ((s32)((a) - (b)) < 0)) + +/* Heavy-hitter per-flow state */ +struct hh_flow_state { + u32 hash_id; /* hash of flow-id (e.g. TCP 5-tuple) */ + u32 hit_timestamp; /* last time heavy-hitter was seen */ + struct list_head flowchain; /* chaining under hash collision */ +}; + +/* Weighted Deficit Round Robin (WDRR) scheduler */ +struct wdrr_bucket { + struct sk_buff *head; + struct sk_buff *tail; + struct list_head bucketchain; + int deficit; +}; + +struct hhf_sched_data { + struct wdrr_bucket buckets[WDRR_BUCKET_CNT]; + u32 perturbation; /* hash perturbation */ + u32 quantum; /* psched_mtu(qdisc_dev(sch)); */ + u32 drop_overlimit; /* number of times max qdisc packet + * limit was hit + */ + struct list_head *hh_flows; /* table T (currently active HHs) */ + u32 hh_flows_limit; /* max active HH allocs */ + u32 hh_flows_overlimit; /* num of disallowed HH allocs */ + u32 hh_flows_total_cnt; /* total admitted HHs */ + u32 hh_flows_current_cnt; /* total current HHs */ + u32 *hhf_arrays[HHF_ARRAYS_CNT]; /* HH filter F */ + u32 hhf_arrays_reset_timestamp; /* last time hhf_arrays + * was reset + */ + unsigned long *hhf_valid_bits[HHF_ARRAYS_CNT]; /* shadow valid bits + * of hhf_arrays + */ + /* Similar to the "new_flows" vs. 
"old_flows" concept in fq_codel DRR */ + struct list_head new_buckets; /* list of new buckets */ + struct list_head old_buckets; /* list of old buckets */ + + /* Configurable HHF parameters */ + u32 hhf_reset_timeout; /* interval to reset counter + * arrays in filter F + * (default 40ms) + */ + u32 hhf_admit_bytes; /* counter thresh to classify as + * HH (default 128KB). + * With these default values, + * 128KB / 40ms = 25 Mbps + * i.e., we expect to capture HHs + * sending > 25 Mbps. + */ + u32 hhf_evict_timeout; /* aging threshold to evict idle + * HHs out of table T. This should + * be large enough to avoid + * reordering during HH eviction. + * (default 1s) + */ + u32 hhf_non_hh_weight; /* WDRR weight for non-HHs + * (default 2, + * i.e., non-HH : HH = 2 : 1) + */ +}; + +static u32 hhf_time_stamp(void) +{ + return jiffies; +} + +static unsigned int skb_hash(const struct hhf_sched_data *q, + const struct sk_buff *skb) +{ + struct flow_keys keys; + unsigned int hash; + + if (skb->sk && skb->sk->sk_hash) + return skb->sk->sk_hash; + + skb_flow_dissect(skb, &keys); + hash = jhash_3words((__force u32)keys.dst, + (__force u32)keys.src ^ keys.ip_proto, + (__force u32)keys.ports, q->perturbation); + return hash; +} + +/* Looks up a heavy-hitter flow in a chaining list of table T. */ +static struct hh_flow_state *seek_list(const u32 hash, + struct list_head *head, + struct hhf_sched_data *q) +{ + struct hh_flow_state *flow, *next; + u32 now = hhf_time_stamp(); + + if (list_empty(head)) + return NULL; + + list_for_each_entry_safe(flow, next, head, flowchain) { + u32 prev = flow->hit_timestamp + q->hhf_evict_timeout; + + if (hhf_time_before(prev, now)) { + /* Delete expired heavy-hitters, but preserve one entry + * to avoid kzalloc() when next time this slot is hit. 
+ */ + if (list_is_last(&flow->flowchain, head)) + return NULL; + list_del(&flow->flowchain); + kfree(flow); + q->hh_flows_current_cnt--; + } else if (flow->hash_id == hash) { + return flow; + } + } + return NULL; +} + +/* Returns a flow state entry for a new heavy-hitter. Either reuses an expired + * entry or dynamically alloc a new entry. + */ +static struct hh_flow_state *alloc_new_hh(struct list_head *head, + struct hhf_sched_data *q) +{ + struct hh_flow_state *flow; + u32 now = hhf_time_stamp(); + + if (!list_empty(head)) { + /* Find an expired heavy-hitter flow entry. */ + list_for_each_entry(flow, head, flowchain) { + u32 prev = flow->hit_timestamp + q->hhf_evict_timeout; + + if (hhf_time_before(prev, now)) + return flow; + } + } + + if (q->hh_flows_current_cnt >= q->hh_flows_limit) { + q->hh_flows_overlimit++; + return NULL; + } + /* Create new entry. */ + flow = kzalloc(sizeof(struct hh_flow_state), GFP_ATOMIC); + if (!flow) + return NULL; + + q->hh_flows_current_cnt++; + INIT_LIST_HEAD(&flow->flowchain); + list_add_tail(&flow->flowchain, head); + + return flow; +} + +/* Assigns packets to WDRR buckets. Implements a multi-stage filter to + * classify heavy-hitters. + */ +static enum wdrr_bucket_idx hhf_classify(struct sk_buff *skb, struct Qdisc *sch) +{ + struct hhf_sched_data *q = qdisc_priv(sch); + u32 tmp_hash, hash; + u32 xorsum, filter_pos[HHF_ARRAYS_CNT], flow_pos; + struct hh_flow_state *flow; + u32 pkt_len, min_hhf_val; + int i; + u32 prev; + u32 now = hhf_time_stamp(); + + /* Reset the HHF counter arrays if this is the right time. */ + prev = q->hhf_arrays_reset_timestamp + q->hhf_reset_timeout; + if (hhf_time_before(prev, now)) { + for (i = 0; i < HHF_ARRAYS_CNT; i++) + bitmap_zero(q->hhf_valid_bits[i], HHF_ARRAYS_LEN); + q->hhf_arrays_reset_timestamp = now; + } + + /* Get hashed flow-id of the skb. */ + hash = skb_hash(q, skb); + + /* Check if this packet belongs to an already established HH flow. 
*/ + flow_pos = hash & HHF_BIT_MASK; + flow = seek_list(hash, &q->hh_flows[flow_pos], q); + if (flow) { /* found its HH flow */ + flow->hit_timestamp = now; + return WDRR_BUCKET_FOR_HH; + } + + /* Now pass the packet through the multi-stage filter. */ + tmp_hash = hash; + xorsum = 0; + for (i = 0; i < HHF_ARRAYS_CNT - 1; i++) { + /* Split the skb_hash into three 10-bit chunks. */ + filter_pos[i] = tmp_hash & HHF_BIT_MASK; + xorsum ^= filter_pos[i]; + tmp_hash >>= HHF_BIT_MASK_LEN; + } + /* The last chunk is computed as XOR sum of other chunks. */ + filter_pos[HHF_ARRAYS_CNT - 1] = xorsum ^ tmp_hash; + + pkt_len = qdisc_pkt_len(skb); + min_hhf_val = ~0U; + for (i = 0; i < HHF_ARRAYS_CNT; i++) { + u32 val; + + if (!test_bit(filter_pos[i], q->hhf_valid_bits[i])) { + q->hhf_arrays[i][filter_pos[i]] = 0; + __set_bit(filter_pos[i], q->hhf_valid_bits[i]); + } + + val = q->hhf_arrays[i][filter_pos[i]] + pkt_len; + if (min_hhf_val > val) + min_hhf_val = val; + } + + /* Found a new HH iff all counter values > HH admit threshold. */ + if (min_hhf_val > q->hhf_admit_bytes) { + /* Just captured a new heavy-hitter. */ + flow = alloc_new_hh(&q->hh_flows[flow_pos], q); + if (!flow) /* memory alloc problem */ + return WDRR_BUCKET_FOR_NON_HH; + flow->hash_id = hash; + flow->hit_timestamp = now; + q->hh_flows_total_cnt++; + + /* By returning without updating counters in q->hhf_arrays, + * we implicitly implement "shielding" (see Optimization O1). + */ + return WDRR_BUCKET_FOR_HH; + } + + /* Conservative update of HHF arrays (see Optimization O2). */ + for (i = 0; i < HHF_ARRAYS_CNT; i++) { + if (q->hhf_arrays[i][filter_pos[i]] < min_hhf_val) + q->hhf_arrays[i][filter_pos[i]] = min_hhf_val; + } + return WDRR_BUCKET_FOR_NON_HH; +} + +/* Removes one skb from head of bucket. */ +static struct sk_buff *dequeue_head(struct wdrr_bucket *bucket) +{ + struct sk_buff *skb = bucket->head; + + bucket->head = skb->next; + skb->next = NULL; + return skb; +} + +/* Tail-adds skb to bucket. 
*/ +static void bucket_add(struct wdrr_bucket *bucket, struct sk_buff *skb) +{ + if (bucket->head == NULL) + bucket->head = skb; + else + bucket->tail->next = skb; + bucket->tail = skb; + skb->next = NULL; +} + +static unsigned int hhf_drop(struct Qdisc *sch) +{ + struct hhf_sched_data *q = qdisc_priv(sch); + struct wdrr_bucket *bucket; + + /* Always try to drop from heavy-hitters first. */ + bucket = &q->buckets[WDRR_BUCKET_FOR_HH]; + if (!bucket->head) + bucket = &q->buckets[WDRR_BUCKET_FOR_NON_HH]; + + if (bucket->head) { + struct sk_buff *skb = dequeue_head(bucket); + + sch->q.qlen--; + sch->qstats.drops++; + sch->qstats.backlog -= qdisc_pkt_len(skb); + kfree_skb(skb); + } + + /* Return id of the bucket from which the packet was dropped. */ + return bucket - q->buckets; +} + +static int hhf_enqueue(struct sk_buff *skb, struct Qdisc *sch) +{ + struct hhf_sched_data *q = qdisc_priv(sch); + enum wdrr_bucket_idx idx; + struct wdrr_bucket *bucket; + + idx = hhf_classify(skb, sch); + + bucket = &q->buckets[idx]; + bucket_add(bucket, skb); + sch->qstats.backlog += qdisc_pkt_len(skb); + + if (list_empty(&bucket->bucketchain)) { + unsigned int weight; + + /* The logic of new_buckets vs. old_buckets is the same as + * new_flows vs. old_flows in the implementation of fq_codel, + * i.e., short bursts of non-HHs should have strict priority. + */ + if (idx == WDRR_BUCKET_FOR_HH) { + /* Always move heavy-hitters to old bucket. */ + weight = 1; + list_add_tail(&bucket->bucketchain, &q->old_buckets); + } else { + weight = q->hhf_non_hh_weight; + list_add_tail(&bucket->bucketchain, &q->new_buckets); + } + bucket->deficit = weight * q->quantum; + } + if (++sch->q.qlen < sch->limit) + return NET_XMIT_SUCCESS; + + q->drop_overlimit++; + /* Return Congestion Notification only if we dropped a packet from this + * bucket. + */ + if (hhf_drop(sch) == idx) + return NET_XMIT_CN; + + /* As we dropped a packet, better let upper stack know this. 
*/ + qdisc_tree_decrease_qlen(sch, 1); + return NET_XMIT_SUCCESS; +} + +static struct sk_buff *hhf_dequeue(struct Qdisc *sch) +{ + struct hhf_sched_data *q = qdisc_priv(sch); + struct sk_buff *skb = NULL; + struct wdrr_bucket *bucket; + struct list_head *head; + +begin: + head = &q->new_buckets; + if (list_empty(head)) { + head = &q->old_buckets; + if (list_empty(head)) + return NULL; + } + bucket = list_first_entry(head, struct wdrr_bucket, bucketchain); + + if (bucket->deficit <= 0) { + int weight = (bucket - q->buckets == WDRR_BUCKET_FOR_HH) ? + 1 : q->hhf_non_hh_weight; + + bucket->deficit += weight * q->quantum; + list_move_tail(&bucket->bucketchain, &q->old_buckets); + goto begin; + } + + if (bucket->head) { + skb = dequeue_head(bucket); + sch->q.qlen--; + sch->qstats.backlog -= qdisc_pkt_len(skb); + } + + if (!skb) { + /* Force a pass through old_buckets to prevent starvation. */ + if ((head == &q->new_buckets) && !list_empty(&q->old_buckets)) + list_move_tail(&bucket->bucketchain, &q->old_buckets); + else + list_del_init(&bucket->bucketchain); + goto begin; + } + qdisc_bstats_update(sch, skb); + bucket->deficit -= qdisc_pkt_len(skb); + + return skb; +} + +static void hhf_reset(struct Qdisc *sch) +{ + struct sk_buff *skb; + + while ((skb = hhf_dequeue(sch)) != NULL) + kfree_skb(skb); +} + +static void *hhf_zalloc(size_t sz) +{ + void *ptr = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN); + + if (!ptr) + ptr = vzalloc(sz); + + return ptr; +} + +static void hhf_free(void *addr) +{ + if (addr) { + if (is_vmalloc_addr(addr)) + vfree(addr); + else + kfree(addr); + } +} + +static void hhf_destroy(struct Qdisc *sch) +{ + int i; + struct hhf_sched_data *q = qdisc_priv(sch); + + for (i = 0; i < HHF_ARRAYS_CNT; i++) { + hhf_free(q->hhf_arrays[i]); + hhf_free(q->hhf_valid_bits[i]); + } + + for (i = 0; i < HH_FLOWS_CNT; i++) { + struct hh_flow_state *flow, *next; + struct list_head *head = &q->hh_flows[i]; + + if (list_empty(head)) + continue; + list_for_each_entry_safe(flow, 
next, head, flowchain) { + list_del(&flow->flowchain); + kfree(flow); + } + } + hhf_free(q->hh_flows); +} + +static const struct nla_policy hhf_policy[TCA_HHF_MAX + 1] = { + [TCA_HHF_BACKLOG_LIMIT] = { .type = NLA_U32 }, + [TCA_HHF_QUANTUM] = { .type = NLA_U32 }, + [TCA_HHF_HH_FLOWS_LIMIT] = { .type = NLA_U32 }, + [TCA_HHF_RESET_TIMEOUT] = { .type = NLA_U32 }, + [TCA_HHF_ADMIT_BYTES] = { .type = NLA_U32 }, + [TCA_HHF_EVICT_TIMEOUT] = { .type = NLA_U32 }, + [TCA_HHF_NON_HH_WEIGHT] = { .type = NLA_U32 }, +}; + +static int hhf_change(struct Qdisc *sch, struct nlattr *opt) +{ + struct hhf_sched_data *q = qdisc_priv(sch); + struct nlattr *tb[TCA_HHF_MAX + 1]; + unsigned int qlen; + int err; + u64 non_hh_quantum; + u32 new_quantum = q->quantum; + u32 new_hhf_non_hh_weight = q->hhf_non_hh_weight; + + if (!opt) + return -EINVAL; + + err = nla_parse_nested(tb, TCA_HHF_MAX, opt, hhf_policy); + if (err < 0) + return err; + + sch_tree_lock(sch); + + if (tb[TCA_HHF_BACKLOG_LIMIT]) + sch->limit = nla_get_u32(tb[TCA_HHF_BACKLOG_LIMIT]); + + if (tb[TCA_HHF_QUANTUM]) + new_quantum = nla_get_u32(tb[TCA_HHF_QUANTUM]); + + if (tb[TCA_HHF_NON_HH_WEIGHT]) + new_hhf_non_hh_weight = nla_get_u32(tb[TCA_HHF_NON_HH_WEIGHT]); + + non_hh_quantum = (u64)new_quantum * new_hhf_non_hh_weight; + if (non_hh_quantum > INT_MAX) + return -EINVAL; + q->quantum = new_quantum; + q->hhf_non_hh_weight = new_hhf_non_hh_weight; + + if (tb[TCA_HHF_HH_FLOWS_LIMIT]) + q->hh_flows_limit = nla_get_u32(tb[TCA_HHF_HH_FLOWS_LIMIT]); + + if (tb[TCA_HHF_RESET_TIMEOUT]) { + u32 us = nla_get_u32(tb[TCA_HHF_RESET_TIMEOUT]); + + q->hhf_reset_timeout = usecs_to_jiffies(us); + } + + if (tb[TCA_HHF_ADMIT_BYTES]) + q->hhf_admit_bytes = nla_get_u32(tb[TCA_HHF_ADMIT_BYTES]); + + if (tb[TCA_HHF_EVICT_TIMEOUT]) { + u32 us = nla_get_u32(tb[TCA_HHF_EVICT_TIMEOUT]); + + q->hhf_evict_timeout = usecs_to_jiffies(us); + } + + qlen = sch->q.qlen; + while (sch->q.qlen > sch->limit) { + struct sk_buff *skb = hhf_dequeue(sch); + + 
kfree_skb(skb); + } + qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen); + + sch_tree_unlock(sch); + return 0; +} + +static int hhf_init(struct Qdisc *sch, struct nlattr *opt) +{ + struct hhf_sched_data *q = qdisc_priv(sch); + int i; + + sch->limit = 1000; + q->quantum = psched_mtu(qdisc_dev(sch)); + q->perturbation = prandom_u32(); + INIT_LIST_HEAD(&q->new_buckets); + INIT_LIST_HEAD(&q->old_buckets); + + /* Configurable HHF parameters */ + q->hhf_reset_timeout = HZ / 25; /* 40 ms */ + q->hhf_admit_bytes = 131072; /* 128 KB */ + q->hhf_evict_timeout = HZ; /* 1 sec */ + q->hhf_non_hh_weight = 2; + + if (opt) { + int err = hhf_change(sch, opt); + + if (err) + return err; + } + + if (!q->hh_flows) { + /* Initialize heavy-hitter flow table. */ + q->hh_flows = hhf_zalloc(HH_FLOWS_CNT * + sizeof(struct list_head)); + if (!q->hh_flows) + return -ENOMEM; + for (i = 0; i < HH_FLOWS_CNT; i++) + INIT_LIST_HEAD(&q->hh_flows[i]); + + /* Cap max active HHs at twice len of hh_flows table. */ + q->hh_flows_limit = 2 * HH_FLOWS_CNT; + q->hh_flows_overlimit = 0; + q->hh_flows_total_cnt = 0; + q->hh_flows_current_cnt = 0; + + /* Initialize heavy-hitter filter arrays. */ + for (i = 0; i < HHF_ARRAYS_CNT; i++) { + q->hhf_arrays[i] = hhf_zalloc(HHF_ARRAYS_LEN * + sizeof(u32)); + if (!q->hhf_arrays[i]) { + hhf_destroy(sch); + return -ENOMEM; + } + } + q->hhf_arrays_reset_timestamp = hhf_time_stamp(); + + /* Initialize valid bits of heavy-hitter filter arrays. */ + for (i = 0; i < HHF_ARRAYS_CNT; i++) { + q->hhf_valid_bits[i] = hhf_zalloc(HHF_ARRAYS_LEN / + BITS_PER_BYTE); + if (!q->hhf_valid_bits[i]) { + hhf_destroy(sch); + return -ENOMEM; + } + } + + /* Initialize Weighted DRR buckets. 
*/ + for (i = 0; i < WDRR_BUCKET_CNT; i++) { + struct wdrr_bucket *bucket = q->buckets + i; + + INIT_LIST_HEAD(&bucket->bucketchain); + } + } + + return 0; +} + +static int hhf_dump(struct Qdisc *sch, struct sk_buff *skb) +{ + struct hhf_sched_data *q = qdisc_priv(sch); + struct nlattr *opts; + + opts = nla_nest_start(skb, TCA_OPTIONS); + if (opts == NULL) + goto nla_put_failure; + + if (nla_put_u32(skb, TCA_HHF_BACKLOG_LIMIT, sch->limit) || + nla_put_u32(skb, TCA_HHF_QUANTUM, q->quantum) || + nla_put_u32(skb, TCA_HHF_HH_FLOWS_LIMIT, q->hh_flows_limit) || + nla_put_u32(skb, TCA_HHF_RESET_TIMEOUT, + jiffies_to_usecs(q->hhf_reset_timeout)) || + nla_put_u32(skb, TCA_HHF_ADMIT_BYTES, q->hhf_admit_bytes) || + nla_put_u32(skb, TCA_HHF_EVICT_TIMEOUT, + jiffies_to_usecs(q->hhf_evict_timeout)) || + nla_put_u32(skb, TCA_HHF_NON_HH_WEIGHT, q->hhf_non_hh_weight)) + goto nla_put_failure; + + nla_nest_end(skb, opts); + return skb->len; + +nla_put_failure: + return -1; +} + +static int hhf_dump_stats(struct Qdisc *sch, struct gnet_dump *d) +{ + struct hhf_sched_data *q = qdisc_priv(sch); + struct tc_hhf_xstats st = { + .drop_overlimit = q->drop_overlimit, + .hh_overlimit = q->hh_flows_overlimit, + .hh_tot_count = q->hh_flows_total_cnt, + .hh_cur_count = q->hh_flows_current_cnt, + }; + + return gnet_stats_copy_app(d, &st, sizeof(st)); +} + +static struct Qdisc_ops hhf_qdisc_ops __read_mostly = { + .id = "hhf", + .priv_size = sizeof(struct hhf_sched_data), + + .enqueue = hhf_enqueue, + .dequeue = hhf_dequeue, + .peek = qdisc_peek_dequeued, + .drop = hhf_drop, + .init = hhf_init, + .reset = hhf_reset, + .destroy = hhf_destroy, + .change = hhf_change, + .dump = hhf_dump, + .dump_stats = hhf_dump_stats, + .owner = THIS_MODULE, +}; + +static int __init hhf_module_init(void) +{ + return register_qdisc(&hhf_qdisc_ops); +} + +static void __exit hhf_module_exit(void) +{ + unregister_qdisc(&hhf_qdisc_ops); +} + +module_init(hhf_module_init) +module_exit(hhf_module_exit) 
+MODULE_AUTHOR("Terry Lam"); +MODULE_AUTHOR("Nandita Dukkipati"); +MODULE_LICENSE("GPL"); diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 717b2108f85..722e137df24 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -219,11 +219,16 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, if (skb->priority == sch->handle) return HTB_DIRECT; /* X:0 (direct flow) selected */ cl = htb_find(skb->priority, sch); - if (cl && cl->level == 0) - return cl; + if (cl) { + if (cl->level == 0) + return cl; + /* Start with inner filter chain if a non-leaf class is selected */ + tcf = cl->filter_list; + } else { + tcf = q->filter_list; + } *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; - tcf = q->filter_list; while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) { #ifdef CONFIG_NET_CLS_ACT switch (result) { @@ -712,7 +717,7 @@ static s64 htb_do_events(struct htb_sched *q, const int level, /* too much load - let's continue after a break for scheduling */ if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) { - pr_warning("htb: too many events!\n"); + pr_warn("htb: too many events!\n"); q->warned |= HTB_WARN_TOOMANYEVENTS; } @@ -1276,9 +1281,10 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg) struct Qdisc *new_q = NULL; int last_child = 0; - // TODO: why don't allow to delete subtree ? references ? does - // tc subsys quarantee us that in htb_destroy it holds no class - // refs so that we can remove children safely there ? + /* TODO: why don't allow to delete subtree ? references ? does + * tc subsys guarantee us that in htb_destroy it holds no class + * refs so that we can remove children safely there ? 
+ */ if (cl->children || cl->filter_cnt) return -EBUSY; @@ -1337,7 +1343,6 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl = (struct htb_class *)*arg, *parent; struct nlattr *opt = tca[TCA_OPTIONS]; - struct qdisc_rate_table *rtab = NULL, *ctab = NULL; struct nlattr *tb[TCA_HTB_MAX + 1]; struct tc_htb_opt *hopt; u64 rate64, ceil64; @@ -1361,16 +1366,11 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, goto failure; /* Keeping backward compatible with rate_table based iproute2 tc */ - if (hopt->rate.linklayer == TC_LINKLAYER_UNAWARE) { - rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB]); - if (rtab) - qdisc_put_rtab(rtab); - } - if (hopt->ceil.linklayer == TC_LINKLAYER_UNAWARE) { - ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB]); - if (ctab) - qdisc_put_rtab(ctab); - } + if (hopt->rate.linklayer == TC_LINKLAYER_UNAWARE) + qdisc_put_rtab(qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB])); + + if (hopt->ceil.linklayer == TC_LINKLAYER_UNAWARE) + qdisc_put_rtab(qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB])); if (!cl) { /* new class */ struct Qdisc *new_q; @@ -1494,15 +1494,13 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, cl->quantum = min_t(u64, quantum, INT_MAX); if (!hopt->quantum && cl->quantum < 1000) { - pr_warning( - "HTB: quantum of class %X is small. Consider r2q change.\n", - cl->common.classid); + pr_warn("HTB: quantum of class %X is small. Consider r2q change.\n", + cl->common.classid); cl->quantum = 1000; } if (!hopt->quantum && cl->quantum > 200000) { - pr_warning( - "HTB: quantum of class %X is big. Consider r2q change.\n", - cl->common.classid); + pr_warn("HTB: quantum of class %X is big. 
Consider r2q change.\n", + cl->common.classid); cl->quantum = 200000; } if (hopt->quantum) diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c index 2e56185736d..a8b2864a696 100644 --- a/net/sched/sch_mq.c +++ b/net/sched/sch_mq.c @@ -78,14 +78,19 @@ static void mq_attach(struct Qdisc *sch) { struct net_device *dev = qdisc_dev(sch); struct mq_sched *priv = qdisc_priv(sch); - struct Qdisc *qdisc; + struct Qdisc *qdisc, *old; unsigned int ntx; for (ntx = 0; ntx < dev->num_tx_queues; ntx++) { qdisc = priv->qdiscs[ntx]; - qdisc = dev_graft_qdisc(qdisc->dev_queue, qdisc); - if (qdisc) - qdisc_destroy(qdisc); + old = dev_graft_qdisc(qdisc->dev_queue, qdisc); + if (old) + qdisc_destroy(old); +#ifdef CONFIG_NET_SCHED + if (ntx < dev->real_num_tx_queues) + qdisc_list_add(qdisc); +#endif + } kfree(priv->qdiscs); priv->qdiscs = NULL; diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index d44c868cb53..6749e2f540d 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -167,15 +167,17 @@ static void mqprio_attach(struct Qdisc *sch) { struct net_device *dev = qdisc_dev(sch); struct mqprio_sched *priv = qdisc_priv(sch); - struct Qdisc *qdisc; + struct Qdisc *qdisc, *old; unsigned int ntx; /* Attach underlying qdisc */ for (ntx = 0; ntx < dev->num_tx_queues; ntx++) { qdisc = priv->qdiscs[ntx]; - qdisc = dev_graft_qdisc(qdisc->dev_queue, qdisc); - if (qdisc) - qdisc_destroy(qdisc); + old = dev_graft_qdisc(qdisc->dev_queue, qdisc); + if (old) + qdisc_destroy(old); + if (ntx < dev->real_num_tx_queues) + qdisc_list_add(qdisc); } kfree(priv->qdiscs); priv->qdiscs = NULL; diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index 2a2b096d9a6..afb050a735f 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -11,8 +11,7 @@ * more details. 
* * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. + * this program; if not, see <http://www.gnu.org/licenses/>. * * Author: Alexander Duyck <alexander.h.duyck@intel.com> */ diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index bccd52b36e9..de1059af6da 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -88,10 +88,10 @@ struct netem_sched_data { u32 duplicate; u32 reorder; u32 corrupt; - u32 rate; + u64 rate; s32 packet_overhead; u32 cell_size; - u32 cell_size_reciprocal; + struct reciprocal_value cell_size_reciprocal; s32 cell_overhead; struct crndstate { @@ -110,6 +110,13 @@ struct netem_sched_data { CLG_GILB_ELL, } loss_model; + enum { + TX_IN_GAP_PERIOD = 1, + TX_IN_BURST_PERIOD, + LOST_IN_GAP_PERIOD, + LOST_IN_BURST_PERIOD, + } _4_state_model; + /* Correlated Loss Generation models */ struct clgstate { /* state of the Markov chain */ @@ -169,7 +176,7 @@ static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb) static void init_crandom(struct crndstate *state, unsigned long rho) { state->rho = rho; - state->last = net_random(); + state->last = prandom_u32(); } /* get_crandom - correlated random number generator @@ -182,9 +189,9 @@ static u32 get_crandom(struct crndstate *state) unsigned long answer; if (state->rho == 0) /* no correlation */ - return net_random(); + return prandom_u32(); - value = net_random(); + value = prandom_u32(); rho = (u64)state->rho + 1; answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32; state->last = answer; @@ -198,50 +205,52 @@ static u32 get_crandom(struct crndstate *state) static bool loss_4state(struct netem_sched_data *q) { struct clgstate *clg = &q->clg; - u32 rnd = net_random(); + u32 rnd = prandom_u32(); /* * Makes a comparison between rnd and the transition * probabilities outgoing from the current state, then decides 
the * next state and if the next packet has to be transmitted or lost. * The four states correspond to: - * 1 => successfully transmitted packets within a gap period - * 4 => isolated losses within a gap period - * 3 => lost packets within a burst period - * 2 => successfully transmitted packets within a burst period + * TX_IN_GAP_PERIOD => successfully transmitted packets within a gap period + * LOST_IN_BURST_PERIOD => isolated losses within a gap period + * LOST_IN_GAP_PERIOD => lost packets within a burst period + * TX_IN_GAP_PERIOD => successfully transmitted packets within a burst period */ switch (clg->state) { - case 1: + case TX_IN_GAP_PERIOD: if (rnd < clg->a4) { - clg->state = 4; + clg->state = LOST_IN_BURST_PERIOD; return true; } else if (clg->a4 < rnd && rnd < clg->a1 + clg->a4) { - clg->state = 3; + clg->state = LOST_IN_GAP_PERIOD; return true; - } else if (clg->a1 + clg->a4 < rnd) - clg->state = 1; + } else if (clg->a1 + clg->a4 < rnd) { + clg->state = TX_IN_GAP_PERIOD; + } break; - case 2: + case TX_IN_BURST_PERIOD: if (rnd < clg->a5) { - clg->state = 3; + clg->state = LOST_IN_GAP_PERIOD; return true; - } else - clg->state = 2; + } else { + clg->state = TX_IN_BURST_PERIOD; + } break; - case 3: + case LOST_IN_GAP_PERIOD: if (rnd < clg->a3) - clg->state = 2; + clg->state = TX_IN_BURST_PERIOD; else if (clg->a3 < rnd && rnd < clg->a2 + clg->a3) { - clg->state = 1; + clg->state = TX_IN_GAP_PERIOD; } else if (clg->a2 + clg->a3 < rnd) { - clg->state = 3; + clg->state = LOST_IN_GAP_PERIOD; return true; } break; - case 4: - clg->state = 1; + case LOST_IN_BURST_PERIOD: + clg->state = TX_IN_GAP_PERIOD; break; } @@ -264,15 +273,15 @@ static bool loss_gilb_ell(struct netem_sched_data *q) switch (clg->state) { case 1: - if (net_random() < clg->a1) + if (prandom_u32() < clg->a1) clg->state = 2; - if (net_random() < clg->a4) + if (prandom_u32() < clg->a4) return true; break; case 2: - if (net_random() < clg->a2) + if (prandom_u32() < clg->a2) clg->state = 1; - if 
(net_random() > clg->a3) + if (prandom_u32() > clg->a3) return true; } @@ -457,7 +466,8 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) skb_checksum_help(skb))) return qdisc_drop(skb, sch); - skb->data[net_random() % skb_headlen(skb)] ^= 1<<(net_random() % 8); + skb->data[prandom_u32() % skb_headlen(skb)] ^= + 1<<(prandom_u32() % 8); } if (unlikely(skb_queue_len(&sch->q) >= sch->limit)) @@ -495,7 +505,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) now = netem_skb_cb(last)->time_to_send; } - delay += packet_len_2_sched_time(skb->len, q); + delay += packet_len_2_sched_time(qdisc_pkt_len(skb), q); } cb->time_to_send = now + delay; @@ -715,9 +725,11 @@ static void get_rate(struct Qdisc *sch, const struct nlattr *attr) q->rate = r->rate; q->packet_overhead = r->packet_overhead; q->cell_size = r->cell_size; + q->cell_overhead = r->cell_overhead; if (q->cell_size) q->cell_size_reciprocal = reciprocal_value(q->cell_size); - q->cell_overhead = r->cell_overhead; + else + q->cell_size_reciprocal = (struct reciprocal_value) { 0 }; } static int get_loss_clg(struct Qdisc *sch, const struct nlattr *attr) @@ -729,7 +741,7 @@ static int get_loss_clg(struct Qdisc *sch, const struct nlattr *attr) nla_for_each_nested(la, attr, rem) { u16 type = nla_type(la); - switch(type) { + switch (type) { case NETEM_LOSS_GI: { const struct tc_netem_gimodel *gi = nla_data(la); @@ -782,6 +794,7 @@ static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = { [TCA_NETEM_RATE] = { .len = sizeof(struct tc_netem_rate) }, [TCA_NETEM_LOSS] = { .type = NLA_NESTED }, [TCA_NETEM_ECN] = { .type = NLA_U32 }, + [TCA_NETEM_RATE64] = { .type = NLA_U64 }, }; static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla, @@ -852,6 +865,10 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt) if (tb[TCA_NETEM_RATE]) get_rate(sch, tb[TCA_NETEM_RATE]); + if (tb[TCA_NETEM_RATE64]) + q->rate = max_t(u64, q->rate, + 
nla_get_u64(tb[TCA_NETEM_RATE64])); + if (tb[TCA_NETEM_ECN]) q->ecn = nla_get_u32(tb[TCA_NETEM_ECN]); @@ -974,7 +991,13 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb) if (nla_put(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt)) goto nla_put_failure; - rate.rate = q->rate; + if (q->rate >= (1ULL << 32)) { + if (nla_put_u64(skb, TCA_NETEM_RATE64, q->rate)) + goto nla_put_failure; + rate.rate = ~0U; + } else { + rate.rate = q->rate; + } rate.packet_overhead = q->packet_overhead; rate.cell_size = q->cell_size; rate.cell_overhead = q->cell_overhead; diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c new file mode 100644 index 00000000000..a255d0200a5 --- /dev/null +++ b/net/sched/sch_pie.c @@ -0,0 +1,555 @@ +/* Copyright (C) 2013 Cisco Systems, Inc, 2013. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Author: Vijay Subramanian <vijaynsu@cisco.com> + * Author: Mythili Prabhu <mysuryan@cisco.com> + * + * ECN support is added by Naeem Khademi <naeemk@ifi.uio.no> + * University of Oslo, Norway. 
+ */ + +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/skbuff.h> +#include <net/pkt_sched.h> +#include <net/inet_ecn.h> + +#define QUEUE_THRESHOLD 10000 +#define DQCOUNT_INVALID -1 +#define MAX_PROB 0xffffffff +#define PIE_SCALE 8 + +/* parameters used */ +struct pie_params { + psched_time_t target; /* user specified target delay in pschedtime */ + u32 tupdate; /* timer frequency (in jiffies) */ + u32 limit; /* number of packets that can be enqueued */ + u32 alpha; /* alpha and beta are between -4 and 4 */ + u32 beta; /* and are used for shift relative to 1 */ + bool ecn; /* true if ecn is enabled */ + bool bytemode; /* to scale drop early prob based on pkt size */ +}; + +/* variables used */ +struct pie_vars { + u32 prob; /* probability but scaled by u32 limit. */ + psched_time_t burst_time; + psched_time_t qdelay; + psched_time_t qdelay_old; + u64 dq_count; /* measured in bytes */ + psched_time_t dq_tstamp; /* drain rate */ + u32 avg_dq_rate; /* bytes per pschedtime tick,scaled */ + u32 qlen_old; /* in bytes */ +}; + +/* statistics gathering */ +struct pie_stats { + u32 packets_in; /* total number of packets enqueued */ + u32 dropped; /* packets dropped due to pie_action */ + u32 overlimit; /* dropped due to lack of space in queue */ + u32 maxq; /* maximum queue size */ + u32 ecn_mark; /* packets marked with ECN */ +}; + +/* private data for the Qdisc */ +struct pie_sched_data { + struct pie_params params; + struct pie_vars vars; + struct pie_stats stats; + struct timer_list adapt_timer; +}; + +static void pie_params_init(struct pie_params *params) +{ + params->alpha = 2; + params->beta = 20; + params->tupdate = usecs_to_jiffies(30 * USEC_PER_MSEC); /* 30 ms */ + params->limit = 1000; /* default of 1000 packets */ + params->target = PSCHED_NS2TICKS(20 * NSEC_PER_MSEC); /* 20 ms */ + params->ecn = false; + params->bytemode = false; +} + +static void 
pie_vars_init(struct pie_vars *vars) +{ + vars->dq_count = DQCOUNT_INVALID; + vars->avg_dq_rate = 0; + /* default of 100 ms in pschedtime */ + vars->burst_time = PSCHED_NS2TICKS(100 * NSEC_PER_MSEC); +} + +static bool drop_early(struct Qdisc *sch, u32 packet_size) +{ + struct pie_sched_data *q = qdisc_priv(sch); + u32 rnd; + u32 local_prob = q->vars.prob; + u32 mtu = psched_mtu(qdisc_dev(sch)); + + /* If there is still burst allowance left skip random early drop */ + if (q->vars.burst_time > 0) + return false; + + /* If current delay is less than half of target, and + * if drop prob is low already, disable early_drop + */ + if ((q->vars.qdelay < q->params.target / 2) + && (q->vars.prob < MAX_PROB / 5)) + return false; + + /* If we have fewer than 2 mtu-sized packets, disable drop_early, + * similar to min_th in RED + */ + if (sch->qstats.backlog < 2 * mtu) + return false; + + /* If bytemode is turned on, use packet size to compute new + * probablity. Smaller packets will have lower drop prob in this case + */ + if (q->params.bytemode && packet_size <= mtu) + local_prob = (local_prob / mtu) * packet_size; + else + local_prob = q->vars.prob; + + rnd = prandom_u32(); + if (rnd < local_prob) + return true; + + return false; +} + +static int pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch) +{ + struct pie_sched_data *q = qdisc_priv(sch); + bool enqueue = false; + + if (unlikely(qdisc_qlen(sch) >= sch->limit)) { + q->stats.overlimit++; + goto out; + } + + if (!drop_early(sch, skb->len)) { + enqueue = true; + } else if (q->params.ecn && (q->vars.prob <= MAX_PROB / 10) && + INET_ECN_set_ce(skb)) { + /* If packet is ecn capable, mark it if drop probability + * is lower than 10%, else drop it. 
+ */ + q->stats.ecn_mark++; + enqueue = true; + } + + /* we can enqueue the packet */ + if (enqueue) { + q->stats.packets_in++; + if (qdisc_qlen(sch) > q->stats.maxq) + q->stats.maxq = qdisc_qlen(sch); + + return qdisc_enqueue_tail(skb, sch); + } + +out: + q->stats.dropped++; + return qdisc_drop(skb, sch); +} + +static const struct nla_policy pie_policy[TCA_PIE_MAX + 1] = { + [TCA_PIE_TARGET] = {.type = NLA_U32}, + [TCA_PIE_LIMIT] = {.type = NLA_U32}, + [TCA_PIE_TUPDATE] = {.type = NLA_U32}, + [TCA_PIE_ALPHA] = {.type = NLA_U32}, + [TCA_PIE_BETA] = {.type = NLA_U32}, + [TCA_PIE_ECN] = {.type = NLA_U32}, + [TCA_PIE_BYTEMODE] = {.type = NLA_U32}, +}; + +static int pie_change(struct Qdisc *sch, struct nlattr *opt) +{ + struct pie_sched_data *q = qdisc_priv(sch); + struct nlattr *tb[TCA_PIE_MAX + 1]; + unsigned int qlen; + int err; + + if (!opt) + return -EINVAL; + + err = nla_parse_nested(tb, TCA_PIE_MAX, opt, pie_policy); + if (err < 0) + return err; + + sch_tree_lock(sch); + + /* convert from microseconds to pschedtime */ + if (tb[TCA_PIE_TARGET]) { + /* target is in us */ + u32 target = nla_get_u32(tb[TCA_PIE_TARGET]); + + /* convert to pschedtime */ + q->params.target = PSCHED_NS2TICKS((u64)target * NSEC_PER_USEC); + } + + /* tupdate is in jiffies */ + if (tb[TCA_PIE_TUPDATE]) + q->params.tupdate = usecs_to_jiffies(nla_get_u32(tb[TCA_PIE_TUPDATE])); + + if (tb[TCA_PIE_LIMIT]) { + u32 limit = nla_get_u32(tb[TCA_PIE_LIMIT]); + + q->params.limit = limit; + sch->limit = limit; + } + + if (tb[TCA_PIE_ALPHA]) + q->params.alpha = nla_get_u32(tb[TCA_PIE_ALPHA]); + + if (tb[TCA_PIE_BETA]) + q->params.beta = nla_get_u32(tb[TCA_PIE_BETA]); + + if (tb[TCA_PIE_ECN]) + q->params.ecn = nla_get_u32(tb[TCA_PIE_ECN]); + + if (tb[TCA_PIE_BYTEMODE]) + q->params.bytemode = nla_get_u32(tb[TCA_PIE_BYTEMODE]); + + /* Drop excess packets if new limit is lower */ + qlen = sch->q.qlen; + while (sch->q.qlen > sch->limit) { + struct sk_buff *skb = __skb_dequeue(&sch->q); + + 
sch->qstats.backlog -= qdisc_pkt_len(skb); + qdisc_drop(skb, sch); + } + qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen); + + sch_tree_unlock(sch); + return 0; +} + +static void pie_process_dequeue(struct Qdisc *sch, struct sk_buff *skb) +{ + + struct pie_sched_data *q = qdisc_priv(sch); + int qlen = sch->qstats.backlog; /* current queue size in bytes */ + + /* If current queue is about 10 packets or more and dq_count is unset + * we have enough packets to calculate the drain rate. Save + * current time as dq_tstamp and start measurement cycle. + */ + if (qlen >= QUEUE_THRESHOLD && q->vars.dq_count == DQCOUNT_INVALID) { + q->vars.dq_tstamp = psched_get_time(); + q->vars.dq_count = 0; + } + + /* Calculate the average drain rate from this value. If queue length + * has receded to a small value viz., <= QUEUE_THRESHOLD bytes,reset + * the dq_count to -1 as we don't have enough packets to calculate the + * drain rate anymore The following if block is entered only when we + * have a substantial queue built up (QUEUE_THRESHOLD bytes or more) + * and we calculate the drain rate for the threshold here. dq_count is + * in bytes, time difference in psched_time, hence rate is in + * bytes/psched_time. 
+ */ + if (q->vars.dq_count != DQCOUNT_INVALID) { + q->vars.dq_count += skb->len; + + if (q->vars.dq_count >= QUEUE_THRESHOLD) { + psched_time_t now = psched_get_time(); + u32 dtime = now - q->vars.dq_tstamp; + u32 count = q->vars.dq_count << PIE_SCALE; + + if (dtime == 0) + return; + + count = count / dtime; + + if (q->vars.avg_dq_rate == 0) + q->vars.avg_dq_rate = count; + else + q->vars.avg_dq_rate = + (q->vars.avg_dq_rate - + (q->vars.avg_dq_rate >> 3)) + (count >> 3); + + /* If the queue has receded below the threshold, we hold + * on to the last drain rate calculated, else we reset + * dq_count to 0 to re-enter the if block when the next + * packet is dequeued + */ + if (qlen < QUEUE_THRESHOLD) + q->vars.dq_count = DQCOUNT_INVALID; + else { + q->vars.dq_count = 0; + q->vars.dq_tstamp = psched_get_time(); + } + + if (q->vars.burst_time > 0) { + if (q->vars.burst_time > dtime) + q->vars.burst_time -= dtime; + else + q->vars.burst_time = 0; + } + } + } +} + +static void calculate_probability(struct Qdisc *sch) +{ + struct pie_sched_data *q = qdisc_priv(sch); + u32 qlen = sch->qstats.backlog; /* queue size in bytes */ + psched_time_t qdelay = 0; /* in pschedtime */ + psched_time_t qdelay_old = q->vars.qdelay; /* in pschedtime */ + s32 delta = 0; /* determines the change in probability */ + u32 oldprob; + u32 alpha, beta; + bool update_prob = true; + + q->vars.qdelay_old = q->vars.qdelay; + + if (q->vars.avg_dq_rate > 0) + qdelay = (qlen << PIE_SCALE) / q->vars.avg_dq_rate; + else + qdelay = 0; + + /* If qdelay is zero and qlen is not, it means qlen is very small, less + * than dequeue_rate, so we do not update probabilty in this round + */ + if (qdelay == 0 && qlen != 0) + update_prob = false; + + /* Add ranges for alpha and beta, more aggressive for high dropping + * mode and gentle steps for light dropping mode + * In light dropping mode, take gentle steps; in medium dropping mode, + * take medium steps; in high dropping mode, take big steps. 
+ */ + if (q->vars.prob < MAX_PROB / 100) { + alpha = + (q->params.alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 7; + beta = + (q->params.beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 7; + } else if (q->vars.prob < MAX_PROB / 10) { + alpha = + (q->params.alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 5; + beta = + (q->params.beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 5; + } else { + alpha = + (q->params.alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4; + beta = + (q->params.beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4; + } + + /* alpha and beta should be between 0 and 32, in multiples of 1/16 */ + delta += alpha * ((qdelay - q->params.target)); + delta += beta * ((qdelay - qdelay_old)); + + oldprob = q->vars.prob; + + /* to ensure we increase probability in steps of no more than 2% */ + if (delta > (s32) (MAX_PROB / (100 / 2)) && + q->vars.prob >= MAX_PROB / 10) + delta = (MAX_PROB / 100) * 2; + + /* Non-linear drop: + * Tune drop probability to increase quickly for high delays(>= 250ms) + * 250ms is derived through experiments and provides error protection + */ + + if (qdelay > (PSCHED_NS2TICKS(250 * NSEC_PER_MSEC))) + delta += MAX_PROB / (100 / 2); + + q->vars.prob += delta; + + if (delta > 0) { + /* prevent overflow */ + if (q->vars.prob < oldprob) { + q->vars.prob = MAX_PROB; + /* Prevent normalization error. If probability is at + * maximum value already, we normalize it here, and + * skip the check to do a non-linear drop in the next + * section. + */ + update_prob = false; + } + } else { + /* prevent underflow */ + if (q->vars.prob > oldprob) + q->vars.prob = 0; + } + + /* Non-linear drop in probability: Reduce drop probability quickly if + * delay is 0 for 2 consecutive Tupdate periods. + */ + + if ((qdelay == 0) && (qdelay_old == 0) && update_prob) + q->vars.prob = (q->vars.prob * 98) / 100; + + q->vars.qdelay = qdelay; + q->vars.qlen_old = qlen; + + /* We restart the measurement cycle if the following conditions are met + * 1. 
If the delay has been low for 2 consecutive Tupdate periods + * 2. Calculated drop probability is zero + * 3. We have atleast one estimate for the avg_dq_rate ie., + * is a non-zero value + */ + if ((q->vars.qdelay < q->params.target / 2) && + (q->vars.qdelay_old < q->params.target / 2) && + (q->vars.prob == 0) && + (q->vars.avg_dq_rate > 0)) + pie_vars_init(&q->vars); +} + +static void pie_timer(unsigned long arg) +{ + struct Qdisc *sch = (struct Qdisc *)arg; + struct pie_sched_data *q = qdisc_priv(sch); + spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch)); + + spin_lock(root_lock); + calculate_probability(sch); + + /* reset the timer to fire after 'tupdate'. tupdate is in jiffies. */ + if (q->params.tupdate) + mod_timer(&q->adapt_timer, jiffies + q->params.tupdate); + spin_unlock(root_lock); + +} + +static int pie_init(struct Qdisc *sch, struct nlattr *opt) +{ + struct pie_sched_data *q = qdisc_priv(sch); + + pie_params_init(&q->params); + pie_vars_init(&q->vars); + sch->limit = q->params.limit; + + setup_timer(&q->adapt_timer, pie_timer, (unsigned long)sch); + mod_timer(&q->adapt_timer, jiffies + HZ / 2); + + if (opt) { + int err = pie_change(sch, opt); + + if (err) + return err; + } + + return 0; +} + +static int pie_dump(struct Qdisc *sch, struct sk_buff *skb) +{ + struct pie_sched_data *q = qdisc_priv(sch); + struct nlattr *opts; + + opts = nla_nest_start(skb, TCA_OPTIONS); + if (opts == NULL) + goto nla_put_failure; + + /* convert target from pschedtime to us */ + if (nla_put_u32(skb, TCA_PIE_TARGET, + ((u32) PSCHED_TICKS2NS(q->params.target)) / + NSEC_PER_USEC) || + nla_put_u32(skb, TCA_PIE_LIMIT, sch->limit) || + nla_put_u32(skb, TCA_PIE_TUPDATE, jiffies_to_usecs(q->params.tupdate)) || + nla_put_u32(skb, TCA_PIE_ALPHA, q->params.alpha) || + nla_put_u32(skb, TCA_PIE_BETA, q->params.beta) || + nla_put_u32(skb, TCA_PIE_ECN, q->params.ecn) || + nla_put_u32(skb, TCA_PIE_BYTEMODE, q->params.bytemode)) + goto nla_put_failure; + + return 
nla_nest_end(skb, opts); + +nla_put_failure: + nla_nest_cancel(skb, opts); + return -1; + +} + +static int pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d) +{ + struct pie_sched_data *q = qdisc_priv(sch); + struct tc_pie_xstats st = { + .prob = q->vars.prob, + .delay = ((u32) PSCHED_TICKS2NS(q->vars.qdelay)) / + NSEC_PER_USEC, + /* unscale and return dq_rate in bytes per sec */ + .avg_dq_rate = q->vars.avg_dq_rate * + (PSCHED_TICKS_PER_SEC) >> PIE_SCALE, + .packets_in = q->stats.packets_in, + .overlimit = q->stats.overlimit, + .maxq = q->stats.maxq, + .dropped = q->stats.dropped, + .ecn_mark = q->stats.ecn_mark, + }; + + return gnet_stats_copy_app(d, &st, sizeof(st)); +} + +static struct sk_buff *pie_qdisc_dequeue(struct Qdisc *sch) +{ + struct sk_buff *skb; + skb = __qdisc_dequeue_head(sch, &sch->q); + + if (!skb) + return NULL; + + pie_process_dequeue(sch, skb); + return skb; +} + +static void pie_reset(struct Qdisc *sch) +{ + struct pie_sched_data *q = qdisc_priv(sch); + qdisc_reset_queue(sch); + pie_vars_init(&q->vars); +} + +static void pie_destroy(struct Qdisc *sch) +{ + struct pie_sched_data *q = qdisc_priv(sch); + q->params.tupdate = 0; + del_timer_sync(&q->adapt_timer); +} + +static struct Qdisc_ops pie_qdisc_ops __read_mostly = { + .id = "pie", + .priv_size = sizeof(struct pie_sched_data), + .enqueue = pie_qdisc_enqueue, + .dequeue = pie_qdisc_dequeue, + .peek = qdisc_peek_dequeued, + .init = pie_init, + .destroy = pie_destroy, + .reset = pie_reset, + .change = pie_change, + .dump = pie_dump, + .dump_stats = pie_dump_stats, + .owner = THIS_MODULE, +}; + +static int __init pie_module_init(void) +{ + return register_qdisc(&pie_qdisc_ops); +} + +static void __exit pie_module_exit(void) +{ + unregister_qdisc(&pie_qdisc_ops); +} + +module_init(pie_module_init); +module_exit(pie_module_exit); + +MODULE_DESCRIPTION("Proportional Integral controller Enhanced (PIE) scheduler"); +MODULE_AUTHOR("Vijay Subramanian"); +MODULE_AUTHOR("Mythili Prabhu"); 
+MODULE_LICENSE("GPL"); diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index 30ea4674cab..9b0f7093d97 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -220,7 +220,7 @@ static u32 sfb_compute_qlen(u32 *prob_r, u32 *avgpm_r, const struct sfb_sched_da static void sfb_init_perturbation(u32 slot, struct sfb_sched_data *q) { - q->bins[slot].perturbation = net_random(); + q->bins[slot].perturbation = prandom_u32(); } static void sfb_swap_slot(struct sfb_sched_data *q) @@ -381,7 +381,7 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch) goto enqueue; } - r = net_random() & SFB_MAX_PROB; + r = prandom_u32() & SFB_MAX_PROB; if (unlikely(r < p_min)) { if (unlikely(p_min > SFB_MAX_PROB / 2)) { diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index d3a1bc26dbf..87317ff0b4e 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -237,10 +237,12 @@ static inline void sfq_link(struct sfq_sched_data *q, sfq_index x) } #define sfq_unlink(q, x, n, p) \ - n = q->slots[x].dep.next; \ - p = q->slots[x].dep.prev; \ - sfq_dep_head(q, p)->next = n; \ - sfq_dep_head(q, n)->prev = p + do { \ + n = q->slots[x].dep.next; \ + p = q->slots[x].dep.prev; \ + sfq_dep_head(q, p)->next = n; \ + sfq_dep_head(q, n)->prev = p; \ + } while (0) static inline void sfq_dec(struct sfq_sched_data *q, sfq_index x) @@ -627,7 +629,7 @@ static void sfq_perturbation(unsigned long arg) spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch)); spin_lock(root_lock); - q->perturbation = net_random(); + q->perturbation = prandom_u32(); if (!q->filter_list && q->tail) sfq_rehash(sch); spin_unlock(root_lock); @@ -696,7 +698,7 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt) del_timer(&q->perturb_timer); if (q->perturb_period) { mod_timer(&q->perturb_timer, jiffies + q->perturb_period); - q->perturbation = net_random(); + q->perturbation = prandom_u32(); } sch_tree_unlock(sch); kfree(p); @@ -757,7 +759,7 @@ static int sfq_init(struct Qdisc *sch, struct 
nlattr *opt) q->quantum = psched_mtu(qdisc_dev(sch)); q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum); q->perturb_period = 0; - q->perturbation = net_random(); + q->perturbation = prandom_u32(); if (opt) { int err = sfq_change(sch, opt); diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 887e672f9d7..fbba5b0ec12 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -307,6 +307,8 @@ static const struct nla_policy tbf_policy[TCA_TBF_MAX + 1] = { [TCA_TBF_PTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE }, [TCA_TBF_RATE64] = { .type = NLA_U64 }, [TCA_TBF_PRATE64] = { .type = NLA_U64 }, + [TCA_TBF_BURST] = { .type = NLA_U32 }, + [TCA_TBF_PBURST] = { .type = NLA_U32 }, }; static int tbf_change(struct Qdisc *sch, struct nlattr *opt) @@ -358,7 +360,12 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt) rate64 = nla_get_u64(tb[TCA_TBF_RATE64]); psched_ratecfg_precompute(&rate, &qopt->rate, rate64); - max_size = min_t(u64, psched_ns_t2l(&rate, buffer), ~0U); + if (tb[TCA_TBF_BURST]) { + max_size = nla_get_u32(tb[TCA_TBF_BURST]); + buffer = psched_l2t_ns(&rate, max_size); + } else { + max_size = min_t(u64, psched_ns_t2l(&rate, buffer), ~0U); + } if (qopt->peakrate.rate) { if (tb[TCA_TBF_PRATE64]) @@ -366,12 +373,18 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt) psched_ratecfg_precompute(&peak, &qopt->peakrate, prate64); if (peak.rate_bytes_ps <= rate.rate_bytes_ps) { pr_warn_ratelimited("sch_tbf: peakrate %llu is lower than or equals to rate %llu !\n", - peak.rate_bytes_ps, rate.rate_bytes_ps); + peak.rate_bytes_ps, rate.rate_bytes_ps); err = -EINVAL; goto done; } - max_size = min_t(u64, max_size, psched_ns_t2l(&peak, mtu)); + if (tb[TCA_TBF_PBURST]) { + u32 pburst = nla_get_u32(tb[TCA_TBF_PBURST]); + max_size = min_t(u32, max_size, pburst); + mtu = psched_l2t_ns(&peak, pburst); + } else { + max_size = min_t(u64, max_size, psched_ns_t2l(&peak, mtu)); + } } if (max_size < psched_mtu(qdisc_dev(sch))) @@ -391,9 +404,15 @@ static 
int tbf_change(struct Qdisc *sch, struct nlattr *opt) q->qdisc = child; } q->limit = qopt->limit; - q->mtu = PSCHED_TICKS2NS(qopt->mtu); + if (tb[TCA_TBF_PBURST]) + q->mtu = mtu; + else + q->mtu = PSCHED_TICKS2NS(qopt->mtu); q->max_size = max_size; - q->buffer = PSCHED_TICKS2NS(qopt->buffer); + if (tb[TCA_TBF_BURST]) + q->buffer = buffer; + else + q->buffer = PSCHED_TICKS2NS(qopt->buffer); q->tokens = q->buffer; q->ptokens = q->mtu; diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 31ed008c8e1..5ae60920067 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -22,9 +22,8 @@ * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * along with GNU CC; see the file COPYING. If not, see + * <http://www.gnu.org/licenses/>. * * Please send any bug reports or fixes you make to the * email address(es): @@ -90,14 +89,12 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a /* Initialize the object handling fields. */ atomic_set(&asoc->base.refcnt, 1); - asoc->base.dead = false; /* Initialize the bind addr area. */ sctp_bind_addr_init(&asoc->base.bind_addr, ep->base.bind_addr.port); asoc->state = SCTP_STATE_CLOSED; asoc->cookie_life = ms_to_ktime(sp->assocparams.sasoc_cookie_life); - asoc->frag_point = 0; asoc->user_frag = sp->user_frag; /* Set the association max_retrans and RTO values from the @@ -110,8 +107,6 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a asoc->rto_max = msecs_to_jiffies(sp->rtoinfo.srto_max); asoc->rto_min = msecs_to_jiffies(sp->rtoinfo.srto_min); - asoc->overall_error_count = 0; - /* Initialize the association's heartbeat interval based on the * sock configured value. 
*/ @@ -132,18 +127,15 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a */ asoc->param_flags = sp->param_flags; - /* Initialize the maximum mumber of new data packets that can be sent + /* Initialize the maximum number of new data packets that can be sent * in a burst. */ asoc->max_burst = sp->max_burst; /* initialize association timers */ - asoc->timeouts[SCTP_EVENT_TIMEOUT_NONE] = 0; asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_COOKIE] = asoc->rto_initial; asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_INIT] = asoc->rto_initial; asoc->timeouts[SCTP_EVENT_TIMEOUT_T2_SHUTDOWN] = asoc->rto_initial; - asoc->timeouts[SCTP_EVENT_TIMEOUT_T3_RTX] = 0; - asoc->timeouts[SCTP_EVENT_TIMEOUT_T4_RTO] = 0; /* sctpimpguide Section 2.12.2 * If the 'T5-shutdown-guard' timer is used, it SHOULD be set to the @@ -152,7 +144,6 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a asoc->timeouts[SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD] = 5 * asoc->rto_max; - asoc->timeouts[SCTP_EVENT_TIMEOUT_HEARTBEAT] = 0; asoc->timeouts[SCTP_EVENT_TIMEOUT_SACK] = asoc->sackdelay; asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE] = sp->autoclose * HZ; @@ -172,11 +163,6 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a asoc->max_init_timeo = msecs_to_jiffies(sp->initmsg.sinit_max_init_timeo); - /* Allocate storage for the ssnmap after the inbound and outbound - * streams have been negotiated during Init. - */ - asoc->ssnmap = NULL; - /* Set the local window size for receive. * This is also the rcvbuf space per association. * RFC 6 - A SCTP receiver MUST be able to receive a minimum of @@ -189,25 +175,15 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a asoc->a_rwnd = asoc->rwnd; - asoc->rwnd_over = 0; - asoc->rwnd_press = 0; - /* Use my own max window until I learn something better. */ asoc->peer.rwnd = SCTP_DEFAULT_MAXWINDOW; - /* Set the sndbuf size for transmit. 
*/ - asoc->sndbuf_used = 0; - /* Initialize the receive memory counter */ atomic_set(&asoc->rmem_alloc, 0); init_waitqueue_head(&asoc->wait); asoc->c.my_vtag = sctp_generate_tag(ep); - asoc->peer.i.init_tag = 0; /* INIT needs a vtag of 0. */ - asoc->c.peer_vtag = 0; - asoc->c.my_ttag = 0; - asoc->c.peer_ttag = 0; asoc->c.my_port = ep->base.bind_addr.port; asoc->c.initial_tsn = sctp_generate_tsn(ep); @@ -218,7 +194,6 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a asoc->adv_peer_ack_point = asoc->ctsn_ack_point; asoc->highest_sacked = asoc->ctsn_ack_point; asoc->last_cwr_tsn = asoc->ctsn_ack_point; - asoc->unack_data = 0; /* ADDIP Section 4.1 Asconf Chunk Procedures * @@ -237,7 +212,6 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a /* Make an empty list of remote transport addresses. */ INIT_LIST_HEAD(&asoc->peer.transport_addr_list); - asoc->peer.transport_count = 0; /* RFC 2960 5.1 Normal Establishment of an Association * @@ -251,20 +225,15 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a * already received one packet.] */ asoc->peer.sack_needed = 1; - asoc->peer.sack_cnt = 0; asoc->peer.sack_generation = 1; /* Assume that the peer will tell us if he recognizes ASCONF * as part of INIT exchange. - * The sctp_addip_noauth option is there for backward compatibilty + * The sctp_addip_noauth option is there for backward compatibility * and will revert old behavior. */ - asoc->peer.asconf_capable = 0; if (net->sctp.addip_noauth) asoc->peer.asconf_capable = 1; - asoc->asconf_addr_del_pending = NULL; - asoc->src_out_of_asoc_ok = 0; - asoc->new_transport = NULL; /* Create an input queue. 
*/ sctp_inq_init(&asoc->base.inqueue); @@ -276,12 +245,6 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a if (!sctp_ulpq_init(&asoc->ulpq, asoc)) goto fail_init; - memset(&asoc->peer.tsn_map, 0, sizeof(struct sctp_tsnmap)); - - asoc->need_ecne = 0; - - asoc->assoc_id = 0; - /* Assume that peer would support both address types unless we are * told otherwise. */ @@ -297,9 +260,6 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a asoc->default_timetolive = sp->default_timetolive; asoc->default_rcv_context = sp->default_rcv_context; - /* SCTP_GET_ASSOC_STATS COUNTERS */ - memset(&asoc->stats, 0, sizeof(struct sctp_priv_assoc_stats)); - /* AUTH related initializations */ INIT_LIST_HEAD(&asoc->endpoint_shared_keys); err = sctp_auth_asoc_copy_shkeys(ep, asoc, gfp); @@ -307,9 +267,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a goto fail_init; asoc->active_key_id = ep->active_key_id; - asoc->asoc_shared_key = NULL; - asoc->default_hmac_id = 0; /* Save the hmacs and chunks list into this association */ if (ep->auth_hmacs_list) memcpy(asoc->c.auth_hmacs, ep->auth_hmacs_list, @@ -994,17 +952,13 @@ int sctp_cmp_addr_exact(const union sctp_addr *ss1, */ struct sctp_chunk *sctp_get_ecne_prepend(struct sctp_association *asoc) { - struct sctp_chunk *chunk; + if (!asoc->need_ecne) + return NULL; /* Send ECNE if needed. * Not being able to allocate a chunk here is not deadly. 
*/ - if (asoc->need_ecne) - chunk = sctp_make_ecne(asoc, asoc->last_ecne_tsn); - else - chunk = NULL; - - return chunk; + return sctp_make_ecne(asoc, asoc->last_ecne_tsn); } /* @@ -1265,7 +1219,7 @@ void sctp_assoc_update(struct sctp_association *asoc, } } - /* SCTP-AUTH: Save the peer parameters from the new assocaitions + /* SCTP-AUTH: Save the peer parameters from the new associations * and also move the association shared keys over */ kfree(asoc->peer.peer_random); @@ -1393,7 +1347,7 @@ void sctp_assoc_sync_pmtu(struct sock *sk, struct sctp_association *asoc) } /* Should we send a SACK to update our peer? */ -static inline int sctp_peer_needs_update(struct sctp_association *asoc) +static inline bool sctp_peer_needs_update(struct sctp_association *asoc) { struct net *net = sock_net(asoc->base.sk); switch (asoc->state) { @@ -1405,12 +1359,12 @@ static inline int sctp_peer_needs_update(struct sctp_association *asoc) ((asoc->rwnd - asoc->a_rwnd) >= max_t(__u32, (asoc->base.sk->sk_rcvbuf >> net->sctp.rwnd_upd_shift), asoc->pathmtu))) - return 1; + return true; break; default: break; } - return 0; + return false; } /* Increase asoc's rwnd by len and send any window update SACK if needed. */ @@ -1490,7 +1444,7 @@ void sctp_assoc_rwnd_decrease(struct sctp_association *asoc, unsigned int len) /* If we've reached or overflowed our receive buffer, announce * a 0 rwnd if rwnd would still be positive. Store the - * the pottential pressure overflow so that the window can be restored + * the potential pressure overflow so that the window can be restored * back to original value. */ if (rx_count >= asoc->base.sk->sk_rcvbuf) diff --git a/net/sctp/auth.c b/net/sctp/auth.c index 46b5977978a..683c7d1b130 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -16,9 +16,8 @@ * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. 
If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * along with GNU CC; see the file COPYING. If not, see + * <http://www.gnu.org/licenses/>. * * Please send any bug reports or fixes you make to the * email address(es): @@ -42,7 +41,7 @@ static struct sctp_hmac sctp_hmac_list[SCTP_AUTH_NUM_HMACS] = { }, { .hmac_id = SCTP_AUTH_HMAC_ID_SHA1, - .hmac_name="hmac(sha1)", + .hmac_name = "hmac(sha1)", .hmac_len = SCTP_SHA1_SIG_SIZE, }, { @@ -52,7 +51,7 @@ static struct sctp_hmac sctp_hmac_list[SCTP_AUTH_NUM_HMACS] = { #if defined (CONFIG_CRYPTO_SHA256) || defined (CONFIG_CRYPTO_SHA256_MODULE) { .hmac_id = SCTP_AUTH_HMAC_ID_SHA256, - .hmac_name="hmac(sha256)", + .hmac_name = "hmac(sha256)", .hmac_len = SCTP_SHA256_SIG_SIZE, } #endif @@ -164,7 +163,7 @@ static int sctp_auth_compare_vectors(struct sctp_auth_bytes *vector1, * lead-zero padded. If it is not, it * is automatically larger numerically. */ - for (i = 0; i < abs(diff); i++ ) { + for (i = 0; i < abs(diff); i++) { if (longer[i] != 0) return diff; } @@ -227,9 +226,9 @@ static struct sctp_auth_bytes *sctp_auth_make_local_vector( gfp_t gfp) { return sctp_auth_make_key_vector( - (sctp_random_param_t*)asoc->c.auth_random, - (sctp_chunks_param_t*)asoc->c.auth_chunks, - (sctp_hmac_algo_param_t*)asoc->c.auth_hmacs, + (sctp_random_param_t *)asoc->c.auth_random, + (sctp_chunks_param_t *)asoc->c.auth_chunks, + (sctp_hmac_algo_param_t *)asoc->c.auth_hmacs, gfp); } @@ -500,8 +499,7 @@ void sctp_auth_destroy_hmacs(struct crypto_hash *auth_hmacs[]) if (!auth_hmacs) return; - for (i = 0; i < SCTP_AUTH_NUM_HMACS; i++) - { + for (i = 0; i < SCTP_AUTH_NUM_HMACS; i++) { if (auth_hmacs[i]) crypto_free_hash(auth_hmacs[i]); } @@ -648,15 +646,15 @@ static int __sctp_auth_cid(sctp_cid_t chunk, struct sctp_chunks_param *param) */ for (i = 0; !found && i < len; i++) { switch (param->chunks[i]) { - case SCTP_CID_INIT: - case SCTP_CID_INIT_ACK: - case SCTP_CID_SHUTDOWN_COMPLETE: - 
case SCTP_CID_AUTH: + case SCTP_CID_INIT: + case SCTP_CID_INIT_ACK: + case SCTP_CID_SHUTDOWN_COMPLETE: + case SCTP_CID_AUTH: break; - default: + default: if (param->chunks[i] == chunk) - found = 1; + found = 1; break; } } diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c index 077bb070052..871cdf9567e 100644 --- a/net/sctp/bind_addr.c +++ b/net/sctp/bind_addr.c @@ -21,9 +21,8 @@ * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * along with GNU CC; see the file COPYING. If not, see + * <http://www.gnu.org/licenses/>. * * Please send any bug reports or fixes you make to the * email address(es): diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index f2044fcb9dd..158701da2d3 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -18,9 +18,8 @@ * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * along with GNU CC; see the file COPYING. If not, see + * <http://www.gnu.org/licenses/>. * * Please send any bug reports or fixes you make to the * email address(es): @@ -255,7 +254,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, SCTP_INC_STATS_USER(sock_net(asoc->base.sk), SCTP_MIB_FRAGUSRMSGS); /* Create chunks for all the full sized DATA chunks. 
*/ - for (i=0, len=first_len; i < whole; i++) { + for (i = 0, len = first_len; i < whole; i++) { frag = SCTP_DATA_MIDDLE_FRAG; if (0 == i) @@ -318,7 +317,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, goto errout; } - err = sctp_user_addto_chunk(chunk, offset, over,msgh->msg_iov); + err = sctp_user_addto_chunk(chunk, offset, over, msgh->msg_iov); /* Put the chunk->skb back into the form expected by send. */ __skb_pull(chunk->skb, (__u8 *)chunk->chunk_hdr diff --git a/net/sctp/command.c b/net/sctp/command.c index 3d9a9ff69c0..dd737585161 100644 --- a/net/sctp/command.c +++ b/net/sctp/command.c @@ -19,9 +19,8 @@ * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * along with GNU CC; see the file COPYING. If not, see + * <http://www.gnu.org/licenses/>. * * Please send any bug reports or fixes you make to the * email address(es): diff --git a/net/sctp/debug.c b/net/sctp/debug.c index e89015d8935..95d7b15dad2 100644 --- a/net/sctp/debug.c +++ b/net/sctp/debug.c @@ -22,9 +22,8 @@ * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * along with GNU CC; see the file COPYING. If not, see + * <http://www.gnu.org/licenses/>. * * Please send any bug reports or fixes you make to the * email address(es): diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 09b8daac87c..8e5fdea0521 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -23,9 +23,8 @@ * See the GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * along with GNU CC; see the file COPYING. If not, see + * <http://www.gnu.org/licenses/>. * * Please send any bug reports or fixes you make to the * email address(es): @@ -369,9 +368,9 @@ struct sctp_association *sctp_endpoint_lookup_assoc( { struct sctp_association *asoc; - sctp_local_bh_disable(); + local_bh_disable(); asoc = __sctp_endpoint_lookup_assoc(ep, paddr, transport); - sctp_local_bh_enable(); + local_bh_enable(); return asoc; } diff --git a/net/sctp/input.c b/net/sctp/input.c index 98b69bbecdd..f2e2cbd2d75 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -23,9 +23,8 @@ * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * along with GNU CC; see the file COPYING. If not, see + * <http://www.gnu.org/licenses/>. 
* * Please send any bug reports or fixes you make to the * email address(es): @@ -120,7 +119,7 @@ int sctp_rcv(struct sk_buff *skb) struct sctp_af *af; struct net *net = dev_net(skb->dev); - if (skb->pkt_type!=PACKET_HOST) + if (skb->pkt_type != PACKET_HOST) goto discard_it; SCTP_INC_STATS_BH(net, SCTP_MIB_INSCTPPACKS); @@ -181,8 +180,7 @@ int sctp_rcv(struct sk_buff *skb) * If a frame arrives on an interface and the receiving socket is * bound to another interface, via SO_BINDTODEVICE, treat it as OOTB */ - if (sk->sk_bound_dev_if && (sk->sk_bound_dev_if != af->skb_iif(skb))) - { + if (sk->sk_bound_dev_if && (sk->sk_bound_dev_if != af->skb_iif(skb))) { if (asoc) { sctp_association_put(asoc); asoc = NULL; @@ -240,7 +238,7 @@ int sctp_rcv(struct sk_buff *skb) * bottom halves on this lock, but a user may be in the lock too, * so check if it is busy. */ - sctp_bh_lock_sock(sk); + bh_lock_sock(sk); if (sk != rcvr->sk) { /* Our cached sk is different from the rcvr->sk. This is @@ -250,14 +248,14 @@ int sctp_rcv(struct sk_buff *skb) * be doing something with the new socket. Switch our veiw * of the current sk. */ - sctp_bh_unlock_sock(sk); + bh_unlock_sock(sk); sk = rcvr->sk; - sctp_bh_lock_sock(sk); + bh_lock_sock(sk); } if (sock_owned_by_user(sk)) { if (sctp_add_backlog(sk, skb)) { - sctp_bh_unlock_sock(sk); + bh_unlock_sock(sk); sctp_chunk_free(chunk); skb = NULL; /* sctp_chunk_free already freed the skb */ goto discard_release; @@ -268,7 +266,7 @@ int sctp_rcv(struct sk_buff *skb) sctp_inq_push(&chunk->rcvr->inqueue, chunk); } - sctp_bh_unlock_sock(sk); + bh_unlock_sock(sk); /* Release the asoc/ep ref we took in the lookup calls. 
*/ if (asoc) @@ -329,7 +327,7 @@ int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb) */ sk = rcvr->sk; - sctp_bh_lock_sock(sk); + bh_lock_sock(sk); if (sock_owned_by_user(sk)) { if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) @@ -339,7 +337,7 @@ int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb) } else sctp_inq_push(inqueue, chunk); - sctp_bh_unlock_sock(sk); + bh_unlock_sock(sk); /* If the chunk was backloged again, don't drop refs */ if (backloged) @@ -524,7 +522,7 @@ struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *skb, goto out; } - sctp_bh_lock_sock(sk); + bh_lock_sock(sk); /* If too many ICMPs get dropped on busy * servers this needs to be solved differently. @@ -537,17 +535,15 @@ struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *skb, return sk; out: - if (asoc) - sctp_association_put(asoc); + sctp_association_put(asoc); return NULL; } /* Common cleanup code for icmp/icmpv6 error handler. */ void sctp_err_finish(struct sock *sk, struct sctp_association *asoc) { - sctp_bh_unlock_sock(sk); - if (asoc) - sctp_association_put(asoc); + bh_unlock_sock(sk); + sctp_association_put(asoc); } /* @@ -613,8 +609,7 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info) if (ICMP_FRAG_NEEDED == code) { sctp_icmp_frag_needed(sk, asoc, transport, info); goto out_unlock; - } - else { + } else { if (ICMP_PROT_UNREACH == code) { sctp_icmp_proto_unreachable(sk, asoc, transport); @@ -723,17 +718,17 @@ static void __sctp_hash_endpoint(struct sctp_endpoint *ep) epb->hashent = sctp_ep_hashfn(net, epb->bind_addr.port); head = &sctp_ep_hashtable[epb->hashent]; - sctp_write_lock(&head->lock); + write_lock(&head->lock); hlist_add_head(&epb->node, &head->chain); - sctp_write_unlock(&head->lock); + write_unlock(&head->lock); } /* Add an endpoint to the hash. Local BH-safe. 
*/ void sctp_hash_endpoint(struct sctp_endpoint *ep) { - sctp_local_bh_disable(); + local_bh_disable(); __sctp_hash_endpoint(ep); - sctp_local_bh_enable(); + local_bh_enable(); } /* Remove endpoint from the hash table. */ @@ -749,17 +744,17 @@ static void __sctp_unhash_endpoint(struct sctp_endpoint *ep) head = &sctp_ep_hashtable[epb->hashent]; - sctp_write_lock(&head->lock); + write_lock(&head->lock); hlist_del_init(&epb->node); - sctp_write_unlock(&head->lock); + write_unlock(&head->lock); } /* Remove endpoint from the hash. Local BH-safe. */ void sctp_unhash_endpoint(struct sctp_endpoint *ep) { - sctp_local_bh_disable(); + local_bh_disable(); __sctp_unhash_endpoint(ep); - sctp_local_bh_enable(); + local_bh_enable(); } /* Look up an endpoint. */ @@ -803,9 +798,9 @@ static void __sctp_hash_established(struct sctp_association *asoc) head = &sctp_assoc_hashtable[epb->hashent]; - sctp_write_lock(&head->lock); + write_lock(&head->lock); hlist_add_head(&epb->node, &head->chain); - sctp_write_unlock(&head->lock); + write_unlock(&head->lock); } /* Add an association to the hash. Local BH-safe. */ @@ -814,9 +809,9 @@ void sctp_hash_established(struct sctp_association *asoc) if (asoc->temp) return; - sctp_local_bh_disable(); + local_bh_disable(); __sctp_hash_established(asoc); - sctp_local_bh_enable(); + local_bh_enable(); } /* Remove association from the hash table. */ @@ -833,9 +828,9 @@ static void __sctp_unhash_established(struct sctp_association *asoc) head = &sctp_assoc_hashtable[epb->hashent]; - sctp_write_lock(&head->lock); + write_lock(&head->lock); hlist_del_init(&epb->node); - sctp_write_unlock(&head->lock); + write_unlock(&head->lock); } /* Remove association from the hash table. Local BH-safe. */ @@ -844,9 +839,9 @@ void sctp_unhash_established(struct sctp_association *asoc) if (asoc->temp) return; - sctp_local_bh_disable(); + local_bh_disable(); __sctp_unhash_established(asoc); - sctp_local_bh_enable(); + local_bh_enable(); } /* Look up an association. 
*/ @@ -896,9 +891,9 @@ struct sctp_association *sctp_lookup_association(struct net *net, { struct sctp_association *asoc; - sctp_local_bh_disable(); + local_bh_disable(); asoc = __sctp_lookup_association(net, laddr, paddr, transportp); - sctp_local_bh_enable(); + local_bh_enable(); return asoc; } @@ -1058,31 +1053,31 @@ static struct sctp_association *__sctp_rcv_walk_lookup(struct net *net, if (ch_end > skb_tail_pointer(skb)) break; - switch(ch->type) { - case SCTP_CID_AUTH: - have_auth = chunk_num; - break; - - case SCTP_CID_COOKIE_ECHO: - /* If a packet arrives containing an AUTH chunk as - * a first chunk, a COOKIE-ECHO chunk as the second - * chunk, and possibly more chunks after them, and - * the receiver does not have an STCB for that - * packet, then authentication is based on - * the contents of the COOKIE- ECHO chunk. - */ - if (have_auth == 1 && chunk_num == 2) - return NULL; - break; - - case SCTP_CID_ASCONF: - if (have_auth || net->sctp.addip_noauth) - asoc = __sctp_rcv_asconf_lookup( - net, ch, laddr, - sctp_hdr(skb)->source, - transportp); - default: - break; + switch (ch->type) { + case SCTP_CID_AUTH: + have_auth = chunk_num; + break; + + case SCTP_CID_COOKIE_ECHO: + /* If a packet arrives containing an AUTH chunk as + * a first chunk, a COOKIE-ECHO chunk as the second + * chunk, and possibly more chunks after them, and + * the receiver does not have an STCB for that + * packet, then authentication is based on + * the contents of the COOKIE- ECHO chunk. + */ + if (have_auth == 1 && chunk_num == 2) + return NULL; + break; + + case SCTP_CID_ASCONF: + if (have_auth || net->sctp.addip_noauth) + asoc = __sctp_rcv_asconf_lookup( + net, ch, laddr, + sctp_hdr(skb)->source, + transportp); + default: + break; } if (asoc) @@ -1119,19 +1114,10 @@ static struct sctp_association *__sctp_rcv_lookup_harder(struct net *net, return NULL; /* If this is INIT/INIT-ACK look inside the chunk too. 
*/ - switch (ch->type) { - case SCTP_CID_INIT: - case SCTP_CID_INIT_ACK: + if (ch->type == SCTP_CID_INIT || ch->type == SCTP_CID_INIT_ACK) return __sctp_rcv_init_lookup(net, skb, laddr, transportp); - break; - default: - return __sctp_rcv_walk_lookup(net, skb, laddr, transportp); - break; - } - - - return NULL; + return __sctp_rcv_walk_lookup(net, skb, laddr, transportp); } /* Lookup an association for an inbound skb. */ diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c index 5856932fdc3..4de12afa13d 100644 --- a/net/sctp/inqueue.c +++ b/net/sctp/inqueue.c @@ -24,9 +24,8 @@ * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * along with GNU CC; see the file COPYING. If not, see + * <http://www.gnu.org/licenses/>. * * Please send any bug reports or fixes you make to the * email address(es): diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 7567e6f1a92..0f6259a6a93 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -21,9 +21,8 @@ * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * along with GNU CC; see the file COPYING. If not, see + * <http://www.gnu.org/licenses/>. 
* * Please send any bug reports or fixes you make to the * email address(es): @@ -173,7 +172,8 @@ static void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, switch (type) { case ICMPV6_PKT_TOOBIG: - sctp_icmp_frag_needed(sk, asoc, transport, ntohl(info)); + if (ip6_sk_accept_pmtu(sk)) + sctp_icmp_frag_needed(sk, asoc, transport, ntohl(info)); goto out_unlock; case ICMPV6_PARAMPROB: if (ICMPV6_UNK_NEXTHDR == code) { @@ -263,7 +263,7 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, } final_p = fl6_update_dst(fl6, np->opt, &final); - dst = ip6_dst_lookup_flow(sk, fl6, final_p, false); + dst = ip6_dst_lookup_flow(sk, fl6, final_p); if (!asoc || saddr) goto out; @@ -322,7 +322,7 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, fl6->saddr = baddr->v6.sin6_addr; fl6->fl6_sport = baddr->v6.sin6_port; final_p = fl6_update_dst(fl6, np->opt, &final); - dst = ip6_dst_lookup_flow(sk, fl6, final_p, false); + dst = ip6_dst_lookup_flow(sk, fl6, final_p); } out: @@ -402,7 +402,7 @@ static void sctp_v6_copy_addrlist(struct list_head *addrlist, } /* Initialize a sockaddr_storage from in incoming skb. */ -static void sctp_v6_from_skb(union sctp_addr *addr,struct sk_buff *skb, +static void sctp_v6_from_skb(union sctp_addr *addr, struct sk_buff *skb, int is_saddr) { __be16 *port; diff --git a/net/sctp/objcnt.c b/net/sctp/objcnt.c index 647396baa56..40e7fac96c4 100644 --- a/net/sctp/objcnt.c +++ b/net/sctp/objcnt.c @@ -20,9 +20,8 @@ * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * along with GNU CC; see the file COPYING. If not, see + * <http://www.gnu.org/licenses/>. 
* * Please send any bug reports or fixes you make to the * email address(es): @@ -98,7 +97,7 @@ static void sctp_objcnt_seq_stop(struct seq_file *seq, void *v) { } -static void * sctp_objcnt_seq_next(struct seq_file *seq, void *v, loff_t *pos) +static void *sctp_objcnt_seq_next(struct seq_file *seq, void *v, loff_t *pos) { ++*pos; return (*pos >= ARRAY_SIZE(sctp_dbg_objcnt)) ? NULL : (void *)pos; diff --git a/net/sctp/output.c b/net/sctp/output.c index 0fb140f8f08..0f4d15fc262 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -20,9 +20,8 @@ * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * along with GNU CC; see the file COPYING. If not, see + * <http://www.gnu.org/licenses/>. * * Please send any bug reports or fixes you make to the * email address(es): @@ -281,7 +280,7 @@ static sctp_xmit_t __sctp_packet_append_chunk(struct sctp_packet *packet, /* We believe that this chunk is OK to add to the packet */ switch (chunk->chunk_hdr->type) { - case SCTP_CID_DATA: + case SCTP_CID_DATA: /* Account for the data being in the packet */ sctp_packet_append_data(packet, chunk); /* Disallow SACK bundling after DATA. */ @@ -293,17 +292,17 @@ static sctp_xmit_t __sctp_packet_append_chunk(struct sctp_packet *packet, /* timestamp the chunk for rtx purposes */ chunk->sent_at = jiffies; break; - case SCTP_CID_COOKIE_ECHO: + case SCTP_CID_COOKIE_ECHO: packet->has_cookie_echo = 1; break; - case SCTP_CID_SACK: + case SCTP_CID_SACK: packet->has_sack = 1; if (chunk->asoc) chunk->asoc->stats.osacks++; break; - case SCTP_CID_AUTH: + case SCTP_CID_AUTH: packet->has_auth = 1; packet->auth = chunk; break; @@ -388,7 +387,7 @@ int sctp_packet_transmit(struct sctp_packet *packet) int err = 0; int padding; /* How much padding do we need? 
*/ __u8 has_data = 0; - struct dst_entry *dst = tp->dst; + struct dst_entry *dst; unsigned char *auth = NULL; /* pointer to auth in skb data */ pr_debug("%s: packet:%p\n", __func__, packet); @@ -421,9 +420,9 @@ int sctp_packet_transmit(struct sctp_packet *packet) } } dst = dst_clone(tp->dst); - skb_dst_set(nskb, dst); if (!dst) goto no_route; + skb_dst_set(nskb, dst); /* Build the SCTP header. */ sh = (struct sctphdr *)skb_push(nskb, sizeof(struct sctphdr)); @@ -541,8 +540,7 @@ int sctp_packet_transmit(struct sctp_packet *packet) } else { /* no need to seed pseudo checksum for SCTP */ nskb->ip_summed = CHECKSUM_PARTIAL; - nskb->csum_start = (skb_transport_header(nskb) - - nskb->head); + nskb->csum_start = skb_transport_header(nskb) - nskb->head; nskb->csum_offset = offsetof(struct sctphdr, checksum); } } @@ -559,7 +557,7 @@ int sctp_packet_transmit(struct sctp_packet *packet) * Note: The works for IPv6 layer checks this bit too later * in transmission. See IP6_ECN_flow_xmit(). */ - (*tp->af_specific->ecn_capable)(nskb->sk); + tp->af_specific->ecn_capable(nskb->sk); /* Set up the IP options. */ /* BUG: not implemented @@ -594,7 +592,7 @@ int sctp_packet_transmit(struct sctp_packet *packet) pr_debug("***sctp_transmit_packet*** skb->len:%d\n", nskb->len); nskb->local_df = packet->ipfragok; - (*tp->af_specific->sctp_xmit)(nskb, tp); + tp->af_specific->sctp_xmit(nskb, tp); out: sctp_packet_reset(packet); diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 59268f6e2c3..9c77947c059 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -22,9 +22,8 @@ * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * along with GNU CC; see the file COPYING. If not, see + * <http://www.gnu.org/licenses/>. 
* * Please send any bug reports or fixes you make to the * email address(es): @@ -111,7 +110,7 @@ static inline int sctp_cacc_skip_3_1_d(struct sctp_transport *primary, struct sctp_transport *transport, int count_of_newacks) { - if (count_of_newacks >=2 && transport != primary) + if (count_of_newacks >= 2 && transport != primary) return 1; return 0; } @@ -468,7 +467,7 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport, struct net *net = sock_net(q->asoc->base.sk); int error = 0; - switch(reason) { + switch (reason) { case SCTP_RTXR_T3_RTX: SCTP_INC_STATS(net, SCTP_MIB_T3_RETRANSMITS); sctp_transport_lower_cwnd(transport, SCTP_LOWER_CWND_T3_RTX); @@ -1083,7 +1082,7 @@ sctp_flush_out: * * --xguo */ - while ((ltransport = sctp_list_dequeue(&transport_list)) != NULL ) { + while ((ltransport = sctp_list_dequeue(&transport_list)) != NULL) { struct sctp_transport *t = list_entry(ltransport, struct sctp_transport, send_ready); @@ -1212,7 +1211,7 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_chunk *chunk) * destinations for which cacc_saw_newack is set. */ if (transport->cacc.cacc_saw_newack) - count_of_newacks ++; + count_of_newacks++; } /* Move the Cumulative TSN Ack Point if appropriate. */ diff --git a/net/sctp/primitive.c b/net/sctp/primitive.c index ce1ffd81177..ab8d9f96a17 100644 --- a/net/sctp/primitive.c +++ b/net/sctp/primitive.c @@ -23,9 +23,8 @@ * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * along with GNU CC; see the file COPYING. If not, see + * <http://www.gnu.org/licenses/>. 
* * Please send any bug reports or fixes you make to the * email address(es): diff --git a/net/sctp/proc.c b/net/sctp/proc.c index 0c064215684..0947f1e15eb 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -16,9 +16,8 @@ * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * along with GNU CC; see the file COPYING. If not, see + * <http://www.gnu.org/licenses/>. * * Please send any bug reports or fixes you make to the * email address(es): @@ -178,7 +177,7 @@ static void sctp_seq_dump_remote_addrs(struct seq_file *seq, struct sctp_associa rcu_read_unlock(); } -static void * sctp_eps_seq_start(struct seq_file *seq, loff_t *pos) +static void *sctp_eps_seq_start(struct seq_file *seq, loff_t *pos) { if (*pos >= sctp_ep_hashsize) return NULL; @@ -197,7 +196,7 @@ static void sctp_eps_seq_stop(struct seq_file *seq, void *v) } -static void * sctp_eps_seq_next(struct seq_file *seq, void *v, loff_t *pos) +static void *sctp_eps_seq_next(struct seq_file *seq, void *v, loff_t *pos) { if (++*pos >= sctp_ep_hashsize) return NULL; @@ -219,7 +218,7 @@ static int sctp_eps_seq_show(struct seq_file *seq, void *v) return -ENOMEM; head = &sctp_ep_hashtable[hash]; - sctp_local_bh_disable(); + local_bh_disable(); read_lock(&head->lock); sctp_for_each_hentry(epb, &head->chain) { ep = sctp_ep(epb); @@ -236,7 +235,7 @@ static int sctp_eps_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "\n"); } read_unlock(&head->lock); - sctp_local_bh_enable(); + local_bh_enable(); return 0; } @@ -283,7 +282,7 @@ void sctp_eps_proc_exit(struct net *net) } -static void * sctp_assocs_seq_start(struct seq_file *seq, loff_t *pos) +static void *sctp_assocs_seq_start(struct seq_file *seq, loff_t *pos) { if (*pos >= sctp_assoc_hashsize) return NULL; @@ -306,7 +305,7 @@ static 
void sctp_assocs_seq_stop(struct seq_file *seq, void *v) } -static void * sctp_assocs_seq_next(struct seq_file *seq, void *v, loff_t *pos) +static void *sctp_assocs_seq_next(struct seq_file *seq, void *v, loff_t *pos) { if (++*pos >= sctp_assoc_hashsize) return NULL; @@ -327,7 +326,7 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v) return -ENOMEM; head = &sctp_assoc_hashtable[hash]; - sctp_local_bh_disable(); + local_bh_disable(); read_lock(&head->lock); sctp_for_each_hentry(epb, &head->chain) { assoc = sctp_assoc(epb); @@ -363,7 +362,7 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "\n"); } read_unlock(&head->lock); - sctp_local_bh_enable(); + local_bh_enable(); return 0; } @@ -447,7 +446,7 @@ static int sctp_remaddr_seq_show(struct seq_file *seq, void *v) return -ENOMEM; head = &sctp_assoc_hashtable[hash]; - sctp_local_bh_disable(); + local_bh_disable(); read_lock(&head->lock); rcu_read_lock(); sctp_for_each_hentry(epb, &head->chain) { @@ -506,7 +505,7 @@ static int sctp_remaddr_seq_show(struct seq_file *seq, void *v) rcu_read_unlock(); read_unlock(&head->lock); - sctp_local_bh_enable(); + local_bh_enable(); return 0; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 5e17092f4ad..4e1d0fcb028 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -23,9 +23,8 @@ * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * along with GNU CC; see the file COPYING. If not, see + * <http://www.gnu.org/licenses/>. 
* * Please send any bug reports or fixes you make to the * email address(es): @@ -635,10 +634,10 @@ static void sctp_addr_wq_timeout_handler(unsigned long arg) /* ignore bound-specific endpoints */ if (!sctp_is_ep_boundall(sk)) continue; - sctp_bh_lock_sock(sk); + bh_lock_sock(sk); if (sctp_asconf_mgmt(sp, addrw) < 0) pr_debug("%s: sctp_asconf_mgmt failed\n", __func__); - sctp_bh_unlock_sock(sk); + bh_unlock_sock(sk); } #if IS_ENABLED(CONFIG_IPV6) free_next: @@ -1031,6 +1030,7 @@ static const struct net_protocol sctp_protocol = { .err_handler = sctp_v4_err, .no_policy = 1, .netns_ok = 1, + .icmp_strict_tag_validation = 1, }; /* IPv4 address related functions. */ @@ -1066,8 +1066,8 @@ static struct sctp_af sctp_af_inet = { #endif }; -struct sctp_pf *sctp_get_pf_specific(sa_family_t family) { - +struct sctp_pf *sctp_get_pf_specific(sa_family_t family) +{ switch (family) { case PF_INET: return sctp_pf_inet_specific; @@ -1461,7 +1461,6 @@ static __init int sctp_init(void) if (status) goto err_v6_add_protocol; - status = 0; out: return status; err_v6_add_protocol: diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index fe690320b1e..632090b961c 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -23,9 +23,8 @@ * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * along with GNU CC; see the file COPYING. If not, see + * <http://www.gnu.org/licenses/>. 
* * Please send any bug reports or fixes you make to the * email address(es): @@ -79,6 +78,8 @@ static int sctp_process_param(struct sctp_association *asoc, gfp_t gfp); static void *sctp_addto_param(struct sctp_chunk *chunk, int len, const void *data); +static void *sctp_addto_chunk_fixed(struct sctp_chunk *, int len, + const void *data); /* Control chunk destructor */ static void sctp_control_release_owner(struct sk_buff *skb) @@ -1476,8 +1477,8 @@ void *sctp_addto_chunk(struct sctp_chunk *chunk, int len, const void *data) /* Append bytes to the end of a chunk. Returns NULL if there isn't sufficient * space in the chunk */ -void *sctp_addto_chunk_fixed(struct sctp_chunk *chunk, - int len, const void *data) +static void *sctp_addto_chunk_fixed(struct sctp_chunk *chunk, + int len, const void *data) { if (skb_tailroom(chunk->skb) >= len) return sctp_addto_chunk(chunk, len, data); @@ -1968,13 +1969,13 @@ static int sctp_verify_ext_param(struct net *net, union sctp_params param) for (i = 0; i < num_ext; i++) { switch (param.ext->chunks[i]) { - case SCTP_CID_AUTH: - have_auth = 1; - break; - case SCTP_CID_ASCONF: - case SCTP_CID_ASCONF_ACK: - have_asconf = 1; - break; + case SCTP_CID_AUTH: + have_auth = 1; + break; + case SCTP_CID_ASCONF: + case SCTP_CID_ASCONF_ACK: + have_asconf = 1; + break; } } @@ -2001,25 +2002,24 @@ static void sctp_process_ext_param(struct sctp_association *asoc, for (i = 0; i < num_ext; i++) { switch (param.ext->chunks[i]) { - case SCTP_CID_FWD_TSN: - if (net->sctp.prsctp_enable && - !asoc->peer.prsctp_capable) + case SCTP_CID_FWD_TSN: + if (net->sctp.prsctp_enable && !asoc->peer.prsctp_capable) asoc->peer.prsctp_capable = 1; - break; - case SCTP_CID_AUTH: - /* if the peer reports AUTH, assume that he - * supports AUTH. 
- */ - if (net->sctp.auth_enable) - asoc->peer.auth_capable = 1; - break; - case SCTP_CID_ASCONF: - case SCTP_CID_ASCONF_ACK: - if (net->sctp.addip_enable) - asoc->peer.asconf_capable = 1; - break; - default: - break; + break; + case SCTP_CID_AUTH: + /* if the peer reports AUTH, assume that he + * supports AUTH. + */ + if (net->sctp.auth_enable) + asoc->peer.auth_capable = 1; + break; + case SCTP_CID_ASCONF: + case SCTP_CID_ASCONF_ACK: + if (net->sctp.addip_enable) + asoc->peer.asconf_capable = 1; + break; + default: + break; } } } @@ -2252,7 +2252,7 @@ int sctp_verify_init(struct net *net, const struct sctp_association *asoc, * VIOLATION error. We build the ERROR chunk here and let the normal * error handling code build and send the packet. */ - if (param.v != (void*)chunk->chunk_end) + if (param.v != (void *)chunk->chunk_end) return sctp_process_inv_paramlength(asoc, param.p, chunk, errp); /* The only missing mandatory param possible today is @@ -2267,14 +2267,14 @@ int sctp_verify_init(struct net *net, const struct sctp_association *asoc, result = sctp_verify_param(net, asoc, param, cid, chunk, errp); switch (result) { - case SCTP_IERROR_ABORT: - case SCTP_IERROR_NOMEM: - return 0; - case SCTP_IERROR_ERROR: - return 1; - case SCTP_IERROR_NO_ERROR: - default: - break; + case SCTP_IERROR_ABORT: + case SCTP_IERROR_NOMEM: + return 0; + case SCTP_IERROR_ERROR: + return 1; + case SCTP_IERROR_NO_ERROR: + default: + break; } } /* for (loop through all parameters) */ @@ -2309,7 +2309,7 @@ int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk, * added as the primary transport. The source address seems to * be a a better choice than any of the embedded addresses. 
*/ - if(!sctp_assoc_add_peer(asoc, peer_addr, gfp, SCTP_ACTIVE)) + if (!sctp_assoc_add_peer(asoc, peer_addr, gfp, SCTP_ACTIVE)) goto nomem; if (sctp_cmp_addr_exact(sctp_source(chunk), peer_addr)) @@ -3335,7 +3335,7 @@ static __be16 sctp_get_asconf_response(struct sctp_chunk *asconf_ack, while (asconf_ack_len > 0) { if (asconf_ack_param->crr_id == asconf_param->crr_id) { - switch(asconf_ack_param->param_hdr.type) { + switch (asconf_ack_param->param_hdr.type) { case SCTP_PARAM_SUCCESS_REPORT: return SCTP_ERROR_NO_ERROR; case SCTP_PARAM_ERR_CAUSE: diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 1a6eef39ab2..bd859154000 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -22,9 +22,8 @@ * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * along with GNU CC; see the file COPYING. If not, see + * <http://www.gnu.org/licenses/>. * * Please send any bug reports or fixes you make to the * email address(es): @@ -249,7 +248,7 @@ void sctp_generate_t3_rtx_event(unsigned long peer) /* Check whether a task is in the sock. 
*/ - sctp_bh_lock_sock(asoc->base.sk); + bh_lock_sock(asoc->base.sk); if (sock_owned_by_user(asoc->base.sk)) { pr_debug("%s: sock is busy\n", __func__); @@ -276,7 +275,7 @@ void sctp_generate_t3_rtx_event(unsigned long peer) asoc->base.sk->sk_err = -error; out_unlock: - sctp_bh_unlock_sock(asoc->base.sk); + bh_unlock_sock(asoc->base.sk); sctp_transport_put(transport); } @@ -289,7 +288,7 @@ static void sctp_generate_timeout_event(struct sctp_association *asoc, struct net *net = sock_net(asoc->base.sk); int error = 0; - sctp_bh_lock_sock(asoc->base.sk); + bh_lock_sock(asoc->base.sk); if (sock_owned_by_user(asoc->base.sk)) { pr_debug("%s: sock is busy: timer %d\n", __func__, timeout_type); @@ -316,7 +315,7 @@ static void sctp_generate_timeout_event(struct sctp_association *asoc, asoc->base.sk->sk_err = -error; out_unlock: - sctp_bh_unlock_sock(asoc->base.sk); + bh_unlock_sock(asoc->base.sk); sctp_association_put(asoc); } @@ -368,7 +367,7 @@ void sctp_generate_heartbeat_event(unsigned long data) struct sctp_association *asoc = transport->asoc; struct net *net = sock_net(asoc->base.sk); - sctp_bh_lock_sock(asoc->base.sk); + bh_lock_sock(asoc->base.sk); if (sock_owned_by_user(asoc->base.sk)) { pr_debug("%s: sock is busy\n", __func__); @@ -393,7 +392,7 @@ void sctp_generate_heartbeat_event(unsigned long data) asoc->base.sk->sk_err = -error; out_unlock: - sctp_bh_unlock_sock(asoc->base.sk); + bh_unlock_sock(asoc->base.sk); sctp_transport_put(transport); } @@ -405,8 +404,8 @@ void sctp_generate_proto_unreach_event(unsigned long data) struct sctp_transport *transport = (struct sctp_transport *) data; struct sctp_association *asoc = transport->asoc; struct net *net = sock_net(asoc->base.sk); - - sctp_bh_lock_sock(asoc->base.sk); + + bh_lock_sock(asoc->base.sk); if (sock_owned_by_user(asoc->base.sk)) { pr_debug("%s: sock is busy\n", __func__); @@ -428,7 +427,7 @@ void sctp_generate_proto_unreach_event(unsigned long data) asoc->state, asoc->ep, asoc, transport, GFP_ATOMIC); 
out_unlock: - sctp_bh_unlock_sock(asoc->base.sk); + bh_unlock_sock(asoc->base.sk); sctp_association_put(asoc); } @@ -544,7 +543,7 @@ static void sctp_cmd_init_failed(sctp_cmd_seq_t *commands, { struct sctp_ulpevent *event; - event = sctp_ulpevent_make_assoc_change(asoc,0, SCTP_CANT_STR_ASSOC, + event = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_CANT_STR_ASSOC, (__u16)error, 0, 0, NULL, GFP_ATOMIC); @@ -1116,7 +1115,7 @@ int sctp_do_sm(struct net *net, sctp_event_t event_type, sctp_subtype_t subtype, sctp_init_cmd_seq(&commands); debug_pre_sfn(); - status = (*state_fn->fn)(net, ep, asoc, subtype, event_arg, &commands); + status = state_fn->fn(net, ep, asoc, subtype, event_arg, &commands); debug_post_sfn(); error = sctp_side_effects(event_type, subtype, state, diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index a26065be728..483dcd71b3c 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -22,9 +22,8 @@ * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * along with GNU CC; see the file COPYING. If not, see + * <http://www.gnu.org/licenses/>. 
* * Please send any bug reports or fixes you make to the * email address(es): @@ -2946,7 +2945,7 @@ sctp_disposition_t sctp_sf_eat_data_6_2(struct net *net, return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); - error = sctp_eat_data(asoc, chunk, commands ); + error = sctp_eat_data(asoc, chunk, commands); switch (error) { case SCTP_IERROR_NO_ERROR: break; @@ -3067,7 +3066,7 @@ sctp_disposition_t sctp_sf_eat_data_fast_4_4(struct net *net, return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); - error = sctp_eat_data(asoc, chunk, commands ); + error = sctp_eat_data(asoc, chunk, commands); switch (error) { case SCTP_IERROR_NO_ERROR: case SCTP_IERROR_HIGH_TSN: @@ -3682,8 +3681,7 @@ sctp_disposition_t sctp_sf_do_asconf(struct net *net, asconf_ack->dest = chunk->source; sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(asconf_ack)); if (asoc->new_transport) { - sctp_sf_heartbeat(ep, asoc, type, asoc->new_transport, - commands); + sctp_sf_heartbeat(ep, asoc, type, asoc->new_transport, commands); ((struct sctp_association *)asoc)->new_transport = NULL; } @@ -3766,7 +3764,7 @@ sctp_disposition_t sctp_sf_do_asconf_ack(struct net *net, */ sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, SCTP_TO(SCTP_EVENT_TIMEOUT_T4_RTO)); - sctp_add_cmd_sf(commands, SCTP_CMD_DISCARD_PACKET,SCTP_NULL()); + sctp_add_cmd_sf(commands, SCTP_CMD_DISCARD_PACKET, SCTP_NULL()); sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ECONNABORTED)); sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, @@ -3800,7 +3798,7 @@ sctp_disposition_t sctp_sf_do_asconf_ack(struct net *net, /* We are going to ABORT, so we might as well stop * processing the rest of the chunks in the packet. 
*/ - sctp_add_cmd_sf(commands, SCTP_CMD_DISCARD_PACKET,SCTP_NULL()); + sctp_add_cmd_sf(commands, SCTP_CMD_DISCARD_PACKET, SCTP_NULL()); sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ECONNABORTED)); sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, @@ -4452,7 +4450,7 @@ static sctp_disposition_t sctp_sf_violation_chunklen( void *arg, sctp_cmd_seq_t *commands) { - static const char err_str[]="The following chunk had invalid length:"; + static const char err_str[] = "The following chunk had invalid length:"; return sctp_sf_abort_violation(net, ep, asoc, arg, commands, err_str, sizeof(err_str)); @@ -4515,7 +4513,7 @@ static sctp_disposition_t sctp_sf_violation_ctsn( void *arg, sctp_cmd_seq_t *commands) { - static const char err_str[]="The cumulative tsn ack beyond the max tsn currently sent:"; + static const char err_str[] = "The cumulative tsn ack beyond the max tsn currently sent:"; return sctp_sf_abort_violation(net, ep, asoc, arg, commands, err_str, sizeof(err_str)); @@ -4535,7 +4533,7 @@ static sctp_disposition_t sctp_sf_violation_chunk( void *arg, sctp_cmd_seq_t *commands) { - static const char err_str[]="The following chunk violates protocol:"; + static const char err_str[] = "The following chunk violates protocol:"; if (!asoc) return sctp_sf_violation(net, ep, asoc, type, arg, commands); @@ -4611,7 +4609,7 @@ sctp_disposition_t sctp_sf_do_prm_asoc(struct net *net, sctp_cmd_seq_t *commands) { struct sctp_chunk *repl; - struct sctp_association* my_asoc; + struct sctp_association *my_asoc; /* The comment below says that we enter COOKIE-WAIT AFTER * sending the INIT, but that doesn't actually work in our @@ -6001,7 +5999,7 @@ static struct sctp_packet *sctp_ootb_pkt_new(struct net *net, /* Special case the INIT-ACK as there is no peer's vtag * yet. 
*/ - switch(chunk->chunk_hdr->type) { + switch (chunk->chunk_hdr->type) { case SCTP_CID_INIT_ACK: { sctp_initack_chunk_t *initack; @@ -6018,7 +6016,7 @@ static struct sctp_packet *sctp_ootb_pkt_new(struct net *net, /* Special case the INIT and stale COOKIE_ECHO as there is no * vtag yet. */ - switch(chunk->chunk_hdr->type) { + switch (chunk->chunk_hdr->type) { case SCTP_CID_INIT: { sctp_init_chunk_t *init; @@ -6208,7 +6206,7 @@ static int sctp_eat_data(const struct sctp_association *asoc, */ if (*sk->sk_prot_creator->memory_pressure) { if (sctp_tsnmap_has_gap(map) && - (sctp_tsnmap_get_ctsn(map) + 1) == tsn) { + (sctp_tsnmap_get_ctsn(map) + 1) == tsn) { pr_debug("%s: under pressure, reneging for tsn:%u\n", __func__, tsn); deliver = SCTP_CMD_RENEGE; @@ -6232,7 +6230,7 @@ static int sctp_eat_data(const struct sctp_association *asoc, /* We are going to ABORT, so we might as well stop * processing the rest of the chunks in the packet. */ - sctp_add_cmd_sf(commands, SCTP_CMD_DISCARD_PACKET,SCTP_NULL()); + sctp_add_cmd_sf(commands, SCTP_CMD_DISCARD_PACKET, SCTP_NULL()); sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ECONNABORTED)); sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c index c5999b2dde7..a987d54b379 100644 --- a/net/sctp/sm_statetable.c +++ b/net/sctp/sm_statetable.c @@ -22,9 +22,8 @@ * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * along with GNU CC; see the file COPYING. If not, see + * <http://www.gnu.org/licenses/>. 
* * Please send any bug reports or fixes you make to the * email address(es): @@ -70,7 +69,7 @@ static const sctp_sm_table_entry_t bug = { if ((event_subtype._type > (_max))) { \ pr_warn("table %p possible attack: event %d exceeds max %d\n", \ _table, event_subtype._type, _max); \ - rtn = &bug; \ + rtn = &bug; \ } else \ rtn = &_table[event_subtype._type][(int)state]; \ \ diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 42b709c95cf..9e91d6e5df6 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -28,9 +28,8 @@ * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * along with GNU CC; see the file COPYING. If not, see + * <http://www.gnu.org/licenses/>. * * Please send any bug reports or fixes you make to the * email address(es): @@ -83,7 +82,7 @@ static int sctp_writeable(struct sock *sk); static void sctp_wfree(struct sk_buff *skb); static int sctp_wait_for_sndbuf(struct sctp_association *, long *timeo_p, size_t msg_len); -static int sctp_wait_for_packet(struct sock * sk, int *err, long *timeo_p); +static int sctp_wait_for_packet(struct sock *sk, int *err, long *timeo_p); static int sctp_wait_for_connect(struct sctp_association *, long *timeo_p); static int sctp_wait_for_accept(struct sock *sk, long timeo); static void sctp_wait_for_close(struct sock *sk, long timeo); @@ -273,7 +272,7 @@ static int sctp_bind(struct sock *sk, struct sockaddr *addr, int addr_len) { int retval = 0; - sctp_lock_sock(sk); + lock_sock(sk); pr_debug("%s: sk:%p, addr:%p, addr_len:%d\n", __func__, sk, addr, addr_len); @@ -285,7 +284,7 @@ static int sctp_bind(struct sock *sk, struct sockaddr *addr, int addr_len) else retval = -EINVAL; - sctp_release_sock(sk); + release_sock(sk); return retval; } @@ -953,7 +952,7 @@ int sctp_asconf_mgmt(struct 
sctp_sock *sp, struct sctp_sockaddr_entry *addrw) * * Returns 0 if ok, <0 errno code on error. */ -static int sctp_setsockopt_bindx(struct sock* sk, +static int sctp_setsockopt_bindx(struct sock *sk, struct sockaddr __user *addrs, int addrs_size, int op) { @@ -1040,7 +1039,7 @@ out: * Common routine for handling connect() and sctp_connectx(). * Connect will come in with just a single address. */ -static int __sctp_connect(struct sock* sk, +static int __sctp_connect(struct sock *sk, struct sockaddr *kaddrs, int addrs_size, sctp_assoc_t *assoc_id) @@ -1300,7 +1299,7 @@ out_free: * * Returns >=0 if ok, <0 errno code on error. */ -static int __sctp_setsockopt_connectx(struct sock* sk, +static int __sctp_setsockopt_connectx(struct sock *sk, struct sockaddr __user *addrs, int addrs_size, sctp_assoc_t *assoc_id) @@ -1338,7 +1337,7 @@ static int __sctp_setsockopt_connectx(struct sock* sk, * This is an older interface. It's kept for backward compatibility * to the option that doesn't provide association id. */ -static int sctp_setsockopt_connectx_old(struct sock* sk, +static int sctp_setsockopt_connectx_old(struct sock *sk, struct sockaddr __user *addrs, int addrs_size) { @@ -1351,7 +1350,7 @@ static int sctp_setsockopt_connectx_old(struct sock* sk, * indication to the call. Error is always negative and association id is * always positive. */ -static int sctp_setsockopt_connectx(struct sock* sk, +static int sctp_setsockopt_connectx(struct sock *sk, struct sockaddr __user *addrs, int addrs_size) { @@ -1374,7 +1373,7 @@ static int sctp_setsockopt_connectx(struct sock* sk, * addrs_num structure member. That way we can re-use the existing * code. 
*/ -static int sctp_getsockopt_connectx3(struct sock* sk, int len, +static int sctp_getsockopt_connectx3(struct sock *sk, int len, char __user *optval, int __user *optlen) { @@ -1462,7 +1461,7 @@ static void sctp_close(struct sock *sk, long timeout) pr_debug("%s: sk:%p, timeout:%ld\n", __func__, sk, timeout); - sctp_lock_sock(sk); + lock_sock(sk); sk->sk_shutdown = SHUTDOWN_MASK; sk->sk_state = SCTP_SS_CLOSING; @@ -1506,13 +1505,13 @@ static void sctp_close(struct sock *sk, long timeout) sctp_wait_for_close(sk, timeout); /* This will run the backlog queue. */ - sctp_release_sock(sk); + release_sock(sk); /* Supposedly, no process has access to the socket, but * the net layers still may. */ - sctp_local_bh_disable(); - sctp_bh_lock_sock(sk); + local_bh_disable(); + bh_lock_sock(sk); /* Hold the sock, since sk_common_release() will put sock_put() * and we have just a little more cleanup. @@ -1520,8 +1519,8 @@ static void sctp_close(struct sock *sk, long timeout) sock_hold(sk); sk_common_release(sk); - sctp_bh_unlock_sock(sk); - sctp_local_bh_enable(); + bh_unlock_sock(sk); + local_bh_enable(); sock_put(sk); @@ -1569,7 +1568,7 @@ static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, struct net *net = sock_net(sk); struct sctp_sock *sp; struct sctp_endpoint *ep; - struct sctp_association *new_asoc=NULL, *asoc=NULL; + struct sctp_association *new_asoc = NULL, *asoc = NULL; struct sctp_transport *transport, *chunk_tp; struct sctp_chunk *chunk; union sctp_addr to; @@ -1666,7 +1665,7 @@ static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, pr_debug("%s: about to look up association\n", __func__); - sctp_lock_sock(sk); + lock_sock(sk); /* If a msg_name has been specified, assume this is to be used. */ if (msg_name) { @@ -1744,7 +1743,7 @@ static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, * either the default or the user specified stream counts. 
*/ if (sinfo) { - if (!sinit || (sinit && !sinit->sinit_num_ostreams)) { + if (!sinit || !sinit->sinit_num_ostreams) { /* Check against the defaults. */ if (sinfo->sinfo_stream >= sp->initmsg.sinit_num_ostreams) { @@ -1950,7 +1949,7 @@ out_free: sctp_association_free(asoc); } out_unlock: - sctp_release_sock(sk); + release_sock(sk); out_nounlock: return sctp_error(sk, msg_flags, err); @@ -2036,7 +2035,7 @@ static int sctp_recvmsg(struct kiocb *iocb, struct sock *sk, "addr_len:%p)\n", __func__, sk, msg, len, noblock, flags, addr_len); - sctp_lock_sock(sk); + lock_sock(sk); if (sctp_style(sk, TCP) && !sctp_sstate(sk, ESTABLISHED)) { err = -ENOTCONN; @@ -2120,7 +2119,7 @@ out_free: sctp_ulpevent_free(event); } out: - sctp_release_sock(sk); + release_sock(sk); return err; } @@ -2463,7 +2462,7 @@ static int sctp_setsockopt_peer_addr_params(struct sock *sk, int hb_change, pmtud_change, sackdelay_change; if (optlen != sizeof(struct sctp_paddrparams)) - return - EINVAL; + return -EINVAL; if (copy_from_user(¶ms, optval, optlen)) return -EFAULT; @@ -2484,7 +2483,7 @@ static int sctp_setsockopt_peer_addr_params(struct sock *sk, /* If an address other than INADDR_ANY is specified, and * no transport is found, then the request is invalid. */ - if (!sctp_is_any(sk, ( union sctp_addr *)¶ms.spp_address)) { + if (!sctp_is_any(sk, (union sctp_addr *)¶ms.spp_address)) { trans = sctp_addr_id2transport(sk, ¶ms.spp_address, params.spp_assoc_id); if (!trans) @@ -2528,6 +2527,16 @@ static int sctp_setsockopt_peer_addr_params(struct sock *sk, return 0; } +static inline __u32 sctp_spp_sackdelay_enable(__u32 param_flags) +{ + return (param_flags & ~SPP_SACKDELAY) | SPP_SACKDELAY_ENABLE; +} + +static inline __u32 sctp_spp_sackdelay_disable(__u32 param_flags) +{ + return (param_flags & ~SPP_SACKDELAY) | SPP_SACKDELAY_DISABLE; +} + /* * 7.1.23. 
Get or set delayed ack timer (SCTP_DELAYED_SACK) * @@ -2579,8 +2588,11 @@ static int sctp_setsockopt_delayed_ack(struct sock *sk, if (params.sack_delay == 0 && params.sack_freq == 0) return 0; } else if (optlen == sizeof(struct sctp_assoc_value)) { - pr_warn("Use of struct sctp_assoc_value in delayed_ack socket option deprecated\n"); - pr_warn("Use struct sctp_sack_info instead\n"); + pr_warn_ratelimited(DEPRECATED + "%s (pid %d) " + "Use of struct sctp_assoc_value in delayed_ack socket option.\n" + "Use struct sctp_sack_info instead\n", + current->comm, task_pid_nr(current)); if (copy_from_user(¶ms, optval, optlen)) return -EFAULT; @@ -2589,7 +2601,7 @@ static int sctp_setsockopt_delayed_ack(struct sock *sk, else params.sack_freq = 0; } else - return - EINVAL; + return -EINVAL; /* Validate value parameter. */ if (params.sack_delay > 500) @@ -2608,37 +2620,31 @@ static int sctp_setsockopt_delayed_ack(struct sock *sk, asoc->sackdelay = msecs_to_jiffies(params.sack_delay); asoc->param_flags = - (asoc->param_flags & ~SPP_SACKDELAY) | - SPP_SACKDELAY_ENABLE; + sctp_spp_sackdelay_enable(asoc->param_flags); } else { sp->sackdelay = params.sack_delay; sp->param_flags = - (sp->param_flags & ~SPP_SACKDELAY) | - SPP_SACKDELAY_ENABLE; + sctp_spp_sackdelay_enable(sp->param_flags); } } if (params.sack_freq == 1) { if (asoc) { asoc->param_flags = - (asoc->param_flags & ~SPP_SACKDELAY) | - SPP_SACKDELAY_DISABLE; + sctp_spp_sackdelay_disable(asoc->param_flags); } else { sp->param_flags = - (sp->param_flags & ~SPP_SACKDELAY) | - SPP_SACKDELAY_DISABLE; + sctp_spp_sackdelay_disable(sp->param_flags); } } else if (params.sack_freq > 1) { if (asoc) { asoc->sackfreq = params.sack_freq; asoc->param_flags = - (asoc->param_flags & ~SPP_SACKDELAY) | - SPP_SACKDELAY_ENABLE; + sctp_spp_sackdelay_enable(asoc->param_flags); } else { sp->sackfreq = params.sack_freq; sp->param_flags = - (sp->param_flags & ~SPP_SACKDELAY) | - SPP_SACKDELAY_ENABLE; + sctp_spp_sackdelay_enable(sp->param_flags); } } 
@@ -2650,18 +2656,15 @@ static int sctp_setsockopt_delayed_ack(struct sock *sk, trans->sackdelay = msecs_to_jiffies(params.sack_delay); trans->param_flags = - (trans->param_flags & ~SPP_SACKDELAY) | - SPP_SACKDELAY_ENABLE; + sctp_spp_sackdelay_enable(trans->param_flags); } if (params.sack_freq == 1) { trans->param_flags = - (trans->param_flags & ~SPP_SACKDELAY) | - SPP_SACKDELAY_DISABLE; + sctp_spp_sackdelay_disable(trans->param_flags); } else if (params.sack_freq > 1) { trans->sackfreq = params.sack_freq; trans->param_flags = - (trans->param_flags & ~SPP_SACKDELAY) | - SPP_SACKDELAY_ENABLE; + sctp_spp_sackdelay_enable(trans->param_flags); } } } @@ -2995,8 +2998,11 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned int val; if (optlen == sizeof(int)) { - pr_warn("Use of int in maxseg socket option deprecated\n"); - pr_warn("Use struct sctp_assoc_value instead\n"); + pr_warn_ratelimited(DEPRECATED + "%s (pid %d) " + "Use of int in maxseg socket option.\n" + "Use struct sctp_assoc_value instead\n", + current->comm, task_pid_nr(current)); if (copy_from_user(&val, optval, optlen)) return -EFAULT; params.assoc_id = 0; @@ -3253,8 +3259,11 @@ static int sctp_setsockopt_maxburst(struct sock *sk, int assoc_id = 0; if (optlen == sizeof(int)) { - pr_warn("Use of int in max_burst socket option deprecated\n"); - pr_warn("Use struct sctp_assoc_value instead\n"); + pr_warn_ratelimited(DEPRECATED + "%s (pid %d) " + "Use of int in max_burst socket option deprecated.\n" + "Use struct sctp_assoc_value instead\n", + current->comm, task_pid_nr(current)); if (copy_from_user(&val, optval, optlen)) return -EFAULT; } else if (optlen == sizeof(struct sctp_assoc_value)) { @@ -3333,7 +3342,7 @@ static int sctp_setsockopt_hmac_ident(struct sock *sk, if (optlen < sizeof(struct sctp_hmacalgo)) return -EINVAL; - hmacs= memdup_user(optval, optlen); + hmacs = memdup_user(optval, optlen); if (IS_ERR(hmacs)) return PTR_ERR(hmacs); @@ -3371,7 +3380,7 @@ static int 
sctp_setsockopt_auth_key(struct sock *sk, if (optlen <= sizeof(struct sctp_authkey)) return -EINVAL; - authkey= memdup_user(optval, optlen); + authkey = memdup_user(optval, optlen); if (IS_ERR(authkey)) return PTR_ERR(authkey); @@ -3581,7 +3590,7 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, goto out_nounlock; } - sctp_lock_sock(sk); + lock_sock(sk); switch (optname) { case SCTP_SOCKOPT_BINDX_ADD: @@ -3699,7 +3708,7 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, break; } - sctp_release_sock(sk); + release_sock(sk); out_nounlock: return retval; @@ -3727,7 +3736,7 @@ static int sctp_connect(struct sock *sk, struct sockaddr *addr, int err = 0; struct sctp_af *af; - sctp_lock_sock(sk); + lock_sock(sk); pr_debug("%s: sk:%p, sockaddr:%p, addr_len:%d\n", __func__, sk, addr, addr_len); @@ -3743,7 +3752,7 @@ static int sctp_connect(struct sock *sk, struct sockaddr *addr, err = __sctp_connect(sk, addr, af->sockaddr_len, NULL); } - sctp_release_sock(sk); + release_sock(sk); return err; } @@ -3769,7 +3778,7 @@ static struct sock *sctp_accept(struct sock *sk, int flags, int *err) long timeo; int error = 0; - sctp_lock_sock(sk); + lock_sock(sk); sp = sctp_sk(sk); ep = sp->ep; @@ -3807,7 +3816,7 @@ static struct sock *sctp_accept(struct sock *sk, int flags, int *err) sctp_sock_migrate(sk, newsk, asoc, SCTP_SOCKET_TCP); out: - sctp_release_sock(sk); + release_sock(sk); *err = error; return newsk; } @@ -3817,7 +3826,7 @@ static int sctp_ioctl(struct sock *sk, int cmd, unsigned long arg) { int rc = -ENOTCONN; - sctp_lock_sock(sk); + lock_sock(sk); /* * SEQPACKET-style sockets in LISTENING state are valid, for @@ -3847,7 +3856,7 @@ static int sctp_ioctl(struct sock *sk, int cmd, unsigned long arg) break; } out: - sctp_release_sock(sk); + release_sock(sk); return rc; } @@ -3925,7 +3934,7 @@ static int sctp_init_sock(struct sock *sk) */ sp->hbinterval = net->sctp.hb_interval; sp->pathmaxrxt = net->sctp.max_retrans_path; - sp->pathmtu = 
0; // allow default discovery + sp->pathmtu = 0; /* allow default discovery */ sp->sackdelay = net->sctp.sack_timeout; sp->sackfreq = 2; sp->param_flags = SPP_HB_ENABLE | @@ -4468,7 +4477,7 @@ static int sctp_getsockopt_peer_addr_params(struct sock *sk, int len, /* If an address other than INADDR_ANY is specified, and * no transport is found, then the request is invalid. */ - if (!sctp_is_any(sk, ( union sctp_addr *)&params.spp_address)) { + if (!sctp_is_any(sk, (union sctp_addr *)&params.spp_address)) { trans = sctp_addr_id2transport(sk, &params.spp_address, params.spp_assoc_id); if (!trans) { @@ -4574,12 +4583,15 @@ static int sctp_getsockopt_delayed_ack(struct sock *sk, int len, if (copy_from_user(&params, optval, len)) return -EFAULT; } else if (len == sizeof(struct sctp_assoc_value)) { - pr_warn("Use of struct sctp_assoc_value in delayed_ack socket option deprecated\n"); - pr_warn("Use struct sctp_sack_info instead\n"); + pr_warn_ratelimited(DEPRECATED + "%s (pid %d) " + "Use of struct sctp_assoc_value in delayed_ack socket option.\n" + "Use struct sctp_sack_info instead\n", + current->comm, task_pid_nr(current)); if (copy_from_user(&params, optval, len)) return -EFAULT; } else - return - EINVAL; + return -EINVAL; /* Get association, if sack_assoc_id != 0 and the socket is a one * to many style socket, and an association was not found, then @@ -4669,8 +4681,8 @@ static int sctp_getsockopt_peer_addrs(struct sock *sk, int len, if (!asoc) return -EINVAL; - to = optval + offsetof(struct sctp_getaddrs,addrs); - space_left = len - offsetof(struct sctp_getaddrs,addrs); + to = optval + offsetof(struct sctp_getaddrs, addrs); + space_left = len - offsetof(struct sctp_getaddrs, addrs); list_for_each_entry(from, &asoc->peer.transport_addr_list, transports) { @@ -4730,7 +4742,7 @@ static int sctp_copy_laddrs(struct sock *sk, __u16 port, void *to, memcpy(to, &temp, addrlen); to += addrlen; - cnt ++; + cnt++; space_left -= addrlen; *bytes_copied += addrlen; } @@ -4779,8 +4791,8 @@ static int 
sctp_getsockopt_local_addrs(struct sock *sk, int len, bp = &asoc->base.bind_addr; } - to = optval + offsetof(struct sctp_getaddrs,addrs); - space_left = len - offsetof(struct sctp_getaddrs,addrs); + to = optval + offsetof(struct sctp_getaddrs, addrs); + space_left = len - offsetof(struct sctp_getaddrs, addrs); addrs = kmalloc(space_left, GFP_KERNEL); if (!addrs) @@ -4819,7 +4831,7 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len, memcpy(buf, &temp, addrlen); buf += addrlen; bytes_copied += addrlen; - cnt ++; + cnt++; space_left -= addrlen; } @@ -5091,7 +5103,7 @@ static int sctp_getsockopt_associnfo(struct sock *sk, int len, assocparams.sasoc_cookie_life = ktime_to_ms(asoc->cookie_life); list_for_each(pos, &asoc->peer.transport_addr_list) { - cnt ++; + cnt++; } assocparams.sasoc_number_peer_destinations = cnt; @@ -5219,8 +5231,11 @@ static int sctp_getsockopt_maxseg(struct sock *sk, int len, struct sctp_association *asoc; if (len == sizeof(int)) { - pr_warn("Use of int in maxseg socket option deprecated\n"); - pr_warn("Use struct sctp_assoc_value instead\n"); + pr_warn_ratelimited(DEPRECATED + "%s (pid %d) " + "Use of int in maxseg socket option.\n" + "Use struct sctp_assoc_value instead\n", + current->comm, task_pid_nr(current)); params.assoc_id = 0; } else if (len >= sizeof(struct sctp_assoc_value)) { len = sizeof(struct sctp_assoc_value); @@ -5311,8 +5326,11 @@ static int sctp_getsockopt_maxburst(struct sock *sk, int len, struct sctp_association *asoc; if (len == sizeof(int)) { - pr_warn("Use of int in max_burst socket option deprecated\n"); - pr_warn("Use struct sctp_assoc_value instead\n"); + pr_warn_ratelimited(DEPRECATED + "%s (pid %d) " + "Use of int in max_burst socket option.\n" + "Use struct sctp_assoc_value instead\n", + current->comm, task_pid_nr(current)); params.assoc_id = 0; } else if (len >= sizeof(struct sctp_assoc_value)) { len = sizeof(struct sctp_assoc_value); @@ -5444,7 +5462,8 @@ static int 
sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len, return -EFAULT; num: len = sizeof(struct sctp_authchunks) + num_chunks; - if (put_user(len, optlen)) return -EFAULT; + if (put_user(len, optlen)) + return -EFAULT; if (put_user(num_chunks, &p->gauth_number_of_chunks)) return -EFAULT; return 0; @@ -5476,7 +5495,7 @@ static int sctp_getsockopt_local_auth_chunks(struct sock *sk, int len, return -EINVAL; if (asoc) - ch = (struct sctp_chunks_param*)asoc->c.auth_chunks; + ch = (struct sctp_chunks_param *)asoc->c.auth_chunks; else ch = sctp_sk(sk)->ep->auth_chunk_list; @@ -5735,7 +5754,7 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname, if (get_user(len, optlen)) return -EFAULT; - sctp_lock_sock(sk); + lock_sock(sk); switch (optname) { case SCTP_STATUS: @@ -5859,7 +5878,7 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname, break; } - sctp_release_sock(sk); + release_sock(sk); return retval; } @@ -5899,7 +5918,7 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr) pr_debug("%s: begins, snum:%d\n", __func__, snum); - sctp_local_bh_disable(); + local_bh_disable(); if (snum == 0) { /* Search for an available port. */ @@ -5908,7 +5927,7 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr) inet_get_local_port_range(sock_net(sk), &low, &high); remaining = (high - low) + 1; - rover = net_random() % remaining + low; + rover = prandom_u32() % remaining + low; do { rover++; @@ -5918,14 +5937,14 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr) continue; index = sctp_phashfn(sock_net(sk), rover); head = &sctp_port_hashtable[index]; - sctp_spin_lock(&head->lock); + spin_lock(&head->lock); sctp_for_each_hentry(pp, &head->chain) if ((pp->port == rover) && net_eq(sock_net(sk), pp->net)) goto next; break; next: - sctp_spin_unlock(&head->lock); + spin_unlock(&head->lock); } while (--remaining > 0); /* Exhausted local port range during search? 
*/ @@ -5946,7 +5965,7 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr) * port iterator, pp being NULL. */ head = &sctp_port_hashtable[sctp_phashfn(sock_net(sk), snum)]; - sctp_spin_lock(&head->lock); + spin_lock(&head->lock); sctp_for_each_hentry(pp, &head->chain) { if ((pp->port == snum) && net_eq(pp->net, sock_net(sk))) goto pp_found; @@ -6030,10 +6049,10 @@ success: ret = 0; fail_unlock: - sctp_spin_unlock(&head->lock); + spin_unlock(&head->lock); fail: - sctp_local_bh_enable(); + local_bh_enable(); return ret; } @@ -6125,7 +6144,7 @@ int sctp_inet_listen(struct socket *sock, int backlog) if (unlikely(backlog < 0)) return err; - sctp_lock_sock(sk); + lock_sock(sk); /* Peeled-off sockets are not allowed to listen(). */ if (sctp_style(sk, UDP_HIGH_BANDWIDTH)) @@ -6158,7 +6177,7 @@ int sctp_inet_listen(struct socket *sock, int backlog) err = 0; out: - sctp_release_sock(sk); + release_sock(sk); return err; } @@ -6267,20 +6286,20 @@ static inline void __sctp_put_port(struct sock *sk) inet_sk(sk)->inet_num)]; struct sctp_bind_bucket *pp; - sctp_spin_lock(&head->lock); + spin_lock(&head->lock); pp = sctp_sk(sk)->bind_hash; __sk_del_bind_node(sk); sctp_sk(sk)->bind_hash = NULL; inet_sk(sk)->inet_num = 0; sctp_bucket_destroy(pp); - sctp_spin_unlock(&head->lock); + spin_unlock(&head->lock); } void sctp_put_port(struct sock *sk) { - sctp_local_bh_disable(); + local_bh_disable(); __sctp_put_port(sk); - sctp_local_bh_enable(); + local_bh_enable(); } /* @@ -6418,7 +6437,7 @@ static int sctp_msghdr_parse(const struct msghdr *msg, sctp_cmsgs_t *cmsgs) * Note: This function is the same function as in core/datagram.c * with a few modifications to make lksctp work. 
*/ -static int sctp_wait_for_packet(struct sock * sk, int *err, long *timeo_p) +static int sctp_wait_for_packet(struct sock *sk, int *err, long *timeo_p) { int error; DEFINE_WAIT(wait); @@ -6455,9 +6474,9 @@ static int sctp_wait_for_packet(struct sock * sk, int *err, long *timeo_p) * does not fit in the user's buffer, but this seems to be the * only way to honor MSG_DONTWAIT realistically. */ - sctp_release_sock(sk); + release_sock(sk); *timeo_p = schedule_timeout(*timeo_p); - sctp_lock_sock(sk); + lock_sock(sk); ready: finish_wait(sk_sleep(sk), &wait); @@ -6640,10 +6659,10 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, /* Let another process have a go. Since we are going * to sleep anyway. */ - sctp_release_sock(sk); + release_sock(sk); current_timeo = schedule_timeout(current_timeo); BUG_ON(sk != asoc->base.sk); - sctp_lock_sock(sk); + lock_sock(sk); *timeo_p = current_timeo; } @@ -6748,9 +6767,9 @@ static int sctp_wait_for_connect(struct sctp_association *asoc, long *timeo_p) /* Let another process have a go. Since we are going * to sleep anyway. 
*/ - sctp_release_sock(sk); + release_sock(sk); current_timeo = schedule_timeout(current_timeo); - sctp_lock_sock(sk); + lock_sock(sk); *timeo_p = current_timeo; } @@ -6793,9 +6812,9 @@ static int sctp_wait_for_accept(struct sock *sk, long timeo) TASK_INTERRUPTIBLE); if (list_empty(&ep->asocs)) { - sctp_release_sock(sk); + release_sock(sk); timeo = schedule_timeout(timeo); - sctp_lock_sock(sk); + lock_sock(sk); } err = -EINVAL; @@ -6828,9 +6847,9 @@ static void sctp_wait_for_close(struct sock *sk, long timeout) prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); if (list_empty(&sctp_sk(sk)->ep->asocs)) break; - sctp_release_sock(sk); + release_sock(sk); timeout = schedule_timeout(timeout); - sctp_lock_sock(sk); + lock_sock(sk); } while (!signal_pending(current) && timeout); finish_wait(sk_sleep(sk), &wait); @@ -6931,14 +6950,14 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, /* Hook this new socket in to the bind_hash list. */ head = &sctp_port_hashtable[sctp_phashfn(sock_net(oldsk), inet_sk(oldsk)->inet_num)]; - sctp_local_bh_disable(); - sctp_spin_lock(&head->lock); + local_bh_disable(); + spin_lock(&head->lock); pp = sctp_sk(oldsk)->bind_hash; sk_add_bind_node(newsk, &pp->owner); sctp_sk(newsk)->bind_hash = pp; inet_sk(newsk)->inet_num = inet_sk(oldsk)->inet_num; - sctp_spin_unlock(&head->lock); - sctp_local_bh_enable(); + spin_unlock(&head->lock); + local_bh_enable(); /* Copy the bind_addr list from the original endpoint to the new * endpoint so that we can handle restarts properly @@ -7027,7 +7046,7 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, newsk->sk_shutdown |= RCV_SHUTDOWN; newsk->sk_state = SCTP_SS_ESTABLISHED; - sctp_release_sock(newsk); + release_sock(newsk); } diff --git a/net/sctp/ssnmap.c b/net/sctp/ssnmap.c index 6007124aefa..b9c8521c1a9 100644 --- a/net/sctp/ssnmap.c +++ b/net/sctp/ssnmap.c @@ -18,9 +18,8 @@ * See the GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * along with GNU CC; see the file COPYING. If not, see + * <http://www.gnu.org/licenses/>. * * Please send any bug reports or fixes you make to the * email address(es): diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index b0565afb61c..7135e617ab0 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -19,9 +19,8 @@ * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * along with GNU CC; see the file COPYING. If not, see + * <http://www.gnu.org/licenses/>. * * Please send any bug reports or fixes you make to the * email address(es): @@ -425,7 +424,7 @@ void sctp_sysctl_net_unregister(struct net *net) kfree(table); } -static struct ctl_table_header * sctp_sysctl_header; +static struct ctl_table_header *sctp_sysctl_header; /* Sysctl registration. */ void sctp_sysctl_register(void) diff --git a/net/sctp/transport.c b/net/sctp/transport.c index efc46ffed1f..d0810dc5f07 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -24,9 +24,8 @@ * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * along with GNU CC; see the file COPYING. If not, see + * <http://www.gnu.org/licenses/>. 
* * Please send any bug reports or fixes you make to the * email address(es): diff --git a/net/sctp/tsnmap.c b/net/sctp/tsnmap.c index fbda2002828..7635f9f2311 100644 --- a/net/sctp/tsnmap.c +++ b/net/sctp/tsnmap.c @@ -21,9 +21,8 @@ * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * along with GNU CC; see the file COPYING. If not, see + * <http://www.gnu.org/licenses/>. * * Please send any bug reports or fixes you make to the * email address(es): diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index 81089ed6545..85c64658bd0 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -22,9 +22,8 @@ * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * along with GNU CC; see the file COPYING. If not, see + * <http://www.gnu.org/licenses/>. * * Please send any bug reports or fixes you make to the * email address(es): diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index 1c1484ed605..5dc94117e9d 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -21,9 +21,8 @@ * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * along with GNU CC; see the file COPYING. If not, see + * <http://www.gnu.org/licenses/>. 
* * Please send any bug reports or fixes you make to the * email address(es): @@ -44,9 +43,9 @@ #include <net/sctp/sm.h> /* Forward declarations for internal helpers. */ -static struct sctp_ulpevent * sctp_ulpq_reasm(struct sctp_ulpq *ulpq, +static struct sctp_ulpevent *sctp_ulpq_reasm(struct sctp_ulpq *ulpq, struct sctp_ulpevent *); -static struct sctp_ulpevent * sctp_ulpq_order(struct sctp_ulpq *, +static struct sctp_ulpevent *sctp_ulpq_order(struct sctp_ulpq *, struct sctp_ulpevent *); static void sctp_ulpq_reasm_drain(struct sctp_ulpq *ulpq); @@ -108,7 +107,7 @@ int sctp_ulpq_tail_data(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, event = sctp_ulpq_reasm(ulpq, event); /* Do ordering if needed. */ - if ((event) && (event->msg_flags & MSG_EOR)){ + if ((event) && (event->msg_flags & MSG_EOR)) { /* Create a temporary list to collect chunks on. */ skb_queue_head_init(&temp); __skb_queue_tail(&temp, sctp_event2skb(event)); @@ -337,7 +336,8 @@ static struct sctp_ulpevent *sctp_make_reassembled_event(struct net *net, pos = f_frag->next; /* Get the last skb in the f_frag's frag_list if present. */ - for (last = list; list; last = list, list = list->next); + for (last = list; list; last = list, list = list->next) + ; /* Add the list of remaining fragments to the first fragments * frag_list. @@ -727,7 +727,7 @@ static void sctp_ulpq_reasm_drain(struct sctp_ulpq *ulpq) while ((event = sctp_ulpq_retrieve_reassembled(ulpq)) != NULL) { /* Do ordering if needed. 
*/ - if ((event) && (event->msg_flags & MSG_EOR)){ + if ((event) && (event->msg_flags & MSG_EOR)) { skb_queue_head_init(&temp); __skb_queue_tail(&temp, sctp_event2skb(event)); diff --git a/net/socket.c b/net/socket.c index e83c416708a..879933aaed4 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1445,48 +1445,61 @@ SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol, err = fd1; goto out_release_both; } + fd2 = get_unused_fd_flags(flags); if (unlikely(fd2 < 0)) { err = fd2; - put_unused_fd(fd1); - goto out_release_both; + goto out_put_unused_1; } newfile1 = sock_alloc_file(sock1, flags, NULL); if (unlikely(IS_ERR(newfile1))) { err = PTR_ERR(newfile1); - put_unused_fd(fd1); - put_unused_fd(fd2); - goto out_release_both; + goto out_put_unused_both; } newfile2 = sock_alloc_file(sock2, flags, NULL); if (IS_ERR(newfile2)) { err = PTR_ERR(newfile2); - fput(newfile1); - put_unused_fd(fd1); - put_unused_fd(fd2); - sock_release(sock2); - goto out; + goto out_fput_1; } + err = put_user(fd1, &usockvec[0]); + if (err) + goto out_fput_both; + + err = put_user(fd2, &usockvec[1]); + if (err) + goto out_fput_both; + audit_fd_pair(fd1, fd2); + fd_install(fd1, newfile1); fd_install(fd2, newfile2); /* fd1 and fd2 may be already another descriptors. * Not kernel problem. 
*/ - err = put_user(fd1, &usockvec[0]); - if (!err) - err = put_user(fd2, &usockvec[1]); - if (!err) - return 0; + return 0; - sys_close(fd2); - sys_close(fd1); - return err; +out_fput_both: + fput(newfile2); + fput(newfile1); + put_unused_fd(fd2); + put_unused_fd(fd1); + goto out; + +out_fput_1: + fput(newfile1); + put_unused_fd(fd2); + put_unused_fd(fd1); + sock_release(sock2); + goto out; +out_put_unused_both: + put_unused_fd(fd2); +out_put_unused_1: + put_unused_fd(fd1); out_release_both: sock_release(sock2); out_release_1: @@ -2968,11 +2981,8 @@ static int bond_ioctl(struct net *net, unsigned int cmd, struct compat_ifreq __user *ifr32) { struct ifreq kifr; - struct ifreq __user *uifr; mm_segment_t old_fs; int err; - u32 data; - void __user *datap; switch (cmd) { case SIOCBONDENSLAVE: @@ -2989,26 +2999,13 @@ static int bond_ioctl(struct net *net, unsigned int cmd, set_fs(old_fs); return err; - case SIOCBONDSLAVEINFOQUERY: - case SIOCBONDINFOQUERY: - uifr = compat_alloc_user_space(sizeof(*uifr)); - if (copy_in_user(&uifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ)) - return -EFAULT; - - if (get_user(data, &ifr32->ifr_ifru.ifru_data)) - return -EFAULT; - - datap = compat_ptr(data); - if (put_user(datap, &uifr->ifr_ifru.ifru_data)) - return -EFAULT; - - return dev_ioctl(net, cmd, uifr); default: return -ENOIOCTLCMD; } } -static int siocdevprivate_ioctl(struct net *net, unsigned int cmd, +/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */ +static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd, struct compat_ifreq __user *u_ifreq32) { struct ifreq __user *u_ifreq64; @@ -3019,19 +3016,16 @@ static int siocdevprivate_ioctl(struct net *net, unsigned int cmd, if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]), IFNAMSIZ)) return -EFAULT; - if (__get_user(data32, &u_ifreq32->ifr_ifru.ifru_data)) + if (get_user(data32, &u_ifreq32->ifr_ifru.ifru_data)) return -EFAULT; data64 = compat_ptr(data32); u_ifreq64 = 
compat_alloc_user_space(sizeof(*u_ifreq64)); - /* Don't check these user accesses, just let that get trapped - * in the ioctl handler instead. - */ if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0], IFNAMSIZ)) return -EFAULT; - if (__put_user(data64, &u_ifreq64->ifr_ifru.ifru_data)) + if (put_user(data64, &u_ifreq64->ifr_ifru.ifru_data)) return -EFAULT; return dev_ioctl(net, cmd, u_ifreq64); @@ -3111,27 +3105,6 @@ static int compat_sioc_ifmap(struct net *net, unsigned int cmd, return err; } -static int compat_siocshwtstamp(struct net *net, struct compat_ifreq __user *uifr32) -{ - void __user *uptr; - compat_uptr_t uptr32; - struct ifreq __user *uifr; - - uifr = compat_alloc_user_space(sizeof(*uifr)); - if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq))) - return -EFAULT; - - if (get_user(uptr32, &uifr32->ifr_data)) - return -EFAULT; - - uptr = compat_ptr(uptr32); - - if (put_user(uptr, &uifr->ifr_data)) - return -EFAULT; - - return dev_ioctl(net, SIOCSHWTSTAMP, uifr); -} - struct rtentry32 { u32 rt_pad1; struct sockaddr rt_dst; /* target address */ @@ -3243,7 +3216,7 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, struct net *net = sock_net(sk); if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) - return siocdevprivate_ioctl(net, cmd, argp); + return compat_ifr_data_ioctl(net, cmd, argp); switch (cmd) { case SIOCSIFBR: @@ -3263,8 +3236,6 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, case SIOCBONDENSLAVE: case SIOCBONDRELEASE: case SIOCBONDSETHWADDR: - case SIOCBONDSLAVEINFOQUERY: - case SIOCBONDINFOQUERY: case SIOCBONDCHANGEACTIVE: return bond_ioctl(net, cmd, argp); case SIOCADDRT: @@ -3274,8 +3245,11 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, return do_siocgstamp(net, sock, cmd, argp); case SIOCGSTAMPNS: return do_siocgstampns(net, sock, cmd, argp); + case SIOCBONDSLAVEINFOQUERY: + case SIOCBONDINFOQUERY: case SIOCSHWTSTAMP: - return 
compat_siocshwtstamp(net, argp); + case SIOCGHWTSTAMP: + return compat_ifr_data_ioctl(net, cmd, argp); case FIOSETOWN: case SIOCSPGRP: diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index a72de074172..e521d20e197 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -619,7 +619,7 @@ static void cache_limit_defers(void) /* Consider removing either the first or the last */ if (cache_defer_cnt > DFR_MAX) { - if (net_random() & 1) + if (prandom_u32() & 1) discard = list_entry(cache_defer_list.next, struct cache_deferred_req, recent); else diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 04199bc8416..1750048130a 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1188,7 +1188,7 @@ static inline __be32 xprt_alloc_xid(struct rpc_xprt *xprt) static inline void xprt_init_xid(struct rpc_xprt *xprt) { - xprt->xid = net_random(); + xprt->xid = prandom_u32(); } static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index dd9d295813c..75b045e1cd5 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1674,7 +1674,7 @@ static void xs_udp_timer(struct rpc_xprt *xprt, struct rpc_task *task) static unsigned short xs_get_random_port(void) { unsigned short range = xprt_max_resvport - xprt_min_resvport; - unsigned short rand = (unsigned short) net_random() % range; + unsigned short rand = (unsigned short) prandom_u32() % range; return rand + xprt_min_resvport; } diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 0d4402587fd..bf860d9e75a 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -621,12 +621,6 @@ static int tipc_bcbearer_send(struct sk_buff *buf, struct tipc_bearer *unused1, if (!p) break; /* No more bearers to try */ - if (tipc_bearer_blocked(p)) { - if (!s || tipc_bearer_blocked(s)) - continue; /* Can't use either bearer */ - b = s; - } - tipc_nmap_diff(&bcbearer->remains, &b->nodes, &bcbearer->remains_new); if (bcbearer->remains_new.count == 
bcbearer->remains.count) @@ -800,7 +794,7 @@ void tipc_bclink_init(void) void tipc_bclink_stop(void) { spin_lock_bh(&bc_lock); - tipc_link_stop(bcl); + tipc_link_purge_queues(bcl); spin_unlock_bh(&bc_lock); memset(bclink, 0, sizeof(*bclink)); diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 3f9707a16d0..a38c89969c6 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -1,8 +1,8 @@ /* * net/tipc/bearer.c: TIPC bearer code * - * Copyright (c) 1996-2006, Ericsson AB - * Copyright (c) 2004-2006, 2010-2011, Wind River Systems + * Copyright (c) 1996-2006, 2013, Ericsson AB + * Copyright (c) 2004-2006, 2010-2013, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -41,8 +41,13 @@ #define MAX_ADDR_STR 60 -static struct tipc_media *media_list[MAX_MEDIA]; -static u32 media_count; +static struct tipc_media * const media_info_array[] = { + &eth_media_info, +#ifdef CONFIG_TIPC_MEDIA_IB + &ib_media_info, +#endif + NULL +}; struct tipc_bearer tipc_bearers[MAX_BEARERS]; @@ -55,11 +60,11 @@ struct tipc_media *tipc_media_find(const char *name) { u32 i; - for (i = 0; i < media_count; i++) { - if (!strcmp(media_list[i]->name, name)) - return media_list[i]; + for (i = 0; media_info_array[i] != NULL; i++) { + if (!strcmp(media_info_array[i]->name, name)) + break; } - return NULL; + return media_info_array[i]; } /** @@ -69,44 +74,11 @@ static struct tipc_media *media_find_id(u8 type) { u32 i; - for (i = 0; i < media_count; i++) { - if (media_list[i]->type_id == type) - return media_list[i]; + for (i = 0; media_info_array[i] != NULL; i++) { + if (media_info_array[i]->type_id == type) + break; } - return NULL; -} - -/** - * tipc_register_media - register a media type - * - * Bearers for this media type must be activated separately at a later stage. 
- */ -int tipc_register_media(struct tipc_media *m_ptr) -{ - int res = -EINVAL; - - write_lock_bh(&tipc_net_lock); - - if ((strlen(m_ptr->name) + 1) > TIPC_MAX_MEDIA_NAME) - goto exit; - if (m_ptr->priority > TIPC_MAX_LINK_PRI) - goto exit; - if ((m_ptr->tolerance < TIPC_MIN_LINK_TOL) || - (m_ptr->tolerance > TIPC_MAX_LINK_TOL)) - goto exit; - if (media_count >= MAX_MEDIA) - goto exit; - if (tipc_media_find(m_ptr->name) || media_find_id(m_ptr->type_id)) - goto exit; - - media_list[media_count] = m_ptr; - media_count++; - res = 0; -exit: - write_unlock_bh(&tipc_net_lock); - if (res) - pr_warn("Media <%s> registration error\n", m_ptr->name); - return res; + return media_info_array[i]; } /** @@ -144,13 +116,11 @@ struct sk_buff *tipc_media_get_names(void) if (!buf) return NULL; - read_lock_bh(&tipc_net_lock); - for (i = 0; i < media_count; i++) { + for (i = 0; media_info_array[i] != NULL; i++) { tipc_cfg_append_tlv(buf, TIPC_TLV_MEDIA_NAME, - media_list[i]->name, - strlen(media_list[i]->name) + 1); + media_info_array[i]->name, + strlen(media_info_array[i]->name) + 1); } - read_unlock_bh(&tipc_net_lock); return buf; } @@ -215,31 +185,12 @@ struct tipc_bearer *tipc_bearer_find(const char *name) } /** - * tipc_bearer_find_interface - locates bearer object with matching interface name - */ -struct tipc_bearer *tipc_bearer_find_interface(const char *if_name) -{ - struct tipc_bearer *b_ptr; - char *b_if_name; - u32 i; - - for (i = 0, b_ptr = tipc_bearers; i < MAX_BEARERS; i++, b_ptr++) { - if (!b_ptr->active) - continue; - b_if_name = strchr(b_ptr->name, ':') + 1; - if (!strcmp(b_if_name, if_name)) - return b_ptr; - } - return NULL; -} - -/** * tipc_bearer_get_names - record names of bearers in buffer */ struct sk_buff *tipc_bearer_get_names(void) { struct sk_buff *buf; - struct tipc_bearer *b_ptr; + struct tipc_bearer *b; int i, j; buf = tipc_cfg_reply_alloc(MAX_BEARERS * TLV_SPACE(TIPC_MAX_BEARER_NAME)); @@ -247,13 +198,13 @@ struct sk_buff *tipc_bearer_get_names(void) 
return NULL; read_lock_bh(&tipc_net_lock); - for (i = 0; i < media_count; i++) { + for (i = 0; media_info_array[i] != NULL; i++) { for (j = 0; j < MAX_BEARERS; j++) { - b_ptr = &tipc_bearers[j]; - if (b_ptr->active && (b_ptr->media == media_list[i])) { + b = &tipc_bearers[j]; + if (b->active && (b->media == media_info_array[i])) { tipc_cfg_append_tlv(buf, TIPC_TLV_BEARER_NAME, - b_ptr->name, - strlen(b_ptr->name) + 1); + b->name, + strlen(b->name) + 1); } } } @@ -275,31 +226,6 @@ void tipc_bearer_remove_dest(struct tipc_bearer *b_ptr, u32 dest) tipc_disc_remove_dest(b_ptr->link_req); } -/* - * Interrupt enabling new requests after bearer blocking: - * See bearer_send(). - */ -void tipc_continue(struct tipc_bearer *b) -{ - spin_lock_bh(&b->lock); - b->blocked = 0; - spin_unlock_bh(&b->lock); -} - -/* - * tipc_bearer_blocked - determines if bearer is currently blocked - */ -int tipc_bearer_blocked(struct tipc_bearer *b) -{ - int res; - - spin_lock_bh(&b->lock); - res = b->blocked; - spin_unlock_bh(&b->lock); - - return res; -} - /** * tipc_enable_bearer - enable bearer with the given name */ @@ -387,6 +313,7 @@ restart: b_ptr = &tipc_bearers[bearer_id]; strcpy(b_ptr->name, name); + b_ptr->media = m_ptr; res = m_ptr->enable_media(b_ptr); if (res) { pr_warn("Bearer <%s> rejected, enable failure (%d)\n", @@ -395,7 +322,6 @@ restart: } b_ptr->identity = bearer_id; - b_ptr->media = m_ptr; b_ptr->tolerance = m_ptr->tolerance; b_ptr->window = m_ptr->window; b_ptr->net_plane = bearer_id + 'A'; @@ -420,17 +346,16 @@ exit: } /** - * tipc_block_bearer - Block the bearer, and reset all its links + * tipc_reset_bearer - Reset all links established over this bearer */ -int tipc_block_bearer(struct tipc_bearer *b_ptr) +static int tipc_reset_bearer(struct tipc_bearer *b_ptr) { struct tipc_link *l_ptr; struct tipc_link *temp_l_ptr; read_lock_bh(&tipc_net_lock); - pr_info("Blocking bearer <%s>\n", b_ptr->name); + pr_info("Resetting bearer <%s>\n", b_ptr->name); 
spin_lock_bh(&b_ptr->lock); - b_ptr->blocked = 1; list_for_each_entry_safe(l_ptr, temp_l_ptr, &b_ptr->links, link_list) { struct tipc_node *n_ptr = l_ptr->owner; @@ -456,7 +381,6 @@ static void bearer_disable(struct tipc_bearer *b_ptr) pr_info("Disabling bearer <%s>\n", b_ptr->name); spin_lock_bh(&b_ptr->lock); - b_ptr->blocked = 1; b_ptr->media->disable_media(b_ptr); list_for_each_entry_safe(l_ptr, temp_l_ptr, &b_ptr->links, link_list) { tipc_link_delete(l_ptr); @@ -490,6 +414,211 @@ int tipc_disable_bearer(const char *name) } +/* tipc_l2_media_addr_set - initialize Ethernet media address structure + * + * Media-dependent "value" field stores MAC address in first 6 bytes + * and zeroes out the remaining bytes. + */ +void tipc_l2_media_addr_set(const struct tipc_bearer *b, + struct tipc_media_addr *a, char *mac) +{ + int len = b->media->hwaddr_len; + + if (unlikely(sizeof(a->value) < len)) { + WARN_ONCE(1, "Media length invalid\n"); + return; + } + + memcpy(a->value, mac, len); + memset(a->value + len, 0, sizeof(a->value) - len); + a->media_id = b->media->type_id; + a->broadcast = !memcmp(mac, b->bcast_addr.value, len); +} + +int tipc_enable_l2_media(struct tipc_bearer *b) +{ + struct net_device *dev; + char *driver_name = strchr((const char *)b->name, ':') + 1; + + /* Find device with specified name */ + dev = dev_get_by_name(&init_net, driver_name); + if (!dev) + return -ENODEV; + + /* Associate TIPC bearer with Ethernet bearer */ + b->media_ptr = dev; + memset(b->bcast_addr.value, 0, sizeof(b->bcast_addr.value)); + memcpy(b->bcast_addr.value, dev->broadcast, b->media->hwaddr_len); + b->bcast_addr.media_id = b->media->type_id; + b->bcast_addr.broadcast = 1; + b->mtu = dev->mtu; + tipc_l2_media_addr_set(b, &b->addr, (char *)dev->dev_addr); + rcu_assign_pointer(dev->tipc_ptr, b); + return 0; +} + +/* tipc_disable_l2_media - detach TIPC bearer from an Ethernet interface + * + * Mark Ethernet bearer as inactive so that incoming buffers are thrown away, + * then get 
worker thread to complete bearer cleanup. (Can't do cleanup + * here because cleanup code needs to sleep and caller holds spinlocks.) + */ +void tipc_disable_l2_media(struct tipc_bearer *b) +{ + struct net_device *dev = (struct net_device *)b->media_ptr; + RCU_INIT_POINTER(dev->tipc_ptr, NULL); + dev_put(dev); +} + +/** + * tipc_l2_send_msg - send a TIPC packet out over an Ethernet interface + * @buf: the packet to be sent + * @b_ptr: the bearer through which the packet is to be sent + * @dest: peer destination address + */ +int tipc_l2_send_msg(struct sk_buff *buf, struct tipc_bearer *b, + struct tipc_media_addr *dest) +{ + struct sk_buff *clone; + int delta; + struct net_device *dev = (struct net_device *)b->media_ptr; + + clone = skb_clone(buf, GFP_ATOMIC); + if (!clone) + return 0; + + delta = dev->hard_header_len - skb_headroom(buf); + if ((delta > 0) && + pskb_expand_head(clone, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) { + kfree_skb(clone); + return 0; + } + + skb_reset_network_header(clone); + clone->dev = dev; + clone->protocol = htons(ETH_P_TIPC); + dev_hard_header(clone, dev, ETH_P_TIPC, dest->value, + dev->dev_addr, clone->len); + dev_queue_xmit(clone); + return 0; +} + +/* tipc_bearer_send- sends buffer to destination over bearer + * + * IMPORTANT: + * The media send routine must not alter the buffer being passed in + * as it may be needed for later retransmission! 
+ */ +void tipc_bearer_send(struct tipc_bearer *b, struct sk_buff *buf, + struct tipc_media_addr *dest) +{ + b->media->send_msg(buf, b, dest); +} + +/** + * tipc_l2_rcv_msg - handle incoming TIPC message from an interface + * @buf: the received packet + * @dev: the net device that the packet was received on + * @pt: the packet_type structure which was used to register this handler + * @orig_dev: the original receive net device in case the device is a bond + * + * Accept only packets explicitly sent to this node, or broadcast packets; + * ignores packets sent using interface multicast, and traffic sent to other + * nodes (which can happen if interface is running in promiscuous mode). + */ +static int tipc_l2_rcv_msg(struct sk_buff *buf, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev) +{ + struct tipc_bearer *b_ptr; + + if (!net_eq(dev_net(dev), &init_net)) { + kfree_skb(buf); + return NET_RX_DROP; + } + + rcu_read_lock(); + b_ptr = rcu_dereference(dev->tipc_ptr); + if (likely(b_ptr)) { + if (likely(buf->pkt_type <= PACKET_BROADCAST)) { + buf->next = NULL; + tipc_rcv(buf, b_ptr); + rcu_read_unlock(); + return NET_RX_SUCCESS; + } + } + rcu_read_unlock(); + + kfree_skb(buf); + return NET_RX_DROP; +} + +/** + * tipc_l2_device_event - handle device events from network device + * @nb: the context of the notification + * @evt: the type of event + * @ptr: the net device that the event was on + * + * This function is called by the Ethernet driver in case of link + * change event. 
+ */ +static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, + void *ptr) +{ + struct tipc_bearer *b_ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + + if (!net_eq(dev_net(dev), &init_net)) + return NOTIFY_DONE; + + rcu_read_lock(); + b_ptr = rcu_dereference(dev->tipc_ptr); + if (!b_ptr) { + rcu_read_unlock(); + return NOTIFY_DONE; + } + + b_ptr->mtu = dev->mtu; + + switch (evt) { + case NETDEV_CHANGE: + if (netif_carrier_ok(dev)) + break; + case NETDEV_DOWN: + case NETDEV_CHANGEMTU: + case NETDEV_CHANGEADDR: + tipc_reset_bearer(b_ptr); + break; + case NETDEV_UNREGISTER: + case NETDEV_CHANGENAME: + tipc_disable_bearer(b_ptr->name); + break; + } + rcu_read_unlock(); + + return NOTIFY_OK; +} + +static struct packet_type tipc_packet_type __read_mostly = { + .type = __constant_htons(ETH_P_TIPC), + .func = tipc_l2_rcv_msg, +}; + +static struct notifier_block notifier = { + .notifier_call = tipc_l2_device_event, + .priority = 0, +}; + +int tipc_bearer_setup(void) +{ + dev_add_pack(&tipc_packet_type); + return register_netdevice_notifier(&notifier); +} + +void tipc_bearer_cleanup(void) +{ + unregister_netdevice_notifier(&notifier); + dev_remove_pack(&tipc_packet_type); +} void tipc_bearer_stop(void) { @@ -499,5 +628,4 @@ void tipc_bearer_stop(void) if (tipc_bearers[i].active) bearer_disable(&tipc_bearers[i]); } - media_count = 0; } diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index e5e04be6fff..4f5db9ad5bf 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -1,7 +1,7 @@ /* * net/tipc/bearer.h: Include file for TIPC bearer code * - * Copyright (c) 1996-2006, Ericsson AB + * Copyright (c) 1996-2006, 2013, Ericsson AB * Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. 
* @@ -73,18 +73,18 @@ struct tipc_media_addr { struct tipc_bearer; /** - * struct tipc_media - TIPC media information available to internal users + * struct tipc_media - Media specific info exposed to generic bearer layer * @send_msg: routine which handles buffer transmission * @enable_media: routine which enables a media * @disable_media: routine which disables a media * @addr2str: routine which converts media address to string * @addr2msg: routine which converts media address to protocol message area * @msg2addr: routine which converts media address from protocol message area - * @bcast_addr: media address used in broadcasting * @priority: default link (and bearer) priority * @tolerance: default time (in ms) before declaring link failure * @window: default window (in packets) before declaring link congestion * @type_id: TIPC media identifier + * @hwaddr_len: TIPC media address len * @name: media name */ struct tipc_media { @@ -101,18 +101,20 @@ struct tipc_media { u32 tolerance; u32 window; u32 type_id; + u32 hwaddr_len; char name[TIPC_MAX_MEDIA_NAME]; }; /** - * struct tipc_bearer - TIPC bearer structure + * struct tipc_bearer - Generic TIPC bearer structure + * @dev: ptr to associated network device * @usr_handle: pointer to additional media-specific information about bearer * @mtu: max packet size bearer can support - * @blocked: non-zero if bearer is blocked * @lock: spinlock for controlling access to bearer * @addr: media-specific address associated with bearer * @name: bearer name (format = media:interface) * @media: ptr to media structure associated with bearer + * @bcast_addr: media address used in broadcasting * @priority: default link priority for bearer * @window: default window size for bearer * @tolerance: default link tolerance for bearer @@ -128,9 +130,8 @@ struct tipc_media { * care of initializing all other fields. 
*/ struct tipc_bearer { - void *usr_handle; /* initalized by media */ + void *media_ptr; /* initalized by media */ u32 mtu; /* initalized by media */ - int blocked; /* initalized by media */ struct tipc_media_addr addr; /* initalized by media */ char name[TIPC_MAX_BEARER_NAME]; spinlock_t lock; @@ -159,55 +160,40 @@ extern struct tipc_bearer tipc_bearers[]; /* * TIPC routines available to supported media types */ -int tipc_register_media(struct tipc_media *m_ptr); - -void tipc_recv_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr); - -int tipc_block_bearer(struct tipc_bearer *b_ptr); -void tipc_continue(struct tipc_bearer *tb_ptr); +void tipc_rcv(struct sk_buff *buf, struct tipc_bearer *tb_ptr); int tipc_enable_bearer(const char *bearer_name, u32 disc_domain, u32 priority); int tipc_disable_bearer(const char *name); /* * Routines made available to TIPC by supported media types */ -int tipc_eth_media_start(void); -void tipc_eth_media_stop(void); +extern struct tipc_media eth_media_info; #ifdef CONFIG_TIPC_MEDIA_IB -int tipc_ib_media_start(void); -void tipc_ib_media_stop(void); -#else -static inline int tipc_ib_media_start(void) { return 0; } -static inline void tipc_ib_media_stop(void) { return; } +extern struct tipc_media ib_media_info; #endif int tipc_media_set_priority(const char *name, u32 new_value); int tipc_media_set_window(const char *name, u32 new_value); void tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a); struct sk_buff *tipc_media_get_names(void); +void tipc_l2_media_addr_set(const struct tipc_bearer *b, + struct tipc_media_addr *a, char *mac); +int tipc_enable_l2_media(struct tipc_bearer *b); +void tipc_disable_l2_media(struct tipc_bearer *b); +int tipc_l2_send_msg(struct sk_buff *buf, struct tipc_bearer *b, + struct tipc_media_addr *dest); struct sk_buff *tipc_bearer_get_names(void); void tipc_bearer_add_dest(struct tipc_bearer *b_ptr, u32 dest); void tipc_bearer_remove_dest(struct tipc_bearer *b_ptr, u32 dest); struct 
tipc_bearer *tipc_bearer_find(const char *name); -struct tipc_bearer *tipc_bearer_find_interface(const char *if_name); struct tipc_media *tipc_media_find(const char *name); -int tipc_bearer_blocked(struct tipc_bearer *b_ptr); +int tipc_bearer_setup(void); +void tipc_bearer_cleanup(void); void tipc_bearer_stop(void); - -/** - * tipc_bearer_send- sends buffer to destination over bearer - * - * IMPORTANT: - * The media send routine must not alter the buffer being passed in - * as it may be needed for later retransmission! - */ -static inline void tipc_bearer_send(struct tipc_bearer *b, struct sk_buff *buf, - struct tipc_media_addr *dest) -{ - b->media->send_msg(buf, b, dest); -} +void tipc_bearer_send(struct tipc_bearer *b, struct sk_buff *buf, + struct tipc_media_addr *dest); #endif /* _TIPC_BEARER_H */ diff --git a/net/tipc/core.c b/net/tipc/core.c index c6d3f75a9e1..f9e88d8b04c 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -82,8 +82,7 @@ struct sk_buff *tipc_buf_acquire(u32 size) static void tipc_core_stop_net(void) { tipc_net_stop(); - tipc_eth_media_stop(); - tipc_ib_media_stop(); + tipc_bearer_cleanup(); } /** @@ -94,10 +93,7 @@ int tipc_core_start_net(unsigned long addr) int res; tipc_net_start(addr); - res = tipc_eth_media_start(); - if (res < 0) - goto err; - res = tipc_ib_media_start(); + res = tipc_bearer_setup(); if (res < 0) goto err; return res; diff --git a/net/tipc/core.h b/net/tipc/core.h index 94895d4e86a..1ff477b0450 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -47,7 +47,7 @@ #include <linux/mm.h> #include <linux/timer.h> #include <linux/string.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <linux/interrupt.h> #include <linux/atomic.h> #include <asm/hardirq.h> diff --git a/net/tipc/discover.c b/net/tipc/discover.c index ecc758c6eac..412ff41b861 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -50,6 +50,7 @@ * @dest: destination address for request messages * @domain: network domain to which links can 
be established * @num_nodes: number of nodes currently discovered (i.e. with an active link) + * @lock: spinlock for controlling access to requests * @buf: request message to be (repeatedly) sent * @timer: timer governing period between requests * @timer_intv: current interval between requests (in ms) @@ -59,6 +60,7 @@ struct tipc_link_req { struct tipc_media_addr dest; u32 domain; int num_nodes; + spinlock_t lock; struct sk_buff *buf; struct timer_list timer; unsigned int timer_intv; @@ -239,7 +241,7 @@ void tipc_disc_recv_msg(struct sk_buff *buf, struct tipc_bearer *b_ptr) /* Accept discovery message & send response, if necessary */ link_fully_up = link_working_working(link); - if ((type == DSC_REQ_MSG) && !link_fully_up && !b_ptr->blocked) { + if ((type == DSC_REQ_MSG) && !link_fully_up) { rbuf = tipc_disc_init_msg(DSC_RESP_MSG, orig, b_ptr); if (rbuf) { tipc_bearer_send(b_ptr, rbuf, &media_addr); @@ -274,7 +276,9 @@ static void disc_update(struct tipc_link_req *req) */ void tipc_disc_add_dest(struct tipc_link_req *req) { + spin_lock_bh(&req->lock); req->num_nodes++; + spin_unlock_bh(&req->lock); } /** @@ -283,18 +287,10 @@ void tipc_disc_add_dest(struct tipc_link_req *req) */ void tipc_disc_remove_dest(struct tipc_link_req *req) { + spin_lock_bh(&req->lock); req->num_nodes--; disc_update(req); -} - -/** - * disc_send_msg - send link setup request message - * @req: ptr to link request structure - */ -static void disc_send_msg(struct tipc_link_req *req) -{ - if (!req->bearer->blocked) - tipc_bearer_send(req->bearer, req->buf, &req->dest); + spin_unlock_bh(&req->lock); } /** @@ -307,7 +303,7 @@ static void disc_timeout(struct tipc_link_req *req) { int max_delay; - spin_lock_bh(&req->bearer->lock); + spin_lock_bh(&req->lock); /* Stop searching if only desired node has been found */ if (tipc_node(req->domain) && req->num_nodes) { @@ -322,7 +318,8 @@ static void disc_timeout(struct tipc_link_req *req) * hold at fast polling rate if don't have any associated nodes, * 
otherwise hold at slow polling rate */ - disc_send_msg(req); + tipc_bearer_send(req->bearer, req->buf, &req->dest); + req->timer_intv *= 2; if (req->num_nodes) @@ -334,7 +331,7 @@ static void disc_timeout(struct tipc_link_req *req) k_start_timer(&req->timer, req->timer_intv); exit: - spin_unlock_bh(&req->bearer->lock); + spin_unlock_bh(&req->lock); } /** @@ -365,10 +362,11 @@ int tipc_disc_create(struct tipc_bearer *b_ptr, struct tipc_media_addr *dest, req->domain = dest_domain; req->num_nodes = 0; req->timer_intv = TIPC_LINK_REQ_INIT; + spin_lock_init(&req->lock); k_init_timer(&req->timer, (Handler)disc_timeout, (unsigned long)req); k_start_timer(&req->timer, req->timer_intv); b_ptr->link_req = req; - disc_send_msg(req); + tipc_bearer_send(req->bearer, req->buf, &req->dest); return 0; } diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c index f80d59f5a16..67cf3f935db 100644 --- a/net/tipc/eth_media.c +++ b/net/tipc/eth_media.c @@ -1,7 +1,7 @@ /* * net/tipc/eth_media.c: Ethernet bearer support for TIPC * - * Copyright (c) 2001-2007, Ericsson AB + * Copyright (c) 2001-2007, 2013, Ericsson AB * Copyright (c) 2005-2008, 2011-2013, Wind River Systems * All rights reserved. 
* @@ -37,259 +37,11 @@ #include "core.h" #include "bearer.h" -#define MAX_ETH_MEDIA MAX_BEARERS - #define ETH_ADDR_OFFSET 4 /* message header offset of MAC address */ -/** - * struct eth_media - Ethernet bearer data structure - * @bearer: ptr to associated "generic" bearer structure - * @dev: ptr to associated Ethernet network device - * @tipc_packet_type: used in binding TIPC to Ethernet driver - * @setup: work item used when enabling bearer - * @cleanup: work item used when disabling bearer - */ -struct eth_media { - struct tipc_bearer *bearer; - struct net_device *dev; - struct packet_type tipc_packet_type; - struct work_struct setup; - struct work_struct cleanup; -}; - -static struct tipc_media eth_media_info; -static struct eth_media eth_media_array[MAX_ETH_MEDIA]; -static int eth_started; - -static int recv_notification(struct notifier_block *nb, unsigned long evt, - void *dv); -/* - * Network device notifier info - */ -static struct notifier_block notifier = { - .notifier_call = recv_notification, - .priority = 0 -}; - -/** - * eth_media_addr_set - initialize Ethernet media address structure - * - * Media-dependent "value" field stores MAC address in first 6 bytes - * and zeroes out the remaining bytes. 
- */ -static void eth_media_addr_set(const struct tipc_bearer *tb_ptr, - struct tipc_media_addr *a, char *mac) -{ - memcpy(a->value, mac, ETH_ALEN); - memset(a->value + ETH_ALEN, 0, sizeof(a->value) - ETH_ALEN); - a->media_id = TIPC_MEDIA_TYPE_ETH; - a->broadcast = !memcmp(mac, tb_ptr->bcast_addr.value, ETH_ALEN); -} - -/** - * send_msg - send a TIPC message out over an Ethernet interface - */ -static int send_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr, - struct tipc_media_addr *dest) -{ - struct sk_buff *clone; - struct net_device *dev; - int delta; - - clone = skb_clone(buf, GFP_ATOMIC); - if (!clone) - return 0; - - dev = ((struct eth_media *)(tb_ptr->usr_handle))->dev; - delta = dev->hard_header_len - skb_headroom(buf); - - if ((delta > 0) && - pskb_expand_head(clone, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) { - kfree_skb(clone); - return 0; - } - - skb_reset_network_header(clone); - clone->dev = dev; - clone->protocol = htons(ETH_P_TIPC); - dev_hard_header(clone, dev, ETH_P_TIPC, dest->value, - dev->dev_addr, clone->len); - dev_queue_xmit(clone); - return 0; -} - -/** - * recv_msg - handle incoming TIPC message from an Ethernet interface - * - * Accept only packets explicitly sent to this node, or broadcast packets; - * ignores packets sent using Ethernet multicast, and traffic sent to other - * nodes (which can happen if interface is running in promiscuous mode). 
- */ -static int recv_msg(struct sk_buff *buf, struct net_device *dev, - struct packet_type *pt, struct net_device *orig_dev) -{ - struct eth_media *eb_ptr = (struct eth_media *)pt->af_packet_priv; - - if (!net_eq(dev_net(dev), &init_net)) { - kfree_skb(buf); - return NET_RX_DROP; - } - - if (likely(eb_ptr->bearer)) { - if (likely(buf->pkt_type <= PACKET_BROADCAST)) { - buf->next = NULL; - tipc_recv_msg(buf, eb_ptr->bearer); - return NET_RX_SUCCESS; - } - } - kfree_skb(buf); - return NET_RX_DROP; -} - -/** - * setup_media - setup association between Ethernet bearer and interface - */ -static void setup_media(struct work_struct *work) -{ - struct eth_media *eb_ptr = - container_of(work, struct eth_media, setup); - - dev_add_pack(&eb_ptr->tipc_packet_type); -} - -/** - * enable_media - attach TIPC bearer to an Ethernet interface - */ -static int enable_media(struct tipc_bearer *tb_ptr) -{ - struct net_device *dev; - struct eth_media *eb_ptr = &eth_media_array[0]; - struct eth_media *stop = &eth_media_array[MAX_ETH_MEDIA]; - char *driver_name = strchr((const char *)tb_ptr->name, ':') + 1; - int pending_dev = 0; - - /* Find unused Ethernet bearer structure */ - while (eb_ptr->dev) { - if (!eb_ptr->bearer) - pending_dev++; - if (++eb_ptr == stop) - return pending_dev ? 
-EAGAIN : -EDQUOT; - } - - /* Find device with specified name */ - dev = dev_get_by_name(&init_net, driver_name); - if (!dev) - return -ENODEV; - - /* Create Ethernet bearer for device */ - eb_ptr->dev = dev; - eb_ptr->tipc_packet_type.type = htons(ETH_P_TIPC); - eb_ptr->tipc_packet_type.dev = dev; - eb_ptr->tipc_packet_type.func = recv_msg; - eb_ptr->tipc_packet_type.af_packet_priv = eb_ptr; - INIT_LIST_HEAD(&(eb_ptr->tipc_packet_type.list)); - INIT_WORK(&eb_ptr->setup, setup_media); - schedule_work(&eb_ptr->setup); - - /* Associate TIPC bearer with Ethernet bearer */ - eb_ptr->bearer = tb_ptr; - tb_ptr->usr_handle = (void *)eb_ptr; - memset(tb_ptr->bcast_addr.value, 0, sizeof(tb_ptr->bcast_addr.value)); - memcpy(tb_ptr->bcast_addr.value, dev->broadcast, ETH_ALEN); - tb_ptr->bcast_addr.media_id = TIPC_MEDIA_TYPE_ETH; - tb_ptr->bcast_addr.broadcast = 1; - tb_ptr->mtu = dev->mtu; - tb_ptr->blocked = 0; - eth_media_addr_set(tb_ptr, &tb_ptr->addr, (char *)dev->dev_addr); - return 0; -} - -/** - * cleanup_media - break association between Ethernet bearer and interface - * - * This routine must be invoked from a work queue because it can sleep. - */ -static void cleanup_media(struct work_struct *work) -{ - struct eth_media *eb_ptr = - container_of(work, struct eth_media, cleanup); - - dev_remove_pack(&eb_ptr->tipc_packet_type); - dev_put(eb_ptr->dev); - eb_ptr->dev = NULL; -} - -/** - * disable_media - detach TIPC bearer from an Ethernet interface - * - * Mark Ethernet bearer as inactive so that incoming buffers are thrown away, - * then get worker thread to complete bearer cleanup. (Can't do cleanup - * here because cleanup code needs to sleep and caller holds spinlocks.) 
- */ -static void disable_media(struct tipc_bearer *tb_ptr) -{ - struct eth_media *eb_ptr = (struct eth_media *)tb_ptr->usr_handle; - - eb_ptr->bearer = NULL; - INIT_WORK(&eb_ptr->cleanup, cleanup_media); - schedule_work(&eb_ptr->cleanup); -} - -/** - * recv_notification - handle device updates from OS - * - * Change the state of the Ethernet bearer (if any) associated with the - * specified device. - */ -static int recv_notification(struct notifier_block *nb, unsigned long evt, - void *ptr) -{ - struct net_device *dev = netdev_notifier_info_to_dev(ptr); - struct eth_media *eb_ptr = &eth_media_array[0]; - struct eth_media *stop = &eth_media_array[MAX_ETH_MEDIA]; - - if (!net_eq(dev_net(dev), &init_net)) - return NOTIFY_DONE; - - while ((eb_ptr->dev != dev)) { - if (++eb_ptr == stop) - return NOTIFY_DONE; /* couldn't find device */ - } - if (!eb_ptr->bearer) - return NOTIFY_DONE; /* bearer had been disabled */ - - eb_ptr->bearer->mtu = dev->mtu; - - switch (evt) { - case NETDEV_CHANGE: - if (netif_carrier_ok(dev)) - tipc_continue(eb_ptr->bearer); - else - tipc_block_bearer(eb_ptr->bearer); - break; - case NETDEV_UP: - tipc_continue(eb_ptr->bearer); - break; - case NETDEV_DOWN: - tipc_block_bearer(eb_ptr->bearer); - break; - case NETDEV_CHANGEMTU: - case NETDEV_CHANGEADDR: - tipc_block_bearer(eb_ptr->bearer); - tipc_continue(eb_ptr->bearer); - break; - case NETDEV_UNREGISTER: - case NETDEV_CHANGENAME: - tipc_disable_bearer(eb_ptr->bearer->name); - break; - } - return NOTIFY_OK; -} - -/** - * eth_addr2str - convert Ethernet address to string - */ -static int eth_addr2str(struct tipc_media_addr *a, char *str_buf, int str_size) +/* convert Ethernet address to string */ +static int tipc_eth_addr2str(struct tipc_media_addr *a, char *str_buf, + int str_size) { if (str_size < 18) /* 18 = strlen("aa:bb:cc:dd:ee:ff\0") */ return 1; @@ -298,10 +50,8 @@ static int eth_addr2str(struct tipc_media_addr *a, char *str_buf, int str_size) return 0; } -/** - * eth_str2addr - convert Ethernet 
address format to message header format - */ -static int eth_addr2msg(struct tipc_media_addr *a, char *msg_area) +/* convert Ethernet address format to message header format */ +static int tipc_eth_addr2msg(struct tipc_media_addr *a, char *msg_area) { memset(msg_area, 0, TIPC_MEDIA_ADDR_SIZE); msg_area[TIPC_MEDIA_TYPE_OFFSET] = TIPC_MEDIA_TYPE_ETH; @@ -309,68 +59,30 @@ static int eth_addr2msg(struct tipc_media_addr *a, char *msg_area) return 0; } -/** - * eth_str2addr - convert message header address format to Ethernet format - */ -static int eth_msg2addr(const struct tipc_bearer *tb_ptr, - struct tipc_media_addr *a, char *msg_area) +/* convert message header address format to Ethernet format */ +static int tipc_eth_msg2addr(const struct tipc_bearer *tb_ptr, + struct tipc_media_addr *a, char *msg_area) { if (msg_area[TIPC_MEDIA_TYPE_OFFSET] != TIPC_MEDIA_TYPE_ETH) return 1; - eth_media_addr_set(tb_ptr, a, msg_area + ETH_ADDR_OFFSET); + tipc_l2_media_addr_set(tb_ptr, a, msg_area + ETH_ADDR_OFFSET); return 0; } -/* - * Ethernet media registration info - */ -static struct tipc_media eth_media_info = { - .send_msg = send_msg, - .enable_media = enable_media, - .disable_media = disable_media, - .addr2str = eth_addr2str, - .addr2msg = eth_addr2msg, - .msg2addr = eth_msg2addr, +/* Ethernet media registration info */ +struct tipc_media eth_media_info = { + .send_msg = tipc_l2_send_msg, + .enable_media = tipc_enable_l2_media, + .disable_media = tipc_disable_l2_media, + .addr2str = tipc_eth_addr2str, + .addr2msg = tipc_eth_addr2msg, + .msg2addr = tipc_eth_msg2addr, .priority = TIPC_DEF_LINK_PRI, .tolerance = TIPC_DEF_LINK_TOL, .window = TIPC_DEF_LINK_WIN, .type_id = TIPC_MEDIA_TYPE_ETH, + .hwaddr_len = ETH_ALEN, .name = "eth" }; -/** - * tipc_eth_media_start - activate Ethernet bearer support - * - * Register Ethernet media type with TIPC bearer code. Also register - * with OS for notifications about device state changes. 
- */ -int tipc_eth_media_start(void) -{ - int res; - - if (eth_started) - return -EINVAL; - - res = tipc_register_media(&eth_media_info); - if (res) - return res; - - res = register_netdevice_notifier(&notifier); - if (!res) - eth_started = 1; - return res; -} - -/** - * tipc_eth_media_stop - deactivate Ethernet bearer support - */ -void tipc_eth_media_stop(void) -{ - if (!eth_started) - return; - - flush_scheduled_work(); - unregister_netdevice_notifier(&notifier); - eth_started = 0; -} diff --git a/net/tipc/ib_media.c b/net/tipc/ib_media.c index c1398929746..844a77e2582 100644 --- a/net/tipc/ib_media.c +++ b/net/tipc/ib_media.c @@ -42,252 +42,9 @@ #include "core.h" #include "bearer.h" -#define MAX_IB_MEDIA MAX_BEARERS - -/** - * struct ib_media - Infiniband media data structure - * @bearer: ptr to associated "generic" bearer structure - * @dev: ptr to associated Infiniband network device - * @tipc_packet_type: used in binding TIPC to Infiniband driver - * @cleanup: work item used when disabling bearer - */ - -struct ib_media { - struct tipc_bearer *bearer; - struct net_device *dev; - struct packet_type tipc_packet_type; - struct work_struct setup; - struct work_struct cleanup; -}; - -static struct tipc_media ib_media_info; -static struct ib_media ib_media_array[MAX_IB_MEDIA]; -static int ib_started; - -/** - * ib_media_addr_set - initialize Infiniband media address structure - * - * Media-dependent "value" field stores MAC address in first 6 bytes - * and zeroes out the remaining bytes. 
- */ -static void ib_media_addr_set(const struct tipc_bearer *tb_ptr, - struct tipc_media_addr *a, char *mac) -{ - BUILD_BUG_ON(sizeof(a->value) < INFINIBAND_ALEN); - memcpy(a->value, mac, INFINIBAND_ALEN); - a->media_id = TIPC_MEDIA_TYPE_IB; - a->broadcast = !memcmp(mac, tb_ptr->bcast_addr.value, INFINIBAND_ALEN); -} - -/** - * send_msg - send a TIPC message out over an InfiniBand interface - */ -static int send_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr, - struct tipc_media_addr *dest) -{ - struct sk_buff *clone; - struct net_device *dev; - int delta; - - clone = skb_clone(buf, GFP_ATOMIC); - if (!clone) - return 0; - - dev = ((struct ib_media *)(tb_ptr->usr_handle))->dev; - delta = dev->hard_header_len - skb_headroom(buf); - - if ((delta > 0) && - pskb_expand_head(clone, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) { - kfree_skb(clone); - return 0; - } - - skb_reset_network_header(clone); - clone->dev = dev; - clone->protocol = htons(ETH_P_TIPC); - dev_hard_header(clone, dev, ETH_P_TIPC, dest->value, - dev->dev_addr, clone->len); - dev_queue_xmit(clone); - return 0; -} - -/** - * recv_msg - handle incoming TIPC message from an InfiniBand interface - * - * Accept only packets explicitly sent to this node, or broadcast packets; - * ignores packets sent using InfiniBand multicast, and traffic sent to other - * nodes (which can happen if interface is running in promiscuous mode). 
- */ -static int recv_msg(struct sk_buff *buf, struct net_device *dev, - struct packet_type *pt, struct net_device *orig_dev) -{ - struct ib_media *ib_ptr = (struct ib_media *)pt->af_packet_priv; - - if (!net_eq(dev_net(dev), &init_net)) { - kfree_skb(buf); - return NET_RX_DROP; - } - - if (likely(ib_ptr->bearer)) { - if (likely(buf->pkt_type <= PACKET_BROADCAST)) { - buf->next = NULL; - tipc_recv_msg(buf, ib_ptr->bearer); - return NET_RX_SUCCESS; - } - } - kfree_skb(buf); - return NET_RX_DROP; -} - -/** - * setup_bearer - setup association between InfiniBand bearer and interface - */ -static void setup_media(struct work_struct *work) -{ - struct ib_media *ib_ptr = - container_of(work, struct ib_media, setup); - - dev_add_pack(&ib_ptr->tipc_packet_type); -} - -/** - * enable_media - attach TIPC bearer to an InfiniBand interface - */ -static int enable_media(struct tipc_bearer *tb_ptr) -{ - struct net_device *dev; - struct ib_media *ib_ptr = &ib_media_array[0]; - struct ib_media *stop = &ib_media_array[MAX_IB_MEDIA]; - char *driver_name = strchr((const char *)tb_ptr->name, ':') + 1; - int pending_dev = 0; - - /* Find unused InfiniBand bearer structure */ - while (ib_ptr->dev) { - if (!ib_ptr->bearer) - pending_dev++; - if (++ib_ptr == stop) - return pending_dev ? 
-EAGAIN : -EDQUOT; - } - - /* Find device with specified name */ - dev = dev_get_by_name(&init_net, driver_name); - if (!dev) - return -ENODEV; - - /* Create InfiniBand bearer for device */ - ib_ptr->dev = dev; - ib_ptr->tipc_packet_type.type = htons(ETH_P_TIPC); - ib_ptr->tipc_packet_type.dev = dev; - ib_ptr->tipc_packet_type.func = recv_msg; - ib_ptr->tipc_packet_type.af_packet_priv = ib_ptr; - INIT_LIST_HEAD(&(ib_ptr->tipc_packet_type.list)); - INIT_WORK(&ib_ptr->setup, setup_media); - schedule_work(&ib_ptr->setup); - - /* Associate TIPC bearer with InfiniBand bearer */ - ib_ptr->bearer = tb_ptr; - tb_ptr->usr_handle = (void *)ib_ptr; - memset(tb_ptr->bcast_addr.value, 0, sizeof(tb_ptr->bcast_addr.value)); - memcpy(tb_ptr->bcast_addr.value, dev->broadcast, INFINIBAND_ALEN); - tb_ptr->bcast_addr.media_id = TIPC_MEDIA_TYPE_IB; - tb_ptr->bcast_addr.broadcast = 1; - tb_ptr->mtu = dev->mtu; - tb_ptr->blocked = 0; - ib_media_addr_set(tb_ptr, &tb_ptr->addr, (char *)dev->dev_addr); - return 0; -} - -/** - * cleanup_bearer - break association between InfiniBand bearer and interface - * - * This routine must be invoked from a work queue because it can sleep. - */ -static void cleanup_bearer(struct work_struct *work) -{ - struct ib_media *ib_ptr = - container_of(work, struct ib_media, cleanup); - - dev_remove_pack(&ib_ptr->tipc_packet_type); - dev_put(ib_ptr->dev); - ib_ptr->dev = NULL; -} - -/** - * disable_media - detach TIPC bearer from an InfiniBand interface - * - * Mark InfiniBand bearer as inactive so that incoming buffers are thrown away, - * then get worker thread to complete bearer cleanup. (Can't do cleanup - * here because cleanup code needs to sleep and caller holds spinlocks.) 
- */ -static void disable_media(struct tipc_bearer *tb_ptr) -{ - struct ib_media *ib_ptr = (struct ib_media *)tb_ptr->usr_handle; - - ib_ptr->bearer = NULL; - INIT_WORK(&ib_ptr->cleanup, cleanup_bearer); - schedule_work(&ib_ptr->cleanup); -} - -/** - * recv_notification - handle device updates from OS - * - * Change the state of the InfiniBand bearer (if any) associated with the - * specified device. - */ -static int recv_notification(struct notifier_block *nb, unsigned long evt, - void *ptr) -{ - struct net_device *dev = netdev_notifier_info_to_dev(ptr); - struct ib_media *ib_ptr = &ib_media_array[0]; - struct ib_media *stop = &ib_media_array[MAX_IB_MEDIA]; - - if (!net_eq(dev_net(dev), &init_net)) - return NOTIFY_DONE; - - while ((ib_ptr->dev != dev)) { - if (++ib_ptr == stop) - return NOTIFY_DONE; /* couldn't find device */ - } - if (!ib_ptr->bearer) - return NOTIFY_DONE; /* bearer had been disabled */ - - ib_ptr->bearer->mtu = dev->mtu; - - switch (evt) { - case NETDEV_CHANGE: - if (netif_carrier_ok(dev)) - tipc_continue(ib_ptr->bearer); - else - tipc_block_bearer(ib_ptr->bearer); - break; - case NETDEV_UP: - tipc_continue(ib_ptr->bearer); - break; - case NETDEV_DOWN: - tipc_block_bearer(ib_ptr->bearer); - break; - case NETDEV_CHANGEMTU: - case NETDEV_CHANGEADDR: - tipc_block_bearer(ib_ptr->bearer); - tipc_continue(ib_ptr->bearer); - break; - case NETDEV_UNREGISTER: - case NETDEV_CHANGENAME: - tipc_disable_bearer(ib_ptr->bearer->name); - break; - } - return NOTIFY_OK; -} - -static struct notifier_block notifier = { - .notifier_call = recv_notification, - .priority = 0, -}; - -/** - * ib_addr2str - convert InfiniBand address to string - */ -static int ib_addr2str(struct tipc_media_addr *a, char *str_buf, int str_size) +/* convert InfiniBand address to string */ +static int tipc_ib_addr2str(struct tipc_media_addr *a, char *str_buf, + int str_size) { if (str_size < 60) /* 60 = 19 * strlen("xx:") + strlen("xx\0") */ return 1; @@ -297,10 +54,8 @@ static int 
ib_addr2str(struct tipc_media_addr *a, char *str_buf, int str_size) return 0; } -/** - * ib_addr2msg - convert InfiniBand address format to message header format - */ -static int ib_addr2msg(struct tipc_media_addr *a, char *msg_area) +/* convert InfiniBand address format to message header format */ +static int tipc_ib_addr2msg(struct tipc_media_addr *a, char *msg_area) { memset(msg_area, 0, TIPC_MEDIA_ADDR_SIZE); msg_area[TIPC_MEDIA_TYPE_OFFSET] = TIPC_MEDIA_TYPE_IB; @@ -308,65 +63,27 @@ static int ib_addr2msg(struct tipc_media_addr *a, char *msg_area) return 0; } -/** - * ib_msg2addr - convert message header address format to InfiniBand format - */ -static int ib_msg2addr(const struct tipc_bearer *tb_ptr, - struct tipc_media_addr *a, char *msg_area) +/* convert message header address format to InfiniBand format */ +static int tipc_ib_msg2addr(const struct tipc_bearer *tb_ptr, + struct tipc_media_addr *a, char *msg_area) { - ib_media_addr_set(tb_ptr, a, msg_area); + tipc_l2_media_addr_set(tb_ptr, a, msg_area); return 0; } -/* - * InfiniBand media registration info - */ -static struct tipc_media ib_media_info = { - .send_msg = send_msg, - .enable_media = enable_media, - .disable_media = disable_media, - .addr2str = ib_addr2str, - .addr2msg = ib_addr2msg, - .msg2addr = ib_msg2addr, +/* InfiniBand media registration info */ +struct tipc_media ib_media_info = { + .send_msg = tipc_l2_send_msg, + .enable_media = tipc_enable_l2_media, + .disable_media = tipc_disable_l2_media, + .addr2str = tipc_ib_addr2str, + .addr2msg = tipc_ib_addr2msg, + .msg2addr = tipc_ib_msg2addr, .priority = TIPC_DEF_LINK_PRI, .tolerance = TIPC_DEF_LINK_TOL, .window = TIPC_DEF_LINK_WIN, .type_id = TIPC_MEDIA_TYPE_IB, + .hwaddr_len = INFINIBAND_ALEN, .name = "ib" }; -/** - * tipc_ib_media_start - activate InfiniBand bearer support - * - * Register InfiniBand media type with TIPC bearer code. Also register - * with OS for notifications about device state changes. 
- */ -int tipc_ib_media_start(void) -{ - int res; - - if (ib_started) - return -EINVAL; - - res = tipc_register_media(&ib_media_info); - if (res) - return res; - - res = register_netdevice_notifier(&notifier); - if (!res) - ib_started = 1; - return res; -} - -/** - * tipc_ib_media_stop - deactivate InfiniBand bearer support - */ -void tipc_ib_media_stop(void) -{ - if (!ib_started) - return; - - flush_scheduled_work(); - unregister_netdevice_notifier(&notifier); - ib_started = 0; -} diff --git a/net/tipc/link.c b/net/tipc/link.c index 13b98774582..d4b5de41b68 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1,7 +1,7 @@ /* * net/tipc/link.c: TIPC link code * - * Copyright (c) 1996-2007, 2012, Ericsson AB + * Copyright (c) 1996-2007, 2012-2014, Ericsson AB * Copyright (c) 2004-2007, 2010-2013, Wind River Systems * All rights reserved. * @@ -78,8 +78,8 @@ static const char *link_unk_evt = "Unknown link event "; static void link_handle_out_of_seq_msg(struct tipc_link *l_ptr, struct sk_buff *buf); static void link_recv_proto_msg(struct tipc_link *l_ptr, struct sk_buff *buf); -static int link_recv_changeover_msg(struct tipc_link **l_ptr, - struct sk_buff **buf); +static int tipc_link_tunnel_rcv(struct tipc_link **l_ptr, + struct sk_buff **buf); static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tolerance); static int link_send_sections_long(struct tipc_port *sender, struct iovec const *msg_sect, @@ -87,7 +87,6 @@ static int link_send_sections_long(struct tipc_port *sender, static void link_state_event(struct tipc_link *l_ptr, u32 event); static void link_reset_statistics(struct tipc_link *l_ptr); static void link_print(struct tipc_link *l_ptr, const char *str); -static void link_start(struct tipc_link *l_ptr); static int link_send_long_buf(struct tipc_link *l_ptr, struct sk_buff *buf); static void tipc_link_send_sync(struct tipc_link *l); static void tipc_link_recv_sync(struct tipc_node *n, struct sk_buff *buf); @@ -278,9 +277,11 @@ struct tipc_link 
*tipc_link_create(struct tipc_node *n_ptr, tipc_node_attach_link(n_ptr, l_ptr); - k_init_timer(&l_ptr->timer, (Handler)link_timeout, (unsigned long)l_ptr); + k_init_timer(&l_ptr->timer, (Handler)link_timeout, + (unsigned long)l_ptr); list_add_tail(&l_ptr->link_list, &b_ptr->links); - tipc_k_signal((Handler)link_start, (unsigned long)l_ptr); + + link_state_event(l_ptr, STARTING_EVT); return l_ptr; } @@ -305,19 +306,13 @@ void tipc_link_delete(struct tipc_link *l_ptr) tipc_node_lock(l_ptr->owner); tipc_link_reset(l_ptr); tipc_node_detach_link(l_ptr->owner, l_ptr); - tipc_link_stop(l_ptr); + tipc_link_purge_queues(l_ptr); list_del_init(&l_ptr->link_list); tipc_node_unlock(l_ptr->owner); k_term_timer(&l_ptr->timer); kfree(l_ptr); } -static void link_start(struct tipc_link *l_ptr) -{ - tipc_node_lock(l_ptr->owner); - link_state_event(l_ptr, STARTING_EVT); - tipc_node_unlock(l_ptr->owner); -} /** * link_schedule_port - schedule port for deferred sending @@ -386,14 +381,7 @@ exit: */ static void link_release_outqueue(struct tipc_link *l_ptr) { - struct sk_buff *buf = l_ptr->first_out; - struct sk_buff *next; - - while (buf) { - next = buf->next; - kfree_skb(buf); - buf = next; - } + kfree_skb_list(l_ptr->first_out); l_ptr->first_out = NULL; l_ptr->out_queue_size = 0; } @@ -410,37 +398,20 @@ void tipc_link_reset_fragments(struct tipc_link *l_ptr) } /** - * tipc_link_stop - purge all inbound and outbound messages associated with link + * tipc_link_purge_queues - purge all pkt queues associated with link * @l_ptr: pointer to link */ -void tipc_link_stop(struct tipc_link *l_ptr) +void tipc_link_purge_queues(struct tipc_link *l_ptr) { - struct sk_buff *buf; - struct sk_buff *next; - - buf = l_ptr->oldest_deferred_in; - while (buf) { - next = buf->next; - kfree_skb(buf); - buf = next; - } - - buf = l_ptr->first_out; - while (buf) { - next = buf->next; - kfree_skb(buf); - buf = next; - } - + kfree_skb_list(l_ptr->oldest_deferred_in); + kfree_skb_list(l_ptr->first_out); 
tipc_link_reset_fragments(l_ptr); - kfree_skb(l_ptr->proto_msg_queue); l_ptr->proto_msg_queue = NULL; } void tipc_link_reset(struct tipc_link *l_ptr) { - struct sk_buff *buf; u32 prev_state = l_ptr->state; u32 checkpoint = l_ptr->next_in_no; int was_active_link = tipc_link_is_active(l_ptr); @@ -461,8 +432,7 @@ void tipc_link_reset(struct tipc_link *l_ptr) tipc_node_link_down(l_ptr->owner, l_ptr); tipc_bearer_remove_dest(l_ptr->b_ptr, l_ptr->addr); - if (was_active_link && tipc_node_active_links(l_ptr->owner) && - l_ptr->owner->permit_changeover) { + if (was_active_link && tipc_node_active_links(l_ptr->owner)) { l_ptr->reset_checkpoint = checkpoint; l_ptr->exp_msg_count = START_CHANGEOVER; } @@ -471,12 +441,7 @@ void tipc_link_reset(struct tipc_link *l_ptr) link_release_outqueue(l_ptr); kfree_skb(l_ptr->proto_msg_queue); l_ptr->proto_msg_queue = NULL; - buf = l_ptr->oldest_deferred_in; - while (buf) { - struct sk_buff *next = buf->next; - kfree_skb(buf); - buf = next; - } + kfree_skb_list(l_ptr->oldest_deferred_in); if (!list_empty(&l_ptr->waiting_ports)) tipc_link_wakeup_ports(l_ptr, 1); @@ -517,10 +482,11 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) if (!l_ptr->started && (event != STARTING_EVT)) return; /* Not yet. */ - if (link_blocked(l_ptr)) { + /* Check whether changeover is going on */ + if (l_ptr->exp_msg_count) { if (event == TIMEOUT_EVT) link_set_timer(l_ptr, cont_intv); - return; /* Changeover going on */ + return; } switch (l_ptr->state) { @@ -790,8 +756,7 @@ int tipc_link_send_buf(struct tipc_link *l_ptr, struct sk_buff *buf) return link_send_long_buf(l_ptr, buf); /* Packet can be queued or sent. 
*/ - if (likely(!tipc_bearer_blocked(l_ptr->b_ptr) && - !link_congested(l_ptr))) { + if (likely(!link_congested(l_ptr))) { link_add_to_outqueue(l_ptr, buf, msg); tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr); @@ -957,14 +922,13 @@ static int link_send_buf_fast(struct tipc_link *l_ptr, struct sk_buff *buf, if (likely(!link_congested(l_ptr))) { if (likely(msg_size(msg) <= l_ptr->max_pkt)) { - if (likely(!tipc_bearer_blocked(l_ptr->b_ptr))) { - link_add_to_outqueue(l_ptr, buf, msg); - tipc_bearer_send(l_ptr->b_ptr, buf, - &l_ptr->media_addr); - l_ptr->unacked_window = 0; - return res; - } - } else + link_add_to_outqueue(l_ptr, buf, msg); + tipc_bearer_send(l_ptr->b_ptr, buf, + &l_ptr->media_addr); + l_ptr->unacked_window = 0; + return res; + } + else *used_max_pkt = l_ptr->max_pkt; } return tipc_link_send_buf(l_ptr, buf); /* All other cases */ @@ -1013,8 +977,7 @@ exit: } /* Exit if link (or bearer) is congested */ - if (link_congested(l_ptr) || - tipc_bearer_blocked(l_ptr->b_ptr)) { + if (link_congested(l_ptr)) { res = link_schedule_port(l_ptr, sender->ref, res); goto exit; @@ -1127,10 +1090,7 @@ again: if (copy_from_user(buf->data + fragm_crs, sect_crs, sz)) { res = -EFAULT; error: - for (; buf_chain; buf_chain = buf) { - buf = buf_chain->next; - kfree_skb(buf_chain); - } + kfree_skb_list(buf_chain); return res; } sect_crs += sz; @@ -1180,18 +1140,12 @@ error: if (l_ptr->max_pkt < max_pkt) { sender->max_pkt = l_ptr->max_pkt; tipc_node_unlock(node); - for (; buf_chain; buf_chain = buf) { - buf = buf_chain->next; - kfree_skb(buf_chain); - } + kfree_skb_list(buf_chain); goto again; } } else { reject: - for (; buf_chain; buf_chain = buf) { - buf = buf_chain->next; - kfree_skb(buf_chain); - } + kfree_skb_list(buf_chain); return tipc_port_reject_sections(sender, hdr, msg_sect, len, TIPC_ERR_NO_NODE); } @@ -1209,7 +1163,7 @@ reject: /* * tipc_link_push_packet: Push one unsent packet to the media */ -u32 tipc_link_push_packet(struct tipc_link *l_ptr) +static u32 
tipc_link_push_packet(struct tipc_link *l_ptr) { struct sk_buff *buf = l_ptr->first_out; u32 r_q_size = l_ptr->retransm_queue_size; @@ -1281,9 +1235,6 @@ void tipc_link_push_queue(struct tipc_link *l_ptr) { u32 res; - if (tipc_bearer_blocked(l_ptr->b_ptr)) - return; - do { res = tipc_link_push_packet(l_ptr); } while (!res); @@ -1370,26 +1321,15 @@ void tipc_link_retransmit(struct tipc_link *l_ptr, struct sk_buff *buf, msg = buf_msg(buf); - if (tipc_bearer_blocked(l_ptr->b_ptr)) { - if (l_ptr->retransm_queue_size == 0) { - l_ptr->retransm_queue_head = msg_seqno(msg); - l_ptr->retransm_queue_size = retransmits; - } else { - pr_err("Unexpected retransmit on link %s (qsize=%d)\n", - l_ptr->name, l_ptr->retransm_queue_size); + /* Detect repeated retransmit failures */ + if (l_ptr->last_retransmitted == msg_seqno(msg)) { + if (++l_ptr->stale_count > 100) { + link_retransmit_failure(l_ptr, buf); + return; } - return; } else { - /* Detect repeated retransmit failures on unblocked bearer */ - if (l_ptr->last_retransmitted == msg_seqno(msg)) { - if (++l_ptr->stale_count > 100) { - link_retransmit_failure(l_ptr, buf); - return; - } - } else { - l_ptr->last_retransmitted = msg_seqno(msg); - l_ptr->stale_count = 1; - } + l_ptr->last_retransmitted = msg_seqno(msg); + l_ptr->stale_count = 1; } while (retransmits && (buf != l_ptr->next_out) && buf) { @@ -1476,14 +1416,14 @@ static int link_recv_buf_validate(struct sk_buff *buf) } /** - * tipc_recv_msg - process TIPC messages arriving from off-node + * tipc_rcv - process TIPC packets/messages arriving from off-node * @head: pointer to message buffer chain * @tb_ptr: pointer to bearer message arrived on * * Invoked with no locks held. Bearer pointer must point to a valid bearer * structure (i.e. cannot be NULL), but bearer can be inactive. 
*/ -void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *b_ptr) +void tipc_rcv(struct sk_buff *head, struct tipc_bearer *b_ptr) { read_lock_bh(&tipc_net_lock); while (head) { @@ -1658,7 +1598,7 @@ deliver: continue; case CHANGEOVER_PROTOCOL: type = msg_type(msg); - if (link_recv_changeover_msg(&l_ptr, &buf)) { + if (tipc_link_tunnel_rcv(&l_ptr, &buf)) { msg = buf_msg(buf); seq_no = msg_seqno(msg); if (type == ORIGINAL_MSG) @@ -1787,7 +1727,8 @@ void tipc_link_send_proto_msg(struct tipc_link *l_ptr, u32 msg_typ, l_ptr->proto_msg_queue = NULL; } - if (link_blocked(l_ptr)) + /* Don't send protocol message during link changeover */ + if (l_ptr->exp_msg_count) return; /* Abort non-RESET send if communication with node is prohibited */ @@ -1862,12 +1803,6 @@ void tipc_link_send_proto_msg(struct tipc_link *l_ptr, u32 msg_typ, skb_copy_to_linear_data(buf, msg, sizeof(l_ptr->proto_msg)); buf->priority = TC_PRIO_CONTROL; - /* Defer message if bearer is already blocked */ - if (tipc_bearer_blocked(l_ptr->b_ptr)) { - l_ptr->proto_msg_queue = buf; - return; - } - tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr); l_ptr->unacked_window = 0; kfree_skb(buf); @@ -1886,7 +1821,8 @@ static void link_recv_proto_msg(struct tipc_link *l_ptr, struct sk_buff *buf) u32 msg_tol; struct tipc_msg *msg = buf_msg(buf); - if (link_blocked(l_ptr)) + /* Discard protocol message during link changeover */ + if (l_ptr->exp_msg_count) goto exit; /* record unnumbered packet arrival (force mismatch on next timeout) */ @@ -1896,8 +1832,6 @@ static void link_recv_proto_msg(struct tipc_link *l_ptr, struct sk_buff *buf) if (tipc_own_addr > msg_prevnode(msg)) l_ptr->b_ptr->net_plane = msg_net_plane(msg); - l_ptr->owner->permit_changeover = msg_redundant_link(msg); - switch (msg_type(msg)) { case RESET_MSG: @@ -2013,13 +1947,13 @@ exit: } -/* - * tipc_link_tunnel(): Send one message via a link belonging to - * another bearer. Owner node is locked. 
+/* tipc_link_tunnel_xmit(): Tunnel one packet via a link belonging to + * a different bearer. Owner node is locked. */ -static void tipc_link_tunnel(struct tipc_link *l_ptr, - struct tipc_msg *tunnel_hdr, struct tipc_msg *msg, - u32 selector) +static void tipc_link_tunnel_xmit(struct tipc_link *l_ptr, + struct tipc_msg *tunnel_hdr, + struct tipc_msg *msg, + u32 selector) { struct tipc_link *tunnel; struct sk_buff *buf; @@ -2042,12 +1976,13 @@ static void tipc_link_tunnel(struct tipc_link *l_ptr, } - -/* - * changeover(): Send whole message queue via the remaining link - * Owner node is locked. +/* tipc_link_failover_send_queue(): A link has gone down, but a second + * link is still active. We can do failover. Tunnel the failing link's + * whole send queue via the remaining link. This way, we don't lose + * any packets, and sequence order is preserved for subsequent traffic + * sent over the remaining link. Owner node is locked. */ -void tipc_link_changeover(struct tipc_link *l_ptr) +void tipc_link_failover_send_queue(struct tipc_link *l_ptr) { u32 msgcount = l_ptr->out_queue_size; struct sk_buff *crs = l_ptr->first_out; @@ -2058,11 +1993,6 @@ void tipc_link_changeover(struct tipc_link *l_ptr) if (!tunnel) return; - if (!l_ptr->owner->permit_changeover) { - pr_warn("%speer did not permit changeover\n", link_co_err); - return; - } - tipc_msg_init(&tunnel_hdr, CHANGEOVER_PROTOCOL, ORIGINAL_MSG, INT_H_SIZE, l_ptr->addr); msg_set_bearer_id(&tunnel_hdr, l_ptr->peer_bearer_id); @@ -2096,20 +2026,30 @@ void tipc_link_changeover(struct tipc_link *l_ptr) msgcount = msg_msgcnt(msg); while (msgcount--) { msg_set_seqno(m, msg_seqno(msg)); - tipc_link_tunnel(l_ptr, &tunnel_hdr, m, - msg_link_selector(m)); + tipc_link_tunnel_xmit(l_ptr, &tunnel_hdr, m, + msg_link_selector(m)); pos += align(msg_size(m)); m = (struct tipc_msg *)pos; } } else { - tipc_link_tunnel(l_ptr, &tunnel_hdr, msg, - msg_link_selector(msg)); + tipc_link_tunnel_xmit(l_ptr, &tunnel_hdr, msg, + 
msg_link_selector(msg)); } crs = crs->next; } } -void tipc_link_send_duplicate(struct tipc_link *l_ptr, struct tipc_link *tunnel) +/* tipc_link_dup_send_queue(): A second link has become active. Tunnel a + * duplicate of the first link's send queue via the new link. This way, we + * are guaranteed that currently queued packets from a socket are delivered + * before future traffic from the same socket, even if this is using the + * new link. The last arriving copy of each duplicate packet is dropped at + * the receiving end by the regular protocol check, so packet cardinality + * and sequence order is preserved per sender/receiver socket pair. + * Owner node is locked. + */ +void tipc_link_dup_send_queue(struct tipc_link *l_ptr, + struct tipc_link *tunnel) { struct sk_buff *iter; struct tipc_msg tunnel_hdr; @@ -2165,12 +2105,14 @@ static struct sk_buff *buf_extract(struct sk_buff *skb, u32 from_pos) return eb; } -/* - * link_recv_changeover_msg(): Receive tunneled packet sent - * via other link. Node is locked. Return extracted buffer. +/* tipc_link_tunnel_rcv(): Receive a tunneled packet, sent + * via other link as result of a failover (ORIGINAL_MSG) or + * a new active link (DUPLICATE_MSG). Failover packets are + * returned to the active link for delivery upwards. + * Owner node is locked. 
*/ -static int link_recv_changeover_msg(struct tipc_link **l_ptr, - struct sk_buff **buf) +static int tipc_link_tunnel_rcv(struct tipc_link **l_ptr, + struct sk_buff **buf) { struct sk_buff *tunnel_buf = *buf; struct tipc_link *dest_link; @@ -2307,11 +2249,7 @@ static int link_send_long_buf(struct tipc_link *l_ptr, struct sk_buff *buf) fragm = tipc_buf_acquire(fragm_sz + INT_H_SIZE); if (fragm == NULL) { kfree_skb(buf); - while (buf_chain) { - buf = buf_chain; - buf_chain = buf_chain->next; - kfree_skb(buf); - } + kfree_skb_list(buf_chain); return -ENOMEM; } msg_set_size(&fragm_hdr, fragm_sz + INT_H_SIZE); diff --git a/net/tipc/link.h b/net/tipc/link.h index 8a6c1026644..3b6aa65b608 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -112,7 +112,6 @@ struct tipc_stats { * @continuity_interval: link continuity testing interval [in ms] * @abort_limit: # of unacknowledged continuity probes needed to reset link * @state: current state of link FSM - * @blocked: indicates if link has been administratively blocked * @fsm_msg_cnt: # of protocol messages link FSM has sent in current state * @proto_msg: template for control messages generated by link * @pmsg: convenience pointer to "proto_msg" field @@ -162,7 +161,6 @@ struct tipc_link { u32 continuity_interval; u32 abort_limit; int state; - int blocked; u32 fsm_msg_cnt; struct { unchar hdr[INT_H_SIZE]; @@ -218,16 +216,20 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, struct tipc_bearer *b_ptr, const struct tipc_media_addr *media_addr); void tipc_link_delete(struct tipc_link *l_ptr); -void tipc_link_changeover(struct tipc_link *l_ptr); -void tipc_link_send_duplicate(struct tipc_link *l_ptr, struct tipc_link *dest); +void tipc_link_failover_send_queue(struct tipc_link *l_ptr); +void tipc_link_dup_send_queue(struct tipc_link *l_ptr, + struct tipc_link *dest); void tipc_link_reset_fragments(struct tipc_link *l_ptr); int tipc_link_is_up(struct tipc_link *l_ptr); int tipc_link_is_active(struct tipc_link *l_ptr); 
-u32 tipc_link_push_packet(struct tipc_link *l_ptr); -void tipc_link_stop(struct tipc_link *l_ptr); -struct sk_buff *tipc_link_cmd_config(const void *req_tlv_area, int req_tlv_space, u16 cmd); -struct sk_buff *tipc_link_cmd_show_stats(const void *req_tlv_area, int req_tlv_space); -struct sk_buff *tipc_link_cmd_reset_stats(const void *req_tlv_area, int req_tlv_space); +void tipc_link_purge_queues(struct tipc_link *l_ptr); +struct sk_buff *tipc_link_cmd_config(const void *req_tlv_area, + int req_tlv_space, + u16 cmd); +struct sk_buff *tipc_link_cmd_show_stats(const void *req_tlv_area, + int req_tlv_space); +struct sk_buff *tipc_link_cmd_reset_stats(const void *req_tlv_area, + int req_tlv_space); void tipc_link_reset(struct tipc_link *l_ptr); int tipc_link_send(struct sk_buff *buf, u32 dest, u32 selector); void tipc_link_send_names(struct list_head *message_list, u32 dest); @@ -312,11 +314,6 @@ static inline int link_reset_reset(struct tipc_link *l_ptr) return l_ptr->state == RESET_RESET; } -static inline int link_blocked(struct tipc_link *l_ptr) -{ - return l_ptr->exp_msg_count || l_ptr->blocked; -} - static inline int link_congested(struct tipc_link *l_ptr) { return l_ptr->out_queue_size >= l_ptr->queue_limit[0]; diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 09dcd54b04e..92a1533af4e 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -148,8 +148,7 @@ static struct publication *publ_create(u32 type, u32 lower, u32 upper, */ static struct sub_seq *tipc_subseq_alloc(u32 cnt) { - struct sub_seq *sseq = kcalloc(cnt, sizeof(struct sub_seq), GFP_ATOMIC); - return sseq; + return kcalloc(cnt, sizeof(struct sub_seq), GFP_ATOMIC); } /** diff --git a/net/tipc/node.c b/net/tipc/node.c index 25100c0a6fe..efe4d41bf11 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -162,7 +162,7 @@ void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr) pr_info("New link <%s> becomes standby\n", l_ptr->name); return; } - 
tipc_link_send_duplicate(active[0], l_ptr); + tipc_link_dup_send_queue(active[0], l_ptr); if (l_ptr->priority == active[0]->priority) { active[0] = l_ptr; return; @@ -225,7 +225,7 @@ void tipc_node_link_down(struct tipc_node *n_ptr, struct tipc_link *l_ptr) if (active[0] == l_ptr) node_select_active_links(n_ptr); if (tipc_node_is_up(n_ptr)) - tipc_link_changeover(l_ptr); + tipc_link_failover_send_queue(l_ptr); else node_lost_contact(n_ptr); } @@ -235,11 +235,6 @@ int tipc_node_active_links(struct tipc_node *n_ptr) return n_ptr->active_links[0] != NULL; } -int tipc_node_redundant_links(struct tipc_node *n_ptr) -{ - return n_ptr->working_links > 1; -} - int tipc_node_is_up(struct tipc_node *n_ptr) { return tipc_node_active_links(n_ptr); @@ -291,11 +286,7 @@ static void node_lost_contact(struct tipc_node *n_ptr) /* Flush broadcast link info associated with lost node */ if (n_ptr->bclink.recv_permitted) { - while (n_ptr->bclink.deferred_head) { - struct sk_buff *buf = n_ptr->bclink.deferred_head; - n_ptr->bclink.deferred_head = buf->next; - kfree_skb(buf); - } + kfree_skb_list(n_ptr->bclink.deferred_head); n_ptr->bclink.deferred_size = 0; if (n_ptr->bclink.reasm_head) { diff --git a/net/tipc/node.h b/net/tipc/node.h index e5e96c04e16..63e2e8ead2f 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -64,7 +64,6 @@ * @working_links: number of working links to node (both active and standby) * @block_setup: bit mask of conditions preventing link establishment to node * @link_cnt: number of links to node - * @permit_changeover: non-zero if node has redundant links to this system * @signature: node instance identifier * @bclink: broadcast-related info * @acked: sequence # of last outbound b'cast message acknowledged by node @@ -89,7 +88,6 @@ struct tipc_node { int link_cnt; int working_links; int block_setup; - int permit_changeover; u32 signature; struct { u32 acked; @@ -115,7 +113,6 @@ void tipc_node_detach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr); void 
tipc_node_link_down(struct tipc_node *n_ptr, struct tipc_link *l_ptr); void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr); int tipc_node_active_links(struct tipc_node *n_ptr); -int tipc_node_redundant_links(struct tipc_node *n_ptr); int tipc_node_is_up(struct tipc_node *n_ptr); struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space); struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space); diff --git a/net/tipc/port.c b/net/tipc/port.c index d43f3182b1d..b742b265452 100644 --- a/net/tipc/port.c +++ b/net/tipc/port.c @@ -817,17 +817,14 @@ exit: */ int __tipc_disconnect(struct tipc_port *tp_ptr) { - int res; - if (tp_ptr->connected) { tp_ptr->connected = 0; /* let timer expire on it's own to avoid deadlock! */ tipc_nodesub_unsubscribe(&tp_ptr->subscription); - res = 0; - } else { - res = -ENOTCONN; + return 0; } - return res; + + return -ENOTCONN; } /* diff --git a/net/tipc/server.c b/net/tipc/server.c index fd3fa57a410..b635ca347a8 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -55,7 +55,7 @@ * @usr_data: user-specified field * @rx_action: what to do when connection socket is active * @outqueue: pointer to first outbound message in queue - * @outqueue_lock: controll access to the outqueue + * @outqueue_lock: control access to the outqueue * @outqueue: list of connection objects for its server * @swork: send work item */ diff --git a/net/tipc/socket.c b/net/tipc/socket.c index e741416d1d2..aab4948f0af 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -55,9 +55,6 @@ struct tipc_sock { #define tipc_sk(sk) ((struct tipc_sock *)(sk)) #define tipc_sk_port(sk) (tipc_sk(sk)->p) -#define tipc_rx_ready(sock) (!skb_queue_empty(&sock->sk->sk_receive_queue) || \ - (sock->state == SS_DISCONNECTING)) - static int backlog_rcv(struct sock *sk, struct sk_buff *skb); static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf); static void wakeupdispatch(struct tipc_port *tport); @@ -239,7 
+236,6 @@ static int tipc_sk_create(struct net *net, struct socket *sock, int protocol, int tipc_sock_create_local(int type, struct socket **res) { int rc; - struct sock *sk; rc = sock_create_lite(AF_TIPC, type, 0, res); if (rc < 0) { @@ -248,8 +244,6 @@ int tipc_sock_create_local(int type, struct socket **res) } tipc_sk_create(&init_net, *res, 0, 1); - sk = (*res)->sk; - return 0; } @@ -570,6 +564,31 @@ static int dest_name_check(struct sockaddr_tipc *dest, struct msghdr *m) return 0; } +static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p) +{ + struct sock *sk = sock->sk; + struct tipc_port *tport = tipc_sk_port(sk); + DEFINE_WAIT(wait); + int done; + + do { + int err = sock_error(sk); + if (err) + return err; + if (sock->state == SS_DISCONNECTING) + return -EPIPE; + if (!*timeo_p) + return -EAGAIN; + if (signal_pending(current)) + return sock_intr_errno(*timeo_p); + + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + done = sk_wait_event(sk, timeo_p, !tport->congested); + finish_wait(sk_sleep(sk), &wait); + } while (!done); + return 0; +} + /** * send_msg - send message in connectionless manner * @iocb: if NULL, indicates that socket lock is already held @@ -589,9 +608,9 @@ static int send_msg(struct kiocb *iocb, struct socket *sock, { struct sock *sk = sock->sk; struct tipc_port *tport = tipc_sk_port(sk); - struct sockaddr_tipc *dest = (struct sockaddr_tipc *)m->msg_name; + DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); int needs_conn; - long timeout_val; + long timeo; int res = -EINVAL; if (unlikely(!dest)) @@ -628,8 +647,7 @@ static int send_msg(struct kiocb *iocb, struct socket *sock, reject_rx_queue(sk); } - timeout_val = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT); - + timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT); do { if (dest->addrtype == TIPC_ADDR_NAME) { res = dest_name_check(dest, m); @@ -663,14 +681,9 @@ static int send_msg(struct kiocb *iocb, struct socket *sock, sock->state = SS_CONNECTING; break; 
} - if (timeout_val <= 0L) { - res = timeout_val ? timeout_val : -EWOULDBLOCK; + res = tipc_wait_for_sndmsg(sock, &timeo); + if (res) break; - } - release_sock(sk); - timeout_val = wait_event_interruptible_timeout(*sk_sleep(sk), - !tport->congested, timeout_val); - lock_sock(sk); } while (1); exit: @@ -679,6 +692,34 @@ exit: return res; } +static int tipc_wait_for_sndpkt(struct socket *sock, long *timeo_p) +{ + struct sock *sk = sock->sk; + struct tipc_port *tport = tipc_sk_port(sk); + DEFINE_WAIT(wait); + int done; + + do { + int err = sock_error(sk); + if (err) + return err; + if (sock->state == SS_DISCONNECTING) + return -EPIPE; + else if (sock->state != SS_CONNECTED) + return -ENOTCONN; + if (!*timeo_p) + return -EAGAIN; + if (signal_pending(current)) + return sock_intr_errno(*timeo_p); + + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + done = sk_wait_event(sk, timeo_p, + (!tport->congested || !tport->connected)); + finish_wait(sk_sleep(sk), &wait); + } while (!done); + return 0; +} + /** * send_packet - send a connection-oriented message * @iocb: if NULL, indicates that socket lock is already held @@ -695,9 +736,9 @@ static int send_packet(struct kiocb *iocb, struct socket *sock, { struct sock *sk = sock->sk; struct tipc_port *tport = tipc_sk_port(sk); - struct sockaddr_tipc *dest = (struct sockaddr_tipc *)m->msg_name; - long timeout_val; - int res; + DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); + int res = -EINVAL; + long timeo; /* Handle implied connection establishment */ if (unlikely(dest)) @@ -709,30 +750,24 @@ static int send_packet(struct kiocb *iocb, struct socket *sock, if (iocb) lock_sock(sk); - timeout_val = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT); + if (unlikely(sock->state != SS_CONNECTED)) { + if (sock->state == SS_DISCONNECTING) + res = -EPIPE; + else + res = -ENOTCONN; + goto exit; + } + timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT); do { - if (unlikely(sock->state != SS_CONNECTED)) { - if 
(sock->state == SS_DISCONNECTING) - res = -EPIPE; - else - res = -ENOTCONN; - break; - } - res = tipc_send(tport->ref, m->msg_iov, total_len); if (likely(res != -ELINKCONG)) break; - if (timeout_val <= 0L) { - res = timeout_val ? timeout_val : -EWOULDBLOCK; + res = tipc_wait_for_sndpkt(sock, &timeo); + if (res) break; - } - release_sock(sk); - timeout_val = wait_event_interruptible_timeout(*sk_sleep(sk), - (!tport->congested || !tport->connected), timeout_val); - lock_sock(sk); } while (1); - +exit: if (iocb) release_sock(sk); return res; @@ -770,16 +805,11 @@ static int send_stream(struct kiocb *iocb, struct socket *sock, /* Handle special cases where there is no connection */ if (unlikely(sock->state != SS_CONNECTED)) { - if (sock->state == SS_UNCONNECTED) { + if (sock->state == SS_UNCONNECTED) res = send_packet(NULL, sock, m, total_len); - goto exit; - } else if (sock->state == SS_DISCONNECTING) { - res = -EPIPE; - goto exit; - } else { - res = -ENOTCONN; - goto exit; - } + else + res = sock->state == SS_DISCONNECTING ? 
-EPIPE : -ENOTCONN; + goto exit; } if (unlikely(m->msg_name)) { @@ -876,7 +906,7 @@ static int auto_connect(struct socket *sock, struct tipc_msg *msg) */ static void set_orig_addr(struct msghdr *m, struct tipc_msg *msg) { - struct sockaddr_tipc *addr = (struct sockaddr_tipc *)m->msg_name; + DECLARE_SOCKADDR(struct sockaddr_tipc *, addr, m->msg_name); if (addr) { addr->family = AF_TIPC; @@ -961,6 +991,37 @@ static int anc_data_recv(struct msghdr *m, struct tipc_msg *msg, return 0; } +static int tipc_wait_for_rcvmsg(struct socket *sock, long timeo) +{ + struct sock *sk = sock->sk; + DEFINE_WAIT(wait); + int err; + + for (;;) { + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + if (skb_queue_empty(&sk->sk_receive_queue)) { + if (sock->state == SS_DISCONNECTING) { + err = -ENOTCONN; + break; + } + release_sock(sk); + timeo = schedule_timeout(timeo); + lock_sock(sk); + } + err = 0; + if (!skb_queue_empty(&sk->sk_receive_queue)) + break; + err = sock_intr_errno(timeo); + if (signal_pending(current)) + break; + err = -EAGAIN; + if (!timeo) + break; + } + finish_wait(sk_sleep(sk), &wait); + return err; +} + /** * recv_msg - receive packet-oriented message * @iocb: (unused) @@ -980,7 +1041,7 @@ static int recv_msg(struct kiocb *iocb, struct socket *sock, struct tipc_port *tport = tipc_sk_port(sk); struct sk_buff *buf; struct tipc_msg *msg; - long timeout; + long timeo; unsigned int sz; u32 err; int res; @@ -996,25 +1057,13 @@ static int recv_msg(struct kiocb *iocb, struct socket *sock, goto exit; } - timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); + timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); restart: /* Look for a message in receive queue; wait if necessary */ - while (skb_queue_empty(&sk->sk_receive_queue)) { - if (sock->state == SS_DISCONNECTING) { - res = -ENOTCONN; - goto exit; - } - if (timeout <= 0L) { - res = timeout ? 
timeout : -EWOULDBLOCK; - goto exit; - } - release_sock(sk); - timeout = wait_event_interruptible_timeout(*sk_sleep(sk), - tipc_rx_ready(sock), - timeout); - lock_sock(sk); - } + res = tipc_wait_for_rcvmsg(sock, timeo); + if (res) + goto exit; /* Look at first message in receive queue */ buf = skb_peek(&sk->sk_receive_queue); @@ -1086,7 +1135,7 @@ static int recv_stream(struct kiocb *iocb, struct socket *sock, struct tipc_port *tport = tipc_sk_port(sk); struct sk_buff *buf; struct tipc_msg *msg; - long timeout; + long timeo; unsigned int sz; int sz_to_copy, target, needed; int sz_copied = 0; @@ -1099,31 +1148,19 @@ static int recv_stream(struct kiocb *iocb, struct socket *sock, lock_sock(sk); - if (unlikely((sock->state == SS_UNCONNECTED))) { + if (unlikely(sock->state == SS_UNCONNECTED)) { res = -ENOTCONN; goto exit; } target = sock_rcvlowat(sk, flags & MSG_WAITALL, buf_len); - timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); + timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); restart: /* Look for a message in receive queue; wait if necessary */ - while (skb_queue_empty(&sk->sk_receive_queue)) { - if (sock->state == SS_DISCONNECTING) { - res = -ENOTCONN; - goto exit; - } - if (timeout <= 0L) { - res = timeout ? 
timeout : -EWOULDBLOCK; - goto exit; - } - release_sock(sk); - timeout = wait_event_interruptible_timeout(*sk_sleep(sk), - tipc_rx_ready(sock), - timeout); - lock_sock(sk); - } + res = tipc_wait_for_rcvmsg(sock, timeo); + if (res) + goto exit; /* Look at first message in receive queue */ buf = skb_peek(&sk->sk_receive_queue); @@ -1327,14 +1364,12 @@ static u32 filter_connect(struct tipc_sock *tsock, struct sk_buff **buf) static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *buf) { struct tipc_msg *msg = buf_msg(buf); - unsigned int limit; if (msg_connected(msg)) - limit = sysctl_tipc_rmem[2]; - else - limit = sk->sk_rcvbuf >> TIPC_CRITICAL_IMPORTANCE << - msg_importance(msg); - return limit; + return sysctl_tipc_rmem[2]; + + return sk->sk_rcvbuf >> TIPC_CRITICAL_IMPORTANCE << + msg_importance(msg); } /** @@ -1448,6 +1483,28 @@ static void wakeupdispatch(struct tipc_port *tport) sk->sk_write_space(sk); } +static int tipc_wait_for_connect(struct socket *sock, long *timeo_p) +{ + struct sock *sk = sock->sk; + DEFINE_WAIT(wait); + int done; + + do { + int err = sock_error(sk); + if (err) + return err; + if (!*timeo_p) + return -ETIMEDOUT; + if (signal_pending(current)) + return sock_intr_errno(*timeo_p); + + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + done = sk_wait_event(sk, timeo_p, sock->state != SS_CONNECTING); + finish_wait(sk_sleep(sk), &wait); + } while (!done); + return 0; +} + /** * connect - establish a connection to another TIPC port * @sock: socket structure @@ -1463,7 +1520,8 @@ static int connect(struct socket *sock, struct sockaddr *dest, int destlen, struct sock *sk = sock->sk; struct sockaddr_tipc *dst = (struct sockaddr_tipc *)dest; struct msghdr m = {NULL,}; - unsigned int timeout; + long timeout = (flags & O_NONBLOCK) ? 
0 : tipc_sk(sk)->conn_timeout; + socket_state previous; int res; lock_sock(sk); @@ -1485,8 +1543,7 @@ static int connect(struct socket *sock, struct sockaddr *dest, int destlen, goto exit; } - timeout = (flags & O_NONBLOCK) ? 0 : tipc_sk(sk)->conn_timeout; - + previous = sock->state; switch (sock->state) { case SS_UNCONNECTED: /* Send a 'SYN-' to destination */ @@ -1508,43 +1565,22 @@ static int connect(struct socket *sock, struct sockaddr *dest, int destlen, * case is EINPROGRESS, rather than EALREADY. */ res = -EINPROGRESS; - break; case SS_CONNECTING: - res = -EALREADY; + if (previous == SS_CONNECTING) + res = -EALREADY; + if (!timeout) + goto exit; + timeout = msecs_to_jiffies(timeout); + /* Wait until an 'ACK' or 'RST' arrives, or a timeout occurs */ + res = tipc_wait_for_connect(sock, &timeout); break; case SS_CONNECTED: res = -EISCONN; break; default: res = -EINVAL; - goto exit; - } - - if (sock->state == SS_CONNECTING) { - if (!timeout) - goto exit; - - /* Wait until an 'ACK' or 'RST' arrives, or a timeout occurs */ - release_sock(sk); - res = wait_event_interruptible_timeout(*sk_sleep(sk), - sock->state != SS_CONNECTING, - timeout ? (long)msecs_to_jiffies(timeout) - : MAX_SCHEDULE_TIMEOUT); - lock_sock(sk); - if (res <= 0) { - if (res == 0) - res = -ETIMEDOUT; - else - ; /* leave "res" unchanged */ - goto exit; - } + break; } - - if (unlikely(sock->state == SS_DISCONNECTING)) - res = sock_error(sk); - else - res = 0; - exit: release_sock(sk); return res; @@ -1575,6 +1611,42 @@ static int listen(struct socket *sock, int len) return res; } +static int tipc_wait_for_accept(struct socket *sock, long timeo) +{ + struct sock *sk = sock->sk; + DEFINE_WAIT(wait); + int err; + + /* True wake-one mechanism for incoming connections: only + * one process gets woken up, not the 'whole herd'. + * Since we do not 'race & poll' for established sockets + * anymore, the common case will execute the loop only once. 
+ */ + for (;;) { + prepare_to_wait_exclusive(sk_sleep(sk), &wait, + TASK_INTERRUPTIBLE); + if (skb_queue_empty(&sk->sk_receive_queue)) { + release_sock(sk); + timeo = schedule_timeout(timeo); + lock_sock(sk); + } + err = 0; + if (!skb_queue_empty(&sk->sk_receive_queue)) + break; + err = -EINVAL; + if (sock->state != SS_LISTENING) + break; + err = sock_intr_errno(timeo); + if (signal_pending(current)) + break; + err = -EAGAIN; + if (!timeo) + break; + } + finish_wait(sk_sleep(sk), &wait); + return err; +} + /** * accept - wait for connection request * @sock: listening socket @@ -1591,7 +1663,7 @@ static int accept(struct socket *sock, struct socket *new_sock, int flags) struct tipc_port *new_tport; struct tipc_msg *msg; u32 new_ref; - + long timeo; int res; lock_sock(sk); @@ -1601,18 +1673,10 @@ static int accept(struct socket *sock, struct socket *new_sock, int flags) goto exit; } - while (skb_queue_empty(&sk->sk_receive_queue)) { - if (flags & O_NONBLOCK) { - res = -EWOULDBLOCK; - goto exit; - } - release_sock(sk); - res = wait_event_interruptible(*sk_sleep(sk), - (!skb_queue_empty(&sk->sk_receive_queue))); - lock_sock(sk); - if (res) - goto exit; - } + timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); + res = tipc_wait_for_accept(sock, timeo); + if (res) + goto exit; buf = skb_peek(&sk->sk_receive_queue); diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index d38bb45d82e..7cb0bd5b117 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -42,7 +42,7 @@ /** * struct tipc_subscriber - TIPC network topology subscriber * @conid: connection identifier to server connecting to subscriber - * @lock: controll access to subscriber + * @lock: control access to subscriber * @subscription_list: list of subscription objects for this subscriber */ struct tipc_subscriber { diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index a427623ee57..29fc8bee970 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -80,6 +80,8 @@ * with BSD names. 
*/ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/module.h> #include <linux/kernel.h> #include <linux/signal.h> @@ -366,7 +368,7 @@ static void unix_sock_destructor(struct sock *sk) WARN_ON(!sk_unhashed(sk)); WARN_ON(sk->sk_socket); if (!sock_flag(sk, SOCK_DEAD)) { - printk(KERN_INFO "Attempt to release alive unix socket: %p\n", sk); + pr_info("Attempt to release alive unix socket: %p\n", sk); return; } @@ -378,7 +380,7 @@ static void unix_sock_destructor(struct sock *sk) sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); local_bh_enable(); #ifdef UNIX_REFCNT_DEBUG - printk(KERN_DEBUG "UNIX %p is destroyed, %ld are still alive.\n", sk, + pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk, atomic_long_read(&unix_nr_socks)); #endif } @@ -1448,7 +1450,7 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, struct sock *sk = sock->sk; struct net *net = sock_net(sk); struct unix_sock *u = unix_sk(sk); - struct sockaddr_un *sunaddr = msg->msg_name; + DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name); struct sock *other = NULL; int namelen = 0; /* fake GCC */ int err; @@ -1910,7 +1912,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, struct scm_cookie tmp_scm; struct sock *sk = sock->sk; struct unix_sock *u = unix_sk(sk); - struct sockaddr_un *sunaddr = msg->msg_name; + DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name); int copied = 0; int check_creds = 0; int target; @@ -2441,8 +2443,7 @@ static int __init af_unix_init(void) rc = proto_register(&unix_proto, 1); if (rc != 0) { - printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n", - __func__); + pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__); goto out; } diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index 687360da62d..9bb63ffec4f 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -1779,10 +1779,8 @@ static int 
vmci_transport_dgram_dequeue(struct kiocb *kiocb, goto out; if (msg->msg_name) { - struct sockaddr_vm *vm_addr; - /* Provide the address of the sender. */ - vm_addr = (struct sockaddr_vm *)msg->msg_name; + DECLARE_SOCKADDR(struct sockaddr_vm *, vm_addr, msg->msg_name); vsock_addr_init(vm_addr, dg->src.context, dg->src.resource); msg->msg_namelen = sizeof(*vm_addr); } diff --git a/net/wireless/ap.c b/net/wireless/ap.c index 324e8d851dc..11ee4ed04f7 100644 --- a/net/wireless/ap.c +++ b/net/wireless/ap.c @@ -29,6 +29,7 @@ static int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev, wdev->beacon_interval = 0; wdev->channel = NULL; wdev->ssid_len = 0; + rdev_set_qos_map(rdev, dev, NULL); } return err; diff --git a/net/wireless/chan.c b/net/wireless/chan.c index 9b8cc877eb1..78559b5bbd1 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -277,6 +277,32 @@ void cfg80211_set_dfs_state(struct wiphy *wiphy, width, dfs_state); } +static u32 cfg80211_get_start_freq(u32 center_freq, + u32 bandwidth) +{ + u32 start_freq; + + if (bandwidth <= 20) + start_freq = center_freq; + else + start_freq = center_freq - bandwidth/2 + 10; + + return start_freq; +} + +static u32 cfg80211_get_end_freq(u32 center_freq, + u32 bandwidth) +{ + u32 end_freq; + + if (bandwidth <= 20) + end_freq = center_freq; + else + end_freq = center_freq + bandwidth/2 - 10; + + return end_freq; +} + static int cfg80211_get_chans_dfs_required(struct wiphy *wiphy, u32 center_freq, u32 bandwidth) @@ -284,13 +310,8 @@ static int cfg80211_get_chans_dfs_required(struct wiphy *wiphy, struct ieee80211_channel *c; u32 freq, start_freq, end_freq; - if (bandwidth <= 20) { - start_freq = center_freq; - end_freq = center_freq; - } else { - start_freq = center_freq - bandwidth/2 + 10; - end_freq = center_freq + bandwidth/2 - 10; - } + start_freq = cfg80211_get_start_freq(center_freq, bandwidth); + end_freq = cfg80211_get_end_freq(center_freq, bandwidth); for (freq = start_freq; freq <= end_freq; freq += 20) 
{ c = ieee80211_get_channel(wiphy, freq); @@ -330,33 +351,159 @@ int cfg80211_chandef_dfs_required(struct wiphy *wiphy, } EXPORT_SYMBOL(cfg80211_chandef_dfs_required); -static bool cfg80211_secondary_chans_ok(struct wiphy *wiphy, - u32 center_freq, u32 bandwidth, - u32 prohibited_flags) +static int cfg80211_get_chans_dfs_usable(struct wiphy *wiphy, + u32 center_freq, + u32 bandwidth) { struct ieee80211_channel *c; u32 freq, start_freq, end_freq; + int count = 0; - if (bandwidth <= 20) { - start_freq = center_freq; - end_freq = center_freq; - } else { - start_freq = center_freq - bandwidth/2 + 10; - end_freq = center_freq + bandwidth/2 - 10; + start_freq = cfg80211_get_start_freq(center_freq, bandwidth); + end_freq = cfg80211_get_end_freq(center_freq, bandwidth); + + /* + * Check entire range of channels for the bandwidth. + * Check all channels are DFS channels (DFS_USABLE or + * DFS_AVAILABLE). Return number of usable channels + * (require CAC). Allow DFS and non-DFS channel mix. + */ + for (freq = start_freq; freq <= end_freq; freq += 20) { + c = ieee80211_get_channel(wiphy, freq); + if (!c) + return -EINVAL; + + if (c->flags & IEEE80211_CHAN_DISABLED) + return -EINVAL; + + if (c->flags & IEEE80211_CHAN_RADAR) { + if (c->dfs_state == NL80211_DFS_UNAVAILABLE) + return -EINVAL; + + if (c->dfs_state == NL80211_DFS_USABLE) + count++; + } } + return count; +} + +bool cfg80211_chandef_dfs_usable(struct wiphy *wiphy, + const struct cfg80211_chan_def *chandef) +{ + int width; + int r1, r2 = 0; + + if (WARN_ON(!cfg80211_chandef_valid(chandef))) + return false; + + width = cfg80211_chandef_get_width(chandef); + if (width < 0) + return false; + + r1 = cfg80211_get_chans_dfs_usable(wiphy, chandef->center_freq1, + width); + + if (r1 < 0) + return false; + + switch (chandef->width) { + case NL80211_CHAN_WIDTH_80P80: + WARN_ON(!chandef->center_freq2); + r2 = cfg80211_get_chans_dfs_usable(wiphy, + chandef->center_freq2, + width); + if (r2 < 0) + return false; + break; + default: 
+ WARN_ON(chandef->center_freq2); + break; + } + + return (r1 + r2 > 0); +} + + +static bool cfg80211_get_chans_dfs_available(struct wiphy *wiphy, + u32 center_freq, + u32 bandwidth) +{ + struct ieee80211_channel *c; + u32 freq, start_freq, end_freq; + + start_freq = cfg80211_get_start_freq(center_freq, bandwidth); + end_freq = cfg80211_get_end_freq(center_freq, bandwidth); + + /* + * Check entire range of channels for the bandwidth. + * If any channel in between is disabled or has not + * had gone through CAC return false + */ for (freq = start_freq; freq <= end_freq; freq += 20) { c = ieee80211_get_channel(wiphy, freq); if (!c) return false; - /* check for radar flags */ - if ((prohibited_flags & c->flags & IEEE80211_CHAN_RADAR) && + if (c->flags & IEEE80211_CHAN_DISABLED) + return false; + + if ((c->flags & IEEE80211_CHAN_RADAR) && (c->dfs_state != NL80211_DFS_AVAILABLE)) return false; + } + + return true; +} + +static bool cfg80211_chandef_dfs_available(struct wiphy *wiphy, + const struct cfg80211_chan_def *chandef) +{ + int width; + int r; + + if (WARN_ON(!cfg80211_chandef_valid(chandef))) + return false; + + width = cfg80211_chandef_get_width(chandef); + if (width < 0) + return false; + + r = cfg80211_get_chans_dfs_available(wiphy, chandef->center_freq1, + width); + + /* If any of channels unavailable for cf1 just return */ + if (!r) + return r; + + switch (chandef->width) { + case NL80211_CHAN_WIDTH_80P80: + WARN_ON(!chandef->center_freq2); + r = cfg80211_get_chans_dfs_available(wiphy, + chandef->center_freq2, + width); + default: + WARN_ON(chandef->center_freq2); + break; + } + + return r; +} - /* check for the other flags */ - if (c->flags & prohibited_flags & ~IEEE80211_CHAN_RADAR) + +static bool cfg80211_secondary_chans_ok(struct wiphy *wiphy, + u32 center_freq, u32 bandwidth, + u32 prohibited_flags) +{ + struct ieee80211_channel *c; + u32 freq, start_freq, end_freq; + + start_freq = cfg80211_get_start_freq(center_freq, bandwidth); + end_freq = 
cfg80211_get_end_freq(center_freq, bandwidth); + + for (freq = start_freq; freq <= end_freq; freq += 20) { + c = ieee80211_get_channel(wiphy, freq); + if (!c || c->flags & prohibited_flags) return false; } @@ -462,14 +609,19 @@ bool cfg80211_reg_can_beacon(struct wiphy *wiphy, struct cfg80211_chan_def *chandef) { bool res; + u32 prohibited_flags = IEEE80211_CHAN_DISABLED | + IEEE80211_CHAN_NO_IR | + IEEE80211_CHAN_RADAR; trace_cfg80211_reg_can_beacon(wiphy, chandef); - res = cfg80211_chandef_usable(wiphy, chandef, - IEEE80211_CHAN_DISABLED | - IEEE80211_CHAN_PASSIVE_SCAN | - IEEE80211_CHAN_NO_IBSS | - IEEE80211_CHAN_RADAR); + if (cfg80211_chandef_dfs_required(wiphy, chandef) > 0 && + cfg80211_chandef_dfs_available(wiphy, chandef)) { + /* We can skip IEEE80211_CHAN_NO_IR if chandef dfs available */ + prohibited_flags = IEEE80211_CHAN_DISABLED; + } + + res = cfg80211_chandef_usable(wiphy, chandef, prohibited_flags); trace_cfg80211_return_bool(res); return res; @@ -510,6 +662,7 @@ cfg80211_get_chan_state(struct wireless_dev *wdev, : CHAN_MODE_EXCLUSIVE; return; } + break; case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: if (wdev->current_bss) { diff --git a/net/wireless/core.c b/net/wireless/core.c index 52b865fb735..d89dee2259b 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -203,17 +203,8 @@ void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev, rdev->opencount--; - if (rdev->scan_req && rdev->scan_req->wdev == wdev) { - /* - * If the scan request wasn't notified as done, set it - * to aborted and leak it after a warning. The driver - * should have notified us that it ended at the latest - * during rdev_stop_p2p_device(). 
- */ - if (WARN_ON(!rdev->scan_req->notified)) - rdev->scan_req->aborted = true; - ___cfg80211_scan_done(rdev, !rdev->scan_req->notified); - } + WARN_ON(rdev->scan_req && rdev->scan_req->wdev == wdev && + !rdev->scan_req->notified); } static int cfg80211_rfkill_set_block(void *data, bool blocked) @@ -357,8 +348,6 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) rdev->wiphy.rts_threshold = (u32) -1; rdev->wiphy.coverage_class = 0; - rdev->wiphy.features = NL80211_FEATURE_SCAN_FLUSH; - return &rdev->wiphy; } EXPORT_SYMBOL(wiphy_new); @@ -575,6 +564,8 @@ int wiphy_register(struct wiphy *wiphy) /* check and set up bitrates */ ieee80211_set_bitrate_flags(wiphy); + rdev->wiphy.features |= NL80211_FEATURE_SCAN_FLUSH; + rtnl_lock(); res = device_add(&rdev->wiphy.dev); if (res) { @@ -595,7 +586,7 @@ int wiphy_register(struct wiphy *wiphy) if (IS_ERR(rdev->wiphy.debugfsdir)) rdev->wiphy.debugfsdir = NULL; - if (wiphy->flags & WIPHY_FLAG_CUSTOM_REGULATORY) { + if (wiphy->regulatory_flags & REGULATORY_CUSTOM_REG) { struct regulatory_request request; request.wiphy_idx = get_wiphy_idx(wiphy); @@ -765,13 +756,16 @@ void cfg80211_leave(struct cfg80211_registered_device *rdev, { struct net_device *dev = wdev->netdev; + ASSERT_RTNL(); + switch (wdev->iftype) { case NL80211_IFTYPE_ADHOC: cfg80211_leave_ibss(rdev, dev, true); break; case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_STATION: - __cfg80211_stop_sched_scan(rdev, false); + if (rdev->sched_scan_req && dev == rdev->sched_scan_req->dev) + __cfg80211_stop_sched_scan(rdev, false); wdev_lock(wdev); #ifdef CONFIG_CFG80211_WEXT @@ -865,11 +859,8 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, break; case NETDEV_DOWN: cfg80211_update_iface_num(rdev, wdev->iftype, -1); - if (rdev->scan_req && rdev->scan_req->wdev == wdev) { - if (WARN_ON(!rdev->scan_req->notified)) - rdev->scan_req->aborted = true; - ___cfg80211_scan_done(rdev, true); - } + WARN_ON(rdev->scan_req && 
rdev->scan_req->wdev == wdev && + !rdev->scan_req->notified); if (WARN_ON(rdev->sched_scan_req && rdev->sched_scan_req->dev == wdev->netdev)) { diff --git a/net/wireless/core.h b/net/wireless/core.h index af10e59af2d..37ec16d7bb1 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -67,9 +67,7 @@ struct cfg80211_registered_device { struct work_struct scan_done_wk; struct work_struct sched_scan_results_wk; -#ifdef CONFIG_NL80211_TESTMODE - struct genl_info *testmode_info; -#endif + struct genl_info *cur_cmd_info; struct work_struct conn_work; struct work_struct event_work; @@ -317,9 +315,8 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlpid); void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev); int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, - struct ieee80211_channel *chan, bool offchan, - unsigned int wait, const u8 *buf, size_t len, - bool no_cck, bool dont_wait_for_ack, u64 *cookie); + struct cfg80211_mgmt_tx_params *params, + u64 *cookie); void cfg80211_oper_and_ht_capa(struct ieee80211_ht_cap *ht_capa, const struct ieee80211_ht_cap *ht_capa_mask); void cfg80211_oper_and_vht_capa(struct ieee80211_vht_cap *vht_capa, @@ -364,7 +361,7 @@ int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev, struct key_params *params, int key_idx, bool pairwise, const u8 *mac_addr); void __cfg80211_scan_done(struct work_struct *wk); -void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, bool leak); +void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev); void __cfg80211_sched_scan_results(struct work_struct *wk); int __cfg80211_stop_sched_scan(struct cfg80211_registered_device *rdev, bool driver_initiated); @@ -382,6 +379,19 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, enum cfg80211_chan_mode chanmode, u8 radar_detect); +/** + * cfg80211_chandef_dfs_usable - checks if chandef is DFS usable + * @wiphy: the 
wiphy to validate against + * @chandef: the channel definition to check + * + * Checks if chandef is usable and we can/need start CAC on such channel. + * + * Return: Return true if all channels available and at least + * one channel require CAC (NL80211_DFS_USABLE) + */ +bool cfg80211_chandef_dfs_usable(struct wiphy *wiphy, + const struct cfg80211_chan_def *chandef); + void cfg80211_set_dfs_state(struct wiphy *wiphy, const struct cfg80211_chan_def *chandef, enum nl80211_dfs_state dfs_state); diff --git a/net/wireless/genregdb.awk b/net/wireless/genregdb.awk index 42ed274e81f..9a8217d2a90 100644 --- a/net/wireless/genregdb.awk +++ b/net/wireless/genregdb.awk @@ -33,15 +33,7 @@ BEGIN { regdb = "const struct ieee80211_regdomain *reg_regdb[] = {\n" } -/^[ \t]*#/ { - # Ignore -} - -!active && /^[ \t]*$/ { - # Ignore -} - -!active && /country/ { +function parse_country_head() { country=$2 sub(/:/, "", country) printf "static const struct ieee80211_regdomain regdom_%s = {\n", country @@ -57,7 +49,8 @@ BEGIN { regdb = regdb "\t&regdom_" country ",\n" } -active && /^[ \t]*\(/ { +function parse_reg_rule() +{ start = $1 sub(/\(/, "", start) end = $3 @@ -107,17 +100,21 @@ active && /^[ \t]*\(/ { } else if (flagarray[arg] == "PTMP-ONLY") { flags = flags "\n\t\t\tNL80211_RRF_PTMP_ONLY | " } else if (flagarray[arg] == "PASSIVE-SCAN") { - flags = flags "\n\t\t\tNL80211_RRF_PASSIVE_SCAN | " + flags = flags "\n\t\t\tNL80211_RRF_NO_IR | " } else if (flagarray[arg] == "NO-IBSS") { - flags = flags "\n\t\t\tNL80211_RRF_NO_IBSS | " + flags = flags "\n\t\t\tNL80211_RRF_NO_IR | " + } else if (flagarray[arg] == "NO-IR") { + flags = flags "\n\t\t\tNL80211_RRF_NO_IR | " } + } flags = flags "0" printf "\t\tREG_RULE(%d, %d, %d, %d, %d, %s),\n", start, end, bw, gain, power, flags rules++ } -active && /^[ \t]*$/ { +function print_tail_country() +{ active = 0 printf "\t},\n" printf "\t.n_reg_rules = %d\n", rules @@ -125,7 +122,29 @@ active && /^[ \t]*$/ { rules = 0; } +/^[ \t]*#/ { + # Ignore +} + 
+!active && /^[ \t]*$/ { + # Ignore +} + +!active && /country/ { + parse_country_head() +} + +active && /^[ \t]*\(/ { + parse_reg_rule() +} + +active && /^[ \t]*$/ { + print_tail_country() +} + END { + if (active) + print_tail_country() print regdb "};" print "" print "int reg_regdb_size = ARRAY_SIZE(reg_regdb);" diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index 89737ee2669..f911c5f9f90 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -183,6 +183,8 @@ static void __cfg80211_clear_ibss(struct net_device *dev, bool nowext) kfree(wdev->connect_keys); wdev->connect_keys = NULL; + rdev_set_qos_map(rdev, dev, NULL); + /* * Delete all the keys ... pairwise keys can't really * exist any more anyway, but default keys might. @@ -274,7 +276,7 @@ int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev, for (i = 0; i < sband->n_channels; i++) { chan = &sband->channels[i]; - if (chan->flags & IEEE80211_CHAN_NO_IBSS) + if (chan->flags & IEEE80211_CHAN_NO_IR) continue; if (chan->flags & IEEE80211_CHAN_DISABLED) continue; @@ -346,7 +348,7 @@ int cfg80211_ibss_wext_siwfreq(struct net_device *dev, chan = ieee80211_get_channel(wdev->wiphy, freq); if (!chan) return -EINVAL; - if (chan->flags & IEEE80211_CHAN_NO_IBSS || + if (chan->flags & IEEE80211_CHAN_NO_IR || chan->flags & IEEE80211_CHAN_DISABLED) return -EINVAL; } diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index 0553fd4d85a..885862447b6 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -99,6 +99,7 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, const struct mesh_config *conf) { struct wireless_dev *wdev = dev->ieee80211_ptr; + u8 radar_detect_width = 0; int err; BUILD_BUG_ON(IEEE80211_MAX_SSID_LEN != IEEE80211_MAX_MESH_ID_LEN); @@ -141,8 +142,7 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, for (i = 0; i < sband->n_channels; i++) { chan = &sband->channels[i]; - if (chan->flags & (IEEE80211_CHAN_NO_IBSS | - 
IEEE80211_CHAN_PASSIVE_SCAN | + if (chan->flags & (IEEE80211_CHAN_NO_IR | IEEE80211_CHAN_DISABLED | IEEE80211_CHAN_RADAR)) continue; @@ -178,8 +178,16 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, if (!cfg80211_reg_can_beacon(&rdev->wiphy, &setup->chandef)) return -EINVAL; - err = cfg80211_can_use_chan(rdev, wdev, setup->chandef.chan, - CHAN_MODE_SHARED); + err = cfg80211_chandef_dfs_required(wdev->wiphy, &setup->chandef); + if (err < 0) + return err; + if (err) + radar_detect_width = BIT(setup->chandef.width); + + err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype, + setup->chandef.chan, + CHAN_MODE_SHARED, + radar_detect_width); if (err) return err; @@ -269,6 +277,7 @@ static int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev, if (!err) { wdev->mesh_id_len = 0; wdev->channel = NULL; + rdev_set_qos_map(rdev, dev, NULL); } return err; diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 6a6b1c8e907..52cca05044a 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -520,9 +520,7 @@ void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev) int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, - struct ieee80211_channel *chan, bool offchan, - unsigned int wait, const u8 *buf, size_t len, - bool no_cck, bool dont_wait_for_ack, u64 *cookie) + struct cfg80211_mgmt_tx_params *params, u64 *cookie) { const struct ieee80211_mgmt *mgmt; u16 stype; @@ -533,10 +531,10 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, if (!rdev->ops->mgmt_tx) return -EOPNOTSUPP; - if (len < 24 + 1) + if (params->len < 24 + 1) return -EINVAL; - mgmt = (const struct ieee80211_mgmt *) buf; + mgmt = (const struct ieee80211_mgmt *)params->buf; if (!ieee80211_is_mgmt(mgmt->frame_control)) return -EINVAL; @@ -615,9 +613,7 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, return -EINVAL; /* Transmit the Action frame as requested by user space */ - return 
rdev_mgmt_tx(rdev, wdev, chan, offchan, - wait, buf, len, no_cck, dont_wait_for_ack, - cookie); + return rdev_mgmt_tx(rdev, wdev, params, cookie); } bool cfg80211_rx_mgmt(struct wireless_dev *wdev, int freq, int sig_mbm, @@ -763,12 +759,12 @@ void cfg80211_radar_event(struct wiphy *wiphy, EXPORT_SYMBOL(cfg80211_radar_event); void cfg80211_cac_event(struct net_device *netdev, + const struct cfg80211_chan_def *chandef, enum nl80211_radar_event event, gfp_t gfp) { struct wireless_dev *wdev = netdev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - struct cfg80211_chan_def chandef; unsigned long timeout; trace_cfg80211_cac_event(netdev, event); @@ -779,14 +775,12 @@ void cfg80211_cac_event(struct net_device *netdev, if (WARN_ON(!wdev->channel)) return; - cfg80211_chandef_create(&chandef, wdev->channel, NL80211_CHAN_NO_HT); - switch (event) { case NL80211_RADAR_CAC_FINISHED: timeout = wdev->cac_start_time + msecs_to_jiffies(IEEE80211_DFS_MIN_CAC_TIME_MS); WARN_ON(!time_after_eq(jiffies, timeout)); - cfg80211_set_dfs_state(wiphy, &chandef, NL80211_DFS_AVAILABLE); + cfg80211_set_dfs_state(wiphy, chandef, NL80211_DFS_AVAILABLE); break; case NL80211_RADAR_CAC_ABORTED: break; @@ -796,6 +790,6 @@ void cfg80211_cac_event(struct net_device *netdev, } wdev->cac_started = false; - nl80211_radar_notify(rdev, &chandef, event, netdev, gfp); + nl80211_radar_notify(rdev, chandef, event, netdev, gfp); } EXPORT_SYMBOL(cfg80211_cac_event); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 138dc3bb8b6..7a742594916 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -53,6 +53,7 @@ enum nl80211_multicast_groups { NL80211_MCGRP_SCAN, NL80211_MCGRP_REGULATORY, NL80211_MCGRP_MLME, + NL80211_MCGRP_VENDOR, NL80211_MCGRP_TESTMODE /* keep last - ifdef! 
*/ }; @@ -61,6 +62,7 @@ static const struct genl_multicast_group nl80211_mcgrps[] = { [NL80211_MCGRP_SCAN] = { .name = "scan", }, [NL80211_MCGRP_REGULATORY] = { .name = "regulatory", }, [NL80211_MCGRP_MLME] = { .name = "mlme", }, + [NL80211_MCGRP_VENDOR] = { .name = "vendor", }, #ifdef CONFIG_NL80211_TESTMODE [NL80211_MCGRP_TESTMODE] = { .name = "testmode", } #endif @@ -163,7 +165,7 @@ __cfg80211_rdev_from_attrs(struct net *netns, struct nlattr **attrs) if (attrs[NL80211_ATTR_IFINDEX]) { int ifindex = nla_get_u32(attrs[NL80211_ATTR_IFINDEX]); - netdev = dev_get_by_index(netns, ifindex); + netdev = __dev_get_by_index(netns, ifindex); if (netdev) { if (netdev->ieee80211_ptr) tmp = wiphy_to_dev( @@ -171,8 +173,6 @@ __cfg80211_rdev_from_attrs(struct net *netns, struct nlattr **attrs) else tmp = NULL; - dev_put(netdev); - /* not wireless device -- return error */ if (!tmp) return ERR_PTR(-EINVAL); @@ -376,6 +376,12 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_STA_SUPPORTED_CHANNELS] = { .type = NLA_BINARY }, [NL80211_ATTR_STA_SUPPORTED_OPER_CLASSES] = { .type = NLA_BINARY }, [NL80211_ATTR_HANDLE_DFS] = { .type = NLA_FLAG }, + [NL80211_ATTR_OPMODE_NOTIF] = { .type = NLA_U8 }, + [NL80211_ATTR_VENDOR_ID] = { .type = NLA_U32 }, + [NL80211_ATTR_VENDOR_SUBCMD] = { .type = NLA_U32 }, + [NL80211_ATTR_VENDOR_DATA] = { .type = NLA_BINARY }, + [NL80211_ATTR_QOS_MAP] = { .type = NLA_BINARY, + .len = IEEE80211_QOS_MAP_LEN_MAX }, }; /* policy for the key attributes */ @@ -564,12 +570,12 @@ static int nl80211_msg_put_channel(struct sk_buff *msg, if ((chan->flags & IEEE80211_CHAN_DISABLED) && nla_put_flag(msg, NL80211_FREQUENCY_ATTR_DISABLED)) goto nla_put_failure; - if ((chan->flags & IEEE80211_CHAN_PASSIVE_SCAN) && - nla_put_flag(msg, NL80211_FREQUENCY_ATTR_PASSIVE_SCAN)) - goto nla_put_failure; - if ((chan->flags & IEEE80211_CHAN_NO_IBSS) && - nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_IBSS)) - goto nla_put_failure; + if (chan->flags & 
IEEE80211_CHAN_NO_IR) { + if (nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_IR)) + goto nla_put_failure; + if (nla_put_flag(msg, __NL80211_FREQUENCY_ATTR_NO_IBSS)) + goto nla_put_failure; + } if (chan->flags & IEEE80211_CHAN_RADAR) { if (nla_put_flag(msg, NL80211_FREQUENCY_ATTR_RADAR)) goto nla_put_failure; @@ -1247,10 +1253,6 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, if ((dev->wiphy.flags & WIPHY_FLAG_TDLS_EXTERNAL_SETUP) && nla_put_flag(msg, NL80211_ATTR_TDLS_EXTERNAL_SETUP)) goto nla_put_failure; - if ((dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_5_10_MHZ) && - nla_put_flag(msg, WIPHY_FLAG_SUPPORTS_5_10_MHZ)) - goto nla_put_failure; - state->split_start++; if (state->split) break; @@ -1454,6 +1456,7 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, if (dev->wiphy.flags & WIPHY_FLAG_HAS_CHANNEL_SWITCH) CMD(channel_switch, CHANNEL_SWITCH); } + CMD(set_qos_map, SET_QOS_MAP); #ifdef CONFIG_NL80211_TESTMODE CMD(testmode_cmd, TESTMODE); @@ -1579,6 +1582,46 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, if (nl80211_send_coalesce(msg, dev)) goto nla_put_failure; + if ((dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_5_10_MHZ) && + (nla_put_flag(msg, NL80211_ATTR_SUPPORT_5_MHZ) || + nla_put_flag(msg, NL80211_ATTR_SUPPORT_10_MHZ))) + goto nla_put_failure; + state->split_start++; + break; + case 11: + if (dev->wiphy.n_vendor_commands) { + const struct nl80211_vendor_cmd_info *info; + struct nlattr *nested; + + nested = nla_nest_start(msg, NL80211_ATTR_VENDOR_DATA); + if (!nested) + goto nla_put_failure; + + for (i = 0; i < dev->wiphy.n_vendor_commands; i++) { + info = &dev->wiphy.vendor_commands[i].info; + if (nla_put(msg, i + 1, sizeof(*info), info)) + goto nla_put_failure; + } + nla_nest_end(msg, nested); + } + + if (dev->wiphy.n_vendor_events) { + const struct nl80211_vendor_cmd_info *info; + struct nlattr *nested; + + nested = nla_nest_start(msg, + NL80211_ATTR_VENDOR_EVENTS); + if (!nested) + goto 
nla_put_failure; + + for (i = 0; i < dev->wiphy.n_vendor_events; i++) { + info = &dev->wiphy.vendor_events[i]; + if (nla_put(msg, i + 1, sizeof(*info), info)) + goto nla_put_failure; + } + nla_nest_end(msg, nested); + } + /* done */ state->split_start = 0; break; @@ -1611,7 +1654,7 @@ static int nl80211_dump_wiphy_parse(struct sk_buff *skb, struct cfg80211_registered_device *rdev; int ifidx = nla_get_u32(tb[NL80211_ATTR_IFINDEX]); - netdev = dev_get_by_index(sock_net(skb->sk), ifidx); + netdev = __dev_get_by_index(sock_net(skb->sk), ifidx); if (!netdev) return -ENODEV; if (netdev->ieee80211_ptr) { @@ -1619,7 +1662,6 @@ static int nl80211_dump_wiphy_parse(struct sk_buff *skb, netdev->ieee80211_ptr->wiphy); state->filter_wiphy = rdev->wiphy_idx; } - dev_put(netdev); } return 0; @@ -1942,7 +1984,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_IFINDEX]) { int ifindex = nla_get_u32(info->attrs[NL80211_ATTR_IFINDEX]); - netdev = dev_get_by_index(genl_info_net(info), ifindex); + netdev = __dev_get_by_index(genl_info_net(info), ifindex); if (netdev && netdev->ieee80211_ptr) rdev = wiphy_to_dev(netdev->ieee80211_ptr->wiphy); else @@ -1970,32 +2012,24 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) rdev, nla_data(info->attrs[NL80211_ATTR_WIPHY_NAME])); if (result) - goto bad_res; + return result; if (info->attrs[NL80211_ATTR_WIPHY_TXQ_PARAMS]) { struct ieee80211_txq_params txq_params; struct nlattr *tb[NL80211_TXQ_ATTR_MAX + 1]; - if (!rdev->ops->set_txq_params) { - result = -EOPNOTSUPP; - goto bad_res; - } + if (!rdev->ops->set_txq_params) + return -EOPNOTSUPP; - if (!netdev) { - result = -EINVAL; - goto bad_res; - } + if (!netdev) + return -EINVAL; if (netdev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && - netdev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) { - result = -EINVAL; - goto bad_res; - } + netdev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) + return -EINVAL; - if 
(!netif_running(netdev)) { - result = -ENETDOWN; - goto bad_res; - } + if (!netif_running(netdev)) + return -ENETDOWN; nla_for_each_nested(nl_txq_params, info->attrs[NL80211_ATTR_WIPHY_TXQ_PARAMS], @@ -2006,12 +2040,12 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) txq_params_policy); result = parse_txq_params(tb, &txq_params); if (result) - goto bad_res; + return result; result = rdev_set_txq_params(rdev, netdev, &txq_params); if (result) - goto bad_res; + return result; } } @@ -2020,7 +2054,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) nl80211_can_set_dev_channel(wdev) ? wdev : NULL, info); if (result) - goto bad_res; + return result; } if (info->attrs[NL80211_ATTR_WIPHY_TX_POWER_SETTING]) { @@ -2031,19 +2065,15 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) if (!(rdev->wiphy.features & NL80211_FEATURE_VIF_TXPOWER)) txp_wdev = NULL; - if (!rdev->ops->set_tx_power) { - result = -EOPNOTSUPP; - goto bad_res; - } + if (!rdev->ops->set_tx_power) + return -EOPNOTSUPP; idx = NL80211_ATTR_WIPHY_TX_POWER_SETTING; type = nla_get_u32(info->attrs[idx]); if (!info->attrs[NL80211_ATTR_WIPHY_TX_POWER_LEVEL] && - (type != NL80211_TX_POWER_AUTOMATIC)) { - result = -EINVAL; - goto bad_res; - } + (type != NL80211_TX_POWER_AUTOMATIC)) + return -EINVAL; if (type != NL80211_TX_POWER_AUTOMATIC) { idx = NL80211_ATTR_WIPHY_TX_POWER_LEVEL; @@ -2052,7 +2082,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) result = rdev_set_tx_power(rdev, txp_wdev, type, mbm); if (result) - goto bad_res; + return result; } if (info->attrs[NL80211_ATTR_WIPHY_ANTENNA_TX] && @@ -2060,10 +2090,8 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) u32 tx_ant, rx_ant; if ((!rdev->wiphy.available_antennas_tx && !rdev->wiphy.available_antennas_rx) || - !rdev->ops->set_antenna) { - result = -EOPNOTSUPP; - goto bad_res; - } + !rdev->ops->set_antenna) + return -EOPNOTSUPP; 
tx_ant = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_ANTENNA_TX]); rx_ant = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_ANTENNA_RX]); @@ -2071,17 +2099,15 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) /* reject antenna configurations which don't match the * available antenna masks, except for the "all" mask */ if ((~tx_ant && (tx_ant & ~rdev->wiphy.available_antennas_tx)) || - (~rx_ant && (rx_ant & ~rdev->wiphy.available_antennas_rx))) { - result = -EINVAL; - goto bad_res; - } + (~rx_ant && (rx_ant & ~rdev->wiphy.available_antennas_rx))) + return -EINVAL; tx_ant = tx_ant & rdev->wiphy.available_antennas_tx; rx_ant = rx_ant & rdev->wiphy.available_antennas_rx; result = rdev_set_antenna(rdev, tx_ant, rx_ant); if (result) - goto bad_res; + return result; } changed = 0; @@ -2089,30 +2115,27 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_WIPHY_RETRY_SHORT]) { retry_short = nla_get_u8( info->attrs[NL80211_ATTR_WIPHY_RETRY_SHORT]); - if (retry_short == 0) { - result = -EINVAL; - goto bad_res; - } + if (retry_short == 0) + return -EINVAL; + changed |= WIPHY_PARAM_RETRY_SHORT; } if (info->attrs[NL80211_ATTR_WIPHY_RETRY_LONG]) { retry_long = nla_get_u8( info->attrs[NL80211_ATTR_WIPHY_RETRY_LONG]); - if (retry_long == 0) { - result = -EINVAL; - goto bad_res; - } + if (retry_long == 0) + return -EINVAL; + changed |= WIPHY_PARAM_RETRY_LONG; } if (info->attrs[NL80211_ATTR_WIPHY_FRAG_THRESHOLD]) { frag_threshold = nla_get_u32( info->attrs[NL80211_ATTR_WIPHY_FRAG_THRESHOLD]); - if (frag_threshold < 256) { - result = -EINVAL; - goto bad_res; - } + if (frag_threshold < 256) + return -EINVAL; + if (frag_threshold != (u32) -1) { /* * Fragments (apart from the last one) are required to @@ -2142,10 +2165,8 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) u32 old_frag_threshold, old_rts_threshold; u8 old_coverage_class; - if (!rdev->ops->set_wiphy_params) { - result = 
-EOPNOTSUPP; - goto bad_res; - } + if (!rdev->ops->set_wiphy_params) + return -EOPNOTSUPP; old_retry_short = rdev->wiphy.retry_short; old_retry_long = rdev->wiphy.retry_long; @@ -2173,11 +2194,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) rdev->wiphy.coverage_class = old_coverage_class; } } - - bad_res: - if (netdev) - dev_put(netdev); - return result; + return 0; } static inline u64 wdev_id(struct wireless_dev *wdev) @@ -2187,7 +2204,7 @@ static inline u64 wdev_id(struct wireless_dev *wdev) } static int nl80211_send_chandef(struct sk_buff *msg, - struct cfg80211_chan_def *chandef) + const struct cfg80211_chan_def *chandef) { WARN_ON(!cfg80211_chandef_valid(chandef)); @@ -3236,6 +3253,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) return PTR_ERR(params.acl); } + wdev_lock(wdev); err = rdev_start_ap(rdev, dev, ¶ms); if (!err) { wdev->preset_chandef = params.chandef; @@ -3244,6 +3262,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) wdev->ssid_len = params.ssid_len; memcpy(wdev->ssid, params.ssid, wdev->ssid_len); } + wdev_unlock(wdev); kfree(params.acl); @@ -3272,7 +3291,11 @@ static int nl80211_set_beacon(struct sk_buff *skb, struct genl_info *info) if (err) return err; - return rdev_change_beacon(rdev, dev, ¶ms); + wdev_lock(wdev); + err = rdev_change_beacon(rdev, dev, ¶ms); + wdev_unlock(wdev); + + return err; } static int nl80211_stop_ap(struct sk_buff *skb, struct genl_info *info) @@ -4144,6 +4167,12 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) params.vht_capa = nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY]); + if (info->attrs[NL80211_ATTR_OPMODE_NOTIF]) { + params.opmode_notif_used = true; + params.opmode_notif = + nla_get_u8(info->attrs[NL80211_ATTR_OPMODE_NOTIF]); + } + if (info->attrs[NL80211_ATTR_STA_PLINK_ACTION]) { params.plink_action = nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_ACTION]); @@ -4478,7 +4507,9 @@ static int 
nl80211_set_bss(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = dev->ieee80211_ptr; struct bss_parameters params; + int err; memset(¶ms, 0, sizeof(params)); /* default to not changing parameters */ @@ -4544,7 +4575,11 @@ static int nl80211_set_bss(struct sk_buff *skb, struct genl_info *info) dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) return -EOPNOTSUPP; - return rdev_change_bss(rdev, dev, ¶ms); + wdev_lock(wdev); + err = rdev_change_bss(rdev, dev, ¶ms); + wdev_unlock(wdev); + + return err; } static const struct nla_policy reg_rule_policy[NL80211_REG_RULE_ATTR_MAX + 1] = { @@ -5098,7 +5133,7 @@ static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info) char *alpha2 = NULL; int rem_reg_rules = 0, r = 0; u32 num_rules = 0, rule_idx = 0, size_of_regd; - u8 dfs_region = 0; + enum nl80211_dfs_regions dfs_region = NL80211_DFS_UNSET; struct ieee80211_regdomain *rd = NULL; if (!info->attrs[NL80211_ATTR_REG_ALPHA2]) @@ -5119,6 +5154,9 @@ static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info) return -EINVAL; } + if (!reg_is_valid_request(alpha2)) + return -EINVAL; + size_of_regd = sizeof(struct ieee80211_regdomain) + num_rules * sizeof(struct ieee80211_reg_rule); @@ -5219,12 +5257,7 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) goto unlock; } } else { - enum ieee80211_band band; - n_channels = 0; - - for (band = 0; band < IEEE80211_NUM_BANDS; band++) - if (wiphy->bands[band]) - n_channels += wiphy->bands[band]->n_channels; + n_channels = ieee80211_get_num_supported_channels(wiphy); } if (info->attrs[NL80211_ATTR_SCAN_SSIDS]) @@ -5365,10 +5398,8 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_SCAN_FLAGS]) { request->flags = nla_get_u32( info->attrs[NL80211_ATTR_SCAN_FLAGS]); - if (((request->flags & 
NL80211_SCAN_FLAG_LOW_PRIORITY) && - !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN)) || - ((request->flags & NL80211_SCAN_FLAG_FLUSH) && - !(wiphy->features & NL80211_FEATURE_SCAN_FLUSH))) { + if ((request->flags & NL80211_SCAN_FLAG_LOW_PRIORITY) && + !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN)) { err = -EOPNOTSUPP; goto out_free; } @@ -5434,11 +5465,7 @@ static int nl80211_start_sched_scan(struct sk_buff *skb, if (!n_channels) return -EINVAL; } else { - n_channels = 0; - - for (band = 0; band < IEEE80211_NUM_BANDS; band++) - if (wiphy->bands[band]) - n_channels += wiphy->bands[band]->n_channels; + n_channels = ieee80211_get_num_supported_channels(wiphy); } if (info->attrs[NL80211_ATTR_SCAN_SSIDS]) @@ -5608,10 +5635,8 @@ static int nl80211_start_sched_scan(struct sk_buff *skb, if (info->attrs[NL80211_ATTR_SCAN_FLAGS]) { request->flags = nla_get_u32( info->attrs[NL80211_ATTR_SCAN_FLAGS]); - if (((request->flags & NL80211_SCAN_FLAG_LOW_PRIORITY) && - !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN)) || - ((request->flags & NL80211_SCAN_FLAG_FLUSH) && - !(wiphy->features & NL80211_FEATURE_SCAN_FLUSH))) { + if ((request->flags & NL80211_SCAN_FLAG_LOW_PRIORITY) && + !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN)) { err = -EOPNOTSUPP; goto out_free; } @@ -5655,8 +5680,13 @@ static int nl80211_start_radar_detection(struct sk_buff *skb, struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_chan_def chandef; + enum nl80211_dfs_regions dfs_region; int err; + dfs_region = reg_get_dfs_region(wdev->wiphy); + if (dfs_region == NL80211_DFS_UNSET) + return -EINVAL; + err = nl80211_parse_chandef(rdev, info, &chandef); if (err) return err; @@ -5674,7 +5704,7 @@ static int nl80211_start_radar_detection(struct sk_buff *skb, if (err == 0) return -EINVAL; - if (chandef.chan->dfs_state != NL80211_DFS_USABLE) + if (!cfg80211_chandef_dfs_usable(wdev->wiphy, &chandef)) return -EINVAL; if 
(!rdev->ops->start_radar_detection) @@ -5814,7 +5844,11 @@ skip_beacons: if (info->attrs[NL80211_ATTR_CH_SWITCH_BLOCK_TX]) params.block_tx = true; - return rdev_channel_switch(rdev, dev, ¶ms); + wdev_lock(wdev); + err = rdev_channel_switch(rdev, dev, ¶ms); + wdev_unlock(wdev); + + return err; } static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb, @@ -6677,6 +6711,101 @@ static int nl80211_set_mcast_rate(struct sk_buff *skb, struct genl_info *info) return err; } +static struct sk_buff * +__cfg80211_alloc_vendor_skb(struct cfg80211_registered_device *rdev, + int approxlen, u32 portid, u32 seq, + enum nl80211_commands cmd, + enum nl80211_attrs attr, + const struct nl80211_vendor_cmd_info *info, + gfp_t gfp) +{ + struct sk_buff *skb; + void *hdr; + struct nlattr *data; + + skb = nlmsg_new(approxlen + 100, gfp); + if (!skb) + return NULL; + + hdr = nl80211hdr_put(skb, portid, seq, 0, cmd); + if (!hdr) { + kfree_skb(skb); + return NULL; + } + + if (nla_put_u32(skb, NL80211_ATTR_WIPHY, rdev->wiphy_idx)) + goto nla_put_failure; + + if (info) { + if (nla_put_u32(skb, NL80211_ATTR_VENDOR_ID, + info->vendor_id)) + goto nla_put_failure; + if (nla_put_u32(skb, NL80211_ATTR_VENDOR_SUBCMD, + info->subcmd)) + goto nla_put_failure; + } + + data = nla_nest_start(skb, attr); + + ((void **)skb->cb)[0] = rdev; + ((void **)skb->cb)[1] = hdr; + ((void **)skb->cb)[2] = data; + + return skb; + + nla_put_failure: + kfree_skb(skb); + return NULL; +} + +struct sk_buff *__cfg80211_alloc_event_skb(struct wiphy *wiphy, + enum nl80211_commands cmd, + enum nl80211_attrs attr, + int vendor_event_idx, + int approxlen, gfp_t gfp) +{ + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + const struct nl80211_vendor_cmd_info *info; + + switch (cmd) { + case NL80211_CMD_TESTMODE: + if (WARN_ON(vendor_event_idx != -1)) + return NULL; + info = NULL; + break; + case NL80211_CMD_VENDOR: + if (WARN_ON(vendor_event_idx < 0 || + vendor_event_idx >= wiphy->n_vendor_events)) 
+ return NULL; + info = &wiphy->vendor_events[vendor_event_idx]; + break; + default: + WARN_ON(1); + return NULL; + } + + return __cfg80211_alloc_vendor_skb(rdev, approxlen, 0, 0, + cmd, attr, info, gfp); +} +EXPORT_SYMBOL(__cfg80211_alloc_event_skb); + +void __cfg80211_send_event_skb(struct sk_buff *skb, gfp_t gfp) +{ + struct cfg80211_registered_device *rdev = ((void **)skb->cb)[0]; + void *hdr = ((void **)skb->cb)[1]; + struct nlattr *data = ((void **)skb->cb)[2]; + enum nl80211_multicast_groups mcgrp = NL80211_MCGRP_TESTMODE; + + nla_nest_end(skb, data); + genlmsg_end(skb, hdr); + + if (data->nla_type == NL80211_ATTR_VENDOR_DATA) + mcgrp = NL80211_MCGRP_VENDOR; + + genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), skb, 0, + mcgrp, gfp); +} +EXPORT_SYMBOL(__cfg80211_send_event_skb); #ifdef CONFIG_NL80211_TESTMODE static int nl80211_testmode_do(struct sk_buff *skb, struct genl_info *info) @@ -6701,11 +6830,11 @@ static int nl80211_testmode_do(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[NL80211_ATTR_TESTDATA]) return -EINVAL; - rdev->testmode_info = info; + rdev->cur_cmd_info = info; err = rdev_testmode_cmd(rdev, wdev, nla_data(info->attrs[NL80211_ATTR_TESTDATA]), nla_len(info->attrs[NL80211_ATTR_TESTDATA])); - rdev->testmode_info = NULL; + rdev->cur_cmd_info = NULL; return err; } @@ -6804,93 +6933,6 @@ static int nl80211_testmode_dump(struct sk_buff *skb, rtnl_unlock(); return err; } - -static struct sk_buff * -__cfg80211_testmode_alloc_skb(struct cfg80211_registered_device *rdev, - int approxlen, u32 portid, u32 seq, gfp_t gfp) -{ - struct sk_buff *skb; - void *hdr; - struct nlattr *data; - - skb = nlmsg_new(approxlen + 100, gfp); - if (!skb) - return NULL; - - hdr = nl80211hdr_put(skb, portid, seq, 0, NL80211_CMD_TESTMODE); - if (!hdr) { - kfree_skb(skb); - return NULL; - } - - if (nla_put_u32(skb, NL80211_ATTR_WIPHY, rdev->wiphy_idx)) - goto nla_put_failure; - data = nla_nest_start(skb, NL80211_ATTR_TESTDATA); - - ((void 
**)skb->cb)[0] = rdev; - ((void **)skb->cb)[1] = hdr; - ((void **)skb->cb)[2] = data; - - return skb; - - nla_put_failure: - kfree_skb(skb); - return NULL; -} - -struct sk_buff *cfg80211_testmode_alloc_reply_skb(struct wiphy *wiphy, - int approxlen) -{ - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - - if (WARN_ON(!rdev->testmode_info)) - return NULL; - - return __cfg80211_testmode_alloc_skb(rdev, approxlen, - rdev->testmode_info->snd_portid, - rdev->testmode_info->snd_seq, - GFP_KERNEL); -} -EXPORT_SYMBOL(cfg80211_testmode_alloc_reply_skb); - -int cfg80211_testmode_reply(struct sk_buff *skb) -{ - struct cfg80211_registered_device *rdev = ((void **)skb->cb)[0]; - void *hdr = ((void **)skb->cb)[1]; - struct nlattr *data = ((void **)skb->cb)[2]; - - if (WARN_ON(!rdev->testmode_info)) { - kfree_skb(skb); - return -EINVAL; - } - - nla_nest_end(skb, data); - genlmsg_end(skb, hdr); - return genlmsg_reply(skb, rdev->testmode_info); -} -EXPORT_SYMBOL(cfg80211_testmode_reply); - -struct sk_buff *cfg80211_testmode_alloc_event_skb(struct wiphy *wiphy, - int approxlen, gfp_t gfp) -{ - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - - return __cfg80211_testmode_alloc_skb(rdev, approxlen, 0, 0, gfp); -} -EXPORT_SYMBOL(cfg80211_testmode_alloc_event_skb); - -void cfg80211_testmode_event(struct sk_buff *skb, gfp_t gfp) -{ - struct cfg80211_registered_device *rdev = ((void **)skb->cb)[0]; - void *hdr = ((void **)skb->cb)[1]; - struct nlattr *data = ((void **)skb->cb)[2]; - - nla_nest_end(skb, data); - genlmsg_end(skb, hdr); - genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), skb, 0, - NL80211_MCGRP_TESTMODE, gfp); -} -EXPORT_SYMBOL(cfg80211_testmode_event); #endif static int nl80211_connect(struct sk_buff *skb, struct genl_info *info) @@ -7312,11 +7354,72 @@ static bool ht_rateset_to_mask(struct ieee80211_supported_band *sband, return true; } +static u16 vht_mcs_map_to_mcs_mask(u8 vht_mcs_map) +{ + u16 mcs_mask = 0; + + switch 
(vht_mcs_map) { + case IEEE80211_VHT_MCS_NOT_SUPPORTED: + break; + case IEEE80211_VHT_MCS_SUPPORT_0_7: + mcs_mask = 0x00FF; + break; + case IEEE80211_VHT_MCS_SUPPORT_0_8: + mcs_mask = 0x01FF; + break; + case IEEE80211_VHT_MCS_SUPPORT_0_9: + mcs_mask = 0x03FF; + break; + default: + break; + } + + return mcs_mask; +} + +static void vht_build_mcs_mask(u16 vht_mcs_map, + u16 vht_mcs_mask[NL80211_VHT_NSS_MAX]) +{ + u8 nss; + + for (nss = 0; nss < NL80211_VHT_NSS_MAX; nss++) { + vht_mcs_mask[nss] = vht_mcs_map_to_mcs_mask(vht_mcs_map & 0x03); + vht_mcs_map >>= 2; + } +} + +static bool vht_set_mcs_mask(struct ieee80211_supported_band *sband, + struct nl80211_txrate_vht *txrate, + u16 mcs[NL80211_VHT_NSS_MAX]) +{ + u16 tx_mcs_map = le16_to_cpu(sband->vht_cap.vht_mcs.tx_mcs_map); + u16 tx_mcs_mask[NL80211_VHT_NSS_MAX] = {}; + u8 i; + + if (!sband->vht_cap.vht_supported) + return false; + + memset(mcs, 0, sizeof(u16) * NL80211_VHT_NSS_MAX); + + /* Build vht_mcs_mask from VHT capabilities */ + vht_build_mcs_mask(tx_mcs_map, tx_mcs_mask); + + for (i = 0; i < NL80211_VHT_NSS_MAX; i++) { + if ((tx_mcs_mask[i] & txrate->mcs[i]) == txrate->mcs[i]) + mcs[i] = txrate->mcs[i]; + else + return false; + } + + return true; +} + static const struct nla_policy nl80211_txattr_policy[NL80211_TXRATE_MAX + 1] = { [NL80211_TXRATE_LEGACY] = { .type = NLA_BINARY, .len = NL80211_MAX_SUPP_RATES }, - [NL80211_TXRATE_MCS] = { .type = NLA_BINARY, - .len = NL80211_MAX_SUPP_HT_RATES }, + [NL80211_TXRATE_HT] = { .type = NLA_BINARY, + .len = NL80211_MAX_SUPP_HT_RATES }, + [NL80211_TXRATE_VHT] = { .len = sizeof(struct nl80211_txrate_vht)}, }; static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb, @@ -7329,9 +7432,7 @@ static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb, struct net_device *dev = info->user_ptr[1]; struct nlattr *tx_rates; struct ieee80211_supported_band *sband; - - if (info->attrs[NL80211_ATTR_TX_RATES] == NULL) - return -EINVAL; + u16 vht_tx_mcs_map; if 
(!rdev->ops->set_bitrate_mask) return -EOPNOTSUPP; @@ -7340,17 +7441,26 @@ static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb, /* Default to all rates enabled */ for (i = 0; i < IEEE80211_NUM_BANDS; i++) { sband = rdev->wiphy.bands[i]; - mask.control[i].legacy = - sband ? (1 << sband->n_bitrates) - 1 : 0; - if (sband) - memcpy(mask.control[i].mcs, - sband->ht_cap.mcs.rx_mask, - sizeof(mask.control[i].mcs)); - else - memset(mask.control[i].mcs, 0, - sizeof(mask.control[i].mcs)); + + if (!sband) + continue; + + mask.control[i].legacy = (1 << sband->n_bitrates) - 1; + memcpy(mask.control[i].ht_mcs, + sband->ht_cap.mcs.rx_mask, + sizeof(mask.control[i].ht_mcs)); + + if (!sband->vht_cap.vht_supported) + continue; + + vht_tx_mcs_map = le16_to_cpu(sband->vht_cap.vht_mcs.tx_mcs_map); + vht_build_mcs_mask(vht_tx_mcs_map, mask.control[i].vht_mcs); } + /* if no rates are given set it back to the defaults */ + if (!info->attrs[NL80211_ATTR_TX_RATES]) + goto out; + /* * The nested attribute uses enum nl80211_band as the index. This maps * directly to the enum ieee80211_band values used in cfg80211. @@ -7375,31 +7485,44 @@ static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb, nla_len(tb[NL80211_TXRATE_LEGACY])) return -EINVAL; } - if (tb[NL80211_TXRATE_MCS]) { + if (tb[NL80211_TXRATE_HT]) { if (!ht_rateset_to_mask( sband, - nla_data(tb[NL80211_TXRATE_MCS]), - nla_len(tb[NL80211_TXRATE_MCS]), - mask.control[band].mcs)) + nla_data(tb[NL80211_TXRATE_HT]), + nla_len(tb[NL80211_TXRATE_HT]), + mask.control[band].ht_mcs)) + return -EINVAL; + } + if (tb[NL80211_TXRATE_VHT]) { + if (!vht_set_mcs_mask( + sband, + nla_data(tb[NL80211_TXRATE_VHT]), + mask.control[band].vht_mcs)) return -EINVAL; } if (mask.control[band].legacy == 0) { - /* don't allow empty legacy rates if HT - * is not even supported. */ - if (!rdev->wiphy.bands[band]->ht_cap.ht_supported) + /* don't allow empty legacy rates if HT or VHT + * are not even supported. 
+ */ + if (!(rdev->wiphy.bands[band]->ht_cap.ht_supported || + rdev->wiphy.bands[band]->vht_cap.vht_supported)) return -EINVAL; for (i = 0; i < IEEE80211_HT_MCS_MASK_LEN; i++) - if (mask.control[band].mcs[i]) - break; + if (mask.control[band].ht_mcs[i]) + goto out; + + for (i = 0; i < NL80211_VHT_NSS_MAX; i++) + if (mask.control[band].vht_mcs[i]) + goto out; /* legacy and mcs rates may not be both empty */ - if (i == IEEE80211_HT_MCS_MASK_LEN) - return -EINVAL; + return -EINVAL; } } +out: return rdev_set_bitrate_mask(rdev, dev, NULL, &mask); } @@ -7447,10 +7570,10 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) void *hdr = NULL; u64 cookie; struct sk_buff *msg = NULL; - unsigned int wait = 0; - bool offchan, no_cck, dont_wait_for_ack; - - dont_wait_for_ack = info->attrs[NL80211_ATTR_DONT_WAIT_FOR_ACK]; + struct cfg80211_mgmt_tx_params params = { + .dont_wait_for_ack = + info->attrs[NL80211_ATTR_DONT_WAIT_FOR_ACK], + }; if (!info->attrs[NL80211_ATTR_FRAME]) return -EINVAL; @@ -7477,24 +7600,24 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_DURATION]) { if (!(rdev->wiphy.flags & WIPHY_FLAG_OFFCHAN_TX)) return -EINVAL; - wait = nla_get_u32(info->attrs[NL80211_ATTR_DURATION]); + params.wait = nla_get_u32(info->attrs[NL80211_ATTR_DURATION]); /* * We should wait on the channel for at least a minimum amount * of time (10ms) but no longer than the driver supports. 
*/ - if (wait < NL80211_MIN_REMAIN_ON_CHANNEL_TIME || - wait > rdev->wiphy.max_remain_on_channel_duration) + if (params.wait < NL80211_MIN_REMAIN_ON_CHANNEL_TIME || + params.wait > rdev->wiphy.max_remain_on_channel_duration) return -EINVAL; } - offchan = info->attrs[NL80211_ATTR_OFFCHANNEL_TX_OK]; + params.offchan = info->attrs[NL80211_ATTR_OFFCHANNEL_TX_OK]; - if (offchan && !(rdev->wiphy.flags & WIPHY_FLAG_OFFCHAN_TX)) + if (params.offchan && !(rdev->wiphy.flags & WIPHY_FLAG_OFFCHAN_TX)) return -EINVAL; - no_cck = nla_get_flag(info->attrs[NL80211_ATTR_TX_NO_CCK_RATE]); + params.no_cck = nla_get_flag(info->attrs[NL80211_ATTR_TX_NO_CCK_RATE]); /* get the channel if any has been specified, otherwise pass NULL to * the driver. The latter will use the current one @@ -7506,10 +7629,10 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) return err; } - if (!chandef.chan && offchan) + if (!chandef.chan && params.offchan) return -EINVAL; - if (!dont_wait_for_ack) { + if (!params.dont_wait_for_ack) { msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; @@ -7522,10 +7645,10 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) } } - err = cfg80211_mlme_mgmt_tx(rdev, wdev, chandef.chan, offchan, wait, - nla_data(info->attrs[NL80211_ATTR_FRAME]), - nla_len(info->attrs[NL80211_ATTR_FRAME]), - no_cck, dont_wait_for_ack, &cookie); + params.buf = nla_data(info->attrs[NL80211_ATTR_FRAME]); + params.len = nla_len(info->attrs[NL80211_ATTR_FRAME]); + params.chan = chandef.chan; + err = cfg80211_mlme_mgmt_tx(rdev, wdev, ¶ms, &cookie); if (err) goto free_msg; @@ -8859,6 +8982,162 @@ static int nl80211_crit_protocol_stop(struct sk_buff *skb, return 0; } +static int nl80211_vendor_cmd(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct wireless_dev *wdev = + __cfg80211_wdev_from_attrs(genl_info_net(info), info->attrs); + int i, err; + u32 vid, subcmd; + + 
if (!rdev->wiphy.vendor_commands) + return -EOPNOTSUPP; + + if (IS_ERR(wdev)) { + err = PTR_ERR(wdev); + if (err != -EINVAL) + return err; + wdev = NULL; + } else if (wdev->wiphy != &rdev->wiphy) { + return -EINVAL; + } + + if (!info->attrs[NL80211_ATTR_VENDOR_ID] || + !info->attrs[NL80211_ATTR_VENDOR_SUBCMD]) + return -EINVAL; + + vid = nla_get_u32(info->attrs[NL80211_ATTR_VENDOR_ID]); + subcmd = nla_get_u32(info->attrs[NL80211_ATTR_VENDOR_SUBCMD]); + for (i = 0; i < rdev->wiphy.n_vendor_commands; i++) { + const struct wiphy_vendor_command *vcmd; + void *data = NULL; + int len = 0; + + vcmd = &rdev->wiphy.vendor_commands[i]; + + if (vcmd->info.vendor_id != vid || vcmd->info.subcmd != subcmd) + continue; + + if (vcmd->flags & (WIPHY_VENDOR_CMD_NEED_WDEV | + WIPHY_VENDOR_CMD_NEED_NETDEV)) { + if (!wdev) + return -EINVAL; + if (vcmd->flags & WIPHY_VENDOR_CMD_NEED_NETDEV && + !wdev->netdev) + return -EINVAL; + + if (vcmd->flags & WIPHY_VENDOR_CMD_NEED_RUNNING) { + if (wdev->netdev && + !netif_running(wdev->netdev)) + return -ENETDOWN; + if (!wdev->netdev && !wdev->p2p_started) + return -ENETDOWN; + } + } else { + wdev = NULL; + } + + if (info->attrs[NL80211_ATTR_VENDOR_DATA]) { + data = nla_data(info->attrs[NL80211_ATTR_VENDOR_DATA]); + len = nla_len(info->attrs[NL80211_ATTR_VENDOR_DATA]); + } + + rdev->cur_cmd_info = info; + err = rdev->wiphy.vendor_commands[i].doit(&rdev->wiphy, wdev, + data, len); + rdev->cur_cmd_info = NULL; + return err; + } + + return -EOPNOTSUPP; +} + +struct sk_buff *__cfg80211_alloc_reply_skb(struct wiphy *wiphy, + enum nl80211_commands cmd, + enum nl80211_attrs attr, + int approxlen) +{ + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + + if (WARN_ON(!rdev->cur_cmd_info)) + return NULL; + + return __cfg80211_alloc_vendor_skb(rdev, approxlen, + rdev->cur_cmd_info->snd_portid, + rdev->cur_cmd_info->snd_seq, + cmd, attr, NULL, GFP_KERNEL); +} +EXPORT_SYMBOL(__cfg80211_alloc_reply_skb); + +int cfg80211_vendor_cmd_reply(struct 
sk_buff *skb) +{ + struct cfg80211_registered_device *rdev = ((void **)skb->cb)[0]; + void *hdr = ((void **)skb->cb)[1]; + struct nlattr *data = ((void **)skb->cb)[2]; + + if (WARN_ON(!rdev->cur_cmd_info)) { + kfree_skb(skb); + return -EINVAL; + } + + nla_nest_end(skb, data); + genlmsg_end(skb, hdr); + return genlmsg_reply(skb, rdev->cur_cmd_info); +} +EXPORT_SYMBOL_GPL(cfg80211_vendor_cmd_reply); + + +static int nl80211_set_qos_map(struct sk_buff *skb, + struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct cfg80211_qos_map *qos_map = NULL; + struct net_device *dev = info->user_ptr[1]; + u8 *pos, len, num_des, des_len, des; + int ret; + + if (!rdev->ops->set_qos_map) + return -EOPNOTSUPP; + + if (info->attrs[NL80211_ATTR_QOS_MAP]) { + pos = nla_data(info->attrs[NL80211_ATTR_QOS_MAP]); + len = nla_len(info->attrs[NL80211_ATTR_QOS_MAP]); + + if (len % 2 || len < IEEE80211_QOS_MAP_LEN_MIN || + len > IEEE80211_QOS_MAP_LEN_MAX) + return -EINVAL; + + qos_map = kzalloc(sizeof(struct cfg80211_qos_map), GFP_KERNEL); + if (!qos_map) + return -ENOMEM; + + num_des = (len - IEEE80211_QOS_MAP_LEN_MIN) >> 1; + if (num_des) { + des_len = num_des * + sizeof(struct cfg80211_dscp_exception); + memcpy(qos_map->dscp_exception, pos, des_len); + qos_map->num_des = num_des; + for (des = 0; des < num_des; des++) { + if (qos_map->dscp_exception[des].up > 7) { + kfree(qos_map); + return -EINVAL; + } + } + pos += des_len; + } + memcpy(qos_map->up, pos, IEEE80211_QOS_MAP_LEN_MIN); + } + + wdev_lock(dev->ieee80211_ptr); + ret = nl80211_key_allowed(dev->ieee80211_ptr); + if (!ret) + ret = rdev_set_qos_map(rdev, dev, qos_map); + wdev_unlock(dev->ieee80211_ptr); + + kfree(qos_map); + return ret; +} + #define NL80211_FLAG_NEED_WIPHY 0x01 #define NL80211_FLAG_NEED_NETDEV 0x02 #define NL80211_FLAG_NEED_RTNL 0x04 @@ -9583,6 +9862,22 @@ static const struct genl_ops nl80211_ops[] = { .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, 
+ { + .cmd = NL80211_CMD_VENDOR, + .doit = nl80211_vendor_cmd, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_WIPHY | + NL80211_FLAG_NEED_RTNL, + }, + { + .cmd = NL80211_CMD_SET_QOS_MAP, + .doit = nl80211_set_qos_map, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, }; /* notification functions */ @@ -10810,21 +11105,18 @@ void cfg80211_ch_switch_notify(struct net_device *dev, struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - trace_cfg80211_ch_switch_notify(dev, chandef); + ASSERT_WDEV_LOCK(wdev); - wdev_lock(wdev); + trace_cfg80211_ch_switch_notify(dev, chandef); if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP && wdev->iftype != NL80211_IFTYPE_P2P_GO && wdev->iftype != NL80211_IFTYPE_ADHOC && wdev->iftype != NL80211_IFTYPE_MESH_POINT)) - goto out; + return; wdev->channel = chandef->chan; nl80211_ch_switch_notify(rdev, dev, chandef, GFP_KERNEL); -out: - wdev_unlock(wdev); - return; } EXPORT_SYMBOL(cfg80211_ch_switch_notify); @@ -10883,7 +11175,7 @@ EXPORT_SYMBOL(cfg80211_cqm_txe_notify); void nl80211_radar_notify(struct cfg80211_registered_device *rdev, - struct cfg80211_chan_def *chandef, + const struct cfg80211_chan_def *chandef, enum nl80211_radar_event event, struct net_device *netdev, gfp_t gfp) { diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index 2c0f2b3c07c..b1b231324e1 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -70,7 +70,7 @@ int nl80211_send_mgmt(struct cfg80211_registered_device *rdev, void nl80211_radar_notify(struct cfg80211_registered_device *rdev, - struct cfg80211_chan_def *chandef, + const struct cfg80211_chan_def *chandef, enum nl80211_radar_event event, struct net_device *netdev, gfp_t gfp); diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 37ce9fdfe93..c8e225947ad 100644 --- a/net/wireless/rdev-ops.h +++ 
b/net/wireless/rdev-ops.h @@ -624,16 +624,12 @@ rdev_cancel_remain_on_channel(struct cfg80211_registered_device *rdev, static inline int rdev_mgmt_tx(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, - struct ieee80211_channel *chan, bool offchan, - unsigned int wait, const u8 *buf, size_t len, - bool no_cck, bool dont_wait_for_ack, u64 *cookie) + struct cfg80211_mgmt_tx_params *params, + u64 *cookie) { int ret; - trace_rdev_mgmt_tx(&rdev->wiphy, wdev, chan, offchan, - wait, no_cck, dont_wait_for_ack); - ret = rdev->ops->mgmt_tx(&rdev->wiphy, wdev, chan, offchan, - wait, buf, len, no_cck, - dont_wait_for_ack, cookie); + trace_rdev_mgmt_tx(&rdev->wiphy, wdev, params); + ret = rdev->ops->mgmt_tx(&rdev->wiphy, wdev, params, cookie); trace_rdev_return_int_cookie(&rdev->wiphy, ret, *cookie); return ret; } @@ -936,4 +932,19 @@ static inline int rdev_channel_switch(struct cfg80211_registered_device *rdev, return ret; } +static inline int rdev_set_qos_map(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct cfg80211_qos_map *qos_map) +{ + int ret = -EOPNOTSUPP; + + if (rdev->ops->set_qos_map) { + trace_rdev_set_qos_map(&rdev->wiphy, dev, qos_map); + ret = rdev->ops->set_qos_map(&rdev->wiphy, dev, qos_map); + trace_rdev_return_int(&rdev->wiphy, ret); + } + + return ret; +} + #endif /* __CFG80211_RDEV_OPS */ diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 7da67fd0b41..9b897fca748 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -120,6 +120,48 @@ static const struct ieee80211_regdomain *get_wiphy_regdom(struct wiphy *wiphy) return rtnl_dereference(wiphy->regd); } +static const char *reg_dfs_region_str(enum nl80211_dfs_regions dfs_region) +{ + switch (dfs_region) { + case NL80211_DFS_UNSET: + return "unset"; + case NL80211_DFS_FCC: + return "FCC"; + case NL80211_DFS_ETSI: + return "ETSI"; + case NL80211_DFS_JP: + return "JP"; + } + return "Unknown"; +} + +enum nl80211_dfs_regions reg_get_dfs_region(struct wiphy 
*wiphy) +{ + const struct ieee80211_regdomain *regd = NULL; + const struct ieee80211_regdomain *wiphy_regd = NULL; + + regd = get_cfg80211_regdom(); + if (!wiphy) + goto out; + + wiphy_regd = get_wiphy_regdom(wiphy); + if (!wiphy_regd) + goto out; + + if (wiphy_regd->dfs_region == regd->dfs_region) + goto out; + + REG_DBG_PRINT("%s: device specific dfs_region " + "(%s) disagrees with cfg80211's " + "central dfs_region (%s)\n", + dev_name(&wiphy->dev), + reg_dfs_region_str(wiphy_regd->dfs_region), + reg_dfs_region_str(regd->dfs_region)); + +out: + return regd->dfs_region; +} + static void rcu_free_regdom(const struct ieee80211_regdomain *r) { if (!r) @@ -163,35 +205,29 @@ static const struct ieee80211_regdomain world_regdom = { REG_RULE(2412-10, 2462+10, 40, 6, 20, 0), /* IEEE 802.11b/g, channels 12..13. */ REG_RULE(2467-10, 2472+10, 40, 6, 20, - NL80211_RRF_PASSIVE_SCAN | - NL80211_RRF_NO_IBSS), + NL80211_RRF_NO_IR), /* IEEE 802.11 channel 14 - Only JP enables * this and for 802.11b only */ REG_RULE(2484-10, 2484+10, 20, 6, 20, - NL80211_RRF_PASSIVE_SCAN | - NL80211_RRF_NO_IBSS | + NL80211_RRF_NO_IR | NL80211_RRF_NO_OFDM), /* IEEE 802.11a, channel 36..48 */ REG_RULE(5180-10, 5240+10, 160, 6, 20, - NL80211_RRF_PASSIVE_SCAN | - NL80211_RRF_NO_IBSS), + NL80211_RRF_NO_IR), /* IEEE 802.11a, channel 52..64 - DFS required */ REG_RULE(5260-10, 5320+10, 160, 6, 20, - NL80211_RRF_PASSIVE_SCAN | - NL80211_RRF_NO_IBSS | + NL80211_RRF_NO_IR | NL80211_RRF_DFS), /* IEEE 802.11a, channel 100..144 - DFS required */ REG_RULE(5500-10, 5720+10, 160, 6, 20, - NL80211_RRF_PASSIVE_SCAN | - NL80211_RRF_NO_IBSS | + NL80211_RRF_NO_IR | NL80211_RRF_DFS), /* IEEE 802.11a, channel 149..165 */ REG_RULE(5745-10, 5825+10, 80, 6, 20, - NL80211_RRF_PASSIVE_SCAN | - NL80211_RRF_NO_IBSS), + NL80211_RRF_NO_IR), /* IEEE 802.11ad (60gHz), channels 1..3 */ REG_RULE(56160+2160*1-1080, 56160+2160*3+1080, 2160, 0, 0, 0), @@ -208,11 +244,26 @@ static char user_alpha2[2]; module_param(ieee80211_regdom, charp, 
0444); MODULE_PARM_DESC(ieee80211_regdom, "IEEE 802.11 regulatory domain code"); +static void reg_kfree_last_request(void) +{ + struct regulatory_request *lr; + + lr = get_last_request(); + + if (lr != &core_request_world && lr) + kfree_rcu(lr, rcu_head); +} + +static void reg_update_last_request(struct regulatory_request *request) +{ + reg_kfree_last_request(); + rcu_assign_pointer(last_request, request); +} + static void reset_regdomains(bool full_reset, const struct ieee80211_regdomain *new_regdom) { const struct ieee80211_regdomain *r; - struct regulatory_request *lr; ASSERT_RTNL(); @@ -235,10 +286,7 @@ static void reset_regdomains(bool full_reset, if (!full_reset) return; - lr = get_last_request(); - if (lr != &core_request_world && lr) - kfree_rcu(lr, rcu_head); - rcu_assign_pointer(last_request, &core_request_world); + reg_update_last_request(&core_request_world); } /* @@ -456,7 +504,15 @@ static int call_crda(const char *alpha2) return kobject_uevent(&reg_pdev->dev.kobj, KOBJ_CHANGE); } -static bool reg_is_valid_request(const char *alpha2) +static enum reg_request_treatment +reg_call_crda(struct regulatory_request *request) +{ + if (call_crda(request->alpha2)) + return REG_REQ_IGNORE; + return REG_REQ_OK; +} + +bool reg_is_valid_request(const char *alpha2) { struct regulatory_request *lr = get_last_request(); @@ -557,6 +613,20 @@ static bool freq_in_rule_band(const struct ieee80211_freq_range *freq_range, } /* + * Later on we can perhaps use the more restrictive DFS + * region but we don't have information for that yet so + * for now simply disallow conflicts. 
+ */ +static enum nl80211_dfs_regions +reg_intersect_dfs_region(const enum nl80211_dfs_regions dfs_region1, + const enum nl80211_dfs_regions dfs_region2) +{ + if (dfs_region1 != dfs_region2) + return NL80211_DFS_UNSET; + return dfs_region1; +} + +/* * Helper for regdom_intersect(), this does the real * mathematical intersection fun */ @@ -687,6 +757,8 @@ regdom_intersect(const struct ieee80211_regdomain *rd1, rd->n_reg_rules = num_rules; rd->alpha2[0] = '9'; rd->alpha2[1] = '8'; + rd->dfs_region = reg_intersect_dfs_region(rd1->dfs_region, + rd2->dfs_region); return rd; } @@ -698,10 +770,8 @@ regdom_intersect(const struct ieee80211_regdomain *rd1, static u32 map_regdom_flags(u32 rd_flags) { u32 channel_flags = 0; - if (rd_flags & NL80211_RRF_PASSIVE_SCAN) - channel_flags |= IEEE80211_CHAN_PASSIVE_SCAN; - if (rd_flags & NL80211_RRF_NO_IBSS) - channel_flags |= IEEE80211_CHAN_NO_IBSS; + if (rd_flags & NL80211_RRF_NO_IR_ALL) + channel_flags |= IEEE80211_CHAN_NO_IR; if (rd_flags & NL80211_RRF_DFS) channel_flags |= IEEE80211_CHAN_RADAR; if (rd_flags & NL80211_RRF_NO_OFDM) @@ -854,8 +924,18 @@ static void handle_channel(struct wiphy *wiphy, PTR_ERR(reg_rule) == -ERANGE) return; - REG_DBG_PRINT("Disabling freq %d MHz\n", chan->center_freq); - chan->flags |= IEEE80211_CHAN_DISABLED; + if (lr->initiator == NL80211_REGDOM_SET_BY_DRIVER && + request_wiphy && request_wiphy == wiphy && + request_wiphy->regulatory_flags & REGULATORY_STRICT_REG) { + REG_DBG_PRINT("Disabling freq %d MHz for good\n", + chan->center_freq); + chan->orig_flags |= IEEE80211_CHAN_DISABLED; + chan->flags = chan->orig_flags; + } else { + REG_DBG_PRINT("Disabling freq %d MHz\n", + chan->center_freq); + chan->flags |= IEEE80211_CHAN_DISABLED; + } return; } @@ -873,7 +953,7 @@ static void handle_channel(struct wiphy *wiphy, if (lr->initiator == NL80211_REGDOM_SET_BY_DRIVER && request_wiphy && request_wiphy == wiphy && - request_wiphy->flags & WIPHY_FLAG_STRICT_REGULATORY) { + request_wiphy->regulatory_flags & 
REGULATORY_STRICT_REG) { /* * This guarantees the driver's requested regulatory domain * will always be used as a base for further regulatory @@ -899,13 +979,11 @@ static void handle_channel(struct wiphy *wiphy, chan->max_reg_power = (int) MBM_TO_DBM(power_rule->max_eirp); if (chan->orig_mpwr) { /* - * Devices that have their own custom regulatory domain - * but also use WIPHY_FLAG_STRICT_REGULATORY will follow the - * passed country IE power settings. + * Devices that use REGULATORY_COUNTRY_IE_FOLLOW_POWER + * will always follow the passed country IE power settings. */ if (initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE && - wiphy->flags & WIPHY_FLAG_CUSTOM_REGULATORY && - wiphy->flags & WIPHY_FLAG_STRICT_REGULATORY) + wiphy->regulatory_flags & REGULATORY_COUNTRY_IE_FOLLOW_POWER) chan->max_power = chan->max_reg_power; else chan->max_power = min(chan->orig_mpwr, @@ -975,8 +1053,8 @@ static bool reg_dev_ignore_cell_hint(struct wiphy *wiphy) static bool wiphy_strict_alpha2_regd(struct wiphy *wiphy) { - if (wiphy->flags & WIPHY_FLAG_STRICT_REGULATORY && - !(wiphy->flags & WIPHY_FLAG_CUSTOM_REGULATORY)) + if (wiphy->regulatory_flags & REGULATORY_STRICT_REG && + !(wiphy->regulatory_flags & REGULATORY_CUSTOM_REG)) return true; return false; } @@ -994,7 +1072,7 @@ static bool ignore_reg_update(struct wiphy *wiphy, } if (initiator == NL80211_REGDOM_SET_BY_CORE && - wiphy->flags & WIPHY_FLAG_CUSTOM_REGULATORY) { + wiphy->regulatory_flags & REGULATORY_CUSTOM_REG) { REG_DBG_PRINT("Ignoring regulatory request set by %s " "since the driver uses its own custom " "regulatory domain\n", @@ -1032,7 +1110,7 @@ static bool reg_is_world_roaming(struct wiphy *wiphy) return true; if (lr && lr->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE && - wiphy->flags & WIPHY_FLAG_CUSTOM_REGULATORY) + wiphy->regulatory_flags & REGULATORY_CUSTOM_REG) return true; return false; @@ -1060,19 +1138,14 @@ static void handle_reg_beacon(struct wiphy *wiphy, unsigned int chan_idx, if 
(!reg_is_world_roaming(wiphy)) return; - if (wiphy->flags & WIPHY_FLAG_DISABLE_BEACON_HINTS) + if (wiphy->regulatory_flags & REGULATORY_DISABLE_BEACON_HINTS) return; chan_before.center_freq = chan->center_freq; chan_before.flags = chan->flags; - if (chan->flags & IEEE80211_CHAN_PASSIVE_SCAN) { - chan->flags &= ~IEEE80211_CHAN_PASSIVE_SCAN; - channel_changed = true; - } - - if (chan->flags & IEEE80211_CHAN_NO_IBSS) { - chan->flags &= ~IEEE80211_CHAN_NO_IBSS; + if (chan->flags & IEEE80211_CHAN_NO_IR) { + chan->flags &= ~IEEE80211_CHAN_NO_IR; channel_changed = true; } @@ -1205,14 +1278,30 @@ static void reg_process_ht_flags(struct wiphy *wiphy) reg_process_ht_flags_band(wiphy, wiphy->bands[band]); } +static void reg_call_notifier(struct wiphy *wiphy, + struct regulatory_request *request) +{ + if (wiphy->reg_notifier) + wiphy->reg_notifier(wiphy, request); +} + static void wiphy_update_regulatory(struct wiphy *wiphy, enum nl80211_reg_initiator initiator) { enum ieee80211_band band; struct regulatory_request *lr = get_last_request(); - if (ignore_reg_update(wiphy, initiator)) + if (ignore_reg_update(wiphy, initiator)) { + /* + * Regulatory updates set by CORE are ignored for custom + * regulatory cards. Let us notify the changes to the driver, + * as some drivers used this to restore its orig_* reg domain. 
+ */ + if (initiator == NL80211_REGDOM_SET_BY_CORE && + wiphy->regulatory_flags & REGULATORY_CUSTOM_REG) + reg_call_notifier(wiphy, lr); return; + } lr->dfs_region = get_cfg80211_regdom()->dfs_region; @@ -1221,9 +1310,7 @@ static void wiphy_update_regulatory(struct wiphy *wiphy, reg_process_beacons(wiphy); reg_process_ht_flags(wiphy); - - if (wiphy->reg_notifier) - wiphy->reg_notifier(wiphy, lr); + reg_call_notifier(wiphy, lr); } static void update_all_wiphy_regulatory(enum nl80211_reg_initiator initiator) @@ -1236,15 +1323,6 @@ static void update_all_wiphy_regulatory(enum nl80211_reg_initiator initiator) list_for_each_entry(rdev, &cfg80211_rdev_list, list) { wiphy = &rdev->wiphy; wiphy_update_regulatory(wiphy, initiator); - /* - * Regulatory updates set by CORE are ignored for custom - * regulatory cards. Let us notify the changes to the driver, - * as some drivers used this to restore its orig_* reg domain. - */ - if (initiator == NL80211_REGDOM_SET_BY_CORE && - wiphy->flags & WIPHY_FLAG_CUSTOM_REGULATORY && - wiphy->reg_notifier) - wiphy->reg_notifier(wiphy, get_last_request()); } } @@ -1263,7 +1341,8 @@ static void handle_channel_custom(struct wiphy *wiphy, if (IS_ERR(reg_rule)) { REG_DBG_PRINT("Disabling freq %d MHz as custom regd has no rule that fits it\n", chan->center_freq); - chan->flags = IEEE80211_CHAN_DISABLED; + chan->orig_flags |= IEEE80211_CHAN_DISABLED; + chan->flags = chan->orig_flags; return; } @@ -1305,6 +1384,10 @@ void wiphy_apply_custom_regulatory(struct wiphy *wiphy, enum ieee80211_band band; unsigned int bands_set = 0; + WARN(!(wiphy->regulatory_flags & REGULATORY_CUSTOM_REG), + "wiphy should have REGULATORY_CUSTOM_REG\n"); + wiphy->regulatory_flags |= REGULATORY_CUSTOM_REG; + for (band = 0; band < IEEE80211_NUM_BANDS; band++) { if (!wiphy->bands[band]) continue; @@ -1320,225 +1403,285 @@ void wiphy_apply_custom_regulatory(struct wiphy *wiphy, } EXPORT_SYMBOL(wiphy_apply_custom_regulatory); -/* This has the logic which determines when a new 
request - * should be ignored. */ -static enum reg_request_treatment -get_reg_request_treatment(struct wiphy *wiphy, - struct regulatory_request *pending_request) +static void reg_set_request_processed(void) { - struct wiphy *last_wiphy = NULL; + bool need_more_processing = false; struct regulatory_request *lr = get_last_request(); - /* All initial requests are respected */ - if (!lr) - return REG_REQ_OK; + lr->processed = true; - switch (pending_request->initiator) { - case NL80211_REGDOM_SET_BY_CORE: - return REG_REQ_OK; - case NL80211_REGDOM_SET_BY_COUNTRY_IE: - if (reg_request_cell_base(lr)) { - /* Trust a Cell base station over the AP's country IE */ - if (regdom_changes(pending_request->alpha2)) - return REG_REQ_IGNORE; - return REG_REQ_ALREADY_SET; - } + spin_lock(&reg_requests_lock); + if (!list_empty(&reg_requests_list)) + need_more_processing = true; + spin_unlock(&reg_requests_lock); - last_wiphy = wiphy_idx_to_wiphy(lr->wiphy_idx); + if (lr->initiator == NL80211_REGDOM_SET_BY_USER) + cancel_delayed_work(&reg_timeout); - if (unlikely(!is_an_alpha2(pending_request->alpha2))) - return -EINVAL; - if (lr->initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE) { - if (last_wiphy != wiphy) { - /* - * Two cards with two APs claiming different - * Country IE alpha2s. We could - * intersect them, but that seems unlikely - * to be correct. Reject second one for now. - */ - if (regdom_changes(pending_request->alpha2)) - return REG_REQ_IGNORE; - return REG_REQ_ALREADY_SET; - } - /* - * Two consecutive Country IE hints on the same wiphy. 
- * This should be picked up early by the driver/stack - */ - if (WARN_ON(regdom_changes(pending_request->alpha2))) - return REG_REQ_OK; - return REG_REQ_ALREADY_SET; - } - return REG_REQ_OK; - case NL80211_REGDOM_SET_BY_DRIVER: - if (lr->initiator == NL80211_REGDOM_SET_BY_CORE) { - if (regdom_changes(pending_request->alpha2)) - return REG_REQ_OK; - return REG_REQ_ALREADY_SET; - } + if (need_more_processing) + schedule_work(&reg_work); +} - /* - * This would happen if you unplug and plug your card - * back in or if you add a new device for which the previously - * loaded card also agrees on the regulatory domain. - */ - if (lr->initiator == NL80211_REGDOM_SET_BY_DRIVER && - !regdom_changes(pending_request->alpha2)) - return REG_REQ_ALREADY_SET; +/** + * reg_process_hint_core - process core regulatory requests + * @pending_request: a pending core regulatory request + * + * The wireless subsystem can use this function to process + * a regulatory request issued by the regulatory core. + * + * Returns one of the different reg request treatment values. 
+ */ +static enum reg_request_treatment +reg_process_hint_core(struct regulatory_request *core_request) +{ + + core_request->intersect = false; + core_request->processed = false; + + reg_update_last_request(core_request); + + return reg_call_crda(core_request); +} +static enum reg_request_treatment +__reg_process_hint_user(struct regulatory_request *user_request) +{ + struct regulatory_request *lr = get_last_request(); + + if (reg_request_cell_base(user_request)) + return reg_ignore_cell_hint(user_request); + + if (reg_request_cell_base(lr)) + return REG_REQ_IGNORE; + + if (lr->initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE) return REG_REQ_INTERSECT; - case NL80211_REGDOM_SET_BY_USER: - if (reg_request_cell_base(pending_request)) - return reg_ignore_cell_hint(pending_request); + /* + * If the user knows better the user should set the regdom + * to their country before the IE is picked up + */ + if (lr->initiator == NL80211_REGDOM_SET_BY_USER && + lr->intersect) + return REG_REQ_IGNORE; + /* + * Process user requests only after previous user/driver/core + * requests have been processed + */ + if ((lr->initiator == NL80211_REGDOM_SET_BY_CORE || + lr->initiator == NL80211_REGDOM_SET_BY_DRIVER || + lr->initiator == NL80211_REGDOM_SET_BY_USER) && + regdom_changes(lr->alpha2)) + return REG_REQ_IGNORE; - if (reg_request_cell_base(lr)) - return REG_REQ_IGNORE; + if (!regdom_changes(user_request->alpha2)) + return REG_REQ_ALREADY_SET; - if (lr->initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE) - return REG_REQ_INTERSECT; - /* - * If the user knows better the user should set the regdom - * to their country before the IE is picked up - */ - if (lr->initiator == NL80211_REGDOM_SET_BY_USER && - lr->intersect) - return REG_REQ_IGNORE; - /* - * Process user requests only after previous user/driver/core - * requests have been processed - */ - if ((lr->initiator == NL80211_REGDOM_SET_BY_CORE || - lr->initiator == NL80211_REGDOM_SET_BY_DRIVER || - lr->initiator == 
NL80211_REGDOM_SET_BY_USER) && - regdom_changes(lr->alpha2)) - return REG_REQ_IGNORE; + return REG_REQ_OK; +} - if (!regdom_changes(pending_request->alpha2)) - return REG_REQ_ALREADY_SET; +/** + * reg_process_hint_user - process user regulatory requests + * @user_request: a pending user regulatory request + * + * The wireless subsystem can use this function to process + * a regulatory request initiated by userspace. + * + * Returns one of the different reg request treatment values. + */ +static enum reg_request_treatment +reg_process_hint_user(struct regulatory_request *user_request) +{ + enum reg_request_treatment treatment; - return REG_REQ_OK; + treatment = __reg_process_hint_user(user_request); + if (treatment == REG_REQ_IGNORE || + treatment == REG_REQ_ALREADY_SET) { + kfree(user_request); + return treatment; } - return REG_REQ_IGNORE; + user_request->intersect = treatment == REG_REQ_INTERSECT; + user_request->processed = false; + + reg_update_last_request(user_request); + + user_alpha2[0] = user_request->alpha2[0]; + user_alpha2[1] = user_request->alpha2[1]; + + return reg_call_crda(user_request); } -static void reg_set_request_processed(void) +static enum reg_request_treatment +__reg_process_hint_driver(struct regulatory_request *driver_request) { - bool need_more_processing = false; struct regulatory_request *lr = get_last_request(); - lr->processed = true; - - spin_lock(&reg_requests_lock); - if (!list_empty(&reg_requests_list)) - need_more_processing = true; - spin_unlock(&reg_requests_lock); + if (lr->initiator == NL80211_REGDOM_SET_BY_CORE) { + if (regdom_changes(driver_request->alpha2)) + return REG_REQ_OK; + return REG_REQ_ALREADY_SET; + } - if (lr->initiator == NL80211_REGDOM_SET_BY_USER) - cancel_delayed_work(&reg_timeout); + /* + * This would happen if you unplug and plug your card + * back in or if you add a new device for which the previously + * loaded card also agrees on the regulatory domain. 
+ */ + if (lr->initiator == NL80211_REGDOM_SET_BY_DRIVER && + !regdom_changes(driver_request->alpha2)) + return REG_REQ_ALREADY_SET; - if (need_more_processing) - schedule_work(&reg_work); + return REG_REQ_INTERSECT; } /** - * __regulatory_hint - hint to the wireless core a regulatory domain - * @wiphy: if the hint comes from country information from an AP, this - * is required to be set to the wiphy that received the information - * @pending_request: the regulatory request currently being processed + * reg_process_hint_driver - process driver regulatory requests + * @driver_request: a pending driver regulatory request * - * The Wireless subsystem can use this function to hint to the wireless core - * what it believes should be the current regulatory domain. + * The wireless subsystem can use this function to process + * a regulatory request issued by an 802.11 driver. * * Returns one of the different reg request treatment values. */ static enum reg_request_treatment -__regulatory_hint(struct wiphy *wiphy, - struct regulatory_request *pending_request) +reg_process_hint_driver(struct wiphy *wiphy, + struct regulatory_request *driver_request) { const struct ieee80211_regdomain *regd; - bool intersect = false; enum reg_request_treatment treatment; - struct regulatory_request *lr; - treatment = get_reg_request_treatment(wiphy, pending_request); + treatment = __reg_process_hint_driver(driver_request); switch (treatment) { - case REG_REQ_INTERSECT: - if (pending_request->initiator == - NL80211_REGDOM_SET_BY_DRIVER) { - regd = reg_copy_regd(get_cfg80211_regdom()); - if (IS_ERR(regd)) { - kfree(pending_request); - return PTR_ERR(regd); - } - rcu_assign_pointer(wiphy->regd, regd); - } - intersect = true; - break; case REG_REQ_OK: break; - default: - /* - * If the regulatory domain being requested by the - * driver has already been set just copy it to the - * wiphy - */ - if (treatment == REG_REQ_ALREADY_SET && - pending_request->initiator == NL80211_REGDOM_SET_BY_DRIVER) { - 
regd = reg_copy_regd(get_cfg80211_regdom()); - if (IS_ERR(regd)) { - kfree(pending_request); - return REG_REQ_IGNORE; - } - treatment = REG_REQ_ALREADY_SET; - rcu_assign_pointer(wiphy->regd, regd); - goto new_request; - } - kfree(pending_request); + case REG_REQ_IGNORE: + kfree(driver_request); return treatment; + case REG_REQ_INTERSECT: + /* fall through */ + case REG_REQ_ALREADY_SET: + regd = reg_copy_regd(get_cfg80211_regdom()); + if (IS_ERR(regd)) { + kfree(driver_request); + return REG_REQ_IGNORE; + } + rcu_assign_pointer(wiphy->regd, regd); } -new_request: - lr = get_last_request(); - if (lr != &core_request_world && lr) - kfree_rcu(lr, rcu_head); - pending_request->intersect = intersect; - pending_request->processed = false; - rcu_assign_pointer(last_request, pending_request); - lr = pending_request; + driver_request->intersect = treatment == REG_REQ_INTERSECT; + driver_request->processed = false; + + reg_update_last_request(driver_request); + + /* + * Since CRDA will not be called in this case as we already + * have applied the requested regulatory domain before we just + * inform userspace we have processed the request + */ + if (treatment == REG_REQ_ALREADY_SET) { + nl80211_send_reg_change_event(driver_request); + reg_set_request_processed(); + return treatment; + } + + return reg_call_crda(driver_request); +} - pending_request = NULL; +static enum reg_request_treatment +__reg_process_hint_country_ie(struct wiphy *wiphy, + struct regulatory_request *country_ie_request) +{ + struct wiphy *last_wiphy = NULL; + struct regulatory_request *lr = get_last_request(); - if (lr->initiator == NL80211_REGDOM_SET_BY_USER) { - user_alpha2[0] = lr->alpha2[0]; - user_alpha2[1] = lr->alpha2[1]; + if (reg_request_cell_base(lr)) { + /* Trust a Cell base station over the AP's country IE */ + if (regdom_changes(country_ie_request->alpha2)) + return REG_REQ_IGNORE; + return REG_REQ_ALREADY_SET; + } else { + if (wiphy->regulatory_flags & REGULATORY_COUNTRY_IE_IGNORE) + return 
REG_REQ_IGNORE; } - /* When r == REG_REQ_INTERSECT we do need to call CRDA */ - if (treatment != REG_REQ_OK && treatment != REG_REQ_INTERSECT) { + if (unlikely(!is_an_alpha2(country_ie_request->alpha2))) + return -EINVAL; + + if (lr->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE) + return REG_REQ_OK; + + last_wiphy = wiphy_idx_to_wiphy(lr->wiphy_idx); + + if (last_wiphy != wiphy) { /* - * Since CRDA will not be called in this case as we already - * have applied the requested regulatory domain before we just - * inform userspace we have processed the request + * Two cards with two APs claiming different + * Country IE alpha2s. We could + * intersect them, but that seems unlikely + * to be correct. Reject second one for now. */ - if (treatment == REG_REQ_ALREADY_SET) { - nl80211_send_reg_change_event(lr); - reg_set_request_processed(); - } - return treatment; + if (regdom_changes(country_ie_request->alpha2)) + return REG_REQ_IGNORE; + return REG_REQ_ALREADY_SET; } + /* + * Two consecutive Country IE hints on the same wiphy. + * This should be picked up early by the driver/stack + */ + if (WARN_ON(regdom_changes(country_ie_request->alpha2))) + return REG_REQ_OK; + return REG_REQ_ALREADY_SET; +} - if (call_crda(lr->alpha2)) +/** + * reg_process_hint_country_ie - process regulatory requests from country IEs + * @country_ie_request: a regulatory request from a country IE + * + * The wireless subsystem can use this function to process + * a regulatory request issued by a country Information Element. + * + * Returns one of the different reg request treatment values. 
+ */ +static enum reg_request_treatment +reg_process_hint_country_ie(struct wiphy *wiphy, + struct regulatory_request *country_ie_request) +{ + enum reg_request_treatment treatment; + + treatment = __reg_process_hint_country_ie(wiphy, country_ie_request); + + switch (treatment) { + case REG_REQ_OK: + break; + case REG_REQ_IGNORE: + /* fall through */ + case REG_REQ_ALREADY_SET: + kfree(country_ie_request); + return treatment; + case REG_REQ_INTERSECT: + kfree(country_ie_request); + /* + * This doesn't happen yet, not sure we + * ever want to support it for this case. + */ + WARN_ONCE(1, "Unexpected intersection for country IEs"); return REG_REQ_IGNORE; - return REG_REQ_OK; + } + + country_ie_request->intersect = false; + country_ie_request->processed = false; + + reg_update_last_request(country_ie_request); + + return reg_call_crda(country_ie_request); } /* This processes *all* regulatory hints */ -static void reg_process_hint(struct regulatory_request *reg_request, - enum nl80211_reg_initiator reg_initiator) +static void reg_process_hint(struct regulatory_request *reg_request) { struct wiphy *wiphy = NULL; + enum reg_request_treatment treatment; if (WARN_ON(!reg_request->alpha2)) return; @@ -1546,23 +1689,37 @@ static void reg_process_hint(struct regulatory_request *reg_request, if (reg_request->wiphy_idx != WIPHY_IDX_INVALID) wiphy = wiphy_idx_to_wiphy(reg_request->wiphy_idx); - if (reg_initiator == NL80211_REGDOM_SET_BY_DRIVER && !wiphy) { + if (reg_request->initiator == NL80211_REGDOM_SET_BY_DRIVER && !wiphy) { kfree(reg_request); return; } - switch (__regulatory_hint(wiphy, reg_request)) { - case REG_REQ_ALREADY_SET: - /* This is required so that the orig_* parameters are saved */ - if (wiphy && wiphy->flags & WIPHY_FLAG_STRICT_REGULATORY) - wiphy_update_regulatory(wiphy, reg_initiator); + switch (reg_request->initiator) { + case NL80211_REGDOM_SET_BY_CORE: + reg_process_hint_core(reg_request); + return; + case NL80211_REGDOM_SET_BY_USER: + treatment = 
reg_process_hint_user(reg_request); + if (treatment == REG_REQ_OK || + treatment == REG_REQ_ALREADY_SET) + return; + schedule_delayed_work(&reg_timeout, msecs_to_jiffies(3142)); + return; + case NL80211_REGDOM_SET_BY_DRIVER: + treatment = reg_process_hint_driver(wiphy, reg_request); break; - default: - if (reg_initiator == NL80211_REGDOM_SET_BY_USER) - schedule_delayed_work(&reg_timeout, - msecs_to_jiffies(3142)); + case NL80211_REGDOM_SET_BY_COUNTRY_IE: + treatment = reg_process_hint_country_ie(wiphy, reg_request); break; + default: + WARN(1, "invalid initiator %d\n", reg_request->initiator); + return; } + + /* This is required so that the orig_* parameters are saved */ + if (treatment == REG_REQ_ALREADY_SET && wiphy && + wiphy->regulatory_flags & REGULATORY_STRICT_REG) + wiphy_update_regulatory(wiphy, reg_request->initiator); } /* @@ -1596,7 +1753,7 @@ static void reg_process_pending_hints(void) spin_unlock(&reg_requests_lock); - reg_process_hint(reg_request, reg_request->initiator); + reg_process_hint(reg_request); } /* Processes beacon hints -- this has nothing to do with country IEs */ @@ -1696,6 +1853,8 @@ int regulatory_hint(struct wiphy *wiphy, const char *alpha2) if (WARN_ON(!alpha2 || !wiphy)) return -EINVAL; + wiphy->regulatory_flags &= ~REGULATORY_CUSTOM_REG; + request = kzalloc(sizeof(struct regulatory_request), GFP_KERNEL); if (!request) return -ENOMEM; @@ -1888,7 +2047,7 @@ static void restore_regulatory_settings(bool reset_user) world_alpha2[1] = cfg80211_world_regdom->alpha2[1]; list_for_each_entry(rdev, &cfg80211_rdev_list, list) { - if (rdev->wiphy.flags & WIPHY_FLAG_CUSTOM_REGULATORY) + if (rdev->wiphy.regulatory_flags & REGULATORY_CUSTOM_REG) restore_custom_reg_settings(&rdev->wiphy); } @@ -2016,7 +2175,7 @@ static void print_rd_rules(const struct ieee80211_regdomain *rd) } } -bool reg_supported_dfs_region(u8 dfs_region) +bool reg_supported_dfs_region(enum nl80211_dfs_regions dfs_region) { switch (dfs_region) { case NL80211_DFS_UNSET: @@ -2031,27 
+2190,6 @@ bool reg_supported_dfs_region(u8 dfs_region) } } -static void print_dfs_region(u8 dfs_region) -{ - if (!dfs_region) - return; - - switch (dfs_region) { - case NL80211_DFS_FCC: - pr_info(" DFS Master region FCC"); - break; - case NL80211_DFS_ETSI: - pr_info(" DFS Master region ETSI"); - break; - case NL80211_DFS_JP: - pr_info(" DFS Master region JP"); - break; - default: - pr_info(" DFS Master region Unknown"); - break; - } -} - static void print_regdomain(const struct ieee80211_regdomain *rd) { struct regulatory_request *lr = get_last_request(); @@ -2083,7 +2221,7 @@ static void print_regdomain(const struct ieee80211_regdomain *rd) } } - print_dfs_region(rd->dfs_region); + pr_info(" DFS Master region: %s", reg_dfs_region_str(rd->dfs_region)); print_rd_rules(rd); } @@ -2093,48 +2231,60 @@ static void print_regdomain_info(const struct ieee80211_regdomain *rd) print_rd_rules(rd); } -/* Takes ownership of rd only if it doesn't fail */ -static int __set_regdom(const struct ieee80211_regdomain *rd) +static int reg_set_rd_core(const struct ieee80211_regdomain *rd) +{ + if (!is_world_regdom(rd->alpha2)) + return -EINVAL; + update_world_regdomain(rd); + return 0; +} + +static int reg_set_rd_user(const struct ieee80211_regdomain *rd, + struct regulatory_request *user_request) { - const struct ieee80211_regdomain *regd; const struct ieee80211_regdomain *intersected_rd = NULL; - struct wiphy *request_wiphy; - struct regulatory_request *lr = get_last_request(); - /* Some basic sanity checks first */ + if (is_world_regdom(rd->alpha2)) + return -EINVAL; + + if (!regdom_changes(rd->alpha2)) + return -EALREADY; - if (!reg_is_valid_request(rd->alpha2)) + if (!is_valid_rd(rd)) { + pr_err("Invalid regulatory domain detected:\n"); + print_regdomain_info(rd); return -EINVAL; + } - if (is_world_regdom(rd->alpha2)) { - update_world_regdomain(rd); + if (!user_request->intersect) { + reset_regdomains(false, rd); return 0; } - if (!is_alpha2_set(rd->alpha2) && 
!is_an_alpha2(rd->alpha2) && - !is_unknown_alpha2(rd->alpha2)) + intersected_rd = regdom_intersect(rd, get_cfg80211_regdom()); + if (!intersected_rd) return -EINVAL; - /* - * Lets only bother proceeding on the same alpha2 if the current - * rd is non static (it means CRDA was present and was used last) - * and the pending request came in from a country IE - */ - if (lr->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE) { - /* - * If someone else asked us to change the rd lets only bother - * checking if the alpha2 changes if CRDA was already called - */ - if (!regdom_changes(rd->alpha2)) - return -EALREADY; - } + kfree(rd); + rd = NULL; + reset_regdomains(false, intersected_rd); - /* - * Now lets set the regulatory domain, update all driver channels - * and finally inform them of what we have done, in case they want - * to review or adjust their own settings based on their own - * internal EEPROM data - */ + return 0; +} + +static int reg_set_rd_driver(const struct ieee80211_regdomain *rd, + struct regulatory_request *driver_request) +{ + const struct ieee80211_regdomain *regd; + const struct ieee80211_regdomain *intersected_rd = NULL; + const struct ieee80211_regdomain *tmp; + struct wiphy *request_wiphy; + + if (is_world_regdom(rd->alpha2)) + return -EINVAL; + + if (!regdom_changes(rd->alpha2)) + return -EALREADY; if (!is_valid_rd(rd)) { pr_err("Invalid regulatory domain detected:\n"); @@ -2142,29 +2292,13 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) return -EINVAL; } - request_wiphy = wiphy_idx_to_wiphy(lr->wiphy_idx); - if (!request_wiphy && - (lr->initiator == NL80211_REGDOM_SET_BY_DRIVER || - lr->initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE)) { + request_wiphy = wiphy_idx_to_wiphy(driver_request->wiphy_idx); + if (!request_wiphy) { schedule_delayed_work(®_timeout, 0); return -ENODEV; } - if (!lr->intersect) { - if (lr->initiator != NL80211_REGDOM_SET_BY_DRIVER) { - reset_regdomains(false, rd); - return 0; - } - - /* - * For a driver hint, 
lets copy the regulatory domain the - * driver wanted to the wiphy to deal with conflicts - */ - - /* - * Userspace could have sent two replies with only - * one kernel request. - */ + if (!driver_request->intersect) { if (request_wiphy->regd) return -EALREADY; @@ -2177,38 +2311,59 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) return 0; } - /* Intersection requires a bit more work */ + intersected_rd = regdom_intersect(rd, get_cfg80211_regdom()); + if (!intersected_rd) + return -EINVAL; - if (lr->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE) { - intersected_rd = regdom_intersect(rd, get_cfg80211_regdom()); - if (!intersected_rd) - return -EINVAL; + /* + * We can trash what CRDA provided now. + * However if a driver requested this specific regulatory + * domain we keep it for its private use + */ + tmp = get_wiphy_regdom(request_wiphy); + rcu_assign_pointer(request_wiphy->regd, rd); + rcu_free_regdom(tmp); - /* - * We can trash what CRDA provided now. - * However if a driver requested this specific regulatory - * domain we keep it for its private use - */ - if (lr->initiator == NL80211_REGDOM_SET_BY_DRIVER) { - const struct ieee80211_regdomain *tmp; + rd = NULL; - tmp = get_wiphy_regdom(request_wiphy); - rcu_assign_pointer(request_wiphy->regd, rd); - rcu_free_regdom(tmp); - } else { - kfree(rd); - } + reset_regdomains(false, intersected_rd); + + return 0; +} - rd = NULL; +static int reg_set_rd_country_ie(const struct ieee80211_regdomain *rd, + struct regulatory_request *country_ie_request) +{ + struct wiphy *request_wiphy; - reset_regdomains(false, intersected_rd); + if (!is_alpha2_set(rd->alpha2) && !is_an_alpha2(rd->alpha2) && + !is_unknown_alpha2(rd->alpha2)) + return -EINVAL; - return 0; + /* + * Lets only bother proceeding on the same alpha2 if the current + * rd is non static (it means CRDA was present and was used last) + * and the pending request came in from a country IE + */ + + if (!is_valid_rd(rd)) { + pr_err("Invalid regulatory 
domain detected:\n"); + print_regdomain_info(rd); + return -EINVAL; } - return -EINVAL; -} + request_wiphy = wiphy_idx_to_wiphy(country_ie_request->wiphy_idx); + if (!request_wiphy) { + schedule_delayed_work(&reg_timeout, 0); + return -ENODEV; + } + + if (country_ie_request->intersect) + return -EINVAL; + reset_regdomains(false, rd); + return 0; +} /* * Use this call to set the current regulatory domain. Conflicts with @@ -2220,10 +2375,32 @@ int set_regdom(const struct ieee80211_regdomain *rd) struct regulatory_request *lr; int r; + if (!reg_is_valid_request(rd->alpha2)) { + kfree(rd); + return -EINVAL; + } + lr = get_last_request(); /* Note that this doesn't update the wiphys, this is done below */ - r = __set_regdom(rd); + switch (lr->initiator) { + case NL80211_REGDOM_SET_BY_CORE: + r = reg_set_rd_core(rd); + break; + case NL80211_REGDOM_SET_BY_USER: + r = reg_set_rd_user(rd, lr); + break; + case NL80211_REGDOM_SET_BY_DRIVER: + r = reg_set_rd_driver(rd, lr); + break; + case NL80211_REGDOM_SET_BY_COUNTRY_IE: + r = reg_set_rd_country_ie(rd, lr); + break; + default: + WARN(1, "invalid initiator %d\n", lr->initiator); + return -EINVAL; + } + if (r) { if (r == -EALREADY) reg_set_request_processed(); diff --git a/net/wireless/reg.h b/net/wireless/reg.h index 9677e3c13da..02bd8f4b092 100644 --- a/net/wireless/reg.h +++ b/net/wireless/reg.h @@ -18,8 +18,10 @@ extern const struct ieee80211_regdomain __rcu *cfg80211_regdomain; +bool reg_is_valid_request(const char *alpha2); bool is_world_regdom(const char *alpha2); -bool reg_supported_dfs_region(u8 dfs_region); +bool reg_supported_dfs_region(enum nl80211_dfs_regions dfs_region); +enum nl80211_dfs_regions reg_get_dfs_region(struct wiphy *wiphy); int regulatory_hint_user(const char *alpha2, enum nl80211_user_reg_hint_type user_reg_hint_type); diff --git a/net/wireless/scan.c b/net/wireless/scan.c index d4397eba540..b528e31da2c 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -161,7 +161,7 @@ static void 
__cfg80211_bss_expire(struct cfg80211_registered_device *dev, dev->bss_generation++; } -void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, bool leak) +void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev) { struct cfg80211_scan_request *request; struct wireless_dev *wdev; @@ -210,17 +210,7 @@ void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, bool leak) dev_put(wdev->netdev); rdev->scan_req = NULL; - - /* - * OK. If this is invoked with "leak" then we can't - * free this ... but we've cleaned it up anyway. The - * driver failed to call the scan_done callback, so - * all bets are off, it might still be trying to use - * the scan request or not ... if it accesses the dev - * in there (it shouldn't anyway) then it may crash. - */ - if (!leak) - kfree(request); + kfree(request); } void __cfg80211_scan_done(struct work_struct *wk) @@ -231,7 +221,7 @@ void __cfg80211_scan_done(struct work_struct *wk) scan_done_wk); rtnl_lock(); - ___cfg80211_scan_done(rdev, false); + ___cfg80211_scan_done(rdev); rtnl_unlock(); } @@ -1099,11 +1089,8 @@ int cfg80211_wext_siwscan(struct net_device *dev, /* Determine number of channels, needed to allocate creq */ if (wreq && wreq->num_channels) n_channels = wreq->num_channels; - else { - for (band = 0; band < IEEE80211_NUM_BANDS; band++) - if (wiphy->bands[band]) - n_channels += wiphy->bands[band]->n_channels; - } + else + n_channels = ieee80211_get_num_supported_channels(wiphy); creq = kzalloc(sizeof(*creq) + sizeof(struct cfg80211_ssid) + n_channels * sizeof(void *), diff --git a/net/wireless/sme.c b/net/wireless/sme.c index d3c5bd7c6b5..a6350911850 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -70,18 +70,11 @@ static int cfg80211_conn_scan(struct wireless_dev *wdev) if (rdev->scan_req) return -EBUSY; - if (wdev->conn->params.channel) { + if (wdev->conn->params.channel) n_channels = 1; - } else { - enum ieee80211_band band; - n_channels = 0; + else + n_channels = 
ieee80211_get_num_supported_channels(wdev->wiphy); - for (band = 0; band < IEEE80211_NUM_BANDS; band++) { - if (!wdev->wiphy->bands[band]) - continue; - n_channels += wdev->wiphy->bands[band]->n_channels; - } - } request = kzalloc(sizeof(*request) + sizeof(request->ssids[0]) + sizeof(request->channels[0]) * n_channels, GFP_KERNEL); @@ -872,6 +865,8 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie, for (i = 0; i < 6; i++) rdev_del_key(rdev, dev, i, false, NULL); + rdev_set_qos_map(rdev, dev, NULL); + #ifdef CONFIG_CFG80211_WEXT memset(&wrqu, 0, sizeof(wrqu)); wrqu.ap_addr.sa_family = ARPHRD_ETHER; diff --git a/net/wireless/trace.h b/net/wireless/trace.h index ba5f0d6614d..fbcc23edee5 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -186,6 +186,28 @@ #define BOOL_TO_STR(bo) (bo) ? "true" : "false" +#define QOS_MAP_ENTRY __field(u8, num_des) \ + __array(u8, dscp_exception, \ + 2 * IEEE80211_QOS_MAP_MAX_EX) \ + __array(u8, up, IEEE80211_QOS_MAP_LEN_MIN) +#define QOS_MAP_ASSIGN(qos_map) \ + do { \ + if ((qos_map)) { \ + __entry->num_des = (qos_map)->num_des; \ + memcpy(__entry->dscp_exception, \ + &(qos_map)->dscp_exception, \ + 2 * IEEE80211_QOS_MAP_MAX_EX); \ + memcpy(__entry->up, &(qos_map)->up, \ + IEEE80211_QOS_MAP_LEN_MIN); \ + } else { \ + __entry->num_des = 0; \ + memset(__entry->dscp_exception, 0, \ + 2 * IEEE80211_QOS_MAP_MAX_EX); \ + memset(__entry->up, 0, \ + IEEE80211_QOS_MAP_LEN_MIN); \ + } \ + } while (0) + /************************************************************* * rdev->ops traces * *************************************************************/ @@ -1653,9 +1675,8 @@ TRACE_EVENT(rdev_cancel_remain_on_channel, TRACE_EVENT(rdev_mgmt_tx, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, - struct ieee80211_channel *chan, bool offchan, - unsigned int wait, bool no_cck, bool dont_wait_for_ack), - TP_ARGS(wiphy, wdev, chan, offchan, wait, no_cck, dont_wait_for_ack), + struct cfg80211_mgmt_tx_params *params), + 
TP_ARGS(wiphy, wdev, params), TP_STRUCT__entry( WIPHY_ENTRY WDEV_ENTRY @@ -1668,11 +1689,11 @@ TRACE_EVENT(rdev_mgmt_tx, TP_fast_assign( WIPHY_ASSIGN; WDEV_ASSIGN; - CHAN_ASSIGN(chan); - __entry->offchan = offchan; - __entry->wait = wait; - __entry->no_cck = no_cck; - __entry->dont_wait_for_ack = dont_wait_for_ack; + CHAN_ASSIGN(params->chan); + __entry->offchan = params->offchan; + __entry->wait = params->wait; + __entry->no_cck = params->no_cck; + __entry->dont_wait_for_ack = params->dont_wait_for_ack; ), TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", " CHAN_PR_FMT ", offchan: %s," " wait: %u, no cck: %s, dont wait for ack: %s", @@ -1876,6 +1897,24 @@ TRACE_EVENT(rdev_channel_switch, __entry->counter_offset_presp) ); +TRACE_EVENT(rdev_set_qos_map, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + struct cfg80211_qos_map *qos_map), + TP_ARGS(wiphy, netdev, qos_map), + TP_STRUCT__entry( + WIPHY_ENTRY + NETDEV_ENTRY + QOS_MAP_ENTRY + ), + TP_fast_assign( + WIPHY_ASSIGN; + NETDEV_ASSIGN; + QOS_MAP_ASSIGN(qos_map); + ), + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", num_des: %u", + WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->num_des) +); + /************************************************************* * cfg80211 exported functions traces * *************************************************************/ diff --git a/net/wireless/util.c b/net/wireless/util.c index 935dea9485d..d39c37104ae 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -689,7 +689,8 @@ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list, EXPORT_SYMBOL(ieee80211_amsdu_to_8023s); /* Given a data frame determine the 802.1p/1d tag to use. 
*/ -unsigned int cfg80211_classify8021d(struct sk_buff *skb) +unsigned int cfg80211_classify8021d(struct sk_buff *skb, + struct cfg80211_qos_map *qos_map) { unsigned int dscp; unsigned char vlan_priority; @@ -720,6 +721,21 @@ unsigned int cfg80211_classify8021d(struct sk_buff *skb) return 0; } + if (qos_map) { + unsigned int i, tmp_dscp = dscp >> 2; + + for (i = 0; i < qos_map->num_des; i++) { + if (tmp_dscp == qos_map->dscp_exception[i].dscp) + return qos_map->dscp_exception[i].up; + } + + for (i = 0; i < 8; i++) { + if (tmp_dscp >= qos_map->up[i].low && + tmp_dscp <= qos_map->up[i].high) + return i; + } + } + return dscp >> 5; } EXPORT_SYMBOL(cfg80211_classify8021d); @@ -863,6 +879,9 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, dev->ieee80211_ptr->use_4addr = false; dev->ieee80211_ptr->mesh_id_up_len = 0; + wdev_lock(dev->ieee80211_ptr); + rdev_set_qos_map(rdev, dev, NULL); + wdev_unlock(dev->ieee80211_ptr); switch (otype) { case NL80211_IFTYPE_AP: @@ -1462,6 +1481,19 @@ int ieee80211_get_ratemask(struct ieee80211_supported_band *sband, return 0; } +unsigned int ieee80211_get_num_supported_channels(struct wiphy *wiphy) +{ + enum ieee80211_band band; + unsigned int n_channels = 0; + + for (band = 0; band < IEEE80211_NUM_BANDS; band++) + if (wiphy->bands[band]) + n_channels += wiphy->bands[band]->n_channels; + + return n_channels; +} +EXPORT_SYMBOL(ieee80211_get_num_supported_channels); + /* See IEEE 802.1H for LLC/SNAP encapsulation/decapsulation */ /* Ethernet-II snap header (RFC1042 for most EtherTypes) */ const unsigned char rfc1042_header[] __aligned(2) = diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index e7c6e862580..5661a54ac7e 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -370,7 +370,7 @@ static int cfg80211_wext_siwretry(struct net_device *dev, u8 oshort = wdev->wiphy->retry_short; int err; - if (retry->disabled || + if (retry->disabled || retry->value < 1 || retry->value > 255 
|| (retry->flags & IW_RETRY_TYPE) != IW_RETRY_LIMIT) return -EINVAL; @@ -412,9 +412,9 @@ int cfg80211_wext_giwretry(struct net_device *dev, * First return short value, iwconfig will ask long value * later if needed */ - retry->flags |= IW_RETRY_LIMIT; + retry->flags |= IW_RETRY_LIMIT | IW_RETRY_SHORT; retry->value = wdev->wiphy->retry_short; - if (wdev->wiphy->retry_long != wdev->wiphy->retry_short) + if (wdev->wiphy->retry_long == wdev->wiphy->retry_short) retry->flags |= IW_RETRY_LONG; return 0; diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 7622789d375..6177479c7de 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -35,6 +35,8 @@ * response */ +#define pr_fmt(fmt) "X25: " fmt + #include <linux/module.h> #include <linux/capability.h> #include <linux/errno.h> @@ -1080,7 +1082,7 @@ static int x25_sendmsg(struct kiocb *iocb, struct socket *sock, { struct sock *sk = sock->sk; struct x25_sock *x25 = x25_sk(sk); - struct sockaddr_x25 *usx25 = (struct sockaddr_x25 *)msg->msg_name; + DECLARE_SOCKADDR(struct sockaddr_x25 *, usx25, msg->msg_name); struct sockaddr_x25 sx25; struct sk_buff *skb; unsigned char *asmptr; @@ -1256,7 +1258,7 @@ static int x25_recvmsg(struct kiocb *iocb, struct socket *sock, { struct sock *sk = sock->sk; struct x25_sock *x25 = x25_sk(sk); - struct sockaddr_x25 *sx25 = (struct sockaddr_x25 *)msg->msg_name; + DECLARE_SOCKADDR(struct sockaddr_x25 *, sx25, msg->msg_name); size_t copied; int qbit, header_len; struct sk_buff *skb; @@ -1809,7 +1811,7 @@ static int __init x25_init(void) if (rc != 0) goto out_sock; - printk(KERN_INFO "X.25 for Linux Version 0.2\n"); + pr_info("Linux Version 0.2\n"); x25_register_sysctl(); rc = x25_proc_init(); diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c index a8a236338e6..39231237e1c 100644 --- a/net/x25/x25_dev.c +++ b/net/x25/x25_dev.c @@ -17,6 +17,8 @@ * 2000-09-04 Henner Eisen Prevent freeing a dangling skb. 
*/ +#define pr_fmt(fmt) "X25: " fmt + #include <linux/kernel.h> #include <linux/netdevice.h> #include <linux/skbuff.h> @@ -89,7 +91,7 @@ static int x25_receive_data(struct sk_buff *skb, struct x25_neigh *nb) */ if (frametype != X25_CLEAR_CONFIRMATION) - printk(KERN_DEBUG "x25_receive_data(): unknown frame type %2x\n",frametype); + pr_debug("x25_receive_data(): unknown frame type %2x\n",frametype); return 0; } @@ -114,7 +116,7 @@ int x25_lapb_receive_frame(struct sk_buff *skb, struct net_device *dev, */ nb = x25_get_neigh(dev); if (!nb) { - printk(KERN_DEBUG "X.25: unknown neighbour - %s\n", dev->name); + pr_debug("unknown neighbour - %s\n", dev->name); goto drop; } @@ -154,7 +156,7 @@ void x25_establish_link(struct x25_neigh *nb) switch (nb->dev->type) { case ARPHRD_X25: if ((skb = alloc_skb(1, GFP_ATOMIC)) == NULL) { - printk(KERN_ERR "x25_dev: out of memory\n"); + pr_err("x25_dev: out of memory\n"); return; } ptr = skb_put(skb, 1); @@ -189,7 +191,7 @@ void x25_terminate_link(struct x25_neigh *nb) skb = alloc_skb(1, GFP_ATOMIC); if (!skb) { - printk(KERN_ERR "x25_dev: out of memory\n"); + pr_err("x25_dev: out of memory\n"); return; } diff --git a/net/x25/x25_facilities.c b/net/x25/x25_facilities.c index b8253250d72..7ecd04c2136 100644 --- a/net/x25/x25_facilities.c +++ b/net/x25/x25_facilities.c @@ -21,6 +21,8 @@ * on response. 
*/ +#define pr_fmt(fmt) "X25: " fmt + #include <linux/kernel.h> #include <linux/string.h> #include <linux/skbuff.h> @@ -109,7 +111,7 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities, case X25_MARKER: break; default: - printk(KERN_DEBUG "X.25: unknown facility " + pr_debug("unknown facility " "%02X, value %02X\n", p[0], p[1]); break; @@ -132,7 +134,7 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities, *vc_fac_mask |= X25_MASK_WINDOW_SIZE; break; default: - printk(KERN_DEBUG "X.25: unknown facility " + pr_debug("unknown facility " "%02X, values %02X, %02X\n", p[0], p[1], p[2]); break; @@ -143,7 +145,7 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities, case X25_FAC_CLASS_C: if (len < 4) return -1; - printk(KERN_DEBUG "X.25: unknown facility %02X, " + pr_debug("unknown facility %02X, " "values %02X, %02X, %02X\n", p[0], p[1], p[2], p[3]); p += 4; @@ -172,7 +174,7 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities, *vc_fac_mask |= X25_MASK_CALLED_AE; break; default: - printk(KERN_DEBUG "X.25: unknown facility %02X," + pr_debug("unknown facility %02X," "length %d\n", p[0], p[1]); break; } @@ -341,12 +343,12 @@ void x25_limit_facilities(struct x25_facilities *facilities, if (!nb->extended) { if (facilities->winsize_in > 7) { - printk(KERN_DEBUG "X.25: incoming winsize limited to 7\n"); + pr_debug("incoming winsize limited to 7\n"); facilities->winsize_in = 7; } if (facilities->winsize_out > 7) { facilities->winsize_out = 7; - printk( KERN_DEBUG "X.25: outgoing winsize limited to 7\n"); + pr_debug("outgoing winsize limited to 7\n"); } } } diff --git a/net/x25/x25_forward.c b/net/x25/x25_forward.c index c541b622ae1..cf561f1613e 100644 --- a/net/x25/x25_forward.c +++ b/net/x25/x25_forward.c @@ -8,6 +8,9 @@ * History * 03-01-2007 Added forwarding for x.25 Andrew Hendry */ + +#define pr_fmt(fmt) "X25: " fmt + #include <linux/if_arp.h> #include 
<linux/init.h> #include <linux/slab.h> @@ -51,7 +54,7 @@ int x25_forward_call(struct x25_address *dest_addr, struct x25_neigh *from, list_for_each(entry, &x25_forward_list) { x25_frwd = list_entry(entry, struct x25_forward, node); if (x25_frwd->lci == lci) { - printk(KERN_WARNING "X.25: call request for lci which is already registered!, transmitting but not registering new pair\n"); + pr_warn("call request for lci which is already registered!, transmitting but not registering new pair\n"); same_lci = 1; } } diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c index a49cd4ec551..d1b0dc79bb6 100644 --- a/net/x25/x25_in.c +++ b/net/x25/x25_in.c @@ -23,6 +23,8 @@ * i-frames. */ +#define pr_fmt(fmt) "X25: " fmt + #include <linux/slab.h> #include <linux/errno.h> #include <linux/kernel.h> @@ -317,7 +319,7 @@ static int x25_state3_machine(struct sock *sk, struct sk_buff *skb, int frametyp break; default: - printk(KERN_WARNING "x25: unknown %02X in state 3\n", frametype); + pr_warn("unknown %02X in state 3\n", frametype); break; } diff --git a/net/x25/x25_link.c b/net/x25/x25_link.c index 4acacf3c661..fd5ffb25873 100644 --- a/net/x25/x25_link.c +++ b/net/x25/x25_link.c @@ -21,6 +21,8 @@ * 2000-09-04 Henner Eisen dev_hold() / dev_put() for x25_neigh. 
*/ +#define pr_fmt(fmt) "X25: " fmt + #include <linux/kernel.h> #include <linux/jiffies.h> #include <linux/timer.h> @@ -93,13 +95,13 @@ void x25_link_control(struct sk_buff *skb, struct x25_neigh *nb, if (!pskb_may_pull(skb, X25_STD_MIN_LEN + 4)) break; - printk(KERN_WARNING "x25: diagnostic #%d - %02X %02X %02X\n", + pr_warn("diagnostic #%d - %02X %02X %02X\n", skb->data[3], skb->data[4], skb->data[5], skb->data[6]); break; default: - printk(KERN_WARNING "x25: received unknown %02X with LCI 000\n", + pr_warn("received unknown %02X with LCI 000\n", frametype); break; } diff --git a/net/x25/x25_subr.c b/net/x25/x25_subr.c index 5170d52bfd9..6b5af65f491 100644 --- a/net/x25/x25_subr.c +++ b/net/x25/x25_subr.c @@ -23,6 +23,8 @@ * restriction on response. */ +#define pr_fmt(fmt) "X25: " fmt + #include <linux/slab.h> #include <linux/kernel.h> #include <linux/string.h> @@ -148,7 +150,7 @@ void x25_write_internal(struct sock *sk, int frametype) case X25_RESET_CONFIRMATION: break; default: - printk(KERN_ERR "X.25: invalid frame type %02X\n", frametype); + pr_err("invalid frame type %02X\n", frametype); return; } @@ -338,7 +340,7 @@ int x25_decode(struct sock *sk, struct sk_buff *skb, int *ns, int *nr, int *q, } } - printk(KERN_DEBUG "X.25: invalid PLP frame %02X %02X %02X\n", + pr_debug("invalid PLP frame %02X %02X %02X\n", frame[0], frame[1], frame[2]); return X25_ILLEGAL; diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 88843996f93..6c7ac016ce3 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -67,7 +67,7 @@ int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq) case IPPROTO_COMP: if (!pskb_may_pull(skb, sizeof(struct ip_comp_hdr))) return -EINVAL; - *spi = htonl(ntohs(*(__be16*)(skb_transport_header(skb) + 2))); + *spi = htonl(ntohs(*(__be16 *)(skb_transport_header(skb) + 2))); *seq = 0; return 0; default: @@ -77,8 +77,8 @@ int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq) if 
(!pskb_may_pull(skb, hlen)) return -EINVAL; - *spi = *(__be32*)(skb_transport_header(skb) + offset); - *seq = *(__be32*)(skb_transport_header(skb) + offset_seq); + *spi = *(__be32 *)(skb_transport_header(skb) + offset); + *seq = *(__be32 *)(skb_transport_header(skb) + offset_seq); return 0; } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 0d49945d0b9..4b98b25793c 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -39,12 +39,7 @@ #define XFRM_QUEUE_TMO_MAX ((unsigned)(60*HZ)) #define XFRM_MAX_QUEUE_LEN 100 -DEFINE_MUTEX(xfrm_cfg_mutex); -EXPORT_SYMBOL(xfrm_cfg_mutex); - -static DEFINE_SPINLOCK(xfrm_policy_sk_bundle_lock); static struct dst_entry *xfrm_policy_sk_bundles; -static DEFINE_RWLOCK(xfrm_policy_lock); static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock); static struct xfrm_policy_afinfo __rcu *xfrm_policy_afinfo[NPROTO] @@ -176,7 +171,7 @@ static inline unsigned long make_jiffies(long secs) static void xfrm_policy_timer(unsigned long data) { - struct xfrm_policy *xp = (struct xfrm_policy*)data; + struct xfrm_policy *xp = (struct xfrm_policy *)data; unsigned long now = get_seconds(); long next = LONG_MAX; int warn = 0; @@ -438,7 +433,7 @@ static void xfrm_bydst_resize(struct net *net, int dir) if (!ndst) return; - write_lock_bh(&xfrm_policy_lock); + write_lock_bh(&net->xfrm.xfrm_policy_lock); for (i = hmask; i >= 0; i--) xfrm_dst_hash_transfer(odst + i, ndst, nhashmask); @@ -446,7 +441,7 @@ static void xfrm_bydst_resize(struct net *net, int dir) net->xfrm.policy_bydst[dir].table = ndst; net->xfrm.policy_bydst[dir].hmask = nhashmask; - write_unlock_bh(&xfrm_policy_lock); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head)); } @@ -463,7 +458,7 @@ static void xfrm_byidx_resize(struct net *net, int total) if (!nidx) return; - write_lock_bh(&xfrm_policy_lock); + write_lock_bh(&net->xfrm.xfrm_policy_lock); for (i = hmask; i >= 0; i--) xfrm_idx_hash_transfer(oidx + i, nidx, 
nhashmask); @@ -471,7 +466,7 @@ static void xfrm_byidx_resize(struct net *net, int total) net->xfrm.policy_byidx = nidx; net->xfrm.policy_idx_hmask = nhashmask; - write_unlock_bh(&xfrm_policy_lock); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head)); } @@ -504,7 +499,7 @@ static inline int xfrm_byidx_should_resize(struct net *net, int total) void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si) { - read_lock_bh(&xfrm_policy_lock); + read_lock_bh(&net->xfrm.xfrm_policy_lock); si->incnt = net->xfrm.policy_count[XFRM_POLICY_IN]; si->outcnt = net->xfrm.policy_count[XFRM_POLICY_OUT]; si->fwdcnt = net->xfrm.policy_count[XFRM_POLICY_FWD]; @@ -513,7 +508,7 @@ void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si) si->fwdscnt = net->xfrm.policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX]; si->spdhcnt = net->xfrm.policy_idx_hmask; si->spdhmcnt = xfrm_policy_hashmax; - read_unlock_bh(&xfrm_policy_lock); + read_unlock_bh(&net->xfrm.xfrm_policy_lock); } EXPORT_SYMBOL(xfrm_spd_getinfo); @@ -538,7 +533,7 @@ static void xfrm_hash_resize(struct work_struct *work) /* Generate new index... KAME seems to generate them ordered by cost * of an absolute inpredictability of ordering of rules. This will not pass. 
*/ -static u32 xfrm_gen_index(struct net *net, int dir) +static u32 xfrm_gen_index(struct net *net, int dir, u32 index) { static u32 idx_generator; @@ -548,8 +543,14 @@ static u32 xfrm_gen_index(struct net *net, int dir) u32 idx; int found; - idx = (idx_generator | dir); - idx_generator += 8; + if (!index) { + idx = (idx_generator | dir); + idx_generator += 8; + } else { + idx = index; + index = 0; + } + if (idx == 0) idx = 8; list = net->xfrm.policy_byidx + idx_hash(net, idx); @@ -630,7 +631,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) struct hlist_head *chain; struct hlist_node *newpos; - write_lock_bh(&xfrm_policy_lock); + write_lock_bh(&net->xfrm.xfrm_policy_lock); chain = policy_hash_bysel(net, &policy->selector, policy->family, dir); delpol = NULL; newpos = NULL; @@ -641,7 +642,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) xfrm_sec_ctx_match(pol->security, policy->security) && !WARN_ON(delpol)) { if (excl) { - write_unlock_bh(&xfrm_policy_lock); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); return -EEXIST; } delpol = pol; @@ -672,14 +673,14 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) xfrm_policy_requeue(delpol, policy); __xfrm_policy_unlink(delpol, dir); } - policy->index = delpol ? delpol->index : xfrm_gen_index(net, dir); + policy->index = delpol ? 
delpol->index : xfrm_gen_index(net, dir, policy->index); hlist_add_head(&policy->byidx, net->xfrm.policy_byidx+idx_hash(net, policy->index)); policy->curlft.add_time = get_seconds(); policy->curlft.use_time = 0; if (!mod_timer(&policy->timer, jiffies + HZ)) xfrm_pol_hold(policy); list_add(&policy->walk.all, &net->xfrm.policy_all); - write_unlock_bh(&xfrm_policy_lock); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); if (delpol) xfrm_policy_kill(delpol); @@ -699,7 +700,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type, struct hlist_head *chain; *err = 0; - write_lock_bh(&xfrm_policy_lock); + write_lock_bh(&net->xfrm.xfrm_policy_lock); chain = policy_hash_bysel(net, sel, sel->family, dir); ret = NULL; hlist_for_each_entry(pol, chain, bydst) { @@ -712,7 +713,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type, *err = security_xfrm_policy_delete( pol->security); if (*err) { - write_unlock_bh(&xfrm_policy_lock); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); return pol; } __xfrm_policy_unlink(pol, dir); @@ -721,7 +722,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type, break; } } - write_unlock_bh(&xfrm_policy_lock); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); if (ret && delete) xfrm_policy_kill(ret); @@ -740,7 +741,7 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type, return NULL; *err = 0; - write_lock_bh(&xfrm_policy_lock); + write_lock_bh(&net->xfrm.xfrm_policy_lock); chain = net->xfrm.policy_byidx + idx_hash(net, id); ret = NULL; hlist_for_each_entry(pol, chain, byidx) { @@ -751,7 +752,7 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type, *err = security_xfrm_policy_delete( pol->security); if (*err) { - write_unlock_bh(&xfrm_policy_lock); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); return pol; } __xfrm_policy_unlink(pol, dir); @@ -760,7 +761,7 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type, 
break; } } - write_unlock_bh(&xfrm_policy_lock); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); if (ret && delete) xfrm_policy_kill(ret); @@ -823,7 +824,7 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info) { int dir, err = 0, cnt = 0; - write_lock_bh(&xfrm_policy_lock); + write_lock_bh(&net->xfrm.xfrm_policy_lock); err = xfrm_policy_flush_secctx_check(net, type, audit_info); if (err) @@ -839,7 +840,7 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info) if (pol->type != type) continue; __xfrm_policy_unlink(pol, dir); - write_unlock_bh(&xfrm_policy_lock); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); cnt++; xfrm_audit_policy_delete(pol, 1, audit_info->loginuid, @@ -848,7 +849,7 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info) xfrm_policy_kill(pol); - write_lock_bh(&xfrm_policy_lock); + write_lock_bh(&net->xfrm.xfrm_policy_lock); goto again1; } @@ -860,7 +861,7 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info) if (pol->type != type) continue; __xfrm_policy_unlink(pol, dir); - write_unlock_bh(&xfrm_policy_lock); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); cnt++; xfrm_audit_policy_delete(pol, 1, @@ -869,7 +870,7 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info) audit_info->secid); xfrm_policy_kill(pol); - write_lock_bh(&xfrm_policy_lock); + write_lock_bh(&net->xfrm.xfrm_policy_lock); goto again2; } } @@ -878,7 +879,7 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info) if (!cnt) err = -ESRCH; out: - write_unlock_bh(&xfrm_policy_lock); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); return err; } EXPORT_SYMBOL(xfrm_policy_flush); @@ -898,7 +899,7 @@ int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk, if (list_empty(&walk->walk.all) && walk->seq != 0) return 0; - write_lock_bh(&xfrm_policy_lock); + write_lock_bh(&net->xfrm.xfrm_policy_lock); if 
(list_empty(&walk->walk.all)) x = list_first_entry(&net->xfrm.policy_all, struct xfrm_policy_walk_entry, all); else @@ -924,7 +925,7 @@ int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk, } list_del_init(&walk->walk.all); out: - write_unlock_bh(&xfrm_policy_lock); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); return error; } EXPORT_SYMBOL(xfrm_policy_walk); @@ -938,14 +939,14 @@ void xfrm_policy_walk_init(struct xfrm_policy_walk *walk, u8 type) } EXPORT_SYMBOL(xfrm_policy_walk_init); -void xfrm_policy_walk_done(struct xfrm_policy_walk *walk) +void xfrm_policy_walk_done(struct xfrm_policy_walk *walk, struct net *net) { if (list_empty(&walk->walk.all)) return; - write_lock_bh(&xfrm_policy_lock); + write_lock_bh(&net->xfrm.xfrm_policy_lock); /*FIXME where is net? */ list_del(&walk->walk.all); - write_unlock_bh(&xfrm_policy_lock); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); } EXPORT_SYMBOL(xfrm_policy_walk_done); @@ -990,7 +991,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, if (unlikely(!daddr || !saddr)) return NULL; - read_lock_bh(&xfrm_policy_lock); + read_lock_bh(&net->xfrm.xfrm_policy_lock); chain = policy_hash_direct(net, daddr, saddr, family, dir); ret = NULL; hlist_for_each_entry(pol, chain, bydst) { @@ -1026,7 +1027,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, if (ret) xfrm_pol_hold(ret); fail: - read_unlock_bh(&xfrm_policy_lock); + read_unlock_bh(&net->xfrm.xfrm_policy_lock); return ret; } @@ -1103,8 +1104,9 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, const struct flowi *fl) { struct xfrm_policy *pol; + struct net *net = sock_net(sk); - read_lock_bh(&xfrm_policy_lock); + read_lock_bh(&net->xfrm.xfrm_policy_lock); if ((pol = sk->sk_policy[dir]) != NULL) { bool match = xfrm_selector_match(&pol->selector, fl, sk->sk_family); @@ -1128,7 +1130,7 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, pol = NULL; 
} out: - read_unlock_bh(&xfrm_policy_lock); + read_unlock_bh(&net->xfrm.xfrm_policy_lock); return pol; } @@ -1166,9 +1168,11 @@ static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, int xfrm_policy_delete(struct xfrm_policy *pol, int dir) { - write_lock_bh(&xfrm_policy_lock); + struct net *net = xp_net(pol); + + write_lock_bh(&net->xfrm.xfrm_policy_lock); pol = __xfrm_policy_unlink(pol, dir); - write_unlock_bh(&xfrm_policy_lock); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); if (pol) { xfrm_policy_kill(pol); return 0; @@ -1187,12 +1191,12 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol) return -EINVAL; #endif - write_lock_bh(&xfrm_policy_lock); + write_lock_bh(&net->xfrm.xfrm_policy_lock); old_pol = sk->sk_policy[dir]; sk->sk_policy[dir] = pol; if (pol) { pol->curlft.add_time = get_seconds(); - pol->index = xfrm_gen_index(net, XFRM_POLICY_MAX+dir); + pol->index = xfrm_gen_index(net, XFRM_POLICY_MAX+dir, 0); __xfrm_policy_link(pol, XFRM_POLICY_MAX+dir); } if (old_pol) { @@ -1204,7 +1208,7 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol) */ __xfrm_policy_unlink(old_pol, XFRM_POLICY_MAX+dir); } - write_unlock_bh(&xfrm_policy_lock); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); if (old_pol) { xfrm_policy_kill(old_pol); @@ -1215,6 +1219,7 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol) static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir) { struct xfrm_policy *newp = xfrm_policy_alloc(xp_net(old), GFP_ATOMIC); + struct net *net = xp_net(old); if (newp) { newp->selector = old->selector; @@ -1233,9 +1238,9 @@ static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir) newp->type = old->type; memcpy(newp->xfrm_vec, old->xfrm_vec, newp->xfrm_nr*sizeof(struct xfrm_tmpl)); - write_lock_bh(&xfrm_policy_lock); + write_lock_bh(&net->xfrm.xfrm_policy_lock); __xfrm_policy_link(newp, XFRM_POLICY_MAX+dir); - 
write_unlock_bh(&xfrm_policy_lock); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); xfrm_pol_put(newp); } return newp; @@ -1281,7 +1286,7 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl, xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family); xfrm_address_t tmp; - for (nx=0, i = 0; i < policy->xfrm_nr; i++) { + for (nx = 0, i = 0; i < policy->xfrm_nr; i++) { struct xfrm_state *x; xfrm_address_t *remote = daddr; xfrm_address_t *local = saddr; @@ -1311,9 +1316,9 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl, error = (x->km.state == XFRM_STATE_ERROR ? -EINVAL : -EAGAIN); xfrm_state_put(x); - } - else if (error == -ESRCH) + } else if (error == -ESRCH) { error = -EAGAIN; + } if (!tmpl->optional) goto fail; @@ -1321,7 +1326,7 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl, return nx; fail: - for (nx--; nx>=0; nx--) + for (nx--; nx >= 0; nx--) xfrm_state_put(xfrm[nx]); return error; } @@ -1358,7 +1363,7 @@ xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, const struct flowi *fl, return cnx; fail: - for (cnx--; cnx>=0; cnx--) + for (cnx--; cnx >= 0; cnx--) xfrm_state_put(tpp[cnx]); return error; @@ -1636,20 +1641,22 @@ free_dst: goto out; } -static int inline -xfrm_dst_alloc_copy(void **target, const void *src, int size) +#ifdef CONFIG_XFRM_SUB_POLICY +static int xfrm_dst_alloc_copy(void **target, const void *src, int size) { if (!*target) { *target = kmalloc(size, GFP_ATOMIC); if (!*target) return -ENOMEM; } + memcpy(*target, src, size); return 0; } +#endif -static int inline -xfrm_dst_update_parent(struct dst_entry *dst, const struct xfrm_selector *sel) +static int xfrm_dst_update_parent(struct dst_entry *dst, + const struct xfrm_selector *sel) { #ifdef CONFIG_XFRM_SUB_POLICY struct xfrm_dst *xdst = (struct xfrm_dst *)dst; @@ -1660,8 +1667,8 @@ xfrm_dst_update_parent(struct dst_entry *dst, const struct xfrm_selector *sel) #endif } -static int inline -xfrm_dst_update_origin(struct 
dst_entry *dst, const struct flowi *fl) +static int xfrm_dst_update_origin(struct dst_entry *dst, + const struct flowi *fl) { #ifdef CONFIG_XFRM_SUB_POLICY struct xfrm_dst *xdst = (struct xfrm_dst *)dst; @@ -1699,7 +1706,7 @@ static int xfrm_expand_policies(const struct flowi *fl, u16 family, xfrm_pols_put(pols, *num_pols); return PTR_ERR(pols[1]); } - (*num_pols) ++; + (*num_pols)++; (*num_xfrms) += pols[1]->xfrm_nr; } } @@ -1753,7 +1760,7 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, } xdst->num_pols = num_pols; - memcpy(xdst->pols, pols, sizeof(struct xfrm_policy*) * num_pols); + memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols); xdst->policy_genid = atomic_read(&pols[0]->genid); return xdst; @@ -1896,8 +1903,7 @@ static struct xfrm_dst *xfrm_create_dummy_bundle(struct net *net, if (IS_ERR(xdst)) return xdst; - if (net->xfrm.sysctl_larval_drop || num_xfrms <= 0 || - (fl->flowi_flags & FLOWI_FLAG_CAN_SLEEP)) + if (net->xfrm.sysctl_larval_drop || num_xfrms <= 0) return xdst; dst1 = &xdst->u.dst; @@ -2023,7 +2029,7 @@ make_dummy_bundle: } xdst->num_pols = num_pols; xdst->num_xfrms = num_xfrms; - memcpy(xdst->pols, pols, sizeof(struct xfrm_policy*) * num_pols); + memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols); dst_hold(&xdst->u.dst); return &xdst->flo; @@ -2072,7 +2078,6 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT); int i, err, num_pols, num_xfrms = 0, drop_pols = 0; -restart: dst = NULL; xdst = NULL; route = NULL; @@ -2106,10 +2111,10 @@ restart: dst_hold(&xdst->u.dst); - spin_lock_bh(&xfrm_policy_sk_bundle_lock); + spin_lock_bh(&net->xfrm.xfrm_policy_sk_bundle_lock); xdst->u.dst.next = xfrm_policy_sk_bundles; xfrm_policy_sk_bundles = &xdst->u.dst; - spin_unlock_bh(&xfrm_policy_sk_bundle_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_sk_bundle_lock); route = xdst->route; } @@ -2133,7 +2138,7 @@ restart: num_pols = 
xdst->num_pols; num_xfrms = xdst->num_xfrms; - memcpy(pols, xdst->pols, sizeof(struct xfrm_policy*) * num_pols); + memcpy(pols, xdst->pols, sizeof(struct xfrm_policy *) * num_pols); route = xdst->route; } @@ -2152,23 +2157,8 @@ restart: return make_blackhole(net, family, dst_orig); } - if (fl->flowi_flags & FLOWI_FLAG_CAN_SLEEP) { - DECLARE_WAITQUEUE(wait, current); - add_wait_queue(&net->xfrm.km_waitq, &wait); - set_current_state(TASK_INTERRUPTIBLE); - schedule(); - set_current_state(TASK_RUNNING); - remove_wait_queue(&net->xfrm.km_waitq, &wait); - - if (!signal_pending(current)) { - dst_release(dst); - goto restart; - } - - err = -ERESTART; - } else - err = -EAGAIN; + err = -EAGAIN; XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); goto error; @@ -2344,7 +2334,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, if (skb->sp) { int i; - for (i=skb->sp->len-1; i>=0; i--) { + for (i = skb->sp->len-1; i >= 0; i--) { struct xfrm_state *x = skb->sp->xvec[i]; if (!xfrm_selector_match(&x->sel, &fl, family)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH); @@ -2390,7 +2380,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, pol->curlft.use_time = get_seconds(); pols[0] = pol; - npols ++; + npols++; #ifdef CONFIG_XFRM_SUB_POLICY if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) { pols[1] = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, @@ -2402,7 +2392,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, return 0; } pols[1]->curlft.use_time = get_seconds(); - npols ++; + npols++; } } #endif @@ -2434,7 +2424,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, } xfrm_nr = ti; if (npols > 1) { - xfrm_tmpl_sort(stp, tpp, xfrm_nr, family); + xfrm_tmpl_sort(stp, tpp, xfrm_nr, family, net); tpp = stp; } @@ -2563,10 +2553,10 @@ static void __xfrm_garbage_collect(struct net *net) { struct dst_entry *head, *next; - spin_lock_bh(&xfrm_policy_sk_bundle_lock); + 
spin_lock_bh(&net->xfrm.xfrm_policy_sk_bundle_lock); head = xfrm_policy_sk_bundles; xfrm_policy_sk_bundles = NULL; - spin_unlock_bh(&xfrm_policy_sk_bundle_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_sk_bundle_lock); while (head) { next = head->next; @@ -2950,6 +2940,13 @@ static int __net_init xfrm_net_init(struct net *net) rv = xfrm_sysctl_init(net); if (rv < 0) goto out_sysctl; + + /* Initialize the per-net locks here */ + spin_lock_init(&net->xfrm.xfrm_state_lock); + rwlock_init(&net->xfrm.xfrm_policy_lock); + spin_lock_init(&net->xfrm.xfrm_policy_sk_bundle_lock); + mutex_init(&net->xfrm.xfrm_cfg_mutex); + return 0; out_sysctl: @@ -2992,7 +2989,7 @@ static void xfrm_audit_common_policyinfo(struct xfrm_policy *xp, audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s", ctx->ctx_alg, ctx->ctx_doi, ctx->ctx_str); - switch(sel->family) { + switch (sel->family) { case AF_INET: audit_log_format(audit_buf, " src=%pI4", &sel->saddr.a4); if (sel->prefixlen_s != 32) @@ -3069,15 +3066,15 @@ static bool xfrm_migrate_selector_match(const struct xfrm_selector *sel_cmp, return false; } -static struct xfrm_policy * xfrm_migrate_policy_find(const struct xfrm_selector *sel, - u8 dir, u8 type) +static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *sel, + u8 dir, u8 type, struct net *net) { struct xfrm_policy *pol, *ret = NULL; struct hlist_head *chain; u32 priority = ~0U; - read_lock_bh(&xfrm_policy_lock); - chain = policy_hash_direct(&init_net, &sel->daddr, &sel->saddr, sel->family, dir); + read_lock_bh(&net->xfrm.xfrm_policy_lock); /*FIXME*/ + chain = policy_hash_direct(net, &sel->daddr, &sel->saddr, sel->family, dir); hlist_for_each_entry(pol, chain, bydst) { if (xfrm_migrate_selector_match(sel, &pol->selector) && pol->type == type) { @@ -3086,7 +3083,7 @@ static struct xfrm_policy * xfrm_migrate_policy_find(const struct xfrm_selector break; } } - chain = &init_net.xfrm.policy_inexact[dir]; + chain = &net->xfrm.policy_inexact[dir]; 
hlist_for_each_entry(pol, chain, bydst) { if (xfrm_migrate_selector_match(sel, &pol->selector) && pol->type == type && @@ -3099,7 +3096,7 @@ static struct xfrm_policy * xfrm_migrate_policy_find(const struct xfrm_selector if (ret) xfrm_pol_hold(ret); - read_unlock_bh(&xfrm_policy_lock); + read_unlock_bh(&net->xfrm.xfrm_policy_lock); return ret; } @@ -3210,7 +3207,7 @@ static int xfrm_migrate_check(const struct xfrm_migrate *m, int num_migrate) int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, struct xfrm_migrate *m, int num_migrate, - struct xfrm_kmaddress *k) + struct xfrm_kmaddress *k, struct net *net) { int i, err, nx_cur = 0, nx_new = 0; struct xfrm_policy *pol = NULL; @@ -3223,14 +3220,14 @@ int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, goto out; /* Stage 1 - find policy */ - if ((pol = xfrm_migrate_policy_find(sel, dir, type)) == NULL) { + if ((pol = xfrm_migrate_policy_find(sel, dir, type, net)) == NULL) { err = -ENOENT; goto out; } /* Stage 2 - find and update state(s) */ for (i = 0, mp = m; i < num_migrate; i++, mp++) { - if ((x = xfrm_migrate_state_find(mp))) { + if ((x = xfrm_migrate_state_find(mp, net))) { x_cur[nx_cur] = x; nx_cur++; if ((xc = xfrm_state_migrate(x, mp))) { diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c index 80cd1e55b83..fc5abd0b456 100644 --- a/net/xfrm/xfrm_proc.c +++ b/net/xfrm/xfrm_proc.c @@ -52,7 +52,7 @@ static int xfrm_statistics_seq_show(struct seq_file *seq, void *v) { struct net *net = seq->private; int i; - for (i=0; xfrm_mib_list[i].name; i++) + for (i = 0; xfrm_mib_list[i].name; i++) seq_printf(seq, "%-24s\t%lu\n", xfrm_mib_list[i].name, snmp_fold_field((void __percpu **) net->mib.xfrm_statistics, diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 8ed9d0dd456..a26b7aa7947 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -35,8 +35,6 @@ destination/tunnel endpoint. 
(output) */ -static DEFINE_SPINLOCK(xfrm_state_lock); - static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024; static inline unsigned int xfrm_dst_hash(struct net *net, @@ -127,7 +125,7 @@ static void xfrm_hash_resize(struct work_struct *work) goto out_unlock; } - spin_lock_bh(&xfrm_state_lock); + spin_lock_bh(&net->xfrm.xfrm_state_lock); nhashmask = (nsize / sizeof(struct hlist_head)) - 1U; for (i = net->xfrm.state_hmask; i >= 0; i--) @@ -144,7 +142,7 @@ static void xfrm_hash_resize(struct work_struct *work) net->xfrm.state_byspi = nspi; net->xfrm.state_hmask = nhashmask; - spin_unlock_bh(&xfrm_state_lock); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); osize = (ohashmask + 1) * sizeof(struct hlist_head); xfrm_hash_free(odst, osize); @@ -374,8 +372,6 @@ static void xfrm_state_gc_task(struct work_struct *work) hlist_for_each_entry_safe(x, tmp, &gc_list, gclist) xfrm_state_gc_destroy(x); - - wake_up(&net->xfrm.km_waitq); } static inline unsigned long make_jiffies(long secs) @@ -386,11 +382,10 @@ static inline unsigned long make_jiffies(long secs) return secs*HZ; } -static enum hrtimer_restart xfrm_timer_handler(struct hrtimer * me) +static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me) { struct tasklet_hrtimer *thr = container_of(me, struct tasklet_hrtimer, timer); struct xfrm_state *x = container_of(thr, struct xfrm_state, mtimer); - struct net *net = xs_net(x); unsigned long now = get_seconds(); long next = LONG_MAX; int warn = 0; @@ -453,19 +448,15 @@ static enum hrtimer_restart xfrm_timer_handler(struct hrtimer * me) if (warn) km_state_expired(x, 0, 0); resched: - if (next != LONG_MAX){ + if (next != LONG_MAX) { tasklet_hrtimer_start(&x->mtimer, ktime_set(next, 0), HRTIMER_MODE_REL); } goto out; expired: - if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) { + if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) x->km.state = XFRM_STATE_EXPIRED; - wake_up(&net->xfrm.km_waitq); - next = 2; - goto resched; - } err = 
__xfrm_state_delete(x); if (!err) @@ -535,14 +526,14 @@ int __xfrm_state_delete(struct xfrm_state *x) if (x->km.state != XFRM_STATE_DEAD) { x->km.state = XFRM_STATE_DEAD; - spin_lock(&xfrm_state_lock); + spin_lock(&net->xfrm.xfrm_state_lock); list_del(&x->km.all); hlist_del(&x->bydst); hlist_del(&x->bysrc); if (x->id.spi) hlist_del(&x->byspi); net->xfrm.state_num--; - spin_unlock(&xfrm_state_lock); + spin_unlock(&net->xfrm.xfrm_state_lock); /* All xfrm_state objects are created by xfrm_state_alloc. * The xfrm_state_alloc call gives a reference, and that @@ -603,7 +594,7 @@ int xfrm_state_flush(struct net *net, u8 proto, struct xfrm_audit *audit_info) { int i, err = 0, cnt = 0; - spin_lock_bh(&xfrm_state_lock); + spin_lock_bh(&net->xfrm.xfrm_state_lock); err = xfrm_state_flush_secctx_check(net, proto, audit_info); if (err) goto out; @@ -616,7 +607,7 @@ restart: if (!xfrm_state_kern(x) && xfrm_id_proto_match(x->id.proto, proto)) { xfrm_state_hold(x); - spin_unlock_bh(&xfrm_state_lock); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); err = xfrm_state_delete(x); xfrm_audit_state_delete(x, err ? 
0 : 1, @@ -627,7 +618,7 @@ restart: if (!err) cnt++; - spin_lock_bh(&xfrm_state_lock); + spin_lock_bh(&net->xfrm.xfrm_state_lock); goto restart; } } @@ -636,19 +627,18 @@ restart: err = 0; out: - spin_unlock_bh(&xfrm_state_lock); - wake_up(&net->xfrm.km_waitq); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); return err; } EXPORT_SYMBOL(xfrm_state_flush); void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si) { - spin_lock_bh(&xfrm_state_lock); + spin_lock_bh(&net->xfrm.xfrm_state_lock); si->sadcnt = net->xfrm.state_num; si->sadhcnt = net->xfrm.state_hmask; si->sadhmcnt = xfrm_state_hashmax; - spin_unlock_bh(&xfrm_state_lock); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); } EXPORT_SYMBOL(xfrm_sad_getinfo); @@ -801,7 +791,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, to_put = NULL; - spin_lock_bh(&xfrm_state_lock); + spin_lock_bh(&net->xfrm.xfrm_state_lock); h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family); hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) { if (x->props.family == encap_family && @@ -886,7 +876,7 @@ out: xfrm_state_hold(x); else *err = acquire_in_progress ? 
-EAGAIN : error; - spin_unlock_bh(&xfrm_state_lock); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); if (to_put) xfrm_state_put(to_put); return x; @@ -900,7 +890,7 @@ xfrm_stateonly_find(struct net *net, u32 mark, unsigned int h; struct xfrm_state *rx = NULL, *x = NULL; - spin_lock(&xfrm_state_lock); + spin_lock_bh(&net->xfrm.xfrm_state_lock); h = xfrm_dst_hash(net, daddr, saddr, reqid, family); hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) { if (x->props.family == family && @@ -918,13 +908,35 @@ xfrm_stateonly_find(struct net *net, u32 mark, if (rx) xfrm_state_hold(rx); - spin_unlock(&xfrm_state_lock); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); return rx; } EXPORT_SYMBOL(xfrm_stateonly_find); +struct xfrm_state *xfrm_state_lookup_byspi(struct net *net, __be32 spi, + unsigned short family) +{ + struct xfrm_state *x; + struct xfrm_state_walk *w; + + spin_lock_bh(&net->xfrm.xfrm_state_lock); + list_for_each_entry(w, &net->xfrm.state_all, all) { + x = container_of(w, struct xfrm_state, km); + if (x->props.family != family || + x->id.spi != spi) + continue; + + spin_unlock_bh(&net->xfrm.xfrm_state_lock); + xfrm_state_hold(x); + return x; + } + spin_unlock_bh(&net->xfrm.xfrm_state_lock); + return NULL; +} +EXPORT_SYMBOL(xfrm_state_lookup_byspi); + static void __xfrm_state_insert(struct xfrm_state *x) { struct net *net = xs_net(x); @@ -950,14 +962,12 @@ static void __xfrm_state_insert(struct xfrm_state *x) if (x->replay_maxage) mod_timer(&x->rtimer, jiffies + x->replay_maxage); - wake_up(&net->xfrm.km_waitq); - net->xfrm.state_num++; xfrm_hash_grow_check(net, x->bydst.next != NULL); } -/* xfrm_state_lock is held */ +/* net->xfrm.xfrm_state_lock is held */ static void __xfrm_state_bump_genids(struct xfrm_state *xnew) { struct net *net = xs_net(xnew); @@ -980,14 +990,16 @@ static void __xfrm_state_bump_genids(struct xfrm_state *xnew) void xfrm_state_insert(struct xfrm_state *x) { - spin_lock_bh(&xfrm_state_lock); + struct net *net = xs_net(x); + + 
spin_lock_bh(&net->xfrm.xfrm_state_lock); __xfrm_state_bump_genids(x); __xfrm_state_insert(x); - spin_unlock_bh(&xfrm_state_lock); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); } EXPORT_SYMBOL(xfrm_state_insert); -/* xfrm_state_lock is held */ +/* net->xfrm.xfrm_state_lock is held */ static struct xfrm_state *__find_acq_core(struct net *net, const struct xfrm_mark *m, unsigned short family, u8 mode, @@ -1079,7 +1091,7 @@ int xfrm_state_add(struct xfrm_state *x) to_put = NULL; - spin_lock_bh(&xfrm_state_lock); + spin_lock_bh(&net->xfrm.xfrm_state_lock); x1 = __xfrm_state_locate(x, use_spi, family); if (x1) { @@ -1108,7 +1120,7 @@ int xfrm_state_add(struct xfrm_state *x) err = 0; out: - spin_unlock_bh(&xfrm_state_lock); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); if (x1) { xfrm_state_delete(x1); @@ -1203,16 +1215,16 @@ out: return NULL; } -/* xfrm_state_lock is held */ -struct xfrm_state * xfrm_migrate_state_find(struct xfrm_migrate *m) +/* net->xfrm.xfrm_state_lock is held */ +struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net) { unsigned int h; struct xfrm_state *x; if (m->reqid) { - h = xfrm_dst_hash(&init_net, &m->old_daddr, &m->old_saddr, + h = xfrm_dst_hash(net, &m->old_daddr, &m->old_saddr, m->reqid, m->old_family); - hlist_for_each_entry(x, init_net.xfrm.state_bydst+h, bydst) { + hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) { if (x->props.mode != m->mode || x->id.proto != m->proto) continue; @@ -1227,9 +1239,9 @@ struct xfrm_state * xfrm_migrate_state_find(struct xfrm_migrate *m) return x; } } else { - h = xfrm_src_hash(&init_net, &m->old_daddr, &m->old_saddr, + h = xfrm_src_hash(net, &m->old_daddr, &m->old_saddr, m->old_family); - hlist_for_each_entry(x, init_net.xfrm.state_bysrc+h, bysrc) { + hlist_for_each_entry(x, net->xfrm.state_bysrc+h, bysrc) { if (x->props.mode != m->mode || x->id.proto != m->proto) continue; @@ -1247,8 +1259,8 @@ struct xfrm_state * xfrm_migrate_state_find(struct xfrm_migrate *m) } 
EXPORT_SYMBOL(xfrm_migrate_state_find); -struct xfrm_state * xfrm_state_migrate(struct xfrm_state *x, - struct xfrm_migrate *m) +struct xfrm_state *xfrm_state_migrate(struct xfrm_state *x, + struct xfrm_migrate *m) { struct xfrm_state *xc; int err; @@ -1283,10 +1295,11 @@ int xfrm_state_update(struct xfrm_state *x) struct xfrm_state *x1, *to_put; int err; int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY); + struct net *net = xs_net(x); to_put = NULL; - spin_lock_bh(&xfrm_state_lock); + spin_lock_bh(&net->xfrm.xfrm_state_lock); x1 = __xfrm_state_locate(x, use_spi, x->props.family); err = -ESRCH; @@ -1306,7 +1319,7 @@ int xfrm_state_update(struct xfrm_state *x) err = 0; out: - spin_unlock_bh(&xfrm_state_lock); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); if (to_put) xfrm_state_put(to_put); @@ -1357,7 +1370,7 @@ int xfrm_state_check_expire(struct xfrm_state *x) if (x->curlft.bytes >= x->lft.hard_byte_limit || x->curlft.packets >= x->lft.hard_packet_limit) { x->km.state = XFRM_STATE_EXPIRED; - tasklet_hrtimer_start(&x->mtimer, ktime_set(0,0), HRTIMER_MODE_REL); + tasklet_hrtimer_start(&x->mtimer, ktime_set(0, 0), HRTIMER_MODE_REL); return -EINVAL; } @@ -1377,9 +1390,9 @@ xfrm_state_lookup(struct net *net, u32 mark, const xfrm_address_t *daddr, __be32 { struct xfrm_state *x; - spin_lock_bh(&xfrm_state_lock); + spin_lock_bh(&net->xfrm.xfrm_state_lock); x = __xfrm_state_lookup(net, mark, daddr, spi, proto, family); - spin_unlock_bh(&xfrm_state_lock); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); return x; } EXPORT_SYMBOL(xfrm_state_lookup); @@ -1391,9 +1404,9 @@ xfrm_state_lookup_byaddr(struct net *net, u32 mark, { struct xfrm_state *x; - spin_lock_bh(&xfrm_state_lock); + spin_lock_bh(&net->xfrm.xfrm_state_lock); x = __xfrm_state_lookup_byaddr(net, mark, daddr, saddr, proto, family); - spin_unlock_bh(&xfrm_state_lock); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); return x; } EXPORT_SYMBOL(xfrm_state_lookup_byaddr); @@ -1405,9 +1418,9 @@ 
xfrm_find_acq(struct net *net, const struct xfrm_mark *mark, u8 mode, u32 reqid, { struct xfrm_state *x; - spin_lock_bh(&xfrm_state_lock); + spin_lock_bh(&net->xfrm.xfrm_state_lock); x = __find_acq_core(net, mark, family, mode, reqid, proto, daddr, saddr, create); - spin_unlock_bh(&xfrm_state_lock); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); return x; } @@ -1416,17 +1429,17 @@ EXPORT_SYMBOL(xfrm_find_acq); #ifdef CONFIG_XFRM_SUB_POLICY int xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n, - unsigned short family) + unsigned short family, struct net *net) { int err = 0; struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); if (!afinfo) return -EAFNOSUPPORT; - spin_lock_bh(&xfrm_state_lock); + spin_lock_bh(&net->xfrm.xfrm_state_lock); /*FIXME*/ if (afinfo->tmpl_sort) err = afinfo->tmpl_sort(dst, src, n); - spin_unlock_bh(&xfrm_state_lock); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); xfrm_state_put_afinfo(afinfo); return err; } @@ -1438,13 +1451,15 @@ xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n, { int err = 0; struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); + struct net *net = xs_net(*dst); + if (!afinfo) return -EAFNOSUPPORT; - spin_lock_bh(&xfrm_state_lock); + spin_lock_bh(&net->xfrm.xfrm_state_lock); if (afinfo->state_sort) err = afinfo->state_sort(dst, src, n); - spin_unlock_bh(&xfrm_state_lock); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); xfrm_state_put_afinfo(afinfo); return err; } @@ -1476,9 +1491,9 @@ struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq) { struct xfrm_state *x; - spin_lock_bh(&xfrm_state_lock); + spin_lock_bh(&net->xfrm.xfrm_state_lock); x = __xfrm_find_acq_byseq(net, mark, seq); - spin_unlock_bh(&xfrm_state_lock); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); return x; } EXPORT_SYMBOL(xfrm_find_acq_byseq); @@ -1496,6 +1511,30 @@ u32 xfrm_get_acqseq(void) } EXPORT_SYMBOL(xfrm_get_acqseq); +int verify_spi_info(u8 proto, u32 min, u32 max) +{ 
+ switch (proto) { + case IPPROTO_AH: + case IPPROTO_ESP: + break; + + case IPPROTO_COMP: + /* IPCOMP spi is 16-bits. */ + if (max >= 0x10000) + return -EINVAL; + break; + + default: + return -EINVAL; + } + + if (min > max) + return -EINVAL; + + return 0; +} +EXPORT_SYMBOL(verify_spi_info); + int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high) { struct net *net = xs_net(x); @@ -1525,8 +1564,8 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high) x->id.spi = minspi; } else { u32 spi = 0; - for (h=0; h<high-low+1; h++) { - spi = low + net_random()%(high-low+1); + for (h = 0; h < high-low+1; h++) { + spi = low + prandom_u32()%(high-low+1); x0 = xfrm_state_lookup(net, mark, &x->id.daddr, htonl(spi), x->id.proto, x->props.family); if (x0 == NULL) { x->id.spi = htonl(spi); @@ -1536,10 +1575,10 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high) } } if (x->id.spi) { - spin_lock_bh(&xfrm_state_lock); + spin_lock_bh(&net->xfrm.xfrm_state_lock); h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, x->props.family); hlist_add_head(&x->byspi, net->xfrm.state_byspi+h); - spin_unlock_bh(&xfrm_state_lock); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); err = 0; } @@ -1562,7 +1601,7 @@ int xfrm_state_walk(struct net *net, struct xfrm_state_walk *walk, if (walk->seq != 0 && list_empty(&walk->all)) return 0; - spin_lock_bh(&xfrm_state_lock); + spin_lock_bh(&net->xfrm.xfrm_state_lock); if (list_empty(&walk->all)) x = list_first_entry(&net->xfrm.state_all, struct xfrm_state_walk, all); else @@ -1586,7 +1625,7 @@ int xfrm_state_walk(struct net *net, struct xfrm_state_walk *walk, } list_del_init(&walk->all); out: - spin_unlock_bh(&xfrm_state_lock); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); return err; } EXPORT_SYMBOL(xfrm_state_walk); @@ -1600,20 +1639,20 @@ void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto) } EXPORT_SYMBOL(xfrm_state_walk_init); -void xfrm_state_walk_done(struct xfrm_state_walk *walk) +void 
xfrm_state_walk_done(struct xfrm_state_walk *walk, struct net *net) { if (list_empty(&walk->all)) return; - spin_lock_bh(&xfrm_state_lock); + spin_lock_bh(&net->xfrm.xfrm_state_lock); list_del(&walk->all); - spin_unlock_bh(&xfrm_state_lock); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); } EXPORT_SYMBOL(xfrm_state_walk_done); static void xfrm_replay_timer_handler(unsigned long data) { - struct xfrm_state *x = (struct xfrm_state*)data; + struct xfrm_state *x = (struct xfrm_state *)data; spin_lock(&x->lock); @@ -1655,16 +1694,12 @@ EXPORT_SYMBOL(km_state_notify); void km_state_expired(struct xfrm_state *x, int hard, u32 portid) { - struct net *net = xs_net(x); struct km_event c; c.data.hard = hard; c.portid = portid; c.event = XFRM_MSG_EXPIRE; km_state_notify(x, &c); - - if (hard) - wake_up(&net->xfrm.km_waitq); } EXPORT_SYMBOL(km_state_expired); @@ -1707,16 +1742,12 @@ EXPORT_SYMBOL(km_new_mapping); void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 portid) { - struct net *net = xp_net(pol); struct km_event c; c.data.hard = hard; c.portid = portid; c.event = XFRM_MSG_POLEXPIRE; km_policy_notify(pol, dir, &c); - - if (hard) - wake_up(&net->xfrm.km_waitq); } EXPORT_SYMBOL(km_policy_expired); @@ -2025,7 +2056,7 @@ int __net_init xfrm_state_init(struct net *net) INIT_WORK(&net->xfrm.state_hash_work, xfrm_hash_resize); INIT_HLIST_HEAD(&net->xfrm.state_gc_list); INIT_WORK(&net->xfrm.state_gc_work, xfrm_state_gc_task); - init_waitqueue_head(&net->xfrm.km_waitq); + spin_lock_init(&net->xfrm.xfrm_state_lock); return 0; out_byspi: @@ -2070,7 +2101,7 @@ static void xfrm_audit_helper_sainfo(struct xfrm_state *x, audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s", ctx->ctx_alg, ctx->ctx_doi, ctx->ctx_str); - switch(x->props.family) { + switch (x->props.family) { case AF_INET: audit_log_format(audit_buf, " src=%pI4 dst=%pI4", &x->props.saddr.a4, &x->id.daddr.a4); @@ -2100,7 +2131,7 @@ static void xfrm_audit_helper_pktinfo(struct sk_buff *skb, u16 
family, iph6 = ipv6_hdr(skb); audit_log_format(audit_buf, " src=%pI6 dst=%pI6 flowlbl=0x%x%02x%02x", - &iph6->saddr,&iph6->daddr, + &iph6->saddr, &iph6->daddr, iph6->flow_lbl[0] & 0x0f, iph6->flow_lbl[1], iph6->flow_lbl[2]); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index ec97e13743e..1ae3ec7c18b 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -181,7 +181,9 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, attrs[XFRMA_ALG_AEAD] || attrs[XFRMA_ALG_CRYPT] || attrs[XFRMA_ALG_COMP] || - attrs[XFRMA_TFCPAD]) + attrs[XFRMA_TFCPAD] || + (ntohl(p->id.spi) >= 0x10000)) + goto out; break; @@ -877,7 +879,10 @@ static int dump_one_state(struct xfrm_state *x, int count, void *ptr) static int xfrm_dump_sa_done(struct netlink_callback *cb) { struct xfrm_state_walk *walk = (struct xfrm_state_walk *) &cb->args[1]; - xfrm_state_walk_done(walk); + struct sock *sk = cb->skb->sk; + struct net *net = sock_net(sk); + + xfrm_state_walk_done(walk, net); return 0; } @@ -1074,29 +1079,6 @@ out_noput: return err; } -static int verify_userspi_info(struct xfrm_userspi_info *p) -{ - switch (p->info.id.proto) { - case IPPROTO_AH: - case IPPROTO_ESP: - break; - - case IPPROTO_COMP: - /* IPCOMP spi is 16-bits. 
*/ - if (p->max >= 0x10000) - return -EINVAL; - break; - - default: - return -EINVAL; - } - - if (p->min > p->max) - return -EINVAL; - - return 0; -} - static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs) { @@ -1111,7 +1093,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, struct xfrm_mark m; p = nlmsg_data(nlh); - err = verify_userspi_info(p); + err = verify_spi_info(p->info.id.proto, p->min, p->max); if (err) goto out_noput; @@ -1189,6 +1171,8 @@ static int verify_policy_type(u8 type) static int verify_newpolicy_info(struct xfrm_userpolicy_info *p) { + int ret; + switch (p->share) { case XFRM_SHARE_ANY: case XFRM_SHARE_SESSION: @@ -1224,7 +1208,13 @@ static int verify_newpolicy_info(struct xfrm_userpolicy_info *p) return -EINVAL; } - return verify_policy_dir(p->dir); + ret = verify_policy_dir(p->dir); + if (ret) + return ret; + if (p->index && ((p->index & XFRM_POLICY_MAX) != p->dir)) + return -EINVAL; + + return 0; } static int copy_from_user_sec_ctx(struct xfrm_policy *pol, struct nlattr **attrs) @@ -1547,8 +1537,9 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr static int xfrm_dump_policy_done(struct netlink_callback *cb) { struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *) &cb->args[1]; + struct net *net = sock_net(cb->skb->sk); - xfrm_policy_walk_done(walk); + xfrm_policy_walk_done(walk, net); return 0; } @@ -1740,11 +1731,11 @@ static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, const struct return -EMSGSIZE; id = nlmsg_data(nlh); - memcpy(&id->sa_id.daddr, &x->id.daddr,sizeof(x->id.daddr)); + memcpy(&id->sa_id.daddr, &x->id.daddr, sizeof(x->id.daddr)); id->sa_id.spi = x->id.spi; id->sa_id.family = x->props.family; id->sa_id.proto = x->id.proto; - memcpy(&id->saddr, &x->props.saddr,sizeof(x->props.saddr)); + memcpy(&id->saddr, &x->props.saddr, sizeof(x->props.saddr)); id->reqid = x->props.reqid; id->flags = c->data.aevent; @@ 
-1833,7 +1824,7 @@ static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh, struct net *net = sock_net(skb->sk); struct xfrm_state *x; struct km_event c; - int err = - EINVAL; + int err = -EINVAL; u32 mark = 0; struct xfrm_mark m; struct xfrm_aevent_id *p = nlmsg_data(nlh); @@ -2129,6 +2120,7 @@ static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh, u8 type; int err; int n = 0; + struct net *net = sock_net(skb->sk); if (attrs[XFRMA_MIGRATE] == NULL) return -EINVAL; @@ -2146,7 +2138,7 @@ static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh, if (!n) return 0; - xfrm_migrate(&pi->sel, pi->dir, type, m, n, kmp); + xfrm_migrate(&pi->sel, pi->dir, type, m, n, kmp, net); return 0; } @@ -2394,9 +2386,11 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) static void xfrm_netlink_rcv(struct sk_buff *skb) { - mutex_lock(&xfrm_cfg_mutex); + struct net *net = sock_net(skb->sk); + + mutex_lock(&net->xfrm.xfrm_cfg_mutex); netlink_rcv_skb(skb, &xfrm_user_rcv_msg); - mutex_unlock(&xfrm_cfg_mutex); + mutex_unlock(&net->xfrm.xfrm_cfg_mutex); } static inline size_t xfrm_expire_msgsize(void) |