aboutsummaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/8021q/vlan.c1
-rw-r--r--net/8021q/vlan_core.c5
-rw-r--r--net/8021q/vlan_dev.c69
-rw-r--r--net/appletalk/ddp.c5
-rw-r--r--net/atm/pppoatm.c2
-rw-r--r--net/atm/svc.c10
-rw-r--r--net/batman-adv/bat_iv_ogm.c2
-rw-r--r--net/batman-adv/bridge_loop_avoidance.c44
-rw-r--r--net/batman-adv/distributed-arp-table.c5
-rw-r--r--net/batman-adv/fragmentation.c11
-rw-r--r--net/batman-adv/gateway_client.c11
-rw-r--r--net/batman-adv/hard-interface.c2
-rw-r--r--net/batman-adv/multicast.c6
-rw-r--r--net/batman-adv/network-coding.c2
-rw-r--r--net/batman-adv/originator.c62
-rw-r--r--net/batman-adv/soft-interface.c60
-rw-r--r--net/batman-adv/translation-table.c26
-rw-r--r--net/batman-adv/types.h2
-rw-r--r--net/bluetooth/6lowpan.c65
-rw-r--r--net/bluetooth/hci_conn.c22
-rw-r--r--net/bluetooth/hci_core.c64
-rw-r--r--net/bluetooth/hci_event.c86
-rw-r--r--net/bluetooth/hci_sock.c6
-rw-r--r--net/bluetooth/l2cap_core.c18
-rw-r--r--net/bluetooth/l2cap_sock.c10
-rw-r--r--net/bluetooth/mgmt.c346
-rw-r--r--net/bluetooth/rfcomm/core.c2
-rw-r--r--net/bluetooth/rfcomm/tty.c20
-rw-r--r--net/bluetooth/smp.c225
-rw-r--r--net/bluetooth/smp.h30
-rw-r--r--net/bridge/Makefile4
-rw-r--r--net/bridge/br.c98
-rw-r--r--net/bridge/br_device.c7
-rw-r--r--net/bridge/br_fdb.c11
-rw-r--r--net/bridge/br_if.c5
-rw-r--r--net/bridge/br_input.c12
-rw-r--r--net/bridge/br_mdb.c4
-rw-r--r--net/bridge/br_multicast.c382
-rw-r--r--net/bridge/br_notify.c118
-rw-r--r--net/bridge/br_private.h63
-rw-r--r--net/bridge/br_sysfs_br.c26
-rw-r--r--net/bridge/br_vlan.c179
-rw-r--r--net/bridge/netfilter/Kconfig17
-rw-r--r--net/bridge/netfilter/Makefile1
-rw-r--r--net/bridge/netfilter/nft_meta_bridge.c139
-rw-r--r--net/ceph/ceph_common.c2
-rw-r--r--net/ceph/debugfs.c8
-rw-r--r--net/ceph/messenger.c20
-rw-r--r--net/ceph/mon_client.c150
-rw-r--r--net/ceph/osd_client.c2
-rw-r--r--net/ceph/osdmap.c5
-rw-r--r--net/ceph/pagevec.c35
-rw-r--r--net/compat.c9
-rw-r--r--net/core/Makefile2
-rw-r--r--net/core/datagram.c26
-rw-r--r--net/core/dev.c199
-rw-r--r--net/core/dev_addr_lists.c85
-rw-r--r--net/core/dst.c16
-rw-r--r--net/core/ethtool.c110
-rw-r--r--net/core/filter.c997
-rw-r--r--net/core/iovec.c61
-rw-r--r--net/core/link_watch.c2
-rw-r--r--net/core/neighbour.c15
-rw-r--r--net/core/net_namespace.c2
-rw-r--r--net/core/netclassid_cgroup.c2
-rw-r--r--net/core/netprio_cgroup.c14
-rw-r--r--net/core/ptp_classifier.c2
-rw-r--r--net/core/rtnetlink.c106
-rw-r--r--net/core/secure_seq.c25
-rw-r--r--net/core/skbuff.c31
-rw-r--r--net/core/sock.c4
-rw-r--r--net/core/tso.c77
-rw-r--r--net/core/utils.c8
-rw-r--r--net/dccp/ipv4.c1
-rw-r--r--net/dccp/timer.c2
-rw-r--r--net/decnet/af_decnet.c2
-rw-r--r--net/dns_resolver/dns_query.c8
-rw-r--r--net/dsa/dsa.c3
-rw-r--r--net/ieee802154/6lowpan_rtnl.c4
-rw-r--r--net/ieee802154/nl-mac.c6
-rw-r--r--net/ipv4/af_inet.c12
-rw-r--r--net/ipv4/datagram.c20
-rw-r--r--net/ipv4/gre_demux.c4
-rw-r--r--net/ipv4/gre_offload.c19
-rw-r--r--net/ipv4/icmp.c2
-rw-r--r--net/ipv4/igmp.c14
-rw-r--r--net/ipv4/inetpeer.c20
-rw-r--r--net/ipv4/ip_gre.c1
-rw-r--r--net/ipv4/ip_options.c4
-rw-r--r--net/ipv4/ip_output.c7
-rw-r--r--net/ipv4/ip_tunnel.c46
-rw-r--r--net/ipv4/ip_tunnel_core.c10
-rw-r--r--net/ipv4/ip_vti.c13
-rw-r--r--net/ipv4/ipip.c5
-rw-r--r--net/ipv4/ipmr.c4
-rw-r--r--net/ipv4/netfilter/iptable_nat.c14
-rw-r--r--net/ipv4/netfilter/nft_chain_nat_ipv4.c12
-rw-r--r--net/ipv4/raw.c2
-rw-r--r--net/ipv4/route.c80
-rw-r--r--net/ipv4/tcp.c3
-rw-r--r--net/ipv4/tcp_fastopen.c2
-rw-r--r--net/ipv4/tcp_input.c26
-rw-r--r--net/ipv4/tcp_memcontrol.c31
-rw-r--r--net/ipv4/tcp_metrics.c5
-rw-r--r--net/ipv4/tcp_offload.c11
-rw-r--r--net/ipv4/tcp_output.c66
-rw-r--r--net/ipv4/udp.c117
-rw-r--r--net/ipv4/udp_offload.c8
-rw-r--r--net/ipv4/udplite.c1
-rw-r--r--net/ipv4/xfrm4_mode_tunnel.c2
-rw-r--r--net/ipv4/xfrm4_output.c32
-rw-r--r--net/ipv4/xfrm4_protocol.c19
-rw-r--r--net/ipv6/af_inet6.c3
-rw-r--r--net/ipv6/ip6_checksum.c38
-rw-r--r--net/ipv6/ip6_offload.c8
-rw-r--r--net/ipv6/ip6_output.c16
-rw-r--r--net/ipv6/ip6_tunnel.c3
-rw-r--r--net/ipv6/ip6_vti.c8
-rw-r--r--net/ipv6/mcast.c13
-rw-r--r--net/ipv6/ndisc.c7
-rw-r--r--net/ipv6/netfilter/ip6table_nat.c14
-rw-r--r--net/ipv6/netfilter/nft_chain_nat_ipv6.c12
-rw-r--r--net/ipv6/output_core.c32
-rw-r--r--net/ipv6/ping.c1
-rw-r--r--net/ipv6/raw.c1
-rw-r--r--net/ipv6/route.c26
-rw-r--r--net/ipv6/sit.c5
-rw-r--r--net/ipv6/tcp_ipv6.c1
-rw-r--r--net/ipv6/tcpv6_offload.c4
-rw-r--r--net/ipv6/udp.c32
-rw-r--r--net/ipv6/udp_offload.c5
-rw-r--r--net/ipv6/udplite.c1
-rw-r--r--net/ipv6/xfrm6_output.c22
-rw-r--r--net/ipv6/xfrm6_protocol.c11
-rw-r--r--net/ipx/af_ipx.c2
-rw-r--r--net/ipx/ipx_route.c3
-rw-r--r--net/iucv/af_iucv.c34
-rw-r--r--net/key/af_key.c34
-rw-r--r--net/l2tp/l2tp_core.c59
-rw-r--r--net/l2tp/l2tp_core.h4
-rw-r--r--net/l2tp/l2tp_ip.c1
-rw-r--r--net/l2tp/l2tp_ip6.c1
-rw-r--r--net/l2tp/l2tp_netlink.c10
-rw-r--r--net/l2tp/l2tp_ppp.c4
-rw-r--r--net/mac80211/Makefile3
-rw-r--r--net/mac80211/cfg.c541
-rw-r--r--net/mac80211/chan.c4
-rw-r--r--net/mac80211/debugfs_netdev.c6
-rw-r--r--net/mac80211/driver-ops.h174
-rw-r--r--net/mac80211/ibss.c3
-rw-r--r--net/mac80211/ieee80211_i.h18
-rw-r--r--net/mac80211/iface.c10
-rw-r--r--net/mac80211/key.c7
-rw-r--r--net/mac80211/main.c2
-rw-r--r--net/mac80211/mesh.c2
-rw-r--r--net/mac80211/mesh_pathtbl.c6
-rw-r--r--net/mac80211/mesh_sync.c2
-rw-r--r--net/mac80211/mlme.c66
-rw-r--r--net/mac80211/offchannel.c27
-rw-r--r--net/mac80211/rc80211_minstrel.c12
-rw-r--r--net/mac80211/rc80211_minstrel_ht.c22
-rw-r--r--net/mac80211/scan.c15
-rw-r--r--net/mac80211/sta_info.c1
-rw-r--r--net/mac80211/status.c25
-rw-r--r--net/mac80211/tdls.c325
-rw-r--r--net/mac80211/trace.h121
-rw-r--r--net/mac80211/tx.c209
-rw-r--r--net/mac80211/util.c51
-rw-r--r--net/mac80211/vht.c9
-rw-r--r--net/mac802154/llsec.c7
-rw-r--r--net/mac802154/monitor.c3
-rw-r--r--net/mac802154/rx.c11
-rw-r--r--net/mac802154/wpan.c14
-rw-r--r--net/mpls/mpls_gso.c1
-rw-r--r--net/netfilter/ipset/ip_set_core.c5
-rw-r--r--net/netfilter/ipvs/ip_vs_conn.c1
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c17
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c2
-rw-r--r--net/netfilter/nf_conntrack_core.c2
-rw-r--r--net/netfilter/nf_conntrack_netlink.c20
-rw-r--r--net/netfilter/nf_nat_core.c59
-rw-r--r--net/netfilter/nf_tables_api.c1384
-rw-r--r--net/netfilter/nf_tables_core.c51
-rw-r--r--net/netfilter/nfnetlink_acct.c86
-rw-r--r--net/netfilter/nft_compat.c18
-rw-r--r--net/netfilter/nft_ct.c96
-rw-r--r--net/netfilter/nft_hash.c59
-rw-r--r--net/netfilter/nft_lookup.c10
-rw-r--r--net/netfilter/nft_meta.c103
-rw-r--r--net/netfilter/nft_nat.c14
-rw-r--r--net/netfilter/nft_rbtree.c43
-rw-r--r--net/netfilter/xt_bpf.c5
-rw-r--r--net/netfilter/xt_nfacct.c5
-rw-r--r--net/netfilter/xt_recent.c5
-rw-r--r--net/netfilter/xt_repldata.h22
-rw-r--r--net/netlink/af_netlink.c11
-rw-r--r--net/netlink/genetlink.c6
-rw-r--r--net/nfc/digital.h1
-rw-r--r--net/nfc/digital_core.c26
-rw-r--r--net/nfc/digital_dep.c5
-rw-r--r--net/nfc/digital_technology.c230
-rw-r--r--net/nfc/hci/command.c6
-rw-r--r--net/nfc/hci/core.c47
-rw-r--r--net/nfc/llcp_commands.c2
-rw-r--r--net/nfc/llcp_core.c13
-rw-r--r--net/nfc/nci/core.c9
-rw-r--r--net/nfc/nci/ntf.c7
-rw-r--r--net/nfc/nfc.h6
-rw-r--r--net/nfc/rawsock.c94
-rw-r--r--net/openvswitch/actions.c2
-rw-r--r--net/openvswitch/datapath.c782
-rw-r--r--net/openvswitch/flow.c55
-rw-r--r--net/openvswitch/flow.h38
-rw-r--r--net/openvswitch/flow_netlink.c112
-rw-r--r--net/openvswitch/flow_table.c62
-rw-r--r--net/openvswitch/flow_table.h3
-rw-r--r--net/openvswitch/vport-gre.c17
-rw-r--r--net/openvswitch/vport-vxlan.c2
-rw-r--r--net/rds/ib_recv.c4
-rw-r--r--net/rds/iw_recv.c4
-rw-r--r--net/rds/rdma_transport.c2
-rw-r--r--net/rds/send.c6
-rw-r--r--net/rds/tcp_listen.c2
-rw-r--r--net/rds/tcp_send.c2
-rw-r--r--net/rfkill/rfkill-gpio.c59
-rw-r--r--net/rxrpc/ar-key.c2
-rw-r--r--net/sched/cls_bpf.c4
-rw-r--r--net/sched/cls_tcindex.c30
-rw-r--r--net/sched/cls_u32.c19
-rw-r--r--net/sched/sch_api.c2
-rw-r--r--net/sched/sch_choke.c7
-rw-r--r--net/sched/sch_drr.c4
-rw-r--r--net/sched/sch_fq.c5
-rw-r--r--net/sched/sch_fq_codel.c7
-rw-r--r--net/sched/sch_hhf.c7
-rw-r--r--net/sched/sch_netem.c7
-rw-r--r--net/sched/sch_sfq.c7
-rw-r--r--net/sctp/associola.c171
-rw-r--r--net/sctp/endpointola.c2
-rw-r--r--net/sctp/ipv6.c2
-rw-r--r--net/sctp/protocol.c2
-rw-r--r--net/sctp/sm_make_chunk.c2
-rw-r--r--net/sctp/socket.c3
-rw-r--r--net/sctp/sysctl.c78
-rw-r--r--net/sctp/transport.c2
-rw-r--r--net/sctp/ulpevent.c122
-rw-r--r--net/sunrpc/auth.c3
-rw-r--r--net/sunrpc/auth_gss/auth_gss.c2
-rw-r--r--net/sunrpc/auth_gss/gss_mech_switch.c4
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c2
-rw-r--r--net/sunrpc/backchannel_rqst.c4
-rw-r--r--net/sunrpc/cache.c2
-rw-r--r--net/sunrpc/sched.c5
-rw-r--r--net/sunrpc/socklib.c3
-rw-r--r--net/sunrpc/sunrpc.h13
-rw-r--r--net/sunrpc/svc_xprt.c5
-rw-r--r--net/sunrpc/svcauth.c2
-rw-r--r--net/sunrpc/svcsock.c17
-rw-r--r--net/sunrpc/xdr.c174
-rw-r--r--net/sunrpc/xprt.c32
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c119
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_recvfrom.c643
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_sendto.c230
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c69
-rw-r--r--net/sunrpc/xprtrdma/transport.c90
-rw-r--r--net/sunrpc/xprtrdma/verbs.c753
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h17
-rw-r--r--net/sunrpc/xprtsock.c19
-rw-r--r--net/tipc/bcast.c3
-rw-r--r--net/tipc/msg.c11
-rw-r--r--net/tipc/socket.c13
-rw-r--r--net/unix/af_unix.c2
-rw-r--r--net/wireless/ap.c4
-rw-r--r--net/wireless/chan.c5
-rw-r--r--net/wireless/core.c72
-rw-r--r--net/wireless/core.h9
-rw-r--r--net/wireless/genregdb.awk14
-rw-r--r--net/wireless/ibss.c2
-rw-r--r--net/wireless/mesh.c4
-rw-r--r--net/wireless/nl80211.c172
-rw-r--r--net/wireless/nl80211.h3
-rw-r--r--net/wireless/rdev-ops.h2
-rw-r--r--net/wireless/reg.c25
-rw-r--r--net/wireless/scan.c12
-rw-r--r--net/wireless/sme.c8
-rw-r--r--net/wireless/trace.h40
-rw-r--r--net/wireless/util.c38
-rw-r--r--net/xfrm/xfrm_output.c5
-rw-r--r--net/xfrm/xfrm_policy.c48
-rw-r--r--net/xfrm/xfrm_state.c37
-rw-r--r--net/xfrm/xfrm_user.c96
292 files changed, 8641 insertions, 5351 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 175273f38cb..44ebd5c2cd4 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -169,6 +169,7 @@ int register_vlan_dev(struct net_device *dev)
if (err < 0)
goto out_uninit_mvrp;
+ vlan->nest_level = dev_get_nest_level(real_dev, is_vlan_dev) + 1;
err = register_netdevice(dev);
if (err < 0)
goto out_uninit_mvrp;
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 9012b1c922b..75d42776399 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -114,8 +114,11 @@ EXPORT_SYMBOL(vlan_dev_vlan_proto);
static struct sk_buff *vlan_reorder_header(struct sk_buff *skb)
{
- if (skb_cow(skb, skb_headroom(skb)) < 0)
+ if (skb_cow(skb, skb_headroom(skb)) < 0) {
+ kfree_skb(skb);
return NULL;
+ }
+
memmove(skb->data - ETH_HLEN, skb->data - VLAN_ETH_HLEN, 2 * ETH_ALEN);
skb->mac_header += VLAN_HLEN;
return skb;
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 8f025afa29f..dd11f612e03 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -493,48 +493,10 @@ static void vlan_dev_change_rx_flags(struct net_device *dev, int change)
}
}
-static int vlan_calculate_locking_subclass(struct net_device *real_dev)
-{
- int subclass = 0;
-
- while (is_vlan_dev(real_dev)) {
- subclass++;
- real_dev = vlan_dev_priv(real_dev)->real_dev;
- }
-
- return subclass;
-}
-
-static void vlan_dev_mc_sync(struct net_device *to, struct net_device *from)
-{
- int err = 0, subclass;
-
- subclass = vlan_calculate_locking_subclass(to);
-
- spin_lock_nested(&to->addr_list_lock, subclass);
- err = __hw_addr_sync(&to->mc, &from->mc, to->addr_len);
- if (!err)
- __dev_set_rx_mode(to);
- spin_unlock(&to->addr_list_lock);
-}
-
-static void vlan_dev_uc_sync(struct net_device *to, struct net_device *from)
-{
- int err = 0, subclass;
-
- subclass = vlan_calculate_locking_subclass(to);
-
- spin_lock_nested(&to->addr_list_lock, subclass);
- err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len);
- if (!err)
- __dev_set_rx_mode(to);
- spin_unlock(&to->addr_list_lock);
-}
-
static void vlan_dev_set_rx_mode(struct net_device *vlan_dev)
{
- vlan_dev_mc_sync(vlan_dev_priv(vlan_dev)->real_dev, vlan_dev);
- vlan_dev_uc_sync(vlan_dev_priv(vlan_dev)->real_dev, vlan_dev);
+ dev_mc_sync(vlan_dev_priv(vlan_dev)->real_dev, vlan_dev);
+ dev_uc_sync(vlan_dev_priv(vlan_dev)->real_dev, vlan_dev);
}
/*
@@ -562,6 +524,11 @@ static void vlan_dev_set_lockdep_class(struct net_device *dev, int subclass)
netdev_for_each_tx_queue(dev, vlan_dev_set_lockdep_one, &subclass);
}
+static int vlan_dev_get_lock_subclass(struct net_device *dev)
+{
+ return vlan_dev_priv(dev)->nest_level;
+}
+
static const struct header_ops vlan_header_ops = {
.create = vlan_dev_hard_header,
.rebuild = vlan_dev_rebuild_header,
@@ -597,7 +564,6 @@ static const struct net_device_ops vlan_netdev_ops;
static int vlan_dev_init(struct net_device *dev)
{
struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
- int subclass = 0;
netif_carrier_off(dev);
@@ -646,8 +612,7 @@ static int vlan_dev_init(struct net_device *dev)
SET_NETDEV_DEVTYPE(dev, &vlan_type);
- subclass = vlan_calculate_locking_subclass(dev);
- vlan_dev_set_lockdep_class(dev, subclass);
+ vlan_dev_set_lockdep_class(dev, vlan_dev_get_lock_subclass(dev));
vlan_dev_priv(dev)->vlan_pcpu_stats = netdev_alloc_pcpu_stats(struct vlan_pcpu_stats);
if (!vlan_dev_priv(dev)->vlan_pcpu_stats)
@@ -662,8 +627,6 @@ static void vlan_dev_uninit(struct net_device *dev)
struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
int i;
- free_percpu(vlan->vlan_pcpu_stats);
- vlan->vlan_pcpu_stats = NULL;
for (i = 0; i < ARRAY_SIZE(vlan->egress_priority_map); i++) {
while ((pm = vlan->egress_priority_map[i]) != NULL) {
vlan->egress_priority_map[i] = pm->next;
@@ -678,9 +641,9 @@ static netdev_features_t vlan_dev_fix_features(struct net_device *dev,
struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
netdev_features_t old_features = features;
- features &= real_dev->vlan_features;
+ features = netdev_intersect_features(features, real_dev->vlan_features);
features |= NETIF_F_RXCSUM;
- features &= real_dev->features;
+ features = netdev_intersect_features(features, real_dev->features);
features |= old_features & NETIF_F_SOFT_FEATURES;
features |= NETIF_F_LLTX;
@@ -817,8 +780,18 @@ static const struct net_device_ops vlan_netdev_ops = {
.ndo_netpoll_cleanup = vlan_dev_netpoll_cleanup,
#endif
.ndo_fix_features = vlan_dev_fix_features,
+ .ndo_get_lock_subclass = vlan_dev_get_lock_subclass,
};
+static void vlan_dev_free(struct net_device *dev)
+{
+ struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
+
+ free_percpu(vlan->vlan_pcpu_stats);
+ vlan->vlan_pcpu_stats = NULL;
+ free_netdev(dev);
+}
+
void vlan_setup(struct net_device *dev)
{
ether_setup(dev);
@@ -828,7 +801,7 @@ void vlan_setup(struct net_device *dev)
dev->tx_queue_len = 0;
dev->netdev_ops = &vlan_netdev_ops;
- dev->destructor = free_netdev;
+ dev->destructor = vlan_dev_free;
dev->ethtool_ops = &vlan_ethtool_ops;
memset(dev->broadcast, 0, ETH_ALEN);
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 786ee2f83d5..bfcf6be1d66 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1489,8 +1489,6 @@ static int atalk_rcv(struct sk_buff *skb, struct net_device *dev,
goto drop;
/* Queue packet (standard) */
- skb->sk = sock;
-
if (sock_queue_rcv_skb(sock, skb) < 0)
goto drop;
@@ -1644,7 +1642,6 @@ static int atalk_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr
if (!skb)
goto out;
- skb->sk = sk;
skb_reserve(skb, ddp_dl->header_length);
skb_reserve(skb, dev->hard_header_len);
skb->dev = dev;
@@ -1669,7 +1666,7 @@ static int atalk_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr
goto out;
}
- if (sk->sk_no_check == 1)
+ if (sk->sk_no_check_tx)
ddp->deh_sum = 0;
else
ddp->deh_sum = atalk_checksum(skb, len + sizeof(*ddp));
diff --git a/net/atm/pppoatm.c b/net/atm/pppoatm.c
index 8c93267ce96..c4e09846d1d 100644
--- a/net/atm/pppoatm.c
+++ b/net/atm/pppoatm.c
@@ -252,7 +252,7 @@ static int pppoatm_may_send(struct pppoatm_vcc *pvcc, int size)
* we need to ensure there's a memory barrier after it. The bit
* *must* be set before we do the atomic_inc() on pvcc->inflight.
* There's no smp_mb__after_set_bit(), so it's this or abuse
- * smp_mb__after_clear_bit().
+ * smp_mb__after_atomic().
*/
test_and_set_bit(BLOCKED, &pvcc->blocked);
diff --git a/net/atm/svc.c b/net/atm/svc.c
index 1281049c135..d8e5d0c2ebb 100644
--- a/net/atm/svc.c
+++ b/net/atm/svc.c
@@ -263,17 +263,11 @@ static int svc_connect(struct socket *sock, struct sockaddr *sockaddr,
goto out;
}
}
-/*
- * Not supported yet
- *
- * #ifndef CONFIG_SINGLE_SIGITF
- */
+
vcc->qos.txtp.max_pcr = SELECT_TOP_PCR(vcc->qos.txtp);
vcc->qos.txtp.pcr = 0;
vcc->qos.txtp.min_pcr = 0;
-/*
- * #endif
- */
+
error = vcc_connect(sock, vcc->itf, vcc->vpi, vcc->vci);
if (!error)
sock->state = SS_CONNECTED;
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index b3bd4ec3fd9..f04224c3200 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -1545,6 +1545,8 @@ out_neigh:
if ((orig_neigh_node) && (!is_single_hop_neigh))
batadv_orig_node_free_ref(orig_neigh_node);
out:
+ if (router_ifinfo)
+ batadv_neigh_ifinfo_free_ref(router_ifinfo);
if (router)
batadv_neigh_node_free_ref(router);
if (router_router)
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index 6f0d9ec3795..a957c814072 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -800,11 +800,6 @@ static int batadv_check_claim_group(struct batadv_priv *bat_priv,
bla_dst = (struct batadv_bla_claim_dst *)hw_dst;
bla_dst_own = &bat_priv->bla.claim_dest;
- /* check if it is a claim packet in general */
- if (memcmp(bla_dst->magic, bla_dst_own->magic,
- sizeof(bla_dst->magic)) != 0)
- return 0;
-
/* if announcement packet, use the source,
* otherwise assume it is in the hw_src
*/
@@ -866,12 +861,13 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv,
struct batadv_hard_iface *primary_if,
struct sk_buff *skb)
{
- struct batadv_bla_claim_dst *bla_dst;
+ struct batadv_bla_claim_dst *bla_dst, *bla_dst_own;
uint8_t *hw_src, *hw_dst;
- struct vlan_ethhdr *vhdr;
+ struct vlan_hdr *vhdr, vhdr_buf;
struct ethhdr *ethhdr;
struct arphdr *arphdr;
unsigned short vid;
+ int vlan_depth = 0;
__be16 proto;
int headlen;
int ret;
@@ -882,9 +878,24 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv,
proto = ethhdr->h_proto;
headlen = ETH_HLEN;
if (vid & BATADV_VLAN_HAS_TAG) {
- vhdr = vlan_eth_hdr(skb);
- proto = vhdr->h_vlan_encapsulated_proto;
- headlen += VLAN_HLEN;
+ /* Traverse the VLAN/Ethertypes.
+ *
+ * At this point it is known that the first protocol is a VLAN
+ * header, so start checking at the encapsulated protocol.
+ *
+ * The depth of the VLAN headers is recorded to drop BLA claim
+ * frames encapsulated into multiple VLAN headers (QinQ).
+ */
+ do {
+ vhdr = skb_header_pointer(skb, headlen, VLAN_HLEN,
+ &vhdr_buf);
+ if (!vhdr)
+ return 0;
+
+ proto = vhdr->h_vlan_encapsulated_proto;
+ headlen += VLAN_HLEN;
+ vlan_depth++;
+ } while (proto == htons(ETH_P_8021Q));
}
if (proto != htons(ETH_P_ARP))
@@ -914,6 +925,19 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv,
hw_src = (uint8_t *)arphdr + sizeof(struct arphdr);
hw_dst = hw_src + ETH_ALEN + 4;
bla_dst = (struct batadv_bla_claim_dst *)hw_dst;
+ bla_dst_own = &bat_priv->bla.claim_dest;
+
+ /* check if it is a claim frame in general */
+ if (memcmp(bla_dst->magic, bla_dst_own->magic,
+ sizeof(bla_dst->magic)) != 0)
+ return 0;
+
+ /* check if there is a claim frame encapsulated deeper in (QinQ) and
+ * drop that, as this is not supported by BLA but should also not be
+ * sent via the mesh.
+ */
+ if (vlan_depth > 1)
+ return 1;
/* check if it is a claim frame. */
ret = batadv_check_claim_group(bat_priv, primary_if, hw_src, hw_dst,
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index 60889df808f..f2c066b2171 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -594,7 +594,7 @@ static bool batadv_dat_send_data(struct batadv_priv *bat_priv,
if (!neigh_node)
goto free_orig;
- tmp_skb = pskb_copy(skb, GFP_ATOMIC);
+ tmp_skb = pskb_copy_for_clone(skb, GFP_ATOMIC);
if (!batadv_send_skb_prepare_unicast_4addr(bat_priv, tmp_skb,
cand[i].orig_node,
packet_subtype)) {
@@ -941,8 +941,7 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv,
* additional DAT answer may trigger kernel warnings about
* a packet coming from the wrong port.
*/
- if (batadv_is_my_client(bat_priv, dat_entry->mac_addr,
- BATADV_NO_FLAGS)) {
+ if (batadv_is_my_client(bat_priv, dat_entry->mac_addr, vid)) {
ret = true;
goto out;
}
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index bcc4bea632f..f14e54a0569 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -418,12 +418,13 @@ bool batadv_frag_send_packet(struct sk_buff *skb,
struct batadv_neigh_node *neigh_node)
{
struct batadv_priv *bat_priv;
- struct batadv_hard_iface *primary_if;
+ struct batadv_hard_iface *primary_if = NULL;
struct batadv_frag_packet frag_header;
struct sk_buff *skb_fragment;
unsigned mtu = neigh_node->if_incoming->net_dev->mtu;
unsigned header_size = sizeof(frag_header);
unsigned max_fragment_size, max_packet_size;
+ bool ret = false;
/* To avoid merge and refragmentation at next-hops we never send
* fragments larger than BATADV_FRAG_MAX_FRAG_SIZE
@@ -483,7 +484,11 @@ bool batadv_frag_send_packet(struct sk_buff *skb,
skb->len + ETH_HLEN);
batadv_send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr);
- return true;
+ ret = true;
+
out_err:
- return false;
+ if (primary_if)
+ batadv_hardif_free_ref(primary_if);
+
+ return ret;
}
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index c835e137423..90cff585b37 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -42,8 +42,10 @@
static void batadv_gw_node_free_ref(struct batadv_gw_node *gw_node)
{
- if (atomic_dec_and_test(&gw_node->refcount))
+ if (atomic_dec_and_test(&gw_node->refcount)) {
+ batadv_orig_node_free_ref(gw_node->orig_node);
kfree_rcu(gw_node, rcu);
+ }
}
static struct batadv_gw_node *
@@ -406,9 +408,14 @@ static void batadv_gw_node_add(struct batadv_priv *bat_priv,
if (gateway->bandwidth_down == 0)
return;
+ if (!atomic_inc_not_zero(&orig_node->refcount))
+ return;
+
gw_node = kzalloc(sizeof(*gw_node), GFP_ATOMIC);
- if (!gw_node)
+ if (!gw_node) {
+ batadv_orig_node_free_ref(orig_node);
return;
+ }
INIT_HLIST_NODE(&gw_node->list);
gw_node->orig_node = orig_node;
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index b851cc58085..fbda6b54baf 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -83,7 +83,7 @@ static bool batadv_is_on_batman_iface(const struct net_device *net_dev)
return true;
/* no more parents..stop recursion */
- if (net_dev->iflink == net_dev->ifindex)
+ if (net_dev->iflink == 0 || net_dev->iflink == net_dev->ifindex)
return false;
/* recurse over the parent device */
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index 8c7ca811de6..96b66fd30f9 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -415,7 +415,7 @@ batadv_mcast_forw_ipv4_node_get(struct batadv_priv *bat_priv)
hlist_for_each_entry_rcu(tmp_orig_node,
&bat_priv->mcast.want_all_ipv4_list,
mcast_want_all_ipv4_node) {
- if (!atomic_inc_not_zero(&orig_node->refcount))
+ if (!atomic_inc_not_zero(&tmp_orig_node->refcount))
continue;
orig_node = tmp_orig_node;
@@ -442,7 +442,7 @@ batadv_mcast_forw_ipv6_node_get(struct batadv_priv *bat_priv)
hlist_for_each_entry_rcu(tmp_orig_node,
&bat_priv->mcast.want_all_ipv6_list,
mcast_want_all_ipv6_node) {
- if (!atomic_inc_not_zero(&orig_node->refcount))
+ if (!atomic_inc_not_zero(&tmp_orig_node->refcount))
continue;
orig_node = tmp_orig_node;
@@ -493,7 +493,7 @@ batadv_mcast_forw_unsnoop_node_get(struct batadv_priv *bat_priv)
hlist_for_each_entry_rcu(tmp_orig_node,
&bat_priv->mcast.want_all_unsnoopables_list,
mcast_want_all_unsnoopables_node) {
- if (!atomic_inc_not_zero(&orig_node->refcount))
+ if (!atomic_inc_not_zero(&tmp_orig_node->refcount))
continue;
orig_node = tmp_orig_node;
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index 40a2fc4bcf4..8d04d174669 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -1344,7 +1344,7 @@ static void batadv_nc_skb_store_before_coding(struct batadv_priv *bat_priv,
struct ethhdr *ethhdr;
/* Copy skb header to change the mac header */
- skb = pskb_copy(skb, GFP_ATOMIC);
+ skb = pskb_copy_for_clone(skb, GFP_ATOMIC);
if (!skb)
return;
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index ffd9dfbd9b0..6a484514cd3 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -501,12 +501,17 @@ batadv_neigh_node_get(const struct batadv_orig_node *orig_node,
static void batadv_orig_ifinfo_free_rcu(struct rcu_head *rcu)
{
struct batadv_orig_ifinfo *orig_ifinfo;
+ struct batadv_neigh_node *router;
orig_ifinfo = container_of(rcu, struct batadv_orig_ifinfo, rcu);
if (orig_ifinfo->if_outgoing != BATADV_IF_DEFAULT)
batadv_hardif_free_ref_now(orig_ifinfo->if_outgoing);
+ /* this is the last reference to this object */
+ router = rcu_dereference_protected(orig_ifinfo->router, true);
+ if (router)
+ batadv_neigh_node_free_ref_now(router);
kfree(orig_ifinfo);
}
@@ -702,6 +707,47 @@ free_orig_node:
}
/**
+ * batadv_purge_neigh_ifinfo - purge obsolete ifinfo entries from neighbor
+ * @bat_priv: the bat priv with all the soft interface information
+ * @neigh: orig node which is to be checked
+ */
+static void
+batadv_purge_neigh_ifinfo(struct batadv_priv *bat_priv,
+ struct batadv_neigh_node *neigh)
+{
+ struct batadv_neigh_ifinfo *neigh_ifinfo;
+ struct batadv_hard_iface *if_outgoing;
+ struct hlist_node *node_tmp;
+
+ spin_lock_bh(&neigh->ifinfo_lock);
+
+ /* for all ifinfo objects for this neighinator */
+ hlist_for_each_entry_safe(neigh_ifinfo, node_tmp,
+ &neigh->ifinfo_list, list) {
+ if_outgoing = neigh_ifinfo->if_outgoing;
+
+ /* always keep the default interface */
+ if (if_outgoing == BATADV_IF_DEFAULT)
+ continue;
+
+ /* don't purge if the interface is not (going) down */
+ if ((if_outgoing->if_status != BATADV_IF_INACTIVE) &&
+ (if_outgoing->if_status != BATADV_IF_NOT_IN_USE) &&
+ (if_outgoing->if_status != BATADV_IF_TO_BE_REMOVED))
+ continue;
+
+ batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
+ "neighbor/ifinfo purge: neighbor %pM, iface: %s\n",
+ neigh->addr, if_outgoing->net_dev->name);
+
+ hlist_del_rcu(&neigh_ifinfo->list);
+ batadv_neigh_ifinfo_free_ref(neigh_ifinfo);
+ }
+
+ spin_unlock_bh(&neigh->ifinfo_lock);
+}
+
+/**
* batadv_purge_orig_ifinfo - purge obsolete ifinfo entries from originator
* @bat_priv: the bat priv with all the soft interface information
* @orig_node: orig node which is to be checked
@@ -800,6 +846,11 @@ batadv_purge_orig_neighbors(struct batadv_priv *bat_priv,
hlist_del_rcu(&neigh_node->list);
batadv_neigh_node_free_ref(neigh_node);
+ } else {
+ /* only necessary if not the whole neighbor is to be
+ * deleted, but some interface has been removed.
+ */
+ batadv_purge_neigh_ifinfo(bat_priv, neigh_node);
}
}
@@ -857,7 +908,7 @@ static bool batadv_purge_orig_node(struct batadv_priv *bat_priv,
{
struct batadv_neigh_node *best_neigh_node;
struct batadv_hard_iface *hard_iface;
- bool changed;
+ bool changed_ifinfo, changed_neigh;
if (batadv_has_timed_out(orig_node->last_seen,
2 * BATADV_PURGE_TIMEOUT)) {
@@ -867,10 +918,10 @@ static bool batadv_purge_orig_node(struct batadv_priv *bat_priv,
jiffies_to_msecs(orig_node->last_seen));
return true;
}
- changed = batadv_purge_orig_ifinfo(bat_priv, orig_node);
- changed = changed || batadv_purge_orig_neighbors(bat_priv, orig_node);
+ changed_ifinfo = batadv_purge_orig_ifinfo(bat_priv, orig_node);
+ changed_neigh = batadv_purge_orig_neighbors(bat_priv, orig_node);
- if (!changed)
+ if (!changed_ifinfo && !changed_neigh)
return false;
/* first for NULL ... */
@@ -1028,7 +1079,8 @@ int batadv_orig_hardif_seq_print_text(struct seq_file *seq, void *offset)
bat_priv->bat_algo_ops->bat_orig_print(bat_priv, seq, hard_iface);
out:
- batadv_hardif_free_ref(hard_iface);
+ if (hard_iface)
+ batadv_hardif_free_ref(hard_iface);
return 0;
}
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index e7ee65dc20b..cbd677f48c0 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -448,10 +448,15 @@ out:
* possibly free it
* @softif_vlan: the vlan object to release
*/
-void batadv_softif_vlan_free_ref(struct batadv_softif_vlan *softif_vlan)
+void batadv_softif_vlan_free_ref(struct batadv_softif_vlan *vlan)
{
- if (atomic_dec_and_test(&softif_vlan->refcount))
- kfree_rcu(softif_vlan, rcu);
+ if (atomic_dec_and_test(&vlan->refcount)) {
+ spin_lock_bh(&vlan->bat_priv->softif_vlan_list_lock);
+ hlist_del_rcu(&vlan->list);
+ spin_unlock_bh(&vlan->bat_priv->softif_vlan_list_lock);
+
+ kfree_rcu(vlan, rcu);
+ }
}
/**
@@ -505,6 +510,7 @@ int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid)
if (!vlan)
return -ENOMEM;
+ vlan->bat_priv = bat_priv;
vlan->vid = vid;
atomic_set(&vlan->refcount, 1);
@@ -516,6 +522,10 @@ int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid)
return err;
}
+ spin_lock_bh(&bat_priv->softif_vlan_list_lock);
+ hlist_add_head_rcu(&vlan->list, &bat_priv->softif_vlan_list);
+ spin_unlock_bh(&bat_priv->softif_vlan_list_lock);
+
/* add a new TT local entry. This one will be marked with the NOPURGE
* flag
*/
@@ -523,10 +533,6 @@ int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid)
bat_priv->soft_iface->dev_addr, vid,
BATADV_NULL_IFINDEX, BATADV_NO_MARK);
- spin_lock_bh(&bat_priv->softif_vlan_list_lock);
- hlist_add_head_rcu(&vlan->list, &bat_priv->softif_vlan_list);
- spin_unlock_bh(&bat_priv->softif_vlan_list_lock);
-
return 0;
}
@@ -538,18 +544,13 @@ int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid)
static void batadv_softif_destroy_vlan(struct batadv_priv *bat_priv,
struct batadv_softif_vlan *vlan)
{
- spin_lock_bh(&bat_priv->softif_vlan_list_lock);
- hlist_del_rcu(&vlan->list);
- spin_unlock_bh(&bat_priv->softif_vlan_list_lock);
-
- batadv_sysfs_del_vlan(bat_priv, vlan);
-
/* explicitly remove the associated TT local entry because it is marked
* with the NOPURGE flag
*/
batadv_tt_local_remove(bat_priv, bat_priv->soft_iface->dev_addr,
vlan->vid, "vlan interface destroyed", false);
+ batadv_sysfs_del_vlan(bat_priv, vlan);
batadv_softif_vlan_free_ref(vlan);
}
@@ -567,6 +568,8 @@ static int batadv_interface_add_vid(struct net_device *dev, __be16 proto,
unsigned short vid)
{
struct batadv_priv *bat_priv = netdev_priv(dev);
+ struct batadv_softif_vlan *vlan;
+ int ret;
/* only 802.1Q vlans are supported.
* batman-adv does not know how to handle other types
@@ -576,7 +579,36 @@ static int batadv_interface_add_vid(struct net_device *dev, __be16 proto,
vid |= BATADV_VLAN_HAS_TAG;
- return batadv_softif_create_vlan(bat_priv, vid);
+ /* if a new vlan is getting created and it already exists, it means that
+ * it was not deleted yet. batadv_softif_vlan_get() increases the
+ * refcount in order to revive the object.
+ *
+ * if it does not exist then create it.
+ */
+ vlan = batadv_softif_vlan_get(bat_priv, vid);
+ if (!vlan)
+ return batadv_softif_create_vlan(bat_priv, vid);
+
+ /* recreate the sysfs object if it was already destroyed (and it should
+ * be since we received a kill_vid() for this vlan
+ */
+ if (!vlan->kobj) {
+ ret = batadv_sysfs_add_vlan(bat_priv->soft_iface, vlan);
+ if (ret) {
+ batadv_softif_vlan_free_ref(vlan);
+ return ret;
+ }
+ }
+
+ /* add a new TT local entry. This one will be marked with the NOPURGE
+ * flag. This must be added again, even if the vlan object already
+ * exists, because the entry was deleted by kill_vid()
+ */
+ batadv_tt_local_add(bat_priv->soft_iface,
+ bat_priv->soft_iface->dev_addr, vid,
+ BATADV_NULL_IFINDEX, BATADV_NO_MARK);
+
+ return 0;
}
/**
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index d636bde72c9..5f59e7f899a 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -511,6 +511,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr,
struct batadv_priv *bat_priv = netdev_priv(soft_iface);
struct batadv_tt_local_entry *tt_local;
struct batadv_tt_global_entry *tt_global = NULL;
+ struct batadv_softif_vlan *vlan;
struct net_device *in_dev = NULL;
struct hlist_head *head;
struct batadv_tt_orig_list_entry *orig_entry;
@@ -572,6 +573,9 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr,
if (!tt_local)
goto out;
+ /* increase the refcounter of the related vlan */
+ vlan = batadv_softif_vlan_get(bat_priv, vid);
+
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Creating new local tt entry: %pM (vid: %d, ttvn: %d)\n",
addr, BATADV_PRINT_VID(vid),
@@ -604,6 +608,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr,
if (unlikely(hash_added != 0)) {
/* remove the reference for the hash */
batadv_tt_local_entry_free_ref(tt_local);
+ batadv_softif_vlan_free_ref(vlan);
goto out;
}
@@ -1009,6 +1014,7 @@ uint16_t batadv_tt_local_remove(struct batadv_priv *bat_priv,
{
struct batadv_tt_local_entry *tt_local_entry;
uint16_t flags, curr_flags = BATADV_NO_FLAGS;
+ struct batadv_softif_vlan *vlan;
tt_local_entry = batadv_tt_local_hash_find(bat_priv, addr, vid);
if (!tt_local_entry)
@@ -1039,6 +1045,11 @@ uint16_t batadv_tt_local_remove(struct batadv_priv *bat_priv,
hlist_del_rcu(&tt_local_entry->common.hash_entry);
batadv_tt_local_entry_free_ref(tt_local_entry);
+ /* decrease the reference held for this vlan */
+ vlan = batadv_softif_vlan_get(bat_priv, vid);
+ batadv_softif_vlan_free_ref(vlan);
+ batadv_softif_vlan_free_ref(vlan);
+
out:
if (tt_local_entry)
batadv_tt_local_entry_free_ref(tt_local_entry);
@@ -1111,6 +1122,7 @@ static void batadv_tt_local_table_free(struct batadv_priv *bat_priv)
spinlock_t *list_lock; /* protects write access to the hash lists */
struct batadv_tt_common_entry *tt_common_entry;
struct batadv_tt_local_entry *tt_local;
+ struct batadv_softif_vlan *vlan;
struct hlist_node *node_tmp;
struct hlist_head *head;
uint32_t i;
@@ -1131,6 +1143,13 @@ static void batadv_tt_local_table_free(struct batadv_priv *bat_priv)
tt_local = container_of(tt_common_entry,
struct batadv_tt_local_entry,
common);
+
+ /* decrease the reference held for this vlan */
+ vlan = batadv_softif_vlan_get(bat_priv,
+ tt_common_entry->vid);
+ batadv_softif_vlan_free_ref(vlan);
+ batadv_softif_vlan_free_ref(vlan);
+
batadv_tt_local_entry_free_ref(tt_local);
}
spin_unlock_bh(list_lock);
@@ -3139,6 +3158,7 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv)
struct batadv_hashtable *hash = bat_priv->tt.local_hash;
struct batadv_tt_common_entry *tt_common;
struct batadv_tt_local_entry *tt_local;
+ struct batadv_softif_vlan *vlan;
struct hlist_node *node_tmp;
struct hlist_head *head;
spinlock_t *list_lock; /* protects write access to the hash lists */
@@ -3167,6 +3187,12 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv)
tt_local = container_of(tt_common,
struct batadv_tt_local_entry,
common);
+
+ /* decrease the reference held for this vlan */
+ vlan = batadv_softif_vlan_get(bat_priv, tt_common->vid);
+ batadv_softif_vlan_free_ref(vlan);
+ batadv_softif_vlan_free_ref(vlan);
+
batadv_tt_local_entry_free_ref(tt_local);
}
spin_unlock_bh(list_lock);
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 34891a56773..8854c05622a 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -687,6 +687,7 @@ struct batadv_priv_nc {
/**
* struct batadv_softif_vlan - per VLAN attributes set
+ * @bat_priv: pointer to the mesh object
* @vid: VLAN identifier
* @kobj: kobject for sysfs vlan subdirectory
* @ap_isolation: AP isolation state
@@ -696,6 +697,7 @@ struct batadv_priv_nc {
* @rcu: struct used for freeing in a RCU-safe manner
*/
struct batadv_softif_vlan {
+ struct batadv_priv *bat_priv;
unsigned short vid;
struct kobject *kobj;
atomic_t ap_isolation; /* boolean */
diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index 73492b91105..8796ffa08b4 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -420,12 +420,18 @@ static int conn_send(struct l2cap_conn *conn,
return 0;
}
-static void get_dest_bdaddr(struct in6_addr *ip6_daddr,
- bdaddr_t *addr, u8 *addr_type)
+static u8 get_addr_type_from_eui64(u8 byte)
{
- u8 *eui64;
+ /* Is universal(0) or local(1) bit, */
+ if (byte & 0x02)
+ return ADDR_LE_DEV_RANDOM;
- eui64 = ip6_daddr->s6_addr + 8;
+ return ADDR_LE_DEV_PUBLIC;
+}
+
+static void copy_to_bdaddr(struct in6_addr *ip6_daddr, bdaddr_t *addr)
+{
+ u8 *eui64 = ip6_daddr->s6_addr + 8;
addr->b[0] = eui64[7];
addr->b[1] = eui64[6];
@@ -433,16 +439,19 @@ static void get_dest_bdaddr(struct in6_addr *ip6_daddr,
addr->b[3] = eui64[2];
addr->b[4] = eui64[1];
addr->b[5] = eui64[0];
+}
- addr->b[5] ^= 2;
+static void convert_dest_bdaddr(struct in6_addr *ip6_daddr,
+ bdaddr_t *addr, u8 *addr_type)
+{
+ copy_to_bdaddr(ip6_daddr, addr);
- /* Set universal/local bit to 0 */
- if (addr->b[5] & 1) {
- addr->b[5] &= ~1;
- *addr_type = ADDR_LE_DEV_PUBLIC;
- } else {
- *addr_type = ADDR_LE_DEV_RANDOM;
- }
+ /* We need to toggle the U/L bit that we got from IPv6 address
+ * so that we get the proper address and type of the BD address.
+ */
+ addr->b[5] ^= 0x02;
+
+ *addr_type = get_addr_type_from_eui64(addr->b[5]);
}
static int header_create(struct sk_buff *skb, struct net_device *netdev,
@@ -473,9 +482,11 @@ static int header_create(struct sk_buff *skb, struct net_device *netdev,
/* Get destination BT device from skb.
* If there is no such peer then discard the packet.
*/
- get_dest_bdaddr(&hdr->daddr, &addr, &addr_type);
+ convert_dest_bdaddr(&hdr->daddr, &addr, &addr_type);
- BT_DBG("dest addr %pMR type %d", &addr, addr_type);
+ BT_DBG("dest addr %pMR type %s IP %pI6c", &addr,
+ addr_type == ADDR_LE_DEV_PUBLIC ? "PUBLIC" : "RANDOM",
+ &hdr->daddr);
read_lock_irqsave(&devices_lock, flags);
peer = peer_lookup_ba(dev, &addr, addr_type);
@@ -556,7 +567,7 @@ static netdev_tx_t bt_xmit(struct sk_buff *skb, struct net_device *netdev)
} else {
unsigned long flags;
- get_dest_bdaddr(&lowpan_cb(skb)->addr, &addr, &addr_type);
+ convert_dest_bdaddr(&lowpan_cb(skb)->addr, &addr, &addr_type);
eui64_addr = lowpan_cb(skb)->addr.s6_addr + 8;
dev = lowpan_dev(netdev);
@@ -564,8 +575,10 @@ static netdev_tx_t bt_xmit(struct sk_buff *skb, struct net_device *netdev)
peer = peer_lookup_ba(dev, &addr, addr_type);
read_unlock_irqrestore(&devices_lock, flags);
- BT_DBG("xmit from %s to %pMR (%pI6c) peer %p", netdev->name,
- &addr, &lowpan_cb(skb)->addr, peer);
+ BT_DBG("xmit %s to %pMR type %s IP %pI6c peer %p",
+ netdev->name, &addr,
+ addr_type == ADDR_LE_DEV_PUBLIC ? "PUBLIC" : "RANDOM",
+ &lowpan_cb(skb)->addr, peer);
if (peer && peer->conn)
err = send_pkt(peer->conn, netdev->dev_addr,
@@ -620,13 +633,13 @@ static void set_addr(u8 *eui, u8 *addr, u8 addr_type)
eui[6] = addr[1];
eui[7] = addr[0];
- eui[0] ^= 2;
-
- /* Universal/local bit set, RFC 4291 */
+ /* Universal/local bit set, BT 6lowpan draft ch. 3.2.1 */
if (addr_type == ADDR_LE_DEV_PUBLIC)
- eui[0] |= 1;
+ eui[0] &= ~0x02;
else
- eui[0] &= ~1;
+ eui[0] |= 0x02;
+
+ BT_DBG("type %d addr %*phC", addr_type, 8, eui);
}
static void set_dev_addr(struct net_device *netdev, bdaddr_t *addr,
@@ -634,7 +647,6 @@ static void set_dev_addr(struct net_device *netdev, bdaddr_t *addr,
{
netdev->addr_assign_type = NET_ADDR_PERM;
set_addr(netdev->dev_addr, addr->b, addr_type);
- netdev->dev_addr[0] ^= 2;
}
static void ifup(struct net_device *netdev)
@@ -684,13 +696,6 @@ static int add_peer_conn(struct l2cap_conn *conn, struct lowpan_dev *dev)
memcpy(&peer->eui64_addr, (u8 *)&peer->peer_addr.s6_addr + 8,
EUI64_ADDR_LEN);
- peer->eui64_addr[0] ^= 2; /* second bit-flip (Universe/Local)
- * is done according RFC2464
- */
-
- raw_dump_inline(__func__, "peer IPv6 address",
- (unsigned char *)&peer->peer_addr, 16);
- raw_dump_inline(__func__, "peer EUI64 address", peer->eui64_addr, 8);
write_lock_irqsave(&devices_lock, flags);
INIT_LIST_HEAD(&peer->list);
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 095943c02d6..a7a27bc2c0b 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -28,6 +28,7 @@
#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci_core.h>
+#include <net/bluetooth/l2cap.h>
#include "smp.h"
#include "a2mp.h"
@@ -288,10 +289,20 @@ static void hci_conn_timeout(struct work_struct *work)
{
struct hci_conn *conn = container_of(work, struct hci_conn,
disc_work.work);
+ int refcnt = atomic_read(&conn->refcnt);
BT_DBG("hcon %p state %s", conn, state_to_string(conn->state));
- if (atomic_read(&conn->refcnt))
+ WARN_ON(refcnt < 0);
+
+ /* FIXME: It was observed that in pairing failed scenario, refcnt
+ * drops below 0. Probably this is because l2cap_conn_del calls
+ * l2cap_chan_del for each channel, and inside l2cap_chan_del conn is
+ * dropped. After that loop hci_chan_del is called which also drops
+ * conn. For now make sure that ACL is alive if refcnt is higher then 0,
+ * otherwise drop it.
+ */
+ if (refcnt > 0)
return;
switch (conn->state) {
@@ -407,6 +418,8 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst)
conn->io_capability = hdev->io_capability;
conn->remote_auth = 0xff;
conn->key_type = 0xff;
+ conn->tx_power = HCI_TX_POWER_INVALID;
+ conn->max_tx_power = HCI_TX_POWER_INVALID;
set_bit(HCI_CONN_POWER_SAVE, &conn->flags);
conn->disc_timeout = HCI_DISCONN_TIMEOUT;
@@ -607,11 +620,6 @@ static void hci_req_add_le_create_conn(struct hci_request *req,
if (hci_update_random_address(req, false, &own_addr_type))
return;
- /* Save the address type used for this connnection attempt so we able
- * to retrieve this information if we need it.
- */
- conn->src_type = own_addr_type;
-
cp.scan_interval = cpu_to_le16(hdev->le_scan_interval);
cp.scan_window = cpu_to_le16(hdev->le_scan_window);
bacpy(&cp.peer_addr, &conn->dst);
@@ -891,7 +899,7 @@ static int hci_conn_auth(struct hci_conn *conn, __u8 sec_level, __u8 auth_type)
/* If we're already encrypted set the REAUTH_PEND flag,
* otherwise set the ENCRYPT_PEND.
*/
- if (conn->key_type != 0xff)
+ if (conn->link_mode & HCI_LM_ENCRYPT)
set_bit(HCI_CONN_REAUTH_PEND, &conn->flags);
else
set_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags);
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index d31f144860d..0a43cce9a91 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -34,6 +34,7 @@
#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci_core.h>
+#include <net/bluetooth/l2cap.h>
#include "smp.h"
@@ -579,6 +580,62 @@ static int sniff_max_interval_get(void *data, u64 *val)
DEFINE_SIMPLE_ATTRIBUTE(sniff_max_interval_fops, sniff_max_interval_get,
sniff_max_interval_set, "%llu\n");
+static int conn_info_min_age_set(void *data, u64 val)
+{
+ struct hci_dev *hdev = data;
+
+ if (val == 0 || val > hdev->conn_info_max_age)
+ return -EINVAL;
+
+ hci_dev_lock(hdev);
+ hdev->conn_info_min_age = val;
+ hci_dev_unlock(hdev);
+
+ return 0;
+}
+
+static int conn_info_min_age_get(void *data, u64 *val)
+{
+ struct hci_dev *hdev = data;
+
+ hci_dev_lock(hdev);
+ *val = hdev->conn_info_min_age;
+ hci_dev_unlock(hdev);
+
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(conn_info_min_age_fops, conn_info_min_age_get,
+ conn_info_min_age_set, "%llu\n");
+
+static int conn_info_max_age_set(void *data, u64 val)
+{
+ struct hci_dev *hdev = data;
+
+ if (val == 0 || val < hdev->conn_info_min_age)
+ return -EINVAL;
+
+ hci_dev_lock(hdev);
+ hdev->conn_info_max_age = val;
+ hci_dev_unlock(hdev);
+
+ return 0;
+}
+
+static int conn_info_max_age_get(void *data, u64 *val)
+{
+ struct hci_dev *hdev = data;
+
+ hci_dev_lock(hdev);
+ *val = hdev->conn_info_max_age;
+ hci_dev_unlock(hdev);
+
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(conn_info_max_age_fops, conn_info_max_age_get,
+ conn_info_max_age_set, "%llu\n");
+
static int identity_show(struct seq_file *f, void *p)
{
struct hci_dev *hdev = f->private;
@@ -1754,6 +1811,11 @@ static int __hci_init(struct hci_dev *hdev)
&blacklist_fops);
debugfs_create_file("uuids", 0444, hdev->debugfs, hdev, &uuids_fops);
+ debugfs_create_file("conn_info_min_age", 0644, hdev->debugfs, hdev,
+ &conn_info_min_age_fops);
+ debugfs_create_file("conn_info_max_age", 0644, hdev->debugfs, hdev,
+ &conn_info_max_age_fops);
+
if (lmp_bredr_capable(hdev)) {
debugfs_create_file("inquiry_cache", 0444, hdev->debugfs,
hdev, &inquiry_cache_fops);
@@ -3789,6 +3851,8 @@ struct hci_dev *hci_alloc_dev(void)
hdev->rpa_timeout = HCI_DEFAULT_RPA_TIMEOUT;
hdev->discov_interleaved_timeout = DISCOV_INTERLEAVED_TIMEOUT;
+ hdev->conn_info_min_age = DEFAULT_CONN_INFO_MIN_AGE;
+ hdev->conn_info_max_age = DEFAULT_CONN_INFO_MAX_AGE;
mutex_init(&hdev->lock);
mutex_init(&hdev->req_lock);
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index ca19fd4bbb8..640c54ec1bd 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -45,9 +45,13 @@ static void hci_cc_inquiry_cancel(struct hci_dev *hdev, struct sk_buff *skb)
return;
clear_bit(HCI_INQUIRY, &hdev->flags);
- smp_mb__after_clear_bit(); /* wake_up_bit advises about this barrier */
+ smp_mb__after_atomic(); /* wake_up_bit advises about this barrier */
wake_up_bit(&hdev->flags, HCI_INQUIRY);
+ hci_dev_lock(hdev);
+ hci_discovery_set_state(hdev, DISCOVERY_STOPPED);
+ hci_dev_unlock(hdev);
+
hci_conn_check_pending(hdev);
}
@@ -1245,6 +1249,59 @@ static void hci_cc_write_remote_amp_assoc(struct hci_dev *hdev,
amp_write_rem_assoc_continue(hdev, rp->phy_handle);
}
+static void hci_cc_read_rssi(struct hci_dev *hdev, struct sk_buff *skb)
+{
+ struct hci_rp_read_rssi *rp = (void *) skb->data;
+ struct hci_conn *conn;
+
+ BT_DBG("%s status 0x%2.2x", hdev->name, rp->status);
+
+ if (rp->status)
+ return;
+
+ hci_dev_lock(hdev);
+
+ conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(rp->handle));
+ if (conn)
+ conn->rssi = rp->rssi;
+
+ hci_dev_unlock(hdev);
+}
+
+static void hci_cc_read_tx_power(struct hci_dev *hdev, struct sk_buff *skb)
+{
+ struct hci_cp_read_tx_power *sent;
+ struct hci_rp_read_tx_power *rp = (void *) skb->data;
+ struct hci_conn *conn;
+
+ BT_DBG("%s status 0x%2.2x", hdev->name, rp->status);
+
+ if (rp->status)
+ return;
+
+ sent = hci_sent_cmd_data(hdev, HCI_OP_READ_TX_POWER);
+ if (!sent)
+ return;
+
+ hci_dev_lock(hdev);
+
+ conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(rp->handle));
+ if (!conn)
+ goto unlock;
+
+ switch (sent->type) {
+ case 0x00:
+ conn->tx_power = rp->tx_power;
+ break;
+ case 0x01:
+ conn->max_tx_power = rp->tx_power;
+ break;
+ }
+
+unlock:
+ hci_dev_unlock(hdev);
+}
+
static void hci_cs_inquiry(struct hci_dev *hdev, __u8 status)
{
BT_DBG("%s status 0x%2.2x", hdev->name, status);
@@ -1400,6 +1457,7 @@ static int hci_outgoing_auth_needed(struct hci_dev *hdev,
* is requested.
*/
if (!hci_conn_ssp_enabled(conn) && !(conn->auth_type & 0x01) &&
+ conn->pending_sec_level != BT_SECURITY_FIPS &&
conn->pending_sec_level != BT_SECURITY_HIGH &&
conn->pending_sec_level != BT_SECURITY_MEDIUM)
return 0;
@@ -1826,7 +1884,7 @@ static void hci_inquiry_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
if (!test_and_clear_bit(HCI_INQUIRY, &hdev->flags))
return;
- smp_mb__after_clear_bit(); /* wake_up_bit advises about this barrier */
+ smp_mb__after_atomic(); /* wake_up_bit advises about this barrier */
wake_up_bit(&hdev->flags, HCI_INQUIRY);
if (!test_bit(HCI_MGMT, &hdev->dev_flags))
@@ -2637,6 +2695,14 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
hci_cc_write_remote_amp_assoc(hdev, skb);
break;
+ case HCI_OP_READ_RSSI:
+ hci_cc_read_rssi(hdev, skb);
+ break;
+
+ case HCI_OP_READ_TX_POWER:
+ hci_cc_read_tx_power(hdev, skb);
+ break;
+
default:
BT_DBG("%s opcode 0x%4.4x", hdev->name, opcode);
break;
@@ -3015,7 +3081,8 @@ static void hci_link_key_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
}
if (key->type == HCI_LK_COMBINATION && key->pin_len < 16 &&
- conn->pending_sec_level == BT_SECURITY_HIGH) {
+ (conn->pending_sec_level == BT_SECURITY_HIGH ||
+ conn->pending_sec_level == BT_SECURITY_FIPS)) {
BT_DBG("%s ignoring key unauthenticated for high security",
hdev->name);
goto not_found;
@@ -3474,7 +3541,11 @@ static void hci_io_capa_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
cp.authentication = conn->auth_type;
/* Request MITM protection if our IO caps allow it
- * except for the no-bonding case
+ * except for the no-bonding case.
+ * conn->auth_type is not updated here since
+ * that might cause the user confirmation to be
+ * rejected in case the remote doesn't have the
+ * IO capabilities for MITM.
*/
if (conn->io_capability != HCI_IO_NO_INPUT_OUTPUT &&
cp.authentication != HCI_AT_NO_BONDING)
@@ -3565,8 +3636,11 @@ static void hci_user_confirm_request_evt(struct hci_dev *hdev,
/* If we're not the initiators request authorization to
* proceed from user space (mgmt_user_confirm with
- * confirm_hint set to 1). */
- if (!test_bit(HCI_CONN_AUTH_PEND, &conn->flags)) {
+ * confirm_hint set to 1). The exception is if neither
+ * side had MITM in which case we do auto-accept.
+ */
+ if (!test_bit(HCI_CONN_AUTH_PEND, &conn->flags) &&
+ (loc_mitm || rem_mitm)) {
BT_DBG("Confirming auto-accept as acceptor");
confirm_hint = 1;
goto confirm;
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index f608bffdb8b..80d25c150a6 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -143,7 +143,7 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb)
if (!skb_copy) {
/* Create a private copy with headroom */
- skb_copy = __pskb_copy(skb, 1, GFP_ATOMIC);
+ skb_copy = __pskb_copy_fclone(skb, 1, GFP_ATOMIC, true);
if (!skb_copy)
continue;
@@ -247,8 +247,8 @@ void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb)
struct hci_mon_hdr *hdr;
/* Create a private copy with headroom */
- skb_copy = __pskb_copy(skb, HCI_MON_HDR_SIZE,
- GFP_ATOMIC);
+ skb_copy = __pskb_copy_fclone(skb, HCI_MON_HDR_SIZE,
+ GFP_ATOMIC, true);
if (!skb_copy)
continue;
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index a1e5bb7d06e..323f23cd2c3 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -471,8 +471,14 @@ void l2cap_chan_set_defaults(struct l2cap_chan *chan)
chan->max_tx = L2CAP_DEFAULT_MAX_TX;
chan->tx_win = L2CAP_DEFAULT_TX_WINDOW;
chan->tx_win_max = L2CAP_DEFAULT_TX_WINDOW;
+ chan->remote_max_tx = chan->max_tx;
+ chan->remote_tx_win = chan->tx_win;
chan->ack_win = L2CAP_DEFAULT_TX_WINDOW;
chan->sec_level = BT_SECURITY_LOW;
+ chan->flush_to = L2CAP_DEFAULT_FLUSH_TO;
+ chan->retrans_timeout = L2CAP_DEFAULT_RETRANS_TO;
+ chan->monitor_timeout = L2CAP_DEFAULT_MONITOR_TO;
+ chan->conf_state = 0;
set_bit(FLAG_FORCE_ACTIVE, &chan->flags);
}
@@ -1657,7 +1663,13 @@ static void l2cap_conn_del(struct hci_conn *hcon, int err)
kfree_skb(conn->rx_skb);
skb_queue_purge(&conn->pending_rx);
- flush_work(&conn->pending_rx_work);
+
+ /* We can not call flush_work(&conn->pending_rx_work) here since we
+ * might block if we are running on a worker from the same workqueue
+ * pending_rx_work is waiting on.
+ */
+ if (work_pending(&conn->pending_rx_work))
+ cancel_work_sync(&conn->pending_rx_work);
l2cap_unregister_all_users(conn);
@@ -7519,9 +7531,9 @@ int __init l2cap_init(void)
l2cap_debugfs = debugfs_create_file("l2cap", 0444, bt_debugfs,
NULL, &l2cap_debugfs_fops);
- debugfs_create_u16("l2cap_le_max_credits", 0466, bt_debugfs,
+ debugfs_create_u16("l2cap_le_max_credits", 0644, bt_debugfs,
&le_max_credits);
- debugfs_create_u16("l2cap_le_default_mps", 0466, bt_debugfs,
+ debugfs_create_u16("l2cap_le_default_mps", 0644, bt_debugfs,
&le_default_mps);
bt_6lowpan_init();
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index ef5e5b04f34..e1378693cc9 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -787,11 +787,6 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
/*change security for LE channels */
if (chan->scid == L2CAP_CID_ATT) {
- if (!conn->hcon->out) {
- err = -EINVAL;
- break;
- }
-
if (smp_conn_security(conn->hcon, sec.level))
break;
sk->sk_state = BT_CONFIG;
@@ -1180,13 +1175,16 @@ static struct l2cap_chan *l2cap_sock_new_connection_cb(struct l2cap_chan *chan)
/* Check for backlog size */
if (sk_acceptq_is_full(parent)) {
BT_DBG("backlog full %d", parent->sk_ack_backlog);
+ release_sock(parent);
return NULL;
}
sk = l2cap_sock_alloc(sock_net(parent), NULL, BTPROTO_L2CAP,
GFP_ATOMIC);
- if (!sk)
+ if (!sk) {
+ release_sock(parent);
return NULL;
+ }
bt_sock_reclassify_lock(sk, BTPROTO_L2CAP);
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 54abbce3a39..af8e0a6243b 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -29,12 +29,13 @@
#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci_core.h>
+#include <net/bluetooth/l2cap.h>
#include <net/bluetooth/mgmt.h>
#include "smp.h"
#define MGMT_VERSION 1
-#define MGMT_REVISION 5
+#define MGMT_REVISION 6
static const u16 mgmt_commands[] = {
MGMT_OP_READ_INDEX_LIST,
@@ -83,6 +84,7 @@ static const u16 mgmt_commands[] = {
MGMT_OP_SET_DEBUG_KEYS,
MGMT_OP_SET_PRIVACY,
MGMT_OP_LOAD_IRKS,
+ MGMT_OP_GET_CONN_INFO,
};
static const u16 mgmt_events[] = {
@@ -1045,6 +1047,43 @@ static void clean_up_hci_complete(struct hci_dev *hdev, u8 status)
}
}
+static void hci_stop_discovery(struct hci_request *req)
+{
+ struct hci_dev *hdev = req->hdev;
+ struct hci_cp_remote_name_req_cancel cp;
+ struct inquiry_entry *e;
+
+ switch (hdev->discovery.state) {
+ case DISCOVERY_FINDING:
+ if (test_bit(HCI_INQUIRY, &hdev->flags)) {
+ hci_req_add(req, HCI_OP_INQUIRY_CANCEL, 0, NULL);
+ } else {
+ cancel_delayed_work(&hdev->le_scan_disable);
+ hci_req_add_le_scan_disable(req);
+ }
+
+ break;
+
+ case DISCOVERY_RESOLVING:
+ e = hci_inquiry_cache_lookup_resolve(hdev, BDADDR_ANY,
+ NAME_PENDING);
+ if (!e)
+ return;
+
+ bacpy(&cp.bdaddr, &e->data.bdaddr);
+ hci_req_add(req, HCI_OP_REMOTE_NAME_REQ_CANCEL, sizeof(cp),
+ &cp);
+
+ break;
+
+ default:
+ /* Passive scanning */
+ if (test_bit(HCI_LE_SCAN, &hdev->dev_flags))
+ hci_req_add_le_scan_disable(req);
+ break;
+ }
+}
+
static int clean_up_hci_state(struct hci_dev *hdev)
{
struct hci_request req;
@@ -1061,9 +1100,7 @@ static int clean_up_hci_state(struct hci_dev *hdev)
if (test_bit(HCI_ADVERTISING, &hdev->dev_flags))
disable_advertising(&req);
- if (test_bit(HCI_LE_SCAN, &hdev->dev_flags)) {
- hci_req_add_le_scan_disable(&req);
- }
+ hci_stop_discovery(&req);
list_for_each_entry(conn, &hdev->conn_hash.list, list) {
struct hci_cp_disconnect dc;
@@ -2994,8 +3031,13 @@ static int user_pairing_resp(struct sock *sk, struct hci_dev *hdev,
}
if (addr->type == BDADDR_LE_PUBLIC || addr->type == BDADDR_LE_RANDOM) {
- /* Continue with pairing via SMP */
+ /* Continue with pairing via SMP. The hdev lock must be
+ * released as SMP may try to recquire it for crypto
+ * purposes.
+ */
+ hci_dev_unlock(hdev);
err = smp_user_confirm_reply(conn, mgmt_op, passkey);
+ hci_dev_lock(hdev);
if (!err)
err = cmd_complete(sk, hdev->id, mgmt_op,
@@ -3572,8 +3614,6 @@ static int stop_discovery(struct sock *sk, struct hci_dev *hdev, void *data,
{
struct mgmt_cp_stop_discovery *mgmt_cp = data;
struct pending_cmd *cmd;
- struct hci_cp_remote_name_req_cancel cp;
- struct inquiry_entry *e;
struct hci_request req;
int err;
@@ -3603,52 +3643,22 @@ static int stop_discovery(struct sock *sk, struct hci_dev *hdev, void *data,
hci_req_init(&req, hdev);
- switch (hdev->discovery.state) {
- case DISCOVERY_FINDING:
- if (test_bit(HCI_INQUIRY, &hdev->flags)) {
- hci_req_add(&req, HCI_OP_INQUIRY_CANCEL, 0, NULL);
- } else {
- cancel_delayed_work(&hdev->le_scan_disable);
-
- hci_req_add_le_scan_disable(&req);
- }
-
- break;
-
- case DISCOVERY_RESOLVING:
- e = hci_inquiry_cache_lookup_resolve(hdev, BDADDR_ANY,
- NAME_PENDING);
- if (!e) {
- mgmt_pending_remove(cmd);
- err = cmd_complete(sk, hdev->id,
- MGMT_OP_STOP_DISCOVERY, 0,
- &mgmt_cp->type,
- sizeof(mgmt_cp->type));
- hci_discovery_set_state(hdev, DISCOVERY_STOPPED);
- goto unlock;
- }
-
- bacpy(&cp.bdaddr, &e->data.bdaddr);
- hci_req_add(&req, HCI_OP_REMOTE_NAME_REQ_CANCEL, sizeof(cp),
- &cp);
-
- break;
+ hci_stop_discovery(&req);
- default:
- BT_DBG("unknown discovery state %u", hdev->discovery.state);
-
- mgmt_pending_remove(cmd);
- err = cmd_complete(sk, hdev->id, MGMT_OP_STOP_DISCOVERY,
- MGMT_STATUS_FAILED, &mgmt_cp->type,
- sizeof(mgmt_cp->type));
+ err = hci_req_run(&req, stop_discovery_complete);
+ if (!err) {
+ hci_discovery_set_state(hdev, DISCOVERY_STOPPING);
goto unlock;
}
- err = hci_req_run(&req, stop_discovery_complete);
- if (err < 0)
- mgmt_pending_remove(cmd);
- else
- hci_discovery_set_state(hdev, DISCOVERY_STOPPING);
+ mgmt_pending_remove(cmd);
+
+ /* If no HCI commands were sent we're done */
+ if (err == -ENODATA) {
+ err = cmd_complete(sk, hdev->id, MGMT_OP_STOP_DISCOVERY, 0,
+ &mgmt_cp->type, sizeof(mgmt_cp->type));
+ hci_discovery_set_state(hdev, DISCOVERY_STOPPED);
+ }
unlock:
hci_dev_unlock(hdev);
@@ -4532,7 +4542,7 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev,
for (i = 0; i < key_count; i++) {
struct mgmt_ltk_info *key = &cp->keys[i];
- u8 type, addr_type;
+ u8 type, addr_type, authenticated;
if (key->addr.type == BDADDR_LE_PUBLIC)
addr_type = ADDR_LE_DEV_PUBLIC;
@@ -4544,8 +4554,19 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev,
else
type = HCI_SMP_LTK_SLAVE;
+ switch (key->type) {
+ case MGMT_LTK_UNAUTHENTICATED:
+ authenticated = 0x00;
+ break;
+ case MGMT_LTK_AUTHENTICATED:
+ authenticated = 0x01;
+ break;
+ default:
+ continue;
+ }
+
hci_add_ltk(hdev, &key->addr.bdaddr, addr_type, type,
- key->type, key->val, key->enc_size, key->ediv,
+ authenticated, key->val, key->enc_size, key->ediv,
key->rand);
}
@@ -4557,6 +4578,218 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev,
return err;
}
+struct cmd_conn_lookup {
+ struct hci_conn *conn;
+ bool valid_tx_power;
+ u8 mgmt_status;
+};
+
+static void get_conn_info_complete(struct pending_cmd *cmd, void *data)
+{
+ struct cmd_conn_lookup *match = data;
+ struct mgmt_cp_get_conn_info *cp;
+ struct mgmt_rp_get_conn_info rp;
+ struct hci_conn *conn = cmd->user_data;
+
+ if (conn != match->conn)
+ return;
+
+ cp = (struct mgmt_cp_get_conn_info *) cmd->param;
+
+ memset(&rp, 0, sizeof(rp));
+ bacpy(&rp.addr.bdaddr, &cp->addr.bdaddr);
+ rp.addr.type = cp->addr.type;
+
+ if (!match->mgmt_status) {
+ rp.rssi = conn->rssi;
+
+ if (match->valid_tx_power) {
+ rp.tx_power = conn->tx_power;
+ rp.max_tx_power = conn->max_tx_power;
+ } else {
+ rp.tx_power = HCI_TX_POWER_INVALID;
+ rp.max_tx_power = HCI_TX_POWER_INVALID;
+ }
+ }
+
+ cmd_complete(cmd->sk, cmd->index, MGMT_OP_GET_CONN_INFO,
+ match->mgmt_status, &rp, sizeof(rp));
+
+ hci_conn_drop(conn);
+
+ mgmt_pending_remove(cmd);
+}
+
+static void conn_info_refresh_complete(struct hci_dev *hdev, u8 status)
+{
+ struct hci_cp_read_rssi *cp;
+ struct hci_conn *conn;
+ struct cmd_conn_lookup match;
+ u16 handle;
+
+ BT_DBG("status 0x%02x", status);
+
+ hci_dev_lock(hdev);
+
+ /* TX power data is valid in case request completed successfully,
+ * otherwise we assume it's not valid. At the moment we assume that
+ * either both or none of current and max values are valid to keep code
+ * simple.
+ */
+ match.valid_tx_power = !status;
+
+ /* Commands sent in request are either Read RSSI or Read Transmit Power
+ * Level so we check which one was last sent to retrieve connection
+ * handle. Both commands have handle as first parameter so it's safe to
+ * cast data on the same command struct.
+ *
+ * First command sent is always Read RSSI and we fail only if it fails.
+ * In other case we simply override error to indicate success as we
+ * already remembered if TX power value is actually valid.
+ */
+ cp = hci_sent_cmd_data(hdev, HCI_OP_READ_RSSI);
+ if (!cp) {
+ cp = hci_sent_cmd_data(hdev, HCI_OP_READ_TX_POWER);
+ status = 0;
+ }
+
+ if (!cp) {
+ BT_ERR("invalid sent_cmd in response");
+ goto unlock;
+ }
+
+ handle = __le16_to_cpu(cp->handle);
+ conn = hci_conn_hash_lookup_handle(hdev, handle);
+ if (!conn) {
+ BT_ERR("unknown handle (%d) in response", handle);
+ goto unlock;
+ }
+
+ match.conn = conn;
+ match.mgmt_status = mgmt_status(status);
+
+ /* Cache refresh is complete, now reply for mgmt request for given
+ * connection only.
+ */
+ mgmt_pending_foreach(MGMT_OP_GET_CONN_INFO, hdev,
+ get_conn_info_complete, &match);
+
+unlock:
+ hci_dev_unlock(hdev);
+}
+
+static int get_conn_info(struct sock *sk, struct hci_dev *hdev, void *data,
+ u16 len)
+{
+ struct mgmt_cp_get_conn_info *cp = data;
+ struct mgmt_rp_get_conn_info rp;
+ struct hci_conn *conn;
+ unsigned long conn_info_age;
+ int err = 0;
+
+ BT_DBG("%s", hdev->name);
+
+ memset(&rp, 0, sizeof(rp));
+ bacpy(&rp.addr.bdaddr, &cp->addr.bdaddr);
+ rp.addr.type = cp->addr.type;
+
+ if (!bdaddr_type_is_valid(cp->addr.type))
+ return cmd_complete(sk, hdev->id, MGMT_OP_GET_CONN_INFO,
+ MGMT_STATUS_INVALID_PARAMS,
+ &rp, sizeof(rp));
+
+ hci_dev_lock(hdev);
+
+ if (!hdev_is_powered(hdev)) {
+ err = cmd_complete(sk, hdev->id, MGMT_OP_GET_CONN_INFO,
+ MGMT_STATUS_NOT_POWERED, &rp, sizeof(rp));
+ goto unlock;
+ }
+
+ if (cp->addr.type == BDADDR_BREDR)
+ conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK,
+ &cp->addr.bdaddr);
+ else
+ conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &cp->addr.bdaddr);
+
+ if (!conn || conn->state != BT_CONNECTED) {
+ err = cmd_complete(sk, hdev->id, MGMT_OP_GET_CONN_INFO,
+ MGMT_STATUS_NOT_CONNECTED, &rp, sizeof(rp));
+ goto unlock;
+ }
+
+ /* To avoid client trying to guess when to poll again for information we
+ * calculate conn info age as random value between min/max set in hdev.
+ */
+ conn_info_age = hdev->conn_info_min_age +
+ prandom_u32_max(hdev->conn_info_max_age -
+ hdev->conn_info_min_age);
+
+ /* Query controller to refresh cached values if they are too old or were
+ * never read.
+ */
+ if (time_after(jiffies, conn->conn_info_timestamp +
+ msecs_to_jiffies(conn_info_age)) ||
+ !conn->conn_info_timestamp) {
+ struct hci_request req;
+ struct hci_cp_read_tx_power req_txp_cp;
+ struct hci_cp_read_rssi req_rssi_cp;
+ struct pending_cmd *cmd;
+
+ hci_req_init(&req, hdev);
+ req_rssi_cp.handle = cpu_to_le16(conn->handle);
+ hci_req_add(&req, HCI_OP_READ_RSSI, sizeof(req_rssi_cp),
+ &req_rssi_cp);
+
+ /* For LE links TX power does not change thus we don't need to
+ * query for it once value is known.
+ */
+ if (!bdaddr_type_is_le(cp->addr.type) ||
+ conn->tx_power == HCI_TX_POWER_INVALID) {
+ req_txp_cp.handle = cpu_to_le16(conn->handle);
+ req_txp_cp.type = 0x00;
+ hci_req_add(&req, HCI_OP_READ_TX_POWER,
+ sizeof(req_txp_cp), &req_txp_cp);
+ }
+
+ /* Max TX power needs to be read only once per connection */
+ if (conn->max_tx_power == HCI_TX_POWER_INVALID) {
+ req_txp_cp.handle = cpu_to_le16(conn->handle);
+ req_txp_cp.type = 0x01;
+ hci_req_add(&req, HCI_OP_READ_TX_POWER,
+ sizeof(req_txp_cp), &req_txp_cp);
+ }
+
+ err = hci_req_run(&req, conn_info_refresh_complete);
+ if (err < 0)
+ goto unlock;
+
+ cmd = mgmt_pending_add(sk, MGMT_OP_GET_CONN_INFO, hdev,
+ data, len);
+ if (!cmd) {
+ err = -ENOMEM;
+ goto unlock;
+ }
+
+ hci_conn_hold(conn);
+ cmd->user_data = conn;
+
+ conn->conn_info_timestamp = jiffies;
+ } else {
+ /* Cache is valid, just reply with values cached in hci_conn */
+ rp.rssi = conn->rssi;
+ rp.tx_power = conn->tx_power;
+ rp.max_tx_power = conn->max_tx_power;
+
+ err = cmd_complete(sk, hdev->id, MGMT_OP_GET_CONN_INFO,
+ MGMT_STATUS_SUCCESS, &rp, sizeof(rp));
+ }
+
+unlock:
+ hci_dev_unlock(hdev);
+ return err;
+}
+
static const struct mgmt_handler {
int (*func) (struct sock *sk, struct hci_dev *hdev, void *data,
u16 data_len);
@@ -4612,6 +4845,7 @@ static const struct mgmt_handler {
{ set_debug_keys, false, MGMT_SETTING_SIZE },
{ set_privacy, false, MGMT_SET_PRIVACY_SIZE },
{ load_irks, true, MGMT_LOAD_IRKS_SIZE },
+ { get_conn_info, false, MGMT_GET_CONN_INFO_SIZE },
};
@@ -5007,6 +5241,14 @@ void mgmt_new_link_key(struct hci_dev *hdev, struct link_key *key,
mgmt_event(MGMT_EV_NEW_LINK_KEY, hdev, &ev, sizeof(ev), NULL);
}
+static u8 mgmt_ltk_type(struct smp_ltk *ltk)
+{
+ if (ltk->authenticated)
+ return MGMT_LTK_AUTHENTICATED;
+
+ return MGMT_LTK_UNAUTHENTICATED;
+}
+
void mgmt_new_ltk(struct hci_dev *hdev, struct smp_ltk *key, bool persistent)
{
struct mgmt_ev_new_long_term_key ev;
@@ -5032,7 +5274,7 @@ void mgmt_new_ltk(struct hci_dev *hdev, struct smp_ltk *key, bool persistent)
bacpy(&ev.key.addr.bdaddr, &key->bdaddr);
ev.key.addr.type = link_to_bdaddr(LE_LINK, key->bdaddr_type);
- ev.key.type = key->authenticated;
+ ev.key.type = mgmt_ltk_type(key);
ev.key.enc_size = key->enc_size;
ev.key.ediv = key->ediv;
ev.key.rand = key->rand;
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index cf620260aff..754b6fe4f74 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -307,7 +307,7 @@ struct rfcomm_dlc *rfcomm_dlc_alloc(gfp_t prio)
setup_timer(&d->timer, rfcomm_dlc_timeout, (unsigned long)d);
skb_queue_head_init(&d->tx_queue);
- spin_lock_init(&d->lock);
+ mutex_init(&d->lock);
atomic_set(&d->refcnt, 1);
rfcomm_dlc_clear_state(d);
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index 403ec09f480..8e385a0ae60 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -70,7 +70,7 @@ struct rfcomm_dev {
};
static LIST_HEAD(rfcomm_dev_list);
-static DEFINE_SPINLOCK(rfcomm_dev_lock);
+static DEFINE_MUTEX(rfcomm_dev_lock);
static void rfcomm_dev_data_ready(struct rfcomm_dlc *dlc, struct sk_buff *skb);
static void rfcomm_dev_state_change(struct rfcomm_dlc *dlc, int err);
@@ -96,9 +96,9 @@ static void rfcomm_dev_destruct(struct tty_port *port)
if (dev->tty_dev)
tty_unregister_device(rfcomm_tty_driver, dev->id);
- spin_lock(&rfcomm_dev_lock);
+ mutex_lock(&rfcomm_dev_lock);
list_del(&dev->list);
- spin_unlock(&rfcomm_dev_lock);
+ mutex_unlock(&rfcomm_dev_lock);
kfree(dev);
@@ -161,14 +161,14 @@ static struct rfcomm_dev *rfcomm_dev_get(int id)
{
struct rfcomm_dev *dev;
- spin_lock(&rfcomm_dev_lock);
+ mutex_lock(&rfcomm_dev_lock);
dev = __rfcomm_dev_lookup(id);
if (dev && !tty_port_get(&dev->port))
dev = NULL;
- spin_unlock(&rfcomm_dev_lock);
+ mutex_unlock(&rfcomm_dev_lock);
return dev;
}
@@ -224,7 +224,7 @@ static struct rfcomm_dev *__rfcomm_dev_add(struct rfcomm_dev_req *req,
if (!dev)
return ERR_PTR(-ENOMEM);
- spin_lock(&rfcomm_dev_lock);
+ mutex_lock(&rfcomm_dev_lock);
if (req->dev_id < 0) {
dev->id = 0;
@@ -305,11 +305,11 @@ static struct rfcomm_dev *__rfcomm_dev_add(struct rfcomm_dev_req *req,
holds reference to this module. */
__module_get(THIS_MODULE);
- spin_unlock(&rfcomm_dev_lock);
+ mutex_unlock(&rfcomm_dev_lock);
return dev;
out:
- spin_unlock(&rfcomm_dev_lock);
+ mutex_unlock(&rfcomm_dev_lock);
kfree(dev);
return ERR_PTR(err);
}
@@ -524,7 +524,7 @@ static int rfcomm_get_dev_list(void __user *arg)
di = dl->dev_info;
- spin_lock(&rfcomm_dev_lock);
+ mutex_lock(&rfcomm_dev_lock);
list_for_each_entry(dev, &rfcomm_dev_list, list) {
if (!tty_port_get(&dev->port))
@@ -540,7 +540,7 @@ static int rfcomm_get_dev_list(void __user *arg)
break;
}
- spin_unlock(&rfcomm_dev_lock);
+ mutex_unlock(&rfcomm_dev_lock);
dl->dev_num = n;
size = sizeof(*dl) + n * sizeof(*di);
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index dfb4e1161c1..e33a982161c 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -35,6 +35,33 @@
#define AUTH_REQ_MASK 0x07
+#define SMP_FLAG_TK_VALID 1
+#define SMP_FLAG_CFM_PENDING 2
+#define SMP_FLAG_MITM_AUTH 3
+#define SMP_FLAG_COMPLETE 4
+#define SMP_FLAG_INITIATOR 5
+
+struct smp_chan {
+ struct l2cap_conn *conn;
+ u8 preq[7]; /* SMP Pairing Request */
+ u8 prsp[7]; /* SMP Pairing Response */
+ u8 prnd[16]; /* SMP Pairing Random (local) */
+ u8 rrnd[16]; /* SMP Pairing Random (remote) */
+ u8 pcnf[16]; /* SMP Pairing Confirm */
+ u8 tk[16]; /* SMP Temporary Key */
+ u8 enc_key_size;
+ u8 remote_key_dist;
+ bdaddr_t id_addr;
+ u8 id_addr_type;
+ u8 irk[16];
+ struct smp_csrk *csrk;
+ struct smp_csrk *slave_csrk;
+ struct smp_ltk *ltk;
+ struct smp_ltk *slave_ltk;
+ struct smp_irk *remote_irk;
+ unsigned long flags;
+};
+
static inline void swap128(const u8 src[16], u8 dst[16])
{
int i;
@@ -358,6 +385,16 @@ static const u8 gen_method[5][5] = {
{ CFM_PASSKEY, CFM_PASSKEY, REQ_PASSKEY, JUST_WORKS, OVERLAP },
};
+static u8 get_auth_method(struct smp_chan *smp, u8 local_io, u8 remote_io)
+{
+ /* If either side has unknown io_caps, use JUST WORKS */
+ if (local_io > SMP_IO_KEYBOARD_DISPLAY ||
+ remote_io > SMP_IO_KEYBOARD_DISPLAY)
+ return JUST_WORKS;
+
+ return gen_method[remote_io][local_io];
+}
+
static int tk_request(struct l2cap_conn *conn, u8 remote_oob, u8 auth,
u8 local_io, u8 remote_io)
{
@@ -369,38 +406,34 @@ static int tk_request(struct l2cap_conn *conn, u8 remote_oob, u8 auth,
/* Initialize key for JUST WORKS */
memset(smp->tk, 0, sizeof(smp->tk));
- clear_bit(SMP_FLAG_TK_VALID, &smp->smp_flags);
+ clear_bit(SMP_FLAG_TK_VALID, &smp->flags);
BT_DBG("tk_request: auth:%d lcl:%d rem:%d", auth, local_io, remote_io);
/* If neither side wants MITM, use JUST WORKS */
- /* If either side has unknown io_caps, use JUST WORKS */
/* Otherwise, look up method from the table */
- if (!(auth & SMP_AUTH_MITM) ||
- local_io > SMP_IO_KEYBOARD_DISPLAY ||
- remote_io > SMP_IO_KEYBOARD_DISPLAY)
+ if (!(auth & SMP_AUTH_MITM))
method = JUST_WORKS;
else
- method = gen_method[remote_io][local_io];
+ method = get_auth_method(smp, local_io, remote_io);
/* If not bonding, don't ask user to confirm a Zero TK */
if (!(auth & SMP_AUTH_BONDING) && method == JUST_CFM)
method = JUST_WORKS;
/* Don't confirm locally initiated pairing attempts */
- if (method == JUST_CFM && test_bit(SMP_FLAG_INITIATOR,
- &smp->smp_flags))
+ if (method == JUST_CFM && test_bit(SMP_FLAG_INITIATOR, &smp->flags))
method = JUST_WORKS;
/* If Just Works, Continue with Zero TK */
if (method == JUST_WORKS) {
- set_bit(SMP_FLAG_TK_VALID, &smp->smp_flags);
+ set_bit(SMP_FLAG_TK_VALID, &smp->flags);
return 0;
}
/* Not Just Works/Confirm results in MITM Authentication */
if (method != JUST_CFM)
- set_bit(SMP_FLAG_MITM_AUTH, &smp->smp_flags);
+ set_bit(SMP_FLAG_MITM_AUTH, &smp->flags);
/* If both devices have Keyoard-Display I/O, the master
* Confirms and the slave Enters the passkey.
@@ -419,7 +452,7 @@ static int tk_request(struct l2cap_conn *conn, u8 remote_oob, u8 auth,
passkey %= 1000000;
put_unaligned_le32(passkey, smp->tk);
BT_DBG("PassKey: %d", passkey);
- set_bit(SMP_FLAG_TK_VALID, &smp->smp_flags);
+ set_bit(SMP_FLAG_TK_VALID, &smp->flags);
}
hci_dev_lock(hcon->hdev);
@@ -441,15 +474,13 @@ static int tk_request(struct l2cap_conn *conn, u8 remote_oob, u8 auth,
return ret;
}
-static void confirm_work(struct work_struct *work)
+static u8 smp_confirm(struct smp_chan *smp)
{
- struct smp_chan *smp = container_of(work, struct smp_chan, confirm);
struct l2cap_conn *conn = smp->conn;
struct hci_dev *hdev = conn->hcon->hdev;
struct crypto_blkcipher *tfm = hdev->tfm_aes;
struct smp_cmd_pairing_confirm cp;
int ret;
- u8 reason;
BT_DBG("conn %p", conn);
@@ -463,35 +494,27 @@ static void confirm_work(struct work_struct *work)
hci_dev_unlock(hdev);
- if (ret) {
- reason = SMP_UNSPECIFIED;
- goto error;
- }
+ if (ret)
+ return SMP_UNSPECIFIED;
- clear_bit(SMP_FLAG_CFM_PENDING, &smp->smp_flags);
+ clear_bit(SMP_FLAG_CFM_PENDING, &smp->flags);
smp_send_cmd(smp->conn, SMP_CMD_PAIRING_CONFIRM, sizeof(cp), &cp);
- return;
-
-error:
- smp_failure(conn, reason);
+ return 0;
}
-static void random_work(struct work_struct *work)
+static u8 smp_random(struct smp_chan *smp)
{
- struct smp_chan *smp = container_of(work, struct smp_chan, random);
struct l2cap_conn *conn = smp->conn;
struct hci_conn *hcon = conn->hcon;
struct hci_dev *hdev = hcon->hdev;
struct crypto_blkcipher *tfm = hdev->tfm_aes;
- u8 reason, confirm[16];
+ u8 confirm[16];
int ret;
- if (IS_ERR_OR_NULL(tfm)) {
- reason = SMP_UNSPECIFIED;
- goto error;
- }
+ if (IS_ERR_OR_NULL(tfm))
+ return SMP_UNSPECIFIED;
BT_DBG("conn %p %s", conn, conn->hcon->out ? "master" : "slave");
@@ -504,15 +527,12 @@ static void random_work(struct work_struct *work)
hci_dev_unlock(hdev);
- if (ret) {
- reason = SMP_UNSPECIFIED;
- goto error;
- }
+ if (ret)
+ return SMP_UNSPECIFIED;
if (memcmp(smp->pcnf, confirm, sizeof(smp->pcnf)) != 0) {
BT_ERR("Pairing failed (confirmation values mismatch)");
- reason = SMP_CONFIRM_FAILED;
- goto error;
+ return SMP_CONFIRM_FAILED;
}
if (hcon->out) {
@@ -525,15 +545,13 @@ static void random_work(struct work_struct *work)
memset(stk + smp->enc_key_size, 0,
SMP_MAX_ENC_KEY_SIZE - smp->enc_key_size);
- if (test_and_set_bit(HCI_CONN_ENCRYPT_PEND, &hcon->flags)) {
- reason = SMP_UNSPECIFIED;
- goto error;
- }
+ if (test_and_set_bit(HCI_CONN_ENCRYPT_PEND, &hcon->flags))
+ return SMP_UNSPECIFIED;
hci_le_start_enc(hcon, ediv, rand, stk);
hcon->enc_key_size = smp->enc_key_size;
} else {
- u8 stk[16];
+ u8 stk[16], auth;
__le64 rand = 0;
__le16 ediv = 0;
@@ -545,15 +563,17 @@ static void random_work(struct work_struct *work)
memset(stk + smp->enc_key_size, 0,
SMP_MAX_ENC_KEY_SIZE - smp->enc_key_size);
+ if (hcon->pending_sec_level == BT_SECURITY_HIGH)
+ auth = 1;
+ else
+ auth = 0;
+
hci_add_ltk(hcon->hdev, &hcon->dst, hcon->dst_type,
- HCI_SMP_STK_SLAVE, 0, stk, smp->enc_key_size,
+ HCI_SMP_STK_SLAVE, auth, stk, smp->enc_key_size,
ediv, rand);
}
- return;
-
-error:
- smp_failure(conn, reason);
+ return 0;
}
static struct smp_chan *smp_chan_create(struct l2cap_conn *conn)
@@ -564,9 +584,6 @@ static struct smp_chan *smp_chan_create(struct l2cap_conn *conn)
if (!smp)
return NULL;
- INIT_WORK(&smp->confirm, confirm_work);
- INIT_WORK(&smp->random, random_work);
-
smp->conn = conn;
conn->smp_chan = smp;
conn->hcon->smp_conn = conn;
@@ -583,7 +600,7 @@ void smp_chan_destroy(struct l2cap_conn *conn)
BUG_ON(!smp);
- complete = test_bit(SMP_FLAG_COMPLETE, &smp->smp_flags);
+ complete = test_bit(SMP_FLAG_COMPLETE, &smp->flags);
mgmt_smp_complete(conn->hcon, complete);
kfree(smp->csrk);
@@ -634,7 +651,7 @@ int smp_user_confirm_reply(struct hci_conn *hcon, u16 mgmt_op, __le32 passkey)
put_unaligned_le32(value, smp->tk);
/* Fall Through */
case MGMT_OP_USER_CONFIRM_REPLY:
- set_bit(SMP_FLAG_TK_VALID, &smp->smp_flags);
+ set_bit(SMP_FLAG_TK_VALID, &smp->flags);
break;
case MGMT_OP_USER_PASSKEY_NEG_REPLY:
case MGMT_OP_USER_CONFIRM_NEG_REPLY:
@@ -646,8 +663,11 @@ int smp_user_confirm_reply(struct hci_conn *hcon, u16 mgmt_op, __le32 passkey)
}
/* If it is our turn to send Pairing Confirm, do so now */
- if (test_bit(SMP_FLAG_CFM_PENDING, &smp->smp_flags))
- queue_work(hcon->hdev->workqueue, &smp->confirm);
+ if (test_bit(SMP_FLAG_CFM_PENDING, &smp->flags)) {
+ u8 rsp = smp_confirm(smp);
+ if (rsp)
+ smp_failure(conn, rsp);
+ }
return 0;
}
@@ -656,14 +676,13 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb)
{
struct smp_cmd_pairing rsp, *req = (void *) skb->data;
struct smp_chan *smp;
- u8 key_size;
- u8 auth = SMP_AUTH_NONE;
+ u8 key_size, auth, sec_level;
int ret;
BT_DBG("conn %p", conn);
if (skb->len < sizeof(*req))
- return SMP_UNSPECIFIED;
+ return SMP_INVALID_PARAMS;
if (conn->hcon->link_mode & HCI_LM_MASTER)
return SMP_CMD_NOTSUPP;
@@ -681,10 +700,21 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb)
skb_pull(skb, sizeof(*req));
/* We didn't start the pairing, so match remote */
- if (req->auth_req & SMP_AUTH_BONDING)
- auth = req->auth_req;
+ auth = req->auth_req;
+
+ sec_level = authreq_to_seclevel(auth);
+ if (sec_level > conn->hcon->pending_sec_level)
+ conn->hcon->pending_sec_level = sec_level;
- conn->hcon->pending_sec_level = authreq_to_seclevel(auth);
+ /* If we need MITM check that it can be acheived */
+ if (conn->hcon->pending_sec_level >= BT_SECURITY_HIGH) {
+ u8 method;
+
+ method = get_auth_method(smp, conn->hcon->io_capability,
+ req->io_capability);
+ if (method == JUST_WORKS || method == JUST_CFM)
+ return SMP_AUTH_REQUIREMENTS;
+ }
build_pairing_cmd(conn, req, &rsp, auth);
@@ -704,7 +734,7 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb)
if (ret)
return SMP_UNSPECIFIED;
- clear_bit(SMP_FLAG_INITIATOR, &smp->smp_flags);
+ clear_bit(SMP_FLAG_INITIATOR, &smp->flags);
return 0;
}
@@ -713,14 +743,13 @@ static u8 smp_cmd_pairing_rsp(struct l2cap_conn *conn, struct sk_buff *skb)
{
struct smp_cmd_pairing *req, *rsp = (void *) skb->data;
struct smp_chan *smp = conn->smp_chan;
- struct hci_dev *hdev = conn->hcon->hdev;
u8 key_size, auth = SMP_AUTH_NONE;
int ret;
BT_DBG("conn %p", conn);
if (skb->len < sizeof(*rsp))
- return SMP_UNSPECIFIED;
+ return SMP_INVALID_PARAMS;
if (!(conn->hcon->link_mode & HCI_LM_MASTER))
return SMP_CMD_NOTSUPP;
@@ -733,6 +762,16 @@ static u8 smp_cmd_pairing_rsp(struct l2cap_conn *conn, struct sk_buff *skb)
if (check_enc_key_size(conn, key_size))
return SMP_ENC_KEY_SIZE;
+ /* If we need MITM check that it can be acheived */
+ if (conn->hcon->pending_sec_level >= BT_SECURITY_HIGH) {
+ u8 method;
+
+ method = get_auth_method(smp, req->io_capability,
+ rsp->io_capability);
+ if (method == JUST_WORKS || method == JUST_CFM)
+ return SMP_AUTH_REQUIREMENTS;
+ }
+
get_random_bytes(smp->prnd, sizeof(smp->prnd));
smp->prsp[0] = SMP_CMD_PAIRING_RSP;
@@ -753,11 +792,11 @@ static u8 smp_cmd_pairing_rsp(struct l2cap_conn *conn, struct sk_buff *skb)
if (ret)
return SMP_UNSPECIFIED;
- set_bit(SMP_FLAG_CFM_PENDING, &smp->smp_flags);
+ set_bit(SMP_FLAG_CFM_PENDING, &smp->flags);
/* Can't compose response until we have been confirmed */
- if (test_bit(SMP_FLAG_TK_VALID, &smp->smp_flags))
- queue_work(hdev->workqueue, &smp->confirm);
+ if (test_bit(SMP_FLAG_TK_VALID, &smp->flags))
+ return smp_confirm(smp);
return 0;
}
@@ -765,12 +804,11 @@ static u8 smp_cmd_pairing_rsp(struct l2cap_conn *conn, struct sk_buff *skb)
static u8 smp_cmd_pairing_confirm(struct l2cap_conn *conn, struct sk_buff *skb)
{
struct smp_chan *smp = conn->smp_chan;
- struct hci_dev *hdev = conn->hcon->hdev;
BT_DBG("conn %p %s", conn, conn->hcon->out ? "master" : "slave");
if (skb->len < sizeof(smp->pcnf))
- return SMP_UNSPECIFIED;
+ return SMP_INVALID_PARAMS;
memcpy(smp->pcnf, skb->data, sizeof(smp->pcnf));
skb_pull(skb, sizeof(smp->pcnf));
@@ -778,10 +816,10 @@ static u8 smp_cmd_pairing_confirm(struct l2cap_conn *conn, struct sk_buff *skb)
if (conn->hcon->out)
smp_send_cmd(conn, SMP_CMD_PAIRING_RANDOM, sizeof(smp->prnd),
smp->prnd);
- else if (test_bit(SMP_FLAG_TK_VALID, &smp->smp_flags))
- queue_work(hdev->workqueue, &smp->confirm);
+ else if (test_bit(SMP_FLAG_TK_VALID, &smp->flags))
+ return smp_confirm(smp);
else
- set_bit(SMP_FLAG_CFM_PENDING, &smp->smp_flags);
+ set_bit(SMP_FLAG_CFM_PENDING, &smp->flags);
return 0;
}
@@ -789,19 +827,16 @@ static u8 smp_cmd_pairing_confirm(struct l2cap_conn *conn, struct sk_buff *skb)
static u8 smp_cmd_pairing_random(struct l2cap_conn *conn, struct sk_buff *skb)
{
struct smp_chan *smp = conn->smp_chan;
- struct hci_dev *hdev = conn->hcon->hdev;
BT_DBG("conn %p", conn);
if (skb->len < sizeof(smp->rrnd))
- return SMP_UNSPECIFIED;
+ return SMP_INVALID_PARAMS;
memcpy(smp->rrnd, skb->data, sizeof(smp->rrnd));
skb_pull(skb, sizeof(smp->rrnd));
- queue_work(hdev->workqueue, &smp->random);
-
- return 0;
+ return smp_random(smp);
}
static u8 smp_ltk_encrypt(struct l2cap_conn *conn, u8 sec_level)
@@ -832,16 +867,19 @@ static u8 smp_cmd_security_req(struct l2cap_conn *conn, struct sk_buff *skb)
struct smp_cmd_pairing cp;
struct hci_conn *hcon = conn->hcon;
struct smp_chan *smp;
+ u8 sec_level;
BT_DBG("conn %p", conn);
if (skb->len < sizeof(*rp))
- return SMP_UNSPECIFIED;
+ return SMP_INVALID_PARAMS;
if (!(conn->hcon->link_mode & HCI_LM_MASTER))
return SMP_CMD_NOTSUPP;
- hcon->pending_sec_level = authreq_to_seclevel(rp->auth_req);
+ sec_level = authreq_to_seclevel(rp->auth_req);
+ if (sec_level > hcon->pending_sec_level)
+ hcon->pending_sec_level = sec_level;
if (smp_ltk_encrypt(conn, hcon->pending_sec_level))
return 0;
@@ -861,7 +899,7 @@ static u8 smp_cmd_security_req(struct l2cap_conn *conn, struct sk_buff *skb)
smp_send_cmd(conn, SMP_CMD_PAIRING_REQ, sizeof(cp), &cp);
- clear_bit(SMP_FLAG_INITIATOR, &smp->smp_flags);
+ clear_bit(SMP_FLAG_INITIATOR, &smp->flags);
return 0;
}
@@ -895,9 +933,12 @@ int smp_conn_security(struct hci_conn *hcon, __u8 sec_level)
if (smp_sufficient_security(hcon, sec_level))
return 1;
+ if (sec_level > hcon->pending_sec_level)
+ hcon->pending_sec_level = sec_level;
+
if (hcon->link_mode & HCI_LM_MASTER)
- if (smp_ltk_encrypt(conn, sec_level))
- goto done;
+ if (smp_ltk_encrypt(conn, hcon->pending_sec_level))
+ return 0;
if (test_and_set_bit(HCI_CONN_LE_SMP_PEND, &hcon->flags))
return 0;
@@ -908,10 +949,11 @@ int smp_conn_security(struct hci_conn *hcon, __u8 sec_level)
authreq = seclevel_to_authreq(sec_level);
- /* hcon->auth_type is set by pair_device in mgmt.c. If the MITM
- * flag is set we should also set it for the SMP request.
+ /* Require MITM if IO Capability allows or the security level
+ * requires it.
*/
- if ((hcon->auth_type & 0x01))
+ if (hcon->io_capability != HCI_IO_NO_INPUT_OUTPUT ||
+ hcon->pending_sec_level > BT_SECURITY_MEDIUM)
authreq |= SMP_AUTH_MITM;
if (hcon->link_mode & HCI_LM_MASTER) {
@@ -928,10 +970,7 @@ int smp_conn_security(struct hci_conn *hcon, __u8 sec_level)
smp_send_cmd(conn, SMP_CMD_SECURITY_REQ, sizeof(cp), &cp);
}
- set_bit(SMP_FLAG_INITIATOR, &smp->smp_flags);
-
-done:
- hcon->pending_sec_level = sec_level;
+ set_bit(SMP_FLAG_INITIATOR, &smp->flags);
return 0;
}
@@ -944,7 +983,7 @@ static int smp_cmd_encrypt_info(struct l2cap_conn *conn, struct sk_buff *skb)
BT_DBG("conn %p", conn);
if (skb->len < sizeof(*rp))
- return SMP_UNSPECIFIED;
+ return SMP_INVALID_PARAMS;
/* Ignore this PDU if it wasn't requested */
if (!(smp->remote_key_dist & SMP_DIST_ENC_KEY))
@@ -969,7 +1008,7 @@ static int smp_cmd_master_ident(struct l2cap_conn *conn, struct sk_buff *skb)
BT_DBG("conn %p", conn);
if (skb->len < sizeof(*rp))
- return SMP_UNSPECIFIED;
+ return SMP_INVALID_PARAMS;
/* Ignore this PDU if it wasn't requested */
if (!(smp->remote_key_dist & SMP_DIST_ENC_KEY))
@@ -1001,7 +1040,7 @@ static int smp_cmd_ident_info(struct l2cap_conn *conn, struct sk_buff *skb)
BT_DBG("");
if (skb->len < sizeof(*info))
- return SMP_UNSPECIFIED;
+ return SMP_INVALID_PARAMS;
/* Ignore this PDU if it wasn't requested */
if (!(smp->remote_key_dist & SMP_DIST_ID_KEY))
@@ -1025,7 +1064,7 @@ static int smp_cmd_ident_addr_info(struct l2cap_conn *conn,
BT_DBG("");
if (skb->len < sizeof(*info))
- return SMP_UNSPECIFIED;
+ return SMP_INVALID_PARAMS;
/* Ignore this PDU if it wasn't requested */
if (!(smp->remote_key_dist & SMP_DIST_ID_KEY))
@@ -1075,7 +1114,7 @@ static int smp_cmd_sign_info(struct l2cap_conn *conn, struct sk_buff *skb)
BT_DBG("conn %p", conn);
if (skb->len < sizeof(*rp))
- return SMP_UNSPECIFIED;
+ return SMP_INVALID_PARAMS;
/* Ignore this PDU if it wasn't requested */
if (!(smp->remote_key_dist & SMP_DIST_SIGN))
@@ -1358,7 +1397,7 @@ int smp_distribute_keys(struct l2cap_conn *conn)
clear_bit(HCI_CONN_LE_SMP_PEND, &hcon->flags);
cancel_delayed_work_sync(&conn->security_timer);
- set_bit(SMP_FLAG_COMPLETE, &smp->smp_flags);
+ set_bit(SMP_FLAG_COMPLETE, &smp->flags);
smp_notify_keys(conn);
smp_chan_destroy(conn);
diff --git a/net/bluetooth/smp.h b/net/bluetooth/smp.h
index 1277147a915..5a8dc36460a 100644
--- a/net/bluetooth/smp.h
+++ b/net/bluetooth/smp.h
@@ -111,39 +111,11 @@ struct smp_cmd_security_req {
#define SMP_CMD_NOTSUPP 0x07
#define SMP_UNSPECIFIED 0x08
#define SMP_REPEATED_ATTEMPTS 0x09
+#define SMP_INVALID_PARAMS 0x0a
#define SMP_MIN_ENC_KEY_SIZE 7
#define SMP_MAX_ENC_KEY_SIZE 16
-#define SMP_FLAG_TK_VALID 1
-#define SMP_FLAG_CFM_PENDING 2
-#define SMP_FLAG_MITM_AUTH 3
-#define SMP_FLAG_COMPLETE 4
-#define SMP_FLAG_INITIATOR 5
-
-struct smp_chan {
- struct l2cap_conn *conn;
- u8 preq[7]; /* SMP Pairing Request */
- u8 prsp[7]; /* SMP Pairing Response */
- u8 prnd[16]; /* SMP Pairing Random (local) */
- u8 rrnd[16]; /* SMP Pairing Random (remote) */
- u8 pcnf[16]; /* SMP Pairing Confirm */
- u8 tk[16]; /* SMP Temporary Key */
- u8 enc_key_size;
- u8 remote_key_dist;
- bdaddr_t id_addr;
- u8 id_addr_type;
- u8 irk[16];
- struct smp_csrk *csrk;
- struct smp_csrk *slave_csrk;
- struct smp_ltk *ltk;
- struct smp_ltk *slave_ltk;
- struct smp_irk *remote_irk;
- unsigned long smp_flags;
- struct work_struct confirm;
- struct work_struct random;
-};
-
/* SMP Commands */
bool smp_sufficient_security(struct hci_conn *hcon, u8 sec_level);
int smp_conn_security(struct hci_conn *hcon, __u8 sec_level);
diff --git a/net/bridge/Makefile b/net/bridge/Makefile
index e85498b2f16..8590b942bff 100644
--- a/net/bridge/Makefile
+++ b/net/bridge/Makefile
@@ -5,7 +5,7 @@
obj-$(CONFIG_BRIDGE) += bridge.o
bridge-y := br.o br_device.o br_fdb.o br_forward.o br_if.o br_input.o \
- br_ioctl.o br_notify.o br_stp.o br_stp_bpdu.o \
+ br_ioctl.o br_stp.o br_stp_bpdu.o \
br_stp_if.o br_stp_timer.o br_netlink.o
bridge-$(CONFIG_SYSFS) += br_sysfs_if.o br_sysfs_br.o
@@ -16,4 +16,4 @@ bridge-$(CONFIG_BRIDGE_IGMP_SNOOPING) += br_multicast.o br_mdb.o
bridge-$(CONFIG_BRIDGE_VLAN_FILTERING) += br_vlan.o
-obj-$(CONFIG_BRIDGE_NF_EBTABLES) += netfilter/
+obj-$(CONFIG_NETFILTER) += netfilter/
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 19311aafcf5..1a755a1e541 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -22,6 +22,104 @@
#include "br_private.h"
+/*
+ * Handle changes in state of network devices enslaved to a bridge.
+ *
+ * Note: don't care about up/down if bridge itself is down, because
+ * port state is checked when bridge is brought up.
+ */
+static int br_device_event(struct notifier_block *unused, unsigned long event, void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct net_bridge_port *p;
+ struct net_bridge *br;
+ bool changed_addr;
+ int err;
+
+ /* register of bridge completed, add sysfs entries */
+ if ((dev->priv_flags & IFF_EBRIDGE) && event == NETDEV_REGISTER) {
+ br_sysfs_addbr(dev);
+ return NOTIFY_DONE;
+ }
+
+ /* not a port of a bridge */
+ p = br_port_get_rtnl(dev);
+ if (!p)
+ return NOTIFY_DONE;
+
+ br = p->br;
+
+ switch (event) {
+ case NETDEV_CHANGEMTU:
+ dev_set_mtu(br->dev, br_min_mtu(br));
+ break;
+
+ case NETDEV_CHANGEADDR:
+ spin_lock_bh(&br->lock);
+ br_fdb_changeaddr(p, dev->dev_addr);
+ changed_addr = br_stp_recalculate_bridge_id(br);
+ spin_unlock_bh(&br->lock);
+
+ if (changed_addr)
+ call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev);
+
+ break;
+
+ case NETDEV_CHANGE:
+ br_port_carrier_check(p);
+ break;
+
+ case NETDEV_FEAT_CHANGE:
+ netdev_update_features(br->dev);
+ break;
+
+ case NETDEV_DOWN:
+ spin_lock_bh(&br->lock);
+ if (br->dev->flags & IFF_UP)
+ br_stp_disable_port(p);
+ spin_unlock_bh(&br->lock);
+ break;
+
+ case NETDEV_UP:
+ if (netif_running(br->dev) && netif_oper_up(dev)) {
+ spin_lock_bh(&br->lock);
+ br_stp_enable_port(p);
+ spin_unlock_bh(&br->lock);
+ }
+ break;
+
+ case NETDEV_UNREGISTER:
+ br_del_if(br, dev);
+ break;
+
+ case NETDEV_CHANGENAME:
+ err = br_sysfs_renameif(p);
+ if (err)
+ return notifier_from_errno(err);
+ break;
+
+ case NETDEV_PRE_TYPE_CHANGE:
+ /* Forbid underlaying device to change its type. */
+ return NOTIFY_BAD;
+
+ case NETDEV_RESEND_IGMP:
+ /* Propagate to master device */
+ call_netdevice_notifiers(event, br->dev);
+ break;
+ }
+
+ /* Events that may cause spanning tree to refresh */
+ if (event == NETDEV_CHANGEADDR || event == NETDEV_UP ||
+ event == NETDEV_CHANGE || event == NETDEV_DOWN)
+ br_ifinfo_notify(RTM_NEWLINK, p);
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block br_device_notifier = {
+ .notifier_call = br_device_event
+};
+
static void __net_exit br_net_exit(struct net *net)
{
struct net_device *dev;
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index d77e2f0ff0e..568cccd39a3 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -361,8 +361,9 @@ void br_dev_setup(struct net_device *dev)
dev->priv_flags = IFF_EBRIDGE;
dev->features = COMMON_FEATURES | NETIF_F_LLTX | NETIF_F_NETNS_LOCAL |
- NETIF_F_HW_VLAN_CTAG_TX;
- dev->hw_features = COMMON_FEATURES | NETIF_F_HW_VLAN_CTAG_TX;
+ NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
+ dev->hw_features = COMMON_FEATURES | NETIF_F_HW_VLAN_CTAG_TX |
+ NETIF_F_HW_VLAN_STAG_TX;
dev->vlan_features = COMMON_FEATURES;
br->dev = dev;
@@ -377,6 +378,7 @@ void br_dev_setup(struct net_device *dev)
br->stp_enabled = BR_NO_STP;
br->group_fwd_mask = BR_GROUPFWD_DEFAULT;
+ br->group_fwd_mask_required = BR_GROUPFWD_DEFAULT;
br->designated_root = br->bridge_id;
br->bridge_max_age = br->max_age = 20 * HZ;
@@ -387,4 +389,5 @@ void br_dev_setup(struct net_device *dev)
br_netfilter_rtable_init(br);
br_stp_timer_init(br);
br_multicast_init(br);
+ br_vlan_init(br);
}
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 648d0e84959..b524c36c127 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -538,6 +538,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
{
struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)];
struct net_bridge_fdb_entry *fdb;
+ bool fdb_modified = false;
/* some users want to always flood. */
if (hold_time(br) == 0)
@@ -558,10 +559,15 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
source->dev->name);
} else {
/* fastpath: update of existing entry */
- fdb->dst = source;
+ if (unlikely(source != fdb->dst)) {
+ fdb->dst = source;
+ fdb_modified = true;
+ }
fdb->updated = jiffies;
if (unlikely(added_by_user))
fdb->added_by_user = 1;
+ if (unlikely(fdb_modified))
+ fdb_notify(br, fdb, RTM_NEWNEIGH);
}
} else {
spin_lock(&br->hash_lock);
@@ -616,6 +622,8 @@ static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br,
if (nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->addr))
goto nla_put_failure;
+ if (nla_put_u32(skb, NDA_MASTER, br->dev->ifindex))
+ goto nla_put_failure;
ci.ndm_used = jiffies_to_clock_t(now - fdb->used);
ci.ndm_confirmed = 0;
ci.ndm_updated = jiffies_to_clock_t(now - fdb->updated);
@@ -637,6 +645,7 @@ static inline size_t fdb_nlmsg_size(void)
{
return NLMSG_ALIGN(sizeof(struct ndmsg))
+ nla_total_size(ETH_ALEN) /* NDA_LLADDR */
+ + nla_total_size(sizeof(u32)) /* NDA_MASTER */
+ nla_total_size(sizeof(u16)) /* NDA_VLAN */
+ nla_total_size(sizeof(struct nda_cacheinfo));
}
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 104a811dde5..3eca3fdf8fe 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -153,7 +153,8 @@ void br_manage_promisc(struct net_bridge *br)
* This lets us disable promiscuous mode and write
* this config to hw.
*/
- if (br->auto_cnt <= br_auto_port(p))
+ if (br->auto_cnt == 0 ||
+ (br->auto_cnt == 1 && br_auto_port(p)))
br_port_clear_promisc(p);
else
br_port_set_promisc(p);
@@ -528,7 +529,7 @@ err2:
kobject_put(&p->kobj);
p = NULL; /* kobject_put frees */
err1:
- dev_set_promiscuity(dev, -1);
+ dev_set_allmulti(dev, -1);
put_back:
dev_put(dev);
kfree(p);
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 7985deaff52..366c4364907 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -147,8 +147,8 @@ static int br_handle_local_finish(struct sk_buff *skb)
struct net_bridge_port *p = br_port_get_rcu(skb->dev);
u16 vid = 0;
- br_vlan_get_tag(skb, &vid);
- if (p->flags & BR_LEARNING)
+ /* check if vlan is allowed, to avoid spoofing */
+ if (p->flags & BR_LEARNING && br_should_learn(p, skb, &vid))
br_fdb_update(p->br, p, eth_hdr(skb)->h_source, vid, false);
return 0; /* process further */
}
@@ -177,6 +177,8 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
p = br_port_get_rcu(skb->dev);
if (unlikely(is_link_local_ether_addr(dest))) {
+ u16 fwd_mask = p->br->group_fwd_mask_required;
+
/*
* See IEEE 802.1D Table 7-10 Reserved addresses
*
@@ -194,7 +196,8 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
case 0x00: /* Bridge Group Address */
/* If STP is turned off,
then must forward to keep loop detection */
- if (p->br->stp_enabled == BR_NO_STP)
+ if (p->br->stp_enabled == BR_NO_STP ||
+ fwd_mask & (1u << dest[5]))
goto forward;
break;
@@ -203,7 +206,8 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
default:
/* Allow selective forwarding for most other protocols */
- if (p->br->group_fwd_mask & (1u << dest[5]))
+ fwd_mask |= p->br->group_fwd_mask;
+ if (fwd_mask & (1u << dest[5]))
goto forward;
}
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index b7b1914dfa2..5df05269d17 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -418,13 +418,13 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry)
ip.proto = entry->addr.proto;
if (ip.proto == htons(ETH_P_IP)) {
- if (timer_pending(&br->ip4_querier.timer))
+ if (timer_pending(&br->ip4_other_query.timer))
return -EBUSY;
ip.u.ip4 = entry->addr.u.ip4;
#if IS_ENABLED(CONFIG_IPV6)
} else {
- if (timer_pending(&br->ip6_querier.timer))
+ if (timer_pending(&br->ip6_other_query.timer))
return -EBUSY;
ip.u.ip6 = entry->addr.u.ip6;
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 7b757b5dc77..abfa0b65a11 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -11,6 +11,7 @@
*/
#include <linux/err.h>
+#include <linux/export.h>
#include <linux/if_ether.h>
#include <linux/igmp.h>
#include <linux/jhash.h>
@@ -35,7 +36,7 @@
#include "br_private.h"
static void br_multicast_start_querier(struct net_bridge *br,
- struct bridge_mcast_query *query);
+ struct bridge_mcast_own_query *query);
unsigned int br_mdb_rehash_seq;
static inline int br_ip_equal(const struct br_ip *a, const struct br_ip *b)
@@ -761,7 +762,7 @@ static void br_multicast_local_router_expired(unsigned long data)
}
static void br_multicast_querier_expired(struct net_bridge *br,
- struct bridge_mcast_query *query)
+ struct bridge_mcast_own_query *query)
{
spin_lock(&br->multicast_lock);
if (!netif_running(br->dev) || br->multicast_disabled)
@@ -777,7 +778,7 @@ static void br_ip4_multicast_querier_expired(unsigned long data)
{
struct net_bridge *br = (void *)data;
- br_multicast_querier_expired(br, &br->ip4_query);
+ br_multicast_querier_expired(br, &br->ip4_own_query);
}
#if IS_ENABLED(CONFIG_IPV6)
@@ -785,10 +786,22 @@ static void br_ip6_multicast_querier_expired(unsigned long data)
{
struct net_bridge *br = (void *)data;
- br_multicast_querier_expired(br, &br->ip6_query);
+ br_multicast_querier_expired(br, &br->ip6_own_query);
}
#endif
+static void br_multicast_select_own_querier(struct net_bridge *br,
+ struct br_ip *ip,
+ struct sk_buff *skb)
+{
+ if (ip->proto == htons(ETH_P_IP))
+ br->ip4_querier.addr.u.ip4 = ip_hdr(skb)->saddr;
+#if IS_ENABLED(CONFIG_IPV6)
+ else
+ br->ip6_querier.addr.u.ip6 = ipv6_hdr(skb)->saddr;
+#endif
+}
+
static void __br_multicast_send_query(struct net_bridge *br,
struct net_bridge_port *port,
struct br_ip *ip)
@@ -804,17 +817,19 @@ static void __br_multicast_send_query(struct net_bridge *br,
skb->dev = port->dev;
NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev,
dev_queue_xmit);
- } else
+ } else {
+ br_multicast_select_own_querier(br, ip, skb);
netif_rx(skb);
+ }
}
static void br_multicast_send_query(struct net_bridge *br,
struct net_bridge_port *port,
- struct bridge_mcast_query *query)
+ struct bridge_mcast_own_query *own_query)
{
unsigned long time;
struct br_ip br_group;
- struct bridge_mcast_querier *querier = NULL;
+ struct bridge_mcast_other_query *other_query = NULL;
if (!netif_running(br->dev) || br->multicast_disabled ||
!br->multicast_querier)
@@ -822,31 +837,32 @@ static void br_multicast_send_query(struct net_bridge *br,
memset(&br_group.u, 0, sizeof(br_group.u));
- if (port ? (query == &port->ip4_query) :
- (query == &br->ip4_query)) {
- querier = &br->ip4_querier;
+ if (port ? (own_query == &port->ip4_own_query) :
+ (own_query == &br->ip4_own_query)) {
+ other_query = &br->ip4_other_query;
br_group.proto = htons(ETH_P_IP);
#if IS_ENABLED(CONFIG_IPV6)
} else {
- querier = &br->ip6_querier;
+ other_query = &br->ip6_other_query;
br_group.proto = htons(ETH_P_IPV6);
#endif
}
- if (!querier || timer_pending(&querier->timer))
+ if (!other_query || timer_pending(&other_query->timer))
return;
__br_multicast_send_query(br, port, &br_group);
time = jiffies;
- time += query->startup_sent < br->multicast_startup_query_count ?
+ time += own_query->startup_sent < br->multicast_startup_query_count ?
br->multicast_startup_query_interval :
br->multicast_query_interval;
- mod_timer(&query->timer, time);
+ mod_timer(&own_query->timer, time);
}
-static void br_multicast_port_query_expired(struct net_bridge_port *port,
- struct bridge_mcast_query *query)
+static void
+br_multicast_port_query_expired(struct net_bridge_port *port,
+ struct bridge_mcast_own_query *query)
{
struct net_bridge *br = port->br;
@@ -868,7 +884,7 @@ static void br_ip4_multicast_port_query_expired(unsigned long data)
{
struct net_bridge_port *port = (void *)data;
- br_multicast_port_query_expired(port, &port->ip4_query);
+ br_multicast_port_query_expired(port, &port->ip4_own_query);
}
#if IS_ENABLED(CONFIG_IPV6)
@@ -876,7 +892,7 @@ static void br_ip6_multicast_port_query_expired(unsigned long data)
{
struct net_bridge_port *port = (void *)data;
- br_multicast_port_query_expired(port, &port->ip6_query);
+ br_multicast_port_query_expired(port, &port->ip6_own_query);
}
#endif
@@ -886,11 +902,11 @@ void br_multicast_add_port(struct net_bridge_port *port)
setup_timer(&port->multicast_router_timer, br_multicast_router_expired,
(unsigned long)port);
- setup_timer(&port->ip4_query.timer, br_ip4_multicast_port_query_expired,
- (unsigned long)port);
+ setup_timer(&port->ip4_own_query.timer,
+ br_ip4_multicast_port_query_expired, (unsigned long)port);
#if IS_ENABLED(CONFIG_IPV6)
- setup_timer(&port->ip6_query.timer, br_ip6_multicast_port_query_expired,
- (unsigned long)port);
+ setup_timer(&port->ip6_own_query.timer,
+ br_ip6_multicast_port_query_expired, (unsigned long)port);
#endif
}
@@ -899,7 +915,7 @@ void br_multicast_del_port(struct net_bridge_port *port)
del_timer_sync(&port->multicast_router_timer);
}
-static void br_multicast_enable(struct bridge_mcast_query *query)
+static void br_multicast_enable(struct bridge_mcast_own_query *query)
{
query->startup_sent = 0;
@@ -916,9 +932,9 @@ void br_multicast_enable_port(struct net_bridge_port *port)
if (br->multicast_disabled || !netif_running(br->dev))
goto out;
- br_multicast_enable(&port->ip4_query);
+ br_multicast_enable(&port->ip4_own_query);
#if IS_ENABLED(CONFIG_IPV6)
- br_multicast_enable(&port->ip6_query);
+ br_multicast_enable(&port->ip6_own_query);
#endif
out:
@@ -938,9 +954,9 @@ void br_multicast_disable_port(struct net_bridge_port *port)
if (!hlist_unhashed(&port->rlist))
hlist_del_init_rcu(&port->rlist);
del_timer(&port->multicast_router_timer);
- del_timer(&port->ip4_query.timer);
+ del_timer(&port->ip4_own_query.timer);
#if IS_ENABLED(CONFIG_IPV6)
- del_timer(&port->ip6_query.timer);
+ del_timer(&port->ip6_own_query.timer);
#endif
spin_unlock(&br->multicast_lock);
}
@@ -1064,15 +1080,80 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br,
}
#endif
+static bool br_ip4_multicast_select_querier(struct net_bridge *br,
+ struct net_bridge_port *port,
+ __be32 saddr)
+{
+ if (!timer_pending(&br->ip4_own_query.timer) &&
+ !timer_pending(&br->ip4_other_query.timer))
+ goto update;
+
+ if (!br->ip4_querier.addr.u.ip4)
+ goto update;
+
+ if (ntohl(saddr) <= ntohl(br->ip4_querier.addr.u.ip4))
+ goto update;
+
+ return false;
+
+update:
+ br->ip4_querier.addr.u.ip4 = saddr;
+
+ /* update protected by general multicast_lock by caller */
+ rcu_assign_pointer(br->ip4_querier.port, port);
+
+ return true;
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static bool br_ip6_multicast_select_querier(struct net_bridge *br,
+ struct net_bridge_port *port,
+ struct in6_addr *saddr)
+{
+ if (!timer_pending(&br->ip6_own_query.timer) &&
+ !timer_pending(&br->ip6_other_query.timer))
+ goto update;
+
+ if (ipv6_addr_cmp(saddr, &br->ip6_querier.addr.u.ip6) <= 0)
+ goto update;
+
+ return false;
+
+update:
+ br->ip6_querier.addr.u.ip6 = *saddr;
+
+ /* update protected by general multicast_lock by caller */
+ rcu_assign_pointer(br->ip6_querier.port, port);
+
+ return true;
+}
+#endif
+
+static bool br_multicast_select_querier(struct net_bridge *br,
+ struct net_bridge_port *port,
+ struct br_ip *saddr)
+{
+ switch (saddr->proto) {
+ case htons(ETH_P_IP):
+ return br_ip4_multicast_select_querier(br, port, saddr->u.ip4);
+#if IS_ENABLED(CONFIG_IPV6)
+ case htons(ETH_P_IPV6):
+ return br_ip6_multicast_select_querier(br, port, &saddr->u.ip6);
+#endif
+ }
+
+ return false;
+}
+
static void
-br_multicast_update_querier_timer(struct net_bridge *br,
- struct bridge_mcast_querier *querier,
- unsigned long max_delay)
+br_multicast_update_query_timer(struct net_bridge *br,
+ struct bridge_mcast_other_query *query,
+ unsigned long max_delay)
{
- if (!timer_pending(&querier->timer))
- querier->delay_time = jiffies + max_delay;
+ if (!timer_pending(&query->timer))
+ query->delay_time = jiffies + max_delay;
- mod_timer(&querier->timer, jiffies + br->multicast_querier_interval);
+ mod_timer(&query->timer, jiffies + br->multicast_querier_interval);
}
/*
@@ -1125,16 +1206,14 @@ timer:
static void br_multicast_query_received(struct net_bridge *br,
struct net_bridge_port *port,
- struct bridge_mcast_querier *querier,
- int saddr,
- bool is_general_query,
+ struct bridge_mcast_other_query *query,
+ struct br_ip *saddr,
unsigned long max_delay)
{
- if (saddr && is_general_query)
- br_multicast_update_querier_timer(br, querier, max_delay);
- else if (timer_pending(&querier->timer))
+ if (!br_multicast_select_querier(br, port, saddr))
return;
+ br_multicast_update_query_timer(br, query, max_delay);
br_multicast_mark_router(br, port);
}
@@ -1149,6 +1228,7 @@ static int br_ip4_multicast_query(struct net_bridge *br,
struct igmpv3_query *ih3;
struct net_bridge_port_group *p;
struct net_bridge_port_group __rcu **pp;
+ struct br_ip saddr;
unsigned long max_delay;
unsigned long now = jiffies;
__be32 group;
@@ -1190,11 +1270,14 @@ static int br_ip4_multicast_query(struct net_bridge *br,
goto out;
}
- br_multicast_query_received(br, port, &br->ip4_querier, !!iph->saddr,
- !group, max_delay);
+ if (!group) {
+ saddr.proto = htons(ETH_P_IP);
+ saddr.u.ip4 = iph->saddr;
- if (!group)
+ br_multicast_query_received(br, port, &br->ip4_other_query,
+ &saddr, max_delay);
goto out;
+ }
mp = br_mdb_ip4_get(mlock_dereference(br->mdb, br), group, vid);
if (!mp)
@@ -1234,6 +1317,7 @@ static int br_ip6_multicast_query(struct net_bridge *br,
struct mld2_query *mld2q;
struct net_bridge_port_group *p;
struct net_bridge_port_group __rcu **pp;
+ struct br_ip saddr;
unsigned long max_delay;
unsigned long now = jiffies;
const struct in6_addr *group = NULL;
@@ -1282,12 +1366,16 @@ static int br_ip6_multicast_query(struct net_bridge *br,
goto out;
}
- br_multicast_query_received(br, port, &br->ip6_querier,
- !ipv6_addr_any(&ip6h->saddr),
- is_general_query, max_delay);
+ if (is_general_query) {
+ saddr.proto = htons(ETH_P_IPV6);
+ saddr.u.ip6 = ip6h->saddr;
- if (!group)
+ br_multicast_query_received(br, port, &br->ip6_other_query,
+ &saddr, max_delay);
+ goto out;
+ } else if (!group) {
goto out;
+ }
mp = br_mdb_ip6_get(mlock_dereference(br->mdb, br), group, vid);
if (!mp)
@@ -1315,11 +1403,12 @@ out:
}
#endif
-static void br_multicast_leave_group(struct net_bridge *br,
- struct net_bridge_port *port,
- struct br_ip *group,
- struct bridge_mcast_querier *querier,
- struct bridge_mcast_query *query)
+static void
+br_multicast_leave_group(struct net_bridge *br,
+ struct net_bridge_port *port,
+ struct br_ip *group,
+ struct bridge_mcast_other_query *other_query,
+ struct bridge_mcast_own_query *own_query)
{
struct net_bridge_mdb_htable *mdb;
struct net_bridge_mdb_entry *mp;
@@ -1330,7 +1419,7 @@ static void br_multicast_leave_group(struct net_bridge *br,
spin_lock(&br->multicast_lock);
if (!netif_running(br->dev) ||
(port && port->state == BR_STATE_DISABLED) ||
- timer_pending(&querier->timer))
+ timer_pending(&other_query->timer))
goto out;
mdb = mlock_dereference(br->mdb, br);
@@ -1344,7 +1433,7 @@ static void br_multicast_leave_group(struct net_bridge *br,
time = jiffies + br->multicast_last_member_count *
br->multicast_last_member_interval;
- mod_timer(&query->timer, time);
+ mod_timer(&own_query->timer, time);
for (p = mlock_dereference(mp->ports, br);
p != NULL;
@@ -1425,17 +1514,19 @@ static void br_ip4_multicast_leave_group(struct net_bridge *br,
__u16 vid)
{
struct br_ip br_group;
- struct bridge_mcast_query *query = port ? &port->ip4_query :
- &br->ip4_query;
+ struct bridge_mcast_own_query *own_query;
if (ipv4_is_local_multicast(group))
return;
+ own_query = port ? &port->ip4_own_query : &br->ip4_own_query;
+
br_group.u.ip4 = group;
br_group.proto = htons(ETH_P_IP);
br_group.vid = vid;
- br_multicast_leave_group(br, port, &br_group, &br->ip4_querier, query);
+ br_multicast_leave_group(br, port, &br_group, &br->ip4_other_query,
+ own_query);
}
#if IS_ENABLED(CONFIG_IPV6)
@@ -1445,18 +1536,19 @@ static void br_ip6_multicast_leave_group(struct net_bridge *br,
__u16 vid)
{
struct br_ip br_group;
- struct bridge_mcast_query *query = port ? &port->ip6_query :
- &br->ip6_query;
-
+ struct bridge_mcast_own_query *own_query;
if (ipv6_addr_is_ll_all_nodes(group))
return;
+ own_query = port ? &port->ip6_own_query : &br->ip6_own_query;
+
br_group.u.ip6 = *group;
br_group.proto = htons(ETH_P_IPV6);
br_group.vid = vid;
- br_multicast_leave_group(br, port, &br_group, &br->ip6_querier, query);
+ br_multicast_leave_group(br, port, &br_group, &br->ip6_other_query,
+ own_query);
}
#endif
@@ -1723,12 +1815,14 @@ int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port,
}
static void br_multicast_query_expired(struct net_bridge *br,
- struct bridge_mcast_query *query)
+ struct bridge_mcast_own_query *query,
+ struct bridge_mcast_querier *querier)
{
spin_lock(&br->multicast_lock);
if (query->startup_sent < br->multicast_startup_query_count)
query->startup_sent++;
+ rcu_assign_pointer(querier, NULL);
br_multicast_send_query(br, NULL, query);
spin_unlock(&br->multicast_lock);
}
@@ -1737,7 +1831,7 @@ static void br_ip4_multicast_query_expired(unsigned long data)
{
struct net_bridge *br = (void *)data;
- br_multicast_query_expired(br, &br->ip4_query);
+ br_multicast_query_expired(br, &br->ip4_own_query, &br->ip4_querier);
}
#if IS_ENABLED(CONFIG_IPV6)
@@ -1745,7 +1839,7 @@ static void br_ip6_multicast_query_expired(unsigned long data)
{
struct net_bridge *br = (void *)data;
- br_multicast_query_expired(br, &br->ip6_query);
+ br_multicast_query_expired(br, &br->ip6_own_query, &br->ip6_querier);
}
#endif
@@ -1767,28 +1861,30 @@ void br_multicast_init(struct net_bridge *br)
br->multicast_querier_interval = 255 * HZ;
br->multicast_membership_interval = 260 * HZ;
- br->ip4_querier.delay_time = 0;
+ br->ip4_other_query.delay_time = 0;
+ br->ip4_querier.port = NULL;
#if IS_ENABLED(CONFIG_IPV6)
- br->ip6_querier.delay_time = 0;
+ br->ip6_other_query.delay_time = 0;
+ br->ip6_querier.port = NULL;
#endif
spin_lock_init(&br->multicast_lock);
setup_timer(&br->multicast_router_timer,
br_multicast_local_router_expired, 0);
- setup_timer(&br->ip4_querier.timer, br_ip4_multicast_querier_expired,
- (unsigned long)br);
- setup_timer(&br->ip4_query.timer, br_ip4_multicast_query_expired,
+ setup_timer(&br->ip4_other_query.timer,
+ br_ip4_multicast_querier_expired, (unsigned long)br);
+ setup_timer(&br->ip4_own_query.timer, br_ip4_multicast_query_expired,
(unsigned long)br);
#if IS_ENABLED(CONFIG_IPV6)
- setup_timer(&br->ip6_querier.timer, br_ip6_multicast_querier_expired,
- (unsigned long)br);
- setup_timer(&br->ip6_query.timer, br_ip6_multicast_query_expired,
+ setup_timer(&br->ip6_other_query.timer,
+ br_ip6_multicast_querier_expired, (unsigned long)br);
+ setup_timer(&br->ip6_own_query.timer, br_ip6_multicast_query_expired,
(unsigned long)br);
#endif
}
static void __br_multicast_open(struct net_bridge *br,
- struct bridge_mcast_query *query)
+ struct bridge_mcast_own_query *query)
{
query->startup_sent = 0;
@@ -1800,9 +1896,9 @@ static void __br_multicast_open(struct net_bridge *br,
void br_multicast_open(struct net_bridge *br)
{
- __br_multicast_open(br, &br->ip4_query);
+ __br_multicast_open(br, &br->ip4_own_query);
#if IS_ENABLED(CONFIG_IPV6)
- __br_multicast_open(br, &br->ip6_query);
+ __br_multicast_open(br, &br->ip6_own_query);
#endif
}
@@ -1815,11 +1911,11 @@ void br_multicast_stop(struct net_bridge *br)
int i;
del_timer_sync(&br->multicast_router_timer);
- del_timer_sync(&br->ip4_querier.timer);
- del_timer_sync(&br->ip4_query.timer);
+ del_timer_sync(&br->ip4_other_query.timer);
+ del_timer_sync(&br->ip4_own_query.timer);
#if IS_ENABLED(CONFIG_IPV6)
- del_timer_sync(&br->ip6_querier.timer);
- del_timer_sync(&br->ip6_query.timer);
+ del_timer_sync(&br->ip6_other_query.timer);
+ del_timer_sync(&br->ip6_own_query.timer);
#endif
spin_lock_bh(&br->multicast_lock);
@@ -1923,7 +2019,7 @@ unlock:
}
static void br_multicast_start_querier(struct net_bridge *br,
- struct bridge_mcast_query *query)
+ struct bridge_mcast_own_query *query)
{
struct net_bridge_port *port;
@@ -1934,11 +2030,11 @@ static void br_multicast_start_querier(struct net_bridge *br,
port->state == BR_STATE_BLOCKING)
continue;
- if (query == &br->ip4_query)
- br_multicast_enable(&port->ip4_query);
+ if (query == &br->ip4_own_query)
+ br_multicast_enable(&port->ip4_own_query);
#if IS_ENABLED(CONFIG_IPV6)
else
- br_multicast_enable(&port->ip6_query);
+ br_multicast_enable(&port->ip6_own_query);
#endif
}
}
@@ -1974,9 +2070,9 @@ rollback:
goto rollback;
}
- br_multicast_start_querier(br, &br->ip4_query);
+ br_multicast_start_querier(br, &br->ip4_own_query);
#if IS_ENABLED(CONFIG_IPV6)
- br_multicast_start_querier(br, &br->ip6_query);
+ br_multicast_start_querier(br, &br->ip6_own_query);
#endif
unlock:
@@ -2001,16 +2097,16 @@ int br_multicast_set_querier(struct net_bridge *br, unsigned long val)
max_delay = br->multicast_query_response_interval;
- if (!timer_pending(&br->ip4_querier.timer))
- br->ip4_querier.delay_time = jiffies + max_delay;
+ if (!timer_pending(&br->ip4_other_query.timer))
+ br->ip4_other_query.delay_time = jiffies + max_delay;
- br_multicast_start_querier(br, &br->ip4_query);
+ br_multicast_start_querier(br, &br->ip4_own_query);
#if IS_ENABLED(CONFIG_IPV6)
- if (!timer_pending(&br->ip6_querier.timer))
- br->ip6_querier.delay_time = jiffies + max_delay;
+ if (!timer_pending(&br->ip6_other_query.timer))
+ br->ip6_other_query.delay_time = jiffies + max_delay;
- br_multicast_start_querier(br, &br->ip6_query);
+ br_multicast_start_querier(br, &br->ip6_own_query);
#endif
unlock:
@@ -2061,3 +2157,109 @@ unlock:
return err;
}
+
+/**
+ * br_multicast_list_adjacent - Returns snooped multicast addresses
+ * @dev: The bridge port adjacent to which to retrieve addresses
+ * @br_ip_list: The list to store found, snooped multicast IP addresses in
+ *
+ * Creates a list of IP addresses (struct br_ip_list) sensed by the multicast
+ * snooping feature on all bridge ports of dev's bridge device, excluding
+ * the addresses from dev itself.
+ *
+ * Returns the number of items added to br_ip_list.
+ *
+ * Notes:
+ * - br_ip_list needs to be initialized by caller
+ * - br_ip_list might contain duplicates in the end
+ * (needs to be taken care of by caller)
+ * - br_ip_list needs to be freed by caller
+ */
+int br_multicast_list_adjacent(struct net_device *dev,
+ struct list_head *br_ip_list)
+{
+ struct net_bridge *br;
+ struct net_bridge_port *port;
+ struct net_bridge_port_group *group;
+ struct br_ip_list *entry;
+ int count = 0;
+
+ rcu_read_lock();
+ if (!br_ip_list || !br_port_exists(dev))
+ goto unlock;
+
+ port = br_port_get_rcu(dev);
+ if (!port || !port->br)
+ goto unlock;
+
+ br = port->br;
+
+ list_for_each_entry_rcu(port, &br->port_list, list) {
+ if (!port->dev || port->dev == dev)
+ continue;
+
+ hlist_for_each_entry_rcu(group, &port->mglist, mglist) {
+ entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
+ if (!entry)
+ goto unlock;
+
+ entry->addr = group->addr;
+ list_add(&entry->list, br_ip_list);
+ count++;
+ }
+ }
+
+unlock:
+ rcu_read_unlock();
+ return count;
+}
+EXPORT_SYMBOL_GPL(br_multicast_list_adjacent);
+
+/**
+ * br_multicast_has_querier_adjacent - Checks for a querier behind a bridge port
+ * @dev: The bridge port adjacent to which to check for a querier
+ * @proto: The protocol family to check for: IGMP -> ETH_P_IP, MLD -> ETH_P_IPV6
+ *
+ * Checks whether the given interface has a bridge on top and if so returns
+ * true if a selected querier is behind one of the other ports of this
+ * bridge. Otherwise returns false.
+ */
+bool br_multicast_has_querier_adjacent(struct net_device *dev, int proto)
+{
+ struct net_bridge *br;
+ struct net_bridge_port *port;
+ bool ret = false;
+
+ rcu_read_lock();
+ if (!br_port_exists(dev))
+ goto unlock;
+
+ port = br_port_get_rcu(dev);
+ if (!port || !port->br)
+ goto unlock;
+
+ br = port->br;
+
+ switch (proto) {
+ case ETH_P_IP:
+ if (!timer_pending(&br->ip4_other_query.timer) ||
+ rcu_dereference(br->ip4_querier.port) == port)
+ goto unlock;
+ break;
+#if IS_ENABLED(CONFIG_IPV6)
+ case ETH_P_IPV6:
+ if (!timer_pending(&br->ip6_other_query.timer) ||
+ rcu_dereference(br->ip6_querier.port) == port)
+ goto unlock;
+ break;
+#endif
+ default:
+ goto unlock;
+ }
+
+ ret = true;
+unlock:
+ rcu_read_unlock();
+ return ret;
+}
+EXPORT_SYMBOL_GPL(br_multicast_has_querier_adjacent);
diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c
deleted file mode 100644
index 2998dd1769a..00000000000
--- a/net/bridge/br_notify.c
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- * Device event handling
- * Linux ethernet bridge
- *
- * Authors:
- * Lennert Buytenhek <buytenh@gnu.org>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/kernel.h>
-#include <linux/rtnetlink.h>
-#include <net/net_namespace.h>
-
-#include "br_private.h"
-
-static int br_device_event(struct notifier_block *unused, unsigned long event, void *ptr);
-
-struct notifier_block br_device_notifier = {
- .notifier_call = br_device_event
-};
-
-/*
- * Handle changes in state of network devices enslaved to a bridge.
- *
- * Note: don't care about up/down if bridge itself is down, because
- * port state is checked when bridge is brought up.
- */
-static int br_device_event(struct notifier_block *unused, unsigned long event, void *ptr)
-{
- struct net_device *dev = netdev_notifier_info_to_dev(ptr);
- struct net_bridge_port *p;
- struct net_bridge *br;
- bool changed_addr;
- int err;
-
- /* register of bridge completed, add sysfs entries */
- if ((dev->priv_flags & IFF_EBRIDGE) && event == NETDEV_REGISTER) {
- br_sysfs_addbr(dev);
- return NOTIFY_DONE;
- }
-
- /* not a port of a bridge */
- p = br_port_get_rtnl(dev);
- if (!p)
- return NOTIFY_DONE;
-
- br = p->br;
-
- switch (event) {
- case NETDEV_CHANGEMTU:
- dev_set_mtu(br->dev, br_min_mtu(br));
- break;
-
- case NETDEV_CHANGEADDR:
- spin_lock_bh(&br->lock);
- br_fdb_changeaddr(p, dev->dev_addr);
- changed_addr = br_stp_recalculate_bridge_id(br);
- spin_unlock_bh(&br->lock);
-
- if (changed_addr)
- call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev);
-
- break;
-
- case NETDEV_CHANGE:
- br_port_carrier_check(p);
- break;
-
- case NETDEV_FEAT_CHANGE:
- netdev_update_features(br->dev);
- break;
-
- case NETDEV_DOWN:
- spin_lock_bh(&br->lock);
- if (br->dev->flags & IFF_UP)
- br_stp_disable_port(p);
- spin_unlock_bh(&br->lock);
- break;
-
- case NETDEV_UP:
- if (netif_running(br->dev) && netif_oper_up(dev)) {
- spin_lock_bh(&br->lock);
- br_stp_enable_port(p);
- spin_unlock_bh(&br->lock);
- }
- break;
-
- case NETDEV_UNREGISTER:
- br_del_if(br, dev);
- break;
-
- case NETDEV_CHANGENAME:
- err = br_sysfs_renameif(p);
- if (err)
- return notifier_from_errno(err);
- break;
-
- case NETDEV_PRE_TYPE_CHANGE:
- /* Forbid underlaying device to change its type. */
- return NOTIFY_BAD;
-
- case NETDEV_RESEND_IGMP:
- /* Propagate to master device */
- call_netdevice_notifiers(event, br->dev);
- break;
- }
-
- /* Events that may cause spanning tree to refresh */
- if (event == NETDEV_CHANGEADDR || event == NETDEV_UP ||
- event == NETDEV_CHANGE || event == NETDEV_DOWN)
- br_ifinfo_notify(RTM_NEWLINK, p);
-
- return NOTIFY_DONE;
-}
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index af067711574..23caf5b0309 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -35,6 +35,8 @@
#define BR_GROUPFWD_DEFAULT 0
/* Don't allow forwarding control protocols like STP and LLDP */
#define BR_GROUPFWD_RESTRICTED 0x4007u
+/* The Nearest Customer Bridge Group Address, 01-80-C2-00-00-[00,0B,0C,0D,0F] */
+#define BR_GROUPFWD_8021AD 0xB801u
/* Path to usermode spanning tree program */
#define BR_STP_PROG "/sbin/bridge-stp"
@@ -54,30 +56,24 @@ struct mac_addr
unsigned char addr[ETH_ALEN];
};
-struct br_ip
-{
- union {
- __be32 ip4;
-#if IS_ENABLED(CONFIG_IPV6)
- struct in6_addr ip6;
-#endif
- } u;
- __be16 proto;
- __u16 vid;
-};
-
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
/* our own querier */
-struct bridge_mcast_query {
+struct bridge_mcast_own_query {
struct timer_list timer;
u32 startup_sent;
};
/* other querier */
-struct bridge_mcast_querier {
+struct bridge_mcast_other_query {
struct timer_list timer;
unsigned long delay_time;
};
+
+/* selected querier */
+struct bridge_mcast_querier {
+ struct br_ip addr;
+ struct net_bridge_port __rcu *port;
+};
#endif
struct net_port_vlans {
@@ -178,9 +174,9 @@ struct net_bridge_port
#define BR_PROMISC 0x00000080
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
- struct bridge_mcast_query ip4_query;
+ struct bridge_mcast_own_query ip4_own_query;
#if IS_ENABLED(CONFIG_IPV6)
- struct bridge_mcast_query ip6_query;
+ struct bridge_mcast_own_query ip6_own_query;
#endif /* IS_ENABLED(CONFIG_IPV6) */
unsigned char multicast_router;
struct timer_list multicast_router_timer;
@@ -232,6 +228,7 @@ struct net_bridge
bool nf_call_arptables;
#endif
u16 group_fwd_mask;
+ u16 group_fwd_mask_required;
/* STP */
bridge_id designated_root;
@@ -246,6 +243,7 @@ struct net_bridge
unsigned long bridge_forward_delay;
u8 group_addr[ETH_ALEN];
+ bool group_addr_set;
u16 root_port;
enum {
@@ -282,11 +280,13 @@ struct net_bridge
struct hlist_head router_list;
struct timer_list multicast_router_timer;
+ struct bridge_mcast_other_query ip4_other_query;
+ struct bridge_mcast_own_query ip4_own_query;
struct bridge_mcast_querier ip4_querier;
- struct bridge_mcast_query ip4_query;
#if IS_ENABLED(CONFIG_IPV6)
+ struct bridge_mcast_other_query ip6_other_query;
+ struct bridge_mcast_own_query ip6_own_query;
struct bridge_mcast_querier ip6_querier;
- struct bridge_mcast_query ip6_query;
#endif /* IS_ENABLED(CONFIG_IPV6) */
#endif
@@ -298,6 +298,7 @@ struct net_bridge
u32 auto_cnt;
#ifdef CONFIG_BRIDGE_VLAN_FILTERING
u8 vlan_enabled;
+ __be16 vlan_proto;
struct net_port_vlans __rcu *vlan_info;
#endif
};
@@ -333,8 +334,6 @@ struct br_input_skb_cb {
#define br_debug(br, format, args...) \
pr_debug("%s: " format, (br)->dev->name, ##args)
-extern struct notifier_block br_device_notifier;
-
/* called under bridge lock */
static inline int br_is_root_bridge(const struct net_bridge *br)
{
@@ -495,7 +494,7 @@ static inline bool br_multicast_is_router(struct net_bridge *br)
static inline bool
__br_multicast_querier_exists(struct net_bridge *br,
- struct bridge_mcast_querier *querier)
+ struct bridge_mcast_other_query *querier)
{
return time_is_before_jiffies(querier->delay_time) &&
(br->multicast_querier || timer_pending(&querier->timer));
@@ -506,10 +505,10 @@ static inline bool br_multicast_querier_exists(struct net_bridge *br,
{
switch (eth->h_proto) {
case (htons(ETH_P_IP)):
- return __br_multicast_querier_exists(br, &br->ip4_querier);
+ return __br_multicast_querier_exists(br, &br->ip4_other_query);
#if IS_ENABLED(CONFIG_IPV6)
case (htons(ETH_P_IPV6)):
- return __br_multicast_querier_exists(br, &br->ip6_querier);
+ return __br_multicast_querier_exists(br, &br->ip6_other_query);
#endif
default:
return false;
@@ -591,6 +590,7 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v,
struct sk_buff *skb, u16 *vid);
bool br_allowed_egress(struct net_bridge *br, const struct net_port_vlans *v,
const struct sk_buff *skb);
+bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid);
struct sk_buff *br_handle_vlan(struct net_bridge *br,
const struct net_port_vlans *v,
struct sk_buff *skb);
@@ -598,7 +598,10 @@ int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags);
int br_vlan_delete(struct net_bridge *br, u16 vid);
void br_vlan_flush(struct net_bridge *br);
bool br_vlan_find(struct net_bridge *br, u16 vid);
+void br_recalculate_fwd_mask(struct net_bridge *br);
int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val);
+int br_vlan_set_proto(struct net_bridge *br, unsigned long val);
+void br_vlan_init(struct net_bridge *br);
int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags);
int nbp_vlan_delete(struct net_bridge_port *port, u16 vid);
void nbp_vlan_flush(struct net_bridge_port *port);
@@ -662,6 +665,12 @@ static inline bool br_allowed_egress(struct net_bridge *br,
return true;
}
+static inline bool br_should_learn(struct net_bridge_port *p,
+ struct sk_buff *skb, u16 *vid)
+{
+ return true;
+}
+
static inline struct sk_buff *br_handle_vlan(struct net_bridge *br,
const struct net_port_vlans *v,
struct sk_buff *skb)
@@ -688,6 +697,14 @@ static inline bool br_vlan_find(struct net_bridge *br, u16 vid)
return false;
}
+static inline void br_recalculate_fwd_mask(struct net_bridge *br)
+{
+}
+
+static inline void br_vlan_init(struct net_bridge *br)
+{
+}
+
static inline int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags)
{
return -EOPNOTSUPP;
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index 8dac65552f1..c9e2572b15f 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -312,10 +312,19 @@ static ssize_t group_addr_store(struct device *d,
new_addr[5] == 3) /* 802.1X PAE address */
return -EINVAL;
+ if (!rtnl_trylock())
+ return restart_syscall();
+
spin_lock_bh(&br->lock);
for (i = 0; i < 6; i++)
br->group_addr[i] = new_addr[i];
spin_unlock_bh(&br->lock);
+
+ br->group_addr_set = true;
+ br_recalculate_fwd_mask(br);
+
+ rtnl_unlock();
+
return len;
}
@@ -700,6 +709,22 @@ static ssize_t vlan_filtering_store(struct device *d,
return store_bridge_parm(d, buf, len, br_vlan_filter_toggle);
}
static DEVICE_ATTR_RW(vlan_filtering);
+
+static ssize_t vlan_protocol_show(struct device *d,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct net_bridge *br = to_bridge(d);
+ return sprintf(buf, "%#06x\n", ntohs(br->vlan_proto));
+}
+
+static ssize_t vlan_protocol_store(struct device *d,
+ struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ return store_bridge_parm(d, buf, len, br_vlan_set_proto);
+}
+static DEVICE_ATTR_RW(vlan_protocol);
#endif
static struct attribute *bridge_attrs[] = {
@@ -745,6 +770,7 @@ static struct attribute *bridge_attrs[] = {
#endif
#ifdef CONFIG_BRIDGE_VLAN_FILTERING
&dev_attr_vlan_filtering.attr,
+ &dev_attr_vlan_protocol.attr,
#endif
NULL
};
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 24c5cc55589..2b2774fe070 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -60,7 +60,7 @@ static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags)
* that ever changes this code will allow tagged
* traffic to enter the bridge.
*/
- err = vlan_vid_add(dev, htons(ETH_P_8021Q), vid);
+ err = vlan_vid_add(dev, br->vlan_proto, vid);
if (err)
return err;
}
@@ -80,7 +80,7 @@ static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags)
out_filt:
if (p)
- vlan_vid_del(dev, htons(ETH_P_8021Q), vid);
+ vlan_vid_del(dev, br->vlan_proto, vid);
return err;
}
@@ -92,8 +92,10 @@ static int __vlan_del(struct net_port_vlans *v, u16 vid)
__vlan_delete_pvid(v, vid);
clear_bit(vid, v->untagged_bitmap);
- if (v->port_idx)
- vlan_vid_del(v->parent.port->dev, htons(ETH_P_8021Q), vid);
+ if (v->port_idx) {
+ struct net_bridge_port *p = v->parent.port;
+ vlan_vid_del(p->dev, p->br->vlan_proto, vid);
+ }
clear_bit(vid, v->vlan_bitmap);
v->num_vlans--;
@@ -158,7 +160,8 @@ out:
bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v,
struct sk_buff *skb, u16 *vid)
{
- int err;
+ bool tagged;
+ __be16 proto;
/* If VLAN filtering is disabled on the bridge, all packets are
* permitted.
@@ -172,19 +175,41 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v,
if (!v)
goto drop;
+ proto = br->vlan_proto;
+
/* If vlan tx offload is disabled on bridge device and frame was
* sent from vlan device on the bridge device, it does not have
* HW accelerated vlan tag.
*/
if (unlikely(!vlan_tx_tag_present(skb) &&
- (skb->protocol == htons(ETH_P_8021Q) ||
- skb->protocol == htons(ETH_P_8021AD)))) {
+ skb->protocol == proto)) {
skb = vlan_untag(skb);
if (unlikely(!skb))
return false;
}
- err = br_vlan_get_tag(skb, vid);
+ if (!br_vlan_get_tag(skb, vid)) {
+ /* Tagged frame */
+ if (skb->vlan_proto != proto) {
+ /* Protocol-mismatch, empty out vlan_tci for new tag */
+ skb_push(skb, ETH_HLEN);
+ skb = __vlan_put_tag(skb, skb->vlan_proto,
+ vlan_tx_tag_get(skb));
+ if (unlikely(!skb))
+ return false;
+
+ skb_pull(skb, ETH_HLEN);
+ skb_reset_mac_len(skb);
+ *vid = 0;
+ tagged = false;
+ } else {
+ tagged = true;
+ }
+ } else {
+ /* Untagged frame */
+ tagged = false;
+ }
+
if (!*vid) {
u16 pvid = br_get_pvid(v);
@@ -199,9 +224,9 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v,
* ingress frame is considered to belong to this vlan.
*/
*vid = pvid;
- if (likely(err))
+ if (likely(!tagged))
/* Untagged Frame. */
- __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), pvid);
+ __vlan_hwaccel_put_tag(skb, proto, pvid);
else
/* Priority-tagged Frame.
* At this point, We know that skb->vlan_tci had
@@ -241,6 +266,36 @@ bool br_allowed_egress(struct net_bridge *br,
return false;
}
+/* Called under RCU */
+bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid)
+{
+ struct net_bridge *br = p->br;
+ struct net_port_vlans *v;
+
+ if (!br->vlan_enabled)
+ return true;
+
+ v = rcu_dereference(p->vlan_info);
+ if (!v)
+ return false;
+
+ if (!br_vlan_get_tag(skb, vid) && skb->vlan_proto != br->vlan_proto)
+ *vid = 0;
+
+ if (!*vid) {
+ *vid = br_get_pvid(v);
+ if (*vid == VLAN_N_VID)
+ return false;
+
+ return true;
+ }
+
+ if (test_bit(*vid, v->vlan_bitmap))
+ return true;
+
+ return false;
+}
+
/* Must be protected by RTNL.
* Must be called with vid in range from 1 to 4094 inclusive.
*/
@@ -323,6 +378,33 @@ out:
return found;
}
+/* Must be protected by RTNL. */
+static void recalculate_group_addr(struct net_bridge *br)
+{
+ if (br->group_addr_set)
+ return;
+
+ spin_lock_bh(&br->lock);
+ if (!br->vlan_enabled || br->vlan_proto == htons(ETH_P_8021Q)) {
+ /* Bridge Group Address */
+ br->group_addr[5] = 0x00;
+ } else { /* vlan_enabled && ETH_P_8021AD */
+ /* Provider Bridge Group Address */
+ br->group_addr[5] = 0x08;
+ }
+ spin_unlock_bh(&br->lock);
+}
+
+/* Must be protected by RTNL. */
+void br_recalculate_fwd_mask(struct net_bridge *br)
+{
+ if (!br->vlan_enabled || br->vlan_proto == htons(ETH_P_8021Q))
+ br->group_fwd_mask_required = BR_GROUPFWD_DEFAULT;
+ else /* vlan_enabled && ETH_P_8021AD */
+ br->group_fwd_mask_required = BR_GROUPFWD_8021AD &
+ ~(1u << br->group_addr[5]);
+}
+
int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val)
{
if (!rtnl_trylock())
@@ -333,12 +415,87 @@ int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val)
br->vlan_enabled = val;
br_manage_promisc(br);
+ recalculate_group_addr(br);
+ br_recalculate_fwd_mask(br);
unlock:
rtnl_unlock();
return 0;
}
+int br_vlan_set_proto(struct net_bridge *br, unsigned long val)
+{
+ int err = 0;
+ struct net_bridge_port *p;
+ struct net_port_vlans *pv;
+ __be16 proto, oldproto;
+ u16 vid, errvid;
+
+ if (val != ETH_P_8021Q && val != ETH_P_8021AD)
+ return -EPROTONOSUPPORT;
+
+ if (!rtnl_trylock())
+ return restart_syscall();
+
+ proto = htons(val);
+ if (br->vlan_proto == proto)
+ goto unlock;
+
+ /* Add VLANs for the new proto to the device filter. */
+ list_for_each_entry(p, &br->port_list, list) {
+ pv = rtnl_dereference(p->vlan_info);
+ if (!pv)
+ continue;
+
+ for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) {
+ err = vlan_vid_add(p->dev, proto, vid);
+ if (err)
+ goto err_filt;
+ }
+ }
+
+ oldproto = br->vlan_proto;
+ br->vlan_proto = proto;
+
+ recalculate_group_addr(br);
+ br_recalculate_fwd_mask(br);
+
+ /* Delete VLANs for the old proto from the device filter. */
+ list_for_each_entry(p, &br->port_list, list) {
+ pv = rtnl_dereference(p->vlan_info);
+ if (!pv)
+ continue;
+
+ for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID)
+ vlan_vid_del(p->dev, oldproto, vid);
+ }
+
+unlock:
+ rtnl_unlock();
+ return err;
+
+err_filt:
+ errvid = vid;
+ for_each_set_bit(vid, pv->vlan_bitmap, errvid)
+ vlan_vid_del(p->dev, proto, vid);
+
+ list_for_each_entry_continue_reverse(p, &br->port_list, list) {
+ pv = rtnl_dereference(p->vlan_info);
+ if (!pv)
+ continue;
+
+ for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID)
+ vlan_vid_del(p->dev, proto, vid);
+ }
+
+ goto unlock;
+}
+
+void br_vlan_init(struct net_bridge *br)
+{
+ br->vlan_proto = htons(ETH_P_8021Q);
+}
+
/* Must be protected by RTNL.
* Must be called with vid in range from 1 to 4094 inclusive.
*/
@@ -405,7 +562,7 @@ void nbp_vlan_flush(struct net_bridge_port *port)
return;
for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID)
- vlan_vid_del(port->dev, htons(ETH_P_8021Q), vid);
+ vlan_vid_del(port->dev, port->br->vlan_proto, vid);
__vlan_flush(pv);
}
diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig
index 5ca74a0e595..629dc77874a 100644
--- a/net/bridge/netfilter/Kconfig
+++ b/net/bridge/netfilter/Kconfig
@@ -2,14 +2,23 @@
# Bridge netfilter configuration
#
#
-config NF_TABLES_BRIDGE
- depends on NF_TABLES
+menuconfig NF_TABLES_BRIDGE
+ depends on BRIDGE && NETFILTER && NF_TABLES
tristate "Ethernet Bridge nf_tables support"
+if NF_TABLES_BRIDGE
+
+config NFT_BRIDGE_META
+ tristate "Netfilter nf_table bridge meta support"
+ depends on NFT_META
+ help
+ Add support for bridge dedicated meta key.
+
+endif # NF_TABLES_BRIDGE
+
menuconfig BRIDGE_NF_EBTABLES
tristate "Ethernet Bridge tables (ebtables) support"
- depends on BRIDGE && NETFILTER
- select NETFILTER_XTABLES
+ depends on BRIDGE && NETFILTER && NETFILTER_XTABLES
help
ebtables is a general, extensible frame/packet identification
framework. Say 'Y' or 'M' here if you want to do Ethernet
diff --git a/net/bridge/netfilter/Makefile b/net/bridge/netfilter/Makefile
index ea7629f58b3..6f2f3943d66 100644
--- a/net/bridge/netfilter/Makefile
+++ b/net/bridge/netfilter/Makefile
@@ -3,6 +3,7 @@
#
obj-$(CONFIG_NF_TABLES_BRIDGE) += nf_tables_bridge.o
+obj-$(CONFIG_NFT_BRIDGE_META) += nft_meta_bridge.o
obj-$(CONFIG_BRIDGE_NF_EBTABLES) += ebtables.o
diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c
new file mode 100644
index 00000000000..4f02109d708
--- /dev/null
+++ b/net/bridge/netfilter/nft_meta_bridge.c
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2014 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nft_meta.h>
+
+#include "../br_private.h"
+
+static void nft_meta_bridge_get_eval(const struct nft_expr *expr,
+ struct nft_data data[NFT_REG_MAX + 1],
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_meta *priv = nft_expr_priv(expr);
+ const struct net_device *in = pkt->in, *out = pkt->out;
+ struct nft_data *dest = &data[priv->dreg];
+ const struct net_bridge_port *p;
+
+ switch (priv->key) {
+ case NFT_META_BRI_IIFNAME:
+ if (in == NULL || (p = br_port_get_rcu(in)) == NULL)
+ goto err;
+ break;
+ case NFT_META_BRI_OIFNAME:
+ if (out == NULL || (p = br_port_get_rcu(out)) == NULL)
+ goto err;
+ break;
+ default:
+ goto out;
+ }
+
+ strncpy((char *)dest->data, p->br->dev->name, sizeof(dest->data));
+ return;
+out:
+ return nft_meta_get_eval(expr, data, pkt);
+err:
+ data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+}
+
+static int nft_meta_bridge_get_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_meta *priv = nft_expr_priv(expr);
+ int err;
+
+ priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY]));
+ switch (priv->key) {
+ case NFT_META_BRI_IIFNAME:
+ case NFT_META_BRI_OIFNAME:
+ break;
+ default:
+ return nft_meta_get_init(ctx, expr, tb);
+ }
+
+ priv->dreg = ntohl(nla_get_be32(tb[NFTA_META_DREG]));
+ err = nft_validate_output_register(priv->dreg);
+ if (err < 0)
+ return err;
+
+ err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
+ if (err < 0)
+ return err;
+
+ return 0;
+}
+
+static struct nft_expr_type nft_meta_bridge_type;
+static const struct nft_expr_ops nft_meta_bridge_get_ops = {
+ .type = &nft_meta_bridge_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)),
+ .eval = nft_meta_bridge_get_eval,
+ .init = nft_meta_bridge_get_init,
+ .dump = nft_meta_get_dump,
+};
+
+static const struct nft_expr_ops nft_meta_bridge_set_ops = {
+ .type = &nft_meta_bridge_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)),
+ .eval = nft_meta_set_eval,
+ .init = nft_meta_set_init,
+ .dump = nft_meta_set_dump,
+};
+
+static const struct nft_expr_ops *
+nft_meta_bridge_select_ops(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[])
+{
+ if (tb[NFTA_META_KEY] == NULL)
+ return ERR_PTR(-EINVAL);
+
+ if (tb[NFTA_META_DREG] && tb[NFTA_META_SREG])
+ return ERR_PTR(-EINVAL);
+
+ if (tb[NFTA_META_DREG])
+ return &nft_meta_bridge_get_ops;
+
+ if (tb[NFTA_META_SREG])
+ return &nft_meta_bridge_set_ops;
+
+ return ERR_PTR(-EINVAL);
+}
+
+static struct nft_expr_type nft_meta_bridge_type __read_mostly = {
+ .family = NFPROTO_BRIDGE,
+ .name = "meta",
+ .select_ops = &nft_meta_bridge_select_ops,
+ .policy = nft_meta_policy,
+ .maxattr = NFTA_META_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_meta_bridge_module_init(void)
+{
+ return nft_register_expr(&nft_meta_bridge_type);
+}
+
+static void __exit nft_meta_bridge_module_exit(void)
+{
+ nft_unregister_expr(&nft_meta_bridge_type);
+}
+
+module_init(nft_meta_bridge_module_init);
+module_exit(nft_meta_bridge_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Tomasz Bursztyka <tomasz.bursztyka@linux.intel.com>");
+MODULE_ALIAS_NFT_AF_EXPR(AF_BRIDGE, "meta");
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 67d7721d237..1675021d8c1 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -72,6 +72,8 @@ const char *ceph_msg_type_name(int type)
case CEPH_MSG_MON_SUBSCRIBE_ACK: return "mon_subscribe_ack";
case CEPH_MSG_STATFS: return "statfs";
case CEPH_MSG_STATFS_REPLY: return "statfs_reply";
+ case CEPH_MSG_MON_GET_VERSION: return "mon_get_version";
+ case CEPH_MSG_MON_GET_VERSION_REPLY: return "mon_get_version_reply";
case CEPH_MSG_MDS_MAP: return "mds_map";
case CEPH_MSG_CLIENT_SESSION: return "client_session";
case CEPH_MSG_CLIENT_RECONNECT: return "client_reconnect";
diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c
index 10421a4b76f..d1a62c69a9f 100644
--- a/net/ceph/debugfs.c
+++ b/net/ceph/debugfs.c
@@ -126,9 +126,13 @@ static int monc_show(struct seq_file *s, void *p)
req = rb_entry(rp, struct ceph_mon_generic_request, node);
op = le16_to_cpu(req->request->hdr.type);
if (op == CEPH_MSG_STATFS)
- seq_printf(s, "%lld statfs\n", req->tid);
+ seq_printf(s, "%llu statfs\n", req->tid);
+ else if (op == CEPH_MSG_POOLOP)
+ seq_printf(s, "%llu poolop\n", req->tid);
+ else if (op == CEPH_MSG_MON_GET_VERSION)
+ seq_printf(s, "%llu mon_get_version", req->tid);
else
- seq_printf(s, "%lld unknown\n", req->tid);
+ seq_printf(s, "%llu unknown\n", req->tid);
}
mutex_unlock(&monc->mutex);
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index dac7f9b9868..1948d592aa5 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -557,7 +557,7 @@ static int ceph_tcp_sendmsg(struct socket *sock, struct kvec *iov,
return r;
}
-static int ceph_tcp_sendpage(struct socket *sock, struct page *page,
+static int __ceph_tcp_sendpage(struct socket *sock, struct page *page,
int offset, size_t size, bool more)
{
int flags = MSG_DONTWAIT | MSG_NOSIGNAL | (more ? MSG_MORE : MSG_EOR);
@@ -570,6 +570,24 @@ static int ceph_tcp_sendpage(struct socket *sock, struct page *page,
return ret;
}
+static int ceph_tcp_sendpage(struct socket *sock, struct page *page,
+ int offset, size_t size, bool more)
+{
+ int ret;
+ struct kvec iov;
+
+ /* sendpage cannot properly handle pages with page_count == 0,
+ * we need to fallback to sendmsg if that's the case */
+ if (page_count(page) >= 1)
+ return __ceph_tcp_sendpage(sock, page, offset, size, more);
+
+ iov.iov_base = kmap(page) + offset;
+ iov.iov_len = size;
+ ret = ceph_tcp_sendmsg(sock, &iov, 1, size, more);
+ kunmap(page);
+
+ return ret;
+}
/*
* Shutdown/close the socket for the given connection.
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index 2ac9ef35110..067d3af2eaf 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -296,6 +296,33 @@ void ceph_monc_request_next_osdmap(struct ceph_mon_client *monc)
__send_subscribe(monc);
mutex_unlock(&monc->mutex);
}
+EXPORT_SYMBOL(ceph_monc_request_next_osdmap);
+
+int ceph_monc_wait_osdmap(struct ceph_mon_client *monc, u32 epoch,
+ unsigned long timeout)
+{
+ unsigned long started = jiffies;
+ int ret;
+
+ mutex_lock(&monc->mutex);
+ while (monc->have_osdmap < epoch) {
+ mutex_unlock(&monc->mutex);
+
+ if (timeout != 0 && time_after_eq(jiffies, started + timeout))
+ return -ETIMEDOUT;
+
+ ret = wait_event_interruptible_timeout(monc->client->auth_wq,
+ monc->have_osdmap >= epoch, timeout);
+ if (ret < 0)
+ return ret;
+
+ mutex_lock(&monc->mutex);
+ }
+
+ mutex_unlock(&monc->mutex);
+ return 0;
+}
+EXPORT_SYMBOL(ceph_monc_wait_osdmap);
/*
*
@@ -477,14 +504,13 @@ static struct ceph_msg *get_generic_reply(struct ceph_connection *con,
return m;
}
-static int do_generic_request(struct ceph_mon_client *monc,
- struct ceph_mon_generic_request *req)
+static int __do_generic_request(struct ceph_mon_client *monc, u64 tid,
+ struct ceph_mon_generic_request *req)
{
int err;
/* register request */
- mutex_lock(&monc->mutex);
- req->tid = ++monc->last_tid;
+ req->tid = tid != 0 ? tid : ++monc->last_tid;
req->request->hdr.tid = cpu_to_le64(req->tid);
__insert_generic_request(monc, req);
monc->num_generic_requests++;
@@ -496,13 +522,24 @@ static int do_generic_request(struct ceph_mon_client *monc,
mutex_lock(&monc->mutex);
rb_erase(&req->node, &monc->generic_request_tree);
monc->num_generic_requests--;
- mutex_unlock(&monc->mutex);
if (!err)
err = req->result;
return err;
}
+static int do_generic_request(struct ceph_mon_client *monc,
+ struct ceph_mon_generic_request *req)
+{
+ int err;
+
+ mutex_lock(&monc->mutex);
+ err = __do_generic_request(monc, 0, req);
+ mutex_unlock(&monc->mutex);
+
+ return err;
+}
+
/*
* statfs
*/
@@ -579,6 +616,96 @@ out:
}
EXPORT_SYMBOL(ceph_monc_do_statfs);
+static void handle_get_version_reply(struct ceph_mon_client *monc,
+ struct ceph_msg *msg)
+{
+ struct ceph_mon_generic_request *req;
+ u64 tid = le64_to_cpu(msg->hdr.tid);
+ void *p = msg->front.iov_base;
+ void *end = p + msg->front_alloc_len;
+ u64 handle;
+
+ dout("%s %p tid %llu\n", __func__, msg, tid);
+
+ ceph_decode_need(&p, end, 2*sizeof(u64), bad);
+ handle = ceph_decode_64(&p);
+ if (tid != 0 && tid != handle)
+ goto bad;
+
+ mutex_lock(&monc->mutex);
+ req = __lookup_generic_req(monc, handle);
+ if (req) {
+ *(u64 *)req->buf = ceph_decode_64(&p);
+ req->result = 0;
+ get_generic_request(req);
+ }
+ mutex_unlock(&monc->mutex);
+ if (req) {
+ complete_all(&req->completion);
+ put_generic_request(req);
+ }
+
+ return;
+bad:
+ pr_err("corrupt mon_get_version reply\n");
+ ceph_msg_dump(msg);
+}
+
+/*
+ * Send MMonGetVersion and wait for the reply.
+ *
+ * @what: one of "mdsmap", "osdmap" or "monmap"
+ */
+int ceph_monc_do_get_version(struct ceph_mon_client *monc, const char *what,
+ u64 *newest)
+{
+ struct ceph_mon_generic_request *req;
+ void *p, *end;
+ u64 tid;
+ int err;
+
+ req = kzalloc(sizeof(*req), GFP_NOFS);
+ if (!req)
+ return -ENOMEM;
+
+ kref_init(&req->kref);
+ req->buf = newest;
+ req->buf_len = sizeof(*newest);
+ init_completion(&req->completion);
+
+ req->request = ceph_msg_new(CEPH_MSG_MON_GET_VERSION,
+ sizeof(u64) + sizeof(u32) + strlen(what),
+ GFP_NOFS, true);
+ if (!req->request) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ req->reply = ceph_msg_new(CEPH_MSG_MON_GET_VERSION_REPLY, 1024,
+ GFP_NOFS, true);
+ if (!req->reply) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ p = req->request->front.iov_base;
+ end = p + req->request->front_alloc_len;
+
+ /* fill out request */
+ mutex_lock(&monc->mutex);
+ tid = ++monc->last_tid;
+ ceph_encode_64(&p, tid); /* handle */
+ ceph_encode_string(&p, end, what, strlen(what));
+
+ err = __do_generic_request(monc, tid, req);
+
+ mutex_unlock(&monc->mutex);
+out:
+ kref_put(&req->kref, release_generic_request);
+ return err;
+}
+EXPORT_SYMBOL(ceph_monc_do_get_version);
+
/*
* pool ops
*/
@@ -981,6 +1108,10 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg)
handle_statfs_reply(monc, msg);
break;
+ case CEPH_MSG_MON_GET_VERSION_REPLY:
+ handle_get_version_reply(monc, msg);
+ break;
+
case CEPH_MSG_POOLOP_REPLY:
handle_poolop_reply(monc, msg);
break;
@@ -1029,6 +1160,15 @@ static struct ceph_msg *mon_alloc_msg(struct ceph_connection *con,
case CEPH_MSG_AUTH_REPLY:
m = ceph_msg_get(monc->m_auth_reply);
break;
+ case CEPH_MSG_MON_GET_VERSION_REPLY:
+ if (le64_to_cpu(hdr->tid) != 0)
+ return get_generic_reply(con, hdr, skip);
+
+ /*
+ * Older OSDs don't set reply tid even if the orignal
+ * request had a non-zero tid. Workaround this weirdness
+ * by falling through to the allocate case.
+ */
case CEPH_MSG_MON_MAP:
case CEPH_MSG_MDS_MAP:
case CEPH_MSG_OSD_MAP:
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index b0dfce77656..05be0c18169 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -2491,7 +2491,7 @@ EXPORT_SYMBOL(ceph_osdc_sync);
* Call all pending notify callbacks - for use after a watch is
* unregistered, to make sure no more callbacks for it will be invoked
*/
-extern void ceph_osdc_flush_notifies(struct ceph_osd_client *osdc)
+void ceph_osdc_flush_notifies(struct ceph_osd_client *osdc)
{
flush_workqueue(osdc->notify_wq);
}
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 8b8a5a24b22..c547e46084d 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -329,6 +329,11 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
dout("crush decode tunable chooseleaf_descend_once = %d",
c->chooseleaf_descend_once);
+ ceph_decode_need(p, end, sizeof(u8), done);
+ c->chooseleaf_vary_r = ceph_decode_8(p);
+ dout("crush decode tunable chooseleaf_vary_r = %d",
+ c->chooseleaf_vary_r);
+
done:
dout("crush_decode success\n");
return c;
diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c
index 815a2249cfa..555013034f7 100644
--- a/net/ceph/pagevec.c
+++ b/net/ceph/pagevec.c
@@ -53,7 +53,10 @@ void ceph_put_page_vector(struct page **pages, int num_pages, bool dirty)
set_page_dirty_lock(pages[i]);
put_page(pages[i]);
}
- kfree(pages);
+ if (is_vmalloc_addr(pages))
+ vfree(pages);
+ else
+ kfree(pages);
}
EXPORT_SYMBOL(ceph_put_page_vector);
@@ -165,36 +168,6 @@ void ceph_copy_from_page_vector(struct page **pages,
EXPORT_SYMBOL(ceph_copy_from_page_vector);
/*
- * copy user data from a page vector into a user pointer
- */
-int ceph_copy_page_vector_to_user(struct page **pages,
- void __user *data,
- loff_t off, size_t len)
-{
- int i = 0;
- int po = off & ~PAGE_CACHE_MASK;
- int left = len;
- int l, bad;
-
- while (left > 0) {
- l = min_t(int, left, PAGE_CACHE_SIZE-po);
- bad = copy_to_user(data, page_address(pages[i]) + po, l);
- if (bad == l)
- return -EFAULT;
- data += l - bad;
- left -= l - bad;
- if (po) {
- po += l - bad;
- if (po == PAGE_CACHE_SIZE)
- po = 0;
- }
- i++;
- }
- return len;
-}
-EXPORT_SYMBOL(ceph_copy_page_vector_to_user);
-
-/*
* Zero an extent within a page vector. Offset is relative to the
* start of the first page.
*/
diff --git a/net/compat.c b/net/compat.c
index 9a76eaf6318..bc8aeefddf3 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -85,7 +85,7 @@ int verify_compat_iovec(struct msghdr *kern_msg, struct iovec *kern_iov,
{
int tot_len;
- if (kern_msg->msg_namelen) {
+ if (kern_msg->msg_name && kern_msg->msg_namelen) {
if (mode == VERIFY_READ) {
int err = move_addr_to_kernel(kern_msg->msg_name,
kern_msg->msg_namelen,
@@ -93,10 +93,11 @@ int verify_compat_iovec(struct msghdr *kern_msg, struct iovec *kern_iov,
if (err < 0)
return err;
}
- if (kern_msg->msg_name)
- kern_msg->msg_name = kern_address;
- } else
+ kern_msg->msg_name = kern_address;
+ } else {
kern_msg->msg_name = NULL;
+ kern_msg->msg_namelen = 0;
+ }
tot_len = iov_from_user_compat_to_kern(kern_iov,
(struct compat_iovec __user *)kern_msg->msg_iov,
diff --git a/net/core/Makefile b/net/core/Makefile
index 826b925aa45..71093d94ad2 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -9,7 +9,7 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \
neighbour.o rtnetlink.o utils.o link_watch.o filter.o \
- sock_diag.o dev_ioctl.o
+ sock_diag.o dev_ioctl.o tso.o
obj-$(CONFIG_XFRM) += flow.o
obj-y += net-sysfs.o
diff --git a/net/core/datagram.c b/net/core/datagram.c
index a16ed7bbe37..488dd1a825c 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -740,17 +740,37 @@ __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len)
sum = csum_fold(skb_checksum(skb, 0, len, skb->csum));
if (likely(!sum)) {
- if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
+ if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
+ !skb->csum_complete_sw)
netdev_rx_csum_fault(skb->dev);
- skb->ip_summed = CHECKSUM_UNNECESSARY;
}
+ skb->csum_valid = !sum;
return sum;
}
EXPORT_SYMBOL(__skb_checksum_complete_head);
__sum16 __skb_checksum_complete(struct sk_buff *skb)
{
- return __skb_checksum_complete_head(skb, skb->len);
+ __wsum csum;
+ __sum16 sum;
+
+ csum = skb_checksum(skb, 0, skb->len, 0);
+
+ /* skb->csum holds pseudo checksum */
+ sum = csum_fold(csum_add(skb->csum, csum));
+ if (likely(!sum)) {
+ if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
+ !skb->csum_complete_sw)
+ netdev_rx_csum_fault(skb->dev);
+ }
+
+ /* Save full packet checksum */
+ skb->csum = csum;
+ skb->ip_summed = CHECKSUM_COMPLETE;
+ skb->csum_complete_sw = 1;
+ skb->csum_valid = !sum;
+
+ return sum;
}
EXPORT_SYMBOL(__skb_checksum_complete);
diff --git a/net/core/dev.c b/net/core/dev.c
index 867adb25b5b..367a586d0c8 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -148,6 +148,9 @@ struct list_head ptype_all __read_mostly; /* Taps */
static struct list_head offload_base __read_mostly;
static int netif_rx_internal(struct sk_buff *skb);
+static int call_netdevice_notifiers_info(unsigned long val,
+ struct net_device *dev,
+ struct netdev_notifier_info *info);
/*
* The @dev_base_head list is protected by @dev_base_lock and the rtnl
@@ -1207,7 +1210,11 @@ EXPORT_SYMBOL(netdev_features_change);
void netdev_state_change(struct net_device *dev)
{
if (dev->flags & IFF_UP) {
- call_netdevice_notifiers(NETDEV_CHANGE, dev);
+ struct netdev_notifier_change_info change_info;
+
+ change_info.flags_changed = 0;
+ call_netdevice_notifiers_info(NETDEV_CHANGE, dev,
+ &change_info.info);
rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL);
}
}
@@ -1326,7 +1333,7 @@ static int __dev_close_many(struct list_head *head)
* dev->stop() will invoke napi_disable() on all of it's
* napi_struct instances on this device.
*/
- smp_mb__after_clear_bit(); /* Commit netif_running(). */
+ smp_mb__after_atomic(); /* Commit netif_running(). */
}
dev_deactivate_many(head);
@@ -2289,8 +2296,8 @@ EXPORT_SYMBOL(skb_checksum_help);
__be16 skb_network_protocol(struct sk_buff *skb, int *depth)
{
+ unsigned int vlan_depth = skb->mac_len;
__be16 type = skb->protocol;
- int vlan_depth = skb->mac_len;
/* Tunnel gso handlers can set protocol to ethernet. */
if (type == htons(ETH_P_TEB)) {
@@ -2303,15 +2310,30 @@ __be16 skb_network_protocol(struct sk_buff *skb, int *depth)
type = eth->h_proto;
}
- while (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) {
- struct vlan_hdr *vh;
+ /* if skb->protocol is 802.1Q/AD then the header should already be
+ * present at mac_len - VLAN_HLEN (if mac_len > 0), or at
+ * ETH_HLEN otherwise
+ */
+ if (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) {
+ if (vlan_depth) {
+ if (unlikely(WARN_ON(vlan_depth < VLAN_HLEN)))
+ return 0;
+ vlan_depth -= VLAN_HLEN;
+ } else {
+ vlan_depth = ETH_HLEN;
+ }
+ do {
+ struct vlan_hdr *vh;
- if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN)))
- return 0;
+ if (unlikely(!pskb_may_pull(skb,
+ vlan_depth + VLAN_HLEN)))
+ return 0;
- vh = (struct vlan_hdr *)(skb->data + vlan_depth);
- type = vh->h_vlan_encapsulated_proto;
- vlan_depth += VLAN_HLEN;
+ vh = (struct vlan_hdr *)(skb->data + vlan_depth);
+ type = vh->h_vlan_encapsulated_proto;
+ vlan_depth += VLAN_HLEN;
+ } while (type == htons(ETH_P_8021Q) ||
+ type == htons(ETH_P_8021AD));
}
*depth = vlan_depth;
@@ -2498,13 +2520,39 @@ static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features)
return 0;
}
+/* If MPLS offload request, verify we are testing hardware MPLS features
+ * instead of standard features for the netdev.
+ */
+#ifdef CONFIG_NET_MPLS_GSO
+static netdev_features_t net_mpls_features(struct sk_buff *skb,
+ netdev_features_t features,
+ __be16 type)
+{
+ if (type == htons(ETH_P_MPLS_UC) || type == htons(ETH_P_MPLS_MC))
+ features &= skb->dev->mpls_features;
+
+ return features;
+}
+#else
+static netdev_features_t net_mpls_features(struct sk_buff *skb,
+ netdev_features_t features,
+ __be16 type)
+{
+ return features;
+}
+#endif
+
static netdev_features_t harmonize_features(struct sk_buff *skb,
netdev_features_t features)
{
int tmp;
+ __be16 type;
+
+ type = skb_network_protocol(skb, &tmp);
+ features = net_mpls_features(skb, features, type);
if (skb->ip_summed != CHECKSUM_NONE &&
- !can_checksum_protocol(features, skb_network_protocol(skb, &tmp))) {
+ !can_checksum_protocol(features, type)) {
features &= ~NETIF_F_ALL_CSUM;
} else if (illegal_highdma(skb->dev, skb)) {
features &= ~NETIF_F_SG;
@@ -3347,7 +3395,7 @@ static void net_tx_action(struct softirq_action *h)
root_lock = qdisc_lock(q);
if (spin_trylock(root_lock)) {
- smp_mb__before_clear_bit();
+ smp_mb__before_atomic();
clear_bit(__QDISC_STATE_SCHED,
&q->state);
qdisc_run(q);
@@ -3357,7 +3405,7 @@ static void net_tx_action(struct softirq_action *h)
&q->state)) {
__netif_reschedule(q);
} else {
- smp_mb__before_clear_bit();
+ smp_mb__before_atomic();
clear_bit(__QDISC_STATE_SCHED,
&q->state);
}
@@ -3957,6 +4005,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
}
NAPI_GRO_CB(skb)->count = 1;
NAPI_GRO_CB(skb)->age = jiffies;
+ NAPI_GRO_CB(skb)->last = skb;
skb_shinfo(skb)->gso_size = skb_gro_len(skb);
skb->next = napi->gro_list;
napi->gro_list = skb;
@@ -4047,6 +4096,8 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
skb->vlan_tci = 0;
skb->dev = napi->dev;
skb->skb_iif = 0;
+ skb->encapsulation = 0;
+ skb_shinfo(skb)->gso_type = 0;
skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
napi->skb = skb;
@@ -4185,9 +4236,8 @@ static int process_backlog(struct napi_struct *napi, int quota)
#endif
napi->weight = weight_p;
local_irq_disable();
- while (work < quota) {
+ while (1) {
struct sk_buff *skb;
- unsigned int qlen;
while ((skb = __skb_dequeue(&sd->process_queue))) {
local_irq_enable();
@@ -4201,24 +4251,24 @@ static int process_backlog(struct napi_struct *napi, int quota)
}
rps_lock(sd);
- qlen = skb_queue_len(&sd->input_pkt_queue);
- if (qlen)
- skb_queue_splice_tail_init(&sd->input_pkt_queue,
- &sd->process_queue);
-
- if (qlen < quota - work) {
+ if (skb_queue_empty(&sd->input_pkt_queue)) {
/*
* Inline a custom version of __napi_complete().
* only current cpu owns and manipulates this napi,
- * and NAPI_STATE_SCHED is the only possible flag set on backlog.
- * we can use a plain write instead of clear_bit(),
+ * and NAPI_STATE_SCHED is the only possible flag set
+ * on backlog.
+ * We can use a plain write instead of clear_bit(),
* and we dont need an smp_mb() memory barrier.
*/
list_del(&napi->poll_list);
napi->state = 0;
+ rps_unlock(sd);
- quota = work + qlen;
+ break;
}
+
+ skb_queue_splice_tail_init(&sd->input_pkt_queue,
+ &sd->process_queue);
rps_unlock(sd);
}
local_irq_enable();
@@ -4248,7 +4298,7 @@ void __napi_complete(struct napi_struct *n)
BUG_ON(n->gro_list);
list_del(&n->poll_list);
- smp_mb__before_clear_bit();
+ smp_mb__before_atomic();
clear_bit(NAPI_STATE_SCHED, &n->state);
}
EXPORT_SYMBOL(__napi_complete);
@@ -4547,6 +4597,32 @@ void *netdev_adjacent_get_private(struct list_head *adj_list)
EXPORT_SYMBOL(netdev_adjacent_get_private);
/**
+ * netdev_upper_get_next_dev_rcu - Get the next dev from upper list
+ * @dev: device
+ * @iter: list_head ** of the current position
+ *
+ * Gets the next device from the dev's upper list, starting from iter
+ * position. The caller must hold RCU read lock.
+ */
+struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev,
+ struct list_head **iter)
+{
+ struct netdev_adjacent *upper;
+
+ WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_rtnl_is_held());
+
+ upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
+
+ if (&upper->list == &dev->adj_list.upper)
+ return NULL;
+
+ *iter = &upper->list;
+
+ return upper->dev;
+}
+EXPORT_SYMBOL(netdev_upper_get_next_dev_rcu);
+
+/**
* netdev_all_upper_get_next_dev_rcu - Get the next dev from upper list
* @dev: device
* @iter: list_head ** of the current position
@@ -4628,6 +4704,32 @@ void *netdev_lower_get_next_private_rcu(struct net_device *dev,
EXPORT_SYMBOL(netdev_lower_get_next_private_rcu);
/**
+ * netdev_lower_get_next - Get the next device from the lower neighbour
+ * list
+ * @dev: device
+ * @iter: list_head ** of the current position
+ *
+ * Gets the next netdev_adjacent from the dev's lower neighbour
+ * list, starting from iter position. The caller must hold RTNL lock or
+ * its own locking that guarantees that the neighbour lower
+ * list will remain unchainged.
+ */
+void *netdev_lower_get_next(struct net_device *dev, struct list_head **iter)
+{
+ struct netdev_adjacent *lower;
+
+ lower = list_entry((*iter)->next, struct netdev_adjacent, list);
+
+ if (&lower->list == &dev->adj_list.lower)
+ return NULL;
+
+ *iter = &lower->list;
+
+ return lower->dev;
+}
+EXPORT_SYMBOL(netdev_lower_get_next);
+
+/**
* netdev_lower_get_first_private_rcu - Get the first ->private from the
* lower neighbour list, RCU
* variant
@@ -5077,6 +5179,30 @@ void *netdev_lower_dev_get_private(struct net_device *dev,
}
EXPORT_SYMBOL(netdev_lower_dev_get_private);
+
+int dev_get_nest_level(struct net_device *dev,
+ bool (*type_check)(struct net_device *dev))
+{
+ struct net_device *lower = NULL;
+ struct list_head *iter;
+ int max_nest = -1;
+ int nest;
+
+ ASSERT_RTNL();
+
+ netdev_for_each_lower_dev(dev, lower, iter) {
+ nest = dev_get_nest_level(lower, type_check);
+ if (max_nest < nest)
+ max_nest = nest;
+ }
+
+ if (type_check(dev))
+ max_nest++;
+
+ return max_nest;
+}
+EXPORT_SYMBOL(dev_get_nest_level);
+
static void dev_change_rx_flags(struct net_device *dev, int flags)
{
const struct net_device_ops *ops = dev->netdev_ops;
@@ -5242,7 +5368,6 @@ void __dev_set_rx_mode(struct net_device *dev)
if (ops->ndo_set_rx_mode)
ops->ndo_set_rx_mode(dev);
}
-EXPORT_SYMBOL(__dev_set_rx_mode);
void dev_set_rx_mode(struct net_device *dev)
{
@@ -5547,7 +5672,7 @@ static int dev_new_index(struct net *net)
/* Delayed registration/unregisteration */
static LIST_HEAD(net_todo_list);
-static DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq);
+DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq);
static void net_set_todo(struct net_device *dev)
{
@@ -5842,10 +5967,7 @@ static void netdev_init_one_queue(struct net_device *dev,
static void netif_free_tx_queues(struct net_device *dev)
{
- if (is_vmalloc_addr(dev->_tx))
- vfree(dev->_tx);
- else
- kfree(dev->_tx);
+ kvfree(dev->_tx);
}
static int netif_alloc_netdev_queues(struct net_device *dev)
@@ -6319,10 +6441,7 @@ void netdev_freemem(struct net_device *dev)
{
char *addr = (char *)dev - dev->padded;
- if (is_vmalloc_addr(addr))
- vfree(addr);
- else
- kfree(addr);
+ kvfree(addr);
}
/**
@@ -6427,11 +6546,6 @@ free_all:
free_pcpu:
free_percpu(dev->pcpu_refcnt);
- netif_free_tx_queues(dev);
-#ifdef CONFIG_SYSFS
- kfree(dev->_rx);
-#endif
-
free_dev:
netdev_freemem(dev);
return NULL;
@@ -6528,6 +6642,9 @@ EXPORT_SYMBOL(unregister_netdevice_queue);
/**
* unregister_netdevice_many - unregister many devices
* @head: list of devices
+ *
+ * Note: As most callers use a stack allocated list_head,
+ * we force a list_del() to make sure stack wont be corrupted later.
*/
void unregister_netdevice_many(struct list_head *head)
{
@@ -6537,6 +6654,7 @@ void unregister_netdevice_many(struct list_head *head)
rollback_registered_many(head);
list_for_each_entry(dev, head, unreg_list)
net_set_todo(dev);
+ list_del(head);
}
}
EXPORT_SYMBOL(unregister_netdevice_many);
@@ -6992,7 +7110,6 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list)
}
}
unregister_netdevice_many(&dev_kill_list);
- list_del(&dev_kill_list);
rtnl_unlock();
}
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index 329d5794e7d..b6b230600b9 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -225,6 +225,91 @@ void __hw_addr_unsync(struct netdev_hw_addr_list *to_list,
}
EXPORT_SYMBOL(__hw_addr_unsync);
+/**
+ * __hw_addr_sync_dev - Synchonize device's multicast list
+ * @list: address list to syncronize
+ * @dev: device to sync
+ * @sync: function to call if address should be added
+ * @unsync: function to call if address should be removed
+ *
+ * This funciton is intended to be called from the ndo_set_rx_mode
+ * function of devices that require explicit address add/remove
+ * notifications. The unsync function may be NULL in which case
+ * the addresses requiring removal will simply be removed without
+ * any notification to the device.
+ **/
+int __hw_addr_sync_dev(struct netdev_hw_addr_list *list,
+ struct net_device *dev,
+ int (*sync)(struct net_device *, const unsigned char *),
+ int (*unsync)(struct net_device *,
+ const unsigned char *))
+{
+ struct netdev_hw_addr *ha, *tmp;
+ int err;
+
+ /* first go through and flush out any stale entries */
+ list_for_each_entry_safe(ha, tmp, &list->list, list) {
+ if (!ha->sync_cnt || ha->refcount != 1)
+ continue;
+
+ /* if unsync is defined and fails defer unsyncing address */
+ if (unsync && unsync(dev, ha->addr))
+ continue;
+
+ ha->sync_cnt--;
+ __hw_addr_del_entry(list, ha, false, false);
+ }
+
+ /* go through and sync new entries to the list */
+ list_for_each_entry_safe(ha, tmp, &list->list, list) {
+ if (ha->sync_cnt)
+ continue;
+
+ err = sync(dev, ha->addr);
+ if (err)
+ return err;
+
+ ha->sync_cnt++;
+ ha->refcount++;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(__hw_addr_sync_dev);
+
+/**
+ * __hw_addr_unsync_dev - Remove synchonized addresses from device
+ * @list: address list to remove syncronized addresses from
+ * @dev: device to sync
+ * @unsync: function to call if address should be removed
+ *
+ * Remove all addresses that were added to the device by __hw_addr_sync_dev().
+ * This function is intended to be called from the ndo_stop or ndo_open
+ * functions on devices that require explicit address add/remove
+ * notifications. If the unsync function pointer is NULL then this function
+ * can be used to just reset the sync_cnt for the addresses in the list.
+ **/
+void __hw_addr_unsync_dev(struct netdev_hw_addr_list *list,
+ struct net_device *dev,
+ int (*unsync)(struct net_device *,
+ const unsigned char *))
+{
+ struct netdev_hw_addr *ha, *tmp;
+
+ list_for_each_entry_safe(ha, tmp, &list->list, list) {
+ if (!ha->sync_cnt)
+ continue;
+
+ /* if unsync is defined and fails defer unsyncing address */
+ if (unsync && unsync(dev, ha->addr))
+ continue;
+
+ ha->sync_cnt--;
+ __hw_addr_del_entry(list, ha, false, false);
+ }
+}
+EXPORT_SYMBOL(__hw_addr_unsync_dev);
+
static void __hw_addr_flush(struct netdev_hw_addr_list *list)
{
struct netdev_hw_addr *ha, *tmp;
diff --git a/net/core/dst.c b/net/core/dst.c
index 80d6286c8b6..a028409ee43 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -269,6 +269,15 @@ again:
}
EXPORT_SYMBOL(dst_destroy);
+static void dst_destroy_rcu(struct rcu_head *head)
+{
+ struct dst_entry *dst = container_of(head, struct dst_entry, rcu_head);
+
+ dst = dst_destroy(dst);
+ if (dst)
+ __dst_free(dst);
+}
+
void dst_release(struct dst_entry *dst)
{
if (dst) {
@@ -276,11 +285,8 @@ void dst_release(struct dst_entry *dst)
newrefcnt = atomic_dec_return(&dst->__refcnt);
WARN_ON(newrefcnt < 0);
- if (unlikely(dst->flags & DST_NOCACHE) && !newrefcnt) {
- dst = dst_destroy(dst);
- if (dst)
- __dst_free(dst);
- }
+ if (unlikely(dst->flags & DST_NOCACHE) && !newrefcnt)
+ call_rcu(&dst->rcu_head, dst_destroy_rcu);
}
}
EXPORT_SYMBOL(dst_release);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index aa8978ac47d..17cb912793f 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -561,19 +561,17 @@ static int ethtool_copy_validate_indir(u32 *indir, void __user *useraddr,
struct ethtool_rxnfc *rx_rings,
u32 size)
{
- int ret = 0, i;
+ int i;
if (copy_from_user(indir, useraddr, size * sizeof(indir[0])))
- ret = -EFAULT;
+ return -EFAULT;
/* Validate ring indices */
- for (i = 0; i < size; i++) {
- if (indir[i] >= rx_rings->data) {
- ret = -EINVAL;
- break;
- }
- }
- return ret;
+ for (i = 0; i < size; i++)
+ if (indir[i] >= rx_rings->data)
+ return -EINVAL;
+
+ return 0;
}
static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev,
@@ -584,7 +582,7 @@ static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev,
int ret;
if (!dev->ethtool_ops->get_rxfh_indir_size ||
- !dev->ethtool_ops->get_rxfh_indir)
+ !dev->ethtool_ops->get_rxfh)
return -EOPNOTSUPP;
dev_size = dev->ethtool_ops->get_rxfh_indir_size(dev);
if (dev_size == 0)
@@ -610,7 +608,7 @@ static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev,
if (!indir)
return -ENOMEM;
- ret = dev->ethtool_ops->get_rxfh_indir(dev, indir);
+ ret = dev->ethtool_ops->get_rxfh(dev, indir, NULL);
if (ret)
goto out;
@@ -634,7 +632,7 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev,
int ret;
u32 ringidx_offset = offsetof(struct ethtool_rxfh_indir, ring_index[0]);
- if (!ops->get_rxfh_indir_size || !ops->set_rxfh_indir ||
+ if (!ops->get_rxfh_indir_size || !ops->set_rxfh ||
!ops->get_rxnfc)
return -EOPNOTSUPP;
@@ -671,7 +669,7 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev,
goto out;
}
- ret = ops->set_rxfh_indir(dev, indir);
+ ret = ops->set_rxfh(dev, indir, NULL);
out:
kfree(indir);
@@ -683,11 +681,11 @@ static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev,
{
int ret;
const struct ethtool_ops *ops = dev->ethtool_ops;
- u32 user_indir_size = 0, user_key_size = 0;
+ u32 user_indir_size, user_key_size;
u32 dev_indir_size = 0, dev_key_size = 0;
+ struct ethtool_rxfh rxfh;
u32 total_size;
- u32 indir_offset, indir_bytes;
- u32 key_offset;
+ u32 indir_bytes;
u32 *indir = NULL;
u8 *hkey = NULL;
u8 *rss_config;
@@ -699,33 +697,24 @@ static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev,
if (ops->get_rxfh_indir_size)
dev_indir_size = ops->get_rxfh_indir_size(dev);
-
- indir_offset = offsetof(struct ethtool_rxfh, indir_size);
-
- if (copy_from_user(&user_indir_size,
- useraddr + indir_offset,
- sizeof(user_indir_size)))
- return -EFAULT;
-
- if (copy_to_user(useraddr + indir_offset,
- &dev_indir_size, sizeof(dev_indir_size)))
- return -EFAULT;
-
if (ops->get_rxfh_key_size)
dev_key_size = ops->get_rxfh_key_size(dev);
if ((dev_key_size + dev_indir_size) == 0)
return -EOPNOTSUPP;
- key_offset = offsetof(struct ethtool_rxfh, key_size);
-
- if (copy_from_user(&user_key_size,
- useraddr + key_offset,
- sizeof(user_key_size)))
+ if (copy_from_user(&rxfh, useraddr, sizeof(rxfh)))
return -EFAULT;
+ user_indir_size = rxfh.indir_size;
+ user_key_size = rxfh.key_size;
- if (copy_to_user(useraddr + key_offset,
- &dev_key_size, sizeof(dev_key_size)))
+ /* Check that reserved fields are 0 for now */
+ if (rxfh.rss_context || rxfh.rsvd[0] || rxfh.rsvd[1])
+ return -EINVAL;
+
+ rxfh.indir_size = dev_indir_size;
+ rxfh.key_size = dev_key_size;
+ if (copy_to_user(useraddr, &rxfh, sizeof(rxfh)))
return -EFAULT;
/* If the user buffer size is 0, this is just a query for the
@@ -770,12 +759,11 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
int ret;
const struct ethtool_ops *ops = dev->ethtool_ops;
struct ethtool_rxnfc rx_rings;
- u32 user_indir_size = 0, dev_indir_size = 0, i;
- u32 user_key_size = 0, dev_key_size = 0;
+ struct ethtool_rxfh rxfh;
+ u32 dev_indir_size = 0, dev_key_size = 0, i;
u32 *indir = NULL, indir_bytes = 0;
u8 *hkey = NULL;
u8 *rss_config;
- u32 indir_offset, key_offset;
u32 rss_cfg_offset = offsetof(struct ethtool_rxfh, rss_config[0]);
if (!(ops->get_rxfh_indir_size || ops->get_rxfh_key_size) ||
@@ -784,36 +772,33 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
if (ops->get_rxfh_indir_size)
dev_indir_size = ops->get_rxfh_indir_size(dev);
-
- indir_offset = offsetof(struct ethtool_rxfh, indir_size);
- if (copy_from_user(&user_indir_size,
- useraddr + indir_offset,
- sizeof(user_indir_size)))
- return -EFAULT;
-
if (ops->get_rxfh_key_size)
dev_key_size = dev->ethtool_ops->get_rxfh_key_size(dev);
-
if ((dev_key_size + dev_indir_size) == 0)
return -EOPNOTSUPP;
- key_offset = offsetof(struct ethtool_rxfh, key_size);
- if (copy_from_user(&user_key_size,
- useraddr + key_offset,
- sizeof(user_key_size)))
+ if (copy_from_user(&rxfh, useraddr, sizeof(rxfh)))
return -EFAULT;
+ /* Check that reserved fields are 0 for now */
+ if (rxfh.rss_context || rxfh.rsvd[0] || rxfh.rsvd[1])
+ return -EINVAL;
+
/* If either indir or hash key is valid, proceed further.
+ * It is not valid to request that both be unchanged.
*/
- if ((user_indir_size && ((user_indir_size != 0xDEADBEEF) &&
- user_indir_size != dev_indir_size)) ||
- (user_key_size && (user_key_size != dev_key_size)))
+ if ((rxfh.indir_size &&
+ rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE &&
+ rxfh.indir_size != dev_indir_size) ||
+ (rxfh.key_size && (rxfh.key_size != dev_key_size)) ||
+ (rxfh.indir_size == ETH_RXFH_INDIR_NO_CHANGE &&
+ rxfh.key_size == 0))
return -EINVAL;
- if (user_indir_size != 0xDEADBEEF)
+ if (rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE)
indir_bytes = dev_indir_size * sizeof(indir[0]);
- rss_config = kzalloc(indir_bytes + user_key_size, GFP_USER);
+ rss_config = kzalloc(indir_bytes + rxfh.key_size, GFP_USER);
if (!rss_config)
return -ENOMEM;
@@ -822,28 +807,29 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
if (ret)
goto out;
- /* user_indir_size == 0 means reset the indir table to default.
- * user_indir_size == 0xDEADBEEF means indir setting is not requested.
+ /* rxfh.indir_size == 0 means reset the indir table to default.
+ * rxfh.indir_size == ETH_RXFH_INDIR_NO_CHANGE means leave it unchanged.
*/
- if (user_indir_size && user_indir_size != 0xDEADBEEF) {
+ if (rxfh.indir_size &&
+ rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE) {
indir = (u32 *)rss_config;
ret = ethtool_copy_validate_indir(indir,
useraddr + rss_cfg_offset,
&rx_rings,
- user_indir_size);
+ rxfh.indir_size);
if (ret)
goto out;
- } else if (user_indir_size == 0) {
+ } else if (rxfh.indir_size == 0) {
indir = (u32 *)rss_config;
for (i = 0; i < dev_indir_size; i++)
indir[i] = ethtool_rxfh_indir_default(i, rx_rings.data);
}
- if (user_key_size) {
+ if (rxfh.key_size) {
hkey = rss_config + indir_bytes;
if (copy_from_user(hkey,
useraddr + rss_cfg_offset + indir_bytes,
- user_key_size)) {
+ rxfh.key_size)) {
ret = -EFAULT;
goto out;
}
diff --git a/net/core/filter.c b/net/core/filter.c
index 32c5b44c537..1dbf6462f76 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -59,12 +59,12 @@
#define BPF_R10 regs[BPF_REG_10]
/* Named registers */
-#define A regs[insn->a_reg]
-#define X regs[insn->x_reg]
+#define DST regs[insn->dst_reg]
+#define SRC regs[insn->src_reg]
#define FP regs[BPF_REG_FP]
#define ARG1 regs[BPF_REG_ARG1]
#define CTX regs[BPF_REG_CTX]
-#define K insn->imm
+#define IMM insn->imm
/* No hurry in this branch
*
@@ -153,102 +153,107 @@ noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
* keep, 0 for none. @ctx is the data we are operating on, @insn is the
* array of filter instructions.
*/
-unsigned int __sk_run_filter(void *ctx, const struct sock_filter_int *insn)
+static unsigned int __sk_run_filter(void *ctx, const struct sock_filter_int *insn)
{
u64 stack[MAX_BPF_STACK / sizeof(u64)];
u64 regs[MAX_BPF_REG], tmp;
static const void *jumptable[256] = {
[0 ... 255] = &&default_label,
/* Now overwrite non-defaults ... */
-#define DL(A, B, C) [BPF_##A|BPF_##B|BPF_##C] = &&A##_##B##_##C
- DL(ALU, ADD, X),
- DL(ALU, ADD, K),
- DL(ALU, SUB, X),
- DL(ALU, SUB, K),
- DL(ALU, AND, X),
- DL(ALU, AND, K),
- DL(ALU, OR, X),
- DL(ALU, OR, K),
- DL(ALU, LSH, X),
- DL(ALU, LSH, K),
- DL(ALU, RSH, X),
- DL(ALU, RSH, K),
- DL(ALU, XOR, X),
- DL(ALU, XOR, K),
- DL(ALU, MUL, X),
- DL(ALU, MUL, K),
- DL(ALU, MOV, X),
- DL(ALU, MOV, K),
- DL(ALU, DIV, X),
- DL(ALU, DIV, K),
- DL(ALU, MOD, X),
- DL(ALU, MOD, K),
- DL(ALU, NEG, 0),
- DL(ALU, END, TO_BE),
- DL(ALU, END, TO_LE),
- DL(ALU64, ADD, X),
- DL(ALU64, ADD, K),
- DL(ALU64, SUB, X),
- DL(ALU64, SUB, K),
- DL(ALU64, AND, X),
- DL(ALU64, AND, K),
- DL(ALU64, OR, X),
- DL(ALU64, OR, K),
- DL(ALU64, LSH, X),
- DL(ALU64, LSH, K),
- DL(ALU64, RSH, X),
- DL(ALU64, RSH, K),
- DL(ALU64, XOR, X),
- DL(ALU64, XOR, K),
- DL(ALU64, MUL, X),
- DL(ALU64, MUL, K),
- DL(ALU64, MOV, X),
- DL(ALU64, MOV, K),
- DL(ALU64, ARSH, X),
- DL(ALU64, ARSH, K),
- DL(ALU64, DIV, X),
- DL(ALU64, DIV, K),
- DL(ALU64, MOD, X),
- DL(ALU64, MOD, K),
- DL(ALU64, NEG, 0),
- DL(JMP, CALL, 0),
- DL(JMP, JA, 0),
- DL(JMP, JEQ, X),
- DL(JMP, JEQ, K),
- DL(JMP, JNE, X),
- DL(JMP, JNE, K),
- DL(JMP, JGT, X),
- DL(JMP, JGT, K),
- DL(JMP, JGE, X),
- DL(JMP, JGE, K),
- DL(JMP, JSGT, X),
- DL(JMP, JSGT, K),
- DL(JMP, JSGE, X),
- DL(JMP, JSGE, K),
- DL(JMP, JSET, X),
- DL(JMP, JSET, K),
- DL(JMP, EXIT, 0),
- DL(STX, MEM, B),
- DL(STX, MEM, H),
- DL(STX, MEM, W),
- DL(STX, MEM, DW),
- DL(STX, XADD, W),
- DL(STX, XADD, DW),
- DL(ST, MEM, B),
- DL(ST, MEM, H),
- DL(ST, MEM, W),
- DL(ST, MEM, DW),
- DL(LDX, MEM, B),
- DL(LDX, MEM, H),
- DL(LDX, MEM, W),
- DL(LDX, MEM, DW),
- DL(LD, ABS, W),
- DL(LD, ABS, H),
- DL(LD, ABS, B),
- DL(LD, IND, W),
- DL(LD, IND, H),
- DL(LD, IND, B),
-#undef DL
+ /* 32 bit ALU operations */
+ [BPF_ALU | BPF_ADD | BPF_X] = &&ALU_ADD_X,
+ [BPF_ALU | BPF_ADD | BPF_K] = &&ALU_ADD_K,
+ [BPF_ALU | BPF_SUB | BPF_X] = &&ALU_SUB_X,
+ [BPF_ALU | BPF_SUB | BPF_K] = &&ALU_SUB_K,
+ [BPF_ALU | BPF_AND | BPF_X] = &&ALU_AND_X,
+ [BPF_ALU | BPF_AND | BPF_K] = &&ALU_AND_K,
+ [BPF_ALU | BPF_OR | BPF_X] = &&ALU_OR_X,
+ [BPF_ALU | BPF_OR | BPF_K] = &&ALU_OR_K,
+ [BPF_ALU | BPF_LSH | BPF_X] = &&ALU_LSH_X,
+ [BPF_ALU | BPF_LSH | BPF_K] = &&ALU_LSH_K,
+ [BPF_ALU | BPF_RSH | BPF_X] = &&ALU_RSH_X,
+ [BPF_ALU | BPF_RSH | BPF_K] = &&ALU_RSH_K,
+ [BPF_ALU | BPF_XOR | BPF_X] = &&ALU_XOR_X,
+ [BPF_ALU | BPF_XOR | BPF_K] = &&ALU_XOR_K,
+ [BPF_ALU | BPF_MUL | BPF_X] = &&ALU_MUL_X,
+ [BPF_ALU | BPF_MUL | BPF_K] = &&ALU_MUL_K,
+ [BPF_ALU | BPF_MOV | BPF_X] = &&ALU_MOV_X,
+ [BPF_ALU | BPF_MOV | BPF_K] = &&ALU_MOV_K,
+ [BPF_ALU | BPF_DIV | BPF_X] = &&ALU_DIV_X,
+ [BPF_ALU | BPF_DIV | BPF_K] = &&ALU_DIV_K,
+ [BPF_ALU | BPF_MOD | BPF_X] = &&ALU_MOD_X,
+ [BPF_ALU | BPF_MOD | BPF_K] = &&ALU_MOD_K,
+ [BPF_ALU | BPF_NEG] = &&ALU_NEG,
+ [BPF_ALU | BPF_END | BPF_TO_BE] = &&ALU_END_TO_BE,
+ [BPF_ALU | BPF_END | BPF_TO_LE] = &&ALU_END_TO_LE,
+ /* 64 bit ALU operations */
+ [BPF_ALU64 | BPF_ADD | BPF_X] = &&ALU64_ADD_X,
+ [BPF_ALU64 | BPF_ADD | BPF_K] = &&ALU64_ADD_K,
+ [BPF_ALU64 | BPF_SUB | BPF_X] = &&ALU64_SUB_X,
+ [BPF_ALU64 | BPF_SUB | BPF_K] = &&ALU64_SUB_K,
+ [BPF_ALU64 | BPF_AND | BPF_X] = &&ALU64_AND_X,
+ [BPF_ALU64 | BPF_AND | BPF_K] = &&ALU64_AND_K,
+ [BPF_ALU64 | BPF_OR | BPF_X] = &&ALU64_OR_X,
+ [BPF_ALU64 | BPF_OR | BPF_K] = &&ALU64_OR_K,
+ [BPF_ALU64 | BPF_LSH | BPF_X] = &&ALU64_LSH_X,
+ [BPF_ALU64 | BPF_LSH | BPF_K] = &&ALU64_LSH_K,
+ [BPF_ALU64 | BPF_RSH | BPF_X] = &&ALU64_RSH_X,
+ [BPF_ALU64 | BPF_RSH | BPF_K] = &&ALU64_RSH_K,
+ [BPF_ALU64 | BPF_XOR | BPF_X] = &&ALU64_XOR_X,
+ [BPF_ALU64 | BPF_XOR | BPF_K] = &&ALU64_XOR_K,
+ [BPF_ALU64 | BPF_MUL | BPF_X] = &&ALU64_MUL_X,
+ [BPF_ALU64 | BPF_MUL | BPF_K] = &&ALU64_MUL_K,
+ [BPF_ALU64 | BPF_MOV | BPF_X] = &&ALU64_MOV_X,
+ [BPF_ALU64 | BPF_MOV | BPF_K] = &&ALU64_MOV_K,
+ [BPF_ALU64 | BPF_ARSH | BPF_X] = &&ALU64_ARSH_X,
+ [BPF_ALU64 | BPF_ARSH | BPF_K] = &&ALU64_ARSH_K,
+ [BPF_ALU64 | BPF_DIV | BPF_X] = &&ALU64_DIV_X,
+ [BPF_ALU64 | BPF_DIV | BPF_K] = &&ALU64_DIV_K,
+ [BPF_ALU64 | BPF_MOD | BPF_X] = &&ALU64_MOD_X,
+ [BPF_ALU64 | BPF_MOD | BPF_K] = &&ALU64_MOD_K,
+ [BPF_ALU64 | BPF_NEG] = &&ALU64_NEG,
+ /* Call instruction */
+ [BPF_JMP | BPF_CALL] = &&JMP_CALL,
+ /* Jumps */
+ [BPF_JMP | BPF_JA] = &&JMP_JA,
+ [BPF_JMP | BPF_JEQ | BPF_X] = &&JMP_JEQ_X,
+ [BPF_JMP | BPF_JEQ | BPF_K] = &&JMP_JEQ_K,
+ [BPF_JMP | BPF_JNE | BPF_X] = &&JMP_JNE_X,
+ [BPF_JMP | BPF_JNE | BPF_K] = &&JMP_JNE_K,
+ [BPF_JMP | BPF_JGT | BPF_X] = &&JMP_JGT_X,
+ [BPF_JMP | BPF_JGT | BPF_K] = &&JMP_JGT_K,
+ [BPF_JMP | BPF_JGE | BPF_X] = &&JMP_JGE_X,
+ [BPF_JMP | BPF_JGE | BPF_K] = &&JMP_JGE_K,
+ [BPF_JMP | BPF_JSGT | BPF_X] = &&JMP_JSGT_X,
+ [BPF_JMP | BPF_JSGT | BPF_K] = &&JMP_JSGT_K,
+ [BPF_JMP | BPF_JSGE | BPF_X] = &&JMP_JSGE_X,
+ [BPF_JMP | BPF_JSGE | BPF_K] = &&JMP_JSGE_K,
+ [BPF_JMP | BPF_JSET | BPF_X] = &&JMP_JSET_X,
+ [BPF_JMP | BPF_JSET | BPF_K] = &&JMP_JSET_K,
+ /* Program return */
+ [BPF_JMP | BPF_EXIT] = &&JMP_EXIT,
+ /* Store instructions */
+ [BPF_STX | BPF_MEM | BPF_B] = &&STX_MEM_B,
+ [BPF_STX | BPF_MEM | BPF_H] = &&STX_MEM_H,
+ [BPF_STX | BPF_MEM | BPF_W] = &&STX_MEM_W,
+ [BPF_STX | BPF_MEM | BPF_DW] = &&STX_MEM_DW,
+ [BPF_STX | BPF_XADD | BPF_W] = &&STX_XADD_W,
+ [BPF_STX | BPF_XADD | BPF_DW] = &&STX_XADD_DW,
+ [BPF_ST | BPF_MEM | BPF_B] = &&ST_MEM_B,
+ [BPF_ST | BPF_MEM | BPF_H] = &&ST_MEM_H,
+ [BPF_ST | BPF_MEM | BPF_W] = &&ST_MEM_W,
+ [BPF_ST | BPF_MEM | BPF_DW] = &&ST_MEM_DW,
+ /* Load instructions */
+ [BPF_LDX | BPF_MEM | BPF_B] = &&LDX_MEM_B,
+ [BPF_LDX | BPF_MEM | BPF_H] = &&LDX_MEM_H,
+ [BPF_LDX | BPF_MEM | BPF_W] = &&LDX_MEM_W,
+ [BPF_LDX | BPF_MEM | BPF_DW] = &&LDX_MEM_DW,
+ [BPF_LD | BPF_ABS | BPF_W] = &&LD_ABS_W,
+ [BPF_LD | BPF_ABS | BPF_H] = &&LD_ABS_H,
+ [BPF_LD | BPF_ABS | BPF_B] = &&LD_ABS_B,
+ [BPF_LD | BPF_IND | BPF_W] = &&LD_IND_W,
+ [BPF_LD | BPF_IND | BPF_H] = &&LD_IND_H,
+ [BPF_LD | BPF_IND | BPF_B] = &&LD_IND_B,
};
void *ptr;
int off;
@@ -259,7 +264,7 @@ unsigned int __sk_run_filter(void *ctx, const struct sock_filter_int *insn)
FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)];
ARG1 = (u64) (unsigned long) ctx;
- /* Register for user BPF programs need to be reset first. */
+ /* Registers used in classic BPF programs need to be reset first. */
regs[BPF_REG_A] = 0;
regs[BPF_REG_X] = 0;
@@ -269,16 +274,16 @@ select_insn:
/* ALU */
#define ALU(OPCODE, OP) \
ALU64_##OPCODE##_X: \
- A = A OP X; \
+ DST = DST OP SRC; \
CONT; \
ALU_##OPCODE##_X: \
- A = (u32) A OP (u32) X; \
+ DST = (u32) DST OP (u32) SRC; \
CONT; \
ALU64_##OPCODE##_K: \
- A = A OP K; \
+ DST = DST OP IMM; \
CONT; \
ALU_##OPCODE##_K: \
- A = (u32) A OP (u32) K; \
+ DST = (u32) DST OP (u32) IMM; \
CONT;
ALU(ADD, +)
@@ -290,99 +295,99 @@ select_insn:
ALU(XOR, ^)
ALU(MUL, *)
#undef ALU
- ALU_NEG_0:
- A = (u32) -A;
+ ALU_NEG:
+ DST = (u32) -DST;
CONT;
- ALU64_NEG_0:
- A = -A;
+ ALU64_NEG:
+ DST = -DST;
CONT;
ALU_MOV_X:
- A = (u32) X;
+ DST = (u32) SRC;
CONT;
ALU_MOV_K:
- A = (u32) K;
+ DST = (u32) IMM;
CONT;
ALU64_MOV_X:
- A = X;
+ DST = SRC;
CONT;
ALU64_MOV_K:
- A = K;
+ DST = IMM;
CONT;
ALU64_ARSH_X:
- (*(s64 *) &A) >>= X;
+ (*(s64 *) &DST) >>= SRC;
CONT;
ALU64_ARSH_K:
- (*(s64 *) &A) >>= K;
+ (*(s64 *) &DST) >>= IMM;
CONT;
ALU64_MOD_X:
- if (unlikely(X == 0))
+ if (unlikely(SRC == 0))
return 0;
- tmp = A;
- A = do_div(tmp, X);
+ tmp = DST;
+ DST = do_div(tmp, SRC);
CONT;
ALU_MOD_X:
- if (unlikely(X == 0))
+ if (unlikely(SRC == 0))
return 0;
- tmp = (u32) A;
- A = do_div(tmp, (u32) X);
+ tmp = (u32) DST;
+ DST = do_div(tmp, (u32) SRC);
CONT;
ALU64_MOD_K:
- tmp = A;
- A = do_div(tmp, K);
+ tmp = DST;
+ DST = do_div(tmp, IMM);
CONT;
ALU_MOD_K:
- tmp = (u32) A;
- A = do_div(tmp, (u32) K);
+ tmp = (u32) DST;
+ DST = do_div(tmp, (u32) IMM);
CONT;
ALU64_DIV_X:
- if (unlikely(X == 0))
+ if (unlikely(SRC == 0))
return 0;
- do_div(A, X);
+ do_div(DST, SRC);
CONT;
ALU_DIV_X:
- if (unlikely(X == 0))
+ if (unlikely(SRC == 0))
return 0;
- tmp = (u32) A;
- do_div(tmp, (u32) X);
- A = (u32) tmp;
+ tmp = (u32) DST;
+ do_div(tmp, (u32) SRC);
+ DST = (u32) tmp;
CONT;
ALU64_DIV_K:
- do_div(A, K);
+ do_div(DST, IMM);
CONT;
ALU_DIV_K:
- tmp = (u32) A;
- do_div(tmp, (u32) K);
- A = (u32) tmp;
+ tmp = (u32) DST;
+ do_div(tmp, (u32) IMM);
+ DST = (u32) tmp;
CONT;
ALU_END_TO_BE:
- switch (K) {
+ switch (IMM) {
case 16:
- A = (__force u16) cpu_to_be16(A);
+ DST = (__force u16) cpu_to_be16(DST);
break;
case 32:
- A = (__force u32) cpu_to_be32(A);
+ DST = (__force u32) cpu_to_be32(DST);
break;
case 64:
- A = (__force u64) cpu_to_be64(A);
+ DST = (__force u64) cpu_to_be64(DST);
break;
}
CONT;
ALU_END_TO_LE:
- switch (K) {
+ switch (IMM) {
case 16:
- A = (__force u16) cpu_to_le16(A);
+ DST = (__force u16) cpu_to_le16(DST);
break;
case 32:
- A = (__force u32) cpu_to_le32(A);
+ DST = (__force u32) cpu_to_le32(DST);
break;
case 64:
- A = (__force u64) cpu_to_le64(A);
+ DST = (__force u64) cpu_to_le64(DST);
break;
}
CONT;
/* CALL */
- JMP_CALL_0:
+ JMP_CALL:
/* Function call scratches BPF_R1-BPF_R5 registers,
* preserves BPF_R6-BPF_R9, and stores return value
* into BPF_R0.
@@ -392,106 +397,106 @@ select_insn:
CONT;
/* JMP */
- JMP_JA_0:
+ JMP_JA:
insn += insn->off;
CONT;
JMP_JEQ_X:
- if (A == X) {
+ if (DST == SRC) {
insn += insn->off;
CONT_JMP;
}
CONT;
JMP_JEQ_K:
- if (A == K) {
+ if (DST == IMM) {
insn += insn->off;
CONT_JMP;
}
CONT;
JMP_JNE_X:
- if (A != X) {
+ if (DST != SRC) {
insn += insn->off;
CONT_JMP;
}
CONT;
JMP_JNE_K:
- if (A != K) {
+ if (DST != IMM) {
insn += insn->off;
CONT_JMP;
}
CONT;
JMP_JGT_X:
- if (A > X) {
+ if (DST > SRC) {
insn += insn->off;
CONT_JMP;
}
CONT;
JMP_JGT_K:
- if (A > K) {
+ if (DST > IMM) {
insn += insn->off;
CONT_JMP;
}
CONT;
JMP_JGE_X:
- if (A >= X) {
+ if (DST >= SRC) {
insn += insn->off;
CONT_JMP;
}
CONT;
JMP_JGE_K:
- if (A >= K) {
+ if (DST >= IMM) {
insn += insn->off;
CONT_JMP;
}
CONT;
JMP_JSGT_X:
- if (((s64) A) > ((s64) X)) {
+ if (((s64) DST) > ((s64) SRC)) {
insn += insn->off;
CONT_JMP;
}
CONT;
JMP_JSGT_K:
- if (((s64) A) > ((s64) K)) {
+ if (((s64) DST) > ((s64) IMM)) {
insn += insn->off;
CONT_JMP;
}
CONT;
JMP_JSGE_X:
- if (((s64) A) >= ((s64) X)) {
+ if (((s64) DST) >= ((s64) SRC)) {
insn += insn->off;
CONT_JMP;
}
CONT;
JMP_JSGE_K:
- if (((s64) A) >= ((s64) K)) {
+ if (((s64) DST) >= ((s64) IMM)) {
insn += insn->off;
CONT_JMP;
}
CONT;
JMP_JSET_X:
- if (A & X) {
+ if (DST & SRC) {
insn += insn->off;
CONT_JMP;
}
CONT;
JMP_JSET_K:
- if (A & K) {
+ if (DST & IMM) {
insn += insn->off;
CONT_JMP;
}
CONT;
- JMP_EXIT_0:
+ JMP_EXIT:
return BPF_R0;
/* STX and ST and LDX*/
-#define LDST(SIZEOP, SIZE) \
- STX_MEM_##SIZEOP: \
- *(SIZE *)(unsigned long) (A + insn->off) = X; \
- CONT; \
- ST_MEM_##SIZEOP: \
- *(SIZE *)(unsigned long) (A + insn->off) = K; \
- CONT; \
- LDX_MEM_##SIZEOP: \
- A = *(SIZE *)(unsigned long) (X + insn->off); \
+#define LDST(SIZEOP, SIZE) \
+ STX_MEM_##SIZEOP: \
+ *(SIZE *)(unsigned long) (DST + insn->off) = SRC; \
+ CONT; \
+ ST_MEM_##SIZEOP: \
+ *(SIZE *)(unsigned long) (DST + insn->off) = IMM; \
+ CONT; \
+ LDX_MEM_##SIZEOP: \
+ DST = *(SIZE *)(unsigned long) (SRC + insn->off); \
CONT;
LDST(B, u8)
@@ -499,16 +504,16 @@ select_insn:
LDST(W, u32)
LDST(DW, u64)
#undef LDST
- STX_XADD_W: /* lock xadd *(u32 *)(A + insn->off) += X */
- atomic_add((u32) X, (atomic_t *)(unsigned long)
- (A + insn->off));
+ STX_XADD_W: /* lock xadd *(u32 *)(dst_reg + off16) += src_reg */
+ atomic_add((u32) SRC, (atomic_t *)(unsigned long)
+ (DST + insn->off));
CONT;
- STX_XADD_DW: /* lock xadd *(u64 *)(A + insn->off) += X */
- atomic64_add((u64) X, (atomic64_t *)(unsigned long)
- (A + insn->off));
+ STX_XADD_DW: /* lock xadd *(u64 *)(dst_reg + off16) += src_reg */
+ atomic64_add((u64) SRC, (atomic64_t *)(unsigned long)
+ (DST + insn->off));
CONT;
- LD_ABS_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + K)) */
- off = K;
+ LD_ABS_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + imm32)) */
+ off = IMM;
load_word:
/* BPF_LD + BPD_ABS and BPF_LD + BPF_IND insns are
* only appearing in the programs where ctx ==
@@ -522,47 +527,51 @@ load_word:
* BPF_R6-BPF_R9, and store return value into BPF_R0.
*
* Implicit input:
- * ctx
+ * ctx == skb == BPF_R6 == CTX
*
* Explicit input:
- * X == any register
- * K == 32-bit immediate
+ * SRC == any register
+ * IMM == 32-bit immediate
*
* Output:
* BPF_R0 - 8/16/32-bit skb data converted to cpu endianness
*/
- ptr = load_pointer((struct sk_buff *) ctx, off, 4, &tmp);
+
+ ptr = load_pointer((struct sk_buff *) (unsigned long) CTX, off, 4, &tmp);
if (likely(ptr != NULL)) {
BPF_R0 = get_unaligned_be32(ptr);
CONT;
}
+
return 0;
- LD_ABS_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + K)) */
- off = K;
+ LD_ABS_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + imm32)) */
+ off = IMM;
load_half:
- ptr = load_pointer((struct sk_buff *) ctx, off, 2, &tmp);
+ ptr = load_pointer((struct sk_buff *) (unsigned long) CTX, off, 2, &tmp);
if (likely(ptr != NULL)) {
BPF_R0 = get_unaligned_be16(ptr);
CONT;
}
+
return 0;
- LD_ABS_B: /* BPF_R0 = *(u8 *) (ctx + K) */
- off = K;
+ LD_ABS_B: /* BPF_R0 = *(u8 *) (skb->data + imm32) */
+ off = IMM;
load_byte:
- ptr = load_pointer((struct sk_buff *) ctx, off, 1, &tmp);
+ ptr = load_pointer((struct sk_buff *) (unsigned long) CTX, off, 1, &tmp);
if (likely(ptr != NULL)) {
BPF_R0 = *(u8 *)ptr;
CONT;
}
+
return 0;
- LD_IND_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + X + K)) */
- off = K + X;
+ LD_IND_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + src_reg + imm32)) */
+ off = IMM + SRC;
goto load_word;
- LD_IND_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + X + K)) */
- off = K + X;
+ LD_IND_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + src_reg + imm32)) */
+ off = IMM + SRC;
goto load_half;
- LD_IND_B: /* BPF_R0 = *(u8 *) (skb->data + X + K) */
- off = K + X;
+ LD_IND_B: /* BPF_R0 = *(u8 *) (skb->data + src_reg + imm32) */
+ off = IMM + SRC;
goto load_byte;
default_label:
@@ -571,20 +580,15 @@ load_byte:
return 0;
}
-u32 sk_run_filter_int_seccomp(const struct seccomp_data *ctx,
- const struct sock_filter_int *insni)
- __attribute__ ((alias ("__sk_run_filter")));
-
-u32 sk_run_filter_int_skb(const struct sk_buff *ctx,
- const struct sock_filter_int *insni)
- __attribute__ ((alias ("__sk_run_filter")));
-EXPORT_SYMBOL_GPL(sk_run_filter_int_skb);
-
/* Helper to find the offset of pkt_type in sk_buff structure. We want
* to make sure its still a 3bit field starting at a byte boundary;
* taken from arch/x86/net/bpf_jit_comp.c.
*/
+#ifdef __BIG_ENDIAN_BITFIELD
+#define PKT_TYPE_MAX (7 << 5)
+#else
#define PKT_TYPE_MAX 7
+#endif
static unsigned int pkt_type_offset(void)
{
struct sk_buff skb_probe = { .pkt_type = ~0, };
@@ -671,15 +675,11 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
case SKF_AD_OFF + SKF_AD_PROTOCOL:
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2);
- /* A = *(u16 *) (ctx + offsetof(protocol)) */
- *insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
- offsetof(struct sk_buff, protocol));
- insn++;
-
+ /* A = *(u16 *) (CTX + offsetof(protocol)) */
+ *insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
+ offsetof(struct sk_buff, protocol));
/* A = ntohs(A) [emitting a nop or swap16] */
- insn->code = BPF_ALU | BPF_END | BPF_FROM_BE;
- insn->a_reg = BPF_REG_A;
- insn->imm = 16;
+ *insn = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, 16);
break;
case SKF_AD_OFF + SKF_AD_PKTTYPE:
@@ -688,37 +688,31 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
if (insn->off < 0)
return false;
insn++;
-
*insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, PKT_TYPE_MAX);
+#ifdef __BIG_ENDIAN_BITFIELD
+ insn++;
+ *insn = BPF_ALU32_IMM(BPF_RSH, BPF_REG_A, 5);
+#endif
break;
case SKF_AD_OFF + SKF_AD_IFINDEX:
case SKF_AD_OFF + SKF_AD_HATYPE:
- *insn = BPF_LDX_MEM(size_to_bpf(FIELD_SIZEOF(struct sk_buff, dev)),
- BPF_REG_TMP, BPF_REG_CTX,
- offsetof(struct sk_buff, dev));
- insn++;
-
- /* if (tmp != 0) goto pc+1 */
- *insn = BPF_JMP_IMM(BPF_JNE, BPF_REG_TMP, 0, 1);
- insn++;
-
- *insn = BPF_EXIT_INSN();
- insn++;
-
BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2);
-
- insn->a_reg = BPF_REG_A;
- insn->x_reg = BPF_REG_TMP;
-
- if (fp->k == SKF_AD_OFF + SKF_AD_IFINDEX) {
- insn->code = BPF_LDX | BPF_MEM | BPF_W;
- insn->off = offsetof(struct net_device, ifindex);
- } else {
- insn->code = BPF_LDX | BPF_MEM | BPF_H;
- insn->off = offsetof(struct net_device, type);
- }
+ BUILD_BUG_ON(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, dev)) < 0);
+
+ *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, dev)),
+ BPF_REG_TMP, BPF_REG_CTX,
+ offsetof(struct sk_buff, dev));
+ /* if (tmp != 0) goto pc + 1 */
+ *insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_TMP, 0, 1);
+ *insn++ = BPF_EXIT_INSN();
+ if (fp->k == SKF_AD_OFF + SKF_AD_IFINDEX)
+ *insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_TMP,
+ offsetof(struct net_device, ifindex));
+ else
+ *insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_TMP,
+ offsetof(struct net_device, type));
break;
case SKF_AD_OFF + SKF_AD_MARK:
@@ -745,22 +739,17 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
case SKF_AD_OFF + SKF_AD_VLAN_TAG:
case SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT:
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
-
- /* A = *(u16 *) (ctx + offsetof(vlan_tci)) */
- *insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
- offsetof(struct sk_buff, vlan_tci));
- insn++;
-
BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000);
+ /* A = *(u16 *) (CTX + offsetof(vlan_tci)) */
+ *insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
+ offsetof(struct sk_buff, vlan_tci));
if (fp->k == SKF_AD_OFF + SKF_AD_VLAN_TAG) {
*insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A,
~VLAN_TAG_PRESENT);
} else {
/* A >>= 12 */
- *insn = BPF_ALU32_IMM(BPF_RSH, BPF_REG_A, 12);
- insn++;
-
+ *insn++ = BPF_ALU32_IMM(BPF_RSH, BPF_REG_A, 12);
/* A &= 1 */
*insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 1);
}
@@ -771,35 +760,28 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
case SKF_AD_OFF + SKF_AD_CPU:
case SKF_AD_OFF + SKF_AD_RANDOM:
- /* arg1 = ctx */
- *insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_ARG1, BPF_REG_CTX);
- insn++;
-
+ /* arg1 = CTX */
+ *insn++ = BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX);
/* arg2 = A */
- *insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_ARG2, BPF_REG_A);
- insn++;
-
+ *insn++ = BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_A);
/* arg3 = X */
- *insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_ARG3, BPF_REG_X);
- insn++;
-
- /* Emit call(ctx, arg2=A, arg3=X) */
- insn->code = BPF_JMP | BPF_CALL;
+ *insn++ = BPF_MOV64_REG(BPF_REG_ARG3, BPF_REG_X);
+ /* Emit call(arg1=CTX, arg2=A, arg3=X) */
switch (fp->k) {
case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
- insn->imm = __skb_get_pay_offset - __bpf_call_base;
+ *insn = BPF_EMIT_CALL(__skb_get_pay_offset);
break;
case SKF_AD_OFF + SKF_AD_NLATTR:
- insn->imm = __skb_get_nlattr - __bpf_call_base;
+ *insn = BPF_EMIT_CALL(__skb_get_nlattr);
break;
case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
- insn->imm = __skb_get_nlattr_nest - __bpf_call_base;
+ *insn = BPF_EMIT_CALL(__skb_get_nlattr_nest);
break;
case SKF_AD_OFF + SKF_AD_CPU:
- insn->imm = __get_raw_cpu_id - __bpf_call_base;
+ *insn = BPF_EMIT_CALL(__get_raw_cpu_id);
break;
case SKF_AD_OFF + SKF_AD_RANDOM:
- insn->imm = __get_random_u32 - __bpf_call_base;
+ *insn = BPF_EMIT_CALL(__get_random_u32);
break;
}
break;
@@ -858,11 +840,11 @@ int sk_convert_filter(struct sock_filter *prog, int len,
BUILD_BUG_ON(BPF_MEMWORDS * sizeof(u32) > MAX_BPF_STACK);
BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
- if (len <= 0 || len >= BPF_MAXINSNS)
+ if (len <= 0 || len > BPF_MAXINSNS)
return -EINVAL;
if (new_prog) {
- addrs = kzalloc(len * sizeof(*addrs), GFP_KERNEL);
+ addrs = kcalloc(len, sizeof(*addrs), GFP_KERNEL);
if (!addrs)
return -ENOMEM;
}
@@ -871,9 +853,8 @@ do_pass:
new_insn = new_prog;
fp = prog;
- if (new_insn) {
- *new_insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_CTX, BPF_REG_ARG1);
- }
+ if (new_insn)
+ *new_insn = BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1);
new_insn++;
for (i = 0; i < len; fp++, i++) {
@@ -921,17 +902,16 @@ do_pass:
convert_bpf_extensions(fp, &insn))
break;
- insn->code = fp->code;
- insn->a_reg = BPF_REG_A;
- insn->x_reg = BPF_REG_X;
- insn->imm = fp->k;
+ *insn = BPF_RAW_INSN(fp->code, BPF_REG_A, BPF_REG_X, 0, fp->k);
break;
- /* Jump opcodes map as-is, but offsets need adjustment. */
- case BPF_JMP | BPF_JA:
- target = i + fp->k + 1;
- insn->code = fp->code;
-#define EMIT_JMP \
+ /* Jump transformation cannot use BPF block macros
+ * everywhere as offset calculation and target updates
+ * require a bit more work than the rest, i.e. jump
+ * opcodes map as-is, but offsets need adjustment.
+ */
+
+#define BPF_EMIT_JMP \
do { \
if (target >= len || target < 0) \
goto err; \
@@ -940,7 +920,10 @@ do_pass:
insn->off -= insn - tmp_insns; \
} while (0)
- EMIT_JMP;
+ case BPF_JMP | BPF_JA:
+ target = i + fp->k + 1;
+ insn->code = fp->code;
+ BPF_EMIT_JMP;
break;
case BPF_JMP | BPF_JEQ | BPF_K:
@@ -956,17 +939,14 @@ do_pass:
* immediate into tmp register and use it
* in compare insn.
*/
- insn->code = BPF_ALU | BPF_MOV | BPF_K;
- insn->a_reg = BPF_REG_TMP;
- insn->imm = fp->k;
- insn++;
+ *insn++ = BPF_MOV32_IMM(BPF_REG_TMP, fp->k);
- insn->a_reg = BPF_REG_A;
- insn->x_reg = BPF_REG_TMP;
+ insn->dst_reg = BPF_REG_A;
+ insn->src_reg = BPF_REG_TMP;
bpf_src = BPF_X;
} else {
- insn->a_reg = BPF_REG_A;
- insn->x_reg = BPF_REG_X;
+ insn->dst_reg = BPF_REG_A;
+ insn->src_reg = BPF_REG_X;
insn->imm = fp->k;
bpf_src = BPF_SRC(fp->code);
}
@@ -975,7 +955,7 @@ do_pass:
if (fp->jf == 0) {
insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
target = i + fp->jt + 1;
- EMIT_JMP;
+ BPF_EMIT_JMP;
break;
}
@@ -983,116 +963,94 @@ do_pass:
if (fp->jt == 0 && BPF_OP(fp->code) == BPF_JEQ) {
insn->code = BPF_JMP | BPF_JNE | bpf_src;
target = i + fp->jf + 1;
- EMIT_JMP;
+ BPF_EMIT_JMP;
break;
}
/* Other jumps are mapped into two insns: Jxx and JA. */
target = i + fp->jt + 1;
insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
- EMIT_JMP;
+ BPF_EMIT_JMP;
insn++;
insn->code = BPF_JMP | BPF_JA;
target = i + fp->jf + 1;
- EMIT_JMP;
+ BPF_EMIT_JMP;
break;
/* ldxb 4 * ([14] & 0xf) is remaped into 6 insns. */
case BPF_LDX | BPF_MSH | BPF_B:
/* tmp = A */
- *insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_TMP, BPF_REG_A);
- insn++;
-
+ *insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_A);
/* A = BPF_R0 = *(u8 *) (skb->data + K) */
- *insn = BPF_LD_ABS(BPF_B, fp->k);
- insn++;
-
+ *insn++ = BPF_LD_ABS(BPF_B, fp->k);
/* A &= 0xf */
- *insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 0xf);
- insn++;
-
+ *insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 0xf);
/* A <<= 2 */
- *insn = BPF_ALU32_IMM(BPF_LSH, BPF_REG_A, 2);
- insn++;
-
+ *insn++ = BPF_ALU32_IMM(BPF_LSH, BPF_REG_A, 2);
/* X = A */
- *insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_X, BPF_REG_A);
- insn++;
-
+ *insn++ = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
/* A = tmp */
- *insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_A, BPF_REG_TMP);
+ *insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_TMP);
break;
/* RET_K, RET_A are remaped into 2 insns. */
case BPF_RET | BPF_A:
case BPF_RET | BPF_K:
- insn->code = BPF_ALU | BPF_MOV |
- (BPF_RVAL(fp->code) == BPF_K ?
- BPF_K : BPF_X);
- insn->a_reg = 0;
- insn->x_reg = BPF_REG_A;
- insn->imm = fp->k;
- insn++;
-
+ *insn++ = BPF_MOV32_RAW(BPF_RVAL(fp->code) == BPF_K ?
+ BPF_K : BPF_X, BPF_REG_0,
+ BPF_REG_A, fp->k);
*insn = BPF_EXIT_INSN();
break;
/* Store to stack. */
case BPF_ST:
case BPF_STX:
- insn->code = BPF_STX | BPF_MEM | BPF_W;
- insn->a_reg = BPF_REG_FP;
- insn->x_reg = fp->code == BPF_ST ?
- BPF_REG_A : BPF_REG_X;
- insn->off = -(BPF_MEMWORDS - fp->k) * 4;
+ *insn = BPF_STX_MEM(BPF_W, BPF_REG_FP, BPF_CLASS(fp->code) ==
+ BPF_ST ? BPF_REG_A : BPF_REG_X,
+ -(BPF_MEMWORDS - fp->k) * 4);
break;
/* Load from stack. */
case BPF_LD | BPF_MEM:
case BPF_LDX | BPF_MEM:
- insn->code = BPF_LDX | BPF_MEM | BPF_W;
- insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ?
- BPF_REG_A : BPF_REG_X;
- insn->x_reg = BPF_REG_FP;
- insn->off = -(BPF_MEMWORDS - fp->k) * 4;
+ *insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ?
+ BPF_REG_A : BPF_REG_X, BPF_REG_FP,
+ -(BPF_MEMWORDS - fp->k) * 4);
break;
/* A = K or X = K */
case BPF_LD | BPF_IMM:
case BPF_LDX | BPF_IMM:
- insn->code = BPF_ALU | BPF_MOV | BPF_K;
- insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ?
- BPF_REG_A : BPF_REG_X;
- insn->imm = fp->k;
+ *insn = BPF_MOV32_IMM(BPF_CLASS(fp->code) == BPF_LD ?
+ BPF_REG_A : BPF_REG_X, fp->k);
break;
/* X = A */
case BPF_MISC | BPF_TAX:
- *insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_X, BPF_REG_A);
+ *insn = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
break;
/* A = X */
case BPF_MISC | BPF_TXA:
- *insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_A, BPF_REG_X);
+ *insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_X);
break;
/* A = skb->len or X = skb->len */
case BPF_LD | BPF_W | BPF_LEN:
case BPF_LDX | BPF_W | BPF_LEN:
- insn->code = BPF_LDX | BPF_MEM | BPF_W;
- insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ?
- BPF_REG_A : BPF_REG_X;
- insn->x_reg = BPF_REG_CTX;
- insn->off = offsetof(struct sk_buff, len);
+ *insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ?
+ BPF_REG_A : BPF_REG_X, BPF_REG_CTX,
+ offsetof(struct sk_buff, len));
break;
- /* access seccomp_data fields */
+ /* Access seccomp_data fields. */
case BPF_LDX | BPF_ABS | BPF_W:
/* A = *(u32 *) (ctx + K) */
*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX, fp->k);
break;
+ /* Unkown instruction. */
default:
goto err;
}
@@ -1101,7 +1059,6 @@ do_pass:
if (new_prog)
memcpy(new_insn, tmp_insns,
sizeof(*insn) * (insn - tmp_insns));
-
new_insn += insn - tmp_insns;
}
@@ -1116,7 +1073,6 @@ do_pass:
new_flen = new_insn - new_prog;
if (pass > 2)
goto err;
-
goto do_pass;
}
@@ -1140,44 +1096,46 @@ err:
*/
static int check_load_and_stores(struct sock_filter *filter, int flen)
{
- u16 *masks, memvalid = 0; /* one bit per cell, 16 cells */
+ u16 *masks, memvalid = 0; /* One bit per cell, 16 cells */
int pc, ret = 0;
BUILD_BUG_ON(BPF_MEMWORDS > 16);
- masks = kmalloc(flen * sizeof(*masks), GFP_KERNEL);
+
+ masks = kmalloc_array(flen, sizeof(*masks), GFP_KERNEL);
if (!masks)
return -ENOMEM;
+
memset(masks, 0xff, flen * sizeof(*masks));
for (pc = 0; pc < flen; pc++) {
memvalid &= masks[pc];
switch (filter[pc].code) {
- case BPF_S_ST:
- case BPF_S_STX:
+ case BPF_ST:
+ case BPF_STX:
memvalid |= (1 << filter[pc].k);
break;
- case BPF_S_LD_MEM:
- case BPF_S_LDX_MEM:
+ case BPF_LD | BPF_MEM:
+ case BPF_LDX | BPF_MEM:
if (!(memvalid & (1 << filter[pc].k))) {
ret = -EINVAL;
goto error;
}
break;
- case BPF_S_JMP_JA:
- /* a jump must set masks on target */
+ case BPF_JMP | BPF_JA:
+ /* A jump must set masks on target */
masks[pc + 1 + filter[pc].k] &= memvalid;
memvalid = ~0;
break;
- case BPF_S_JMP_JEQ_K:
- case BPF_S_JMP_JEQ_X:
- case BPF_S_JMP_JGE_K:
- case BPF_S_JMP_JGE_X:
- case BPF_S_JMP_JGT_K:
- case BPF_S_JMP_JGT_X:
- case BPF_S_JMP_JSET_X:
- case BPF_S_JMP_JSET_K:
- /* a jump must set masks on targets */
+ case BPF_JMP | BPF_JEQ | BPF_K:
+ case BPF_JMP | BPF_JEQ | BPF_X:
+ case BPF_JMP | BPF_JGE | BPF_K:
+ case BPF_JMP | BPF_JGE | BPF_X:
+ case BPF_JMP | BPF_JGT | BPF_K:
+ case BPF_JMP | BPF_JGT | BPF_X:
+ case BPF_JMP | BPF_JSET | BPF_K:
+ case BPF_JMP | BPF_JSET | BPF_X:
+ /* A jump must set masks on targets */
masks[pc + 1 + filter[pc].jt] &= memvalid;
masks[pc + 1 + filter[pc].jf] &= memvalid;
memvalid = ~0;
@@ -1189,6 +1147,72 @@ error:
return ret;
}
+static bool chk_code_allowed(u16 code_to_probe)
+{
+ static const bool codes[] = {
+ /* 32 bit ALU operations */
+ [BPF_ALU | BPF_ADD | BPF_K] = true,
+ [BPF_ALU | BPF_ADD | BPF_X] = true,
+ [BPF_ALU | BPF_SUB | BPF_K] = true,
+ [BPF_ALU | BPF_SUB | BPF_X] = true,
+ [BPF_ALU | BPF_MUL | BPF_K] = true,
+ [BPF_ALU | BPF_MUL | BPF_X] = true,
+ [BPF_ALU | BPF_DIV | BPF_K] = true,
+ [BPF_ALU | BPF_DIV | BPF_X] = true,
+ [BPF_ALU | BPF_MOD | BPF_K] = true,
+ [BPF_ALU | BPF_MOD | BPF_X] = true,
+ [BPF_ALU | BPF_AND | BPF_K] = true,
+ [BPF_ALU | BPF_AND | BPF_X] = true,
+ [BPF_ALU | BPF_OR | BPF_K] = true,
+ [BPF_ALU | BPF_OR | BPF_X] = true,
+ [BPF_ALU | BPF_XOR | BPF_K] = true,
+ [BPF_ALU | BPF_XOR | BPF_X] = true,
+ [BPF_ALU | BPF_LSH | BPF_K] = true,
+ [BPF_ALU | BPF_LSH | BPF_X] = true,
+ [BPF_ALU | BPF_RSH | BPF_K] = true,
+ [BPF_ALU | BPF_RSH | BPF_X] = true,
+ [BPF_ALU | BPF_NEG] = true,
+ /* Load instructions */
+ [BPF_LD | BPF_W | BPF_ABS] = true,
+ [BPF_LD | BPF_H | BPF_ABS] = true,
+ [BPF_LD | BPF_B | BPF_ABS] = true,
+ [BPF_LD | BPF_W | BPF_LEN] = true,
+ [BPF_LD | BPF_W | BPF_IND] = true,
+ [BPF_LD | BPF_H | BPF_IND] = true,
+ [BPF_LD | BPF_B | BPF_IND] = true,
+ [BPF_LD | BPF_IMM] = true,
+ [BPF_LD | BPF_MEM] = true,
+ [BPF_LDX | BPF_W | BPF_LEN] = true,
+ [BPF_LDX | BPF_B | BPF_MSH] = true,
+ [BPF_LDX | BPF_IMM] = true,
+ [BPF_LDX | BPF_MEM] = true,
+ /* Store instructions */
+ [BPF_ST] = true,
+ [BPF_STX] = true,
+ /* Misc instructions */
+ [BPF_MISC | BPF_TAX] = true,
+ [BPF_MISC | BPF_TXA] = true,
+ /* Return instructions */
+ [BPF_RET | BPF_K] = true,
+ [BPF_RET | BPF_A] = true,
+ /* Jump instructions */
+ [BPF_JMP | BPF_JA] = true,
+ [BPF_JMP | BPF_JEQ | BPF_K] = true,
+ [BPF_JMP | BPF_JEQ | BPF_X] = true,
+ [BPF_JMP | BPF_JGE | BPF_K] = true,
+ [BPF_JMP | BPF_JGE | BPF_X] = true,
+ [BPF_JMP | BPF_JGT | BPF_K] = true,
+ [BPF_JMP | BPF_JGT | BPF_X] = true,
+ [BPF_JMP | BPF_JSET | BPF_K] = true,
+ [BPF_JMP | BPF_JSET | BPF_X] = true,
+ };
+
+ if (code_to_probe >= ARRAY_SIZE(codes))
+ return false;
+
+ return codes[code_to_probe];
+}
+
/**
* sk_chk_filter - verify socket filter code
* @filter: filter to verify
@@ -1205,154 +1229,76 @@ error:
*/
int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
{
- /*
- * Valid instructions are initialized to non-0.
- * Invalid instructions are initialized to 0.
- */
- static const u8 codes[] = {
- [BPF_ALU|BPF_ADD|BPF_K] = BPF_S_ALU_ADD_K,
- [BPF_ALU|BPF_ADD|BPF_X] = BPF_S_ALU_ADD_X,
- [BPF_ALU|BPF_SUB|BPF_K] = BPF_S_ALU_SUB_K,
- [BPF_ALU|BPF_SUB|BPF_X] = BPF_S_ALU_SUB_X,
- [BPF_ALU|BPF_MUL|BPF_K] = BPF_S_ALU_MUL_K,
- [BPF_ALU|BPF_MUL|BPF_X] = BPF_S_ALU_MUL_X,
- [BPF_ALU|BPF_DIV|BPF_X] = BPF_S_ALU_DIV_X,
- [BPF_ALU|BPF_MOD|BPF_K] = BPF_S_ALU_MOD_K,
- [BPF_ALU|BPF_MOD|BPF_X] = BPF_S_ALU_MOD_X,
- [BPF_ALU|BPF_AND|BPF_K] = BPF_S_ALU_AND_K,
- [BPF_ALU|BPF_AND|BPF_X] = BPF_S_ALU_AND_X,
- [BPF_ALU|BPF_OR|BPF_K] = BPF_S_ALU_OR_K,
- [BPF_ALU|BPF_OR|BPF_X] = BPF_S_ALU_OR_X,
- [BPF_ALU|BPF_XOR|BPF_K] = BPF_S_ALU_XOR_K,
- [BPF_ALU|BPF_XOR|BPF_X] = BPF_S_ALU_XOR_X,
- [BPF_ALU|BPF_LSH|BPF_K] = BPF_S_ALU_LSH_K,
- [BPF_ALU|BPF_LSH|BPF_X] = BPF_S_ALU_LSH_X,
- [BPF_ALU|BPF_RSH|BPF_K] = BPF_S_ALU_RSH_K,
- [BPF_ALU|BPF_RSH|BPF_X] = BPF_S_ALU_RSH_X,
- [BPF_ALU|BPF_NEG] = BPF_S_ALU_NEG,
- [BPF_LD|BPF_W|BPF_ABS] = BPF_S_LD_W_ABS,
- [BPF_LD|BPF_H|BPF_ABS] = BPF_S_LD_H_ABS,
- [BPF_LD|BPF_B|BPF_ABS] = BPF_S_LD_B_ABS,
- [BPF_LD|BPF_W|BPF_LEN] = BPF_S_LD_W_LEN,
- [BPF_LD|BPF_W|BPF_IND] = BPF_S_LD_W_IND,
- [BPF_LD|BPF_H|BPF_IND] = BPF_S_LD_H_IND,
- [BPF_LD|BPF_B|BPF_IND] = BPF_S_LD_B_IND,
- [BPF_LD|BPF_IMM] = BPF_S_LD_IMM,
- [BPF_LDX|BPF_W|BPF_LEN] = BPF_S_LDX_W_LEN,
- [BPF_LDX|BPF_B|BPF_MSH] = BPF_S_LDX_B_MSH,
- [BPF_LDX|BPF_IMM] = BPF_S_LDX_IMM,
- [BPF_MISC|BPF_TAX] = BPF_S_MISC_TAX,
- [BPF_MISC|BPF_TXA] = BPF_S_MISC_TXA,
- [BPF_RET|BPF_K] = BPF_S_RET_K,
- [BPF_RET|BPF_A] = BPF_S_RET_A,
- [BPF_ALU|BPF_DIV|BPF_K] = BPF_S_ALU_DIV_K,
- [BPF_LD|BPF_MEM] = BPF_S_LD_MEM,
- [BPF_LDX|BPF_MEM] = BPF_S_LDX_MEM,
- [BPF_ST] = BPF_S_ST,
- [BPF_STX] = BPF_S_STX,
- [BPF_JMP|BPF_JA] = BPF_S_JMP_JA,
- [BPF_JMP|BPF_JEQ|BPF_K] = BPF_S_JMP_JEQ_K,
- [BPF_JMP|BPF_JEQ|BPF_X] = BPF_S_JMP_JEQ_X,
- [BPF_JMP|BPF_JGE|BPF_K] = BPF_S_JMP_JGE_K,
- [BPF_JMP|BPF_JGE|BPF_X] = BPF_S_JMP_JGE_X,
- [BPF_JMP|BPF_JGT|BPF_K] = BPF_S_JMP_JGT_K,
- [BPF_JMP|BPF_JGT|BPF_X] = BPF_S_JMP_JGT_X,
- [BPF_JMP|BPF_JSET|BPF_K] = BPF_S_JMP_JSET_K,
- [BPF_JMP|BPF_JSET|BPF_X] = BPF_S_JMP_JSET_X,
- };
- int pc;
bool anc_found;
+ int pc;
if (flen == 0 || flen > BPF_MAXINSNS)
return -EINVAL;
- /* check the filter code now */
+ /* Check the filter code now */
for (pc = 0; pc < flen; pc++) {
struct sock_filter *ftest = &filter[pc];
- u16 code = ftest->code;
- if (code >= ARRAY_SIZE(codes))
- return -EINVAL;
- code = codes[code];
- if (!code)
+ /* May we actually operate on this code? */
+ if (!chk_code_allowed(ftest->code))
return -EINVAL;
+
/* Some instructions need special checks */
- switch (code) {
- case BPF_S_ALU_DIV_K:
- case BPF_S_ALU_MOD_K:
- /* check for division by zero */
+ switch (ftest->code) {
+ case BPF_ALU | BPF_DIV | BPF_K:
+ case BPF_ALU | BPF_MOD | BPF_K:
+ /* Check for division by zero */
if (ftest->k == 0)
return -EINVAL;
break;
- case BPF_S_LD_MEM:
- case BPF_S_LDX_MEM:
- case BPF_S_ST:
- case BPF_S_STX:
- /* check for invalid memory addresses */
+ case BPF_LD | BPF_MEM:
+ case BPF_LDX | BPF_MEM:
+ case BPF_ST:
+ case BPF_STX:
+ /* Check for invalid memory addresses */
if (ftest->k >= BPF_MEMWORDS)
return -EINVAL;
break;
- case BPF_S_JMP_JA:
- /*
- * Note, the large ftest->k might cause loops.
+ case BPF_JMP | BPF_JA:
+ /* Note, the large ftest->k might cause loops.
* Compare this with conditional jumps below,
* where offsets are limited. --ANK (981016)
*/
- if (ftest->k >= (unsigned int)(flen-pc-1))
+ if (ftest->k >= (unsigned int)(flen - pc - 1))
return -EINVAL;
break;
- case BPF_S_JMP_JEQ_K:
- case BPF_S_JMP_JEQ_X:
- case BPF_S_JMP_JGE_K:
- case BPF_S_JMP_JGE_X:
- case BPF_S_JMP_JGT_K:
- case BPF_S_JMP_JGT_X:
- case BPF_S_JMP_JSET_X:
- case BPF_S_JMP_JSET_K:
- /* for conditionals both must be safe */
+ case BPF_JMP | BPF_JEQ | BPF_K:
+ case BPF_JMP | BPF_JEQ | BPF_X:
+ case BPF_JMP | BPF_JGE | BPF_K:
+ case BPF_JMP | BPF_JGE | BPF_X:
+ case BPF_JMP | BPF_JGT | BPF_K:
+ case BPF_JMP | BPF_JGT | BPF_X:
+ case BPF_JMP | BPF_JSET | BPF_K:
+ case BPF_JMP | BPF_JSET | BPF_X:
+ /* Both conditionals must be safe */
if (pc + ftest->jt + 1 >= flen ||
pc + ftest->jf + 1 >= flen)
return -EINVAL;
break;
- case BPF_S_LD_W_ABS:
- case BPF_S_LD_H_ABS:
- case BPF_S_LD_B_ABS:
+ case BPF_LD | BPF_W | BPF_ABS:
+ case BPF_LD | BPF_H | BPF_ABS:
+ case BPF_LD | BPF_B | BPF_ABS:
anc_found = false;
-#define ANCILLARY(CODE) case SKF_AD_OFF + SKF_AD_##CODE: \
- code = BPF_S_ANC_##CODE; \
- anc_found = true; \
- break
- switch (ftest->k) {
- ANCILLARY(PROTOCOL);
- ANCILLARY(PKTTYPE);
- ANCILLARY(IFINDEX);
- ANCILLARY(NLATTR);
- ANCILLARY(NLATTR_NEST);
- ANCILLARY(MARK);
- ANCILLARY(QUEUE);
- ANCILLARY(HATYPE);
- ANCILLARY(RXHASH);
- ANCILLARY(CPU);
- ANCILLARY(ALU_XOR_X);
- ANCILLARY(VLAN_TAG);
- ANCILLARY(VLAN_TAG_PRESENT);
- ANCILLARY(PAY_OFFSET);
- ANCILLARY(RANDOM);
- }
-
- /* ancillary operation unknown or unsupported */
+ if (bpf_anc_helper(ftest) & BPF_ANC)
+ anc_found = true;
+ /* Ancillary operation unknown or unsupported */
if (anc_found == false && ftest->k >= SKF_AD_OFF)
return -EINVAL;
}
- ftest->code = code;
}
- /* last instruction must be a RET code */
+ /* Last instruction must be a RET code */
switch (filter[flen - 1].code) {
- case BPF_S_RET_K:
- case BPF_S_RET_A:
+ case BPF_RET | BPF_K:
+ case BPF_RET | BPF_A:
return check_load_and_stores(filter, flen);
}
+
return -EINVAL;
}
EXPORT_SYMBOL(sk_chk_filter);
@@ -1397,7 +1343,7 @@ static void sk_filter_release_rcu(struct rcu_head *rcu)
struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);
sk_release_orig_filter(fp);
- bpf_jit_free(fp);
+ sk_filter_free(fp);
}
/**
@@ -1436,7 +1382,7 @@ static struct sk_filter *__sk_migrate_realloc(struct sk_filter *fp,
fp_new = sock_kmalloc(sk, len, GFP_KERNEL);
if (fp_new) {
*fp_new = *fp;
- /* As we're kepping orig_prog in fp_new along,
+ /* As we're keeping orig_prog in fp_new along,
* we need to make sure we're not evicting it
* from the old fp.
*/
@@ -1452,7 +1398,7 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp,
{
struct sock_filter *old_prog;
struct sk_filter *old_fp;
- int i, err, new_len, old_len = fp->len;
+ int err, new_len, old_len = fp->len;
/* We are free to overwrite insns et al right here as it
* won't be used at this point in time anymore internally
@@ -1462,13 +1408,6 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp,
BUILD_BUG_ON(sizeof(struct sock_filter) !=
sizeof(struct sock_filter_int));
- /* For now, we need to unfiddle BPF_S_* identifiers in place.
- * This can sooner or later on be subject to removal, e.g. when
- * JITs have been converted.
- */
- for (i = 0; i < fp->len; i++)
- sk_decode_filter(&fp->insns[i], &fp->insns[i]);
-
/* Conversion cannot happen on overlapping memory areas,
* so we need to keep the user BPF around until the 2nd
* pass. At this time, the user BPF is stored in fp->insns.
@@ -1497,7 +1436,6 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp,
goto out_err_free;
}
- fp->bpf_func = sk_run_filter_int_skb;
fp->len = new_len;
/* 2nd pass: remap sock_filter insns into sock_filter_int insns. */
@@ -1510,6 +1448,8 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp,
*/
goto out_err_free;
+ sk_filter_select_runtime(fp);
+
kfree(old_prog);
return fp;
@@ -1528,6 +1468,29 @@ void __weak bpf_int_jit_compile(struct sk_filter *prog)
{
}
+/**
+ * sk_filter_select_runtime - select execution runtime for BPF program
+ * @fp: sk_filter populated with internal BPF program
+ *
+ * try to JIT internal BPF program, if JIT is not available select interpreter
+ * BPF program will be executed via SK_RUN_FILTER() macro
+ */
+void sk_filter_select_runtime(struct sk_filter *fp)
+{
+ fp->bpf_func = (void *) __sk_run_filter;
+
+ /* Probe if internal BPF can be JITed */
+ bpf_int_jit_compile(fp);
+}
+EXPORT_SYMBOL_GPL(sk_filter_select_runtime);
+
+/* free internal BPF program */
+void sk_filter_free(struct sk_filter *fp)
+{
+ bpf_jit_free(fp);
+}
+EXPORT_SYMBOL_GPL(sk_filter_free);
+
static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp,
struct sock *sk)
{
@@ -1537,8 +1500,13 @@ static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp,
fp->jited = 0;
err = sk_chk_filter(fp->insns, fp->len);
- if (err)
+ if (err) {
+ if (sk != NULL)
+ sk_filter_uncharge(sk, fp);
+ else
+ kfree(fp);
return ERR_PTR(err);
+ }
/* Probe if we can JIT compile the filter and if so, do
* the compilation of the filter.
@@ -1548,19 +1516,16 @@ static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp,
/* JIT compiler couldn't process this filter, so do the
* internal BPF translation for the optimized interpreter.
*/
- if (!fp->jited) {
+ if (!fp->jited)
fp = __sk_migrate_filter(fp, sk);
- /* Probe if internal BPF can be jit-ed */
- bpf_int_jit_compile(fp);
- }
return fp;
}
/**
* sk_unattached_filter_create - create an unattached filter
- * @fprog: the filter program
* @pfp: the unattached filter that is created
+ * @fprog: the filter program
*
* Create a filter independent of any socket. We first run some
* sanity checks on it to make sure it does not explode on us later.
@@ -1568,7 +1533,7 @@ static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp,
* a negative errno code is returned. On success the return is zero.
*/
int sk_unattached_filter_create(struct sk_filter **pfp,
- struct sock_fprog *fprog)
+ struct sock_fprog_kern *fprog)
{
unsigned int fsize = sk_filter_proglen(fprog);
struct sk_filter *fp;
@@ -1689,84 +1654,6 @@ int sk_detach_filter(struct sock *sk)
}
EXPORT_SYMBOL_GPL(sk_detach_filter);
-void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to)
-{
- static const u16 decodes[] = {
- [BPF_S_ALU_ADD_K] = BPF_ALU|BPF_ADD|BPF_K,
- [BPF_S_ALU_ADD_X] = BPF_ALU|BPF_ADD|BPF_X,
- [BPF_S_ALU_SUB_K] = BPF_ALU|BPF_SUB|BPF_K,
- [BPF_S_ALU_SUB_X] = BPF_ALU|BPF_SUB|BPF_X,
- [BPF_S_ALU_MUL_K] = BPF_ALU|BPF_MUL|BPF_K,
- [BPF_S_ALU_MUL_X] = BPF_ALU|BPF_MUL|BPF_X,
- [BPF_S_ALU_DIV_X] = BPF_ALU|BPF_DIV|BPF_X,
- [BPF_S_ALU_MOD_K] = BPF_ALU|BPF_MOD|BPF_K,
- [BPF_S_ALU_MOD_X] = BPF_ALU|BPF_MOD|BPF_X,
- [BPF_S_ALU_AND_K] = BPF_ALU|BPF_AND|BPF_K,
- [BPF_S_ALU_AND_X] = BPF_ALU|BPF_AND|BPF_X,
- [BPF_S_ALU_OR_K] = BPF_ALU|BPF_OR|BPF_K,
- [BPF_S_ALU_OR_X] = BPF_ALU|BPF_OR|BPF_X,
- [BPF_S_ALU_XOR_K] = BPF_ALU|BPF_XOR|BPF_K,
- [BPF_S_ALU_XOR_X] = BPF_ALU|BPF_XOR|BPF_X,
- [BPF_S_ALU_LSH_K] = BPF_ALU|BPF_LSH|BPF_K,
- [BPF_S_ALU_LSH_X] = BPF_ALU|BPF_LSH|BPF_X,
- [BPF_S_ALU_RSH_K] = BPF_ALU|BPF_RSH|BPF_K,
- [BPF_S_ALU_RSH_X] = BPF_ALU|BPF_RSH|BPF_X,
- [BPF_S_ALU_NEG] = BPF_ALU|BPF_NEG,
- [BPF_S_LD_W_ABS] = BPF_LD|BPF_W|BPF_ABS,
- [BPF_S_LD_H_ABS] = BPF_LD|BPF_H|BPF_ABS,
- [BPF_S_LD_B_ABS] = BPF_LD|BPF_B|BPF_ABS,
- [BPF_S_ANC_PROTOCOL] = BPF_LD|BPF_B|BPF_ABS,
- [BPF_S_ANC_PKTTYPE] = BPF_LD|BPF_B|BPF_ABS,
- [BPF_S_ANC_IFINDEX] = BPF_LD|BPF_B|BPF_ABS,
- [BPF_S_ANC_NLATTR] = BPF_LD|BPF_B|BPF_ABS,
- [BPF_S_ANC_NLATTR_NEST] = BPF_LD|BPF_B|BPF_ABS,
- [BPF_S_ANC_MARK] = BPF_LD|BPF_B|BPF_ABS,
- [BPF_S_ANC_QUEUE] = BPF_LD|BPF_B|BPF_ABS,
- [BPF_S_ANC_HATYPE] = BPF_LD|BPF_B|BPF_ABS,
- [BPF_S_ANC_RXHASH] = BPF_LD|BPF_B|BPF_ABS,
- [BPF_S_ANC_CPU] = BPF_LD|BPF_B|BPF_ABS,
- [BPF_S_ANC_ALU_XOR_X] = BPF_LD|BPF_B|BPF_ABS,
- [BPF_S_ANC_VLAN_TAG] = BPF_LD|BPF_B|BPF_ABS,
- [BPF_S_ANC_VLAN_TAG_PRESENT] = BPF_LD|BPF_B|BPF_ABS,
- [BPF_S_ANC_PAY_OFFSET] = BPF_LD|BPF_B|BPF_ABS,
- [BPF_S_ANC_RANDOM] = BPF_LD|BPF_B|BPF_ABS,
- [BPF_S_LD_W_LEN] = BPF_LD|BPF_W|BPF_LEN,
- [BPF_S_LD_W_IND] = BPF_LD|BPF_W|BPF_IND,
- [BPF_S_LD_H_IND] = BPF_LD|BPF_H|BPF_IND,
- [BPF_S_LD_B_IND] = BPF_LD|BPF_B|BPF_IND,
- [BPF_S_LD_IMM] = BPF_LD|BPF_IMM,
- [BPF_S_LDX_W_LEN] = BPF_LDX|BPF_W|BPF_LEN,
- [BPF_S_LDX_B_MSH] = BPF_LDX|BPF_B|BPF_MSH,
- [BPF_S_LDX_IMM] = BPF_LDX|BPF_IMM,
- [BPF_S_MISC_TAX] = BPF_MISC|BPF_TAX,
- [BPF_S_MISC_TXA] = BPF_MISC|BPF_TXA,
- [BPF_S_RET_K] = BPF_RET|BPF_K,
- [BPF_S_RET_A] = BPF_RET|BPF_A,
- [BPF_S_ALU_DIV_K] = BPF_ALU|BPF_DIV|BPF_K,
- [BPF_S_LD_MEM] = BPF_LD|BPF_MEM,
- [BPF_S_LDX_MEM] = BPF_LDX|BPF_MEM,
- [BPF_S_ST] = BPF_ST,
- [BPF_S_STX] = BPF_STX,
- [BPF_S_JMP_JA] = BPF_JMP|BPF_JA,
- [BPF_S_JMP_JEQ_K] = BPF_JMP|BPF_JEQ|BPF_K,
- [BPF_S_JMP_JEQ_X] = BPF_JMP|BPF_JEQ|BPF_X,
- [BPF_S_JMP_JGE_K] = BPF_JMP|BPF_JGE|BPF_K,
- [BPF_S_JMP_JGE_X] = BPF_JMP|BPF_JGE|BPF_X,
- [BPF_S_JMP_JGT_K] = BPF_JMP|BPF_JGT|BPF_K,
- [BPF_S_JMP_JGT_X] = BPF_JMP|BPF_JGT|BPF_X,
- [BPF_S_JMP_JSET_K] = BPF_JMP|BPF_JSET|BPF_K,
- [BPF_S_JMP_JSET_X] = BPF_JMP|BPF_JSET|BPF_X,
- };
- u16 code;
-
- code = filt->code;
-
- to->code = decodes[code];
- to->jt = filt->jt;
- to->jf = filt->jf;
- to->k = filt->k;
-}
-
int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf,
unsigned int len)
{
diff --git a/net/core/iovec.c b/net/core/iovec.c
index b61869429f4..e1ec45ab1e6 100644
--- a/net/core/iovec.c
+++ b/net/core/iovec.c
@@ -39,7 +39,7 @@ int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr_storage *a
{
int size, ct, err;
- if (m->msg_namelen) {
+ if (m->msg_name && m->msg_namelen) {
if (mode == VERIFY_READ) {
void __user *namep;
namep = (void __user __force *) m->msg_name;
@@ -48,10 +48,10 @@ int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr_storage *a
if (err < 0)
return err;
}
- if (m->msg_name)
- m->msg_name = address;
+ m->msg_name = address;
} else {
m->msg_name = NULL;
+ m->msg_namelen = 0;
}
size = m->msg_iovlen * sizeof(struct iovec);
@@ -75,61 +75,6 @@ int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr_storage *a
}
/*
- * Copy kernel to iovec. Returns -EFAULT on error.
- */
-
-int memcpy_toiovecend(const struct iovec *iov, unsigned char *kdata,
- int offset, int len)
-{
- int copy;
- for (; len > 0; ++iov) {
- /* Skip over the finished iovecs */
- if (unlikely(offset >= iov->iov_len)) {
- offset -= iov->iov_len;
- continue;
- }
- copy = min_t(unsigned int, iov->iov_len - offset, len);
- if (copy_to_user(iov->iov_base + offset, kdata, copy))
- return -EFAULT;
- offset = 0;
- kdata += copy;
- len -= copy;
- }
-
- return 0;
-}
-EXPORT_SYMBOL(memcpy_toiovecend);
-
-/*
- * Copy iovec to kernel. Returns -EFAULT on error.
- */
-
-int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov,
- int offset, int len)
-{
- /* Skip over the finished iovecs */
- while (offset >= iov->iov_len) {
- offset -= iov->iov_len;
- iov++;
- }
-
- while (len > 0) {
- u8 __user *base = iov->iov_base + offset;
- int copy = min_t(unsigned int, len, iov->iov_len - offset);
-
- offset = 0;
- if (copy_from_user(kdata, base, copy))
- return -EFAULT;
- len -= copy;
- kdata += copy;
- iov++;
- }
-
- return 0;
-}
-EXPORT_SYMBOL(memcpy_fromiovecend);
-
-/*
* And now for the all-in-one: copy and checksum from a user iovec
* directly to a datagram
* Calls to csum_partial but the last must be in 32 bit chunks
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index 9c3a839322b..bd0767e6b2b 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -147,7 +147,7 @@ static void linkwatch_do_dev(struct net_device *dev)
* Make sure the above read is complete since it can be
* rewritten as soon as we clear the bit below.
*/
- smp_mb__before_clear_bit();
+ smp_mb__before_atomic();
/* We are about to handle this device,
* so new events can be accepted
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 8f8a96ef9f3..ef31fef25e5 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1248,8 +1248,8 @@ void __neigh_set_probe_once(struct neighbour *neigh)
neigh->updated = jiffies;
if (!(neigh->nud_state & NUD_FAILED))
return;
- neigh->nud_state = NUD_PROBE;
- atomic_set(&neigh->probes, NEIGH_VAR(neigh->parms, UCAST_PROBES));
+ neigh->nud_state = NUD_INCOMPLETE;
+ atomic_set(&neigh->probes, neigh_max_probes(neigh));
neigh_add_timer(neigh,
jiffies + NEIGH_VAR(neigh->parms, RETRANS_TIME));
}
@@ -2249,7 +2249,7 @@ static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
ndm->ndm_pad1 = 0;
ndm->ndm_pad2 = 0;
ndm->ndm_flags = pn->flags | NTF_PROXY;
- ndm->ndm_type = NDA_DST;
+ ndm->ndm_type = RTN_UNICAST;
ndm->ndm_ifindex = pn->dev->ifindex;
ndm->ndm_state = NUD_NONE;
@@ -3059,11 +3059,12 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
} else {
+ struct neigh_table *tbl = p->tbl;
dev_name_source = "default";
- t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = (int *)(p + 1);
- t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = (int *)(p + 1) + 1;
- t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = (int *)(p + 1) + 2;
- t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = (int *)(p + 1) + 3;
+ t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = &tbl->gc_interval;
+ t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = &tbl->gc_thresh1;
+ t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = &tbl->gc_thresh2;
+ t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = &tbl->gc_thresh3;
}
if (handler) {
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 05e949d4820..85b62691f4f 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -24,7 +24,7 @@
static LIST_HEAD(pernet_list);
static struct list_head *first_device = &pernet_list;
-static DEFINE_MUTEX(net_mutex);
+DEFINE_MUTEX(net_mutex);
LIST_HEAD(net_namespace_list);
EXPORT_SYMBOL_GPL(net_namespace_list);
diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c
index 22931e1b99b..30d903b19c6 100644
--- a/net/core/netclassid_cgroup.c
+++ b/net/core/netclassid_cgroup.c
@@ -42,7 +42,7 @@ cgrp_css_alloc(struct cgroup_subsys_state *parent_css)
static int cgrp_css_online(struct cgroup_subsys_state *css)
{
struct cgroup_cls_state *cs = css_cls_state(css);
- struct cgroup_cls_state *parent = css_cls_state(css_parent(css));
+ struct cgroup_cls_state *parent = css_cls_state(css->parent);
if (parent)
cs->classid = parent->classid;
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index 3825f669147..2f385b9bccc 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -140,7 +140,7 @@ cgrp_css_alloc(struct cgroup_subsys_state *parent_css)
static int cgrp_css_online(struct cgroup_subsys_state *css)
{
- struct cgroup_subsys_state *parent_css = css_parent(css);
+ struct cgroup_subsys_state *parent_css = css->parent;
struct net_device *dev;
int ret = 0;
@@ -185,15 +185,15 @@ static int read_priomap(struct seq_file *sf, void *v)
return 0;
}
-static int write_priomap(struct cgroup_subsys_state *css, struct cftype *cft,
- char *buffer)
+static ssize_t write_priomap(struct kernfs_open_file *of,
+ char *buf, size_t nbytes, loff_t off)
{
char devname[IFNAMSIZ + 1];
struct net_device *dev;
u32 prio;
int ret;
- if (sscanf(buffer, "%"__stringify(IFNAMSIZ)"s %u", devname, &prio) != 2)
+ if (sscanf(buf, "%"__stringify(IFNAMSIZ)"s %u", devname, &prio) != 2)
return -EINVAL;
dev = dev_get_by_name(&init_net, devname);
@@ -202,11 +202,11 @@ static int write_priomap(struct cgroup_subsys_state *css, struct cftype *cft,
rtnl_lock();
- ret = netprio_set_prio(css, dev, prio);
+ ret = netprio_set_prio(of_css(of), dev, prio);
rtnl_unlock();
dev_put(dev);
- return ret;
+ return ret ?: nbytes;
}
static int update_netprio(const void *v, struct file *file, unsigned n)
@@ -239,7 +239,7 @@ static struct cftype ss_files[] = {
{
.name = "ifpriomap",
.seq_show = read_priomap,
- .write_string = write_priomap,
+ .write = write_priomap,
},
{ } /* terminate */
};
diff --git a/net/core/ptp_classifier.c b/net/core/ptp_classifier.c
index 37d86157b76..d3027a73fd4 100644
--- a/net/core/ptp_classifier.c
+++ b/net/core/ptp_classifier.c
@@ -133,7 +133,7 @@ void __init ptp_classifier_init(void)
{ 0x16, 0, 0, 0x00000000 },
{ 0x06, 0, 0, 0x00000000 },
};
- struct sock_fprog ptp_prog = {
+ struct sock_fprog_kern ptp_prog = {
.len = ARRAY_SIZE(ptp_filter), .filter = ptp_filter,
};
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 9837bebf93c..1063996f831 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -353,15 +353,46 @@ void __rtnl_link_unregister(struct rtnl_link_ops *ops)
}
EXPORT_SYMBOL_GPL(__rtnl_link_unregister);
+/* Return with the rtnl_lock held when there are no network
+ * devices unregistering in any network namespace.
+ */
+static void rtnl_lock_unregistering_all(void)
+{
+ struct net *net;
+ bool unregistering;
+ DEFINE_WAIT(wait);
+
+ for (;;) {
+ prepare_to_wait(&netdev_unregistering_wq, &wait,
+ TASK_UNINTERRUPTIBLE);
+ unregistering = false;
+ rtnl_lock();
+ for_each_net(net) {
+ if (net->dev_unreg_count > 0) {
+ unregistering = true;
+ break;
+ }
+ }
+ if (!unregistering)
+ break;
+ __rtnl_unlock();
+ schedule();
+ }
+ finish_wait(&netdev_unregistering_wq, &wait);
+}
+
/**
* rtnl_link_unregister - Unregister rtnl_link_ops from rtnetlink.
* @ops: struct rtnl_link_ops * to unregister
*/
void rtnl_link_unregister(struct rtnl_link_ops *ops)
{
- rtnl_lock();
+ /* Close the race with cleanup_net() */
+ mutex_lock(&net_mutex);
+ rtnl_lock_unregistering_all();
__rtnl_link_unregister(ops);
rtnl_unlock();
+ mutex_unlock(&net_mutex);
}
EXPORT_SYMBOL_GPL(rtnl_link_unregister);
@@ -767,8 +798,8 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev,
size += num_vfs *
(nla_total_size(sizeof(struct ifla_vf_mac)) +
nla_total_size(sizeof(struct ifla_vf_vlan)) +
- nla_total_size(sizeof(struct ifla_vf_tx_rate)) +
- nla_total_size(sizeof(struct ifla_vf_spoofchk)));
+ nla_total_size(sizeof(struct ifla_vf_spoofchk)) +
+ nla_total_size(sizeof(struct ifla_vf_rate)));
return size;
} else
return 0;
@@ -1034,6 +1065,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
struct ifla_vf_info ivi;
struct ifla_vf_mac vf_mac;
struct ifla_vf_vlan vf_vlan;
+ struct ifla_vf_rate vf_rate;
struct ifla_vf_tx_rate vf_tx_rate;
struct ifla_vf_spoofchk vf_spoofchk;
struct ifla_vf_link_state vf_linkstate;
@@ -1054,6 +1086,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
break;
vf_mac.vf =
vf_vlan.vf =
+ vf_rate.vf =
vf_tx_rate.vf =
vf_spoofchk.vf =
vf_linkstate.vf = ivi.vf;
@@ -1061,7 +1094,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac));
vf_vlan.vlan = ivi.vlan;
vf_vlan.qos = ivi.qos;
- vf_tx_rate.rate = ivi.tx_rate;
+ vf_tx_rate.rate = ivi.max_tx_rate;
+ vf_rate.min_tx_rate = ivi.min_tx_rate;
+ vf_rate.max_tx_rate = ivi.max_tx_rate;
vf_spoofchk.setting = ivi.spoofchk;
vf_linkstate.link_state = ivi.linkstate;
vf = nla_nest_start(skb, IFLA_VF_INFO);
@@ -1071,6 +1106,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
}
if (nla_put(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac) ||
nla_put(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan) ||
+ nla_put(skb, IFLA_VF_RATE, sizeof(vf_rate),
+ &vf_rate) ||
nla_put(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate),
&vf_tx_rate) ||
nla_put(skb, IFLA_VF_SPOOFCHK, sizeof(vf_spoofchk),
@@ -1177,6 +1214,10 @@ static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = {
.len = sizeof(struct ifla_vf_tx_rate) },
[IFLA_VF_SPOOFCHK] = { .type = NLA_BINARY,
.len = sizeof(struct ifla_vf_spoofchk) },
+ [IFLA_VF_RATE] = { .type = NLA_BINARY,
+ .len = sizeof(struct ifla_vf_rate) },
+ [IFLA_VF_LINK_STATE] = { .type = NLA_BINARY,
+ .len = sizeof(struct ifla_vf_link_state) },
};
static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = {
@@ -1203,6 +1244,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
struct nlattr *tb[IFLA_MAX+1];
u32 ext_filter_mask = 0;
int err;
+ int hdrlen;
s_h = cb->args[0];
s_idx = cb->args[1];
@@ -1210,8 +1252,17 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
rcu_read_lock();
cb->seq = net->dev_base_seq;
- if (nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX,
- ifla_policy) >= 0) {
+ /* A hack to preserve kernel<->userspace interface.
+ * The correct header is ifinfomsg. It is consistent with rtnl_getlink.
+ * However, before Linux v3.9 the code here assumed rtgenmsg and that's
+ * what iproute2 < v3.9.0 used.
+ * We can detect the old iproute2. Even including the IFLA_EXT_MASK
+ * attribute, its netlink message is shorter than struct ifinfomsg.
+ */
+ hdrlen = nlmsg_len(cb->nlh) < sizeof(struct ifinfomsg) ?
+ sizeof(struct rtgenmsg) : sizeof(struct ifinfomsg);
+
+ if (nlmsg_parse(cb->nlh, hdrlen, tb, IFLA_MAX, ifla_policy) >= 0) {
if (tb[IFLA_EXT_MASK])
ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
@@ -1336,11 +1387,29 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr *attr)
}
case IFLA_VF_TX_RATE: {
struct ifla_vf_tx_rate *ivt;
+ struct ifla_vf_info ivf;
ivt = nla_data(vf);
err = -EOPNOTSUPP;
- if (ops->ndo_set_vf_tx_rate)
- err = ops->ndo_set_vf_tx_rate(dev, ivt->vf,
- ivt->rate);
+ if (ops->ndo_get_vf_config)
+ err = ops->ndo_get_vf_config(dev, ivt->vf,
+ &ivf);
+ if (err)
+ break;
+ err = -EOPNOTSUPP;
+ if (ops->ndo_set_vf_rate)
+ err = ops->ndo_set_vf_rate(dev, ivt->vf,
+ ivf.min_tx_rate,
+ ivt->rate);
+ break;
+ }
+ case IFLA_VF_RATE: {
+ struct ifla_vf_rate *ivt;
+ ivt = nla_data(vf);
+ err = -EOPNOTSUPP;
+ if (ops->ndo_set_vf_rate)
+ err = ops->ndo_set_vf_rate(dev, ivt->vf,
+ ivt->min_tx_rate,
+ ivt->max_tx_rate);
break;
}
case IFLA_VF_SPOOFCHK: {
@@ -1713,7 +1782,6 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh)
ops->dellink(dev, &list_kill);
unregister_netdevice_many(&list_kill);
- list_del(&list_kill);
return 0;
}
@@ -1988,11 +2056,15 @@ replay:
if (ops->newlink) {
err = ops->newlink(net, dev, tb, data);
/* Drivers should call free_netdev() in ->destructor
- * and unregister it on failure so that device could be
- * finally freed in rtnl_unlock.
+ * and unregister it on failure after registration
+ * so that device could be finally freed in rtnl_unlock.
*/
- if (err < 0)
+ if (err < 0) {
+ /* If device is not registered at all, free it now */
+ if (dev->reg_state == NETREG_UNINITIALIZED)
+ free_netdev(dev);
goto out;
+ }
} else {
err = register_netdevice(dev);
if (err < 0) {
@@ -2064,9 +2136,13 @@ static u16 rtnl_calcit(struct sk_buff *skb, struct nlmsghdr *nlh)
struct nlattr *tb[IFLA_MAX+1];
u32 ext_filter_mask = 0;
u16 min_ifinfo_dump_size = 0;
+ int hdrlen;
+
+ /* Same kernel<->userspace interface hack as in rtnl_dump_ifinfo. */
+ hdrlen = nlmsg_len(nlh) < sizeof(struct ifinfomsg) ?
+ sizeof(struct rtgenmsg) : sizeof(struct ifinfomsg);
- if (nlmsg_parse(nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX,
- ifla_policy) >= 0) {
+ if (nlmsg_parse(nlh, hdrlen, tb, IFLA_MAX, ifla_policy) >= 0) {
if (tb[IFLA_EXT_MASK])
ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
}
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index 897da56f3af..ba71212f025 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -85,31 +85,6 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral);
#endif
#ifdef CONFIG_INET
-__u32 secure_ip_id(__be32 daddr)
-{
- u32 hash[MD5_DIGEST_WORDS];
-
- net_secret_init();
- hash[0] = (__force __u32) daddr;
- hash[1] = net_secret[13];
- hash[2] = net_secret[14];
- hash[3] = net_secret[15];
-
- md5_transform(hash, net_secret);
-
- return hash[0];
-}
-
-__u32 secure_ipv6_id(const __be32 daddr[4])
-{
- __u32 hash[4];
-
- net_secret_init();
- memcpy(hash, daddr, 16);
- md5_transform(hash, net_secret);
-
- return hash[0];
-}
__u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
__be16 sport, __be16 dport)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 3d74530ae82..c1a33033cbe 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -689,6 +689,9 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
new->ooo_okay = old->ooo_okay;
new->no_fcs = old->no_fcs;
new->encapsulation = old->encapsulation;
+ new->encap_hdr_csum = old->encap_hdr_csum;
+ new->csum_valid = old->csum_valid;
+ new->csum_complete_sw = old->csum_complete_sw;
#ifdef CONFIG_XFRM
new->sp = secpath_get(old->sp);
#endif
@@ -951,10 +954,13 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
EXPORT_SYMBOL(skb_copy);
/**
- * __pskb_copy - create copy of an sk_buff with private head.
+ * __pskb_copy_fclone - create copy of an sk_buff with private head.
* @skb: buffer to copy
* @headroom: headroom of new skb
* @gfp_mask: allocation priority
+ * @fclone: if true allocate the copy of the skb from the fclone
+ * cache instead of the head cache; it is recommended to set this
+ * to true for the cases where the copy will likely be cloned
*
* Make a copy of both an &sk_buff and part of its data, located
* in header. Fragmented data remain shared. This is used when
@@ -964,11 +970,12 @@ EXPORT_SYMBOL(skb_copy);
* The returned buffer has a reference count of 1.
*/
-struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask)
+struct sk_buff *__pskb_copy_fclone(struct sk_buff *skb, int headroom,
+ gfp_t gfp_mask, bool fclone)
{
unsigned int size = skb_headlen(skb) + headroom;
- struct sk_buff *n = __alloc_skb(size, gfp_mask,
- skb_alloc_rx_flag(skb), NUMA_NO_NODE);
+ int flags = skb_alloc_rx_flag(skb) | (fclone ? SKB_ALLOC_FCLONE : 0);
+ struct sk_buff *n = __alloc_skb(size, gfp_mask, flags, NUMA_NO_NODE);
if (!n)
goto out;
@@ -1008,7 +1015,7 @@ struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask)
out:
return n;
}
-EXPORT_SYMBOL(__pskb_copy);
+EXPORT_SYMBOL(__pskb_copy_fclone);
/**
* pskb_expand_head - reallocate header of &sk_buff
@@ -2881,12 +2888,14 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
int pos;
int dummy;
+ __skb_push(head_skb, doffset);
proto = skb_network_protocol(head_skb, &dummy);
if (unlikely(!proto))
return ERR_PTR(-EINVAL);
- csum = !!can_checksum_protocol(features, proto);
- __skb_push(head_skb, doffset);
+ csum = !head_skb->encap_hdr_csum &&
+ !!can_checksum_protocol(features, proto);
+
headroom = skb_headroom(head_skb);
pos = skb_headlen(head_skb);
@@ -2983,6 +2992,8 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
nskb->csum = skb_copy_and_csum_bits(head_skb, offset,
skb_put(nskb, len),
len, 0);
+ SKB_GSO_CB(nskb)->csum_start =
+ skb_headroom(nskb) + doffset;
continue;
}
@@ -3052,6 +3063,8 @@ perform_csum_check:
nskb->csum = skb_checksum(nskb, doffset,
nskb->len - doffset, 0);
nskb->ip_summed = CHECKSUM_NONE;
+ SKB_GSO_CB(nskb)->csum_start =
+ skb_headroom(nskb) + doffset;
}
} while ((offset += len) < head_skb->len);
@@ -3076,7 +3089,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
if (unlikely(p->len + len >= 65536))
return -E2BIG;
- lp = NAPI_GRO_CB(p)->last ?: p;
+ lp = NAPI_GRO_CB(p)->last;
pinfo = skb_shinfo(lp);
if (headlen <= offset) {
@@ -3192,7 +3205,7 @@ merge:
__skb_pull(skb, offset);
- if (!NAPI_GRO_CB(p)->last)
+ if (NAPI_GRO_CB(p)->last == p)
skb_shinfo(p)->frag_list = skb;
else
NAPI_GRO_CB(p)->last->next = skb;
diff --git a/net/core/sock.c b/net/core/sock.c
index 664ee4295b6..026e01f7027 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -784,7 +784,7 @@ set_rcvbuf:
break;
case SO_NO_CHECK:
- sk->sk_no_check = valbool;
+ sk->sk_no_check_tx = valbool;
break;
case SO_PRIORITY:
@@ -1064,7 +1064,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
break;
case SO_NO_CHECK:
- v.val = sk->sk_no_check;
+ v.val = sk->sk_no_check_tx;
break;
case SO_PRIORITY:
diff --git a/net/core/tso.c b/net/core/tso.c
new file mode 100644
index 00000000000..8c3203c585b
--- /dev/null
+++ b/net/core/tso.c
@@ -0,0 +1,77 @@
+#include <linux/export.h>
+#include <net/ip.h>
+#include <net/tso.h>
+
+/* Calculate expected number of TX descriptors */
+int tso_count_descs(struct sk_buff *skb)
+{
+ /* The Marvell Way */
+ return skb_shinfo(skb)->gso_segs * 2 + skb_shinfo(skb)->nr_frags;
+}
+EXPORT_SYMBOL(tso_count_descs);
+
+void tso_build_hdr(struct sk_buff *skb, char *hdr, struct tso_t *tso,
+ int size, bool is_last)
+{
+ struct iphdr *iph;
+ struct tcphdr *tcph;
+ int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
+ int mac_hdr_len = skb_network_offset(skb);
+
+ memcpy(hdr, skb->data, hdr_len);
+ iph = (struct iphdr *)(hdr + mac_hdr_len);
+ iph->id = htons(tso->ip_id);
+ iph->tot_len = htons(size + hdr_len - mac_hdr_len);
+ tcph = (struct tcphdr *)(hdr + skb_transport_offset(skb));
+ tcph->seq = htonl(tso->tcp_seq);
+ tso->ip_id++;
+
+ if (!is_last) {
+ /* Clear all special flags for not last packet */
+ tcph->psh = 0;
+ tcph->fin = 0;
+ tcph->rst = 0;
+ }
+}
+EXPORT_SYMBOL(tso_build_hdr);
+
+void tso_build_data(struct sk_buff *skb, struct tso_t *tso, int size)
+{
+ tso->tcp_seq += size;
+ tso->size -= size;
+ tso->data += size;
+
+ if ((tso->size == 0) &&
+ (tso->next_frag_idx < skb_shinfo(skb)->nr_frags)) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[tso->next_frag_idx];
+
+ /* Move to next segment */
+ tso->size = frag->size;
+ tso->data = page_address(frag->page.p) + frag->page_offset;
+ tso->next_frag_idx++;
+ }
+}
+EXPORT_SYMBOL(tso_build_data);
+
+void tso_start(struct sk_buff *skb, struct tso_t *tso)
+{
+ int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
+
+ tso->ip_id = ntohs(ip_hdr(skb)->id);
+ tso->tcp_seq = ntohl(tcp_hdr(skb)->seq);
+ tso->next_frag_idx = 0;
+
+ /* Build first data */
+ tso->size = skb_headlen(skb) - hdr_len;
+ tso->data = skb->data + hdr_len;
+ if ((tso->size == 0) &&
+ (tso->next_frag_idx < skb_shinfo(skb)->nr_frags)) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[tso->next_frag_idx];
+
+ /* Move to next segment */
+ tso->size = frag->size;
+ tso->data = page_address(frag->page.p) + frag->page_offset;
+ tso->next_frag_idx++;
+ }
+}
+EXPORT_SYMBOL(tso_start);
diff --git a/net/core/utils.c b/net/core/utils.c
index 2f737bf90b3..eed34338736 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -348,8 +348,8 @@ static void __net_random_once_deferred(struct work_struct *w)
{
struct __net_random_once_work *work =
container_of(w, struct __net_random_once_work, work);
- if (!static_key_enabled(work->key))
- static_key_slow_inc(work->key);
+ BUG_ON(!static_key_enabled(work->key));
+ static_key_slow_dec(work->key);
kfree(work);
}
@@ -367,7 +367,7 @@ static void __net_random_once_disable_jump(struct static_key *key)
}
bool __net_get_random_once(void *buf, int nbytes, bool *done,
- struct static_key *done_key)
+ struct static_key *once_key)
{
static DEFINE_SPINLOCK(lock);
unsigned long flags;
@@ -382,7 +382,7 @@ bool __net_get_random_once(void *buf, int nbytes, bool *done,
*done = true;
spin_unlock_irqrestore(&lock, flags);
- __net_random_once_disable_jump(done_key);
+ __net_random_once_disable_jump(once_key);
return true;
}
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 22b5d818b20..6ca645c4b48 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -1024,7 +1024,6 @@ static struct inet_protosw dccp_v4_protosw = {
.protocol = IPPROTO_DCCP,
.prot = &dccp_v4_prot,
.ops = &inet_dccp_ops,
- .no_check = 0,
.flags = INET_PROTOSW_ICSK,
};
diff --git a/net/dccp/timer.c b/net/dccp/timer.c
index 16f0b223102..1cd46a345cb 100644
--- a/net/dccp/timer.c
+++ b/net/dccp/timer.c
@@ -280,7 +280,7 @@ static ktime_t dccp_timestamp_seed;
*/
u32 dccp_timestamp(void)
{
- s64 delta = ktime_us_delta(ktime_get_real(), dccp_timestamp_seed);
+ u64 delta = (u64)ktime_us_delta(ktime_get_real(), dccp_timestamp_seed);
do_div(delta, 10);
return delta;
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 4c04848953b..ae011b46c07 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -481,7 +481,7 @@ static struct sock *dn_alloc_sock(struct net *net, struct socket *sock, gfp_t gf
sk->sk_backlog_rcv = dn_nsp_backlog_rcv;
sk->sk_destruct = dn_destruct;
- sk->sk_no_check = 1;
+ sk->sk_no_check_tx = 1;
sk->sk_family = PF_DECnet;
sk->sk_protocol = 0;
sk->sk_allocation = gfp;
diff --git a/net/dns_resolver/dns_query.c b/net/dns_resolver/dns_query.c
index e7b6d53eef8..dd8696a3dbe 100644
--- a/net/dns_resolver/dns_query.c
+++ b/net/dns_resolver/dns_query.c
@@ -93,8 +93,8 @@ int dns_query(const char *type, const char *name, size_t namelen,
}
if (!namelen)
- namelen = strlen(name);
- if (namelen < 3)
+ namelen = strnlen(name, 256);
+ if (namelen < 3 || namelen > 255)
return -EINVAL;
desclen += namelen + 1;
@@ -149,7 +149,9 @@ int dns_query(const char *type, const char *name, size_t namelen,
if (!*_result)
goto put;
- memcpy(*_result, upayload->data, len + 1);
+ memcpy(*_result, upayload->data, len);
+ (*_result)[len] = '\0';
+
if (_expiry)
*_expiry = rkey->expiry;
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 0eb5d5e76df..5db37cef50a 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -406,8 +406,9 @@ static int dsa_of_probe(struct platform_device *pdev)
goto out_free;
}
- chip_index = 0;
+ chip_index = -1;
for_each_available_child_of_node(np, child) {
+ chip_index++;
cd = &pd->chip[chip_index];
cd->mii_bus = &mdio_bus->dev;
diff --git a/net/ieee802154/6lowpan_rtnl.c b/net/ieee802154/6lowpan_rtnl.c
index 1ae8a5628fb..fe6bd7a7108 100644
--- a/net/ieee802154/6lowpan_rtnl.c
+++ b/net/ieee802154/6lowpan_rtnl.c
@@ -312,7 +312,7 @@ lowpan_xmit_fragmented(struct sk_buff *skb, struct net_device *dev,
frag_hdr[0] |= LOWPAN_DISPATCH_FRAGN;
frag_cap = round_down(payload_cap - LOWPAN_FRAGN_HEAD_SIZE, 8);
- while (skb_unprocessed >= frag_cap) {
+ do {
dgram_offset += frag_len;
skb_offset += frag_len;
skb_unprocessed -= frag_len;
@@ -328,7 +328,7 @@ lowpan_xmit_fragmented(struct sk_buff *skb, struct net_device *dev,
__func__, frag_tag, skb_offset);
goto err;
}
- }
+ } while (skb_unprocessed > frag_cap);
consume_skb(skb);
return NET_XMIT_SUCCESS;
diff --git a/net/ieee802154/nl-mac.c b/net/ieee802154/nl-mac.c
index 5617b4c6d6d..a3281b8bfd5 100644
--- a/net/ieee802154/nl-mac.c
+++ b/net/ieee802154/nl-mac.c
@@ -832,8 +832,10 @@ int ieee802154_llsec_getparams(struct sk_buff *skb, struct genl_info *info)
return -ENODEV;
ops = ieee802154_mlme_ops(dev);
- if (!ops->llsec)
- return -EOPNOTSUPP;
+ if (!ops->llsec) {
+ rc = -EOPNOTSUPP;
+ goto out_dev;
+ }
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg)
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 279132bcadd..d156b3c5f36 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -254,7 +254,6 @@ static int inet_create(struct net *net, struct socket *sock, int protocol,
struct inet_sock *inet;
struct proto *answer_prot;
unsigned char answer_flags;
- char answer_no_check;
int try_loading_module = 0;
int err;
@@ -312,7 +311,6 @@ lookup_protocol:
sock->ops = answer->ops;
answer_prot = answer->prot;
- answer_no_check = answer->no_check;
answer_flags = answer->flags;
rcu_read_unlock();
@@ -324,7 +322,6 @@ lookup_protocol:
goto out;
err = 0;
- sk->sk_no_check = answer_no_check;
if (INET_PROTOSW_REUSE & answer_flags)
sk->sk_reuse = SK_CAN_REUSE;
@@ -1002,7 +999,6 @@ static struct inet_protosw inetsw_array[] =
.protocol = IPPROTO_TCP,
.prot = &tcp_prot,
.ops = &inet_stream_ops,
- .no_check = 0,
.flags = INET_PROTOSW_PERMANENT |
INET_PROTOSW_ICSK,
},
@@ -1012,7 +1008,6 @@ static struct inet_protosw inetsw_array[] =
.protocol = IPPROTO_UDP,
.prot = &udp_prot,
.ops = &inet_dgram_ops,
- .no_check = UDP_CSUM_DEFAULT,
.flags = INET_PROTOSW_PERMANENT,
},
@@ -1021,7 +1016,6 @@ static struct inet_protosw inetsw_array[] =
.protocol = IPPROTO_ICMP,
.prot = &ping_prot,
.ops = &inet_dgram_ops,
- .no_check = UDP_CSUM_DEFAULT,
.flags = INET_PROTOSW_REUSE,
},
@@ -1030,7 +1024,6 @@ static struct inet_protosw inetsw_array[] =
.protocol = IPPROTO_IP, /* wild card */
.prot = &raw_prot,
.ops = &inet_sockraw_ops,
- .no_check = UDP_CSUM_DEFAULT,
.flags = INET_PROTOSW_REUSE,
}
};
@@ -1261,10 +1254,12 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
SKB_GSO_DODGY |
SKB_GSO_TCP_ECN |
SKB_GSO_GRE |
+ SKB_GSO_GRE_CSUM |
SKB_GSO_IPIP |
SKB_GSO_SIT |
SKB_GSO_TCPV6 |
SKB_GSO_UDP_TUNNEL |
+ SKB_GSO_UDP_TUNNEL_CSUM |
SKB_GSO_MPLS |
0)))
goto out;
@@ -1434,6 +1429,9 @@ static int inet_gro_complete(struct sk_buff *skb, int nhoff)
int proto = iph->protocol;
int err = -ENOSYS;
+ if (skb->encapsulation)
+ skb_set_inner_network_header(skb, nhoff);
+
csum_replace2(&iph->check, iph->tot_len, newlen);
iph->tot_len = newlen;
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index 8b5134c582f..a3095fdefbe 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -86,18 +86,26 @@ out:
}
EXPORT_SYMBOL(ip4_datagram_connect);
+/* Because UDP xmit path can manipulate sk_dst_cache without holding
+ * socket lock, we need to use sk_dst_set() here,
+ * even if we own the socket lock.
+ */
void ip4_datagram_release_cb(struct sock *sk)
{
const struct inet_sock *inet = inet_sk(sk);
const struct ip_options_rcu *inet_opt;
__be32 daddr = inet->inet_daddr;
+ struct dst_entry *dst;
struct flowi4 fl4;
struct rtable *rt;
- if (! __sk_dst_get(sk) || __sk_dst_check(sk, 0))
- return;
-
rcu_read_lock();
+
+ dst = __sk_dst_get(sk);
+ if (!dst || !dst->obsolete || dst->ops->check(dst, 0)) {
+ rcu_read_unlock();
+ return;
+ }
inet_opt = rcu_dereference(inet->inet_opt);
if (inet_opt && inet_opt->opt.srr)
daddr = inet_opt->opt.faddr;
@@ -105,8 +113,10 @@ void ip4_datagram_release_cb(struct sock *sk)
inet->inet_saddr, inet->inet_dport,
inet->inet_sport, sk->sk_protocol,
RT_CONN_FLAGS(sk), sk->sk_bound_dev_if);
- if (!IS_ERR(rt))
- __sk_dst_set(sk, &rt->dst);
+
+ dst = !IS_ERR(rt) ? &rt->dst : NULL;
+ sk_dst_set(sk, dst);
+
rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(ip4_datagram_release_cb);
diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c
index fbfd829f404..0485bf7f8f0 100644
--- a/net/ipv4/gre_demux.c
+++ b/net/ipv4/gre_demux.c
@@ -68,6 +68,7 @@ void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
skb_push(skb, hdr_len);
+ skb_reset_transport_header(skb);
greh = (struct gre_base_hdr *)skb->data;
greh->flags = tnl_flags_to_gre_flags(tpi->flags);
greh->protocol = tpi->proto;
@@ -84,7 +85,8 @@ void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
ptr--;
}
if (tpi->flags&TUNNEL_CSUM &&
- !(skb_shinfo(skb)->gso_type & SKB_GSO_GRE)) {
+ !(skb_shinfo(skb)->gso_type &
+ (SKB_GSO_GRE|SKB_GSO_GRE_CSUM))) {
*ptr = 0;
*(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0,
skb->len, 0));
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index f1d32280cb5..f0bdd47bbbc 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -42,6 +42,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
SKB_GSO_DODGY |
SKB_GSO_TCP_ECN |
SKB_GSO_GRE |
+ SKB_GSO_GRE_CSUM |
SKB_GSO_IPIP)))
goto out;
@@ -55,6 +56,8 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
goto out;
csum = !!(greh->flags & GRE_CSUM);
+ if (csum)
+ skb->encap_hdr_csum = 1;
if (unlikely(!pskb_may_pull(skb, ghl)))
goto out;
@@ -94,10 +97,13 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
}
}
- greh = (struct gre_base_hdr *)(skb->data);
+ skb_reset_transport_header(skb);
+
+ greh = (struct gre_base_hdr *)
+ skb_transport_header(skb);
pcsum = (__be32 *)(greh + 1);
*pcsum = 0;
- *(__sum16 *)pcsum = csum_fold(skb_checksum(skb, 0, skb->len, 0));
+ *(__sum16 *)pcsum = gso_make_checksum(skb, 0);
}
__skb_push(skb, tnl_hlen - ghl);
@@ -125,10 +131,12 @@ static __sum16 gro_skb_checksum(struct sk_buff *skb)
csum_partial(skb->data, skb_gro_offset(skb), 0));
sum = csum_fold(NAPI_GRO_CB(skb)->csum);
if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE)) {
- if (unlikely(!sum))
+ if (unlikely(!sum) && !skb->csum_complete_sw)
netdev_rx_csum_fault(skb->dev);
- } else
+ } else {
skb->ip_summed = CHECKSUM_COMPLETE;
+ skb->csum_complete_sw = 1;
+ }
return sum;
}
@@ -255,6 +263,9 @@ static int gre_gro_complete(struct sk_buff *skb, int nhoff)
int err = -ENOENT;
__be16 type;
+ skb->encapsulation = 1;
+ skb_shinfo(skb)->gso_type = SKB_GSO_GRE;
+
type = greh->protocol;
if (greh->flags & GRE_KEY)
grehlen += GRE_HEADER_SECTION;
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 79c3d947a48..42b7bcf8045 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -739,8 +739,6 @@ static void icmp_unreach(struct sk_buff *skb)
/* fall through */
case 0:
info = ntohs(icmph->un.frag.mtu);
- if (!info)
- goto out;
}
break;
case ICMP_SR_FAILED:
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 17d34e3c2ac..db710b059ba 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -369,7 +369,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
pip->saddr = fl4.saddr;
pip->protocol = IPPROTO_IGMP;
pip->tot_len = 0; /* filled in later */
- ip_select_ident(skb, &rt->dst, NULL);
+ ip_select_ident(skb, NULL);
((u8 *)&pip[1])[0] = IPOPT_RA;
((u8 *)&pip[1])[1] = 4;
((u8 *)&pip[1])[2] = 0;
@@ -714,7 +714,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
iph->daddr = dst;
iph->saddr = fl4.saddr;
iph->protocol = IPPROTO_IGMP;
- ip_select_ident(skb, &rt->dst, NULL);
+ ip_select_ident(skb, NULL);
((u8 *)&iph[1])[0] = IPOPT_RA;
((u8 *)&iph[1])[1] = 4;
((u8 *)&iph[1])[2] = 0;
@@ -1944,6 +1944,10 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
rtnl_lock();
in_dev = ip_mc_find_dev(net, imr);
+ if (!in_dev) {
+ ret = -ENODEV;
+ goto out;
+ }
ifindex = imr->imr_ifindex;
for (imlp = &inet->mc_list;
(iml = rtnl_dereference(*imlp)) != NULL;
@@ -1961,16 +1965,14 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
*imlp = iml->next_rcu;
- if (in_dev)
- ip_mc_dec_group(in_dev, group);
+ ip_mc_dec_group(in_dev, group);
rtnl_unlock();
/* decrease mem now to avoid the memleak warning */
atomic_sub(sizeof(*iml), &sk->sk_omem_alloc);
kfree_rcu(iml, rcu);
return 0;
}
- if (!in_dev)
- ret = -ENODEV;
+out:
rtnl_unlock();
return ret;
}
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index c98cf141f4e..bd5f5928167 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -26,20 +26,7 @@
* Theory of operations.
* We keep one entry for each peer IP address. The nodes contains long-living
* information about the peer which doesn't depend on routes.
- * At this moment this information consists only of ID field for the next
- * outgoing IP packet. This field is incremented with each packet as encoded
- * in inet_getid() function (include/net/inetpeer.h).
- * At the moment of writing this notes identifier of IP packets is generated
- * to be unpredictable using this code only for packets subjected
- * (actually or potentially) to defragmentation. I.e. DF packets less than
- * PMTU in size when local fragmentation is disabled use a constant ID and do
- * not use this code (see ip_select_ident() in include/net/ip.h).
*
- * Route cache entries hold references to our nodes.
- * New cache entries get references via lookup by destination IP address in
- * the avl tree. The reference is grabbed only when it's needed i.e. only
- * when we try to output IP packet which needs an unpredictable ID (see
- * __ip_select_ident() in net/ipv4/route.c).
* Nodes are removed only when reference counter goes to 0.
* When it's happened the node may be removed when a sufficient amount of
* time has been passed since its last use. The less-recently-used entry can
@@ -62,7 +49,6 @@
* refcnt: atomically against modifications on other CPU;
* usually under some other lock to prevent node disappearing
* daddr: unchangeable
- * ip_id_count: atomic value (no lock needed)
*/
static struct kmem_cache *peer_cachep __read_mostly;
@@ -497,10 +483,6 @@ relookup:
p->daddr = *daddr;
atomic_set(&p->refcnt, 1);
atomic_set(&p->rid, 0);
- atomic_set(&p->ip_id_count,
- (daddr->family == AF_INET) ?
- secure_ip_id(daddr->addr.a4) :
- secure_ipv6_id(daddr->addr.a6));
p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
p->rate_tokens = 0;
/* 60*HZ is arbitrary, but chosen enough high so that the first
@@ -522,7 +504,7 @@ EXPORT_SYMBOL_GPL(inet_getpeer);
void inet_putpeer(struct inet_peer *p)
{
p->dtime = (__u32)jiffies;
- smp_mb__before_atomic_dec();
+ smp_mb__before_atomic();
atomic_dec(&p->refcnt);
}
EXPORT_SYMBOL_GPL(inet_putpeer);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index c5a557a06a3..9b842544aea 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -649,6 +649,7 @@ static void ipgre_tap_setup(struct net_device *dev)
{
ether_setup(dev);
dev->netdev_ops = &gre_tap_netdev_ops;
+ dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
ip_tunnel_setup(dev, gre_tap_net_id);
}
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 5e7aecea05c..ad382499bac 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -288,6 +288,10 @@ int ip_options_compile(struct net *net,
optptr++;
continue;
}
+ if (unlikely(l < 2)) {
+ pp_ptr = optptr;
+ goto error;
+ }
optlen = optptr[1];
if (optlen < 2 || optlen > l) {
pp_ptr = optptr;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 6e231ab58d6..8d3b6b0e985 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -148,7 +148,7 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
iph->daddr = (opt && opt->opt.srr ? opt->opt.faddr : daddr);
iph->saddr = saddr;
iph->protocol = sk->sk_protocol;
- ip_select_ident(skb, &rt->dst, sk);
+ ip_select_ident(skb, sk);
if (opt && opt->opt.optlen) {
iph->ihl += opt->opt.optlen>>2;
@@ -430,8 +430,7 @@ packet_routed:
ip_options_build(skb, &inet_opt->opt, inet->inet_daddr, rt, 0);
}
- ip_select_ident_more(skb, &rt->dst, sk,
- (skb_shinfo(skb)->gso_segs ?: 1) - 1);
+ ip_select_ident_segs(skb, sk, skb_shinfo(skb)->gso_segs ?: 1);
/* TODO : should we use skb->sk here instead of sk ? */
skb->priority = sk->sk_priority;
@@ -1379,7 +1378,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
iph->ttl = ttl;
iph->protocol = sk->sk_protocol;
ip_copy_addrs(iph, fl4);
- ip_select_ident(skb, &rt->dst, sk);
+ ip_select_ident(skb, sk);
if (opt) {
iph->ihl += opt->optlen>>2;
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 289c6ee388c..6f9de61dce5 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -73,12 +73,7 @@ static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
{
struct dst_entry *old_dst;
- if (dst) {
- if (dst->flags & DST_NOCACHE)
- dst = NULL;
- else
- dst_clone(dst);
- }
+ dst_clone(dst);
old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
dst_release(old_dst);
}
@@ -108,13 +103,14 @@ static struct rtable *tunnel_rtable_get(struct ip_tunnel *t, u32 cookie)
rcu_read_lock();
dst = rcu_dereference(this_cpu_ptr(t->dst_cache)->dst);
+ if (dst && !atomic_inc_not_zero(&dst->__refcnt))
+ dst = NULL;
if (dst) {
if (dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
- rcu_read_unlock();
tunnel_dst_reset(t);
- return NULL;
+ dst_release(dst);
+ dst = NULL;
}
- dst_hold(dst);
}
rcu_read_unlock();
return (struct rtable *)dst;
@@ -173,6 +169,7 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
hlist_for_each_entry_rcu(t, head, hash_node) {
if (remote != t->parms.iph.daddr ||
+ t->parms.iph.saddr != 0 ||
!(t->dev->flags & IFF_UP))
continue;
@@ -189,10 +186,11 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
head = &itn->tunnels[hash];
hlist_for_each_entry_rcu(t, head, hash_node) {
- if ((local != t->parms.iph.saddr &&
- (local != t->parms.iph.daddr ||
- !ipv4_is_multicast(local))) ||
- !(t->dev->flags & IFF_UP))
+ if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
+ (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
+ continue;
+
+ if (!(t->dev->flags & IFF_UP))
continue;
if (!ip_tunnel_key_match(&t->parms, flags, key))
@@ -209,6 +207,8 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
hlist_for_each_entry_rcu(t, head, hash_node) {
if (t->parms.i_key != key ||
+ t->parms.iph.saddr != 0 ||
+ t->parms.iph.daddr != 0 ||
!(t->dev->flags & IFF_UP))
continue;
@@ -268,6 +268,7 @@ static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
__be32 remote = parms->iph.daddr;
__be32 local = parms->iph.saddr;
__be32 key = parms->i_key;
+ __be16 flags = parms->i_flags;
int link = parms->link;
struct ip_tunnel *t = NULL;
struct hlist_head *head = ip_bucket(itn, parms);
@@ -275,9 +276,9 @@ static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
hlist_for_each_entry_rcu(t, head, hash_node) {
if (local == t->parms.iph.saddr &&
remote == t->parms.iph.daddr &&
- key == t->parms.i_key &&
link == t->parms.link &&
- type == t->dev->type)
+ type == t->dev->type &&
+ ip_tunnel_key_match(&t->parms, flags, key))
break;
}
return t;
@@ -539,9 +540,10 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
unsigned int max_headroom; /* The extra header space needed */
__be32 dst;
int err;
- bool connected = true;
+ bool connected;
inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
+ connected = (tunnel->parms.iph.daddr != 0);
dst = tnl_params->daddr;
if (dst == 0) {
@@ -666,6 +668,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
dev->needed_headroom = max_headroom;
if (skb_cow_head(skb, dev->needed_headroom)) {
+ ip_rt_put(rt);
dev->stats.tx_dropped++;
kfree_skb(skb);
return;
@@ -745,10 +748,12 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
goto done;
if (p->iph.ttl)
p->iph.frag_off |= htons(IP_DF);
- if (!(p->i_flags&TUNNEL_KEY))
- p->i_key = 0;
- if (!(p->o_flags&TUNNEL_KEY))
- p->o_key = 0;
+ if (!(p->i_flags & VTI_ISVTI)) {
+ if (!(p->i_flags & TUNNEL_KEY))
+ p->i_key = 0;
+ if (!(p->o_flags & TUNNEL_KEY))
+ p->o_key = 0;
+ }
t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
@@ -879,6 +884,7 @@ int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
*/
if (!IS_ERR(itn->fb_tunnel_dev)) {
itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
+ itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
}
rtnl_unlock();
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index bcf206c7900..f4c987bb7e9 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -74,7 +74,7 @@ int iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
iph->daddr = dst;
iph->saddr = src;
iph->ttl = ttl;
- __ip_select_ident(iph, &rt->dst, (skb_shinfo(skb)->gso_segs ?: 1) - 1);
+ __ip_select_ident(iph, skb_shinfo(skb)->gso_segs ?: 1);
err = ip_local_out_sk(sk, skb);
if (unlikely(net_xmit_eval(err)))
@@ -135,6 +135,14 @@ struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb,
return skb;
}
+ /* If packet is not gso and we are resolving any partial checksum,
+ * clear encapsulation flag. This allows setting CHECKSUM_PARTIAL
+ * on the outer header without confusing devices that implement
+ * NETIF_F_IP_CSUM with encapsulation.
+ */
+ if (csum_help)
+ skb->encapsulation = 0;
+
if (skb->ip_summed == CHECKSUM_PARTIAL && csum_help) {
err = skb_checksum_help(skb);
if (unlikely(err))
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index afcee51b90e..b8960f3527f 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -239,6 +239,7 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
static int vti4_err(struct sk_buff *skb, u32 info)
{
__be32 spi;
+ __u32 mark;
struct xfrm_state *x;
struct ip_tunnel *tunnel;
struct ip_esp_hdr *esph;
@@ -254,6 +255,8 @@ static int vti4_err(struct sk_buff *skb, u32 info)
if (!tunnel)
return -1;
+ mark = be32_to_cpu(tunnel->parms.o_key);
+
switch (protocol) {
case IPPROTO_ESP:
esph = (struct ip_esp_hdr *)(skb->data+(iph->ihl<<2));
@@ -281,7 +284,7 @@ static int vti4_err(struct sk_buff *skb, u32 info)
return 0;
}
- x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
+ x = xfrm_state_lookup(net, mark, (const xfrm_address_t *)&iph->daddr,
spi, protocol, AF_INET);
if (!x)
return 0;
@@ -310,7 +313,13 @@ vti_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
return -EINVAL;
}
- p.i_flags |= VTI_ISVTI;
+ if (!(p.i_flags & GRE_KEY))
+ p.i_key = 0;
+ if (!(p.o_flags & GRE_KEY))
+ p.o_key = 0;
+
+ p.i_flags = VTI_ISVTI;
+
err = ip_tunnel_ioctl(dev, &p, cmd);
if (err)
return err;
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 812b1835146..62eaa005e14 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -149,13 +149,13 @@ static int ipip_err(struct sk_buff *skb, u32 info)
if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
ipv4_update_pmtu(skb, dev_net(skb->dev), info,
- t->dev->ifindex, 0, IPPROTO_IPIP, 0);
+ t->parms.link, 0, IPPROTO_IPIP, 0);
err = 0;
goto out;
}
if (type == ICMP_REDIRECT) {
- ipv4_redirect(skb, dev_net(skb->dev), t->dev->ifindex, 0,
+ ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
IPPROTO_IPIP, 0);
err = 0;
goto out;
@@ -486,4 +486,5 @@ static void __exit ipip_fini(void)
module_init(ipip_init);
module_exit(ipip_fini);
MODULE_LICENSE("GPL");
+MODULE_ALIAS_RTNL_LINK("ipip");
MODULE_ALIAS_NETDEV("tunl0");
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index d84dc8d4c91..65bcaa78904 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -484,7 +484,7 @@ static void reg_vif_setup(struct net_device *dev)
dev->type = ARPHRD_PIMREG;
dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8;
dev->flags = IFF_NOARP;
- dev->netdev_ops = &reg_vif_netdev_ops,
+ dev->netdev_ops = &reg_vif_netdev_ops;
dev->destructor = free_netdev;
dev->features |= NETIF_F_NETNS_LOCAL;
}
@@ -1663,7 +1663,7 @@ static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
iph->protocol = IPPROTO_IPIP;
iph->ihl = 5;
iph->tot_len = htons(skb->len);
- ip_select_ident(skb, skb_dst(skb), NULL);
+ ip_select_ident(skb, NULL);
ip_send_check(iph);
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
index ee2886126e3..f1787c04a4d 100644
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -91,17 +91,9 @@ nf_nat_ipv4_fn(const struct nf_hook_ops *ops,
if (nf_ct_is_untracked(ct))
return NF_ACCEPT;
- nat = nfct_nat(ct);
- if (!nat) {
- /* NAT module was loaded late. */
- if (nf_ct_is_confirmed(ct))
- return NF_ACCEPT;
- nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
- if (nat == NULL) {
- pr_debug("failed to add NAT extension\n");
- return NF_ACCEPT;
- }
- }
+ nat = nf_ct_nat_ext_add(ct);
+ if (nat == NULL)
+ return NF_ACCEPT;
switch (ctinfo) {
case IP_CT_RELATED:
diff --git a/net/ipv4/netfilter/nft_chain_nat_ipv4.c b/net/ipv4/netfilter/nft_chain_nat_ipv4.c
index b5b256d45e6..3964157d826 100644
--- a/net/ipv4/netfilter/nft_chain_nat_ipv4.c
+++ b/net/ipv4/netfilter/nft_chain_nat_ipv4.c
@@ -48,15 +48,9 @@ static unsigned int nf_nat_fn(const struct nf_hook_ops *ops,
NF_CT_ASSERT(!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)));
- nat = nfct_nat(ct);
- if (nat == NULL) {
- /* Conntrack module was loaded late, can't add extension. */
- if (nf_ct_is_confirmed(ct))
- return NF_ACCEPT;
- nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
- if (nat == NULL)
- return NF_ACCEPT;
- }
+ nat = nf_ct_nat_ext_add(ct);
+ if (nat == NULL)
+ return NF_ACCEPT;
switch (ctinfo) {
case IP_CT_RELATED:
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index a9dbe58bdfe..2c65160565e 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -389,7 +389,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
iph->check = 0;
iph->tot_len = htons(length);
if (!iph->id)
- ip_select_ident(skb, &rt->dst, NULL);
+ ip_select_ident(skb, NULL);
iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 50e1e0feddf..190199851c9 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -89,6 +89,7 @@
#include <linux/rcupdate.h>
#include <linux/times.h>
#include <linux/slab.h>
+#include <linux/jhash.h>
#include <net/dst.h>
#include <net/net_namespace.h>
#include <net/protocol.h>
@@ -456,39 +457,45 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
return neigh_create(&arp_tbl, pkey, dev);
}
-/*
- * Peer allocation may fail only in serious out-of-memory conditions. However
- * we still can generate some output.
- * Random ID selection looks a bit dangerous because we have no chances to
- * select ID being unique in a reasonable period of time.
- * But broken packet identifier may be better than no packet at all.
+#define IP_IDENTS_SZ 2048u
+struct ip_ident_bucket {
+ atomic_t id;
+ u32 stamp32;
+};
+
+static struct ip_ident_bucket *ip_idents __read_mostly;
+
+/* In order to protect privacy, we add a perturbation to identifiers
+ * if one generator is seldom used. This makes hard for an attacker
+ * to infer how many packets were sent between two points in time.
*/
-static void ip_select_fb_ident(struct iphdr *iph)
+u32 ip_idents_reserve(u32 hash, int segs)
{
- static DEFINE_SPINLOCK(ip_fb_id_lock);
- static u32 ip_fallback_id;
- u32 salt;
+ struct ip_ident_bucket *bucket = ip_idents + hash % IP_IDENTS_SZ;
+ u32 old = ACCESS_ONCE(bucket->stamp32);
+ u32 now = (u32)jiffies;
+ u32 delta = 0;
- spin_lock_bh(&ip_fb_id_lock);
- salt = secure_ip_id((__force __be32)ip_fallback_id ^ iph->daddr);
- iph->id = htons(salt & 0xFFFF);
- ip_fallback_id = salt;
- spin_unlock_bh(&ip_fb_id_lock);
+ if (old != now && cmpxchg(&bucket->stamp32, old, now) == old)
+ delta = prandom_u32_max(now - old);
+
+ return atomic_add_return(segs + delta, &bucket->id) - segs;
}
+EXPORT_SYMBOL(ip_idents_reserve);
-void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
+void __ip_select_ident(struct iphdr *iph, int segs)
{
- struct net *net = dev_net(dst->dev);
- struct inet_peer *peer;
+ static u32 ip_idents_hashrnd __read_mostly;
+ u32 hash, id;
- peer = inet_getpeer_v4(net->ipv4.peers, iph->daddr, 1);
- if (peer) {
- iph->id = htons(inet_getid(peer, more));
- inet_putpeer(peer);
- return;
- }
+ net_get_random_once(&ip_idents_hashrnd, sizeof(ip_idents_hashrnd));
- ip_select_fb_ident(iph);
+ hash = jhash_3words((__force u32)iph->daddr,
+ (__force u32)iph->saddr,
+ iph->protocol,
+ ip_idents_hashrnd);
+ id = ip_idents_reserve(hash, segs);
+ iph->id = htons(id);
}
EXPORT_SYMBOL(__ip_select_ident);
@@ -1029,7 +1036,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
const struct iphdr *iph = (const struct iphdr *) skb->data;
struct flowi4 fl4;
struct rtable *rt;
- struct dst_entry *dst;
+ struct dst_entry *odst = NULL;
bool new = false;
bh_lock_sock(sk);
@@ -1037,16 +1044,17 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
if (!ip_sk_accept_pmtu(sk))
goto out;
- rt = (struct rtable *) __sk_dst_get(sk);
+ odst = sk_dst_get(sk);
- if (sock_owned_by_user(sk) || !rt) {
+ if (sock_owned_by_user(sk) || !odst) {
__ipv4_sk_update_pmtu(skb, sk, mtu);
goto out;
}
__build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
- if (!__sk_dst_check(sk, 0)) {
+ rt = (struct rtable *)odst;
+ if (odst->obsolete && odst->ops->check(odst, 0) == NULL) {
rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
if (IS_ERR(rt))
goto out;
@@ -1056,8 +1064,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
__ip_rt_update_pmtu((struct rtable *) rt->dst.path, &fl4, mtu);
- dst = dst_check(&rt->dst, 0);
- if (!dst) {
+ if (!dst_check(&rt->dst, 0)) {
if (new)
dst_release(&rt->dst);
@@ -1069,10 +1076,11 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
}
if (new)
- __sk_dst_set(sk, &rt->dst);
+ sk_dst_set(sk, &rt->dst);
out:
bh_unlock_sock(sk);
+ dst_release(odst);
}
EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu);
@@ -1526,7 +1534,7 @@ static int __mkroute_input(struct sk_buff *skb,
struct in_device *out_dev;
unsigned int flags = 0;
bool do_cache;
- u32 itag;
+ u32 itag = 0;
/* get a working reference to the output device */
out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res));
@@ -2711,6 +2719,12 @@ int __init ip_rt_init(void)
{
int rc = 0;
+ ip_idents = kmalloc(IP_IDENTS_SZ * sizeof(*ip_idents), GFP_KERNEL);
+ if (!ip_idents)
+ panic("IP: failed to allocate ip_idents\n");
+
+ prandom_bytes(ip_idents, IP_IDENTS_SZ * sizeof(*ip_idents));
+
#ifdef CONFIG_IP_ROUTE_CLASSID
ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct));
if (!ip_rt_acct)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index eb1dde37e67..9d2118e5fbc 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1108,7 +1108,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
if (unlikely(tp->repair)) {
if (tp->repair_queue == TCP_RECV_QUEUE) {
copied = tcp_send_rcvq(sk, msg, size);
- goto out;
+ goto out_nopush;
}
err = -EINVAL;
@@ -1282,6 +1282,7 @@ wait_for_memory:
out:
if (copied)
tcp_push(sk, flags, mss_now, tp->nonagle, size_goal);
+out_nopush:
release_sock(sk);
return copied + copied_syn;
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 62e48cf84e6..9771563ab56 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -131,7 +131,7 @@ static bool tcp_fastopen_create_child(struct sock *sk,
struct dst_entry *dst,
struct request_sock *req)
{
- struct tcp_sock *tp = tcp_sk(sk);
+ struct tcp_sock *tp;
struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
struct sock *child;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 350b2072f0a..40639c288dc 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1106,7 +1106,7 @@ static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
}
/* D-SACK for already forgotten data... Do dumb counting. */
- if (dup_sack && tp->undo_marker && tp->undo_retrans &&
+ if (dup_sack && tp->undo_marker && tp->undo_retrans > 0 &&
!after(end_seq_0, prior_snd_una) &&
after(end_seq_0, tp->undo_marker))
tp->undo_retrans--;
@@ -1162,12 +1162,12 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
unsigned int new_len = (pkt_len / mss) * mss;
if (!in_sack && new_len < pkt_len) {
new_len += mss;
- if (new_len > skb->len)
+ if (new_len >= skb->len)
return 0;
}
pkt_len = new_len;
}
- err = tcp_fragment(sk, skb, pkt_len, mss);
+ err = tcp_fragment(sk, skb, pkt_len, mss, GFP_ATOMIC);
if (err < 0)
return err;
}
@@ -1187,7 +1187,7 @@ static u8 tcp_sacktag_one(struct sock *sk,
/* Account D-SACK for retransmitted packet. */
if (dup_sack && (sacked & TCPCB_RETRANS)) {
- if (tp->undo_marker && tp->undo_retrans &&
+ if (tp->undo_marker && tp->undo_retrans > 0 &&
after(end_seq, tp->undo_marker))
tp->undo_retrans--;
if (sacked & TCPCB_SACKED_ACKED)
@@ -1893,7 +1893,7 @@ static void tcp_clear_retrans_partial(struct tcp_sock *tp)
tp->lost_out = 0;
tp->undo_marker = 0;
- tp->undo_retrans = 0;
+ tp->undo_retrans = -1;
}
void tcp_clear_retrans(struct tcp_sock *tp)
@@ -2241,7 +2241,8 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
break;
mss = skb_shinfo(skb)->gso_size;
- err = tcp_fragment(sk, skb, (packets - oldcnt) * mss, mss);
+ err = tcp_fragment(sk, skb, (packets - oldcnt) * mss,
+ mss, GFP_ATOMIC);
if (err < 0)
break;
cnt = packets;
@@ -2664,7 +2665,7 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
tp->prior_ssthresh = 0;
tp->undo_marker = tp->snd_una;
- tp->undo_retrans = tp->retrans_out;
+ tp->undo_retrans = tp->retrans_out ? : -1;
if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
if (!ece_ack)
@@ -2684,13 +2685,12 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack)
bool recovered = !before(tp->snd_una, tp->high_seq);
if (tp->frto) { /* F-RTO RFC5682 sec 3.1 (sack enhanced version). */
- if (flag & FLAG_ORIG_SACK_ACKED) {
- /* Step 3.b. A timeout is spurious if not all data are
- * lost, i.e., never-retransmitted data are (s)acked.
- */
- tcp_try_undo_loss(sk, true);
+ /* Step 3.b. A timeout is spurious if not all data are
+ * lost, i.e., never-retransmitted data are (s)acked.
+ */
+ if (tcp_try_undo_loss(sk, flag & FLAG_ORIG_SACK_ACKED))
return;
- }
+
if (after(tp->snd_nxt, tp->high_seq) &&
(flag & FLAG_DATA_SACKED || is_dupack)) {
tp->frto = 0; /* Loss was real: 2nd part of step 3.a */
diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c
index d4f015ad6c8..f7a2ec3ac58 100644
--- a/net/ipv4/tcp_memcontrol.c
+++ b/net/ipv4/tcp_memcontrol.c
@@ -102,17 +102,19 @@ static int tcp_update_limit(struct mem_cgroup *memcg, u64 val)
return 0;
}
-static int tcp_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft,
- char *buffer)
+static ssize_t tcp_cgroup_write(struct kernfs_open_file *of,
+ char *buf, size_t nbytes, loff_t off)
{
- struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+ struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
unsigned long long val;
int ret = 0;
- switch (cft->private) {
+ buf = strstrip(buf);
+
+ switch (of_cft(of)->private) {
case RES_LIMIT:
/* see memcontrol.c */
- ret = res_counter_memparse_write_strategy(buffer, &val);
+ ret = res_counter_memparse_write_strategy(buf, &val);
if (ret)
break;
ret = tcp_update_limit(memcg, val);
@@ -121,7 +123,7 @@ static int tcp_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft,
ret = -EINVAL;
break;
}
- return ret;
+ return ret ?: nbytes;
}
static u64 tcp_read_stat(struct mem_cgroup *memcg, int type, u64 default_val)
@@ -168,17 +170,18 @@ static u64 tcp_cgroup_read(struct cgroup_subsys_state *css, struct cftype *cft)
return val;
}
-static int tcp_cgroup_reset(struct cgroup_subsys_state *css, unsigned int event)
+static ssize_t tcp_cgroup_reset(struct kernfs_open_file *of,
+ char *buf, size_t nbytes, loff_t off)
{
struct mem_cgroup *memcg;
struct cg_proto *cg_proto;
- memcg = mem_cgroup_from_css(css);
+ memcg = mem_cgroup_from_css(of_css(of));
cg_proto = tcp_prot.proto_cgroup(memcg);
if (!cg_proto)
- return 0;
+ return nbytes;
- switch (event) {
+ switch (of_cft(of)->private) {
case RES_MAX_USAGE:
res_counter_reset_max(&cg_proto->memory_allocated);
break;
@@ -187,13 +190,13 @@ static int tcp_cgroup_reset(struct cgroup_subsys_state *css, unsigned int event)
break;
}
- return 0;
+ return nbytes;
}
static struct cftype tcp_files[] = {
{
.name = "kmem.tcp.limit_in_bytes",
- .write_string = tcp_cgroup_write,
+ .write = tcp_cgroup_write,
.read_u64 = tcp_cgroup_read,
.private = RES_LIMIT,
},
@@ -205,13 +208,13 @@ static struct cftype tcp_files[] = {
{
.name = "kmem.tcp.failcnt",
.private = RES_FAILCNT,
- .trigger = tcp_cgroup_reset,
+ .write = tcp_cgroup_reset,
.read_u64 = tcp_cgroup_read,
},
{
.name = "kmem.tcp.max_usage_in_bytes",
.private = RES_MAX_USAGE,
- .trigger = tcp_cgroup_reset,
+ .write = tcp_cgroup_reset,
.read_u64 = tcp_cgroup_read,
},
{ } /* terminate */
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index dcaf72f1021..4fe04180598 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -1159,10 +1159,7 @@ static void __net_exit tcp_net_metrics_exit(struct net *net)
tm = next;
}
}
- if (is_vmalloc_addr(net->ipv4.tcp_metrics_hash))
- vfree(net->ipv4.tcp_metrics_hash);
- else
- kfree(net->ipv4.tcp_metrics_hash);
+ kvfree(net->ipv4.tcp_metrics_hash);
}
static __net_initdata struct pernet_operations tcp_net_metrics_ops = {
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index b92b81718ca..55046ecd083 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -57,10 +57,12 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
SKB_GSO_TCP_ECN |
SKB_GSO_TCPV6 |
SKB_GSO_GRE |
+ SKB_GSO_GRE_CSUM |
SKB_GSO_IPIP |
SKB_GSO_SIT |
SKB_GSO_MPLS |
SKB_GSO_UDP_TUNNEL |
+ SKB_GSO_UDP_TUNNEL_CSUM |
0) ||
!(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))))
goto out;
@@ -97,9 +99,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
th->check = newcheck;
if (skb->ip_summed != CHECKSUM_PARTIAL)
- th->check =
- csum_fold(csum_partial(skb_transport_header(skb),
- thlen, skb->csum));
+ th->check = gso_make_checksum(skb, ~th->check);
seq += mss;
if (copy_destructor) {
@@ -133,8 +133,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
th->check = ~csum_fold((__force __wsum)((__force u32)th->check +
(__force u32)delta));
if (skb->ip_summed != CHECKSUM_PARTIAL)
- th->check = csum_fold(csum_partial(skb_transport_header(skb),
- thlen, skb->csum));
+ th->check = gso_make_checksum(skb, ~th->check);
out:
return segs;
}
@@ -310,7 +309,7 @@ static int tcp4_gro_complete(struct sk_buff *skb, int thoff)
th->check = ~tcp_v4_check(skb->len - thoff, iph->saddr,
iph->daddr, 0);
- skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
+ skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4;
return tcp_gro_complete(skb);
}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 3d61c52bdf7..179b51e6bda 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1074,7 +1074,7 @@ static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int de
* Remember, these are still headerless SKBs at this point.
*/
int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
- unsigned int mss_now)
+ unsigned int mss_now, gfp_t gfp)
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *buff;
@@ -1089,11 +1089,11 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
if (nsize < 0)
nsize = 0;
- if (skb_unclone(skb, GFP_ATOMIC))
+ if (skb_unclone(skb, gfp))
return -ENOMEM;
/* Get a new skb... force flag on. */
- buff = sk_stream_alloc_skb(sk, nsize, GFP_ATOMIC);
+ buff = sk_stream_alloc_skb(sk, nsize, gfp);
if (buff == NULL)
return -ENOMEM; /* We'll just try again later. */
@@ -1402,11 +1402,19 @@ static void tcp_cwnd_application_limited(struct sock *sk)
tp->snd_cwnd_stamp = tcp_time_stamp;
}
-static void tcp_cwnd_validate(struct sock *sk, u32 unsent_segs)
+static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
{
struct tcp_sock *tp = tcp_sk(sk);
- tp->lsnd_pending = tp->packets_out + unsent_segs;
+ /* Track the maximum number of outstanding packets in each
+ * window, and remember whether we were cwnd-limited then.
+ */
+ if (!before(tp->snd_una, tp->max_packets_seq) ||
+ tp->packets_out > tp->max_packets_out) {
+ tp->max_packets_out = tp->packets_out;
+ tp->max_packets_seq = tp->snd_nxt;
+ tp->is_cwnd_limited = is_cwnd_limited;
+ }
if (tcp_is_cwnd_limited(sk)) {
/* Network is feed fully. */
@@ -1617,7 +1625,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
/* All of a TSO frame must be composed of paged data. */
if (skb->len != skb->data_len)
- return tcp_fragment(sk, skb, len, mss_now);
+ return tcp_fragment(sk, skb, len, mss_now, gfp);
buff = sk_stream_alloc_skb(sk, 0, gfp);
if (unlikely(buff == NULL))
@@ -1660,7 +1668,8 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
*
* This algorithm is from John Heffner.
*/
-static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
+static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
+ bool *is_cwnd_limited)
{
struct tcp_sock *tp = tcp_sk(sk);
const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -1724,6 +1733,9 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
if (!tp->tso_deferred)
tp->tso_deferred = 1 | (jiffies << 1);
+ if (cong_win < send_win && cong_win < skb->len)
+ *is_cwnd_limited = true;
+
return true;
send_now:
@@ -1881,9 +1893,10 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
- unsigned int tso_segs, sent_pkts, unsent_segs = 0;
+ unsigned int tso_segs, sent_pkts;
int cwnd_quota;
int result;
+ bool is_cwnd_limited = false;
sent_pkts = 0;
@@ -1908,6 +1921,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
cwnd_quota = tcp_cwnd_test(tp, skb);
if (!cwnd_quota) {
+ is_cwnd_limited = true;
if (push_one == 2)
/* Force out a loss probe pkt. */
cwnd_quota = 1;
@@ -1924,8 +1938,9 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
nonagle : TCP_NAGLE_PUSH))))
break;
} else {
- if (!push_one && tcp_tso_should_defer(sk, skb))
- goto compute_unsent_segs;
+ if (!push_one &&
+ tcp_tso_should_defer(sk, skb, &is_cwnd_limited))
+ break;
}
/* TCP Small Queues :
@@ -1946,18 +1961,10 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
/* It is possible TX completion already happened
* before we set TSQ_THROTTLED, so we must
* test again the condition.
- * We abuse smp_mb__after_clear_bit() because
- * there is no smp_mb__after_set_bit() yet
*/
- smp_mb__after_clear_bit();
- if (atomic_read(&sk->sk_wmem_alloc) > limit) {
- u32 unsent_bytes;
-
-compute_unsent_segs:
- unsent_bytes = tp->write_seq - tp->snd_nxt;
- unsent_segs = DIV_ROUND_UP(unsent_bytes, mss_now);
+ smp_mb__after_atomic();
+ if (atomic_read(&sk->sk_wmem_alloc) > limit)
break;
- }
}
limit = mss_now;
@@ -1997,7 +2004,7 @@ repair:
/* Send one loss probe per tail loss episode. */
if (push_one != 2)
tcp_schedule_loss_probe(sk);
- tcp_cwnd_validate(sk, unsent_segs);
+ tcp_cwnd_validate(sk, is_cwnd_limited);
return false;
}
return (push_one == 2) || (!tp->packets_out && tcp_send_head(sk));
@@ -2113,7 +2120,8 @@ void tcp_send_loss_probe(struct sock *sk)
goto rearm_timer;
if ((pcount > 1) && (skb->len > (pcount - 1) * mss)) {
- if (unlikely(tcp_fragment(sk, skb, (pcount - 1) * mss, mss)))
+ if (unlikely(tcp_fragment(sk, skb, (pcount - 1) * mss, mss,
+ GFP_ATOMIC)))
goto rearm_timer;
skb = tcp_write_queue_tail(sk);
}
@@ -2121,9 +2129,7 @@ void tcp_send_loss_probe(struct sock *sk)
if (WARN_ON(!skb || !tcp_skb_pcount(skb)))
goto rearm_timer;
- /* Probe with zero data doesn't trigger fast recovery. */
- if (skb->len > 0)
- err = __tcp_retransmit_skb(sk, skb);
+ err = __tcp_retransmit_skb(sk, skb);
/* Record snd_nxt for loss detection. */
if (likely(!err))
@@ -2454,7 +2460,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
return -EAGAIN;
if (skb->len > cur_mss) {
- if (tcp_fragment(sk, skb, cur_mss, cur_mss))
+ if (tcp_fragment(sk, skb, cur_mss, cur_mss, GFP_ATOMIC))
return -ENOMEM; /* We'll try again later. */
} else {
int oldpcount = tcp_skb_pcount(skb);
@@ -2519,8 +2525,6 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
if (!tp->retrans_stamp)
tp->retrans_stamp = TCP_SKB_CB(skb)->when;
- tp->undo_retrans += tcp_skb_pcount(skb);
-
/* snd_nxt is stored to detect loss of retransmitted segment,
* see tcp_input.c tcp_sacktag_write_queue().
*/
@@ -2528,6 +2532,10 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
} else if (err != -EBUSY) {
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL);
}
+
+ if (tp->undo_retrans < 0)
+ tp->undo_retrans = 0;
+ tp->undo_retrans += tcp_skb_pcount(skb);
return err;
}
@@ -3235,7 +3243,7 @@ int tcp_write_wakeup(struct sock *sk)
skb->len > mss) {
seg_size = min(seg_size, mss);
TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
- if (tcp_fragment(sk, skb, seg_size, mss))
+ if (tcp_fragment(sk, skb, seg_size, mss, GFP_ATOMIC))
return -1;
} else if (!tcp_skb_pcount(skb))
tcp_set_skb_tso_segs(sk, skb, mss);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 590532a7bd2..7d5a8661df7 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -727,13 +727,12 @@ EXPORT_SYMBOL(udp_flush_pending_frames);
void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst)
{
struct udphdr *uh = udp_hdr(skb);
- struct sk_buff *frags = skb_shinfo(skb)->frag_list;
int offset = skb_transport_offset(skb);
int len = skb->len - offset;
int hlen = len;
__wsum csum = 0;
- if (!frags) {
+ if (!skb_has_frag_list(skb)) {
/*
* Only one fragment on the socket.
*/
@@ -742,15 +741,17 @@ void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst)
uh->check = ~csum_tcpudp_magic(src, dst, len,
IPPROTO_UDP, 0);
} else {
+ struct sk_buff *frags;
+
/*
* HW-checksum won't work as there are two or more
* fragments on the socket so that all csums of sk_buffs
* should be together
*/
- do {
+ skb_walk_frags(skb, frags) {
csum = csum_add(csum, frags->csum);
hlen -= frags->len;
- } while ((frags = frags->next));
+ }
csum = skb_checksum(skb, offset, hlen, csum);
skb->ip_summed = CHECKSUM_NONE;
@@ -762,6 +763,43 @@ void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst)
}
EXPORT_SYMBOL_GPL(udp4_hwcsum);
+/* Function to set UDP checksum for an IPv4 UDP packet. This is intended
+ * for the simple case like when setting the checksum for a UDP tunnel.
+ */
+void udp_set_csum(bool nocheck, struct sk_buff *skb,
+ __be32 saddr, __be32 daddr, int len)
+{
+ struct udphdr *uh = udp_hdr(skb);
+
+ if (nocheck)
+ uh->check = 0;
+ else if (skb_is_gso(skb))
+ uh->check = ~udp_v4_check(len, saddr, daddr, 0);
+ else if (skb_dst(skb) && skb_dst(skb)->dev &&
+ (skb_dst(skb)->dev->features & NETIF_F_V4_CSUM)) {
+
+ BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
+
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ skb->csum_start = skb_transport_header(skb) - skb->head;
+ skb->csum_offset = offsetof(struct udphdr, check);
+ uh->check = ~udp_v4_check(len, saddr, daddr, 0);
+ } else {
+ __wsum csum;
+
+ BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
+
+ uh->check = 0;
+ csum = skb_checksum(skb, 0, len, 0);
+ uh->check = udp_v4_check(len, saddr, daddr, csum);
+ if (uh->check == 0)
+ uh->check = CSUM_MANGLED_0;
+
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ }
+}
+EXPORT_SYMBOL(udp_set_csum);
+
static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4)
{
struct sock *sk = skb->sk;
@@ -785,7 +823,7 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4)
if (is_udplite) /* UDP-Lite */
csum = udplite_csum(skb);
- else if (sk->sk_no_check == UDP_CSUM_NOXMIT) { /* UDP csum disabled */
+ else if (sk->sk_no_check_tx) { /* UDP csum disabled */
skb->ip_summed = CHECKSUM_NONE;
goto send;
@@ -1550,8 +1588,11 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
goto csum_error;
- if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf))
+ if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf)) {
+ UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS,
+ is_udplite);
goto drop;
+ }
rc = 0;
@@ -1823,6 +1864,10 @@ static struct sock *__udp4_lib_mcast_demux_lookup(struct net *net,
unsigned int count, slot = udp_hashfn(net, hnum, udp_table.mask);
struct udp_hslot *hslot = &udp_table.hash[slot];
+ /* Do not bother scanning a too big list */
+ if (hslot->count > 10)
+ return NULL;
+
rcu_read_lock();
begin:
count = 0;
@@ -1968,7 +2013,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
int (*push_pending_frames)(struct sock *))
{
struct udp_sock *up = udp_sk(sk);
- int val;
+ int val, valbool;
int err = 0;
int is_udplite = IS_UDPLITE(sk);
@@ -1978,6 +2023,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
if (get_user(val, (int __user *)optval))
return -EFAULT;
+ valbool = val ? 1 : 0;
+
switch (optname) {
case UDP_CORK:
if (val != 0) {
@@ -2007,6 +2054,14 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
}
break;
+ case UDP_NO_CHECK6_TX:
+ up->no_check6_tx = valbool;
+ break;
+
+ case UDP_NO_CHECK6_RX:
+ up->no_check6_rx = valbool;
+ break;
+
/*
* UDP-Lite's partial checksum coverage (RFC 3828).
*/
@@ -2089,6 +2144,14 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname,
val = up->encap_type;
break;
+ case UDP_NO_CHECK6_TX:
+ val = up->no_check6_tx;
+ break;
+
+ case UDP_NO_CHECK6_RX:
+ val = up->no_check6_rx;
+ break;
+
/* The following two cannot be changed on UDP sockets, the return is
* always 0 (which corresponds to the full checksum coverage of UDP). */
case UDPLITE_SEND_CSCOV:
@@ -2473,7 +2536,11 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb);
__be16 protocol = skb->protocol;
netdev_features_t enc_features;
- int outer_hlen;
+ int udp_offset, outer_hlen;
+ unsigned int oldlen;
+ bool need_csum;
+
+ oldlen = (u16)~skb->len;
if (unlikely(!pskb_may_pull(skb, tnl_hlen)))
goto out;
@@ -2485,6 +2552,10 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
skb->mac_len = skb_inner_network_offset(skb);
skb->protocol = htons(ETH_P_TEB);
+ need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM);
+ if (need_csum)
+ skb->encap_hdr_csum = 1;
+
/* segment inner packet. */
enc_features = skb->dev->hw_enc_features & netif_skb_features(skb);
segs = skb_mac_gso_segment(skb, enc_features);
@@ -2495,10 +2566,11 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
}
outer_hlen = skb_tnl_header_len(skb);
+ udp_offset = outer_hlen - tnl_hlen;
skb = segs;
do {
struct udphdr *uh;
- int udp_offset = outer_hlen - tnl_hlen;
+ int len;
skb_reset_inner_headers(skb);
skb->encapsulation = 1;
@@ -2509,31 +2581,20 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
skb_reset_mac_header(skb);
skb_set_network_header(skb, mac_len);
skb_set_transport_header(skb, udp_offset);
+ len = skb->len - udp_offset;
uh = udp_hdr(skb);
- uh->len = htons(skb->len - udp_offset);
-
- /* csum segment if tunnel sets skb with csum. */
- if (protocol == htons(ETH_P_IP) && unlikely(uh->check)) {
- struct iphdr *iph = ip_hdr(skb);
+ uh->len = htons(len);
- uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
- skb->len - udp_offset,
- IPPROTO_UDP, 0);
- uh->check = csum_fold(skb_checksum(skb, udp_offset,
- skb->len - udp_offset, 0));
- if (uh->check == 0)
- uh->check = CSUM_MANGLED_0;
+ if (need_csum) {
+ __be32 delta = htonl(oldlen + len);
- } else if (protocol == htons(ETH_P_IPV6)) {
- struct ipv6hdr *ipv6h = ipv6_hdr(skb);
- u32 len = skb->len - udp_offset;
+ uh->check = ~csum_fold((__force __wsum)
+ ((__force u32)uh->check +
+ (__force u32)delta));
+ uh->check = gso_make_checksum(skb, ~uh->check);
- uh->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
- len, IPPROTO_UDP, 0);
- uh->check = csum_fold(skb_checksum(skb, udp_offset, len, 0));
if (uh->check == 0)
uh->check = CSUM_MANGLED_0;
- skb->ip_summed = CHECKSUM_NONE;
}
skb->protocol = protocol;
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 88b4023ecfc..546d2d439dd 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -56,7 +56,8 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
__wsum csum;
if (skb->encapsulation &&
- skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL) {
+ (skb_shinfo(skb)->gso_type &
+ (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))) {
segs = skb_udp_tunnel_segment(skb, features);
goto out;
}
@@ -71,8 +72,10 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY |
SKB_GSO_UDP_TUNNEL |
+ SKB_GSO_UDP_TUNNEL_CSUM |
SKB_GSO_IPIP |
- SKB_GSO_GRE | SKB_GSO_MPLS) ||
+ SKB_GSO_GRE | SKB_GSO_GRE_CSUM |
+ SKB_GSO_MPLS) ||
!(type & (SKB_GSO_UDP))))
goto out;
@@ -197,6 +200,7 @@ unflush:
}
skb_gro_pull(skb, sizeof(struct udphdr)); /* pull encapsulating udp header */
+ skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr));
pp = uo_priv->offload->callbacks.gro_receive(head, skb);
out_unlock:
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index 2c46acd4cc3..3b3efbda48e 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -70,7 +70,6 @@ static struct inet_protosw udplite4_protosw = {
.protocol = IPPROTO_UDPLITE,
.prot = &udplite_prot,
.ops = &inet_dgram_ops,
- .no_check = 0, /* must checksum (RFC 3828) */
.flags = INET_PROTOSW_PERMANENT,
};
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index 05f2b484954..91771a7c802 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -58,12 +58,12 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ?
0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF));
- ip_select_ident(skb, dst->child, NULL);
top_iph->ttl = ip4_dst_hoplimit(dst->child);
top_iph->saddr = x->props.saddr.a4;
top_iph->daddr = x->id.daddr.a4;
+ ip_select_ident(skb, NULL);
return 0;
}
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 8e8c018d9d2..d5f6bd9a210 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -62,10 +62,7 @@ int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
if (err)
return err;
- memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
- IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED;
-
- skb->protocol = htons(ETH_P_IP);
+ IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE;
return x->outer_mode->output2(x, skb);
}
@@ -73,27 +70,34 @@ EXPORT_SYMBOL(xfrm4_prepare_output);
int xfrm4_output_finish(struct sk_buff *skb)
{
+ memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
+ skb->protocol = htons(ETH_P_IP);
+
+#ifdef CONFIG_NETFILTER
+ IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED;
+#endif
+
+ return xfrm_output(skb);
+}
+
+static int __xfrm4_output(struct sk_buff *skb)
+{
+ struct xfrm_state *x = skb_dst(skb)->xfrm;
+
#ifdef CONFIG_NETFILTER
- if (!skb_dst(skb)->xfrm) {
+ if (!x) {
IPCB(skb)->flags |= IPSKB_REROUTED;
return dst_output(skb);
}
-
- IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED;
#endif
- skb->protocol = htons(ETH_P_IP);
- return xfrm_output(skb);
+ return x->outer_mode->afinfo->output_finish(skb);
}
int xfrm4_output(struct sock *sk, struct sk_buff *skb)
{
- struct dst_entry *dst = skb_dst(skb);
- struct xfrm_state *x = dst->xfrm;
-
return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, skb,
- NULL, dst->dev,
- x->outer_mode->afinfo->output_finish,
+ NULL, skb_dst(skb)->dev, __xfrm4_output,
!(IPCB(skb)->flags & IPSKB_REROUTED));
}
diff --git a/net/ipv4/xfrm4_protocol.c b/net/ipv4/xfrm4_protocol.c
index 7f7b243e813..a2ce0101eaa 100644
--- a/net/ipv4/xfrm4_protocol.c
+++ b/net/ipv4/xfrm4_protocol.c
@@ -50,8 +50,12 @@ int xfrm4_rcv_cb(struct sk_buff *skb, u8 protocol, int err)
{
int ret;
struct xfrm4_protocol *handler;
+ struct xfrm4_protocol __rcu **head = proto_handlers(protocol);
- for_each_protocol_rcu(*proto_handlers(protocol), handler)
+ if (!head)
+ return 0;
+
+ for_each_protocol_rcu(*head, handler)
if ((ret = handler->cb_handler(skb, err)) <= 0)
return ret;
@@ -64,15 +68,20 @@ int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi,
{
int ret;
struct xfrm4_protocol *handler;
+ struct xfrm4_protocol __rcu **head = proto_handlers(nexthdr);
XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL;
XFRM_SPI_SKB_CB(skb)->family = AF_INET;
XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
- for_each_protocol_rcu(*proto_handlers(nexthdr), handler)
+ if (!head)
+ goto out;
+
+ for_each_protocol_rcu(*head, handler)
if ((ret = handler->input_handler(skb, nexthdr, spi, encap_type)) != -EINVAL)
return ret;
+out:
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
kfree_skb(skb);
@@ -208,6 +217,9 @@ int xfrm4_protocol_register(struct xfrm4_protocol *handler,
int ret = -EEXIST;
int priority = handler->priority;
+ if (!proto_handlers(protocol) || !netproto(protocol))
+ return -EINVAL;
+
mutex_lock(&xfrm4_protocol_mutex);
if (!rcu_dereference_protected(*proto_handlers(protocol),
@@ -250,6 +262,9 @@ int xfrm4_protocol_deregister(struct xfrm4_protocol *handler,
struct xfrm4_protocol *t;
int ret = -ENOENT;
+ if (!proto_handlers(protocol) || !netproto(protocol))
+ return -EINVAL;
+
mutex_lock(&xfrm4_protocol_mutex);
for (pprev = proto_handlers(protocol);
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index dc47cc757b8..7cb4392690d 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -106,7 +106,6 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol,
struct inet_protosw *answer;
struct proto *answer_prot;
unsigned char answer_flags;
- char answer_no_check;
int try_loading_module = 0;
int err;
@@ -162,7 +161,6 @@ lookup_protocol:
sock->ops = answer->ops;
answer_prot = answer->prot;
- answer_no_check = answer->no_check;
answer_flags = answer->flags;
rcu_read_unlock();
@@ -176,7 +174,6 @@ lookup_protocol:
sock_init_data(sock, sk);
err = 0;
- sk->sk_no_check = answer_no_check;
if (INET_PROTOSW_REUSE & answer_flags)
sk->sk_reuse = SK_CAN_REUSE;
diff --git a/net/ipv6/ip6_checksum.c b/net/ipv6/ip6_checksum.c
index da26224a599..9a4d7322fb2 100644
--- a/net/ipv6/ip6_checksum.c
+++ b/net/ipv6/ip6_checksum.c
@@ -84,3 +84,41 @@ int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto)
ip6_compute_pseudo);
}
EXPORT_SYMBOL(udp6_csum_init);
+
+/* Function to set UDP checksum for an IPv6 UDP packet. This is intended
+ * for the simple case like when setting the checksum for a UDP tunnel.
+ */
+void udp6_set_csum(bool nocheck, struct sk_buff *skb,
+ const struct in6_addr *saddr,
+ const struct in6_addr *daddr, int len)
+{
+ struct udphdr *uh = udp_hdr(skb);
+
+ if (nocheck)
+ uh->check = 0;
+ else if (skb_is_gso(skb))
+ uh->check = ~udp_v6_check(len, saddr, daddr, 0);
+ else if (skb_dst(skb) && skb_dst(skb)->dev &&
+ (skb_dst(skb)->dev->features & NETIF_F_IPV6_CSUM)) {
+
+ BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
+
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ skb->csum_start = skb_transport_header(skb) - skb->head;
+ skb->csum_offset = offsetof(struct udphdr, check);
+ uh->check = ~udp_v6_check(len, saddr, daddr, 0);
+ } else {
+ __wsum csum;
+
+ BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
+
+ uh->check = 0;
+ csum = skb_checksum(skb, 0, len, 0);
+ uh->check = udp_v6_check(len, saddr, daddr, csum);
+ if (uh->check == 0)
+ uh->check = CSUM_MANGLED_0;
+
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ }
+}
+EXPORT_SYMBOL(udp6_set_csum);
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index 59f95affceb..65eda2a8af4 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -97,9 +97,11 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
SKB_GSO_DODGY |
SKB_GSO_TCP_ECN |
SKB_GSO_GRE |
+ SKB_GSO_GRE_CSUM |
SKB_GSO_IPIP |
SKB_GSO_SIT |
SKB_GSO_UDP_TUNNEL |
+ SKB_GSO_UDP_TUNNEL_CSUM |
SKB_GSO_MPLS |
SKB_GSO_TCPV6 |
0)))
@@ -196,7 +198,6 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head,
unsigned int off;
u16 flush = 1;
int proto;
- __wsum csum;
off = skb_gro_offset(skb);
hlen = off + sizeof(*iph);
@@ -264,13 +265,10 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head,
NAPI_GRO_CB(skb)->flush |= flush;
- csum = skb->csum;
- skb_postpull_rcsum(skb, iph, skb_network_header_len(skb));
+ skb_gro_postpull_rcsum(skb, iph, nlen);
pp = ops->callbacks.gro_receive(head, skb);
- skb->csum = csum;
-
out_unlock:
rcu_read_unlock();
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index ab0cc57f779..45702b8cd14 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -537,6 +537,20 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
skb_copy_secmark(to, from);
}
+static void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
+{
+ static u32 ip6_idents_hashrnd __read_mostly;
+ u32 hash, id;
+
+ net_get_random_once(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd));
+
+ hash = __ipv6_addr_jhash(&rt->rt6i_dst.addr, ip6_idents_hashrnd);
+ hash = __ipv6_addr_jhash(&rt->rt6i_src.addr, hash);
+
+ id = ip_idents_reserve(hash, 1);
+ fhdr->identification = htonl(id);
+}
+
int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
struct sk_buff *frag;
@@ -1229,7 +1243,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
unsigned int maxnonfragsize, headersize;
headersize = sizeof(struct ipv6hdr) +
- (opt ? opt->tot_len : 0) +
+ (opt ? opt->opt_flen + opt->opt_nflen : 0) +
(dst_allfrag(&rt->dst) ?
sizeof(struct frag_hdr) : 0) +
rt->rt6i_nfheader_len;
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index b05b609f69d..afa08245836 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -61,6 +61,7 @@
MODULE_AUTHOR("Ville Nuorvala");
MODULE_DESCRIPTION("IPv6 tunneling device");
MODULE_LICENSE("GPL");
+MODULE_ALIAS_RTNL_LINK("ip6tnl");
MODULE_ALIAS_NETDEV("ip6tnl0");
#ifdef IP6_TNL_DEBUG
@@ -1557,7 +1558,7 @@ static int ip6_tnl_validate(struct nlattr *tb[], struct nlattr *data[])
{
u8 proto;
- if (!data)
+ if (!data || !data[IFLA_IPTUN_PROTO])
return 0;
proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 2953c0c26c2..9aaa6bb229e 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -511,6 +511,7 @@ static int vti6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
__be32 spi;
+ __u32 mark;
struct xfrm_state *x;
struct ip6_tnl *t;
struct ip_esp_hdr *esph;
@@ -524,6 +525,8 @@ static int vti6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (!t)
return -1;
+ mark = be32_to_cpu(t->parms.o_key);
+
switch (protocol) {
case IPPROTO_ESP:
esph = (struct ip_esp_hdr *)(skb->data + offset);
@@ -545,7 +548,7 @@ static int vti6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
type != NDISC_REDIRECT)
return 0;
- x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
+ x = xfrm_state_lookup(net, mark, (const xfrm_address_t *)&iph->daddr,
spi, protocol, AF_INET6);
if (!x)
return 0;
@@ -1094,7 +1097,6 @@ static int __init vti6_tunnel_init(void)
err = xfrm6_protocol_register(&vti_esp6_protocol, IPPROTO_ESP);
if (err < 0) {
- unregister_pernet_device(&vti6_net_ops);
pr_err("%s: can't register vti6 protocol\n", __func__);
goto out;
@@ -1103,7 +1105,6 @@ static int __init vti6_tunnel_init(void)
err = xfrm6_protocol_register(&vti_ah6_protocol, IPPROTO_AH);
if (err < 0) {
xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP);
- unregister_pernet_device(&vti6_net_ops);
pr_err("%s: can't register vti6 protocol\n", __func__);
goto out;
@@ -1113,7 +1114,6 @@ static int __init vti6_tunnel_init(void)
if (err < 0) {
xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH);
xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP);
- unregister_pernet_device(&vti6_net_ops);
pr_err("%s: can't register vti6 protocol\n", __func__);
goto out;
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 08b367c6b9c..617f0958e16 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1301,8 +1301,17 @@ int igmp6_event_query(struct sk_buff *skb)
len = ntohs(ipv6_hdr(skb)->payload_len) + sizeof(struct ipv6hdr);
len -= skb_network_header_len(skb);
- /* Drop queries with not link local source */
- if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL))
+ /* RFC3810 6.2
+ * Upon reception of an MLD message that contains a Query, the node
+ * checks if the source address of the message is a valid link-local
+ * address, if the Hop Limit is set to 1, and if the Router Alert
+ * option is present in the Hop-By-Hop Options header of the IPv6
+ * packet. If any of these checks fails, the packet is dropped.
+ */
+ if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL) ||
+ ipv6_hdr(skb)->hop_limit != 1 ||
+ !(IP6CB(skb)->flags & IP6SKB_ROUTERALERT) ||
+ IP6CB(skb)->ra != htons(IPV6_OPT_ROUTERALERT_MLD))
return -EINVAL;
idev = __in6_dev_get(skb->dev);
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 09a22f4f36c..ca8d4ea48a5 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -851,7 +851,7 @@ out:
static void ndisc_recv_na(struct sk_buff *skb)
{
struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
- const struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
+ struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
const struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
u8 *lladdr = NULL;
u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) +
@@ -944,10 +944,7 @@ static void ndisc_recv_na(struct sk_buff *skb)
/*
* Change: router to host
*/
- struct rt6_info *rt;
- rt = rt6_get_dflt_router(saddr, dev);
- if (rt)
- ip6_del_rt(rt);
+ rt6_clean_tohost(dev_net(dev), saddr);
}
out:
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index 84c7f33d0cf..387d8b8fc18 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -90,17 +90,9 @@ nf_nat_ipv6_fn(const struct nf_hook_ops *ops,
if (nf_ct_is_untracked(ct))
return NF_ACCEPT;
- nat = nfct_nat(ct);
- if (!nat) {
- /* NAT module was loaded late. */
- if (nf_ct_is_confirmed(ct))
- return NF_ACCEPT;
- nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
- if (nat == NULL) {
- pr_debug("failed to add NAT extension\n");
- return NF_ACCEPT;
- }
- }
+ nat = nf_ct_nat_ext_add(ct);
+ if (nat == NULL)
+ return NF_ACCEPT;
switch (ctinfo) {
case IP_CT_RELATED:
diff --git a/net/ipv6/netfilter/nft_chain_nat_ipv6.c b/net/ipv6/netfilter/nft_chain_nat_ipv6.c
index 9c3297a768f..d189fcb437f 100644
--- a/net/ipv6/netfilter/nft_chain_nat_ipv6.c
+++ b/net/ipv6/netfilter/nft_chain_nat_ipv6.c
@@ -47,15 +47,9 @@ static unsigned int nf_nat_ipv6_fn(const struct nf_hook_ops *ops,
if (ct == NULL || nf_ct_is_untracked(ct))
return NF_ACCEPT;
- nat = nfct_nat(ct);
- if (nat == NULL) {
- /* Conntrack module was loaded late, can't add extension. */
- if (nf_ct_is_confirmed(ct))
- return NF_ACCEPT;
- nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
- if (nat == NULL)
- return NF_ACCEPT;
- }
+ nat = nf_ct_nat_ext_add(ct);
+ if (nat == NULL)
+ return NF_ACCEPT;
switch (ctinfo) {
case IP_CT_RELATED:
diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
index 6313abd53c9..5ec867e4a8b 100644
--- a/net/ipv6/output_core.c
+++ b/net/ipv6/output_core.c
@@ -8,37 +8,6 @@
#include <net/addrconf.h>
#include <net/secure_seq.h>
-void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
-{
- static atomic_t ipv6_fragmentation_id;
- struct in6_addr addr;
- int old, new;
-
-#if IS_ENABLED(CONFIG_IPV6)
- struct inet_peer *peer;
- struct net *net;
-
- net = dev_net(rt->dst.dev);
- peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
- if (peer) {
- fhdr->identification = htonl(inet_getid(peer, 0));
- inet_putpeer(peer);
- return;
- }
-#endif
- do {
- old = atomic_read(&ipv6_fragmentation_id);
- new = old + 1;
- if (!new)
- new = 1;
- } while (atomic_cmpxchg(&ipv6_fragmentation_id, old, new) != old);
-
- addr = rt->rt6i_dst.addr;
- addr.s6_addr32[0] ^= (__force __be32)new;
- fhdr->identification = htonl(secure_ipv6_id(addr.s6_addr32));
-}
-EXPORT_SYMBOL(ipv6_select_ident);
-
int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
{
u16 offset = sizeof(struct ipv6hdr);
@@ -109,6 +78,7 @@ int __ip6_local_out(struct sk_buff *skb)
if (len > IPV6_MAXPLEN)
len = 0;
ipv6_hdr(skb)->payload_len = htons(len);
+ IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);
return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
skb_dst(skb)->dev, dst_output);
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index a2a1d80dfe0..5b7a1ed2aba 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -51,7 +51,6 @@ static struct inet_protosw pingv6_protosw = {
.protocol = IPPROTO_ICMPV6,
.prot = &pingv6_prot,
.ops = &inet6_dgram_ops,
- .no_check = UDP_CSUM_DEFAULT,
.flags = INET_PROTOSW_REUSE,
};
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index dddfb5fa2b7..b2dc60b0c76 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -1322,7 +1322,6 @@ static struct inet_protosw rawv6_protosw = {
.protocol = IPPROTO_IP, /* wild card */
.prot = &rawv6_prot,
.ops = &inet6_sockraw_ops,
- .no_check = UDP_CSUM_DEFAULT,
.flags = INET_PROTOSW_REUSE,
};
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index f0a8ff9ed89..f23fbd28a50 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1455,7 +1455,7 @@ static int ip6_dst_gc(struct dst_ops *ops)
goto out;
net->ipv6.ip6_rt_gc_expire++;
- fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, entries > rt_max_size);
+ fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
entries = dst_entries_get_slow(ops);
if (entries < ops->gc_thresh)
net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
@@ -2234,6 +2234,27 @@ void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
fib6_clean_all(net, fib6_remove_prefsrc, &adni);
}
+#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
+#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
+
+/* Remove routers and update dst entries when gateway turn into host. */
+static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
+{
+ struct in6_addr *gateway = (struct in6_addr *)arg;
+
+ if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
+ ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
+ ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
+ return -1;
+ }
+ return 0;
+}
+
+void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
+{
+ fib6_clean_all(net, fib6_clean_tohost, gateway);
+}
+
struct arg_dev_net {
struct net_device *dev;
struct net *net;
@@ -2709,6 +2730,9 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh)
if (tb[RTA_OIF])
oif = nla_get_u32(tb[RTA_OIF]);
+ if (tb[RTA_MARK])
+ fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
+
if (iif) {
struct net_device *dev;
int flags = 0;
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index e5a453ca302..4f408176dc6 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -560,12 +560,12 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
ipv4_update_pmtu(skb, dev_net(skb->dev), info,
- t->dev->ifindex, 0, IPPROTO_IPV6, 0);
+ t->parms.link, 0, IPPROTO_IPV6, 0);
err = 0;
goto out;
}
if (type == ICMP_REDIRECT) {
- ipv4_redirect(skb, dev_net(skb->dev), t->dev->ifindex, 0,
+ ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
IPPROTO_IPV6, 0);
err = 0;
goto out;
@@ -1828,4 +1828,5 @@ xfrm_tunnel_failed:
module_init(sit_init);
module_exit(sit_cleanup);
MODULE_LICENSE("GPL");
+MODULE_ALIAS_RTNL_LINK("sit");
MODULE_ALIAS_NETDEV("sit0");
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index f07b2abba35..229239ad96b 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1992,7 +1992,6 @@ static struct inet_protosw tcpv6_protosw = {
.protocol = IPPROTO_TCP,
.prot = &tcpv6_prot,
.ops = &inet6_stream_ops,
- .no_check = 0,
.flags = INET_PROTOSW_PERMANENT |
INET_PROTOSW_ICSK,
};
diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c
index 0d78132ff18..01b0ff9a0c2 100644
--- a/net/ipv6/tcpv6_offload.c
+++ b/net/ipv6/tcpv6_offload.c
@@ -42,7 +42,7 @@ static struct sk_buff **tcp6_gro_receive(struct sk_buff **head,
if (NAPI_GRO_CB(skb)->flush)
goto skip_csum;
- wsum = skb->csum;
+ wsum = NAPI_GRO_CB(skb)->csum;
switch (skb->ip_summed) {
case CHECKSUM_NONE:
@@ -73,7 +73,7 @@ static int tcp6_gro_complete(struct sk_buff *skb, int thoff)
th->check = ~tcp_v6_check(skb->len - thoff, &iph->saddr,
&iph->daddr, 0);
- skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
+ skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV6;
return tcp_gro_complete(skb);
}
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 7edf096867c..7092ff78fd8 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -674,8 +674,11 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
goto csum_error;
}
- if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf))
+ if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf)) {
+ UDP6_INC_STATS_BH(sock_net(sk),
+ UDP_MIB_RCVBUFERRORS, is_udplite);
goto drop;
+ }
skb_dst_drop(skb);
@@ -690,6 +693,7 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
bh_unlock_sock(sk);
return rc;
+
csum_error:
UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);
drop:
@@ -705,17 +709,16 @@ static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk,
int dif)
{
struct hlist_nulls_node *node;
- struct sock *s = sk;
unsigned short num = ntohs(loc_port);
- sk_nulls_for_each_from(s, node) {
- struct inet_sock *inet = inet_sk(s);
+ sk_nulls_for_each_from(sk, node) {
+ struct inet_sock *inet = inet_sk(sk);
- if (!net_eq(sock_net(s), net))
+ if (!net_eq(sock_net(sk), net))
continue;
- if (udp_sk(s)->udp_port_hash == num &&
- s->sk_family == PF_INET6) {
+ if (udp_sk(sk)->udp_port_hash == num &&
+ sk->sk_family == PF_INET6) {
if (inet->inet_dport) {
if (inet->inet_dport != rmt_port)
continue;
@@ -724,16 +727,16 @@ static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk,
!ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr))
continue;
- if (s->sk_bound_dev_if && s->sk_bound_dev_if != dif)
+ if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)
continue;
if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr))
continue;
}
- if (!inet6_mc_check(s, loc_addr, rmt_addr))
+ if (!inet6_mc_check(sk, loc_addr, rmt_addr))
continue;
- return s;
+ return sk;
}
}
return NULL;
@@ -794,10 +797,10 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
dif = inet6_iif(skb);
sk = udp_v6_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif);
while (sk) {
- /* If zero checksum and sk_no_check is not on for
+ /* If zero checksum and no_check is not on for
* the socket then skip it.
*/
- if (uh->check || sk->sk_no_check)
+ if (uh->check || udp_sk(sk)->no_check6_rx)
stack[count++] = sk;
sk = udp_v6_mcast_next(net, sk_nulls_next(sk), uh->dest, daddr,
@@ -887,7 +890,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
if (sk != NULL) {
int ret;
- if (!uh->check && !sk->sk_no_check) {
+ if (!uh->check && !udp_sk(sk)->no_check6_rx) {
sock_put(sk);
udp6_csum_zero_error(skb);
goto csum_error;
@@ -1037,7 +1040,7 @@ static int udp_v6_push_pending_frames(struct sock *sk)
if (is_udplite)
csum = udplite_csum_outgoing(sk, skb);
- else if (sk->sk_no_check == UDP_CSUM_NOXMIT) { /* UDP csum disabled */
+ else if (up->no_check6_tx) { /* UDP csum disabled */
skb->ip_summed = CHECKSUM_NONE;
goto send;
} else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
@@ -1507,7 +1510,6 @@ static struct inet_protosw udpv6_protosw = {
.protocol = IPPROTO_UDP,
.prot = &udpv6_prot,
.ops = &inet6_dgram_ops,
- .no_check = UDP_CSUM_DEFAULT,
.flags = INET_PROTOSW_PERMANENT,
};
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index b261ee8b83f..0ae3d98f83e 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -63,7 +63,9 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
if (unlikely(type & ~(SKB_GSO_UDP |
SKB_GSO_DODGY |
SKB_GSO_UDP_TUNNEL |
+ SKB_GSO_UDP_TUNNEL_CSUM |
SKB_GSO_GRE |
+ SKB_GSO_GRE_CSUM |
SKB_GSO_IPIP |
SKB_GSO_SIT |
SKB_GSO_MPLS) ||
@@ -76,7 +78,8 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
goto out;
}
- if (skb->encapsulation && skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL)
+ if (skb->encapsulation && skb_shinfo(skb)->gso_type &
+ (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))
segs = skb_udp_tunnel_segment(skb, features);
else {
/* Do software UFO. Complete and fill in the UDP checksum as HW cannot
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index dfcc4be4689..9cf097e206e 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -64,7 +64,6 @@ static struct inet_protosw udplite6_protosw = {
.protocol = IPPROTO_UDPLITE,
.prot = &udplitev6_prot,
.ops = &inet6_dgram_ops,
- .no_check = 0,
.flags = INET_PROTOSW_PERMANENT,
};
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index f47c8b153dd..433672d07d0 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -114,12 +114,6 @@ int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
if (err)
return err;
- memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
-#ifdef CONFIG_NETFILTER
- IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED;
-#endif
-
- skb->protocol = htons(ETH_P_IPV6);
skb->ignore_df = 1;
return x->outer_mode->output2(x, skb);
@@ -128,11 +122,13 @@ EXPORT_SYMBOL(xfrm6_prepare_output);
int xfrm6_output_finish(struct sk_buff *skb)
{
+ memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
+ skb->protocol = htons(ETH_P_IPV6);
+
#ifdef CONFIG_NETFILTER
IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED;
#endif
- skb->protocol = htons(ETH_P_IPV6);
return xfrm_output(skb);
}
@@ -142,6 +138,13 @@ static int __xfrm6_output(struct sk_buff *skb)
struct xfrm_state *x = dst->xfrm;
int mtu;
+#ifdef CONFIG_NETFILTER
+ if (!x) {
+ IP6CB(skb)->flags |= IP6SKB_REROUTED;
+ return dst_output(skb);
+ }
+#endif
+
if (skb->protocol == htons(ETH_P_IPV6))
mtu = ip6_skb_dst_mtu(skb);
else
@@ -165,6 +168,7 @@ static int __xfrm6_output(struct sk_buff *skb)
int xfrm6_output(struct sock *sk, struct sk_buff *skb)
{
- return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL,
- skb_dst(skb)->dev, __xfrm6_output);
+ return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb,
+ NULL, skb_dst(skb)->dev, __xfrm6_output,
+ !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
diff --git a/net/ipv6/xfrm6_protocol.c b/net/ipv6/xfrm6_protocol.c
index 6ab989c486f..54d13f8dbba 100644
--- a/net/ipv6/xfrm6_protocol.c
+++ b/net/ipv6/xfrm6_protocol.c
@@ -50,6 +50,10 @@ int xfrm6_rcv_cb(struct sk_buff *skb, u8 protocol, int err)
{
int ret;
struct xfrm6_protocol *handler;
+ struct xfrm6_protocol __rcu **head = proto_handlers(protocol);
+
+ if (!head)
+ return 0;
for_each_protocol_rcu(*proto_handlers(protocol), handler)
if ((ret = handler->cb_handler(skb, err)) <= 0)
@@ -184,10 +188,12 @@ int xfrm6_protocol_register(struct xfrm6_protocol *handler,
struct xfrm6_protocol __rcu **pprev;
struct xfrm6_protocol *t;
bool add_netproto = false;
-
int ret = -EEXIST;
int priority = handler->priority;
+ if (!proto_handlers(protocol) || !netproto(protocol))
+ return -EINVAL;
+
mutex_lock(&xfrm6_protocol_mutex);
if (!rcu_dereference_protected(*proto_handlers(protocol),
@@ -230,6 +236,9 @@ int xfrm6_protocol_deregister(struct xfrm6_protocol *handler,
struct xfrm6_protocol *t;
int ret = -ENOENT;
+ if (!proto_handlers(protocol) || !netproto(protocol))
+ return -EINVAL;
+
mutex_lock(&xfrm6_protocol_mutex);
for (pprev = proto_handlers(protocol);
diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
index 41e4e93cb3a..91729b807c7 100644
--- a/net/ipx/af_ipx.c
+++ b/net/ipx/af_ipx.c
@@ -1353,7 +1353,7 @@ static int ipx_create(struct net *net, struct socket *sock, int protocol,
sk_refcnt_debug_inc(sk);
sock_init_data(sock, sk);
- sk->sk_no_check = 1; /* Checksum off by default */
+ sk->sk_no_check_tx = 1; /* Checksum off by default */
sock->ops = &ipx_dgram_ops;
rc = 0;
out:
diff --git a/net/ipx/ipx_route.c b/net/ipx/ipx_route.c
index c1f03185c5e..67e7ad3d46b 100644
--- a/net/ipx/ipx_route.c
+++ b/net/ipx/ipx_route.c
@@ -236,7 +236,8 @@ int ipxrtr_route_packet(struct sock *sk, struct sockaddr_ipx *usipx,
}
/* Apply checksum. Not allowed on 802.3 links. */
- if (sk->sk_no_check || intrfc->if_dlink_type == htons(IPX_FRAME_8023))
+ if (sk->sk_no_check_tx ||
+ intrfc->if_dlink_type == htons(IPX_FRAME_8023))
ipx->ipx_checksum = htons(0xFFFF);
else
ipx->ipx_checksum = ipx_cksum(ipx, len + sizeof(struct ipxhdr));
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 01e77b0ae07..7a95fa4a3de 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -682,6 +682,18 @@ struct sock *iucv_accept_dequeue(struct sock *parent, struct socket *newsock)
return NULL;
}
+static void __iucv_auto_name(struct iucv_sock *iucv)
+{
+ char name[12];
+
+ sprintf(name, "%08x", atomic_inc_return(&iucv_sk_list.autobind_name));
+ while (__iucv_get_sock_by_name(name)) {
+ sprintf(name, "%08x",
+ atomic_inc_return(&iucv_sk_list.autobind_name));
+ }
+ memcpy(iucv->src_name, name, 8);
+}
+
/* Bind an unbound socket */
static int iucv_sock_bind(struct socket *sock, struct sockaddr *addr,
int addr_len)
@@ -724,8 +736,12 @@ static int iucv_sock_bind(struct socket *sock, struct sockaddr *addr,
rcu_read_lock();
for_each_netdev_rcu(&init_net, dev) {
if (!memcmp(dev->perm_addr, uid, 8)) {
- memcpy(iucv->src_name, sa->siucv_name, 8);
memcpy(iucv->src_user_id, sa->siucv_user_id, 8);
+ /* Check for unitialized siucv_name */
+ if (strncmp(sa->siucv_name, " ", 8) == 0)
+ __iucv_auto_name(iucv);
+ else
+ memcpy(iucv->src_name, sa->siucv_name, 8);
sk->sk_bound_dev_if = dev->ifindex;
iucv->hs_dev = dev;
dev_hold(dev);
@@ -763,7 +779,6 @@ done:
static int iucv_sock_autobind(struct sock *sk)
{
struct iucv_sock *iucv = iucv_sk(sk);
- char name[12];
int err = 0;
if (unlikely(!pr_iucv))
@@ -772,17 +787,9 @@ static int iucv_sock_autobind(struct sock *sk)
memcpy(iucv->src_user_id, iucv_userid, 8);
write_lock_bh(&iucv_sk_list.lock);
-
- sprintf(name, "%08x", atomic_inc_return(&iucv_sk_list.autobind_name));
- while (__iucv_get_sock_by_name(name)) {
- sprintf(name, "%08x",
- atomic_inc_return(&iucv_sk_list.autobind_name));
- }
-
+ __iucv_auto_name(iucv);
write_unlock_bh(&iucv_sk_list.lock);
- memcpy(&iucv->src_name, name, 8);
-
if (!iucv->msglimit)
iucv->msglimit = IUCV_QUEUELEN_DEFAULT;
@@ -1830,7 +1837,7 @@ static void iucv_callback_txdone(struct iucv_path *path,
spin_lock_irqsave(&list->lock, flags);
while (list_skb != (struct sk_buff *)list) {
- if (msg->tag != IUCV_SKB_CB(list_skb)->tag) {
+ if (msg->tag == IUCV_SKB_CB(list_skb)->tag) {
this = list_skb;
break;
}
@@ -1936,11 +1943,10 @@ static int afiucv_hs_callback_syn(struct sock *sk, struct sk_buff *skb)
sk_acceptq_is_full(sk) ||
!nsk) {
/* error on server socket - connection refused */
- if (nsk)
- sk_free(nsk);
afiucv_swap_src_dest(skb);
trans_hdr->flags = AF_IUCV_FLAG_SYN | AF_IUCV_FLAG_FIN;
err = dev_queue_xmit(skb);
+ iucv_sock_kill(nsk);
bh_unlock_sock(sk);
goto out;
}
diff --git a/net/key/af_key.c b/net/key/af_key.c
index f3c83073afc..ba2a2f95911 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -1476,9 +1476,7 @@ static int pfkey_add(struct sock *sk, struct sk_buff *skb, const struct sadb_msg
else
err = xfrm_state_update(x);
- xfrm_audit_state_add(x, err ? 0 : 1,
- audit_get_loginuid(current),
- audit_get_sessionid(current), 0);
+ xfrm_audit_state_add(x, err ? 0 : 1, true);
if (err < 0) {
x->km.state = XFRM_STATE_DEAD;
@@ -1532,9 +1530,7 @@ static int pfkey_delete(struct sock *sk, struct sk_buff *skb, const struct sadb_
c.event = XFRM_MSG_DELSA;
km_state_notify(x, &c);
out:
- xfrm_audit_state_delete(x, err ? 0 : 1,
- audit_get_loginuid(current),
- audit_get_sessionid(current), 0);
+ xfrm_audit_state_delete(x, err ? 0 : 1, true);
xfrm_state_put(x);
return err;
@@ -1726,17 +1722,13 @@ static int pfkey_flush(struct sock *sk, struct sk_buff *skb, const struct sadb_m
struct net *net = sock_net(sk);
unsigned int proto;
struct km_event c;
- struct xfrm_audit audit_info;
int err, err2;
proto = pfkey_satype2proto(hdr->sadb_msg_satype);
if (proto == 0)
return -EINVAL;
- audit_info.loginuid = audit_get_loginuid(current);
- audit_info.sessionid = audit_get_sessionid(current);
- audit_info.secid = 0;
- err = xfrm_state_flush(net, proto, &audit_info);
+ err = xfrm_state_flush(net, proto, true);
err2 = unicast_flush_resp(sk, hdr);
if (err || err2) {
if (err == -ESRCH) /* empty table - go quietly */
@@ -2288,9 +2280,7 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, const struct sadb_
err = xfrm_policy_insert(pol->sadb_x_policy_dir-1, xp,
hdr->sadb_msg_type != SADB_X_SPDUPDATE);
- xfrm_audit_policy_add(xp, err ? 0 : 1,
- audit_get_loginuid(current),
- audit_get_sessionid(current), 0);
+ xfrm_audit_policy_add(xp, err ? 0 : 1, true);
if (err)
goto out;
@@ -2372,9 +2362,7 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sa
if (xp == NULL)
return -ENOENT;
- xfrm_audit_policy_delete(xp, err ? 0 : 1,
- audit_get_loginuid(current),
- audit_get_sessionid(current), 0);
+ xfrm_audit_policy_delete(xp, err ? 0 : 1, true);
if (err)
goto out;
@@ -2553,7 +2541,7 @@ static int pfkey_migrate(struct sock *sk, struct sk_buff *skb,
sel.sport_mask = htons(0xffff);
/* set destination address info of selector */
- sa = ext_hdrs[SADB_EXT_ADDRESS_DST - 1],
+ sa = ext_hdrs[SADB_EXT_ADDRESS_DST - 1];
pfkey_sadb_addr2xfrm_addr(sa, &sel.daddr);
sel.prefixlen_d = sa->sadb_address_prefixlen;
sel.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto);
@@ -2622,9 +2610,7 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, const struct sadb_
return -ENOENT;
if (delete) {
- xfrm_audit_policy_delete(xp, err ? 0 : 1,
- audit_get_loginuid(current),
- audit_get_sessionid(current), 0);
+ xfrm_audit_policy_delete(xp, err ? 0 : 1, true);
if (err)
goto out;
@@ -2733,13 +2719,9 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, const struct sad
{
struct net *net = sock_net(sk);
struct km_event c;
- struct xfrm_audit audit_info;
int err, err2;
- audit_info.loginuid = audit_get_loginuid(current);
- audit_info.sessionid = audit_get_sessionid(current);
- audit_info.secid = 0;
- err = xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, &audit_info);
+ err = xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, true);
err2 = unicast_flush_resp(sk, hdr);
if (err || err2) {
if (err == -ESRCH) /* empty table - old silent behavior */
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index ed0716a075b..bea25904320 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1095,31 +1095,6 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb,
return 0;
}
-#if IS_ENABLED(CONFIG_IPV6)
-static void l2tp_xmit_ipv6_csum(struct sock *sk, struct sk_buff *skb,
- int udp_len)
-{
- struct ipv6_pinfo *np = inet6_sk(sk);
- struct udphdr *uh = udp_hdr(skb);
-
- if (!skb_dst(skb) || !skb_dst(skb)->dev ||
- !(skb_dst(skb)->dev->features & NETIF_F_IPV6_CSUM)) {
- __wsum csum = skb_checksum(skb, 0, udp_len, 0);
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- uh->check = csum_ipv6_magic(&np->saddr, &sk->sk_v6_daddr, udp_len,
- IPPROTO_UDP, csum);
- if (uh->check == 0)
- uh->check = CSUM_MANGLED_0;
- } else {
- skb->ip_summed = CHECKSUM_PARTIAL;
- skb->csum_start = skb_transport_header(skb) - skb->head;
- skb->csum_offset = offsetof(struct udphdr, check);
- uh->check = ~csum_ipv6_magic(&np->saddr, &sk->sk_v6_daddr,
- udp_len, IPPROTO_UDP, 0);
- }
-}
-#endif
-
/* If caller requires the skb to have a ppp header, the header must be
* inserted in the skb data before calling this function.
*/
@@ -1131,7 +1106,6 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len
struct flowi *fl;
struct udphdr *uh;
struct inet_sock *inet;
- __wsum csum;
int headroom;
int uhlen = (tunnel->encap == L2TP_ENCAPTYPE_UDP) ? sizeof(struct udphdr) : 0;
int udp_len;
@@ -1180,33 +1154,17 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len
uh->dest = inet->inet_dport;
udp_len = uhlen + hdr_len + data_len;
uh->len = htons(udp_len);
- uh->check = 0;
/* Calculate UDP checksum if configured to do so */
#if IS_ENABLED(CONFIG_IPV6)
if (sk->sk_family == PF_INET6 && !tunnel->v4mapped)
- l2tp_xmit_ipv6_csum(sk, skb, udp_len);
+ udp6_set_csum(udp_get_no_check6_tx(sk),
+ skb, &inet6_sk(sk)->saddr,
+ &sk->sk_v6_daddr, udp_len);
else
#endif
- if (sk->sk_no_check == UDP_CSUM_NOXMIT)
- skb->ip_summed = CHECKSUM_NONE;
- else if ((skb_dst(skb) && skb_dst(skb)->dev) &&
- (!(skb_dst(skb)->dev->features & NETIF_F_V4_CSUM))) {
- skb->ip_summed = CHECKSUM_COMPLETE;
- csum = skb_checksum(skb, 0, udp_len, 0);
- uh->check = csum_tcpudp_magic(inet->inet_saddr,
- inet->inet_daddr,
- udp_len, IPPROTO_UDP, csum);
- if (uh->check == 0)
- uh->check = CSUM_MANGLED_0;
- } else {
- skb->ip_summed = CHECKSUM_PARTIAL;
- skb->csum_start = skb_transport_header(skb) - skb->head;
- skb->csum_offset = offsetof(struct udphdr, check);
- uh->check = ~csum_tcpudp_magic(inet->inet_saddr,
- inet->inet_daddr,
- udp_len, IPPROTO_UDP, 0);
- }
+ udp_set_csum(sk->sk_no_check_tx, skb, inet->inet_saddr,
+ inet->inet_daddr, udp_len);
break;
case L2TP_ENCAPTYPE_IP:
@@ -1435,6 +1393,11 @@ static int l2tp_tunnel_sock_create(struct net *net,
sizeof(udp6_addr), 0);
if (err < 0)
goto out;
+
+ if (cfg->udp6_zero_tx_checksums)
+ udp_set_no_check6_tx(sock->sk, true);
+ if (cfg->udp6_zero_rx_checksums)
+ udp_set_no_check6_rx(sock->sk, true);
} else
#endif
{
@@ -1463,7 +1426,7 @@ static int l2tp_tunnel_sock_create(struct net *net,
}
if (!cfg->use_udp_checksums)
- sock->sk->sk_no_check = UDP_CSUM_NOXMIT;
+ sock->sk->sk_no_check_tx = 1;
break;
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index 3f93ccd6ba9..68aa9ffd4ae 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -162,7 +162,9 @@ struct l2tp_tunnel_cfg {
#endif
u16 local_udp_port;
u16 peer_udp_port;
- unsigned int use_udp_checksums:1;
+ unsigned int use_udp_checksums:1,
+ udp6_zero_tx_checksums:1,
+ udp6_zero_rx_checksums:1;
};
struct l2tp_tunnel {
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 3397fe6897c..369a9822488 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -606,7 +606,6 @@ static struct inet_protosw l2tp_ip_protosw = {
.protocol = IPPROTO_L2TP,
.prot = &l2tp_ip_prot,
.ops = &l2tp_ip_ops,
- .no_check = 0,
};
static struct net_protocol l2tp_ip_protocol __read_mostly = {
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index e472d44a3b9..f3f98a156ce 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -755,7 +755,6 @@ static struct inet_protosw l2tp_ip6_protosw = {
.protocol = IPPROTO_L2TP,
.prot = &l2tp_ip6_prot,
.ops = &l2tp_ip6_ops,
- .no_check = 0,
};
static struct inet6_protocol l2tp_ip6_protocol __read_mostly = {
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index bd7387adea9..0ac907adb2f 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -161,6 +161,13 @@ static int l2tp_nl_cmd_tunnel_create(struct sk_buff *skb, struct genl_info *info
cfg.peer_udp_port = nla_get_u16(info->attrs[L2TP_ATTR_UDP_DPORT]);
if (info->attrs[L2TP_ATTR_UDP_CSUM])
cfg.use_udp_checksums = nla_get_flag(info->attrs[L2TP_ATTR_UDP_CSUM]);
+
+#if IS_ENABLED(CONFIG_IPV6)
+ if (info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_TX])
+ cfg.udp6_zero_tx_checksums = nla_get_flag(info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_TX]);
+ if (info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_RX])
+ cfg.udp6_zero_rx_checksums = nla_get_flag(info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_RX]);
+#endif
}
if (info->attrs[L2TP_ATTR_DEBUG])
@@ -297,8 +304,7 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla
case L2TP_ENCAPTYPE_UDP:
if (nla_put_u16(skb, L2TP_ATTR_UDP_SPORT, ntohs(inet->inet_sport)) ||
nla_put_u16(skb, L2TP_ATTR_UDP_DPORT, ntohs(inet->inet_dport)) ||
- nla_put_u8(skb, L2TP_ATTR_UDP_CSUM,
- (sk->sk_no_check != UDP_CSUM_NOXMIT)))
+ nla_put_u8(skb, L2TP_ATTR_UDP_CSUM, !sk->sk_no_check_tx))
goto nla_put_failure;
/* NOBREAK */
case L2TP_ENCAPTYPE_IP:
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 950909f04ee..13752d96275 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -1365,7 +1365,7 @@ static int pppol2tp_setsockopt(struct socket *sock, int level, int optname,
int err;
if (level != SOL_PPPOL2TP)
- return udp_prot.setsockopt(sk, level, optname, optval, optlen);
+ return -EINVAL;
if (optlen < sizeof(int))
return -EINVAL;
@@ -1491,7 +1491,7 @@ static int pppol2tp_getsockopt(struct socket *sock, int level, int optname,
struct pppol2tp_session *ps;
if (level != SOL_PPPOL2TP)
- return udp_prot.getsockopt(sk, level, optname, optval, optlen);
+ return -EINVAL;
if (get_user(len, optlen))
return -EFAULT;
diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile
index 9d7d840aac6..1e46ffa6916 100644
--- a/net/mac80211/Makefile
+++ b/net/mac80211/Makefile
@@ -25,7 +25,8 @@ mac80211-y := \
wme.o \
event.o \
chan.o \
- trace.o mlme.o
+ trace.o mlme.o \
+ tdls.o
mac80211-$(CONFIG_MAC80211_LEDS) += led.o
mac80211-$(CONFIG_MAC80211_DEBUGFS) += \
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 7b8d3cf8957..592f4b152ba 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -472,10 +472,15 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
{
struct ieee80211_sub_if_data *sdata = sta->sdata;
struct ieee80211_local *local = sdata->local;
+ struct rate_control_ref *ref = NULL;
struct timespec uptime;
u64 packets = 0;
+ u32 thr = 0;
int i, ac;
+ if (test_sta_flag(sta, WLAN_STA_RATE_CONTROL))
+ ref = local->rate_ctrl;
+
sinfo->generation = sdata->local->sta_generation;
sinfo->filled = STATION_INFO_INACTIVE_TIME |
@@ -587,6 +592,17 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_ASSOCIATED);
if (test_sta_flag(sta, WLAN_STA_TDLS_PEER))
sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_TDLS_PEER);
+
+ /* check if the driver has a SW RC implementation */
+ if (ref && ref->ops->get_expected_throughput)
+ thr = ref->ops->get_expected_throughput(sta->rate_ctrl_priv);
+ else
+ thr = drv_get_expected_throughput(local, &sta->sta);
+
+ if (thr != 0) {
+ sinfo->filled |= STATION_INFO_EXPECTED_THROUGHPUT;
+ sinfo->expected_throughput = thr;
+ }
}
static const char ieee80211_gstrings_sta_stats[][ETH_GSTRING_LEN] = {
@@ -777,7 +793,7 @@ static void ieee80211_get_et_strings(struct wiphy *wiphy,
}
static int ieee80211_dump_station(struct wiphy *wiphy, struct net_device *dev,
- int idx, u8 *mac, struct station_info *sinfo)
+ int idx, u8 *mac, struct station_info *sinfo)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
struct ieee80211_local *local = sdata->local;
@@ -807,7 +823,7 @@ static int ieee80211_dump_survey(struct wiphy *wiphy, struct net_device *dev,
}
static int ieee80211_get_station(struct wiphy *wiphy, struct net_device *dev,
- u8 *mac, struct station_info *sinfo)
+ const u8 *mac, struct station_info *sinfo)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
struct ieee80211_local *local = sdata->local;
@@ -1084,6 +1100,31 @@ static int ieee80211_change_beacon(struct wiphy *wiphy, struct net_device *dev,
return 0;
}
+bool ieee80211_csa_needs_block_tx(struct ieee80211_local *local)
+{
+ struct ieee80211_sub_if_data *sdata;
+
+ lockdep_assert_held(&local->mtx);
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(sdata, &local->interfaces, list) {
+ if (!ieee80211_sdata_running(sdata))
+ continue;
+
+ if (!sdata->vif.csa_active)
+ continue;
+
+ if (!sdata->csa_block_tx)
+ continue;
+
+ rcu_read_unlock();
+ return true;
+ }
+ rcu_read_unlock();
+
+ return false;
+}
+
static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
@@ -1101,7 +1142,14 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev)
old_probe_resp = sdata_dereference(sdata->u.ap.probe_resp, sdata);
/* abort any running channel switch */
+ mutex_lock(&local->mtx);
sdata->vif.csa_active = false;
+ if (!ieee80211_csa_needs_block_tx(local))
+ ieee80211_wake_queues_by_reason(&local->hw,
+ IEEE80211_MAX_QUEUE_MAP,
+ IEEE80211_QUEUE_STOP_REASON_CSA);
+ mutex_unlock(&local->mtx);
+
kfree(sdata->u.ap.next_beacon);
sdata->u.ap.next_beacon = NULL;
@@ -1425,7 +1473,8 @@ static int sta_apply_parameters(struct ieee80211_local *local,
}
static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev,
- u8 *mac, struct station_parameters *params)
+ const u8 *mac,
+ struct station_parameters *params)
{
struct ieee80211_local *local = wiphy_priv(wiphy);
struct sta_info *sta;
@@ -1459,6 +1508,8 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev,
if (!(params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER))) {
sta_info_pre_move_state(sta, IEEE80211_STA_AUTH);
sta_info_pre_move_state(sta, IEEE80211_STA_ASSOC);
+ } else {
+ sta->sta.tdls = true;
}
err = sta_apply_parameters(local, sta, params);
@@ -1492,7 +1543,7 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev,
}
static int ieee80211_del_station(struct wiphy *wiphy, struct net_device *dev,
- u8 *mac)
+ const u8 *mac)
{
struct ieee80211_sub_if_data *sdata;
@@ -1506,7 +1557,7 @@ static int ieee80211_del_station(struct wiphy *wiphy, struct net_device *dev,
}
static int ieee80211_change_station(struct wiphy *wiphy,
- struct net_device *dev, u8 *mac,
+ struct net_device *dev, const u8 *mac,
struct station_parameters *params)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
@@ -1631,7 +1682,7 @@ out_err:
#ifdef CONFIG_MAC80211_MESH
static int ieee80211_add_mpath(struct wiphy *wiphy, struct net_device *dev,
- u8 *dst, u8 *next_hop)
+ const u8 *dst, const u8 *next_hop)
{
struct ieee80211_sub_if_data *sdata;
struct mesh_path *mpath;
@@ -1659,7 +1710,7 @@ static int ieee80211_add_mpath(struct wiphy *wiphy, struct net_device *dev,
}
static int ieee80211_del_mpath(struct wiphy *wiphy, struct net_device *dev,
- u8 *dst)
+ const u8 *dst)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
@@ -1670,9 +1721,8 @@ static int ieee80211_del_mpath(struct wiphy *wiphy, struct net_device *dev,
return 0;
}
-static int ieee80211_change_mpath(struct wiphy *wiphy,
- struct net_device *dev,
- u8 *dst, u8 *next_hop)
+static int ieee80211_change_mpath(struct wiphy *wiphy, struct net_device *dev,
+ const u8 *dst, const u8 *next_hop)
{
struct ieee80211_sub_if_data *sdata;
struct mesh_path *mpath;
@@ -1764,8 +1814,8 @@ static int ieee80211_get_mpath(struct wiphy *wiphy, struct net_device *dev,
}
static int ieee80211_dump_mpath(struct wiphy *wiphy, struct net_device *dev,
- int idx, u8 *dst, u8 *next_hop,
- struct mpath_info *pinfo)
+ int idx, u8 *dst, u8 *next_hop,
+ struct mpath_info *pinfo)
{
struct ieee80211_sub_if_data *sdata;
struct mesh_path *mpath;
@@ -3019,26 +3069,11 @@ void ieee80211_csa_finish(struct ieee80211_vif *vif)
}
EXPORT_SYMBOL(ieee80211_csa_finish);
-static void ieee80211_csa_finalize(struct ieee80211_sub_if_data *sdata)
+static int ieee80211_set_after_csa_beacon(struct ieee80211_sub_if_data *sdata,
+ u32 *changed)
{
- struct ieee80211_local *local = sdata->local;
- int err, changed = 0;
-
- sdata_assert_lock(sdata);
-
- mutex_lock(&local->mtx);
- sdata->radar_required = sdata->csa_radar_required;
- err = ieee80211_vif_change_channel(sdata, &changed);
- mutex_unlock(&local->mtx);
- if (WARN_ON(err < 0))
- return;
-
- if (!local->use_chanctx) {
- local->_oper_chandef = sdata->csa_chandef;
- ieee80211_hw_config(local, 0);
- }
+ int err;
- sdata->vif.csa_active = false;
switch (sdata->vif.type) {
case NL80211_IFTYPE_AP:
err = ieee80211_assign_beacon(sdata, sdata->u.ap.next_beacon);
@@ -3046,35 +3081,74 @@ static void ieee80211_csa_finalize(struct ieee80211_sub_if_data *sdata)
sdata->u.ap.next_beacon = NULL;
if (err < 0)
- return;
- changed |= err;
+ return err;
+ *changed |= err;
break;
case NL80211_IFTYPE_ADHOC:
err = ieee80211_ibss_finish_csa(sdata);
if (err < 0)
- return;
- changed |= err;
+ return err;
+ *changed |= err;
break;
#ifdef CONFIG_MAC80211_MESH
case NL80211_IFTYPE_MESH_POINT:
err = ieee80211_mesh_finish_csa(sdata);
if (err < 0)
- return;
- changed |= err;
+ return err;
+ *changed |= err;
break;
#endif
default:
WARN_ON(1);
- return;
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int __ieee80211_csa_finalize(struct ieee80211_sub_if_data *sdata)
+{
+ struct ieee80211_local *local = sdata->local;
+ u32 changed = 0;
+ int err;
+
+ sdata_assert_lock(sdata);
+ lockdep_assert_held(&local->mtx);
+
+ sdata->radar_required = sdata->csa_radar_required;
+ err = ieee80211_vif_change_channel(sdata, &changed);
+ if (err < 0)
+ return err;
+
+ if (!local->use_chanctx) {
+ local->_oper_chandef = sdata->csa_chandef;
+ ieee80211_hw_config(local, 0);
}
+ sdata->vif.csa_active = false;
+
+ err = ieee80211_set_after_csa_beacon(sdata, &changed);
+ if (err)
+ return err;
+
ieee80211_bss_info_change_notify(sdata, changed);
+ cfg80211_ch_switch_notify(sdata->dev, &sdata->csa_chandef);
- ieee80211_wake_queues_by_reason(&sdata->local->hw,
+ if (!ieee80211_csa_needs_block_tx(local))
+ ieee80211_wake_queues_by_reason(&local->hw,
IEEE80211_MAX_QUEUE_MAP,
IEEE80211_QUEUE_STOP_REASON_CSA);
- cfg80211_ch_switch_notify(sdata->dev, &sdata->csa_chandef);
+ return 0;
+}
+
+static void ieee80211_csa_finalize(struct ieee80211_sub_if_data *sdata)
+{
+ if (__ieee80211_csa_finalize(sdata)) {
+ sdata_info(sdata, "failed to finalize CSA, disconnecting\n");
+ cfg80211_stop_iface(sdata->local->hw.wiphy, &sdata->wdev,
+ GFP_KERNEL);
+ }
}
void ieee80211_csa_finalize_work(struct work_struct *work)
@@ -3082,8 +3156,11 @@ void ieee80211_csa_finalize_work(struct work_struct *work)
struct ieee80211_sub_if_data *sdata =
container_of(work, struct ieee80211_sub_if_data,
csa_finalize_work);
+ struct ieee80211_local *local = sdata->local;
sdata_lock(sdata);
+ mutex_lock(&local->mtx);
+
/* AP might have been stopped while waiting for the lock. */
if (!sdata->vif.csa_active)
goto unlock;
@@ -3094,6 +3171,7 @@ void ieee80211_csa_finalize_work(struct work_struct *work)
ieee80211_csa_finalize(sdata);
unlock:
+ mutex_unlock(&local->mtx);
sdata_unlock(sdata);
}
@@ -3129,9 +3207,25 @@ static int ieee80211_set_csa_beacon(struct ieee80211_sub_if_data *sdata,
if (params->count <= 1)
break;
- sdata->csa_counter_offset_beacon =
- params->counter_offset_beacon;
- sdata->csa_counter_offset_presp = params->counter_offset_presp;
+ if ((params->n_counter_offsets_beacon >
+ IEEE80211_MAX_CSA_COUNTERS_NUM) ||
+ (params->n_counter_offsets_presp >
+ IEEE80211_MAX_CSA_COUNTERS_NUM))
+ return -EINVAL;
+
+ /* make sure we don't have garbage in other counters */
+ memset(sdata->csa_counter_offset_beacon, 0,
+ sizeof(sdata->csa_counter_offset_beacon));
+ memset(sdata->csa_counter_offset_presp, 0,
+ sizeof(sdata->csa_counter_offset_presp));
+
+ memcpy(sdata->csa_counter_offset_beacon,
+ params->counter_offsets_beacon,
+ params->n_counter_offsets_beacon * sizeof(u16));
+ memcpy(sdata->csa_counter_offset_presp,
+ params->counter_offsets_presp,
+ params->n_counter_offsets_presp * sizeof(u16));
+
err = ieee80211_assign_beacon(sdata, &params->beacon_csa);
if (err < 0) {
kfree(sdata->u.ap.next_beacon);
@@ -3220,8 +3314,9 @@ static int ieee80211_set_csa_beacon(struct ieee80211_sub_if_data *sdata,
return 0;
}
-int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
- struct cfg80211_csa_settings *params)
+static int
+__ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
+ struct cfg80211_csa_settings *params)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
struct ieee80211_local *local = sdata->local;
@@ -3230,6 +3325,7 @@ int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
int err, num_chanctx, changed = 0;
sdata_assert_lock(sdata);
+ lockdep_assert_held(&local->mtx);
if (!list_empty(&local->roc_list) || local->scanning)
return -EBUSY;
@@ -3272,15 +3368,16 @@ int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
return err;
sdata->csa_radar_required = params->radar_required;
-
- if (params->block_tx)
- ieee80211_stop_queues_by_reason(&local->hw,
- IEEE80211_MAX_QUEUE_MAP,
- IEEE80211_QUEUE_STOP_REASON_CSA);
-
sdata->csa_chandef = params->chandef;
+ sdata->csa_block_tx = params->block_tx;
+ sdata->csa_current_counter = params->count;
sdata->vif.csa_active = true;
+ if (sdata->csa_block_tx)
+ ieee80211_stop_queues_by_reason(&local->hw,
+ IEEE80211_MAX_QUEUE_MAP,
+ IEEE80211_QUEUE_STOP_REASON_CSA);
+
if (changed) {
ieee80211_bss_info_change_notify(sdata, changed);
drv_channel_switch_beacon(sdata, &params->chandef);
@@ -3292,6 +3389,20 @@ int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
return 0;
}
+int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
+ struct cfg80211_csa_settings *params)
+{
+ struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ struct ieee80211_local *local = sdata->local;
+ int err;
+
+ mutex_lock(&local->mtx);
+ err = __ieee80211_channel_switch(wiphy, dev, params);
+ mutex_unlock(&local->mtx);
+
+ return err;
+}
+
static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
struct cfg80211_mgmt_tx_params *params,
u64 *cookie)
@@ -3304,6 +3415,7 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
bool need_offchan = false;
u32 flags;
int ret;
+ u8 *data;
if (params->dont_wait_for_ack)
flags = IEEE80211_TX_CTL_NO_ACK;
@@ -3397,7 +3509,20 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
}
skb_reserve(skb, local->hw.extra_tx_headroom);
- memcpy(skb_put(skb, params->len), params->buf, params->len);
+ data = skb_put(skb, params->len);
+ memcpy(data, params->buf, params->len);
+
+ /* Update CSA counters */
+ if (sdata->vif.csa_active &&
+ (sdata->vif.type == NL80211_IFTYPE_AP ||
+ sdata->vif.type == NL80211_IFTYPE_ADHOC) &&
+ params->n_csa_offsets) {
+ int i;
+ u8 c = sdata->csa_current_counter;
+
+ for (i = 0; i < params->n_csa_offsets; i++)
+ data[params->csa_offsets[i]] = c;
+ }
IEEE80211_SKB_CB(skb)->flags = flags;
@@ -3506,320 +3631,6 @@ static int ieee80211_set_rekey_data(struct wiphy *wiphy,
return 0;
}
-static void ieee80211_tdls_add_ext_capab(struct sk_buff *skb)
-{
- u8 *pos = (void *)skb_put(skb, 7);
-
- *pos++ = WLAN_EID_EXT_CAPABILITY;
- *pos++ = 5; /* len */
- *pos++ = 0x0;
- *pos++ = 0x0;
- *pos++ = 0x0;
- *pos++ = 0x0;
- *pos++ = WLAN_EXT_CAPA5_TDLS_ENABLED;
-}
-
-static u16 ieee80211_get_tdls_sta_capab(struct ieee80211_sub_if_data *sdata)
-{
- struct ieee80211_local *local = sdata->local;
- u16 capab;
-
- capab = 0;
- if (ieee80211_get_sdata_band(sdata) != IEEE80211_BAND_2GHZ)
- return capab;
-
- if (!(local->hw.flags & IEEE80211_HW_2GHZ_SHORT_SLOT_INCAPABLE))
- capab |= WLAN_CAPABILITY_SHORT_SLOT_TIME;
- if (!(local->hw.flags & IEEE80211_HW_2GHZ_SHORT_PREAMBLE_INCAPABLE))
- capab |= WLAN_CAPABILITY_SHORT_PREAMBLE;
-
- return capab;
-}
-
-static void ieee80211_tdls_add_link_ie(struct sk_buff *skb, u8 *src_addr,
- u8 *peer, u8 *bssid)
-{
- struct ieee80211_tdls_lnkie *lnkid;
-
- lnkid = (void *)skb_put(skb, sizeof(struct ieee80211_tdls_lnkie));
-
- lnkid->ie_type = WLAN_EID_LINK_ID;
- lnkid->ie_len = sizeof(struct ieee80211_tdls_lnkie) - 2;
-
- memcpy(lnkid->bssid, bssid, ETH_ALEN);
- memcpy(lnkid->init_sta, src_addr, ETH_ALEN);
- memcpy(lnkid->resp_sta, peer, ETH_ALEN);
-}
-
-static int
-ieee80211_prep_tdls_encap_data(struct wiphy *wiphy, struct net_device *dev,
- u8 *peer, u8 action_code, u8 dialog_token,
- u16 status_code, struct sk_buff *skb)
-{
- struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
- enum ieee80211_band band = ieee80211_get_sdata_band(sdata);
- struct ieee80211_tdls_data *tf;
-
- tf = (void *)skb_put(skb, offsetof(struct ieee80211_tdls_data, u));
-
- memcpy(tf->da, peer, ETH_ALEN);
- memcpy(tf->sa, sdata->vif.addr, ETH_ALEN);
- tf->ether_type = cpu_to_be16(ETH_P_TDLS);
- tf->payload_type = WLAN_TDLS_SNAP_RFTYPE;
-
- switch (action_code) {
- case WLAN_TDLS_SETUP_REQUEST:
- tf->category = WLAN_CATEGORY_TDLS;
- tf->action_code = WLAN_TDLS_SETUP_REQUEST;
-
- skb_put(skb, sizeof(tf->u.setup_req));
- tf->u.setup_req.dialog_token = dialog_token;
- tf->u.setup_req.capability =
- cpu_to_le16(ieee80211_get_tdls_sta_capab(sdata));
-
- ieee80211_add_srates_ie(sdata, skb, false, band);
- ieee80211_add_ext_srates_ie(sdata, skb, false, band);
- ieee80211_tdls_add_ext_capab(skb);
- break;
- case WLAN_TDLS_SETUP_RESPONSE:
- tf->category = WLAN_CATEGORY_TDLS;
- tf->action_code = WLAN_TDLS_SETUP_RESPONSE;
-
- skb_put(skb, sizeof(tf->u.setup_resp));
- tf->u.setup_resp.status_code = cpu_to_le16(status_code);
- tf->u.setup_resp.dialog_token = dialog_token;
- tf->u.setup_resp.capability =
- cpu_to_le16(ieee80211_get_tdls_sta_capab(sdata));
-
- ieee80211_add_srates_ie(sdata, skb, false, band);
- ieee80211_add_ext_srates_ie(sdata, skb, false, band);
- ieee80211_tdls_add_ext_capab(skb);
- break;
- case WLAN_TDLS_SETUP_CONFIRM:
- tf->category = WLAN_CATEGORY_TDLS;
- tf->action_code = WLAN_TDLS_SETUP_CONFIRM;
-
- skb_put(skb, sizeof(tf->u.setup_cfm));
- tf->u.setup_cfm.status_code = cpu_to_le16(status_code);
- tf->u.setup_cfm.dialog_token = dialog_token;
- break;
- case WLAN_TDLS_TEARDOWN:
- tf->category = WLAN_CATEGORY_TDLS;
- tf->action_code = WLAN_TDLS_TEARDOWN;
-
- skb_put(skb, sizeof(tf->u.teardown));
- tf->u.teardown.reason_code = cpu_to_le16(status_code);
- break;
- case WLAN_TDLS_DISCOVERY_REQUEST:
- tf->category = WLAN_CATEGORY_TDLS;
- tf->action_code = WLAN_TDLS_DISCOVERY_REQUEST;
-
- skb_put(skb, sizeof(tf->u.discover_req));
- tf->u.discover_req.dialog_token = dialog_token;
- break;
- default:
- return -EINVAL;
- }
-
- return 0;
-}
-
-static int
-ieee80211_prep_tdls_direct(struct wiphy *wiphy, struct net_device *dev,
- u8 *peer, u8 action_code, u8 dialog_token,
- u16 status_code, struct sk_buff *skb)
-{
- struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
- enum ieee80211_band band = ieee80211_get_sdata_band(sdata);
- struct ieee80211_mgmt *mgmt;
-
- mgmt = (void *)skb_put(skb, 24);
- memset(mgmt, 0, 24);
- memcpy(mgmt->da, peer, ETH_ALEN);
- memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN);
- memcpy(mgmt->bssid, sdata->u.mgd.bssid, ETH_ALEN);
-
- mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
- IEEE80211_STYPE_ACTION);
-
- switch (action_code) {
- case WLAN_PUB_ACTION_TDLS_DISCOVER_RES:
- skb_put(skb, 1 + sizeof(mgmt->u.action.u.tdls_discover_resp));
- mgmt->u.action.category = WLAN_CATEGORY_PUBLIC;
- mgmt->u.action.u.tdls_discover_resp.action_code =
- WLAN_PUB_ACTION_TDLS_DISCOVER_RES;
- mgmt->u.action.u.tdls_discover_resp.dialog_token =
- dialog_token;
- mgmt->u.action.u.tdls_discover_resp.capability =
- cpu_to_le16(ieee80211_get_tdls_sta_capab(sdata));
-
- ieee80211_add_srates_ie(sdata, skb, false, band);
- ieee80211_add_ext_srates_ie(sdata, skb, false, band);
- ieee80211_tdls_add_ext_capab(skb);
- break;
- default:
- return -EINVAL;
- }
-
- return 0;
-}
-
-static int ieee80211_tdls_mgmt(struct wiphy *wiphy, struct net_device *dev,
- u8 *peer, u8 action_code, u8 dialog_token,
- u16 status_code, u32 peer_capability,
- const u8 *extra_ies, size_t extra_ies_len)
-{
- struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
- struct ieee80211_local *local = sdata->local;
- struct sk_buff *skb = NULL;
- bool send_direct;
- int ret;
-
- if (!(wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS))
- return -ENOTSUPP;
-
- /* make sure we are in managed mode, and associated */
- if (sdata->vif.type != NL80211_IFTYPE_STATION ||
- !sdata->u.mgd.associated)
- return -EINVAL;
-
- tdls_dbg(sdata, "TDLS mgmt action %d peer %pM\n",
- action_code, peer);
-
- skb = dev_alloc_skb(local->hw.extra_tx_headroom +
- max(sizeof(struct ieee80211_mgmt),
- sizeof(struct ieee80211_tdls_data)) +
- 50 + /* supported rates */
- 7 + /* ext capab */
- extra_ies_len +
- sizeof(struct ieee80211_tdls_lnkie));
- if (!skb)
- return -ENOMEM;
-
- skb_reserve(skb, local->hw.extra_tx_headroom);
-
- switch (action_code) {
- case WLAN_TDLS_SETUP_REQUEST:
- case WLAN_TDLS_SETUP_RESPONSE:
- case WLAN_TDLS_SETUP_CONFIRM:
- case WLAN_TDLS_TEARDOWN:
- case WLAN_TDLS_DISCOVERY_REQUEST:
- ret = ieee80211_prep_tdls_encap_data(wiphy, dev, peer,
- action_code, dialog_token,
- status_code, skb);
- send_direct = false;
- break;
- case WLAN_PUB_ACTION_TDLS_DISCOVER_RES:
- ret = ieee80211_prep_tdls_direct(wiphy, dev, peer, action_code,
- dialog_token, status_code,
- skb);
- send_direct = true;
- break;
- default:
- ret = -ENOTSUPP;
- break;
- }
-
- if (ret < 0)
- goto fail;
-
- if (extra_ies_len)
- memcpy(skb_put(skb, extra_ies_len), extra_ies, extra_ies_len);
-
- /* the TDLS link IE is always added last */
- switch (action_code) {
- case WLAN_TDLS_SETUP_REQUEST:
- case WLAN_TDLS_SETUP_CONFIRM:
- case WLAN_TDLS_TEARDOWN:
- case WLAN_TDLS_DISCOVERY_REQUEST:
- /* we are the initiator */
- ieee80211_tdls_add_link_ie(skb, sdata->vif.addr, peer,
- sdata->u.mgd.bssid);
- break;
- case WLAN_TDLS_SETUP_RESPONSE:
- case WLAN_PUB_ACTION_TDLS_DISCOVER_RES:
- /* we are the responder */
- ieee80211_tdls_add_link_ie(skb, peer, sdata->vif.addr,
- sdata->u.mgd.bssid);
- break;
- default:
- ret = -ENOTSUPP;
- goto fail;
- }
-
- if (send_direct) {
- ieee80211_tx_skb(sdata, skb);
- return 0;
- }
-
- /*
- * According to 802.11z: Setup req/resp are sent in AC_BK, otherwise
- * we should default to AC_VI.
- */
- switch (action_code) {
- case WLAN_TDLS_SETUP_REQUEST:
- case WLAN_TDLS_SETUP_RESPONSE:
- skb_set_queue_mapping(skb, IEEE80211_AC_BK);
- skb->priority = 2;
- break;
- default:
- skb_set_queue_mapping(skb, IEEE80211_AC_VI);
- skb->priority = 5;
- break;
- }
-
- /* disable bottom halves when entering the Tx path */
- local_bh_disable();
- ret = ieee80211_subif_start_xmit(skb, dev);
- local_bh_enable();
-
- return ret;
-
-fail:
- dev_kfree_skb(skb);
- return ret;
-}
-
-static int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev,
- u8 *peer, enum nl80211_tdls_operation oper)
-{
- struct sta_info *sta;
- struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-
- if (!(wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS))
- return -ENOTSUPP;
-
- if (sdata->vif.type != NL80211_IFTYPE_STATION)
- return -EINVAL;
-
- tdls_dbg(sdata, "TDLS oper %d peer %pM\n", oper, peer);
-
- switch (oper) {
- case NL80211_TDLS_ENABLE_LINK:
- rcu_read_lock();
- sta = sta_info_get(sdata, peer);
- if (!sta) {
- rcu_read_unlock();
- return -ENOLINK;
- }
-
- set_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH);
- rcu_read_unlock();
- break;
- case NL80211_TDLS_DISABLE_LINK:
- return sta_info_destroy_addr(sdata, peer);
- case NL80211_TDLS_TEARDOWN:
- case NL80211_TDLS_SETUP:
- case NL80211_TDLS_DISCOVERY_REQ:
- /* We don't support in-driver setup/teardown/discovery */
- return -ENOTSUPP;
- default:
- return -ENOTSUPP;
- }
-
- return 0;
-}
-
static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev,
const u8 *peer, u64 *cookie)
{
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index 48e6d6f010c..a310e33972d 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -855,7 +855,7 @@ static void
__ieee80211_vif_copy_chanctx_to_vlans(struct ieee80211_sub_if_data *sdata,
bool clear)
{
- struct ieee80211_local *local = sdata->local;
+ struct ieee80211_local *local __maybe_unused = sdata->local;
struct ieee80211_sub_if_data *vlan;
struct ieee80211_chanctx_conf *conf;
@@ -871,7 +871,7 @@ __ieee80211_vif_copy_chanctx_to_vlans(struct ieee80211_sub_if_data *sdata,
* to a channel context that has already been freed.
*/
conf = rcu_dereference_protected(sdata->vif.chanctx_conf,
- lockdep_is_held(&local->chanctx_mtx));
+ lockdep_is_held(&local->chanctx_mtx));
WARN_ON(!conf);
if (clear)
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index 40a64893898..e205ebabfa5 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -34,8 +34,7 @@ static ssize_t ieee80211_if_read(
ssize_t ret = -EINVAL;
read_lock(&dev_base_lock);
- if (sdata->dev->reg_state == NETREG_REGISTERED)
- ret = (*format)(sdata, buf, sizeof(buf));
+ ret = (*format)(sdata, buf, sizeof(buf));
read_unlock(&dev_base_lock);
if (ret >= 0)
@@ -62,8 +61,7 @@ static ssize_t ieee80211_if_write(
ret = -ENODEV;
rtnl_lock();
- if (sdata->dev->reg_state == NETREG_REGISTERED)
- ret = (*write)(sdata, buf, count);
+ ret = (*write)(sdata, buf, count);
rtnl_unlock();
return ret;
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index 5331582a2c8..bd782dcffcc 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -5,11 +5,11 @@
#include "ieee80211_i.h"
#include "trace.h"
-static inline void check_sdata_in_driver(struct ieee80211_sub_if_data *sdata)
+static inline bool check_sdata_in_driver(struct ieee80211_sub_if_data *sdata)
{
- WARN(!(sdata->flags & IEEE80211_SDATA_IN_DRIVER),
- "%s: Failed check-sdata-in-driver check, flags: 0x%x\n",
- sdata->dev ? sdata->dev->name : sdata->name, sdata->flags);
+ return !WARN(!(sdata->flags & IEEE80211_SDATA_IN_DRIVER),
+ "%s: Failed check-sdata-in-driver check, flags: 0x%x\n",
+ sdata->dev ? sdata->dev->name : sdata->name, sdata->flags);
}
static inline struct ieee80211_sub_if_data *
@@ -168,7 +168,8 @@ static inline int drv_change_interface(struct ieee80211_local *local,
might_sleep();
- check_sdata_in_driver(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return -EIO;
trace_drv_change_interface(local, sdata, type, p2p);
ret = local->ops->change_interface(&local->hw, &sdata->vif, type, p2p);
@@ -181,7 +182,8 @@ static inline void drv_remove_interface(struct ieee80211_local *local,
{
might_sleep();
- check_sdata_in_driver(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return;
trace_drv_remove_interface(local, sdata);
local->ops->remove_interface(&local->hw, &sdata->vif);
@@ -219,7 +221,8 @@ static inline void drv_bss_info_changed(struct ieee80211_local *local,
sdata->vif.type == NL80211_IFTYPE_MONITOR))
return;
- check_sdata_in_driver(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return;
trace_drv_bss_info_changed(local, sdata, info, changed);
if (local->ops->bss_info_changed)
@@ -278,7 +281,8 @@ static inline int drv_set_key(struct ieee80211_local *local,
might_sleep();
sdata = get_bss_sdata(sdata);
- check_sdata_in_driver(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return -EIO;
trace_drv_set_key(local, cmd, sdata, sta, key);
ret = local->ops->set_key(&local->hw, cmd, &sdata->vif, sta, key);
@@ -298,7 +302,8 @@ static inline void drv_update_tkip_key(struct ieee80211_local *local,
ista = &sta->sta;
sdata = get_bss_sdata(sdata);
- check_sdata_in_driver(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return;
trace_drv_update_tkip_key(local, sdata, conf, ista, iv32);
if (local->ops->update_tkip_key)
@@ -315,7 +320,8 @@ static inline int drv_hw_scan(struct ieee80211_local *local,
might_sleep();
- check_sdata_in_driver(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return -EIO;
trace_drv_hw_scan(local, sdata);
ret = local->ops->hw_scan(&local->hw, &sdata->vif, req);
@@ -328,7 +334,8 @@ static inline void drv_cancel_hw_scan(struct ieee80211_local *local,
{
might_sleep();
- check_sdata_in_driver(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return;
trace_drv_cancel_hw_scan(local, sdata);
local->ops->cancel_hw_scan(&local->hw, &sdata->vif);
@@ -345,7 +352,8 @@ drv_sched_scan_start(struct ieee80211_local *local,
might_sleep();
- check_sdata_in_driver(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return -EIO;
trace_drv_sched_scan_start(local, sdata);
ret = local->ops->sched_scan_start(&local->hw, &sdata->vif,
@@ -361,7 +369,8 @@ static inline int drv_sched_scan_stop(struct ieee80211_local *local,
might_sleep();
- check_sdata_in_driver(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return -EIO;
trace_drv_sched_scan_stop(local, sdata);
ret = local->ops->sched_scan_stop(&local->hw, &sdata->vif);
@@ -462,7 +471,8 @@ static inline void drv_sta_notify(struct ieee80211_local *local,
struct ieee80211_sta *sta)
{
sdata = get_bss_sdata(sdata);
- check_sdata_in_driver(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return;
trace_drv_sta_notify(local, sdata, cmd, sta);
if (local->ops->sta_notify)
@@ -479,7 +489,8 @@ static inline int drv_sta_add(struct ieee80211_local *local,
might_sleep();
sdata = get_bss_sdata(sdata);
- check_sdata_in_driver(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return -EIO;
trace_drv_sta_add(local, sdata, sta);
if (local->ops->sta_add)
@@ -497,7 +508,8 @@ static inline void drv_sta_remove(struct ieee80211_local *local,
might_sleep();
sdata = get_bss_sdata(sdata);
- check_sdata_in_driver(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return;
trace_drv_sta_remove(local, sdata, sta);
if (local->ops->sta_remove)
@@ -515,7 +527,8 @@ static inline void drv_sta_add_debugfs(struct ieee80211_local *local,
might_sleep();
sdata = get_bss_sdata(sdata);
- check_sdata_in_driver(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return;
if (local->ops->sta_add_debugfs)
local->ops->sta_add_debugfs(&local->hw, &sdata->vif,
@@ -545,7 +558,8 @@ static inline void drv_sta_pre_rcu_remove(struct ieee80211_local *local,
might_sleep();
sdata = get_bss_sdata(sdata);
- check_sdata_in_driver(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return;
trace_drv_sta_pre_rcu_remove(local, sdata, &sta->sta);
if (local->ops->sta_pre_rcu_remove)
@@ -566,7 +580,8 @@ int drv_sta_state(struct ieee80211_local *local,
might_sleep();
sdata = get_bss_sdata(sdata);
- check_sdata_in_driver(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return -EIO;
trace_drv_sta_state(local, sdata, &sta->sta, old_state, new_state);
if (local->ops->sta_state) {
@@ -590,7 +605,8 @@ static inline void drv_sta_rc_update(struct ieee80211_local *local,
struct ieee80211_sta *sta, u32 changed)
{
sdata = get_bss_sdata(sdata);
- check_sdata_in_driver(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return;
WARN_ON(changed & IEEE80211_RC_SUPP_RATES_CHANGED &&
(sdata->vif.type != NL80211_IFTYPE_ADHOC &&
@@ -612,7 +628,8 @@ static inline int drv_conf_tx(struct ieee80211_local *local,
might_sleep();
- check_sdata_in_driver(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return -EIO;
trace_drv_conf_tx(local, sdata, ac, params);
if (local->ops->conf_tx)
@@ -629,7 +646,8 @@ static inline u64 drv_get_tsf(struct ieee80211_local *local,
might_sleep();
- check_sdata_in_driver(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return ret;
trace_drv_get_tsf(local, sdata);
if (local->ops->get_tsf)
@@ -644,7 +662,8 @@ static inline void drv_set_tsf(struct ieee80211_local *local,
{
might_sleep();
- check_sdata_in_driver(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return;
trace_drv_set_tsf(local, sdata, tsf);
if (local->ops->set_tsf)
@@ -657,7 +676,8 @@ static inline void drv_reset_tsf(struct ieee80211_local *local,
{
might_sleep();
- check_sdata_in_driver(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return;
trace_drv_reset_tsf(local, sdata);
if (local->ops->reset_tsf)
@@ -689,7 +709,8 @@ static inline int drv_ampdu_action(struct ieee80211_local *local,
might_sleep();
sdata = get_bss_sdata(sdata);
- check_sdata_in_driver(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return -EIO;
trace_drv_ampdu_action(local, sdata, action, sta, tid, ssn, buf_size);
@@ -733,8 +754,8 @@ static inline void drv_flush(struct ieee80211_local *local,
might_sleep();
- if (sdata)
- check_sdata_in_driver(sdata);
+ if (sdata && !check_sdata_in_driver(sdata))
+ return;
trace_drv_flush(local, queues, drop);
if (local->ops->flush)
@@ -854,7 +875,8 @@ static inline int drv_set_bitrate_mask(struct ieee80211_local *local,
might_sleep();
- check_sdata_in_driver(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return -EIO;
trace_drv_set_bitrate_mask(local, sdata, mask);
if (local->ops->set_bitrate_mask)
@@ -869,7 +891,8 @@ static inline void drv_set_rekey_data(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata,
struct cfg80211_gtk_rekey_data *data)
{
- check_sdata_in_driver(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return;
trace_drv_set_rekey_data(local, sdata, data);
if (local->ops->set_rekey_data)
@@ -937,7 +960,8 @@ static inline void drv_mgd_prepare_tx(struct ieee80211_local *local,
{
might_sleep();
- check_sdata_in_driver(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return;
WARN_ON_ONCE(sdata->vif.type != NL80211_IFTYPE_STATION);
trace_drv_mgd_prepare_tx(local, sdata);
@@ -964,6 +988,9 @@ static inline int drv_add_chanctx(struct ieee80211_local *local,
static inline void drv_remove_chanctx(struct ieee80211_local *local,
struct ieee80211_chanctx *ctx)
{
+ if (WARN_ON(!ctx->driver_present))
+ return;
+
trace_drv_remove_chanctx(local, ctx);
if (local->ops->remove_chanctx)
local->ops->remove_chanctx(&local->hw, &ctx->conf);
@@ -989,7 +1016,8 @@ static inline int drv_assign_vif_chanctx(struct ieee80211_local *local,
{
int ret = 0;
- check_sdata_in_driver(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return -EIO;
trace_drv_assign_vif_chanctx(local, sdata, ctx);
if (local->ops->assign_vif_chanctx) {
@@ -1007,7 +1035,8 @@ static inline void drv_unassign_vif_chanctx(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata,
struct ieee80211_chanctx *ctx)
{
- check_sdata_in_driver(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return;
trace_drv_unassign_vif_chanctx(local, sdata, ctx);
if (local->ops->unassign_vif_chanctx) {
@@ -1019,12 +1048,66 @@ static inline void drv_unassign_vif_chanctx(struct ieee80211_local *local,
trace_drv_return_void(local);
}
+static inline int
+drv_switch_vif_chanctx(struct ieee80211_local *local,
+ struct ieee80211_vif_chanctx_switch *vifs,
+ int n_vifs,
+ enum ieee80211_chanctx_switch_mode mode)
+{
+ int ret = 0;
+ int i;
+
+ if (!local->ops->switch_vif_chanctx)
+ return -EOPNOTSUPP;
+
+ for (i = 0; i < n_vifs; i++) {
+ struct ieee80211_chanctx *new_ctx =
+ container_of(vifs[i].new_ctx,
+ struct ieee80211_chanctx,
+ conf);
+ struct ieee80211_chanctx *old_ctx =
+ container_of(vifs[i].old_ctx,
+ struct ieee80211_chanctx,
+ conf);
+
+ WARN_ON_ONCE(!old_ctx->driver_present);
+ WARN_ON_ONCE((mode == CHANCTX_SWMODE_SWAP_CONTEXTS &&
+ new_ctx->driver_present) ||
+ (mode == CHANCTX_SWMODE_REASSIGN_VIF &&
+ !new_ctx->driver_present));
+ }
+
+ trace_drv_switch_vif_chanctx(local, vifs, n_vifs, mode);
+ ret = local->ops->switch_vif_chanctx(&local->hw,
+ vifs, n_vifs, mode);
+ trace_drv_return_int(local, ret);
+
+ if (!ret && mode == CHANCTX_SWMODE_SWAP_CONTEXTS) {
+ for (i = 0; i < n_vifs; i++) {
+ struct ieee80211_chanctx *new_ctx =
+ container_of(vifs[i].new_ctx,
+ struct ieee80211_chanctx,
+ conf);
+ struct ieee80211_chanctx *old_ctx =
+ container_of(vifs[i].old_ctx,
+ struct ieee80211_chanctx,
+ conf);
+
+ new_ctx->driver_present = true;
+ old_ctx->driver_present = false;
+ }
+ }
+
+ return ret;
+}
+
static inline int drv_start_ap(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata)
{
int ret = 0;
- check_sdata_in_driver(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return -EIO;
trace_drv_start_ap(local, sdata, &sdata->vif.bss_conf);
if (local->ops->start_ap)
@@ -1036,7 +1119,8 @@ static inline int drv_start_ap(struct ieee80211_local *local,
static inline void drv_stop_ap(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata)
{
- check_sdata_in_driver(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return;
trace_drv_stop_ap(local, sdata);
if (local->ops->stop_ap)
@@ -1059,7 +1143,8 @@ drv_set_default_unicast_key(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata,
int key_idx)
{
- check_sdata_in_driver(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return;
WARN_ON_ONCE(key_idx < -1 || key_idx > 3);
@@ -1101,7 +1186,8 @@ static inline int drv_join_ibss(struct ieee80211_local *local,
int ret = 0;
might_sleep();
- check_sdata_in_driver(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return -EIO;
trace_drv_join_ibss(local, sdata, &sdata->vif.bss_conf);
if (local->ops->join_ibss)
@@ -1114,7 +1200,8 @@ static inline void drv_leave_ibss(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata)
{
might_sleep();
- check_sdata_in_driver(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return;
trace_drv_leave_ibss(local, sdata);
if (local->ops->leave_ibss)
@@ -1122,4 +1209,17 @@ static inline void drv_leave_ibss(struct ieee80211_local *local,
trace_drv_return_void(local);
}
+static inline u32 drv_get_expected_throughput(struct ieee80211_local *local,
+ struct ieee80211_sta *sta)
+{
+ u32 ret = 0;
+
+ trace_drv_get_expected_throughput(sta);
+ if (local->ops->get_expected_throughput)
+ ret = local->ops->get_expected_throughput(sta);
+ trace_drv_return_u32(local, ret);
+
+ return ret;
+}
+
#endif /* __MAC80211_DRIVER_OPS */
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index ff4d4155a84..18ee0a256b1 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -143,7 +143,7 @@ ieee80211_ibss_build_presp(struct ieee80211_sub_if_data *sdata,
*pos++ = csa_settings->block_tx ? 1 : 0;
*pos++ = ieee80211_frequency_to_channel(
csa_settings->chandef.chan->center_freq);
- sdata->csa_counter_offset_beacon = (pos - presp->head);
+ sdata->csa_counter_offset_beacon[0] = (pos - presp->head);
*pos++ = csa_settings->count;
}
@@ -1677,6 +1677,7 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata,
sdata->u.ibss.control_port = params->control_port;
sdata->u.ibss.userspace_handles_dfs = params->userspace_handles_dfs;
sdata->u.ibss.basic_rates = params->basic_rates;
+ sdata->u.ibss.last_scan_completed = jiffies;
/* fix basic_rates if channel does not support these rates */
rate_flags = ieee80211_chandef_rate_flags(&params->chandef);
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index b455f62d357..ac9836e0aab 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -317,6 +317,7 @@ struct ieee80211_roc_work {
bool started, abort, hw_begun, notified;
bool to_be_freed;
+ bool on_channel;
unsigned long hw_start_time;
@@ -753,9 +754,10 @@ struct ieee80211_sub_if_data {
struct mac80211_qos_map __rcu *qos_map;
struct work_struct csa_finalize_work;
- int csa_counter_offset_beacon;
- int csa_counter_offset_presp;
+ u16 csa_counter_offset_beacon[IEEE80211_MAX_CSA_COUNTERS_NUM];
+ u16 csa_counter_offset_presp[IEEE80211_MAX_CSA_COUNTERS_NUM];
bool csa_radar_required;
+ bool csa_block_tx; /* write-protected by sdata_lock and local->mtx */
struct cfg80211_chan_def csa_chandef;
struct list_head assigned_chanctx_list; /* protected by chanctx_mtx */
@@ -765,6 +767,7 @@ struct ieee80211_sub_if_data {
struct ieee80211_chanctx *reserved_chanctx;
struct cfg80211_chan_def reserved_chandef;
bool reserved_radar_required;
+ u8 csa_current_counter;
/* used to reconfigure hardware SM PS */
struct work_struct recalc_smps;
@@ -1458,6 +1461,7 @@ __ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata,
int ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata,
struct cfg80211_sched_scan_request *req);
int ieee80211_request_sched_scan_stop(struct ieee80211_sub_if_data *sdata);
+void ieee80211_sched_scan_end(struct ieee80211_local *local);
void ieee80211_sched_scan_stopped_work(struct work_struct *work);
/* off-channel helpers */
@@ -1472,6 +1476,7 @@ void ieee80211_sw_roc_work(struct work_struct *work);
void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc);
/* channel switch handling */
+bool ieee80211_csa_needs_block_tx(struct ieee80211_local *local);
void ieee80211_csa_finalize_work(struct work_struct *work);
int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
struct cfg80211_csa_settings *params);
@@ -1833,6 +1838,15 @@ int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata,
u8 radar_detect);
int ieee80211_max_num_channels(struct ieee80211_local *local);
+/* TDLS */
+int ieee80211_tdls_mgmt(struct wiphy *wiphy, struct net_device *dev,
+ const u8 *peer, u8 action_code, u8 dialog_token,
+ u16 status_code, u32 peer_capability,
+ const u8 *extra_ies, size_t extra_ies_len);
+int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev,
+ const u8 *peer, enum nl80211_tdls_operation oper);
+
+
#ifdef CONFIG_MAC80211_NOINLINE
#define debug_noinline noinline
#else
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 7fff3dcaac4..388b863e821 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -399,6 +399,7 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local)
sdata->vif.type = NL80211_IFTYPE_MONITOR;
snprintf(sdata->name, IFNAMSIZ, "%s-monitor",
wiphy_name(local->hw.wiphy));
+ sdata->wdev.iftype = NL80211_IFTYPE_MONITOR;
sdata->encrypt_headroom = IEEE80211_ENCRYPT_HEADROOM;
@@ -838,8 +839,15 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
cancel_work_sync(&sdata->recalc_smps);
sdata_lock(sdata);
+ mutex_lock(&local->mtx);
sdata->vif.csa_active = false;
+ if (!ieee80211_csa_needs_block_tx(local))
+ ieee80211_wake_queues_by_reason(&local->hw,
+ IEEE80211_MAX_QUEUE_MAP,
+ IEEE80211_QUEUE_STOP_REASON_CSA);
+ mutex_unlock(&local->mtx);
sdata_unlock(sdata);
+
cancel_work_sync(&sdata->csa_finalize_work);
cancel_delayed_work_sync(&sdata->dfs_cac_timer_work);
@@ -1278,6 +1286,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
sdata->control_port_protocol = cpu_to_be16(ETH_P_PAE);
sdata->control_port_no_encrypt = false;
sdata->encrypt_headroom = IEEE80211_ENCRYPT_HEADROOM;
+ sdata->vif.bss_conf.idle = true;
sdata->noack_map = 0;
@@ -1771,7 +1780,6 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local)
}
mutex_unlock(&local->iflist_mtx);
unregister_netdevice_many(&unreg_list);
- list_del(&unreg_list);
list_for_each_entry_safe(sdata, tmp, &wdev_list, list) {
list_del(&sdata->list);
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index 6ff65a1ebaa..16d97f044a2 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -325,7 +325,8 @@ ieee80211_key_alloc(u32 cipher, int idx, size_t key_len,
struct ieee80211_key *key;
int i, j, err;
- BUG_ON(idx < 0 || idx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS);
+ if (WARN_ON(idx < 0 || idx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS))
+ return ERR_PTR(-EINVAL);
key = kzalloc(sizeof(struct ieee80211_key) + key_len, GFP_KERNEL);
if (!key)
@@ -481,8 +482,8 @@ int ieee80211_key_link(struct ieee80211_key *key,
int idx, ret;
bool pairwise;
- BUG_ON(!sdata);
- BUG_ON(!key);
+ if (WARN_ON(!sdata || !key))
+ return -EINVAL;
pairwise = key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE;
idx = key->conf.keyidx;
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 27b9364cdf1..d17c26d6e36 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -956,6 +956,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
if (local->hw.wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS)
local->hw.wiphy->flags |= WIPHY_FLAG_TDLS_EXTERNAL_SETUP;
+ local->hw.wiphy->max_num_csa_counters = IEEE80211_MAX_CSA_COUNTERS_NUM;
+
result = wiphy_register(local->hw.wiphy);
if (result < 0)
goto fail_wiphy_register;
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index b06ddc9519c..6495a3f0428 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -679,7 +679,7 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh)
*pos++ = 0x0;
*pos++ = ieee80211_frequency_to_channel(
csa->settings.chandef.chan->center_freq);
- sdata->csa_counter_offset_beacon = hdr_len + 6;
+ sdata->csa_counter_offset_beacon[0] = hdr_len + 6;
*pos++ = csa->settings.count;
*pos++ = WLAN_EID_CHAN_SWITCH_PARAM;
*pos++ = 6;
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index 7d050ed6fe5..cf032a8db9d 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -287,8 +287,10 @@ static void mesh_path_move_to_queue(struct mesh_path *gate_mpath,
struct sk_buff_head failq;
unsigned long flags;
- BUG_ON(gate_mpath == from_mpath);
- BUG_ON(!gate_mpath->next_hop);
+ if (WARN_ON(gate_mpath == from_mpath))
+ return;
+ if (WARN_ON(!gate_mpath->next_hop))
+ return;
__skb_queue_head_init(&failq);
diff --git a/net/mac80211/mesh_sync.c b/net/mac80211/mesh_sync.c
index 2bc5dc25d5a..09625d6205c 100644
--- a/net/mac80211/mesh_sync.c
+++ b/net/mac80211/mesh_sync.c
@@ -171,7 +171,7 @@ static void mesh_sync_offset_adjust_tbtt(struct ieee80211_sub_if_data *sdata,
u8 cap;
WARN_ON(ifmsh->mesh_sp_id != IEEE80211_SYNC_METHOD_NEIGHBOR_OFFSET);
- BUG_ON(!rcu_read_lock_held());
+ WARN_ON(!rcu_read_lock_held());
cap = beacon->meshconf->meshconf_cap;
spin_lock_bh(&ifmsh->sync_offset_lock);
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 488826f188a..3345401be1b 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -975,16 +975,23 @@ static void ieee80211_chswitch_work(struct work_struct *work)
/* XXX: shouldn't really modify cfg80211-owned data! */
ifmgd->associated->channel = sdata->csa_chandef.chan;
+ ieee80211_bss_info_change_notify(sdata, changed);
+
+ mutex_lock(&local->mtx);
+ sdata->vif.csa_active = false;
/* XXX: wait for a beacon first? */
- ieee80211_wake_queues_by_reason(&local->hw,
+ if (!ieee80211_csa_needs_block_tx(local))
+ ieee80211_wake_queues_by_reason(&local->hw,
IEEE80211_MAX_QUEUE_MAP,
IEEE80211_QUEUE_STOP_REASON_CSA);
+ mutex_unlock(&local->mtx);
- ieee80211_bss_info_change_notify(sdata, changed);
-
- out:
- sdata->vif.csa_active = false;
ifmgd->flags &= ~IEEE80211_STA_CSA_RECEIVED;
+
+ ieee80211_sta_reset_beacon_monitor(sdata);
+ ieee80211_sta_reset_conn_monitor(sdata);
+
+out:
sdata_unlock(sdata);
}
@@ -1100,12 +1107,16 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
mutex_unlock(&local->chanctx_mtx);
sdata->csa_chandef = csa_ie.chandef;
+
+ mutex_lock(&local->mtx);
sdata->vif.csa_active = true;
+ sdata->csa_block_tx = csa_ie.mode;
- if (csa_ie.mode)
+ if (sdata->csa_block_tx)
ieee80211_stop_queues_by_reason(&local->hw,
- IEEE80211_MAX_QUEUE_MAP,
- IEEE80211_QUEUE_STOP_REASON_CSA);
+ IEEE80211_MAX_QUEUE_MAP,
+ IEEE80211_QUEUE_STOP_REASON_CSA);
+ mutex_unlock(&local->mtx);
if (local->ops->channel_switch) {
/* use driver's channel switch callback */
@@ -1817,6 +1828,12 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
ifmgd->flags = 0;
mutex_lock(&local->mtx);
ieee80211_vif_release_channel(sdata);
+
+ sdata->vif.csa_active = false;
+ if (!ieee80211_csa_needs_block_tx(local))
+ ieee80211_wake_queues_by_reason(&local->hw,
+ IEEE80211_MAX_QUEUE_MAP,
+ IEEE80211_QUEUE_STOP_REASON_CSA);
mutex_unlock(&local->mtx);
sdata->encrypt_headroom = IEEE80211_ENCRYPT_HEADROOM;
@@ -2045,6 +2062,7 @@ EXPORT_SYMBOL(ieee80211_ap_probereq_get);
static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata)
{
+ struct ieee80211_local *local = sdata->local;
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN];
@@ -2058,10 +2076,14 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata)
WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY,
true, frame_buf);
ifmgd->flags &= ~IEEE80211_STA_CSA_RECEIVED;
+
+ mutex_lock(&local->mtx);
sdata->vif.csa_active = false;
- ieee80211_wake_queues_by_reason(&sdata->local->hw,
+ if (!ieee80211_csa_needs_block_tx(local))
+ ieee80211_wake_queues_by_reason(&local->hw,
IEEE80211_MAX_QUEUE_MAP,
IEEE80211_QUEUE_STOP_REASON_CSA);
+ mutex_unlock(&local->mtx);
cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf,
IEEE80211_DEAUTH_FRAME_LEN);
@@ -3546,6 +3568,9 @@ static void ieee80211_sta_bcn_mon_timer(unsigned long data)
if (local->quiescing)
return;
+ if (sdata->vif.csa_active)
+ return;
+
sdata->u.mgd.connection_loss = false;
ieee80211_queue_work(&sdata->local->hw,
&sdata->u.mgd.beacon_connection_loss_work);
@@ -3561,6 +3586,9 @@ static void ieee80211_sta_conn_mon_timer(unsigned long data)
if (local->quiescing)
return;
+ if (sdata->vif.csa_active)
+ return;
+
ieee80211_queue_work(&local->hw, &ifmgd->monitor_work);
}
@@ -3598,18 +3626,24 @@ void ieee80211_mgd_quiesce(struct ieee80211_sub_if_data *sdata)
sdata_lock(sdata);
- if (ifmgd->auth_data) {
+ if (ifmgd->auth_data || ifmgd->assoc_data) {
+ const u8 *bssid = ifmgd->auth_data ?
+ ifmgd->auth_data->bss->bssid :
+ ifmgd->assoc_data->bss->bssid;
+
/*
- * If we are trying to authenticate while suspending, cfg80211
- * won't know and won't actually abort those attempts, thus we
- * need to do that ourselves.
+ * If we are trying to authenticate / associate while suspending,
+ * cfg80211 won't know and won't actually abort those attempts,
+ * thus we need to do that ourselves.
*/
- ieee80211_send_deauth_disassoc(sdata,
- ifmgd->auth_data->bss->bssid,
+ ieee80211_send_deauth_disassoc(sdata, bssid,
IEEE80211_STYPE_DEAUTH,
WLAN_REASON_DEAUTH_LEAVING,
false, frame_buf);
- ieee80211_destroy_auth_data(sdata, false);
+ if (ifmgd->assoc_data)
+ ieee80211_destroy_assoc_data(sdata, false);
+ if (ifmgd->auth_data)
+ ieee80211_destroy_auth_data(sdata, false);
cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf,
IEEE80211_DEAUTH_FRAME_LEN);
}
diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c
index 6fb38558a5e..7a17decd27f 100644
--- a/net/mac80211/offchannel.c
+++ b/net/mac80211/offchannel.c
@@ -333,7 +333,7 @@ void ieee80211_sw_roc_work(struct work_struct *work)
container_of(work, struct ieee80211_roc_work, work.work);
struct ieee80211_sub_if_data *sdata = roc->sdata;
struct ieee80211_local *local = sdata->local;
- bool started;
+ bool started, on_channel;
mutex_lock(&local->mtx);
@@ -354,14 +354,26 @@ void ieee80211_sw_roc_work(struct work_struct *work)
if (!roc->started) {
struct ieee80211_roc_work *dep;
- /* start this ROC */
- ieee80211_offchannel_stop_vifs(local);
+ WARN_ON(local->use_chanctx);
+
+ /* If actually operating on the desired channel (with at least
+ * 20 MHz channel width) don't stop all the operations but still
+ * treat it as though the ROC operation started properly, so
+ * other ROC operations won't interfere with this one.
+ */
+ roc->on_channel = roc->chan == local->_oper_chandef.chan &&
+ local->_oper_chandef.width != NL80211_CHAN_WIDTH_5 &&
+ local->_oper_chandef.width != NL80211_CHAN_WIDTH_10;
- /* switch channel etc */
+ /* start this ROC */
ieee80211_recalc_idle(local);
- local->tmp_channel = roc->chan;
- ieee80211_hw_config(local, 0);
+ if (!roc->on_channel) {
+ ieee80211_offchannel_stop_vifs(local);
+
+ local->tmp_channel = roc->chan;
+ ieee80211_hw_config(local, 0);
+ }
/* tell userspace or send frame */
ieee80211_handle_roc_started(roc);
@@ -380,9 +392,10 @@ void ieee80211_sw_roc_work(struct work_struct *work)
finish:
list_del(&roc->list);
started = roc->started;
+ on_channel = roc->on_channel;
ieee80211_roc_notify_destroy(roc, !roc->abort);
- if (started) {
+ if (started && !on_channel) {
ieee80211_flush_queues(local, NULL);
local->tmp_channel = NULL;
diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c
index 26fd94fa0ae..1c1469c36dc 100644
--- a/net/mac80211/rc80211_minstrel.c
+++ b/net/mac80211/rc80211_minstrel.c
@@ -657,6 +657,17 @@ minstrel_free(void *priv)
kfree(priv);
}
+static u32 minstrel_get_expected_throughput(void *priv_sta)
+{
+ struct minstrel_sta_info *mi = priv_sta;
+ int idx = mi->max_tp_rate[0];
+
+ /* convert pkt per sec in kbps (1200 is the average pkt size used for
+ * computing cur_tp
+ */
+ return MINSTREL_TRUNC(mi->r[idx].cur_tp) * 1200 * 8 / 1024;
+}
+
const struct rate_control_ops mac80211_minstrel = {
.name = "minstrel",
.tx_status = minstrel_tx_status,
@@ -670,6 +681,7 @@ const struct rate_control_ops mac80211_minstrel = {
.add_sta_debugfs = minstrel_add_sta_debugfs,
.remove_sta_debugfs = minstrel_remove_sta_debugfs,
#endif
+ .get_expected_throughput = minstrel_get_expected_throughput,
};
int __init
diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index bccaf854a30..85c1e74b771 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -22,7 +22,7 @@
#define MCS_NBITS (AVG_PKT_SIZE << 3)
/* Number of symbols for a packet with (bps) bits per symbol */
-#define MCS_NSYMS(bps) ((MCS_NBITS + (bps) - 1) / (bps))
+#define MCS_NSYMS(bps) DIV_ROUND_UP(MCS_NBITS, (bps))
/* Transmission time (nanoseconds) for a packet containing (syms) symbols */
#define MCS_SYMBOL_TIME(sgi, syms) \
@@ -226,8 +226,9 @@ minstrel_ht_calc_tp(struct minstrel_ht_sta *mi, int group, int rate)
nsecs = 1000 * mi->overhead / MINSTREL_TRUNC(mi->avg_ampdu_len);
nsecs += minstrel_mcs_groups[group].duration[rate];
- tp = 1000000 * ((prob * 1000) / nsecs);
+ /* prob is scaled - see MINSTREL_FRAC above */
+ tp = 1000000 * ((prob * 1000) / nsecs);
mr->cur_tp = MINSTREL_TRUNC(tp);
}
@@ -1031,6 +1032,22 @@ minstrel_ht_free(void *priv)
mac80211_minstrel.free(priv);
}
+static u32 minstrel_ht_get_expected_throughput(void *priv_sta)
+{
+ struct minstrel_ht_sta_priv *msp = priv_sta;
+ struct minstrel_ht_sta *mi = &msp->ht;
+ int i, j;
+
+ if (!msp->is_ht)
+ return mac80211_minstrel.get_expected_throughput(priv_sta);
+
+ i = mi->max_tp_rate / MCS_GROUP_RATES;
+ j = mi->max_tp_rate % MCS_GROUP_RATES;
+
+ /* convert cur_tp from pkt per second in kbps */
+ return mi->groups[i].rates[j].cur_tp * AVG_PKT_SIZE * 8 / 1024;
+}
+
static const struct rate_control_ops mac80211_minstrel_ht = {
.name = "minstrel_ht",
.tx_status = minstrel_ht_tx_status,
@@ -1045,6 +1062,7 @@ static const struct rate_control_ops mac80211_minstrel_ht = {
.add_sta_debugfs = minstrel_ht_add_sta_debugfs,
.remove_sta_debugfs = minstrel_ht_remove_sta_debugfs,
#endif
+ .get_expected_throughput = minstrel_ht_get_expected_throughput,
};
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 28185c8dc19..f40661eb75b 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -1076,12 +1076,8 @@ void ieee80211_sched_scan_results(struct ieee80211_hw *hw)
}
EXPORT_SYMBOL(ieee80211_sched_scan_results);
-void ieee80211_sched_scan_stopped_work(struct work_struct *work)
+void ieee80211_sched_scan_end(struct ieee80211_local *local)
{
- struct ieee80211_local *local =
- container_of(work, struct ieee80211_local,
- sched_scan_stopped_work);
-
mutex_lock(&local->mtx);
if (!rcu_access_pointer(local->sched_scan_sdata)) {
@@ -1099,6 +1095,15 @@ void ieee80211_sched_scan_stopped_work(struct work_struct *work)
cfg80211_sched_scan_stopped(local->hw.wiphy);
}
+void ieee80211_sched_scan_stopped_work(struct work_struct *work)
+{
+ struct ieee80211_local *local =
+ container_of(work, struct ieee80211_local,
+ sched_scan_stopped_work);
+
+ ieee80211_sched_scan_end(local);
+}
+
void ieee80211_sched_scan_stopped(struct ieee80211_hw *hw)
{
struct ieee80211_local *local = hw_to_local(hw);
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 632d372bb51..a9b46d8ea22 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -240,6 +240,7 @@ void sta_info_free(struct ieee80211_local *local, struct sta_info *sta)
sta_dbg(sta->sdata, "Destroyed STA %pM\n", sta->sta.addr);
+ kfree(rcu_dereference_raw(sta->sta.rates));
kfree(sta);
}
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index 60cb7a66597..ba29ebc8614 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -541,6 +541,23 @@ static void ieee80211_tx_latency_end_msrmnt(struct ieee80211_local *local,
*/
#define STA_LOST_PKT_THRESHOLD 50
+static void ieee80211_lost_packet(struct sta_info *sta, struct sk_buff *skb)
+{
+ struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+
+ /* This packet was aggregated but doesn't carry status info */
+ if ((info->flags & IEEE80211_TX_CTL_AMPDU) &&
+ !(info->flags & IEEE80211_TX_STAT_AMPDU))
+ return;
+
+ if (++sta->lost_packets < STA_LOST_PKT_THRESHOLD)
+ return;
+
+ cfg80211_cqm_pktloss_notify(sta->sdata->dev, sta->sta.addr,
+ sta->lost_packets, GFP_ATOMIC);
+ sta->lost_packets = 0;
+}
+
void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
{
struct sk_buff *skb2;
@@ -680,12 +697,8 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
if (info->flags & IEEE80211_TX_STAT_ACK) {
if (sta->lost_packets)
sta->lost_packets = 0;
- } else if (++sta->lost_packets >= STA_LOST_PKT_THRESHOLD) {
- cfg80211_cqm_pktloss_notify(sta->sdata->dev,
- sta->sta.addr,
- sta->lost_packets,
- GFP_ATOMIC);
- sta->lost_packets = 0;
+ } else {
+ ieee80211_lost_packet(sta, skb);
}
}
diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c
new file mode 100644
index 00000000000..652813b2d3d
--- /dev/null
+++ b/net/mac80211/tdls.c
@@ -0,0 +1,325 @@
+/*
+ * mac80211 TDLS handling code
+ *
+ * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
+ * Copyright 2014, Intel Corporation
+ *
+ * This file is GPLv2 as found in COPYING.
+ */
+
+#include <linux/ieee80211.h>
+#include "ieee80211_i.h"
+
+static void ieee80211_tdls_add_ext_capab(struct sk_buff *skb)
+{
+ u8 *pos = (void *)skb_put(skb, 7);
+
+ *pos++ = WLAN_EID_EXT_CAPABILITY;
+ *pos++ = 5; /* len */
+ *pos++ = 0x0;
+ *pos++ = 0x0;
+ *pos++ = 0x0;
+ *pos++ = 0x0;
+ *pos++ = WLAN_EXT_CAPA5_TDLS_ENABLED;
+}
+
+static u16 ieee80211_get_tdls_sta_capab(struct ieee80211_sub_if_data *sdata)
+{
+ struct ieee80211_local *local = sdata->local;
+ u16 capab;
+
+ capab = 0;
+ if (ieee80211_get_sdata_band(sdata) != IEEE80211_BAND_2GHZ)
+ return capab;
+
+ if (!(local->hw.flags & IEEE80211_HW_2GHZ_SHORT_SLOT_INCAPABLE))
+ capab |= WLAN_CAPABILITY_SHORT_SLOT_TIME;
+ if (!(local->hw.flags & IEEE80211_HW_2GHZ_SHORT_PREAMBLE_INCAPABLE))
+ capab |= WLAN_CAPABILITY_SHORT_PREAMBLE;
+
+ return capab;
+}
+
+static void ieee80211_tdls_add_link_ie(struct sk_buff *skb, const u8 *src_addr,
+ const u8 *peer, const u8 *bssid)
+{
+ struct ieee80211_tdls_lnkie *lnkid;
+
+ lnkid = (void *)skb_put(skb, sizeof(struct ieee80211_tdls_lnkie));
+
+ lnkid->ie_type = WLAN_EID_LINK_ID;
+ lnkid->ie_len = sizeof(struct ieee80211_tdls_lnkie) - 2;
+
+ memcpy(lnkid->bssid, bssid, ETH_ALEN);
+ memcpy(lnkid->init_sta, src_addr, ETH_ALEN);
+ memcpy(lnkid->resp_sta, peer, ETH_ALEN);
+}
+
+static int
+ieee80211_prep_tdls_encap_data(struct wiphy *wiphy, struct net_device *dev,
+ const u8 *peer, u8 action_code, u8 dialog_token,
+ u16 status_code, struct sk_buff *skb)
+{
+ struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ enum ieee80211_band band = ieee80211_get_sdata_band(sdata);
+ struct ieee80211_tdls_data *tf;
+
+ tf = (void *)skb_put(skb, offsetof(struct ieee80211_tdls_data, u));
+
+ memcpy(tf->da, peer, ETH_ALEN);
+ memcpy(tf->sa, sdata->vif.addr, ETH_ALEN);
+ tf->ether_type = cpu_to_be16(ETH_P_TDLS);
+ tf->payload_type = WLAN_TDLS_SNAP_RFTYPE;
+
+ switch (action_code) {
+ case WLAN_TDLS_SETUP_REQUEST:
+ tf->category = WLAN_CATEGORY_TDLS;
+ tf->action_code = WLAN_TDLS_SETUP_REQUEST;
+
+ skb_put(skb, sizeof(tf->u.setup_req));
+ tf->u.setup_req.dialog_token = dialog_token;
+ tf->u.setup_req.capability =
+ cpu_to_le16(ieee80211_get_tdls_sta_capab(sdata));
+
+ ieee80211_add_srates_ie(sdata, skb, false, band);
+ ieee80211_add_ext_srates_ie(sdata, skb, false, band);
+ ieee80211_tdls_add_ext_capab(skb);
+ break;
+ case WLAN_TDLS_SETUP_RESPONSE:
+ tf->category = WLAN_CATEGORY_TDLS;
+ tf->action_code = WLAN_TDLS_SETUP_RESPONSE;
+
+ skb_put(skb, sizeof(tf->u.setup_resp));
+ tf->u.setup_resp.status_code = cpu_to_le16(status_code);
+ tf->u.setup_resp.dialog_token = dialog_token;
+ tf->u.setup_resp.capability =
+ cpu_to_le16(ieee80211_get_tdls_sta_capab(sdata));
+
+ ieee80211_add_srates_ie(sdata, skb, false, band);
+ ieee80211_add_ext_srates_ie(sdata, skb, false, band);
+ ieee80211_tdls_add_ext_capab(skb);
+ break;
+ case WLAN_TDLS_SETUP_CONFIRM:
+ tf->category = WLAN_CATEGORY_TDLS;
+ tf->action_code = WLAN_TDLS_SETUP_CONFIRM;
+
+ skb_put(skb, sizeof(tf->u.setup_cfm));
+ tf->u.setup_cfm.status_code = cpu_to_le16(status_code);
+ tf->u.setup_cfm.dialog_token = dialog_token;
+ break;
+ case WLAN_TDLS_TEARDOWN:
+ tf->category = WLAN_CATEGORY_TDLS;
+ tf->action_code = WLAN_TDLS_TEARDOWN;
+
+ skb_put(skb, sizeof(tf->u.teardown));
+ tf->u.teardown.reason_code = cpu_to_le16(status_code);
+ break;
+ case WLAN_TDLS_DISCOVERY_REQUEST:
+ tf->category = WLAN_CATEGORY_TDLS;
+ tf->action_code = WLAN_TDLS_DISCOVERY_REQUEST;
+
+ skb_put(skb, sizeof(tf->u.discover_req));
+ tf->u.discover_req.dialog_token = dialog_token;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int
+ieee80211_prep_tdls_direct(struct wiphy *wiphy, struct net_device *dev,
+ const u8 *peer, u8 action_code, u8 dialog_token,
+ u16 status_code, struct sk_buff *skb)
+{
+ struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ enum ieee80211_band band = ieee80211_get_sdata_band(sdata);
+ struct ieee80211_mgmt *mgmt;
+
+ mgmt = (void *)skb_put(skb, 24);
+ memset(mgmt, 0, 24);
+ memcpy(mgmt->da, peer, ETH_ALEN);
+ memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN);
+ memcpy(mgmt->bssid, sdata->u.mgd.bssid, ETH_ALEN);
+
+ mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
+ IEEE80211_STYPE_ACTION);
+
+ switch (action_code) {
+ case WLAN_PUB_ACTION_TDLS_DISCOVER_RES:
+ skb_put(skb, 1 + sizeof(mgmt->u.action.u.tdls_discover_resp));
+ mgmt->u.action.category = WLAN_CATEGORY_PUBLIC;
+ mgmt->u.action.u.tdls_discover_resp.action_code =
+ WLAN_PUB_ACTION_TDLS_DISCOVER_RES;
+ mgmt->u.action.u.tdls_discover_resp.dialog_token =
+ dialog_token;
+ mgmt->u.action.u.tdls_discover_resp.capability =
+ cpu_to_le16(ieee80211_get_tdls_sta_capab(sdata));
+
+ ieee80211_add_srates_ie(sdata, skb, false, band);
+ ieee80211_add_ext_srates_ie(sdata, skb, false, band);
+ ieee80211_tdls_add_ext_capab(skb);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int ieee80211_tdls_mgmt(struct wiphy *wiphy, struct net_device *dev,
+ const u8 *peer, u8 action_code, u8 dialog_token,
+ u16 status_code, u32 peer_capability,
+ const u8 *extra_ies, size_t extra_ies_len)
+{
+ struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ struct ieee80211_local *local = sdata->local;
+ struct sk_buff *skb = NULL;
+ bool send_direct;
+ int ret;
+
+ if (!(wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS))
+ return -ENOTSUPP;
+
+ /* make sure we are in managed mode, and associated */
+ if (sdata->vif.type != NL80211_IFTYPE_STATION ||
+ !sdata->u.mgd.associated)
+ return -EINVAL;
+
+ tdls_dbg(sdata, "TDLS mgmt action %d peer %pM\n",
+ action_code, peer);
+
+ skb = dev_alloc_skb(local->hw.extra_tx_headroom +
+ max(sizeof(struct ieee80211_mgmt),
+ sizeof(struct ieee80211_tdls_data)) +
+ 50 + /* supported rates */
+ 7 + /* ext capab */
+ extra_ies_len +
+ sizeof(struct ieee80211_tdls_lnkie));
+ if (!skb)
+ return -ENOMEM;
+
+ skb_reserve(skb, local->hw.extra_tx_headroom);
+
+ switch (action_code) {
+ case WLAN_TDLS_SETUP_REQUEST:
+ case WLAN_TDLS_SETUP_RESPONSE:
+ case WLAN_TDLS_SETUP_CONFIRM:
+ case WLAN_TDLS_TEARDOWN:
+ case WLAN_TDLS_DISCOVERY_REQUEST:
+ ret = ieee80211_prep_tdls_encap_data(wiphy, dev, peer,
+ action_code, dialog_token,
+ status_code, skb);
+ send_direct = false;
+ break;
+ case WLAN_PUB_ACTION_TDLS_DISCOVER_RES:
+ ret = ieee80211_prep_tdls_direct(wiphy, dev, peer, action_code,
+ dialog_token, status_code,
+ skb);
+ send_direct = true;
+ break;
+ default:
+ ret = -ENOTSUPP;
+ break;
+ }
+
+ if (ret < 0)
+ goto fail;
+
+ if (extra_ies_len)
+ memcpy(skb_put(skb, extra_ies_len), extra_ies, extra_ies_len);
+
+ /* the TDLS link IE is always added last */
+ switch (action_code) {
+ case WLAN_TDLS_SETUP_REQUEST:
+ case WLAN_TDLS_SETUP_CONFIRM:
+ case WLAN_TDLS_TEARDOWN:
+ case WLAN_TDLS_DISCOVERY_REQUEST:
+ /* we are the initiator */
+ ieee80211_tdls_add_link_ie(skb, sdata->vif.addr, peer,
+ sdata->u.mgd.bssid);
+ break;
+ case WLAN_TDLS_SETUP_RESPONSE:
+ case WLAN_PUB_ACTION_TDLS_DISCOVER_RES:
+ /* we are the responder */
+ ieee80211_tdls_add_link_ie(skb, peer, sdata->vif.addr,
+ sdata->u.mgd.bssid);
+ break;
+ default:
+ ret = -ENOTSUPP;
+ goto fail;
+ }
+
+ if (send_direct) {
+ ieee80211_tx_skb(sdata, skb);
+ return 0;
+ }
+
+ /*
+ * According to 802.11z: Setup req/resp are sent in AC_BK, otherwise
+ * we should default to AC_VI.
+ */
+ switch (action_code) {
+ case WLAN_TDLS_SETUP_REQUEST:
+ case WLAN_TDLS_SETUP_RESPONSE:
+ skb_set_queue_mapping(skb, IEEE80211_AC_BK);
+ skb->priority = 2;
+ break;
+ default:
+ skb_set_queue_mapping(skb, IEEE80211_AC_VI);
+ skb->priority = 5;
+ break;
+ }
+
+ /* disable bottom halves when entering the Tx path */
+ local_bh_disable();
+ ret = ieee80211_subif_start_xmit(skb, dev);
+ local_bh_enable();
+
+ return ret;
+
+fail:
+ dev_kfree_skb(skb);
+ return ret;
+}
+
+int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev,
+ const u8 *peer, enum nl80211_tdls_operation oper)
+{
+ struct sta_info *sta;
+ struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+
+ if (!(wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS))
+ return -ENOTSUPP;
+
+ if (sdata->vif.type != NL80211_IFTYPE_STATION)
+ return -EINVAL;
+
+ tdls_dbg(sdata, "TDLS oper %d peer %pM\n", oper, peer);
+
+ switch (oper) {
+ case NL80211_TDLS_ENABLE_LINK:
+ rcu_read_lock();
+ sta = sta_info_get(sdata, peer);
+ if (!sta) {
+ rcu_read_unlock();
+ return -ENOLINK;
+ }
+
+ set_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH);
+ rcu_read_unlock();
+ break;
+ case NL80211_TDLS_DISABLE_LINK:
+ return sta_info_destroy_addr(sdata, peer);
+ case NL80211_TDLS_TEARDOWN:
+ case NL80211_TDLS_SETUP:
+ case NL80211_TDLS_DISCOVERY_REQ:
+ /* We don't support in-driver setup/teardown/discovery */
+ return -ENOTSUPP;
+ default:
+ return -ENOTSUPP;
+ }
+
+ return 0;
+}
diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
index a0b0aea7652..cfe1a0688b5 100644
--- a/net/mac80211/trace.h
+++ b/net/mac80211/trace.h
@@ -21,10 +21,10 @@
#define VIF_ENTRY __field(enum nl80211_iftype, vif_type) __field(void *, sdata) \
__field(bool, p2p) \
- __string(vif_name, sdata->dev ? sdata->dev->name : "<nodev>")
+ __string(vif_name, sdata->name)
#define VIF_ASSIGN __entry->vif_type = sdata->vif.type; __entry->sdata = sdata; \
__entry->p2p = sdata->vif.p2p; \
- __assign_str(vif_name, sdata->dev ? sdata->dev->name : sdata->name)
+ __assign_str(vif_name, sdata->name)
#define VIF_PR_FMT " vif:%s(%d%s)"
#define VIF_PR_ARG __get_str(vif_name), __entry->vif_type, __entry->p2p ? "/p2p" : ""
@@ -184,6 +184,20 @@ TRACE_EVENT(drv_return_bool,
"true" : "false")
);
+TRACE_EVENT(drv_return_u32,
+ TP_PROTO(struct ieee80211_local *local, u32 ret),
+ TP_ARGS(local, ret),
+ TP_STRUCT__entry(
+ LOCAL_ENTRY
+ __field(u32, ret)
+ ),
+ TP_fast_assign(
+ LOCAL_ASSIGN;
+ __entry->ret = ret;
+ ),
+ TP_printk(LOCAL_PR_FMT " - %u", LOCAL_PR_ARG, __entry->ret)
+);
+
TRACE_EVENT(drv_return_u64,
TP_PROTO(struct ieee80211_local *local, u64 ret),
TP_ARGS(local, ret),
@@ -1375,6 +1389,91 @@ TRACE_EVENT(drv_change_chanctx,
)
);
+#if !defined(__TRACE_VIF_ENTRY)
+#define __TRACE_VIF_ENTRY
+struct trace_vif_entry {
+ enum nl80211_iftype vif_type;
+ bool p2p;
+ char vif_name[IFNAMSIZ];
+} __packed;
+
+struct trace_chandef_entry {
+ u32 control_freq;
+ u32 chan_width;
+ u32 center_freq1;
+ u32 center_freq2;
+} __packed;
+
+struct trace_switch_entry {
+ struct trace_vif_entry vif;
+ struct trace_chandef_entry old_chandef;
+ struct trace_chandef_entry new_chandef;
+} __packed;
+
+#define SWITCH_ENTRY_ASSIGN(to, from) local_vifs[i].to = vifs[i].from
+#endif
+
+TRACE_EVENT(drv_switch_vif_chanctx,
+ TP_PROTO(struct ieee80211_local *local,
+ struct ieee80211_vif_chanctx_switch *vifs,
+ int n_vifs, enum ieee80211_chanctx_switch_mode mode),
+ TP_ARGS(local, vifs, n_vifs, mode),
+
+ TP_STRUCT__entry(
+ LOCAL_ENTRY
+ __field(int, n_vifs)
+ __field(u32, mode)
+ __dynamic_array(u8, vifs,
+ sizeof(struct trace_switch_entry) * n_vifs)
+ ),
+
+ TP_fast_assign(
+ LOCAL_ASSIGN;
+ __entry->n_vifs = n_vifs;
+ __entry->mode = mode;
+ {
+ struct trace_switch_entry *local_vifs =
+ __get_dynamic_array(vifs);
+ int i;
+
+ for (i = 0; i < n_vifs; i++) {
+ struct ieee80211_sub_if_data *sdata;
+
+ sdata = container_of(vifs[i].vif,
+ struct ieee80211_sub_if_data,
+ vif);
+
+ SWITCH_ENTRY_ASSIGN(vif.vif_type, vif->type);
+ SWITCH_ENTRY_ASSIGN(vif.p2p, vif->p2p);
+ strncpy(local_vifs[i].vif.vif_name,
+ sdata->name,
+ sizeof(local_vifs[i].vif.vif_name));
+ SWITCH_ENTRY_ASSIGN(old_chandef.control_freq,
+ old_ctx->def.chan->center_freq);
+ SWITCH_ENTRY_ASSIGN(old_chandef.chan_width,
+ old_ctx->def.width);
+ SWITCH_ENTRY_ASSIGN(old_chandef.center_freq1,
+ old_ctx->def.center_freq1);
+ SWITCH_ENTRY_ASSIGN(old_chandef.center_freq2,
+ old_ctx->def.center_freq2);
+ SWITCH_ENTRY_ASSIGN(new_chandef.control_freq,
+ new_ctx->def.chan->center_freq);
+ SWITCH_ENTRY_ASSIGN(new_chandef.chan_width,
+ new_ctx->def.width);
+ SWITCH_ENTRY_ASSIGN(new_chandef.center_freq1,
+ new_ctx->def.center_freq1);
+ SWITCH_ENTRY_ASSIGN(new_chandef.center_freq2,
+ new_ctx->def.center_freq2);
+ }
+ }
+ ),
+
+ TP_printk(
+ LOCAL_PR_FMT " n_vifs:%d mode:%d",
+ LOCAL_PR_ARG, __entry->n_vifs, __entry->mode
+ )
+);
+
DECLARE_EVENT_CLASS(local_sdata_chanctx,
TP_PROTO(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata,
@@ -1499,6 +1598,24 @@ DEFINE_EVENT(local_sdata_evt, drv_leave_ibss,
TP_ARGS(local, sdata)
);
+TRACE_EVENT(drv_get_expected_throughput,
+ TP_PROTO(struct ieee80211_sta *sta),
+
+ TP_ARGS(sta),
+
+ TP_STRUCT__entry(
+ STA_ENTRY
+ ),
+
+ TP_fast_assign(
+ STA_ASSIGN;
+ ),
+
+ TP_printk(
+ STA_PR_FMT, STA_PR_ARG
+ )
+);
+
/*
* Tracing for API calls that drivers call.
*/
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 19d36d4117e..1a252c606ad 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -414,6 +414,9 @@ ieee80211_tx_h_multicast_ps_buf(struct ieee80211_tx_data *tx)
if (ieee80211_has_order(hdr->frame_control))
return TX_CONTINUE;
+ if (ieee80211_is_probe_req(hdr->frame_control))
+ return TX_CONTINUE;
+
if (tx->local->hw.flags & IEEE80211_HW_QUEUE_CONTROL)
info->hw_queue = tx->sdata->vif.cab_queue;
@@ -463,6 +466,7 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx)
{
struct sta_info *sta = tx->sta;
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
+ struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data;
struct ieee80211_local *local = tx->local;
if (unlikely(!sta))
@@ -473,6 +477,12 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx)
!(info->flags & IEEE80211_TX_CTL_NO_PS_BUFFER))) {
int ac = skb_get_queue_mapping(tx->skb);
+ if (ieee80211_is_mgmt(hdr->frame_control) &&
+ !ieee80211_is_bufferable_mmpdu(hdr->frame_control)) {
+ info->flags |= IEEE80211_TX_CTL_NO_PS_BUFFER;
+ return TX_CONTINUE;
+ }
+
ps_dbg(sta->sdata, "STA %pM aid %d: PS buffer for AC %d\n",
sta->sta.addr, sta->sta.aid, ac);
if (tx->local->total_ps_buffered >= TOTAL_MAX_TX_BUFFER)
@@ -531,19 +541,9 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx)
static ieee80211_tx_result debug_noinline
ieee80211_tx_h_ps_buf(struct ieee80211_tx_data *tx)
{
- struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
- struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data;
-
if (unlikely(tx->flags & IEEE80211_TX_PS_BUFFERED))
return TX_CONTINUE;
- if (ieee80211_is_mgmt(hdr->frame_control) &&
- !ieee80211_is_bufferable_mmpdu(hdr->frame_control)) {
- if (tx->flags & IEEE80211_TX_UNICAST)
- info->flags |= IEEE80211_TX_CTL_NO_PS_BUFFER;
- return TX_CONTINUE;
- }
-
if (tx->flags & IEEE80211_TX_UNICAST)
return ieee80211_tx_h_unicast_ps_buf(tx);
else
@@ -2328,7 +2328,8 @@ void ieee80211_tx_pending(unsigned long data)
/* functions for drivers to get certain frames */
static void __ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata,
- struct ps_data *ps, struct sk_buff *skb)
+ struct ps_data *ps, struct sk_buff *skb,
+ bool is_template)
{
u8 *pos, *tim;
int aid0 = 0;
@@ -2341,11 +2342,12 @@ static void __ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata,
* checking byte-for-byte */
have_bits = !bitmap_empty((unsigned long *)ps->tim,
IEEE80211_MAX_AID+1);
-
- if (ps->dtim_count == 0)
- ps->dtim_count = sdata->vif.bss_conf.dtim_period - 1;
- else
- ps->dtim_count--;
+ if (!is_template) {
+ if (ps->dtim_count == 0)
+ ps->dtim_count = sdata->vif.bss_conf.dtim_period - 1;
+ else
+ ps->dtim_count--;
+ }
tim = pos = (u8 *) skb_put(skb, 6);
*pos++ = WLAN_EID_TIM;
@@ -2391,7 +2393,8 @@ static void __ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata,
}
static int ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata,
- struct ps_data *ps, struct sk_buff *skb)
+ struct ps_data *ps, struct sk_buff *skb,
+ bool is_template)
{
struct ieee80211_local *local = sdata->local;
@@ -2403,24 +2406,24 @@ static int ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata,
* of the tim bitmap in mac80211 and the driver.
*/
if (local->tim_in_locked_section) {
- __ieee80211_beacon_add_tim(sdata, ps, skb);
+ __ieee80211_beacon_add_tim(sdata, ps, skb, is_template);
} else {
spin_lock_bh(&local->tim_lock);
- __ieee80211_beacon_add_tim(sdata, ps, skb);
+ __ieee80211_beacon_add_tim(sdata, ps, skb, is_template);
spin_unlock_bh(&local->tim_lock);
}
return 0;
}
-static void ieee80211_update_csa(struct ieee80211_sub_if_data *sdata,
- struct beacon_data *beacon)
+static void ieee80211_set_csa(struct ieee80211_sub_if_data *sdata,
+ struct beacon_data *beacon)
{
struct probe_resp *resp;
- int counter_offset_beacon = sdata->csa_counter_offset_beacon;
- int counter_offset_presp = sdata->csa_counter_offset_presp;
u8 *beacon_data;
size_t beacon_data_len;
+ int i;
+ u8 count = sdata->csa_current_counter;
switch (sdata->vif.type) {
case NL80211_IFTYPE_AP:
@@ -2438,40 +2441,57 @@ static void ieee80211_update_csa(struct ieee80211_sub_if_data *sdata,
default:
return;
}
- if (WARN_ON(counter_offset_beacon >= beacon_data_len))
- return;
- /* Warn if the driver did not check for/react to csa
- * completeness. A beacon with CSA counter set to 0 should
- * never occur, because a counter of 1 means switch just
- * before the next beacon.
- */
- if (WARN_ON(beacon_data[counter_offset_beacon] == 1))
- return;
+ for (i = 0; i < IEEE80211_MAX_CSA_COUNTERS_NUM; ++i) {
+ u16 counter_offset_beacon =
+ sdata->csa_counter_offset_beacon[i];
+ u16 counter_offset_presp = sdata->csa_counter_offset_presp[i];
- beacon_data[counter_offset_beacon]--;
+ if (counter_offset_beacon) {
+ if (WARN_ON(counter_offset_beacon >= beacon_data_len))
+ return;
- if (sdata->vif.type == NL80211_IFTYPE_AP && counter_offset_presp) {
- rcu_read_lock();
- resp = rcu_dereference(sdata->u.ap.probe_resp);
+ beacon_data[counter_offset_beacon] = count;
+ }
- /* if nl80211 accepted the offset, this should not happen. */
- if (WARN_ON(!resp)) {
+ if (sdata->vif.type == NL80211_IFTYPE_AP &&
+ counter_offset_presp) {
+ rcu_read_lock();
+ resp = rcu_dereference(sdata->u.ap.probe_resp);
+
+ /* If nl80211 accepted the offset, this should
+ * not happen.
+ */
+ if (WARN_ON(!resp)) {
+ rcu_read_unlock();
+ return;
+ }
+ resp->data[counter_offset_presp] = count;
rcu_read_unlock();
- return;
}
- resp->data[counter_offset_presp]--;
- rcu_read_unlock();
}
}
+u8 ieee80211_csa_update_counter(struct ieee80211_vif *vif)
+{
+ struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
+
+ sdata->csa_current_counter--;
+
+ /* the counter should never reach 0 */
+ WARN_ON(!sdata->csa_current_counter);
+
+ return sdata->csa_current_counter;
+}
+EXPORT_SYMBOL(ieee80211_csa_update_counter);
+
bool ieee80211_csa_is_complete(struct ieee80211_vif *vif)
{
struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
struct beacon_data *beacon = NULL;
u8 *beacon_data;
size_t beacon_data_len;
- int counter_beacon = sdata->csa_counter_offset_beacon;
+ int counter_beacon = sdata->csa_counter_offset_beacon[0];
int ret = false;
if (!ieee80211_sdata_running(sdata))
@@ -2521,9 +2541,11 @@ bool ieee80211_csa_is_complete(struct ieee80211_vif *vif)
}
EXPORT_SYMBOL(ieee80211_csa_is_complete);
-struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
- struct ieee80211_vif *vif,
- u16 *tim_offset, u16 *tim_length)
+static struct sk_buff *
+__ieee80211_beacon_get(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif,
+ struct ieee80211_mutable_offsets *offs,
+ bool is_template)
{
struct ieee80211_local *local = hw_to_local(hw);
struct sk_buff *skb = NULL;
@@ -2532,6 +2554,7 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
enum ieee80211_band band;
struct ieee80211_tx_rate_control txrc;
struct ieee80211_chanctx_conf *chanctx_conf;
+ int csa_off_base = 0;
rcu_read_lock();
@@ -2541,18 +2564,20 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
if (!ieee80211_sdata_running(sdata) || !chanctx_conf)
goto out;
- if (tim_offset)
- *tim_offset = 0;
- if (tim_length)
- *tim_length = 0;
+ if (offs)
+ memset(offs, 0, sizeof(*offs));
if (sdata->vif.type == NL80211_IFTYPE_AP) {
struct ieee80211_if_ap *ap = &sdata->u.ap;
struct beacon_data *beacon = rcu_dereference(ap->beacon);
if (beacon) {
- if (sdata->vif.csa_active)
- ieee80211_update_csa(sdata, beacon);
+ if (sdata->vif.csa_active) {
+ if (!is_template)
+ ieee80211_csa_update_counter(vif);
+
+ ieee80211_set_csa(sdata, beacon);
+ }
/*
* headroom, head length,
@@ -2569,12 +2594,16 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
memcpy(skb_put(skb, beacon->head_len), beacon->head,
beacon->head_len);
- ieee80211_beacon_add_tim(sdata, &ap->ps, skb);
+ ieee80211_beacon_add_tim(sdata, &ap->ps, skb,
+ is_template);
- if (tim_offset)
- *tim_offset = beacon->head_len;
- if (tim_length)
- *tim_length = skb->len - beacon->head_len;
+ if (offs) {
+ offs->tim_offset = beacon->head_len;
+ offs->tim_length = skb->len - beacon->head_len;
+
+ /* for AP the csa offsets are from tail */
+ csa_off_base = skb->len;
+ }
if (beacon->tail)
memcpy(skb_put(skb, beacon->tail_len),
@@ -2589,9 +2618,12 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
if (!presp)
goto out;
- if (sdata->vif.csa_active)
- ieee80211_update_csa(sdata, presp);
+ if (sdata->vif.csa_active) {
+ if (!is_template)
+ ieee80211_csa_update_counter(vif);
+ ieee80211_set_csa(sdata, presp);
+ }
skb = dev_alloc_skb(local->tx_headroom + presp->head_len +
local->hw.extra_beacon_tailroom);
@@ -2611,8 +2643,17 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
if (!bcn)
goto out;
- if (sdata->vif.csa_active)
- ieee80211_update_csa(sdata, bcn);
+ if (sdata->vif.csa_active) {
+ if (!is_template)
+ /* TODO: For mesh csa_counter is in TU, so
+ * decrementing it by one isn't correct, but
+ * for now we leave it consistent with overall
+ * mac80211's behavior.
+ */
+ ieee80211_csa_update_counter(vif);
+
+ ieee80211_set_csa(sdata, bcn);
+ }
if (ifmsh->sync_ops)
ifmsh->sync_ops->adjust_tbtt(sdata, bcn);
@@ -2626,13 +2667,33 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
goto out;
skb_reserve(skb, local->tx_headroom);
memcpy(skb_put(skb, bcn->head_len), bcn->head, bcn->head_len);
- ieee80211_beacon_add_tim(sdata, &ifmsh->ps, skb);
+ ieee80211_beacon_add_tim(sdata, &ifmsh->ps, skb, is_template);
+
+ if (offs) {
+ offs->tim_offset = bcn->head_len;
+ offs->tim_length = skb->len - bcn->head_len;
+ }
+
memcpy(skb_put(skb, bcn->tail_len), bcn->tail, bcn->tail_len);
} else {
WARN_ON(1);
goto out;
}
+ /* CSA offsets */
+ if (offs) {
+ int i;
+
+ for (i = 0; i < IEEE80211_MAX_CSA_COUNTERS_NUM; i++) {
+ u16 csa_off = sdata->csa_counter_offset_beacon[i];
+
+ if (!csa_off)
+ continue;
+
+ offs->csa_counter_offs[i] = csa_off_base + csa_off;
+ }
+ }
+
band = chanctx_conf->def.chan->band;
info = IEEE80211_SKB_CB(skb);
@@ -2663,6 +2724,32 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
out:
rcu_read_unlock();
return skb;
+
+}
+
+struct sk_buff *
+ieee80211_beacon_get_template(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif,
+ struct ieee80211_mutable_offsets *offs)
+{
+ return __ieee80211_beacon_get(hw, vif, offs, true);
+}
+EXPORT_SYMBOL(ieee80211_beacon_get_template);
+
+struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif,
+ u16 *tim_offset, u16 *tim_length)
+{
+ struct ieee80211_mutable_offsets offs = {};
+ struct sk_buff *bcn = __ieee80211_beacon_get(hw, vif, &offs, false);
+
+ if (tim_offset)
+ *tim_offset = offs.tim_offset;
+
+ if (tim_length)
+ *tim_length = offs.tim_length;
+
+ return bcn;
}
EXPORT_SYMBOL(ieee80211_beacon_get_tim);
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index c08bd4aca6b..a6cda52ed92 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -1096,11 +1096,12 @@ void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
int err;
/* 24 + 6 = header + auth_algo + auth_transaction + status_code */
- skb = dev_alloc_skb(local->hw.extra_tx_headroom + 24 + 6 + extra_len);
+ skb = dev_alloc_skb(local->hw.extra_tx_headroom + IEEE80211_WEP_IV_LEN +
+ 24 + 6 + extra_len + IEEE80211_WEP_ICV_LEN);
if (!skb)
return;
- skb_reserve(skb, local->hw.extra_tx_headroom);
+ skb_reserve(skb, local->hw.extra_tx_headroom + IEEE80211_WEP_IV_LEN);
mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24 + 6);
memset(mgmt, 0, 24 + 6);
@@ -1457,6 +1458,44 @@ void ieee80211_stop_device(struct ieee80211_local *local)
drv_stop(local);
}
+static void ieee80211_handle_reconfig_failure(struct ieee80211_local *local)
+{
+ struct ieee80211_sub_if_data *sdata;
+ struct ieee80211_chanctx *ctx;
+
+ /*
+ * We get here if during resume the device can't be restarted properly.
+ * We might also get here if this happens during HW reset, which is a
+ * slightly different situation and we need to drop all connections in
+ * the latter case.
+ *
+ * Ask cfg80211 to turn off all interfaces, this will result in more
+ * warnings but at least we'll then get into a clean stopped state.
+ */
+
+ local->resuming = false;
+ local->suspended = false;
+ local->started = false;
+
+ /* scheduled scan clearly can't be running any more, but tell
+ * cfg80211 and clear local state
+ */
+ ieee80211_sched_scan_end(local);
+
+ list_for_each_entry(sdata, &local->interfaces, list)
+ sdata->flags &= ~IEEE80211_SDATA_IN_DRIVER;
+
+ /* Mark channel contexts as not being in the driver any more to avoid
+ * removing them from the driver during the shutdown process...
+ */
+ mutex_lock(&local->chanctx_mtx);
+ list_for_each_entry(ctx, &local->chanctx_list, list)
+ ctx->driver_present = false;
+ mutex_unlock(&local->chanctx_mtx);
+
+ cfg80211_shutdown_all_interfaces(local->hw.wiphy);
+}
+
static void ieee80211_assign_chanctx(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata)
{
@@ -1520,9 +1559,11 @@ int ieee80211_reconfig(struct ieee80211_local *local)
*/
res = drv_start(local);
if (res) {
- WARN(local->suspended, "Hardware became unavailable "
- "upon resume. This could be a software issue "
- "prior to suspend or a hardware issue.\n");
+ if (local->suspended)
+ WARN(1, "Hardware became unavailable upon resume. This could be a software issue prior to suspend or a hardware issue.\n");
+ else
+ WARN(1, "Hardware became unavailable during restart.\n");
+ ieee80211_handle_reconfig_failure(local);
return res;
}
diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c
index e9e36a25616..9265adfdabf 100644
--- a/net/mac80211/vht.c
+++ b/net/mac80211/vht.c
@@ -129,9 +129,12 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
if (!vht_cap_ie || !sband->vht_cap.vht_supported)
return;
- /* A VHT STA must support 40 MHz */
- if (!(sta->sta.ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40))
- return;
+ /*
+ * A VHT STA must support 40 MHz, but if we verify that here
+ * then we break a few things - some APs (e.g. Netgear R6300v2
+ * and others based on the BCM4360 chipset) will unset this
+ * capability bit when operating in 20 MHz.
+ */
vht_cap->vht_supported = true;
diff --git a/net/mac802154/llsec.c b/net/mac802154/llsec.c
index a83674edaaf..1456f73b02b 100644
--- a/net/mac802154/llsec.c
+++ b/net/mac802154/llsec.c
@@ -207,6 +207,8 @@ static bool llsec_key_id_equal(const struct ieee802154_llsec_key_id *a,
return false;
switch (a->mode) {
+ case IEEE802154_SCF_KEY_INDEX:
+ return true;
case IEEE802154_SCF_KEY_SHORT_INDEX:
return a->short_source == b->short_source;
case IEEE802154_SCF_KEY_HW_INDEX:
@@ -283,6 +285,7 @@ int mac802154_llsec_key_del(struct mac802154_llsec *sec,
mkey = container_of(pos->key, struct mac802154_llsec_key, key);
if (llsec_key_id_equal(&pos->id, key)) {
+ list_del_rcu(&pos->list);
llsec_key_put(mkey);
return 0;
}
@@ -773,10 +776,10 @@ int mac802154_llsec_encrypt(struct mac802154_llsec *sec, struct sk_buff *skb)
rc = llsec_do_encrypt(skb, sec, &hdr, key);
llsec_key_put(key);
- return rc < 0 ? rc : 0;
+ return rc;
fail_read:
- read_unlock(&sec->lock);
+ read_unlock_bh(&sec->lock);
fail:
rcu_read_unlock();
return rc;
diff --git a/net/mac802154/monitor.c b/net/mac802154/monitor.c
index 434a26f76a8..a68230e2b25 100644
--- a/net/mac802154/monitor.c
+++ b/net/mac802154/monitor.c
@@ -70,7 +70,8 @@ void mac802154_monitors_rx(struct mac802154_priv *priv, struct sk_buff *skb)
rcu_read_lock();
list_for_each_entry_rcu(sdata, &priv->slaves, list) {
- if (sdata->type != IEEE802154_DEV_MONITOR)
+ if (sdata->type != IEEE802154_DEV_MONITOR ||
+ !netif_running(sdata->dev))
continue;
skb2 = skb_clone(skb, GFP_ATOMIC);
diff --git a/net/mac802154/rx.c b/net/mac802154/rx.c
index 0597b96dc9b..7f820a108a9 100644
--- a/net/mac802154/rx.c
+++ b/net/mac802154/rx.c
@@ -64,20 +64,23 @@ mac802154_subif_rx(struct ieee802154_dev *hw, struct sk_buff *skb, u8 lqi)
if (skb->len < 2) {
pr_debug("got invalid frame\n");
- goto out;
+ goto fail;
}
crc = crc_ccitt(0, skb->data, skb->len);
if (crc) {
pr_debug("CRC mismatch\n");
- goto out;
+ goto fail;
}
skb_trim(skb, skb->len - 2); /* CRC */
}
mac802154_monitors_rx(priv, skb);
mac802154_wpans_rx(priv, skb);
-out:
- dev_kfree_skb(skb);
+
+ return;
+
+fail:
+ kfree_skb(skb);
}
static void mac802154_rx_worker(struct work_struct *work)
diff --git a/net/mac802154/wpan.c b/net/mac802154/wpan.c
index 23bc91cf99c..3c3069fd697 100644
--- a/net/mac802154/wpan.c
+++ b/net/mac802154/wpan.c
@@ -472,6 +472,7 @@ mac802154_subif_frame(struct mac802154_sub_if_data *sdata, struct sk_buff *skb,
rc = mac802154_llsec_decrypt(&sdata->sec, skb);
if (rc) {
pr_debug("decryption failed: %i\n", rc);
+ kfree_skb(skb);
return NET_RX_DROP;
}
@@ -566,7 +567,6 @@ static int mac802154_parse_frame_start(struct sk_buff *skb,
void mac802154_wpans_rx(struct mac802154_priv *priv, struct sk_buff *skb)
{
int ret;
- struct sk_buff *sskb;
struct mac802154_sub_if_data *sdata;
struct ieee802154_hdr hdr;
@@ -578,12 +578,16 @@ void mac802154_wpans_rx(struct mac802154_priv *priv, struct sk_buff *skb)
rcu_read_lock();
list_for_each_entry_rcu(sdata, &priv->slaves, list) {
- if (sdata->type != IEEE802154_DEV_WPAN)
+ if (sdata->type != IEEE802154_DEV_WPAN ||
+ !netif_running(sdata->dev))
continue;
- sskb = skb_clone(skb, GFP_ATOMIC);
- if (sskb)
- mac802154_subif_frame(sdata, sskb, &hdr);
+ mac802154_subif_frame(sdata, skb, &hdr);
+ skb = NULL;
+ break;
}
rcu_read_unlock();
+
+ if (skb)
+ kfree_skb(skb);
}
diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c
index 851cd880b0c..6b38d083e1c 100644
--- a/net/mpls/mpls_gso.c
+++ b/net/mpls/mpls_gso.c
@@ -33,6 +33,7 @@ static struct sk_buff *mpls_gso_segment(struct sk_buff *skb,
SKB_GSO_DODGY |
SKB_GSO_TCP_ECN |
SKB_GSO_GRE |
+ SKB_GSO_GRE_CSUM |
SKB_GSO_IPIP |
SKB_GSO_MPLS)))
goto out;
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 117208321f1..ec8114fae50 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -271,10 +271,7 @@ ip_set_free(void *members)
{
pr_debug("%p: free with %s\n", members,
is_vmalloc_addr(members) ? "vfree" : "kfree");
- if (is_vmalloc_addr(members))
- vfree(members);
- else
- kfree(members);
+ kvfree(members);
}
EXPORT_SYMBOL_GPL(ip_set_free);
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index a8eb0a89326..610e19c0e13 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -797,7 +797,6 @@ static void ip_vs_conn_expire(unsigned long data)
ip_vs_control_del(cp);
if (cp->flags & IP_VS_CONN_F_NFCT) {
- ip_vs_conn_drop_conntrack(cp);
/* Do not access conntracks during subsys cleanup
* because nf_conntrack_find_get can not be used after
* conntrack cleanup for the net.
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 4f26ee46b51..e6836755c45 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -97,7 +97,7 @@ const char *ip_vs_proto_name(unsigned int proto)
return "ICMPv6";
#endif
default:
- sprintf(buf, "IP_%d", proto);
+ sprintf(buf, "IP_%u", proto);
return buf;
}
}
@@ -1392,15 +1392,19 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
if (ipip) {
__be32 info = ic->un.gateway;
+ __u8 type = ic->type;
+ __u8 code = ic->code;
/* Update the MTU */
if (ic->type == ICMP_DEST_UNREACH &&
ic->code == ICMP_FRAG_NEEDED) {
struct ip_vs_dest *dest = cp->dest;
u32 mtu = ntohs(ic->un.frag.mtu);
+ __be16 frag_off = cih->frag_off;
/* Strip outer IP and ICMP, go to IPIP header */
- __skb_pull(skb, ihl + sizeof(_icmph));
+ if (pskb_pull(skb, ihl + sizeof(_icmph)) == NULL)
+ goto ignore_ipip;
offset2 -= ihl + sizeof(_icmph);
skb_reset_network_header(skb);
IP_VS_DBG(12, "ICMP for IPIP %pI4->%pI4: mtu=%u\n",
@@ -1408,7 +1412,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
ipv4_update_pmtu(skb, dev_net(skb->dev),
mtu, 0, 0, 0, 0);
/* Client uses PMTUD? */
- if (!(cih->frag_off & htons(IP_DF)))
+ if (!(frag_off & htons(IP_DF)))
goto ignore_ipip;
/* Prefer the resulting PMTU */
if (dest) {
@@ -1427,12 +1431,13 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
/* Strip outer IP, ICMP and IPIP, go to IP header of
* original request.
*/
- __skb_pull(skb, offset2);
+ if (pskb_pull(skb, offset2) == NULL)
+ goto ignore_ipip;
skb_reset_network_header(skb);
IP_VS_DBG(12, "Sending ICMP for %pI4->%pI4: t=%u, c=%u, i=%u\n",
&ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr,
- ic->type, ic->code, ntohl(info));
- icmp_send(skb, ic->type, ic->code, info);
+ type, code, ntohl(info));
+ icmp_send(skb, type, code, info);
/* ICMP can be shorter but anyways, account it */
ip_vs_out_stats(cp, skb);
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index c42e83d2751..581a6584ed0 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -3778,6 +3778,7 @@ static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net)
cancel_delayed_work_sync(&ipvs->defense_work);
cancel_work_sync(&ipvs->defense_work.work);
unregister_net_sysctl_table(ipvs->sysctl_hdr);
+ ip_vs_stop_estimator(net, &ipvs->tot_stats);
}
#else
@@ -3840,7 +3841,6 @@ void __net_exit ip_vs_control_net_cleanup(struct net *net)
struct netns_ipvs *ipvs = net_ipvs(net);
ip_vs_trash_cleanup(net);
- ip_vs_stop_estimator(net, &ipvs->tot_stats);
ip_vs_control_net_cleanup_sysctl(net);
remove_proc_entry("ip_vs_stats_percpu", net->proc_net);
remove_proc_entry("ip_vs_stats", net->proc_net);
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 487b55e0433..73ba1cc7a88 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -883,7 +883,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
iph->daddr = cp->daddr.ip;
iph->saddr = saddr;
iph->ttl = old_iph->ttl;
- ip_select_ident(skb, &rt->dst, NULL);
+ ip_select_ident(skb, NULL);
/* Another hack: avoid icmp_send in ip_fragment */
skb->ignore_df = 1;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 75421f2ba8b..1f4f954c4b4 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -914,7 +914,7 @@ void nf_conntrack_free(struct nf_conn *ct)
nf_ct_ext_destroy(ct);
nf_ct_ext_free(ct);
kmem_cache_free(net->ct.nf_conntrack_cachep, ct);
- smp_mb__before_atomic_dec();
+ smp_mb__before_atomic();
atomic_dec(&net->ct.count);
}
EXPORT_SYMBOL_GPL(nf_conntrack_free);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 58579634427..300ed1eec72 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -597,6 +597,9 @@ ctnetlink_nlmsg_size(const struct nf_conn *ct)
#ifdef CONFIG_NF_CONNTRACK_MARK
+ nla_total_size(sizeof(u_int32_t)) /* CTA_MARK */
#endif
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+ + nla_total_size(sizeof(u_int16_t)) /* CTA_ZONE */
+#endif
+ ctnetlink_proto_size(ct)
+ ctnetlink_label_size(ct)
;
@@ -1150,7 +1153,7 @@ static int ctnetlink_done_list(struct netlink_callback *cb)
static int
ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb, bool dying)
{
- struct nf_conn *ct, *last = NULL;
+ struct nf_conn *ct, *last;
struct nf_conntrack_tuple_hash *h;
struct hlist_nulls_node *n;
struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
@@ -1163,8 +1166,7 @@ ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb, bool dying
if (cb->args[2])
return 0;
- if (cb->args[0] == nr_cpu_ids)
- return 0;
+ last = (struct nf_conn *)cb->args[1];
for (cpu = cb->args[0]; cpu < nr_cpu_ids; cpu++) {
struct ct_pcpu *pcpu;
@@ -1174,7 +1176,6 @@ ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb, bool dying
pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
spin_lock_bh(&pcpu->lock);
- last = (struct nf_conn *)cb->args[1];
list = dying ? &pcpu->dying : &pcpu->unconfirmed;
restart:
hlist_nulls_for_each_entry(h, n, list, hnnode) {
@@ -1193,7 +1194,9 @@ restart:
ct);
rcu_read_unlock();
if (res < 0) {
- nf_conntrack_get(&ct->ct_general);
+ if (!atomic_inc_not_zero(&ct->ct_general.use))
+ continue;
+ cb->args[0] = cpu;
cb->args[1] = (unsigned long)ct;
spin_unlock_bh(&pcpu->lock);
goto out;
@@ -1202,10 +1205,10 @@ restart:
if (cb->args[1]) {
cb->args[1] = 0;
goto restart;
- } else
- cb->args[2] = 1;
+ }
spin_unlock_bh(&pcpu->lock);
}
+ cb->args[2] = 1;
out:
if (last)
nf_ct_put(last);
@@ -2040,6 +2043,9 @@ ctnetlink_nfqueue_build_size(const struct nf_conn *ct)
#ifdef CONFIG_NF_CONNTRACK_MARK
+ nla_total_size(sizeof(u_int32_t)) /* CTA_MARK */
#endif
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+ + nla_total_size(sizeof(u_int16_t)) /* CTA_ZONE */
+#endif
+ ctnetlink_proto_size(ct)
;
}
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 52ca952b802..a49907b1dab 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -358,6 +358,19 @@ out:
rcu_read_unlock();
}
+struct nf_conn_nat *nf_ct_nat_ext_add(struct nf_conn *ct)
+{
+ struct nf_conn_nat *nat = nfct_nat(ct);
+ if (nat)
+ return nat;
+
+ if (!nf_ct_is_confirmed(ct))
+ nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
+
+ return nat;
+}
+EXPORT_SYMBOL_GPL(nf_ct_nat_ext_add);
+
unsigned int
nf_nat_setup_info(struct nf_conn *ct,
const struct nf_nat_range *range,
@@ -368,14 +381,9 @@ nf_nat_setup_info(struct nf_conn *ct,
struct nf_conn_nat *nat;
/* nat helper or nfctnetlink also setup binding */
- nat = nfct_nat(ct);
- if (!nat) {
- nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
- if (nat == NULL) {
- pr_debug("failed to add NAT extension\n");
- return NF_ACCEPT;
- }
- }
+ nat = nf_ct_nat_ext_add(ct);
+ if (nat == NULL)
+ return NF_ACCEPT;
NF_CT_ASSERT(maniptype == NF_NAT_MANIP_SRC ||
maniptype == NF_NAT_MANIP_DST);
@@ -517,6 +525,39 @@ static int nf_nat_proto_remove(struct nf_conn *i, void *data)
return i->status & IPS_NAT_MASK ? 1 : 0;
}
+static int nf_nat_proto_clean(struct nf_conn *ct, void *data)
+{
+ struct nf_conn_nat *nat = nfct_nat(ct);
+
+ if (nf_nat_proto_remove(ct, data))
+ return 1;
+
+ if (!nat || !nat->ct)
+ return 0;
+
+ /* This netns is being destroyed, and conntrack has nat null binding.
+ * Remove it from bysource hash, as the table will be freed soon.
+ *
+ * Else, when the conntrack is destoyed, nf_nat_cleanup_conntrack()
+ * will delete entry from already-freed table.
+ */
+ if (!del_timer(&ct->timeout))
+ return 1;
+
+ spin_lock_bh(&nf_nat_lock);
+ hlist_del_rcu(&nat->bysource);
+ ct->status &= ~IPS_NAT_DONE_MASK;
+ nat->ct = NULL;
+ spin_unlock_bh(&nf_nat_lock);
+
+ add_timer(&ct->timeout);
+
+ /* don't delete conntrack. Although that would make things a lot
+ * simpler, we'd end up flushing all conntracks on nat rmmod.
+ */
+ return 0;
+}
+
static void nf_nat_l4proto_clean(u8 l3proto, u8 l4proto)
{
struct nf_nat_proto_clean clean = {
@@ -787,7 +828,7 @@ static void __net_exit nf_nat_net_exit(struct net *net)
{
struct nf_nat_proto_clean clean = {};
- nf_ct_iterate_cleanup(net, &nf_nat_proto_remove, &clean, 0, 0);
+ nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean, 0, 0);
synchronize_rcu();
nf_ct_free_hashtable(net->ct.nat_bysource, net->ct.nat_htable_size);
}
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 3fd159db9f0..8746ff9a835 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -35,7 +35,7 @@ int nft_register_afinfo(struct net *net, struct nft_af_info *afi)
{
INIT_LIST_HEAD(&afi->tables);
nfnl_lock(NFNL_SUBSYS_NFTABLES);
- list_add_tail(&afi->list, &net->nft.af_info);
+ list_add_tail_rcu(&afi->list, &net->nft.af_info);
nfnl_unlock(NFNL_SUBSYS_NFTABLES);
return 0;
}
@@ -51,7 +51,7 @@ EXPORT_SYMBOL_GPL(nft_register_afinfo);
void nft_unregister_afinfo(struct nft_af_info *afi)
{
nfnl_lock(NFNL_SUBSYS_NFTABLES);
- list_del(&afi->list);
+ list_del_rcu(&afi->list);
nfnl_unlock(NFNL_SUBSYS_NFTABLES);
}
EXPORT_SYMBOL_GPL(nft_unregister_afinfo);
@@ -88,6 +88,45 @@ nf_tables_afinfo_lookup(struct net *net, int family, bool autoload)
return ERR_PTR(-EAFNOSUPPORT);
}
+static void nft_ctx_init(struct nft_ctx *ctx,
+ const struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ struct nft_af_info *afi,
+ struct nft_table *table,
+ struct nft_chain *chain,
+ const struct nlattr * const *nla)
+{
+ ctx->net = sock_net(skb->sk);
+ ctx->afi = afi;
+ ctx->table = table;
+ ctx->chain = chain;
+ ctx->nla = nla;
+ ctx->portid = NETLINK_CB(skb).portid;
+ ctx->report = nlmsg_report(nlh);
+ ctx->seq = nlh->nlmsg_seq;
+}
+
+static struct nft_trans *nft_trans_alloc(struct nft_ctx *ctx, int msg_type,
+ u32 size)
+{
+ struct nft_trans *trans;
+
+ trans = kzalloc(sizeof(struct nft_trans) + size, GFP_KERNEL);
+ if (trans == NULL)
+ return NULL;
+
+ trans->msg_type = msg_type;
+ trans->ctx = *ctx;
+
+ return trans;
+}
+
+static void nft_trans_destroy(struct nft_trans *trans)
+{
+ list_del(&trans->list);
+ kfree(trans);
+}
+
/*
* Tables
*/
@@ -197,20 +236,13 @@ nla_put_failure:
return -1;
}
-static int nf_tables_table_notify(const struct sk_buff *oskb,
- const struct nlmsghdr *nlh,
- const struct nft_table *table,
- int event, int family)
+static int nf_tables_table_notify(const struct nft_ctx *ctx, int event)
{
struct sk_buff *skb;
- u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
- u32 seq = nlh ? nlh->nlmsg_seq : 0;
- struct net *net = oskb ? sock_net(oskb->sk) : &init_net;
- bool report;
int err;
- report = nlh ? nlmsg_report(nlh) : false;
- if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
+ if (!ctx->report &&
+ !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
return 0;
err = -ENOBUFS;
@@ -218,18 +250,20 @@ static int nf_tables_table_notify(const struct sk_buff *oskb,
if (skb == NULL)
goto err;
- err = nf_tables_fill_table_info(skb, portid, seq, event, 0,
- family, table);
+ err = nf_tables_fill_table_info(skb, ctx->portid, ctx->seq, event, 0,
+ ctx->afi->family, ctx->table);
if (err < 0) {
kfree_skb(skb);
goto err;
}
- err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report,
- GFP_KERNEL);
+ err = nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES,
+ ctx->report, GFP_KERNEL);
err:
- if (err < 0)
- nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err);
+ if (err < 0) {
+ nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES,
+ err);
+ }
return err;
}
@@ -243,11 +277,14 @@ static int nf_tables_dump_tables(struct sk_buff *skb,
struct net *net = sock_net(skb->sk);
int family = nfmsg->nfgen_family;
- list_for_each_entry(afi, &net->nft.af_info, list) {
+ rcu_read_lock();
+ cb->seq = net->nft.base_seq;
+
+ list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
if (family != NFPROTO_UNSPEC && family != afi->family)
continue;
- list_for_each_entry(table, &afi->tables, list) {
+ list_for_each_entry_rcu(table, &afi->tables, list) {
if (idx < s_idx)
goto cont;
if (idx > s_idx)
@@ -260,15 +297,21 @@ static int nf_tables_dump_tables(struct sk_buff *skb,
NLM_F_MULTI,
afi->family, table) < 0)
goto done;
+
+ nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
idx++;
}
}
done:
+ rcu_read_unlock();
cb->args[0] = idx;
return skb->len;
}
+/* Internal table flags */
+#define NFT_TABLE_INACTIVE (1 << 15)
+
static int nf_tables_gettable(struct sock *nlsk, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
@@ -295,6 +338,8 @@ static int nf_tables_gettable(struct sock *nlsk, struct sk_buff *skb,
table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME]);
if (IS_ERR(table))
return PTR_ERR(table);
+ if (table->flags & NFT_TABLE_INACTIVE)
+ return -ENOENT;
skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb2)
@@ -343,7 +388,7 @@ err:
return err;
}
-static int nf_tables_table_disable(const struct nft_af_info *afi,
+static void nf_tables_table_disable(const struct nft_af_info *afi,
struct nft_table *table)
{
struct nft_chain *chain;
@@ -353,45 +398,66 @@ static int nf_tables_table_disable(const struct nft_af_info *afi,
nf_unregister_hooks(nft_base_chain(chain)->ops,
afi->nops);
}
-
- return 0;
}
-static int nf_tables_updtable(struct sock *nlsk, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const nla[],
- struct nft_af_info *afi, struct nft_table *table)
+static int nf_tables_updtable(struct nft_ctx *ctx)
{
- const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
- int family = nfmsg->nfgen_family, ret = 0;
+ struct nft_trans *trans;
+ u32 flags;
+ int ret = 0;
- if (nla[NFTA_TABLE_FLAGS]) {
- u32 flags;
+ if (!ctx->nla[NFTA_TABLE_FLAGS])
+ return 0;
- flags = ntohl(nla_get_be32(nla[NFTA_TABLE_FLAGS]));
- if (flags & ~NFT_TABLE_F_DORMANT)
- return -EINVAL;
+ flags = ntohl(nla_get_be32(ctx->nla[NFTA_TABLE_FLAGS]));
+ if (flags & ~NFT_TABLE_F_DORMANT)
+ return -EINVAL;
+
+ if (flags == ctx->table->flags)
+ return 0;
- if ((flags & NFT_TABLE_F_DORMANT) &&
- !(table->flags & NFT_TABLE_F_DORMANT)) {
- ret = nf_tables_table_disable(afi, table);
- if (ret >= 0)
- table->flags |= NFT_TABLE_F_DORMANT;
- } else if (!(flags & NFT_TABLE_F_DORMANT) &&
- table->flags & NFT_TABLE_F_DORMANT) {
- ret = nf_tables_table_enable(afi, table);
- if (ret >= 0)
- table->flags &= ~NFT_TABLE_F_DORMANT;
+ trans = nft_trans_alloc(ctx, NFT_MSG_NEWTABLE,
+ sizeof(struct nft_trans_table));
+ if (trans == NULL)
+ return -ENOMEM;
+
+ if ((flags & NFT_TABLE_F_DORMANT) &&
+ !(ctx->table->flags & NFT_TABLE_F_DORMANT)) {
+ nft_trans_table_enable(trans) = false;
+ } else if (!(flags & NFT_TABLE_F_DORMANT) &&
+ ctx->table->flags & NFT_TABLE_F_DORMANT) {
+ ret = nf_tables_table_enable(ctx->afi, ctx->table);
+ if (ret >= 0) {
+ ctx->table->flags &= ~NFT_TABLE_F_DORMANT;
+ nft_trans_table_enable(trans) = true;
}
- if (ret < 0)
- goto err;
}
+ if (ret < 0)
+ goto err;
- nf_tables_table_notify(skb, nlh, table, NFT_MSG_NEWTABLE, family);
+ nft_trans_table_update(trans) = true;
+ list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+ return 0;
err:
+ nft_trans_destroy(trans);
return ret;
}
+static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type)
+{
+ struct nft_trans *trans;
+
+ trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_table));
+ if (trans == NULL)
+ return -ENOMEM;
+
+ if (msg_type == NFT_MSG_NEWTABLE)
+ ctx->table->flags |= NFT_TABLE_INACTIVE;
+
+ list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+ return 0;
+}
+
static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
@@ -403,6 +469,8 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
struct net *net = sock_net(skb->sk);
int family = nfmsg->nfgen_family;
u32 flags = 0;
+ struct nft_ctx ctx;
+ int err;
afi = nf_tables_afinfo_lookup(net, family, true);
if (IS_ERR(afi))
@@ -417,11 +485,15 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
}
if (table != NULL) {
+ if (table->flags & NFT_TABLE_INACTIVE)
+ return -ENOENT;
if (nlh->nlmsg_flags & NLM_F_EXCL)
return -EEXIST;
if (nlh->nlmsg_flags & NLM_F_REPLACE)
return -EOPNOTSUPP;
- return nf_tables_updtable(nlsk, skb, nlh, nla, afi, table);
+
+ nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla);
+ return nf_tables_updtable(&ctx);
}
if (nla[NFTA_TABLE_FLAGS]) {
@@ -444,8 +516,14 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
INIT_LIST_HEAD(&table->sets);
table->flags = flags;
- list_add_tail(&table->list, &afi->tables);
- nf_tables_table_notify(skb, nlh, table, NFT_MSG_NEWTABLE, family);
+ nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla);
+ err = nft_trans_table_add(&ctx, NFT_MSG_NEWTABLE);
+ if (err < 0) {
+ kfree(table);
+ module_put(afi->owner);
+ return err;
+ }
+ list_add_tail_rcu(&table->list, &afi->tables);
return 0;
}
@@ -457,7 +535,8 @@ static int nf_tables_deltable(struct sock *nlsk, struct sk_buff *skb,
struct nft_af_info *afi;
struct nft_table *table;
struct net *net = sock_net(skb->sk);
- int family = nfmsg->nfgen_family;
+ int family = nfmsg->nfgen_family, err;
+ struct nft_ctx ctx;
afi = nf_tables_afinfo_lookup(net, family, false);
if (IS_ERR(afi))
@@ -466,17 +545,28 @@ static int nf_tables_deltable(struct sock *nlsk, struct sk_buff *skb,
table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME]);
if (IS_ERR(table))
return PTR_ERR(table);
-
- if (!list_empty(&table->chains) || !list_empty(&table->sets))
+ if (table->flags & NFT_TABLE_INACTIVE)
+ return -ENOENT;
+ if (table->use > 0)
return -EBUSY;
- list_del(&table->list);
- nf_tables_table_notify(skb, nlh, table, NFT_MSG_DELTABLE, family);
- kfree(table);
- module_put(afi->owner);
+ nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla);
+ err = nft_trans_table_add(&ctx, NFT_MSG_DELTABLE);
+ if (err < 0)
+ return err;
+
+ list_del_rcu(&table->list);
return 0;
}
+static void nf_tables_table_destroy(struct nft_ctx *ctx)
+{
+ BUG_ON(ctx->table->use > 0);
+
+ kfree(ctx->table);
+ module_put(ctx->afi->owner);
+}
+
int nft_register_chain_type(const struct nf_chain_type *ctype)
{
int err = 0;
@@ -541,7 +631,7 @@ static const struct nla_policy nft_chain_policy[NFTA_CHAIN_MAX + 1] = {
.len = NFT_CHAIN_MAXNAMELEN - 1 },
[NFTA_CHAIN_HOOK] = { .type = NLA_NESTED },
[NFTA_CHAIN_POLICY] = { .type = NLA_U32 },
- [NFTA_CHAIN_TYPE] = { .type = NLA_NUL_STRING },
+ [NFTA_CHAIN_TYPE] = { .type = NLA_STRING },
[NFTA_CHAIN_COUNTERS] = { .type = NLA_NESTED },
};
@@ -554,13 +644,20 @@ static int nft_dump_stats(struct sk_buff *skb, struct nft_stats __percpu *stats)
{
struct nft_stats *cpu_stats, total;
struct nlattr *nest;
+ unsigned int seq;
+ u64 pkts, bytes;
int cpu;
memset(&total, 0, sizeof(total));
for_each_possible_cpu(cpu) {
cpu_stats = per_cpu_ptr(stats, cpu);
- total.pkts += cpu_stats->pkts;
- total.bytes += cpu_stats->bytes;
+ do {
+ seq = u64_stats_fetch_begin_irq(&cpu_stats->syncp);
+ pkts = cpu_stats->pkts;
+ bytes = cpu_stats->bytes;
+ } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, seq));
+ total.pkts += pkts;
+ total.bytes += bytes;
}
nest = nla_nest_start(skb, NFTA_CHAIN_COUNTERS);
if (nest == NULL)
@@ -637,21 +734,13 @@ nla_put_failure:
return -1;
}
-static int nf_tables_chain_notify(const struct sk_buff *oskb,
- const struct nlmsghdr *nlh,
- const struct nft_table *table,
- const struct nft_chain *chain,
- int event, int family)
+static int nf_tables_chain_notify(const struct nft_ctx *ctx, int event)
{
struct sk_buff *skb;
- u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
- struct net *net = oskb ? sock_net(oskb->sk) : &init_net;
- u32 seq = nlh ? nlh->nlmsg_seq : 0;
- bool report;
int err;
- report = nlh ? nlmsg_report(nlh) : false;
- if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
+ if (!ctx->report &&
+ !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
return 0;
err = -ENOBUFS;
@@ -659,18 +748,21 @@ static int nf_tables_chain_notify(const struct sk_buff *oskb,
if (skb == NULL)
goto err;
- err = nf_tables_fill_chain_info(skb, portid, seq, event, 0, family,
- table, chain);
+ err = nf_tables_fill_chain_info(skb, ctx->portid, ctx->seq, event, 0,
+ ctx->afi->family, ctx->table,
+ ctx->chain);
if (err < 0) {
kfree_skb(skb);
goto err;
}
- err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report,
- GFP_KERNEL);
+ err = nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES,
+ ctx->report, GFP_KERNEL);
err:
- if (err < 0)
- nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err);
+ if (err < 0) {
+ nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES,
+ err);
+ }
return err;
}
@@ -685,12 +777,15 @@ static int nf_tables_dump_chains(struct sk_buff *skb,
struct net *net = sock_net(skb->sk);
int family = nfmsg->nfgen_family;
- list_for_each_entry(afi, &net->nft.af_info, list) {
+ rcu_read_lock();
+ cb->seq = net->nft.base_seq;
+
+ list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
if (family != NFPROTO_UNSPEC && family != afi->family)
continue;
- list_for_each_entry(table, &afi->tables, list) {
- list_for_each_entry(chain, &table->chains, list) {
+ list_for_each_entry_rcu(table, &afi->tables, list) {
+ list_for_each_entry_rcu(chain, &table->chains, list) {
if (idx < s_idx)
goto cont;
if (idx > s_idx)
@@ -702,17 +797,19 @@ static int nf_tables_dump_chains(struct sk_buff *skb,
NLM_F_MULTI,
afi->family, table, chain) < 0)
goto done;
+
+ nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
idx++;
}
}
}
done:
+ rcu_read_unlock();
cb->args[0] = idx;
return skb->len;
}
-
static int nf_tables_getchain(struct sock *nlsk, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
@@ -740,10 +837,14 @@ static int nf_tables_getchain(struct sock *nlsk, struct sk_buff *skb,
table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]);
if (IS_ERR(table))
return PTR_ERR(table);
+ if (table->flags & NFT_TABLE_INACTIVE)
+ return -ENOENT;
chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]);
if (IS_ERR(chain))
return PTR_ERR(chain);
+ if (chain->flags & NFT_CHAIN_INACTIVE)
+ return -ENOENT;
skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb2)
@@ -767,8 +868,7 @@ static const struct nla_policy nft_counter_policy[NFTA_COUNTER_MAX + 1] = {
[NFTA_COUNTER_BYTES] = { .type = NLA_U64 },
};
-static int
-nf_tables_counters(struct nft_base_chain *chain, const struct nlattr *attr)
+static struct nft_stats __percpu *nft_stats_alloc(const struct nlattr *attr)
{
struct nlattr *tb[NFTA_COUNTER_MAX+1];
struct nft_stats __percpu *newstats;
@@ -777,14 +877,14 @@ nf_tables_counters(struct nft_base_chain *chain, const struct nlattr *attr)
err = nla_parse_nested(tb, NFTA_COUNTER_MAX, attr, nft_counter_policy);
if (err < 0)
- return err;
+ return ERR_PTR(err);
if (!tb[NFTA_COUNTER_BYTES] || !tb[NFTA_COUNTER_PACKETS])
- return -EINVAL;
+ return ERR_PTR(-EINVAL);
- newstats = alloc_percpu(struct nft_stats);
+ newstats = netdev_alloc_pcpu_stats(struct nft_stats);
if (newstats == NULL)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
/* Restore old counters on this cpu, no problem. Per-cpu statistics
* are not exposed to userspace.
@@ -793,6 +893,12 @@ nf_tables_counters(struct nft_base_chain *chain, const struct nlattr *attr)
stats->bytes = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES]));
stats->pkts = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS]));
+ return newstats;
+}
+
+static void nft_chain_stats_replace(struct nft_base_chain *chain,
+ struct nft_stats __percpu *newstats)
+{
if (chain->stats) {
struct nft_stats __percpu *oldstats =
nft_dereference(chain->stats);
@@ -802,17 +908,43 @@ nf_tables_counters(struct nft_base_chain *chain, const struct nlattr *attr)
free_percpu(oldstats);
} else
rcu_assign_pointer(chain->stats, newstats);
+}
+
+static int nft_trans_chain_add(struct nft_ctx *ctx, int msg_type)
+{
+ struct nft_trans *trans;
+
+ trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_chain));
+ if (trans == NULL)
+ return -ENOMEM;
+
+ if (msg_type == NFT_MSG_NEWCHAIN)
+ ctx->chain->flags |= NFT_CHAIN_INACTIVE;
+ list_add_tail(&trans->list, &ctx->net->nft.commit_list);
return 0;
}
+static void nf_tables_chain_destroy(struct nft_chain *chain)
+{
+ BUG_ON(chain->use > 0);
+
+ if (chain->flags & NFT_BASE_CHAIN) {
+ module_put(nft_base_chain(chain)->type->owner);
+ free_percpu(nft_base_chain(chain)->stats);
+ kfree(nft_base_chain(chain));
+ } else {
+ kfree(chain);
+ }
+}
+
static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
const struct nlattr * uninitialized_var(name);
- const struct nft_af_info *afi;
+ struct nft_af_info *afi;
struct nft_table *table;
struct nft_chain *chain;
struct nft_base_chain *basechain = NULL;
@@ -822,8 +954,10 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
u8 policy = NF_ACCEPT;
u64 handle = 0;
unsigned int i;
+ struct nft_stats __percpu *stats;
int err;
bool create;
+ struct nft_ctx ctx;
create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
@@ -869,6 +1003,11 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
}
if (chain != NULL) {
+ struct nft_stats *stats = NULL;
+ struct nft_trans *trans;
+
+ if (chain->flags & NFT_CHAIN_INACTIVE)
+ return -ENOENT;
if (nlh->nlmsg_flags & NLM_F_EXCL)
return -EEXIST;
if (nlh->nlmsg_flags & NLM_F_REPLACE)
@@ -882,19 +1021,31 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
if (!(chain->flags & NFT_BASE_CHAIN))
return -EOPNOTSUPP;
- err = nf_tables_counters(nft_base_chain(chain),
- nla[NFTA_CHAIN_COUNTERS]);
- if (err < 0)
- return err;
+ stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]);
+ if (IS_ERR(stats))
+ return PTR_ERR(stats);
}
- if (nla[NFTA_CHAIN_POLICY])
- nft_base_chain(chain)->policy = policy;
+ nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla);
+ trans = nft_trans_alloc(&ctx, NFT_MSG_NEWCHAIN,
+ sizeof(struct nft_trans_chain));
+ if (trans == NULL)
+ return -ENOMEM;
+
+ nft_trans_chain_stats(trans) = stats;
+ nft_trans_chain_update(trans) = true;
- if (nla[NFTA_CHAIN_HANDLE] && name)
- nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN);
+ if (nla[NFTA_CHAIN_POLICY])
+ nft_trans_chain_policy(trans) = policy;
+ else
+ nft_trans_chain_policy(trans) = -1;
- goto notify;
+ if (nla[NFTA_CHAIN_HANDLE] && name) {
+ nla_strlcpy(nft_trans_chain_name(trans), name,
+ NFT_CHAIN_MAXNAMELEN);
+ }
+ list_add_tail(&trans->list, &net->nft.commit_list);
+ return 0;
}
if (table->use == UINT_MAX)
@@ -939,23 +1090,21 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
return -ENOMEM;
if (nla[NFTA_CHAIN_COUNTERS]) {
- err = nf_tables_counters(basechain,
- nla[NFTA_CHAIN_COUNTERS]);
- if (err < 0) {
+ stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]);
+ if (IS_ERR(stats)) {
module_put(type->owner);
kfree(basechain);
- return err;
+ return PTR_ERR(stats);
}
+ basechain->stats = stats;
} else {
- struct nft_stats __percpu *newstats;
-
- newstats = alloc_percpu(struct nft_stats);
- if (newstats == NULL) {
+ stats = netdev_alloc_pcpu_stats(struct nft_stats);
+ if (IS_ERR(stats)) {
module_put(type->owner);
kfree(basechain);
- return -ENOMEM;
+ return PTR_ERR(stats);
}
- rcu_assign_pointer(basechain->stats, newstats);
+ rcu_assign_pointer(basechain->stats, stats);
}
basechain->type = type;
@@ -992,31 +1141,27 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
if (!(table->flags & NFT_TABLE_F_DORMANT) &&
chain->flags & NFT_BASE_CHAIN) {
err = nf_register_hooks(nft_base_chain(chain)->ops, afi->nops);
- if (err < 0) {
- module_put(basechain->type->owner);
- free_percpu(basechain->stats);
- kfree(basechain);
- return err;
- }
+ if (err < 0)
+ goto err1;
}
- list_add_tail(&chain->list, &table->chains);
- table->use++;
-notify:
- nf_tables_chain_notify(skb, nlh, table, chain, NFT_MSG_NEWCHAIN,
- family);
- return 0;
-}
-static void nf_tables_chain_destroy(struct nft_chain *chain)
-{
- BUG_ON(chain->use > 0);
+ nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla);
+ err = nft_trans_chain_add(&ctx, NFT_MSG_NEWCHAIN);
+ if (err < 0)
+ goto err2;
- if (chain->flags & NFT_BASE_CHAIN) {
- module_put(nft_base_chain(chain)->type->owner);
- free_percpu(nft_base_chain(chain)->stats);
- kfree(nft_base_chain(chain));
- } else
- kfree(chain);
+ table->use++;
+ list_add_tail_rcu(&chain->list, &table->chains);
+ return 0;
+err2:
+ if (!(table->flags & NFT_TABLE_F_DORMANT) &&
+ chain->flags & NFT_BASE_CHAIN) {
+ nf_unregister_hooks(nft_base_chain(chain)->ops,
+ afi->nops);
+ }
+err1:
+ nf_tables_chain_destroy(chain);
+ return err;
}
static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb,
@@ -1024,11 +1169,13 @@ static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
- const struct nft_af_info *afi;
+ struct nft_af_info *afi;
struct nft_table *table;
struct nft_chain *chain;
struct net *net = sock_net(skb->sk);
int family = nfmsg->nfgen_family;
+ struct nft_ctx ctx;
+ int err;
afi = nf_tables_afinfo_lookup(net, family, false);
if (IS_ERR(afi))
@@ -1037,48 +1184,27 @@ static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb,
table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]);
if (IS_ERR(table))
return PTR_ERR(table);
+ if (table->flags & NFT_TABLE_INACTIVE)
+ return -ENOENT;
chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]);
if (IS_ERR(chain))
return PTR_ERR(chain);
-
- if (!list_empty(&chain->rules) || chain->use > 0)
+ if (chain->flags & NFT_CHAIN_INACTIVE)
+ return -ENOENT;
+ if (chain->use > 0)
return -EBUSY;
- list_del(&chain->list);
- table->use--;
-
- if (!(table->flags & NFT_TABLE_F_DORMANT) &&
- chain->flags & NFT_BASE_CHAIN)
- nf_unregister_hooks(nft_base_chain(chain)->ops, afi->nops);
-
- nf_tables_chain_notify(skb, nlh, table, chain, NFT_MSG_DELCHAIN,
- family);
-
- /* Make sure all rule references are gone before this is released */
- synchronize_rcu();
+ nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla);
+ err = nft_trans_chain_add(&ctx, NFT_MSG_DELCHAIN);
+ if (err < 0)
+ return err;
- nf_tables_chain_destroy(chain);
+ table->use--;
+ list_del_rcu(&chain->list);
return 0;
}
-static void nft_ctx_init(struct nft_ctx *ctx,
- const struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nft_af_info *afi,
- const struct nft_table *table,
- const struct nft_chain *chain,
- const struct nlattr * const *nla)
-{
- ctx->net = sock_net(skb->sk);
- ctx->skb = skb;
- ctx->nlh = nlh;
- ctx->afi = afi;
- ctx->table = table;
- ctx->chain = chain;
- ctx->nla = nla;
-}
-
/*
* Expressions
*/
@@ -1093,7 +1219,10 @@ static void nft_ctx_init(struct nft_ctx *ctx,
int nft_register_expr(struct nft_expr_type *type)
{
nfnl_lock(NFNL_SUBSYS_NFTABLES);
- list_add_tail(&type->list, &nf_tables_expressions);
+ if (type->family == NFPROTO_UNSPEC)
+ list_add_tail_rcu(&type->list, &nf_tables_expressions);
+ else
+ list_add_rcu(&type->list, &nf_tables_expressions);
nfnl_unlock(NFNL_SUBSYS_NFTABLES);
return 0;
}
@@ -1108,7 +1237,7 @@ EXPORT_SYMBOL_GPL(nft_register_expr);
void nft_unregister_expr(struct nft_expr_type *type)
{
nfnl_lock(NFNL_SUBSYS_NFTABLES);
- list_del(&type->list);
+ list_del_rcu(&type->list);
nfnl_unlock(NFNL_SUBSYS_NFTABLES);
}
EXPORT_SYMBOL_GPL(nft_unregister_expr);
@@ -1361,22 +1490,15 @@ nla_put_failure:
return -1;
}
-static int nf_tables_rule_notify(const struct sk_buff *oskb,
- const struct nlmsghdr *nlh,
- const struct nft_table *table,
- const struct nft_chain *chain,
+static int nf_tables_rule_notify(const struct nft_ctx *ctx,
const struct nft_rule *rule,
- int event, u32 flags, int family)
+ int event)
{
struct sk_buff *skb;
- u32 portid = NETLINK_CB(oskb).portid;
- struct net *net = oskb ? sock_net(oskb->sk) : &init_net;
- u32 seq = nlh->nlmsg_seq;
- bool report;
int err;
- report = nlmsg_report(nlh);
- if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
+ if (!ctx->report &&
+ !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
return 0;
err = -ENOBUFS;
@@ -1384,18 +1506,21 @@ static int nf_tables_rule_notify(const struct sk_buff *oskb,
if (skb == NULL)
goto err;
- err = nf_tables_fill_rule_info(skb, portid, seq, event, flags,
- family, table, chain, rule);
+ err = nf_tables_fill_rule_info(skb, ctx->portid, ctx->seq, event, 0,
+ ctx->afi->family, ctx->table,
+ ctx->chain, rule);
if (err < 0) {
kfree_skb(skb);
goto err;
}
- err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report,
- GFP_KERNEL);
+ err = nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES,
+ ctx->report, GFP_KERNEL);
err:
- if (err < 0)
- nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err);
+ if (err < 0) {
+ nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES,
+ err);
+ }
return err;
}
@@ -1445,16 +1570,17 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
unsigned int idx = 0, s_idx = cb->args[0];
struct net *net = sock_net(skb->sk);
int family = nfmsg->nfgen_family;
- u8 genctr = ACCESS_ONCE(net->nft.genctr);
- u8 gencursor = ACCESS_ONCE(net->nft.gencursor);
- list_for_each_entry(afi, &net->nft.af_info, list) {
+ rcu_read_lock();
+ cb->seq = net->nft.base_seq;
+
+ list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
if (family != NFPROTO_UNSPEC && family != afi->family)
continue;
- list_for_each_entry(table, &afi->tables, list) {
- list_for_each_entry(chain, &table->chains, list) {
- list_for_each_entry(rule, &chain->rules, list) {
+ list_for_each_entry_rcu(table, &afi->tables, list) {
+ list_for_each_entry_rcu(chain, &table->chains, list) {
+ list_for_each_entry_rcu(rule, &chain->rules, list) {
if (!nft_rule_is_active(net, rule))
goto cont;
if (idx < s_idx)
@@ -1468,6 +1594,8 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
NLM_F_MULTI | NLM_F_APPEND,
afi->family, table, chain, rule) < 0)
goto done;
+
+ nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
idx++;
}
@@ -1475,9 +1603,7 @@ cont:
}
}
done:
- /* Invalidate this dump, a transition to the new generation happened */
- if (gencursor != net->nft.gencursor || genctr != net->nft.genctr)
- return -EBUSY;
+ rcu_read_unlock();
cb->args[0] = idx;
return skb->len;
@@ -1511,10 +1637,14 @@ static int nf_tables_getrule(struct sock *nlsk, struct sk_buff *skb,
table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]);
if (IS_ERR(table))
return PTR_ERR(table);
+ if (table->flags & NFT_TABLE_INACTIVE)
+ return -ENOENT;
chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]);
if (IS_ERR(chain))
return PTR_ERR(chain);
+ if (chain->flags & NFT_CHAIN_INACTIVE)
+ return -ENOENT;
rule = nf_tables_rule_lookup(chain, nla[NFTA_RULE_HANDLE]);
if (IS_ERR(rule))
@@ -1554,37 +1684,36 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx,
kfree(rule);
}
-#define NFT_RULE_MAXEXPRS 128
-
-static struct nft_expr_info *info;
-
-static struct nft_rule_trans *
-nf_tables_trans_add(struct nft_ctx *ctx, struct nft_rule *rule)
+static struct nft_trans *nft_trans_rule_add(struct nft_ctx *ctx, int msg_type,
+ struct nft_rule *rule)
{
- struct nft_rule_trans *rupd;
+ struct nft_trans *trans;
- rupd = kmalloc(sizeof(struct nft_rule_trans), GFP_KERNEL);
- if (rupd == NULL)
- return NULL;
+ trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_rule));
+ if (trans == NULL)
+ return NULL;
- rupd->ctx = *ctx;
- rupd->rule = rule;
- list_add_tail(&rupd->list, &ctx->net->nft.commit_list);
+ nft_trans_rule(trans) = rule;
+ list_add_tail(&trans->list, &ctx->net->nft.commit_list);
- return rupd;
+ return trans;
}
+#define NFT_RULE_MAXEXPRS 128
+
+static struct nft_expr_info *info;
+
static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
- const struct nft_af_info *afi;
+ struct nft_af_info *afi;
struct net *net = sock_net(skb->sk);
struct nft_table *table;
struct nft_chain *chain;
struct nft_rule *rule, *old_rule = NULL;
- struct nft_rule_trans *repl = NULL;
+ struct nft_trans *trans = NULL;
struct nft_expr *expr;
struct nft_ctx ctx;
struct nlattr *tmp;
@@ -1623,6 +1752,9 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
if (!create || nlh->nlmsg_flags & NLM_F_REPLACE)
return -EINVAL;
handle = nf_tables_alloc_handle(table);
+
+ if (chain->use == UINT_MAX)
+ return -EOVERFLOW;
}
if (nla[NFTA_RULE_POSITION]) {
@@ -1682,13 +1814,15 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
if (nlh->nlmsg_flags & NLM_F_REPLACE) {
if (nft_rule_is_active_next(net, old_rule)) {
- repl = nf_tables_trans_add(&ctx, old_rule);
- if (repl == NULL) {
+ trans = nft_trans_rule_add(&ctx, NFT_MSG_DELRULE,
+ old_rule);
+ if (trans == NULL) {
err = -ENOMEM;
goto err2;
}
nft_rule_disactivate_next(net, old_rule);
- list_add_tail(&rule->list, &old_rule->list);
+ chain->use--;
+ list_add_tail_rcu(&rule->list, &old_rule->list);
} else {
err = -ENOENT;
goto err2;
@@ -1705,19 +1839,20 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
list_add_rcu(&rule->list, &chain->rules);
}
- if (nf_tables_trans_add(&ctx, rule) == NULL) {
+ if (nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule) == NULL) {
err = -ENOMEM;
goto err3;
}
+ chain->use++;
return 0;
err3:
list_del_rcu(&rule->list);
- if (repl) {
- list_del_rcu(&repl->rule->list);
- list_del(&repl->list);
- nft_rule_clear(net, repl->rule);
- kfree(repl);
+ if (trans) {
+ list_del_rcu(&nft_trans_rule(trans)->list);
+ nft_rule_clear(net, nft_trans_rule(trans));
+ nft_trans_destroy(trans);
+ chain->use++;
}
err2:
nf_tables_rule_destroy(&ctx, rule);
@@ -1734,9 +1869,10 @@ nf_tables_delrule_one(struct nft_ctx *ctx, struct nft_rule *rule)
{
/* You cannot delete the same rule twice */
if (nft_rule_is_active_next(ctx->net, rule)) {
- if (nf_tables_trans_add(ctx, rule) == NULL)
+ if (nft_trans_rule_add(ctx, NFT_MSG_DELRULE, rule) == NULL)
return -ENOMEM;
nft_rule_disactivate_next(ctx->net, rule);
+ ctx->chain->use--;
return 0;
}
return -ENOENT;
@@ -1760,9 +1896,9 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
- const struct nft_af_info *afi;
+ struct nft_af_info *afi;
struct net *net = sock_net(skb->sk);
- const struct nft_table *table;
+ struct nft_table *table;
struct nft_chain *chain = NULL;
struct nft_rule *rule;
int family = nfmsg->nfgen_family, err = 0;
@@ -1775,6 +1911,8 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb,
table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]);
if (IS_ERR(table))
return PTR_ERR(table);
+ if (table->flags & NFT_TABLE_INACTIVE)
+ return -ENOENT;
if (nla[NFTA_RULE_CHAIN]) {
chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]);
@@ -1807,88 +1945,6 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb,
return err;
}
-static int nf_tables_commit(struct sk_buff *skb)
-{
- struct net *net = sock_net(skb->sk);
- struct nft_rule_trans *rupd, *tmp;
-
- /* Bump generation counter, invalidate any dump in progress */
- net->nft.genctr++;
-
- /* A new generation has just started */
- net->nft.gencursor = gencursor_next(net);
-
- /* Make sure all packets have left the previous generation before
- * purging old rules.
- */
- synchronize_rcu();
-
- list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) {
- /* This rule was inactive in the past and just became active.
- * Clear the next bit of the genmask since its meaning has
- * changed, now it is the future.
- */
- if (nft_rule_is_active(net, rupd->rule)) {
- nft_rule_clear(net, rupd->rule);
- nf_tables_rule_notify(skb, rupd->ctx.nlh,
- rupd->ctx.table, rupd->ctx.chain,
- rupd->rule, NFT_MSG_NEWRULE, 0,
- rupd->ctx.afi->family);
- list_del(&rupd->list);
- kfree(rupd);
- continue;
- }
-
- /* This rule is in the past, get rid of it */
- list_del_rcu(&rupd->rule->list);
- nf_tables_rule_notify(skb, rupd->ctx.nlh,
- rupd->ctx.table, rupd->ctx.chain,
- rupd->rule, NFT_MSG_DELRULE, 0,
- rupd->ctx.afi->family);
- }
-
- /* Make sure we don't see any packet traversing old rules */
- synchronize_rcu();
-
- /* Now we can safely release unused old rules */
- list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) {
- nf_tables_rule_destroy(&rupd->ctx, rupd->rule);
- list_del(&rupd->list);
- kfree(rupd);
- }
-
- return 0;
-}
-
-static int nf_tables_abort(struct sk_buff *skb)
-{
- struct net *net = sock_net(skb->sk);
- struct nft_rule_trans *rupd, *tmp;
-
- list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) {
- if (!nft_rule_is_active_next(net, rupd->rule)) {
- nft_rule_clear(net, rupd->rule);
- list_del(&rupd->list);
- kfree(rupd);
- continue;
- }
-
- /* This rule is inactive, get rid of it */
- list_del_rcu(&rupd->rule->list);
- }
-
- /* Make sure we don't see any packet accessing aborted rules */
- synchronize_rcu();
-
- list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) {
- nf_tables_rule_destroy(&rupd->ctx, rupd->rule);
- list_del(&rupd->list);
- kfree(rupd);
- }
-
- return 0;
-}
-
/*
* Sets
*/
@@ -1898,7 +1954,7 @@ static LIST_HEAD(nf_tables_set_ops);
int nft_register_set(struct nft_set_ops *ops)
{
nfnl_lock(NFNL_SUBSYS_NFTABLES);
- list_add_tail(&ops->list, &nf_tables_set_ops);
+ list_add_tail_rcu(&ops->list, &nf_tables_set_ops);
nfnl_unlock(NFNL_SUBSYS_NFTABLES);
return 0;
}
@@ -1907,14 +1963,23 @@ EXPORT_SYMBOL_GPL(nft_register_set);
void nft_unregister_set(struct nft_set_ops *ops)
{
nfnl_lock(NFNL_SUBSYS_NFTABLES);
- list_del(&ops->list);
+ list_del_rcu(&ops->list);
nfnl_unlock(NFNL_SUBSYS_NFTABLES);
}
EXPORT_SYMBOL_GPL(nft_unregister_set);
-static const struct nft_set_ops *nft_select_set_ops(const struct nlattr * const nla[])
+/*
+ * Select a set implementation based on the data characteristics and the
+ * given policy. The total memory use might not be known if no size is
+ * given, in that case the amount of memory per element is used.
+ */
+static const struct nft_set_ops *
+nft_select_set_ops(const struct nlattr * const nla[],
+ const struct nft_set_desc *desc,
+ enum nft_set_policies policy)
{
- const struct nft_set_ops *ops;
+ const struct nft_set_ops *ops, *bops;
+ struct nft_set_estimate est, best;
u32 features;
#ifdef CONFIG_MODULES
@@ -1932,15 +1997,45 @@ static const struct nft_set_ops *nft_select_set_ops(const struct nlattr * const
features &= NFT_SET_INTERVAL | NFT_SET_MAP;
}
- // FIXME: implement selection properly
+ bops = NULL;
+ best.size = ~0;
+ best.class = ~0;
+
list_for_each_entry(ops, &nf_tables_set_ops, list) {
if ((ops->features & features) != features)
continue;
+ if (!ops->estimate(desc, features, &est))
+ continue;
+
+ switch (policy) {
+ case NFT_SET_POL_PERFORMANCE:
+ if (est.class < best.class)
+ break;
+ if (est.class == best.class && est.size < best.size)
+ break;
+ continue;
+ case NFT_SET_POL_MEMORY:
+ if (est.size < best.size)
+ break;
+ if (est.size == best.size && est.class < best.class)
+ break;
+ continue;
+ default:
+ break;
+ }
+
if (!try_module_get(ops->owner))
continue;
- return ops;
+ if (bops != NULL)
+ module_put(bops->owner);
+
+ bops = ops;
+ best = est;
}
+ if (bops != NULL)
+ return bops;
+
return ERR_PTR(-EOPNOTSUPP);
}
@@ -1953,6 +2048,13 @@ static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = {
[NFTA_SET_KEY_LEN] = { .type = NLA_U32 },
[NFTA_SET_DATA_TYPE] = { .type = NLA_U32 },
[NFTA_SET_DATA_LEN] = { .type = NLA_U32 },
+ [NFTA_SET_POLICY] = { .type = NLA_U32 },
+ [NFTA_SET_DESC] = { .type = NLA_NESTED },
+ [NFTA_SET_ID] = { .type = NLA_U32 },
+};
+
+static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = {
+ [NFTA_SET_DESC_SIZE] = { .type = NLA_U32 },
};
static int nft_ctx_init_from_setattr(struct nft_ctx *ctx,
@@ -1962,8 +2064,8 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx,
{
struct net *net = sock_net(skb->sk);
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
- const struct nft_af_info *afi = NULL;
- const struct nft_table *table = NULL;
+ struct nft_af_info *afi = NULL;
+ struct nft_table *table = NULL;
if (nfmsg->nfgen_family != NFPROTO_UNSPEC) {
afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false);
@@ -1978,6 +2080,8 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx,
table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE]);
if (IS_ERR(table))
return PTR_ERR(table);
+ if (table->flags & NFT_TABLE_INACTIVE)
+ return -ENOENT;
}
nft_ctx_init(ctx, skb, nlh, afi, table, NULL, nla);
@@ -1999,13 +2103,27 @@ struct nft_set *nf_tables_set_lookup(const struct nft_table *table,
return ERR_PTR(-ENOENT);
}
+struct nft_set *nf_tables_set_lookup_byid(const struct net *net,
+ const struct nlattr *nla)
+{
+ struct nft_trans *trans;
+ u32 id = ntohl(nla_get_be32(nla));
+
+ list_for_each_entry(trans, &net->nft.commit_list, list) {
+ if (trans->msg_type == NFT_MSG_NEWSET &&
+ id == nft_trans_set_id(trans))
+ return nft_trans_set(trans);
+ }
+ return ERR_PTR(-ENOENT);
+}
+
static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set,
const char *name)
{
const struct nft_set *i;
const char *p;
unsigned long *inuse;
- unsigned int n = 0;
+ unsigned int n = 0, min = 0;
p = strnchr(name, IFNAMSIZ, '%');
if (p != NULL) {
@@ -2015,23 +2133,28 @@ static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set,
inuse = (unsigned long *)get_zeroed_page(GFP_KERNEL);
if (inuse == NULL)
return -ENOMEM;
-
+cont:
list_for_each_entry(i, &ctx->table->sets, list) {
int tmp;
if (!sscanf(i->name, name, &tmp))
continue;
- if (tmp < 0 || tmp >= BITS_PER_BYTE * PAGE_SIZE)
+ if (tmp < min || tmp >= min + BITS_PER_BYTE * PAGE_SIZE)
continue;
- set_bit(tmp, inuse);
+ set_bit(tmp - min, inuse);
}
n = find_first_zero_bit(inuse, BITS_PER_BYTE * PAGE_SIZE);
+ if (n >= BITS_PER_BYTE * PAGE_SIZE) {
+ min += BITS_PER_BYTE * PAGE_SIZE;
+ memset(inuse, 0, PAGE_SIZE);
+ goto cont;
+ }
free_page((unsigned long)inuse);
}
- snprintf(set->name, sizeof(set->name), name, n);
+ snprintf(set->name, sizeof(set->name), name, min + n);
list_for_each_entry(i, &ctx->table->sets, list) {
if (!strcmp(set->name, i->name))
return -ENFILE;
@@ -2044,8 +2167,9 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
{
struct nfgenmsg *nfmsg;
struct nlmsghdr *nlh;
- u32 portid = NETLINK_CB(ctx->skb).portid;
- u32 seq = ctx->nlh->nlmsg_seq;
+ struct nlattr *desc;
+ u32 portid = ctx->portid;
+ u32 seq = ctx->seq;
event |= NFNL_SUBSYS_NFTABLES << 8;
nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg),
@@ -2077,6 +2201,14 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
goto nla_put_failure;
}
+ desc = nla_nest_start(skb, NFTA_SET_DESC);
+ if (desc == NULL)
+ goto nla_put_failure;
+ if (set->size &&
+ nla_put_be32(skb, NFTA_SET_DESC_SIZE, htonl(set->size)))
+ goto nla_put_failure;
+ nla_nest_end(skb, desc);
+
return nlmsg_end(skb, nlh);
nla_put_failure:
@@ -2086,19 +2218,18 @@ nla_put_failure:
static int nf_tables_set_notify(const struct nft_ctx *ctx,
const struct nft_set *set,
- int event)
+ int event, gfp_t gfp_flags)
{
struct sk_buff *skb;
- u32 portid = NETLINK_CB(ctx->skb).portid;
- bool report;
+ u32 portid = ctx->portid;
int err;
- report = nlmsg_report(ctx->nlh);
- if (!report && !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
+ if (!ctx->report &&
+ !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
return 0;
err = -ENOBUFS;
- skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ skb = nlmsg_new(NLMSG_GOODSIZE, gfp_flags);
if (skb == NULL)
goto err;
@@ -2108,8 +2239,8 @@ static int nf_tables_set_notify(const struct nft_ctx *ctx,
goto err;
}
- err = nfnetlink_send(skb, ctx->net, portid, NFNLGRP_NFTABLES, report,
- GFP_KERNEL);
+ err = nfnetlink_send(skb, ctx->net, portid, NFNLGRP_NFTABLES,
+ ctx->report, gfp_flags);
err:
if (err < 0)
nfnetlink_set_err(ctx->net, portid, NFNLGRP_NFTABLES, err);
@@ -2125,7 +2256,10 @@ static int nf_tables_dump_sets_table(struct nft_ctx *ctx, struct sk_buff *skb,
if (cb->args[1])
return skb->len;
- list_for_each_entry(set, &ctx->table->sets, list) {
+ rcu_read_lock();
+ cb->seq = ctx->net->nft.base_seq;
+
+ list_for_each_entry_rcu(set, &ctx->table->sets, list) {
if (idx < s_idx)
goto cont;
if (nf_tables_fill_set(skb, ctx, set, NFT_MSG_NEWSET,
@@ -2133,11 +2267,13 @@ static int nf_tables_dump_sets_table(struct nft_ctx *ctx, struct sk_buff *skb,
cb->args[0] = idx;
goto done;
}
+ nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
idx++;
}
cb->args[1] = 1;
done:
+ rcu_read_unlock();
return skb->len;
}
@@ -2151,7 +2287,10 @@ static int nf_tables_dump_sets_family(struct nft_ctx *ctx, struct sk_buff *skb,
if (cb->args[1])
return skb->len;
- list_for_each_entry(table, &ctx->afi->tables, list) {
+ rcu_read_lock();
+ cb->seq = ctx->net->nft.base_seq;
+
+ list_for_each_entry_rcu(table, &ctx->afi->tables, list) {
if (cur_table) {
if (cur_table != table)
continue;
@@ -2160,7 +2299,7 @@ static int nf_tables_dump_sets_family(struct nft_ctx *ctx, struct sk_buff *skb,
}
ctx->table = table;
idx = 0;
- list_for_each_entry(set, &ctx->table->sets, list) {
+ list_for_each_entry_rcu(set, &ctx->table->sets, list) {
if (idx < s_idx)
goto cont;
if (nf_tables_fill_set(skb, ctx, set, NFT_MSG_NEWSET,
@@ -2169,12 +2308,14 @@ static int nf_tables_dump_sets_family(struct nft_ctx *ctx, struct sk_buff *skb,
cb->args[2] = (unsigned long) table;
goto done;
}
+ nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
idx++;
}
}
cb->args[1] = 1;
done:
+ rcu_read_unlock();
return skb->len;
}
@@ -2183,7 +2324,7 @@ static int nf_tables_dump_sets_all(struct nft_ctx *ctx, struct sk_buff *skb,
{
const struct nft_set *set;
unsigned int idx, s_idx = cb->args[0];
- const struct nft_af_info *afi;
+ struct nft_af_info *afi;
struct nft_table *table, *cur_table = (struct nft_table *)cb->args[2];
struct net *net = sock_net(skb->sk);
int cur_family = cb->args[3];
@@ -2191,7 +2332,10 @@ static int nf_tables_dump_sets_all(struct nft_ctx *ctx, struct sk_buff *skb,
if (cb->args[1])
return skb->len;
- list_for_each_entry(afi, &net->nft.af_info, list) {
+ rcu_read_lock();
+ cb->seq = net->nft.base_seq;
+
+ list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
if (cur_family) {
if (afi->family != cur_family)
continue;
@@ -2199,7 +2343,7 @@ static int nf_tables_dump_sets_all(struct nft_ctx *ctx, struct sk_buff *skb,
cur_family = 0;
}
- list_for_each_entry(table, &afi->tables, list) {
+ list_for_each_entry_rcu(table, &afi->tables, list) {
if (cur_table) {
if (cur_table != table)
continue;
@@ -2210,7 +2354,7 @@ static int nf_tables_dump_sets_all(struct nft_ctx *ctx, struct sk_buff *skb,
ctx->table = table;
ctx->afi = afi;
idx = 0;
- list_for_each_entry(set, &ctx->table->sets, list) {
+ list_for_each_entry_rcu(set, &ctx->table->sets, list) {
if (idx < s_idx)
goto cont;
if (nf_tables_fill_set(skb, ctx, set,
@@ -2221,6 +2365,7 @@ static int nf_tables_dump_sets_all(struct nft_ctx *ctx, struct sk_buff *skb,
cb->args[3] = afi->family;
goto done;
}
+ nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
idx++;
}
@@ -2230,6 +2375,7 @@ cont:
}
cb->args[1] = 1;
done:
+ rcu_read_unlock();
return skb->len;
}
@@ -2260,6 +2406,8 @@ static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb)
return ret;
}
+#define NFT_SET_INACTIVE (1 << 15) /* Internal set flag */
+
static int nf_tables_getset(struct sock *nlsk, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
@@ -2289,6 +2437,8 @@ static int nf_tables_getset(struct sock *nlsk, struct sk_buff *skb,
set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]);
if (IS_ERR(set))
return PTR_ERR(set);
+ if (set->flags & NFT_SET_INACTIVE)
+ return -ENOENT;
skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (skb2 == NULL)
@@ -2305,13 +2455,50 @@ err:
return err;
}
+static int nf_tables_set_desc_parse(const struct nft_ctx *ctx,
+ struct nft_set_desc *desc,
+ const struct nlattr *nla)
+{
+ struct nlattr *da[NFTA_SET_DESC_MAX + 1];
+ int err;
+
+ err = nla_parse_nested(da, NFTA_SET_DESC_MAX, nla, nft_set_desc_policy);
+ if (err < 0)
+ return err;
+
+ if (da[NFTA_SET_DESC_SIZE] != NULL)
+ desc->size = ntohl(nla_get_be32(da[NFTA_SET_DESC_SIZE]));
+
+ return 0;
+}
+
+static int nft_trans_set_add(struct nft_ctx *ctx, int msg_type,
+ struct nft_set *set)
+{
+ struct nft_trans *trans;
+
+ trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_set));
+ if (trans == NULL)
+ return -ENOMEM;
+
+ if (msg_type == NFT_MSG_NEWSET && ctx->nla[NFTA_SET_ID] != NULL) {
+ nft_trans_set_id(trans) =
+ ntohl(nla_get_be32(ctx->nla[NFTA_SET_ID]));
+ set->flags |= NFT_SET_INACTIVE;
+ }
+ nft_trans_set(trans) = set;
+ list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+
+ return 0;
+}
+
static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
const struct nft_set_ops *ops;
- const struct nft_af_info *afi;
+ struct nft_af_info *afi;
struct net *net = sock_net(skb->sk);
struct nft_table *table;
struct nft_set *set;
@@ -2319,14 +2506,18 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
char name[IFNAMSIZ];
unsigned int size;
bool create;
- u32 ktype, klen, dlen, dtype, flags;
+ u32 ktype, dtype, flags, policy;
+ struct nft_set_desc desc;
int err;
if (nla[NFTA_SET_TABLE] == NULL ||
nla[NFTA_SET_NAME] == NULL ||
- nla[NFTA_SET_KEY_LEN] == NULL)
+ nla[NFTA_SET_KEY_LEN] == NULL ||
+ nla[NFTA_SET_ID] == NULL)
return -EINVAL;
+ memset(&desc, 0, sizeof(desc));
+
ktype = NFT_DATA_VALUE;
if (nla[NFTA_SET_KEY_TYPE] != NULL) {
ktype = ntohl(nla_get_be32(nla[NFTA_SET_KEY_TYPE]));
@@ -2334,8 +2525,8 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
return -EINVAL;
}
- klen = ntohl(nla_get_be32(nla[NFTA_SET_KEY_LEN]));
- if (klen == 0 || klen > FIELD_SIZEOF(struct nft_data, data))
+ desc.klen = ntohl(nla_get_be32(nla[NFTA_SET_KEY_LEN]));
+ if (desc.klen == 0 || desc.klen > FIELD_SIZEOF(struct nft_data, data))
return -EINVAL;
flags = 0;
@@ -2347,7 +2538,6 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
}
dtype = 0;
- dlen = 0;
if (nla[NFTA_SET_DATA_TYPE] != NULL) {
if (!(flags & NFT_SET_MAP))
return -EINVAL;
@@ -2360,15 +2550,25 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
if (dtype != NFT_DATA_VERDICT) {
if (nla[NFTA_SET_DATA_LEN] == NULL)
return -EINVAL;
- dlen = ntohl(nla_get_be32(nla[NFTA_SET_DATA_LEN]));
- if (dlen == 0 ||
- dlen > FIELD_SIZEOF(struct nft_data, data))
+ desc.dlen = ntohl(nla_get_be32(nla[NFTA_SET_DATA_LEN]));
+ if (desc.dlen == 0 ||
+ desc.dlen > FIELD_SIZEOF(struct nft_data, data))
return -EINVAL;
} else
- dlen = sizeof(struct nft_data);
+ desc.dlen = sizeof(struct nft_data);
} else if (flags & NFT_SET_MAP)
return -EINVAL;
+ policy = NFT_SET_POL_PERFORMANCE;
+ if (nla[NFTA_SET_POLICY] != NULL)
+ policy = ntohl(nla_get_be32(nla[NFTA_SET_POLICY]));
+
+ if (nla[NFTA_SET_DESC] != NULL) {
+ err = nf_tables_set_desc_parse(&ctx, &desc, nla[NFTA_SET_DESC]);
+ if (err < 0)
+ return err;
+ }
+
create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, create);
@@ -2399,7 +2599,7 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
if (!(nlh->nlmsg_flags & NLM_F_CREATE))
return -ENOENT;
- ops = nft_select_set_ops(nla);
+ ops = nft_select_set_ops(nla, &desc, policy);
if (IS_ERR(ops))
return PTR_ERR(ops);
@@ -2420,17 +2620,22 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
INIT_LIST_HEAD(&set->bindings);
set->ops = ops;
set->ktype = ktype;
- set->klen = klen;
+ set->klen = desc.klen;
set->dtype = dtype;
- set->dlen = dlen;
+ set->dlen = desc.dlen;
set->flags = flags;
+ set->size = desc.size;
+
+ err = ops->init(set, &desc, nla);
+ if (err < 0)
+ goto err2;
- err = ops->init(set, nla);
+ err = nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set);
if (err < 0)
goto err2;
- list_add_tail(&set->list, &table->sets);
- nf_tables_set_notify(&ctx, set, NFT_MSG_NEWSET);
+ list_add_tail_rcu(&set->list, &table->sets);
+ table->use++;
return 0;
err2:
@@ -2440,16 +2645,20 @@ err1:
return err;
}
-static void nf_tables_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
+static void nft_set_destroy(struct nft_set *set)
{
- list_del(&set->list);
- nf_tables_set_notify(ctx, set, NFT_MSG_DELSET);
-
set->ops->destroy(set);
module_put(set->ops->owner);
kfree(set);
}
+static void nf_tables_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
+{
+ list_del_rcu(&set->list);
+ nf_tables_set_notify(ctx, set, NFT_MSG_DELSET, GFP_ATOMIC);
+ nft_set_destroy(set);
+}
+
static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
@@ -2471,10 +2680,17 @@ static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb,
set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]);
if (IS_ERR(set))
return PTR_ERR(set);
+ if (set->flags & NFT_SET_INACTIVE)
+ return -ENOENT;
if (!list_empty(&set->bindings))
return -EBUSY;
- nf_tables_set_destroy(&ctx, set);
+ err = nft_trans_set_add(&ctx, NFT_MSG_DELSET, set);
+ if (err < 0)
+ return err;
+
+ list_del_rcu(&set->list);
+ ctx.table->use--;
return 0;
}
@@ -2525,16 +2741,17 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
}
bind:
binding->chain = ctx->chain;
- list_add_tail(&binding->list, &set->bindings);
+ list_add_tail_rcu(&binding->list, &set->bindings);
return 0;
}
void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
struct nft_set_binding *binding)
{
- list_del(&binding->list);
+ list_del_rcu(&binding->list);
- if (list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS)
+ if (list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS &&
+ !(set->flags & NFT_SET_INACTIVE))
nf_tables_set_destroy(ctx, set);
}
@@ -2552,16 +2769,18 @@ static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX +
[NFTA_SET_ELEM_LIST_TABLE] = { .type = NLA_STRING },
[NFTA_SET_ELEM_LIST_SET] = { .type = NLA_STRING },
[NFTA_SET_ELEM_LIST_ELEMENTS] = { .type = NLA_NESTED },
+ [NFTA_SET_ELEM_LIST_SET_ID] = { .type = NLA_U32 },
};
static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx,
const struct sk_buff *skb,
const struct nlmsghdr *nlh,
- const struct nlattr * const nla[])
+ const struct nlattr * const nla[],
+ bool trans)
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
- const struct nft_af_info *afi;
- const struct nft_table *table;
+ struct nft_af_info *afi;
+ struct nft_table *table;
struct net *net = sock_net(skb->sk);
afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false);
@@ -2571,6 +2790,8 @@ static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx,
table = nf_tables_table_lookup(afi, nla[NFTA_SET_ELEM_LIST_TABLE]);
if (IS_ERR(table))
return PTR_ERR(table);
+ if (!trans && (table->flags & NFT_TABLE_INACTIVE))
+ return -ENOENT;
nft_ctx_init(ctx, skb, nlh, afi, table, NULL, nla);
return 0;
@@ -2644,13 +2865,16 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
if (err < 0)
return err;
- err = nft_ctx_init_from_elemattr(&ctx, cb->skb, cb->nlh, (void *)nla);
+ err = nft_ctx_init_from_elemattr(&ctx, cb->skb, cb->nlh, (void *)nla,
+ false);
if (err < 0)
return err;
set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]);
if (IS_ERR(set))
return PTR_ERR(set);
+ if (set->flags & NFT_SET_INACTIVE)
+ return -ENOENT;
event = NFT_MSG_NEWSETELEM;
event |= NFNL_SUBSYS_NFTABLES << 8;
@@ -2663,7 +2887,7 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
goto nla_put_failure;
nfmsg = nlmsg_data(nlh);
- nfmsg->nfgen_family = NFPROTO_UNSPEC;
+ nfmsg->nfgen_family = ctx.afi->family;
nfmsg->version = NFNETLINK_V0;
nfmsg->res_id = 0;
@@ -2707,13 +2931,15 @@ static int nf_tables_getsetelem(struct sock *nlsk, struct sk_buff *skb,
struct nft_ctx ctx;
int err;
- err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla);
+ err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla, false);
if (err < 0)
return err;
set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]);
if (IS_ERR(set))
return PTR_ERR(set);
+ if (set->flags & NFT_SET_INACTIVE)
+ return -ENOENT;
if (nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
@@ -2724,7 +2950,98 @@ static int nf_tables_getsetelem(struct sock *nlsk, struct sk_buff *skb,
return -EOPNOTSUPP;
}
-static int nft_add_set_elem(const struct nft_ctx *ctx, struct nft_set *set,
+static int nf_tables_fill_setelem_info(struct sk_buff *skb,
+ const struct nft_ctx *ctx, u32 seq,
+ u32 portid, int event, u16 flags,
+ const struct nft_set *set,
+ const struct nft_set_elem *elem)
+{
+ struct nfgenmsg *nfmsg;
+ struct nlmsghdr *nlh;
+ struct nlattr *nest;
+ int err;
+
+ event |= NFNL_SUBSYS_NFTABLES << 8;
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg),
+ flags);
+ if (nlh == NULL)
+ goto nla_put_failure;
+
+ nfmsg = nlmsg_data(nlh);
+ nfmsg->nfgen_family = ctx->afi->family;
+ nfmsg->version = NFNETLINK_V0;
+ nfmsg->res_id = 0;
+
+ if (nla_put_string(skb, NFTA_SET_TABLE, ctx->table->name))
+ goto nla_put_failure;
+ if (nla_put_string(skb, NFTA_SET_NAME, set->name))
+ goto nla_put_failure;
+
+ nest = nla_nest_start(skb, NFTA_SET_ELEM_LIST_ELEMENTS);
+ if (nest == NULL)
+ goto nla_put_failure;
+
+ err = nf_tables_fill_setelem(skb, set, elem);
+ if (err < 0)
+ goto nla_put_failure;
+
+ nla_nest_end(skb, nest);
+
+ return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+ nlmsg_trim(skb, nlh);
+ return -1;
+}
+
+static int nf_tables_setelem_notify(const struct nft_ctx *ctx,
+ const struct nft_set *set,
+ const struct nft_set_elem *elem,
+ int event, u16 flags)
+{
+ struct net *net = ctx->net;
+ u32 portid = ctx->portid;
+ struct sk_buff *skb;
+ int err;
+
+ if (!ctx->report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
+ return 0;
+
+ err = -ENOBUFS;
+ skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (skb == NULL)
+ goto err;
+
+ err = nf_tables_fill_setelem_info(skb, ctx, 0, portid, event, flags,
+ set, elem);
+ if (err < 0) {
+ kfree_skb(skb);
+ goto err;
+ }
+
+ err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, ctx->report,
+ GFP_KERNEL);
+err:
+ if (err < 0)
+ nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err);
+ return err;
+}
+
+static struct nft_trans *nft_trans_elem_alloc(struct nft_ctx *ctx,
+ int msg_type,
+ struct nft_set *set)
+{
+ struct nft_trans *trans;
+
+ trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_elem));
+ if (trans == NULL)
+ return NULL;
+
+ nft_trans_elem_set(trans) = set;
+ return trans;
+}
+
+static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
const struct nlattr *attr)
{
struct nlattr *nla[NFTA_SET_ELEM_MAX + 1];
@@ -2732,8 +3049,12 @@ static int nft_add_set_elem(const struct nft_ctx *ctx, struct nft_set *set,
struct nft_set_elem elem;
struct nft_set_binding *binding;
enum nft_registers dreg;
+ struct nft_trans *trans;
int err;
+ if (set->size && set->nelems == set->size)
+ return -ENFILE;
+
err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr,
nft_set_elem_policy);
if (err < 0)
@@ -2786,7 +3107,7 @@ static int nft_add_set_elem(const struct nft_ctx *ctx, struct nft_set *set,
struct nft_ctx bind_ctx = {
.afi = ctx->afi,
.table = ctx->table,
- .chain = binding->chain,
+ .chain = (struct nft_chain *)binding->chain,
};
err = nft_validate_data_load(&bind_ctx, dreg,
@@ -2796,12 +3117,20 @@ static int nft_add_set_elem(const struct nft_ctx *ctx, struct nft_set *set,
}
}
+ trans = nft_trans_elem_alloc(ctx, NFT_MSG_NEWSETELEM, set);
+ if (trans == NULL)
+ goto err3;
+
err = set->ops->insert(set, &elem);
if (err < 0)
- goto err3;
+ goto err4;
+ nft_trans_elem(trans) = elem;
+ list_add_tail(&trans->list, &ctx->net->nft.commit_list);
return 0;
+err4:
+ kfree(trans);
err3:
if (nla[NFTA_SET_ELEM_DATA] != NULL)
nft_data_uninit(&elem.data, d2.type);
@@ -2815,35 +3144,46 @@ static int nf_tables_newsetelem(struct sock *nlsk, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
+ struct net *net = sock_net(skb->sk);
const struct nlattr *attr;
struct nft_set *set;
struct nft_ctx ctx;
- int rem, err;
+ int rem, err = 0;
- err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla);
+ err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla, true);
if (err < 0)
return err;
set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]);
- if (IS_ERR(set))
- return PTR_ERR(set);
+ if (IS_ERR(set)) {
+ if (nla[NFTA_SET_ELEM_LIST_SET_ID]) {
+ set = nf_tables_set_lookup_byid(net,
+ nla[NFTA_SET_ELEM_LIST_SET_ID]);
+ }
+ if (IS_ERR(set))
+ return PTR_ERR(set);
+ }
+
if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT)
return -EBUSY;
nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
err = nft_add_set_elem(&ctx, set, attr);
if (err < 0)
- return err;
+ break;
+
+ set->nelems++;
}
- return 0;
+ return err;
}
-static int nft_del_setelem(const struct nft_ctx *ctx, struct nft_set *set,
+static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
const struct nlattr *attr)
{
struct nlattr *nla[NFTA_SET_ELEM_MAX + 1];
struct nft_data_desc desc;
struct nft_set_elem elem;
+ struct nft_trans *trans;
int err;
err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr,
@@ -2867,7 +3207,12 @@ static int nft_del_setelem(const struct nft_ctx *ctx, struct nft_set *set,
if (err < 0)
goto err2;
- set->ops->remove(set, &elem);
+ trans = nft_trans_elem_alloc(ctx, NFT_MSG_DELSETELEM, set);
+ if (trans == NULL)
+ goto err2;
+
+ nft_trans_elem(trans) = elem;
+ list_add_tail(&trans->list, &ctx->net->nft.commit_list);
nft_data_uninit(&elem.key, NFT_DATA_VALUE);
if (set->flags & NFT_SET_MAP)
@@ -2886,9 +3231,9 @@ static int nf_tables_delsetelem(struct sock *nlsk, struct sk_buff *skb,
const struct nlattr *attr;
struct nft_set *set;
struct nft_ctx ctx;
- int rem, err;
+ int rem, err = 0;
- err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla);
+ err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla, false);
if (err < 0)
return err;
@@ -2901,14 +3246,16 @@ static int nf_tables_delsetelem(struct sock *nlsk, struct sk_buff *skb,
nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
err = nft_del_setelem(&ctx, set, attr);
if (err < 0)
- return err;
+ break;
+
+ set->nelems--;
}
- return 0;
+ return err;
}
static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
[NFT_MSG_NEWTABLE] = {
- .call = nf_tables_newtable,
+ .call_batch = nf_tables_newtable,
.attr_count = NFTA_TABLE_MAX,
.policy = nft_table_policy,
},
@@ -2918,12 +3265,12 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
.policy = nft_table_policy,
},
[NFT_MSG_DELTABLE] = {
- .call = nf_tables_deltable,
+ .call_batch = nf_tables_deltable,
.attr_count = NFTA_TABLE_MAX,
.policy = nft_table_policy,
},
[NFT_MSG_NEWCHAIN] = {
- .call = nf_tables_newchain,
+ .call_batch = nf_tables_newchain,
.attr_count = NFTA_CHAIN_MAX,
.policy = nft_chain_policy,
},
@@ -2933,7 +3280,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
.policy = nft_chain_policy,
},
[NFT_MSG_DELCHAIN] = {
- .call = nf_tables_delchain,
+ .call_batch = nf_tables_delchain,
.attr_count = NFTA_CHAIN_MAX,
.policy = nft_chain_policy,
},
@@ -2953,7 +3300,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
.policy = nft_rule_policy,
},
[NFT_MSG_NEWSET] = {
- .call = nf_tables_newset,
+ .call_batch = nf_tables_newset,
.attr_count = NFTA_SET_MAX,
.policy = nft_set_policy,
},
@@ -2963,12 +3310,12 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
.policy = nft_set_policy,
},
[NFT_MSG_DELSET] = {
- .call = nf_tables_delset,
+ .call_batch = nf_tables_delset,
.attr_count = NFTA_SET_MAX,
.policy = nft_set_policy,
},
[NFT_MSG_NEWSETELEM] = {
- .call = nf_tables_newsetelem,
+ .call_batch = nf_tables_newsetelem,
.attr_count = NFTA_SET_ELEM_LIST_MAX,
.policy = nft_set_elem_list_policy,
},
@@ -2978,12 +3325,282 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
.policy = nft_set_elem_list_policy,
},
[NFT_MSG_DELSETELEM] = {
- .call = nf_tables_delsetelem,
+ .call_batch = nf_tables_delsetelem,
.attr_count = NFTA_SET_ELEM_LIST_MAX,
.policy = nft_set_elem_list_policy,
},
};
+static void nft_chain_commit_update(struct nft_trans *trans)
+{
+ struct nft_base_chain *basechain;
+
+ if (nft_trans_chain_name(trans)[0])
+ strcpy(trans->ctx.chain->name, nft_trans_chain_name(trans));
+
+ if (!(trans->ctx.chain->flags & NFT_BASE_CHAIN))
+ return;
+
+ basechain = nft_base_chain(trans->ctx.chain);
+ nft_chain_stats_replace(basechain, nft_trans_chain_stats(trans));
+
+ switch (nft_trans_chain_policy(trans)) {
+ case NF_DROP:
+ case NF_ACCEPT:
+ basechain->policy = nft_trans_chain_policy(trans);
+ break;
+ }
+}
+
+/* Schedule objects for release via rcu to make sure no packets are accesing
+ * removed rules.
+ */
+static void nf_tables_commit_release_rcu(struct rcu_head *rt)
+{
+ struct nft_trans *trans = container_of(rt, struct nft_trans, rcu_head);
+
+ switch (trans->msg_type) {
+ case NFT_MSG_DELTABLE:
+ nf_tables_table_destroy(&trans->ctx);
+ break;
+ case NFT_MSG_DELCHAIN:
+ nf_tables_chain_destroy(trans->ctx.chain);
+ break;
+ case NFT_MSG_DELRULE:
+ nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans));
+ break;
+ case NFT_MSG_DELSET:
+ nft_set_destroy(nft_trans_set(trans));
+ break;
+ }
+ kfree(trans);
+}
+
+static int nf_tables_commit(struct sk_buff *skb)
+{
+ struct net *net = sock_net(skb->sk);
+ struct nft_trans *trans, *next;
+ struct nft_set *set;
+
+ /* Bump generation counter, invalidate any dump in progress */
+ while (++net->nft.base_seq == 0);
+
+ /* A new generation has just started */
+ net->nft.gencursor = gencursor_next(net);
+
+ /* Make sure all packets have left the previous generation before
+ * purging old rules.
+ */
+ synchronize_rcu();
+
+ list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
+ switch (trans->msg_type) {
+ case NFT_MSG_NEWTABLE:
+ if (nft_trans_table_update(trans)) {
+ if (!nft_trans_table_enable(trans)) {
+ nf_tables_table_disable(trans->ctx.afi,
+ trans->ctx.table);
+ trans->ctx.table->flags |= NFT_TABLE_F_DORMANT;
+ }
+ } else {
+ trans->ctx.table->flags &= ~NFT_TABLE_INACTIVE;
+ }
+ nf_tables_table_notify(&trans->ctx, NFT_MSG_NEWTABLE);
+ nft_trans_destroy(trans);
+ break;
+ case NFT_MSG_DELTABLE:
+ nf_tables_table_notify(&trans->ctx, NFT_MSG_DELTABLE);
+ break;
+ case NFT_MSG_NEWCHAIN:
+ if (nft_trans_chain_update(trans))
+ nft_chain_commit_update(trans);
+ else
+ trans->ctx.chain->flags &= ~NFT_CHAIN_INACTIVE;
+
+ nf_tables_chain_notify(&trans->ctx, NFT_MSG_NEWCHAIN);
+ nft_trans_destroy(trans);
+ break;
+ case NFT_MSG_DELCHAIN:
+ nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN);
+ if (!(trans->ctx.table->flags & NFT_TABLE_F_DORMANT) &&
+ trans->ctx.chain->flags & NFT_BASE_CHAIN) {
+ nf_unregister_hooks(nft_base_chain(trans->ctx.chain)->ops,
+ trans->ctx.afi->nops);
+ }
+ break;
+ case NFT_MSG_NEWRULE:
+ nft_rule_clear(trans->ctx.net, nft_trans_rule(trans));
+ nf_tables_rule_notify(&trans->ctx,
+ nft_trans_rule(trans),
+ NFT_MSG_NEWRULE);
+ nft_trans_destroy(trans);
+ break;
+ case NFT_MSG_DELRULE:
+ list_del_rcu(&nft_trans_rule(trans)->list);
+ nf_tables_rule_notify(&trans->ctx,
+ nft_trans_rule(trans),
+ NFT_MSG_DELRULE);
+ break;
+ case NFT_MSG_NEWSET:
+ nft_trans_set(trans)->flags &= ~NFT_SET_INACTIVE;
+ /* This avoids hitting -EBUSY when deleting the table
+ * from the transaction.
+ */
+ if (nft_trans_set(trans)->flags & NFT_SET_ANONYMOUS &&
+ !list_empty(&nft_trans_set(trans)->bindings))
+ trans->ctx.table->use--;
+
+ nf_tables_set_notify(&trans->ctx, nft_trans_set(trans),
+ NFT_MSG_NEWSET, GFP_KERNEL);
+ nft_trans_destroy(trans);
+ break;
+ case NFT_MSG_DELSET:
+ nf_tables_set_notify(&trans->ctx, nft_trans_set(trans),
+ NFT_MSG_DELSET, GFP_KERNEL);
+ break;
+ case NFT_MSG_NEWSETELEM:
+ nf_tables_setelem_notify(&trans->ctx,
+ nft_trans_elem_set(trans),
+ &nft_trans_elem(trans),
+ NFT_MSG_NEWSETELEM, 0);
+ nft_trans_destroy(trans);
+ break;
+ case NFT_MSG_DELSETELEM:
+ nf_tables_setelem_notify(&trans->ctx,
+ nft_trans_elem_set(trans),
+ &nft_trans_elem(trans),
+ NFT_MSG_DELSETELEM, 0);
+ set = nft_trans_elem_set(trans);
+ set->ops->get(set, &nft_trans_elem(trans));
+ set->ops->remove(set, &nft_trans_elem(trans));
+ nft_trans_destroy(trans);
+ break;
+ }
+ }
+
+ list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
+ list_del(&trans->list);
+ trans->ctx.nla = NULL;
+ call_rcu(&trans->rcu_head, nf_tables_commit_release_rcu);
+ }
+
+ return 0;
+}
+
+/* Schedule objects for release via rcu to make sure no packets are accesing
+ * aborted rules.
+ */
+static void nf_tables_abort_release_rcu(struct rcu_head *rt)
+{
+ struct nft_trans *trans = container_of(rt, struct nft_trans, rcu_head);
+
+ switch (trans->msg_type) {
+ case NFT_MSG_NEWTABLE:
+ nf_tables_table_destroy(&trans->ctx);
+ break;
+ case NFT_MSG_NEWCHAIN:
+ nf_tables_chain_destroy(trans->ctx.chain);
+ break;
+ case NFT_MSG_NEWRULE:
+ nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans));
+ break;
+ case NFT_MSG_NEWSET:
+ nft_set_destroy(nft_trans_set(trans));
+ break;
+ }
+ kfree(trans);
+}
+
+static int nf_tables_abort(struct sk_buff *skb)
+{
+ struct net *net = sock_net(skb->sk);
+ struct nft_trans *trans, *next;
+ struct nft_set *set;
+
+ list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
+ switch (trans->msg_type) {
+ case NFT_MSG_NEWTABLE:
+ if (nft_trans_table_update(trans)) {
+ if (nft_trans_table_enable(trans)) {
+ nf_tables_table_disable(trans->ctx.afi,
+ trans->ctx.table);
+ trans->ctx.table->flags |= NFT_TABLE_F_DORMANT;
+ }
+ nft_trans_destroy(trans);
+ } else {
+ list_del_rcu(&trans->ctx.table->list);
+ }
+ break;
+ case NFT_MSG_DELTABLE:
+ list_add_tail_rcu(&trans->ctx.table->list,
+ &trans->ctx.afi->tables);
+ nft_trans_destroy(trans);
+ break;
+ case NFT_MSG_NEWCHAIN:
+ if (nft_trans_chain_update(trans)) {
+ if (nft_trans_chain_stats(trans))
+ free_percpu(nft_trans_chain_stats(trans));
+
+ nft_trans_destroy(trans);
+ } else {
+ trans->ctx.table->use--;
+ list_del_rcu(&trans->ctx.chain->list);
+ if (!(trans->ctx.table->flags & NFT_TABLE_F_DORMANT) &&
+ trans->ctx.chain->flags & NFT_BASE_CHAIN) {
+ nf_unregister_hooks(nft_base_chain(trans->ctx.chain)->ops,
+ trans->ctx.afi->nops);
+ }
+ }
+ break;
+ case NFT_MSG_DELCHAIN:
+ trans->ctx.table->use++;
+ list_add_tail_rcu(&trans->ctx.chain->list,
+ &trans->ctx.table->chains);
+ nft_trans_destroy(trans);
+ break;
+ case NFT_MSG_NEWRULE:
+ trans->ctx.chain->use--;
+ list_del_rcu(&nft_trans_rule(trans)->list);
+ break;
+ case NFT_MSG_DELRULE:
+ trans->ctx.chain->use++;
+ nft_rule_clear(trans->ctx.net, nft_trans_rule(trans));
+ nft_trans_destroy(trans);
+ break;
+ case NFT_MSG_NEWSET:
+ trans->ctx.table->use--;
+ list_del_rcu(&nft_trans_set(trans)->list);
+ break;
+ case NFT_MSG_DELSET:
+ trans->ctx.table->use++;
+ list_add_tail_rcu(&nft_trans_set(trans)->list,
+ &trans->ctx.table->sets);
+ nft_trans_destroy(trans);
+ break;
+ case NFT_MSG_NEWSETELEM:
+ nft_trans_elem_set(trans)->nelems--;
+ set = nft_trans_elem_set(trans);
+ set->ops->get(set, &nft_trans_elem(trans));
+ set->ops->remove(set, &nft_trans_elem(trans));
+ nft_trans_destroy(trans);
+ break;
+ case NFT_MSG_DELSETELEM:
+ nft_trans_elem_set(trans)->nelems++;
+ nft_trans_destroy(trans);
+ break;
+ }
+ }
+
+ list_for_each_entry_safe_reverse(trans, next,
+ &net->nft.commit_list, list) {
+ list_del(&trans->list);
+ trans->ctx.nla = NULL;
+ call_rcu(&trans->rcu_head, nf_tables_abort_release_rcu);
+ }
+
+ return 0;
+}
+
static const struct nfnetlink_subsystem nf_tables_subsys = {
.name = "nf_tables",
.subsys_id = NFNL_SUBSYS_NFTABLES,
@@ -3371,6 +3988,7 @@ static int nf_tables_init_net(struct net *net)
{
INIT_LIST_HEAD(&net->nft.af_info);
INIT_LIST_HEAD(&net->nft.commit_list);
+ net->nft.base_seq = 1;
return 0;
}
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index 804105391b9..3b90eb2b2c5 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -66,20 +66,6 @@ struct nft_jumpstack {
int rulenum;
};
-static inline void
-nft_chain_stats(const struct nft_chain *this, const struct nft_pktinfo *pkt,
- struct nft_jumpstack *jumpstack, unsigned int stackptr)
-{
- struct nft_stats __percpu *stats;
- const struct nft_chain *chain = stackptr ? jumpstack[0].chain : this;
-
- rcu_read_lock_bh();
- stats = rcu_dereference(nft_base_chain(chain)->stats);
- __this_cpu_inc(stats->pkts);
- __this_cpu_add(stats->bytes, pkt->skb->len);
- rcu_read_unlock_bh();
-}
-
enum nft_trace {
NFT_TRACE_RULE,
NFT_TRACE_RETURN,
@@ -117,13 +103,14 @@ static void nft_trace_packet(const struct nft_pktinfo *pkt,
unsigned int
nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops)
{
- const struct nft_chain *chain = ops->priv;
+ const struct nft_chain *chain = ops->priv, *basechain = chain;
const struct nft_rule *rule;
const struct nft_expr *expr, *last;
struct nft_data data[NFT_REG_MAX + 1];
unsigned int stackptr = 0;
struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE];
- int rulenum = 0;
+ struct nft_stats *stats;
+ int rulenum;
/*
* Cache cursor to avoid problems in case that the cursor is updated
* while traversing the ruleset.
@@ -131,6 +118,7 @@ nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops)
unsigned int gencursor = ACCESS_ONCE(chain->net->nft.gencursor);
do_chain:
+ rulenum = 0;
rule = list_entry(&chain->rules, struct nft_rule, list);
next_rule:
data[NFT_REG_VERDICT].verdict = NFT_CONTINUE;
@@ -156,8 +144,10 @@ next_rule:
switch (data[NFT_REG_VERDICT].verdict) {
case NFT_BREAK:
data[NFT_REG_VERDICT].verdict = NFT_CONTINUE;
- /* fall through */
+ continue;
case NFT_CONTINUE:
+ if (unlikely(pkt->skb->nf_trace))
+ nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
continue;
}
break;
@@ -183,37 +173,46 @@ next_rule:
jumpstack[stackptr].rule = rule;
jumpstack[stackptr].rulenum = rulenum;
stackptr++;
- /* fall through */
+ chain = data[NFT_REG_VERDICT].chain;
+ goto do_chain;
case NFT_GOTO:
+ if (unlikely(pkt->skb->nf_trace))
+ nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
+
chain = data[NFT_REG_VERDICT].chain;
goto do_chain;
case NFT_RETURN:
if (unlikely(pkt->skb->nf_trace))
nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RETURN);
-
- /* fall through */
+ break;
case NFT_CONTINUE:
+ if (unlikely(pkt->skb->nf_trace && !(chain->flags & NFT_BASE_CHAIN)))
+ nft_trace_packet(pkt, chain, ++rulenum, NFT_TRACE_RETURN);
break;
default:
WARN_ON(1);
}
if (stackptr > 0) {
- if (unlikely(pkt->skb->nf_trace))
- nft_trace_packet(pkt, chain, ++rulenum, NFT_TRACE_RETURN);
-
stackptr--;
chain = jumpstack[stackptr].chain;
rule = jumpstack[stackptr].rule;
rulenum = jumpstack[stackptr].rulenum;
goto next_rule;
}
- nft_chain_stats(chain, pkt, jumpstack, stackptr);
if (unlikely(pkt->skb->nf_trace))
- nft_trace_packet(pkt, chain, ++rulenum, NFT_TRACE_POLICY);
+ nft_trace_packet(pkt, basechain, -1, NFT_TRACE_POLICY);
+
+ rcu_read_lock_bh();
+ stats = this_cpu_ptr(rcu_dereference(nft_base_chain(basechain)->stats));
+ u64_stats_update_begin(&stats->syncp);
+ stats->pkts++;
+ stats->bytes += pkt->skb->len;
+ u64_stats_update_end(&stats->syncp);
+ rcu_read_unlock_bh();
- return nft_base_chain(chain)->policy;
+ return nft_base_chain(basechain)->policy;
}
EXPORT_SYMBOL_GPL(nft_do_chain);
diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
index c7b6d466a66..2baa125c2e8 100644
--- a/net/netfilter/nfnetlink_acct.c
+++ b/net/netfilter/nfnetlink_acct.c
@@ -32,18 +32,24 @@ static LIST_HEAD(nfnl_acct_list);
struct nf_acct {
atomic64_t pkts;
atomic64_t bytes;
+ unsigned long flags;
struct list_head head;
atomic_t refcnt;
char name[NFACCT_NAME_MAX];
struct rcu_head rcu_head;
+ char data[0];
};
+#define NFACCT_F_QUOTA (NFACCT_F_QUOTA_PKTS | NFACCT_F_QUOTA_BYTES)
+
static int
nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb,
const struct nlmsghdr *nlh, const struct nlattr * const tb[])
{
struct nf_acct *nfacct, *matching = NULL;
char *acct_name;
+ unsigned int size = 0;
+ u32 flags = 0;
if (!tb[NFACCT_NAME])
return -EINVAL;
@@ -68,15 +74,38 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb,
/* reset counters if you request a replacement. */
atomic64_set(&matching->pkts, 0);
atomic64_set(&matching->bytes, 0);
+ smp_mb__before_atomic();
+ /* reset overquota flag if quota is enabled. */
+ if ((matching->flags & NFACCT_F_QUOTA))
+ clear_bit(NFACCT_F_OVERQUOTA, &matching->flags);
return 0;
}
return -EBUSY;
}
- nfacct = kzalloc(sizeof(struct nf_acct), GFP_KERNEL);
+ if (tb[NFACCT_FLAGS]) {
+ flags = ntohl(nla_get_be32(tb[NFACCT_FLAGS]));
+ if (flags & ~NFACCT_F_QUOTA)
+ return -EOPNOTSUPP;
+ if ((flags & NFACCT_F_QUOTA) == NFACCT_F_QUOTA)
+ return -EINVAL;
+ if (flags & NFACCT_F_OVERQUOTA)
+ return -EINVAL;
+
+ size += sizeof(u64);
+ }
+
+ nfacct = kzalloc(sizeof(struct nf_acct) + size, GFP_KERNEL);
if (nfacct == NULL)
return -ENOMEM;
+ if (flags & NFACCT_F_QUOTA) {
+ u64 *quota = (u64 *)nfacct->data;
+
+ *quota = be64_to_cpu(nla_get_be64(tb[NFACCT_QUOTA]));
+ nfacct->flags = flags;
+ }
+
strncpy(nfacct->name, nla_data(tb[NFACCT_NAME]), NFACCT_NAME_MAX);
if (tb[NFACCT_BYTES]) {
@@ -117,6 +146,9 @@ nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
if (type == NFNL_MSG_ACCT_GET_CTRZERO) {
pkts = atomic64_xchg(&acct->pkts, 0);
bytes = atomic64_xchg(&acct->bytes, 0);
+ smp_mb__before_atomic();
+ if (acct->flags & NFACCT_F_QUOTA)
+ clear_bit(NFACCT_F_OVERQUOTA, &acct->flags);
} else {
pkts = atomic64_read(&acct->pkts);
bytes = atomic64_read(&acct->bytes);
@@ -125,7 +157,13 @@ nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
nla_put_be64(skb, NFACCT_BYTES, cpu_to_be64(bytes)) ||
nla_put_be32(skb, NFACCT_USE, htonl(atomic_read(&acct->refcnt))))
goto nla_put_failure;
+ if (acct->flags & NFACCT_F_QUOTA) {
+ u64 *quota = (u64 *)acct->data;
+ if (nla_put_be32(skb, NFACCT_FLAGS, htonl(acct->flags)) ||
+ nla_put_be64(skb, NFACCT_QUOTA, cpu_to_be64(*quota)))
+ goto nla_put_failure;
+ }
nlmsg_end(skb, nlh);
return skb->len;
@@ -270,6 +308,8 @@ static const struct nla_policy nfnl_acct_policy[NFACCT_MAX+1] = {
[NFACCT_NAME] = { .type = NLA_NUL_STRING, .len = NFACCT_NAME_MAX-1 },
[NFACCT_BYTES] = { .type = NLA_U64 },
[NFACCT_PKTS] = { .type = NLA_U64 },
+ [NFACCT_FLAGS] = { .type = NLA_U32 },
+ [NFACCT_QUOTA] = { .type = NLA_U64 },
};
static const struct nfnl_callback nfnl_acct_cb[NFNL_MSG_ACCT_MAX] = {
@@ -336,6 +376,50 @@ void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct)
}
EXPORT_SYMBOL_GPL(nfnl_acct_update);
+static void nfnl_overquota_report(struct nf_acct *nfacct)
+{
+ int ret;
+ struct sk_buff *skb;
+
+ skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
+ if (skb == NULL)
+ return;
+
+ ret = nfnl_acct_fill_info(skb, 0, 0, NFNL_MSG_ACCT_OVERQUOTA, 0,
+ nfacct);
+ if (ret <= 0) {
+ kfree_skb(skb);
+ return;
+ }
+ netlink_broadcast(init_net.nfnl, skb, 0, NFNLGRP_ACCT_QUOTA,
+ GFP_ATOMIC);
+}
+
+int nfnl_acct_overquota(const struct sk_buff *skb, struct nf_acct *nfacct)
+{
+ u64 now;
+ u64 *quota;
+ int ret = NFACCT_UNDERQUOTA;
+
+ /* no place here if we don't have a quota */
+ if (!(nfacct->flags & NFACCT_F_QUOTA))
+ return NFACCT_NO_QUOTA;
+
+ quota = (u64 *)nfacct->data;
+ now = (nfacct->flags & NFACCT_F_QUOTA_PKTS) ?
+ atomic64_read(&nfacct->pkts) : atomic64_read(&nfacct->bytes);
+
+ ret = now > *quota;
+
+ if (now >= *quota &&
+ !test_and_set_bit(NFACCT_F_OVERQUOTA, &nfacct->flags)) {
+ nfnl_overquota_report(nfacct);
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nfnl_acct_overquota);
+
static int __init nfnl_acct_init(void)
{
int ret;
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 8a779be832f..1840989092e 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -195,6 +195,15 @@ static void
nft_target_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
{
struct xt_target *target = expr->ops->data;
+ void *info = nft_expr_priv(expr);
+ struct xt_tgdtor_param par;
+
+ par.net = ctx->net;
+ par.target = target;
+ par.targinfo = info;
+ par.family = ctx->afi->family;
+ if (par.target->destroy != NULL)
+ par.target->destroy(&par);
module_put(target->me);
}
@@ -382,6 +391,15 @@ static void
nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
{
struct xt_match *match = expr->ops->data;
+ void *info = nft_expr_priv(expr);
+ struct xt_mtdtor_param par;
+
+ par.net = ctx->net;
+ par.match = match;
+ par.matchinfo = info;
+ par.family = ctx->afi->family;
+ if (par.match->destroy != NULL)
+ par.match->destroy(&par);
module_put(match->me);
}
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index bd0d41e6934..cc560301624 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -215,22 +215,14 @@ static void nft_ct_l3proto_module_put(uint8_t family)
nf_ct_l3proto_module_put(family);
}
-static int nft_ct_init_validate_get(const struct nft_expr *expr,
- const struct nlattr * const tb[])
+static int nft_ct_get_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
{
struct nft_ct *priv = nft_expr_priv(expr);
+ int err;
- if (tb[NFTA_CT_DIRECTION] != NULL) {
- priv->dir = nla_get_u8(tb[NFTA_CT_DIRECTION]);
- switch (priv->dir) {
- case IP_CT_DIR_ORIGINAL:
- case IP_CT_DIR_REPLY:
- break;
- default:
- return -EINVAL;
- }
- }
-
+ priv->key = ntohl(nla_get_be32(tb[NFTA_CT_KEY]));
switch (priv->key) {
case NFT_CT_STATE:
case NFT_CT_DIRECTION:
@@ -262,55 +254,55 @@ static int nft_ct_init_validate_get(const struct nft_expr *expr,
return -EOPNOTSUPP;
}
- return 0;
-}
-
-static int nft_ct_init_validate_set(uint32_t key)
-{
- switch (key) {
- case NFT_CT_MARK:
- break;
- default:
- return -EOPNOTSUPP;
+ if (tb[NFTA_CT_DIRECTION] != NULL) {
+ priv->dir = nla_get_u8(tb[NFTA_CT_DIRECTION]);
+ switch (priv->dir) {
+ case IP_CT_DIR_ORIGINAL:
+ case IP_CT_DIR_REPLY:
+ break;
+ default:
+ return -EINVAL;
+ }
}
+ priv->dreg = ntohl(nla_get_be32(tb[NFTA_CT_DREG]));
+ err = nft_validate_output_register(priv->dreg);
+ if (err < 0)
+ return err;
+
+ err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
+ if (err < 0)
+ return err;
+
+ err = nft_ct_l3proto_try_module_get(ctx->afi->family);
+ if (err < 0)
+ return err;
+
return 0;
}
-static int nft_ct_init(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nlattr * const tb[])
+static int nft_ct_set_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
{
struct nft_ct *priv = nft_expr_priv(expr);
int err;
priv->key = ntohl(nla_get_be32(tb[NFTA_CT_KEY]));
-
- if (tb[NFTA_CT_DREG]) {
- err = nft_ct_init_validate_get(expr, tb);
- if (err < 0)
- return err;
-
- priv->dreg = ntohl(nla_get_be32(tb[NFTA_CT_DREG]));
- err = nft_validate_output_register(priv->dreg);
- if (err < 0)
- return err;
-
- err = nft_validate_data_load(ctx, priv->dreg, NULL,
- NFT_DATA_VALUE);
- if (err < 0)
- return err;
- } else {
- err = nft_ct_init_validate_set(priv->key);
- if (err < 0)
- return err;
-
- priv->sreg = ntohl(nla_get_be32(tb[NFTA_CT_SREG]));
- err = nft_validate_input_register(priv->sreg);
- if (err < 0)
- return err;
+ switch (priv->key) {
+#ifdef CONFIG_NF_CONNTRACK_MARK
+ case NFT_CT_MARK:
+ break;
+#endif
+ default:
+ return -EOPNOTSUPP;
}
+ priv->sreg = ntohl(nla_get_be32(tb[NFTA_CT_SREG]));
+ err = nft_validate_input_register(priv->sreg);
+ if (err < 0)
+ return err;
+
err = nft_ct_l3proto_try_module_get(ctx->afi->family);
if (err < 0)
return err;
@@ -370,7 +362,7 @@ static const struct nft_expr_ops nft_ct_get_ops = {
.type = &nft_ct_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_ct)),
.eval = nft_ct_get_eval,
- .init = nft_ct_init,
+ .init = nft_ct_get_init,
.destroy = nft_ct_destroy,
.dump = nft_ct_get_dump,
};
@@ -379,7 +371,7 @@ static const struct nft_expr_ops nft_ct_set_ops = {
.type = &nft_ct_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_ct)),
.eval = nft_ct_set_eval,
- .init = nft_ct_init,
+ .init = nft_ct_set_init,
.destroy = nft_ct_destroy,
.dump = nft_ct_set_dump,
};
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index 3b1ad876d6b..4080ed6a072 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -12,6 +12,7 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/list.h>
+#include <linux/log2.h>
#include <linux/jhash.h>
#include <linux/netlink.h>
#include <linux/vmalloc.h>
@@ -19,7 +20,7 @@
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables.h>
-#define NFT_HASH_MIN_SIZE 4
+#define NFT_HASH_MIN_SIZE 4UL
struct nft_hash {
struct nft_hash_table __rcu *tbl;
@@ -27,7 +28,6 @@ struct nft_hash {
struct nft_hash_table {
unsigned int size;
- unsigned int elements;
struct nft_hash_elem __rcu *buckets[];
};
@@ -76,10 +76,12 @@ static bool nft_hash_lookup(const struct nft_set *set,
static void nft_hash_tbl_free(const struct nft_hash_table *tbl)
{
- if (is_vmalloc_addr(tbl))
- vfree(tbl);
- else
- kfree(tbl);
+ kvfree(tbl);
+}
+
+static unsigned int nft_hash_tbl_size(unsigned int nelem)
+{
+ return max(roundup_pow_of_two(nelem * 4 / 3), NFT_HASH_MIN_SIZE);
}
static struct nft_hash_table *nft_hash_tbl_alloc(unsigned int nbuckets)
@@ -161,7 +163,6 @@ static int nft_hash_tbl_expand(const struct nft_set *set, struct nft_hash *priv)
break;
}
}
- ntbl->elements = tbl->elements;
/* Publish new table */
rcu_assign_pointer(priv->tbl, ntbl);
@@ -201,7 +202,6 @@ static int nft_hash_tbl_shrink(const struct nft_set *set, struct nft_hash *priv)
;
RCU_INIT_POINTER(*pprev, tbl->buckets[i + ntbl->size]);
}
- ntbl->elements = tbl->elements;
/* Publish new table */
rcu_assign_pointer(priv->tbl, ntbl);
@@ -237,10 +237,9 @@ static int nft_hash_insert(const struct nft_set *set,
h = nft_hash_data(&he->key, tbl->size, set->klen);
RCU_INIT_POINTER(he->next, tbl->buckets[h]);
rcu_assign_pointer(tbl->buckets[h], he);
- tbl->elements++;
/* Expand table when exceeding 75% load */
- if (tbl->elements > tbl->size / 4 * 3)
+ if (set->nelems + 1 > tbl->size / 4 * 3)
nft_hash_tbl_expand(set, priv);
return 0;
@@ -268,10 +267,9 @@ static void nft_hash_remove(const struct nft_set *set,
RCU_INIT_POINTER(*pprev, he->next);
synchronize_rcu();
kfree(he);
- tbl->elements--;
/* Shrink table beneath 30% load */
- if (tbl->elements < tbl->size * 3 / 10 &&
+ if (set->nelems - 1 < tbl->size * 3 / 10 &&
tbl->size > NFT_HASH_MIN_SIZE)
nft_hash_tbl_shrink(set, priv);
}
@@ -335,17 +333,23 @@ static unsigned int nft_hash_privsize(const struct nlattr * const nla[])
}
static int nft_hash_init(const struct nft_set *set,
+ const struct nft_set_desc *desc,
const struct nlattr * const tb[])
{
struct nft_hash *priv = nft_set_priv(set);
struct nft_hash_table *tbl;
+ unsigned int size;
if (unlikely(!nft_hash_rnd_initted)) {
get_random_bytes(&nft_hash_rnd, 4);
nft_hash_rnd_initted = true;
}
- tbl = nft_hash_tbl_alloc(NFT_HASH_MIN_SIZE);
+ size = NFT_HASH_MIN_SIZE;
+ if (desc->size)
+ size = nft_hash_tbl_size(desc->size);
+
+ tbl = nft_hash_tbl_alloc(size);
if (tbl == NULL)
return -ENOMEM;
RCU_INIT_POINTER(priv->tbl, tbl);
@@ -369,8 +373,37 @@ static void nft_hash_destroy(const struct nft_set *set)
kfree(tbl);
}
+static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features,
+ struct nft_set_estimate *est)
+{
+ unsigned int esize;
+
+ esize = sizeof(struct nft_hash_elem);
+ if (features & NFT_SET_MAP)
+ esize += FIELD_SIZEOF(struct nft_hash_elem, data[0]);
+
+ if (desc->size) {
+ est->size = sizeof(struct nft_hash) +
+ nft_hash_tbl_size(desc->size) *
+ sizeof(struct nft_hash_elem *) +
+ desc->size * esize;
+ } else {
+ /* Resizing happens when the load drops below 30% or goes
+ * above 75%. The average of 52.5% load (approximated by 50%)
+ * is used for the size estimation of the hash buckets,
+ * meaning we calculate two buckets per element.
+ */
+ est->size = esize + 2 * sizeof(struct nft_hash_elem *);
+ }
+
+ est->class = NFT_SET_CLASS_O_1;
+
+ return true;
+}
+
static struct nft_set_ops nft_hash_ops __read_mostly = {
.privsize = nft_hash_privsize,
+ .estimate = nft_hash_estimate,
.init = nft_hash_init,
.destroy = nft_hash_destroy,
.get = nft_hash_get,
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index 7fd2bea8aa2..6404a726d17 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -56,8 +56,14 @@ static int nft_lookup_init(const struct nft_ctx *ctx,
return -EINVAL;
set = nf_tables_set_lookup(ctx->table, tb[NFTA_LOOKUP_SET]);
- if (IS_ERR(set))
- return PTR_ERR(set);
+ if (IS_ERR(set)) {
+ if (tb[NFTA_LOOKUP_SET_ID]) {
+ set = nf_tables_set_lookup_byid(ctx->net,
+ tb[NFTA_LOOKUP_SET_ID]);
+ }
+ if (IS_ERR(set))
+ return PTR_ERR(set);
+ }
priv->sreg = ntohl(nla_get_be32(tb[NFTA_LOOKUP_SREG]));
err = nft_validate_input_register(priv->sreg);
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 425cf39af89..852b178c6ae 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -18,18 +18,11 @@
#include <net/sock.h>
#include <net/tcp_states.h> /* for TCP_TIME_WAIT */
#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nft_meta.h>
-struct nft_meta {
- enum nft_meta_keys key:8;
- union {
- enum nft_registers dreg:8;
- enum nft_registers sreg:8;
- };
-};
-
-static void nft_meta_get_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
- const struct nft_pktinfo *pkt)
+void nft_meta_get_eval(const struct nft_expr *expr,
+ struct nft_data data[NFT_REG_MAX + 1],
+ const struct nft_pktinfo *pkt)
{
const struct nft_meta *priv = nft_expr_priv(expr);
const struct sk_buff *skb = pkt->skb;
@@ -140,10 +133,11 @@ static void nft_meta_get_eval(const struct nft_expr *expr,
err:
data[NFT_REG_VERDICT].verdict = NFT_BREAK;
}
+EXPORT_SYMBOL_GPL(nft_meta_get_eval);
-static void nft_meta_set_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
- const struct nft_pktinfo *pkt)
+void nft_meta_set_eval(const struct nft_expr *expr,
+ struct nft_data data[NFT_REG_MAX + 1],
+ const struct nft_pktinfo *pkt)
{
const struct nft_meta *meta = nft_expr_priv(expr);
struct sk_buff *skb = pkt->skb;
@@ -163,28 +157,24 @@ static void nft_meta_set_eval(const struct nft_expr *expr,
WARN_ON(1);
}
}
+EXPORT_SYMBOL_GPL(nft_meta_set_eval);
-static const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = {
+const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = {
[NFTA_META_DREG] = { .type = NLA_U32 },
[NFTA_META_KEY] = { .type = NLA_U32 },
[NFTA_META_SREG] = { .type = NLA_U32 },
};
+EXPORT_SYMBOL_GPL(nft_meta_policy);
-static int nft_meta_init_validate_set(uint32_t key)
+int nft_meta_get_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
{
- switch (key) {
- case NFT_META_MARK:
- case NFT_META_PRIORITY:
- case NFT_META_NFTRACE:
- return 0;
- default:
- return -EOPNOTSUPP;
- }
-}
+ struct nft_meta *priv = nft_expr_priv(expr);
+ int err;
-static int nft_meta_init_validate_get(uint32_t key)
-{
- switch (key) {
+ priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY]));
+ switch (priv->key) {
case NFT_META_LEN:
case NFT_META_PROTOCOL:
case NFT_META_NFPROTO:
@@ -205,39 +195,41 @@ static int nft_meta_init_validate_get(uint32_t key)
#ifdef CONFIG_NETWORK_SECMARK
case NFT_META_SECMARK:
#endif
- return 0;
+ break;
default:
return -EOPNOTSUPP;
}
+ priv->dreg = ntohl(nla_get_be32(tb[NFTA_META_DREG]));
+ err = nft_validate_output_register(priv->dreg);
+ if (err < 0)
+ return err;
+
+ err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
+ if (err < 0)
+ return err;
+
+ return 0;
}
+EXPORT_SYMBOL_GPL(nft_meta_get_init);
-static int nft_meta_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
- const struct nlattr * const tb[])
+int nft_meta_set_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
{
struct nft_meta *priv = nft_expr_priv(expr);
int err;
priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY]));
-
- if (tb[NFTA_META_DREG]) {
- err = nft_meta_init_validate_get(priv->key);
- if (err < 0)
- return err;
-
- priv->dreg = ntohl(nla_get_be32(tb[NFTA_META_DREG]));
- err = nft_validate_output_register(priv->dreg);
- if (err < 0)
- return err;
-
- return nft_validate_data_load(ctx, priv->dreg, NULL,
- NFT_DATA_VALUE);
+ switch (priv->key) {
+ case NFT_META_MARK:
+ case NFT_META_PRIORITY:
+ case NFT_META_NFTRACE:
+ break;
+ default:
+ return -EOPNOTSUPP;
}
- err = nft_meta_init_validate_set(priv->key);
- if (err < 0)
- return err;
-
priv->sreg = ntohl(nla_get_be32(tb[NFTA_META_SREG]));
err = nft_validate_input_register(priv->sreg);
if (err < 0)
@@ -245,9 +237,10 @@ static int nft_meta_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
return 0;
}
+EXPORT_SYMBOL_GPL(nft_meta_set_init);
-static int nft_meta_get_dump(struct sk_buff *skb,
- const struct nft_expr *expr)
+int nft_meta_get_dump(struct sk_buff *skb,
+ const struct nft_expr *expr)
{
const struct nft_meta *priv = nft_expr_priv(expr);
@@ -260,9 +253,10 @@ static int nft_meta_get_dump(struct sk_buff *skb,
nla_put_failure:
return -1;
}
+EXPORT_SYMBOL_GPL(nft_meta_get_dump);
-static int nft_meta_set_dump(struct sk_buff *skb,
- const struct nft_expr *expr)
+int nft_meta_set_dump(struct sk_buff *skb,
+ const struct nft_expr *expr)
{
const struct nft_meta *priv = nft_expr_priv(expr);
@@ -276,13 +270,14 @@ static int nft_meta_set_dump(struct sk_buff *skb,
nla_put_failure:
return -1;
}
+EXPORT_SYMBOL_GPL(nft_meta_set_dump);
static struct nft_expr_type nft_meta_type;
static const struct nft_expr_ops nft_meta_get_ops = {
.type = &nft_meta_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_meta)),
.eval = nft_meta_get_eval,
- .init = nft_meta_init,
+ .init = nft_meta_get_init,
.dump = nft_meta_get_dump,
};
@@ -290,7 +285,7 @@ static const struct nft_expr_ops nft_meta_set_ops = {
.type = &nft_meta_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_meta)),
.eval = nft_meta_set_eval,
- .init = nft_meta_init,
+ .init = nft_meta_set_init,
.dump = nft_meta_set_dump,
};
diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
index a0195d28bcf..79ff58cd36d 100644
--- a/net/netfilter/nft_nat.c
+++ b/net/netfilter/nft_nat.c
@@ -175,12 +175,14 @@ static int nft_nat_dump(struct sk_buff *skb, const struct nft_expr *expr)
if (nla_put_be32(skb,
NFTA_NAT_REG_ADDR_MAX, htonl(priv->sreg_addr_max)))
goto nla_put_failure;
- if (nla_put_be32(skb,
- NFTA_NAT_REG_PROTO_MIN, htonl(priv->sreg_proto_min)))
- goto nla_put_failure;
- if (nla_put_be32(skb,
- NFTA_NAT_REG_PROTO_MAX, htonl(priv->sreg_proto_max)))
- goto nla_put_failure;
+ if (priv->sreg_proto_min) {
+ if (nla_put_be32(skb, NFTA_NAT_REG_PROTO_MIN,
+ htonl(priv->sreg_proto_min)))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_NAT_REG_PROTO_MAX,
+ htonl(priv->sreg_proto_max)))
+ goto nla_put_failure;
+ }
return 0;
nla_put_failure:
diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c
index e21d69d1350..e1836ff8819 100644
--- a/net/netfilter/nft_rbtree.c
+++ b/net/netfilter/nft_rbtree.c
@@ -18,6 +18,8 @@
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables.h>
+static DEFINE_SPINLOCK(nft_rbtree_lock);
+
struct nft_rbtree {
struct rb_root root;
};
@@ -38,6 +40,7 @@ static bool nft_rbtree_lookup(const struct nft_set *set,
const struct rb_node *parent = priv->root.rb_node;
int d;
+ spin_lock_bh(&nft_rbtree_lock);
while (parent != NULL) {
rbe = rb_entry(parent, struct nft_rbtree_elem, node);
@@ -53,6 +56,8 @@ found:
goto out;
if (set->flags & NFT_SET_MAP)
nft_data_copy(data, rbe->data);
+
+ spin_unlock_bh(&nft_rbtree_lock);
return true;
}
}
@@ -62,6 +67,7 @@ found:
goto found;
}
out:
+ spin_unlock_bh(&nft_rbtree_lock);
return false;
}
@@ -124,9 +130,12 @@ static int nft_rbtree_insert(const struct nft_set *set,
!(rbe->flags & NFT_SET_ELEM_INTERVAL_END))
nft_data_copy(rbe->data, &elem->data);
+ spin_lock_bh(&nft_rbtree_lock);
err = __nft_rbtree_insert(set, rbe);
if (err < 0)
kfree(rbe);
+
+ spin_unlock_bh(&nft_rbtree_lock);
return err;
}
@@ -136,7 +145,9 @@ static void nft_rbtree_remove(const struct nft_set *set,
struct nft_rbtree *priv = nft_set_priv(set);
struct nft_rbtree_elem *rbe = elem->cookie;
+ spin_lock_bh(&nft_rbtree_lock);
rb_erase(&rbe->node, &priv->root);
+ spin_unlock_bh(&nft_rbtree_lock);
kfree(rbe);
}
@@ -147,6 +158,7 @@ static int nft_rbtree_get(const struct nft_set *set, struct nft_set_elem *elem)
struct nft_rbtree_elem *rbe;
int d;
+ spin_lock_bh(&nft_rbtree_lock);
while (parent != NULL) {
rbe = rb_entry(parent, struct nft_rbtree_elem, node);
@@ -161,9 +173,11 @@ static int nft_rbtree_get(const struct nft_set *set, struct nft_set_elem *elem)
!(rbe->flags & NFT_SET_ELEM_INTERVAL_END))
nft_data_copy(&elem->data, rbe->data);
elem->flags = rbe->flags;
+ spin_unlock_bh(&nft_rbtree_lock);
return 0;
}
}
+ spin_unlock_bh(&nft_rbtree_lock);
return -ENOENT;
}
@@ -176,6 +190,7 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,
struct nft_set_elem elem;
struct rb_node *node;
+ spin_lock_bh(&nft_rbtree_lock);
for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) {
if (iter->count < iter->skip)
goto cont;
@@ -188,11 +203,14 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,
elem.flags = rbe->flags;
iter->err = iter->fn(ctx, set, iter, &elem);
- if (iter->err < 0)
+ if (iter->err < 0) {
+ spin_unlock_bh(&nft_rbtree_lock);
return;
+ }
cont:
iter->count++;
}
+ spin_unlock_bh(&nft_rbtree_lock);
}
static unsigned int nft_rbtree_privsize(const struct nlattr * const nla[])
@@ -201,6 +219,7 @@ static unsigned int nft_rbtree_privsize(const struct nlattr * const nla[])
}
static int nft_rbtree_init(const struct nft_set *set,
+ const struct nft_set_desc *desc,
const struct nlattr * const nla[])
{
struct nft_rbtree *priv = nft_set_priv(set);
@@ -215,15 +234,37 @@ static void nft_rbtree_destroy(const struct nft_set *set)
struct nft_rbtree_elem *rbe;
struct rb_node *node;
+ spin_lock_bh(&nft_rbtree_lock);
while ((node = priv->root.rb_node) != NULL) {
rb_erase(node, &priv->root);
rbe = rb_entry(node, struct nft_rbtree_elem, node);
nft_rbtree_elem_destroy(set, rbe);
}
+ spin_unlock_bh(&nft_rbtree_lock);
+}
+
+static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features,
+ struct nft_set_estimate *est)
+{
+ unsigned int nsize;
+
+ nsize = sizeof(struct nft_rbtree_elem);
+ if (features & NFT_SET_MAP)
+ nsize += FIELD_SIZEOF(struct nft_rbtree_elem, data[0]);
+
+ if (desc->size)
+ est->size = sizeof(struct nft_rbtree) + desc->size * nsize;
+ else
+ est->size = nsize;
+
+ est->class = NFT_SET_CLASS_O_LOG_N;
+
+ return true;
}
static struct nft_set_ops nft_rbtree_ops __read_mostly = {
.privsize = nft_rbtree_privsize,
+ .estimate = nft_rbtree_estimate,
.init = nft_rbtree_init,
.destroy = nft_rbtree_destroy,
.insert = nft_rbtree_insert,
diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c
index 12d4da8e6c7..bbffdbdaf60 100644
--- a/net/netfilter/xt_bpf.c
+++ b/net/netfilter/xt_bpf.c
@@ -23,10 +23,11 @@ MODULE_ALIAS("ip6t_bpf");
static int bpf_mt_check(const struct xt_mtchk_param *par)
{
struct xt_bpf_info *info = par->matchinfo;
- struct sock_fprog program;
+ struct sock_fprog_kern program;
program.len = info->bpf_program_num_elem;
- program.filter = (struct sock_filter __user *) info->bpf_program;
+ program.filter = info->bpf_program;
+
if (sk_unattached_filter_create(&info->filter, &program)) {
pr_info("bpf: check failed: parse error\n");
return -EINVAL;
diff --git a/net/netfilter/xt_nfacct.c b/net/netfilter/xt_nfacct.c
index b3be0ef21f1..8c646ed9c92 100644
--- a/net/netfilter/xt_nfacct.c
+++ b/net/netfilter/xt_nfacct.c
@@ -21,11 +21,14 @@ MODULE_ALIAS("ip6t_nfacct");
static bool nfacct_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
+ int overquota;
const struct xt_nfacct_match_info *info = par->targinfo;
nfnl_acct_update(skb, info->nfacct);
- return true;
+ overquota = nfnl_acct_overquota(skb, info->nfacct);
+
+ return overquota == NFACCT_UNDERQUOTA ? false : true;
}
static int
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 1e657cf715c..a9faae89f95 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -313,10 +313,7 @@ out:
static void recent_table_free(void *addr)
{
- if (is_vmalloc_addr(addr))
- vfree(addr);
- else
- kfree(addr);
+ kvfree(addr);
}
static int recent_mt_check(const struct xt_mtchk_param *par,
diff --git a/net/netfilter/xt_repldata.h b/net/netfilter/xt_repldata.h
index 6efe4e5a81c..8fd324116e6 100644
--- a/net/netfilter/xt_repldata.h
+++ b/net/netfilter/xt_repldata.h
@@ -5,23 +5,35 @@
* they serve as the hanging-off data accessed through repl.data[].
*/
+/* tbl has the following structure equivalent, but is C99 compliant:
+ * struct {
+ * struct type##_replace repl;
+ * struct type##_standard entries[nhooks];
+ * struct type##_error term;
+ * } *tbl;
+ */
+
#define xt_alloc_initial_table(type, typ2) ({ \
unsigned int hook_mask = info->valid_hooks; \
unsigned int nhooks = hweight32(hook_mask); \
unsigned int bytes = 0, hooknum = 0, i = 0; \
struct { \
struct type##_replace repl; \
- struct type##_standard entries[nhooks]; \
- struct type##_error term; \
- } *tbl = kzalloc(sizeof(*tbl), GFP_KERNEL); \
+ struct type##_standard entries[]; \
+ } *tbl; \
+ struct type##_error *term; \
+ size_t term_offset = (offsetof(typeof(*tbl), entries[nhooks]) + \
+ __alignof__(*term) - 1) & ~(__alignof__(*term) - 1); \
+ tbl = kzalloc(term_offset + sizeof(*term), GFP_KERNEL); \
if (tbl == NULL) \
return NULL; \
+ term = (struct type##_error *)&(((char *)tbl)[term_offset]); \
strncpy(tbl->repl.name, info->name, sizeof(tbl->repl.name)); \
- tbl->term = (struct type##_error)typ2##_ERROR_INIT; \
+ *term = (struct type##_error)typ2##_ERROR_INIT; \
tbl->repl.valid_hooks = hook_mask; \
tbl->repl.num_entries = nhooks + 1; \
tbl->repl.size = nhooks * sizeof(struct type##_standard) + \
- sizeof(struct type##_error); \
+ sizeof(struct type##_error); \
for (; hook_mask != 0; hook_mask >>= 1, ++hooknum) { \
if (!(hook_mask & 1)) \
continue; \
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index e0ccd84d4d6..e6fac7e3db5 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -636,7 +636,7 @@ static unsigned int netlink_poll(struct file *file, struct socket *sock,
while (nlk->cb_running && netlink_dump_space(nlk)) {
err = netlink_dump(sk);
if (err < 0) {
- sk->sk_err = err;
+ sk->sk_err = -err;
sk->sk_error_report(sk);
break;
}
@@ -1377,7 +1377,9 @@ retry:
bool __netlink_ns_capable(const struct netlink_skb_parms *nsp,
struct user_namespace *user_ns, int cap)
{
- return sk_ns_capable(nsp->sk, user_ns, cap);
+ return ((nsp->flags & NETLINK_SKB_DST) ||
+ file_ns_capable(nsp->sk->sk_socket->file, user_ns, cap)) &&
+ ns_capable(user_ns, cap);
}
EXPORT_SYMBOL(__netlink_ns_capable);
@@ -2323,6 +2325,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
struct sk_buff *skb;
int err;
struct scm_cookie scm;
+ u32 netlink_skb_flags = 0;
if (msg->msg_flags&MSG_OOB)
return -EOPNOTSUPP;
@@ -2344,6 +2347,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
if ((dst_group || dst_portid) &&
!netlink_allowed(sock, NL_CFG_F_NONROOT_SEND))
goto out;
+ netlink_skb_flags |= NETLINK_SKB_DST;
} else {
dst_portid = nlk->dst_portid;
dst_group = nlk->dst_group;
@@ -2373,6 +2377,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
NETLINK_CB(skb).portid = nlk->portid;
NETLINK_CB(skb).dst_group = dst_group;
NETLINK_CB(skb).creds = siocb->scm->creds;
+ NETLINK_CB(skb).flags = netlink_skb_flags;
err = -EFAULT;
if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) {
@@ -2478,7 +2483,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) {
ret = netlink_dump(sk);
if (ret) {
- sk->sk_err = ret;
+ sk->sk_err = -ret;
sk->sk_error_report(sk);
}
}
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index a3ba3ca0ff9..76393f2f4b2 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -317,7 +317,7 @@ static void genl_unregister_mc_groups(struct genl_family *family)
}
}
-static int genl_validate_ops(struct genl_family *family)
+static int genl_validate_ops(const struct genl_family *family)
{
const struct genl_ops *ops = family->ops;
unsigned int n_ops = family->n_ops;
@@ -337,10 +337,6 @@ static int genl_validate_ops(struct genl_family *family)
return -EINVAL;
}
- /* family is not registered yet, so no locking needed */
- family->ops = ops;
- family->n_ops = n_ops;
-
return 0;
}
diff --git a/net/nfc/digital.h b/net/nfc/digital.h
index 3759add68b1..71ad7eefddd 100644
--- a/net/nfc/digital.h
+++ b/net/nfc/digital.h
@@ -71,6 +71,7 @@ static inline int digital_in_send_cmd(struct nfc_digital_dev *ddev,
void digital_poll_next_tech(struct nfc_digital_dev *ddev);
int digital_in_send_sens_req(struct nfc_digital_dev *ddev, u8 rf_tech);
+int digital_in_send_sensb_req(struct nfc_digital_dev *ddev, u8 rf_tech);
int digital_in_send_sensf_req(struct nfc_digital_dev *ddev, u8 rf_tech);
int digital_in_send_iso15693_inv_req(struct nfc_digital_dev *ddev, u8 rf_tech);
diff --git a/net/nfc/digital_core.c b/net/nfc/digital_core.c
index e01e15dbf1a..a6ce3c627e4 100644
--- a/net/nfc/digital_core.c
+++ b/net/nfc/digital_core.c
@@ -22,6 +22,8 @@
#define DIGITAL_PROTO_NFCA_RF_TECH \
(NFC_PROTO_JEWEL_MASK | NFC_PROTO_MIFARE_MASK | NFC_PROTO_NFC_DEP_MASK)
+#define DIGITAL_PROTO_NFCB_RF_TECH NFC_PROTO_ISO14443_B_MASK
+
#define DIGITAL_PROTO_NFCF_RF_TECH \
(NFC_PROTO_FELICA_MASK | NFC_PROTO_NFC_DEP_MASK)
@@ -345,6 +347,12 @@ int digital_target_found(struct nfc_digital_dev *ddev,
add_crc = digital_skb_add_crc_a;
break;
+ case NFC_PROTO_ISO14443_B:
+ framing = NFC_DIGITAL_FRAMING_NFCB_T4T;
+ check_crc = digital_skb_check_crc_b;
+ add_crc = digital_skb_add_crc_b;
+ break;
+
default:
pr_err("Invalid protocol %d\n", protocol);
return -EINVAL;
@@ -378,6 +386,8 @@ int digital_target_found(struct nfc_digital_dev *ddev,
void digital_poll_next_tech(struct nfc_digital_dev *ddev)
{
+ u8 rand_mod;
+
digital_switch_rf(ddev, 0);
mutex_lock(&ddev->poll_lock);
@@ -387,8 +397,8 @@ void digital_poll_next_tech(struct nfc_digital_dev *ddev)
return;
}
- ddev->poll_tech_index = (ddev->poll_tech_index + 1) %
- ddev->poll_tech_count;
+ get_random_bytes(&rand_mod, sizeof(rand_mod));
+ ddev->poll_tech_index = rand_mod % ddev->poll_tech_count;
mutex_unlock(&ddev->poll_lock);
@@ -475,6 +485,10 @@ static int digital_start_poll(struct nfc_dev *nfc_dev, __u32 im_protocols,
digital_add_poll_tech(ddev, NFC_DIGITAL_RF_TECH_106A,
digital_in_send_sens_req);
+ if (matching_im_protocols & DIGITAL_PROTO_NFCB_RF_TECH)
+ digital_add_poll_tech(ddev, NFC_DIGITAL_RF_TECH_106B,
+ digital_in_send_sensb_req);
+
if (matching_im_protocols & DIGITAL_PROTO_NFCF_RF_TECH) {
digital_add_poll_tech(ddev, NFC_DIGITAL_RF_TECH_212F,
digital_in_send_sensf_req);
@@ -635,7 +649,8 @@ static void digital_in_send_complete(struct nfc_digital_dev *ddev, void *arg,
goto done;
}
- if (ddev->curr_protocol == NFC_PROTO_ISO14443) {
+ if ((ddev->curr_protocol == NFC_PROTO_ISO14443) ||
+ (ddev->curr_protocol == NFC_PROTO_ISO14443_B)) {
rc = digital_in_iso_dep_pull_sod(ddev, resp);
if (rc)
goto done;
@@ -676,7 +691,8 @@ static int digital_in_send(struct nfc_dev *nfc_dev, struct nfc_target *target,
goto exit;
}
- if (ddev->curr_protocol == NFC_PROTO_ISO14443) {
+ if ((ddev->curr_protocol == NFC_PROTO_ISO14443) ||
+ (ddev->curr_protocol == NFC_PROTO_ISO14443_B)) {
rc = digital_in_iso_dep_push_sod(ddev, skb);
if (rc)
goto exit;
@@ -747,6 +763,8 @@ struct nfc_digital_dev *nfc_digital_allocate_device(struct nfc_digital_ops *ops,
ddev->protocols |= NFC_PROTO_ISO15693_MASK;
if (supported_protocols & NFC_PROTO_ISO14443_MASK)
ddev->protocols |= NFC_PROTO_ISO14443_MASK;
+ if (supported_protocols & NFC_PROTO_ISO14443_B_MASK)
+ ddev->protocols |= NFC_PROTO_ISO14443_B_MASK;
ddev->tx_headroom = tx_headroom + DIGITAL_MAX_HEADER_LEN;
ddev->tx_tailroom = tx_tailroom + DIGITAL_CRC_LEN;
diff --git a/net/nfc/digital_dep.c b/net/nfc/digital_dep.c
index d4ed25ff723..171cb9949ab 100644
--- a/net/nfc/digital_dep.c
+++ b/net/nfc/digital_dep.c
@@ -224,9 +224,8 @@ int digital_in_send_atr_req(struct nfc_digital_dev *ddev,
ddev->skb_add_crc(skb);
- digital_in_send_cmd(ddev, skb, 500, digital_in_recv_atr_res, target);
-
- return 0;
+ return digital_in_send_cmd(ddev, skb, 500, digital_in_recv_atr_res,
+ target);
}
static int digital_in_send_rtox(struct nfc_digital_dev *ddev,
diff --git a/net/nfc/digital_technology.c b/net/nfc/digital_technology.c
index 278c3fed27e..c2c1c0189b7 100644
--- a/net/nfc/digital_technology.c
+++ b/net/nfc/digital_technology.c
@@ -41,6 +41,24 @@
#define DIGITAL_MIFARE_READ_RES_LEN 16
#define DIGITAL_MIFARE_ACK_RES 0x0A
+#define DIGITAL_CMD_SENSB_REQ 0x05
+#define DIGITAL_SENSB_ADVANCED BIT(5)
+#define DIGITAL_SENSB_EXTENDED BIT(4)
+#define DIGITAL_SENSB_ALLB_REQ BIT(3)
+#define DIGITAL_SENSB_N(n) ((n) & 0x7)
+
+#define DIGITAL_CMD_SENSB_RES 0x50
+
+#define DIGITAL_CMD_ATTRIB_REQ 0x1D
+#define DIGITAL_ATTRIB_P1_TR0_DEFAULT (0x0 << 6)
+#define DIGITAL_ATTRIB_P1_TR1_DEFAULT (0x0 << 4)
+#define DIGITAL_ATTRIB_P1_SUPRESS_EOS BIT(3)
+#define DIGITAL_ATTRIB_P1_SUPRESS_SOS BIT(2)
+#define DIGITAL_ATTRIB_P2_LISTEN_POLL_1 (0x0 << 6)
+#define DIGITAL_ATTRIB_P2_POLL_LISTEN_1 (0x0 << 4)
+#define DIGITAL_ATTRIB_P2_MAX_FRAME_256 0x8
+#define DIGITAL_ATTRIB_P4_DID(n) ((n) & 0xf)
+
#define DIGITAL_CMD_SENSF_REQ 0x00
#define DIGITAL_CMD_SENSF_RES 0x01
@@ -75,6 +93,7 @@ static const u8 digital_ats_fsc[] = {
};
#define DIGITAL_ATS_FSCI(t0) ((t0) & 0x0F)
+#define DIGITAL_SENSB_FSCI(pi2) (((pi2) & 0xF0) >> 4)
#define DIGITAL_ATS_MAX_FSC 256
#define DIGITAL_RATS_BYTE1 0xE0
@@ -92,6 +111,32 @@ struct digital_sel_req {
u8 bcc;
} __packed;
+struct digital_sensb_req {
+ u8 cmd;
+ u8 afi;
+ u8 param;
+} __packed;
+
+struct digital_sensb_res {
+ u8 cmd;
+ u8 nfcid0[4];
+ u8 app_data[4];
+ u8 proto_info[3];
+} __packed;
+
+struct digital_attrib_req {
+ u8 cmd;
+ u8 nfcid0[4];
+ u8 param1;
+ u8 param2;
+ u8 param3;
+ u8 param4;
+} __packed;
+
+struct digital_attrib_res {
+ u8 mbli_did;
+} __packed;
+
struct digital_sensf_req {
u8 cmd;
u8 sc1;
@@ -531,6 +576,175 @@ int digital_in_recv_mifare_res(struct sk_buff *resp)
return -EIO;
}
+static void digital_in_recv_attrib_res(struct nfc_digital_dev *ddev, void *arg,
+ struct sk_buff *resp)
+{
+ struct nfc_target *target = arg;
+ struct digital_attrib_res *attrib_res;
+ int rc;
+
+ if (IS_ERR(resp)) {
+ rc = PTR_ERR(resp);
+ resp = NULL;
+ goto exit;
+ }
+
+ if (resp->len < sizeof(*attrib_res)) {
+ PROTOCOL_ERR("12.6.2");
+ rc = -EIO;
+ goto exit;
+ }
+
+ attrib_res = (struct digital_attrib_res *)resp->data;
+
+ if (attrib_res->mbli_did & 0x0f) {
+ PROTOCOL_ERR("12.6.2.1");
+ rc = -EIO;
+ goto exit;
+ }
+
+ rc = digital_target_found(ddev, target, NFC_PROTO_ISO14443_B);
+
+exit:
+ dev_kfree_skb(resp);
+ kfree(target);
+
+ if (rc)
+ digital_poll_next_tech(ddev);
+}
+
+static int digital_in_send_attrib_req(struct nfc_digital_dev *ddev,
+ struct nfc_target *target,
+ struct digital_sensb_res *sensb_res)
+{
+ struct digital_attrib_req *attrib_req;
+ struct sk_buff *skb;
+ int rc;
+
+ skb = digital_skb_alloc(ddev, sizeof(*attrib_req));
+ if (!skb)
+ return -ENOMEM;
+
+ attrib_req = (struct digital_attrib_req *)skb_put(skb,
+ sizeof(*attrib_req));
+
+ attrib_req->cmd = DIGITAL_CMD_ATTRIB_REQ;
+ memcpy(attrib_req->nfcid0, sensb_res->nfcid0,
+ sizeof(attrib_req->nfcid0));
+ attrib_req->param1 = DIGITAL_ATTRIB_P1_TR0_DEFAULT |
+ DIGITAL_ATTRIB_P1_TR1_DEFAULT;
+ attrib_req->param2 = DIGITAL_ATTRIB_P2_LISTEN_POLL_1 |
+ DIGITAL_ATTRIB_P2_POLL_LISTEN_1 |
+ DIGITAL_ATTRIB_P2_MAX_FRAME_256;
+ attrib_req->param3 = sensb_res->proto_info[1] & 0x07;
+ attrib_req->param4 = DIGITAL_ATTRIB_P4_DID(0);
+
+ rc = digital_in_send_cmd(ddev, skb, 30, digital_in_recv_attrib_res,
+ target);
+ if (rc)
+ kfree_skb(skb);
+
+ return rc;
+}
+
+static void digital_in_recv_sensb_res(struct nfc_digital_dev *ddev, void *arg,
+ struct sk_buff *resp)
+{
+ struct nfc_target *target = NULL;
+ struct digital_sensb_res *sensb_res;
+ u8 fsci;
+ int rc;
+
+ if (IS_ERR(resp)) {
+ rc = PTR_ERR(resp);
+ resp = NULL;
+ goto exit;
+ }
+
+ if (resp->len != sizeof(*sensb_res)) {
+ PROTOCOL_ERR("5.6.2.1");
+ rc = -EIO;
+ goto exit;
+ }
+
+ sensb_res = (struct digital_sensb_res *)resp->data;
+
+ if (sensb_res->cmd != DIGITAL_CMD_SENSB_RES) {
+ PROTOCOL_ERR("5.6.2");
+ rc = -EIO;
+ goto exit;
+ }
+
+ if (!(sensb_res->proto_info[1] & BIT(0))) {
+ PROTOCOL_ERR("5.6.2.12");
+ rc = -EIO;
+ goto exit;
+ }
+
+ if (sensb_res->proto_info[1] & BIT(3)) {
+ PROTOCOL_ERR("5.6.2.16");
+ rc = -EIO;
+ goto exit;
+ }
+
+ fsci = DIGITAL_SENSB_FSCI(sensb_res->proto_info[1]);
+ if (fsci >= 8)
+ ddev->target_fsc = DIGITAL_ATS_MAX_FSC;
+ else
+ ddev->target_fsc = digital_ats_fsc[fsci];
+
+ target = kzalloc(sizeof(struct nfc_target), GFP_KERNEL);
+ if (!target) {
+ rc = -ENOMEM;
+ goto exit;
+ }
+
+ rc = digital_in_send_attrib_req(ddev, target, sensb_res);
+
+exit:
+ dev_kfree_skb(resp);
+
+ if (rc) {
+ kfree(target);
+ digital_poll_next_tech(ddev);
+ }
+}
+
+int digital_in_send_sensb_req(struct nfc_digital_dev *ddev, u8 rf_tech)
+{
+ struct digital_sensb_req *sensb_req;
+ struct sk_buff *skb;
+ int rc;
+
+ rc = digital_in_configure_hw(ddev, NFC_DIGITAL_CONFIG_RF_TECH,
+ NFC_DIGITAL_RF_TECH_106B);
+ if (rc)
+ return rc;
+
+ rc = digital_in_configure_hw(ddev, NFC_DIGITAL_CONFIG_FRAMING,
+ NFC_DIGITAL_FRAMING_NFCB);
+ if (rc)
+ return rc;
+
+ skb = digital_skb_alloc(ddev, sizeof(*sensb_req));
+ if (!skb)
+ return -ENOMEM;
+
+ sensb_req = (struct digital_sensb_req *)skb_put(skb,
+ sizeof(*sensb_req));
+
+ sensb_req->cmd = DIGITAL_CMD_SENSB_REQ;
+ sensb_req->afi = 0x00; /* All families and sub-families */
+ sensb_req->param = DIGITAL_SENSB_N(0);
+
+ rc = digital_in_send_cmd(ddev, skb, 30, digital_in_recv_sensb_res,
+ NULL);
+ if (rc)
+ kfree_skb(skb);
+
+ return rc;
+}
+
static void digital_in_recv_sensf_res(struct nfc_digital_dev *ddev, void *arg,
struct sk_buff *resp)
{
@@ -877,6 +1091,18 @@ exit:
dev_kfree_skb(resp);
}
+static void digital_tg_recv_atr_or_sensf_req(struct nfc_digital_dev *ddev,
+ void *arg, struct sk_buff *resp)
+{
+ if (!IS_ERR(resp) && (resp->len >= 2) &&
+ (resp->data[1] == DIGITAL_CMD_SENSF_REQ))
+ digital_tg_recv_sensf_req(ddev, arg, resp);
+ else
+ digital_tg_recv_atr_req(ddev, arg, resp);
+
+ return;
+}
+
static int digital_tg_send_sensf_res(struct nfc_digital_dev *ddev,
struct digital_sensf_req *sensf_req)
{
@@ -887,7 +1113,7 @@ static int digital_tg_send_sensf_res(struct nfc_digital_dev *ddev,
size = sizeof(struct digital_sensf_res);
- if (sensf_req->rc != DIGITAL_SENSF_REQ_RC_NONE)
+ if (sensf_req->rc == DIGITAL_SENSF_REQ_RC_NONE)
size -= sizeof(sensf_res->rd);
skb = digital_skb_alloc(ddev, size);
@@ -922,7 +1148,7 @@ static int digital_tg_send_sensf_res(struct nfc_digital_dev *ddev,
digital_skb_add_crc_f(skb);
rc = digital_tg_send_cmd(ddev, skb, 300,
- digital_tg_recv_atr_req, NULL);
+ digital_tg_recv_atr_or_sensf_req, NULL);
if (rc)
kfree_skb(skb);
diff --git a/net/nfc/hci/command.c b/net/nfc/hci/command.c
index a9f4d2e62d8..677d24bb70f 100644
--- a/net/nfc/hci/command.c
+++ b/net/nfc/hci/command.c
@@ -26,6 +26,8 @@
#include "hci.h"
+#define MAX_FWI 4949
+
static int nfc_hci_execute_cmd_async(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd,
const u8 *param, size_t param_len,
data_exchange_cb_t cb, void *cb_context)
@@ -37,7 +39,7 @@ static int nfc_hci_execute_cmd_async(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd,
* for all commands?
*/
return nfc_hci_hcp_message_tx(hdev, pipe, NFC_HCI_HCP_COMMAND, cmd,
- param, param_len, cb, cb_context, 3000);
+ param, param_len, cb, cb_context, MAX_FWI);
}
/*
@@ -82,7 +84,7 @@ static int nfc_hci_execute_cmd(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd,
NFC_HCI_HCP_COMMAND, cmd,
param, param_len,
nfc_hci_execute_cb, &hcp_ew,
- 3000);
+ MAX_FWI);
if (hcp_ew.exec_result < 0)
return hcp_ew.exec_result;
diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c
index d45b638e77c..47403705197 100644
--- a/net/nfc/hci/core.c
+++ b/net/nfc/hci/core.c
@@ -225,7 +225,7 @@ int nfc_hci_target_discovered(struct nfc_hci_dev *hdev, u8 gate)
goto exit;
}
- targets->sens_res = be16_to_cpu(*(u16 *)atqa_skb->data);
+ targets->sens_res = be16_to_cpu(*(__be16 *)atqa_skb->data);
targets->sel_res = sak_skb->data[0];
r = nfc_hci_get_param(hdev, NFC_HCI_RF_READER_A_GATE,
@@ -380,34 +380,31 @@ static int hci_dev_session_init(struct nfc_hci_dev *hdev)
if (r < 0)
goto disconnect_all;
- if (skb->len && skb->len == strlen(hdev->init_data.session_id))
- if (memcmp(hdev->init_data.session_id, skb->data,
- skb->len) == 0) {
- /* TODO ELa: restore gate<->pipe table from
- * some TBD location.
- * note: it doesn't seem possible to get the chip
- * currently open gate/pipe table.
- * It is only possible to obtain the supported
- * gate list.
- */
+ if (skb->len && skb->len == strlen(hdev->init_data.session_id) &&
+ (memcmp(hdev->init_data.session_id, skb->data,
+ skb->len) == 0) && hdev->ops->load_session) {
+ /* Restore gate<->pipe table from some proprietary location. */
- /* goto exit
- * For now, always do a full initialization */
- }
+ r = hdev->ops->load_session(hdev);
- r = nfc_hci_disconnect_all_gates(hdev);
- if (r < 0)
- goto exit;
+ if (r < 0)
+ goto disconnect_all;
+ } else {
- r = hci_dev_connect_gates(hdev, hdev->init_data.gate_count,
- hdev->init_data.gates);
- if (r < 0)
- goto disconnect_all;
+ r = nfc_hci_disconnect_all_gates(hdev);
+ if (r < 0)
+ goto exit;
- r = nfc_hci_set_param(hdev, NFC_HCI_ADMIN_GATE,
- NFC_HCI_ADMIN_SESSION_IDENTITY,
- hdev->init_data.session_id,
- strlen(hdev->init_data.session_id));
+ r = hci_dev_connect_gates(hdev, hdev->init_data.gate_count,
+ hdev->init_data.gates);
+ if (r < 0)
+ goto disconnect_all;
+
+ r = nfc_hci_set_param(hdev, NFC_HCI_ADMIN_GATE,
+ NFC_HCI_ADMIN_SESSION_IDENTITY,
+ hdev->init_data.session_id,
+ strlen(hdev->init_data.session_id));
+ }
if (r == 0)
goto exit;
diff --git a/net/nfc/llcp_commands.c b/net/nfc/llcp_commands.c
index bec6ed15f50..a3ad69a4c64 100644
--- a/net/nfc/llcp_commands.c
+++ b/net/nfc/llcp_commands.c
@@ -387,7 +387,7 @@ int nfc_llcp_send_symm(struct nfc_dev *dev)
__net_timestamp(skb);
- nfc_llcp_send_to_raw_sock(local, skb, NFC_LLCP_DIRECTION_TX);
+ nfc_llcp_send_to_raw_sock(local, skb, NFC_DIRECTION_TX);
return nfc_data_exchange(dev, local->target_idx, skb,
nfc_llcp_recv, local);
diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c
index b4671958fcf..51e78879731 100644
--- a/net/nfc/llcp_core.c
+++ b/net/nfc/llcp_core.c
@@ -680,16 +680,17 @@ void nfc_llcp_send_to_raw_sock(struct nfc_llcp_local *local,
continue;
if (skb_copy == NULL) {
- skb_copy = __pskb_copy(skb, NFC_LLCP_RAW_HEADER_SIZE,
- GFP_ATOMIC);
+ skb_copy = __pskb_copy_fclone(skb, NFC_RAW_HEADER_SIZE,
+ GFP_ATOMIC, true);
if (skb_copy == NULL)
continue;
- data = skb_push(skb_copy, NFC_LLCP_RAW_HEADER_SIZE);
+ data = skb_push(skb_copy, NFC_RAW_HEADER_SIZE);
data[0] = local->dev ? local->dev->idx : 0xFF;
- data[1] = direction;
+ data[1] = direction & 0x01;
+ data[1] |= (RAW_PAYLOAD_LLCP << 1);
}
nskb = skb_clone(skb_copy, GFP_ATOMIC);
@@ -747,7 +748,7 @@ static void nfc_llcp_tx_work(struct work_struct *work)
__net_timestamp(skb);
nfc_llcp_send_to_raw_sock(local, skb,
- NFC_LLCP_DIRECTION_TX);
+ NFC_DIRECTION_TX);
ret = nfc_data_exchange(local->dev, local->target_idx,
skb, nfc_llcp_recv, local);
@@ -1476,7 +1477,7 @@ static void nfc_llcp_rx_work(struct work_struct *work)
__net_timestamp(skb);
- nfc_llcp_send_to_raw_sock(local, skb, NFC_LLCP_DIRECTION_RX);
+ nfc_llcp_send_to_raw_sock(local, skb, NFC_DIRECTION_RX);
nfc_llcp_rx_skb(local, skb);
diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c
index 6c34ac97850..2b400e1a869 100644
--- a/net/nfc/nci/core.c
+++ b/net/nfc/nci/core.c
@@ -861,6 +861,10 @@ static int nci_send_frame(struct nci_dev *ndev, struct sk_buff *skb)
/* Get rid of skb owner, prior to sending to the driver. */
skb_orphan(skb);
+ /* Send copy to sniffer */
+ nfc_send_to_raw_sock(ndev->nfc_dev, skb,
+ RAW_PAYLOAD_NCI, NFC_DIRECTION_TX);
+
return ndev->ops->send(ndev, skb);
}
@@ -935,6 +939,11 @@ static void nci_rx_work(struct work_struct *work)
struct sk_buff *skb;
while ((skb = skb_dequeue(&ndev->rx_q))) {
+
+ /* Send copy to sniffer */
+ nfc_send_to_raw_sock(ndev->nfc_dev, skb,
+ RAW_PAYLOAD_NCI, NFC_DIRECTION_RX);
+
/* Process frame */
switch (nci_mt(skb->data)) {
case NCI_MT_RSP_PKT:
diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c
index 1e905097456..f8f6af23138 100644
--- a/net/nfc/nci/ntf.c
+++ b/net/nfc/nci/ntf.c
@@ -366,7 +366,6 @@ static int nci_extract_activation_params_nfc_dep(struct nci_dev *ndev,
struct nci_rf_intf_activated_ntf *ntf, __u8 *data)
{
struct activation_params_poll_nfc_dep *poll;
- int i;
switch (ntf->activation_rf_tech_and_mode) {
case NCI_NFC_A_PASSIVE_POLL_MODE:
@@ -374,10 +373,8 @@ static int nci_extract_activation_params_nfc_dep(struct nci_dev *ndev,
poll = &ntf->activation_params.poll_nfc_dep;
poll->atr_res_len = min_t(__u8, *data++, 63);
pr_debug("atr_res_len %d\n", poll->atr_res_len);
- if (poll->atr_res_len > 0) {
- for (i = 0; i < poll->atr_res_len; i++)
- poll->atr_res[poll->atr_res_len-1-i] = data[i];
- }
+ if (poll->atr_res_len > 0)
+ memcpy(poll->atr_res, data, poll->atr_res_len);
break;
default:
diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h
index 9d6e74f7e6b..88d60064890 100644
--- a/net/nfc/nfc.h
+++ b/net/nfc/nfc.h
@@ -40,6 +40,12 @@ struct nfc_rawsock {
struct work_struct tx_work;
bool tx_work_scheduled;
};
+
+struct nfc_sock_list {
+ struct hlist_head head;
+ rwlock_t lock;
+};
+
#define nfc_rawsock(sk) ((struct nfc_rawsock *) sk)
#define to_rawsock_sk(_tx_work) \
((struct sock *) container_of(_tx_work, struct nfc_rawsock, tx_work))
diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c
index c27a6e86cae..11c3544ea54 100644
--- a/net/nfc/rawsock.c
+++ b/net/nfc/rawsock.c
@@ -27,6 +27,24 @@
#include "nfc.h"
+static struct nfc_sock_list raw_sk_list = {
+ .lock = __RW_LOCK_UNLOCKED(raw_sk_list.lock)
+};
+
+static void nfc_sock_link(struct nfc_sock_list *l, struct sock *sk)
+{
+ write_lock(&l->lock);
+ sk_add_node(sk, &l->head);
+ write_unlock(&l->lock);
+}
+
+static void nfc_sock_unlink(struct nfc_sock_list *l, struct sock *sk)
+{
+ write_lock(&l->lock);
+ sk_del_node_init(sk);
+ write_unlock(&l->lock);
+}
+
static void rawsock_write_queue_purge(struct sock *sk)
{
pr_debug("sk=%p\n", sk);
@@ -57,6 +75,9 @@ static int rawsock_release(struct socket *sock)
if (!sk)
return 0;
+ if (sock->type == SOCK_RAW)
+ nfc_sock_unlink(&raw_sk_list, sk);
+
sock_orphan(sk);
sock_put(sk);
@@ -275,6 +296,26 @@ static const struct proto_ops rawsock_ops = {
.mmap = sock_no_mmap,
};
+static const struct proto_ops rawsock_raw_ops = {
+ .family = PF_NFC,
+ .owner = THIS_MODULE,
+ .release = rawsock_release,
+ .bind = sock_no_bind,
+ .connect = sock_no_connect,
+ .socketpair = sock_no_socketpair,
+ .accept = sock_no_accept,
+ .getname = sock_no_getname,
+ .poll = datagram_poll,
+ .ioctl = sock_no_ioctl,
+ .listen = sock_no_listen,
+ .shutdown = sock_no_shutdown,
+ .setsockopt = sock_no_setsockopt,
+ .getsockopt = sock_no_getsockopt,
+ .sendmsg = sock_no_sendmsg,
+ .recvmsg = rawsock_recvmsg,
+ .mmap = sock_no_mmap,
+};
+
static void rawsock_destruct(struct sock *sk)
{
pr_debug("sk=%p\n", sk);
@@ -300,10 +341,13 @@ static int rawsock_create(struct net *net, struct socket *sock,
pr_debug("sock=%p\n", sock);
- if (sock->type != SOCK_SEQPACKET)
+ if ((sock->type != SOCK_SEQPACKET) && (sock->type != SOCK_RAW))
return -ESOCKTNOSUPPORT;
- sock->ops = &rawsock_ops;
+ if (sock->type == SOCK_RAW)
+ sock->ops = &rawsock_raw_ops;
+ else
+ sock->ops = &rawsock_ops;
sk = sk_alloc(net, PF_NFC, GFP_ATOMIC, nfc_proto->proto);
if (!sk)
@@ -313,13 +357,53 @@ static int rawsock_create(struct net *net, struct socket *sock,
sk->sk_protocol = nfc_proto->id;
sk->sk_destruct = rawsock_destruct;
sock->state = SS_UNCONNECTED;
-
- INIT_WORK(&nfc_rawsock(sk)->tx_work, rawsock_tx_work);
- nfc_rawsock(sk)->tx_work_scheduled = false;
+ if (sock->type == SOCK_RAW)
+ nfc_sock_link(&raw_sk_list, sk);
+ else {
+ INIT_WORK(&nfc_rawsock(sk)->tx_work, rawsock_tx_work);
+ nfc_rawsock(sk)->tx_work_scheduled = false;
+ }
return 0;
}
+void nfc_send_to_raw_sock(struct nfc_dev *dev, struct sk_buff *skb,
+ u8 payload_type, u8 direction)
+{
+ struct sk_buff *skb_copy = NULL, *nskb;
+ struct sock *sk;
+ u8 *data;
+
+ read_lock(&raw_sk_list.lock);
+
+ sk_for_each(sk, &raw_sk_list.head) {
+ if (!skb_copy) {
+ skb_copy = __pskb_copy_fclone(skb, NFC_RAW_HEADER_SIZE,
+ GFP_ATOMIC, true);
+ if (!skb_copy)
+ continue;
+
+ data = skb_push(skb_copy, NFC_RAW_HEADER_SIZE);
+
+ data[0] = dev ? dev->idx : 0xFF;
+ data[1] = direction & 0x01;
+ data[1] |= (payload_type << 1);
+ }
+
+ nskb = skb_clone(skb_copy, GFP_ATOMIC);
+ if (!nskb)
+ continue;
+
+ if (sock_queue_rcv_skb(sk, nskb))
+ kfree_skb(nskb);
+ }
+
+ read_unlock(&raw_sk_list.lock);
+
+ kfree_skb(skb_copy);
+}
+EXPORT_SYMBOL(nfc_send_to_raw_sock);
+
static struct proto rawsock_proto = {
.name = "NFC_RAW",
.owner = THIS_MODULE,
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index c36856a457c..e70d8b18e96 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -551,6 +551,8 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
case OVS_ACTION_ATTR_SAMPLE:
err = sample(dp, skb, a);
+ if (unlikely(err)) /* skb already freed. */
+ return err;
break;
}
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 8867d7e2d65..9db4bf6740d 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2007-2013 Nicira, Inc.
+ * Copyright (c) 2007-2014 Nicira, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
@@ -44,11 +44,11 @@
#include <linux/netfilter_ipv4.h>
#include <linux/inetdevice.h>
#include <linux/list.h>
-#include <linux/lockdep.h>
#include <linux/openvswitch.h>
#include <linux/rculist.h>
#include <linux/dmi.h>
-#include <linux/workqueue.h>
+#include <linux/genetlink.h>
+#include <net/genetlink.h>
#include <net/genetlink.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
@@ -62,6 +62,31 @@
int ovs_net_id __read_mostly;
+static struct genl_family dp_packet_genl_family;
+static struct genl_family dp_flow_genl_family;
+static struct genl_family dp_datapath_genl_family;
+
+static struct genl_multicast_group ovs_dp_flow_multicast_group = {
+ .name = OVS_FLOW_MCGROUP
+};
+
+static struct genl_multicast_group ovs_dp_datapath_multicast_group = {
+ .name = OVS_DATAPATH_MCGROUP
+};
+
+struct genl_multicast_group ovs_dp_vport_multicast_group = {
+ .name = OVS_VPORT_MCGROUP
+};
+
+/* Check if need to build a reply message.
+ * OVS userspace sets the NLM_F_ECHO flag if it needs the reply. */
+static bool ovs_must_notify(struct genl_info *info,
+ const struct genl_multicast_group *grp)
+{
+ return info->nlhdr->nlmsg_flags & NLM_F_ECHO ||
+ netlink_has_listeners(genl_info_net(info)->genl_sock, 0);
+}
+
static void ovs_notify(struct genl_family *family,
struct sk_buff *skb, struct genl_info *info)
{
@@ -173,6 +198,7 @@ static struct hlist_head *vport_hash_bucket(const struct datapath *dp,
return &dp->ports[port_no & (DP_VPORT_HASH_BUCKETS - 1)];
}
+/* Called with ovs_mutex or RCU read lock. */
struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no)
{
struct vport *vport;
@@ -250,7 +276,7 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)
OVS_CB(skb)->flow = flow;
OVS_CB(skb)->pkt_key = &key;
- ovs_flow_stats_update(OVS_CB(skb)->flow, skb);
+ ovs_flow_stats_update(OVS_CB(skb)->flow, key.tp.flags, skb);
ovs_execute_actions(dp, skb);
stats_counter = &stats->n_hit;
@@ -262,16 +288,6 @@ out:
u64_stats_update_end(&stats->syncp);
}
-static struct genl_family dp_packet_genl_family = {
- .id = GENL_ID_GENERATE,
- .hdrsize = sizeof(struct ovs_header),
- .name = OVS_PACKET_FAMILY,
- .version = OVS_PACKET_VERSION,
- .maxattr = OVS_PACKET_ATTR_MAX,
- .netnsok = true,
- .parallel_ops = true,
-};
-
int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
const struct dp_upcall_info *upcall_info)
{
@@ -590,6 +606,18 @@ static const struct genl_ops dp_packet_genl_ops[] = {
}
};
+static struct genl_family dp_packet_genl_family = {
+ .id = GENL_ID_GENERATE,
+ .hdrsize = sizeof(struct ovs_header),
+ .name = OVS_PACKET_FAMILY,
+ .version = OVS_PACKET_VERSION,
+ .maxattr = OVS_PACKET_ATTR_MAX,
+ .netnsok = true,
+ .parallel_ops = true,
+ .ops = dp_packet_genl_ops,
+ .n_ops = ARRAY_SIZE(dp_packet_genl_ops),
+};
+
static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats,
struct ovs_dp_megaflow_stats *mega_stats)
{
@@ -621,26 +649,6 @@ static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats,
}
}
-static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
- [OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
- [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
- [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
-};
-
-static struct genl_family dp_flow_genl_family = {
- .id = GENL_ID_GENERATE,
- .hdrsize = sizeof(struct ovs_header),
- .name = OVS_FLOW_FAMILY,
- .version = OVS_FLOW_VERSION,
- .maxattr = OVS_FLOW_ATTR_MAX,
- .netnsok = true,
- .parallel_ops = true,
-};
-
-static struct genl_multicast_group ovs_dp_flow_multicast_group = {
- .name = OVS_FLOW_MCGROUP
-};
-
static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts)
{
return NLMSG_ALIGN(sizeof(struct ovs_header))
@@ -652,8 +660,8 @@ static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts)
+ nla_total_size(acts->actions_len); /* OVS_FLOW_ATTR_ACTIONS */
}
-/* Called with ovs_mutex. */
-static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
+/* Called with ovs_mutex or RCU read lock. */
+static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
struct sk_buff *skb, u32 portid,
u32 seq, u32 flags, u8 cmd)
{
@@ -670,7 +678,7 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
if (!ovs_header)
return -EMSGSIZE;
- ovs_header->dp_ifindex = get_dpifindex(dp);
+ ovs_header->dp_ifindex = dp_ifindex;
/* Fill flow key. */
nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY);
@@ -693,6 +701,7 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
nla_nest_end(skb, nla);
ovs_flow_stats_get(flow, &stats, &used, &tcp_flags);
+
if (used &&
nla_put_u64(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used)))
goto nla_put_failure;
@@ -720,9 +729,9 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
const struct sw_flow_actions *sf_acts;
sf_acts = rcu_dereference_ovsl(flow->sf_acts);
-
err = ovs_nla_put_actions(sf_acts->actions,
sf_acts->actions_len, skb);
+
if (!err)
nla_nest_end(skb, start);
else {
@@ -743,112 +752,128 @@ error:
return err;
}
-static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow,
- struct genl_info *info)
+/* May not be called with RCU read lock. */
+static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *acts,
+ struct genl_info *info,
+ bool always)
{
- size_t len;
+ struct sk_buff *skb;
- len = ovs_flow_cmd_msg_size(ovsl_dereference(flow->sf_acts));
+ if (!always && !ovs_must_notify(info, &ovs_dp_flow_multicast_group))
+ return NULL;
- return genlmsg_new_unicast(len, info, GFP_KERNEL);
+ skb = genlmsg_new_unicast(ovs_flow_cmd_msg_size(acts), info, GFP_KERNEL);
+ if (!skb)
+ return ERR_PTR(-ENOMEM);
+
+ return skb;
}
-static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow,
- struct datapath *dp,
- struct genl_info *info,
- u8 cmd)
+/* Called with ovs_mutex. */
+static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow,
+ int dp_ifindex,
+ struct genl_info *info, u8 cmd,
+ bool always)
{
struct sk_buff *skb;
int retval;
- skb = ovs_flow_cmd_alloc_info(flow, info);
- if (!skb)
- return ERR_PTR(-ENOMEM);
+ skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts), info,
+ always);
+ if (!skb || IS_ERR(skb))
+ return skb;
- retval = ovs_flow_cmd_fill_info(flow, dp, skb, info->snd_portid,
- info->snd_seq, 0, cmd);
+ retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb,
+ info->snd_portid, info->snd_seq, 0,
+ cmd);
BUG_ON(retval < 0);
return skb;
}
-static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
+static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr **a = info->attrs;
struct ovs_header *ovs_header = info->userhdr;
- struct sw_flow_key key, masked_key;
- struct sw_flow *flow = NULL;
+ struct sw_flow *flow, *new_flow;
struct sw_flow_mask mask;
struct sk_buff *reply;
struct datapath *dp;
- struct sw_flow_actions *acts = NULL;
+ struct sw_flow_actions *acts;
struct sw_flow_match match;
int error;
- /* Extract key. */
+ /* Must have key and actions. */
error = -EINVAL;
if (!a[OVS_FLOW_ATTR_KEY])
goto error;
+ if (!a[OVS_FLOW_ATTR_ACTIONS])
+ goto error;
- ovs_match_init(&match, &key, &mask);
+ /* Most of the time we need to allocate a new flow, do it before
+ * locking.
+ */
+ new_flow = ovs_flow_alloc();
+ if (IS_ERR(new_flow)) {
+ error = PTR_ERR(new_flow);
+ goto error;
+ }
+
+ /* Extract key. */
+ ovs_match_init(&match, &new_flow->unmasked_key, &mask);
error = ovs_nla_get_match(&match,
a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]);
if (error)
- goto error;
+ goto err_kfree_flow;
+
+ ovs_flow_mask_key(&new_flow->key, &new_flow->unmasked_key, &mask);
/* Validate actions. */
- if (a[OVS_FLOW_ATTR_ACTIONS]) {
- acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_FLOW_ATTR_ACTIONS]));
- error = PTR_ERR(acts);
- if (IS_ERR(acts))
- goto error;
+ acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_FLOW_ATTR_ACTIONS]));
+ error = PTR_ERR(acts);
+ if (IS_ERR(acts))
+ goto err_kfree_flow;
- ovs_flow_mask_key(&masked_key, &key, &mask);
- error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS],
- &masked_key, 0, &acts);
- if (error) {
- OVS_NLERR("Flow actions may not be safe on all matching packets.\n");
- goto err_kfree;
- }
- } else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) {
- error = -EINVAL;
- goto error;
+ error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key,
+ 0, &acts);
+ if (error) {
+ OVS_NLERR("Flow actions may not be safe on all matching packets.\n");
+ goto err_kfree_acts;
+ }
+
+ reply = ovs_flow_cmd_alloc_info(acts, info, false);
+ if (IS_ERR(reply)) {
+ error = PTR_ERR(reply);
+ goto err_kfree_acts;
}
ovs_lock();
dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
- error = -ENODEV;
- if (!dp)
+ if (unlikely(!dp)) {
+ error = -ENODEV;
goto err_unlock_ovs;
-
+ }
/* Check if this is a duplicate flow */
- flow = ovs_flow_tbl_lookup(&dp->table, &key);
- if (!flow) {
- /* Bail out if we're not allowed to create a new flow. */
- error = -ENOENT;
- if (info->genlhdr->cmd == OVS_FLOW_CMD_SET)
- goto err_unlock_ovs;
-
- /* Allocate flow. */
- flow = ovs_flow_alloc();
- if (IS_ERR(flow)) {
- error = PTR_ERR(flow);
- goto err_unlock_ovs;
- }
-
- flow->key = masked_key;
- flow->unmasked_key = key;
- rcu_assign_pointer(flow->sf_acts, acts);
+ flow = ovs_flow_tbl_lookup(&dp->table, &new_flow->unmasked_key);
+ if (likely(!flow)) {
+ rcu_assign_pointer(new_flow->sf_acts, acts);
/* Put flow in bucket. */
- error = ovs_flow_tbl_insert(&dp->table, flow, &mask);
- if (error) {
+ error = ovs_flow_tbl_insert(&dp->table, new_flow, &mask);
+ if (unlikely(error)) {
acts = NULL;
- goto err_flow_free;
+ goto err_unlock_ovs;
}
- reply = ovs_flow_cmd_build_info(flow, dp, info, OVS_FLOW_CMD_NEW);
+ if (unlikely(reply)) {
+ error = ovs_flow_cmd_fill_info(new_flow,
+ ovs_header->dp_ifindex,
+ reply, info->snd_portid,
+ info->snd_seq, 0,
+ OVS_FLOW_CMD_NEW);
+ BUG_ON(error < 0);
+ }
+ ovs_unlock();
} else {
- /* We found a matching flow. */
struct sw_flow_actions *old_acts;
/* Bail out if we're not allowed to modify an existing flow.
@@ -857,40 +882,153 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
* request. We also accept NLM_F_EXCL in case that bug ever
* gets fixed.
*/
- error = -EEXIST;
- if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW &&
- info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL))
+ if (unlikely(info->nlhdr->nlmsg_flags & (NLM_F_CREATE
+ | NLM_F_EXCL))) {
+ error = -EEXIST;
goto err_unlock_ovs;
-
+ }
/* The unmasked key has to be the same for flow updates. */
- if (!ovs_flow_cmp_unmasked_key(flow, &match))
- goto err_unlock_ovs;
-
+ if (unlikely(!ovs_flow_cmp_unmasked_key(flow, &match))) {
+ flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
+ if (!flow) {
+ error = -ENOENT;
+ goto err_unlock_ovs;
+ }
+ }
/* Update actions. */
old_acts = ovsl_dereference(flow->sf_acts);
rcu_assign_pointer(flow->sf_acts, acts);
+
+ if (unlikely(reply)) {
+ error = ovs_flow_cmd_fill_info(flow,
+ ovs_header->dp_ifindex,
+ reply, info->snd_portid,
+ info->snd_seq, 0,
+ OVS_FLOW_CMD_NEW);
+ BUG_ON(error < 0);
+ }
+ ovs_unlock();
+
ovs_nla_free_flow_actions(old_acts);
+ ovs_flow_free(new_flow, false);
+ }
+
+ if (reply)
+ ovs_notify(&dp_flow_genl_family, reply, info);
+ return 0;
+
+err_unlock_ovs:
+ ovs_unlock();
+ kfree_skb(reply);
+err_kfree_acts:
+ kfree(acts);
+err_kfree_flow:
+ ovs_flow_free(new_flow, false);
+error:
+ return error;
+}
+
+static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
+{
+ struct nlattr **a = info->attrs;
+ struct ovs_header *ovs_header = info->userhdr;
+ struct sw_flow_key key, masked_key;
+ struct sw_flow *flow;
+ struct sw_flow_mask mask;
+ struct sk_buff *reply = NULL;
+ struct datapath *dp;
+ struct sw_flow_actions *old_acts = NULL, *acts = NULL;
+ struct sw_flow_match match;
+ int error;
+
+ /* Extract key. */
+ error = -EINVAL;
+ if (!a[OVS_FLOW_ATTR_KEY])
+ goto error;
+
+ ovs_match_init(&match, &key, &mask);
+ error = ovs_nla_get_match(&match,
+ a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]);
+ if (error)
+ goto error;
+
+ /* Validate actions. */
+ if (a[OVS_FLOW_ATTR_ACTIONS]) {
+ acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_FLOW_ATTR_ACTIONS]));
+ error = PTR_ERR(acts);
+ if (IS_ERR(acts))
+ goto error;
+
+ ovs_flow_mask_key(&masked_key, &key, &mask);
+ error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS],
+ &masked_key, 0, &acts);
+ if (error) {
+ OVS_NLERR("Flow actions may not be safe on all matching packets.\n");
+ goto err_kfree_acts;
+ }
+ }
+
+ /* Can allocate before locking if have acts. */
+ if (acts) {
+ reply = ovs_flow_cmd_alloc_info(acts, info, false);
+ if (IS_ERR(reply)) {
+ error = PTR_ERR(reply);
+ goto err_kfree_acts;
+ }
+ }
+
+ ovs_lock();
+ dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
+ if (unlikely(!dp)) {
+ error = -ENODEV;
+ goto err_unlock_ovs;
+ }
+ /* Check that the flow exists. */
+ flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
+ if (unlikely(!flow)) {
+ error = -ENOENT;
+ goto err_unlock_ovs;
+ }
- reply = ovs_flow_cmd_build_info(flow, dp, info, OVS_FLOW_CMD_NEW);
+ /* Update actions, if present. */
+ if (likely(acts)) {
+ old_acts = ovsl_dereference(flow->sf_acts);
+ rcu_assign_pointer(flow->sf_acts, acts);
- /* Clear stats. */
- if (a[OVS_FLOW_ATTR_CLEAR])
- ovs_flow_stats_clear(flow);
+ if (unlikely(reply)) {
+ error = ovs_flow_cmd_fill_info(flow,
+ ovs_header->dp_ifindex,
+ reply, info->snd_portid,
+ info->snd_seq, 0,
+ OVS_FLOW_CMD_NEW);
+ BUG_ON(error < 0);
+ }
+ } else {
+ /* Could not alloc without acts before locking. */
+ reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex,
+ info, OVS_FLOW_CMD_NEW, false);
+ if (unlikely(IS_ERR(reply))) {
+ error = PTR_ERR(reply);
+ goto err_unlock_ovs;
+ }
}
+
+ /* Clear stats. */
+ if (a[OVS_FLOW_ATTR_CLEAR])
+ ovs_flow_stats_clear(flow);
ovs_unlock();
- if (!IS_ERR(reply))
+ if (reply)
ovs_notify(&dp_flow_genl_family, reply, info);
- else
- genl_set_err(&dp_flow_genl_family, sock_net(skb->sk), 0,
- 0, PTR_ERR(reply));
+ if (old_acts)
+ ovs_nla_free_flow_actions(old_acts);
+
return 0;
-err_flow_free:
- ovs_flow_free(flow, false);
err_unlock_ovs:
ovs_unlock();
-err_kfree:
+ kfree_skb(reply);
+err_kfree_acts:
kfree(acts);
error:
return error;
@@ -924,13 +1062,14 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
goto unlock;
}
- flow = ovs_flow_tbl_lookup(&dp->table, &key);
- if (!flow || !ovs_flow_cmp_unmasked_key(flow, &match)) {
+ flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
+ if (!flow) {
err = -ENOENT;
goto unlock;
}
- reply = ovs_flow_cmd_build_info(flow, dp, info, OVS_FLOW_CMD_NEW);
+ reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info,
+ OVS_FLOW_CMD_NEW, true);
if (IS_ERR(reply)) {
err = PTR_ERR(reply);
goto unlock;
@@ -954,45 +1093,53 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
struct sw_flow_match match;
int err;
+ if (likely(a[OVS_FLOW_ATTR_KEY])) {
+ ovs_match_init(&match, &key, NULL);
+ err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL);
+ if (unlikely(err))
+ return err;
+ }
+
ovs_lock();
dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
- if (!dp) {
+ if (unlikely(!dp)) {
err = -ENODEV;
goto unlock;
}
- if (!a[OVS_FLOW_ATTR_KEY]) {
+ if (unlikely(!a[OVS_FLOW_ATTR_KEY])) {
err = ovs_flow_tbl_flush(&dp->table);
goto unlock;
}
- ovs_match_init(&match, &key, NULL);
- err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL);
- if (err)
- goto unlock;
-
- flow = ovs_flow_tbl_lookup(&dp->table, &key);
- if (!flow || !ovs_flow_cmp_unmasked_key(flow, &match)) {
+ flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
+ if (unlikely(!flow)) {
err = -ENOENT;
goto unlock;
}
- reply = ovs_flow_cmd_alloc_info(flow, info);
- if (!reply) {
- err = -ENOMEM;
- goto unlock;
- }
-
ovs_flow_tbl_remove(&dp->table, flow);
+ ovs_unlock();
- err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_portid,
- info->snd_seq, 0, OVS_FLOW_CMD_DEL);
- BUG_ON(err < 0);
+ reply = ovs_flow_cmd_alloc_info((const struct sw_flow_actions __force *) flow->sf_acts,
+ info, false);
+ if (likely(reply)) {
+ if (likely(!IS_ERR(reply))) {
+ rcu_read_lock(); /*To keep RCU checker happy. */
+ err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex,
+ reply, info->snd_portid,
+ info->snd_seq, 0,
+ OVS_FLOW_CMD_DEL);
+ rcu_read_unlock();
+ BUG_ON(err < 0);
+
+ ovs_notify(&dp_flow_genl_family, reply, info);
+ } else {
+ netlink_set_err(sock_net(skb->sk)->genl_sock, 0, 0, PTR_ERR(reply));
+ }
+ }
ovs_flow_free(flow, true);
- ovs_unlock();
-
- ovs_notify(&dp_flow_genl_family, reply, info);
return 0;
unlock:
ovs_unlock();
@@ -1023,7 +1170,7 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
if (!flow)
break;
- if (ovs_flow_cmd_fill_info(flow, dp, skb,
+ if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
OVS_FLOW_CMD_NEW) < 0)
@@ -1036,11 +1183,17 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
return skb->len;
}
-static const struct genl_ops dp_flow_genl_ops[] = {
+static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
+ [OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
+ [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
+ [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
+};
+
+static struct genl_ops dp_flow_genl_ops[] = {
{ .cmd = OVS_FLOW_CMD_NEW,
.flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
.policy = flow_policy,
- .doit = ovs_flow_cmd_new_or_set
+ .doit = ovs_flow_cmd_new
},
{ .cmd = OVS_FLOW_CMD_DEL,
.flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
@@ -1056,28 +1209,22 @@ static const struct genl_ops dp_flow_genl_ops[] = {
{ .cmd = OVS_FLOW_CMD_SET,
.flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
.policy = flow_policy,
- .doit = ovs_flow_cmd_new_or_set,
+ .doit = ovs_flow_cmd_set,
},
};
-static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
- [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
- [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
- [OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
-};
-
-static struct genl_family dp_datapath_genl_family = {
+static struct genl_family dp_flow_genl_family = {
.id = GENL_ID_GENERATE,
.hdrsize = sizeof(struct ovs_header),
- .name = OVS_DATAPATH_FAMILY,
- .version = OVS_DATAPATH_VERSION,
- .maxattr = OVS_DP_ATTR_MAX,
+ .name = OVS_FLOW_FAMILY,
+ .version = OVS_FLOW_VERSION,
+ .maxattr = OVS_FLOW_ATTR_MAX,
.netnsok = true,
.parallel_ops = true,
-};
-
-static struct genl_multicast_group ovs_dp_datapath_multicast_group = {
- .name = OVS_DATAPATH_MCGROUP
+ .ops = dp_flow_genl_ops,
+ .n_ops = ARRAY_SIZE(dp_flow_genl_ops),
+ .mcgrps = &ovs_dp_flow_multicast_group,
+ .n_mcgrps = 1,
};
static size_t ovs_dp_cmd_msg_size(void)
@@ -1092,6 +1239,7 @@ static size_t ovs_dp_cmd_msg_size(void)
return msgsize;
}
+/* Called with ovs_mutex or RCU read lock. */
static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
u32 portid, u32 seq, u32 flags, u8 cmd)
{
@@ -1107,9 +1255,7 @@ static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
ovs_header->dp_ifindex = get_dpifindex(dp);
- rcu_read_lock();
err = nla_put_string(skb, OVS_DP_ATTR_NAME, ovs_dp_name(dp));
- rcu_read_unlock();
if (err)
goto nla_put_failure;
@@ -1134,25 +1280,12 @@ error:
return -EMSGSIZE;
}
-static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp,
- struct genl_info *info, u8 cmd)
+static struct sk_buff *ovs_dp_cmd_alloc_info(struct genl_info *info)
{
- struct sk_buff *skb;
- int retval;
-
- skb = genlmsg_new_unicast(ovs_dp_cmd_msg_size(), info, GFP_KERNEL);
- if (!skb)
- return ERR_PTR(-ENOMEM);
-
- retval = ovs_dp_cmd_fill_info(dp, skb, info->snd_portid, info->snd_seq, 0, cmd);
- if (retval < 0) {
- kfree_skb(skb);
- return ERR_PTR(retval);
- }
- return skb;
+ return genlmsg_new_unicast(ovs_dp_cmd_msg_size(), info, GFP_KERNEL);
}
-/* Called with ovs_mutex. */
+/* Called with rcu_read_lock or ovs_mutex. */
static struct datapath *lookup_datapath(struct net *net,
struct ovs_header *ovs_header,
struct nlattr *a[OVS_DP_ATTR_MAX + 1])
@@ -1164,10 +1297,8 @@ static struct datapath *lookup_datapath(struct net *net,
else {
struct vport *vport;
- rcu_read_lock();
vport = ovs_vport_locate(net, nla_data(a[OVS_DP_ATTR_NAME]));
dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL;
- rcu_read_unlock();
}
return dp ? dp : ERR_PTR(-ENODEV);
}
@@ -1204,12 +1335,14 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
goto err;
- ovs_lock();
+ reply = ovs_dp_cmd_alloc_info(info);
+ if (!reply)
+ return -ENOMEM;
err = -ENOMEM;
dp = kzalloc(sizeof(*dp), GFP_KERNEL);
if (dp == NULL)
- goto err_unlock_ovs;
+ goto err_free_reply;
ovs_dp_set_net(dp, hold_net(sock_net(skb->sk)));
@@ -1244,6 +1377,9 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
ovs_dp_change(dp, a);
+ /* So far only local changes have been made, now need the lock. */
+ ovs_lock();
+
vport = new_vport(&parms);
if (IS_ERR(vport)) {
err = PTR_ERR(vport);
@@ -1262,10 +1398,9 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
goto err_destroy_ports_array;
}
- reply = ovs_dp_cmd_build_info(dp, info, OVS_DP_CMD_NEW);
- err = PTR_ERR(reply);
- if (IS_ERR(reply))
- goto err_destroy_local_port;
+ err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
+ info->snd_seq, 0, OVS_DP_CMD_NEW);
+ BUG_ON(err < 0);
ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);
list_add_tail_rcu(&dp->list_node, &ovs_net->dps);
@@ -1275,9 +1410,8 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
ovs_notify(&dp_datapath_genl_family, reply, info);
return 0;
-err_destroy_local_port:
- ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
err_destroy_ports_array:
+ ovs_unlock();
kfree(dp->ports);
err_destroy_percpu:
free_percpu(dp->stats_percpu);
@@ -1286,8 +1420,8 @@ err_destroy_table:
err_free_dp:
release_net(ovs_dp_get_net(dp));
kfree(dp);
-err_unlock_ovs:
- ovs_unlock();
+err_free_reply:
+ kfree_skb(reply);
err:
return err;
}
@@ -1325,16 +1459,19 @@ static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
struct datapath *dp;
int err;
+ reply = ovs_dp_cmd_alloc_info(info);
+ if (!reply)
+ return -ENOMEM;
+
ovs_lock();
dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
err = PTR_ERR(dp);
if (IS_ERR(dp))
- goto unlock;
+ goto err_unlock_free;
- reply = ovs_dp_cmd_build_info(dp, info, OVS_DP_CMD_DEL);
- err = PTR_ERR(reply);
- if (IS_ERR(reply))
- goto unlock;
+ err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
+ info->snd_seq, 0, OVS_DP_CMD_DEL);
+ BUG_ON(err < 0);
__dp_destroy(dp);
ovs_unlock();
@@ -1342,8 +1479,10 @@ static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
ovs_notify(&dp_datapath_genl_family, reply, info);
return 0;
-unlock:
+
+err_unlock_free:
ovs_unlock();
+ kfree_skb(reply);
return err;
}
@@ -1353,29 +1492,30 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
struct datapath *dp;
int err;
+ reply = ovs_dp_cmd_alloc_info(info);
+ if (!reply)
+ return -ENOMEM;
+
ovs_lock();
dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
err = PTR_ERR(dp);
if (IS_ERR(dp))
- goto unlock;
+ goto err_unlock_free;
ovs_dp_change(dp, info->attrs);
- reply = ovs_dp_cmd_build_info(dp, info, OVS_DP_CMD_NEW);
- if (IS_ERR(reply)) {
- err = PTR_ERR(reply);
- genl_set_err(&dp_datapath_genl_family, sock_net(skb->sk), 0,
- 0, err);
- err = 0;
- goto unlock;
- }
+ err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
+ info->snd_seq, 0, OVS_DP_CMD_NEW);
+ BUG_ON(err < 0);
ovs_unlock();
ovs_notify(&dp_datapath_genl_family, reply, info);
return 0;
-unlock:
+
+err_unlock_free:
ovs_unlock();
+ kfree_skb(reply);
return err;
}
@@ -1385,24 +1525,26 @@ static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
struct datapath *dp;
int err;
- ovs_lock();
+ reply = ovs_dp_cmd_alloc_info(info);
+ if (!reply)
+ return -ENOMEM;
+
+ rcu_read_lock();
dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
if (IS_ERR(dp)) {
err = PTR_ERR(dp);
- goto unlock;
- }
-
- reply = ovs_dp_cmd_build_info(dp, info, OVS_DP_CMD_NEW);
- if (IS_ERR(reply)) {
- err = PTR_ERR(reply);
- goto unlock;
+ goto err_unlock_free;
}
+ err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
+ info->snd_seq, 0, OVS_DP_CMD_NEW);
+ BUG_ON(err < 0);
+ rcu_read_unlock();
- ovs_unlock();
return genlmsg_reply(reply, info);
-unlock:
- ovs_unlock();
+err_unlock_free:
+ rcu_read_unlock();
+ kfree_skb(reply);
return err;
}
@@ -1429,7 +1571,13 @@ static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
return skb->len;
}
-static const struct genl_ops dp_datapath_genl_ops[] = {
+static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
+ [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
+ [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
+ [OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
+};
+
+static struct genl_ops dp_datapath_genl_ops[] = {
{ .cmd = OVS_DP_CMD_NEW,
.flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
.policy = datapath_policy,
@@ -1453,27 +1601,18 @@ static const struct genl_ops dp_datapath_genl_ops[] = {
},
};
-static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
- [OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
- [OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) },
- [OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
- [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
- [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 },
- [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
-};
-
-struct genl_family dp_vport_genl_family = {
+static struct genl_family dp_datapath_genl_family = {
.id = GENL_ID_GENERATE,
.hdrsize = sizeof(struct ovs_header),
- .name = OVS_VPORT_FAMILY,
- .version = OVS_VPORT_VERSION,
- .maxattr = OVS_VPORT_ATTR_MAX,
+ .name = OVS_DATAPATH_FAMILY,
+ .version = OVS_DATAPATH_VERSION,
+ .maxattr = OVS_DP_ATTR_MAX,
.netnsok = true,
.parallel_ops = true,
-};
-
-static struct genl_multicast_group ovs_dp_vport_multicast_group = {
- .name = OVS_VPORT_MCGROUP
+ .ops = dp_datapath_genl_ops,
+ .n_ops = ARRAY_SIZE(dp_datapath_genl_ops),
+ .mcgrps = &ovs_dp_datapath_multicast_group,
+ .n_mcgrps = 1,
};
/* Called with ovs_mutex or RCU read lock. */
@@ -1515,7 +1654,12 @@ error:
return err;
}
-/* Called with ovs_mutex or RCU read lock. */
+static struct sk_buff *ovs_vport_cmd_alloc_info(void)
+{
+ return nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+}
+
+/* Called with ovs_mutex, only via ovs_dp_notify_wq(). */
struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid,
u32 seq, u8 cmd)
{
@@ -1577,33 +1721,35 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
u32 port_no;
int err;
- err = -EINVAL;
if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] ||
!a[OVS_VPORT_ATTR_UPCALL_PID])
- goto exit;
+ return -EINVAL;
+
+ port_no = a[OVS_VPORT_ATTR_PORT_NO]
+ ? nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]) : 0;
+ if (port_no >= DP_MAX_PORTS)
+ return -EFBIG;
+
+ reply = ovs_vport_cmd_alloc_info();
+ if (!reply)
+ return -ENOMEM;
ovs_lock();
dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
err = -ENODEV;
if (!dp)
- goto exit_unlock;
-
- if (a[OVS_VPORT_ATTR_PORT_NO]) {
- port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
-
- err = -EFBIG;
- if (port_no >= DP_MAX_PORTS)
- goto exit_unlock;
+ goto exit_unlock_free;
+ if (port_no) {
vport = ovs_vport_ovsl(dp, port_no);
err = -EBUSY;
if (vport)
- goto exit_unlock;
+ goto exit_unlock_free;
} else {
for (port_no = 1; ; port_no++) {
if (port_no >= DP_MAX_PORTS) {
err = -EFBIG;
- goto exit_unlock;
+ goto exit_unlock_free;
}
vport = ovs_vport_ovsl(dp, port_no);
if (!vport)
@@ -1621,22 +1767,19 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
vport = new_vport(&parms);
err = PTR_ERR(vport);
if (IS_ERR(vport))
- goto exit_unlock;
+ goto exit_unlock_free;
- err = 0;
- reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq,
- OVS_VPORT_CMD_NEW);
- if (IS_ERR(reply)) {
- err = PTR_ERR(reply);
- ovs_dp_detach_port(vport);
- goto exit_unlock;
- }
+ err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
+ info->snd_seq, 0, OVS_VPORT_CMD_NEW);
+ BUG_ON(err < 0);
+ ovs_unlock();
ovs_notify(&dp_vport_genl_family, reply, info);
+ return 0;
-exit_unlock:
+exit_unlock_free:
ovs_unlock();
-exit:
+ kfree_skb(reply);
return err;
}
@@ -1647,28 +1790,26 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
struct vport *vport;
int err;
+ reply = ovs_vport_cmd_alloc_info();
+ if (!reply)
+ return -ENOMEM;
+
ovs_lock();
vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
err = PTR_ERR(vport);
if (IS_ERR(vport))
- goto exit_unlock;
+ goto exit_unlock_free;
if (a[OVS_VPORT_ATTR_TYPE] &&
nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) {
err = -EINVAL;
- goto exit_unlock;
- }
-
- reply = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!reply) {
- err = -ENOMEM;
- goto exit_unlock;
+ goto exit_unlock_free;
}
if (a[OVS_VPORT_ATTR_OPTIONS]) {
err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]);
if (err)
- goto exit_free;
+ goto exit_unlock_free;
}
if (a[OVS_VPORT_ATTR_UPCALL_PID])
@@ -1682,10 +1823,9 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
ovs_notify(&dp_vport_genl_family, reply, info);
return 0;
-exit_free:
- kfree_skb(reply);
-exit_unlock:
+exit_unlock_free:
ovs_unlock();
+ kfree_skb(reply);
return err;
}
@@ -1696,30 +1836,33 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
struct vport *vport;
int err;
+ reply = ovs_vport_cmd_alloc_info();
+ if (!reply)
+ return -ENOMEM;
+
ovs_lock();
vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
err = PTR_ERR(vport);
if (IS_ERR(vport))
- goto exit_unlock;
+ goto exit_unlock_free;
if (vport->port_no == OVSP_LOCAL) {
err = -EINVAL;
- goto exit_unlock;
+ goto exit_unlock_free;
}
- reply = ovs_vport_cmd_build_info(vport, info->snd_portid,
- info->snd_seq, OVS_VPORT_CMD_DEL);
- err = PTR_ERR(reply);
- if (IS_ERR(reply))
- goto exit_unlock;
-
- err = 0;
+ err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
+ info->snd_seq, 0, OVS_VPORT_CMD_DEL);
+ BUG_ON(err < 0);
ovs_dp_detach_port(vport);
+ ovs_unlock();
ovs_notify(&dp_vport_genl_family, reply, info);
+ return 0;
-exit_unlock:
+exit_unlock_free:
ovs_unlock();
+ kfree_skb(reply);
return err;
}
@@ -1731,24 +1874,25 @@ static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
struct vport *vport;
int err;
+ reply = ovs_vport_cmd_alloc_info();
+ if (!reply)
+ return -ENOMEM;
+
rcu_read_lock();
vport = lookup_vport(sock_net(skb->sk), ovs_header, a);
err = PTR_ERR(vport);
if (IS_ERR(vport))
- goto exit_unlock;
-
- reply = ovs_vport_cmd_build_info(vport, info->snd_portid,
- info->snd_seq, OVS_VPORT_CMD_NEW);
- err = PTR_ERR(reply);
- if (IS_ERR(reply))
- goto exit_unlock;
-
+ goto exit_unlock_free;
+ err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
+ info->snd_seq, 0, OVS_VPORT_CMD_NEW);
+ BUG_ON(err < 0);
rcu_read_unlock();
return genlmsg_reply(reply, info);
-exit_unlock:
+exit_unlock_free:
rcu_read_unlock();
+ kfree_skb(reply);
return err;
}
@@ -1791,7 +1935,16 @@ out:
return skb->len;
}
-static const struct genl_ops dp_vport_genl_ops[] = {
+static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
+ [OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
+ [OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) },
+ [OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
+ [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
+ [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 },
+ [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
+};
+
+static struct genl_ops dp_vport_genl_ops[] = {
{ .cmd = OVS_VPORT_CMD_NEW,
.flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
.policy = vport_policy,
@@ -1815,26 +1968,25 @@ static const struct genl_ops dp_vport_genl_ops[] = {
},
};
-struct genl_family_and_ops {
- struct genl_family *family;
- const struct genl_ops *ops;
- int n_ops;
- const struct genl_multicast_group *group;
+struct genl_family dp_vport_genl_family = {
+ .id = GENL_ID_GENERATE,
+ .hdrsize = sizeof(struct ovs_header),
+ .name = OVS_VPORT_FAMILY,
+ .version = OVS_VPORT_VERSION,
+ .maxattr = OVS_VPORT_ATTR_MAX,
+ .netnsok = true,
+ .parallel_ops = true,
+ .ops = dp_vport_genl_ops,
+ .n_ops = ARRAY_SIZE(dp_vport_genl_ops),
+ .mcgrps = &ovs_dp_vport_multicast_group,
+ .n_mcgrps = 1,
};
-static const struct genl_family_and_ops dp_genl_families[] = {
- { &dp_datapath_genl_family,
- dp_datapath_genl_ops, ARRAY_SIZE(dp_datapath_genl_ops),
- &ovs_dp_datapath_multicast_group },
- { &dp_vport_genl_family,
- dp_vport_genl_ops, ARRAY_SIZE(dp_vport_genl_ops),
- &ovs_dp_vport_multicast_group },
- { &dp_flow_genl_family,
- dp_flow_genl_ops, ARRAY_SIZE(dp_flow_genl_ops),
- &ovs_dp_flow_multicast_group },
- { &dp_packet_genl_family,
- dp_packet_genl_ops, ARRAY_SIZE(dp_packet_genl_ops),
- NULL },
+static struct genl_family * const dp_genl_families[] = {
+ &dp_datapath_genl_family,
+ &dp_vport_genl_family,
+ &dp_flow_genl_family,
+ &dp_packet_genl_family,
};
static void dp_unregister_genl(int n_families)
@@ -1842,33 +1994,25 @@ static void dp_unregister_genl(int n_families)
int i;
for (i = 0; i < n_families; i++)
- genl_unregister_family(dp_genl_families[i].family);
+ genl_unregister_family(dp_genl_families[i]);
}
static int dp_register_genl(void)
{
- int n_registered;
int err;
int i;
- n_registered = 0;
for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {
- const struct genl_family_and_ops *f = &dp_genl_families[i];
- f->family->ops = f->ops;
- f->family->n_ops = f->n_ops;
- f->family->mcgrps = f->group;
- f->family->n_mcgrps = f->group ? 1 : 0;
- err = genl_register_family(f->family);
+ err = genl_register_family(dp_genl_families[i]);
if (err)
goto error;
- n_registered++;
}
return 0;
error:
- dp_unregister_genl(n_registered);
+ dp_unregister_genl(i);
return err;
}
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index e0fc12bbeeb..d07ab538fc9 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -61,20 +61,14 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies)
#define TCP_FLAGS_BE16(tp) (*(__be16 *)&tcp_flag_word(tp) & htons(0x0FFF))
-void ovs_flow_stats_update(struct sw_flow *flow, struct sk_buff *skb)
+void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,
+ struct sk_buff *skb)
{
struct flow_stats *stats;
- __be16 tcp_flags = 0;
int node = numa_node_id();
stats = rcu_dereference(flow->stats[node]);
- if (likely(flow->key.ip.proto == IPPROTO_TCP)) {
- if (likely(flow->key.eth.type == htons(ETH_P_IP)))
- tcp_flags = flow->key.ipv4.tp.flags;
- else if (likely(flow->key.eth.type == htons(ETH_P_IPV6)))
- tcp_flags = flow->key.ipv6.tp.flags;
- }
/* Check if already have node-specific stats. */
if (likely(stats)) {
spin_lock(&stats->lock);
@@ -128,7 +122,9 @@ unlock:
spin_unlock(&stats->lock);
}
-void ovs_flow_stats_get(struct sw_flow *flow, struct ovs_flow_stats *ovs_stats,
+/* Must be called with rcu_read_lock or ovs_mutex. */
+void ovs_flow_stats_get(const struct sw_flow *flow,
+ struct ovs_flow_stats *ovs_stats,
unsigned long *used, __be16 *tcp_flags)
{
int node;
@@ -138,7 +134,7 @@ void ovs_flow_stats_get(struct sw_flow *flow, struct ovs_flow_stats *ovs_stats,
memset(ovs_stats, 0, sizeof(*ovs_stats));
for_each_node(node) {
- struct flow_stats *stats = rcu_dereference(flow->stats[node]);
+ struct flow_stats *stats = rcu_dereference_ovsl(flow->stats[node]);
if (stats) {
/* Local CPU may write on non-local stats, so we must
@@ -155,12 +151,13 @@ void ovs_flow_stats_get(struct sw_flow *flow, struct ovs_flow_stats *ovs_stats,
}
}
+/* Called with ovs_mutex. */
void ovs_flow_stats_clear(struct sw_flow *flow)
{
int node;
for_each_node(node) {
- struct flow_stats *stats = rcu_dereference(flow->stats[node]);
+ struct flow_stats *stats = ovsl_dereference(flow->stats[node]);
if (stats) {
spin_lock_bh(&stats->lock);
@@ -357,8 +354,8 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
/* The ICMPv6 type and code fields use the 16-bit transport port
* fields, so we need to store them in 16-bit network byte order.
*/
- key->ipv6.tp.src = htons(icmp->icmp6_type);
- key->ipv6.tp.dst = htons(icmp->icmp6_code);
+ key->tp.src = htons(icmp->icmp6_type);
+ key->tp.dst = htons(icmp->icmp6_code);
if (icmp->icmp6_code == 0 &&
(icmp->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION ||
@@ -520,21 +517,21 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
if (key->ip.proto == IPPROTO_TCP) {
if (tcphdr_ok(skb)) {
struct tcphdr *tcp = tcp_hdr(skb);
- key->ipv4.tp.src = tcp->source;
- key->ipv4.tp.dst = tcp->dest;
- key->ipv4.tp.flags = TCP_FLAGS_BE16(tcp);
+ key->tp.src = tcp->source;
+ key->tp.dst = tcp->dest;
+ key->tp.flags = TCP_FLAGS_BE16(tcp);
}
} else if (key->ip.proto == IPPROTO_UDP) {
if (udphdr_ok(skb)) {
struct udphdr *udp = udp_hdr(skb);
- key->ipv4.tp.src = udp->source;
- key->ipv4.tp.dst = udp->dest;
+ key->tp.src = udp->source;
+ key->tp.dst = udp->dest;
}
} else if (key->ip.proto == IPPROTO_SCTP) {
if (sctphdr_ok(skb)) {
struct sctphdr *sctp = sctp_hdr(skb);
- key->ipv4.tp.src = sctp->source;
- key->ipv4.tp.dst = sctp->dest;
+ key->tp.src = sctp->source;
+ key->tp.dst = sctp->dest;
}
} else if (key->ip.proto == IPPROTO_ICMP) {
if (icmphdr_ok(skb)) {
@@ -542,8 +539,8 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
/* The ICMP type and code fields use the 16-bit
* transport port fields, so we need to store
* them in 16-bit network byte order. */
- key->ipv4.tp.src = htons(icmp->type);
- key->ipv4.tp.dst = htons(icmp->code);
+ key->tp.src = htons(icmp->type);
+ key->tp.dst = htons(icmp->code);
}
}
@@ -589,21 +586,21 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
if (key->ip.proto == NEXTHDR_TCP) {
if (tcphdr_ok(skb)) {
struct tcphdr *tcp = tcp_hdr(skb);
- key->ipv6.tp.src = tcp->source;
- key->ipv6.tp.dst = tcp->dest;
- key->ipv6.tp.flags = TCP_FLAGS_BE16(tcp);
+ key->tp.src = tcp->source;
+ key->tp.dst = tcp->dest;
+ key->tp.flags = TCP_FLAGS_BE16(tcp);
}
} else if (key->ip.proto == NEXTHDR_UDP) {
if (udphdr_ok(skb)) {
struct udphdr *udp = udp_hdr(skb);
- key->ipv6.tp.src = udp->source;
- key->ipv6.tp.dst = udp->dest;
+ key->tp.src = udp->source;
+ key->tp.dst = udp->dest;
}
} else if (key->ip.proto == NEXTHDR_SCTP) {
if (sctphdr_ok(skb)) {
struct sctphdr *sctp = sctp_hdr(skb);
- key->ipv6.tp.src = sctp->source;
- key->ipv6.tp.dst = sctp->dest;
+ key->tp.src = sctp->source;
+ key->tp.dst = sctp->dest;
}
} else if (key->ip.proto == NEXTHDR_ICMP) {
if (icmp6hdr_ok(skb)) {
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index ddcebc53224..5e5aaed3a85 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2007-2013 Nicira, Inc.
+ * Copyright (c) 2007-2014 Nicira, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
@@ -47,7 +47,7 @@ struct ovs_key_ipv4_tunnel {
__be16 tun_flags;
u8 ipv4_tos;
u8 ipv4_ttl;
-};
+} __packed __aligned(4); /* Minimize padding. */
static inline void ovs_flow_tun_key_init(struct ovs_key_ipv4_tunnel *tun_key,
const struct iphdr *iph, __be64 tun_id,
@@ -71,7 +71,7 @@ struct sw_flow_key {
u32 priority; /* Packet QoS priority. */
u32 skb_mark; /* SKB mark. */
u16 in_port; /* Input switch port (or DP_MAX_PORTS). */
- } phy;
+ } __packed phy; /* Safe when right after 'tun_key'. */
struct {
u8 src[ETH_ALEN]; /* Ethernet source address. */
u8 dst[ETH_ALEN]; /* Ethernet destination address. */
@@ -84,23 +84,21 @@ struct sw_flow_key {
u8 ttl; /* IP TTL/hop limit. */
u8 frag; /* One of OVS_FRAG_TYPE_*. */
} ip;
+ struct {
+ __be16 src; /* TCP/UDP/SCTP source port. */
+ __be16 dst; /* TCP/UDP/SCTP destination port. */
+ __be16 flags; /* TCP flags. */
+ } tp;
union {
struct {
struct {
__be32 src; /* IP source address. */
__be32 dst; /* IP destination address. */
} addr;
- union {
- struct {
- __be16 src; /* TCP/UDP/SCTP source port. */
- __be16 dst; /* TCP/UDP/SCTP destination port. */
- __be16 flags; /* TCP flags. */
- } tp;
- struct {
- u8 sha[ETH_ALEN]; /* ARP source hardware address. */
- u8 tha[ETH_ALEN]; /* ARP target hardware address. */
- } arp;
- };
+ struct {
+ u8 sha[ETH_ALEN]; /* ARP source hardware address. */
+ u8 tha[ETH_ALEN]; /* ARP target hardware address. */
+ } arp;
} ipv4;
struct {
struct {
@@ -109,11 +107,6 @@ struct sw_flow_key {
} addr;
__be32 label; /* IPv6 flow label. */
struct {
- __be16 src; /* TCP/UDP/SCTP source port. */
- __be16 dst; /* TCP/UDP/SCTP destination port. */
- __be16 flags; /* TCP flags. */
- } tp;
- struct {
struct in6_addr target; /* ND target address. */
u8 sll[ETH_ALEN]; /* ND source link layer address. */
u8 tll[ETH_ALEN]; /* ND target link layer address. */
@@ -187,10 +180,11 @@ struct arp_eth_header {
unsigned char ar_tip[4]; /* target IP address */
} __packed;
-void ovs_flow_stats_update(struct sw_flow *flow, struct sk_buff *skb);
-void ovs_flow_stats_get(struct sw_flow *flow, struct ovs_flow_stats *stats,
+void ovs_flow_stats_update(struct sw_flow *, __be16 tcp_flags,
+ struct sk_buff *);
+void ovs_flow_stats_get(const struct sw_flow *, struct ovs_flow_stats *,
unsigned long *used, __be16 *tcp_flags);
-void ovs_flow_stats_clear(struct sw_flow *flow);
+void ovs_flow_stats_clear(struct sw_flow *);
u64 ovs_flow_used_time(unsigned long flow_jiffies);
int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *);
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 32a725cfeb0..d757848da89 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -204,11 +204,11 @@ static bool match_validate(const struct sw_flow_match *match,
if (match->mask && (match->mask->key.ip.proto == 0xff))
mask_allowed |= 1 << OVS_KEY_ATTR_ICMPV6;
- if (match->key->ipv6.tp.src ==
+ if (match->key->tp.src ==
htons(NDISC_NEIGHBOUR_SOLICITATION) ||
- match->key->ipv6.tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
+ match->key->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
key_expected |= 1 << OVS_KEY_ATTR_ND;
- if (match->mask && (match->mask->key.ipv6.tp.src == htons(0xffff)))
+ if (match->mask && (match->mask->key.tp.src == htons(0xffff)))
mask_allowed |= 1 << OVS_KEY_ATTR_ND;
}
}
@@ -630,27 +630,18 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
const struct ovs_key_tcp *tcp_key;
tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]);
- if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) {
- SW_FLOW_KEY_PUT(match, ipv4.tp.src,
- tcp_key->tcp_src, is_mask);
- SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
- tcp_key->tcp_dst, is_mask);
- } else {
- SW_FLOW_KEY_PUT(match, ipv6.tp.src,
- tcp_key->tcp_src, is_mask);
- SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
- tcp_key->tcp_dst, is_mask);
- }
+ SW_FLOW_KEY_PUT(match, tp.src, tcp_key->tcp_src, is_mask);
+ SW_FLOW_KEY_PUT(match, tp.dst, tcp_key->tcp_dst, is_mask);
attrs &= ~(1 << OVS_KEY_ATTR_TCP);
}
if (attrs & (1 << OVS_KEY_ATTR_TCP_FLAGS)) {
if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) {
- SW_FLOW_KEY_PUT(match, ipv4.tp.flags,
+ SW_FLOW_KEY_PUT(match, tp.flags,
nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]),
is_mask);
} else {
- SW_FLOW_KEY_PUT(match, ipv6.tp.flags,
+ SW_FLOW_KEY_PUT(match, tp.flags,
nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]),
is_mask);
}
@@ -661,17 +652,8 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
const struct ovs_key_udp *udp_key;
udp_key = nla_data(a[OVS_KEY_ATTR_UDP]);
- if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) {
- SW_FLOW_KEY_PUT(match, ipv4.tp.src,
- udp_key->udp_src, is_mask);
- SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
- udp_key->udp_dst, is_mask);
- } else {
- SW_FLOW_KEY_PUT(match, ipv6.tp.src,
- udp_key->udp_src, is_mask);
- SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
- udp_key->udp_dst, is_mask);
- }
+ SW_FLOW_KEY_PUT(match, tp.src, udp_key->udp_src, is_mask);
+ SW_FLOW_KEY_PUT(match, tp.dst, udp_key->udp_dst, is_mask);
attrs &= ~(1 << OVS_KEY_ATTR_UDP);
}
@@ -679,17 +661,8 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
const struct ovs_key_sctp *sctp_key;
sctp_key = nla_data(a[OVS_KEY_ATTR_SCTP]);
- if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) {
- SW_FLOW_KEY_PUT(match, ipv4.tp.src,
- sctp_key->sctp_src, is_mask);
- SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
- sctp_key->sctp_dst, is_mask);
- } else {
- SW_FLOW_KEY_PUT(match, ipv6.tp.src,
- sctp_key->sctp_src, is_mask);
- SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
- sctp_key->sctp_dst, is_mask);
- }
+ SW_FLOW_KEY_PUT(match, tp.src, sctp_key->sctp_src, is_mask);
+ SW_FLOW_KEY_PUT(match, tp.dst, sctp_key->sctp_dst, is_mask);
attrs &= ~(1 << OVS_KEY_ATTR_SCTP);
}
@@ -697,9 +670,9 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
const struct ovs_key_icmp *icmp_key;
icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]);
- SW_FLOW_KEY_PUT(match, ipv4.tp.src,
+ SW_FLOW_KEY_PUT(match, tp.src,
htons(icmp_key->icmp_type), is_mask);
- SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
+ SW_FLOW_KEY_PUT(match, tp.dst,
htons(icmp_key->icmp_code), is_mask);
attrs &= ~(1 << OVS_KEY_ATTR_ICMP);
}
@@ -708,9 +681,9 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
const struct ovs_key_icmpv6 *icmpv6_key;
icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]);
- SW_FLOW_KEY_PUT(match, ipv6.tp.src,
+ SW_FLOW_KEY_PUT(match, tp.src,
htons(icmpv6_key->icmpv6_type), is_mask);
- SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
+ SW_FLOW_KEY_PUT(match, tp.dst,
htons(icmpv6_key->icmpv6_code), is_mask);
attrs &= ~(1 << OVS_KEY_ATTR_ICMPV6);
}
@@ -1024,19 +997,11 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
if (!nla)
goto nla_put_failure;
tcp_key = nla_data(nla);
- if (swkey->eth.type == htons(ETH_P_IP)) {
- tcp_key->tcp_src = output->ipv4.tp.src;
- tcp_key->tcp_dst = output->ipv4.tp.dst;
- if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS,
- output->ipv4.tp.flags))
- goto nla_put_failure;
- } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
- tcp_key->tcp_src = output->ipv6.tp.src;
- tcp_key->tcp_dst = output->ipv6.tp.dst;
- if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS,
- output->ipv6.tp.flags))
- goto nla_put_failure;
- }
+ tcp_key->tcp_src = output->tp.src;
+ tcp_key->tcp_dst = output->tp.dst;
+ if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS,
+ output->tp.flags))
+ goto nla_put_failure;
} else if (swkey->ip.proto == IPPROTO_UDP) {
struct ovs_key_udp *udp_key;
@@ -1044,13 +1009,8 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
if (!nla)
goto nla_put_failure;
udp_key = nla_data(nla);
- if (swkey->eth.type == htons(ETH_P_IP)) {
- udp_key->udp_src = output->ipv4.tp.src;
- udp_key->udp_dst = output->ipv4.tp.dst;
- } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
- udp_key->udp_src = output->ipv6.tp.src;
- udp_key->udp_dst = output->ipv6.tp.dst;
- }
+ udp_key->udp_src = output->tp.src;
+ udp_key->udp_dst = output->tp.dst;
} else if (swkey->ip.proto == IPPROTO_SCTP) {
struct ovs_key_sctp *sctp_key;
@@ -1058,13 +1018,8 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
if (!nla)
goto nla_put_failure;
sctp_key = nla_data(nla);
- if (swkey->eth.type == htons(ETH_P_IP)) {
- sctp_key->sctp_src = output->ipv4.tp.src;
- sctp_key->sctp_dst = output->ipv4.tp.dst;
- } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
- sctp_key->sctp_src = output->ipv6.tp.src;
- sctp_key->sctp_dst = output->ipv6.tp.dst;
- }
+ sctp_key->sctp_src = output->tp.src;
+ sctp_key->sctp_dst = output->tp.dst;
} else if (swkey->eth.type == htons(ETH_P_IP) &&
swkey->ip.proto == IPPROTO_ICMP) {
struct ovs_key_icmp *icmp_key;
@@ -1073,8 +1028,8 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
if (!nla)
goto nla_put_failure;
icmp_key = nla_data(nla);
- icmp_key->icmp_type = ntohs(output->ipv4.tp.src);
- icmp_key->icmp_code = ntohs(output->ipv4.tp.dst);
+ icmp_key->icmp_type = ntohs(output->tp.src);
+ icmp_key->icmp_code = ntohs(output->tp.dst);
} else if (swkey->eth.type == htons(ETH_P_IPV6) &&
swkey->ip.proto == IPPROTO_ICMPV6) {
struct ovs_key_icmpv6 *icmpv6_key;
@@ -1084,8 +1039,8 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
if (!nla)
goto nla_put_failure;
icmpv6_key = nla_data(nla);
- icmpv6_key->icmpv6_type = ntohs(output->ipv6.tp.src);
- icmpv6_key->icmpv6_code = ntohs(output->ipv6.tp.dst);
+ icmpv6_key->icmpv6_type = ntohs(output->tp.src);
+ icmpv6_key->icmpv6_code = ntohs(output->tp.dst);
if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION ||
icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) {
@@ -1263,13 +1218,10 @@ static int validate_and_copy_sample(const struct nlattr *attr,
static int validate_tp_port(const struct sw_flow_key *flow_key)
{
- if (flow_key->eth.type == htons(ETH_P_IP)) {
- if (flow_key->ipv4.tp.src || flow_key->ipv4.tp.dst)
- return 0;
- } else if (flow_key->eth.type == htons(ETH_P_IPV6)) {
- if (flow_key->ipv6.tp.src || flow_key->ipv6.tp.dst)
- return 0;
- }
+ if ((flow_key->eth.type == htons(ETH_P_IP) ||
+ flow_key->eth.type == htons(ETH_P_IPV6)) &&
+ (flow_key->tp.src || flow_key->tp.dst))
+ return 0;
return -EINVAL;
}
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index d8ef37b937b..cf2d853646f 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -139,7 +139,7 @@ static void flow_free(struct sw_flow *flow)
{
int node;
- kfree((struct sf_flow_acts __force *)flow->sf_acts);
+ kfree((struct sw_flow_actions __force *)flow->sf_acts);
for_each_node(node)
if (flow->stats[node])
kmem_cache_free(flow_stats_cache,
@@ -159,25 +159,6 @@ void ovs_flow_free(struct sw_flow *flow, bool deferred)
if (!flow)
return;
- if (flow->mask) {
- struct sw_flow_mask *mask = flow->mask;
-
- /* ovs-lock is required to protect mask-refcount and
- * mask list.
- */
- ASSERT_OVSL();
- BUG_ON(!mask->ref_count);
- mask->ref_count--;
-
- if (!mask->ref_count) {
- list_del_rcu(&mask->list);
- if (deferred)
- kfree_rcu(mask, rcu);
- else
- kfree(mask);
- }
- }
-
if (deferred)
call_rcu(&flow->rcu, rcu_free_flow_callback);
else
@@ -475,6 +456,22 @@ struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl,
return ovs_flow_tbl_lookup_stats(tbl, key, &n_mask_hit);
}
+struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
+ struct sw_flow_match *match)
+{
+ struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
+ struct sw_flow_mask *mask;
+ struct sw_flow *flow;
+
+ /* Always called under ovs-mutex. */
+ list_for_each_entry(mask, &tbl->mask_list, list) {
+ flow = masked_flow_lookup(ti, match->key, mask);
+ if (flow && ovs_flow_cmp_unmasked_key(flow, match)) /* Found */
+ return flow;
+ }
+ return NULL;
+}
+
int ovs_flow_tbl_num_masks(const struct flow_table *table)
{
struct sw_flow_mask *mask;
@@ -491,6 +488,25 @@ static struct table_instance *table_instance_expand(struct table_instance *ti)
return table_instance_rehash(ti, ti->n_buckets * 2);
}
+/* Remove 'mask' from the mask list, if it is not needed any more. */
+static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask)
+{
+ if (mask) {
+ /* ovs-lock is required to protect mask-refcount and
+ * mask list.
+ */
+ ASSERT_OVSL();
+ BUG_ON(!mask->ref_count);
+ mask->ref_count--;
+
+ if (!mask->ref_count) {
+ list_del_rcu(&mask->list);
+ kfree_rcu(mask, rcu);
+ }
+ }
+}
+
+/* Must be called with OVS mutex held. */
void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)
{
struct table_instance *ti = ovsl_dereference(table->ti);
@@ -498,6 +514,11 @@ void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)
BUG_ON(table->count == 0);
hlist_del_rcu(&flow->hash_node[ti->node_ver]);
table->count--;
+
+ /* RCU delete the mask. 'flow->mask' is not NULLed, as it should be
+ * accessible as long as the RCU read lock is held.
+ */
+ flow_mask_remove(table, flow->mask);
}
static struct sw_flow_mask *mask_alloc(void)
@@ -560,6 +581,7 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
return 0;
}
+/* Must be called with OVS mutex held. */
int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
struct sw_flow_mask *mask)
{
diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h
index ca8a5820f61..5918bff7f3f 100644
--- a/net/openvswitch/flow_table.h
+++ b/net/openvswitch/flow_table.h
@@ -76,7 +76,8 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *,
u32 *n_mask_hit);
struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *,
const struct sw_flow_key *);
-
+struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
+ struct sw_flow_match *match);
bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
struct sw_flow_match *match);
diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c
index 35ec4fed09e..f49148a07da 100644
--- a/net/openvswitch/vport-gre.c
+++ b/net/openvswitch/vport-gre.c
@@ -110,6 +110,22 @@ static int gre_rcv(struct sk_buff *skb,
return PACKET_RCVD;
}
+/* Called with rcu_read_lock and BH disabled. */
+static int gre_err(struct sk_buff *skb, u32 info,
+ const struct tnl_ptk_info *tpi)
+{
+ struct ovs_net *ovs_net;
+ struct vport *vport;
+
+ ovs_net = net_generic(dev_net(skb->dev), ovs_net_id);
+ vport = rcu_dereference(ovs_net->vport_net.gre_vport);
+
+ if (unlikely(!vport))
+ return PACKET_REJECT;
+ else
+ return PACKET_RCVD;
+}
+
static int gre_tnl_send(struct vport *vport, struct sk_buff *skb)
{
struct net *net = ovs_dp_get_net(vport->dp);
@@ -186,6 +202,7 @@ error:
static struct gre_cisco_protocol gre_protocol = {
.handler = gre_rcv,
+ .err_handler = gre_err,
.priority = 1,
};
diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c
index a93efa3f64c..0edbd95c60e 100644
--- a/net/openvswitch/vport-vxlan.c
+++ b/net/openvswitch/vport-vxlan.c
@@ -122,7 +122,7 @@ static struct vport *vxlan_tnl_create(const struct vport_parms *parms)
vxlan_port = vxlan_vport(vport);
strncpy(vxlan_port->name, parms->name, IFNAMSIZ);
- vs = vxlan_sock_add(net, htons(dst_port), vxlan_rcv, vport, true, false);
+ vs = vxlan_sock_add(net, htons(dst_port), vxlan_rcv, vport, true, 0);
if (IS_ERR(vs)) {
ovs_vport_free(vport);
return (void *)vs;
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index b7ebe23cded..d67de453c35 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -598,7 +598,7 @@ static void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq,
{
atomic64_set(&ic->i_ack_next, seq);
if (ack_required) {
- smp_mb__before_clear_bit();
+ smp_mb__before_atomic();
set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
}
}
@@ -606,7 +606,7 @@ static void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq,
static u64 rds_ib_get_ack(struct rds_ib_connection *ic)
{
clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
- smp_mb__after_clear_bit();
+ smp_mb__after_atomic();
return atomic64_read(&ic->i_ack_next);
}
diff --git a/net/rds/iw_recv.c b/net/rds/iw_recv.c
index 45033358358..aa8bf678600 100644
--- a/net/rds/iw_recv.c
+++ b/net/rds/iw_recv.c
@@ -429,7 +429,7 @@ static void rds_iw_set_ack(struct rds_iw_connection *ic, u64 seq,
{
atomic64_set(&ic->i_ack_next, seq);
if (ack_required) {
- smp_mb__before_clear_bit();
+ smp_mb__before_atomic();
set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
}
}
@@ -437,7 +437,7 @@ static void rds_iw_set_ack(struct rds_iw_connection *ic, u64 seq,
static u64 rds_iw_get_ack(struct rds_iw_connection *ic)
{
clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
- smp_mb__after_clear_bit();
+ smp_mb__after_atomic();
return atomic64_read(&ic->i_ack_next);
}
diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c
index c2be901d19e..6cd9d1deafc 100644
--- a/net/rds/rdma_transport.c
+++ b/net/rds/rdma_transport.c
@@ -168,7 +168,7 @@ static int rds_rdma_listen_init(void)
return ret;
}
- sin.sin_family = AF_INET,
+ sin.sin_family = AF_INET;
sin.sin_addr.s_addr = (__force u32)htonl(INADDR_ANY);
sin.sin_port = (__force u16)htons(RDS_PORT);
diff --git a/net/rds/send.c b/net/rds/send.c
index a82fb660ec0..23718160d71 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -107,7 +107,7 @@ static int acquire_in_xmit(struct rds_connection *conn)
static void release_in_xmit(struct rds_connection *conn)
{
clear_bit(RDS_IN_XMIT, &conn->c_flags);
- smp_mb__after_clear_bit();
+ smp_mb__after_atomic();
/*
* We don't use wait_on_bit()/wake_up_bit() because our waking is in a
* hot path and finding waiters is very rare. We don't want to walk
@@ -661,7 +661,7 @@ void rds_send_drop_acked(struct rds_connection *conn, u64 ack,
/* order flag updates with spin locks */
if (!list_empty(&list))
- smp_mb__after_clear_bit();
+ smp_mb__after_atomic();
spin_unlock_irqrestore(&conn->c_lock, flags);
@@ -691,7 +691,7 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
}
/* order flag updates with the rs lock */
- smp_mb__after_clear_bit();
+ smp_mb__after_atomic();
spin_unlock_irqrestore(&rs->rs_lock, flags);
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index 4e638f85118..23ab4dcd1d9 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -153,7 +153,7 @@ int rds_tcp_listen_init(void)
sock->sk->sk_data_ready = rds_tcp_listen_data_ready;
write_unlock_bh(&sock->sk->sk_callback_lock);
- sin.sin_family = PF_INET,
+ sin.sin_family = PF_INET;
sin.sin_addr.s_addr = (__force u32)htonl(INADDR_ANY);
sin.sin_port = (__force u16)htons(RDS_TCP_PORT);
diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c
index 81cf5a4c5e4..53b17ca0dff 100644
--- a/net/rds/tcp_send.c
+++ b/net/rds/tcp_send.c
@@ -93,7 +93,7 @@ int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm,
rm->m_ack_seq = tc->t_last_sent_nxt +
sizeof(struct rds_header) +
be32_to_cpu(rm->m_inc.i_hdr.h_len) - 1;
- smp_mb__before_clear_bit();
+ smp_mb__before_atomic();
set_bit(RDS_MSG_HAS_ACK_SEQ, &rm->m_flags);
tc->t_last_expected_una = rm->m_ack_seq + 1;
diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c
index bd2a5b90400..14c98e48f26 100644
--- a/net/rfkill/rfkill-gpio.c
+++ b/net/rfkill/rfkill-gpio.c
@@ -36,8 +36,6 @@ struct rfkill_gpio_data {
struct gpio_desc *shutdown_gpio;
struct rfkill *rfkill_dev;
- char *reset_name;
- char *shutdown_name;
struct clk *clk;
bool clk_enabled;
@@ -47,17 +45,14 @@ static int rfkill_gpio_set_power(void *data, bool blocked)
{
struct rfkill_gpio_data *rfkill = data;
- if (blocked) {
- gpiod_set_value(rfkill->shutdown_gpio, 0);
- gpiod_set_value(rfkill->reset_gpio, 0);
- if (!IS_ERR(rfkill->clk) && rfkill->clk_enabled)
- clk_disable(rfkill->clk);
- } else {
- if (!IS_ERR(rfkill->clk) && !rfkill->clk_enabled)
- clk_enable(rfkill->clk);
- gpiod_set_value(rfkill->reset_gpio, 1);
- gpiod_set_value(rfkill->shutdown_gpio, 1);
- }
+ if (!blocked && !IS_ERR(rfkill->clk) && !rfkill->clk_enabled)
+ clk_enable(rfkill->clk);
+
+ gpiod_set_value_cansleep(rfkill->shutdown_gpio, !blocked);
+ gpiod_set_value_cansleep(rfkill->reset_gpio, !blocked);
+
+ if (blocked && !IS_ERR(rfkill->clk) && rfkill->clk_enabled)
+ clk_disable(rfkill->clk);
rfkill->clk_enabled = blocked;
@@ -87,10 +82,8 @@ static int rfkill_gpio_probe(struct platform_device *pdev)
{
struct rfkill_gpio_platform_data *pdata = pdev->dev.platform_data;
struct rfkill_gpio_data *rfkill;
- const char *clk_name = NULL;
struct gpio_desc *gpio;
int ret;
- int len;
rfkill = devm_kzalloc(&pdev->dev, sizeof(*rfkill), GFP_KERNEL);
if (!rfkill)
@@ -101,28 +94,15 @@ static int rfkill_gpio_probe(struct platform_device *pdev)
if (ret)
return ret;
} else if (pdata) {
- clk_name = pdata->power_clk_name;
rfkill->name = pdata->name;
rfkill->type = pdata->type;
} else {
return -ENODEV;
}
- len = strlen(rfkill->name);
- rfkill->reset_name = devm_kzalloc(&pdev->dev, len + 7, GFP_KERNEL);
- if (!rfkill->reset_name)
- return -ENOMEM;
-
- rfkill->shutdown_name = devm_kzalloc(&pdev->dev, len + 10, GFP_KERNEL);
- if (!rfkill->shutdown_name)
- return -ENOMEM;
+ rfkill->clk = devm_clk_get(&pdev->dev, NULL);
- snprintf(rfkill->reset_name, len + 6 , "%s_reset", rfkill->name);
- snprintf(rfkill->shutdown_name, len + 9, "%s_shutdown", rfkill->name);
-
- rfkill->clk = devm_clk_get(&pdev->dev, clk_name);
-
- gpio = devm_gpiod_get_index(&pdev->dev, rfkill->reset_name, 0);
+ gpio = devm_gpiod_get_index(&pdev->dev, "reset", 0);
if (!IS_ERR(gpio)) {
ret = gpiod_direction_output(gpio, 0);
if (ret)
@@ -130,7 +110,7 @@ static int rfkill_gpio_probe(struct platform_device *pdev)
rfkill->reset_gpio = gpio;
}
- gpio = devm_gpiod_get_index(&pdev->dev, rfkill->shutdown_name, 1);
+ gpio = devm_gpiod_get_index(&pdev->dev, "shutdown", 1);
if (!IS_ERR(gpio)) {
ret = gpiod_direction_output(gpio, 0);
if (ret)
@@ -146,14 +126,6 @@ static int rfkill_gpio_probe(struct platform_device *pdev)
return -EINVAL;
}
- if (pdata && pdata->gpio_runtime_setup) {
- ret = pdata->gpio_runtime_setup(pdev);
- if (ret) {
- dev_err(&pdev->dev, "can't set up gpio\n");
- return ret;
- }
- }
-
rfkill->rfkill_dev = rfkill_alloc(rfkill->name, &pdev->dev,
rfkill->type, &rfkill_gpio_ops,
rfkill);
@@ -174,20 +146,23 @@ static int rfkill_gpio_probe(struct platform_device *pdev)
static int rfkill_gpio_remove(struct platform_device *pdev)
{
struct rfkill_gpio_data *rfkill = platform_get_drvdata(pdev);
- struct rfkill_gpio_platform_data *pdata = pdev->dev.platform_data;
- if (pdata && pdata->gpio_runtime_close)
- pdata->gpio_runtime_close(pdev);
rfkill_unregister(rfkill->rfkill_dev);
rfkill_destroy(rfkill->rfkill_dev);
return 0;
}
+#ifdef CONFIG_ACPI
static const struct acpi_device_id rfkill_acpi_match[] = {
+ { "BCM2E1A", RFKILL_TYPE_BLUETOOTH },
+ { "BCM2E39", RFKILL_TYPE_BLUETOOTH },
+ { "BCM2E3D", RFKILL_TYPE_BLUETOOTH },
{ "BCM4752", RFKILL_TYPE_GPS },
+ { "LNV4752", RFKILL_TYPE_GPS },
{ },
};
+#endif
static struct platform_driver rfkill_gpio_driver = {
.probe = rfkill_gpio_probe,
diff --git a/net/rxrpc/ar-key.c b/net/rxrpc/ar-key.c
index 7633a752c65..0ad080790a3 100644
--- a/net/rxrpc/ar-key.c
+++ b/net/rxrpc/ar-key.c
@@ -99,7 +99,7 @@ static int rxrpc_instantiate_xdr_rxkad(struct key *key, const __be32 *xdr,
_debug("tktlen: %x", tktlen);
if (tktlen > AFSTOKEN_RK_TIX_MAX)
return -EKEYREJECTED;
- if (8 * 4 + tktlen != toklen)
+ if (toklen < 8 * 4 + tktlen)
return -EKEYREJECTED;
plen = sizeof(*token) + sizeof(*token->kad) + tktlen;
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 16186965af9..13f64df2c71 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -160,7 +160,7 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
{
struct sock_filter *bpf_ops, *bpf_old;
struct tcf_exts exts;
- struct sock_fprog tmp;
+ struct sock_fprog_kern tmp;
struct sk_filter *fp, *fp_old;
u16 bpf_size, bpf_len;
u32 classid;
@@ -191,7 +191,7 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
memcpy(bpf_ops, nla_data(tb[TCA_BPF_OPS]), bpf_size);
tmp.len = bpf_len;
- tmp.filter = (struct sock_filter __user *) bpf_ops;
+ tmp.filter = bpf_ops;
ret = sk_unattached_filter_create(&fp, &tmp);
if (ret)
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index d11d0a4fbe3..c721cd4a469 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -188,6 +188,12 @@ static const struct nla_policy tcindex_policy[TCA_TCINDEX_MAX + 1] = {
[TCA_TCINDEX_CLASSID] = { .type = NLA_U32 },
};
+static void tcindex_filter_result_init(struct tcindex_filter_result *r)
+{
+ memset(r, 0, sizeof(*r));
+ tcf_exts_init(&r->exts, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
+}
+
static int
tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
u32 handle, struct tcindex_data *p,
@@ -207,15 +213,11 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
return err;
memcpy(&cp, p, sizeof(cp));
- memset(&new_filter_result, 0, sizeof(new_filter_result));
- tcf_exts_init(&new_filter_result.exts, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
+ tcindex_filter_result_init(&new_filter_result);
+ tcindex_filter_result_init(&cr);
if (old_r)
- memcpy(&cr, r, sizeof(cr));
- else {
- memset(&cr, 0, sizeof(cr));
- tcf_exts_init(&cr.exts, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
- }
+ cr.res = r->res;
if (tb[TCA_TCINDEX_HASH])
cp.hash = nla_get_u32(tb[TCA_TCINDEX_HASH]);
@@ -267,9 +269,14 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
err = -ENOMEM;
if (!cp.perfect && !cp.h) {
if (valid_perfect_hash(&cp)) {
+ int i;
+
cp.perfect = kcalloc(cp.hash, sizeof(*r), GFP_KERNEL);
if (!cp.perfect)
goto errout;
+ for (i = 0; i < cp.hash; i++)
+ tcf_exts_init(&cp.perfect[i].exts, TCA_TCINDEX_ACT,
+ TCA_TCINDEX_POLICE);
balloc = 1;
} else {
cp.h = kcalloc(cp.hash, sizeof(f), GFP_KERNEL);
@@ -295,14 +302,17 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
tcf_bind_filter(tp, &cr.res, base);
}
- tcf_exts_change(tp, &cr.exts, &e);
+ if (old_r)
+ tcf_exts_change(tp, &r->exts, &e);
+ else
+ tcf_exts_change(tp, &cr.exts, &e);
tcf_tree_lock(tp);
if (old_r && old_r != r)
- memset(old_r, 0, sizeof(*old_r));
+ tcindex_filter_result_init(old_r);
memcpy(p, &cp, sizeof(cp));
- memcpy(r, &cr, sizeof(cr));
+ r->res = cr.res;
if (r == &new_filter_result) {
struct tcindex_filter **fp;
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index c39b583ace3..70c0be8d012 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -38,6 +38,7 @@
#include <linux/errno.h>
#include <linux/rtnetlink.h>
#include <linux/skbuff.h>
+#include <linux/bitmap.h>
#include <net/netlink.h>
#include <net/act_api.h>
#include <net/pkt_cls.h>
@@ -460,17 +461,25 @@ static int u32_delete(struct tcf_proto *tp, unsigned long arg)
return 0;
}
+#define NR_U32_NODE (1<<12)
static u32 gen_new_kid(struct tc_u_hnode *ht, u32 handle)
{
struct tc_u_knode *n;
- unsigned int i = 0x7FF;
+ unsigned long i;
+ unsigned long *bitmap = kzalloc(BITS_TO_LONGS(NR_U32_NODE) * sizeof(unsigned long),
+ GFP_KERNEL);
+ if (!bitmap)
+ return handle | 0xFFF;
for (n = ht->ht[TC_U32_HASH(handle)]; n; n = n->next)
- if (i < TC_U32_NODE(n->handle))
- i = TC_U32_NODE(n->handle);
- i++;
+ set_bit(TC_U32_NODE(n->handle), bitmap);
- return handle | (i > 0xFFF ? 0xFFF : i);
+ i = find_next_zero_bit(bitmap, NR_U32_NODE, 0x800);
+ if (i >= NR_U32_NODE)
+ i = find_next_zero_bit(bitmap, NR_U32_NODE, 1);
+
+ kfree(bitmap);
+ return handle | (i >= NR_U32_NODE ? 0xFFF : i);
}
static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = {
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index fd14df56e5f..58bed7599db 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -563,7 +563,7 @@ out:
}
EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
-void qdisc_warn_nonwc(char *txt, struct Qdisc *qdisc)
+void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc)
{
if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index 2aee02802c2..ed30e436128 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -391,12 +391,7 @@ static const struct nla_policy choke_policy[TCA_CHOKE_MAX + 1] = {
static void choke_free(void *addr)
{
- if (addr) {
- if (is_vmalloc_addr(addr))
- vfree(addr);
- else
- kfree(addr);
- }
+ kvfree(addr);
}
static int choke_change(struct Qdisc *sch, struct nlattr *opt)
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index 8302717ea30..7bbbfe11219 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -391,8 +391,10 @@ static struct sk_buff *drr_dequeue(struct Qdisc *sch)
while (1) {
cl = list_first_entry(&q->active, struct drr_class, alist);
skb = cl->qdisc->ops->peek(cl->qdisc);
- if (skb == NULL)
+ if (skb == NULL) {
+ qdisc_warn_nonwc(__func__, cl->qdisc);
goto out;
+ }
len = qdisc_pkt_len(skb);
if (len <= cl->deficit) {
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 23c682b42f9..ba32c2b005d 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -591,10 +591,7 @@ static void *fq_alloc_node(size_t sz, int node)
static void fq_free(void *addr)
{
- if (addr && is_vmalloc_addr(addr))
- vfree(addr);
- else
- kfree(addr);
+ kvfree(addr);
}
static int fq_resize(struct Qdisc *sch, u32 log)
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 0bf432c782c..063b726bf1f 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -365,12 +365,7 @@ static void *fq_codel_zalloc(size_t sz)
static void fq_codel_free(void *addr)
{
- if (addr) {
- if (is_vmalloc_addr(addr))
- vfree(addr);
- else
- kfree(addr);
- }
+ kvfree(addr);
}
static void fq_codel_destroy(struct Qdisc *sch)
diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c
index 6aab8619bbb..d85b6812a7d 100644
--- a/net/sched/sch_hhf.c
+++ b/net/sched/sch_hhf.c
@@ -494,12 +494,7 @@ static void *hhf_zalloc(size_t sz)
static void hhf_free(void *addr)
{
- if (addr) {
- if (is_vmalloc_addr(addr))
- vfree(addr);
- else
- kfree(addr);
- }
+ kvfree(addr);
}
static void hhf_destroy(struct Qdisc *sch)
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index f1669a00f57..111d70fddae 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -648,12 +648,7 @@ static void netem_reset(struct Qdisc *sch)
static void dist_free(struct disttable *d)
{
- if (d) {
- if (is_vmalloc_addr(d))
- vfree(d);
- else
- kfree(d);
- }
+ kvfree(d);
}
/*
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 87317ff0b4e..1af2f73906d 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -716,12 +716,7 @@ static void *sfq_alloc(size_t sz)
static void sfq_free(void *addr)
{
- if (addr) {
- if (is_vmalloc_addr(addr))
- vfree(addr);
- else
- kfree(addr);
- }
+ kvfree(addr);
}
static void sfq_destroy(struct Qdisc *sch)
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 39579c3e0d1..06a9ee6b2d3 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -55,6 +55,7 @@
#include <net/sctp/sm.h>
/* Forward declarations for internal functions. */
+static void sctp_select_active_and_retran_path(struct sctp_association *asoc);
static void sctp_assoc_bh_rcv(struct work_struct *work);
static void sctp_assoc_free_asconf_acks(struct sctp_association *asoc);
static void sctp_assoc_free_asconf_queue(struct sctp_association *asoc);
@@ -330,7 +331,7 @@ void sctp_association_free(struct sctp_association *asoc)
/* Only real associations count against the endpoint, so
* don't bother for if this is a temporary association.
*/
- if (!asoc->temp) {
+ if (!list_empty(&asoc->asocs)) {
list_del(&asoc->asocs);
/* Decrement the backlog value for a TCP-style listening
@@ -774,9 +775,6 @@ void sctp_assoc_control_transport(struct sctp_association *asoc,
sctp_transport_cmd_t command,
sctp_sn_error_t error)
{
- struct sctp_transport *t = NULL;
- struct sctp_transport *first;
- struct sctp_transport *second;
struct sctp_ulpevent *event;
struct sockaddr_storage addr;
int spc_state = 0;
@@ -829,13 +827,14 @@ void sctp_assoc_control_transport(struct sctp_association *asoc,
return;
}
- /* Generate and send a SCTP_PEER_ADDR_CHANGE notification to the
- * user.
+ /* Generate and send a SCTP_PEER_ADDR_CHANGE notification
+ * to the user.
*/
if (ulp_notify) {
memset(&addr, 0, sizeof(struct sockaddr_storage));
memcpy(&addr, &transport->ipaddr,
transport->af_specific->sockaddr_len);
+
event = sctp_ulpevent_make_peer_addr_change(asoc, &addr,
0, spc_state, error, GFP_ATOMIC);
if (event)
@@ -843,60 +842,7 @@ void sctp_assoc_control_transport(struct sctp_association *asoc,
}
/* Select new active and retran paths. */
-
- /* Look for the two most recently used active transports.
- *
- * This code produces the wrong ordering whenever jiffies
- * rolls over, but we still get usable transports, so we don't
- * worry about it.
- */
- first = NULL; second = NULL;
-
- list_for_each_entry(t, &asoc->peer.transport_addr_list,
- transports) {
-
- if ((t->state == SCTP_INACTIVE) ||
- (t->state == SCTP_UNCONFIRMED) ||
- (t->state == SCTP_PF))
- continue;
- if (!first || t->last_time_heard > first->last_time_heard) {
- second = first;
- first = t;
- } else if (!second ||
- t->last_time_heard > second->last_time_heard)
- second = t;
- }
-
- /* RFC 2960 6.4 Multi-Homed SCTP Endpoints
- *
- * By default, an endpoint should always transmit to the
- * primary path, unless the SCTP user explicitly specifies the
- * destination transport address (and possibly source
- * transport address) to use.
- *
- * [If the primary is active but not most recent, bump the most
- * recently used transport.]
- */
- if (((asoc->peer.primary_path->state == SCTP_ACTIVE) ||
- (asoc->peer.primary_path->state == SCTP_UNKNOWN)) &&
- first != asoc->peer.primary_path) {
- second = first;
- first = asoc->peer.primary_path;
- }
-
- if (!second)
- second = first;
- /* If we failed to find a usable transport, just camp on the
- * primary, even if it is inactive.
- */
- if (!first) {
- first = asoc->peer.primary_path;
- second = asoc->peer.primary_path;
- }
-
- /* Set the active and retran transports. */
- asoc->peer.active_path = first;
- asoc->peer.retran_path = second;
+ sctp_select_active_and_retran_path(asoc);
}
/* Hold a reference to an association. */
@@ -1090,7 +1036,7 @@ static void sctp_assoc_bh_rcv(struct work_struct *work)
}
if (chunk->transport)
- chunk->transport->last_time_heard = jiffies;
+ chunk->transport->last_time_heard = ktime_get();
/* Run through the state machine. */
error = sctp_do_sm(net, SCTP_EVENT_T_CHUNK, subtype,
@@ -1151,6 +1097,7 @@ void sctp_assoc_update(struct sctp_association *asoc,
asoc->c = new->c;
asoc->peer.rwnd = new->peer.rwnd;
asoc->peer.sack_needed = new->peer.sack_needed;
+ asoc->peer.auth_capable = new->peer.auth_capable;
asoc->peer.i = new->peer.i;
sctp_tsnmap_init(&asoc->peer.tsn_map, SCTP_TSN_MAP_INITIAL,
asoc->peer.i.initial_tsn, GFP_ATOMIC);
@@ -1278,13 +1225,41 @@ static u8 sctp_trans_score(const struct sctp_transport *trans)
return sctp_trans_state_to_prio_map[trans->state];
}
+static struct sctp_transport *sctp_trans_elect_tie(struct sctp_transport *trans1,
+ struct sctp_transport *trans2)
+{
+ if (trans1->error_count > trans2->error_count) {
+ return trans2;
+ } else if (trans1->error_count == trans2->error_count &&
+ ktime_after(trans2->last_time_heard,
+ trans1->last_time_heard)) {
+ return trans2;
+ } else {
+ return trans1;
+ }
+}
+
static struct sctp_transport *sctp_trans_elect_best(struct sctp_transport *curr,
struct sctp_transport *best)
{
+ u8 score_curr, score_best;
+
if (best == NULL)
return curr;
- return sctp_trans_score(curr) > sctp_trans_score(best) ? curr : best;
+ score_curr = sctp_trans_score(curr);
+ score_best = sctp_trans_score(best);
+
+ /* First, try a score-based selection if both transport states
+ * differ. If we're in a tie, lets try to make a more clever
+ * decision here based on error counts and last time heard.
+ */
+ if (score_curr > score_best)
+ return curr;
+ else if (score_curr == score_best)
+ return sctp_trans_elect_tie(curr, best);
+ else
+ return best;
}
void sctp_assoc_update_retran_path(struct sctp_association *asoc)
@@ -1325,6 +1300,76 @@ void sctp_assoc_update_retran_path(struct sctp_association *asoc)
__func__, asoc, &asoc->peer.retran_path->ipaddr.sa);
}
+static void sctp_select_active_and_retran_path(struct sctp_association *asoc)
+{
+ struct sctp_transport *trans, *trans_pri = NULL, *trans_sec = NULL;
+ struct sctp_transport *trans_pf = NULL;
+
+ /* Look for the two most recently used active transports. */
+ list_for_each_entry(trans, &asoc->peer.transport_addr_list,
+ transports) {
+ /* Skip uninteresting transports. */
+ if (trans->state == SCTP_INACTIVE ||
+ trans->state == SCTP_UNCONFIRMED)
+ continue;
+ /* Keep track of the best PF transport from our
+ * list in case we don't find an active one.
+ */
+ if (trans->state == SCTP_PF) {
+ trans_pf = sctp_trans_elect_best(trans, trans_pf);
+ continue;
+ }
+ /* For active transports, pick the most recent ones. */
+ if (trans_pri == NULL ||
+ ktime_after(trans->last_time_heard,
+ trans_pri->last_time_heard)) {
+ trans_sec = trans_pri;
+ trans_pri = trans;
+ } else if (trans_sec == NULL ||
+ ktime_after(trans->last_time_heard,
+ trans_sec->last_time_heard)) {
+ trans_sec = trans;
+ }
+ }
+
+ /* RFC 2960 6.4 Multi-Homed SCTP Endpoints
+ *
+ * By default, an endpoint should always transmit to the primary
+ * path, unless the SCTP user explicitly specifies the
+ * destination transport address (and possibly source transport
+ * address) to use. [If the primary is active but not most recent,
+ * bump the most recently used transport.]
+ */
+ if ((asoc->peer.primary_path->state == SCTP_ACTIVE ||
+ asoc->peer.primary_path->state == SCTP_UNKNOWN) &&
+ asoc->peer.primary_path != trans_pri) {
+ trans_sec = trans_pri;
+ trans_pri = asoc->peer.primary_path;
+ }
+
+ /* We did not find anything useful for a possible retransmission
+ * path; either primary path that we found is the the same as
+ * the current one, or we didn't generally find an active one.
+ */
+ if (trans_sec == NULL)
+ trans_sec = trans_pri;
+
+ /* If we failed to find a usable transport, just camp on the
+ * primary or retran, even if they are inactive, if possible
+ * pick a PF iff it's the better choice.
+ */
+ if (trans_pri == NULL) {
+ trans_pri = sctp_trans_elect_best(asoc->peer.primary_path,
+ asoc->peer.retran_path);
+ trans_pri = sctp_trans_elect_best(trans_pri, trans_pf);
+ trans_sec = asoc->peer.primary_path;
+ }
+
+ /* Set the active and retran transports. */
+ asoc->peer.active_path = trans_pri;
+ asoc->peer.retran_path = trans_sec;
+}
+
struct sctp_transport *
sctp_assoc_choose_alter_transport(struct sctp_association *asoc,
struct sctp_transport *last_sent_to)
@@ -1547,7 +1592,7 @@ int sctp_assoc_lookup_laddr(struct sctp_association *asoc,
/* Set an association id for a given association */
int sctp_assoc_set_id(struct sctp_association *asoc, gfp_t gfp)
{
- bool preload = gfp & __GFP_WAIT;
+ bool preload = !!(gfp & __GFP_WAIT);
int ret;
/* If the id is already assigned, keep it. */
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 3d9f429858d..9da76ba4d10 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -481,7 +481,7 @@ normal:
}
if (chunk->transport)
- chunk->transport->last_time_heard = jiffies;
+ chunk->transport->last_time_heard = ktime_get();
error = sctp_do_sm(net, SCTP_EVENT_T_CHUNK, subtype, state,
ep, asoc, chunk, GFP_ATOMIC);
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 4dc5d9e0831..1999592ba88 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -943,7 +943,6 @@ static struct inet_protosw sctpv6_seqpacket_protosw = {
.protocol = IPPROTO_SCTP,
.prot = &sctpv6_prot,
.ops = &inet6_seqpacket_ops,
- .no_check = 0,
.flags = SCTP_PROTOSW_FLAG
};
static struct inet_protosw sctpv6_stream_protosw = {
@@ -951,7 +950,6 @@ static struct inet_protosw sctpv6_stream_protosw = {
.protocol = IPPROTO_SCTP,
.prot = &sctpv6_prot,
.ops = &inet6_seqpacket_ops,
- .no_check = 0,
.flags = SCTP_PROTOSW_FLAG,
};
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index af5afca4b85..6789d785e69 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1017,7 +1017,6 @@ static struct inet_protosw sctp_seqpacket_protosw = {
.protocol = IPPROTO_SCTP,
.prot = &sctp_prot,
.ops = &inet_seqpacket_ops,
- .no_check = 0,
.flags = SCTP_PROTOSW_FLAG
};
static struct inet_protosw sctp_stream_protosw = {
@@ -1025,7 +1024,6 @@ static struct inet_protosw sctp_stream_protosw = {
.protocol = IPPROTO_SCTP,
.prot = &sctp_prot,
.ops = &inet_seqpacket_ops,
- .no_check = 0,
.flags = SCTP_PROTOSW_FLAG
};
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index fee5552ddf9..ae0e616a7ca 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1782,7 +1782,7 @@ no_hmac:
else
kt = ktime_get();
- if (!asoc && ktime_compare(bear_cookie->expiration, kt) < 0) {
+ if (!asoc && ktime_before(bear_cookie->expiration, kt)) {
/*
* Section 3.3.10.3 Stale Cookie Error (3)
*
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 2af76eaba8f..42989968940 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -6946,7 +6946,8 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk,
newsk->sk_type = sk->sk_type;
newsk->sk_bound_dev_if = sk->sk_bound_dev_if;
newsk->sk_flags = sk->sk_flags;
- newsk->sk_no_check = sk->sk_no_check;
+ newsk->sk_no_check_tx = sk->sk_no_check_tx;
+ newsk->sk_no_check_rx = sk->sk_no_check_rx;
newsk->sk_reuse = sk->sk_reuse;
newsk->sk_shutdown = sk->sk_shutdown;
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index 7e5eb755499..12c7e01c267 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -34,6 +34,8 @@
* Sridhar Samudrala <sri@us.ibm.com>
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <net/sctp/structs.h>
#include <net/sctp/sctp.h>
#include <linux/sysctl.h>
@@ -46,6 +48,11 @@ static int sack_timer_min = 1;
static int sack_timer_max = 500;
static int addr_scope_max = 3; /* check sctp_scope_policy_t in include/net/sctp/constants.h for max entries */
static int rwnd_scale_max = 16;
+static int rto_alpha_min = 0;
+static int rto_beta_min = 0;
+static int rto_alpha_max = 1000;
+static int rto_beta_max = 1000;
+
static unsigned long max_autoclose_min = 0;
static unsigned long max_autoclose_max =
(MAX_SCHEDULE_TIMEOUT / HZ > UINT_MAX)
@@ -64,6 +71,9 @@ static int proc_sctp_do_rto_min(struct ctl_table *ctl, int write,
static int proc_sctp_do_rto_max(struct ctl_table *ctl, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos);
+static int proc_sctp_do_alpha_beta(struct ctl_table *ctl, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos);
static int proc_sctp_do_auth(struct ctl_table *ctl, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos);
@@ -126,15 +136,19 @@ static struct ctl_table sctp_net_table[] = {
.procname = "rto_alpha_exp_divisor",
.data = &init_net.sctp.rto_alpha,
.maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = proc_dointvec,
+ .mode = 0644,
+ .proc_handler = proc_sctp_do_alpha_beta,
+ .extra1 = &rto_alpha_min,
+ .extra2 = &rto_alpha_max,
},
{
.procname = "rto_beta_exp_divisor",
.data = &init_net.sctp.rto_beta,
.maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = proc_dointvec,
+ .mode = 0644,
+ .proc_handler = proc_sctp_do_alpha_beta,
+ .extra1 = &rto_beta_min,
+ .extra2 = &rto_beta_max,
},
{
.procname = "max_burst",
@@ -307,41 +321,40 @@ static int proc_sctp_do_hmac_alg(struct ctl_table *ctl, int write,
loff_t *ppos)
{
struct net *net = current->nsproxy->net_ns;
- char tmp[8];
struct ctl_table tbl;
- int ret;
- int changed = 0;
+ bool changed = false;
char *none = "none";
+ char tmp[8];
+ int ret;
memset(&tbl, 0, sizeof(struct ctl_table));
if (write) {
tbl.data = tmp;
- tbl.maxlen = 8;
+ tbl.maxlen = sizeof(tmp);
} else {
tbl.data = net->sctp.sctp_hmac_alg ? : none;
tbl.maxlen = strlen(tbl.data);
}
- ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
- if (write) {
+ ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
+ if (write && ret == 0) {
#ifdef CONFIG_CRYPTO_MD5
if (!strncmp(tmp, "md5", 3)) {
net->sctp.sctp_hmac_alg = "md5";
- changed = 1;
+ changed = true;
}
#endif
#ifdef CONFIG_CRYPTO_SHA1
if (!strncmp(tmp, "sha1", 4)) {
net->sctp.sctp_hmac_alg = "sha1";
- changed = 1;
+ changed = true;
}
#endif
if (!strncmp(tmp, "none", 4)) {
net->sctp.sctp_hmac_alg = NULL;
- changed = 1;
+ changed = true;
}
-
if (!changed)
ret = -EINVAL;
}
@@ -354,11 +367,10 @@ static int proc_sctp_do_rto_min(struct ctl_table *ctl, int write,
loff_t *ppos)
{
struct net *net = current->nsproxy->net_ns;
- int new_value;
- struct ctl_table tbl;
unsigned int min = *(unsigned int *) ctl->extra1;
unsigned int max = *(unsigned int *) ctl->extra2;
- int ret;
+ struct ctl_table tbl;
+ int ret, new_value;
memset(&tbl, 0, sizeof(struct ctl_table));
tbl.maxlen = sizeof(unsigned int);
@@ -367,12 +379,15 @@ static int proc_sctp_do_rto_min(struct ctl_table *ctl, int write,
tbl.data = &new_value;
else
tbl.data = &net->sctp.rto_min;
+
ret = proc_dointvec(&tbl, write, buffer, lenp, ppos);
- if (write) {
- if (ret || new_value > max || new_value < min)
+ if (write && ret == 0) {
+ if (new_value > max || new_value < min)
return -EINVAL;
+
net->sctp.rto_min = new_value;
}
+
return ret;
}
@@ -381,11 +396,10 @@ static int proc_sctp_do_rto_max(struct ctl_table *ctl, int write,
loff_t *ppos)
{
struct net *net = current->nsproxy->net_ns;
- int new_value;
- struct ctl_table tbl;
unsigned int min = *(unsigned int *) ctl->extra1;
unsigned int max = *(unsigned int *) ctl->extra2;
- int ret;
+ struct ctl_table tbl;
+ int ret, new_value;
memset(&tbl, 0, sizeof(struct ctl_table));
tbl.maxlen = sizeof(unsigned int);
@@ -394,15 +408,28 @@ static int proc_sctp_do_rto_max(struct ctl_table *ctl, int write,
tbl.data = &new_value;
else
tbl.data = &net->sctp.rto_max;
+
ret = proc_dointvec(&tbl, write, buffer, lenp, ppos);
- if (write) {
- if (ret || new_value > max || new_value < min)
+ if (write && ret == 0) {
+ if (new_value > max || new_value < min)
return -EINVAL;
+
net->sctp.rto_max = new_value;
}
+
return ret;
}
+static int proc_sctp_do_alpha_beta(struct ctl_table *ctl, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ pr_warn_once("Changing rto_alpha or rto_beta may lead to "
+ "suboptimal rtt/srtt estimations!\n");
+
+ return proc_dointvec_minmax(ctl, write, buffer, lenp, ppos);
+}
+
static int proc_sctp_do_auth(struct ctl_table *ctl, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos)
@@ -420,8 +447,7 @@ static int proc_sctp_do_auth(struct ctl_table *ctl, int write,
tbl.data = &net->sctp.auth_enable;
ret = proc_dointvec(&tbl, write, buffer, lenp, ppos);
-
- if (write) {
+ if (write && ret == 0) {
struct sock *sk = net->sctp.ctl_sock;
net->sctp.auth_enable = new_value;
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 1d348d15b33..7dd672fa651 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -72,7 +72,7 @@ static struct sctp_transport *sctp_transport_init(struct net *net,
*/
peer->rto = msecs_to_jiffies(net->sctp.rto_initial);
- peer->last_time_heard = jiffies;
+ peer->last_time_heard = ktime_get();
peer->last_time_ecne_reduced = jiffies;
peer->param_flags = SPP_HB_DISABLE |
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index 85c64658bd0..b6842fdb53d 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -366,9 +366,10 @@ fail:
* specification [SCTP] and any extensions for a list of possible
* error formats.
*/
-struct sctp_ulpevent *sctp_ulpevent_make_remote_error(
- const struct sctp_association *asoc, struct sctp_chunk *chunk,
- __u16 flags, gfp_t gfp)
+struct sctp_ulpevent *
+sctp_ulpevent_make_remote_error(const struct sctp_association *asoc,
+ struct sctp_chunk *chunk, __u16 flags,
+ gfp_t gfp)
{
struct sctp_ulpevent *event;
struct sctp_remote_error *sre;
@@ -387,8 +388,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_remote_error(
/* Copy the skb to a new skb with room for us to prepend
* notification with.
*/
- skb = skb_copy_expand(chunk->skb, sizeof(struct sctp_remote_error),
- 0, gfp);
+ skb = skb_copy_expand(chunk->skb, sizeof(*sre), 0, gfp);
/* Pull off the rest of the cause TLV from the chunk. */
skb_pull(chunk->skb, elen);
@@ -399,62 +399,21 @@ struct sctp_ulpevent *sctp_ulpevent_make_remote_error(
event = sctp_skb2event(skb);
sctp_ulpevent_init(event, MSG_NOTIFICATION, skb->truesize);
- sre = (struct sctp_remote_error *)
- skb_push(skb, sizeof(struct sctp_remote_error));
+ sre = (struct sctp_remote_error *) skb_push(skb, sizeof(*sre));
/* Trim the buffer to the right length. */
- skb_trim(skb, sizeof(struct sctp_remote_error) + elen);
+ skb_trim(skb, sizeof(*sre) + elen);
- /* Socket Extensions for SCTP
- * 5.3.1.3 SCTP_REMOTE_ERROR
- *
- * sre_type:
- * It should be SCTP_REMOTE_ERROR.
- */
+ /* RFC6458, Section 6.1.3. SCTP_REMOTE_ERROR */
+ memset(sre, 0, sizeof(*sre));
sre->sre_type = SCTP_REMOTE_ERROR;
-
- /*
- * Socket Extensions for SCTP
- * 5.3.1.3 SCTP_REMOTE_ERROR
- *
- * sre_flags: 16 bits (unsigned integer)
- * Currently unused.
- */
sre->sre_flags = 0;
-
- /* Socket Extensions for SCTP
- * 5.3.1.3 SCTP_REMOTE_ERROR
- *
- * sre_length: sizeof (__u32)
- *
- * This field is the total length of the notification data,
- * including the notification header.
- */
sre->sre_length = skb->len;
-
- /* Socket Extensions for SCTP
- * 5.3.1.3 SCTP_REMOTE_ERROR
- *
- * sre_error: 16 bits (unsigned integer)
- * This value represents one of the Operational Error causes defined in
- * the SCTP specification, in network byte order.
- */
sre->sre_error = cause;
-
- /* Socket Extensions for SCTP
- * 5.3.1.3 SCTP_REMOTE_ERROR
- *
- * sre_assoc_id: sizeof (sctp_assoc_t)
- *
- * The association id field, holds the identifier for the association.
- * All notifications for a given association have the same association
- * identifier. For TCP style socket, this field is ignored.
- */
sctp_ulpevent_set_owner(event, asoc);
sre->sre_assoc_id = sctp_assoc2id(asoc);
return event;
-
fail:
return NULL;
}
@@ -899,7 +858,9 @@ __u16 sctp_ulpevent_get_notification_type(const struct sctp_ulpevent *event)
return notification->sn_header.sn_type;
}
-/* Copy out the sndrcvinfo into a msghdr. */
+/* RFC6458, Section 5.3.2. SCTP Header Information Structure
+ * (SCTP_SNDRCV, DEPRECATED)
+ */
void sctp_ulpevent_read_sndrcvinfo(const struct sctp_ulpevent *event,
struct msghdr *msghdr)
{
@@ -908,74 +869,21 @@ void sctp_ulpevent_read_sndrcvinfo(const struct sctp_ulpevent *event,
if (sctp_ulpevent_is_notification(event))
return;
- /* Sockets API Extensions for SCTP
- * Section 5.2.2 SCTP Header Information Structure (SCTP_SNDRCV)
- *
- * sinfo_stream: 16 bits (unsigned integer)
- *
- * For recvmsg() the SCTP stack places the message's stream number in
- * this value.
- */
+ memset(&sinfo, 0, sizeof(sinfo));
sinfo.sinfo_stream = event->stream;
- /* sinfo_ssn: 16 bits (unsigned integer)
- *
- * For recvmsg() this value contains the stream sequence number that
- * the remote endpoint placed in the DATA chunk. For fragmented
- * messages this is the same number for all deliveries of the message
- * (if more than one recvmsg() is needed to read the message).
- */
sinfo.sinfo_ssn = event->ssn;
- /* sinfo_ppid: 32 bits (unsigned integer)
- *
- * In recvmsg() this value is
- * the same information that was passed by the upper layer in the peer
- * application. Please note that byte order issues are NOT accounted
- * for and this information is passed opaquely by the SCTP stack from
- * one end to the other.
- */
sinfo.sinfo_ppid = event->ppid;
- /* sinfo_flags: 16 bits (unsigned integer)
- *
- * This field may contain any of the following flags and is composed of
- * a bitwise OR of these values.
- *
- * recvmsg() flags:
- *
- * SCTP_UNORDERED - This flag is present when the message was sent
- * non-ordered.
- */
sinfo.sinfo_flags = event->flags;
- /* sinfo_tsn: 32 bit (unsigned integer)
- *
- * For the receiving side, this field holds a TSN that was
- * assigned to one of the SCTP Data Chunks.
- */
sinfo.sinfo_tsn = event->tsn;
- /* sinfo_cumtsn: 32 bit (unsigned integer)
- *
- * This field will hold the current cumulative TSN as
- * known by the underlying SCTP layer. Note this field is
- * ignored when sending and only valid for a receive
- * operation when sinfo_flags are set to SCTP_UNORDERED.
- */
sinfo.sinfo_cumtsn = event->cumtsn;
- /* sinfo_assoc_id: sizeof (sctp_assoc_t)
- *
- * The association handle field, sinfo_assoc_id, holds the identifier
- * for the association announced in the COMMUNICATION_UP notification.
- * All notifications for a given association have the same identifier.
- * Ignored for one-to-one style sockets.
- */
sinfo.sinfo_assoc_id = sctp_assoc2id(event->asoc);
-
- /* context value that is set via SCTP_CONTEXT socket option. */
+ /* Context value that is set via SCTP_CONTEXT socket option. */
sinfo.sinfo_context = event->asoc->default_rcv_context;
-
/* These fields are not used while receiving. */
sinfo.sinfo_timetolive = 0;
put_cmsg(msghdr, IPPROTO_SCTP, SCTP_SNDRCV,
- sizeof(struct sctp_sndrcvinfo), (void *)&sinfo);
+ sizeof(sinfo), &sinfo);
}
/* Do accounting for bytes received and hold a reference to the association
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 5285ead196c..f7736671742 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -296,7 +296,7 @@ static void
rpcauth_unhash_cred_locked(struct rpc_cred *cred)
{
hlist_del_rcu(&cred->cr_hash);
- smp_mb__before_clear_bit();
+ smp_mb__before_atomic();
clear_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags);
}
@@ -592,6 +592,7 @@ rpcauth_lookupcred(struct rpc_auth *auth, int flags)
put_group_info(acred.group_info);
return ret;
}
+EXPORT_SYMBOL_GPL(rpcauth_lookupcred);
void
rpcauth_init_cred(struct rpc_cred *cred, const struct auth_cred *acred,
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 36e431ee1c9..b6e440baccc 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -143,7 +143,7 @@ gss_cred_set_ctx(struct rpc_cred *cred, struct gss_cl_ctx *ctx)
gss_get_ctx(ctx);
rcu_assign_pointer(gss_cred->gc_ctx, ctx);
set_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
- smp_mb__before_clear_bit();
+ smp_mb__before_atomic();
clear_bit(RPCAUTH_CRED_NEW, &cred->cr_flags);
}
diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c
index 27ce2624093..92d5ab99fbf 100644
--- a/net/sunrpc/auth_gss/gss_mech_switch.c
+++ b/net/sunrpc/auth_gss/gss_mech_switch.c
@@ -218,10 +218,8 @@ static struct gss_api_mech *_gss_mech_get_by_pseudoflavor(u32 pseudoflavor)
spin_lock(&registered_mechs_lock);
list_for_each_entry(pos, &registered_mechs, gm_list) {
- if (!mech_supports_pseudoflavor(pos, pseudoflavor)) {
- module_put(pos->gm_owner);
+ if (!mech_supports_pseudoflavor(pos, pseudoflavor))
continue;
- }
if (try_module_get(pos->gm_owner))
gm = pos;
break;
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 0f73f450774..4ce5eccec1f 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -1503,6 +1503,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
if (unwrap_integ_data(rqstp, &rqstp->rq_arg,
gc->gc_seq, rsci->mechctx))
goto garbage_args;
+ rqstp->rq_auth_slack = RPC_MAX_AUTH_SIZE;
break;
case RPC_GSS_SVC_PRIVACY:
/* placeholders for length and seq. number: */
@@ -1511,6 +1512,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
if (unwrap_priv_data(rqstp, &rqstp->rq_arg,
gc->gc_seq, rsci->mechctx))
goto garbage_args;
+ rqstp->rq_auth_slack = RPC_MAX_AUTH_SIZE * 2;
break;
default:
goto auth_err;
diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c
index 3513d559bc4..9761a0da964 100644
--- a/net/sunrpc/backchannel_rqst.c
+++ b/net/sunrpc/backchannel_rqst.c
@@ -244,10 +244,10 @@ void xprt_free_bc_request(struct rpc_rqst *req)
dprintk("RPC: free backchannel req=%p\n", req);
req->rq_connect_cookie = xprt->connect_cookie - 1;
- smp_mb__before_clear_bit();
+ smp_mb__before_atomic();
WARN_ON_ONCE(!test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state));
clear_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state);
- smp_mb__after_clear_bit();
+ smp_mb__after_atomic();
if (!xprt_need_to_requeue(xprt)) {
/*
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index ae333c1845b..06636214113 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -374,7 +374,7 @@ void sunrpc_destroy_cache_detail(struct cache_detail *cd)
}
return;
out:
- printk(KERN_ERR "nfsd: failed to unregister %s cache\n", cd->name);
+ printk(KERN_ERR "RPC: failed to unregister %s cache\n", cd->name);
}
EXPORT_SYMBOL_GPL(sunrpc_destroy_cache_detail);
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 25578afe154..c0365c14b85 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -832,7 +832,8 @@ static void rpc_async_schedule(struct work_struct *work)
* @size: requested byte size
*
* To prevent rpciod from hanging, this allocator never sleeps,
- * returning NULL if the request cannot be serviced immediately.
+ * returning NULL and suppressing warning if the request cannot be serviced
+ * immediately.
* The caller can arrange to sleep in a way that is safe for rpciod.
*
* Most requests are 'small' (under 2KiB) and can be serviced from a
@@ -845,7 +846,7 @@ static void rpc_async_schedule(struct work_struct *work)
void *rpc_malloc(struct rpc_task *task, size_t size)
{
struct rpc_buffer *buf;
- gfp_t gfp = GFP_NOWAIT;
+ gfp_t gfp = GFP_NOWAIT | __GFP_NOWARN;
if (RPC_IS_SWAPPER(task))
gfp |= __GFP_MEMALLOC;
diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c
index 0a648c502fc..2df87f78e51 100644
--- a/net/sunrpc/socklib.c
+++ b/net/sunrpc/socklib.c
@@ -173,7 +173,8 @@ int csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb)
return -1;
if (csum_fold(desc.csum))
return -1;
- if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
+ if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
+ !skb->csum_complete_sw)
netdev_rx_csum_fault(skb->dev);
return 0;
no_checksum:
diff --git a/net/sunrpc/sunrpc.h b/net/sunrpc/sunrpc.h
index 14c9f6d1c5f..f2b7cb540e6 100644
--- a/net/sunrpc/sunrpc.h
+++ b/net/sunrpc/sunrpc.h
@@ -43,6 +43,19 @@ static inline int rpc_reply_expected(struct rpc_task *task)
(task->tk_msg.rpc_proc->p_decode != NULL);
}
+static inline int sock_is_loopback(struct sock *sk)
+{
+ struct dst_entry *dst;
+ int loopback = 0;
+ rcu_read_lock();
+ dst = rcu_dereference(sk->sk_dst_cache);
+ if (dst && dst->dev &&
+ (dst->dev->features & NETIF_F_LOOPBACK))
+ loopback = 1;
+ rcu_read_unlock();
+ return loopback;
+}
+
int svc_send_common(struct socket *sock, struct xdr_buf *xdr,
struct page *headpage, unsigned long headoffset,
struct page *tailpage, unsigned long tailoffset);
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 06c6ff0cb91..b4737fbdec1 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -597,6 +597,7 @@ static int svc_alloc_arg(struct svc_rqst *rqstp)
}
rqstp->rq_pages[i] = p;
}
+ rqstp->rq_page_end = &rqstp->rq_pages[i];
rqstp->rq_pages[i++] = NULL; /* this might be seen in nfs_read_actor */
/* Make arg->head point to first page and arg->pages point to rest */
@@ -730,6 +731,8 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
newxpt = xprt->xpt_ops->xpo_accept(xprt);
if (newxpt)
svc_add_new_temp_xprt(serv, newxpt);
+ else
+ module_put(xprt->xpt_class->xcl_owner);
} else if (xprt->xpt_ops->xpo_has_wspace(xprt)) {
/* XPT_DATA|XPT_DEFERRED case: */
dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n",
@@ -793,7 +796,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
clear_bit(XPT_OLD, &xprt->xpt_flags);
- rqstp->rq_secure = svc_port_is_privileged(svc_addr(rqstp));
+ rqstp->rq_secure = xprt->xpt_ops->xpo_secure_port(rqstp);
rqstp->rq_chandle.defer = svc_defer;
if (serv->sv_stats)
diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c
index 2af7b0cba43..79c0f3459b5 100644
--- a/net/sunrpc/svcauth.c
+++ b/net/sunrpc/svcauth.c
@@ -54,6 +54,8 @@ svc_authenticate(struct svc_rqst *rqstp, __be32 *authp)
}
spin_unlock(&authtab_lock);
+ rqstp->rq_auth_slack = 0;
+
rqstp->rq_authop = aops;
return aops->accept(rqstp, authp);
}
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 43bcb4699d6..b507cd327d9 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -400,6 +400,12 @@ static void svc_sock_setbufsize(struct socket *sock, unsigned int snd,
release_sock(sock->sk);
#endif
}
+
+static int svc_sock_secure_port(struct svc_rqst *rqstp)
+{
+ return svc_port_is_privileged(svc_addr(rqstp));
+}
+
/*
* INET callback when data has been received on the socket.
*/
@@ -678,6 +684,7 @@ static struct svc_xprt_ops svc_udp_ops = {
.xpo_prep_reply_hdr = svc_udp_prep_reply_hdr,
.xpo_has_wspace = svc_udp_has_wspace,
.xpo_accept = svc_udp_accept,
+ .xpo_secure_port = svc_sock_secure_port,
};
static struct svc_xprt_class svc_udp_class = {
@@ -842,8 +849,7 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
* tell us anything. For now just warn about unpriv connections.
*/
if (!svc_port_is_privileged(sin)) {
- dprintk(KERN_WARNING
- "%s: connect from unprivileged port: %s\n",
+ dprintk("%s: connect from unprivileged port: %s\n",
serv->sv_name,
__svc_print_addr(sin, buf, sizeof(buf)));
}
@@ -867,6 +873,10 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
}
svc_xprt_set_local(&newsvsk->sk_xprt, sin, slen);
+ if (sock_is_loopback(newsock->sk))
+ set_bit(XPT_LOCAL, &newsvsk->sk_xprt.xpt_flags);
+ else
+ clear_bit(XPT_LOCAL, &newsvsk->sk_xprt.xpt_flags);
if (serv->sv_stats)
serv->sv_stats->nettcpconn++;
@@ -1112,6 +1122,7 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
rqstp->rq_xprt_ctxt = NULL;
rqstp->rq_prot = IPPROTO_TCP;
+ rqstp->rq_local = !!test_bit(XPT_LOCAL, &svsk->sk_xprt.xpt_flags);
p = (__be32 *)rqstp->rq_arg.head[0].iov_base;
calldir = p[1];
@@ -1234,6 +1245,7 @@ static struct svc_xprt_ops svc_tcp_bc_ops = {
.xpo_detach = svc_bc_tcp_sock_detach,
.xpo_free = svc_bc_sock_free,
.xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr,
+ .xpo_secure_port = svc_sock_secure_port,
};
static struct svc_xprt_class svc_tcp_bc_class = {
@@ -1272,6 +1284,7 @@ static struct svc_xprt_ops svc_tcp_ops = {
.xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr,
.xpo_has_wspace = svc_tcp_has_wspace,
.xpo_accept = svc_tcp_accept,
+ .xpo_secure_port = svc_sock_secure_port,
};
static struct svc_xprt_class svc_tcp_class = {
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index dd97ba3c445..23fb4e75e24 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -462,6 +462,7 @@ void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
struct kvec *iov = buf->head;
int scratch_len = buf->buflen - buf->page_len - buf->tail[0].iov_len;
+ xdr_set_scratch_buffer(xdr, NULL, 0);
BUG_ON(scratch_len < 0);
xdr->buf = buf;
xdr->iov = iov;
@@ -482,6 +483,73 @@ void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
EXPORT_SYMBOL_GPL(xdr_init_encode);
/**
+ * xdr_commit_encode - Ensure all data is written to buffer
+ * @xdr: pointer to xdr_stream
+ *
+ * We handle encoding across page boundaries by giving the caller a
+ * temporary location to write to, then later copying the data into
+ * place; xdr_commit_encode does that copying.
+ *
+ * Normally the caller doesn't need to call this directly, as the
+ * following xdr_reserve_space will do it. But an explicit call may be
+ * required at the end of encoding, or any other time when the xdr_buf
+ * data might be read.
+ */
+void xdr_commit_encode(struct xdr_stream *xdr)
+{
+ int shift = xdr->scratch.iov_len;
+ void *page;
+
+ if (shift == 0)
+ return;
+ page = page_address(*xdr->page_ptr);
+ memcpy(xdr->scratch.iov_base, page, shift);
+ memmove(page, page + shift, (void *)xdr->p - page);
+ xdr->scratch.iov_len = 0;
+}
+EXPORT_SYMBOL_GPL(xdr_commit_encode);
+
+__be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr, size_t nbytes)
+{
+ static __be32 *p;
+ int space_left;
+ int frag1bytes, frag2bytes;
+
+ if (nbytes > PAGE_SIZE)
+ return NULL; /* Bigger buffers require special handling */
+ if (xdr->buf->len + nbytes > xdr->buf->buflen)
+ return NULL; /* Sorry, we're totally out of space */
+ frag1bytes = (xdr->end - xdr->p) << 2;
+ frag2bytes = nbytes - frag1bytes;
+ if (xdr->iov)
+ xdr->iov->iov_len += frag1bytes;
+ else
+ xdr->buf->page_len += frag1bytes;
+ xdr->page_ptr++;
+ xdr->iov = NULL;
+ /*
+ * If the last encode didn't end exactly on a page boundary, the
+ * next one will straddle boundaries. Encode into the next
+ * page, then copy it back later in xdr_commit_encode. We use
+ * the "scratch" iov to track any temporarily unused fragment of
+ * space at the end of the previous buffer:
+ */
+ xdr->scratch.iov_base = xdr->p;
+ xdr->scratch.iov_len = frag1bytes;
+ p = page_address(*xdr->page_ptr);
+ /*
+ * Note this is where the next encode will start after we've
+ * shifted this one back:
+ */
+ xdr->p = (void *)p + frag2bytes;
+ space_left = xdr->buf->buflen - xdr->buf->len;
+ xdr->end = (void *)p + min_t(int, space_left, PAGE_SIZE);
+ xdr->buf->page_len += frag2bytes;
+ xdr->buf->len += nbytes;
+ return p;
+}
+
+/**
* xdr_reserve_space - Reserve buffer space for sending
* @xdr: pointer to xdr_stream
* @nbytes: number of bytes to reserve
@@ -495,20 +563,122 @@ __be32 * xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes)
__be32 *p = xdr->p;
__be32 *q;
+ xdr_commit_encode(xdr);
/* align nbytes on the next 32-bit boundary */
nbytes += 3;
nbytes &= ~3;
q = p + (nbytes >> 2);
if (unlikely(q > xdr->end || q < p))
- return NULL;
+ return xdr_get_next_encode_buffer(xdr, nbytes);
xdr->p = q;
- xdr->iov->iov_len += nbytes;
+ if (xdr->iov)
+ xdr->iov->iov_len += nbytes;
+ else
+ xdr->buf->page_len += nbytes;
xdr->buf->len += nbytes;
return p;
}
EXPORT_SYMBOL_GPL(xdr_reserve_space);
/**
+ * xdr_truncate_encode - truncate an encode buffer
+ * @xdr: pointer to xdr_stream
+ * @len: new length of buffer
+ *
+ * Truncates the xdr stream, so that xdr->buf->len == len,
+ * and xdr->p points at offset len from the start of the buffer, and
+ * head, tail, and page lengths are adjusted to correspond.
+ *
+ * If this means moving xdr->p to a different buffer, we assume that
+ * that the end pointer should be set to the end of the current page,
+ * except in the case of the head buffer when we assume the head
+ * buffer's current length represents the end of the available buffer.
+ *
+ * This is *not* safe to use on a buffer that already has inlined page
+ * cache pages (as in a zero-copy server read reply), except for the
+ * simple case of truncating from one position in the tail to another.
+ *
+ */
+void xdr_truncate_encode(struct xdr_stream *xdr, size_t len)
+{
+ struct xdr_buf *buf = xdr->buf;
+ struct kvec *head = buf->head;
+ struct kvec *tail = buf->tail;
+ int fraglen;
+ int new, old;
+
+ if (len > buf->len) {
+ WARN_ON_ONCE(1);
+ return;
+ }
+ xdr_commit_encode(xdr);
+
+ fraglen = min_t(int, buf->len - len, tail->iov_len);
+ tail->iov_len -= fraglen;
+ buf->len -= fraglen;
+ if (tail->iov_len && buf->len == len) {
+ xdr->p = tail->iov_base + tail->iov_len;
+ /* xdr->end, xdr->iov should be set already */
+ return;
+ }
+ WARN_ON_ONCE(fraglen);
+ fraglen = min_t(int, buf->len - len, buf->page_len);
+ buf->page_len -= fraglen;
+ buf->len -= fraglen;
+
+ new = buf->page_base + buf->page_len;
+ old = new + fraglen;
+ xdr->page_ptr -= (old >> PAGE_SHIFT) - (new >> PAGE_SHIFT);
+
+ if (buf->page_len && buf->len == len) {
+ xdr->p = page_address(*xdr->page_ptr);
+ xdr->end = (void *)xdr->p + PAGE_SIZE;
+ xdr->p = (void *)xdr->p + (new % PAGE_SIZE);
+ /* xdr->iov should already be NULL */
+ return;
+ }
+ if (fraglen) {
+ xdr->end = head->iov_base + head->iov_len;
+ xdr->page_ptr--;
+ }
+ /* (otherwise assume xdr->end is already set) */
+ head->iov_len = len;
+ buf->len = len;
+ xdr->p = head->iov_base + head->iov_len;
+ xdr->iov = buf->head;
+}
+EXPORT_SYMBOL(xdr_truncate_encode);
+
+/**
+ * xdr_restrict_buflen - decrease available buffer space
+ * @xdr: pointer to xdr_stream
+ * @newbuflen: new maximum number of bytes available
+ *
+ * Adjust our idea of how much space is available in the buffer.
+ * If we've already used too much space in the buffer, returns -1.
+ * If the available space is already smaller than newbuflen, returns 0
+ * and does nothing. Otherwise, adjusts xdr->buf->buflen to newbuflen
+ * and ensures xdr->end is set at most offset newbuflen from the start
+ * of the buffer.
+ */
+int xdr_restrict_buflen(struct xdr_stream *xdr, int newbuflen)
+{
+ struct xdr_buf *buf = xdr->buf;
+ int left_in_this_buf = (void *)xdr->end - (void *)xdr->p;
+ int end_offset = buf->len + left_in_this_buf;
+
+ if (newbuflen < 0 || newbuflen < buf->len)
+ return -1;
+ if (newbuflen > buf->buflen)
+ return 0;
+ if (newbuflen < end_offset)
+ xdr->end = (void *)xdr->end + newbuflen - end_offset;
+ buf->buflen = newbuflen;
+ return 0;
+}
+EXPORT_SYMBOL(xdr_restrict_buflen);
+
+/**
* xdr_write_pages - Insert a list of pages into an XDR buffer for sending
* @xdr: pointer to xdr_stream
* @pages: list of pages
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index d173f79947c..c3b2b3369e5 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -71,24 +71,6 @@ static void xprt_destroy(struct rpc_xprt *xprt);
static DEFINE_SPINLOCK(xprt_list_lock);
static LIST_HEAD(xprt_list);
-/*
- * The transport code maintains an estimate on the maximum number of out-
- * standing RPC requests, using a smoothed version of the congestion
- * avoidance implemented in 44BSD. This is basically the Van Jacobson
- * congestion algorithm: If a retransmit occurs, the congestion window is
- * halved; otherwise, it is incremented by 1/cwnd when
- *
- * - a reply is received and
- * - a full number of requests are outstanding and
- * - the congestion window hasn't been updated recently.
- */
-#define RPC_CWNDSHIFT (8U)
-#define RPC_CWNDSCALE (1U << RPC_CWNDSHIFT)
-#define RPC_INITCWND RPC_CWNDSCALE
-#define RPC_MAXCWND(xprt) ((xprt)->max_reqs << RPC_CWNDSHIFT)
-
-#define RPCXPRT_CONGESTED(xprt) ((xprt)->cong >= (xprt)->cwnd)
-
/**
* xprt_register_transport - register a transport implementation
* @transport: transport to register
@@ -230,9 +212,9 @@ static void xprt_clear_locked(struct rpc_xprt *xprt)
{
xprt->snd_task = NULL;
if (!test_bit(XPRT_CLOSE_WAIT, &xprt->state)) {
- smp_mb__before_clear_bit();
+ smp_mb__before_atomic();
clear_bit(XPRT_LOCKED, &xprt->state);
- smp_mb__after_clear_bit();
+ smp_mb__after_atomic();
} else
queue_work(rpciod_workqueue, &xprt->task_cleanup);
}
@@ -446,7 +428,15 @@ EXPORT_SYMBOL_GPL(xprt_release_rqst_cong);
* @task: recently completed RPC request used to adjust window
* @result: result code of completed RPC request
*
- * We use a time-smoothed congestion estimator to avoid heavy oscillation.
+ * The transport code maintains an estimate on the maximum number of out-
+ * standing RPC requests, using a smoothed version of the congestion
+ * avoidance implemented in 44BSD. This is basically the Van Jacobson
+ * congestion algorithm: If a retransmit occurs, the congestion window is
+ * halved; otherwise, it is incremented by 1/cwnd when
+ *
+ * - a reply is received and
+ * - a full number of requests are outstanding and
+ * - the congestion window hasn't been updated recently.
*/
void xprt_adjust_cwnd(struct rpc_xprt *xprt, struct rpc_task *task, int result)
{
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 96ead526b12..693966d3f33 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -78,8 +78,7 @@ static const char transfertypes[][12] = {
* elements. Segments are then coalesced when registered, if possible
* within the selected memreg mode.
*
- * Note, this routine is never called if the connection's memory
- * registration strategy is 0 (bounce buffers).
+ * Returns positive number of segments converted, or a negative errno.
*/
static int
@@ -102,10 +101,17 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
page_base = xdrbuf->page_base & ~PAGE_MASK;
p = 0;
while (len && n < nsegs) {
+ if (!ppages[p]) {
+ /* alloc the pagelist for receiving buffer */
+ ppages[p] = alloc_page(GFP_ATOMIC);
+ if (!ppages[p])
+ return -ENOMEM;
+ }
seg[n].mr_page = ppages[p];
seg[n].mr_offset = (void *)(unsigned long) page_base;
seg[n].mr_len = min_t(u32, PAGE_SIZE - page_base, len);
- BUG_ON(seg[n].mr_len > PAGE_SIZE);
+ if (seg[n].mr_len > PAGE_SIZE)
+ return -EIO;
len -= seg[n].mr_len;
++n;
++p;
@@ -114,7 +120,7 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
/* Message overflows the seg array */
if (len && n == nsegs)
- return 0;
+ return -EIO;
if (xdrbuf->tail[0].iov_len) {
/* the rpcrdma protocol allows us to omit any trailing
@@ -123,7 +129,7 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
return n;
if (n == nsegs)
/* Tail remains, but we're out of segments */
- return 0;
+ return -EIO;
seg[n].mr_page = NULL;
seg[n].mr_offset = xdrbuf->tail[0].iov_base;
seg[n].mr_len = xdrbuf->tail[0].iov_len;
@@ -164,15 +170,17 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
* Reply chunk (a counted array):
* N elements:
* 1 - N - HLOO - HLOO - ... - HLOO
+ *
+ * Returns positive RPC/RDMA header size, or negative errno.
*/
-static unsigned int
+static ssize_t
rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
struct rpcrdma_msg *headerp, enum rpcrdma_chunktype type)
{
struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
- int nsegs, nchunks = 0;
+ int n, nsegs, nchunks = 0;
unsigned int pos;
struct rpcrdma_mr_seg *seg = req->rl_segments;
struct rpcrdma_read_chunk *cur_rchunk = NULL;
@@ -198,12 +206,11 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
pos = target->head[0].iov_len;
nsegs = rpcrdma_convert_iovs(target, pos, type, seg, RPCRDMA_MAX_SEGS);
- if (nsegs == 0)
- return 0;
+ if (nsegs < 0)
+ return nsegs;
do {
- /* bind/register the memory, then build chunk from result. */
- int n = rpcrdma_register_external(seg, nsegs,
+ n = rpcrdma_register_external(seg, nsegs,
cur_wchunk != NULL, r_xprt);
if (n <= 0)
goto out;
@@ -248,10 +255,6 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
/* success. all failures return above */
req->rl_nchunks = nchunks;
- BUG_ON(nchunks == 0);
- BUG_ON((r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_FRMR)
- && (nchunks > 3));
-
/*
* finish off header. If write, marshal discrim and nchunks.
*/
@@ -278,8 +281,8 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
out:
for (pos = 0; nchunks--;)
pos += rpcrdma_deregister_external(
- &req->rl_segments[pos], r_xprt, NULL);
- return 0;
+ &req->rl_segments[pos], r_xprt);
+ return n;
}
/*
@@ -361,6 +364,8 @@ rpcrdma_inline_pullup(struct rpc_rqst *rqst, int pad)
* [1] -- the RPC header/data, marshaled by RPC and the NFS protocol.
* [2] -- optional padding.
* [3] -- if padded, header only in [1] and data here.
+ *
+ * Returns zero on success, otherwise a negative errno.
*/
int
@@ -370,7 +375,8 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
char *base;
- size_t hdrlen, rpclen, padlen;
+ size_t rpclen, padlen;
+ ssize_t hdrlen;
enum rpcrdma_chunktype rtype, wtype;
struct rpcrdma_msg *headerp;
@@ -441,14 +447,10 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
/* The following simplification is not true forever */
if (rtype != rpcrdma_noch && wtype == rpcrdma_replych)
wtype = rpcrdma_noch;
- BUG_ON(rtype != rpcrdma_noch && wtype != rpcrdma_noch);
-
- if (r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_BOUNCEBUFFERS &&
- (rtype != rpcrdma_noch || wtype != rpcrdma_noch)) {
- /* forced to "pure inline"? */
- dprintk("RPC: %s: too much data (%d/%d) for inline\n",
- __func__, rqst->rq_rcv_buf.len, rqst->rq_snd_buf.len);
- return -1;
+ if (rtype != rpcrdma_noch && wtype != rpcrdma_noch) {
+ dprintk("RPC: %s: cannot marshal multiple chunk lists\n",
+ __func__);
+ return -EIO;
}
hdrlen = 28; /*sizeof *headerp;*/
@@ -474,8 +476,11 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
headerp->rm_body.rm_padded.rm_pempty[1] = xdr_zero;
headerp->rm_body.rm_padded.rm_pempty[2] = xdr_zero;
hdrlen += 2 * sizeof(u32); /* extra words in padhdr */
- BUG_ON(wtype != rpcrdma_noch);
-
+ if (wtype != rpcrdma_noch) {
+ dprintk("RPC: %s: invalid chunk list\n",
+ __func__);
+ return -EIO;
+ }
} else {
headerp->rm_body.rm_nochunks.rm_empty[0] = xdr_zero;
headerp->rm_body.rm_nochunks.rm_empty[1] = xdr_zero;
@@ -492,8 +497,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
* on receive. Therefore, we request a reply chunk
* for non-writes wherever feasible and efficient.
*/
- if (wtype == rpcrdma_noch &&
- r_xprt->rx_ia.ri_memreg_strategy > RPCRDMA_REGISTER)
+ if (wtype == rpcrdma_noch)
wtype = rpcrdma_replych;
}
}
@@ -511,9 +515,8 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
hdrlen = rpcrdma_create_chunks(rqst,
&rqst->rq_rcv_buf, headerp, wtype);
}
-
- if (hdrlen == 0)
- return -1;
+ if (hdrlen < 0)
+ return hdrlen;
dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd padlen %zd"
" headerp 0x%p base 0x%p lkey 0x%x\n",
@@ -680,15 +683,11 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
rqst->rq_private_buf = rqst->rq_rcv_buf;
}
-/*
- * This function is called when an async event is posted to
- * the connection which changes the connection state. All it
- * does at this point is mark the connection up/down, the rpc
- * timers do the rest.
- */
void
-rpcrdma_conn_func(struct rpcrdma_ep *ep)
+rpcrdma_connect_worker(struct work_struct *work)
{
+ struct rpcrdma_ep *ep =
+ container_of(work, struct rpcrdma_ep, rep_connect_worker.work);
struct rpc_xprt *xprt = ep->rep_xprt;
spin_lock_bh(&xprt->transport_lock);
@@ -705,13 +704,15 @@ rpcrdma_conn_func(struct rpcrdma_ep *ep)
}
/*
- * This function is called when memory window unbind which we are waiting
- * for completes. Just use rr_func (zeroed by upcall) to signal completion.
+ * This function is called when an async event is posted to
+ * the connection which changes the connection state. All it
+ * does at this point is mark the connection up/down, the rpc
+ * timers do the rest.
*/
-static void
-rpcrdma_unbind_func(struct rpcrdma_rep *rep)
+void
+rpcrdma_conn_func(struct rpcrdma_ep *ep)
{
- wake_up(&rep->rr_unbind);
+ schedule_delayed_work(&ep->rep_connect_worker, 0);
}
/*
@@ -728,7 +729,8 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep)
struct rpc_xprt *xprt = rep->rr_xprt;
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
__be32 *iptr;
- int i, rdmalen, status;
+ int rdmalen, status;
+ unsigned long cwnd;
/* Check status. If bad, signal disconnect and return rep to pool */
if (rep->rr_len == ~0U) {
@@ -783,6 +785,7 @@ repost:
/* from here on, the reply is no longer an orphan */
req->rl_reply = rep;
+ xprt->reestablish_timeout = 0;
/* check for expected message types */
/* The order of some of these tests is important. */
@@ -857,26 +860,10 @@ badheader:
break;
}
- /* If using mw bind, start the deregister process now. */
- /* (Note: if mr_free(), cannot perform it here, in tasklet context) */
- if (req->rl_nchunks) switch (r_xprt->rx_ia.ri_memreg_strategy) {
- case RPCRDMA_MEMWINDOWS:
- for (i = 0; req->rl_nchunks-- > 1;)
- i += rpcrdma_deregister_external(
- &req->rl_segments[i], r_xprt, NULL);
- /* Optionally wait (not here) for unbinds to complete */
- rep->rr_func = rpcrdma_unbind_func;
- (void) rpcrdma_deregister_external(&req->rl_segments[i],
- r_xprt, rep);
- break;
- case RPCRDMA_MEMWINDOWS_ASYNC:
- for (i = 0; req->rl_nchunks--;)
- i += rpcrdma_deregister_external(&req->rl_segments[i],
- r_xprt, NULL);
- break;
- default:
- break;
- }
+ cwnd = xprt->cwnd;
+ xprt->cwnd = atomic_read(&r_xprt->rx_buf.rb_credits) << RPC_CWNDSHIFT;
+ if (xprt->cwnd > cwnd)
+ xprt_release_rqst_cong(rqst->rq_task);
dprintk("RPC: %s: xprt_complete_rqst(0x%p, 0x%p, %d)\n",
__func__, xprt, rqst, status);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 8d904e4eef1..8f92a61ee2d 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved.
* Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -69,7 +70,8 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
/* Set up the XDR head */
rqstp->rq_arg.head[0].iov_base = page_address(page);
- rqstp->rq_arg.head[0].iov_len = min(byte_count, ctxt->sge[0].length);
+ rqstp->rq_arg.head[0].iov_len =
+ min_t(size_t, byte_count, ctxt->sge[0].length);
rqstp->rq_arg.len = byte_count;
rqstp->rq_arg.buflen = byte_count;
@@ -85,7 +87,7 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
page = ctxt->pages[sge_no];
put_page(rqstp->rq_pages[sge_no]);
rqstp->rq_pages[sge_no] = page;
- bc -= min(bc, ctxt->sge[sge_no].length);
+ bc -= min_t(u32, bc, ctxt->sge[sge_no].length);
rqstp->rq_arg.buflen += ctxt->sge[sge_no].length;
sge_no++;
}
@@ -113,291 +115,265 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
rqstp->rq_arg.tail[0].iov_len = 0;
}
-/* Encode a read-chunk-list as an array of IB SGE
- *
- * Assumptions:
- * - chunk[0]->position points to pages[0] at an offset of 0
- * - pages[] is not physically or virtually contiguous and consists of
- * PAGE_SIZE elements.
- *
- * Output:
- * - sge array pointing into pages[] array.
- * - chunk_sge array specifying sge index and count for each
- * chunk in the read list
- *
- */
-static int map_read_chunks(struct svcxprt_rdma *xprt,
- struct svc_rqst *rqstp,
- struct svc_rdma_op_ctxt *head,
- struct rpcrdma_msg *rmsgp,
- struct svc_rdma_req_map *rpl_map,
- struct svc_rdma_req_map *chl_map,
- int ch_count,
- int byte_count)
+static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count)
{
- int sge_no;
- int sge_bytes;
- int page_off;
- int page_no;
- int ch_bytes;
- int ch_no;
- struct rpcrdma_read_chunk *ch;
+ if (rdma_node_get_transport(xprt->sc_cm_id->device->node_type) ==
+ RDMA_TRANSPORT_IWARP)
+ return 1;
+ else
+ return min_t(int, sge_count, xprt->sc_max_sge);
+}
- sge_no = 0;
- page_no = 0;
- page_off = 0;
- ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
- ch_no = 0;
- ch_bytes = ntohl(ch->rc_target.rs_length);
- head->arg.head[0] = rqstp->rq_arg.head[0];
- head->arg.tail[0] = rqstp->rq_arg.tail[0];
- head->arg.pages = &head->pages[head->count];
- head->hdr_count = head->count; /* save count of hdr pages */
- head->arg.page_base = 0;
- head->arg.page_len = ch_bytes;
- head->arg.len = rqstp->rq_arg.len + ch_bytes;
- head->arg.buflen = rqstp->rq_arg.buflen + ch_bytes;
- head->count++;
- chl_map->ch[0].start = 0;
- while (byte_count) {
- rpl_map->sge[sge_no].iov_base =
- page_address(rqstp->rq_arg.pages[page_no]) + page_off;
- sge_bytes = min_t(int, PAGE_SIZE-page_off, ch_bytes);
- rpl_map->sge[sge_no].iov_len = sge_bytes;
- /*
- * Don't bump head->count here because the same page
- * may be used by multiple SGE.
- */
- head->arg.pages[page_no] = rqstp->rq_arg.pages[page_no];
- rqstp->rq_respages = &rqstp->rq_arg.pages[page_no+1];
+typedef int (*rdma_reader_fn)(struct svcxprt_rdma *xprt,
+ struct svc_rqst *rqstp,
+ struct svc_rdma_op_ctxt *head,
+ int *page_no,
+ u32 *page_offset,
+ u32 rs_handle,
+ u32 rs_length,
+ u64 rs_offset,
+ int last);
+
+/* Issue an RDMA_READ using the local lkey to map the data sink */
+static int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt,
+ struct svc_rqst *rqstp,
+ struct svc_rdma_op_ctxt *head,
+ int *page_no,
+ u32 *page_offset,
+ u32 rs_handle,
+ u32 rs_length,
+ u64 rs_offset,
+ int last)
+{
+ struct ib_send_wr read_wr;
+ int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT;
+ struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt);
+ int ret, read, pno;
+ u32 pg_off = *page_offset;
+ u32 pg_no = *page_no;
+
+ ctxt->direction = DMA_FROM_DEVICE;
+ ctxt->read_hdr = head;
+ pages_needed =
+ min_t(int, pages_needed, rdma_read_max_sge(xprt, pages_needed));
+ read = min_t(int, pages_needed << PAGE_SHIFT, rs_length);
+
+ for (pno = 0; pno < pages_needed; pno++) {
+ int len = min_t(int, rs_length, PAGE_SIZE - pg_off);
+
+ head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no];
+ head->arg.page_len += len;
+ head->arg.len += len;
+ if (!pg_off)
+ head->count++;
+ rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1];
rqstp->rq_next_page = rqstp->rq_respages + 1;
+ ctxt->sge[pno].addr =
+ ib_dma_map_page(xprt->sc_cm_id->device,
+ head->arg.pages[pg_no], pg_off,
+ PAGE_SIZE - pg_off,
+ DMA_FROM_DEVICE);
+ ret = ib_dma_mapping_error(xprt->sc_cm_id->device,
+ ctxt->sge[pno].addr);
+ if (ret)
+ goto err;
+ atomic_inc(&xprt->sc_dma_used);
- byte_count -= sge_bytes;
- ch_bytes -= sge_bytes;
- sge_no++;
- /*
- * If all bytes for this chunk have been mapped to an
- * SGE, move to the next SGE
- */
- if (ch_bytes == 0) {
- chl_map->ch[ch_no].count =
- sge_no - chl_map->ch[ch_no].start;
- ch_no++;
- ch++;
- chl_map->ch[ch_no].start = sge_no;
- ch_bytes = ntohl(ch->rc_target.rs_length);
- /* If bytes remaining account for next chunk */
- if (byte_count) {
- head->arg.page_len += ch_bytes;
- head->arg.len += ch_bytes;
- head->arg.buflen += ch_bytes;
- }
+ /* The lkey here is either a local dma lkey or a dma_mr lkey */
+ ctxt->sge[pno].lkey = xprt->sc_dma_lkey;
+ ctxt->sge[pno].length = len;
+ ctxt->count++;
+
+ /* adjust offset and wrap to next page if needed */
+ pg_off += len;
+ if (pg_off == PAGE_SIZE) {
+ pg_off = 0;
+ pg_no++;
}
- /*
- * If this SGE consumed all of the page, move to the
- * next page
- */
- if ((sge_bytes + page_off) == PAGE_SIZE) {
- page_no++;
- page_off = 0;
- /*
- * If there are still bytes left to map, bump
- * the page count
- */
- if (byte_count)
- head->count++;
- } else
- page_off += sge_bytes;
+ rs_length -= len;
}
- BUG_ON(byte_count != 0);
- return sge_no;
+
+ if (last && rs_length == 0)
+ set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
+ else
+ clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
+
+ memset(&read_wr, 0, sizeof(read_wr));
+ read_wr.wr_id = (unsigned long)ctxt;
+ read_wr.opcode = IB_WR_RDMA_READ;
+ ctxt->wr_op = read_wr.opcode;
+ read_wr.send_flags = IB_SEND_SIGNALED;
+ read_wr.wr.rdma.rkey = rs_handle;
+ read_wr.wr.rdma.remote_addr = rs_offset;
+ read_wr.sg_list = ctxt->sge;
+ read_wr.num_sge = pages_needed;
+
+ ret = svc_rdma_send(xprt, &read_wr);
+ if (ret) {
+ pr_err("svcrdma: Error %d posting RDMA_READ\n", ret);
+ set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
+ goto err;
+ }
+
+ /* return current location in page array */
+ *page_no = pg_no;
+ *page_offset = pg_off;
+ ret = read;
+ atomic_inc(&rdma_stat_read);
+ return ret;
+ err:
+ svc_rdma_unmap_dma(ctxt);
+ svc_rdma_put_context(ctxt, 0);
+ return ret;
}
-/* Map a read-chunk-list to an XDR and fast register the page-list.
- *
- * Assumptions:
- * - chunk[0] position points to pages[0] at an offset of 0
- * - pages[] will be made physically contiguous by creating a one-off memory
- * region using the fastreg verb.
- * - byte_count is # of bytes in read-chunk-list
- * - ch_count is # of chunks in read-chunk-list
- *
- * Output:
- * - sge array pointing into pages[] array.
- * - chunk_sge array specifying sge index and count for each
- * chunk in the read list
- */
-static int fast_reg_read_chunks(struct svcxprt_rdma *xprt,
+/* Issue an RDMA_READ using an FRMR to map the data sink */
+static int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
struct svc_rqst *rqstp,
struct svc_rdma_op_ctxt *head,
- struct rpcrdma_msg *rmsgp,
- struct svc_rdma_req_map *rpl_map,
- struct svc_rdma_req_map *chl_map,
- int ch_count,
- int byte_count)
+ int *page_no,
+ u32 *page_offset,
+ u32 rs_handle,
+ u32 rs_length,
+ u64 rs_offset,
+ int last)
{
- int page_no;
- int ch_no;
- u32 offset;
- struct rpcrdma_read_chunk *ch;
- struct svc_rdma_fastreg_mr *frmr;
- int ret = 0;
+ struct ib_send_wr read_wr;
+ struct ib_send_wr inv_wr;
+ struct ib_send_wr fastreg_wr;
+ u8 key;
+ int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT;
+ struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt);
+ struct svc_rdma_fastreg_mr *frmr = svc_rdma_get_frmr(xprt);
+ int ret, read, pno;
+ u32 pg_off = *page_offset;
+ u32 pg_no = *page_no;
- frmr = svc_rdma_get_frmr(xprt);
if (IS_ERR(frmr))
return -ENOMEM;
- head->frmr = frmr;
- head->arg.head[0] = rqstp->rq_arg.head[0];
- head->arg.tail[0] = rqstp->rq_arg.tail[0];
- head->arg.pages = &head->pages[head->count];
- head->hdr_count = head->count; /* save count of hdr pages */
- head->arg.page_base = 0;
- head->arg.page_len = byte_count;
- head->arg.len = rqstp->rq_arg.len + byte_count;
- head->arg.buflen = rqstp->rq_arg.buflen + byte_count;
+ ctxt->direction = DMA_FROM_DEVICE;
+ ctxt->frmr = frmr;
+ pages_needed = min_t(int, pages_needed, xprt->sc_frmr_pg_list_len);
+ read = min_t(int, pages_needed << PAGE_SHIFT, rs_length);
- /* Fast register the page list */
- frmr->kva = page_address(rqstp->rq_arg.pages[0]);
+ frmr->kva = page_address(rqstp->rq_arg.pages[pg_no]);
frmr->direction = DMA_FROM_DEVICE;
frmr->access_flags = (IB_ACCESS_LOCAL_WRITE|IB_ACCESS_REMOTE_WRITE);
- frmr->map_len = byte_count;
- frmr->page_list_len = PAGE_ALIGN(byte_count) >> PAGE_SHIFT;
- for (page_no = 0; page_no < frmr->page_list_len; page_no++) {
- frmr->page_list->page_list[page_no] =
+ frmr->map_len = pages_needed << PAGE_SHIFT;
+ frmr->page_list_len = pages_needed;
+
+ for (pno = 0; pno < pages_needed; pno++) {
+ int len = min_t(int, rs_length, PAGE_SIZE - pg_off);
+
+ head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no];
+ head->arg.page_len += len;
+ head->arg.len += len;
+ if (!pg_off)
+ head->count++;
+ rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1];
+ rqstp->rq_next_page = rqstp->rq_respages + 1;
+ frmr->page_list->page_list[pno] =
ib_dma_map_page(xprt->sc_cm_id->device,
- rqstp->rq_arg.pages[page_no], 0,
+ head->arg.pages[pg_no], 0,
PAGE_SIZE, DMA_FROM_DEVICE);
- if (ib_dma_mapping_error(xprt->sc_cm_id->device,
- frmr->page_list->page_list[page_no]))
- goto fatal_err;
+ ret = ib_dma_mapping_error(xprt->sc_cm_id->device,
+ frmr->page_list->page_list[pno]);
+ if (ret)
+ goto err;
atomic_inc(&xprt->sc_dma_used);
- head->arg.pages[page_no] = rqstp->rq_arg.pages[page_no];
- }
- head->count += page_no;
-
- /* rq_respages points one past arg pages */
- rqstp->rq_respages = &rqstp->rq_arg.pages[page_no];
- rqstp->rq_next_page = rqstp->rq_respages + 1;
- /* Create the reply and chunk maps */
- offset = 0;
- ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
- for (ch_no = 0; ch_no < ch_count; ch_no++) {
- int len = ntohl(ch->rc_target.rs_length);
- rpl_map->sge[ch_no].iov_base = frmr->kva + offset;
- rpl_map->sge[ch_no].iov_len = len;
- chl_map->ch[ch_no].count = 1;
- chl_map->ch[ch_no].start = ch_no;
- offset += len;
- ch++;
+ /* adjust offset and wrap to next page if needed */
+ pg_off += len;
+ if (pg_off == PAGE_SIZE) {
+ pg_off = 0;
+ pg_no++;
+ }
+ rs_length -= len;
}
- ret = svc_rdma_fastreg(xprt, frmr);
- if (ret)
- goto fatal_err;
-
- return ch_no;
-
- fatal_err:
- printk("svcrdma: error fast registering xdr for xprt %p", xprt);
- svc_rdma_put_frmr(xprt, frmr);
- return -EIO;
-}
-
-static int rdma_set_ctxt_sge(struct svcxprt_rdma *xprt,
- struct svc_rdma_op_ctxt *ctxt,
- struct svc_rdma_fastreg_mr *frmr,
- struct kvec *vec,
- u64 *sgl_offset,
- int count)
-{
- int i;
- unsigned long off;
+ if (last && rs_length == 0)
+ set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
+ else
+ clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
- ctxt->count = count;
- ctxt->direction = DMA_FROM_DEVICE;
- for (i = 0; i < count; i++) {
- ctxt->sge[i].length = 0; /* in case map fails */
- if (!frmr) {
- BUG_ON(!virt_to_page(vec[i].iov_base));
- off = (unsigned long)vec[i].iov_base & ~PAGE_MASK;
- ctxt->sge[i].addr =
- ib_dma_map_page(xprt->sc_cm_id->device,
- virt_to_page(vec[i].iov_base),
- off,
- vec[i].iov_len,
- DMA_FROM_DEVICE);
- if (ib_dma_mapping_error(xprt->sc_cm_id->device,
- ctxt->sge[i].addr))
- return -EINVAL;
- ctxt->sge[i].lkey = xprt->sc_dma_lkey;
- atomic_inc(&xprt->sc_dma_used);
- } else {
- ctxt->sge[i].addr = (unsigned long)vec[i].iov_base;
- ctxt->sge[i].lkey = frmr->mr->lkey;
- }
- ctxt->sge[i].length = vec[i].iov_len;
- *sgl_offset = *sgl_offset + vec[i].iov_len;
+ /* Bump the key */
+ key = (u8)(frmr->mr->lkey & 0x000000FF);
+ ib_update_fast_reg_key(frmr->mr, ++key);
+
+ ctxt->sge[0].addr = (unsigned long)frmr->kva + *page_offset;
+ ctxt->sge[0].lkey = frmr->mr->lkey;
+ ctxt->sge[0].length = read;
+ ctxt->count = 1;
+ ctxt->read_hdr = head;
+
+ /* Prepare FASTREG WR */
+ memset(&fastreg_wr, 0, sizeof(fastreg_wr));
+ fastreg_wr.opcode = IB_WR_FAST_REG_MR;
+ fastreg_wr.send_flags = IB_SEND_SIGNALED;
+ fastreg_wr.wr.fast_reg.iova_start = (unsigned long)frmr->kva;
+ fastreg_wr.wr.fast_reg.page_list = frmr->page_list;
+ fastreg_wr.wr.fast_reg.page_list_len = frmr->page_list_len;
+ fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
+ fastreg_wr.wr.fast_reg.length = frmr->map_len;
+ fastreg_wr.wr.fast_reg.access_flags = frmr->access_flags;
+ fastreg_wr.wr.fast_reg.rkey = frmr->mr->lkey;
+ fastreg_wr.next = &read_wr;
+
+ /* Prepare RDMA_READ */
+ memset(&read_wr, 0, sizeof(read_wr));
+ read_wr.send_flags = IB_SEND_SIGNALED;
+ read_wr.wr.rdma.rkey = rs_handle;
+ read_wr.wr.rdma.remote_addr = rs_offset;
+ read_wr.sg_list = ctxt->sge;
+ read_wr.num_sge = 1;
+ if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_READ_W_INV) {
+ read_wr.opcode = IB_WR_RDMA_READ_WITH_INV;
+ read_wr.wr_id = (unsigned long)ctxt;
+ read_wr.ex.invalidate_rkey = ctxt->frmr->mr->lkey;
+ } else {
+ read_wr.opcode = IB_WR_RDMA_READ;
+ read_wr.next = &inv_wr;
+ /* Prepare invalidate */
+ memset(&inv_wr, 0, sizeof(inv_wr));
+ inv_wr.wr_id = (unsigned long)ctxt;
+ inv_wr.opcode = IB_WR_LOCAL_INV;
+ inv_wr.send_flags = IB_SEND_SIGNALED | IB_SEND_FENCE;
+ inv_wr.ex.invalidate_rkey = frmr->mr->lkey;
+ }
+ ctxt->wr_op = read_wr.opcode;
+
+ /* Post the chain */
+ ret = svc_rdma_send(xprt, &fastreg_wr);
+ if (ret) {
+ pr_err("svcrdma: Error %d posting RDMA_READ\n", ret);
+ set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
+ goto err;
}
- return 0;
-}
-static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count)
-{
- if ((rdma_node_get_transport(xprt->sc_cm_id->device->node_type) ==
- RDMA_TRANSPORT_IWARP) &&
- sge_count > 1)
- return 1;
- else
- return min_t(int, sge_count, xprt->sc_max_sge);
+ /* return current location in page array */
+ *page_no = pg_no;
+ *page_offset = pg_off;
+ ret = read;
+ atomic_inc(&rdma_stat_read);
+ return ret;
+ err:
+ svc_rdma_unmap_dma(ctxt);
+ svc_rdma_put_context(ctxt, 0);
+ svc_rdma_put_frmr(xprt, frmr);
+ return ret;
}
-/*
- * Use RDMA_READ to read data from the advertised client buffer into the
- * XDR stream starting at rq_arg.head[0].iov_base.
- * Each chunk in the array
- * contains the following fields:
- * discrim - '1', This isn't used for data placement
- * position - The xdr stream offset (the same for every chunk)
- * handle - RMR for client memory region
- * length - data transfer length
- * offset - 64 bit tagged offset in remote memory region
- *
- * On our side, we need to read into a pagelist. The first page immediately
- * follows the RPC header.
- *
- * This function returns:
- * 0 - No error and no read-list found.
- *
- * 1 - Successful read-list processing. The data is not yet in
- * the pagelist and therefore the RPC request must be deferred. The
- * I/O completion will enqueue the transport again and
- * svc_rdma_recvfrom will complete the request.
- *
- * <0 - Error processing/posting read-list.
- *
- * NOTE: The ctxt must not be touched after the last WR has been posted
- * because the I/O completion processing may occur on another
- * processor and free / modify the context. Ne touche pas!
- */
-static int rdma_read_xdr(struct svcxprt_rdma *xprt,
- struct rpcrdma_msg *rmsgp,
- struct svc_rqst *rqstp,
- struct svc_rdma_op_ctxt *hdr_ctxt)
+static int rdma_read_chunks(struct svcxprt_rdma *xprt,
+ struct rpcrdma_msg *rmsgp,
+ struct svc_rqst *rqstp,
+ struct svc_rdma_op_ctxt *head)
{
- struct ib_send_wr read_wr;
- struct ib_send_wr inv_wr;
- int err = 0;
- int ch_no;
- int ch_count;
- int byte_count;
- int sge_count;
- u64 sgl_offset;
+ int page_no, ch_count, ret;
struct rpcrdma_read_chunk *ch;
- struct svc_rdma_op_ctxt *ctxt = NULL;
- struct svc_rdma_req_map *rpl_map;
- struct svc_rdma_req_map *chl_map;
+ u32 page_offset, byte_count;
+ u64 rs_offset;
+ rdma_reader_fn reader;
/* If no read list is present, return 0 */
ch = svc_rdma_get_read_chunk(rmsgp);
@@ -408,122 +384,55 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt,
if (ch_count > RPCSVC_MAXPAGES)
return -EINVAL;
- /* Allocate temporary reply and chunk maps */
- rpl_map = svc_rdma_get_req_map();
- chl_map = svc_rdma_get_req_map();
+ /* The request is completed when the RDMA_READs complete. The
+ * head context keeps all the pages that comprise the
+ * request.
+ */
+ head->arg.head[0] = rqstp->rq_arg.head[0];
+ head->arg.tail[0] = rqstp->rq_arg.tail[0];
+ head->arg.pages = &head->pages[head->count];
+ head->hdr_count = head->count;
+ head->arg.page_base = 0;
+ head->arg.page_len = 0;
+ head->arg.len = rqstp->rq_arg.len;
+ head->arg.buflen = rqstp->rq_arg.buflen;
- if (!xprt->sc_frmr_pg_list_len)
- sge_count = map_read_chunks(xprt, rqstp, hdr_ctxt, rmsgp,
- rpl_map, chl_map, ch_count,
- byte_count);
+ /* Use FRMR if supported */
+ if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG)
+ reader = rdma_read_chunk_frmr;
else
- sge_count = fast_reg_read_chunks(xprt, rqstp, hdr_ctxt, rmsgp,
- rpl_map, chl_map, ch_count,
- byte_count);
- if (sge_count < 0) {
- err = -EIO;
- goto out;
- }
-
- sgl_offset = 0;
- ch_no = 0;
+ reader = rdma_read_chunk_lcl;
+ page_no = 0; page_offset = 0;
for (ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
- ch->rc_discrim != 0; ch++, ch_no++) {
- u64 rs_offset;
-next_sge:
- ctxt = svc_rdma_get_context(xprt);
- ctxt->direction = DMA_FROM_DEVICE;
- ctxt->frmr = hdr_ctxt->frmr;
- ctxt->read_hdr = NULL;
- clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
- clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
+ ch->rc_discrim != 0; ch++) {
- /* Prepare READ WR */
- memset(&read_wr, 0, sizeof read_wr);
- read_wr.wr_id = (unsigned long)ctxt;
- read_wr.opcode = IB_WR_RDMA_READ;
- ctxt->wr_op = read_wr.opcode;
- read_wr.send_flags = IB_SEND_SIGNALED;
- read_wr.wr.rdma.rkey = ntohl(ch->rc_target.rs_handle);
xdr_decode_hyper((__be32 *)&ch->rc_target.rs_offset,
&rs_offset);
- read_wr.wr.rdma.remote_addr = rs_offset + sgl_offset;
- read_wr.sg_list = ctxt->sge;
- read_wr.num_sge =
- rdma_read_max_sge(xprt, chl_map->ch[ch_no].count);
- err = rdma_set_ctxt_sge(xprt, ctxt, hdr_ctxt->frmr,
- &rpl_map->sge[chl_map->ch[ch_no].start],
- &sgl_offset,
- read_wr.num_sge);
- if (err) {
- svc_rdma_unmap_dma(ctxt);
- svc_rdma_put_context(ctxt, 0);
- goto out;
- }
- if (((ch+1)->rc_discrim == 0) &&
- (read_wr.num_sge == chl_map->ch[ch_no].count)) {
- /*
- * Mark the last RDMA_READ with a bit to
- * indicate all RPC data has been fetched from
- * the client and the RPC needs to be enqueued.
- */
- set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
- if (hdr_ctxt->frmr) {
- set_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
- /*
- * Invalidate the local MR used to map the data
- * sink.
- */
- if (xprt->sc_dev_caps &
- SVCRDMA_DEVCAP_READ_W_INV) {
- read_wr.opcode =
- IB_WR_RDMA_READ_WITH_INV;
- ctxt->wr_op = read_wr.opcode;
- read_wr.ex.invalidate_rkey =
- ctxt->frmr->mr->lkey;
- } else {
- /* Prepare INVALIDATE WR */
- memset(&inv_wr, 0, sizeof inv_wr);
- inv_wr.opcode = IB_WR_LOCAL_INV;
- inv_wr.send_flags = IB_SEND_SIGNALED;
- inv_wr.ex.invalidate_rkey =
- hdr_ctxt->frmr->mr->lkey;
- read_wr.next = &inv_wr;
- }
- }
- ctxt->read_hdr = hdr_ctxt;
- }
- /* Post the read */
- err = svc_rdma_send(xprt, &read_wr);
- if (err) {
- printk(KERN_ERR "svcrdma: Error %d posting RDMA_READ\n",
- err);
- set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
- svc_rdma_unmap_dma(ctxt);
- svc_rdma_put_context(ctxt, 0);
- goto out;
+ byte_count = ntohl(ch->rc_target.rs_length);
+
+ while (byte_count > 0) {
+ ret = reader(xprt, rqstp, head,
+ &page_no, &page_offset,
+ ntohl(ch->rc_target.rs_handle),
+ byte_count, rs_offset,
+ ((ch+1)->rc_discrim == 0) /* last */
+ );
+ if (ret < 0)
+ goto err;
+ byte_count -= ret;
+ rs_offset += ret;
+ head->arg.buflen += ret;
}
- atomic_inc(&rdma_stat_read);
-
- if (read_wr.num_sge < chl_map->ch[ch_no].count) {
- chl_map->ch[ch_no].count -= read_wr.num_sge;
- chl_map->ch[ch_no].start += read_wr.num_sge;
- goto next_sge;
- }
- sgl_offset = 0;
- err = 1;
}
-
- out:
- svc_rdma_put_req_map(rpl_map);
- svc_rdma_put_req_map(chl_map);
-
+ ret = 1;
+ err:
/* Detach arg pages. svc_recv will replenish them */
- for (ch_no = 0; &rqstp->rq_pages[ch_no] < rqstp->rq_respages; ch_no++)
- rqstp->rq_pages[ch_no] = NULL;
+ for (page_no = 0;
+ &rqstp->rq_pages[page_no] < rqstp->rq_respages; page_no++)
+ rqstp->rq_pages[page_no] = NULL;
- return err;
+ return ret;
}
static int rdma_read_complete(struct svc_rqst *rqstp,
@@ -595,13 +504,9 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
struct svc_rdma_op_ctxt,
dto_q);
list_del_init(&ctxt->dto_q);
- }
- if (ctxt) {
spin_unlock_bh(&rdma_xprt->sc_rq_dto_lock);
return rdma_read_complete(rqstp, ctxt);
- }
-
- if (!list_empty(&rdma_xprt->sc_rq_dto_q)) {
+ } else if (!list_empty(&rdma_xprt->sc_rq_dto_q)) {
ctxt = list_entry(rdma_xprt->sc_rq_dto_q.next,
struct svc_rdma_op_ctxt,
dto_q);
@@ -621,7 +526,6 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
if (test_bit(XPT_CLOSE, &xprt->xpt_flags))
goto close_out;
- BUG_ON(ret);
goto out;
}
dprintk("svcrdma: processing ctxt=%p on xprt=%p, rqstp=%p, status=%d\n",
@@ -644,12 +548,11 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
}
/* Read read-list data. */
- ret = rdma_read_xdr(rdma_xprt, rmsgp, rqstp, ctxt);
+ ret = rdma_read_chunks(rdma_xprt, rmsgp, rqstp, ctxt);
if (ret > 0) {
/* read-list posted, defer until data received from client. */
goto defer;
- }
- if (ret < 0) {
+ } else if (ret < 0) {
/* Post of read-list failed, free context. */
svc_rdma_put_context(ctxt, 1);
return 0;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 7e024a51617..49fd21a5c21 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved.
* Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -49,152 +50,6 @@
#define RPCDBG_FACILITY RPCDBG_SVCXPRT
-/* Encode an XDR as an array of IB SGE
- *
- * Assumptions:
- * - head[0] is physically contiguous.
- * - tail[0] is physically contiguous.
- * - pages[] is not physically or virtually contiguous and consists of
- * PAGE_SIZE elements.
- *
- * Output:
- * SGE[0] reserved for RCPRDMA header
- * SGE[1] data from xdr->head[]
- * SGE[2..sge_count-2] data from xdr->pages[]
- * SGE[sge_count-1] data from xdr->tail.
- *
- * The max SGE we need is the length of the XDR / pagesize + one for
- * head + one for tail + one for RPCRDMA header. Since RPCSVC_MAXPAGES
- * reserves a page for both the request and the reply header, and this
- * array is only concerned with the reply we are assured that we have
- * on extra page for the RPCRMDA header.
- */
-static int fast_reg_xdr(struct svcxprt_rdma *xprt,
- struct xdr_buf *xdr,
- struct svc_rdma_req_map *vec)
-{
- int sge_no;
- u32 sge_bytes;
- u32 page_bytes;
- u32 page_off;
- int page_no = 0;
- u8 *frva;
- struct svc_rdma_fastreg_mr *frmr;
-
- frmr = svc_rdma_get_frmr(xprt);
- if (IS_ERR(frmr))
- return -ENOMEM;
- vec->frmr = frmr;
-
- /* Skip the RPCRDMA header */
- sge_no = 1;
-
- /* Map the head. */
- frva = (void *)((unsigned long)(xdr->head[0].iov_base) & PAGE_MASK);
- vec->sge[sge_no].iov_base = xdr->head[0].iov_base;
- vec->sge[sge_no].iov_len = xdr->head[0].iov_len;
- vec->count = 2;
- sge_no++;
-
- /* Map the XDR head */
- frmr->kva = frva;
- frmr->direction = DMA_TO_DEVICE;
- frmr->access_flags = 0;
- frmr->map_len = PAGE_SIZE;
- frmr->page_list_len = 1;
- page_off = (unsigned long)xdr->head[0].iov_base & ~PAGE_MASK;
- frmr->page_list->page_list[page_no] =
- ib_dma_map_page(xprt->sc_cm_id->device,
- virt_to_page(xdr->head[0].iov_base),
- page_off,
- PAGE_SIZE - page_off,
- DMA_TO_DEVICE);
- if (ib_dma_mapping_error(xprt->sc_cm_id->device,
- frmr->page_list->page_list[page_no]))
- goto fatal_err;
- atomic_inc(&xprt->sc_dma_used);
-
- /* Map the XDR page list */
- page_off = xdr->page_base;
- page_bytes = xdr->page_len + page_off;
- if (!page_bytes)
- goto encode_tail;
-
- /* Map the pages */
- vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off;
- vec->sge[sge_no].iov_len = page_bytes;
- sge_no++;
- while (page_bytes) {
- struct page *page;
-
- page = xdr->pages[page_no++];
- sge_bytes = min_t(u32, page_bytes, (PAGE_SIZE - page_off));
- page_bytes -= sge_bytes;
-
- frmr->page_list->page_list[page_no] =
- ib_dma_map_page(xprt->sc_cm_id->device,
- page, page_off,
- sge_bytes, DMA_TO_DEVICE);
- if (ib_dma_mapping_error(xprt->sc_cm_id->device,
- frmr->page_list->page_list[page_no]))
- goto fatal_err;
-
- atomic_inc(&xprt->sc_dma_used);
- page_off = 0; /* reset for next time through loop */
- frmr->map_len += PAGE_SIZE;
- frmr->page_list_len++;
- }
- vec->count++;
-
- encode_tail:
- /* Map tail */
- if (0 == xdr->tail[0].iov_len)
- goto done;
-
- vec->count++;
- vec->sge[sge_no].iov_len = xdr->tail[0].iov_len;
-
- if (((unsigned long)xdr->tail[0].iov_base & PAGE_MASK) ==
- ((unsigned long)xdr->head[0].iov_base & PAGE_MASK)) {
- /*
- * If head and tail use the same page, we don't need
- * to map it again.
- */
- vec->sge[sge_no].iov_base = xdr->tail[0].iov_base;
- } else {
- void *va;
-
- /* Map another page for the tail */
- page_off = (unsigned long)xdr->tail[0].iov_base & ~PAGE_MASK;
- va = (void *)((unsigned long)xdr->tail[0].iov_base & PAGE_MASK);
- vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off;
-
- frmr->page_list->page_list[page_no] =
- ib_dma_map_page(xprt->sc_cm_id->device, virt_to_page(va),
- page_off,
- PAGE_SIZE,
- DMA_TO_DEVICE);
- if (ib_dma_mapping_error(xprt->sc_cm_id->device,
- frmr->page_list->page_list[page_no]))
- goto fatal_err;
- atomic_inc(&xprt->sc_dma_used);
- frmr->map_len += PAGE_SIZE;
- frmr->page_list_len++;
- }
-
- done:
- if (svc_rdma_fastreg(xprt, frmr))
- goto fatal_err;
-
- return 0;
-
- fatal_err:
- printk("svcrdma: Error fast registering memory for xprt %p\n", xprt);
- vec->frmr = NULL;
- svc_rdma_put_frmr(xprt, frmr);
- return -EIO;
-}
-
static int map_xdr(struct svcxprt_rdma *xprt,
struct xdr_buf *xdr,
struct svc_rdma_req_map *vec)
@@ -208,9 +63,6 @@ static int map_xdr(struct svcxprt_rdma *xprt,
BUG_ON(xdr->len !=
(xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len));
- if (xprt->sc_frmr_pg_list_len)
- return fast_reg_xdr(xprt, xdr, vec);
-
/* Skip the first sge, this is for the RPCRDMA header */
sge_no = 1;
@@ -282,8 +134,6 @@ static dma_addr_t dma_map_xdr(struct svcxprt_rdma *xprt,
}
/* Assumptions:
- * - We are using FRMR
- * - or -
* - The specified write_len can be represented in sc_max_sge * PAGE_SIZE
*/
static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
@@ -327,23 +177,16 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
sge_bytes = min_t(size_t,
bc, vec->sge[xdr_sge_no].iov_len-sge_off);
sge[sge_no].length = sge_bytes;
- if (!vec->frmr) {
- sge[sge_no].addr =
- dma_map_xdr(xprt, &rqstp->rq_res, xdr_off,
- sge_bytes, DMA_TO_DEVICE);
- xdr_off += sge_bytes;
- if (ib_dma_mapping_error(xprt->sc_cm_id->device,
- sge[sge_no].addr))
- goto err;
- atomic_inc(&xprt->sc_dma_used);
- sge[sge_no].lkey = xprt->sc_dma_lkey;
- } else {
- sge[sge_no].addr = (unsigned long)
- vec->sge[xdr_sge_no].iov_base + sge_off;
- sge[sge_no].lkey = vec->frmr->mr->lkey;
- }
+ sge[sge_no].addr =
+ dma_map_xdr(xprt, &rqstp->rq_res, xdr_off,
+ sge_bytes, DMA_TO_DEVICE);
+ xdr_off += sge_bytes;
+ if (ib_dma_mapping_error(xprt->sc_cm_id->device,
+ sge[sge_no].addr))
+ goto err;
+ atomic_inc(&xprt->sc_dma_used);
+ sge[sge_no].lkey = xprt->sc_dma_lkey;
ctxt->count++;
- ctxt->frmr = vec->frmr;
sge_off = 0;
sge_no++;
xdr_sge_no++;
@@ -369,7 +212,6 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
return 0;
err:
svc_rdma_unmap_dma(ctxt);
- svc_rdma_put_frmr(xprt, vec->frmr);
svc_rdma_put_context(ctxt, 0);
/* Fatal error, close transport */
return -EIO;
@@ -397,10 +239,7 @@ static int send_write_chunks(struct svcxprt_rdma *xprt,
res_ary = (struct rpcrdma_write_array *)
&rdma_resp->rm_body.rm_chunks[1];
- if (vec->frmr)
- max_write = vec->frmr->map_len;
- else
- max_write = xprt->sc_max_sge * PAGE_SIZE;
+ max_write = xprt->sc_max_sge * PAGE_SIZE;
/* Write chunks start at the pagelist */
for (xdr_off = rqstp->rq_res.head[0].iov_len, chunk_no = 0;
@@ -472,10 +311,7 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
res_ary = (struct rpcrdma_write_array *)
&rdma_resp->rm_body.rm_chunks[2];
- if (vec->frmr)
- max_write = vec->frmr->map_len;
- else
- max_write = xprt->sc_max_sge * PAGE_SIZE;
+ max_write = xprt->sc_max_sge * PAGE_SIZE;
/* xdr offset starts at RPC message */
nchunks = ntohl(arg_ary->wc_nchunks);
@@ -545,7 +381,6 @@ static int send_reply(struct svcxprt_rdma *rdma,
int byte_count)
{
struct ib_send_wr send_wr;
- struct ib_send_wr inv_wr;
int sge_no;
int sge_bytes;
int page_no;
@@ -559,7 +394,6 @@ static int send_reply(struct svcxprt_rdma *rdma,
"svcrdma: could not post a receive buffer, err=%d."
"Closing transport %p.\n", ret, rdma);
set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
- svc_rdma_put_frmr(rdma, vec->frmr);
svc_rdma_put_context(ctxt, 0);
return -ENOTCONN;
}
@@ -567,11 +401,6 @@ static int send_reply(struct svcxprt_rdma *rdma,
/* Prepare the context */
ctxt->pages[0] = page;
ctxt->count = 1;
- ctxt->frmr = vec->frmr;
- if (vec->frmr)
- set_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
- else
- clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
/* Prepare the SGE for the RPCRDMA Header */
ctxt->sge[0].lkey = rdma->sc_dma_lkey;
@@ -590,21 +419,15 @@ static int send_reply(struct svcxprt_rdma *rdma,
int xdr_off = 0;
sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count);
byte_count -= sge_bytes;
- if (!vec->frmr) {
- ctxt->sge[sge_no].addr =
- dma_map_xdr(rdma, &rqstp->rq_res, xdr_off,
- sge_bytes, DMA_TO_DEVICE);
- xdr_off += sge_bytes;
- if (ib_dma_mapping_error(rdma->sc_cm_id->device,
- ctxt->sge[sge_no].addr))
- goto err;
- atomic_inc(&rdma->sc_dma_used);
- ctxt->sge[sge_no].lkey = rdma->sc_dma_lkey;
- } else {
- ctxt->sge[sge_no].addr = (unsigned long)
- vec->sge[sge_no].iov_base;
- ctxt->sge[sge_no].lkey = vec->frmr->mr->lkey;
- }
+ ctxt->sge[sge_no].addr =
+ dma_map_xdr(rdma, &rqstp->rq_res, xdr_off,
+ sge_bytes, DMA_TO_DEVICE);
+ xdr_off += sge_bytes;
+ if (ib_dma_mapping_error(rdma->sc_cm_id->device,
+ ctxt->sge[sge_no].addr))
+ goto err;
+ atomic_inc(&rdma->sc_dma_used);
+ ctxt->sge[sge_no].lkey = rdma->sc_dma_lkey;
ctxt->sge[sge_no].length = sge_bytes;
}
BUG_ON(byte_count != 0);
@@ -627,6 +450,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
ctxt->sge[page_no+1].length = 0;
}
rqstp->rq_next_page = rqstp->rq_respages + 1;
+
BUG_ON(sge_no > rdma->sc_max_sge);
memset(&send_wr, 0, sizeof send_wr);
ctxt->wr_op = IB_WR_SEND;
@@ -635,15 +459,6 @@ static int send_reply(struct svcxprt_rdma *rdma,
send_wr.num_sge = sge_no;
send_wr.opcode = IB_WR_SEND;
send_wr.send_flags = IB_SEND_SIGNALED;
- if (vec->frmr) {
- /* Prepare INVALIDATE WR */
- memset(&inv_wr, 0, sizeof inv_wr);
- inv_wr.opcode = IB_WR_LOCAL_INV;
- inv_wr.send_flags = IB_SEND_SIGNALED;
- inv_wr.ex.invalidate_rkey =
- vec->frmr->mr->lkey;
- send_wr.next = &inv_wr;
- }
ret = svc_rdma_send(rdma, &send_wr);
if (ret)
@@ -653,7 +468,6 @@ static int send_reply(struct svcxprt_rdma *rdma,
err:
svc_rdma_unmap_dma(ctxt);
- svc_rdma_put_frmr(rdma, vec->frmr);
svc_rdma_put_context(ctxt, 1);
return -EIO;
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 25688fa2207..e7323fbbd34 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved.
* Copyright (c) 2005-2007 Network Appliance, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -65,6 +66,7 @@ static void dto_tasklet_func(unsigned long data);
static void svc_rdma_detach(struct svc_xprt *xprt);
static void svc_rdma_free(struct svc_xprt *xprt);
static int svc_rdma_has_wspace(struct svc_xprt *xprt);
+static int svc_rdma_secure_port(struct svc_rqst *);
static void rq_cq_reap(struct svcxprt_rdma *xprt);
static void sq_cq_reap(struct svcxprt_rdma *xprt);
@@ -82,6 +84,7 @@ static struct svc_xprt_ops svc_rdma_ops = {
.xpo_prep_reply_hdr = svc_rdma_prep_reply_hdr,
.xpo_has_wspace = svc_rdma_has_wspace,
.xpo_accept = svc_rdma_accept,
+ .xpo_secure_port = svc_rdma_secure_port,
};
struct svc_xprt_class svc_rdma_class = {
@@ -160,7 +163,6 @@ struct svc_rdma_req_map *svc_rdma_get_req_map(void)
schedule_timeout_uninterruptible(msecs_to_jiffies(500));
}
map->count = 0;
- map->frmr = NULL;
return map;
}
@@ -336,22 +338,21 @@ static void process_context(struct svcxprt_rdma *xprt,
switch (ctxt->wr_op) {
case IB_WR_SEND:
- if (test_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags))
- svc_rdma_put_frmr(xprt, ctxt->frmr);
+ BUG_ON(ctxt->frmr);
svc_rdma_put_context(ctxt, 1);
break;
case IB_WR_RDMA_WRITE:
+ BUG_ON(ctxt->frmr);
svc_rdma_put_context(ctxt, 0);
break;
case IB_WR_RDMA_READ:
case IB_WR_RDMA_READ_WITH_INV:
+ svc_rdma_put_frmr(xprt, ctxt->frmr);
if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr;
BUG_ON(!read_hdr);
- if (test_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags))
- svc_rdma_put_frmr(xprt, ctxt->frmr);
spin_lock_bh(&xprt->sc_rq_dto_lock);
set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
list_add_tail(&read_hdr->dto_q,
@@ -363,6 +364,7 @@ static void process_context(struct svcxprt_rdma *xprt,
break;
default:
+ BUG_ON(1);
printk(KERN_ERR "svcrdma: unexpected completion type, "
"opcode=%d\n",
ctxt->wr_op);
@@ -378,29 +380,42 @@ static void process_context(struct svcxprt_rdma *xprt,
static void sq_cq_reap(struct svcxprt_rdma *xprt)
{
struct svc_rdma_op_ctxt *ctxt = NULL;
- struct ib_wc wc;
+ struct ib_wc wc_a[6];
+ struct ib_wc *wc;
struct ib_cq *cq = xprt->sc_sq_cq;
int ret;
+ memset(wc_a, 0, sizeof(wc_a));
+
if (!test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags))
return;
ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP);
atomic_inc(&rdma_stat_sq_poll);
- while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) {
- if (wc.status != IB_WC_SUCCESS)
- /* Close the transport */
- set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
+ while ((ret = ib_poll_cq(cq, ARRAY_SIZE(wc_a), wc_a)) > 0) {
+ int i;
- /* Decrement used SQ WR count */
- atomic_dec(&xprt->sc_sq_count);
- wake_up(&xprt->sc_send_wait);
+ for (i = 0; i < ret; i++) {
+ wc = &wc_a[i];
+ if (wc->status != IB_WC_SUCCESS) {
+ dprintk("svcrdma: sq wc err status %d\n",
+ wc->status);
- ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id;
- if (ctxt)
- process_context(xprt, ctxt);
+ /* Close the transport */
+ set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
+ }
- svc_xprt_put(&xprt->sc_xprt);
+ /* Decrement used SQ WR count */
+ atomic_dec(&xprt->sc_sq_count);
+ wake_up(&xprt->sc_send_wait);
+
+ ctxt = (struct svc_rdma_op_ctxt *)
+ (unsigned long)wc->wr_id;
+ if (ctxt)
+ process_context(xprt, ctxt);
+
+ svc_xprt_put(&xprt->sc_xprt);
+ }
}
if (ctxt)
@@ -993,7 +1008,11 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
need_dma_mr = 0;
break;
case RDMA_TRANSPORT_IB:
- if (!(devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) {
+ if (!(newxprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG)) {
+ need_dma_mr = 1;
+ dma_mr_acc = IB_ACCESS_LOCAL_WRITE;
+ } else if (!(devattr.device_cap_flags &
+ IB_DEVICE_LOCAL_DMA_LKEY)) {
need_dma_mr = 1;
dma_mr_acc = IB_ACCESS_LOCAL_WRITE;
} else
@@ -1190,14 +1209,7 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt)
container_of(xprt, struct svcxprt_rdma, sc_xprt);
/*
- * If there are fewer SQ WR available than required to send a
- * simple response, return false.
- */
- if ((rdma->sc_sq_depth - atomic_read(&rdma->sc_sq_count) < 3))
- return 0;
-
- /*
- * ...or there are already waiters on the SQ,
+ * If there are already waiters on the SQ,
* return false.
*/
if (waitqueue_active(&rdma->sc_send_wait))
@@ -1207,6 +1219,11 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt)
return 1;
}
+static int svc_rdma_secure_port(struct svc_rqst *rqstp)
+{
+ return 1;
+}
+
/*
* Attempt to register the kvec representing the RPC memory with the
* device.
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 1eb9c468d0c..66f91f0d071 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -149,6 +149,11 @@ static struct ctl_table sunrpc_table[] = {
#endif
+#define RPCRDMA_BIND_TO (60U * HZ)
+#define RPCRDMA_INIT_REEST_TO (5U * HZ)
+#define RPCRDMA_MAX_REEST_TO (30U * HZ)
+#define RPCRDMA_IDLE_DISC_TO (5U * 60 * HZ)
+
static struct rpc_xprt_ops xprt_rdma_procs; /* forward reference */
static void
@@ -229,7 +234,6 @@ static void
xprt_rdma_destroy(struct rpc_xprt *xprt)
{
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
- int rc;
dprintk("RPC: %s: called\n", __func__);
@@ -238,10 +242,7 @@ xprt_rdma_destroy(struct rpc_xprt *xprt)
xprt_clear_connected(xprt);
rpcrdma_buffer_destroy(&r_xprt->rx_buf);
- rc = rpcrdma_ep_destroy(&r_xprt->rx_ep, &r_xprt->rx_ia);
- if (rc)
- dprintk("RPC: %s: rpcrdma_ep_destroy returned %i\n",
- __func__, rc);
+ rpcrdma_ep_destroy(&r_xprt->rx_ep, &r_xprt->rx_ia);
rpcrdma_ia_close(&r_xprt->rx_ia);
xprt_rdma_free_addresses(xprt);
@@ -289,9 +290,9 @@ xprt_setup_rdma(struct xprt_create *args)
/* 60 second timeout, no retries */
xprt->timeout = &xprt_rdma_default_timeout;
- xprt->bind_timeout = (60U * HZ);
- xprt->reestablish_timeout = (5U * HZ);
- xprt->idle_timeout = (5U * 60 * HZ);
+ xprt->bind_timeout = RPCRDMA_BIND_TO;
+ xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO;
+ xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO;
xprt->resvport = 0; /* privileged port not needed */
xprt->tsh_size = 0; /* RPC-RDMA handles framing */
@@ -391,7 +392,7 @@ out4:
xprt_rdma_free_addresses(xprt);
rc = -EINVAL;
out3:
- (void) rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia);
+ rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia);
out2:
rpcrdma_ia_close(&new_xprt->rx_ia);
out1:
@@ -436,10 +437,10 @@ xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task)
schedule_delayed_work(&r_xprt->rdma_connect,
xprt->reestablish_timeout);
xprt->reestablish_timeout <<= 1;
- if (xprt->reestablish_timeout > (30 * HZ))
- xprt->reestablish_timeout = (30 * HZ);
- else if (xprt->reestablish_timeout < (5 * HZ))
- xprt->reestablish_timeout = (5 * HZ);
+ if (xprt->reestablish_timeout > RPCRDMA_MAX_REEST_TO)
+ xprt->reestablish_timeout = RPCRDMA_MAX_REEST_TO;
+ else if (xprt->reestablish_timeout < RPCRDMA_INIT_REEST_TO)
+ xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO;
} else {
schedule_delayed_work(&r_xprt->rdma_connect, 0);
if (!RPC_IS_ASYNC(task))
@@ -447,23 +448,6 @@ xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task)
}
}
-static int
-xprt_rdma_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task)
-{
- struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
- int credits = atomic_read(&r_xprt->rx_buf.rb_credits);
-
- /* == RPC_CWNDSCALE @ init, but *after* setup */
- if (r_xprt->rx_buf.rb_cwndscale == 0UL) {
- r_xprt->rx_buf.rb_cwndscale = xprt->cwnd;
- dprintk("RPC: %s: cwndscale %lu\n", __func__,
- r_xprt->rx_buf.rb_cwndscale);
- BUG_ON(r_xprt->rx_buf.rb_cwndscale <= 0);
- }
- xprt->cwnd = credits * r_xprt->rx_buf.rb_cwndscale;
- return xprt_reserve_xprt_cong(xprt, task);
-}
-
/*
* The RDMA allocate/free functions need the task structure as a place
* to hide the struct rpcrdma_req, which is necessary for the actual send/recv
@@ -479,7 +463,8 @@ xprt_rdma_allocate(struct rpc_task *task, size_t size)
struct rpcrdma_req *req, *nreq;
req = rpcrdma_buffer_get(&rpcx_to_rdmax(xprt)->rx_buf);
- BUG_ON(NULL == req);
+ if (req == NULL)
+ return NULL;
if (size > req->rl_size) {
dprintk("RPC: %s: size %zd too large for buffer[%zd]: "
@@ -503,18 +488,6 @@ xprt_rdma_allocate(struct rpc_task *task, size_t size)
* If the allocation or registration fails, the RPC framework
* will (doggedly) retry.
*/
- if (rpcx_to_rdmax(xprt)->rx_ia.ri_memreg_strategy ==
- RPCRDMA_BOUNCEBUFFERS) {
- /* forced to "pure inline" */
- dprintk("RPC: %s: too much data (%zd) for inline "
- "(r/w max %d/%d)\n", __func__, size,
- rpcx_to_rdmad(xprt).inline_rsize,
- rpcx_to_rdmad(xprt).inline_wsize);
- size = req->rl_size;
- rpc_exit(task, -EIO); /* fail the operation */
- rpcx_to_rdmax(xprt)->rx_stats.failed_marshal_count++;
- goto out;
- }
if (task->tk_flags & RPC_TASK_SWAPPER)
nreq = kmalloc(sizeof *req + size, GFP_ATOMIC);
else
@@ -543,7 +516,6 @@ xprt_rdma_allocate(struct rpc_task *task, size_t size)
req = nreq;
}
dprintk("RPC: %s: size %zd, request 0x%p\n", __func__, size, req);
-out:
req->rl_connect_cookie = 0; /* our reserved value */
return req->rl_xdr_buf;
@@ -579,9 +551,7 @@ xprt_rdma_free(void *buffer)
__func__, rep, (rep && rep->rr_func) ? " (with waiter)" : "");
/*
- * Finish the deregistration. When using mw bind, this was
- * begun in rpcrdma_reply_handler(). In all other modes, we
- * do it here, in thread context. The process is considered
+ * Finish the deregistration. The process is considered
* complete when the rr_func vector becomes NULL - this
* was put in place during rpcrdma_reply_handler() - the wait
* call below will not block if the dereg is "done". If
@@ -590,12 +560,7 @@ xprt_rdma_free(void *buffer)
for (i = 0; req->rl_nchunks;) {
--req->rl_nchunks;
i += rpcrdma_deregister_external(
- &req->rl_segments[i], r_xprt, NULL);
- }
-
- if (rep && wait_event_interruptible(rep->rr_unbind, !rep->rr_func)) {
- rep->rr_func = NULL; /* abandon the callback */
- req->rl_reply = NULL;
+ &req->rl_segments[i], r_xprt);
}
if (req->rl_iov.length == 0) { /* see allocate above */
@@ -630,13 +595,12 @@ xprt_rdma_send_request(struct rpc_task *task)
struct rpc_xprt *xprt = rqst->rq_xprt;
struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+ int rc;
- /* marshal the send itself */
- if (req->rl_niovs == 0 && rpcrdma_marshal_req(rqst) != 0) {
- r_xprt->rx_stats.failed_marshal_count++;
- dprintk("RPC: %s: rpcrdma_marshal_req failed\n",
- __func__);
- return -EIO;
+ if (req->rl_niovs == 0) {
+ rc = rpcrdma_marshal_req(rqst);
+ if (rc < 0)
+ goto failed_marshal;
}
if (req->rl_reply == NULL) /* e.g. reconnection */
@@ -660,6 +624,12 @@ xprt_rdma_send_request(struct rpc_task *task)
rqst->rq_bytes_sent = 0;
return 0;
+failed_marshal:
+ r_xprt->rx_stats.failed_marshal_count++;
+ dprintk("RPC: %s: rpcrdma_marshal_req failed, status %i\n",
+ __func__, rc);
+ if (rc == -EIO)
+ return -EIO;
drop_connection:
xprt_disconnect_done(xprt);
return -ENOTCONN; /* implies disconnect */
@@ -705,7 +675,7 @@ static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
*/
static struct rpc_xprt_ops xprt_rdma_procs = {
- .reserve_xprt = xprt_rdma_reserve_xprt,
+ .reserve_xprt = xprt_reserve_xprt_cong,
.release_xprt = xprt_release_xprt_cong, /* sunrpc/xprt.c */
.alloc_slot = xprt_alloc_slot,
.release_request = xprt_release_rqst_cong, /* ditto */
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 93726560eaa..13dbd1c389f 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -48,8 +48,8 @@
*/
#include <linux/interrupt.h>
-#include <linux/pci.h> /* for Tavor hack below */
#include <linux/slab.h>
+#include <asm/bitops.h>
#include "xprt_rdma.h"
@@ -142,98 +142,139 @@ rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context)
}
}
-static inline
-void rpcrdma_event_process(struct ib_wc *wc)
+static void
+rpcrdma_sendcq_process_wc(struct ib_wc *wc)
{
- struct rpcrdma_mw *frmr;
- struct rpcrdma_rep *rep =
- (struct rpcrdma_rep *)(unsigned long) wc->wr_id;
+ struct rpcrdma_mw *frmr = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
- dprintk("RPC: %s: event rep %p status %X opcode %X length %u\n",
- __func__, rep, wc->status, wc->opcode, wc->byte_len);
+ dprintk("RPC: %s: frmr %p status %X opcode %d\n",
+ __func__, frmr, wc->status, wc->opcode);
- if (!rep) /* send or bind completion that we don't care about */
+ if (wc->wr_id == 0ULL)
return;
-
- if (IB_WC_SUCCESS != wc->status) {
- dprintk("RPC: %s: WC opcode %d status %X, connection lost\n",
- __func__, wc->opcode, wc->status);
- rep->rr_len = ~0U;
- if (wc->opcode != IB_WC_FAST_REG_MR && wc->opcode != IB_WC_LOCAL_INV)
- rpcrdma_schedule_tasklet(rep);
+ if (wc->status != IB_WC_SUCCESS)
return;
- }
- switch (wc->opcode) {
- case IB_WC_FAST_REG_MR:
- frmr = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
+ if (wc->opcode == IB_WC_FAST_REG_MR)
frmr->r.frmr.state = FRMR_IS_VALID;
- break;
- case IB_WC_LOCAL_INV:
- frmr = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
+ else if (wc->opcode == IB_WC_LOCAL_INV)
frmr->r.frmr.state = FRMR_IS_INVALID;
- break;
- case IB_WC_RECV:
- rep->rr_len = wc->byte_len;
- ib_dma_sync_single_for_cpu(
- rdmab_to_ia(rep->rr_buffer)->ri_id->device,
- rep->rr_iov.addr, rep->rr_len, DMA_FROM_DEVICE);
- /* Keep (only) the most recent credits, after check validity */
- if (rep->rr_len >= 16) {
- struct rpcrdma_msg *p =
- (struct rpcrdma_msg *) rep->rr_base;
- unsigned int credits = ntohl(p->rm_credit);
- if (credits == 0) {
- dprintk("RPC: %s: server"
- " dropped credits to 0!\n", __func__);
- /* don't deadlock */
- credits = 1;
- } else if (credits > rep->rr_buffer->rb_max_requests) {
- dprintk("RPC: %s: server"
- " over-crediting: %d (%d)\n",
- __func__, credits,
- rep->rr_buffer->rb_max_requests);
- credits = rep->rr_buffer->rb_max_requests;
- }
- atomic_set(&rep->rr_buffer->rb_credits, credits);
- }
- /* fall through */
- case IB_WC_BIND_MW:
- rpcrdma_schedule_tasklet(rep);
- break;
- default:
- dprintk("RPC: %s: unexpected WC event %X\n",
- __func__, wc->opcode);
- break;
- }
}
-static inline int
-rpcrdma_cq_poll(struct ib_cq *cq)
+static int
+rpcrdma_sendcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
{
- struct ib_wc wc;
- int rc;
+ struct ib_wc *wcs;
+ int budget, count, rc;
- for (;;) {
- rc = ib_poll_cq(cq, 1, &wc);
- if (rc < 0) {
- dprintk("RPC: %s: ib_poll_cq failed %i\n",
- __func__, rc);
+ budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE;
+ do {
+ wcs = ep->rep_send_wcs;
+
+ rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs);
+ if (rc <= 0)
return rc;
- }
- if (rc == 0)
- break;
- rpcrdma_event_process(&wc);
+ count = rc;
+ while (count-- > 0)
+ rpcrdma_sendcq_process_wc(wcs++);
+ } while (rc == RPCRDMA_POLLSIZE && --budget);
+ return 0;
+}
+
+/*
+ * Handle send, fast_reg_mr, and local_inv completions.
+ *
+ * Send events are typically suppressed and thus do not result
+ * in an upcall. Occasionally one is signaled, however. This
+ * prevents the provider's completion queue from wrapping and
+ * losing a completion.
+ */
+static void
+rpcrdma_sendcq_upcall(struct ib_cq *cq, void *cq_context)
+{
+ struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context;
+ int rc;
+
+ rc = rpcrdma_sendcq_poll(cq, ep);
+ if (rc) {
+ dprintk("RPC: %s: ib_poll_cq failed: %i\n",
+ __func__, rc);
+ return;
}
+ rc = ib_req_notify_cq(cq,
+ IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
+ if (rc == 0)
+ return;
+ if (rc < 0) {
+ dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
+ __func__, rc);
+ return;
+ }
+
+ rpcrdma_sendcq_poll(cq, ep);
+}
+
+static void
+rpcrdma_recvcq_process_wc(struct ib_wc *wc)
+{
+ struct rpcrdma_rep *rep =
+ (struct rpcrdma_rep *)(unsigned long)wc->wr_id;
+
+ dprintk("RPC: %s: rep %p status %X opcode %X length %u\n",
+ __func__, rep, wc->status, wc->opcode, wc->byte_len);
+
+ if (wc->status != IB_WC_SUCCESS) {
+ rep->rr_len = ~0U;
+ goto out_schedule;
+ }
+ if (wc->opcode != IB_WC_RECV)
+ return;
+
+ rep->rr_len = wc->byte_len;
+ ib_dma_sync_single_for_cpu(rdmab_to_ia(rep->rr_buffer)->ri_id->device,
+ rep->rr_iov.addr, rep->rr_len, DMA_FROM_DEVICE);
+
+ if (rep->rr_len >= 16) {
+ struct rpcrdma_msg *p = (struct rpcrdma_msg *)rep->rr_base;
+ unsigned int credits = ntohl(p->rm_credit);
+
+ if (credits == 0)
+ credits = 1; /* don't deadlock */
+ else if (credits > rep->rr_buffer->rb_max_requests)
+ credits = rep->rr_buffer->rb_max_requests;
+ atomic_set(&rep->rr_buffer->rb_credits, credits);
+ }
+
+out_schedule:
+ rpcrdma_schedule_tasklet(rep);
+}
+
+static int
+rpcrdma_recvcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
+{
+ struct ib_wc *wcs;
+ int budget, count, rc;
+
+ budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE;
+ do {
+ wcs = ep->rep_recv_wcs;
+
+ rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs);
+ if (rc <= 0)
+ return rc;
+
+ count = rc;
+ while (count-- > 0)
+ rpcrdma_recvcq_process_wc(wcs++);
+ } while (rc == RPCRDMA_POLLSIZE && --budget);
return 0;
}
/*
- * rpcrdma_cq_event_upcall
+ * Handle receive completions.
*
- * This upcall handles recv, send, bind and unbind events.
* It is reentrant but processes single events in order to maintain
* ordering of receives to keep server credits.
*
@@ -242,26 +283,31 @@ rpcrdma_cq_poll(struct ib_cq *cq)
* connection shutdown. That is, the structures required for
* the completion of the reply handler must remain intact until
* all memory has been reclaimed.
- *
- * Note that send events are suppressed and do not result in an upcall.
*/
static void
-rpcrdma_cq_event_upcall(struct ib_cq *cq, void *context)
+rpcrdma_recvcq_upcall(struct ib_cq *cq, void *cq_context)
{
+ struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context;
int rc;
- rc = rpcrdma_cq_poll(cq);
- if (rc)
+ rc = rpcrdma_recvcq_poll(cq, ep);
+ if (rc) {
+ dprintk("RPC: %s: ib_poll_cq failed: %i\n",
+ __func__, rc);
return;
+ }
- rc = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
- if (rc) {
- dprintk("RPC: %s: ib_req_notify_cq failed %i\n",
+ rc = ib_req_notify_cq(cq,
+ IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
+ if (rc == 0)
+ return;
+ if (rc < 0) {
+ dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
__func__, rc);
return;
}
- rpcrdma_cq_poll(cq);
+ rpcrdma_recvcq_poll(cq, ep);
}
#ifdef RPC_DEBUG
@@ -493,54 +539,32 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey;
}
- switch (memreg) {
- case RPCRDMA_MEMWINDOWS:
- case RPCRDMA_MEMWINDOWS_ASYNC:
- if (!(devattr.device_cap_flags & IB_DEVICE_MEM_WINDOW)) {
- dprintk("RPC: %s: MEMWINDOWS registration "
- "specified but not supported by adapter, "
- "using slower RPCRDMA_REGISTER\n",
- __func__);
- memreg = RPCRDMA_REGISTER;
- }
- break;
- case RPCRDMA_MTHCAFMR:
- if (!ia->ri_id->device->alloc_fmr) {
-#if RPCRDMA_PERSISTENT_REGISTRATION
- dprintk("RPC: %s: MTHCAFMR registration "
- "specified but not supported by adapter, "
- "using riskier RPCRDMA_ALLPHYSICAL\n",
- __func__);
- memreg = RPCRDMA_ALLPHYSICAL;
-#else
- dprintk("RPC: %s: MTHCAFMR registration "
- "specified but not supported by adapter, "
- "using slower RPCRDMA_REGISTER\n",
- __func__);
- memreg = RPCRDMA_REGISTER;
-#endif
- }
- break;
- case RPCRDMA_FRMR:
+ if (memreg == RPCRDMA_FRMR) {
/* Requires both frmr reg and local dma lkey */
if ((devattr.device_cap_flags &
(IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) !=
(IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) {
-#if RPCRDMA_PERSISTENT_REGISTRATION
dprintk("RPC: %s: FRMR registration "
- "specified but not supported by adapter, "
- "using riskier RPCRDMA_ALLPHYSICAL\n",
- __func__);
+ "not supported by HCA\n", __func__);
+ memreg = RPCRDMA_MTHCAFMR;
+ } else {
+ /* Mind the ia limit on FRMR page list depth */
+ ia->ri_max_frmr_depth = min_t(unsigned int,
+ RPCRDMA_MAX_DATA_SEGS,
+ devattr.max_fast_reg_page_list_len);
+ }
+ }
+ if (memreg == RPCRDMA_MTHCAFMR) {
+ if (!ia->ri_id->device->alloc_fmr) {
+ dprintk("RPC: %s: MTHCAFMR registration "
+ "not supported by HCA\n", __func__);
+#if RPCRDMA_PERSISTENT_REGISTRATION
memreg = RPCRDMA_ALLPHYSICAL;
#else
- dprintk("RPC: %s: FRMR registration "
- "specified but not supported by adapter, "
- "using slower RPCRDMA_REGISTER\n",
- __func__);
- memreg = RPCRDMA_REGISTER;
+ rc = -ENOMEM;
+ goto out2;
#endif
}
- break;
}
/*
@@ -552,8 +576,6 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
* adapter.
*/
switch (memreg) {
- case RPCRDMA_BOUNCEBUFFERS:
- case RPCRDMA_REGISTER:
case RPCRDMA_FRMR:
break;
#if RPCRDMA_PERSISTENT_REGISTRATION
@@ -563,30 +585,26 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
IB_ACCESS_REMOTE_READ;
goto register_setup;
#endif
- case RPCRDMA_MEMWINDOWS_ASYNC:
- case RPCRDMA_MEMWINDOWS:
- mem_priv = IB_ACCESS_LOCAL_WRITE |
- IB_ACCESS_MW_BIND;
- goto register_setup;
case RPCRDMA_MTHCAFMR:
if (ia->ri_have_dma_lkey)
break;
mem_priv = IB_ACCESS_LOCAL_WRITE;
+#if RPCRDMA_PERSISTENT_REGISTRATION
register_setup:
+#endif
ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv);
if (IS_ERR(ia->ri_bind_mem)) {
printk(KERN_ALERT "%s: ib_get_dma_mr for "
- "phys register failed with %lX\n\t"
- "Will continue with degraded performance\n",
+ "phys register failed with %lX\n",
__func__, PTR_ERR(ia->ri_bind_mem));
- memreg = RPCRDMA_REGISTER;
- ia->ri_bind_mem = NULL;
+ rc = -ENOMEM;
+ goto out2;
}
break;
default:
- printk(KERN_ERR "%s: invalid memory registration mode %d\n",
- __func__, memreg);
- rc = -EINVAL;
+ printk(KERN_ERR "RPC: Unsupported memory "
+ "registration mode: %d\n", memreg);
+ rc = -ENOMEM;
goto out2;
}
dprintk("RPC: %s: memory registration strategy is %d\n",
@@ -640,6 +658,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
struct rpcrdma_create_data_internal *cdata)
{
struct ib_device_attr devattr;
+ struct ib_cq *sendcq, *recvcq;
int rc, err;
rc = ib_query_device(ia->ri_id->device, &devattr);
@@ -659,32 +678,42 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
ep->rep_attr.srq = NULL;
ep->rep_attr.cap.max_send_wr = cdata->max_requests;
switch (ia->ri_memreg_strategy) {
- case RPCRDMA_FRMR:
+ case RPCRDMA_FRMR: {
+ int depth = 7;
+
/* Add room for frmr register and invalidate WRs.
* 1. FRMR reg WR for head
* 2. FRMR invalidate WR for head
- * 3. FRMR reg WR for pagelist
- * 4. FRMR invalidate WR for pagelist
+ * 3. N FRMR reg WRs for pagelist
+ * 4. N FRMR invalidate WRs for pagelist
* 5. FRMR reg WR for tail
* 6. FRMR invalidate WR for tail
* 7. The RDMA_SEND WR
*/
- ep->rep_attr.cap.max_send_wr *= 7;
+
+ /* Calculate N if the device max FRMR depth is smaller than
+ * RPCRDMA_MAX_DATA_SEGS.
+ */
+ if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) {
+ int delta = RPCRDMA_MAX_DATA_SEGS -
+ ia->ri_max_frmr_depth;
+
+ do {
+ depth += 2; /* FRMR reg + invalidate */
+ delta -= ia->ri_max_frmr_depth;
+ } while (delta > 0);
+
+ }
+ ep->rep_attr.cap.max_send_wr *= depth;
if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) {
- cdata->max_requests = devattr.max_qp_wr / 7;
+ cdata->max_requests = devattr.max_qp_wr / depth;
if (!cdata->max_requests)
return -EINVAL;
- ep->rep_attr.cap.max_send_wr = cdata->max_requests * 7;
+ ep->rep_attr.cap.max_send_wr = cdata->max_requests *
+ depth;
}
break;
- case RPCRDMA_MEMWINDOWS_ASYNC:
- case RPCRDMA_MEMWINDOWS:
- /* Add room for mw_binds+unbinds - overkill! */
- ep->rep_attr.cap.max_send_wr++;
- ep->rep_attr.cap.max_send_wr *= (2 * RPCRDMA_MAX_SEGS);
- if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr)
- return -EINVAL;
- break;
+ }
default:
break;
}
@@ -705,46 +734,51 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
ep->rep_attr.cap.max_recv_sge);
/* set trigger for requesting send completion */
- ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 /* - 1*/;
- switch (ia->ri_memreg_strategy) {
- case RPCRDMA_MEMWINDOWS_ASYNC:
- case RPCRDMA_MEMWINDOWS:
- ep->rep_cqinit -= RPCRDMA_MAX_SEGS;
- break;
- default:
- break;
- }
+ ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1;
if (ep->rep_cqinit <= 2)
ep->rep_cqinit = 0;
INIT_CQCOUNT(ep);
ep->rep_ia = ia;
init_waitqueue_head(&ep->rep_connect_wait);
+ INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker);
- /*
- * Create a single cq for receive dto and mw_bind (only ever
- * care about unbind, really). Send completions are suppressed.
- * Use single threaded tasklet upcalls to maintain ordering.
- */
- ep->rep_cq = ib_create_cq(ia->ri_id->device, rpcrdma_cq_event_upcall,
- rpcrdma_cq_async_error_upcall, NULL,
- ep->rep_attr.cap.max_recv_wr +
+ sendcq = ib_create_cq(ia->ri_id->device, rpcrdma_sendcq_upcall,
+ rpcrdma_cq_async_error_upcall, ep,
ep->rep_attr.cap.max_send_wr + 1, 0);
- if (IS_ERR(ep->rep_cq)) {
- rc = PTR_ERR(ep->rep_cq);
- dprintk("RPC: %s: ib_create_cq failed: %i\n",
+ if (IS_ERR(sendcq)) {
+ rc = PTR_ERR(sendcq);
+ dprintk("RPC: %s: failed to create send CQ: %i\n",
__func__, rc);
goto out1;
}
- rc = ib_req_notify_cq(ep->rep_cq, IB_CQ_NEXT_COMP);
+ rc = ib_req_notify_cq(sendcq, IB_CQ_NEXT_COMP);
+ if (rc) {
+ dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
+ __func__, rc);
+ goto out2;
+ }
+
+ recvcq = ib_create_cq(ia->ri_id->device, rpcrdma_recvcq_upcall,
+ rpcrdma_cq_async_error_upcall, ep,
+ ep->rep_attr.cap.max_recv_wr + 1, 0);
+ if (IS_ERR(recvcq)) {
+ rc = PTR_ERR(recvcq);
+ dprintk("RPC: %s: failed to create recv CQ: %i\n",
+ __func__, rc);
+ goto out2;
+ }
+
+ rc = ib_req_notify_cq(recvcq, IB_CQ_NEXT_COMP);
if (rc) {
dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
__func__, rc);
+ ib_destroy_cq(recvcq);
goto out2;
}
- ep->rep_attr.send_cq = ep->rep_cq;
- ep->rep_attr.recv_cq = ep->rep_cq;
+ ep->rep_attr.send_cq = sendcq;
+ ep->rep_attr.recv_cq = recvcq;
/* Initialize cma parameters */
@@ -754,9 +788,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
/* Client offers RDMA Read but does not initiate */
ep->rep_remote_cma.initiator_depth = 0;
- if (ia->ri_memreg_strategy == RPCRDMA_BOUNCEBUFFERS)
- ep->rep_remote_cma.responder_resources = 0;
- else if (devattr.max_qp_rd_atom > 32) /* arbitrary but <= 255 */
+ if (devattr.max_qp_rd_atom > 32) /* arbitrary but <= 255 */
ep->rep_remote_cma.responder_resources = 32;
else
ep->rep_remote_cma.responder_resources = devattr.max_qp_rd_atom;
@@ -768,7 +800,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
return 0;
out2:
- err = ib_destroy_cq(ep->rep_cq);
+ err = ib_destroy_cq(sendcq);
if (err)
dprintk("RPC: %s: ib_destroy_cq returned %i\n",
__func__, err);
@@ -782,11 +814,8 @@ out1:
* Disconnect and destroy endpoint. After this, the only
* valid operations on the ep are to free it (if dynamically
* allocated) or re-create it.
- *
- * The caller's error handling must be sure to not leak the endpoint
- * if this function fails.
*/
-int
+void
rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
int rc;
@@ -794,6 +823,8 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
dprintk("RPC: %s: entering, connected is %d\n",
__func__, ep->rep_connected);
+ cancel_delayed_work_sync(&ep->rep_connect_worker);
+
if (ia->ri_id->qp) {
rc = rpcrdma_ep_disconnect(ep, ia);
if (rc)
@@ -809,13 +840,17 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
ep->rep_pad_mr = NULL;
}
- rpcrdma_clean_cq(ep->rep_cq);
- rc = ib_destroy_cq(ep->rep_cq);
+ rpcrdma_clean_cq(ep->rep_attr.recv_cq);
+ rc = ib_destroy_cq(ep->rep_attr.recv_cq);
if (rc)
dprintk("RPC: %s: ib_destroy_cq returned %i\n",
__func__, rc);
- return rc;
+ rpcrdma_clean_cq(ep->rep_attr.send_cq);
+ rc = ib_destroy_cq(ep->rep_attr.send_cq);
+ if (rc)
+ dprintk("RPC: %s: ib_destroy_cq returned %i\n",
+ __func__, rc);
}
/*
@@ -831,17 +866,20 @@ rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
if (ep->rep_connected != 0) {
struct rpcrdma_xprt *xprt;
retry:
+ dprintk("RPC: %s: reconnecting...\n", __func__);
rc = rpcrdma_ep_disconnect(ep, ia);
if (rc && rc != -ENOTCONN)
dprintk("RPC: %s: rpcrdma_ep_disconnect"
" status %i\n", __func__, rc);
- rpcrdma_clean_cq(ep->rep_cq);
+
+ rpcrdma_clean_cq(ep->rep_attr.recv_cq);
+ rpcrdma_clean_cq(ep->rep_attr.send_cq);
xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
id = rpcrdma_create_id(xprt, ia,
(struct sockaddr *)&xprt->rx_data.addr);
if (IS_ERR(id)) {
- rc = PTR_ERR(id);
+ rc = -EHOSTUNREACH;
goto out;
}
/* TEMP TEMP TEMP - fail if new device:
@@ -855,35 +893,32 @@ retry:
printk("RPC: %s: can't reconnect on "
"different device!\n", __func__);
rdma_destroy_id(id);
- rc = -ENETDOWN;
+ rc = -ENETUNREACH;
goto out;
}
/* END TEMP */
+ rc = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr);
+ if (rc) {
+ dprintk("RPC: %s: rdma_create_qp failed %i\n",
+ __func__, rc);
+ rdma_destroy_id(id);
+ rc = -ENETUNREACH;
+ goto out;
+ }
rdma_destroy_qp(ia->ri_id);
rdma_destroy_id(ia->ri_id);
ia->ri_id = id;
+ } else {
+ dprintk("RPC: %s: connecting...\n", __func__);
+ rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
+ if (rc) {
+ dprintk("RPC: %s: rdma_create_qp failed %i\n",
+ __func__, rc);
+ /* do not update ep->rep_connected */
+ return -ENETUNREACH;
+ }
}
- rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
- if (rc) {
- dprintk("RPC: %s: rdma_create_qp failed %i\n",
- __func__, rc);
- goto out;
- }
-
-/* XXX Tavor device performs badly with 2K MTU! */
-if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) {
- struct pci_dev *pcid = to_pci_dev(ia->ri_id->device->dma_device);
- if (pcid->device == PCI_DEVICE_ID_MELLANOX_TAVOR &&
- (pcid->vendor == PCI_VENDOR_ID_MELLANOX ||
- pcid->vendor == PCI_VENDOR_ID_TOPSPIN)) {
- struct ib_qp_attr attr = {
- .path_mtu = IB_MTU_1024
- };
- rc = ib_modify_qp(ia->ri_id->qp, &attr, IB_QP_PATH_MTU);
- }
-}
-
ep->rep_connected = 0;
rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma);
@@ -944,7 +979,8 @@ rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
int rc;
- rpcrdma_clean_cq(ep->rep_cq);
+ rpcrdma_clean_cq(ep->rep_attr.recv_cq);
+ rpcrdma_clean_cq(ep->rep_attr.send_cq);
rc = rdma_disconnect(ia->ri_id);
if (!rc) {
/* returns without wait if not connected */
@@ -967,7 +1003,7 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
struct rpcrdma_ia *ia, struct rpcrdma_create_data_internal *cdata)
{
char *p;
- size_t len;
+ size_t len, rlen, wlen;
int i, rc;
struct rpcrdma_mw *r;
@@ -997,11 +1033,6 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS *
sizeof(struct rpcrdma_mw);
break;
- case RPCRDMA_MEMWINDOWS_ASYNC:
- case RPCRDMA_MEMWINDOWS:
- len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS *
- sizeof(struct rpcrdma_mw);
- break;
default:
break;
}
@@ -1032,32 +1063,29 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
}
p += cdata->padding;
- /*
- * Allocate the fmr's, or mw's for mw_bind chunk registration.
- * We "cycle" the mw's in order to minimize rkey reuse,
- * and also reduce unbind-to-bind collision.
- */
INIT_LIST_HEAD(&buf->rb_mws);
r = (struct rpcrdma_mw *)p;
switch (ia->ri_memreg_strategy) {
case RPCRDMA_FRMR:
for (i = buf->rb_max_requests * RPCRDMA_MAX_SEGS; i; i--) {
r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
- RPCRDMA_MAX_SEGS);
+ ia->ri_max_frmr_depth);
if (IS_ERR(r->r.frmr.fr_mr)) {
rc = PTR_ERR(r->r.frmr.fr_mr);
dprintk("RPC: %s: ib_alloc_fast_reg_mr"
" failed %i\n", __func__, rc);
goto out;
}
- r->r.frmr.fr_pgl =
- ib_alloc_fast_reg_page_list(ia->ri_id->device,
- RPCRDMA_MAX_SEGS);
+ r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list(
+ ia->ri_id->device,
+ ia->ri_max_frmr_depth);
if (IS_ERR(r->r.frmr.fr_pgl)) {
rc = PTR_ERR(r->r.frmr.fr_pgl);
dprintk("RPC: %s: "
"ib_alloc_fast_reg_page_list "
"failed %i\n", __func__, rc);
+
+ ib_dereg_mr(r->r.frmr.fr_mr);
goto out;
}
list_add(&r->mw_list, &buf->rb_mws);
@@ -1082,21 +1110,6 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
++r;
}
break;
- case RPCRDMA_MEMWINDOWS_ASYNC:
- case RPCRDMA_MEMWINDOWS:
- /* Allocate one extra request's worth, for full cycling */
- for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) {
- r->r.mw = ib_alloc_mw(ia->ri_pd, IB_MW_TYPE_1);
- if (IS_ERR(r->r.mw)) {
- rc = PTR_ERR(r->r.mw);
- dprintk("RPC: %s: ib_alloc_mw"
- " failed %i\n", __func__, rc);
- goto out;
- }
- list_add(&r->mw_list, &buf->rb_mws);
- ++r;
- }
- break;
default:
break;
}
@@ -1105,16 +1118,16 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
* Allocate/init the request/reply buffers. Doing this
* using kmalloc for now -- one for each buf.
*/
+ wlen = 1 << fls(cdata->inline_wsize + sizeof(struct rpcrdma_req));
+ rlen = 1 << fls(cdata->inline_rsize + sizeof(struct rpcrdma_rep));
+ dprintk("RPC: %s: wlen = %zu, rlen = %zu\n",
+ __func__, wlen, rlen);
+
for (i = 0; i < buf->rb_max_requests; i++) {
struct rpcrdma_req *req;
struct rpcrdma_rep *rep;
- len = cdata->inline_wsize + sizeof(struct rpcrdma_req);
- /* RPC layer requests *double* size + 1K RPC_SLACK_SPACE! */
- /* Typical ~2400b, so rounding up saves work later */
- if (len < 4096)
- len = 4096;
- req = kmalloc(len, GFP_KERNEL);
+ req = kmalloc(wlen, GFP_KERNEL);
if (req == NULL) {
dprintk("RPC: %s: request buffer %d alloc"
" failed\n", __func__, i);
@@ -1126,16 +1139,16 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
buf->rb_send_bufs[i]->rl_buffer = buf;
rc = rpcrdma_register_internal(ia, req->rl_base,
- len - offsetof(struct rpcrdma_req, rl_base),
+ wlen - offsetof(struct rpcrdma_req, rl_base),
&buf->rb_send_bufs[i]->rl_handle,
&buf->rb_send_bufs[i]->rl_iov);
if (rc)
goto out;
- buf->rb_send_bufs[i]->rl_size = len-sizeof(struct rpcrdma_req);
+ buf->rb_send_bufs[i]->rl_size = wlen -
+ sizeof(struct rpcrdma_req);
- len = cdata->inline_rsize + sizeof(struct rpcrdma_rep);
- rep = kmalloc(len, GFP_KERNEL);
+ rep = kmalloc(rlen, GFP_KERNEL);
if (rep == NULL) {
dprintk("RPC: %s: reply buffer %d alloc failed\n",
__func__, i);
@@ -1145,10 +1158,9 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
memset(rep, 0, sizeof(struct rpcrdma_rep));
buf->rb_recv_bufs[i] = rep;
buf->rb_recv_bufs[i]->rr_buffer = buf;
- init_waitqueue_head(&rep->rr_unbind);
rc = rpcrdma_register_internal(ia, rep->rr_base,
- len - offsetof(struct rpcrdma_rep, rr_base),
+ rlen - offsetof(struct rpcrdma_rep, rr_base),
&buf->rb_recv_bufs[i]->rr_handle,
&buf->rb_recv_bufs[i]->rr_iov);
if (rc)
@@ -1179,7 +1191,6 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
/* clean up in reverse order from create
* 1. recv mr memory (mr free, then kfree)
- * 1a. bind mw memory
* 2. send mr memory (mr free, then kfree)
* 3. padding (if any) [moved to rpcrdma_ep_destroy]
* 4. arrays
@@ -1194,41 +1205,6 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
kfree(buf->rb_recv_bufs[i]);
}
if (buf->rb_send_bufs && buf->rb_send_bufs[i]) {
- while (!list_empty(&buf->rb_mws)) {
- r = list_entry(buf->rb_mws.next,
- struct rpcrdma_mw, mw_list);
- list_del(&r->mw_list);
- switch (ia->ri_memreg_strategy) {
- case RPCRDMA_FRMR:
- rc = ib_dereg_mr(r->r.frmr.fr_mr);
- if (rc)
- dprintk("RPC: %s:"
- " ib_dereg_mr"
- " failed %i\n",
- __func__, rc);
- ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
- break;
- case RPCRDMA_MTHCAFMR:
- rc = ib_dealloc_fmr(r->r.fmr);
- if (rc)
- dprintk("RPC: %s:"
- " ib_dealloc_fmr"
- " failed %i\n",
- __func__, rc);
- break;
- case RPCRDMA_MEMWINDOWS_ASYNC:
- case RPCRDMA_MEMWINDOWS:
- rc = ib_dealloc_mw(r->r.mw);
- if (rc)
- dprintk("RPC: %s:"
- " ib_dealloc_mw"
- " failed %i\n",
- __func__, rc);
- break;
- default:
- break;
- }
- }
rpcrdma_deregister_internal(ia,
buf->rb_send_bufs[i]->rl_handle,
&buf->rb_send_bufs[i]->rl_iov);
@@ -1236,6 +1212,33 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
}
}
+ while (!list_empty(&buf->rb_mws)) {
+ r = list_entry(buf->rb_mws.next,
+ struct rpcrdma_mw, mw_list);
+ list_del(&r->mw_list);
+ switch (ia->ri_memreg_strategy) {
+ case RPCRDMA_FRMR:
+ rc = ib_dereg_mr(r->r.frmr.fr_mr);
+ if (rc)
+ dprintk("RPC: %s:"
+ " ib_dereg_mr"
+ " failed %i\n",
+ __func__, rc);
+ ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
+ break;
+ case RPCRDMA_MTHCAFMR:
+ rc = ib_dealloc_fmr(r->r.fmr);
+ if (rc)
+ dprintk("RPC: %s:"
+ " ib_dealloc_fmr"
+ " failed %i\n",
+ __func__, rc);
+ break;
+ default:
+ break;
+ }
+ }
+
kfree(buf->rb_pool);
}
@@ -1299,21 +1302,17 @@ rpcrdma_buffer_put(struct rpcrdma_req *req)
int i;
unsigned long flags;
- BUG_ON(req->rl_nchunks != 0);
spin_lock_irqsave(&buffers->rb_lock, flags);
buffers->rb_send_bufs[--buffers->rb_send_index] = req;
req->rl_niovs = 0;
if (req->rl_reply) {
buffers->rb_recv_bufs[--buffers->rb_recv_index] = req->rl_reply;
- init_waitqueue_head(&req->rl_reply->rr_unbind);
req->rl_reply->rr_func = NULL;
req->rl_reply = NULL;
}
switch (ia->ri_memreg_strategy) {
case RPCRDMA_FRMR:
case RPCRDMA_MTHCAFMR:
- case RPCRDMA_MEMWINDOWS_ASYNC:
- case RPCRDMA_MEMWINDOWS:
/*
* Cycle mw's back in reverse order, and "spin" them.
* This delays and scrambles reuse as much as possible.
@@ -1358,8 +1357,7 @@ rpcrdma_recv_buffer_get(struct rpcrdma_req *req)
/*
* Put reply buffers back into pool when not attached to
- * request. This happens in error conditions, and when
- * aborting unbinds. Pre-decrement counter/array index.
+ * request. This happens in error conditions.
*/
void
rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
@@ -1498,8 +1496,8 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
seg1->mr_offset -= pageoff; /* start of page */
seg1->mr_len += pageoff;
len = -pageoff;
- if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
- *nsegs = RPCRDMA_MAX_DATA_SEGS;
+ if (*nsegs > ia->ri_max_frmr_depth)
+ *nsegs = ia->ri_max_frmr_depth;
for (page_no = i = 0; i < *nsegs;) {
rpcrdma_map_one(ia, seg, writing);
pa = seg->mr_dma;
@@ -1536,10 +1534,6 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
} else
post_wr = &frmr_wr;
- /* Bump the key */
- key = (u8)(seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey & 0x000000FF);
- ib_update_fast_reg_key(seg1->mr_chunk.rl_mw->r.frmr.fr_mr, ++key);
-
/* Prepare FRMR WR */
memset(&frmr_wr, 0, sizeof frmr_wr);
frmr_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw;
@@ -1550,7 +1544,16 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
frmr_wr.wr.fast_reg.page_list_len = page_no;
frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
frmr_wr.wr.fast_reg.length = page_no << PAGE_SHIFT;
- BUG_ON(frmr_wr.wr.fast_reg.length < len);
+ if (frmr_wr.wr.fast_reg.length < len) {
+ while (seg1->mr_nsegs--)
+ rpcrdma_unmap_one(ia, seg++);
+ return -EIO;
+ }
+
+ /* Bump the key */
+ key = (u8)(seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey & 0x000000FF);
+ ib_update_fast_reg_key(seg1->mr_chunk.rl_mw->r.frmr.fr_mr, ++key);
+
frmr_wr.wr.fast_reg.access_flags = (writing ?
IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
IB_ACCESS_REMOTE_READ);
@@ -1661,135 +1664,6 @@ rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg,
return rc;
}
-static int
-rpcrdma_register_memwin_external(struct rpcrdma_mr_seg *seg,
- int *nsegs, int writing, struct rpcrdma_ia *ia,
- struct rpcrdma_xprt *r_xprt)
-{
- int mem_priv = (writing ? IB_ACCESS_REMOTE_WRITE :
- IB_ACCESS_REMOTE_READ);
- struct ib_mw_bind param;
- int rc;
-
- *nsegs = 1;
- rpcrdma_map_one(ia, seg, writing);
- param.bind_info.mr = ia->ri_bind_mem;
- param.wr_id = 0ULL; /* no send cookie */
- param.bind_info.addr = seg->mr_dma;
- param.bind_info.length = seg->mr_len;
- param.send_flags = 0;
- param.bind_info.mw_access_flags = mem_priv;
-
- DECR_CQCOUNT(&r_xprt->rx_ep);
- rc = ib_bind_mw(ia->ri_id->qp, seg->mr_chunk.rl_mw->r.mw, &param);
- if (rc) {
- dprintk("RPC: %s: failed ib_bind_mw "
- "%u@0x%llx status %i\n",
- __func__, seg->mr_len,
- (unsigned long long)seg->mr_dma, rc);
- rpcrdma_unmap_one(ia, seg);
- } else {
- seg->mr_rkey = seg->mr_chunk.rl_mw->r.mw->rkey;
- seg->mr_base = param.bind_info.addr;
- seg->mr_nsegs = 1;
- }
- return rc;
-}
-
-static int
-rpcrdma_deregister_memwin_external(struct rpcrdma_mr_seg *seg,
- struct rpcrdma_ia *ia,
- struct rpcrdma_xprt *r_xprt, void **r)
-{
- struct ib_mw_bind param;
- LIST_HEAD(l);
- int rc;
-
- BUG_ON(seg->mr_nsegs != 1);
- param.bind_info.mr = ia->ri_bind_mem;
- param.bind_info.addr = 0ULL; /* unbind */
- param.bind_info.length = 0;
- param.bind_info.mw_access_flags = 0;
- if (*r) {
- param.wr_id = (u64) (unsigned long) *r;
- param.send_flags = IB_SEND_SIGNALED;
- INIT_CQCOUNT(&r_xprt->rx_ep);
- } else {
- param.wr_id = 0ULL;
- param.send_flags = 0;
- DECR_CQCOUNT(&r_xprt->rx_ep);
- }
- rc = ib_bind_mw(ia->ri_id->qp, seg->mr_chunk.rl_mw->r.mw, &param);
- rpcrdma_unmap_one(ia, seg);
- if (rc)
- dprintk("RPC: %s: failed ib_(un)bind_mw,"
- " status %i\n", __func__, rc);
- else
- *r = NULL; /* will upcall on completion */
- return rc;
-}
-
-static int
-rpcrdma_register_default_external(struct rpcrdma_mr_seg *seg,
- int *nsegs, int writing, struct rpcrdma_ia *ia)
-{
- int mem_priv = (writing ? IB_ACCESS_REMOTE_WRITE :
- IB_ACCESS_REMOTE_READ);
- struct rpcrdma_mr_seg *seg1 = seg;
- struct ib_phys_buf ipb[RPCRDMA_MAX_DATA_SEGS];
- int len, i, rc = 0;
-
- if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
- *nsegs = RPCRDMA_MAX_DATA_SEGS;
- for (len = 0, i = 0; i < *nsegs;) {
- rpcrdma_map_one(ia, seg, writing);
- ipb[i].addr = seg->mr_dma;
- ipb[i].size = seg->mr_len;
- len += seg->mr_len;
- ++seg;
- ++i;
- /* Check for holes */
- if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
- offset_in_page((seg-1)->mr_offset+(seg-1)->mr_len))
- break;
- }
- seg1->mr_base = seg1->mr_dma;
- seg1->mr_chunk.rl_mr = ib_reg_phys_mr(ia->ri_pd,
- ipb, i, mem_priv, &seg1->mr_base);
- if (IS_ERR(seg1->mr_chunk.rl_mr)) {
- rc = PTR_ERR(seg1->mr_chunk.rl_mr);
- dprintk("RPC: %s: failed ib_reg_phys_mr "
- "%u@0x%llx (%d)... status %i\n",
- __func__, len,
- (unsigned long long)seg1->mr_dma, i, rc);
- while (i--)
- rpcrdma_unmap_one(ia, --seg);
- } else {
- seg1->mr_rkey = seg1->mr_chunk.rl_mr->rkey;
- seg1->mr_nsegs = i;
- seg1->mr_len = len;
- }
- *nsegs = i;
- return rc;
-}
-
-static int
-rpcrdma_deregister_default_external(struct rpcrdma_mr_seg *seg,
- struct rpcrdma_ia *ia)
-{
- struct rpcrdma_mr_seg *seg1 = seg;
- int rc;
-
- rc = ib_dereg_mr(seg1->mr_chunk.rl_mr);
- seg1->mr_chunk.rl_mr = NULL;
- while (seg1->mr_nsegs--)
- rpcrdma_unmap_one(ia, seg++);
- if (rc)
- dprintk("RPC: %s: failed ib_dereg_mr,"
- " status %i\n", __func__, rc);
- return rc;
-}
-
int
rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
int nsegs, int writing, struct rpcrdma_xprt *r_xprt)
@@ -1819,16 +1693,8 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia);
break;
- /* Registration using memory windows */
- case RPCRDMA_MEMWINDOWS_ASYNC:
- case RPCRDMA_MEMWINDOWS:
- rc = rpcrdma_register_memwin_external(seg, &nsegs, writing, ia, r_xprt);
- break;
-
- /* Default registration each time */
default:
- rc = rpcrdma_register_default_external(seg, &nsegs, writing, ia);
- break;
+ return -1;
}
if (rc)
return -1;
@@ -1838,7 +1704,7 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
int
rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
- struct rpcrdma_xprt *r_xprt, void *r)
+ struct rpcrdma_xprt *r_xprt)
{
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
int nsegs = seg->mr_nsegs, rc;
@@ -1847,9 +1713,7 @@ rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
#if RPCRDMA_PERSISTENT_REGISTRATION
case RPCRDMA_ALLPHYSICAL:
- BUG_ON(nsegs != 1);
rpcrdma_unmap_one(ia, seg);
- rc = 0;
break;
#endif
@@ -1861,21 +1725,9 @@ rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
rc = rpcrdma_deregister_fmr_external(seg, ia);
break;
- case RPCRDMA_MEMWINDOWS_ASYNC:
- case RPCRDMA_MEMWINDOWS:
- rc = rpcrdma_deregister_memwin_external(seg, ia, r_xprt, &r);
- break;
-
default:
- rc = rpcrdma_deregister_default_external(seg, ia);
break;
}
- if (r) {
- struct rpcrdma_rep *rep = r;
- void (*func)(struct rpcrdma_rep *) = rep->rr_func;
- rep->rr_func = NULL;
- func(rep); /* dereg done, callback now */
- }
return nsegs;
}
@@ -1950,7 +1802,6 @@ rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
ib_dma_sync_single_for_cpu(ia->ri_id->device,
rep->rr_iov.addr, rep->rr_iov.length, DMA_BIDIRECTIONAL);
- DECR_CQCOUNT(ep);
rc = ib_post_recv(ia->ri_id->qp, &recv_wr, &recv_wr_fail);
if (rc)
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index cc1445dc1d1..89e7cd47970 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -43,6 +43,7 @@
#include <linux/wait.h> /* wait_queue_head_t, etc */
#include <linux/spinlock.h> /* spinlock_t, etc */
#include <linux/atomic.h> /* atomic_t, etc */
+#include <linux/workqueue.h> /* struct work_struct */
#include <rdma/rdma_cm.h> /* RDMA connection api */
#include <rdma/ib_verbs.h> /* RDMA verbs api */
@@ -66,18 +67,21 @@ struct rpcrdma_ia {
struct completion ri_done;
int ri_async_rc;
enum rpcrdma_memreg ri_memreg_strategy;
+ unsigned int ri_max_frmr_depth;
};
/*
* RDMA Endpoint -- one per transport instance
*/
+#define RPCRDMA_WC_BUDGET (128)
+#define RPCRDMA_POLLSIZE (16)
+
struct rpcrdma_ep {
atomic_t rep_cqcount;
int rep_cqinit;
int rep_connected;
struct rpcrdma_ia *rep_ia;
- struct ib_cq *rep_cq;
struct ib_qp_init_attr rep_attr;
wait_queue_head_t rep_connect_wait;
struct ib_sge rep_pad; /* holds zeroed pad */
@@ -86,6 +90,9 @@ struct rpcrdma_ep {
struct rpc_xprt *rep_xprt; /* for rep_func */
struct rdma_conn_param rep_remote_cma;
struct sockaddr_storage rep_remote_addr;
+ struct delayed_work rep_connect_worker;
+ struct ib_wc rep_send_wcs[RPCRDMA_POLLSIZE];
+ struct ib_wc rep_recv_wcs[RPCRDMA_POLLSIZE];
};
#define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit)
@@ -124,7 +131,6 @@ struct rpcrdma_rep {
struct rpc_xprt *rr_xprt; /* needed for request/reply matching */
void (*rr_func)(struct rpcrdma_rep *);/* called by tasklet in softint */
struct list_head rr_list; /* tasklet list */
- wait_queue_head_t rr_unbind; /* optional unbind wait */
struct ib_sge rr_iov; /* for posting */
struct ib_mr *rr_handle; /* handle for mem in rr_iov */
char rr_base[MAX_RPCRDMAHDR]; /* minimal inline receive buffer */
@@ -159,7 +165,6 @@ struct rpcrdma_mr_seg { /* chunk descriptors */
struct ib_mr *rl_mr; /* if registered directly */
struct rpcrdma_mw { /* if registered from region */
union {
- struct ib_mw *mw;
struct ib_fmr *fmr;
struct {
struct ib_fast_reg_page_list *fr_pgl;
@@ -207,7 +212,6 @@ struct rpcrdma_req {
struct rpcrdma_buffer {
spinlock_t rb_lock; /* protects indexes */
atomic_t rb_credits; /* most recent server credits */
- unsigned long rb_cwndscale; /* cached framework rpc_cwndscale */
int rb_max_requests;/* client max requests */
struct list_head rb_mws; /* optional memory windows/fmrs/frmrs */
int rb_send_index;
@@ -300,7 +304,7 @@ void rpcrdma_ia_close(struct rpcrdma_ia *);
*/
int rpcrdma_ep_create(struct rpcrdma_ep *, struct rpcrdma_ia *,
struct rpcrdma_create_data_internal *);
-int rpcrdma_ep_destroy(struct rpcrdma_ep *, struct rpcrdma_ia *);
+void rpcrdma_ep_destroy(struct rpcrdma_ep *, struct rpcrdma_ia *);
int rpcrdma_ep_connect(struct rpcrdma_ep *, struct rpcrdma_ia *);
int rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *);
@@ -330,11 +334,12 @@ int rpcrdma_deregister_internal(struct rpcrdma_ia *,
int rpcrdma_register_external(struct rpcrdma_mr_seg *,
int, int, struct rpcrdma_xprt *);
int rpcrdma_deregister_external(struct rpcrdma_mr_seg *,
- struct rpcrdma_xprt *, void *);
+ struct rpcrdma_xprt *);
/*
* RPC/RDMA connection management calls - xprtrdma/rpc_rdma.c
*/
+void rpcrdma_connect_worker(struct work_struct *);
void rpcrdma_conn_func(struct rpcrdma_ep *);
void rpcrdma_reply_handler(struct rpcrdma_rep *);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 25a3dcf15ca..be8bbd5d65e 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -866,8 +866,6 @@ static void xs_reset_transport(struct sock_xprt *transport)
xs_restore_old_callbacks(transport, sk);
write_unlock_bh(&sk->sk_callback_lock);
- sk->sk_no_check = 0;
-
trace_rpc_socket_close(&transport->xprt, sock);
sock_release(sock);
}
@@ -893,11 +891,11 @@ static void xs_close(struct rpc_xprt *xprt)
xs_reset_transport(transport);
xprt->reestablish_timeout = 0;
- smp_mb__before_clear_bit();
+ smp_mb__before_atomic();
clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
clear_bit(XPRT_CLOSING, &xprt->state);
- smp_mb__after_clear_bit();
+ smp_mb__after_atomic();
xprt_disconnect_done(xprt);
}
@@ -1497,12 +1495,12 @@ static void xs_tcp_cancel_linger_timeout(struct rpc_xprt *xprt)
static void xs_sock_reset_connection_flags(struct rpc_xprt *xprt)
{
- smp_mb__before_clear_bit();
+ smp_mb__before_atomic();
clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
clear_bit(XPRT_CONNECTION_CLOSE, &xprt->state);
clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
clear_bit(XPRT_CLOSING, &xprt->state);
- smp_mb__after_clear_bit();
+ smp_mb__after_atomic();
}
static void xs_sock_mark_closed(struct rpc_xprt *xprt)
@@ -1556,10 +1554,10 @@ static void xs_tcp_state_change(struct sock *sk)
xprt->connect_cookie++;
xprt->reestablish_timeout = 0;
set_bit(XPRT_CLOSING, &xprt->state);
- smp_mb__before_clear_bit();
+ smp_mb__before_atomic();
clear_bit(XPRT_CONNECTED, &xprt->state);
clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
- smp_mb__after_clear_bit();
+ smp_mb__after_atomic();
xs_tcp_schedule_linger_timeout(xprt, xs_tcp_fin_timeout);
break;
case TCP_CLOSE_WAIT:
@@ -1578,9 +1576,9 @@ static void xs_tcp_state_change(struct sock *sk)
case TCP_LAST_ACK:
set_bit(XPRT_CLOSING, &xprt->state);
xs_tcp_schedule_linger_timeout(xprt, xs_tcp_fin_timeout);
- smp_mb__before_clear_bit();
+ smp_mb__before_atomic();
clear_bit(XPRT_CONNECTED, &xprt->state);
- smp_mb__after_clear_bit();
+ smp_mb__after_atomic();
break;
case TCP_CLOSE:
xs_tcp_cancel_linger_timeout(xprt);
@@ -2046,7 +2044,6 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
sk->sk_user_data = xprt;
sk->sk_data_ready = xs_udp_data_ready;
sk->sk_write_space = xs_udp_write_space;
- sk->sk_no_check = UDP_CSUM_NORCV;
sk->sk_allocation = GFP_ATOMIC;
xprt_set_connected(xprt);
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 671f9817b4f..55c6c9d3e1c 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -559,6 +559,7 @@ receive:
buf = node->bclink.deferred_head;
node->bclink.deferred_head = buf->next;
+ buf->next = NULL;
node->bclink.deferred_size--;
goto receive;
}
@@ -653,7 +654,7 @@ static int tipc_bcbearer_send(struct sk_buff *buf, struct tipc_bearer *unused1,
tipc_bearer_send(b->identity, buf, &b->bcast_addr);
} else {
/* Avoid concurrent buffer access */
- tbuf = pskb_copy(buf, GFP_ATOMIC);
+ tbuf = pskb_copy_for_clone(buf, GFP_ATOMIC);
if (!tbuf)
break;
tipc_bearer_send(b->identity, tbuf, &b->bcast_addr);
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index 8be6e94a1ca..0a37a472c29 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -101,9 +101,11 @@ int tipc_msg_build(struct tipc_msg *hdr, struct iovec const *msg_sect,
}
/* tipc_buf_append(): Append a buffer to the fragment list of another buffer
- * Let first buffer become head buffer
- * Returns 1 and sets *buf to headbuf if chain is complete, otherwise 0
- * Leaves headbuf pointer at NULL if failure
+ * @*headbuf: in: NULL for first frag, otherwise value returned from prev call
+ * out: set when successful non-complete reassembly, otherwise NULL
+ * @*buf: in: the buffer to append. Always defined
+ * out: head buf after sucessful complete reassembly, otherwise NULL
+ * Returns 1 when reassembly complete, otherwise 0
*/
int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf)
{
@@ -122,6 +124,7 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf)
goto out_free;
head = *headbuf = frag;
skb_frag_list_init(head);
+ *buf = NULL;
return 0;
}
if (!head)
@@ -150,5 +153,7 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf)
out_free:
pr_warn_ratelimited("Unable to build fragment list\n");
kfree_skb(*buf);
+ kfree_skb(*headbuf);
+ *buf = *headbuf = NULL;
return 0;
}
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index ac08966f285..ef0475568f9 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1,5 +1,5 @@
/*
-* net/tipc/socket.c: TIPC socket API
+ * net/tipc/socket.c: TIPC socket API
*
* Copyright (c) 2001-2007, 2012-2014, Ericsson AB
* Copyright (c) 2004-2008, 2010-2013, Wind River Systems
@@ -985,10 +985,11 @@ static int anc_data_recv(struct msghdr *m, struct tipc_msg *msg,
return 0;
}
-static int tipc_wait_for_rcvmsg(struct socket *sock, long timeo)
+static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop)
{
struct sock *sk = sock->sk;
DEFINE_WAIT(wait);
+ long timeo = *timeop;
int err;
for (;;) {
@@ -1013,6 +1014,7 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long timeo)
break;
}
finish_wait(sk_sleep(sk), &wait);
+ *timeop = timeo;
return err;
}
@@ -1056,7 +1058,7 @@ static int tipc_recvmsg(struct kiocb *iocb, struct socket *sock,
restart:
/* Look for a message in receive queue; wait if necessary */
- res = tipc_wait_for_rcvmsg(sock, timeo);
+ res = tipc_wait_for_rcvmsg(sock, &timeo);
if (res)
goto exit;
@@ -1154,7 +1156,7 @@ static int tipc_recv_stream(struct kiocb *iocb, struct socket *sock,
restart:
/* Look for a message in receive queue; wait if necessary */
- res = tipc_wait_for_rcvmsg(sock, timeo);
+ res = tipc_wait_for_rcvmsg(sock, &timeo);
if (res)
goto exit;
@@ -1429,13 +1431,14 @@ static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *buf)
{
u32 res;
struct tipc_sock *tsk = tipc_sk(sk);
+ uint truesize = buf->truesize;
res = filter_rcv(sk, buf);
if (unlikely(res))
tipc_reject_msg(buf, res);
if (atomic_read(&tsk->dupl_rcvcnt) < TIPC_CONN_OVERLOAD_LIMIT)
- atomic_add(buf->truesize, &tsk->dupl_rcvcnt);
+ atomic_add(truesize, &tsk->dupl_rcvcnt);
return 0;
}
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 7b9114e0a5b..e9688438073 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1207,7 +1207,7 @@ restart:
sk->sk_state = TCP_ESTABLISHED;
sock_hold(newsk);
- smp_mb__after_atomic_inc(); /* sock_hold() does an atomic_inc() */
+ smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */
unix_peer(sk) = newsk;
unix_state_unlock(sk);
diff --git a/net/wireless/ap.c b/net/wireless/ap.c
index 3e02ade508d..bdad1f95156 100644
--- a/net/wireless/ap.c
+++ b/net/wireless/ap.c
@@ -6,8 +6,8 @@
#include "rdev-ops.h"
-static int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev,
- struct net_device *dev, bool notify)
+int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev,
+ struct net_device *dev, bool notify)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
int err;
diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index 84d686e2dbd..992b34070bc 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -370,8 +370,8 @@ int cfg80211_chandef_dfs_required(struct wiphy *wiphy,
case NL80211_IFTYPE_AP_VLAN:
case NL80211_IFTYPE_WDS:
case NL80211_IFTYPE_P2P_DEVICE:
- case NL80211_IFTYPE_UNSPECIFIED:
break;
+ case NL80211_IFTYPE_UNSPECIFIED:
case NUM_NL80211_IFTYPES:
WARN_ON(1);
}
@@ -796,8 +796,7 @@ bool cfg80211_reg_can_beacon(struct wiphy *wiphy,
!cfg80211_go_permissive_chan(rdev, chandef->chan))
prohibited_flags |= IEEE80211_CHAN_NO_IR;
- if (cfg80211_chandef_dfs_required(wiphy, chandef,
- NL80211_IFTYPE_UNSPECIFIED) > 0 &&
+ if (cfg80211_chandef_dfs_required(wiphy, chandef, iftype) > 0 &&
cfg80211_chandef_dfs_available(wiphy, chandef)) {
/* We can skip IEEE80211_CHAN_NO_IR if chandef dfs available */
prohibited_flags = IEEE80211_CHAN_DISABLED;
diff --git a/net/wireless/core.c b/net/wireless/core.c
index b3ff3697239..a1c40654dd9 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -130,7 +130,7 @@ int cfg80211_dev_rename(struct cfg80211_registered_device *rdev,
newname))
pr_err("failed to rename debugfs dir to %s!\n", newname);
- nl80211_notify_dev_rename(rdev);
+ nl80211_notify_wiphy(rdev, NL80211_CMD_NEW_WIPHY);
return 0;
}
@@ -210,15 +210,12 @@ void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev,
}
}
-static int cfg80211_rfkill_set_block(void *data, bool blocked)
+void cfg80211_shutdown_all_interfaces(struct wiphy *wiphy)
{
- struct cfg80211_registered_device *rdev = data;
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
struct wireless_dev *wdev;
- if (!blocked)
- return 0;
-
- rtnl_lock();
+ ASSERT_RTNL();
list_for_each_entry(wdev, &rdev->wdev_list, list) {
if (wdev->netdev) {
@@ -234,7 +231,18 @@ static int cfg80211_rfkill_set_block(void *data, bool blocked)
break;
}
}
+}
+EXPORT_SYMBOL_GPL(cfg80211_shutdown_all_interfaces);
+static int cfg80211_rfkill_set_block(void *data, bool blocked)
+{
+ struct cfg80211_registered_device *rdev = data;
+
+ if (!blocked)
+ return 0;
+
+ rtnl_lock();
+ cfg80211_shutdown_all_interfaces(&rdev->wiphy);
rtnl_unlock();
return 0;
@@ -394,6 +402,8 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv)
rdev->wiphy.rts_threshold = (u32) -1;
rdev->wiphy.coverage_class = 0;
+ rdev->wiphy.max_num_csa_counters = 1;
+
return &rdev->wiphy;
}
EXPORT_SYMBOL(wiphy_new);
@@ -650,6 +660,8 @@ int wiphy_register(struct wiphy *wiphy)
return res;
}
+ nl80211_notify_wiphy(rdev, NL80211_CMD_NEW_WIPHY);
+
return 0;
}
EXPORT_SYMBOL(wiphy_register);
@@ -688,9 +700,10 @@ void wiphy_unregister(struct wiphy *wiphy)
rfkill_unregister(rdev->rfkill);
rtnl_lock();
+ nl80211_notify_wiphy(rdev, NL80211_CMD_DEL_WIPHY);
rdev->wiphy.registered = false;
- BUG_ON(!list_empty(&rdev->wdev_list));
+ WARN_ON(!list_empty(&rdev->wdev_list));
/*
* First remove the hardware from everywhere, this makes
@@ -792,23 +805,23 @@ void cfg80211_update_iface_num(struct cfg80211_registered_device *rdev,
rdev->num_running_monitor_ifaces += num;
}
-void cfg80211_leave(struct cfg80211_registered_device *rdev,
- struct wireless_dev *wdev)
+void __cfg80211_leave(struct cfg80211_registered_device *rdev,
+ struct wireless_dev *wdev)
{
struct net_device *dev = wdev->netdev;
ASSERT_RTNL();
+ ASSERT_WDEV_LOCK(wdev);
switch (wdev->iftype) {
case NL80211_IFTYPE_ADHOC:
- cfg80211_leave_ibss(rdev, dev, true);
+ __cfg80211_leave_ibss(rdev, dev, true);
break;
case NL80211_IFTYPE_P2P_CLIENT:
case NL80211_IFTYPE_STATION:
if (rdev->sched_scan_req && dev == rdev->sched_scan_req->dev)
__cfg80211_stop_sched_scan(rdev, false);
- wdev_lock(wdev);
#ifdef CONFIG_CFG80211_WEXT
kfree(wdev->wext.ie);
wdev->wext.ie = NULL;
@@ -817,20 +830,49 @@ void cfg80211_leave(struct cfg80211_registered_device *rdev,
#endif
cfg80211_disconnect(rdev, dev,
WLAN_REASON_DEAUTH_LEAVING, true);
- wdev_unlock(wdev);
break;
case NL80211_IFTYPE_MESH_POINT:
- cfg80211_leave_mesh(rdev, dev);
+ __cfg80211_leave_mesh(rdev, dev);
break;
case NL80211_IFTYPE_AP:
case NL80211_IFTYPE_P2P_GO:
- cfg80211_stop_ap(rdev, dev, true);
+ __cfg80211_stop_ap(rdev, dev, true);
break;
default:
break;
}
}
+void cfg80211_leave(struct cfg80211_registered_device *rdev,
+ struct wireless_dev *wdev)
+{
+ wdev_lock(wdev);
+ __cfg80211_leave(rdev, wdev);
+ wdev_unlock(wdev);
+}
+
+void cfg80211_stop_iface(struct wiphy *wiphy, struct wireless_dev *wdev,
+ gfp_t gfp)
+{
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
+ struct cfg80211_event *ev;
+ unsigned long flags;
+
+ trace_cfg80211_stop_iface(wiphy, wdev);
+
+ ev = kzalloc(sizeof(*ev), gfp);
+ if (!ev)
+ return;
+
+ ev->type = EVENT_STOPPED;
+
+ spin_lock_irqsave(&wdev->event_lock, flags);
+ list_add_tail(&ev->list, &wdev->event_list);
+ spin_unlock_irqrestore(&wdev->event_lock, flags);
+ queue_work(cfg80211_wq, &rdev->event_work);
+}
+EXPORT_SYMBOL(cfg80211_stop_iface);
+
static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
unsigned long state, void *ptr)
{
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 681b8fa4355..7e3a3cef7df 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -185,6 +185,7 @@ enum cfg80211_event_type {
EVENT_ROAMED,
EVENT_DISCONNECTED,
EVENT_IBSS_JOINED,
+ EVENT_STOPPED,
};
struct cfg80211_event {
@@ -281,6 +282,8 @@ int cfg80211_join_mesh(struct cfg80211_registered_device *rdev,
struct net_device *dev,
struct mesh_setup *setup,
const struct mesh_config *conf);
+int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev,
+ struct net_device *dev);
int cfg80211_leave_mesh(struct cfg80211_registered_device *rdev,
struct net_device *dev);
int cfg80211_set_mesh_channel(struct cfg80211_registered_device *rdev,
@@ -288,6 +291,8 @@ int cfg80211_set_mesh_channel(struct cfg80211_registered_device *rdev,
struct cfg80211_chan_def *chandef);
/* AP */
+int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev,
+ struct net_device *dev, bool notify);
int cfg80211_stop_ap(struct cfg80211_registered_device *rdev,
struct net_device *dev, bool notify);
@@ -419,7 +424,7 @@ static inline unsigned int elapsed_jiffies_msecs(unsigned long start)
if (end >= start)
return jiffies_to_msecs(end - start);
- return jiffies_to_msecs(end + (MAX_JIFFY_OFFSET - start) + 1);
+ return jiffies_to_msecs(end + (ULONG_MAX - start) + 1);
}
void
@@ -441,6 +446,8 @@ int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev,
void cfg80211_update_iface_num(struct cfg80211_registered_device *rdev,
enum nl80211_iftype iftype, int num);
+void __cfg80211_leave(struct cfg80211_registered_device *rdev,
+ struct wireless_dev *wdev);
void cfg80211_leave(struct cfg80211_registered_device *rdev,
struct wireless_dev *wdev);
diff --git a/net/wireless/genregdb.awk b/net/wireless/genregdb.awk
index b35da8dc85d..40c37fc5b67 100644
--- a/net/wireless/genregdb.awk
+++ b/net/wireless/genregdb.awk
@@ -68,17 +68,7 @@ function parse_reg_rule()
sub(/,/, "", units)
dfs_cac = $9
if (units == "mW") {
- if (power == 100) {
- power = 20
- } else if (power == 200) {
- power = 23
- } else if (power == 500) {
- power = 27
- } else if (power == 1000) {
- power = 30
- } else {
- print "Unknown power value in database!"
- }
+ power = 10 * log(power)/log(10)
} else {
dfs_cac = $8
}
@@ -117,7 +107,7 @@ function parse_reg_rule()
}
flags = flags "0"
- printf "\t\tREG_RULE_EXT(%d, %d, %d, %d, %d, %d, %s),\n", start, end, bw, gain, power, dfs_cac, flags
+ printf "\t\tREG_RULE_EXT(%d, %d, %d, %d, %.0f, %d, %s),\n", start, end, bw, gain, power, dfs_cac, flags
rules++
}
diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c
index 6b50588b709..8f345da3ea5 100644
--- a/net/wireless/ibss.c
+++ b/net/wireless/ibss.c
@@ -420,8 +420,8 @@ int cfg80211_ibss_wext_siwessid(struct net_device *dev,
if (len > 0 && ssid[len - 1] == '\0')
len--;
+ memcpy(wdev->ssid, ssid, len);
wdev->wext.ibss.ssid = wdev->ssid;
- memcpy(wdev->wext.ibss.ssid, ssid, len);
wdev->wext.ibss.ssid_len = len;
wdev_lock(wdev);
diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c
index 3ddfb7cd335..092300b30c3 100644
--- a/net/wireless/mesh.c
+++ b/net/wireless/mesh.c
@@ -238,8 +238,8 @@ int cfg80211_set_mesh_channel(struct cfg80211_registered_device *rdev,
return 0;
}
-static int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev,
- struct net_device *dev)
+int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev,
+ struct net_device *dev)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
int err;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 0f1b18f209d..6668daf6932 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -371,8 +371,8 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
[NL80211_ATTR_CH_SWITCH_COUNT] = { .type = NLA_U32 },
[NL80211_ATTR_CH_SWITCH_BLOCK_TX] = { .type = NLA_FLAG },
[NL80211_ATTR_CSA_IES] = { .type = NLA_NESTED },
- [NL80211_ATTR_CSA_C_OFF_BEACON] = { .type = NLA_U16 },
- [NL80211_ATTR_CSA_C_OFF_PRESP] = { .type = NLA_U16 },
+ [NL80211_ATTR_CSA_C_OFF_BEACON] = { .type = NLA_BINARY },
+ [NL80211_ATTR_CSA_C_OFF_PRESP] = { .type = NLA_BINARY },
[NL80211_ATTR_STA_SUPPORTED_CHANNELS] = { .type = NLA_BINARY },
[NL80211_ATTR_STA_SUPPORTED_OPER_CLASSES] = { .type = NLA_BINARY },
[NL80211_ATTR_HANDLE_DFS] = { .type = NLA_FLAG },
@@ -386,6 +386,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
[NL80211_ATTR_WIPHY_FREQ_HINT] = { .type = NLA_U32 },
[NL80211_ATTR_TDLS_PEER_CAPABILITY] = { .type = NLA_U32 },
[NL80211_ATTR_IFACE_SOCKET_OWNER] = { .type = NLA_FLAG },
+ [NL80211_ATTR_CSA_C_OFFSETS_TX] = { .type = NLA_BINARY },
};
/* policy for the key attributes */
@@ -970,8 +971,10 @@ static int nl80211_put_iface_combinations(struct wiphy *wiphy,
c->max_interfaces))
goto nla_put_failure;
if (large &&
- nla_put_u32(msg, NL80211_IFACE_COMB_RADAR_DETECT_WIDTHS,
- c->radar_detect_widths))
+ (nla_put_u32(msg, NL80211_IFACE_COMB_RADAR_DETECT_WIDTHS,
+ c->radar_detect_widths) ||
+ nla_put_u32(msg, NL80211_IFACE_COMB_RADAR_DETECT_REGIONS,
+ c->radar_detect_regions)))
goto nla_put_failure;
nla_nest_end(msg, nl_combi);
@@ -1223,6 +1226,7 @@ struct nl80211_dump_wiphy_state {
};
static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev,
+ enum nl80211_commands cmd,
struct sk_buff *msg, u32 portid, u32 seq,
int flags, struct nl80211_dump_wiphy_state *state)
{
@@ -1237,7 +1241,7 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev,
rdev->wiphy.mgmt_stypes;
u32 features;
- hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_WIPHY);
+ hdr = nl80211hdr_put(msg, portid, seq, flags, cmd);
if (!hdr)
return -ENOBUFS;
@@ -1251,6 +1255,9 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev,
cfg80211_rdev_list_generation))
goto nla_put_failure;
+ if (cmd != NL80211_CMD_NEW_WIPHY)
+ goto finish;
+
switch (state->split_start) {
case 0:
if (nla_put_u8(msg, NL80211_ATTR_WIPHY_RETRY_SHORT,
@@ -1490,18 +1497,17 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev,
}
CMD(start_p2p_device, START_P2P_DEVICE);
CMD(set_mcast_rate, SET_MCAST_RATE);
+#ifdef CONFIG_NL80211_TESTMODE
+ CMD(testmode_cmd, TESTMODE);
+#endif
if (state->split) {
CMD(crit_proto_start, CRIT_PROTOCOL_START);
CMD(crit_proto_stop, CRIT_PROTOCOL_STOP);
if (rdev->wiphy.flags & WIPHY_FLAG_HAS_CHANNEL_SWITCH)
CMD(channel_switch, CHANNEL_SWITCH);
+ CMD(set_qos_map, SET_QOS_MAP);
}
- CMD(set_qos_map, SET_QOS_MAP);
-
-#ifdef CONFIG_NL80211_TESTMODE
- CMD(testmode_cmd, TESTMODE);
-#endif
-
+ /* add into the if now */
#undef CMD
if (rdev->ops->connect || rdev->ops->auth) {
@@ -1667,11 +1673,19 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev,
}
nla_nest_end(msg, nested);
}
+ state->split_start++;
+ break;
+ case 12:
+ if (rdev->wiphy.flags & WIPHY_FLAG_HAS_CHANNEL_SWITCH &&
+ nla_put_u8(msg, NL80211_ATTR_MAX_CSA_COUNTERS,
+ rdev->wiphy.max_num_csa_counters))
+ goto nla_put_failure;
/* done */
state->split_start = 0;
break;
}
+ finish:
return genlmsg_end(msg, hdr);
nla_put_failure:
@@ -1746,7 +1760,8 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb)
continue;
/* attempt to fit multiple wiphy data chunks into the skb */
do {
- ret = nl80211_send_wiphy(rdev, skb,
+ ret = nl80211_send_wiphy(rdev, NL80211_CMD_NEW_WIPHY,
+ skb,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NLM_F_MULTI, state);
@@ -1801,7 +1816,8 @@ static int nl80211_get_wiphy(struct sk_buff *skb, struct genl_info *info)
if (!msg)
return -ENOMEM;
- if (nl80211_send_wiphy(rdev, msg, info->snd_portid, info->snd_seq, 0,
+ if (nl80211_send_wiphy(rdev, NL80211_CMD_NEW_WIPHY, msg,
+ info->snd_portid, info->snd_seq, 0,
&state) < 0) {
nlmsg_free(msg);
return -ENOBUFS;
@@ -3640,6 +3656,10 @@ static int nl80211_send_station(struct sk_buff *msg, u32 portid, u32 seq,
nla_put_u32(msg, NL80211_STA_INFO_TX_FAILED,
sinfo->tx_failed))
goto nla_put_failure;
+ if ((sinfo->filled & STATION_INFO_EXPECTED_THROUGHPUT) &&
+ nla_put_u32(msg, NL80211_STA_INFO_EXPECTED_THROUGHPUT,
+ sinfo->expected_throughput))
+ goto nla_put_failure;
if ((sinfo->filled & STATION_INFO_BEACON_LOSS_COUNT) &&
nla_put_u32(msg, NL80211_STA_INFO_BEACON_LOSS,
sinfo->beacon_loss_count))
@@ -5820,7 +5840,7 @@ static int nl80211_start_radar_detection(struct sk_buff *skb,
return -EBUSY;
err = cfg80211_chandef_dfs_required(wdev->wiphy, &chandef,
- NL80211_IFTYPE_UNSPECIFIED);
+ wdev->iftype);
if (err < 0)
return err;
@@ -5861,6 +5881,7 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info)
u8 radar_detect_width = 0;
int err;
bool need_new_beacon = false;
+ int len, i;
if (!rdev->ops->channel_switch ||
!(rdev->wiphy.flags & WIPHY_FLAG_HAS_CHANNEL_SWITCH))
@@ -5919,26 +5940,55 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info)
if (!csa_attrs[NL80211_ATTR_CSA_C_OFF_BEACON])
return -EINVAL;
- params.counter_offset_beacon =
- nla_get_u16(csa_attrs[NL80211_ATTR_CSA_C_OFF_BEACON]);
- if (params.counter_offset_beacon >= params.beacon_csa.tail_len)
+ len = nla_len(csa_attrs[NL80211_ATTR_CSA_C_OFF_BEACON]);
+ if (!len || (len % sizeof(u16)))
return -EINVAL;
- /* sanity check - counters should be the same */
- if (params.beacon_csa.tail[params.counter_offset_beacon] !=
- params.count)
+ params.n_counter_offsets_beacon = len / sizeof(u16);
+ if (rdev->wiphy.max_num_csa_counters &&
+ (params.n_counter_offsets_beacon >
+ rdev->wiphy.max_num_csa_counters))
return -EINVAL;
+ params.counter_offsets_beacon =
+ nla_data(csa_attrs[NL80211_ATTR_CSA_C_OFF_BEACON]);
+
+ /* sanity checks - counters should fit and be the same */
+ for (i = 0; i < params.n_counter_offsets_beacon; i++) {
+ u16 offset = params.counter_offsets_beacon[i];
+
+ if (offset >= params.beacon_csa.tail_len)
+ return -EINVAL;
+
+ if (params.beacon_csa.tail[offset] != params.count)
+ return -EINVAL;
+ }
+
if (csa_attrs[NL80211_ATTR_CSA_C_OFF_PRESP]) {
- params.counter_offset_presp =
- nla_get_u16(csa_attrs[NL80211_ATTR_CSA_C_OFF_PRESP]);
- if (params.counter_offset_presp >=
- params.beacon_csa.probe_resp_len)
+ len = nla_len(csa_attrs[NL80211_ATTR_CSA_C_OFF_PRESP]);
+ if (!len || (len % sizeof(u16)))
return -EINVAL;
- if (params.beacon_csa.probe_resp[params.counter_offset_presp] !=
- params.count)
+ params.n_counter_offsets_presp = len / sizeof(u16);
+ if (rdev->wiphy.max_num_csa_counters &&
+ (params.n_counter_offsets_beacon >
+ rdev->wiphy.max_num_csa_counters))
return -EINVAL;
+
+ params.counter_offsets_presp =
+ nla_data(csa_attrs[NL80211_ATTR_CSA_C_OFF_PRESP]);
+
+ /* sanity checks - counters should fit and be the same */
+ for (i = 0; i < params.n_counter_offsets_presp; i++) {
+ u16 offset = params.counter_offsets_presp[i];
+
+ if (offset >= params.beacon_csa.probe_resp_len)
+ return -EINVAL;
+
+ if (params.beacon_csa.probe_resp[offset] !=
+ params.count)
+ return -EINVAL;
+ }
}
skip_beacons:
@@ -7784,6 +7834,27 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info)
if (!chandef.chan && params.offchan)
return -EINVAL;
+ params.buf = nla_data(info->attrs[NL80211_ATTR_FRAME]);
+ params.len = nla_len(info->attrs[NL80211_ATTR_FRAME]);
+
+ if (info->attrs[NL80211_ATTR_CSA_C_OFFSETS_TX]) {
+ int len = nla_len(info->attrs[NL80211_ATTR_CSA_C_OFFSETS_TX]);
+ int i;
+
+ if (len % sizeof(u16))
+ return -EINVAL;
+
+ params.n_csa_offsets = len / sizeof(u16);
+ params.csa_offsets =
+ nla_data(info->attrs[NL80211_ATTR_CSA_C_OFFSETS_TX]);
+
+ /* check that all the offsets fit the frame */
+ for (i = 0; i < params.n_csa_offsets; i++) {
+ if (params.csa_offsets[i] >= params.len)
+ return -EINVAL;
+ }
+ }
+
if (!params.dont_wait_for_ack) {
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg)
@@ -7797,8 +7868,6 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info)
}
}
- params.buf = nla_data(info->attrs[NL80211_ATTR_FRAME]);
- params.len = nla_len(info->attrs[NL80211_ATTR_FRAME]);
params.chan = chandef.chan;
err = cfg80211_mlme_mgmt_tx(rdev, wdev, &params, &cookie);
if (err)
@@ -8495,6 +8564,8 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info)
nla_for_each_nested(pat, tb[NL80211_WOWLAN_TRIG_PKT_PATTERN],
rem) {
+ u8 *mask_pat;
+
nla_parse(pat_tb, MAX_NL80211_PKTPAT, nla_data(pat),
nla_len(pat), NULL);
err = -EINVAL;
@@ -8518,19 +8589,18 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info)
goto error;
new_triggers.patterns[i].pkt_offset = pkt_offset;
- new_triggers.patterns[i].mask =
- kmalloc(mask_len + pat_len, GFP_KERNEL);
- if (!new_triggers.patterns[i].mask) {
+ mask_pat = kmalloc(mask_len + pat_len, GFP_KERNEL);
+ if (!mask_pat) {
err = -ENOMEM;
goto error;
}
- new_triggers.patterns[i].pattern =
- new_triggers.patterns[i].mask + mask_len;
- memcpy(new_triggers.patterns[i].mask,
- nla_data(pat_tb[NL80211_PKTPAT_MASK]),
+ new_triggers.patterns[i].mask = mask_pat;
+ memcpy(mask_pat, nla_data(pat_tb[NL80211_PKTPAT_MASK]),
mask_len);
+ mask_pat += mask_len;
+ new_triggers.patterns[i].pattern = mask_pat;
new_triggers.patterns[i].pattern_len = pat_len;
- memcpy(new_triggers.patterns[i].pattern,
+ memcpy(mask_pat,
nla_data(pat_tb[NL80211_PKTPAT_PATTERN]),
pat_len);
i++;
@@ -8722,6 +8792,8 @@ static int nl80211_parse_coalesce_rule(struct cfg80211_registered_device *rdev,
nla_for_each_nested(pat, tb[NL80211_ATTR_COALESCE_RULE_PKT_PATTERN],
rem) {
+ u8 *mask_pat;
+
nla_parse(pat_tb, MAX_NL80211_PKTPAT, nla_data(pat),
nla_len(pat), NULL);
if (!pat_tb[NL80211_PKTPAT_MASK] ||
@@ -8743,17 +8815,19 @@ static int nl80211_parse_coalesce_rule(struct cfg80211_registered_device *rdev,
return -EINVAL;
new_rule->patterns[i].pkt_offset = pkt_offset;
- new_rule->patterns[i].mask =
- kmalloc(mask_len + pat_len, GFP_KERNEL);
- if (!new_rule->patterns[i].mask)
+ mask_pat = kmalloc(mask_len + pat_len, GFP_KERNEL);
+ if (!mask_pat)
return -ENOMEM;
- new_rule->patterns[i].pattern =
- new_rule->patterns[i].mask + mask_len;
- memcpy(new_rule->patterns[i].mask,
- nla_data(pat_tb[NL80211_PKTPAT_MASK]), mask_len);
+
+ new_rule->patterns[i].mask = mask_pat;
+ memcpy(mask_pat, nla_data(pat_tb[NL80211_PKTPAT_MASK]),
+ mask_len);
+
+ mask_pat += mask_len;
+ new_rule->patterns[i].pattern = mask_pat;
new_rule->patterns[i].pattern_len = pat_len;
- memcpy(new_rule->patterns[i].pattern,
- nla_data(pat_tb[NL80211_PKTPAT_PATTERN]), pat_len);
+ memcpy(mask_pat, nla_data(pat_tb[NL80211_PKTPAT_PATTERN]),
+ pat_len);
i++;
}
@@ -10033,16 +10107,20 @@ static const struct genl_ops nl80211_ops[] = {
/* notification functions */
-void nl80211_notify_dev_rename(struct cfg80211_registered_device *rdev)
+void nl80211_notify_wiphy(struct cfg80211_registered_device *rdev,
+ enum nl80211_commands cmd)
{
struct sk_buff *msg;
struct nl80211_dump_wiphy_state state = {};
+ WARN_ON(cmd != NL80211_CMD_NEW_WIPHY &&
+ cmd != NL80211_CMD_DEL_WIPHY);
+
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg)
return;
- if (nl80211_send_wiphy(rdev, msg, 0, 0, 0, &state) < 0) {
+ if (nl80211_send_wiphy(rdev, cmd, msg, 0, 0, 0, &state) < 0) {
nlmsg_free(msg);
return;
}
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index 1e6df9630f4..49c9a482dd1 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -5,7 +5,8 @@
int nl80211_init(void);
void nl80211_exit(void);
-void nl80211_notify_dev_rename(struct cfg80211_registered_device *rdev);
+void nl80211_notify_wiphy(struct cfg80211_registered_device *rdev,
+ enum nl80211_commands cmd);
void nl80211_send_scan_start(struct cfg80211_registered_device *rdev,
struct wireless_dev *wdev);
struct sk_buff *nl80211_build_scan_msg(struct cfg80211_registered_device *rdev,
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index 00cdf73ba6c..d95bbe34813 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -199,7 +199,7 @@ static inline int rdev_change_station(struct cfg80211_registered_device *rdev,
}
static inline int rdev_get_station(struct cfg80211_registered_device *rdev,
- struct net_device *dev, u8 *mac,
+ struct net_device *dev, const u8 *mac,
struct station_info *sinfo)
{
int ret;
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index e78f532aaa5..1afdf45db38 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -935,7 +935,7 @@ freq_reg_info_regd(struct wiphy *wiphy, u32 center_freq,
if (!band_rule_found)
band_rule_found = freq_in_rule_band(fr, center_freq);
- bw_fits = reg_does_bw_fit(fr, center_freq, MHZ_TO_KHZ(5));
+ bw_fits = reg_does_bw_fit(fr, center_freq, MHZ_TO_KHZ(20));
if (band_rule_found && bw_fits)
return rr;
@@ -1019,10 +1019,10 @@ static void chan_reg_rule_print_dbg(const struct ieee80211_regdomain *regd,
}
#endif
-/* Find an ieee80211_reg_rule such that a 5MHz channel with frequency
- * chan->center_freq fits there.
- * If there is no such reg_rule, disable the channel, otherwise set the
- * flags corresponding to the bandwidths allowed in the particular reg_rule
+/*
+ * Note that right now we assume the desired channel bandwidth
+ * is always 20 MHz for each individual channel (HT40 uses 20 MHz
+ * per channel, the primary and the extension channel).
*/
static void handle_channel(struct wiphy *wiphy,
enum nl80211_reg_initiator initiator,
@@ -1083,12 +1083,8 @@ static void handle_channel(struct wiphy *wiphy,
if (reg_rule->flags & NL80211_RRF_AUTO_BW)
max_bandwidth_khz = reg_get_max_bandwidth(regd, reg_rule);
- if (max_bandwidth_khz < MHZ_TO_KHZ(10))
- bw_flags = IEEE80211_CHAN_NO_10MHZ;
- if (max_bandwidth_khz < MHZ_TO_KHZ(20))
- bw_flags |= IEEE80211_CHAN_NO_20MHZ;
if (max_bandwidth_khz < MHZ_TO_KHZ(40))
- bw_flags |= IEEE80211_CHAN_NO_HT40;
+ bw_flags = IEEE80211_CHAN_NO_HT40;
if (max_bandwidth_khz < MHZ_TO_KHZ(80))
bw_flags |= IEEE80211_CHAN_NO_80MHZ;
if (max_bandwidth_khz < MHZ_TO_KHZ(160))
@@ -1522,12 +1518,8 @@ static void handle_channel_custom(struct wiphy *wiphy,
if (reg_rule->flags & NL80211_RRF_AUTO_BW)
max_bandwidth_khz = reg_get_max_bandwidth(regd, reg_rule);
- if (max_bandwidth_khz < MHZ_TO_KHZ(10))
- bw_flags = IEEE80211_CHAN_NO_10MHZ;
- if (max_bandwidth_khz < MHZ_TO_KHZ(20))
- bw_flags |= IEEE80211_CHAN_NO_20MHZ;
if (max_bandwidth_khz < MHZ_TO_KHZ(40))
- bw_flags |= IEEE80211_CHAN_NO_HT40;
+ bw_flags = IEEE80211_CHAN_NO_HT40;
if (max_bandwidth_khz < MHZ_TO_KHZ(80))
bw_flags |= IEEE80211_CHAN_NO_80MHZ;
if (max_bandwidth_khz < MHZ_TO_KHZ(160))
@@ -1876,7 +1868,8 @@ static void reg_process_hint(struct regulatory_request *reg_request)
case NL80211_REGDOM_SET_BY_USER:
treatment = reg_process_hint_user(reg_request);
if (treatment == REG_REQ_IGNORE ||
- treatment == REG_REQ_ALREADY_SET)
+ treatment == REG_REQ_ALREADY_SET ||
+ treatment == REG_REQ_USER_HINT_HANDLED)
return;
queue_delayed_work(system_power_efficient_wq,
&reg_timeout, msecs_to_jiffies(3142));
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index e7329bb6a32..0798c62e608 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -891,6 +891,7 @@ cfg80211_inform_bss_width(struct wiphy *wiphy,
struct cfg80211_bss_ies *ies;
struct ieee80211_channel *channel;
struct cfg80211_internal_bss tmp = {}, *res;
+ bool signal_valid;
if (WARN_ON(!wiphy))
return NULL;
@@ -927,8 +928,9 @@ cfg80211_inform_bss_width(struct wiphy *wiphy,
rcu_assign_pointer(tmp.pub.beacon_ies, ies);
rcu_assign_pointer(tmp.pub.ies, ies);
- res = cfg80211_bss_update(wiphy_to_rdev(wiphy), &tmp,
- rx_channel == channel);
+ signal_valid = abs(rx_channel->center_freq - channel->center_freq) <=
+ wiphy->max_adj_channel_rssi_comp;
+ res = cfg80211_bss_update(wiphy_to_rdev(wiphy), &tmp, signal_valid);
if (!res)
return NULL;
@@ -952,6 +954,7 @@ cfg80211_inform_bss_width_frame(struct wiphy *wiphy,
struct cfg80211_internal_bss tmp = {}, *res;
struct cfg80211_bss_ies *ies;
struct ieee80211_channel *channel;
+ bool signal_valid;
size_t ielen = len - offsetof(struct ieee80211_mgmt,
u.probe_resp.variable);
@@ -999,8 +1002,9 @@ cfg80211_inform_bss_width_frame(struct wiphy *wiphy,
tmp.pub.beacon_interval = le16_to_cpu(mgmt->u.probe_resp.beacon_int);
tmp.pub.capability = le16_to_cpu(mgmt->u.probe_resp.capab_info);
- res = cfg80211_bss_update(wiphy_to_rdev(wiphy), &tmp,
- rx_channel == channel);
+ signal_valid = abs(rx_channel->center_freq - channel->center_freq) <=
+ wiphy->max_adj_channel_rssi_comp;
+ res = cfg80211_bss_update(wiphy_to_rdev(wiphy), &tmp, signal_valid);
if (!res)
return NULL;
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index 0c0844b585d..8bbeeb30221 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -149,7 +149,8 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev)
case CFG80211_CONN_SCAN_AGAIN:
return cfg80211_conn_scan(wdev);
case CFG80211_CONN_AUTHENTICATE_NEXT:
- BUG_ON(!rdev->ops->auth);
+ if (WARN_ON(!rdev->ops->auth))
+ return -EOPNOTSUPP;
wdev->conn->state = CFG80211_CONN_AUTHENTICATING;
return cfg80211_mlme_auth(rdev, wdev->netdev,
params->channel, params->auth_type,
@@ -161,7 +162,8 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev)
case CFG80211_CONN_AUTH_FAILED:
return -ENOTCONN;
case CFG80211_CONN_ASSOCIATE_NEXT:
- BUG_ON(!rdev->ops->assoc);
+ if (WARN_ON(!rdev->ops->assoc))
+ return -EOPNOTSUPP;
wdev->conn->state = CFG80211_CONN_ASSOCIATING;
if (wdev->conn->prev_bssid_valid)
req.prev_bssid = wdev->conn->prev_bssid;
@@ -877,7 +879,7 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie,
}
void cfg80211_disconnected(struct net_device *dev, u16 reason,
- u8 *ie, size_t ie_len, gfp_t gfp)
+ const u8 *ie, size_t ie_len, gfp_t gfp)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index f3c13ff4d04..7cc887f9da1 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -1876,29 +1876,33 @@ TRACE_EVENT(rdev_channel_switch,
WIPHY_ENTRY
NETDEV_ENTRY
CHAN_DEF_ENTRY
- __field(u16, counter_offset_beacon)
- __field(u16, counter_offset_presp)
__field(bool, radar_required)
__field(bool, block_tx)
__field(u8, count)
+ __dynamic_array(u16, bcn_ofs, params->n_counter_offsets_beacon)
+ __dynamic_array(u16, pres_ofs, params->n_counter_offsets_presp)
),
TP_fast_assign(
WIPHY_ASSIGN;
NETDEV_ASSIGN;
CHAN_DEF_ASSIGN(&params->chandef);
- __entry->counter_offset_beacon = params->counter_offset_beacon;
- __entry->counter_offset_presp = params->counter_offset_presp;
__entry->radar_required = params->radar_required;
__entry->block_tx = params->block_tx;
__entry->count = params->count;
+ memcpy(__get_dynamic_array(bcn_ofs),
+ params->counter_offsets_beacon,
+ params->n_counter_offsets_beacon * sizeof(u16));
+
+ /* probe response offsets are optional */
+ if (params->n_counter_offsets_presp)
+ memcpy(__get_dynamic_array(pres_ofs),
+ params->counter_offsets_presp,
+ params->n_counter_offsets_presp * sizeof(u16));
),
TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT
- ", block_tx: %d, count: %u, radar_required: %d"
- ", counter offsets (beacon/presp): %u/%u",
+ ", block_tx: %d, count: %u, radar_required: %d",
WIPHY_PR_ARG, NETDEV_PR_ARG, CHAN_DEF_PR_ARG,
- __entry->block_tx, __entry->count, __entry->radar_required,
- __entry->counter_offset_beacon,
- __entry->counter_offset_presp)
+ __entry->block_tx, __entry->count, __entry->radar_required)
);
TRACE_EVENT(rdev_set_qos_map,
@@ -2090,7 +2094,8 @@ TRACE_EVENT(cfg80211_michael_mic_failure,
MAC_ASSIGN(addr, addr);
__entry->key_type = key_type;
__entry->key_id = key_id;
- memcpy(__entry->tsc, tsc, 6);
+ if (tsc)
+ memcpy(__entry->tsc, tsc, 6);
),
TP_printk(NETDEV_PR_FMT ", " MAC_PR_FMT ", key type: %d, key id: %d, tsc: %pm",
NETDEV_PR_ARG, MAC_PR_ARG(addr), __entry->key_type,
@@ -2636,6 +2641,21 @@ TRACE_EVENT(cfg80211_ft_event,
WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(target_ap))
);
+TRACE_EVENT(cfg80211_stop_iface,
+ TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev),
+ TP_ARGS(wiphy, wdev),
+ TP_STRUCT__entry(
+ WIPHY_ENTRY
+ WDEV_ENTRY
+ ),
+ TP_fast_assign(
+ WIPHY_ASSIGN;
+ WDEV_ASSIGN;
+ ),
+ TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT,
+ WIPHY_PR_ARG, WDEV_PR_ARG)
+);
+
#endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */
#undef TRACE_INCLUDE_PATH
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 7c47fa07b27..728f1c0dc70 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -476,7 +476,8 @@ int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr,
EXPORT_SYMBOL(ieee80211_data_to_8023);
int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr,
- enum nl80211_iftype iftype, u8 *bssid, bool qos)
+ enum nl80211_iftype iftype,
+ const u8 *bssid, bool qos)
{
struct ieee80211_hdr hdr;
u16 hdrlen, ethertype;
@@ -839,6 +840,9 @@ void cfg80211_process_wdev_events(struct wireless_dev *wdev)
__cfg80211_ibss_joined(wdev->netdev, ev->ij.bssid,
ev->ij.channel);
break;
+ case EVENT_STOPPED:
+ __cfg80211_leave(wiphy_to_rdev(wdev->wiphy), wdev);
+ break;
}
wdev_unlock(wdev);
@@ -1271,10 +1275,20 @@ int cfg80211_iter_combinations(struct wiphy *wiphy,
void *data),
void *data)
{
+ const struct ieee80211_regdomain *regdom;
+ enum nl80211_dfs_regions region = 0;
int i, j, iftype;
int num_interfaces = 0;
u32 used_iftypes = 0;
+ if (radar_detect) {
+ rcu_read_lock();
+ regdom = rcu_dereference(cfg80211_regdomain);
+ if (regdom)
+ region = regdom->dfs_region;
+ rcu_read_unlock();
+ }
+
for (iftype = 0; iftype < NUM_NL80211_IFTYPES; iftype++) {
num_interfaces += iftype_num[iftype];
if (iftype_num[iftype] > 0 &&
@@ -1315,6 +1329,10 @@ int cfg80211_iter_combinations(struct wiphy *wiphy,
if (radar_detect != (c->radar_detect_widths & radar_detect))
goto cont;
+ if (radar_detect && c->radar_detect_regions &&
+ !(c->radar_detect_regions & BIT(region)))
+ goto cont;
+
/* Finally check that all iftypes that we're currently
* using are actually part of this combination. If they
* aren't then we can't use this combination and have
@@ -1528,6 +1546,24 @@ unsigned int ieee80211_get_num_supported_channels(struct wiphy *wiphy)
}
EXPORT_SYMBOL(ieee80211_get_num_supported_channels);
+int cfg80211_get_station(struct net_device *dev, const u8 *mac_addr,
+ struct station_info *sinfo)
+{
+ struct cfg80211_registered_device *rdev;
+ struct wireless_dev *wdev;
+
+ wdev = dev->ieee80211_ptr;
+ if (!wdev)
+ return -EOPNOTSUPP;
+
+ rdev = wiphy_to_rdev(wdev->wiphy);
+ if (!rdev->ops->get_station)
+ return -EOPNOTSUPP;
+
+ return rdev_get_station(rdev, dev, mac_addr, sinfo);
+}
+EXPORT_SYMBOL(cfg80211_get_station);
+
/* See IEEE 802.1H for LLC/SNAP encapsulation/decapsulation */
/* Ethernet-II snap header (RFC1042 for most EtherTypes) */
const unsigned char rfc1042_header[] __aligned(2) =
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 3bb2cdc13b4..c51e8f7b865 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -199,6 +199,7 @@ int xfrm_output(struct sk_buff *skb)
return xfrm_output2(skb);
}
+EXPORT_SYMBOL_GPL(xfrm_output);
int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb)
{
@@ -213,6 +214,7 @@ int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb)
return -EAFNOSUPPORT;
return inner_mode->afinfo->extract_output(x, skb);
}
+EXPORT_SYMBOL_GPL(xfrm_inner_extract_output);
void xfrm_local_error(struct sk_buff *skb, int mtu)
{
@@ -233,7 +235,4 @@ void xfrm_local_error(struct sk_buff *skb, int mtu)
afinfo->local_error(skb, mtu);
xfrm_state_put_afinfo(afinfo);
}
-
-EXPORT_SYMBOL_GPL(xfrm_output);
-EXPORT_SYMBOL_GPL(xfrm_inner_extract_output);
EXPORT_SYMBOL_GPL(xfrm_local_error);
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index e63f242ae03..0525d78ba32 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -769,7 +769,7 @@ EXPORT_SYMBOL(xfrm_policy_byid);
#ifdef CONFIG_SECURITY_NETWORK_XFRM
static inline int
-xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audit_info)
+xfrm_policy_flush_secctx_check(struct net *net, u8 type, bool task_valid)
{
int dir, err = 0;
@@ -783,10 +783,7 @@ xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audi
continue;
err = security_xfrm_policy_delete(pol->security);
if (err) {
- xfrm_audit_policy_delete(pol, 0,
- audit_info->loginuid,
- audit_info->sessionid,
- audit_info->secid);
+ xfrm_audit_policy_delete(pol, 0, task_valid);
return err;
}
}
@@ -800,9 +797,7 @@ xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audi
pol->security);
if (err) {
xfrm_audit_policy_delete(pol, 0,
- audit_info->loginuid,
- audit_info->sessionid,
- audit_info->secid);
+ task_valid);
return err;
}
}
@@ -812,19 +807,19 @@ xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audi
}
#else
static inline int
-xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audit_info)
+xfrm_policy_flush_secctx_check(struct net *net, u8 type, bool task_valid)
{
return 0;
}
#endif
-int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info)
+int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
{
int dir, err = 0, cnt = 0;
write_lock_bh(&net->xfrm.xfrm_policy_lock);
- err = xfrm_policy_flush_secctx_check(net, type, audit_info);
+ err = xfrm_policy_flush_secctx_check(net, type, task_valid);
if (err)
goto out;
@@ -841,9 +836,7 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info)
write_unlock_bh(&net->xfrm.xfrm_policy_lock);
cnt++;
- xfrm_audit_policy_delete(pol, 1, audit_info->loginuid,
- audit_info->sessionid,
- audit_info->secid);
+ xfrm_audit_policy_delete(pol, 1, task_valid);
xfrm_policy_kill(pol);
@@ -862,10 +855,7 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info)
write_unlock_bh(&net->xfrm.xfrm_policy_lock);
cnt++;
- xfrm_audit_policy_delete(pol, 1,
- audit_info->loginuid,
- audit_info->sessionid,
- audit_info->secid);
+ xfrm_audit_policy_delete(pol, 1, task_valid);
xfrm_policy_kill(pol);
write_lock_bh(&net->xfrm.xfrm_policy_lock);
@@ -2107,6 +2097,8 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
goto no_transform;
}
+ dst_hold(&xdst->u.dst);
+ xdst->u.dst.flags |= DST_NOCACHE;
route = xdst->route;
}
}
@@ -2860,21 +2852,14 @@ out_byidx:
static void xfrm_policy_fini(struct net *net)
{
- struct xfrm_audit audit_info;
unsigned int sz;
int dir;
flush_work(&net->xfrm.policy_hash_work);
#ifdef CONFIG_XFRM_SUB_POLICY
- audit_info.loginuid = INVALID_UID;
- audit_info.sessionid = (unsigned int)-1;
- audit_info.secid = 0;
- xfrm_policy_flush(net, XFRM_POLICY_TYPE_SUB, &audit_info);
+ xfrm_policy_flush(net, XFRM_POLICY_TYPE_SUB, false);
#endif
- audit_info.loginuid = INVALID_UID;
- audit_info.sessionid = (unsigned int)-1;
- audit_info.secid = 0;
- xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, &audit_info);
+ xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, false);
WARN_ON(!list_empty(&net->xfrm.policy_all));
@@ -2989,15 +2974,14 @@ static void xfrm_audit_common_policyinfo(struct xfrm_policy *xp,
}
}
-void xfrm_audit_policy_add(struct xfrm_policy *xp, int result,
- kuid_t auid, unsigned int sessionid, u32 secid)
+void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, bool task_valid)
{
struct audit_buffer *audit_buf;
audit_buf = xfrm_audit_start("SPD-add");
if (audit_buf == NULL)
return;
- xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf);
+ xfrm_audit_helper_usrinfo(task_valid, audit_buf);
audit_log_format(audit_buf, " res=%u", result);
xfrm_audit_common_policyinfo(xp, audit_buf);
audit_log_end(audit_buf);
@@ -3005,14 +2989,14 @@ void xfrm_audit_policy_add(struct xfrm_policy *xp, int result,
EXPORT_SYMBOL_GPL(xfrm_audit_policy_add);
void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result,
- kuid_t auid, unsigned int sessionid, u32 secid)
+ bool task_valid)
{
struct audit_buffer *audit_buf;
audit_buf = xfrm_audit_start("SPD-delete");
if (audit_buf == NULL)
return;
- xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf);
+ xfrm_audit_helper_usrinfo(task_valid, audit_buf);
audit_log_format(audit_buf, " res=%u", result);
xfrm_audit_common_policyinfo(xp, audit_buf);
audit_log_end(audit_buf);
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 8e9c781a6bb..0ab54134bb4 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -463,9 +463,7 @@ expired:
if (!err)
km_state_expired(x, 1, 0);
- xfrm_audit_state_delete(x, err ? 0 : 1,
- audit_get_loginuid(current),
- audit_get_sessionid(current), 0);
+ xfrm_audit_state_delete(x, err ? 0 : 1, true);
out:
spin_unlock(&x->lock);
@@ -562,7 +560,7 @@ EXPORT_SYMBOL(xfrm_state_delete);
#ifdef CONFIG_SECURITY_NETWORK_XFRM
static inline int
-xfrm_state_flush_secctx_check(struct net *net, u8 proto, struct xfrm_audit *audit_info)
+xfrm_state_flush_secctx_check(struct net *net, u8 proto, bool task_valid)
{
int i, err = 0;
@@ -572,10 +570,7 @@ xfrm_state_flush_secctx_check(struct net *net, u8 proto, struct xfrm_audit *audi
hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
if (xfrm_id_proto_match(x->id.proto, proto) &&
(err = security_xfrm_state_delete(x)) != 0) {
- xfrm_audit_state_delete(x, 0,
- audit_info->loginuid,
- audit_info->sessionid,
- audit_info->secid);
+ xfrm_audit_state_delete(x, 0, task_valid);
return err;
}
}
@@ -585,18 +580,18 @@ xfrm_state_flush_secctx_check(struct net *net, u8 proto, struct xfrm_audit *audi
}
#else
static inline int
-xfrm_state_flush_secctx_check(struct net *net, u8 proto, struct xfrm_audit *audit_info)
+xfrm_state_flush_secctx_check(struct net *net, u8 proto, bool task_valid)
{
return 0;
}
#endif
-int xfrm_state_flush(struct net *net, u8 proto, struct xfrm_audit *audit_info)
+int xfrm_state_flush(struct net *net, u8 proto, bool task_valid)
{
int i, err = 0, cnt = 0;
spin_lock_bh(&net->xfrm.xfrm_state_lock);
- err = xfrm_state_flush_secctx_check(net, proto, audit_info);
+ err = xfrm_state_flush_secctx_check(net, proto, task_valid);
if (err)
goto out;
@@ -612,9 +607,7 @@ restart:
err = xfrm_state_delete(x);
xfrm_audit_state_delete(x, err ? 0 : 1,
- audit_info->loginuid,
- audit_info->sessionid,
- audit_info->secid);
+ task_valid);
xfrm_state_put(x);
if (!err)
cnt++;
@@ -2128,14 +2121,10 @@ out_bydst:
void xfrm_state_fini(struct net *net)
{
- struct xfrm_audit audit_info;
unsigned int sz;
flush_work(&net->xfrm.state_hash_work);
- audit_info.loginuid = INVALID_UID;
- audit_info.sessionid = (unsigned int)-1;
- audit_info.secid = 0;
- xfrm_state_flush(net, IPSEC_PROTO_ANY, &audit_info);
+ xfrm_state_flush(net, IPSEC_PROTO_ANY, false);
flush_work(&net->xfrm.state_gc_work);
WARN_ON(!list_empty(&net->xfrm.state_all));
@@ -2198,30 +2187,28 @@ static void xfrm_audit_helper_pktinfo(struct sk_buff *skb, u16 family,
}
}
-void xfrm_audit_state_add(struct xfrm_state *x, int result,
- kuid_t auid, unsigned int sessionid, u32 secid)
+void xfrm_audit_state_add(struct xfrm_state *x, int result, bool task_valid)
{
struct audit_buffer *audit_buf;
audit_buf = xfrm_audit_start("SAD-add");
if (audit_buf == NULL)
return;
- xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf);
+ xfrm_audit_helper_usrinfo(task_valid, audit_buf);
xfrm_audit_helper_sainfo(x, audit_buf);
audit_log_format(audit_buf, " res=%u", result);
audit_log_end(audit_buf);
}
EXPORT_SYMBOL_GPL(xfrm_audit_state_add);
-void xfrm_audit_state_delete(struct xfrm_state *x, int result,
- kuid_t auid, unsigned int sessionid, u32 secid)
+void xfrm_audit_state_delete(struct xfrm_state *x, int result, bool task_valid)
{
struct audit_buffer *audit_buf;
audit_buf = xfrm_audit_start("SAD-delete");
if (audit_buf == NULL)
return;
- xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf);
+ xfrm_audit_helper_usrinfo(task_valid, audit_buf);
xfrm_audit_helper_sainfo(x, audit_buf);
audit_log_format(audit_buf, " res=%u", result);
audit_log_end(audit_buf);
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 51398ae6cda..d4db6ebb089 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -177,9 +177,7 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
attrs[XFRMA_ALG_AEAD] ||
attrs[XFRMA_ALG_CRYPT] ||
attrs[XFRMA_ALG_COMP] ||
- attrs[XFRMA_TFCPAD] ||
- (ntohl(p->id.spi) >= 0x10000))
-
+ attrs[XFRMA_TFCPAD])
goto out;
break;
@@ -207,7 +205,8 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
attrs[XFRMA_ALG_AUTH] ||
attrs[XFRMA_ALG_AUTH_TRUNC] ||
attrs[XFRMA_ALG_CRYPT] ||
- attrs[XFRMA_TFCPAD])
+ attrs[XFRMA_TFCPAD] ||
+ (ntohl(p->id.spi) >= 0x10000))
goto out;
break;
@@ -597,9 +596,6 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
struct xfrm_state *x;
int err;
struct km_event c;
- kuid_t loginuid = audit_get_loginuid(current);
- unsigned int sessionid = audit_get_sessionid(current);
- u32 sid;
err = verify_newsa_info(p, attrs);
if (err)
@@ -615,8 +611,7 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
else
err = xfrm_state_update(x);
- security_task_getsecid(current, &sid);
- xfrm_audit_state_add(x, err ? 0 : 1, loginuid, sessionid, sid);
+ xfrm_audit_state_add(x, err ? 0 : 1, true);
if (err < 0) {
x->km.state = XFRM_STATE_DEAD;
@@ -676,9 +671,6 @@ static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
int err = -ESRCH;
struct km_event c;
struct xfrm_usersa_id *p = nlmsg_data(nlh);
- kuid_t loginuid = audit_get_loginuid(current);
- unsigned int sessionid = audit_get_sessionid(current);
- u32 sid;
x = xfrm_user_state_lookup(net, p, attrs, &err);
if (x == NULL)
@@ -703,8 +695,7 @@ static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
km_state_notify(x, &c);
out:
- security_task_getsecid(current, &sid);
- xfrm_audit_state_delete(x, err ? 0 : 1, loginuid, sessionid, sid);
+ xfrm_audit_state_delete(x, err ? 0 : 1, true);
xfrm_state_put(x);
return err;
}
@@ -955,6 +946,20 @@ static struct sk_buff *xfrm_state_netlink(struct sk_buff *in_skb,
return skb;
}
+/* A wrapper for nlmsg_multicast() checking that nlsk is still available.
+ * Must be called with RCU read lock.
+ */
+static inline int xfrm_nlmsg_multicast(struct net *net, struct sk_buff *skb,
+ u32 pid, unsigned int group)
+{
+ struct sock *nlsk = rcu_dereference(net->xfrm.nlsk);
+
+ if (nlsk)
+ return nlmsg_multicast(nlsk, skb, pid, group, GFP_ATOMIC);
+ else
+ return -1;
+}
+
static inline size_t xfrm_spdinfo_msgsize(void)
{
return NLMSG_ALIGN(4)
@@ -1414,9 +1419,6 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
struct km_event c;
int err;
int excl;
- kuid_t loginuid = audit_get_loginuid(current);
- unsigned int sessionid = audit_get_sessionid(current);
- u32 sid;
err = verify_newpolicy_info(p);
if (err)
@@ -1435,8 +1437,7 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
* a type XFRM_MSG_UPDPOLICY - JHS */
excl = nlh->nlmsg_type == XFRM_MSG_NEWPOLICY;
err = xfrm_policy_insert(p->dir, xp, excl);
- security_task_getsecid(current, &sid);
- xfrm_audit_policy_add(xp, err ? 0 : 1, loginuid, sessionid, sid);
+ xfrm_audit_policy_add(xp, err ? 0 : 1, true);
if (err) {
security_xfrm_policy_free(xp->security);
@@ -1673,13 +1674,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
NETLINK_CB(skb).portid);
}
} else {
- kuid_t loginuid = audit_get_loginuid(current);
- unsigned int sessionid = audit_get_sessionid(current);
- u32 sid;
-
- security_task_getsecid(current, &sid);
- xfrm_audit_policy_delete(xp, err ? 0 : 1, loginuid, sessionid,
- sid);
+ xfrm_audit_policy_delete(xp, err ? 0 : 1, true);
if (err != 0)
goto out;
@@ -1704,13 +1699,9 @@ static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
struct net *net = sock_net(skb->sk);
struct km_event c;
struct xfrm_usersa_flush *p = nlmsg_data(nlh);
- struct xfrm_audit audit_info;
int err;
- audit_info.loginuid = audit_get_loginuid(current);
- audit_info.sessionid = audit_get_sessionid(current);
- security_task_getsecid(current, &audit_info.secid);
- err = xfrm_state_flush(net, p->proto, &audit_info);
+ err = xfrm_state_flush(net, p->proto, true);
if (err) {
if (err == -ESRCH) /* empty table */
return 0;
@@ -1894,16 +1885,12 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
struct km_event c;
u8 type = XFRM_POLICY_TYPE_MAIN;
int err;
- struct xfrm_audit audit_info;
err = copy_from_user_policy_type(&type, attrs);
if (err)
return err;
- audit_info.loginuid = audit_get_loginuid(current);
- audit_info.sessionid = audit_get_sessionid(current);
- security_task_getsecid(current, &audit_info.secid);
- err = xfrm_policy_flush(net, type, &audit_info);
+ err = xfrm_policy_flush(net, type, true);
if (err) {
if (err == -ESRCH) /* empty table */
return 0;
@@ -1969,14 +1956,8 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
err = 0;
if (up->hard) {
- kuid_t loginuid = audit_get_loginuid(current);
- unsigned int sessionid = audit_get_sessionid(current);
- u32 sid;
-
- security_task_getsecid(current, &sid);
xfrm_policy_delete(xp, p->dir);
- xfrm_audit_policy_delete(xp, 1, loginuid, sessionid, sid);
-
+ xfrm_audit_policy_delete(xp, 1, true);
} else {
// reset the timers here?
WARN(1, "Dont know what to do with soft policy expire\n");
@@ -2012,13 +1993,8 @@ static int xfrm_add_sa_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
km_state_expired(x, ue->hard, nlh->nlmsg_pid);
if (ue->hard) {
- kuid_t loginuid = audit_get_loginuid(current);
- unsigned int sessionid = audit_get_sessionid(current);
- u32 sid;
-
- security_task_getsecid(current, &sid);
__xfrm_state_delete(x);
- xfrm_audit_state_delete(x, 1, loginuid, sessionid, sid);
+ xfrm_audit_state_delete(x, 1, true);
}
err = 0;
out:
@@ -2265,7 +2241,7 @@ static int xfrm_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
if (build_migrate(skb, m, num_migrate, k, sel, dir, type) < 0)
BUG();
- return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_MIGRATE, GFP_ATOMIC);
+ return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_MIGRATE);
}
#else
static int xfrm_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
@@ -2456,7 +2432,7 @@ static int xfrm_exp_state_notify(struct xfrm_state *x, const struct km_event *c)
return -EMSGSIZE;
}
- return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC);
+ return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_EXPIRE);
}
static int xfrm_aevent_state_notify(struct xfrm_state *x, const struct km_event *c)
@@ -2471,7 +2447,7 @@ static int xfrm_aevent_state_notify(struct xfrm_state *x, const struct km_event
if (build_aevent(skb, x, c) < 0)
BUG();
- return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_AEVENTS, GFP_ATOMIC);
+ return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_AEVENTS);
}
static int xfrm_notify_sa_flush(const struct km_event *c)
@@ -2497,7 +2473,7 @@ static int xfrm_notify_sa_flush(const struct km_event *c)
nlmsg_end(skb, nlh);
- return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_SA, GFP_ATOMIC);
+ return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_SA);
}
static inline size_t xfrm_sa_len(struct xfrm_state *x)
@@ -2584,7 +2560,7 @@ static int xfrm_notify_sa(struct xfrm_state *x, const struct km_event *c)
nlmsg_end(skb, nlh);
- return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_SA, GFP_ATOMIC);
+ return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_SA);
out_free_skb:
kfree_skb(skb);
@@ -2675,7 +2651,7 @@ static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt,
if (build_acquire(skb, x, xt, xp) < 0)
BUG();
- return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_ACQUIRE, GFP_ATOMIC);
+ return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_ACQUIRE);
}
/* User gives us xfrm_user_policy_info followed by an array of 0
@@ -2789,7 +2765,7 @@ static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, const struct
if (build_polexpire(skb, xp, dir, c) < 0)
BUG();
- return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC);
+ return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_EXPIRE);
}
static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, const struct km_event *c)
@@ -2851,7 +2827,7 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, const struct km_e
nlmsg_end(skb, nlh);
- return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC);
+ return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_POLICY);
out_free_skb:
kfree_skb(skb);
@@ -2879,7 +2855,7 @@ static int xfrm_notify_policy_flush(const struct km_event *c)
nlmsg_end(skb, nlh);
- return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC);
+ return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_POLICY);
out_free_skb:
kfree_skb(skb);
@@ -2948,7 +2924,7 @@ static int xfrm_send_report(struct net *net, u8 proto,
if (build_report(skb, proto, sel, addr) < 0)
BUG();
- return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_REPORT, GFP_ATOMIC);
+ return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_REPORT);
}
static inline size_t xfrm_mapping_msgsize(void)
@@ -3000,7 +2976,7 @@ static int xfrm_send_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr,
if (build_mapping(skb, x, ipaddr, sport) < 0)
BUG();
- return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_MAPPING, GFP_ATOMIC);
+ return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_MAPPING);
}
static bool xfrm_is_alive(const struct km_event *c)