aboutsummaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/8021q/vlan.c1
-rw-r--r--net/8021q/vlan.h4
-rw-r--r--net/8021q/vlan_core.c21
-rw-r--r--net/8021q/vlan_dev.c103
-rw-r--r--net/8021q/vlan_netlink.c4
-rw-r--r--net/9p/client.c25
-rw-r--r--net/9p/trans_fd.c110
-rw-r--r--net/9p/trans_rdma.c26
-rw-r--r--net/9p/trans_virtio.c3
-rw-r--r--net/Kconfig6
-rw-r--r--net/appletalk/aarp.c4
-rw-r--r--net/appletalk/ddp.c105
-rw-r--r--net/atm/clip.c2
-rw-r--r--net/atm/lec.c10
-rw-r--r--net/atm/mpc.c8
-rw-r--r--net/atm/pppoatm.c2
-rw-r--r--net/atm/raw.c2
-rw-r--r--net/atm/signaling.c2
-rw-r--r--net/atm/svc.c10
-rw-r--r--net/ax25/ax25_in.c2
-rw-r--r--net/batman-adv/Kconfig9
-rw-r--r--net/batman-adv/Makefile1
-rw-r--r--net/batman-adv/bat_iv_ogm.c12
-rw-r--r--net/batman-adv/bridge_loop_avoidance.c77
-rw-r--r--net/batman-adv/debugfs.c13
-rw-r--r--net/batman-adv/distributed-arp-table.c15
-rw-r--r--net/batman-adv/distributed-arp-table.h3
-rw-r--r--net/batman-adv/fragmentation.c15
-rw-r--r--net/batman-adv/gateway_client.c21
-rw-r--r--net/batman-adv/hard-interface.c2
-rw-r--r--net/batman-adv/icmp_socket.c11
-rw-r--r--net/batman-adv/main.c19
-rw-r--r--net/batman-adv/main.h4
-rw-r--r--net/batman-adv/multicast.c748
-rw-r--r--net/batman-adv/multicast.h80
-rw-r--r--net/batman-adv/network-coding.c29
-rw-r--r--net/batman-adv/originator.c73
-rw-r--r--net/batman-adv/packet.h53
-rw-r--r--net/batman-adv/routing.c17
-rw-r--r--net/batman-adv/send.c31
-rw-r--r--net/batman-adv/send.h7
-rw-r--r--net/batman-adv/soft-interface.c110
-rw-r--r--net/batman-adv/sysfs.c20
-rw-r--r--net/batman-adv/translation-table.c166
-rw-r--r--net/batman-adv/translation-table.h2
-rw-r--r--net/batman-adv/types.h92
-rw-r--r--net/bluetooth/6lowpan.c67
-rw-r--r--net/bluetooth/6lowpan.h21
-rw-r--r--net/bluetooth/Kconfig8
-rw-r--r--net/bluetooth/Makefile3
-rw-r--r--net/bluetooth/a2mp.c20
-rw-r--r--net/bluetooth/af_bluetooth.c2
-rw-r--r--net/bluetooth/hci_conn.c333
-rw-r--r--net/bluetooth/hci_core.c1265
-rw-r--r--net/bluetooth/hci_event.c854
-rw-r--r--net/bluetooth/hci_sock.c34
-rw-r--r--net/bluetooth/hci_sysfs.c18
-rw-r--r--net/bluetooth/hidp/core.c100
-rw-r--r--net/bluetooth/l2cap_core.c722
-rw-r--r--net/bluetooth/l2cap_sock.c85
-rw-r--r--net/bluetooth/lib.c1
-rw-r--r--net/bluetooth/mgmt.c1342
-rw-r--r--net/bluetooth/rfcomm/core.c102
-rw-r--r--net/bluetooth/rfcomm/sock.c38
-rw-r--r--net/bluetooth/rfcomm/tty.c276
-rw-r--r--net/bluetooth/sco.c12
-rw-r--r--net/bluetooth/smp.c750
-rw-r--r--net/bluetooth/smp.h29
-rw-r--r--net/bridge/Makefile4
-rw-r--r--net/bridge/br.c98
-rw-r--r--net/bridge/br_device.c46
-rw-r--r--net/bridge/br_fdb.c142
-rw-r--r--net/bridge/br_forward.c9
-rw-r--r--net/bridge/br_if.c128
-rw-r--r--net/bridge/br_input.c14
-rw-r--r--net/bridge/br_mdb.c4
-rw-r--r--net/bridge/br_multicast.c386
-rw-r--r--net/bridge/br_netfilter.c10
-rw-r--r--net/bridge/br_netlink.c18
-rw-r--r--net/bridge/br_notify.c118
-rw-r--r--net/bridge/br_private.h90
-rw-r--r--net/bridge/br_sysfs_br.c26
-rw-r--r--net/bridge/br_sysfs_if.c30
-rw-r--r--net/bridge/br_vlan.c195
-rw-r--r--net/bridge/netfilter/Kconfig17
-rw-r--r--net/bridge/netfilter/Makefile1
-rw-r--r--net/bridge/netfilter/ebt_among.c2
-rw-r--r--net/bridge/netfilter/ebt_dnat.c2
-rw-r--r--net/bridge/netfilter/ebt_redirect.c6
-rw-r--r--net/bridge/netfilter/ebt_snat.c2
-rw-r--r--net/bridge/netfilter/ebtables.c5
-rw-r--r--net/bridge/netfilter/nft_meta_bridge.c139
-rw-r--r--net/caif/caif_socket.c4
-rw-r--r--net/can/af_can.c31
-rw-r--r--net/can/af_can.h9
-rw-r--r--net/can/gw.c4
-rw-r--r--net/can/proc.c76
-rw-r--r--net/ceph/ceph_common.c2
-rw-r--r--net/ceph/crush/mapper.c85
-rw-r--r--net/ceph/debugfs.c63
-rw-r--r--net/ceph/messenger.c28
-rw-r--r--net/ceph/mon_client.c150
-rw-r--r--net/ceph/osd_client.c45
-rw-r--r--net/ceph/osdmap.c999
-rw-r--r--net/ceph/pagevec.c35
-rw-r--r--net/compat.c9
-rw-r--r--net/core/Makefile3
-rw-r--r--net/core/datagram.c26
-rw-r--r--net/core/dev.c439
-rw-r--r--net/core/dev_addr_lists.c85
-rw-r--r--net/core/dst.c31
-rw-r--r--net/core/ethtool.c216
-rw-r--r--net/core/filter.c1857
-rw-r--r--net/core/flow.c140
-rw-r--r--net/core/flow_dissector.c24
-rw-r--r--net/core/iovec.c61
-rw-r--r--net/core/link_watch.c2
-rw-r--r--net/core/neighbour.c24
-rw-r--r--net/core/net-sysfs.c24
-rw-r--r--net/core/net_namespace.c4
-rw-r--r--net/core/netclassid_cgroup.c17
-rw-r--r--net/core/netpoll.c587
-rw-r--r--net/core/netprio_cgroup.c53
-rw-r--r--net/core/pktgen.c92
-rw-r--r--net/core/ptp_classifier.c141
-rw-r--r--net/core/request_sock.c1
-rw-r--r--net/core/rtnetlink.c258
-rw-r--r--net/core/secure_seq.c25
-rw-r--r--net/core/skbuff.c217
-rw-r--r--net/core/sock.c57
-rw-r--r--net/core/sock_diag.c27
-rw-r--r--net/core/timestamping.c19
-rw-r--r--net/core/tso.c77
-rw-r--r--net/core/utils.c8
-rw-r--r--net/dcb/dcbnl.c2
-rw-r--r--net/dccp/input.c2
-rw-r--r--net/dccp/ipv4.c1
-rw-r--r--net/dccp/minisocks.c2
-rw-r--r--net/dccp/output.c2
-rw-r--r--net/dccp/proto.c9
-rw-r--r--net/dccp/sysctl.c3
-rw-r--r--net/dccp/timer.c2
-rw-r--r--net/decnet/af_decnet.c2
-rw-r--r--net/decnet/dn_dev.c4
-rw-r--r--net/decnet/dn_fib.c4
-rw-r--r--net/decnet/dn_nsp_in.c4
-rw-r--r--net/decnet/dn_route.c16
-rw-r--r--net/decnet/netfilter/dn_rtmsg.c2
-rw-r--r--net/dns_resolver/dns_query.c8
-rw-r--r--net/dsa/dsa.c3
-rw-r--r--net/dsa/slave.c2
-rw-r--r--net/hsr/hsr_device.c10
-rw-r--r--net/hsr/hsr_framereg.c20
-rw-r--r--net/hsr/hsr_main.c6
-rw-r--r--net/ieee802154/6lowpan.h319
-rw-r--r--net/ieee802154/6lowpan_iphc.c3
-rw-r--r--net/ieee802154/6lowpan_rtnl.c (renamed from net/ieee802154/6lowpan.c)539
-rw-r--r--net/ieee802154/Kconfig2
-rw-r--r--net/ieee802154/Makefile6
-rw-r--r--net/ieee802154/af802154.h5
-rw-r--r--net/ieee802154/af_ieee802154.c22
-rw-r--r--net/ieee802154/dgram.c154
-rw-r--r--net/ieee802154/header_ops.c325
-rw-r--r--net/ieee802154/ieee802154.h20
-rw-r--r--net/ieee802154/netlink.c21
-rw-r--r--net/ieee802154/nl-mac.c1029
-rw-r--r--net/ieee802154/nl_policy.c26
-rw-r--r--net/ieee802154/raw.c18
-rw-r--r--net/ieee802154/reassembly.c585
-rw-r--r--net/ieee802154/reassembly.h41
-rw-r--r--net/ieee802154/wpan-class.c4
-rw-r--r--net/ipv4/Makefile2
-rw-r--r--net/ipv4/af_inet.c153
-rw-r--r--net/ipv4/ah4.c78
-rw-r--r--net/ipv4/datagram.c20
-rw-r--r--net/ipv4/devinet.c9
-rw-r--r--net/ipv4/esp4.c26
-rw-r--r--net/ipv4/fib_frontend.c4
-rw-r--r--net/ipv4/fib_semantics.c3
-rw-r--r--net/ipv4/gre_demux.c28
-rw-r--r--net/ipv4/gre_offload.c19
-rw-r--r--net/ipv4/icmp.c25
-rw-r--r--net/ipv4/igmp.c26
-rw-r--r--net/ipv4/inet_connection_sock.c19
-rw-r--r--net/ipv4/inet_hashtables.c6
-rw-r--r--net/ipv4/inetpeer.c22
-rw-r--r--net/ipv4/ip_forward.c61
-rw-r--r--net/ipv4/ip_fragment.c5
-rw-r--r--net/ipv4/ip_gre.c9
-rw-r--r--net/ipv4/ip_options.c10
-rw-r--r--net/ipv4/ip_output.c95
-rw-r--r--net/ipv4/ip_sockglue.c21
-rw-r--r--net/ipv4/ip_tunnel.c97
-rw-r--r--net/ipv4/ip_tunnel_core.c18
-rw-r--r--net/ipv4/ip_vti.c321
-rw-r--r--net/ipv4/ipcomp.c26
-rw-r--r--net/ipv4/ipip.c5
-rw-r--r--net/ipv4/ipmr.c6
-rw-r--r--net/ipv4/netfilter.c2
-rw-r--r--net/ipv4/netfilter/arp_tables.c6
-rw-r--r--net/ipv4/netfilter/ip_tables.c6
-rw-r--r--net/ipv4/netfilter/ipt_rpfilter.c5
-rw-r--r--net/ipv4/netfilter/iptable_nat.c14
-rw-r--r--net/ipv4/netfilter/nf_defrag_ipv4.c5
-rw-r--r--net/ipv4/netfilter/nft_chain_nat_ipv4.c12
-rw-r--r--net/ipv4/ping.c23
-rw-r--r--net/ipv4/proc.c30
-rw-r--r--net/ipv4/raw.c4
-rw-r--r--net/ipv4/route.c110
-rw-r--r--net/ipv4/syncookies.c3
-rw-r--r--net/ipv4/sysctl_net_ipv4.c87
-rw-r--r--net/ipv4/tcp.c24
-rw-r--r--net/ipv4/tcp_bic.c5
-rw-r--r--net/ipv4/tcp_cong.c34
-rw-r--r--net/ipv4/tcp_cubic.c11
-rw-r--r--net/ipv4/tcp_fastopen.c219
-rw-r--r--net/ipv4/tcp_highspeed.c5
-rw-r--r--net/ipv4/tcp_htcp.c4
-rw-r--r--net/ipv4/tcp_hybla.c20
-rw-r--r--net/ipv4/tcp_illinois.c7
-rw-r--r--net/ipv4/tcp_input.c252
-rw-r--r--net/ipv4/tcp_ipv4.c311
-rw-r--r--net/ipv4/tcp_lp.c7
-rw-r--r--net/ipv4/tcp_memcontrol.c33
-rw-r--r--net/ipv4/tcp_metrics.c88
-rw-r--r--net/ipv4/tcp_minisocks.c37
-rw-r--r--net/ipv4/tcp_offload.c11
-rw-r--r--net/ipv4/tcp_output.c219
-rw-r--r--net/ipv4/tcp_probe.c2
-rw-r--r--net/ipv4/tcp_scalable.c6
-rw-r--r--net/ipv4/tcp_timer.c3
-rw-r--r--net/ipv4/tcp_vegas.c9
-rw-r--r--net/ipv4/tcp_veno.c10
-rw-r--r--net/ipv4/tcp_westwood.c1
-rw-r--r--net/ipv4/tcp_yeah.c7
-rw-r--r--net/ipv4/udp.c147
-rw-r--r--net/ipv4/udp_offload.c8
-rw-r--r--net/ipv4/udplite.c1
-rw-r--r--net/ipv4/xfrm4_input.c9
-rw-r--r--net/ipv4/xfrm4_mode_tunnel.c70
-rw-r--r--net/ipv4/xfrm4_output.c36
-rw-r--r--net/ipv4/xfrm4_policy.c1
-rw-r--r--net/ipv4/xfrm4_protocol.c301
-rw-r--r--net/ipv6/Makefile2
-rw-r--r--net/ipv6/addrconf.c49
-rw-r--r--net/ipv6/addrconf_core.c2
-rw-r--r--net/ipv6/addrlabel.c59
-rw-r--r--net/ipv6/af_inet6.c45
-rw-r--r--net/ipv6/ah6.c80
-rw-r--r--net/ipv6/esp6.c26
-rw-r--r--net/ipv6/icmp.c43
-rw-r--r--net/ipv6/inet6_connection_sock.c5
-rw-r--r--net/ipv6/ip6_checksum.c61
-rw-r--r--net/ipv6/ip6_fib.c136
-rw-r--r--net/ipv6/ip6_flowlabel.c7
-rw-r--r--net/ipv6/ip6_gre.c83
-rw-r--r--net/ipv6/ip6_offload.c8
-rw-r--r--net/ipv6/ip6_output.c47
-rw-r--r--net/ipv6/ip6_tunnel.c24
-rw-r--r--net/ipv6/ip6_vti.c319
-rw-r--r--net/ipv6/ip6mr.c4
-rw-r--r--net/ipv6/ipcomp6.c22
-rw-r--r--net/ipv6/ipv6_sockglue.c2
-rw-r--r--net/ipv6/mcast.c24
-rw-r--r--net/ipv6/ndisc.c7
-rw-r--r--net/ipv6/netfilter.c6
-rw-r--r--net/ipv6/netfilter/ip6_tables.c6
-rw-r--r--net/ipv6/netfilter/ip6t_rpfilter.c1
-rw-r--r--net/ipv6/netfilter/ip6table_nat.c14
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c2
-rw-r--r--net/ipv6/netfilter/nft_chain_nat_ipv6.c12
-rw-r--r--net/ipv6/output_core.c31
-rw-r--r--net/ipv6/ping.c12
-rw-r--r--net/ipv6/proc.c14
-rw-r--r--net/ipv6/raw.c11
-rw-r--r--net/ipv6/route.c99
-rw-r--r--net/ipv6/sit.c45
-rw-r--r--net/ipv6/syncookies.c4
-rw-r--r--net/ipv6/sysctl_net_ipv6.c7
-rw-r--r--net/ipv6/tcp_ipv6.c130
-rw-r--r--net/ipv6/tcpv6_offload.c4
-rw-r--r--net/ipv6/udp.c72
-rw-r--r--net/ipv6/udp_offload.c5
-rw-r--r--net/ipv6/udplite.c1
-rw-r--r--net/ipv6/xfrm6_mode_tunnel.c63
-rw-r--r--net/ipv6/xfrm6_output.c30
-rw-r--r--net/ipv6/xfrm6_policy.c7
-rw-r--r--net/ipv6/xfrm6_protocol.c279
-rw-r--r--net/ipx/af_ipx.c30
-rw-r--r--net/ipx/ipx_route.c3
-rw-r--r--net/iucv/af_iucv.c39
-rw-r--r--net/iucv/iucv.c127
-rw-r--r--net/key/af_key.c75
-rw-r--r--net/l2tp/l2tp_core.c146
-rw-r--r--net/l2tp/l2tp_core.h4
-rw-r--r--net/l2tp/l2tp_ip.c3
-rw-r--r--net/l2tp/l2tp_ip6.c11
-rw-r--r--net/l2tp/l2tp_netlink.c10
-rw-r--r--net/l2tp/l2tp_ppp.c11
-rw-r--r--net/mac80211/Makefile3
-rw-r--r--net/mac80211/aes_ccm.c37
-rw-r--r--net/mac80211/agg-tx.c2
-rw-r--r--net/mac80211/cfg.c832
-rw-r--r--net/mac80211/cfg.h2
-rw-r--r--net/mac80211/chan.c627
-rw-r--r--net/mac80211/debugfs.c2
-rw-r--r--net/mac80211/debugfs.h2
-rw-r--r--net/mac80211/debugfs_netdev.c19
-rw-r--r--net/mac80211/debugfs_netdev.h2
-rw-r--r--net/mac80211/debugfs_sta.c2
-rw-r--r--net/mac80211/driver-ops.h190
-rw-r--r--net/mac80211/ht.c26
-rw-r--r--net/mac80211/ibss.c123
-rw-r--r--net/mac80211/ieee80211_i.h68
-rw-r--r--net/mac80211/iface.c59
-rw-r--r--net/mac80211/key.c7
-rw-r--r--net/mac80211/main.c35
-rw-r--r--net/mac80211/mesh.c132
-rw-r--r--net/mac80211/mesh_hwmp.c5
-rw-r--r--net/mac80211/mesh_pathtbl.c6
-rw-r--r--net/mac80211/mesh_sync.c2
-rw-r--r--net/mac80211/michael.h1
-rw-r--r--net/mac80211/mlme.c235
-rw-r--r--net/mac80211/offchannel.c26
-rw-r--r--net/mac80211/pm.c14
-rw-r--r--net/mac80211/rate.c46
-rw-r--r--net/mac80211/rate.h2
-rw-r--r--net/mac80211/rc80211_minstrel.c14
-rw-r--r--net/mac80211/rc80211_minstrel.h2
-rw-r--r--net/mac80211/rc80211_minstrel_ht.c29
-rw-r--r--net/mac80211/rc80211_pid_algo.c2
-rw-r--r--net/mac80211/rx.c129
-rw-r--r--net/mac80211/scan.c40
-rw-r--r--net/mac80211/sta_info.c7
-rw-r--r--net/mac80211/sta_info.h2
-rw-r--r--net/mac80211/status.c34
-rw-r--r--net/mac80211/tdls.c325
-rw-r--r--net/mac80211/trace.h121
-rw-r--r--net/mac80211/tx.c230
-rw-r--r--net/mac80211/util.c240
-rw-r--r--net/mac80211/vht.c35
-rw-r--r--net/mac80211/wpa.c14
-rw-r--r--net/mac802154/Kconfig4
-rw-r--r--net/mac802154/Makefile5
-rw-r--r--net/mac802154/ieee802154_dev.c85
-rw-r--r--net/mac802154/llsec.c1070
-rw-r--r--net/mac802154/llsec.h108
-rw-r--r--net/mac802154/mac802154.h63
-rw-r--r--net/mac802154/mac_cmd.c47
-rw-r--r--net/mac802154/mib.c212
-rw-r--r--net/mac802154/monitor.c3
-rw-r--r--net/mac802154/rx.c12
-rw-r--r--net/mac802154/wpan.c553
-rw-r--r--net/mpls/mpls_gso.c1
-rw-r--r--net/netfilter/ipset/Kconfig9
-rw-r--r--net/netfilter/ipset/Makefile1
-rw-r--r--net/netfilter/ipset/ip_set_core.c59
-rw-r--r--net/netfilter/ipset/ip_set_hash_gen.h43
-rw-r--r--net/netfilter/ipset/ip_set_hash_ip.c3
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipmark.c321
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipport.c3
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipportip.c3
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipportnet.c3
-rw-r--r--net/netfilter/ipset/ip_set_hash_net.c3
-rw-r--r--net/netfilter/ipset/ip_set_hash_netiface.c3
-rw-r--r--net/netfilter/ipset/ip_set_hash_netnet.c10
-rw-r--r--net/netfilter/ipset/ip_set_hash_netport.c3
-rw-r--r--net/netfilter/ipset/ip_set_hash_netportnet.c3
-rw-r--r--net/netfilter/ipset/pfxlen.c4
-rw-r--r--net/netfilter/ipvs/ip_vs_conn.c1
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c17
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c8
-rw-r--r--net/netfilter/ipvs/ip_vs_lblc.c13
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c22
-rw-r--r--net/netfilter/nf_conntrack_core.c435
-rw-r--r--net/netfilter/nf_conntrack_expect.c36
-rw-r--r--net/netfilter/nf_conntrack_h323_main.c4
-rw-r--r--net/netfilter/nf_conntrack_helper.c41
-rw-r--r--net/netfilter/nf_conntrack_netlink.c136
-rw-r--r--net/netfilter/nf_conntrack_pptp.c20
-rw-r--r--net/netfilter/nf_conntrack_proto_gre.c3
-rw-r--r--net/netfilter/nf_conntrack_sip.c8
-rw-r--r--net/netfilter/nf_nat_core.c59
-rw-r--r--net/netfilter/nf_tables_api.c1441
-rw-r--r--net/netfilter/nf_tables_core.c54
-rw-r--r--net/netfilter/nfnetlink.c29
-rw-r--r--net/netfilter/nfnetlink_acct.c86
-rw-r--r--net/netfilter/nfnetlink_log.c8
-rw-r--r--net/netfilter/nft_cmp.c2
-rw-r--r--net/netfilter/nft_compat.c22
-rw-r--r--net/netfilter/nft_ct.c132
-rw-r--r--net/netfilter/nft_hash.c292
-rw-r--r--net/netfilter/nft_immediate.c3
-rw-r--r--net/netfilter/nft_log.c3
-rw-r--r--net/netfilter/nft_lookup.c15
-rw-r--r--net/netfilter/nft_meta.c103
-rw-r--r--net/netfilter/nft_nat.c36
-rw-r--r--net/netfilter/nft_rbtree.c43
-rw-r--r--net/netfilter/xt_AUDIT.c4
-rw-r--r--net/netfilter/xt_bpf.c5
-rw-r--r--net/netfilter/xt_cgroup.c3
-rw-r--r--net/netfilter/xt_connlimit.c318
-rw-r--r--net/netfilter/xt_ipcomp.c2
-rw-r--r--net/netfilter/xt_nfacct.c5
-rw-r--r--net/netfilter/xt_osf.c2
-rw-r--r--net/netfilter/xt_recent.c5
-rw-r--r--net/netfilter/xt_repldata.h22
-rw-r--r--net/netlink/af_netlink.c187
-rw-r--r--net/netlink/af_netlink.h7
-rw-r--r--net/netlink/genetlink.c8
-rw-r--r--net/netrom/af_netrom.c2
-rw-r--r--net/nfc/core.c10
-rw-r--r--net/nfc/digital.h7
-rw-r--r--net/nfc/digital_core.c89
-rw-r--r--net/nfc/digital_dep.c5
-rw-r--r--net/nfc/digital_technology.c477
-rw-r--r--net/nfc/hci/command.c6
-rw-r--r--net/nfc/hci/core.c47
-rw-r--r--net/nfc/hci/llc.c4
-rw-r--r--net/nfc/llcp_commands.c2
-rw-r--r--net/nfc/llcp_core.c31
-rw-r--r--net/nfc/nci/core.c12
-rw-r--r--net/nfc/nci/ntf.c7
-rw-r--r--net/nfc/nci/spi.c3
-rw-r--r--net/nfc/netlink.c8
-rw-r--r--net/nfc/nfc.h6
-rw-r--r--net/nfc/rawsock.c94
-rw-r--r--net/openvswitch/actions.c6
-rw-r--r--net/openvswitch/datapath.c809
-rw-r--r--net/openvswitch/datapath.h10
-rw-r--r--net/openvswitch/flow.c190
-rw-r--r--net/openvswitch/flow.h56
-rw-r--r--net/openvswitch/flow_netlink.c186
-rw-r--r--net/openvswitch/flow_netlink.h1
-rw-r--r--net/openvswitch/flow_table.c137
-rw-r--r--net/openvswitch/flow_table.h7
-rw-r--r--net/openvswitch/vport-gre.c23
-rw-r--r--net/openvswitch/vport-internal_dev.c2
-rw-r--r--net/openvswitch/vport-vxlan.c7
-rw-r--r--net/openvswitch/vport.c14
-rw-r--r--net/openvswitch/vport.h6
-rw-r--r--net/packet/af_packet.c66
-rw-r--r--net/packet/diag.c7
-rw-r--r--net/phonet/pep-gprs.c4
-rw-r--r--net/phonet/pep.c8
-rw-r--r--net/phonet/pn_netlink.c8
-rw-r--r--net/rds/ib_recv.c4
-rw-r--r--net/rds/ib_send.c4
-rw-r--r--net/rds/iw.c3
-rw-r--r--net/rds/iw_recv.c4
-rw-r--r--net/rds/iw_send.c4
-rw-r--r--net/rds/iw_sysctl.c3
-rw-r--r--net/rds/rdma_transport.c2
-rw-r--r--net/rds/send.c6
-rw-r--r--net/rds/sysctl.c3
-rw-r--r--net/rds/tcp.h4
-rw-r--r--net/rds/tcp_listen.c8
-rw-r--r--net/rds/tcp_recv.c8
-rw-r--r--net/rds/tcp_send.c2
-rw-r--r--net/rfkill/core.c9
-rw-r--r--net/rfkill/rfkill-gpio.c59
-rw-r--r--net/rose/af_rose.c2
-rw-r--r--net/rxrpc/Makefile5
-rw-r--r--net/rxrpc/af_rxrpc.c9
-rw-r--r--net/rxrpc/ar-ack.c61
-rw-r--r--net/rxrpc/ar-call.c213
-rw-r--r--net/rxrpc/ar-connection.c10
-rw-r--r--net/rxrpc/ar-error.c1
-rw-r--r--net/rxrpc/ar-input.c196
-rw-r--r--net/rxrpc/ar-internal.h42
-rw-r--r--net/rxrpc/ar-key.c2
-rw-r--r--net/rxrpc/ar-output.c15
-rw-r--r--net/rxrpc/ar-recvmsg.c25
-rw-r--r--net/rxrpc/ar-skbuff.c7
-rw-r--r--net/rxrpc/ar-transport.c10
-rw-r--r--net/rxrpc/sysctl.c146
-rw-r--r--net/sched/act_api.c144
-rw-r--r--net/sched/act_csum.c31
-rw-r--r--net/sched/act_gact.c34
-rw-r--r--net/sched/act_ipt.c68
-rw-r--r--net/sched/act_mirred.c56
-rw-r--r--net/sched/act_nat.c33
-rw-r--r--net/sched/act_pedit.c45
-rw-r--r--net/sched/act_police.c22
-rw-r--r--net/sched/act_simple.c64
-rw-r--r--net/sched/act_skbedit.c36
-rw-r--r--net/sched/cls_api.c26
-rw-r--r--net/sched/cls_basic.c10
-rw-r--r--net/sched/cls_bpf.c14
-rw-r--r--net/sched/cls_cgroup.c4
-rw-r--r--net/sched/cls_flow.c4
-rw-r--r--net/sched/cls_fw.c43
-rw-r--r--net/sched/cls_route.c11
-rw-r--r--net/sched/cls_rsvp.h4
-rw-r--r--net/sched/cls_tcindex.c38
-rw-r--r--net/sched/cls_u32.c29
-rw-r--r--net/sched/sch_api.c18
-rw-r--r--net/sched/sch_atm.c3
-rw-r--r--net/sched/sch_cbq.c6
-rw-r--r--net/sched/sch_choke.c7
-rw-r--r--net/sched/sch_drr.c4
-rw-r--r--net/sched/sch_fq.c8
-rw-r--r--net/sched/sch_fq_codel.c10
-rw-r--r--net/sched/sch_generic.c2
-rw-r--r--net/sched/sch_hfsc.c3
-rw-r--r--net/sched/sch_hhf.c23
-rw-r--r--net/sched/sch_htb.c20
-rw-r--r--net/sched/sch_ingress.c3
-rw-r--r--net/sched/sch_netem.c86
-rw-r--r--net/sched/sch_sfq.c7
-rw-r--r--net/sched/sch_tbf.c26
-rw-r--r--net/sctp/associola.c256
-rw-r--r--net/sctp/auth.c17
-rw-r--r--net/sctp/endpointola.c5
-rw-r--r--net/sctp/ipv6.c4
-rw-r--r--net/sctp/output.c2
-rw-r--r--net/sctp/proc.c2
-rw-r--r--net/sctp/protocol.c20
-rw-r--r--net/sctp/sm_make_chunk.c34
-rw-r--r--net/sctp/sm_sideeffect.c7
-rw-r--r--net/sctp/sm_statefuns.c8
-rw-r--r--net/sctp/socket.c117
-rw-r--r--net/sctp/sysctl.c129
-rw-r--r--net/sctp/transport.c3
-rw-r--r--net/sctp/ulpevent.c130
-rw-r--r--net/sctp/ulpqueue.c8
-rw-r--r--net/socket.c22
-rw-r--r--net/sunrpc/Kconfig39
-rw-r--r--net/sunrpc/Makefile3
-rw-r--r--net/sunrpc/auth.c3
-rw-r--r--net/sunrpc/auth_gss/auth_gss.c2
-rw-r--r--net/sunrpc/auth_gss/gss_mech_switch.c4
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c2
-rw-r--r--net/sunrpc/backchannel_rqst.c97
-rw-r--r--net/sunrpc/cache.c2
-rw-r--r--net/sunrpc/clnt.c81
-rw-r--r--net/sunrpc/sched.c8
-rw-r--r--net/sunrpc/socklib.c3
-rw-r--r--net/sunrpc/sunrpc.h13
-rw-r--r--net/sunrpc/svc_xprt.c5
-rw-r--r--net/sunrpc/svcauth.c2
-rw-r--r--net/sunrpc/svcsock.c45
-rw-r--r--net/sunrpc/xdr.c196
-rw-r--r--net/sunrpc/xprt.c44
-rw-r--r--net/sunrpc/xprtrdma/Makefile4
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c123
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_recvfrom.c653
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_sendto.c232
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c72
-rw-r--r--net/sunrpc/xprtrdma/transport.c100
-rw-r--r--net/sunrpc/xprtrdma/verbs.c753
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h17
-rw-r--r--net/sunrpc/xprtsock.c89
-rw-r--r--net/tipc/Makefile2
-rw-r--r--net/tipc/addr.h2
-rw-r--r--net/tipc/bcast.c224
-rw-r--r--net/tipc/bcast.h13
-rw-r--r--net/tipc/bearer.c217
-rw-r--r--net/tipc/bearer.h58
-rw-r--r--net/tipc/config.c117
-rw-r--r--net/tipc/config.h5
-rw-r--r--net/tipc/core.c25
-rw-r--r--net/tipc/core.h11
-rw-r--r--net/tipc/discover.c286
-rw-r--r--net/tipc/discover.h6
-rw-r--r--net/tipc/eth_media.c51
-rw-r--r--net/tipc/handler.c134
-rw-r--r--net/tipc/ib_media.c34
-rw-r--r--net/tipc/link.c772
-rw-r--r--net/tipc/link.h70
-rw-r--r--net/tipc/msg.c60
-rw-r--r--net/tipc/msg.h5
-rw-r--r--net/tipc/name_distr.c94
-rw-r--r--net/tipc/name_distr.h37
-rw-r--r--net/tipc/name_table.c14
-rw-r--r--net/tipc/net.c89
-rw-r--r--net/tipc/net.h4
-rw-r--r--net/tipc/netlink.c2
-rw-r--r--net/tipc/node.c229
-rw-r--r--net/tipc/node.h94
-rw-r--r--net/tipc/node_subscr.c9
-rw-r--r--net/tipc/node_subscr.h2
-rw-r--r--net/tipc/port.c326
-rw-r--r--net/tipc/port.h130
-rw-r--r--net/tipc/ref.c27
-rw-r--r--net/tipc/ref.h1
-rw-r--r--net/tipc/server.c4
-rw-r--r--net/tipc/socket.c536
-rw-r--r--net/tipc/socket.h74
-rw-r--r--net/unix/af_unix.c16
-rw-r--r--net/vmw_vsock/af_vsock.c47
-rw-r--r--net/vmw_vsock/vmci_transport_notify.c2
-rw-r--r--net/vmw_vsock/vmci_transport_notify_qstate.c4
-rw-r--r--net/wireless/Kconfig37
-rw-r--r--net/wireless/ap.c12
-rw-r--r--net/wireless/chan.c254
-rw-r--r--net/wireless/core.c149
-rw-r--r--net/wireless/core.h72
-rw-r--r--net/wireless/ethtool.c10
-rw-r--r--net/wireless/genregdb.awk22
-rw-r--r--net/wireless/ibss.c71
-rw-r--r--net/wireless/mesh.c32
-rw-r--r--net/wireless/mlme.c42
-rw-r--r--net/wireless/nl80211.c909
-rw-r--r--net/wireless/nl80211.h5
-rw-r--r--net/wireless/rdev-ops.h24
-rw-r--r--net/wireless/reg.c421
-rw-r--r--net/wireless/reg.h21
-rw-r--r--net/wireless/scan.c197
-rw-r--r--net/wireless/sme.c51
-rw-r--r--net/wireless/trace.h104
-rw-r--r--net/wireless/util.c266
-rw-r--r--net/wireless/wext-compat.c40
-rw-r--r--net/wireless/wext-compat.h2
-rw-r--r--net/wireless/wext-sme.c14
-rw-r--r--net/x25/af_x25.c2
-rw-r--r--net/x25/x25_in.c2
-rw-r--r--net/xfrm/xfrm_input.c97
-rw-r--r--net/xfrm/xfrm_output.c5
-rw-r--r--net/xfrm/xfrm_policy.c98
-rw-r--r--net/xfrm/xfrm_proc.c3
-rw-r--r--net/xfrm/xfrm_state.c109
-rw-r--r--net/xfrm/xfrm_user.c135
623 files changed, 32866 insertions, 16320 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 175273f38cb..44ebd5c2cd4 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -169,6 +169,7 @@ int register_vlan_dev(struct net_device *dev)
if (err < 0)
goto out_uninit_mvrp;
+ vlan->nest_level = dev_get_nest_level(real_dev, is_vlan_dev) + 1;
err = register_netdevice(dev);
if (err < 0)
goto out_uninit_mvrp;
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index 5704ed9c3a2..9d010a09ab9 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -38,9 +38,9 @@ struct vlan_info {
static inline unsigned int vlan_proto_idx(__be16 proto)
{
switch (proto) {
- case __constant_htons(ETH_P_8021Q):
+ case htons(ETH_P_8021Q):
return VLAN_PROTO_8021Q;
- case __constant_htons(ETH_P_8021AD):
+ case htons(ETH_P_8021AD):
return VLAN_PROTO_8021AD;
default:
BUG();
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 6ee48aac776..75d42776399 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -22,11 +22,11 @@ bool vlan_do_receive(struct sk_buff **skbp)
return false;
skb->dev = vlan_dev;
- if (skb->pkt_type == PACKET_OTHERHOST) {
+ if (unlikely(skb->pkt_type == PACKET_OTHERHOST)) {
/* Our lower layer thinks this is not local, let's make sure.
* This allows the VLAN to have a different MAC than the
* underlying device, and still route correctly. */
- if (ether_addr_equal(eth_hdr(skb)->h_dest, vlan_dev->dev_addr))
+ if (ether_addr_equal_64bits(eth_hdr(skb)->h_dest, vlan_dev->dev_addr))
skb->pkt_type = PACKET_HOST;
}
@@ -63,7 +63,7 @@ bool vlan_do_receive(struct sk_buff **skbp)
}
/* Must be invoked with rcu_read_lock. */
-struct net_device *__vlan_find_dev_deep(struct net_device *dev,
+struct net_device *__vlan_find_dev_deep_rcu(struct net_device *dev,
__be16 vlan_proto, u16 vlan_id)
{
struct vlan_info *vlan_info = rcu_dereference(dev->vlan_info);
@@ -81,13 +81,13 @@ struct net_device *__vlan_find_dev_deep(struct net_device *dev,
upper_dev = netdev_master_upper_dev_get_rcu(dev);
if (upper_dev)
- return __vlan_find_dev_deep(upper_dev,
+ return __vlan_find_dev_deep_rcu(upper_dev,
vlan_proto, vlan_id);
}
return NULL;
}
-EXPORT_SYMBOL(__vlan_find_dev_deep);
+EXPORT_SYMBOL(__vlan_find_dev_deep_rcu);
struct net_device *vlan_dev_real_dev(const struct net_device *dev)
{
@@ -106,10 +106,19 @@ u16 vlan_dev_vlan_id(const struct net_device *dev)
}
EXPORT_SYMBOL(vlan_dev_vlan_id);
+__be16 vlan_dev_vlan_proto(const struct net_device *dev)
+{
+ return vlan_dev_priv(dev)->vlan_proto;
+}
+EXPORT_SYMBOL(vlan_dev_vlan_proto);
+
static struct sk_buff *vlan_reorder_header(struct sk_buff *skb)
{
- if (skb_cow(skb, skb_headroom(skb)) < 0)
+ if (skb_cow(skb, skb_headroom(skb)) < 0) {
+ kfree_skb(skb);
return NULL;
+ }
+
memmove(skb->data - ETH_HLEN, skb->data - VLAN_ETH_HLEN, 2 * ETH_ALEN);
skb->mac_header += VLAN_HLEN;
return skb;
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 27bfe2f8e2d..dd11f612e03 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -524,6 +524,11 @@ static void vlan_dev_set_lockdep_class(struct net_device *dev, int subclass)
netdev_for_each_tx_queue(dev, vlan_dev_set_lockdep_one, &subclass);
}
+static int vlan_dev_get_lock_subclass(struct net_device *dev)
+{
+ return vlan_dev_priv(dev)->nest_level;
+}
+
static const struct header_ops vlan_header_ops = {
.create = vlan_dev_hard_header,
.rebuild = vlan_dev_rebuild_header,
@@ -559,7 +564,6 @@ static const struct net_device_ops vlan_netdev_ops;
static int vlan_dev_init(struct net_device *dev)
{
struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
- int subclass = 0, i;
netif_carrier_off(dev);
@@ -608,22 +612,12 @@ static int vlan_dev_init(struct net_device *dev)
SET_NETDEV_DEVTYPE(dev, &vlan_type);
- if (is_vlan_dev(real_dev))
- subclass = 1;
+ vlan_dev_set_lockdep_class(dev, vlan_dev_get_lock_subclass(dev));
- vlan_dev_set_lockdep_class(dev, subclass);
-
- vlan_dev_priv(dev)->vlan_pcpu_stats = alloc_percpu(struct vlan_pcpu_stats);
+ vlan_dev_priv(dev)->vlan_pcpu_stats = netdev_alloc_pcpu_stats(struct vlan_pcpu_stats);
if (!vlan_dev_priv(dev)->vlan_pcpu_stats)
return -ENOMEM;
- for_each_possible_cpu(i) {
- struct vlan_pcpu_stats *vlan_stat;
- vlan_stat = per_cpu_ptr(vlan_dev_priv(dev)->vlan_pcpu_stats, i);
- u64_stats_init(&vlan_stat->syncp);
- }
-
-
return 0;
}
@@ -633,8 +627,6 @@ static void vlan_dev_uninit(struct net_device *dev)
struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
int i;
- free_percpu(vlan->vlan_pcpu_stats);
- vlan->vlan_pcpu_stats = NULL;
for (i = 0; i < ARRAY_SIZE(vlan->egress_priority_map); i++) {
while ((pm = vlan->egress_priority_map[i]) != NULL) {
vlan->egress_priority_map[i] = pm->next;
@@ -649,9 +641,9 @@ static netdev_features_t vlan_dev_fix_features(struct net_device *dev,
struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
netdev_features_t old_features = features;
- features &= real_dev->vlan_features;
+ features = netdev_intersect_features(features, real_dev->vlan_features);
features |= NETIF_F_RXCSUM;
- features &= real_dev->features;
+ features = netdev_intersect_features(features, real_dev->features);
features |= old_features & NETIF_F_SOFT_FEATURES;
features |= NETIF_F_LLTX;
@@ -677,38 +669,36 @@ static void vlan_ethtool_get_drvinfo(struct net_device *dev,
static struct rtnl_link_stats64 *vlan_dev_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
{
+ struct vlan_pcpu_stats *p;
+ u32 rx_errors = 0, tx_dropped = 0;
+ int i;
- if (vlan_dev_priv(dev)->vlan_pcpu_stats) {
- struct vlan_pcpu_stats *p;
- u32 rx_errors = 0, tx_dropped = 0;
- int i;
-
- for_each_possible_cpu(i) {
- u64 rxpackets, rxbytes, rxmulticast, txpackets, txbytes;
- unsigned int start;
-
- p = per_cpu_ptr(vlan_dev_priv(dev)->vlan_pcpu_stats, i);
- do {
- start = u64_stats_fetch_begin_bh(&p->syncp);
- rxpackets = p->rx_packets;
- rxbytes = p->rx_bytes;
- rxmulticast = p->rx_multicast;
- txpackets = p->tx_packets;
- txbytes = p->tx_bytes;
- } while (u64_stats_fetch_retry_bh(&p->syncp, start));
-
- stats->rx_packets += rxpackets;
- stats->rx_bytes += rxbytes;
- stats->multicast += rxmulticast;
- stats->tx_packets += txpackets;
- stats->tx_bytes += txbytes;
- /* rx_errors & tx_dropped are u32 */
- rx_errors += p->rx_errors;
- tx_dropped += p->tx_dropped;
- }
- stats->rx_errors = rx_errors;
- stats->tx_dropped = tx_dropped;
+ for_each_possible_cpu(i) {
+ u64 rxpackets, rxbytes, rxmulticast, txpackets, txbytes;
+ unsigned int start;
+
+ p = per_cpu_ptr(vlan_dev_priv(dev)->vlan_pcpu_stats, i);
+ do {
+ start = u64_stats_fetch_begin_irq(&p->syncp);
+ rxpackets = p->rx_packets;
+ rxbytes = p->rx_bytes;
+ rxmulticast = p->rx_multicast;
+ txpackets = p->tx_packets;
+ txbytes = p->tx_bytes;
+ } while (u64_stats_fetch_retry_irq(&p->syncp, start));
+
+ stats->rx_packets += rxpackets;
+ stats->rx_bytes += rxbytes;
+ stats->multicast += rxmulticast;
+ stats->tx_packets += txpackets;
+ stats->tx_bytes += txbytes;
+ /* rx_errors & tx_dropped are u32 */
+ rx_errors += p->rx_errors;
+ tx_dropped += p->tx_dropped;
}
+ stats->rx_errors = rx_errors;
+ stats->tx_dropped = tx_dropped;
+
return stats;
}
@@ -718,20 +708,19 @@ static void vlan_dev_poll_controller(struct net_device *dev)
return;
}
-static int vlan_dev_netpoll_setup(struct net_device *dev, struct netpoll_info *npinfo,
- gfp_t gfp)
+static int vlan_dev_netpoll_setup(struct net_device *dev, struct netpoll_info *npinfo)
{
struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
struct net_device *real_dev = vlan->real_dev;
struct netpoll *netpoll;
int err = 0;
- netpoll = kzalloc(sizeof(*netpoll), gfp);
+ netpoll = kzalloc(sizeof(*netpoll), GFP_KERNEL);
err = -ENOMEM;
if (!netpoll)
goto out;
- err = __netpoll_setup(netpoll, real_dev, gfp);
+ err = __netpoll_setup(netpoll, real_dev);
if (err) {
kfree(netpoll);
goto out;
@@ -791,8 +780,18 @@ static const struct net_device_ops vlan_netdev_ops = {
.ndo_netpoll_cleanup = vlan_dev_netpoll_cleanup,
#endif
.ndo_fix_features = vlan_dev_fix_features,
+ .ndo_get_lock_subclass = vlan_dev_get_lock_subclass,
};
+static void vlan_dev_free(struct net_device *dev)
+{
+ struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
+
+ free_percpu(vlan->vlan_pcpu_stats);
+ vlan->vlan_pcpu_stats = NULL;
+ free_netdev(dev);
+}
+
void vlan_setup(struct net_device *dev)
{
ether_setup(dev);
@@ -802,7 +801,7 @@ void vlan_setup(struct net_device *dev)
dev->tx_queue_len = 0;
dev->netdev_ops = &vlan_netdev_ops;
- dev->destructor = free_netdev;
+ dev->destructor = vlan_dev_free;
dev->ethtool_ops = &vlan_ethtool_ops;
memset(dev->broadcast, 0, ETH_ALEN);
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index c7e634af851..8ac8a5cc214 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -56,8 +56,8 @@ static int vlan_validate(struct nlattr *tb[], struct nlattr *data[])
if (data[IFLA_VLAN_PROTOCOL]) {
switch (nla_get_be16(data[IFLA_VLAN_PROTOCOL])) {
- case __constant_htons(ETH_P_8021Q):
- case __constant_htons(ETH_P_8021AD):
+ case htons(ETH_P_8021Q):
+ case htons(ETH_P_8021AD):
break;
default:
return -EPROTONOSUPPORT;
diff --git a/net/9p/client.c b/net/9p/client.c
index 9186550d77a..0004cbaac4a 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -415,9 +415,17 @@ static void p9_free_req(struct p9_client *c, struct p9_req_t *r)
* req: request received
*
*/
-void p9_client_cb(struct p9_client *c, struct p9_req_t *req)
+void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status)
{
p9_debug(P9_DEBUG_MUX, " tag %d\n", req->tc->tag);
+
+ /*
+ * This barrier is needed to make sure any change made to req before
+ * the other thread wakes up will indeed be seen by the waiting side.
+ */
+ smp_wmb();
+ req->status = status;
+
wake_up(req->wq);
p9_debug(P9_DEBUG_MUX, "wakeup: %d\n", req->tc->tag);
}
@@ -655,16 +663,13 @@ static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq)
if (IS_ERR(req))
return PTR_ERR(req);
-
/*
* if we haven't received a response for oldreq,
* remove it from the list
*/
- if (oldreq->status == REQ_STATUS_FLSH) {
- spin_lock(&c->lock);
- list_del(&oldreq->req_list);
- spin_unlock(&c->lock);
- }
+ if (oldreq->status == REQ_STATUS_SENT)
+ if (c->trans_mod->cancelled)
+ c->trans_mod->cancelled(c, oldreq);
p9_free_req(c, req);
return 0;
@@ -751,6 +756,12 @@ again:
err = wait_event_interruptible(*req->wq,
req->status >= REQ_STATUS_RCVD);
+ /*
+ * Make sure our req is coherent with regard to updates in other
+ * threads - echoes to wmb() in the callback
+ */
+ smp_rmb();
+
if ((err == -ERESTARTSYS) && (c->status == Connected)
&& (type == P9_TFLUSH)) {
sigpending = 1;
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index b7bd7f2961b..80d08f6664c 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -66,20 +66,6 @@ struct p9_fd_opts {
int privport;
};
-/**
- * struct p9_trans_fd - transport state
- * @rd: reference to file to read from
- * @wr: reference of file to write to
- * @conn: connection state reference
- *
- */
-
-struct p9_trans_fd {
- struct file *rd;
- struct file *wr;
- struct p9_conn *conn;
-};
-
/*
* Option Parsing (code inspired by NFS code)
* - a little lazy - parse all fd-transport options
@@ -159,6 +145,20 @@ struct p9_conn {
unsigned long wsched;
};
+/**
+ * struct p9_trans_fd - transport state
+ * @rd: reference to file to read from
+ * @wr: reference of file to write to
+ * @conn: connection state reference
+ *
+ */
+
+struct p9_trans_fd {
+ struct file *rd;
+ struct file *wr;
+ struct p9_conn conn;
+};
+
static void p9_poll_workfn(struct work_struct *work);
static DEFINE_SPINLOCK(p9_poll_lock);
@@ -212,15 +212,9 @@ static void p9_conn_cancel(struct p9_conn *m, int err)
m->err = err;
list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) {
- req->status = REQ_STATUS_ERROR;
- if (!req->t_err)
- req->t_err = err;
list_move(&req->req_list, &cancel_list);
}
list_for_each_entry_safe(req, rtmp, &m->unsent_req_list, req_list) {
- req->status = REQ_STATUS_ERROR;
- if (!req->t_err)
- req->t_err = err;
list_move(&req->req_list, &cancel_list);
}
spin_unlock_irqrestore(&m->client->lock, flags);
@@ -228,7 +222,9 @@ static void p9_conn_cancel(struct p9_conn *m, int err)
list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) {
p9_debug(P9_DEBUG_ERROR, "call back req %p\n", req);
list_del(&req->req_list);
- p9_client_cb(m->client, req);
+ if (!req->t_err)
+ req->t_err = err;
+ p9_client_cb(m->client, req, REQ_STATUS_ERROR);
}
}
@@ -302,6 +298,7 @@ static void p9_read_work(struct work_struct *work)
{
int n, err;
struct p9_conn *m;
+ int status = REQ_STATUS_ERROR;
m = container_of(work, struct p9_conn, rq);
@@ -348,8 +345,7 @@ static void p9_read_work(struct work_struct *work)
"mux %p pkt: size: %d bytes tag: %d\n", m, n, tag);
m->req = p9_tag_lookup(m->client, tag);
- if (!m->req || (m->req->status != REQ_STATUS_SENT &&
- m->req->status != REQ_STATUS_FLSH)) {
+ if (!m->req || (m->req->status != REQ_STATUS_SENT)) {
p9_debug(P9_DEBUG_ERROR, "Unexpected packet tag %d\n",
tag);
err = -EIO;
@@ -375,10 +371,10 @@ static void p9_read_work(struct work_struct *work)
p9_debug(P9_DEBUG_TRANS, "got new packet\n");
spin_lock(&m->client->lock);
if (m->req->status != REQ_STATUS_ERROR)
- m->req->status = REQ_STATUS_RCVD;
+ status = REQ_STATUS_RCVD;
list_del(&m->req->req_list);
spin_unlock(&m->client->lock);
- p9_client_cb(m->client, m->req);
+ p9_client_cb(m->client, m->req, status);
m->rbuf = NULL;
m->rpos = 0;
m->rsize = 0;
@@ -573,21 +569,19 @@ p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p)
}
/**
- * p9_conn_create - allocate and initialize the per-session mux data
+ * p9_conn_create - initialize the per-session mux data
* @client: client instance
*
* Note: Creates the polling task if this is the first session.
*/
-static struct p9_conn *p9_conn_create(struct p9_client *client)
+static void p9_conn_create(struct p9_client *client)
{
int n;
- struct p9_conn *m;
+ struct p9_trans_fd *ts = client->trans;
+ struct p9_conn *m = &ts->conn;
p9_debug(P9_DEBUG_TRANS, "client %p msize %d\n", client, client->msize);
- m = kzalloc(sizeof(struct p9_conn), GFP_KERNEL);
- if (!m)
- return ERR_PTR(-ENOMEM);
INIT_LIST_HEAD(&m->mux_list);
m->client = client;
@@ -609,8 +603,6 @@ static struct p9_conn *p9_conn_create(struct p9_client *client)
p9_debug(P9_DEBUG_TRANS, "mux %p can write\n", m);
set_bit(Wpending, &m->wsched);
}
-
- return m;
}
/**
@@ -669,7 +661,7 @@ static int p9_fd_request(struct p9_client *client, struct p9_req_t *req)
{
int n;
struct p9_trans_fd *ts = client->trans;
- struct p9_conn *m = ts->conn;
+ struct p9_conn *m = &ts->conn;
p9_debug(P9_DEBUG_TRANS, "mux %p task %p tcall %p id %d\n",
m, current, req->tc, req->tc->id);
@@ -704,14 +696,26 @@ static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req)
list_del(&req->req_list);
req->status = REQ_STATUS_FLSHD;
ret = 0;
- } else if (req->status == REQ_STATUS_SENT)
- req->status = REQ_STATUS_FLSH;
-
+ }
spin_unlock(&client->lock);
return ret;
}
+static int p9_fd_cancelled(struct p9_client *client, struct p9_req_t *req)
+{
+ p9_debug(P9_DEBUG_TRANS, "client %p req %p\n", client, req);
+
+ /* we haven't received a response for req,
+ * remove it from the list.
+ */
+ spin_lock(&client->lock);
+ list_del(&req->req_list);
+ spin_unlock(&client->lock);
+
+ return 0;
+}
+
/**
* parse_opts - parse mount options into p9_fd_opts structure
* @params: options string passed from mount
@@ -780,7 +784,7 @@ static int parse_opts(char *params, struct p9_fd_opts *opts)
static int p9_fd_open(struct p9_client *client, int rfd, int wfd)
{
- struct p9_trans_fd *ts = kmalloc(sizeof(struct p9_trans_fd),
+ struct p9_trans_fd *ts = kzalloc(sizeof(struct p9_trans_fd),
GFP_KERNEL);
if (!ts)
return -ENOMEM;
@@ -806,9 +810,8 @@ static int p9_socket_open(struct p9_client *client, struct socket *csocket)
{
struct p9_trans_fd *p;
struct file *file;
- int ret;
- p = kmalloc(sizeof(struct p9_trans_fd), GFP_KERNEL);
+ p = kzalloc(sizeof(struct p9_trans_fd), GFP_KERNEL);
if (!p)
return -ENOMEM;
@@ -829,20 +832,12 @@ static int p9_socket_open(struct p9_client *client, struct socket *csocket)
p->rd->f_flags |= O_NONBLOCK;
- p->conn = p9_conn_create(client);
- if (IS_ERR(p->conn)) {
- ret = PTR_ERR(p->conn);
- p->conn = NULL;
- kfree(p);
- sockfd_put(csocket);
- sockfd_put(csocket);
- return ret;
- }
+ p9_conn_create(client);
return 0;
}
/**
- * p9_mux_destroy - cancels all pending requests and frees mux resources
+ * p9_conn_destroy - cancels all pending requests of mux
* @m: mux to destroy
*
*/
@@ -859,7 +854,6 @@ static void p9_conn_destroy(struct p9_conn *m)
p9_conn_cancel(m, -ECONNRESET);
m->client = NULL;
- kfree(m);
}
/**
@@ -881,7 +875,7 @@ static void p9_fd_close(struct p9_client *client)
client->status = Disconnected;
- p9_conn_destroy(ts->conn);
+ p9_conn_destroy(&ts->conn);
if (ts->rd)
fput(ts->rd);
@@ -1033,14 +1027,7 @@ p9_fd_create(struct p9_client *client, const char *addr, char *args)
return err;
p = (struct p9_trans_fd *) client->trans;
- p->conn = p9_conn_create(client);
- if (IS_ERR(p->conn)) {
- err = PTR_ERR(p->conn);
- p->conn = NULL;
- fput(p->rd);
- fput(p->wr);
- return err;
- }
+ p9_conn_create(client);
return 0;
}
@@ -1053,6 +1040,7 @@ static struct p9_trans_module p9_tcp_trans = {
.close = p9_fd_close,
.request = p9_fd_request,
.cancel = p9_fd_cancel,
+ .cancelled = p9_fd_cancelled,
.owner = THIS_MODULE,
};
@@ -1064,6 +1052,7 @@ static struct p9_trans_module p9_unix_trans = {
.close = p9_fd_close,
.request = p9_fd_request,
.cancel = p9_fd_cancel,
+ .cancelled = p9_fd_cancelled,
.owner = THIS_MODULE,
};
@@ -1075,6 +1064,7 @@ static struct p9_trans_module p9_fd_trans = {
.close = p9_fd_close,
.request = p9_fd_request,
.cancel = p9_fd_cancel,
+ .cancelled = p9_fd_cancelled,
.owner = THIS_MODULE,
};
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index 8f68df5d297..14ad43b5cf8 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -193,6 +193,8 @@ static int parse_opts(char *params, struct p9_rdma_opts *opts)
if (!*p)
continue;
token = match_token(p, tokens, args);
+ if (token == Opt_err)
+ continue;
r = match_int(&args[0], &option);
if (r < 0) {
p9_debug(P9_DEBUG_ERROR,
@@ -305,8 +307,7 @@ handle_recv(struct p9_client *client, struct p9_trans_rdma *rdma,
}
req->rc = c->rc;
- req->status = REQ_STATUS_RCVD;
- p9_client_cb(client, req);
+ p9_client_cb(client, req, REQ_STATUS_RCVD);
return;
@@ -511,6 +512,11 @@ dont_need_post_recv:
goto send_error;
}
+ /* Mark request as `sent' *before* we actually send it,
+ * because doing it after could erase the REQ_STATUS_RCVD
+ * status in case of a very fast reply.
+ */
+ req->status = REQ_STATUS_SENT;
err = ib_post_send(rdma->qp, &wr, &bad_wr);
if (err)
goto send_error;
@@ -520,6 +526,7 @@ dont_need_post_recv:
/* Handle errors that happened during or while preparing the send: */
send_error:
+ req->status = REQ_STATUS_ERROR;
kfree(c);
p9_debug(P9_DEBUG_ERROR, "Error %d in rdma_request()\n", err);
@@ -582,12 +589,24 @@ static struct p9_trans_rdma *alloc_rdma(struct p9_rdma_opts *opts)
return rdma;
}
-/* its not clear to me we can do anything after send has been posted */
static int rdma_cancel(struct p9_client *client, struct p9_req_t *req)
{
+ /* Nothing to do here.
+ * We will take care of it (if we have to) in rdma_cancelled()
+ */
return 1;
}
+/* A request has been fully flushed without a reply.
+ * That means we have posted one buffer in excess.
+ */
+static int rdma_cancelled(struct p9_client *client, struct p9_req_t *req)
+{
+ struct p9_trans_rdma *rdma = client->trans;
+ atomic_inc(&rdma->excess_rc);
+ return 0;
+}
+
/**
* trans_create_rdma - Transport method for creating atransport instance
* @client: client instance
@@ -721,6 +740,7 @@ static struct p9_trans_module p9_rdma_trans = {
.close = rdma_close,
.request = rdma_request,
.cancel = rdma_cancel,
+ .cancelled = rdma_cancelled,
};
/**
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index ac2666c1d01..6940d8fe897 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -164,8 +164,7 @@ static void req_done(struct virtqueue *vq)
p9_debug(P9_DEBUG_TRANS, ": rc %p\n", rc);
p9_debug(P9_DEBUG_TRANS, ": lookup tag %d\n", rc->tag);
req = p9_tag_lookup(chan->client, rc->tag);
- req->status = REQ_STATUS_RCVD;
- p9_client_cb(chan->client, req);
+ p9_client_cb(chan->client, req, REQ_STATUS_RCVD);
}
}
diff --git a/net/Kconfig b/net/Kconfig
index e411046a62e..d92afe4204d 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -89,8 +89,12 @@ config NETWORK_SECMARK
to nfmark, but designated for security purposes.
If you are unsure how to answer this question, answer N.
+config NET_PTP_CLASSIFY
+ def_bool n
+
config NETWORK_PHY_TIMESTAMPING
bool "Timestamping in PHY devices"
+ select NET_PTP_CLASSIFY
help
This allows timestamping of network packets by PHYs with
hardware timestamping capabilities. This option adds some
@@ -239,7 +243,7 @@ config XPS
default y
config CGROUP_NET_PRIO
- tristate "Network priority cgroup"
+ bool "Network priority cgroup"
depends on CGROUPS
---help---
Cgroup subsystem for use in assigning processes to network priorities on
diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c
index d27b86dfb0e..d1c55d8dd0a 100644
--- a/net/appletalk/aarp.c
+++ b/net/appletalk/aarp.c
@@ -926,7 +926,7 @@ static struct aarp_entry *iter_next(struct aarp_iter_state *iter, loff_t *pos)
struct aarp_entry *entry;
rescan:
- while(ct < AARP_HASH_SIZE) {
+ while (ct < AARP_HASH_SIZE) {
for (entry = table[ct]; entry; entry = entry->next) {
if (!pos || ++off == *pos) {
iter->table = table;
@@ -995,7 +995,7 @@ static const char *dt2str(unsigned long ticks)
{
static char buf[32];
- sprintf(buf, "%ld.%02ld", ticks / HZ, ((ticks % HZ) * 100 ) / HZ);
+ sprintf(buf, "%ld.%02ld", ticks / HZ, ((ticks % HZ) * 100) / HZ);
return buf;
}
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 02806c6b2ff..bfcf6be1d66 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -293,7 +293,7 @@ static int atif_probe_device(struct atalk_iface *atif)
/* Perform AARP probing for a proxy address */
static int atif_proxy_probe_device(struct atalk_iface *atif,
- struct atalk_addr* proxy_addr)
+ struct atalk_addr *proxy_addr)
{
int netrange = ntohs(atif->nets.nr_lastnet) -
ntohs(atif->nets.nr_firstnet) + 1;
@@ -581,7 +581,7 @@ out:
}
/* Delete a route. Find it and discard it */
-static int atrtr_delete(struct atalk_addr * addr)
+static int atrtr_delete(struct atalk_addr *addr)
{
struct atalk_route **r = &atalk_routes;
int retval = 0;
@@ -936,11 +936,11 @@ static unsigned long atalk_sum_skb(const struct sk_buff *skb, int offset,
int i, copy;
/* checksum stuff in header space */
- if ( (copy = start - offset) > 0) {
+ if ((copy = start - offset) > 0) {
if (copy > len)
copy = len;
sum = atalk_sum_partial(skb->data + offset, copy, sum);
- if ( (len -= copy) == 0)
+ if ((len -= copy) == 0)
return sum;
offset += copy;
@@ -1151,7 +1151,7 @@ static int atalk_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
goto out;
at->src_net = addr->sat_addr.s_net = ap->s_net;
- at->src_node = addr->sat_addr.s_node= ap->s_node;
+ at->src_node = addr->sat_addr.s_node = ap->s_node;
} else {
err = -EADDRNOTAVAIL;
if (!atalk_find_interface(addr->sat_addr.s_net,
@@ -1489,8 +1489,6 @@ static int atalk_rcv(struct sk_buff *skb, struct net_device *dev,
goto drop;
/* Queue packet (standard) */
- skb->sk = sock;
-
if (sock_queue_rcv_skb(sock, skb) < 0)
goto drop;
@@ -1644,7 +1642,6 @@ static int atalk_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr
if (!skb)
goto out;
- skb->sk = sk;
skb_reserve(skb, ddp_dl->header_length);
skb_reserve(skb, dev->hard_header_len);
skb->dev = dev;
@@ -1669,7 +1666,7 @@ static int atalk_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr
goto out;
}
- if (sk->sk_no_check == 1)
+ if (sk->sk_no_check_tx)
ddp->deh_sum = 0;
else
ddp->deh_sum = atalk_checksum(skb, len + sizeof(*ddp));
@@ -1790,53 +1787,53 @@ static int atalk_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
void __user *argp = (void __user *)arg;
switch (cmd) {
- /* Protocol layer */
- case TIOCOUTQ: {
- long amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk);
+ /* Protocol layer */
+ case TIOCOUTQ: {
+ long amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk);
- if (amount < 0)
- amount = 0;
- rc = put_user(amount, (int __user *)argp);
- break;
- }
- case TIOCINQ: {
- /*
- * These two are safe on a single CPU system as only
- * user tasks fiddle here
- */
- struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
- long amount = 0;
+ if (amount < 0)
+ amount = 0;
+ rc = put_user(amount, (int __user *)argp);
+ break;
+ }
+ case TIOCINQ: {
+ /*
+ * These two are safe on a single CPU system as only
+ * user tasks fiddle here
+ */
+ struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
+ long amount = 0;
- if (skb)
- amount = skb->len - sizeof(struct ddpehdr);
- rc = put_user(amount, (int __user *)argp);
- break;
- }
- case SIOCGSTAMP:
- rc = sock_get_timestamp(sk, argp);
- break;
- case SIOCGSTAMPNS:
- rc = sock_get_timestampns(sk, argp);
- break;
- /* Routing */
- case SIOCADDRT:
- case SIOCDELRT:
- rc = -EPERM;
- if (capable(CAP_NET_ADMIN))
- rc = atrtr_ioctl(cmd, argp);
- break;
- /* Interface */
- case SIOCGIFADDR:
- case SIOCSIFADDR:
- case SIOCGIFBRDADDR:
- case SIOCATALKDIFADDR:
- case SIOCDIFADDR:
- case SIOCSARP: /* proxy AARP */
- case SIOCDARP: /* proxy AARP */
- rtnl_lock();
- rc = atif_ioctl(cmd, argp);
- rtnl_unlock();
- break;
+ if (skb)
+ amount = skb->len - sizeof(struct ddpehdr);
+ rc = put_user(amount, (int __user *)argp);
+ break;
+ }
+ case SIOCGSTAMP:
+ rc = sock_get_timestamp(sk, argp);
+ break;
+ case SIOCGSTAMPNS:
+ rc = sock_get_timestampns(sk, argp);
+ break;
+ /* Routing */
+ case SIOCADDRT:
+ case SIOCDELRT:
+ rc = -EPERM;
+ if (capable(CAP_NET_ADMIN))
+ rc = atrtr_ioctl(cmd, argp);
+ break;
+ /* Interface */
+ case SIOCGIFADDR:
+ case SIOCSIFADDR:
+ case SIOCGIFBRDADDR:
+ case SIOCATALKDIFADDR:
+ case SIOCDIFADDR:
+ case SIOCSARP: /* proxy AARP */
+ case SIOCDARP: /* proxy AARP */
+ rtnl_lock();
+ rc = atif_ioctl(cmd, argp);
+ rtnl_unlock();
+ break;
}
return rc;
diff --git a/net/atm/clip.c b/net/atm/clip.c
index 8215f7cb170..ba291ce4bdf 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -68,7 +68,7 @@ static int to_atmarpd(enum atmarp_ctrl_type type, int itf, __be32 ip)
sk = sk_atm(atmarpd);
skb_queue_tail(&sk->sk_receive_queue, skb);
- sk->sk_data_ready(sk, skb->len);
+ sk->sk_data_ready(sk);
return 0;
}
diff --git a/net/atm/lec.c b/net/atm/lec.c
index 5a2f602d07e..4c5b8ba0f84 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -152,7 +152,7 @@ static void lec_handle_bridge(struct sk_buff *skb, struct net_device *dev)
atm_force_charge(priv->lecd, skb2->truesize);
sk = sk_atm(priv->lecd);
skb_queue_tail(&sk->sk_receive_queue, skb2);
- sk->sk_data_ready(sk, skb2->len);
+ sk->sk_data_ready(sk);
}
}
#endif /* defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) */
@@ -447,7 +447,7 @@ static int lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb)
atm_force_charge(priv->lecd, skb2->truesize);
sk = sk_atm(priv->lecd);
skb_queue_tail(&sk->sk_receive_queue, skb2);
- sk->sk_data_ready(sk, skb2->len);
+ sk->sk_data_ready(sk);
}
}
#endif /* defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) */
@@ -530,13 +530,13 @@ send_to_lecd(struct lec_priv *priv, atmlec_msg_type type,
atm_force_charge(priv->lecd, skb->truesize);
sk = sk_atm(priv->lecd);
skb_queue_tail(&sk->sk_receive_queue, skb);
- sk->sk_data_ready(sk, skb->len);
+ sk->sk_data_ready(sk);
if (data != NULL) {
pr_debug("about to send %d bytes of data\n", data->len);
atm_force_charge(priv->lecd, data->truesize);
skb_queue_tail(&sk->sk_receive_queue, data);
- sk->sk_data_ready(sk, skb->len);
+ sk->sk_data_ready(sk);
}
return 0;
@@ -616,7 +616,7 @@ static void lec_push(struct atm_vcc *vcc, struct sk_buff *skb)
pr_debug("%s: To daemon\n", dev->name);
skb_queue_tail(&sk->sk_receive_queue, skb);
- sk->sk_data_ready(sk, skb->len);
+ sk->sk_data_ready(sk);
} else { /* Data frame, queue to protocol handlers */
struct lec_arp_table *entry;
unsigned char *src, *dst;
diff --git a/net/atm/mpc.c b/net/atm/mpc.c
index b71ff6b234f..e8e0e7a8a23 100644
--- a/net/atm/mpc.c
+++ b/net/atm/mpc.c
@@ -706,7 +706,7 @@ static void mpc_push(struct atm_vcc *vcc, struct sk_buff *skb)
dprintk("(%s) control packet arrived\n", dev->name);
/* Pass control packets to daemon */
skb_queue_tail(&sk->sk_receive_queue, skb);
- sk->sk_data_ready(sk, skb->len);
+ sk->sk_data_ready(sk);
return;
}
@@ -992,7 +992,7 @@ int msg_to_mpoad(struct k_message *mesg, struct mpoa_client *mpc)
sk = sk_atm(mpc->mpoad_vcc);
skb_queue_tail(&sk->sk_receive_queue, skb);
- sk->sk_data_ready(sk, skb->len);
+ sk->sk_data_ready(sk);
return 0;
}
@@ -1273,7 +1273,7 @@ static void purge_egress_shortcut(struct atm_vcc *vcc, eg_cache_entry *entry)
sk = sk_atm(vcc);
skb_queue_tail(&sk->sk_receive_queue, skb);
- sk->sk_data_ready(sk, skb->len);
+ sk->sk_data_ready(sk);
dprintk("exiting\n");
}
@@ -1492,7 +1492,7 @@ static void __exit atm_mpoa_cleanup(void)
mpc_proc_clean();
- del_timer(&mpc_timer);
+ del_timer_sync(&mpc_timer);
unregister_netdevice_notifier(&mpoa_notifier);
deregister_atm_ioctl(&atm_ioctl_ops);
diff --git a/net/atm/pppoatm.c b/net/atm/pppoatm.c
index 8c93267ce96..c4e09846d1d 100644
--- a/net/atm/pppoatm.c
+++ b/net/atm/pppoatm.c
@@ -252,7 +252,7 @@ static int pppoatm_may_send(struct pppoatm_vcc *pvcc, int size)
* we need to ensure there's a memory barrier after it. The bit
* *must* be set before we do the atomic_inc() on pvcc->inflight.
* There's no smp_mb__after_set_bit(), so it's this or abuse
- * smp_mb__after_clear_bit().
+ * smp_mb__after_atomic().
*/
test_and_set_bit(BLOCKED, &pvcc->blocked);
diff --git a/net/atm/raw.c b/net/atm/raw.c
index b4f7b9ff3c7..2e17e97a7a8 100644
--- a/net/atm/raw.c
+++ b/net/atm/raw.c
@@ -25,7 +25,7 @@ static void atm_push_raw(struct atm_vcc *vcc, struct sk_buff *skb)
struct sock *sk = sk_atm(vcc);
skb_queue_tail(&sk->sk_receive_queue, skb);
- sk->sk_data_ready(sk, skb->len);
+ sk->sk_data_ready(sk);
}
}
diff --git a/net/atm/signaling.c b/net/atm/signaling.c
index 4176887e72e..523bce72f69 100644
--- a/net/atm/signaling.c
+++ b/net/atm/signaling.c
@@ -51,7 +51,7 @@ static void sigd_put_skb(struct sk_buff *skb)
#endif
atm_force_charge(sigd, skb->truesize);
skb_queue_tail(&sk_atm(sigd)->sk_receive_queue, skb);
- sk_atm(sigd)->sk_data_ready(sk_atm(sigd), skb->len);
+ sk_atm(sigd)->sk_data_ready(sk_atm(sigd));
}
static void modify_qos(struct atm_vcc *vcc, struct atmsvc_msg *msg)
diff --git a/net/atm/svc.c b/net/atm/svc.c
index 1281049c135..d8e5d0c2ebb 100644
--- a/net/atm/svc.c
+++ b/net/atm/svc.c
@@ -263,17 +263,11 @@ static int svc_connect(struct socket *sock, struct sockaddr *sockaddr,
goto out;
}
}
-/*
- * Not supported yet
- *
- * #ifndef CONFIG_SINGLE_SIGITF
- */
+
vcc->qos.txtp.max_pcr = SELECT_TOP_PCR(vcc->qos.txtp);
vcc->qos.txtp.pcr = 0;
vcc->qos.txtp.min_pcr = 0;
-/*
- * #endif
- */
+
error = vcc_connect(sock, vcc->itf, vcc->vpi, vcc->vci);
if (!error)
sock->state = SS_CONNECTED;
diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c
index 96f4cab3a2f..7ed8ab72481 100644
--- a/net/ax25/ax25_in.c
+++ b/net/ax25/ax25_in.c
@@ -422,7 +422,7 @@ static int ax25_rcv(struct sk_buff *skb, struct net_device *dev,
if (sk) {
if (!sock_flag(sk, SOCK_DEAD))
- sk->sk_data_ready(sk, skb->len);
+ sk->sk_data_ready(sk);
sock_put(sk);
} else {
free:
diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig
index fa780b76630..11660a3aab5 100644
--- a/net/batman-adv/Kconfig
+++ b/net/batman-adv/Kconfig
@@ -50,6 +50,15 @@ config BATMAN_ADV_NC
If you think that your network does not need this feature you
can safely disable it and save some space.
+config BATMAN_ADV_MCAST
+ bool "Multicast optimisation"
+ depends on BATMAN_ADV
+ default n
+ help
+ This option enables the multicast optimisation which aims to
+ reduce the air overhead while improving the reliability of
+ multicast messages.
+
config BATMAN_ADV_DEBUG
bool "B.A.T.M.A.N. debugging"
depends on BATMAN_ADV
diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile
index 42df18f877e..eb7d8c0388e 100644
--- a/net/batman-adv/Makefile
+++ b/net/batman-adv/Makefile
@@ -36,3 +36,4 @@ batman-adv-y += send.o
batman-adv-y += soft-interface.o
batman-adv-y += sysfs.o
batman-adv-y += translation-table.o
+batman-adv-$(CONFIG_BATMAN_ADV_MCAST) += multicast.o
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 8323bced8e5..f04224c3200 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -347,10 +347,10 @@ static void batadv_iv_ogm_iface_update_mac(struct batadv_hard_iface *hard_iface)
unsigned char *ogm_buff = hard_iface->bat_iv.ogm_buff;
batadv_ogm_packet = (struct batadv_ogm_packet *)ogm_buff;
- memcpy(batadv_ogm_packet->orig,
- hard_iface->net_dev->dev_addr, ETH_ALEN);
- memcpy(batadv_ogm_packet->prev_sender,
- hard_iface->net_dev->dev_addr, ETH_ALEN);
+ ether_addr_copy(batadv_ogm_packet->orig,
+ hard_iface->net_dev->dev_addr);
+ ether_addr_copy(batadv_ogm_packet->prev_sender,
+ hard_iface->net_dev->dev_addr);
}
static void
@@ -830,7 +830,7 @@ static void batadv_iv_ogm_forward(struct batadv_orig_node *orig_node,
tvlv_len = ntohs(batadv_ogm_packet->tvlv_len);
batadv_ogm_packet->ttl--;
- memcpy(batadv_ogm_packet->prev_sender, ethhdr->h_source, ETH_ALEN);
+ ether_addr_copy(batadv_ogm_packet->prev_sender, ethhdr->h_source);
/* apply hop penalty */
batadv_ogm_packet->tq = batadv_hop_penalty(batadv_ogm_packet->tq,
@@ -1545,6 +1545,8 @@ out_neigh:
if ((orig_neigh_node) && (!is_single_hop_neigh))
batadv_orig_node_free_ref(orig_neigh_node);
out:
+ if (router_ifinfo)
+ batadv_neigh_ifinfo_free_ref(router_ifinfo);
if (router)
batadv_neigh_node_free_ref(router);
if (router_router)
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index 05f0712be5e..a957c814072 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -191,7 +191,7 @@ batadv_backbone_hash_find(struct batadv_priv *bat_priv,
if (!hash)
return NULL;
- memcpy(search_entry.orig, addr, ETH_ALEN);
+ ether_addr_copy(search_entry.orig, addr);
search_entry.vid = vid;
index = batadv_choose_backbone_gw(&search_entry, hash->size);
@@ -305,7 +305,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac,
/* normal claim frame
* set Ethernet SRC to the clients mac
*/
- memcpy(ethhdr->h_source, mac, ETH_ALEN);
+ ether_addr_copy(ethhdr->h_source, mac);
batadv_dbg(BATADV_DBG_BLA, bat_priv,
"bla_send_claim(): CLAIM %pM on vid %d\n", mac,
BATADV_PRINT_VID(vid));
@@ -314,7 +314,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac,
/* unclaim frame
* set HW SRC to the clients mac
*/
- memcpy(hw_src, mac, ETH_ALEN);
+ ether_addr_copy(hw_src, mac);
batadv_dbg(BATADV_DBG_BLA, bat_priv,
"bla_send_claim(): UNCLAIM %pM on vid %d\n", mac,
BATADV_PRINT_VID(vid));
@@ -323,7 +323,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac,
/* announcement frame
* set HW SRC to the special mac containg the crc
*/
- memcpy(hw_src, mac, ETH_ALEN);
+ ether_addr_copy(hw_src, mac);
batadv_dbg(BATADV_DBG_BLA, bat_priv,
"bla_send_claim(): ANNOUNCE of %pM on vid %d\n",
ethhdr->h_source, BATADV_PRINT_VID(vid));
@@ -333,8 +333,8 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac,
* set HW SRC and header destination to the receiving backbone
* gws mac
*/
- memcpy(hw_src, mac, ETH_ALEN);
- memcpy(ethhdr->h_dest, mac, ETH_ALEN);
+ ether_addr_copy(hw_src, mac);
+ ether_addr_copy(ethhdr->h_dest, mac);
batadv_dbg(BATADV_DBG_BLA, bat_priv,
"bla_send_claim(): REQUEST of %pM to %pM on vid %d\n",
ethhdr->h_source, ethhdr->h_dest,
@@ -395,7 +395,7 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, uint8_t *orig,
entry->bat_priv = bat_priv;
atomic_set(&entry->request_sent, 0);
atomic_set(&entry->wait_periods, 0);
- memcpy(entry->orig, orig, ETH_ALEN);
+ ether_addr_copy(entry->orig, orig);
/* one for the hash, one for returning */
atomic_set(&entry->refcount, 2);
@@ -563,7 +563,7 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv,
struct batadv_bla_claim search_claim;
int hash_added;
- memcpy(search_claim.addr, mac, ETH_ALEN);
+ ether_addr_copy(search_claim.addr, mac);
search_claim.vid = vid;
claim = batadv_claim_hash_find(bat_priv, &search_claim);
@@ -573,7 +573,7 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv,
if (!claim)
return;
- memcpy(claim->addr, mac, ETH_ALEN);
+ ether_addr_copy(claim->addr, mac);
claim->vid = vid;
claim->lasttime = jiffies;
claim->backbone_gw = backbone_gw;
@@ -624,7 +624,7 @@ static void batadv_bla_del_claim(struct batadv_priv *bat_priv,
{
struct batadv_bla_claim search_claim, *claim;
- memcpy(search_claim.addr, mac, ETH_ALEN);
+ ether_addr_copy(search_claim.addr, mac);
search_claim.vid = vid;
claim = batadv_claim_hash_find(bat_priv, &search_claim);
if (!claim)
@@ -800,11 +800,6 @@ static int batadv_check_claim_group(struct batadv_priv *bat_priv,
bla_dst = (struct batadv_bla_claim_dst *)hw_dst;
bla_dst_own = &bat_priv->bla.claim_dest;
- /* check if it is a claim packet in general */
- if (memcmp(bla_dst->magic, bla_dst_own->magic,
- sizeof(bla_dst->magic)) != 0)
- return 0;
-
/* if announcement packet, use the source,
* otherwise assume it is in the hw_src
*/
@@ -866,12 +861,13 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv,
struct batadv_hard_iface *primary_if,
struct sk_buff *skb)
{
- struct batadv_bla_claim_dst *bla_dst;
+ struct batadv_bla_claim_dst *bla_dst, *bla_dst_own;
uint8_t *hw_src, *hw_dst;
- struct vlan_ethhdr *vhdr;
+ struct vlan_hdr *vhdr, vhdr_buf;
struct ethhdr *ethhdr;
struct arphdr *arphdr;
unsigned short vid;
+ int vlan_depth = 0;
__be16 proto;
int headlen;
int ret;
@@ -882,9 +878,24 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv,
proto = ethhdr->h_proto;
headlen = ETH_HLEN;
if (vid & BATADV_VLAN_HAS_TAG) {
- vhdr = (struct vlan_ethhdr *)ethhdr;
- proto = vhdr->h_vlan_encapsulated_proto;
- headlen += VLAN_HLEN;
+ /* Traverse the VLAN/Ethertypes.
+ *
+ * At this point it is known that the first protocol is a VLAN
+ * header, so start checking at the encapsulated protocol.
+ *
+ * The depth of the VLAN headers is recorded to drop BLA claim
+ * frames encapsulated into multiple VLAN headers (QinQ).
+ */
+ do {
+ vhdr = skb_header_pointer(skb, headlen, VLAN_HLEN,
+ &vhdr_buf);
+ if (!vhdr)
+ return 0;
+
+ proto = vhdr->h_vlan_encapsulated_proto;
+ headlen += VLAN_HLEN;
+ vlan_depth++;
+ } while (proto == htons(ETH_P_8021Q));
}
if (proto != htons(ETH_P_ARP))
@@ -914,6 +925,19 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv,
hw_src = (uint8_t *)arphdr + sizeof(struct arphdr);
hw_dst = hw_src + ETH_ALEN + 4;
bla_dst = (struct batadv_bla_claim_dst *)hw_dst;
+ bla_dst_own = &bat_priv->bla.claim_dest;
+
+ /* check if it is a claim frame in general */
+ if (memcmp(bla_dst->magic, bla_dst_own->magic,
+ sizeof(bla_dst->magic)) != 0)
+ return 0;
+
+ /* check if there is a claim frame encapsulated deeper in (QinQ) and
+ * drop that, as this is not supported by BLA but should also not be
+ * sent via the mesh.
+ */
+ if (vlan_depth > 1)
+ return 1;
/* check if it is a claim frame. */
ret = batadv_check_claim_group(bat_priv, primary_if, hw_src, hw_dst,
@@ -1103,8 +1127,8 @@ void batadv_bla_update_orig_address(struct batadv_priv *bat_priv,
oldif->net_dev->dev_addr))
continue;
- memcpy(backbone_gw->orig,
- primary_if->net_dev->dev_addr, ETH_ALEN);
+ ether_addr_copy(backbone_gw->orig,
+ primary_if->net_dev->dev_addr);
/* send an announce frame so others will ask for our
* claims and update their tables.
*/
@@ -1310,7 +1334,7 @@ int batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv,
entry = &bat_priv->bla.bcast_duplist[curr];
entry->crc = crc;
entry->entrytime = jiffies;
- memcpy(entry->orig, bcast_packet->orig, ETH_ALEN);
+ ether_addr_copy(entry->orig, bcast_packet->orig);
bat_priv->bla.bcast_duplist_curr = curr;
out:
@@ -1458,7 +1482,7 @@ int batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb,
if (is_multicast_ether_addr(ethhdr->h_dest) && is_bcast)
goto handled;
- memcpy(search_claim.addr, ethhdr->h_source, ETH_ALEN);
+ ether_addr_copy(search_claim.addr, ethhdr->h_source);
search_claim.vid = vid;
claim = batadv_claim_hash_find(bat_priv, &search_claim);
@@ -1547,9 +1571,6 @@ int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb,
if (!atomic_read(&bat_priv->bridge_loop_avoidance))
goto allow;
- /* in VLAN case, the mac header might not be set. */
- skb_reset_mac_header(skb);
-
if (batadv_bla_process_claim(bat_priv, primary_if, skb))
goto handled;
@@ -1560,7 +1581,7 @@ int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb,
if (is_multicast_ether_addr(ethhdr->h_dest))
goto handled;
- memcpy(search_claim.addr, ethhdr->h_source, ETH_ALEN);
+ ether_addr_copy(search_claim.addr, ethhdr->h_source);
search_claim.vid = vid;
claim = batadv_claim_hash_find(bat_priv, &search_claim);
diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c
index b758881be10..a12e25efaf6 100644
--- a/net/batman-adv/debugfs.c
+++ b/net/batman-adv/debugfs.c
@@ -245,6 +245,7 @@ static int batadv_algorithms_open(struct inode *inode, struct file *file)
static int batadv_originators_open(struct inode *inode, struct file *file)
{
struct net_device *net_dev = (struct net_device *)inode->i_private;
+
return single_open(file, batadv_orig_seq_print_text, net_dev);
}
@@ -258,18 +259,21 @@ static int batadv_originators_hardif_open(struct inode *inode,
struct file *file)
{
struct net_device *net_dev = (struct net_device *)inode->i_private;
+
return single_open(file, batadv_orig_hardif_seq_print_text, net_dev);
}
static int batadv_gateways_open(struct inode *inode, struct file *file)
{
struct net_device *net_dev = (struct net_device *)inode->i_private;
+
return single_open(file, batadv_gw_client_seq_print_text, net_dev);
}
static int batadv_transtable_global_open(struct inode *inode, struct file *file)
{
struct net_device *net_dev = (struct net_device *)inode->i_private;
+
return single_open(file, batadv_tt_global_seq_print_text, net_dev);
}
@@ -277,6 +281,7 @@ static int batadv_transtable_global_open(struct inode *inode, struct file *file)
static int batadv_bla_claim_table_open(struct inode *inode, struct file *file)
{
struct net_device *net_dev = (struct net_device *)inode->i_private;
+
return single_open(file, batadv_bla_claim_table_seq_print_text,
net_dev);
}
@@ -285,6 +290,7 @@ static int batadv_bla_backbone_table_open(struct inode *inode,
struct file *file)
{
struct net_device *net_dev = (struct net_device *)inode->i_private;
+
return single_open(file, batadv_bla_backbone_table_seq_print_text,
net_dev);
}
@@ -300,6 +306,7 @@ static int batadv_bla_backbone_table_open(struct inode *inode,
static int batadv_dat_cache_open(struct inode *inode, struct file *file)
{
struct net_device *net_dev = (struct net_device *)inode->i_private;
+
return single_open(file, batadv_dat_cache_seq_print_text, net_dev);
}
#endif
@@ -307,6 +314,7 @@ static int batadv_dat_cache_open(struct inode *inode, struct file *file)
static int batadv_transtable_local_open(struct inode *inode, struct file *file)
{
struct net_device *net_dev = (struct net_device *)inode->i_private;
+
return single_open(file, batadv_tt_local_seq_print_text, net_dev);
}
@@ -319,6 +327,7 @@ struct batadv_debuginfo {
static int batadv_nc_nodes_open(struct inode *inode, struct file *file)
{
struct net_device *net_dev = (struct net_device *)inode->i_private;
+
return single_open(file, batadv_nc_nodes_seq_print_text, net_dev);
}
#endif
@@ -333,7 +342,7 @@ struct batadv_debuginfo batadv_debuginfo_##_name = { \
.llseek = seq_lseek, \
.release = single_release, \
} \
-};
+}
/* the following attributes are general and therefore they will be directly
* placed in the BATADV_DEBUGFS_SUBDIR subdirectory of debugfs
@@ -395,7 +404,7 @@ struct batadv_debuginfo batadv_hardif_debuginfo_##_name = { \
.llseek = seq_lseek, \
.release = single_release, \
}, \
-};
+}
static BATADV_HARDIF_DEBUGINFO(originators, S_IRUGO,
batadv_originators_hardif_open);
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index edee5041189..f2c066b2171 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -277,7 +277,7 @@ static void batadv_dat_entry_add(struct batadv_priv *bat_priv, __be32 ip,
/* if this entry is already known, just update it */
if (dat_entry) {
if (!batadv_compare_eth(dat_entry->mac_addr, mac_addr))
- memcpy(dat_entry->mac_addr, mac_addr, ETH_ALEN);
+ ether_addr_copy(dat_entry->mac_addr, mac_addr);
dat_entry->last_update = jiffies;
batadv_dbg(BATADV_DBG_DAT, bat_priv,
"Entry updated: %pI4 %pM (vid: %d)\n",
@@ -292,7 +292,7 @@ static void batadv_dat_entry_add(struct batadv_priv *bat_priv, __be32 ip,
dat_entry->ip = ip;
dat_entry->vid = vid;
- memcpy(dat_entry->mac_addr, mac_addr, ETH_ALEN);
+ ether_addr_copy(dat_entry->mac_addr, mac_addr);
dat_entry->last_update = jiffies;
atomic_set(&dat_entry->refcount, 2);
@@ -594,7 +594,7 @@ static bool batadv_dat_send_data(struct batadv_priv *bat_priv,
if (!neigh_node)
goto free_orig;
- tmp_skb = pskb_copy(skb, GFP_ATOMIC);
+ tmp_skb = pskb_copy_for_clone(skb, GFP_ATOMIC);
if (!batadv_send_skb_prepare_unicast_4addr(bat_priv, tmp_skb,
cand[i].orig_node,
packet_subtype)) {
@@ -662,6 +662,7 @@ static void batadv_dat_tvlv_container_update(struct batadv_priv *bat_priv)
void batadv_dat_status_update(struct net_device *net_dev)
{
struct batadv_priv *bat_priv = netdev_priv(net_dev);
+
batadv_dat_tvlv_container_update(bat_priv);
}
@@ -940,8 +941,7 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv,
* additional DAT answer may trigger kernel warnings about
* a packet coming from the wrong port.
*/
- if (batadv_is_my_client(bat_priv, dat_entry->mac_addr,
- BATADV_NO_FLAGS)) {
+ if (batadv_is_my_client(bat_priv, dat_entry->mac_addr, vid)) {
ret = true;
goto out;
}
@@ -1027,6 +1027,11 @@ bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv,
if (!skb_new)
goto out;
+ /* the rest of the TX path assumes that the mac_header offset pointing
+ * to the inner Ethernet header has been set, therefore reset it now.
+ */
+ skb_reset_mac_header(skb_new);
+
if (vid & BATADV_VLAN_HAS_TAG)
skb_new = vlan_insert_tag(skb_new, htons(ETH_P_8021Q),
vid & VLAN_VID_MASK);
diff --git a/net/batman-adv/distributed-arp-table.h b/net/batman-adv/distributed-arp-table.h
index ac9be9b67a2..d76e1d06c5b 100644
--- a/net/batman-adv/distributed-arp-table.h
+++ b/net/batman-adv/distributed-arp-table.h
@@ -25,6 +25,9 @@
#include <linux/if_arp.h>
+/**
+ * BATADV_DAT_ADDR_MAX - maximum address value in the DHT space
+ */
#define BATADV_DAT_ADDR_MAX ((batadv_dat_addr_t)~(batadv_dat_addr_t)0)
void batadv_dat_status_update(struct net_device *net_dev);
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index 88df9b1d552..f14e54a0569 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -418,12 +418,13 @@ bool batadv_frag_send_packet(struct sk_buff *skb,
struct batadv_neigh_node *neigh_node)
{
struct batadv_priv *bat_priv;
- struct batadv_hard_iface *primary_if;
+ struct batadv_hard_iface *primary_if = NULL;
struct batadv_frag_packet frag_header;
struct sk_buff *skb_fragment;
unsigned mtu = neigh_node->if_incoming->net_dev->mtu;
unsigned header_size = sizeof(frag_header);
unsigned max_fragment_size, max_packet_size;
+ bool ret = false;
/* To avoid merge and refragmentation at next-hops we never send
* fragments larger than BATADV_FRAG_MAX_FRAG_SIZE
@@ -449,8 +450,8 @@ bool batadv_frag_send_packet(struct sk_buff *skb,
frag_header.reserved = 0;
frag_header.no = 0;
frag_header.total_size = htons(skb->len);
- memcpy(frag_header.orig, primary_if->net_dev->dev_addr, ETH_ALEN);
- memcpy(frag_header.dest, orig_node->orig, ETH_ALEN);
+ ether_addr_copy(frag_header.orig, primary_if->net_dev->dev_addr);
+ ether_addr_copy(frag_header.dest, orig_node->orig);
/* Eat and send fragments from the tail of skb */
while (skb->len > max_fragment_size) {
@@ -483,7 +484,11 @@ bool batadv_frag_send_packet(struct sk_buff *skb,
skb->len + ETH_HLEN);
batadv_send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr);
- return true;
+ ret = true;
+
out_err:
- return false;
+ if (primary_if)
+ batadv_hardif_free_ref(primary_if);
+
+ return ret;
}
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index 55cf2260d29..90cff585b37 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -42,8 +42,10 @@
static void batadv_gw_node_free_ref(struct batadv_gw_node *gw_node)
{
- if (atomic_dec_and_test(&gw_node->refcount))
+ if (atomic_dec_and_test(&gw_node->refcount)) {
+ batadv_orig_node_free_ref(gw_node->orig_node);
kfree_rcu(gw_node, rcu);
+ }
}
static struct batadv_gw_node *
@@ -389,8 +391,6 @@ out:
batadv_neigh_ifinfo_free_ref(router_gw_tq);
if (router_orig_tq)
batadv_neigh_ifinfo_free_ref(router_orig_tq);
-
- return;
}
/**
@@ -408,9 +408,14 @@ static void batadv_gw_node_add(struct batadv_priv *bat_priv,
if (gateway->bandwidth_down == 0)
return;
+ if (!atomic_inc_not_zero(&orig_node->refcount))
+ return;
+
gw_node = kzalloc(sizeof(*gw_node), GFP_ATOMIC);
- if (!gw_node)
+ if (!gw_node) {
+ batadv_orig_node_free_ref(orig_node);
return;
+ }
INIT_HLIST_NODE(&gw_node->list);
gw_node->orig_node = orig_node;
@@ -680,7 +685,7 @@ batadv_gw_dhcp_recipient_get(struct sk_buff *skb, unsigned int *header_len,
if (!pskb_may_pull(skb, *header_len + ETH_HLEN))
return BATADV_DHCP_NO;
- ethhdr = (struct ethhdr *)skb->data;
+ ethhdr = eth_hdr(skb);
proto = ethhdr->h_proto;
*header_len += ETH_HLEN;
@@ -689,7 +694,7 @@ batadv_gw_dhcp_recipient_get(struct sk_buff *skb, unsigned int *header_len,
if (!pskb_may_pull(skb, *header_len + VLAN_HLEN))
return BATADV_DHCP_NO;
- vhdr = (struct vlan_ethhdr *)skb->data;
+ vhdr = vlan_eth_hdr(skb);
proto = vhdr->h_vlan_encapsulated_proto;
*header_len += VLAN_HLEN;
}
@@ -728,7 +733,7 @@ batadv_gw_dhcp_recipient_get(struct sk_buff *skb, unsigned int *header_len,
return BATADV_DHCP_NO;
/* skb->data might have been reallocated by pskb_may_pull() */
- ethhdr = (struct ethhdr *)skb->data;
+ ethhdr = eth_hdr(skb);
if (ntohs(ethhdr->h_proto) == ETH_P_8021Q)
ethhdr = (struct ethhdr *)(skb->data + VLAN_HLEN);
@@ -765,7 +770,7 @@ batadv_gw_dhcp_recipient_get(struct sk_buff *skb, unsigned int *header_len,
if (*p != ETH_ALEN)
return BATADV_DHCP_NO;
- memcpy(chaddr, skb->data + chaddr_offset, ETH_ALEN);
+ ether_addr_copy(chaddr, skb->data + chaddr_offset);
}
return ret;
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index b851cc58085..fbda6b54baf 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -83,7 +83,7 @@ static bool batadv_is_on_batman_iface(const struct net_device *net_dev)
return true;
/* no more parents..stop recursion */
- if (net_dev->iflink == net_dev->ifindex)
+ if (net_dev->iflink == 0 || net_dev->iflink == net_dev->ifindex)
return false;
/* recurse over the parent device */
diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c
index abb9d6e0388..161ef8f17d2 100644
--- a/net/batman-adv/icmp_socket.c
+++ b/net/batman-adv/icmp_socket.c
@@ -158,6 +158,7 @@ static ssize_t batadv_socket_write(struct file *file, const char __user *buff,
struct batadv_orig_node *orig_node = NULL;
struct batadv_neigh_node *neigh_node = NULL;
size_t packet_len = sizeof(struct batadv_icmp_packet);
+ uint8_t *addr;
if (len < sizeof(struct batadv_icmp_header)) {
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
@@ -227,10 +228,10 @@ static ssize_t batadv_socket_write(struct file *file, const char __user *buff,
goto dst_unreach;
icmp_packet_rr = (struct batadv_icmp_packet_rr *)icmp_header;
- if (packet_len == sizeof(*icmp_packet_rr))
- memcpy(icmp_packet_rr->rr,
- neigh_node->if_incoming->net_dev->dev_addr,
- ETH_ALEN);
+ if (packet_len == sizeof(*icmp_packet_rr)) {
+ addr = neigh_node->if_incoming->net_dev->dev_addr;
+ ether_addr_copy(icmp_packet_rr->rr[0], addr);
+ }
break;
default:
@@ -250,7 +251,7 @@ static ssize_t batadv_socket_write(struct file *file, const char __user *buff,
goto free_skb;
}
- memcpy(icmp_header->orig, primary_if->net_dev->dev_addr, ETH_ALEN);
+ ether_addr_copy(icmp_header->orig, primary_if->net_dev->dev_addr);
batadv_send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr);
goto out;
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index 66ae135b9f2..d1183e88216 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -34,6 +34,7 @@
#include "gateway_client.h"
#include "bridge_loop_avoidance.h"
#include "distributed-arp-table.h"
+#include "multicast.h"
#include "gateway_common.h"
#include "hash.h"
#include "bat_algo.h"
@@ -110,6 +111,9 @@ int batadv_mesh_init(struct net_device *soft_iface)
spin_lock_init(&bat_priv->tt.last_changeset_lock);
spin_lock_init(&bat_priv->tt.commit_lock);
spin_lock_init(&bat_priv->gw.list_lock);
+#ifdef CONFIG_BATMAN_ADV_MCAST
+ spin_lock_init(&bat_priv->mcast.want_lists_lock);
+#endif
spin_lock_init(&bat_priv->tvlv.container_list_lock);
spin_lock_init(&bat_priv->tvlv.handler_list_lock);
spin_lock_init(&bat_priv->softif_vlan_list_lock);
@@ -117,9 +121,17 @@ int batadv_mesh_init(struct net_device *soft_iface)
INIT_HLIST_HEAD(&bat_priv->forw_bat_list);
INIT_HLIST_HEAD(&bat_priv->forw_bcast_list);
INIT_HLIST_HEAD(&bat_priv->gw.list);
+#ifdef CONFIG_BATMAN_ADV_MCAST
+ INIT_HLIST_HEAD(&bat_priv->mcast.want_all_unsnoopables_list);
+ INIT_HLIST_HEAD(&bat_priv->mcast.want_all_ipv4_list);
+ INIT_HLIST_HEAD(&bat_priv->mcast.want_all_ipv6_list);
+#endif
INIT_LIST_HEAD(&bat_priv->tt.changes_list);
INIT_LIST_HEAD(&bat_priv->tt.req_list);
INIT_LIST_HEAD(&bat_priv->tt.roam_list);
+#ifdef CONFIG_BATMAN_ADV_MCAST
+ INIT_HLIST_HEAD(&bat_priv->mcast.mla_list);
+#endif
INIT_HLIST_HEAD(&bat_priv->tvlv.container_list);
INIT_HLIST_HEAD(&bat_priv->tvlv.handler_list);
INIT_HLIST_HEAD(&bat_priv->softif_vlan_list);
@@ -145,6 +157,7 @@ int batadv_mesh_init(struct net_device *soft_iface)
goto err;
batadv_gw_init(bat_priv);
+ batadv_mcast_init(bat_priv);
atomic_set(&bat_priv->gw.reselect, 0);
atomic_set(&bat_priv->mesh_state, BATADV_MESH_ACTIVE);
@@ -169,6 +182,8 @@ void batadv_mesh_free(struct net_device *soft_iface)
batadv_dat_free(bat_priv);
batadv_bla_free(bat_priv);
+ batadv_mcast_free(bat_priv);
+
/* Free the TT and the originator tables only after having terminated
* all the other depending components which may use these structures for
* their purposes.
@@ -1133,8 +1148,8 @@ void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, uint8_t *src,
unicast_tvlv_packet->reserved = 0;
unicast_tvlv_packet->tvlv_len = htons(tvlv_len);
unicast_tvlv_packet->align = 0;
- memcpy(unicast_tvlv_packet->src, src, ETH_ALEN);
- memcpy(unicast_tvlv_packet->dst, dst, ETH_ALEN);
+ ether_addr_copy(unicast_tvlv_packet->src, src);
+ ether_addr_copy(unicast_tvlv_packet->dst, dst);
tvlv_buff = (unsigned char *)(unicast_tvlv_packet + 1);
tvlv_hdr = (struct batadv_tvlv_hdr *)tvlv_buff;
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 9374f1a5134..118b990bae2 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -24,7 +24,7 @@
#define BATADV_DRIVER_DEVICE "batman-adv"
#ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2014.1.0"
+#define BATADV_SOURCE_VERSION "2014.3.0"
#endif
/* B.A.T.M.A.N. parameters */
@@ -176,6 +176,8 @@ enum batadv_uev_type {
#include <linux/percpu.h>
#include <linux/slab.h>
#include <net/sock.h> /* struct sock */
+#include <net/addrconf.h> /* ipv6 address stuff */
+#include <linux/ip.h>
#include <net/rtnetlink.h>
#include <linux/jiffies.h>
#include <linux/seq_file.h>
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
new file mode 100644
index 00000000000..96b66fd30f9
--- /dev/null
+++ b/net/batman-adv/multicast.c
@@ -0,0 +1,748 @@
+/* Copyright (C) 2014 B.A.T.M.A.N. contributors:
+ *
+ * Linus Lüssing
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "main.h"
+#include "multicast.h"
+#include "originator.h"
+#include "hard-interface.h"
+#include "translation-table.h"
+#include "multicast.h"
+
+/**
+ * batadv_mcast_mla_softif_get - get softif multicast listeners
+ * @dev: the device to collect multicast addresses from
+ * @mcast_list: a list to put found addresses into
+ *
+ * Collect multicast addresses of the local multicast listeners
+ * on the given soft interface, dev, in the given mcast_list.
+ *
+ * Returns -ENOMEM on memory allocation error or the number of
+ * items added to the mcast_list otherwise.
+ */
+static int batadv_mcast_mla_softif_get(struct net_device *dev,
+ struct hlist_head *mcast_list)
+{
+ struct netdev_hw_addr *mc_list_entry;
+ struct batadv_hw_addr *new;
+ int ret = 0;
+
+ netif_addr_lock_bh(dev);
+ netdev_for_each_mc_addr(mc_list_entry, dev) {
+ new = kmalloc(sizeof(*new), GFP_ATOMIC);
+ if (!new) {
+ ret = -ENOMEM;
+ break;
+ }
+
+ ether_addr_copy(new->addr, mc_list_entry->addr);
+ hlist_add_head(&new->list, mcast_list);
+ ret++;
+ }
+ netif_addr_unlock_bh(dev);
+
+ return ret;
+}
+
+/**
+ * batadv_mcast_mla_is_duplicate - check whether an address is in a list
+ * @mcast_addr: the multicast address to check
+ * @mcast_list: the list with multicast addresses to search in
+ *
+ * Returns true if the given address is already in the given list.
+ * Otherwise returns false.
+ */
+static bool batadv_mcast_mla_is_duplicate(uint8_t *mcast_addr,
+ struct hlist_head *mcast_list)
+{
+ struct batadv_hw_addr *mcast_entry;
+
+ hlist_for_each_entry(mcast_entry, mcast_list, list)
+ if (batadv_compare_eth(mcast_entry->addr, mcast_addr))
+ return true;
+
+ return false;
+}
+
+/**
+ * batadv_mcast_mla_list_free - free a list of multicast addresses
+ * @mcast_list: the list to free
+ *
+ * Removes and frees all items in the given mcast_list.
+ */
+static void batadv_mcast_mla_list_free(struct hlist_head *mcast_list)
+{
+ struct batadv_hw_addr *mcast_entry;
+ struct hlist_node *tmp;
+
+ hlist_for_each_entry_safe(mcast_entry, tmp, mcast_list, list) {
+ hlist_del(&mcast_entry->list);
+ kfree(mcast_entry);
+ }
+}
+
+/**
+ * batadv_mcast_mla_tt_retract - clean up multicast listener announcements
+ * @bat_priv: the bat priv with all the soft interface information
+ * @mcast_list: a list of addresses which should _not_ be removed
+ *
+ * Retracts the announcement of any multicast listener from the
+ * translation table except the ones listed in the given mcast_list.
+ *
+ * If mcast_list is NULL then all are retracted.
+ */
+static void batadv_mcast_mla_tt_retract(struct batadv_priv *bat_priv,
+ struct hlist_head *mcast_list)
+{
+ struct batadv_hw_addr *mcast_entry;
+ struct hlist_node *tmp;
+
+ hlist_for_each_entry_safe(mcast_entry, tmp, &bat_priv->mcast.mla_list,
+ list) {
+ if (mcast_list &&
+ batadv_mcast_mla_is_duplicate(mcast_entry->addr,
+ mcast_list))
+ continue;
+
+ batadv_tt_local_remove(bat_priv, mcast_entry->addr,
+ BATADV_NO_FLAGS,
+ "mcast TT outdated", false);
+
+ hlist_del(&mcast_entry->list);
+ kfree(mcast_entry);
+ }
+}
+
+/**
+ * batadv_mcast_mla_tt_add - add multicast listener announcements
+ * @bat_priv: the bat priv with all the soft interface information
+ * @mcast_list: a list of addresses which are going to get added
+ *
+ * Adds multicast listener announcements from the given mcast_list to the
+ * translation table if they have not been added yet.
+ */
+static void batadv_mcast_mla_tt_add(struct batadv_priv *bat_priv,
+ struct hlist_head *mcast_list)
+{
+ struct batadv_hw_addr *mcast_entry;
+ struct hlist_node *tmp;
+
+ if (!mcast_list)
+ return;
+
+ hlist_for_each_entry_safe(mcast_entry, tmp, mcast_list, list) {
+ if (batadv_mcast_mla_is_duplicate(mcast_entry->addr,
+ &bat_priv->mcast.mla_list))
+ continue;
+
+ if (!batadv_tt_local_add(bat_priv->soft_iface,
+ mcast_entry->addr, BATADV_NO_FLAGS,
+ BATADV_NULL_IFINDEX, BATADV_NO_MARK))
+ continue;
+
+ hlist_del(&mcast_entry->list);
+ hlist_add_head(&mcast_entry->list, &bat_priv->mcast.mla_list);
+ }
+}
+
+/**
+ * batadv_mcast_has_bridge - check whether the soft-iface is bridged
+ * @bat_priv: the bat priv with all the soft interface information
+ *
+ * Checks whether there is a bridge on top of our soft interface. Returns
+ * true if so, false otherwise.
+ */
+static bool batadv_mcast_has_bridge(struct batadv_priv *bat_priv)
+{
+ struct net_device *upper = bat_priv->soft_iface;
+
+ rcu_read_lock();
+ do {
+ upper = netdev_master_upper_dev_get_rcu(upper);
+ } while (upper && !(upper->priv_flags & IFF_EBRIDGE));
+ rcu_read_unlock();
+
+ return upper;
+}
+
+/**
+ * batadv_mcast_mla_tvlv_update - update multicast tvlv
+ * @bat_priv: the bat priv with all the soft interface information
+ *
+ * Updates the own multicast tvlv with our current multicast related settings,
+ * capabilities and inabilities.
+ *
+ * Returns true if the tvlv container is registered afterwards. Otherwise
+ * returns false.
+ */
+static bool batadv_mcast_mla_tvlv_update(struct batadv_priv *bat_priv)
+{
+ struct batadv_tvlv_mcast_data mcast_data;
+
+ mcast_data.flags = BATADV_NO_FLAGS;
+ memset(mcast_data.reserved, 0, sizeof(mcast_data.reserved));
+
+ /* Avoid attaching MLAs if there is a bridge on top of our soft
+ * interface; we don't support that yet (TODO)
+ */
+ if (batadv_mcast_has_bridge(bat_priv)) {
+ if (bat_priv->mcast.enabled) {
+ batadv_tvlv_container_unregister(bat_priv,
+ BATADV_TVLV_MCAST, 1);
+ bat_priv->mcast.enabled = false;
+ }
+
+ return false;
+ }
+
+ if (!bat_priv->mcast.enabled ||
+ mcast_data.flags != bat_priv->mcast.flags) {
+ batadv_tvlv_container_register(bat_priv, BATADV_TVLV_MCAST, 1,
+ &mcast_data, sizeof(mcast_data));
+ bat_priv->mcast.flags = mcast_data.flags;
+ bat_priv->mcast.enabled = true;
+ }
+
+ return true;
+}
+
+/**
+ * batadv_mcast_mla_update - update the own MLAs
+ * @bat_priv: the bat priv with all the soft interface information
+ *
+ * Updates the own multicast listener announcements in the translation
+ * table as well as the own, announced multicast tvlv container.
+ */
+void batadv_mcast_mla_update(struct batadv_priv *bat_priv)
+{
+ struct net_device *soft_iface = bat_priv->soft_iface;
+ struct hlist_head mcast_list = HLIST_HEAD_INIT;
+ int ret;
+
+ if (!batadv_mcast_mla_tvlv_update(bat_priv))
+ goto update;
+
+ ret = batadv_mcast_mla_softif_get(soft_iface, &mcast_list);
+ if (ret < 0)
+ goto out;
+
+update:
+ batadv_mcast_mla_tt_retract(bat_priv, &mcast_list);
+ batadv_mcast_mla_tt_add(bat_priv, &mcast_list);
+
+out:
+ batadv_mcast_mla_list_free(&mcast_list);
+}
+
+/**
+ * batadv_mcast_forw_mode_check_ipv4 - check for optimized forwarding potential
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the IPv4 packet to check
+ * @is_unsnoopable: set to true if the destination is not snoopable
+ *
+ * Checks whether the given IPv4 packet has the potential to be forwarded with a
+ * mode more optimal than classic flooding.
+ *
+ * If so then returns 0. Otherwise -EINVAL is returned or -ENOMEM in case of
+ * memory allocation failure.
+ */
+static int batadv_mcast_forw_mode_check_ipv4(struct batadv_priv *bat_priv,
+ struct sk_buff *skb,
+ bool *is_unsnoopable)
+{
+ struct iphdr *iphdr;
+
+ /* We might fail due to out-of-memory -> drop it */
+ if (!pskb_may_pull(skb, sizeof(struct ethhdr) + sizeof(*iphdr)))
+ return -ENOMEM;
+
+ iphdr = ip_hdr(skb);
+
+ /* TODO: Implement Multicast Router Discovery (RFC4286),
+ * then allow scope > link local, too
+ */
+ if (!ipv4_is_local_multicast(iphdr->daddr))
+ return -EINVAL;
+
+ /* link-local multicast listeners behind a bridge are
+ * not snoopable (see RFC4541, section 2.1.2.2)
+ */
+ *is_unsnoopable = true;
+
+ return 0;
+}
+
+/**
+ * batadv_mcast_forw_mode_check_ipv6 - check for optimized forwarding potential
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the IPv6 packet to check
+ * @is_unsnoopable: set to true if the destination is not snoopable
+ *
+ * Checks whether the given IPv6 packet has the potential to be forwarded with a
+ * mode more optimal than classic flooding.
+ *
+ * If so then returns 0. Otherwise -EINVAL is returned or -ENOMEM if we are out
+ * of memory.
+ */
+static int batadv_mcast_forw_mode_check_ipv6(struct batadv_priv *bat_priv,
+ struct sk_buff *skb,
+ bool *is_unsnoopable)
+{
+ struct ipv6hdr *ip6hdr;
+
+ /* We might fail due to out-of-memory -> drop it */
+ if (!pskb_may_pull(skb, sizeof(struct ethhdr) + sizeof(*ip6hdr)))
+ return -ENOMEM;
+
+ ip6hdr = ipv6_hdr(skb);
+
+ /* TODO: Implement Multicast Router Discovery (RFC4286),
+ * then allow scope > link local, too
+ */
+ if (IPV6_ADDR_MC_SCOPE(&ip6hdr->daddr) != IPV6_ADDR_SCOPE_LINKLOCAL)
+ return -EINVAL;
+
+ /* link-local-all-nodes multicast listeners behind a bridge are
+ * not snoopable (see RFC4541, section 3, paragraph 3)
+ */
+ if (ipv6_addr_is_ll_all_nodes(&ip6hdr->daddr))
+ *is_unsnoopable = true;
+
+ return 0;
+}
+
+/**
+ * batadv_mcast_forw_mode_check - check for optimized forwarding potential
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the multicast frame to check
+ * @is_unsnoopable: set to true if the destination is not snoopable
+ *
+ * Checks whether the given multicast ethernet frame has the potential to be
+ * forwarded with a mode more optimal than classic flooding.
+ *
+ * If so then returns 0. Otherwise -EINVAL is returned or -ENOMEM if we are out
+ * of memory.
+ */
+static int batadv_mcast_forw_mode_check(struct batadv_priv *bat_priv,
+ struct sk_buff *skb,
+ bool *is_unsnoopable)
+{
+ struct ethhdr *ethhdr = eth_hdr(skb);
+
+ if (!atomic_read(&bat_priv->multicast_mode))
+ return -EINVAL;
+
+ if (atomic_read(&bat_priv->mcast.num_disabled))
+ return -EINVAL;
+
+ switch (ntohs(ethhdr->h_proto)) {
+ case ETH_P_IP:
+ return batadv_mcast_forw_mode_check_ipv4(bat_priv, skb,
+ is_unsnoopable);
+ case ETH_P_IPV6:
+ return batadv_mcast_forw_mode_check_ipv6(bat_priv, skb,
+ is_unsnoopable);
+ default:
+ return -EINVAL;
+ }
+}
+
+/**
+ * batadv_mcast_forw_want_all_ip_count - count nodes with unspecific mcast interest
+ * @bat_priv: the bat priv with all the soft interface information
+ * @ethhdr: ethernet header of a packet
+ *
+ * Returns the number of nodes which want all IPv4 multicast traffic if the
+ * given ethhdr is from an IPv4 packet or the number of nodes which want all
+ * IPv6 traffic if it matches an IPv6 packet.
+ */
+static int batadv_mcast_forw_want_all_ip_count(struct batadv_priv *bat_priv,
+ struct ethhdr *ethhdr)
+{
+ switch (ntohs(ethhdr->h_proto)) {
+ case ETH_P_IP:
+ return atomic_read(&bat_priv->mcast.num_want_all_ipv4);
+ case ETH_P_IPV6:
+ return atomic_read(&bat_priv->mcast.num_want_all_ipv6);
+ default:
+ /* we shouldn't be here... */
+ return 0;
+ }
+}
+
+/**
+ * batadv_mcast_forw_tt_node_get - get a multicast tt node
+ * @bat_priv: the bat priv with all the soft interface information
+ * @ethhdr: the ether header containing the multicast destination
+ *
+ * Returns an orig_node matching the multicast address provided by ethhdr
+ * via a translation table lookup. This increases the returned node's refcount.
+ */
+static struct batadv_orig_node *
+batadv_mcast_forw_tt_node_get(struct batadv_priv *bat_priv,
+ struct ethhdr *ethhdr)
+{
+ return batadv_transtable_search(bat_priv, ethhdr->h_source,
+ ethhdr->h_dest, BATADV_NO_FLAGS);
+}
+
+/**
+ * batadv_mcast_forw_ipv4_node_get - get a node with an ipv4 flag
+ * @bat_priv: the bat priv with all the soft interface information
+ *
+ * Returns an orig_node which has the BATADV_MCAST_WANT_ALL_IPV4 flag set and
+ * increases its refcount.
+ */
+static struct batadv_orig_node *
+batadv_mcast_forw_ipv4_node_get(struct batadv_priv *bat_priv)
+{
+ struct batadv_orig_node *tmp_orig_node, *orig_node = NULL;
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(tmp_orig_node,
+ &bat_priv->mcast.want_all_ipv4_list,
+ mcast_want_all_ipv4_node) {
+ if (!atomic_inc_not_zero(&tmp_orig_node->refcount))
+ continue;
+
+ orig_node = tmp_orig_node;
+ break;
+ }
+ rcu_read_unlock();
+
+ return orig_node;
+}
+
+/**
+ * batadv_mcast_forw_ipv6_node_get - get a node with an ipv6 flag
+ * @bat_priv: the bat priv with all the soft interface information
+ *
+ * Returns an orig_node which has the BATADV_MCAST_WANT_ALL_IPV6 flag set
+ * and increases its refcount.
+ */
+static struct batadv_orig_node *
+batadv_mcast_forw_ipv6_node_get(struct batadv_priv *bat_priv)
+{
+ struct batadv_orig_node *tmp_orig_node, *orig_node = NULL;
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(tmp_orig_node,
+ &bat_priv->mcast.want_all_ipv6_list,
+ mcast_want_all_ipv6_node) {
+ if (!atomic_inc_not_zero(&tmp_orig_node->refcount))
+ continue;
+
+ orig_node = tmp_orig_node;
+ break;
+ }
+ rcu_read_unlock();
+
+ return orig_node;
+}
+
+/**
+ * batadv_mcast_forw_ip_node_get - get a node with an ipv4/ipv6 flag
+ * @bat_priv: the bat priv with all the soft interface information
+ * @ethhdr: an ethernet header to determine the protocol family from
+ *
+ * Returns an orig_node which has the BATADV_MCAST_WANT_ALL_IPV4 or
+ * BATADV_MCAST_WANT_ALL_IPV6 flag, depending on the provided ethhdr, set and
+ * increases its refcount.
+ */
+static struct batadv_orig_node *
+batadv_mcast_forw_ip_node_get(struct batadv_priv *bat_priv,
+ struct ethhdr *ethhdr)
+{
+ switch (ntohs(ethhdr->h_proto)) {
+ case ETH_P_IP:
+ return batadv_mcast_forw_ipv4_node_get(bat_priv);
+ case ETH_P_IPV6:
+ return batadv_mcast_forw_ipv6_node_get(bat_priv);
+ default:
+ /* we shouldn't be here... */
+ return NULL;
+ }
+}
+
+/**
+ * batadv_mcast_forw_unsnoop_node_get - get a node with an unsnoopable flag
+ * @bat_priv: the bat priv with all the soft interface information
+ *
+ * Returns an orig_node which has the BATADV_MCAST_WANT_ALL_UNSNOOPABLES flag
+ * set and increases its refcount.
+ */
+static struct batadv_orig_node *
+batadv_mcast_forw_unsnoop_node_get(struct batadv_priv *bat_priv)
+{
+ struct batadv_orig_node *tmp_orig_node, *orig_node = NULL;
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(tmp_orig_node,
+ &bat_priv->mcast.want_all_unsnoopables_list,
+ mcast_want_all_unsnoopables_node) {
+ if (!atomic_inc_not_zero(&tmp_orig_node->refcount))
+ continue;
+
+ orig_node = tmp_orig_node;
+ break;
+ }
+ rcu_read_unlock();
+
+ return orig_node;
+}
+
+/**
+ * batadv_mcast_forw_mode - check on how to forward a multicast packet
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: The multicast packet to check
+ * @orig: an originator to be set to forward the skb to
+ *
+ * Returns the forwarding mode as enum batadv_forw_mode and in case of
+ * BATADV_FORW_SINGLE set the orig to the single originator the skb
+ * should be forwarded to.
+ */
+enum batadv_forw_mode
+batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb,
+ struct batadv_orig_node **orig)
+{
+ int ret, tt_count, ip_count, unsnoop_count, total_count;
+ bool is_unsnoopable = false;
+ struct ethhdr *ethhdr;
+
+ ret = batadv_mcast_forw_mode_check(bat_priv, skb, &is_unsnoopable);
+ if (ret == -ENOMEM)
+ return BATADV_FORW_NONE;
+ else if (ret < 0)
+ return BATADV_FORW_ALL;
+
+ ethhdr = eth_hdr(skb);
+
+ tt_count = batadv_tt_global_hash_count(bat_priv, ethhdr->h_dest,
+ BATADV_NO_FLAGS);
+ ip_count = batadv_mcast_forw_want_all_ip_count(bat_priv, ethhdr);
+ unsnoop_count = !is_unsnoopable ? 0 :
+ atomic_read(&bat_priv->mcast.num_want_all_unsnoopables);
+
+ total_count = tt_count + ip_count + unsnoop_count;
+
+ switch (total_count) {
+ case 1:
+ if (tt_count)
+ *orig = batadv_mcast_forw_tt_node_get(bat_priv, ethhdr);
+ else if (ip_count)
+ *orig = batadv_mcast_forw_ip_node_get(bat_priv, ethhdr);
+ else if (unsnoop_count)
+ *orig = batadv_mcast_forw_unsnoop_node_get(bat_priv);
+
+ if (*orig)
+ return BATADV_FORW_SINGLE;
+
+ /* fall through */
+ case 0:
+ return BATADV_FORW_NONE;
+ default:
+ return BATADV_FORW_ALL;
+ }
+}
+
+/**
+ * batadv_mcast_want_unsnoop_update - update unsnoop counter and list
+ * @bat_priv: the bat priv with all the soft interface information
+ * @orig: the orig_node whose multicast state might have changed
+ * @mcast_flags: flags indicating the new multicast state
+ *
+ * If the BATADV_MCAST_WANT_ALL_UNSNOOPABLES flag of this originator,
+ * orig, has toggled then this method updates counter and list accordingly.
+ */
+static void batadv_mcast_want_unsnoop_update(struct batadv_priv *bat_priv,
+ struct batadv_orig_node *orig,
+ uint8_t mcast_flags)
+{
+ /* switched from flag unset to set */
+ if (mcast_flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES &&
+ !(orig->mcast_flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES)) {
+ atomic_inc(&bat_priv->mcast.num_want_all_unsnoopables);
+
+ spin_lock_bh(&bat_priv->mcast.want_lists_lock);
+ hlist_add_head_rcu(&orig->mcast_want_all_unsnoopables_node,
+ &bat_priv->mcast.want_all_unsnoopables_list);
+ spin_unlock_bh(&bat_priv->mcast.want_lists_lock);
+ /* switched from flag set to unset */
+ } else if (!(mcast_flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES) &&
+ orig->mcast_flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES) {
+ atomic_dec(&bat_priv->mcast.num_want_all_unsnoopables);
+
+ spin_lock_bh(&bat_priv->mcast.want_lists_lock);
+ hlist_del_rcu(&orig->mcast_want_all_unsnoopables_node);
+ spin_unlock_bh(&bat_priv->mcast.want_lists_lock);
+ }
+}
+
+/**
+ * batadv_mcast_want_ipv4_update - update want-all-ipv4 counter and list
+ * @bat_priv: the bat priv with all the soft interface information
+ * @orig: the orig_node whose multicast state might have changed
+ * @mcast_flags: flags indicating the new multicast state
+ *
+ * If the BATADV_MCAST_WANT_ALL_IPV4 flag of this originator, orig, has
+ * toggled then this method updates counter and list accordingly.
+ */
+static void batadv_mcast_want_ipv4_update(struct batadv_priv *bat_priv,
+ struct batadv_orig_node *orig,
+ uint8_t mcast_flags)
+{
+ /* switched from flag unset to set */
+ if (mcast_flags & BATADV_MCAST_WANT_ALL_IPV4 &&
+ !(orig->mcast_flags & BATADV_MCAST_WANT_ALL_IPV4)) {
+ atomic_inc(&bat_priv->mcast.num_want_all_ipv4);
+
+ spin_lock_bh(&bat_priv->mcast.want_lists_lock);
+ hlist_add_head_rcu(&orig->mcast_want_all_ipv4_node,
+ &bat_priv->mcast.want_all_ipv4_list);
+ spin_unlock_bh(&bat_priv->mcast.want_lists_lock);
+ /* switched from flag set to unset */
+ } else if (!(mcast_flags & BATADV_MCAST_WANT_ALL_IPV4) &&
+ orig->mcast_flags & BATADV_MCAST_WANT_ALL_IPV4) {
+ atomic_dec(&bat_priv->mcast.num_want_all_ipv4);
+
+ spin_lock_bh(&bat_priv->mcast.want_lists_lock);
+ hlist_del_rcu(&orig->mcast_want_all_ipv4_node);
+ spin_unlock_bh(&bat_priv->mcast.want_lists_lock);
+ }
+}
+
+/**
+ * batadv_mcast_want_ipv6_update - update want-all-ipv6 counter and list
+ * @bat_priv: the bat priv with all the soft interface information
+ * @orig: the orig_node whose multicast state might have changed
+ * @mcast_flags: flags indicating the new multicast state
+ *
+ * Compares the BATADV_MCAST_WANT_ALL_IPV6 bit of mcast_flags with the one
+ * currently stored in orig->mcast_flags. If the bit has toggled, the
+ * num_want_all_ipv6 counter is adjusted and orig is added to or removed from
+ * the want_all_ipv6_list. orig->mcast_flags itself is not modified by this
+ * function.
+ */
+static void batadv_mcast_want_ipv6_update(struct batadv_priv *bat_priv,
+					  struct batadv_orig_node *orig,
+					  uint8_t mcast_flags)
+{
+	bool had_flag, has_flag;
+
+	had_flag = orig->mcast_flags & BATADV_MCAST_WANT_ALL_IPV6;
+	has_flag = mcast_flags & BATADV_MCAST_WANT_ALL_IPV6;
+
+	/* flag has not toggled, nothing to do */
+	if (had_flag == has_flag)
+		return;
+
+	if (has_flag) {
+		/* switched from flag unset to set */
+		atomic_inc(&bat_priv->mcast.num_want_all_ipv6);
+
+		spin_lock_bh(&bat_priv->mcast.want_lists_lock);
+		hlist_add_head_rcu(&orig->mcast_want_all_ipv6_node,
+				   &bat_priv->mcast.want_all_ipv6_list);
+		spin_unlock_bh(&bat_priv->mcast.want_lists_lock);
+	} else {
+		/* switched from flag set to unset */
+		atomic_dec(&bat_priv->mcast.num_want_all_ipv6);
+
+		spin_lock_bh(&bat_priv->mcast.want_lists_lock);
+		hlist_del_rcu(&orig->mcast_want_all_ipv6_node);
+		spin_unlock_bh(&bat_priv->mcast.want_lists_lock);
+	}
+}
+
+/**
+ * batadv_mcast_tvlv_ogm_handler_v1 - process incoming multicast tvlv container
+ * @bat_priv: the bat priv with all the soft interface information
+ * @orig: the orig_node of the ogm
+ * @flags: flags indicating the tvlv state (see batadv_tvlv_handler_flags)
+ * @tvlv_value: tvlv buffer containing the multicast data
+ * @tvlv_value_len: tvlv buffer length
+ *
+ * Updates the BATADV_ORIG_CAPA_HAS_MCAST capability of orig and the
+ * num_disabled counter depending on whether BATADV_TVLV_HANDLER_OGM_CIFNOTFND
+ * is set in flags. Afterwards the multicast flags found in the first byte of
+ * tvlv_value (or BATADV_NO_FLAGS if mcast is disabled or no data is
+ * available) are applied to the want-all counters and lists and stored in
+ * orig->mcast_flags.
+ */
+static void batadv_mcast_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv,
+					     struct batadv_orig_node *orig,
+					     uint8_t flags,
+					     void *tvlv_value,
+					     uint16_t tvlv_value_len)
+{
+	bool orig_mcast_enabled = !(flags & BATADV_TVLV_HANDLER_OGM_CIFNOTFND);
+	uint8_t mcast_flags = BATADV_NO_FLAGS;
+	bool orig_initialized;
+
+	orig_initialized = orig->capa_initialized & BATADV_ORIG_CAPA_HAS_MCAST;
+
+	/* If mcast support is turned on decrease the disabled mcast node
+	 * counter only if we had increased it for this node before. If this
+	 * is a completely new orig_node no need to decrease the counter.
+	 */
+	if (orig_mcast_enabled &&
+	    !(orig->capabilities & BATADV_ORIG_CAPA_HAS_MCAST)) {
+		if (orig_initialized)
+			atomic_dec(&bat_priv->mcast.num_disabled);
+		orig->capabilities |= BATADV_ORIG_CAPA_HAS_MCAST;
+	/* If mcast support is being switched off, or if this is the first
+	 * OGM of an orig_node which has no mcast support, increase the
+	 * disabled mcast node counter. Without the second case a node that
+	 * never announced mcast support would not be counted as disabled.
+	 */
+	} else if (!orig_mcast_enabled &&
+		   (orig->capabilities & BATADV_ORIG_CAPA_HAS_MCAST ||
+		    !orig_initialized)) {
+		atomic_inc(&bat_priv->mcast.num_disabled);
+		orig->capabilities &= ~BATADV_ORIG_CAPA_HAS_MCAST;
+	}
+
+	/* from now on the mcast capability state of this orig_node is known */
+	orig->capa_initialized |= BATADV_ORIG_CAPA_HAS_MCAST;
+
+	/* only trust the buffer if it holds at least the flags byte */
+	if (orig_mcast_enabled && tvlv_value &&
+	    (tvlv_value_len >= sizeof(mcast_flags)))
+		mcast_flags = *(uint8_t *)tvlv_value;
+
+	/* the update helpers compare against the old orig->mcast_flags, so
+	 * they have to run before the new flags are stored
+	 */
+	batadv_mcast_want_unsnoop_update(bat_priv, orig, mcast_flags);
+	batadv_mcast_want_ipv4_update(bat_priv, orig, mcast_flags);
+	batadv_mcast_want_ipv6_update(bat_priv, orig, mcast_flags);
+
+	orig->mcast_flags = mcast_flags;
+}
+
+/**
+ * batadv_mcast_init - initialize the multicast optimizations structures
+ * @bat_priv: the bat priv with all the soft interface information
+ *
+ * Registers batadv_mcast_tvlv_ogm_handler_v1() for the BATADV_TVLV_MCAST tvlv
+ * type (the literal 1 is presumably the tvlv version - confirm against
+ * batadv_tvlv_handler_register()) and passes
+ * BATADV_TVLV_HANDLER_OGM_CIFNOTFND as handler flag. The handler interprets
+ * that flag in its own flags argument as "mcast support disabled".
+ */
+void batadv_mcast_init(struct batadv_priv *bat_priv)
+{
+ batadv_tvlv_handler_register(bat_priv, batadv_mcast_tvlv_ogm_handler_v1,
+ NULL, BATADV_TVLV_MCAST, 1,
+ BATADV_TVLV_HANDLER_OGM_CIFNOTFND);
+}
+
+/**
+ * batadv_mcast_free - free the multicast optimizations structures
+ * @bat_priv: the bat priv with all the soft interface information
+ *
+ * Unregisters the BATADV_TVLV_MCAST tvlv container and handler and then calls
+ * batadv_mcast_mla_tt_retract() with NULL as second argument (presumably
+ * retracting every announced multicast listener address - confirm against
+ * its definition).
+ */
+void batadv_mcast_free(struct batadv_priv *bat_priv)
+{
+ batadv_tvlv_container_unregister(bat_priv, BATADV_TVLV_MCAST, 1);
+ batadv_tvlv_handler_unregister(bat_priv, BATADV_TVLV_MCAST, 1);
+
+ batadv_mcast_mla_tt_retract(bat_priv, NULL);
+}
+
+/**
+ * batadv_mcast_purge_orig - reset originator global mcast state modifications
+ * @orig: the originator which is going to get purged
+ *
+ * Reverts the contribution of orig to the num_disabled counter and to the
+ * want-all counters and lists.
+ */
+void batadv_mcast_purge_orig(struct batadv_orig_node *orig)
+{
+	struct batadv_priv *bat_priv = orig->bat_priv;
+
+	/* The tvlv ogm handler marks an originator as initialized whenever it
+	 * might have increased num_disabled for it. An originator which was
+	 * never initialized therefore must not decrease the counter.
+	 */
+	if (!(orig->capabilities & BATADV_ORIG_CAPA_HAS_MCAST) &&
+	    orig->capa_initialized & BATADV_ORIG_CAPA_HAS_MCAST)
+		atomic_dec(&bat_priv->mcast.num_disabled);
+
+	/* passing no flags removes orig from every want-all list it is on */
+	batadv_mcast_want_unsnoop_update(bat_priv, orig, BATADV_NO_FLAGS);
+	batadv_mcast_want_ipv4_update(bat_priv, orig, BATADV_NO_FLAGS);
+	batadv_mcast_want_ipv6_update(bat_priv, orig, BATADV_NO_FLAGS);
+}
diff --git a/net/batman-adv/multicast.h b/net/batman-adv/multicast.h
new file mode 100644
index 00000000000..73b5d45819c
--- /dev/null
+++ b/net/batman-adv/multicast.h
@@ -0,0 +1,80 @@
+/* Copyright (C) 2014 B.A.T.M.A.N. contributors:
+ *
+ * Linus Lüssing
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _NET_BATMAN_ADV_MULTICAST_H_
+#define _NET_BATMAN_ADV_MULTICAST_H_
+
+/**
+ * enum batadv_forw_mode - the way a packet should be forwarded as
+ * @BATADV_FORW_ALL: forward the packet to all nodes (currently via classic
+ * flooding)
+ * @BATADV_FORW_SINGLE: forward the packet to a single node (currently via the
+ * BATMAN unicast routing protocol)
+ * @BATADV_FORW_NONE: don't forward, drop it
+ */
+enum batadv_forw_mode {
+ BATADV_FORW_ALL,
+ BATADV_FORW_SINGLE,
+ BATADV_FORW_NONE,
+};
+
+#ifdef CONFIG_BATMAN_ADV_MCAST
+
+void batadv_mcast_mla_update(struct batadv_priv *bat_priv);
+
+enum batadv_forw_mode
+batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb,
+ struct batadv_orig_node **mcast_single_orig);
+
+void batadv_mcast_init(struct batadv_priv *bat_priv);
+
+void batadv_mcast_free(struct batadv_priv *bat_priv);
+
+void batadv_mcast_purge_orig(struct batadv_orig_node *orig_node);
+
+#else
+
+/* stub used when CONFIG_BATMAN_ADV_MCAST is disabled: does nothing */
+static inline void batadv_mcast_mla_update(struct batadv_priv *bat_priv)
+{
+}
+
+/* stub used when CONFIG_BATMAN_ADV_MCAST is disabled: always selects
+ * BATADV_FORW_ALL and leaves *mcast_single_orig untouched
+ */
+static inline enum batadv_forw_mode
+batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb,
+ struct batadv_orig_node **mcast_single_orig)
+{
+ return BATADV_FORW_ALL;
+}
+
+/* stub used when CONFIG_BATMAN_ADV_MCAST is disabled: does nothing. It
+ * returns void so that its signature matches the prototype of the real
+ * implementation declared for the enabled case.
+ */
+static inline void batadv_mcast_init(struct batadv_priv *bat_priv)
+{
+}
+
+/* stub used when CONFIG_BATMAN_ADV_MCAST is disabled: does nothing */
+static inline void batadv_mcast_free(struct batadv_priv *bat_priv)
+{
+}
+
+/* stub used when CONFIG_BATMAN_ADV_MCAST is disabled: does nothing */
+static inline void batadv_mcast_purge_orig(struct batadv_orig_node *orig_node)
+{
+}
+
+#endif /* CONFIG_BATMAN_ADV_MCAST */
+
+#endif /* _NET_BATMAN_ADV_MULTICAST_H_ */
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index f1b604d88dc..8d04d174669 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -86,6 +86,7 @@ static void batadv_nc_tvlv_container_update(struct batadv_priv *bat_priv)
void batadv_nc_status_update(struct net_device *net_dev)
{
struct batadv_priv *bat_priv = netdev_priv(net_dev);
+
batadv_nc_tvlv_container_update(bat_priv);
}
@@ -819,7 +820,7 @@ static struct batadv_nc_node
/* Initialize nc_node */
INIT_LIST_HEAD(&nc_node->list);
- memcpy(nc_node->addr, orig_node->orig, ETH_ALEN);
+ ether_addr_copy(nc_node->addr, orig_node->orig);
nc_node->orig_node = orig_neigh_node;
atomic_set(&nc_node->refcount, 2);
@@ -941,8 +942,8 @@ static struct batadv_nc_path *batadv_nc_get_path(struct batadv_priv *bat_priv,
spin_lock_init(&nc_path->packet_list_lock);
atomic_set(&nc_path->refcount, 2);
nc_path->last_valid = jiffies;
- memcpy(nc_path->next_hop, dst, ETH_ALEN);
- memcpy(nc_path->prev_hop, src, ETH_ALEN);
+ ether_addr_copy(nc_path->next_hop, dst);
+ ether_addr_copy(nc_path->prev_hop, src);
batadv_dbg(BATADV_DBG_NC, bat_priv, "Adding nc_path %pM -> %pM\n",
nc_path->prev_hop,
@@ -1114,15 +1115,15 @@ static bool batadv_nc_code_packets(struct batadv_priv *bat_priv,
coded_packet->ttl = packet1->ttl;
/* Info about first unicast packet */
- memcpy(coded_packet->first_source, first_source, ETH_ALEN);
- memcpy(coded_packet->first_orig_dest, packet1->dest, ETH_ALEN);
+ ether_addr_copy(coded_packet->first_source, first_source);
+ ether_addr_copy(coded_packet->first_orig_dest, packet1->dest);
coded_packet->first_crc = packet_id1;
coded_packet->first_ttvn = packet1->ttvn;
/* Info about second unicast packet */
- memcpy(coded_packet->second_dest, second_dest, ETH_ALEN);
- memcpy(coded_packet->second_source, second_source, ETH_ALEN);
- memcpy(coded_packet->second_orig_dest, packet2->dest, ETH_ALEN);
+ ether_addr_copy(coded_packet->second_dest, second_dest);
+ ether_addr_copy(coded_packet->second_source, second_source);
+ ether_addr_copy(coded_packet->second_orig_dest, packet2->dest);
coded_packet->second_crc = packet_id2;
coded_packet->second_ttl = packet2->ttl;
coded_packet->second_ttvn = packet2->ttvn;
@@ -1343,14 +1344,14 @@ static void batadv_nc_skb_store_before_coding(struct batadv_priv *bat_priv,
struct ethhdr *ethhdr;
/* Copy skb header to change the mac header */
- skb = pskb_copy(skb, GFP_ATOMIC);
+ skb = pskb_copy_for_clone(skb, GFP_ATOMIC);
if (!skb)
return;
/* Set the mac header as if we actually sent the packet uncoded */
ethhdr = eth_hdr(skb);
- memcpy(ethhdr->h_source, ethhdr->h_dest, ETH_ALEN);
- memcpy(ethhdr->h_dest, eth_dst_new, ETH_ALEN);
+ ether_addr_copy(ethhdr->h_source, ethhdr->h_dest);
+ ether_addr_copy(ethhdr->h_dest, eth_dst_new);
/* Set data pointer to MAC header to mimic packets from our tx path */
skb_push(skb, ETH_HLEN);
@@ -1636,7 +1637,7 @@ batadv_nc_skb_decode_packet(struct batadv_priv *bat_priv, struct sk_buff *skb,
/* Reconstruct original mac header */
ethhdr = eth_hdr(skb);
- memcpy(ethhdr, &ethhdr_tmp, sizeof(*ethhdr));
+ *ethhdr = ethhdr_tmp;
/* Select the correct unicast header information based on the location
* of our mac address in the coded_packet header
@@ -1646,7 +1647,7 @@ batadv_nc_skb_decode_packet(struct batadv_priv *bat_priv, struct sk_buff *skb,
* so the Ethernet address must be copied to h_dest and
* pkt_type changed from PACKET_OTHERHOST to PACKET_HOST
*/
- memcpy(ethhdr->h_dest, coded_packet_tmp.second_dest, ETH_ALEN);
+ ether_addr_copy(ethhdr->h_dest, coded_packet_tmp.second_dest);
skb->pkt_type = PACKET_HOST;
orig_dest = coded_packet_tmp.second_orig_dest;
@@ -1682,7 +1683,7 @@ batadv_nc_skb_decode_packet(struct batadv_priv *bat_priv, struct sk_buff *skb,
unicast_packet->packet_type = BATADV_UNICAST;
unicast_packet->version = BATADV_COMPAT_VERSION;
unicast_packet->ttl = ttl;
- memcpy(unicast_packet->dest, orig_dest, ETH_ALEN);
+ ether_addr_copy(unicast_packet->dest, orig_dest);
unicast_packet->ttvn = ttvn;
batadv_nc_packet_free(nc_packet);
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 853941629dc..6a484514cd3 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -27,6 +27,7 @@
#include "bridge_loop_avoidance.h"
#include "network-coding.h"
#include "fragmentation.h"
+#include "multicast.h"
/* hash class keys */
static struct lock_class_key batadv_orig_hash_lock_class_key;
@@ -446,7 +447,7 @@ batadv_neigh_node_new(struct batadv_hard_iface *hard_iface,
INIT_HLIST_HEAD(&neigh_node->ifinfo_list);
spin_lock_init(&neigh_node->ifinfo_lock);
- memcpy(neigh_node->addr, neigh_addr, ETH_ALEN);
+ ether_addr_copy(neigh_node->addr, neigh_addr);
neigh_node->if_incoming = hard_iface;
neigh_node->orig_node = orig_node;
@@ -500,12 +501,17 @@ batadv_neigh_node_get(const struct batadv_orig_node *orig_node,
static void batadv_orig_ifinfo_free_rcu(struct rcu_head *rcu)
{
struct batadv_orig_ifinfo *orig_ifinfo;
+ struct batadv_neigh_node *router;
orig_ifinfo = container_of(rcu, struct batadv_orig_ifinfo, rcu);
if (orig_ifinfo->if_outgoing != BATADV_IF_DEFAULT)
batadv_hardif_free_ref_now(orig_ifinfo->if_outgoing);
+ /* this is the last reference to this object */
+ router = rcu_dereference_protected(orig_ifinfo->router, true);
+ if (router)
+ batadv_neigh_node_free_ref_now(router);
kfree(orig_ifinfo);
}
@@ -557,6 +563,8 @@ static void batadv_orig_node_free_rcu(struct rcu_head *rcu)
}
spin_unlock_bh(&orig_node->neigh_list_lock);
+ batadv_mcast_purge_orig(orig_node);
+
/* Free nc_nodes */
batadv_nc_purge_orig(orig_node->bat_priv, orig_node, NULL);
@@ -664,15 +672,17 @@ struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv,
/* extra reference for return */
atomic_set(&orig_node->refcount, 2);
- orig_node->tt_initialised = false;
orig_node->bat_priv = bat_priv;
- memcpy(orig_node->orig, addr, ETH_ALEN);
+ ether_addr_copy(orig_node->orig, addr);
batadv_dat_init_orig_node_addr(orig_node);
atomic_set(&orig_node->last_ttvn, 0);
orig_node->tt_buff = NULL;
orig_node->tt_buff_len = 0;
reset_time = jiffies - 1 - msecs_to_jiffies(BATADV_RESET_PROTECTION_MS);
orig_node->bcast_seqno_reset = reset_time;
+#ifdef CONFIG_BATMAN_ADV_MCAST
+ orig_node->mcast_flags = BATADV_NO_FLAGS;
+#endif
/* create a vlan object for the "untagged" LAN */
vlan = batadv_orig_node_vlan_new(orig_node, BATADV_NO_FLAGS);
@@ -697,6 +707,47 @@ free_orig_node:
}
/**
+ * batadv_purge_neigh_ifinfo - purge obsolete ifinfo entries from neighbor
+ * @bat_priv: the bat priv with all the soft interface information
+ * @neigh: orig node which is to be checked
+ */
+static void
+batadv_purge_neigh_ifinfo(struct batadv_priv *bat_priv,
+ struct batadv_neigh_node *neigh)
+{
+ struct batadv_neigh_ifinfo *neigh_ifinfo;
+ struct batadv_hard_iface *if_outgoing;
+ struct hlist_node *node_tmp;
+
+ spin_lock_bh(&neigh->ifinfo_lock);
+
+ /* for all ifinfo objects for this neighinator */
+ hlist_for_each_entry_safe(neigh_ifinfo, node_tmp,
+ &neigh->ifinfo_list, list) {
+ if_outgoing = neigh_ifinfo->if_outgoing;
+
+ /* always keep the default interface */
+ if (if_outgoing == BATADV_IF_DEFAULT)
+ continue;
+
+ /* don't purge if the interface is not (going) down */
+ if ((if_outgoing->if_status != BATADV_IF_INACTIVE) &&
+ (if_outgoing->if_status != BATADV_IF_NOT_IN_USE) &&
+ (if_outgoing->if_status != BATADV_IF_TO_BE_REMOVED))
+ continue;
+
+ batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
+ "neighbor/ifinfo purge: neighbor %pM, iface: %s\n",
+ neigh->addr, if_outgoing->net_dev->name);
+
+ hlist_del_rcu(&neigh_ifinfo->list);
+ batadv_neigh_ifinfo_free_ref(neigh_ifinfo);
+ }
+
+ spin_unlock_bh(&neigh->ifinfo_lock);
+}
+
+/**
* batadv_purge_orig_ifinfo - purge obsolete ifinfo entries from originator
* @bat_priv: the bat priv with all the soft interface information
* @orig_node: orig node which is to be checked
@@ -795,6 +846,11 @@ batadv_purge_orig_neighbors(struct batadv_priv *bat_priv,
hlist_del_rcu(&neigh_node->list);
batadv_neigh_node_free_ref(neigh_node);
+ } else {
+ /* only necessary if not the whole neighbor is to be
+ * deleted, but some interface has been removed.
+ */
+ batadv_purge_neigh_ifinfo(bat_priv, neigh_node);
}
}
@@ -852,7 +908,7 @@ static bool batadv_purge_orig_node(struct batadv_priv *bat_priv,
{
struct batadv_neigh_node *best_neigh_node;
struct batadv_hard_iface *hard_iface;
- bool changed;
+ bool changed_ifinfo, changed_neigh;
if (batadv_has_timed_out(orig_node->last_seen,
2 * BATADV_PURGE_TIMEOUT)) {
@@ -862,10 +918,10 @@ static bool batadv_purge_orig_node(struct batadv_priv *bat_priv,
jiffies_to_msecs(orig_node->last_seen));
return true;
}
- changed = batadv_purge_orig_ifinfo(bat_priv, orig_node);
- changed = changed || batadv_purge_orig_neighbors(bat_priv, orig_node);
+ changed_ifinfo = batadv_purge_orig_ifinfo(bat_priv, orig_node);
+ changed_neigh = batadv_purge_orig_neighbors(bat_priv, orig_node);
- if (!changed)
+ if (!changed_ifinfo && !changed_neigh)
return false;
/* first for NULL ... */
@@ -1023,7 +1079,8 @@ int batadv_orig_hardif_seq_print_text(struct seq_file *seq, void *offset)
bat_priv->bat_algo_ops->bat_orig_print(bat_priv, seq, hard_iface);
out:
- batadv_hardif_free_ref(hard_iface);
+ if (hard_iface)
+ batadv_hardif_free_ref(hard_iface);
return 0;
}
diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h
index 0a381d1174c..34e096d2dce 100644
--- a/net/batman-adv/packet.h
+++ b/net/batman-adv/packet.h
@@ -89,6 +89,19 @@ enum batadv_icmp_packettype {
BATADV_PARAMETER_PROBLEM = 12,
};
+/**
+ * enum batadv_mcast_flags - flags for multicast capabilities and settings
+ * @BATADV_MCAST_WANT_ALL_UNSNOOPABLES: we want all packets destined for
+ * 224.0.0.0/24 or ff02::1
+ * @BATADV_MCAST_WANT_ALL_IPV4: we want all IPv4 multicast packets
+ * @BATADV_MCAST_WANT_ALL_IPV6: we want all IPv6 multicast packets
+ *
+ * These values are carried in a uint8_t (the flags field of struct
+ * batadv_tvlv_mcast_data), so only BIT(0) to BIT(7) are available.
+ */
+enum batadv_mcast_flags {
+ BATADV_MCAST_WANT_ALL_UNSNOOPABLES = BIT(0),
+ BATADV_MCAST_WANT_ALL_IPV4 = BIT(1),
+ BATADV_MCAST_WANT_ALL_IPV6 = BIT(2),
+};
+
/* tt data subtypes */
#define BATADV_TT_DATA_TYPE_MASK 0x0F
@@ -106,10 +119,30 @@ enum batadv_tt_data_flags {
BATADV_TT_FULL_TABLE = BIT(4),
};
-/* BATADV_TT_CLIENT flags.
- * Flags from BIT(0) to BIT(7) are sent on the wire, while flags from BIT(8) to
- * BIT(15) are used for local computation only.
- * Flags from BIT(4) to BIT(7) are kept in sync with the rest of the network.
+/**
+ * enum batadv_tt_client_flags - TT client specific flags
+ * @BATADV_TT_CLIENT_DEL: the client has to be deleted from the table
+ * @BATADV_TT_CLIENT_ROAM: the client roamed to/from another node and the new
+ * update telling its new real location has not been received/sent yet
+ * @BATADV_TT_CLIENT_WIFI: this client is connected through a wifi interface.
+ * This information is used by the "AP Isolation" feature
+ * @BATADV_TT_CLIENT_ISOLA: this client is considered "isolated". This
+ * information is used by the Extended Isolation feature
+ * @BATADV_TT_CLIENT_NOPURGE: this client should never be removed from the table
+ * @BATADV_TT_CLIENT_NEW: this client has been added to the local table but has
+ * not been announced yet
+ * @BATADV_TT_CLIENT_PENDING: this client is marked for removal but it is kept
+ * in the table for one more originator interval for consistency purposes
+ * @BATADV_TT_CLIENT_TEMP: this global client has been detected to be part of
+ * the network but no node has announced it yet
+ *
+ * Bits from 0 to 7 are called _remote flags_ because they are sent on the wire.
+ * Bits from 8 to 15 are called _local flags_ because they are used for local
+ * computations only.
+ *
+ * Bits from 4 to 7 - a subset of remote flags - are ensured to be in sync with
+ * the other nodes in the network. To achieve this goal these flags are included
+ * in the TT CRC computation.
*/
enum batadv_tt_client_flags {
BATADV_TT_CLIENT_DEL = BIT(0),
@@ -145,6 +178,7 @@ enum batadv_bla_claimframe {
* @BATADV_TVLV_NC: network coding tvlv
* @BATADV_TVLV_TT: translation table tvlv
* @BATADV_TVLV_ROAM: roaming advertisement tvlv
+ * @BATADV_TVLV_MCAST: multicast capability tvlv
*/
enum batadv_tvlv_type {
BATADV_TVLV_GW = 0x01,
@@ -152,6 +186,7 @@ enum batadv_tvlv_type {
BATADV_TVLV_NC = 0x03,
BATADV_TVLV_TT = 0x04,
BATADV_TVLV_ROAM = 0x05,
+ BATADV_TVLV_MCAST = 0x06,
};
#pragma pack(2)
@@ -504,4 +539,14 @@ struct batadv_tvlv_roam_adv {
__be16 vid;
};
+/**
+ * struct batadv_tvlv_mcast_data - payload of a multicast tvlv
+ * @flags: multicast flags announced by the orig node (see enum
+ * batadv_mcast_flags)
+ * @reserved: reserved field, extends the payload to four bytes
+ */
+struct batadv_tvlv_mcast_data {
+ uint8_t flags;
+ uint8_t reserved[3];
+};
+
#endif /* _NET_BATMAN_ADV_PACKET_H_ */
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index a953d5b196a..35141534938 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -222,8 +222,8 @@ static int batadv_recv_my_icmp_packet(struct batadv_priv *bat_priv,
icmph = (struct batadv_icmp_header *)skb->data;
- memcpy(icmph->dst, icmph->orig, ETH_ALEN);
- memcpy(icmph->orig, primary_if->net_dev->dev_addr, ETH_ALEN);
+ ether_addr_copy(icmph->dst, icmph->orig);
+ ether_addr_copy(icmph->orig, primary_if->net_dev->dev_addr);
icmph->msg_type = BATADV_ECHO_REPLY;
icmph->ttl = BATADV_TTL;
@@ -276,9 +276,8 @@ static int batadv_recv_icmp_ttl_exceeded(struct batadv_priv *bat_priv,
icmp_packet = (struct batadv_icmp_packet *)skb->data;
- memcpy(icmp_packet->dst, icmp_packet->orig, ETH_ALEN);
- memcpy(icmp_packet->orig, primary_if->net_dev->dev_addr,
- ETH_ALEN);
+ ether_addr_copy(icmp_packet->dst, icmp_packet->orig);
+ ether_addr_copy(icmp_packet->orig, primary_if->net_dev->dev_addr);
icmp_packet->msg_type = BATADV_TTL_EXCEEDED;
icmp_packet->ttl = BATADV_TTL;
@@ -341,8 +340,8 @@ int batadv_recv_icmp_packet(struct sk_buff *skb,
if (icmp_packet_rr->rr_cur >= BATADV_RR_LEN)
goto out;
- memcpy(&(icmp_packet_rr->rr[icmp_packet_rr->rr_cur]),
- ethhdr->h_dest, ETH_ALEN);
+ ether_addr_copy(icmp_packet_rr->rr[icmp_packet_rr->rr_cur],
+ ethhdr->h_dest);
icmp_packet_rr->rr_cur++;
}
@@ -664,7 +663,7 @@ batadv_reroute_unicast_packet(struct batadv_priv *bat_priv,
}
/* update the packet header */
- memcpy(unicast_packet->dest, orig_addr, ETH_ALEN);
+ ether_addr_copy(unicast_packet->dest, orig_addr);
unicast_packet->ttvn = orig_ttvn;
ret = true;
@@ -774,7 +773,7 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
if (!primary_if)
return 0;
- memcpy(unicast_packet->dest, primary_if->net_dev->dev_addr, ETH_ALEN);
+ ether_addr_copy(unicast_packet->dest, primary_if->net_dev->dev_addr);
batadv_hardif_free_ref(primary_if);
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 843febd1e51..3d64ed20c39 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -27,6 +27,7 @@
#include "originator.h"
#include "network-coding.h"
#include "fragmentation.h"
+#include "multicast.h"
static void batadv_send_outstanding_bcast_packet(struct work_struct *work);
@@ -59,8 +60,8 @@ int batadv_send_skb_packet(struct sk_buff *skb,
skb_reset_mac_header(skb);
ethhdr = eth_hdr(skb);
- memcpy(ethhdr->h_source, hard_iface->net_dev->dev_addr, ETH_ALEN);
- memcpy(ethhdr->h_dest, dst_addr, ETH_ALEN);
+ ether_addr_copy(ethhdr->h_source, hard_iface->net_dev->dev_addr);
+ ether_addr_copy(ethhdr->h_dest, dst_addr);
ethhdr->h_proto = htons(ETH_P_BATMAN);
skb_set_network_header(skb, ETH_HLEN);
@@ -165,7 +166,7 @@ batadv_send_skb_push_fill_unicast(struct sk_buff *skb, int hdr_size,
/* set unicast ttl */
unicast_packet->ttl = BATADV_TTL;
/* copy the destination for faster routing */
- memcpy(unicast_packet->dest, orig_node->orig, ETH_ALEN);
+ ether_addr_copy(unicast_packet->dest, orig_node->orig);
/* set the destination tt version number */
unicast_packet->ttvn = ttvn;
@@ -220,7 +221,7 @@ bool batadv_send_skb_prepare_unicast_4addr(struct batadv_priv *bat_priv,
uc_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data;
uc_4addr_packet->u.packet_type = BATADV_UNICAST_4ADDR;
- memcpy(uc_4addr_packet->src, primary_if->net_dev->dev_addr, ETH_ALEN);
+ ether_addr_copy(uc_4addr_packet->src, primary_if->net_dev->dev_addr);
uc_4addr_packet->subtype = packet_subtype;
uc_4addr_packet->reserved = 0;
@@ -248,15 +249,15 @@ out:
*
* Returns NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise.
*/
-static int batadv_send_skb_unicast(struct batadv_priv *bat_priv,
- struct sk_buff *skb, int packet_type,
- int packet_subtype,
- struct batadv_orig_node *orig_node,
- unsigned short vid)
+int batadv_send_skb_unicast(struct batadv_priv *bat_priv,
+ struct sk_buff *skb, int packet_type,
+ int packet_subtype,
+ struct batadv_orig_node *orig_node,
+ unsigned short vid)
{
struct ethhdr *ethhdr;
struct batadv_unicast_packet *unicast_packet;
- int ret = NET_XMIT_DROP, hdr_size;
+ int ret = NET_XMIT_DROP;
if (!orig_node)
goto out;
@@ -265,16 +266,12 @@ static int batadv_send_skb_unicast(struct batadv_priv *bat_priv,
case BATADV_UNICAST:
if (!batadv_send_skb_prepare_unicast(skb, orig_node))
goto out;
-
- hdr_size = sizeof(*unicast_packet);
break;
case BATADV_UNICAST_4ADDR:
if (!batadv_send_skb_prepare_unicast_4addr(bat_priv, skb,
orig_node,
packet_subtype))
goto out;
-
- hdr_size = sizeof(struct batadv_unicast_4addr_packet);
break;
default:
/* this function supports UNICAST and UNICAST_4ADDR only. It
@@ -283,7 +280,10 @@ static int batadv_send_skb_unicast(struct batadv_priv *bat_priv,
goto out;
}
- ethhdr = (struct ethhdr *)(skb->data + hdr_size);
+ /* skb->data might have been reallocated by
+ * batadv_send_skb_prepare_unicast{,_4addr}()
+ */
+ ethhdr = eth_hdr(skb);
unicast_packet = (struct batadv_unicast_packet *)skb->data;
/* inform the destination node that we are still missing a correct route
@@ -312,6 +312,7 @@ out:
* @packet_type: the batman unicast packet type to use
* @packet_subtype: the unicast 4addr packet subtype (only relevant for unicast
* 4addr packets)
+ * @dst_hint: can be used to override the destination contained in the skb
* @vid: the vid to be used to search the translation table
*
* Look up the recipient node for the destination address in the ethernet
diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h
index aaddaa9661c..38d0ec1833a 100644
--- a/net/batman-adv/send.h
+++ b/net/batman-adv/send.h
@@ -36,6 +36,11 @@ bool batadv_send_skb_prepare_unicast_4addr(struct batadv_priv *bat_priv,
struct sk_buff *skb,
struct batadv_orig_node *orig_node,
int packet_subtype);
+int batadv_send_skb_unicast(struct batadv_priv *bat_priv,
+ struct sk_buff *skb, int packet_type,
+ int packet_subtype,
+ struct batadv_orig_node *orig_node,
+ unsigned short vid);
int batadv_send_skb_via_tt_generic(struct batadv_priv *bat_priv,
struct sk_buff *skb, int packet_type,
int packet_subtype, uint8_t *dst_hint,
@@ -47,6 +52,7 @@ int batadv_send_skb_via_gw(struct batadv_priv *bat_priv, struct sk_buff *skb,
* batadv_send_skb_via_tt - send an skb via TT lookup
* @bat_priv: the bat priv with all the soft interface information
* @skb: the payload to send
+ * @dst_hint: can be used to override the destination contained in the skb
* @vid: the vid to be used to search the translation table
*
* Look up the recipient node for the destination address in the ethernet
@@ -68,6 +74,7 @@ static inline int batadv_send_skb_via_tt(struct batadv_priv *bat_priv,
* @bat_priv: the bat priv with all the soft interface information
* @skb: the payload to send
* @packet_subtype: the unicast 4addr packet subtype to use
+ * @dst_hint: can be used to override the destination contained in the skb
* @vid: the vid to be used to search the translation table
*
* Look up the recipient node for the destination address in the ethernet
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index f82c267e188..cbd677f48c0 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -32,6 +32,7 @@
#include <linux/ethtool.h>
#include <linux/etherdevice.h>
#include <linux/if_vlan.h>
+#include "multicast.h"
#include "bridge_loop_avoidance.h"
#include "network-coding.h"
@@ -111,8 +112,8 @@ static int batadv_interface_set_mac_addr(struct net_device *dev, void *p)
if (!is_valid_ether_addr(addr->sa_data))
return -EADDRNOTAVAIL;
- memcpy(old_addr, dev->dev_addr, ETH_ALEN);
- memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN);
+ ether_addr_copy(old_addr, dev->dev_addr);
+ ether_addr_copy(dev->dev_addr, addr->sa_data);
/* only modify transtable if it has been initialized before */
if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_ACTIVE) {
@@ -170,17 +171,19 @@ static int batadv_interface_tx(struct sk_buff *skb,
unsigned short vid;
uint32_t seqno;
int gw_mode;
+ enum batadv_forw_mode forw_mode;
+ struct batadv_orig_node *mcast_single_orig = NULL;
if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE)
goto dropped;
soft_iface->trans_start = jiffies;
vid = batadv_get_vid(skb, 0);
- ethhdr = (struct ethhdr *)skb->data;
+ ethhdr = eth_hdr(skb);
switch (ntohs(ethhdr->h_proto)) {
case ETH_P_8021Q:
- vhdr = (struct vlan_ethhdr *)skb->data;
+ vhdr = vlan_eth_hdr(skb);
if (vhdr->h_vlan_encapsulated_proto != ethertype)
break;
@@ -194,7 +197,7 @@ static int batadv_interface_tx(struct sk_buff *skb,
goto dropped;
/* skb->data might have been reallocated by batadv_bla_tx() */
- ethhdr = (struct ethhdr *)skb->data;
+ ethhdr = eth_hdr(skb);
/* Register the client MAC in the transtable */
if (!is_multicast_ether_addr(ethhdr->h_source)) {
@@ -230,7 +233,7 @@ static int batadv_interface_tx(struct sk_buff *skb,
/* skb->data may have been modified by
* batadv_gw_dhcp_recipient_get()
*/
- ethhdr = (struct ethhdr *)skb->data;
+ ethhdr = eth_hdr(skb);
/* if gw_mode is on, broadcast any non-DHCP message.
* All the DHCP packets are going to be sent as unicast
*/
@@ -247,9 +250,19 @@ static int batadv_interface_tx(struct sk_buff *skb,
* directed to a DHCP server
*/
goto dropped;
- }
send:
+ if (do_bcast && !is_broadcast_ether_addr(ethhdr->h_dest)) {
+ forw_mode = batadv_mcast_forw_mode(bat_priv, skb,
+ &mcast_single_orig);
+ if (forw_mode == BATADV_FORW_NONE)
+ goto dropped;
+
+ if (forw_mode == BATADV_FORW_SINGLE)
+ do_bcast = false;
+ }
+ }
+
batadv_skb_set_priority(skb, 0);
/* ethernet packet should be broadcasted */
@@ -279,8 +292,8 @@ send:
/* hw address of first interface is the orig mac because only
* this mac is known throughout the mesh
*/
- memcpy(bcast_packet->orig,
- primary_if->net_dev->dev_addr, ETH_ALEN);
+ ether_addr_copy(bcast_packet->orig,
+ primary_if->net_dev->dev_addr);
/* set broadcast sequence number */
seqno = atomic_inc_return(&bat_priv->bcast_seqno);
@@ -301,6 +314,10 @@ send:
if (ret)
goto dropped;
ret = batadv_send_skb_via_gw(bat_priv, skb, vid);
+ } else if (mcast_single_orig) {
+ ret = batadv_send_skb_unicast(bat_priv, skb,
+ BATADV_UNICAST, 0,
+ mcast_single_orig, vid);
} else {
if (batadv_dat_snoop_outgoing_arp_request(bat_priv,
skb))
@@ -431,10 +448,15 @@ out:
* possibly free it
- * @softif_vlan: the vlan object to release
+ * @vlan: the vlan object to release
*/
-void batadv_softif_vlan_free_ref(struct batadv_softif_vlan *softif_vlan)
+void batadv_softif_vlan_free_ref(struct batadv_softif_vlan *vlan)
{
- if (atomic_dec_and_test(&softif_vlan->refcount))
- kfree_rcu(softif_vlan, rcu);
+ if (atomic_dec_and_test(&vlan->refcount)) {
+ spin_lock_bh(&vlan->bat_priv->softif_vlan_list_lock);
+ hlist_del_rcu(&vlan->list);
+ spin_unlock_bh(&vlan->bat_priv->softif_vlan_list_lock);
+
+ kfree_rcu(vlan, rcu);
+ }
}
/**
@@ -488,6 +510,7 @@ int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid)
if (!vlan)
return -ENOMEM;
+ vlan->bat_priv = bat_priv;
vlan->vid = vid;
atomic_set(&vlan->refcount, 1);
@@ -499,6 +522,10 @@ int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid)
return err;
}
+ spin_lock_bh(&bat_priv->softif_vlan_list_lock);
+ hlist_add_head_rcu(&vlan->list, &bat_priv->softif_vlan_list);
+ spin_unlock_bh(&bat_priv->softif_vlan_list_lock);
+
/* add a new TT local entry. This one will be marked with the NOPURGE
* flag
*/
@@ -506,10 +533,6 @@ int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid)
bat_priv->soft_iface->dev_addr, vid,
BATADV_NULL_IFINDEX, BATADV_NO_MARK);
- spin_lock_bh(&bat_priv->softif_vlan_list_lock);
- hlist_add_head_rcu(&vlan->list, &bat_priv->softif_vlan_list);
- spin_unlock_bh(&bat_priv->softif_vlan_list_lock);
-
return 0;
}
@@ -521,18 +544,13 @@ int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid)
static void batadv_softif_destroy_vlan(struct batadv_priv *bat_priv,
struct batadv_softif_vlan *vlan)
{
- spin_lock_bh(&bat_priv->softif_vlan_list_lock);
- hlist_del_rcu(&vlan->list);
- spin_unlock_bh(&bat_priv->softif_vlan_list_lock);
-
- batadv_sysfs_del_vlan(bat_priv, vlan);
-
/* explicitly remove the associated TT local entry because it is marked
* with the NOPURGE flag
*/
batadv_tt_local_remove(bat_priv, bat_priv->soft_iface->dev_addr,
vlan->vid, "vlan interface destroyed", false);
+ batadv_sysfs_del_vlan(bat_priv, vlan);
batadv_softif_vlan_free_ref(vlan);
}
@@ -550,6 +568,8 @@ static int batadv_interface_add_vid(struct net_device *dev, __be16 proto,
unsigned short vid)
{
struct batadv_priv *bat_priv = netdev_priv(dev);
+ struct batadv_softif_vlan *vlan;
+ int ret;
/* only 802.1Q vlans are supported.
* batman-adv does not know how to handle other types
@@ -559,7 +579,36 @@ static int batadv_interface_add_vid(struct net_device *dev, __be16 proto,
vid |= BATADV_VLAN_HAS_TAG;
- return batadv_softif_create_vlan(bat_priv, vid);
+ /* if a new vlan is getting created and it already exists, it means that
+ * it was not deleted yet. batadv_softif_vlan_get() increases the
+ * refcount in order to revive the object.
+ *
+ * if it does not exist then create it.
+ */
+ vlan = batadv_softif_vlan_get(bat_priv, vid);
+ if (!vlan)
+ return batadv_softif_create_vlan(bat_priv, vid);
+
+ /* recreate the sysfs object if it was already destroyed (and it should
+ * be since we received a kill_vid() for this vlan)
+ */
+ if (!vlan->kobj) {
+ ret = batadv_sysfs_add_vlan(bat_priv->soft_iface, vlan);
+ if (ret) {
+ batadv_softif_vlan_free_ref(vlan);
+ return ret;
+ }
+ }
+
+ /* add a new TT local entry. This one will be marked with the NOPURGE
+ * flag. This must be added again, even if the vlan object already
+ * exists, because the entry was deleted by kill_vid()
+ */
+ batadv_tt_local_add(bat_priv->soft_iface,
+ bat_priv->soft_iface->dev_addr, vid,
+ BATADV_NULL_IFINDEX, BATADV_NO_MARK);
+
+ return 0;
}
/**
@@ -652,10 +701,7 @@ static void batadv_softif_destroy_finish(struct work_struct *work)
}
batadv_sysfs_del_meshif(soft_iface);
-
- rtnl_lock();
- unregister_netdevice(soft_iface);
- rtnl_unlock();
+ unregister_netdev(soft_iface);
}
/**
@@ -692,6 +738,14 @@ static int batadv_softif_init_late(struct net_device *dev)
#ifdef CONFIG_BATMAN_ADV_DAT
atomic_set(&bat_priv->distributed_arp_table, 1);
#endif
+#ifdef CONFIG_BATMAN_ADV_MCAST
+ bat_priv->mcast.flags = BATADV_NO_FLAGS;
+ atomic_set(&bat_priv->multicast_mode, 1);
+ atomic_set(&bat_priv->mcast.num_disabled, 0);
+ atomic_set(&bat_priv->mcast.num_want_all_unsnoopables, 0);
+ atomic_set(&bat_priv->mcast.num_want_all_ipv4, 0);
+ atomic_set(&bat_priv->mcast.num_want_all_ipv6, 0);
+#endif
atomic_set(&bat_priv->gw_mode, BATADV_GW_MODE_OFF);
atomic_set(&bat_priv->gw_sel_class, 20);
atomic_set(&bat_priv->gw.bandwidth_down, 100);
@@ -862,7 +916,7 @@ static void batadv_softif_init_early(struct net_device *dev)
/* generate random address */
eth_hw_addr_random(dev);
- SET_ETHTOOL_OPS(dev, &batadv_ethtool_ops);
+ dev->ethtool_ops = &batadv_ethtool_ops;
memset(priv, 0, sizeof(*priv));
}
diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c
index e456bf6bb28..fc47baa888c 100644
--- a/net/batman-adv/sysfs.c
+++ b/net/batman-adv/sysfs.c
@@ -29,12 +29,14 @@
static struct net_device *batadv_kobj_to_netdev(struct kobject *obj)
{
struct device *dev = container_of(obj->parent, struct device, kobj);
+
return to_net_dev(dev);
}
static struct batadv_priv *batadv_kobj_to_batpriv(struct kobject *obj)
{
struct net_device *net_dev = batadv_kobj_to_netdev(obj);
+
return netdev_priv(net_dev);
}
@@ -106,7 +108,7 @@ struct batadv_attribute batadv_attr_vlan_##_name = { \
.mode = _mode }, \
.show = _show, \
.store = _store, \
-};
+}
/* Use this, if you have customized show and store functions */
#define BATADV_ATTR(_name, _mode, _show, _store) \
@@ -115,7 +117,7 @@ struct batadv_attribute batadv_attr_##_name = { \
.mode = _mode }, \
.show = _show, \
.store = _store, \
-};
+}
#define BATADV_ATTR_SIF_STORE_BOOL(_name, _post_func) \
ssize_t batadv_store_##_name(struct kobject *kobj, \
@@ -124,6 +126,7 @@ ssize_t batadv_store_##_name(struct kobject *kobj, \
{ \
struct net_device *net_dev = batadv_kobj_to_netdev(kobj); \
struct batadv_priv *bat_priv = netdev_priv(net_dev); \
+ \
return __batadv_store_bool_attr(buff, count, _post_func, attr, \
&bat_priv->_name, net_dev); \
}
@@ -133,6 +136,7 @@ ssize_t batadv_show_##_name(struct kobject *kobj, \
struct attribute *attr, char *buff) \
{ \
struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj); \
+ \
return sprintf(buff, "%s\n", \
atomic_read(&bat_priv->_name) == 0 ? \
"disabled" : "enabled"); \
@@ -155,6 +159,7 @@ ssize_t batadv_store_##_name(struct kobject *kobj, \
{ \
struct net_device *net_dev = batadv_kobj_to_netdev(kobj); \
struct batadv_priv *bat_priv = netdev_priv(net_dev); \
+ \
return __batadv_store_uint_attr(buff, count, _min, _max, \
_post_func, attr, \
&bat_priv->_name, net_dev); \
@@ -165,6 +170,7 @@ ssize_t batadv_show_##_name(struct kobject *kobj, \
struct attribute *attr, char *buff) \
{ \
struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj); \
+ \
return sprintf(buff, "%i\n", atomic_read(&bat_priv->_name)); \
} \
@@ -188,6 +194,7 @@ ssize_t batadv_store_vlan_##_name(struct kobject *kobj, \
size_t res = __batadv_store_bool_attr(buff, count, _post_func, \
attr, &vlan->_name, \
bat_priv->soft_iface); \
+ \
batadv_softif_vlan_free_ref(vlan); \
return res; \
}
@@ -202,6 +209,7 @@ ssize_t batadv_show_vlan_##_name(struct kobject *kobj, \
size_t res = sprintf(buff, "%s\n", \
atomic_read(&vlan->_name) == 0 ? \
"disabled" : "enabled"); \
+ \
batadv_softif_vlan_free_ref(vlan); \
return res; \
}
@@ -324,12 +332,14 @@ static ssize_t batadv_show_bat_algo(struct kobject *kobj,
struct attribute *attr, char *buff)
{
struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj);
+
return sprintf(buff, "%s\n", bat_priv->bat_algo_ops->name);
}
static void batadv_post_gw_reselect(struct net_device *net_dev)
{
struct batadv_priv *bat_priv = netdev_priv(net_dev);
+
batadv_gw_reselect(bat_priv);
}
@@ -539,6 +549,9 @@ BATADV_ATTR_SIF_UINT(gw_sel_class, S_IRUGO | S_IWUSR, 1, BATADV_TQ_MAX_VALUE,
batadv_post_gw_reselect);
static BATADV_ATTR(gw_bandwidth, S_IRUGO | S_IWUSR, batadv_show_gw_bwidth,
batadv_store_gw_bwidth);
+#ifdef CONFIG_BATMAN_ADV_MCAST
+BATADV_ATTR_SIF_BOOL(multicast_mode, S_IRUGO | S_IWUSR, NULL);
+#endif
#ifdef CONFIG_BATMAN_ADV_DEBUG
BATADV_ATTR_SIF_UINT(log_level, S_IRUGO | S_IWUSR, 0, BATADV_DBG_ALL, NULL);
#endif
@@ -558,6 +571,9 @@ static struct batadv_attribute *batadv_mesh_attrs[] = {
#ifdef CONFIG_BATMAN_ADV_DAT
&batadv_attr_distributed_arp_table,
#endif
+#ifdef CONFIG_BATMAN_ADV_MCAST
+ &batadv_attr_multicast_mode,
+#endif
&batadv_attr_fragmentation,
&batadv_attr_routing_algo,
&batadv_attr_gw_mode,
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 959dde721c4..5f59e7f899a 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -24,6 +24,7 @@
#include "originator.h"
#include "routing.h"
#include "bridge_loop_avoidance.h"
+#include "multicast.h"
#include <linux/crc32c.h>
@@ -96,7 +97,7 @@ batadv_tt_hash_find(struct batadv_hashtable *hash, const uint8_t *addr,
if (!hash)
return NULL;
- memcpy(to_search.addr, addr, ETH_ALEN);
+ ether_addr_copy(to_search.addr, addr);
to_search.vid = vid;
index = batadv_choose_tt(&to_search, hash->size);
@@ -192,6 +193,31 @@ batadv_tt_global_entry_free_ref(struct batadv_tt_global_entry *tt_global_entry)
}
}
+/**
+ * batadv_tt_global_hash_count - count the number of orig entries
+ * @bat_priv: the bat priv with all the soft interface information
+ * @addr: the mac address of the client to count entries for
+ * @vid: VLAN identifier
+ *
+ * Return the number of originators advertising the given address/data
+ * (excluding ourself).
+ */
+int batadv_tt_global_hash_count(struct batadv_priv *bat_priv,
+ const uint8_t *addr, unsigned short vid)
+{
+ struct batadv_tt_global_entry *tt_global_entry;
+ int count;
+
+ tt_global_entry = batadv_tt_global_hash_find(bat_priv, addr, vid);
+ if (!tt_global_entry)
+ return 0;
+
+ count = atomic_read(&tt_global_entry->orig_list_count);
+ batadv_tt_global_entry_free_ref(tt_global_entry);
+
+ return count;
+}
+
static void batadv_tt_orig_list_entry_free_rcu(struct rcu_head *rcu)
{
struct batadv_tt_orig_list_entry *orig_entry;
@@ -333,7 +359,7 @@ static void batadv_tt_local_event(struct batadv_priv *bat_priv,
tt_change_node->change.flags = flags;
memset(tt_change_node->change.reserved, 0,
sizeof(tt_change_node->change.reserved));
- memcpy(tt_change_node->change.addr, common->addr, ETH_ALEN);
+ ether_addr_copy(tt_change_node->change.addr, common->addr);
tt_change_node->change.vid = htons(common->vid);
del_op_requested = flags & BATADV_TT_CLIENT_DEL;
@@ -484,7 +510,8 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr,
{
struct batadv_priv *bat_priv = netdev_priv(soft_iface);
struct batadv_tt_local_entry *tt_local;
- struct batadv_tt_global_entry *tt_global;
+ struct batadv_tt_global_entry *tt_global = NULL;
+ struct batadv_softif_vlan *vlan;
struct net_device *in_dev = NULL;
struct hlist_head *head;
struct batadv_tt_orig_list_entry *orig_entry;
@@ -497,7 +524,9 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr,
in_dev = dev_get_by_index(&init_net, ifindex);
tt_local = batadv_tt_local_hash_find(bat_priv, addr, vid);
- tt_global = batadv_tt_global_hash_find(bat_priv, addr, vid);
+
+ if (!is_multicast_ether_addr(addr))
+ tt_global = batadv_tt_global_hash_find(bat_priv, addr, vid);
if (tt_local) {
tt_local->last_seen = jiffies;
@@ -544,12 +573,15 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr,
if (!tt_local)
goto out;
+ /* increase the refcounter of the related vlan */
+ vlan = batadv_softif_vlan_get(bat_priv, vid);
+
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Creating new local tt entry: %pM (vid: %d, ttvn: %d)\n",
addr, BATADV_PRINT_VID(vid),
(uint8_t)atomic_read(&bat_priv->tt.vn));
- memcpy(tt_local->common.addr, addr, ETH_ALEN);
+ ether_addr_copy(tt_local->common.addr, addr);
/* The local entry has to be marked as NEW to avoid to send it in
* a full table response going out before the next ttvn increment
* (consistency check)
@@ -562,8 +594,11 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr,
tt_local->last_seen = jiffies;
tt_local->common.added_at = tt_local->last_seen;
- /* the batman interface mac address should never be purged */
- if (batadv_compare_eth(addr, soft_iface->dev_addr))
+ /* the batman interface mac and multicast addresses should never be
+ * purged
+ */
+ if (batadv_compare_eth(addr, soft_iface->dev_addr) ||
+ is_multicast_ether_addr(addr))
tt_local->common.flags |= BATADV_TT_CLIENT_NOPURGE;
hash_added = batadv_hash_add(bat_priv->tt.local_hash, batadv_compare_tt,
@@ -573,6 +608,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr,
if (unlikely(hash_added != 0)) {
/* remove the reference for the hash */
batadv_tt_local_entry_free_ref(tt_local);
+ batadv_softif_vlan_free_ref(vlan);
goto out;
}
@@ -978,6 +1014,7 @@ uint16_t batadv_tt_local_remove(struct batadv_priv *bat_priv,
{
struct batadv_tt_local_entry *tt_local_entry;
uint16_t flags, curr_flags = BATADV_NO_FLAGS;
+ struct batadv_softif_vlan *vlan;
tt_local_entry = batadv_tt_local_hash_find(bat_priv, addr, vid);
if (!tt_local_entry)
@@ -1008,6 +1045,11 @@ uint16_t batadv_tt_local_remove(struct batadv_priv *bat_priv,
hlist_del_rcu(&tt_local_entry->common.hash_entry);
batadv_tt_local_entry_free_ref(tt_local_entry);
+ /* decrease the reference held for this vlan */
+ vlan = batadv_softif_vlan_get(bat_priv, vid);
+ batadv_softif_vlan_free_ref(vlan);
+ batadv_softif_vlan_free_ref(vlan);
+
out:
if (tt_local_entry)
batadv_tt_local_entry_free_ref(tt_local_entry);
@@ -1080,6 +1122,7 @@ static void batadv_tt_local_table_free(struct batadv_priv *bat_priv)
spinlock_t *list_lock; /* protects write access to the hash lists */
struct batadv_tt_common_entry *tt_common_entry;
struct batadv_tt_local_entry *tt_local;
+ struct batadv_softif_vlan *vlan;
struct hlist_node *node_tmp;
struct hlist_head *head;
uint32_t i;
@@ -1100,6 +1143,13 @@ static void batadv_tt_local_table_free(struct batadv_priv *bat_priv)
tt_local = container_of(tt_common_entry,
struct batadv_tt_local_entry,
common);
+
+ /* decrease the reference held for this vlan */
+ vlan = batadv_softif_vlan_get(bat_priv,
+ tt_common_entry->vid);
+ batadv_softif_vlan_free_ref(vlan);
+ batadv_softif_vlan_free_ref(vlan);
+
batadv_tt_local_entry_free_ref(tt_local);
}
spin_unlock_bh(list_lock);
@@ -1219,6 +1269,8 @@ batadv_tt_global_orig_entry_add(struct batadv_tt_global_entry *tt_global,
hlist_add_head_rcu(&orig_entry->list,
&tt_global->orig_list);
spin_unlock_bh(&tt_global->list_lock);
+ atomic_inc(&tt_global->orig_list_count);
+
out:
if (orig_entry)
batadv_tt_orig_list_entry_free_ref(orig_entry);
@@ -1277,7 +1329,7 @@ static bool batadv_tt_global_add(struct batadv_priv *bat_priv,
goto out;
common = &tt_global_entry->common;
- memcpy(common->addr, tt_addr, ETH_ALEN);
+ ether_addr_copy(common->addr, tt_addr);
common->vid = vid;
common->flags = flags;
@@ -1292,6 +1344,7 @@ static bool batadv_tt_global_add(struct batadv_priv *bat_priv,
common->added_at = jiffies;
INIT_HLIST_HEAD(&tt_global_entry->orig_list);
+ atomic_set(&tt_global_entry->orig_list_count, 0);
spin_lock_init(&tt_global_entry->list_lock);
hash_added = batadv_hash_add(bat_priv->tt.global_hash,
@@ -1361,6 +1414,11 @@ add_orig_entry:
ret = true;
out_remove:
+ /* Do not remove multicast addresses from the local hash on
+ * global additions
+ */
+ if (is_multicast_ether_addr(tt_addr))
+ goto out;
/* remove address from local hash if present */
local_flags = batadv_tt_local_remove(bat_priv, tt_addr, vid,
@@ -1552,6 +1610,25 @@ out:
return 0;
}
+/**
+ * batadv_tt_global_del_orig_entry - remove and free an orig_entry
+ * @tt_global_entry: the global entry to remove the orig_entry from
+ * @orig_entry: the orig entry to remove and free
+ *
+ * Remove an orig_entry from its list in the given tt_global_entry and
+ * free this orig_entry afterwards.
+ */
+static void
+batadv_tt_global_del_orig_entry(struct batadv_tt_global_entry *tt_global_entry,
+ struct batadv_tt_orig_list_entry *orig_entry)
+{
+ batadv_tt_global_size_dec(orig_entry->orig_node,
+ tt_global_entry->common.vid);
+ atomic_dec(&tt_global_entry->orig_list_count);
+ hlist_del_rcu(&orig_entry->list);
+ batadv_tt_orig_list_entry_free_ref(orig_entry);
+}
+
/* deletes the orig list of a tt_global_entry */
static void
batadv_tt_global_del_orig_list(struct batadv_tt_global_entry *tt_global_entry)
@@ -1562,20 +1639,26 @@ batadv_tt_global_del_orig_list(struct batadv_tt_global_entry *tt_global_entry)
spin_lock_bh(&tt_global_entry->list_lock);
head = &tt_global_entry->orig_list;
- hlist_for_each_entry_safe(orig_entry, safe, head, list) {
- hlist_del_rcu(&orig_entry->list);
- batadv_tt_global_size_dec(orig_entry->orig_node,
- tt_global_entry->common.vid);
- batadv_tt_orig_list_entry_free_ref(orig_entry);
- }
+ hlist_for_each_entry_safe(orig_entry, safe, head, list)
+ batadv_tt_global_del_orig_entry(tt_global_entry, orig_entry);
spin_unlock_bh(&tt_global_entry->list_lock);
}
+/**
+ * batadv_tt_global_del_orig_node - remove orig_node from a global tt entry
+ * @bat_priv: the bat priv with all the soft interface information
+ * @tt_global_entry: the global entry to remove the orig_node from
+ * @orig_node: the originator announcing the client
+ * @message: message to append to the log on deletion
+ *
+ * Remove the given orig_node and its according orig_entry from the given
+ * global tt entry.
+ */
static void
-batadv_tt_global_del_orig_entry(struct batadv_priv *bat_priv,
- struct batadv_tt_global_entry *tt_global_entry,
- struct batadv_orig_node *orig_node,
- const char *message)
+batadv_tt_global_del_orig_node(struct batadv_priv *bat_priv,
+ struct batadv_tt_global_entry *tt_global_entry,
+ struct batadv_orig_node *orig_node,
+ const char *message)
{
struct hlist_head *head;
struct hlist_node *safe;
@@ -1592,10 +1675,8 @@ batadv_tt_global_del_orig_entry(struct batadv_priv *bat_priv,
orig_node->orig,
tt_global_entry->common.addr,
BATADV_PRINT_VID(vid), message);
- hlist_del_rcu(&orig_entry->list);
- batadv_tt_global_size_dec(orig_node,
- tt_global_entry->common.vid);
- batadv_tt_orig_list_entry_free_ref(orig_entry);
+ batadv_tt_global_del_orig_entry(tt_global_entry,
+ orig_entry);
}
}
spin_unlock_bh(&tt_global_entry->list_lock);
@@ -1637,8 +1718,8 @@ batadv_tt_global_del_roaming(struct batadv_priv *bat_priv,
/* there is another entry, we can simply delete this
* one and can still use the other one.
*/
- batadv_tt_global_del_orig_entry(bat_priv, tt_global_entry,
- orig_node, message);
+ batadv_tt_global_del_orig_node(bat_priv, tt_global_entry,
+ orig_node, message);
}
/**
@@ -1664,8 +1745,8 @@ static void batadv_tt_global_del(struct batadv_priv *bat_priv,
goto out;
if (!roaming) {
- batadv_tt_global_del_orig_entry(bat_priv, tt_global_entry,
- orig_node, message);
+ batadv_tt_global_del_orig_node(bat_priv, tt_global_entry,
+ orig_node, message);
if (hlist_empty(&tt_global_entry->orig_list))
batadv_tt_global_free(bat_priv, tt_global_entry,
@@ -1748,8 +1829,8 @@ void batadv_tt_global_del_orig(struct batadv_priv *bat_priv,
struct batadv_tt_global_entry,
common);
- batadv_tt_global_del_orig_entry(bat_priv, tt_global,
- orig_node, message);
+ batadv_tt_global_del_orig_node(bat_priv, tt_global,
+ orig_node, message);
if (hlist_empty(&tt_global->orig_list)) {
vid = tt_global->common.vid;
@@ -1763,7 +1844,7 @@ void batadv_tt_global_del_orig(struct batadv_priv *bat_priv,
}
spin_unlock_bh(list_lock);
}
- orig_node->tt_initialised = false;
+ orig_node->capa_initialized &= ~BATADV_ORIG_CAPA_HAS_TT;
}
static bool batadv_tt_global_to_purge(struct batadv_tt_global_entry *tt_global,
@@ -2160,7 +2241,7 @@ batadv_new_tt_req_node(struct batadv_priv *bat_priv,
if (!tt_req_node)
goto unlock;
- memcpy(tt_req_node->addr, orig_node->orig, ETH_ALEN);
+ ether_addr_copy(tt_req_node->addr, orig_node->orig);
tt_req_node->issued_at = jiffies;
list_add(&tt_req_node->list, &bat_priv->tt.req_list);
@@ -2240,8 +2321,7 @@ static void batadv_tt_tvlv_generate(struct batadv_priv *bat_priv,
if ((valid_cb) && (!valid_cb(tt_common_entry, cb_data)))
continue;
- memcpy(tt_change->addr, tt_common_entry->addr,
- ETH_ALEN);
+ ether_addr_copy(tt_change->addr, tt_common_entry->addr);
tt_change->flags = tt_common_entry->flags;
tt_change->vid = htons(tt_common_entry->vid);
memset(tt_change->reserved, 0,
@@ -2724,7 +2804,7 @@ static void _batadv_tt_update_changes(struct batadv_priv *bat_priv,
return;
}
}
- orig_node->tt_initialised = true;
+ orig_node->capa_initialized |= BATADV_ORIG_CAPA_HAS_TT;
}
static void batadv_tt_fill_gtable(struct batadv_priv *bat_priv,
@@ -2932,7 +3012,7 @@ static bool batadv_tt_check_roam_count(struct batadv_priv *bat_priv,
tt_roam_node->first_time = jiffies;
atomic_set(&tt_roam_node->counter,
BATADV_ROAMING_MAX_COUNT - 1);
- memcpy(tt_roam_node->addr, client, ETH_ALEN);
+ ether_addr_copy(tt_roam_node->addr, client);
list_add(&tt_roam_node->list, &bat_priv->tt.roam_list);
ret = true;
@@ -3078,6 +3158,7 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv)
struct batadv_hashtable *hash = bat_priv->tt.local_hash;
struct batadv_tt_common_entry *tt_common;
struct batadv_tt_local_entry *tt_local;
+ struct batadv_softif_vlan *vlan;
struct hlist_node *node_tmp;
struct hlist_head *head;
spinlock_t *list_lock; /* protects write access to the hash lists */
@@ -3106,6 +3187,12 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv)
tt_local = container_of(tt_common,
struct batadv_tt_local_entry,
common);
+
+ /* decrease the reference held for this vlan */
+ vlan = batadv_softif_vlan_get(bat_priv, tt_common->vid);
+ batadv_softif_vlan_free_ref(vlan);
+ batadv_softif_vlan_free_ref(vlan);
+
batadv_tt_local_entry_free_ref(tt_local);
}
spin_unlock_bh(list_lock);
@@ -3121,6 +3208,9 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv)
*/
static void batadv_tt_local_commit_changes_nolock(struct batadv_priv *bat_priv)
{
+ /* Update multicast addresses in local translation table */
+ batadv_mcast_mla_update(bat_priv);
+
if (atomic_read(&bat_priv->tt.local_changes) < 1) {
if (!batadv_atomic_dec_not_zero(&bat_priv->tt.ogm_append_cnt))
batadv_tt_tvlv_container_update(bat_priv);
@@ -3211,13 +3301,15 @@ static void batadv_tt_update_orig(struct batadv_priv *bat_priv,
uint8_t orig_ttvn = (uint8_t)atomic_read(&orig_node->last_ttvn);
struct batadv_tvlv_tt_vlan_data *tt_vlan;
bool full_table = true;
+ bool has_tt_init;
tt_vlan = (struct batadv_tvlv_tt_vlan_data *)tt_buff;
+ has_tt_init = orig_node->capa_initialized & BATADV_ORIG_CAPA_HAS_TT;
+
/* orig table not initialised AND first diff is in the OGM OR the ttvn
* increased by one -> we can apply the attached changes
*/
- if ((!orig_node->tt_initialised && ttvn == 1) ||
- ttvn - orig_ttvn == 1) {
+ if ((!has_tt_init && ttvn == 1) || ttvn - orig_ttvn == 1) {
/* the OGM could not contain the changes due to their size or
* because they have already been sent BATADV_TT_OGM_APPEND_MAX
* times.
@@ -3257,7 +3349,7 @@ static void batadv_tt_update_orig(struct batadv_priv *bat_priv,
/* if we missed more than one change or our tables are not
* in sync anymore -> request fresh tt data
*/
- if (!orig_node->tt_initialised || ttvn != orig_ttvn ||
+ if (!has_tt_init || ttvn != orig_ttvn ||
!batadv_tt_global_check_crc(orig_node, tt_vlan,
tt_num_vlan)) {
request_table:
diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h
index 20a1d7861de..ad84d7b89e3 100644
--- a/net/batman-adv/translation-table.h
+++ b/net/batman-adv/translation-table.h
@@ -29,6 +29,8 @@ int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset);
void batadv_tt_global_del_orig(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node,
int32_t match_vid, const char *message);
+int batadv_tt_global_hash_count(struct batadv_priv *bat_priv,
+ const uint8_t *addr, unsigned short vid);
struct batadv_orig_node *batadv_transtable_search(struct batadv_priv *bat_priv,
const uint8_t *src,
const uint8_t *addr,
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 78370ab31f9..8854c05622a 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -24,8 +24,9 @@
#ifdef CONFIG_BATMAN_ADV_DAT
-/* batadv_dat_addr_t is the type used for all DHT addresses. If it is changed,
- * BATADV_DAT_ADDR_MAX is changed as well.
+/**
+ * batadv_dat_addr_t - it is the type used for all DHT addresses. If it is
+ * changed, BATADV_DAT_ADDR_MAX is changed as well.
*
* *Please be careful: batadv_dat_addr_t must be UNSIGNED*
*/
@@ -163,7 +164,7 @@ struct batadv_vlan_tt {
};
/**
- * batadv_orig_node_vlan - VLAN specific data per orig_node
+ * struct batadv_orig_node_vlan - VLAN specific data per orig_node
* @vid: the VLAN identifier
* @tt: VLAN specific TT attributes
* @list: list node for orig_node::vlan_list
@@ -204,14 +205,18 @@ struct batadv_orig_bat_iv {
* @batadv_dat_addr_t: address of the orig node in the distributed hash
* @last_seen: time when last packet from this node was received
* @bcast_seqno_reset: time when the broadcast seqno window was reset
+ * @mcast_flags: multicast flags announced by the orig node
+ * @mcast_want_all_unsnoopables_node: a list node for the
+ * mcast.want_all_unsnoopables list
+ * @mcast_want_all_ipv4_node: a list node for the mcast.want_all_ipv4 list
+ * @mcast_want_all_ipv6_node: a list node for the mcast.want_all_ipv6 list
* @capabilities: announced capabilities of this originator
+ * @capa_initialized: bitfield to remember whether a capability was initialized
* @last_ttvn: last seen translation table version number
* @tt_buff: last tt changeset this node received from the orig node
* @tt_buff_len: length of the last tt changeset this node received from the
* orig node
* @tt_buff_lock: lock that protects tt_buff and tt_buff_len
- * @tt_initialised: bool keeping track of whether or not this node have received
- * any translation table information from the orig node yet
* @tt_lock: prevents from updating the table while reading it. Table update is
* made up by two operations (data structure update and metdata -CRC/TTVN-
* recalculation) and they have to be executed atomically in order to avoid
@@ -247,12 +252,18 @@ struct batadv_orig_node {
#endif
unsigned long last_seen;
unsigned long bcast_seqno_reset;
+#ifdef CONFIG_BATMAN_ADV_MCAST
+ uint8_t mcast_flags;
+ struct hlist_node mcast_want_all_unsnoopables_node;
+ struct hlist_node mcast_want_all_ipv4_node;
+ struct hlist_node mcast_want_all_ipv6_node;
+#endif
uint8_t capabilities;
+ uint8_t capa_initialized;
atomic_t last_ttvn;
unsigned char *tt_buff;
int16_t tt_buff_len;
spinlock_t tt_buff_lock; /* protects tt_buff & tt_buff_len */
- bool tt_initialised;
/* prevents from changing the table while reading it */
spinlock_t tt_lock;
DECLARE_BITMAP(bcast_bits, BATADV_TQ_LOCAL_WINDOW_SIZE);
@@ -282,10 +293,15 @@ struct batadv_orig_node {
* enum batadv_orig_capabilities - orig node capabilities
* @BATADV_ORIG_CAPA_HAS_DAT: orig node has distributed arp table enabled
* @BATADV_ORIG_CAPA_HAS_NC: orig node has network coding enabled
+ * @BATADV_ORIG_CAPA_HAS_TT: orig node has tt capability
+ * @BATADV_ORIG_CAPA_HAS_MCAST: orig node has some multicast capability
+ * (= orig node announces a tvlv of type BATADV_TVLV_MCAST)
*/
enum batadv_orig_capabilities {
BATADV_ORIG_CAPA_HAS_DAT = BIT(0),
BATADV_ORIG_CAPA_HAS_NC = BIT(1),
+ BATADV_ORIG_CAPA_HAS_TT = BIT(2),
+ BATADV_ORIG_CAPA_HAS_MCAST = BIT(3),
};
/**
@@ -334,7 +350,7 @@ struct batadv_neigh_node {
};
/**
- * struct batadv_neigh_node_bat_iv - neighbor information per outgoing
+ * struct batadv_neigh_ifinfo_bat_iv - neighbor information per outgoing
* interface for BATMAN IV
* @tq_recv: ring buffer of received TQ values from this neigh node
* @tq_index: ring buffer index
@@ -544,7 +560,7 @@ struct batadv_priv_bla {
#endif
/**
- * struct batadv_debug_log - debug logging data
+ * struct batadv_priv_debug_log - debug logging data
* @log_buff: buffer holding the logs (ring bufer)
* @log_start: index of next character to read
* @log_end: index of next character to write
@@ -607,6 +623,39 @@ struct batadv_priv_dat {
};
#endif
+#ifdef CONFIG_BATMAN_ADV_MCAST
+/**
+ * struct batadv_priv_mcast - per mesh interface mcast data
+ * @mla_list: list of multicast addresses we are currently announcing via TT
+ * @want_all_unsnoopables_list: a list of orig_nodes wanting all unsnoopable
+ * multicast traffic
+ * @want_all_ipv4_list: a list of orig_nodes wanting all IPv4 multicast traffic
+ * @want_all_ipv6_list: a list of orig_nodes wanting all IPv6 multicast traffic
+ * @flags: the flags we have last sent in our mcast tvlv
+ * @enabled: whether the multicast tvlv is currently enabled
+ * @num_disabled: number of nodes that have no mcast tvlv
+ * @num_want_all_unsnoopables: number of nodes wanting unsnoopable IP traffic
+ * @num_want_all_ipv4: counter for items in want_all_ipv4_list
+ * @num_want_all_ipv6: counter for items in want_all_ipv6_list
+ * @want_lists_lock: lock for protecting modifications to mcast want lists
+ * (traversals are rcu-locked)
+ */
+struct batadv_priv_mcast {
+ struct hlist_head mla_list;
+ struct hlist_head want_all_unsnoopables_list;
+ struct hlist_head want_all_ipv4_list;
+ struct hlist_head want_all_ipv6_list;
+ uint8_t flags;
+ bool enabled;
+ atomic_t num_disabled;
+ atomic_t num_want_all_unsnoopables;
+ atomic_t num_want_all_ipv4;
+ atomic_t num_want_all_ipv6;
+ /* protects want_all_{unsnoopables,ipv4,ipv6}_list */
+ spinlock_t want_lists_lock;
+};
+#endif
+
/**
* struct batadv_priv_nc - per mesh interface network coding private data
* @work: work queue callback item for cleanup
@@ -638,6 +687,7 @@ struct batadv_priv_nc {
/**
* struct batadv_softif_vlan - per VLAN attributes set
+ * @bat_priv: pointer to the mesh object
* @vid: VLAN identifier
* @kobj: kobject for sysfs vlan subdirectory
* @ap_isolation: AP isolation state
@@ -647,6 +697,7 @@ struct batadv_priv_nc {
* @rcu: struct used for freeing in a RCU-safe manner
*/
struct batadv_softif_vlan {
+ struct batadv_priv *bat_priv;
unsigned short vid;
struct kobject *kobj;
atomic_t ap_isolation; /* boolean */
@@ -672,6 +723,8 @@ struct batadv_softif_vlan {
* enabled
* @distributed_arp_table: bool indicating whether distributed ARP table is
* enabled
+ * @multicast_mode: Enable or disable multicast optimizations on this node's
+ * sender/originating side
* @gw_mode: gateway operation: off, client or server (see batadv_gw_modes)
* @gw_sel_class: gateway selection class (applies if gw_mode client)
* @orig_interval: OGM broadcast interval in milliseconds
@@ -702,6 +755,7 @@ struct batadv_softif_vlan {
* @tt: translation table data
* @tvlv: type-version-length-value data
* @dat: distributed arp table data
+ * @mcast: multicast data
* @network_coding: bool indicating whether network coding is enabled
* @batadv_priv_nc: network coding data
*/
@@ -721,6 +775,9 @@ struct batadv_priv {
#ifdef CONFIG_BATMAN_ADV_DAT
atomic_t distributed_arp_table;
#endif
+#ifdef CONFIG_BATMAN_ADV_MCAST
+ atomic_t multicast_mode;
+#endif
atomic_t gw_mode;
atomic_t gw_sel_class;
atomic_t orig_interval;
@@ -759,6 +816,9 @@ struct batadv_priv {
#ifdef CONFIG_BATMAN_ADV_DAT
struct batadv_priv_dat dat;
#endif
+#ifdef CONFIG_BATMAN_ADV_MCAST
+ struct batadv_priv_mcast mcast;
+#endif
#ifdef CONFIG_BATMAN_ADV_NC
atomic_t network_coding;
struct batadv_priv_nc nc;
@@ -881,12 +941,14 @@ struct batadv_tt_local_entry {
* struct batadv_tt_global_entry - translation table global entry data
* @common: general translation table data
* @orig_list: list of orig nodes announcing this non-mesh client
+ * @orig_list_count: number of items in the orig_list
* @list_lock: lock protecting orig_list
* @roam_at: time at which TT_GLOBAL_ROAM was set
*/
struct batadv_tt_global_entry {
struct batadv_tt_common_entry common;
struct hlist_head orig_list;
+ atomic_t orig_list_count;
spinlock_t list_lock; /* protects orig_list */
unsigned long roam_at;
};
@@ -1004,8 +1066,8 @@ struct batadv_nc_packet {
};
/**
- * batadv_skb_cb - control buffer structure used to store private data relevant
- * to batman-adv in the skb->cb buffer in skbs.
+ * struct batadv_skb_cb - control buffer structure used to store private data
+ * relevant to batman-adv in the skb->cb buffer in skbs.
* @decoded: Marks a skb as decoded, which is checked when searching for coding
* opportunities in network-coding.c
*/
@@ -1116,6 +1178,16 @@ struct batadv_dat_entry {
};
/**
+ * struct batadv_hw_addr - a list entry for a MAC address
+ * @list: list node for the linking of entries
+ * @addr: the MAC address of this list entry
+ */
+struct batadv_hw_addr {
+ struct hlist_node list;
+ unsigned char addr[ETH_ALEN];
+};
+
+/**
* struct batadv_dat_candidate - candidate destination for DAT operations
* @type: the type of the selected candidate. It can one of the following:
* - BATADV_DAT_CANDIDATE_NOT_FOUND
diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index adb3ea04ada..8796ffa08b4 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -27,7 +27,7 @@
#include "6lowpan.h"
-#include "../ieee802154/6lowpan.h" /* for the compression support */
+#include <net/6lowpan.h> /* for the compression support */
#define IFACE_NAME_TEMPLATE "bt%d"
#define EUI64_ADDR_LEN 8
@@ -420,12 +420,18 @@ static int conn_send(struct l2cap_conn *conn,
return 0;
}
-static void get_dest_bdaddr(struct in6_addr *ip6_daddr,
- bdaddr_t *addr, u8 *addr_type)
+static u8 get_addr_type_from_eui64(u8 byte)
{
- u8 *eui64;
+ /* Check the universal(0)/local(1) bit */
+ if (byte & 0x02)
+ return ADDR_LE_DEV_RANDOM;
- eui64 = ip6_daddr->s6_addr + 8;
+ return ADDR_LE_DEV_PUBLIC;
+}
+
+static void copy_to_bdaddr(struct in6_addr *ip6_daddr, bdaddr_t *addr)
+{
+ u8 *eui64 = ip6_daddr->s6_addr + 8;
addr->b[0] = eui64[7];
addr->b[1] = eui64[6];
@@ -433,16 +439,19 @@ static void get_dest_bdaddr(struct in6_addr *ip6_daddr,
addr->b[3] = eui64[2];
addr->b[4] = eui64[1];
addr->b[5] = eui64[0];
+}
- addr->b[5] ^= 2;
+static void convert_dest_bdaddr(struct in6_addr *ip6_daddr,
+ bdaddr_t *addr, u8 *addr_type)
+{
+ copy_to_bdaddr(ip6_daddr, addr);
- /* Set universal/local bit to 0 */
- if (addr->b[5] & 1) {
- addr->b[5] &= ~1;
- *addr_type = ADDR_LE_DEV_PUBLIC;
- } else {
- *addr_type = ADDR_LE_DEV_RANDOM;
- }
+ /* We need to toggle the U/L bit that we got from IPv6 address
+ * so that we get the proper address and type of the BD address.
+ */
+ addr->b[5] ^= 0x02;
+
+ *addr_type = get_addr_type_from_eui64(addr->b[5]);
}
static int header_create(struct sk_buff *skb, struct net_device *netdev,
@@ -473,9 +482,11 @@ static int header_create(struct sk_buff *skb, struct net_device *netdev,
/* Get destination BT device from skb.
* If there is no such peer then discard the packet.
*/
- get_dest_bdaddr(&hdr->daddr, &addr, &addr_type);
+ convert_dest_bdaddr(&hdr->daddr, &addr, &addr_type);
- BT_DBG("dest addr %pMR type %d", &addr, addr_type);
+ BT_DBG("dest addr %pMR type %s IP %pI6c", &addr,
+ addr_type == ADDR_LE_DEV_PUBLIC ? "PUBLIC" : "RANDOM",
+ &hdr->daddr);
read_lock_irqsave(&devices_lock, flags);
peer = peer_lookup_ba(dev, &addr, addr_type);
@@ -556,7 +567,7 @@ static netdev_tx_t bt_xmit(struct sk_buff *skb, struct net_device *netdev)
} else {
unsigned long flags;
- get_dest_bdaddr(&lowpan_cb(skb)->addr, &addr, &addr_type);
+ convert_dest_bdaddr(&lowpan_cb(skb)->addr, &addr, &addr_type);
eui64_addr = lowpan_cb(skb)->addr.s6_addr + 8;
dev = lowpan_dev(netdev);
@@ -564,8 +575,10 @@ static netdev_tx_t bt_xmit(struct sk_buff *skb, struct net_device *netdev)
peer = peer_lookup_ba(dev, &addr, addr_type);
read_unlock_irqrestore(&devices_lock, flags);
- BT_DBG("xmit from %s to %pMR (%pI6c) peer %p", netdev->name,
- &addr, &lowpan_cb(skb)->addr, peer);
+ BT_DBG("xmit %s to %pMR type %s IP %pI6c peer %p",
+ netdev->name, &addr,
+ addr_type == ADDR_LE_DEV_PUBLIC ? "PUBLIC" : "RANDOM",
+ &lowpan_cb(skb)->addr, peer);
if (peer && peer->conn)
err = send_pkt(peer->conn, netdev->dev_addr,
@@ -620,13 +633,13 @@ static void set_addr(u8 *eui, u8 *addr, u8 addr_type)
eui[6] = addr[1];
eui[7] = addr[0];
- eui[0] ^= 2;
-
- /* Universal/local bit set, RFC 4291 */
+ /* Universal/local bit set, BT 6lowpan draft ch. 3.2.1 */
if (addr_type == ADDR_LE_DEV_PUBLIC)
- eui[0] |= 1;
+ eui[0] &= ~0x02;
else
- eui[0] &= ~1;
+ eui[0] |= 0x02;
+
+ BT_DBG("type %d addr %*phC", addr_type, 8, eui);
}
static void set_dev_addr(struct net_device *netdev, bdaddr_t *addr,
@@ -634,7 +647,6 @@ static void set_dev_addr(struct net_device *netdev, bdaddr_t *addr,
{
netdev->addr_assign_type = NET_ADDR_PERM;
set_addr(netdev->dev_addr, addr->b, addr_type);
- netdev->dev_addr[0] ^= 2;
}
static void ifup(struct net_device *netdev)
@@ -684,13 +696,6 @@ static int add_peer_conn(struct l2cap_conn *conn, struct lowpan_dev *dev)
memcpy(&peer->eui64_addr, (u8 *)&peer->peer_addr.s6_addr + 8,
EUI64_ADDR_LEN);
- peer->eui64_addr[0] ^= 2; /* second bit-flip (Universe/Local)
- * is done according RFC2464
- */
-
- raw_dump_inline(__func__, "peer IPv6 address",
- (unsigned char *)&peer->peer_addr, 16);
- raw_dump_inline(__func__, "peer EUI64 address", peer->eui64_addr, 8);
write_lock_irqsave(&devices_lock, flags);
INIT_LIST_HEAD(&peer->list);
diff --git a/net/bluetooth/6lowpan.h b/net/bluetooth/6lowpan.h
index 680eac808d7..5d281f1eaf5 100644
--- a/net/bluetooth/6lowpan.h
+++ b/net/bluetooth/6lowpan.h
@@ -14,13 +14,34 @@
#ifndef __6LOWPAN_H
#define __6LOWPAN_H
+#include <linux/errno.h>
#include <linux/skbuff.h>
#include <net/bluetooth/l2cap.h>
+#if IS_ENABLED(CONFIG_BT_6LOWPAN)
int bt_6lowpan_recv(struct l2cap_conn *conn, struct sk_buff *skb);
int bt_6lowpan_add_conn(struct l2cap_conn *conn);
int bt_6lowpan_del_conn(struct l2cap_conn *conn);
int bt_6lowpan_init(void);
void bt_6lowpan_cleanup(void);
+#else /* !CONFIG_BT_6LOWPAN: inline no-op stubs so callers build without it */
+static inline int bt_6lowpan_recv(struct l2cap_conn *conn, struct sk_buff *skb)
+{
+ return -EOPNOTSUPP;
+}
+static inline int bt_6lowpan_add_conn(struct l2cap_conn *conn)
+{
+ return -EOPNOTSUPP;
+}
+static inline int bt_6lowpan_del_conn(struct l2cap_conn *conn)
+{
+ return -EOPNOTSUPP;
+}
+static inline int bt_6lowpan_init(void)
+{
+ return -EOPNOTSUPP;
+}
+static inline void bt_6lowpan_cleanup(void) { }
+#endif /* IS_ENABLED(CONFIG_BT_6LOWPAN) */
#endif /* __6LOWPAN_H */
diff --git a/net/bluetooth/Kconfig b/net/bluetooth/Kconfig
index 985b56070d2..06ec14499ca 100644
--- a/net/bluetooth/Kconfig
+++ b/net/bluetooth/Kconfig
@@ -6,13 +6,13 @@ menuconfig BT
tristate "Bluetooth subsystem support"
depends on NET && !S390
depends on RFKILL || !RFKILL
+ select 6LOWPAN_IPHC if BT_6LOWPAN
select CRC16
select CRYPTO
select CRYPTO_BLKCIPHER
select CRYPTO_AES
select CRYPTO_ECB
select CRYPTO_SHA256
- select 6LOWPAN_IPHC
help
Bluetooth is low-cost, low-power, short-range wireless technology.
It was designed as a replacement for cables and other short-range
@@ -40,6 +40,12 @@ menuconfig BT
to Bluetooth kernel modules are provided in the BlueZ packages. For
more information, see <http://www.bluez.org/>.
+config BT_6LOWPAN
+ bool "Bluetooth 6LoWPAN support"
+ depends on BT && IPV6
+ help
+ IPv6 compression over Bluetooth.
+
source "net/bluetooth/rfcomm/Kconfig"
source "net/bluetooth/bnep/Kconfig"
diff --git a/net/bluetooth/Makefile b/net/bluetooth/Makefile
index 80cb215826e..ca51246b101 100644
--- a/net/bluetooth/Makefile
+++ b/net/bluetooth/Makefile
@@ -10,6 +10,7 @@ obj-$(CONFIG_BT_HIDP) += hidp/
bluetooth-y := af_bluetooth.o hci_core.o hci_conn.o hci_event.o mgmt.o \
hci_sock.o hci_sysfs.o l2cap_core.o l2cap_sock.o smp.o sco.o lib.o \
- a2mp.o amp.o 6lowpan.o
+ a2mp.o amp.o
+bluetooth-$(CONFIG_BT_6LOWPAN) += 6lowpan.o
subdir-ccflags-y += -D__CHECK_ENDIAN__
diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c
index efcd108822c..9514cc9e850 100644
--- a/net/bluetooth/a2mp.c
+++ b/net/bluetooth/a2mp.c
@@ -162,7 +162,7 @@ static int a2mp_discover_req(struct amp_mgr *mgr, struct sk_buff *skb,
return -ENOMEM;
}
- rsp->mtu = __constant_cpu_to_le16(L2CAP_A2MP_DEFAULT_MTU);
+ rsp->mtu = cpu_to_le16(L2CAP_A2MP_DEFAULT_MTU);
rsp->ext_feat = 0;
__a2mp_add_cl(mgr, rsp->cl);
@@ -235,7 +235,7 @@ static int a2mp_discover_rsp(struct amp_mgr *mgr, struct sk_buff *skb,
BT_DBG("chan %p state %s", chan,
state_to_string(chan->state));
- if (chan->chan_type == L2CAP_CHAN_CONN_FIX_A2MP)
+ if (chan->scid == L2CAP_CID_A2MP)
continue;
l2cap_chan_lock(chan);
@@ -649,7 +649,7 @@ static int a2mp_chan_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb)
if (err) {
struct a2mp_cmd_rej rej;
- rej.reason = __constant_cpu_to_le16(0);
+ rej.reason = cpu_to_le16(0);
hdr = (void *) skb->data;
BT_DBG("Send A2MP Rej: cmd 0x%2.2x err %d", hdr->code, err);
@@ -695,7 +695,13 @@ static void a2mp_chan_state_change_cb(struct l2cap_chan *chan, int state,
static struct sk_buff *a2mp_chan_alloc_skb_cb(struct l2cap_chan *chan,
unsigned long len, int nb)
{
- return bt_skb_alloc(len, GFP_KERNEL);
+ struct sk_buff *skb;
+
+ skb = bt_skb_alloc(len, GFP_KERNEL);
+ if (!skb)
+ return ERR_PTR(-ENOMEM);
+
+ return skb;
}
static struct l2cap_ops a2mp_chan_ops = {
@@ -726,7 +732,11 @@ static struct l2cap_chan *a2mp_chan_open(struct l2cap_conn *conn, bool locked)
BT_DBG("chan %p", chan);
- chan->chan_type = L2CAP_CHAN_CONN_FIX_A2MP;
+ chan->chan_type = L2CAP_CHAN_FIXED;
+ chan->scid = L2CAP_CID_A2MP;
+ chan->dcid = L2CAP_CID_A2MP;
+ chan->omtu = L2CAP_A2MP_DEFAULT_MTU;
+ chan->imtu = L2CAP_A2MP_DEFAULT_MTU;
chan->flush_to = L2CAP_DEFAULT_FLUSH_TO;
chan->ops = &a2mp_chan_ops;
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 0c5866bb49b..2021c481cdb 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -31,7 +31,7 @@
#include <net/bluetooth/bluetooth.h>
#include <linux/proc_fs.h>
-#define VERSION "2.18"
+#define VERSION "2.19"
/* Bluetooth sockets */
#define BT_MAX_PROTO 8
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index ba5366c320d..a7a27bc2c0b 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -28,6 +28,7 @@
#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci_core.h>
+#include <net/bluetooth/l2cap.h>
#include "smp.h"
#include "a2mp.h"
@@ -82,7 +83,7 @@ static void hci_acl_create_connection(struct hci_conn *conn)
cp.pscan_rep_mode = ie->data.pscan_rep_mode;
cp.pscan_mode = ie->data.pscan_mode;
cp.clock_offset = ie->data.clock_offset |
- __constant_cpu_to_le16(0x8000);
+ cpu_to_le16(0x8000);
}
memcpy(conn->dev_class, ie->data.dev_class, 3);
@@ -182,8 +183,8 @@ bool hci_setup_sync(struct hci_conn *conn, __u16 handle)
cp.handle = cpu_to_le16(handle);
- cp.tx_bandwidth = __constant_cpu_to_le32(0x00001f40);
- cp.rx_bandwidth = __constant_cpu_to_le32(0x00001f40);
+ cp.tx_bandwidth = cpu_to_le32(0x00001f40);
+ cp.rx_bandwidth = cpu_to_le32(0x00001f40);
cp.voice_setting = cpu_to_le16(conn->setting);
switch (conn->setting & SCO_AIRMODE_MASK) {
@@ -225,13 +226,13 @@ void hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max,
cp.conn_interval_max = cpu_to_le16(max);
cp.conn_latency = cpu_to_le16(latency);
cp.supervision_timeout = cpu_to_le16(to_multiplier);
- cp.min_ce_len = __constant_cpu_to_le16(0x0001);
- cp.max_ce_len = __constant_cpu_to_le16(0x0001);
+ cp.min_ce_len = cpu_to_le16(0x0000);
+ cp.max_ce_len = cpu_to_le16(0x0000);
hci_send_cmd(hdev, HCI_OP_LE_CONN_UPDATE, sizeof(cp), &cp);
}
-void hci_le_start_enc(struct hci_conn *conn, __le16 ediv, __u8 rand[8],
+void hci_le_start_enc(struct hci_conn *conn, __le16 ediv, __le64 rand,
__u8 ltk[16])
{
struct hci_dev *hdev = conn->hdev;
@@ -242,9 +243,9 @@ void hci_le_start_enc(struct hci_conn *conn, __le16 ediv, __u8 rand[8],
memset(&cp, 0, sizeof(cp));
cp.handle = cpu_to_le16(conn->handle);
- memcpy(cp.ltk, ltk, sizeof(cp.ltk));
+ cp.rand = rand;
cp.ediv = ediv;
- memcpy(cp.rand, rand, sizeof(cp.rand));
+ memcpy(cp.ltk, ltk, sizeof(cp.ltk));
hci_send_cmd(hdev, HCI_OP_LE_START_ENC, sizeof(cp), &cp);
}
@@ -288,10 +289,20 @@ static void hci_conn_timeout(struct work_struct *work)
{
struct hci_conn *conn = container_of(work, struct hci_conn,
disc_work.work);
+ int refcnt = atomic_read(&conn->refcnt);
BT_DBG("hcon %p state %s", conn, state_to_string(conn->state));
- if (atomic_read(&conn->refcnt))
+ WARN_ON(refcnt < 0);
+
+ /* FIXME: It was observed that in pairing failed scenario, refcnt
+ * drops below 0. Probably this is because l2cap_conn_del calls
+ * l2cap_chan_del for each channel, and inside l2cap_chan_del conn is
+ * dropped. After that loop hci_chan_del is called which also drops
+ * conn. For now make sure that ACL is alive if refcnt is higher than 0,
+ * otherwise drop it.
+ */
+ if (refcnt > 0)
return;
switch (conn->state) {
@@ -337,9 +348,9 @@ static void hci_conn_idle(struct work_struct *work)
if (lmp_sniffsubr_capable(hdev) && lmp_sniffsubr_capable(conn)) {
struct hci_cp_sniff_subrate cp;
cp.handle = cpu_to_le16(conn->handle);
- cp.max_latency = __constant_cpu_to_le16(0);
- cp.min_remote_timeout = __constant_cpu_to_le16(0);
- cp.min_local_timeout = __constant_cpu_to_le16(0);
+ cp.max_latency = cpu_to_le16(0);
+ cp.min_remote_timeout = cpu_to_le16(0);
+ cp.min_local_timeout = cpu_to_le16(0);
hci_send_cmd(hdev, HCI_OP_SNIFF_SUBRATE, sizeof(cp), &cp);
}
@@ -348,8 +359,8 @@ static void hci_conn_idle(struct work_struct *work)
cp.handle = cpu_to_le16(conn->handle);
cp.max_interval = cpu_to_le16(hdev->sniff_max_interval);
cp.min_interval = cpu_to_le16(hdev->sniff_min_interval);
- cp.attempt = __constant_cpu_to_le16(4);
- cp.timeout = __constant_cpu_to_le16(1);
+ cp.attempt = cpu_to_le16(4);
+ cp.timeout = cpu_to_le16(1);
hci_send_cmd(hdev, HCI_OP_SNIFF_MODE, sizeof(cp), &cp);
}
}
@@ -363,6 +374,30 @@ static void hci_conn_auto_accept(struct work_struct *work)
&conn->dst);
}
+static void le_conn_timeout(struct work_struct *work)
+{
+ struct hci_conn *conn = container_of(work, struct hci_conn,
+ le_conn_timeout.work);
+ struct hci_dev *hdev = conn->hdev;
+
+ BT_DBG("");
+
+ /* We could end up here due to having done directed advertising,
+ * so clean up the state if necessary. This should however only
+ * happen with broken hardware or if low duty cycle was used
+ * (which doesn't have a timeout of its own).
+ */
+ if (test_bit(HCI_ADVERTISING, &hdev->dev_flags)) {
+ u8 enable = 0x00;
+ hci_send_cmd(hdev, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable),
+ &enable);
+ hci_le_conn_failed(conn, HCI_ERROR_ADVERTISING_TIMEOUT);
+ return;
+ }
+
+ hci_le_create_connection_cancel(conn);
+}
+
struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst)
{
struct hci_conn *conn;
@@ -383,6 +418,8 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst)
conn->io_capability = hdev->io_capability;
conn->remote_auth = 0xff;
conn->key_type = 0xff;
+ conn->tx_power = HCI_TX_POWER_INVALID;
+ conn->max_tx_power = HCI_TX_POWER_INVALID;
set_bit(HCI_CONN_POWER_SAVE, &conn->flags);
conn->disc_timeout = HCI_DISCONN_TIMEOUT;
@@ -391,6 +428,10 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst)
case ACL_LINK:
conn->pkt_type = hdev->pkt_type & ACL_PTYPE_MASK;
break;
+ case LE_LINK:
+ /* conn->src should reflect the local identity address */
+ hci_copy_identity_address(hdev, &conn->src, &conn->src_type);
+ break;
case SCO_LINK:
if (lmp_esco_capable(hdev))
conn->pkt_type = (hdev->esco_type & SCO_ESCO_MASK) |
@@ -410,6 +451,7 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst)
INIT_DELAYED_WORK(&conn->disc_work, hci_conn_timeout);
INIT_DELAYED_WORK(&conn->auto_accept_work, hci_conn_auto_accept);
INIT_DELAYED_WORK(&conn->idle_work, hci_conn_idle);
+ INIT_DELAYED_WORK(&conn->le_conn_timeout, le_conn_timeout);
atomic_set(&conn->refcnt, 0);
@@ -442,6 +484,8 @@ int hci_conn_del(struct hci_conn *conn)
/* Unacked frames */
hdev->acl_cnt += conn->sent;
} else if (conn->type == LE_LINK) {
+ cancel_delayed_work_sync(&conn->le_conn_timeout);
+
if (hdev->le_pkts)
hdev->le_cnt += conn->sent;
else
@@ -514,6 +558,31 @@ struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src)
}
EXPORT_SYMBOL(hci_get_route);
+/* This function requires the caller holds hdev->lock */
+void hci_le_conn_failed(struct hci_conn *conn, u8 status)
+{
+ struct hci_dev *hdev = conn->hdev;
+
+ conn->state = BT_CLOSED;
+
+ mgmt_connect_failed(hdev, &conn->dst, conn->type, conn->dst_type,
+ status);
+
+ hci_proto_connect_cfm(conn, status);
+
+ hci_conn_del(conn);
+
+ /* Since we may have temporarily stopped the background scanning in
+ * favor of connection establishment, we should restart it.
+ */
+ hci_update_background_scan(hdev);
+
+ /* Re-enable advertising in case this was a failed connection
+ * attempt as a peripheral.
+ */
+ mgmt_reenable_advertising(hdev);
+}
+
static void create_le_conn_complete(struct hci_dev *hdev, u8 status)
{
struct hci_conn *conn;
@@ -530,60 +599,91 @@ static void create_le_conn_complete(struct hci_dev *hdev, u8 status)
if (!conn)
goto done;
- conn->state = BT_CLOSED;
-
- mgmt_connect_failed(hdev, &conn->dst, conn->type, conn->dst_type,
- status);
-
- hci_proto_connect_cfm(conn, status);
-
- hci_conn_del(conn);
+ hci_le_conn_failed(conn, status);
done:
hci_dev_unlock(hdev);
}
-static int hci_create_le_conn(struct hci_conn *conn)
+static void hci_req_add_le_create_conn(struct hci_request *req,
+ struct hci_conn *conn)
{
- struct hci_dev *hdev = conn->hdev;
struct hci_cp_le_create_conn cp;
- struct hci_request req;
- int err;
-
- hci_req_init(&req, hdev);
+ struct hci_dev *hdev = conn->hdev;
+ u8 own_addr_type;
memset(&cp, 0, sizeof(cp));
+
+ /* Update random address, but set require_privacy to false so
+ * that we never connect with an unresolvable address.
+ */
+ if (hci_update_random_address(req, false, &own_addr_type))
+ return;
+
cp.scan_interval = cpu_to_le16(hdev->le_scan_interval);
cp.scan_window = cpu_to_le16(hdev->le_scan_window);
bacpy(&cp.peer_addr, &conn->dst);
cp.peer_addr_type = conn->dst_type;
- cp.own_address_type = conn->src_type;
- cp.conn_interval_min = cpu_to_le16(hdev->le_conn_min_interval);
- cp.conn_interval_max = cpu_to_le16(hdev->le_conn_max_interval);
- cp.supervision_timeout = __constant_cpu_to_le16(0x002a);
- cp.min_ce_len = __constant_cpu_to_le16(0x0000);
- cp.max_ce_len = __constant_cpu_to_le16(0x0000);
+ cp.own_address_type = own_addr_type;
+ cp.conn_interval_min = cpu_to_le16(conn->le_conn_min_interval);
+ cp.conn_interval_max = cpu_to_le16(conn->le_conn_max_interval);
+ cp.supervision_timeout = cpu_to_le16(0x002a);
+ cp.min_ce_len = cpu_to_le16(0x0000);
+ cp.max_ce_len = cpu_to_le16(0x0000);
- hci_req_add(&req, HCI_OP_LE_CREATE_CONN, sizeof(cp), &cp);
+ hci_req_add(req, HCI_OP_LE_CREATE_CONN, sizeof(cp), &cp);
- err = hci_req_run(&req, create_le_conn_complete);
- if (err) {
- hci_conn_del(conn);
- return err;
- }
+ conn->state = BT_CONNECT;
+}
- return 0;
+static void hci_req_directed_advertising(struct hci_request *req,
+ struct hci_conn *conn)
+{
+ struct hci_dev *hdev = req->hdev;
+ struct hci_cp_le_set_adv_param cp;
+ u8 own_addr_type;
+ u8 enable;
+
+ enable = 0x00;
+ hci_req_add(req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable);
+
+ /* Clear the HCI_ADVERTISING bit temporarily so that the
+ * hci_update_random_address knows that it's safe to go ahead
+ * and write a new random address. The flag will be set back on
+ * as soon as the SET_ADV_ENABLE HCI command completes.
+ */
+ clear_bit(HCI_ADVERTISING, &hdev->dev_flags);
+
+ /* Set require_privacy to false so that the remote device has a
+ * chance of identifying us.
+ */
+ if (hci_update_random_address(req, false, &own_addr_type) < 0)
+ return;
+
+ memset(&cp, 0, sizeof(cp));
+ cp.type = LE_ADV_DIRECT_IND;
+ cp.own_address_type = own_addr_type;
+ cp.direct_addr_type = conn->dst_type;
+ bacpy(&cp.direct_addr, &conn->dst);
+ cp.channel_map = hdev->le_adv_channel_map;
+
+ hci_req_add(req, HCI_OP_LE_SET_ADV_PARAM, sizeof(cp), &cp);
+
+ enable = 0x01;
+ hci_req_add(req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable);
+
+ conn->state = BT_CONNECT;
}
-static struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
- u8 dst_type, u8 sec_level, u8 auth_type)
+struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
+ u8 dst_type, u8 sec_level, u8 auth_type)
{
+ struct hci_conn_params *params;
struct hci_conn *conn;
+ struct smp_irk *irk;
+ struct hci_request req;
int err;
- if (test_bit(HCI_ADVERTISING, &hdev->flags))
- return ERR_PTR(-ENOTSUPP);
-
/* Some devices send ATT messages as soon as the physical link is
* established. To be able to handle these ATT messages, the user-
* space first establishes the connection and then starts the pairing
@@ -607,35 +707,80 @@ static struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
if (conn)
return ERR_PTR(-EBUSY);
+ /* When given an identity address with existing identity
+ * resolving key, the connection needs to be established
+ * to a resolvable random address.
+ *
+ * This uses the cached random resolvable address from
+ * a previous scan. When no cached address is available,
+ * try connecting to the identity address instead.
+ *
+ * Storing the resolvable random address is required here
+ * to handle connection failures. The address will later
+ * be resolved back into the original identity address
+ * from the connect request.
+ */
+ irk = hci_find_irk_by_addr(hdev, dst, dst_type);
+ if (irk && bacmp(&irk->rpa, BDADDR_ANY)) {
+ dst = &irk->rpa;
+ dst_type = ADDR_LE_DEV_RANDOM;
+ }
+
conn = hci_conn_add(hdev, LE_LINK, dst);
if (!conn)
return ERR_PTR(-ENOMEM);
- if (dst_type == BDADDR_LE_PUBLIC)
- conn->dst_type = ADDR_LE_DEV_PUBLIC;
- else
- conn->dst_type = ADDR_LE_DEV_RANDOM;
+ conn->dst_type = dst_type;
+ conn->sec_level = BT_SECURITY_LOW;
+ conn->pending_sec_level = sec_level;
+ conn->auth_type = auth_type;
- conn->src_type = hdev->own_addr_type;
+ hci_req_init(&req, hdev);
+
+ if (test_bit(HCI_ADVERTISING, &hdev->dev_flags)) {
+ hci_req_directed_advertising(&req, conn);
+ goto create_conn;
+ }
- conn->state = BT_CONNECT;
conn->out = true;
conn->link_mode |= HCI_LM_MASTER;
- conn->sec_level = BT_SECURITY_LOW;
- conn->pending_sec_level = sec_level;
- conn->auth_type = auth_type;
- err = hci_create_le_conn(conn);
- if (err)
+ params = hci_conn_params_lookup(hdev, &conn->dst, conn->dst_type);
+ if (params) {
+ conn->le_conn_min_interval = params->conn_min_interval;
+ conn->le_conn_max_interval = params->conn_max_interval;
+ } else {
+ conn->le_conn_min_interval = hdev->le_conn_min_interval;
+ conn->le_conn_max_interval = hdev->le_conn_max_interval;
+ }
+
+ /* If controller is scanning, we stop it since some controllers are
+ * not able to scan and connect at the same time. Also set the
+ * HCI_LE_SCAN_INTERRUPTED flag so that the command complete
+ * handler for scan disabling knows to set the correct discovery
+ * state.
+ */
+ if (test_bit(HCI_LE_SCAN, &hdev->dev_flags)) {
+ hci_req_add_le_scan_disable(&req);
+ set_bit(HCI_LE_SCAN_INTERRUPTED, &hdev->dev_flags);
+ }
+
+ hci_req_add_le_create_conn(&req, conn);
+
+create_conn:
+ err = hci_req_run(&req, create_le_conn_complete);
+ if (err) {
+ hci_conn_del(conn);
return ERR_PTR(err);
+ }
done:
hci_conn_hold(conn);
return conn;
}
-static struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst,
- u8 sec_level, u8 auth_type)
+struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst,
+ u8 sec_level, u8 auth_type)
{
struct hci_conn *acl;
@@ -704,27 +849,22 @@ struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type, bdaddr_t *dst,
return sco;
}
-/* Create SCO, ACL or LE connection. */
-struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst,
- __u8 dst_type, __u8 sec_level, __u8 auth_type)
-{
- BT_DBG("%s dst %pMR type 0x%x", hdev->name, dst, type);
-
- switch (type) {
- case LE_LINK:
- return hci_connect_le(hdev, dst, dst_type, sec_level, auth_type);
- case ACL_LINK:
- return hci_connect_acl(hdev, dst, sec_level, auth_type);
- }
-
- return ERR_PTR(-EINVAL);
-}
-
/* Check link security requirement */
int hci_conn_check_link_mode(struct hci_conn *conn)
{
BT_DBG("hcon %p", conn);
+ /* In Secure Connections Only mode (HCI_SC_ONLY, kept in dev_flags)
+ * it is required that Secure Connections is used and the link is
+ * encrypted with AES-CCM using a P-256 authenticated combination key.
+ */
+ if (test_bit(HCI_SC_ONLY, &conn->hdev->dev_flags)) {
+ if (!hci_conn_sc_enabled(conn) ||
+ !test_bit(HCI_CONN_AES_CCM, &conn->flags) ||
+ conn->key_type != HCI_LK_AUTH_COMBINATION_P256)
+ return 0;
+ }
+
if (hci_conn_ssp_enabled(conn) && !(conn->link_mode & HCI_LM_ENCRYPT))
return 0;
@@ -752,14 +892,17 @@ static int hci_conn_auth(struct hci_conn *conn, __u8 sec_level, __u8 auth_type)
if (!test_and_set_bit(HCI_CONN_AUTH_PEND, &conn->flags)) {
struct hci_cp_auth_requested cp;
- /* encrypt must be pending if auth is also pending */
- set_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags);
-
cp.handle = cpu_to_le16(conn->handle);
hci_send_cmd(conn->hdev, HCI_OP_AUTH_REQUESTED,
sizeof(cp), &cp);
- if (conn->key_type != 0xff)
+
+ /* If we're already encrypted set the REAUTH_PEND flag,
+ * otherwise set the ENCRYPT_PEND.
+ */
+ if (conn->link_mode & HCI_LM_ENCRYPT)
set_bit(HCI_CONN_REAUTH_PEND, &conn->flags);
+ else
+ set_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags);
}
return 0;
@@ -800,14 +943,23 @@ int hci_conn_security(struct hci_conn *conn, __u8 sec_level, __u8 auth_type)
if (!(conn->link_mode & HCI_LM_AUTH))
goto auth;
- /* An authenticated combination key has sufficient security for any
- security level. */
- if (conn->key_type == HCI_LK_AUTH_COMBINATION)
+ /* An authenticated FIPS approved combination key has sufficient
+ * security for security level 4. */
+ if (conn->key_type == HCI_LK_AUTH_COMBINATION_P256 &&
+ sec_level == BT_SECURITY_FIPS)
+ goto encrypt;
+
+ /* An authenticated combination key has sufficient security for
+ security level 3. */
+ if ((conn->key_type == HCI_LK_AUTH_COMBINATION_P192 ||
+ conn->key_type == HCI_LK_AUTH_COMBINATION_P256) &&
+ sec_level == BT_SECURITY_HIGH)
goto encrypt;
/* An unauthenticated combination key has sufficient security for
security level 1 and 2. */
- if (conn->key_type == HCI_LK_UNAUTH_COMBINATION &&
+ if ((conn->key_type == HCI_LK_UNAUTH_COMBINATION_P192 ||
+ conn->key_type == HCI_LK_UNAUTH_COMBINATION_P256) &&
(sec_level == BT_SECURITY_MEDIUM || sec_level == BT_SECURITY_LOW))
goto encrypt;
@@ -816,7 +968,8 @@ int hci_conn_security(struct hci_conn *conn, __u8 sec_level, __u8 auth_type)
is generated using maximum PIN code length (16).
For pre 2.1 units. */
if (conn->key_type == HCI_LK_COMBINATION &&
- (sec_level != BT_SECURITY_HIGH || conn->pin_length == 16))
+ (sec_level == BT_SECURITY_MEDIUM || sec_level == BT_SECURITY_LOW ||
+ conn->pin_length == 16))
goto encrypt;
auth:
@@ -840,13 +993,17 @@ int hci_conn_check_secure(struct hci_conn *conn, __u8 sec_level)
{
BT_DBG("hcon %p", conn);
- if (sec_level != BT_SECURITY_HIGH)
- return 1; /* Accept if non-secure is required */
+ /* Accept if neither high nor FIPS security level is required */
+ if (sec_level != BT_SECURITY_HIGH && sec_level != BT_SECURITY_FIPS)
+ return 1;
- if (conn->sec_level == BT_SECURITY_HIGH)
+ /* Accept if secure or higher security level is already present */
+ if (conn->sec_level == BT_SECURITY_HIGH ||
+ conn->sec_level == BT_SECURITY_FIPS)
return 1;
- return 0; /* Reject not secure link */
+ /* Reject not secure link */
+ return 0;
}
EXPORT_SYMBOL(hci_conn_check_secure);
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 5e8663c194c..0a43cce9a91 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -29,10 +29,14 @@
#include <linux/idr.h>
#include <linux/rfkill.h>
#include <linux/debugfs.h>
+#include <linux/crypto.h>
#include <asm/unaligned.h>
#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci_core.h>
+#include <net/bluetooth/l2cap.h>
+
+#include "smp.h"
static void hci_rx_work(struct work_struct *work);
static void hci_cmd_work(struct work_struct *work);
@@ -285,24 +289,6 @@ static const struct file_operations link_keys_fops = {
.release = single_release,
};
-static ssize_t use_debug_keys_read(struct file *file, char __user *user_buf,
- size_t count, loff_t *ppos)
-{
- struct hci_dev *hdev = file->private_data;
- char buf[3];
-
- buf[0] = test_bit(HCI_DEBUG_KEYS, &hdev->dev_flags) ? 'Y': 'N';
- buf[1] = '\n';
- buf[2] = '\0';
- return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
-}
-
-static const struct file_operations use_debug_keys_fops = {
- .open = simple_open,
- .read = use_debug_keys_read,
- .llseek = default_llseek,
-};
-
static int dev_class_show(struct seq_file *f, void *ptr)
{
struct hci_dev *hdev = f->private;
@@ -415,6 +401,70 @@ static int ssp_debug_mode_get(void *data, u64 *val)
DEFINE_SIMPLE_ATTRIBUTE(ssp_debug_mode_fops, ssp_debug_mode_get,
ssp_debug_mode_set, "%llu\n");
+static ssize_t force_sc_support_read(struct file *file, char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ struct hci_dev *hdev = file->private_data;
+ char buf[3];
+
+ buf[0] = test_bit(HCI_FORCE_SC, &hdev->dev_flags) ? 'Y': 'N';
+ buf[1] = '\n';
+ buf[2] = '\0';
+ return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
+}
+
+static ssize_t force_sc_support_write(struct file *file,
+ const char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ struct hci_dev *hdev = file->private_data;
+ char buf[32];
+ size_t buf_size = min(count, (sizeof(buf)-1));
+ bool enable;
+
+ if (test_bit(HCI_UP, &hdev->flags))
+ return -EBUSY;
+
+ if (copy_from_user(buf, user_buf, buf_size))
+ return -EFAULT;
+
+ buf[buf_size] = '\0';
+ if (strtobool(buf, &enable))
+ return -EINVAL;
+
+ if (enable == test_bit(HCI_FORCE_SC, &hdev->dev_flags))
+ return -EALREADY;
+
+ change_bit(HCI_FORCE_SC, &hdev->dev_flags);
+
+ return count;
+}
+
+static const struct file_operations force_sc_support_fops = {
+ .open = simple_open,
+ .read = force_sc_support_read,
+ .write = force_sc_support_write,
+ .llseek = default_llseek,
+};
+
+static ssize_t sc_only_mode_read(struct file *file, char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ struct hci_dev *hdev = file->private_data;
+ char buf[3];
+
+ buf[0] = test_bit(HCI_SC_ONLY, &hdev->dev_flags) ? 'Y': 'N';
+ buf[1] = '\n';
+ buf[2] = '\0';
+ return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
+}
+
+static const struct file_operations sc_only_mode_fops = {
+ .open = simple_open,
+ .read = sc_only_mode_read,
+ .llseek = default_llseek,
+};
+
static int idle_timeout_set(void *data, u64 val)
{
struct hci_dev *hdev = data;
@@ -443,6 +493,37 @@ static int idle_timeout_get(void *data, u64 *val)
DEFINE_SIMPLE_ATTRIBUTE(idle_timeout_fops, idle_timeout_get,
idle_timeout_set, "%llu\n");
+static int rpa_timeout_set(void *data, u64 val)
+{
+ struct hci_dev *hdev = data;
+
+ /* Require the RPA timeout to be at least 30 seconds and at most
+ * 24 hours.
+ */
+ if (val < 30 || val > (60 * 60 * 24))
+ return -EINVAL;
+
+ hci_dev_lock(hdev);
+ hdev->rpa_timeout = val;
+ hci_dev_unlock(hdev);
+
+ return 0;
+}
+
+static int rpa_timeout_get(void *data, u64 *val)
+{
+ struct hci_dev *hdev = data;
+
+ hci_dev_lock(hdev);
+ *val = hdev->rpa_timeout;
+ hci_dev_unlock(hdev);
+
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(rpa_timeout_fops, rpa_timeout_get,
+ rpa_timeout_set, "%llu\n");
+
static int sniff_min_interval_set(void *data, u64 val)
{
struct hci_dev *hdev = data;
@@ -499,6 +580,115 @@ static int sniff_max_interval_get(void *data, u64 *val)
DEFINE_SIMPLE_ATTRIBUTE(sniff_max_interval_fops, sniff_max_interval_get,
sniff_max_interval_set, "%llu\n");
+static int conn_info_min_age_set(void *data, u64 val)
+{
+ struct hci_dev *hdev = data;
+
+ if (val == 0 || val > hdev->conn_info_max_age)
+ return -EINVAL;
+
+ hci_dev_lock(hdev);
+ hdev->conn_info_min_age = val;
+ hci_dev_unlock(hdev);
+
+ return 0;
+}
+
+static int conn_info_min_age_get(void *data, u64 *val)
+{
+ struct hci_dev *hdev = data;
+
+ hci_dev_lock(hdev);
+ *val = hdev->conn_info_min_age;
+ hci_dev_unlock(hdev);
+
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(conn_info_min_age_fops, conn_info_min_age_get,
+ conn_info_min_age_set, "%llu\n");
+
+static int conn_info_max_age_set(void *data, u64 val)
+{
+ struct hci_dev *hdev = data;
+
+ if (val == 0 || val < hdev->conn_info_min_age)
+ return -EINVAL;
+
+ hci_dev_lock(hdev);
+ hdev->conn_info_max_age = val;
+ hci_dev_unlock(hdev);
+
+ return 0;
+}
+
+static int conn_info_max_age_get(void *data, u64 *val)
+{
+ struct hci_dev *hdev = data;
+
+ hci_dev_lock(hdev);
+ *val = hdev->conn_info_max_age;
+ hci_dev_unlock(hdev);
+
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(conn_info_max_age_fops, conn_info_max_age_get,
+ conn_info_max_age_set, "%llu\n");
+
+static int identity_show(struct seq_file *f, void *p)
+{
+ struct hci_dev *hdev = f->private;
+ bdaddr_t addr;
+ u8 addr_type;
+
+ hci_dev_lock(hdev);
+
+ hci_copy_identity_address(hdev, &addr, &addr_type);
+
+ seq_printf(f, "%pMR (type %u) %*phN %pMR\n", &addr, addr_type,
+ 16, hdev->irk, &hdev->rpa);
+
+ hci_dev_unlock(hdev);
+
+ return 0;
+}
+
+static int identity_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, identity_show, inode->i_private);
+}
+
+static const struct file_operations identity_fops = {
+ .open = identity_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static int random_address_show(struct seq_file *f, void *p)
+{
+ struct hci_dev *hdev = f->private;
+
+ hci_dev_lock(hdev);
+ seq_printf(f, "%pMR\n", &hdev->random_addr);
+ hci_dev_unlock(hdev);
+
+ return 0;
+}
+
+static int random_address_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, random_address_show, inode->i_private);
+}
+
+static const struct file_operations random_address_fops = {
+ .open = random_address_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
static int static_address_show(struct seq_file *f, void *p)
{
struct hci_dev *hdev = f->private;
@@ -522,33 +712,107 @@ static const struct file_operations static_address_fops = {
.release = single_release,
};
-static int own_address_type_set(void *data, u64 val)
+static ssize_t force_static_address_read(struct file *file,
+ char __user *user_buf,
+ size_t count, loff_t *ppos)
{
- struct hci_dev *hdev = data;
+ struct hci_dev *hdev = file->private_data;
+ char buf[3];
- if (val != 0 && val != 1)
+ buf[0] = test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dev_flags) ? 'Y': 'N';
+ buf[1] = '\n';
+ buf[2] = '\0';
+ return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
+}
+
+static ssize_t force_static_address_write(struct file *file,
+ const char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ struct hci_dev *hdev = file->private_data;
+ char buf[32];
+ size_t buf_size = min(count, (sizeof(buf)-1));
+ bool enable;
+
+ if (test_bit(HCI_UP, &hdev->flags))
+ return -EBUSY;
+
+ if (copy_from_user(buf, user_buf, buf_size))
+ return -EFAULT;
+
+ buf[buf_size] = '\0';
+ if (strtobool(buf, &enable))
return -EINVAL;
+ if (enable == test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dev_flags))
+ return -EALREADY;
+
+ change_bit(HCI_FORCE_STATIC_ADDR, &hdev->dev_flags);
+
+ return count;
+}
+
+static const struct file_operations force_static_address_fops = {
+ .open = simple_open,
+ .read = force_static_address_read,
+ .write = force_static_address_write,
+ .llseek = default_llseek,
+};
+
+static int white_list_show(struct seq_file *f, void *ptr)
+{
+ struct hci_dev *hdev = f->private;
+ struct bdaddr_list *b;
+
hci_dev_lock(hdev);
- hdev->own_addr_type = val;
+ list_for_each_entry(b, &hdev->le_white_list, list)
+ seq_printf(f, "%pMR (type %u)\n", &b->bdaddr, b->bdaddr_type);
hci_dev_unlock(hdev);
return 0;
}
-static int own_address_type_get(void *data, u64 *val)
+static int white_list_open(struct inode *inode, struct file *file)
{
- struct hci_dev *hdev = data;
+ return single_open(file, white_list_show, inode->i_private);
+}
+
+static const struct file_operations white_list_fops = {
+ .open = white_list_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static int identity_resolving_keys_show(struct seq_file *f, void *ptr)
+{
+ struct hci_dev *hdev = f->private;
+ struct list_head *p, *n;
hci_dev_lock(hdev);
- *val = hdev->own_addr_type;
+ list_for_each_safe(p, n, &hdev->identity_resolving_keys) {
+ struct smp_irk *irk = list_entry(p, struct smp_irk, list);
+ seq_printf(f, "%pMR (type %u) %*phN %pMR\n",
+ &irk->bdaddr, irk->addr_type,
+ 16, irk->val, &irk->rpa);
+ }
hci_dev_unlock(hdev);
return 0;
}
-DEFINE_SIMPLE_ATTRIBUTE(own_address_type_fops, own_address_type_get,
- own_address_type_set, "%llu\n");
+static int identity_resolving_keys_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, identity_resolving_keys_show,
+ inode->i_private);
+}
+
+static const struct file_operations identity_resolving_keys_fops = {
+ .open = identity_resolving_keys_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
static int long_term_keys_show(struct seq_file *f, void *ptr)
{
@@ -556,12 +820,12 @@ static int long_term_keys_show(struct seq_file *f, void *ptr)
struct list_head *p, *n;
hci_dev_lock(hdev);
- list_for_each_safe(p, n, &hdev->link_keys) {
+ list_for_each_safe(p, n, &hdev->long_term_keys) {
struct smp_ltk *ltk = list_entry(p, struct smp_ltk, list);
- seq_printf(f, "%pMR (type %u) %u %u %u %.4x %*phN %*phN\\n",
+ seq_printf(f, "%pMR (type %u) %u 0x%02x %u %.4x %.16llx %*phN\n",
&ltk->bdaddr, ltk->bdaddr_type, ltk->authenticated,
ltk->type, ltk->enc_size, __le16_to_cpu(ltk->ediv),
- 8, ltk->rand, 16, ltk->val);
+ __le64_to_cpu(ltk->rand), 16, ltk->val);
}
hci_dev_unlock(hdev);
@@ -636,6 +900,34 @@ static int conn_max_interval_get(void *data, u64 *val)
DEFINE_SIMPLE_ATTRIBUTE(conn_max_interval_fops, conn_max_interval_get,
conn_max_interval_set, "%llu\n");
+/* debugfs write handler for "adv_channel_map".
+ *
+ * Only values in the range 0x01..0x07 are accepted; anything else yields
+ * -EINVAL. On success the value is stored in hdev->le_adv_channel_map
+ * under the device lock and 0 is returned.
+ */
+static int adv_channel_map_set(void *data, u64 val)
+{
+	struct hci_dev *dev = data;
+
+	if (val == 0 || val > 0x07)
+		return -EINVAL;
+
+	hci_dev_lock(dev);
+	dev->le_adv_channel_map = val;
+	hci_dev_unlock(dev);
+
+	return 0;
+}
+
+/* debugfs read handler for "adv_channel_map": hand back the current
+ * hdev->le_adv_channel_map value, sampled while holding the device lock.
+ * Always returns 0.
+ */
+static int adv_channel_map_get(void *data, u64 *val)
+{
+	struct hci_dev *dev = data;
+	u64 map;
+
+	hci_dev_lock(dev);
+	map = dev->le_adv_channel_map;
+	hci_dev_unlock(dev);
+
+	*val = map;
+
+	return 0;
+}
+
+/* File operations behind the "adv_channel_map" debugfs entry, built from
+ * the getter/setter pair; the value is formatted as "%llu\n".
+ */
+DEFINE_SIMPLE_ATTRIBUTE(adv_channel_map_fops, adv_channel_map_get,
+ adv_channel_map_set, "%llu\n");
+
static ssize_t lowpan_read(struct file *file, char __user *user_buf,
size_t count, loff_t *ppos)
{
@@ -679,6 +971,110 @@ static const struct file_operations lowpan_debugfs_fops = {
.llseek = default_llseek,
};
+/* seq_file show callback for "le_auto_conn": print one line per entry of
+ * hdev->le_conn_params with its address, address type and auto_connect
+ * value. The list is walked while holding the device lock.
+ */
+static int le_auto_conn_show(struct seq_file *sf, void *ptr)
+{
+	struct hci_dev *dev = sf->private;
+	struct hci_conn_params *entry;
+
+	hci_dev_lock(dev);
+	list_for_each_entry(entry, &dev->le_conn_params, list)
+		seq_printf(sf, "%pMR %u %u\n", &entry->addr,
+			   entry->addr_type, entry->auto_connect);
+	hci_dev_unlock(dev);
+
+	return 0;
+}
+
+/* Open handler for "le_auto_conn": bind le_auto_conn_show() to the file,
+ * passing the inode's private pointer through as seq_file private data.
+ */
+static int le_auto_conn_open(struct inode *inode, struct file *file)
+{
+	void *priv = inode->i_private;
+
+	return single_open(file, le_auto_conn_show, priv);
+}
+
+/* debugfs write handler for "le_auto_conn".
+ *
+ * Accepted commands (the whole command must arrive in a single write at
+ * offset 0):
+ *
+ *   add <bdaddr> <addr_type> [auto_connect]
+ *   del <bdaddr> <addr_type>
+ *   clr
+ *
+ * <bdaddr> is six colon separated hex bytes. auto_connect stays 0 when it
+ * is not supplied with "add".
+ *
+ * Returns @count on success, -EINVAL for a partial or malformed write,
+ * -ENOMEM or -EFAULT when the command cannot be copied from user space,
+ * or the error returned by hci_conn_params_add().
+ */
+static ssize_t le_auto_conn_write(struct file *file, const char __user *data,
+				  size_t count, loff_t *offset)
+{
+	struct seq_file *sf = file->private_data;
+	struct hci_dev *hdev = sf->private;
+	u8 auto_connect = 0;
+	bdaddr_t addr;
+	u8 addr_type;
+	char *buf;
+	int err = 0;
+	int n;
+
+	/* Don't allow partial write */
+	if (*offset != 0)
+		return -EINVAL;
+
+	if (count < 3)
+		return -EINVAL;
+
+	/* sscanf() needs a NUL terminated string, so copy the user data
+	 * into a buffer that has room for the terminator instead of
+	 * parsing a raw, unterminated copy.
+	 */
+	buf = kmalloc(count + 1, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	if (copy_from_user(buf, data, count)) {
+		err = -EFAULT;
+		goto done;
+	}
+
+	buf[count] = '\0';
+
+	if (memcmp(buf, "add", 3) == 0) {
+		/* Arguments start at buf[4]; with fewer than four bytes
+		 * written that index lies beyond the terminator.
+		 */
+		if (count < 4) {
+			err = -EINVAL;
+			goto done;
+		}
+
+		n = sscanf(&buf[4], "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx %hhu %hhu",
+			   &addr.b[5], &addr.b[4], &addr.b[3], &addr.b[2],
+			   &addr.b[1], &addr.b[0], &addr_type,
+			   &auto_connect);
+
+		/* auto_connect is optional, address and type are not */
+		if (n < 7) {
+			err = -EINVAL;
+			goto done;
+		}
+
+		hci_dev_lock(hdev);
+		err = hci_conn_params_add(hdev, &addr, addr_type, auto_connect,
+					  hdev->le_conn_min_interval,
+					  hdev->le_conn_max_interval);
+		hci_dev_unlock(hdev);
+
+		if (err)
+			goto done;
+	} else if (memcmp(buf, "del", 3) == 0) {
+		/* Same bounds requirement as for "add" */
+		if (count < 4) {
+			err = -EINVAL;
+			goto done;
+		}
+
+		n = sscanf(&buf[4], "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx %hhu",
+			   &addr.b[5], &addr.b[4], &addr.b[3], &addr.b[2],
+			   &addr.b[1], &addr.b[0], &addr_type);
+
+		if (n < 7) {
+			err = -EINVAL;
+			goto done;
+		}
+
+		hci_dev_lock(hdev);
+		hci_conn_params_del(hdev, &addr, addr_type);
+		hci_dev_unlock(hdev);
+	} else if (memcmp(buf, "clr", 3) == 0) {
+		hci_dev_lock(hdev);
+		hci_conn_params_clear(hdev);
+		hci_pend_le_conns_clear(hdev);
+		hci_update_background_scan(hdev);
+		hci_dev_unlock(hdev);
+	} else {
+		err = -EINVAL;
+	}
+
+done:
+	kfree(buf);
+
+	if (err)
+		return err;
+	else
+		return count;
+}
+
+/* Operations for the "le_auto_conn" debugfs file: seq_file based reads
+ * plus the command parser in le_auto_conn_write() for writes.
+ */
+static const struct file_operations le_auto_conn_fops = {
+	.release	= single_release,
+	.llseek		= seq_lseek,
+	.write		= le_auto_conn_write,
+	.read		= seq_read,
+	.open		= le_auto_conn_open,
+};
+
/* ---- HCI requests ---- */
static void hci_req_sync_complete(struct hci_dev *hdev, u8 result)
@@ -1005,7 +1401,7 @@ static void bredr_setup(struct hci_request *req)
hci_req_add(req, HCI_OP_SET_EVENT_FLT, 1, &flt_type);
/* Connection accept timeout ~20 secs */
- param = __constant_cpu_to_le16(0x7d00);
+ param = cpu_to_le16(0x7d00);
hci_req_add(req, HCI_OP_WRITE_CA_TIMEOUT, 2, &param);
/* AVM Berlin (31), aka "BlueFRITZ!", reports version 1.2,
@@ -1027,14 +1423,17 @@ static void le_setup(struct hci_request *req)
/* Read LE Local Supported Features */
hci_req_add(req, HCI_OP_LE_READ_LOCAL_FEATURES, 0, NULL);
+ /* Read LE Supported States */
+ hci_req_add(req, HCI_OP_LE_READ_SUPPORTED_STATES, 0, NULL);
+
/* Read LE Advertising Channel TX Power */
hci_req_add(req, HCI_OP_LE_READ_ADV_TX_POWER, 0, NULL);
/* Read LE White List Size */
hci_req_add(req, HCI_OP_LE_READ_WHITE_LIST_SIZE, 0, NULL);
- /* Read LE Supported States */
- hci_req_add(req, HCI_OP_LE_READ_SUPPORTED_STATES, 0, NULL);
+ /* Clear LE White List */
+ hci_req_add(req, HCI_OP_LE_CLEAR_WHITE_LIST, 0, NULL);
/* LE-only controllers have LE implicitly enabled */
if (!lmp_bredr_capable(hdev))
@@ -1288,6 +1687,10 @@ static void hci_set_event_mask_page_2(struct hci_request *req)
events[2] |= 0x08; /* Truncated Page Complete */
}
+ /* Enable Authenticated Payload Timeout Expired event if supported */
+ if (lmp_ping_capable(hdev))
+ events[2] |= 0x80;
+
hci_req_add(req, HCI_OP_SET_EVENT_MASK_PAGE_2, sizeof(events), events);
}
@@ -1322,21 +1725,8 @@ static void hci_init3_req(struct hci_request *req, unsigned long opt)
if (hdev->commands[5] & 0x10)
hci_setup_link_policy(req);
- if (lmp_le_capable(hdev)) {
- if (test_bit(HCI_SETUP, &hdev->dev_flags)) {
- /* If the controller has a public BD_ADDR, then
- * by default use that one. If this is a LE only
- * controller without a public address, default
- * to the random address.
- */
- if (bacmp(&hdev->bdaddr, BDADDR_ANY))
- hdev->own_addr_type = ADDR_LE_DEV_PUBLIC;
- else
- hdev->own_addr_type = ADDR_LE_DEV_RANDOM;
- }
-
+ if (lmp_le_capable(hdev))
hci_set_le_support(req);
- }
/* Read features beyond page 1 if available */
for (p = 2; p < HCI_MAX_PAGES && p <= hdev->max_page; p++) {
@@ -1359,6 +1749,15 @@ static void hci_init4_req(struct hci_request *req, unsigned long opt)
/* Check for Synchronization Train support */
if (lmp_sync_train_capable(hdev))
hci_req_add(req, HCI_OP_READ_SYNC_TRAIN_PARAMS, 0, NULL);
+
+ /* Enable Secure Connections if supported and configured */
+ if ((lmp_sc_capable(hdev) ||
+ test_bit(HCI_FORCE_SC, &hdev->dev_flags)) &&
+ test_bit(HCI_SC_ENABLED, &hdev->dev_flags)) {
+ u8 support = 0x01;
+ hci_req_add(req, HCI_OP_WRITE_SC_SUPPORT,
+ sizeof(support), &support);
+ }
}
static int __hci_init(struct hci_dev *hdev)
@@ -1412,13 +1811,16 @@ static int __hci_init(struct hci_dev *hdev)
&blacklist_fops);
debugfs_create_file("uuids", 0444, hdev->debugfs, hdev, &uuids_fops);
+ debugfs_create_file("conn_info_min_age", 0644, hdev->debugfs, hdev,
+ &conn_info_min_age_fops);
+ debugfs_create_file("conn_info_max_age", 0644, hdev->debugfs, hdev,
+ &conn_info_max_age_fops);
+
if (lmp_bredr_capable(hdev)) {
debugfs_create_file("inquiry_cache", 0444, hdev->debugfs,
hdev, &inquiry_cache_fops);
debugfs_create_file("link_keys", 0400, hdev->debugfs,
hdev, &link_keys_fops);
- debugfs_create_file("use_debug_keys", 0444, hdev->debugfs,
- hdev, &use_debug_keys_fops);
debugfs_create_file("dev_class", 0444, hdev->debugfs,
hdev, &dev_class_fops);
debugfs_create_file("voice_setting", 0444, hdev->debugfs,
@@ -1430,6 +1832,10 @@ static int __hci_init(struct hci_dev *hdev)
hdev, &auto_accept_delay_fops);
debugfs_create_file("ssp_debug_mode", 0644, hdev->debugfs,
hdev, &ssp_debug_mode_fops);
+ debugfs_create_file("force_sc_support", 0644, hdev->debugfs,
+ hdev, &force_sc_support_fops);
+ debugfs_create_file("sc_only_mode", 0444, hdev->debugfs,
+ hdev, &sc_only_mode_fops);
}
if (lmp_sniff_capable(hdev)) {
@@ -1442,20 +1848,46 @@ static int __hci_init(struct hci_dev *hdev)
}
if (lmp_le_capable(hdev)) {
+ debugfs_create_file("identity", 0400, hdev->debugfs,
+ hdev, &identity_fops);
+ debugfs_create_file("rpa_timeout", 0644, hdev->debugfs,
+ hdev, &rpa_timeout_fops);
+ debugfs_create_file("random_address", 0444, hdev->debugfs,
+ hdev, &random_address_fops);
+ debugfs_create_file("static_address", 0444, hdev->debugfs,
+ hdev, &static_address_fops);
+
+ /* For controllers with a public address, provide a debug
+ * option to force the usage of the configured static
+ * address. By default the public address is used.
+ */
+ if (bacmp(&hdev->bdaddr, BDADDR_ANY))
+ debugfs_create_file("force_static_address", 0644,
+ hdev->debugfs, hdev,
+ &force_static_address_fops);
+
debugfs_create_u8("white_list_size", 0444, hdev->debugfs,
&hdev->le_white_list_size);
- debugfs_create_file("static_address", 0444, hdev->debugfs,
- hdev, &static_address_fops);
- debugfs_create_file("own_address_type", 0644, hdev->debugfs,
- hdev, &own_address_type_fops);
+ debugfs_create_file("white_list", 0444, hdev->debugfs, hdev,
+ &white_list_fops);
+ debugfs_create_file("identity_resolving_keys", 0400,
+ hdev->debugfs, hdev,
+ &identity_resolving_keys_fops);
debugfs_create_file("long_term_keys", 0400, hdev->debugfs,
hdev, &long_term_keys_fops);
debugfs_create_file("conn_min_interval", 0644, hdev->debugfs,
hdev, &conn_min_interval_fops);
debugfs_create_file("conn_max_interval", 0644, hdev->debugfs,
hdev, &conn_max_interval_fops);
+ debugfs_create_file("adv_channel_map", 0644, hdev->debugfs,
+ hdev, &adv_channel_map_fops);
debugfs_create_file("6lowpan", 0644, hdev->debugfs, hdev,
&lowpan_debugfs_fops);
+ debugfs_create_file("le_auto_conn", 0644, hdev->debugfs, hdev,
+ &le_auto_conn_fops);
+ debugfs_create_u16("discov_interleaved_timeout", 0644,
+ hdev->debugfs,
+ &hdev->discov_interleaved_timeout);
}
return 0;
@@ -1548,6 +1980,8 @@ void hci_discovery_set_state(struct hci_dev *hdev, int state)
switch (state) {
case DISCOVERY_STOPPED:
+ hci_update_background_scan(hdev);
+
if (hdev->discovery.state != DISCOVERY_STARTING)
mgmt_discovering(hdev, 0);
break;
@@ -1659,12 +2093,11 @@ bool hci_inquiry_cache_update(struct hci_dev *hdev, struct inquiry_data *data,
hci_remove_remote_oob_data(hdev, &data->bdaddr);
- if (ssp)
- *ssp = data->ssp_mode;
+ *ssp = data->ssp_mode;
ie = hci_inquiry_cache_lookup(hdev, &data->bdaddr);
if (ie) {
- if (ie->data.ssp_mode && ssp)
+ if (ie->data.ssp_mode)
*ssp = true;
if (ie->name_state == NAME_NEEDED &&
@@ -1876,10 +2309,15 @@ static int hci_dev_do_open(struct hci_dev *hdev)
* be able to determine if there is a public address
* or not.
*
+ * In case of user channel usage, it is not important
+ * if a public address or static random address is
+ * available.
+ *
* This check is only valid for BR/EDR controllers
* since AMP controllers do not have an address.
*/
- if (hdev->dev_type == HCI_BREDR &&
+ if (!test_bit(HCI_USER_CHANNEL, &hdev->dev_flags) &&
+ hdev->dev_type == HCI_BREDR &&
!bacmp(&hdev->bdaddr, BDADDR_ANY) &&
!bacmp(&hdev->static_addr, BDADDR_ANY)) {
ret = -EADDRNOTAVAIL;
@@ -1916,6 +2354,7 @@ static int hci_dev_do_open(struct hci_dev *hdev)
if (!ret) {
hci_dev_hold(hdev);
+ set_bit(HCI_RPA_EXPIRED, &hdev->dev_flags);
set_bit(HCI_UP, &hdev->flags);
hci_notify(hdev, HCI_DEV_UP);
if (!test_bit(HCI_SETUP, &hdev->dev_flags) &&
@@ -2014,9 +2453,13 @@ static int hci_dev_do_close(struct hci_dev *hdev)
cancel_delayed_work_sync(&hdev->le_scan_disable);
+ if (test_bit(HCI_MGMT, &hdev->dev_flags))
+ cancel_delayed_work_sync(&hdev->rpa_expired);
+
hci_dev_lock(hdev);
hci_inquiry_cache_flush(hdev);
hci_conn_hash_flush(hdev);
+ hci_pend_le_conns_clear(hdev);
hci_dev_unlock(hdev);
hci_notify(hdev, HCI_DEV_DOWN);
@@ -2074,6 +2517,7 @@ static int hci_dev_do_close(struct hci_dev *hdev)
memset(hdev->eir, 0, sizeof(hdev->eir));
memset(hdev->dev_class, 0, sizeof(hdev->dev_class));
+ bacpy(&hdev->random_addr, BDADDR_ANY);
hci_req_unlock(hdev);
@@ -2437,7 +2881,7 @@ static void hci_discov_off(struct work_struct *work)
mgmt_discoverable_timeout(hdev);
}
-int hci_uuids_clear(struct hci_dev *hdev)
+void hci_uuids_clear(struct hci_dev *hdev)
{
struct bt_uuid *uuid, *tmp;
@@ -2445,11 +2889,9 @@ int hci_uuids_clear(struct hci_dev *hdev)
list_del(&uuid->list);
kfree(uuid);
}
-
- return 0;
}
-int hci_link_keys_clear(struct hci_dev *hdev)
+void hci_link_keys_clear(struct hci_dev *hdev)
{
struct list_head *p, *n;
@@ -2461,11 +2903,9 @@ int hci_link_keys_clear(struct hci_dev *hdev)
list_del(p);
kfree(key);
}
-
- return 0;
}
-int hci_smp_ltks_clear(struct hci_dev *hdev)
+void hci_smp_ltks_clear(struct hci_dev *hdev)
{
struct smp_ltk *k, *tmp;
@@ -2473,8 +2913,16 @@ int hci_smp_ltks_clear(struct hci_dev *hdev)
list_del(&k->list);
kfree(k);
}
+}
- return 0;
+void hci_smp_irks_clear(struct hci_dev *hdev)
+{
+ struct smp_irk *k, *tmp;
+
+ list_for_each_entry_safe(k, tmp, &hdev->identity_resolving_keys, list) {
+ list_del(&k->list);
+ kfree(k);
+ }
}
struct link_key *hci_find_link_key(struct hci_dev *hdev, bdaddr_t *bdaddr)
@@ -2524,13 +2972,24 @@ static bool hci_persistent_key(struct hci_dev *hdev, struct hci_conn *conn,
return false;
}
-struct smp_ltk *hci_find_ltk(struct hci_dev *hdev, __le16 ediv, u8 rand[8])
+/* Tell whether @type is one of the key types this file treats as a
+ * "master" key, i.e. HCI_SMP_STK or HCI_SMP_LTK.
+ */
+static bool ltk_type_master(u8 type)
+{
+	return type == HCI_SMP_STK || type == HCI_SMP_LTK;
+}
+
+struct smp_ltk *hci_find_ltk(struct hci_dev *hdev, __le16 ediv, __le64 rand,
+ bool master)
{
struct smp_ltk *k;
list_for_each_entry(k, &hdev->long_term_keys, list) {
- if (k->ediv != ediv ||
- memcmp(rand, k->rand, sizeof(k->rand)))
+ if (k->ediv != ediv || k->rand != rand)
+ continue;
+
+ if (ltk_type_master(k->type) != master)
continue;
return k;
@@ -2540,18 +2999,56 @@ struct smp_ltk *hci_find_ltk(struct hci_dev *hdev, __le16 ediv, u8 rand[8])
}
struct smp_ltk *hci_find_ltk_by_addr(struct hci_dev *hdev, bdaddr_t *bdaddr,
- u8 addr_type)
+ u8 addr_type, bool master)
{
struct smp_ltk *k;
list_for_each_entry(k, &hdev->long_term_keys, list)
if (addr_type == k->bdaddr_type &&
- bacmp(bdaddr, &k->bdaddr) == 0)
+ bacmp(bdaddr, &k->bdaddr) == 0 &&
+ ltk_type_master(k->type) == master)
return k;
return NULL;
}
+/* Find the IRK entry belonging to the address @rpa.
+ *
+ * A first pass looks for an entry whose stored rpa already equals @rpa.
+ * Only if none is found does a second pass test every key with
+ * smp_irk_matches(); on a match @rpa is copied into the entry before it
+ * is returned. Returns NULL when no entry matches.
+ */
+struct smp_irk *hci_find_irk_by_rpa(struct hci_dev *hdev, bdaddr_t *rpa)
+{
+	struct smp_irk *key;
+
+	list_for_each_entry(key, &hdev->identity_resolving_keys, list) {
+		if (bacmp(&key->rpa, rpa) == 0)
+			return key;
+	}
+
+	list_for_each_entry(key, &hdev->identity_resolving_keys, list) {
+		if (!smp_irk_matches(hdev->tfm_aes, key->val, rpa))
+			continue;
+
+		bacpy(&key->rpa, rpa);
+		return key;
+	}
+
+	return NULL;
+}
+
+/* Find the IRK entry stored for the address @bdaddr of type @addr_type.
+ *
+ * Returns NULL without searching when @addr_type is ADDR_LE_DEV_RANDOM and
+ * the two top bits of the address are not both set, and NULL when no
+ * entry matches.
+ */
+struct smp_irk *hci_find_irk_by_addr(struct hci_dev *hdev, bdaddr_t *bdaddr,
+				     u8 addr_type)
+{
+	struct smp_irk *key;
+
+	/* Identity Address must be public or static random */
+	if (addr_type == ADDR_LE_DEV_RANDOM) {
+		if ((bdaddr->b[5] & 0xc0) != 0xc0)
+			return NULL;
+	}
+
+	list_for_each_entry(key, &hdev->identity_resolving_keys, list) {
+		if (key->addr_type != addr_type)
+			continue;
+
+		if (!bacmp(bdaddr, &key->bdaddr))
+			return key;
+	}
+
+	return NULL;
+}
+
int hci_add_link_key(struct hci_dev *hdev, struct hci_conn *conn, int new_key,
bdaddr_t *bdaddr, u8 *val, u8 type, u8 pin_len)
{
@@ -2565,7 +3062,7 @@ int hci_add_link_key(struct hci_dev *hdev, struct hci_conn *conn, int new_key,
key = old_key;
} else {
old_key_type = conn ? conn->key_type : 0xff;
- key = kzalloc(sizeof(*key), GFP_ATOMIC);
+ key = kzalloc(sizeof(*key), GFP_KERNEL);
if (!key)
return -ENOMEM;
list_add(&key->list, &hdev->link_keys);
@@ -2605,22 +3102,20 @@ int hci_add_link_key(struct hci_dev *hdev, struct hci_conn *conn, int new_key,
return 0;
}
-int hci_add_ltk(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 addr_type, u8 type,
- int new_key, u8 authenticated, u8 tk[16], u8 enc_size, __le16
- ediv, u8 rand[8])
+struct smp_ltk *hci_add_ltk(struct hci_dev *hdev, bdaddr_t *bdaddr,
+ u8 addr_type, u8 type, u8 authenticated,
+ u8 tk[16], u8 enc_size, __le16 ediv, __le64 rand)
{
struct smp_ltk *key, *old_key;
+ bool master = ltk_type_master(type);
- if (!(type & HCI_SMP_STK) && !(type & HCI_SMP_LTK))
- return 0;
-
- old_key = hci_find_ltk_by_addr(hdev, bdaddr, addr_type);
+ old_key = hci_find_ltk_by_addr(hdev, bdaddr, addr_type, master);
if (old_key)
key = old_key;
else {
- key = kzalloc(sizeof(*key), GFP_ATOMIC);
+ key = kzalloc(sizeof(*key), GFP_KERNEL);
if (!key)
- return -ENOMEM;
+ return NULL;
list_add(&key->list, &hdev->long_term_keys);
}
@@ -2629,17 +3124,34 @@ int hci_add_ltk(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 addr_type, u8 type,
memcpy(key->val, tk, sizeof(key->val));
key->authenticated = authenticated;
key->ediv = ediv;
+ key->rand = rand;
key->enc_size = enc_size;
key->type = type;
- memcpy(key->rand, rand, sizeof(key->rand));
- if (!new_key)
- return 0;
+ return key;
+}
- if (type & HCI_SMP_LTK)
- mgmt_new_ltk(hdev, key, 1);
+struct smp_irk *hci_add_irk(struct hci_dev *hdev, bdaddr_t *bdaddr,
+ u8 addr_type, u8 val[16], bdaddr_t *rpa)
+{
+ struct smp_irk *irk;
- return 0;
+ irk = hci_find_irk_by_addr(hdev, bdaddr, addr_type);
+ if (!irk) {
+ irk = kzalloc(sizeof(*irk), GFP_KERNEL);
+ if (!irk)
+ return NULL;
+
+ bacpy(&irk->bdaddr, bdaddr);
+ irk->addr_type = addr_type;
+
+ list_add(&irk->list, &hdev->identity_resolving_keys);
+ }
+
+ memcpy(irk->val, val, 16);
+ bacpy(&irk->rpa, rpa);
+
+ return irk;
}
int hci_remove_link_key(struct hci_dev *hdev, bdaddr_t *bdaddr)
@@ -2658,21 +3170,38 @@ int hci_remove_link_key(struct hci_dev *hdev, bdaddr_t *bdaddr)
return 0;
}
-int hci_remove_ltk(struct hci_dev *hdev, bdaddr_t *bdaddr)
+int hci_remove_ltk(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 bdaddr_type)
{
struct smp_ltk *k, *tmp;
+ int removed = 0;
list_for_each_entry_safe(k, tmp, &hdev->long_term_keys, list) {
- if (bacmp(bdaddr, &k->bdaddr))
+ if (bacmp(bdaddr, &k->bdaddr) || k->bdaddr_type != bdaddr_type)
continue;
BT_DBG("%s removing %pMR", hdev->name, bdaddr);
list_del(&k->list);
kfree(k);
+ removed++;
}
- return 0;
+ return removed ? 0 : -ENOENT;
+}
+
+void hci_remove_irk(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 addr_type)
+{
+ struct smp_irk *k, *tmp;
+
+ list_for_each_entry_safe(k, tmp, &hdev->identity_resolving_keys, list) {
+ if (bacmp(bdaddr, &k->bdaddr) || k->addr_type != addr_type)
+ continue;
+
+ BT_DBG("%s removing %pMR", hdev->name, bdaddr);
+
+ list_del(&k->list);
+ kfree(k);
+ }
}
/* HCI command timer function */
@@ -2721,7 +3250,7 @@ int hci_remove_remote_oob_data(struct hci_dev *hdev, bdaddr_t *bdaddr)
return 0;
}
-int hci_remote_oob_data_clear(struct hci_dev *hdev)
+void hci_remote_oob_data_clear(struct hci_dev *hdev)
{
struct oob_data *data, *n;
@@ -2729,19 +3258,43 @@ int hci_remote_oob_data_clear(struct hci_dev *hdev)
list_del(&data->list);
kfree(data);
}
+}
+
+int hci_add_remote_oob_data(struct hci_dev *hdev, bdaddr_t *bdaddr,
+ u8 *hash, u8 *randomizer)
+{
+ struct oob_data *data;
+
+ data = hci_find_remote_oob_data(hdev, bdaddr);
+ if (!data) {
+ data = kmalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ bacpy(&data->bdaddr, bdaddr);
+ list_add(&data->list, &hdev->remote_oob_data);
+ }
+
+ memcpy(data->hash192, hash, sizeof(data->hash192));
+ memcpy(data->randomizer192, randomizer, sizeof(data->randomizer192));
+
+ memset(data->hash256, 0, sizeof(data->hash256));
+ memset(data->randomizer256, 0, sizeof(data->randomizer256));
+
+ BT_DBG("%s for %pMR", hdev->name, bdaddr);
return 0;
}
-int hci_add_remote_oob_data(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 *hash,
- u8 *randomizer)
+int hci_add_remote_oob_ext_data(struct hci_dev *hdev, bdaddr_t *bdaddr,
+ u8 *hash192, u8 *randomizer192,
+ u8 *hash256, u8 *randomizer256)
{
struct oob_data *data;
data = hci_find_remote_oob_data(hdev, bdaddr);
-
if (!data) {
- data = kmalloc(sizeof(*data), GFP_ATOMIC);
+ data = kmalloc(sizeof(*data), GFP_KERNEL);
if (!data)
return -ENOMEM;
@@ -2749,8 +3302,11 @@ int hci_add_remote_oob_data(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 *hash,
list_add(&data->list, &hdev->remote_oob_data);
}
- memcpy(data->hash, hash, sizeof(data->hash));
- memcpy(data->randomizer, randomizer, sizeof(data->randomizer));
+ memcpy(data->hash192, hash192, sizeof(data->hash192));
+ memcpy(data->randomizer192, randomizer192, sizeof(data->randomizer192));
+
+ memcpy(data->hash256, hash256, sizeof(data->hash256));
+ memcpy(data->randomizer256, randomizer256, sizeof(data->randomizer256));
BT_DBG("%s for %pMR", hdev->name, bdaddr);
@@ -2770,7 +3326,7 @@ struct bdaddr_list *hci_blacklist_lookup(struct hci_dev *hdev,
return NULL;
}
-int hci_blacklist_clear(struct hci_dev *hdev)
+static void hci_blacklist_clear(struct hci_dev *hdev)
{
struct list_head *p, *n;
@@ -2780,8 +3336,6 @@ int hci_blacklist_clear(struct hci_dev *hdev)
list_del(p);
kfree(b);
}
-
- return 0;
}
int hci_blacklist_add(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type)
@@ -2810,8 +3364,10 @@ int hci_blacklist_del(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type)
{
struct bdaddr_list *entry;
- if (!bacmp(bdaddr, BDADDR_ANY))
- return hci_blacklist_clear(hdev);
+ if (!bacmp(bdaddr, BDADDR_ANY)) {
+ hci_blacklist_clear(hdev);
+ return 0;
+ }
entry = hci_blacklist_lookup(hdev, bdaddr, type);
if (!entry)
@@ -2823,6 +3379,262 @@ int hci_blacklist_del(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type)
return mgmt_device_unblocked(hdev, bdaddr, type);
}
+/* Return the hdev->le_white_list entry matching both @bdaddr and @type,
+ * or NULL if there is none.
+ */
+struct bdaddr_list *hci_white_list_lookup(struct hci_dev *hdev,
+					  bdaddr_t *bdaddr, u8 type)
+{
+	struct bdaddr_list *item;
+
+	list_for_each_entry(item, &hdev->le_white_list, list) {
+		if (item->bdaddr_type != type)
+			continue;
+
+		if (bacmp(&item->bdaddr, bdaddr) == 0)
+			return item;
+	}
+
+	return NULL;
+}
+
+/* Unlink and free every entry on hdev->le_white_list. */
+void hci_white_list_clear(struct hci_dev *hdev)
+{
+	struct bdaddr_list *item, *next;
+
+	list_for_each_entry_safe(item, next, &hdev->le_white_list, list) {
+		list_del(&item->list);
+		kfree(item);
+	}
+}
+
+/* Allocate a new entry for @bdaddr / @type and put it on
+ * hdev->le_white_list.
+ *
+ * Returns 0 on success, -EBADF if @bdaddr is BDADDR_ANY and -ENOMEM if the
+ * entry cannot be allocated.
+ */
+int hci_white_list_add(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type)
+{
+	struct bdaddr_list *item;
+
+	if (bacmp(bdaddr, BDADDR_ANY) == 0)
+		return -EBADF;
+
+	item = kzalloc(sizeof(*item), GFP_KERNEL);
+	if (item == NULL)
+		return -ENOMEM;
+
+	item->bdaddr_type = type;
+	bacpy(&item->bdaddr, bdaddr);
+
+	list_add(&item->list, &hdev->le_white_list);
+
+	return 0;
+}
+
+/* Remove the hdev->le_white_list entry matching @bdaddr / @type.
+ *
+ * Returns 0 on success, -EBADF if @bdaddr is BDADDR_ANY and -ENOENT if no
+ * matching entry exists.
+ */
+int hci_white_list_del(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type)
+{
+	struct bdaddr_list *item;
+
+	if (bacmp(bdaddr, BDADDR_ANY) == 0)
+		return -EBADF;
+
+	item = hci_white_list_lookup(hdev, bdaddr, type);
+	if (item == NULL)
+		return -ENOENT;
+
+	list_del(&item->list);
+	kfree(item);
+
+	return 0;
+}
+
+/* Return the hdev->le_conn_params entry matching both @addr and
+ * @addr_type, or NULL if there is none.
+ *
+ * The caller must hold hdev->lock.
+ */
+struct hci_conn_params *hci_conn_params_lookup(struct hci_dev *hdev,
+					       bdaddr_t *addr, u8 addr_type)
+{
+	struct hci_conn_params *p;
+
+	list_for_each_entry(p, &hdev->le_conn_params, list) {
+		if (p->addr_type != addr_type)
+			continue;
+
+		if (!bacmp(&p->addr, addr))
+			return p;
+	}
+
+	return NULL;
+}
+
+/* Tell whether an LE_LINK connection to @addr exists whose destination
+ * type equals @type and whose state is BT_CONNECTED.
+ */
+static bool is_connected(struct hci_dev *hdev, bdaddr_t *addr, u8 type)
+{
+	struct hci_conn *link = hci_conn_hash_lookup_ba(hdev, LE_LINK, addr);
+
+	return link && link->dst_type == type &&
+	       link->state == BT_CONNECTED;
+}
+
+/* Tell whether @addr / @addr_type may serve as an identity address: true
+ * for ADDR_LE_DEV_PUBLIC, and otherwise only when the two top bits of the
+ * address are both set.
+ */
+static bool is_identity_address(bdaddr_t *addr, u8 addr_type)
+{
+	/* The 0xc0 test checks for the Random Static address type */
+	return addr_type == ADDR_LE_DEV_PUBLIC ||
+	       (addr->b[5] & 0xc0) == 0xc0;
+}
+
+/* Create or update the connection parameters stored for @addr /
+ * @addr_type.
+ *
+ * An existing hdev->le_conn_params entry is reused, otherwise a new one is
+ * allocated and linked in. The intervals and @auto_connect are then
+ * stored and the pending LE connection list is adjusted to match
+ * @auto_connect.
+ *
+ * Returns 0 on success, -EINVAL if the address is not an identity address
+ * and -ENOMEM if a new entry cannot be allocated.
+ *
+ * The caller must hold hdev->lock.
+ */
+int hci_conn_params_add(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type,
+			u8 auto_connect, u16 conn_min_interval,
+			u16 conn_max_interval)
+{
+	struct hci_conn_params *p;
+
+	if (!is_identity_address(addr, addr_type))
+		return -EINVAL;
+
+	p = hci_conn_params_lookup(hdev, addr, addr_type);
+	if (!p) {
+		p = kzalloc(sizeof(*p), GFP_KERNEL);
+		if (!p) {
+			BT_ERR("Out of memory");
+			return -ENOMEM;
+		}
+
+		bacpy(&p->addr, addr);
+		p->addr_type = addr_type;
+
+		list_add(&p->list, &hdev->le_conn_params);
+	}
+
+	p->conn_min_interval = conn_min_interval;
+	p->conn_max_interval = conn_max_interval;
+	p->auto_connect = auto_connect;
+
+	if (auto_connect == HCI_AUTO_CONN_ALWAYS) {
+		/* Only queue a pending connection if not connected yet */
+		if (!is_connected(hdev, addr, addr_type))
+			hci_pend_le_conn_add(hdev, addr, addr_type);
+	} else if (auto_connect == HCI_AUTO_CONN_DISABLED ||
+		   auto_connect == HCI_AUTO_CONN_LINK_LOSS) {
+		hci_pend_le_conn_del(hdev, addr, addr_type);
+	}
+
+	BT_DBG("addr %pMR (type %u) auto_connect %u conn_min_interval 0x%.4x "
+	       "conn_max_interval 0x%.4x", addr, addr_type, auto_connect,
+	       conn_min_interval, conn_max_interval);
+
+	return 0;
+}
+
+/* Drop the connection parameters stored for @addr / @addr_type, together
+ * with any pending LE connection for the same address. Does nothing when
+ * no parameters are stored.
+ *
+ * The caller must hold hdev->lock.
+ */
+void hci_conn_params_del(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type)
+{
+	struct hci_conn_params *p;
+
+	p = hci_conn_params_lookup(hdev, addr, addr_type);
+	if (p == NULL)
+		return;
+
+	hci_pend_le_conn_del(hdev, addr, addr_type);
+
+	list_del(&p->list);
+	kfree(p);
+
+	BT_DBG("addr %pMR (type %u)", addr, addr_type);
+}
+
+/* Unlink and free every entry on hdev->le_conn_params.
+ *
+ * The caller must hold hdev->lock.
+ */
+void hci_conn_params_clear(struct hci_dev *hdev)
+{
+	struct list_head *pos, *next;
+
+	list_for_each_safe(pos, next, &hdev->le_conn_params) {
+		struct hci_conn_params *p;
+
+		p = list_entry(pos, struct hci_conn_params, list);
+
+		list_del(&p->list);
+		kfree(p);
+	}
+
+	BT_DBG("All LE connection parameters were removed");
+}
+
+/* Return the hdev->pend_le_conns entry matching both @addr and
+ * @addr_type, or NULL if there is none.
+ *
+ * The caller must hold hdev->lock.
+ */
+struct bdaddr_list *hci_pend_le_conn_lookup(struct hci_dev *hdev,
+					    bdaddr_t *addr, u8 addr_type)
+{
+	struct bdaddr_list *item;
+
+	list_for_each_entry(item, &hdev->pend_le_conns, list) {
+		if (item->bdaddr_type != addr_type)
+			continue;
+
+		if (!bacmp(&item->bdaddr, addr))
+			return item;
+	}
+
+	return NULL;
+}
+
+/* Make sure @addr / @addr_type is on hdev->pend_le_conns and then call
+ * hci_update_background_scan().
+ *
+ * If a new entry is needed but cannot be allocated, an error is logged
+ * and the function returns without updating the background scan.
+ *
+ * The caller must hold hdev->lock.
+ */
+void hci_pend_le_conn_add(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type)
+{
+	struct bdaddr_list *item;
+
+	if (!hci_pend_le_conn_lookup(hdev, addr, addr_type)) {
+		item = kzalloc(sizeof(*item), GFP_KERNEL);
+		if (!item) {
+			BT_ERR("Out of memory");
+			return;
+		}
+
+		bacpy(&item->bdaddr, addr);
+		item->bdaddr_type = addr_type;
+
+		list_add(&item->list, &hdev->pend_le_conns);
+
+		BT_DBG("addr %pMR (type %u)", addr, addr_type);
+	}
+
+	hci_update_background_scan(hdev);
+}
+
+/* Remove @addr / @addr_type from hdev->pend_le_conns if it is present and
+ * then, in either case, call hci_update_background_scan().
+ *
+ * The caller must hold hdev->lock.
+ */
+void hci_pend_le_conn_del(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type)
+{
+	struct bdaddr_list *item;
+
+	item = hci_pend_le_conn_lookup(hdev, addr, addr_type);
+	if (item) {
+		list_del(&item->list);
+		kfree(item);
+
+		BT_DBG("addr %pMR (type %u)", addr, addr_type);
+	}
+
+	hci_update_background_scan(hdev);
+}
+
+/* Unlink and free every entry on hdev->pend_le_conns.
+ *
+ * The caller must hold hdev->lock.
+ */
+void hci_pend_le_conns_clear(struct hci_dev *hdev)
+{
+	struct list_head *pos, *next;
+
+	list_for_each_safe(pos, next, &hdev->pend_le_conns) {
+		struct bdaddr_list *item;
+
+		item = list_entry(pos, struct bdaddr_list, list);
+
+		list_del(&item->list);
+		kfree(item);
+	}
+
+	BT_DBG("All LE pending connections cleared");
+}
+
static void inquiry_complete(struct hci_dev *hdev, u8 status)
{
if (status) {
@@ -2882,7 +3694,6 @@ static void le_scan_disable_work(struct work_struct *work)
{
struct hci_dev *hdev = container_of(work, struct hci_dev,
le_scan_disable.work);
- struct hci_cp_le_set_scan_enable cp;
struct hci_request req;
int err;
@@ -2890,15 +3701,128 @@ static void le_scan_disable_work(struct work_struct *work)
hci_req_init(&req, hdev);
- memset(&cp, 0, sizeof(cp));
- cp.enable = LE_SCAN_DISABLE;
- hci_req_add(&req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp);
+ hci_req_add_le_scan_disable(&req);
err = hci_req_run(&req, le_scan_disable_work_complete);
if (err)
BT_ERR("Disable LE scanning request failed: err %d", err);
}
+static void set_random_addr(struct hci_request *req, bdaddr_t *rpa)
+{
+ struct hci_dev *hdev = req->hdev;
+
+ /* If we're advertising or initiating an LE connection we can't
+ * go ahead and change the random address at this time. This is
+ * because the eventual initiator address used for the
+ * subsequently created connection will be undefined (some
+ * controllers use the new address and others the one we had
+ * when the operation started).
+ *
+ * In this kind of scenario skip the update and let the random
+ * address be updated at the next cycle.
+ */
+ if (test_bit(HCI_ADVERTISING, &hdev->dev_flags) ||
+ hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT)) {
+ BT_DBG("Deferring random address update");
+ return;
+ }
+
+ hci_req_add(req, HCI_OP_LE_SET_RANDOM_ADDR, 6, rpa);
+}
+
+int hci_update_random_address(struct hci_request *req, bool require_privacy,
+ u8 *own_addr_type)
+{
+ struct hci_dev *hdev = req->hdev;
+ int err;
+
+ /* If privacy is enabled use a resolvable private address. If
+ * current RPA has expired or there is something other than
+ * the current RPA in use, then generate a new one.
+ */
+ if (test_bit(HCI_PRIVACY, &hdev->dev_flags)) {
+ int to;
+
+ *own_addr_type = ADDR_LE_DEV_RANDOM;
+
+ if (!test_and_clear_bit(HCI_RPA_EXPIRED, &hdev->dev_flags) &&
+ !bacmp(&hdev->random_addr, &hdev->rpa))
+ return 0;
+
+ err = smp_generate_rpa(hdev->tfm_aes, hdev->irk, &hdev->rpa);
+ if (err < 0) {
+ BT_ERR("%s failed to generate new RPA", hdev->name);
+ return err;
+ }
+
+ set_random_addr(req, &hdev->rpa);
+
+ to = msecs_to_jiffies(hdev->rpa_timeout * 1000);
+ queue_delayed_work(hdev->workqueue, &hdev->rpa_expired, to);
+
+ return 0;
+ }
+
+ /* In case of required privacy without resolvable private address,
+ * use an unresolvable private address. This is useful for active
+ * scanning and non-connectable advertising.
+ */
+ if (require_privacy) {
+ bdaddr_t urpa;
+
+ get_random_bytes(&urpa, 6);
+ urpa.b[5] &= 0x3f; /* Clear two most significant bits */
+
+ *own_addr_type = ADDR_LE_DEV_RANDOM;
+ set_random_addr(req, &urpa);
+ return 0;
+ }
+
+ /* If forcing static address is in use or there is no public
+ * address use the static address as random address (but skip
+ * the HCI command if the current random address is already the
+ * static one).
+ */
+ if (test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dev_flags) ||
+ !bacmp(&hdev->bdaddr, BDADDR_ANY)) {
+ *own_addr_type = ADDR_LE_DEV_RANDOM;
+ if (bacmp(&hdev->static_addr, &hdev->random_addr))
+ hci_req_add(req, HCI_OP_LE_SET_RANDOM_ADDR, 6,
+ &hdev->static_addr);
+ return 0;
+ }
+
+ /* Neither privacy nor static address is being used so use a
+ * public address.
+ */
+ *own_addr_type = ADDR_LE_DEV_PUBLIC;
+
+ return 0;
+}
+
+/* Copy the Identity Address of the controller.
+ *
+ * If the controller has a public BD_ADDR, then by default use that one.
+ * If this is a LE only controller without a public address, default to
+ * the static random address.
+ *
+ * For debugging purposes it is possible to force controllers with a
+ * public address to use the static random address instead.
+ */
+void hci_copy_identity_address(struct hci_dev *hdev, bdaddr_t *bdaddr,
+ u8 *bdaddr_type)
+{
+ if (test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dev_flags) ||
+ !bacmp(&hdev->bdaddr, BDADDR_ANY)) {
+ bacpy(bdaddr, &hdev->static_addr);
+ *bdaddr_type = ADDR_LE_DEV_RANDOM;
+ } else {
+ bacpy(bdaddr, &hdev->bdaddr);
+ *bdaddr_type = ADDR_LE_DEV_PUBLIC;
+ }
+}
+
/* Alloc HCI device */
struct hci_dev *hci_alloc_dev(void)
{
@@ -2919,11 +3843,17 @@ struct hci_dev *hci_alloc_dev(void)
hdev->sniff_max_interval = 800;
hdev->sniff_min_interval = 80;
+ hdev->le_adv_channel_map = 0x07;
hdev->le_scan_interval = 0x0060;
hdev->le_scan_window = 0x0030;
hdev->le_conn_min_interval = 0x0028;
hdev->le_conn_max_interval = 0x0038;
+ hdev->rpa_timeout = HCI_DEFAULT_RPA_TIMEOUT;
+ hdev->discov_interleaved_timeout = DISCOV_INTERLEAVED_TIMEOUT;
+ hdev->conn_info_min_age = DEFAULT_CONN_INFO_MIN_AGE;
+ hdev->conn_info_max_age = DEFAULT_CONN_INFO_MAX_AGE;
+
mutex_init(&hdev->lock);
mutex_init(&hdev->req_lock);
@@ -2932,7 +3862,11 @@ struct hci_dev *hci_alloc_dev(void)
INIT_LIST_HEAD(&hdev->uuids);
INIT_LIST_HEAD(&hdev->link_keys);
INIT_LIST_HEAD(&hdev->long_term_keys);
+ INIT_LIST_HEAD(&hdev->identity_resolving_keys);
INIT_LIST_HEAD(&hdev->remote_oob_data);
+ INIT_LIST_HEAD(&hdev->le_white_list);
+ INIT_LIST_HEAD(&hdev->le_conn_params);
+ INIT_LIST_HEAD(&hdev->pend_le_conns);
INIT_LIST_HEAD(&hdev->conn_hash.list);
INIT_WORK(&hdev->rx_work, hci_rx_work);
@@ -3017,9 +3951,18 @@ int hci_register_dev(struct hci_dev *hdev)
dev_set_name(&hdev->dev, "%s", hdev->name);
+ hdev->tfm_aes = crypto_alloc_blkcipher("ecb(aes)", 0,
+ CRYPTO_ALG_ASYNC);
+ if (IS_ERR(hdev->tfm_aes)) {
+ BT_ERR("Unable to create crypto context");
+ error = PTR_ERR(hdev->tfm_aes);
+ hdev->tfm_aes = NULL;
+ goto err_wqueue;
+ }
+
error = device_add(&hdev->dev);
if (error < 0)
- goto err_wqueue;
+ goto err_tfm;
hdev->rfkill = rfkill_alloc(hdev->name, &hdev->dev,
RFKILL_TYPE_BLUETOOTH, &hci_rfkill_ops,
@@ -3055,6 +3998,8 @@ int hci_register_dev(struct hci_dev *hdev)
return id;
+err_tfm:
+ crypto_free_blkcipher(hdev->tfm_aes);
err_wqueue:
destroy_workqueue(hdev->workqueue);
destroy_workqueue(hdev->req_workqueue);
@@ -3105,6 +4050,9 @@ void hci_unregister_dev(struct hci_dev *hdev)
rfkill_destroy(hdev->rfkill);
}
+ if (hdev->tfm_aes)
+ crypto_free_blkcipher(hdev->tfm_aes);
+
device_del(&hdev->dev);
debugfs_remove_recursive(hdev->debugfs);
@@ -3117,7 +4065,11 @@ void hci_unregister_dev(struct hci_dev *hdev)
hci_uuids_clear(hdev);
hci_link_keys_clear(hdev);
hci_smp_ltks_clear(hdev);
+ hci_smp_irks_clear(hdev);
hci_remote_oob_data_clear(hdev);
+ hci_white_list_clear(hdev);
+ hci_conn_params_clear(hdev);
+ hci_pend_le_conns_clear(hdev);
hci_dev_unlock(hdev);
hci_dev_put(hdev);
@@ -4345,3 +5297,104 @@ static void hci_cmd_work(struct work_struct *work)
}
}
}
+
+void hci_req_add_le_scan_disable(struct hci_request *req)
+{
+ struct hci_cp_le_set_scan_enable cp;
+
+ memset(&cp, 0, sizeof(cp));
+ cp.enable = LE_SCAN_DISABLE;
+ hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp);
+}
+
+void hci_req_add_le_passive_scan(struct hci_request *req)
+{
+ struct hci_cp_le_set_scan_param param_cp;
+ struct hci_cp_le_set_scan_enable enable_cp;
+ struct hci_dev *hdev = req->hdev;
+ u8 own_addr_type;
+
+ /* Set require_privacy to true to avoid identification from
+ * unknown peer devices. Since this is passive scanning, no
+ * SCAN_REQ using the local identity should be sent. Mandating
+ * privacy is just an extra precaution.
+ */
+ if (hci_update_random_address(req, true, &own_addr_type))
+ return;
+
+ memset(&param_cp, 0, sizeof(param_cp));
+ param_cp.type = LE_SCAN_PASSIVE;
+ param_cp.interval = cpu_to_le16(hdev->le_scan_interval);
+ param_cp.window = cpu_to_le16(hdev->le_scan_window);
+ param_cp.own_address_type = own_addr_type;
+ hci_req_add(req, HCI_OP_LE_SET_SCAN_PARAM, sizeof(param_cp),
+ &param_cp);
+
+ memset(&enable_cp, 0, sizeof(enable_cp));
+ enable_cp.enable = LE_SCAN_ENABLE;
+ enable_cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE;
+ hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(enable_cp),
+ &enable_cp);
+}
+
+static void update_background_scan_complete(struct hci_dev *hdev, u8 status)
+{
+ if (status)
+ BT_DBG("HCI request failed to update background scanning: "
+ "status 0x%2.2x", status);
+}
+
+/* This function controls the background scanning based on hdev->pend_le_conns
+ * list. If there are pending LE connections we start the background scanning,
+ * otherwise we stop it.
+ *
+ * This function requires the caller holds hdev->lock.
+ */
+void hci_update_background_scan(struct hci_dev *hdev)
+{
+ struct hci_request req;
+ struct hci_conn *conn;
+ int err;
+
+ hci_req_init(&req, hdev);
+
+ if (list_empty(&hdev->pend_le_conns)) {
+ /* If there are no pending LE connections, we should stop
+ * the background scanning.
+ */
+
+ /* If controller is not scanning we are done. */
+ if (!test_bit(HCI_LE_SCAN, &hdev->dev_flags))
+ return;
+
+ hci_req_add_le_scan_disable(&req);
+
+ BT_DBG("%s stopping background scanning", hdev->name);
+ } else {
+ /* If there is at least one pending LE connection, we should
+ * keep the background scan running.
+ */
+
+ /* If controller is connecting, we should not start scanning
+ * since some controllers are not able to scan and connect at
+ * the same time.
+ */
+ conn = hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT);
+ if (conn)
+ return;
+
+ /* If controller is currently scanning, we stop it to ensure we
+ * don't miss any advertising (due to duplicates filter).
+ */
+ if (test_bit(HCI_LE_SCAN, &hdev->dev_flags))
+ hci_req_add_le_scan_disable(&req);
+
+ hci_req_add_le_passive_scan(&req);
+
+ BT_DBG("%s starting background scanning", hdev->name);
+ }
+
+ err = hci_req_run(&req, update_background_scan_complete);
+ if (err)
+ BT_ERR("Failed to run HCI request: err %d", err);
+}
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 5f812455a45..640c54ec1bd 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -45,9 +45,13 @@ static void hci_cc_inquiry_cancel(struct hci_dev *hdev, struct sk_buff *skb)
return;
clear_bit(HCI_INQUIRY, &hdev->flags);
- smp_mb__after_clear_bit(); /* wake_up_bit advises about this barrier */
+ smp_mb__after_atomic(); /* wake_up_bit advises about this barrier */
wake_up_bit(&hdev->flags, HCI_INQUIRY);
+ hci_dev_lock(hdev);
+ hci_discovery_set_state(hdev, DISCOVERY_STOPPED);
+ hci_dev_unlock(hdev);
+
hci_conn_check_pending(hdev);
}
@@ -199,6 +203,8 @@ static void hci_cc_reset(struct hci_dev *hdev, struct sk_buff *skb)
memset(hdev->scan_rsp_data, 0, sizeof(hdev->scan_rsp_data));
hdev->scan_rsp_data_len = 0;
+ hdev->le_scan_type = LE_SCAN_PASSIVE;
+
hdev->ssp_debug_mode = 0;
}
@@ -461,6 +467,34 @@ static void hci_cc_write_ssp_mode(struct hci_dev *hdev, struct sk_buff *skb)
}
}
+static void hci_cc_write_sc_support(struct hci_dev *hdev, struct sk_buff *skb)
+{
+ u8 status = *((u8 *) skb->data);
+ struct hci_cp_write_sc_support *sent;
+
+ BT_DBG("%s status 0x%2.2x", hdev->name, status);
+
+ sent = hci_sent_cmd_data(hdev, HCI_OP_WRITE_SC_SUPPORT);
+ if (!sent)
+ return;
+
+ if (!status) {
+ if (sent->support)
+ hdev->features[1][0] |= LMP_HOST_SC;
+ else
+ hdev->features[1][0] &= ~LMP_HOST_SC;
+ }
+
+ if (test_bit(HCI_MGMT, &hdev->dev_flags))
+ mgmt_sc_enable_complete(hdev, sent->support, status);
+ else if (!status) {
+ if (sent->support)
+ set_bit(HCI_SC_ENABLED, &hdev->dev_flags);
+ else
+ clear_bit(HCI_SC_ENABLED, &hdev->dev_flags);
+ }
+}
+
static void hci_cc_read_local_version(struct hci_dev *hdev, struct sk_buff *skb)
{
struct hci_rp_read_local_version *rp = (void *) skb->data;
@@ -904,16 +938,50 @@ static void hci_cc_user_passkey_neg_reply(struct hci_dev *hdev,
hci_dev_unlock(hdev);
}
-static void hci_cc_read_local_oob_data_reply(struct hci_dev *hdev,
- struct sk_buff *skb)
+static void hci_cc_read_local_oob_data(struct hci_dev *hdev,
+ struct sk_buff *skb)
{
struct hci_rp_read_local_oob_data *rp = (void *) skb->data;
BT_DBG("%s status 0x%2.2x", hdev->name, rp->status);
hci_dev_lock(hdev);
- mgmt_read_local_oob_data_reply_complete(hdev, rp->hash,
- rp->randomizer, rp->status);
+ mgmt_read_local_oob_data_complete(hdev, rp->hash, rp->randomizer,
+ NULL, NULL, rp->status);
+ hci_dev_unlock(hdev);
+}
+
+static void hci_cc_read_local_oob_ext_data(struct hci_dev *hdev,
+ struct sk_buff *skb)
+{
+ struct hci_rp_read_local_oob_ext_data *rp = (void *) skb->data;
+
+ BT_DBG("%s status 0x%2.2x", hdev->name, rp->status);
+
+ hci_dev_lock(hdev);
+ mgmt_read_local_oob_data_complete(hdev, rp->hash192, rp->randomizer192,
+ rp->hash256, rp->randomizer256,
+ rp->status);
+ hci_dev_unlock(hdev);
+}
+
+
+static void hci_cc_le_set_random_addr(struct hci_dev *hdev, struct sk_buff *skb)
+{
+ __u8 status = *((__u8 *) skb->data);
+ bdaddr_t *sent;
+
+ BT_DBG("%s status 0x%2.2x", hdev->name, status);
+
+ sent = hci_sent_cmd_data(hdev, HCI_OP_LE_SET_RANDOM_ADDR);
+ if (!sent)
+ return;
+
+ hci_dev_lock(hdev);
+
+ if (!status)
+ bacpy(&hdev->random_addr, sent);
+
hci_dev_unlock(hdev);
}
@@ -927,18 +995,75 @@ static void hci_cc_le_set_adv_enable(struct hci_dev *hdev, struct sk_buff *skb)
if (!sent)
return;
+ if (status)
+ return;
+
hci_dev_lock(hdev);
- if (!status) {
- if (*sent)
- set_bit(HCI_ADVERTISING, &hdev->dev_flags);
- else
- clear_bit(HCI_ADVERTISING, &hdev->dev_flags);
+ /* If we're doing connection initiation as peripheral, set a
+ * timeout in case something goes wrong.
+ */
+ if (*sent) {
+ struct hci_conn *conn;
+
+ conn = hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT);
+ if (conn)
+ queue_delayed_work(hdev->workqueue,
+ &conn->le_conn_timeout,
+ HCI_LE_CONN_TIMEOUT);
}
+ mgmt_advertising(hdev, *sent);
+
+ hci_dev_unlock(hdev);
+}
+
+static void hci_cc_le_set_scan_param(struct hci_dev *hdev, struct sk_buff *skb)
+{
+ struct hci_cp_le_set_scan_param *cp;
+ __u8 status = *((__u8 *) skb->data);
+
+ BT_DBG("%s status 0x%2.2x", hdev->name, status);
+
+ cp = hci_sent_cmd_data(hdev, HCI_OP_LE_SET_SCAN_PARAM);
+ if (!cp)
+ return;
+
+ hci_dev_lock(hdev);
+
+ if (!status)
+ hdev->le_scan_type = cp->type;
+
hci_dev_unlock(hdev);
}
+static bool has_pending_adv_report(struct hci_dev *hdev)
+{
+ struct discovery_state *d = &hdev->discovery;
+
+ return bacmp(&d->last_adv_addr, BDADDR_ANY);
+}
+
+static void clear_pending_adv_report(struct hci_dev *hdev)
+{
+ struct discovery_state *d = &hdev->discovery;
+
+ bacpy(&d->last_adv_addr, BDADDR_ANY);
+ d->last_adv_data_len = 0;
+}
+
+static void store_pending_adv_report(struct hci_dev *hdev, bdaddr_t *bdaddr,
+ u8 bdaddr_type, s8 rssi, u8 *data, u8 len)
+{
+ struct discovery_state *d = &hdev->discovery;
+
+ bacpy(&d->last_adv_addr, bdaddr);
+ d->last_adv_addr_type = bdaddr_type;
+ d->last_adv_rssi = rssi;
+ memcpy(d->last_adv_data, data, len);
+ d->last_adv_data_len = len;
+}
+
static void hci_cc_le_set_scan_enable(struct hci_dev *hdev,
struct sk_buff *skb)
{
@@ -957,10 +1082,38 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev,
switch (cp->enable) {
case LE_SCAN_ENABLE:
set_bit(HCI_LE_SCAN, &hdev->dev_flags);
+ if (hdev->le_scan_type == LE_SCAN_ACTIVE)
+ clear_pending_adv_report(hdev);
break;
case LE_SCAN_DISABLE:
+ /* We do this here instead of when setting DISCOVERY_STOPPED
+ * since the latter would potentially require waiting for
+ * inquiry to stop too.
+ */
+ if (has_pending_adv_report(hdev)) {
+ struct discovery_state *d = &hdev->discovery;
+
+ mgmt_device_found(hdev, &d->last_adv_addr, LE_LINK,
+ d->last_adv_addr_type, NULL,
+ d->last_adv_rssi, 0, 1,
+ d->last_adv_data,
+ d->last_adv_data_len, NULL, 0);
+ }
+
+ /* Cancel this timer so that we don't try to disable scanning
+ * when it's already disabled.
+ */
+ cancel_delayed_work(&hdev->le_scan_disable);
+
clear_bit(HCI_LE_SCAN, &hdev->dev_flags);
+ /* The HCI_LE_SCAN_INTERRUPTED flag indicates that we
+ * interrupted scanning due to a connect request. Therefore
+ * mark discovery as stopped.
+ */
+ if (test_and_clear_bit(HCI_LE_SCAN_INTERRUPTED,
+ &hdev->dev_flags))
+ hci_discovery_set_state(hdev, DISCOVERY_STOPPED);
break;
default:
@@ -980,6 +1133,49 @@ static void hci_cc_le_read_white_list_size(struct hci_dev *hdev,
hdev->le_white_list_size = rp->size;
}
+static void hci_cc_le_clear_white_list(struct hci_dev *hdev,
+ struct sk_buff *skb)
+{
+ __u8 status = *((__u8 *) skb->data);
+
+ BT_DBG("%s status 0x%2.2x", hdev->name, status);
+
+ if (!status)
+ hci_white_list_clear(hdev);
+}
+
+static void hci_cc_le_add_to_white_list(struct hci_dev *hdev,
+ struct sk_buff *skb)
+{
+ struct hci_cp_le_add_to_white_list *sent;
+ __u8 status = *((__u8 *) skb->data);
+
+ BT_DBG("%s status 0x%2.2x", hdev->name, status);
+
+ sent = hci_sent_cmd_data(hdev, HCI_OP_LE_ADD_TO_WHITE_LIST);
+ if (!sent)
+ return;
+
+ if (!status)
+ hci_white_list_add(hdev, &sent->bdaddr, sent->bdaddr_type);
+}
+
+static void hci_cc_le_del_from_white_list(struct hci_dev *hdev,
+ struct sk_buff *skb)
+{
+ struct hci_cp_le_del_from_white_list *sent;
+ __u8 status = *((__u8 *) skb->data);
+
+ BT_DBG("%s status 0x%2.2x", hdev->name, status);
+
+ sent = hci_sent_cmd_data(hdev, HCI_OP_LE_DEL_FROM_WHITE_LIST);
+ if (!sent)
+ return;
+
+ if (!status)
+ hci_white_list_del(hdev, &sent->bdaddr, sent->bdaddr_type);
+}
+
static void hci_cc_le_read_supported_states(struct hci_dev *hdev,
struct sk_buff *skb)
{
@@ -1020,6 +1216,25 @@ static void hci_cc_write_le_host_supported(struct hci_dev *hdev,
}
}
+static void hci_cc_set_adv_param(struct hci_dev *hdev, struct sk_buff *skb)
+{
+ struct hci_cp_le_set_adv_param *cp;
+ u8 status = *((u8 *) skb->data);
+
+ BT_DBG("%s status 0x%2.2x", hdev->name, status);
+
+ if (status)
+ return;
+
+ cp = hci_sent_cmd_data(hdev, HCI_OP_LE_SET_ADV_PARAM);
+ if (!cp)
+ return;
+
+ hci_dev_lock(hdev);
+ hdev->adv_addr_type = cp->own_address_type;
+ hci_dev_unlock(hdev);
+}
+
static void hci_cc_write_remote_amp_assoc(struct hci_dev *hdev,
struct sk_buff *skb)
{
@@ -1034,6 +1249,59 @@ static void hci_cc_write_remote_amp_assoc(struct hci_dev *hdev,
amp_write_rem_assoc_continue(hdev, rp->phy_handle);
}
+static void hci_cc_read_rssi(struct hci_dev *hdev, struct sk_buff *skb)
+{
+ struct hci_rp_read_rssi *rp = (void *) skb->data;
+ struct hci_conn *conn;
+
+ BT_DBG("%s status 0x%2.2x", hdev->name, rp->status);
+
+ if (rp->status)
+ return;
+
+ hci_dev_lock(hdev);
+
+ conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(rp->handle));
+ if (conn)
+ conn->rssi = rp->rssi;
+
+ hci_dev_unlock(hdev);
+}
+
+static void hci_cc_read_tx_power(struct hci_dev *hdev, struct sk_buff *skb)
+{
+ struct hci_cp_read_tx_power *sent;
+ struct hci_rp_read_tx_power *rp = (void *) skb->data;
+ struct hci_conn *conn;
+
+ BT_DBG("%s status 0x%2.2x", hdev->name, rp->status);
+
+ if (rp->status)
+ return;
+
+ sent = hci_sent_cmd_data(hdev, HCI_OP_READ_TX_POWER);
+ if (!sent)
+ return;
+
+ hci_dev_lock(hdev);
+
+ conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(rp->handle));
+ if (!conn)
+ goto unlock;
+
+ switch (sent->type) {
+ case 0x00:
+ conn->tx_power = rp->tx_power;
+ break;
+ case 0x01:
+ conn->max_tx_power = rp->tx_power;
+ break;
+ }
+
+unlock:
+ hci_dev_unlock(hdev);
+}
+
static void hci_cs_inquiry(struct hci_dev *hdev, __u8 status)
{
BT_DBG("%s status 0x%2.2x", hdev->name, status);
@@ -1185,9 +1453,13 @@ static int hci_outgoing_auth_needed(struct hci_dev *hdev,
return 0;
/* Only request authentication for SSP connections or non-SSP
- * devices with sec_level HIGH or if MITM protection is requested */
+ * devices with sec_level MEDIUM, HIGH or FIPS, or if MITM protection
+ * is requested.
+ */
if (!hci_conn_ssp_enabled(conn) && !(conn->auth_type & 0x01) &&
- conn->pending_sec_level != BT_SECURITY_HIGH)
+ conn->pending_sec_level != BT_SECURITY_FIPS &&
+ conn->pending_sec_level != BT_SECURITY_HIGH &&
+ conn->pending_sec_level != BT_SECURITY_MEDIUM)
return 0;
return 1;
@@ -1518,6 +1790,87 @@ static void hci_cs_accept_phylink(struct hci_dev *hdev, u8 status)
amp_write_remote_assoc(hdev, cp->phy_handle);
}
+static void hci_cs_le_create_conn(struct hci_dev *hdev, u8 status)
+{
+ struct hci_cp_le_create_conn *cp;
+ struct hci_conn *conn;
+
+ BT_DBG("%s status 0x%2.2x", hdev->name, status);
+
+ /* All connection failure handling is taken care of by the
+ * hci_le_conn_failed function which is triggered by the HCI
+ * request completion callbacks used for connecting.
+ */
+ if (status)
+ return;
+
+ cp = hci_sent_cmd_data(hdev, HCI_OP_LE_CREATE_CONN);
+ if (!cp)
+ return;
+
+ hci_dev_lock(hdev);
+
+ conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &cp->peer_addr);
+ if (!conn)
+ goto unlock;
+
+ /* Store the initiator and responder address information which
+ * is needed for SMP. These values will not change during the
+ * lifetime of the connection.
+ */
+ conn->init_addr_type = cp->own_address_type;
+ if (cp->own_address_type == ADDR_LE_DEV_RANDOM)
+ bacpy(&conn->init_addr, &hdev->random_addr);
+ else
+ bacpy(&conn->init_addr, &hdev->bdaddr);
+
+ conn->resp_addr_type = cp->peer_addr_type;
+ bacpy(&conn->resp_addr, &cp->peer_addr);
+
+ /* We don't want the connection attempt to stick around
+ * indefinitely since LE doesn't have a page timeout concept
+ * like BR/EDR. Set a timer for any connection that doesn't use
+ * the white list for connecting.
+ */
+ if (cp->filter_policy == HCI_LE_USE_PEER_ADDR)
+ queue_delayed_work(conn->hdev->workqueue,
+ &conn->le_conn_timeout,
+ HCI_LE_CONN_TIMEOUT);
+
+unlock:
+ hci_dev_unlock(hdev);
+}
+
+static void hci_cs_le_start_enc(struct hci_dev *hdev, u8 status)
+{
+ struct hci_cp_le_start_enc *cp;
+ struct hci_conn *conn;
+
+ BT_DBG("%s status 0x%2.2x", hdev->name, status);
+
+ if (!status)
+ return;
+
+ hci_dev_lock(hdev);
+
+ cp = hci_sent_cmd_data(hdev, HCI_OP_LE_START_ENC);
+ if (!cp)
+ goto unlock;
+
+ conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle));
+ if (!conn)
+ goto unlock;
+
+ if (conn->state != BT_CONNECTED)
+ goto unlock;
+
+ hci_disconnect(conn, HCI_ERROR_AUTH_FAILURE);
+ hci_conn_drop(conn);
+
+unlock:
+ hci_dev_unlock(hdev);
+}
+
static void hci_inquiry_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
{
__u8 status = *((__u8 *) skb->data);
@@ -1531,7 +1884,7 @@ static void hci_inquiry_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
if (!test_and_clear_bit(HCI_INQUIRY, &hdev->flags))
return;
- smp_mb__after_clear_bit(); /* wake_up_bit advises about this barrier */
+ smp_mb__after_atomic(); /* wake_up_bit advises about this barrier */
wake_up_bit(&hdev->flags, HCI_INQUIRY);
if (!test_bit(HCI_MGMT, &hdev->dev_flags))
@@ -1590,7 +1943,7 @@ static void hci_inquiry_result_evt(struct hci_dev *hdev, struct sk_buff *skb)
name_known = hci_inquiry_cache_update(hdev, &data, false, &ssp);
mgmt_device_found(hdev, &info->bdaddr, ACL_LINK, 0x00,
info->dev_class, 0, !name_known, ssp, NULL,
- 0);
+ 0, NULL, 0);
}
hci_dev_unlock(hdev);
@@ -1659,7 +2012,7 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
} else {
conn->state = BT_CLOSED;
if (conn->type == ACL_LINK)
- mgmt_connect_failed(hdev, &ev->bdaddr, conn->type,
+ mgmt_connect_failed(hdev, &conn->dst, conn->type,
conn->dst_type, ev->status);
}
@@ -1738,9 +2091,9 @@ static void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
bacpy(&cp.bdaddr, &ev->bdaddr);
cp.pkt_type = cpu_to_le16(conn->pkt_type);
- cp.tx_bandwidth = __constant_cpu_to_le32(0x00001f40);
- cp.rx_bandwidth = __constant_cpu_to_le32(0x00001f40);
- cp.max_latency = __constant_cpu_to_le16(0xffff);
+ cp.tx_bandwidth = cpu_to_le32(0x00001f40);
+ cp.rx_bandwidth = cpu_to_le32(0x00001f40);
+ cp.max_latency = cpu_to_le16(0xffff);
cp.content_format = cpu_to_le16(hdev->voice_setting);
cp.retrans_effort = 0xff;
@@ -1780,7 +2133,9 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
{
struct hci_ev_disconn_complete *ev = (void *) skb->data;
u8 reason = hci_to_mgmt_reason(ev->reason);
+ struct hci_conn_params *params;
struct hci_conn *conn;
+ bool mgmt_connected;
u8 type;
BT_DBG("%s status 0x%2.2x", hdev->name, ev->status);
@@ -1799,13 +2154,30 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
conn->state = BT_CLOSED;
- if (test_and_clear_bit(HCI_CONN_MGMT_CONNECTED, &conn->flags))
- mgmt_device_disconnected(hdev, &conn->dst, conn->type,
- conn->dst_type, reason);
+ mgmt_connected = test_and_clear_bit(HCI_CONN_MGMT_CONNECTED, &conn->flags);
+ mgmt_device_disconnected(hdev, &conn->dst, conn->type, conn->dst_type,
+ reason, mgmt_connected);
if (conn->type == ACL_LINK && conn->flush_key)
hci_remove_link_key(hdev, &conn->dst);
+ params = hci_conn_params_lookup(hdev, &conn->dst, conn->dst_type);
+ if (params) {
+ switch (params->auto_connect) {
+ case HCI_AUTO_CONN_LINK_LOSS:
+ if (ev->reason != HCI_ERROR_CONNECTION_TIMEOUT)
+ break;
+ /* Fall through */
+
+ case HCI_AUTO_CONN_ALWAYS:
+ hci_pend_le_conn_add(hdev, &conn->dst, conn->dst_type);
+ break;
+
+ default:
+ break;
+ }
+ }
+
type = conn->type;
hci_proto_disconn_cfm(conn, ev->reason);
@@ -1943,34 +2315,57 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff *skb)
hci_dev_lock(hdev);
conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle));
- if (conn) {
- if (!ev->status) {
- if (ev->encrypt) {
- /* Encryption implies authentication */
- conn->link_mode |= HCI_LM_AUTH;
- conn->link_mode |= HCI_LM_ENCRYPT;
- conn->sec_level = conn->pending_sec_level;
- } else
- conn->link_mode &= ~HCI_LM_ENCRYPT;
+ if (!conn)
+ goto unlock;
+
+ if (!ev->status) {
+ if (ev->encrypt) {
+ /* Encryption implies authentication */
+ conn->link_mode |= HCI_LM_AUTH;
+ conn->link_mode |= HCI_LM_ENCRYPT;
+ conn->sec_level = conn->pending_sec_level;
+
+ /* P-256 authentication key implies FIPS */
+ if (conn->key_type == HCI_LK_AUTH_COMBINATION_P256)
+ conn->link_mode |= HCI_LM_FIPS;
+
+ if ((conn->type == ACL_LINK && ev->encrypt == 0x02) ||
+ conn->type == LE_LINK)
+ set_bit(HCI_CONN_AES_CCM, &conn->flags);
+ } else {
+ conn->link_mode &= ~HCI_LM_ENCRYPT;
+ clear_bit(HCI_CONN_AES_CCM, &conn->flags);
}
+ }
- clear_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags);
+ clear_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags);
- if (ev->status && conn->state == BT_CONNECTED) {
- hci_disconnect(conn, HCI_ERROR_AUTH_FAILURE);
+ if (ev->status && conn->state == BT_CONNECTED) {
+ hci_disconnect(conn, HCI_ERROR_AUTH_FAILURE);
+ hci_conn_drop(conn);
+ goto unlock;
+ }
+
+ if (conn->state == BT_CONFIG) {
+ if (!ev->status)
+ conn->state = BT_CONNECTED;
+
+ /* In Secure Connections Only mode, do not allow any
+ * connections that are not encrypted with AES-CCM
+ * using a P-256 authenticated combination key.
+ */
+ if (test_bit(HCI_SC_ONLY, &hdev->dev_flags) &&
+ (!test_bit(HCI_CONN_AES_CCM, &conn->flags) ||
+ conn->key_type != HCI_LK_AUTH_COMBINATION_P256)) {
+ hci_proto_connect_cfm(conn, HCI_ERROR_AUTH_FAILURE);
hci_conn_drop(conn);
goto unlock;
}
- if (conn->state == BT_CONFIG) {
- if (!ev->status)
- conn->state = BT_CONNECTED;
-
- hci_proto_connect_cfm(conn, ev->status);
- hci_conn_drop(conn);
- } else
- hci_encrypt_cfm(conn, ev->status, ev->encrypt);
- }
+ hci_proto_connect_cfm(conn, ev->status);
+ hci_conn_drop(conn);
+ } else
+ hci_encrypt_cfm(conn, ev->status, ev->encrypt);
unlock:
hci_dev_unlock(hdev);
@@ -2144,6 +2539,10 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
hci_cc_write_ssp_mode(hdev, skb);
break;
+ case HCI_OP_WRITE_SC_SUPPORT:
+ hci_cc_write_sc_support(hdev, skb);
+ break;
+
case HCI_OP_READ_LOCAL_VERSION:
hci_cc_read_local_version(hdev, skb);
break;
@@ -2213,7 +2612,11 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
break;
case HCI_OP_READ_LOCAL_OOB_DATA:
- hci_cc_read_local_oob_data_reply(hdev, skb);
+ hci_cc_read_local_oob_data(hdev, skb);
+ break;
+
+ case HCI_OP_READ_LOCAL_OOB_EXT_DATA:
+ hci_cc_read_local_oob_ext_data(hdev, skb);
break;
case HCI_OP_LE_READ_BUFFER_SIZE:
@@ -2244,10 +2647,18 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
hci_cc_user_passkey_neg_reply(hdev, skb);
break;
+ case HCI_OP_LE_SET_RANDOM_ADDR:
+ hci_cc_le_set_random_addr(hdev, skb);
+ break;
+
case HCI_OP_LE_SET_ADV_ENABLE:
hci_cc_le_set_adv_enable(hdev, skb);
break;
+ case HCI_OP_LE_SET_SCAN_PARAM:
+ hci_cc_le_set_scan_param(hdev, skb);
+ break;
+
case HCI_OP_LE_SET_SCAN_ENABLE:
hci_cc_le_set_scan_enable(hdev, skb);
break;
@@ -2256,6 +2667,18 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
hci_cc_le_read_white_list_size(hdev, skb);
break;
+ case HCI_OP_LE_CLEAR_WHITE_LIST:
+ hci_cc_le_clear_white_list(hdev, skb);
+ break;
+
+ case HCI_OP_LE_ADD_TO_WHITE_LIST:
+ hci_cc_le_add_to_white_list(hdev, skb);
+ break;
+
+ case HCI_OP_LE_DEL_FROM_WHITE_LIST:
+ hci_cc_le_del_from_white_list(hdev, skb);
+ break;
+
case HCI_OP_LE_READ_SUPPORTED_STATES:
hci_cc_le_read_supported_states(hdev, skb);
break;
@@ -2264,10 +2687,22 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
hci_cc_write_le_host_supported(hdev, skb);
break;
+ case HCI_OP_LE_SET_ADV_PARAM:
+ hci_cc_set_adv_param(hdev, skb);
+ break;
+
case HCI_OP_WRITE_REMOTE_AMP_ASSOC:
hci_cc_write_remote_amp_assoc(hdev, skb);
break;
+ case HCI_OP_READ_RSSI:
+ hci_cc_read_rssi(hdev, skb);
+ break;
+
+ case HCI_OP_READ_TX_POWER:
+ hci_cc_read_tx_power(hdev, skb);
+ break;
+
default:
BT_DBG("%s opcode 0x%4.4x", hdev->name, opcode);
break;
@@ -2351,6 +2786,14 @@ static void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb)
hci_cs_accept_phylink(hdev, ev->status);
break;
+ case HCI_OP_LE_CREATE_CONN:
+ hci_cs_le_create_conn(hdev, ev->status);
+ break;
+
+ case HCI_OP_LE_START_ENC:
+ hci_cs_le_start_enc(hdev, ev->status);
+ break;
+
default:
BT_DBG("%s opcode 0x%4.4x", hdev->name, opcode);
break;
@@ -2630,14 +3073,16 @@ static void hci_link_key_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr);
if (conn) {
- if (key->type == HCI_LK_UNAUTH_COMBINATION &&
+ if ((key->type == HCI_LK_UNAUTH_COMBINATION_P192 ||
+ key->type == HCI_LK_UNAUTH_COMBINATION_P256) &&
conn->auth_type != 0xff && (conn->auth_type & 0x01)) {
BT_DBG("%s ignoring unauthenticated key", hdev->name);
goto not_found;
}
if (key->type == HCI_LK_COMBINATION && key->pin_len < 16 &&
- conn->pending_sec_level == BT_SECURITY_HIGH) {
+ (conn->pending_sec_level == BT_SECURITY_HIGH ||
+ conn->pending_sec_level == BT_SECURITY_FIPS)) {
BT_DBG("%s ignoring key unauthenticated for high security",
hdev->name);
goto not_found;
@@ -2782,7 +3227,7 @@ static void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev,
false, &ssp);
mgmt_device_found(hdev, &info->bdaddr, ACL_LINK, 0x00,
info->dev_class, info->rssi,
- !name_known, ssp, NULL, 0);
+ !name_known, ssp, NULL, 0, NULL, 0);
}
} else {
struct inquiry_info_with_rssi *info = (void *) (skb->data + 1);
@@ -2800,7 +3245,7 @@ static void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev,
false, &ssp);
mgmt_device_found(hdev, &info->bdaddr, ACL_LINK, 0x00,
info->dev_class, info->rssi,
- !name_known, ssp, NULL, 0);
+ !name_known, ssp, NULL, 0, NULL, 0);
}
}
@@ -2844,6 +3289,9 @@ static void hci_remote_ext_features_evt(struct hci_dev *hdev,
* features do not indicate SSP support */
clear_bit(HCI_CONN_SSP_ENABLED, &conn->flags);
}
+
+ if (ev->features[0] & LMP_HOST_SC)
+ set_bit(HCI_CONN_SC_ENABLED, &conn->flags);
}
if (conn->state != BT_CONFIG)
@@ -2905,6 +3353,7 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev,
case 0x1c: /* SCO interval rejected */
case 0x1a: /* Unsupported Remote Feature */
case 0x1f: /* Unspecified error */
+ case 0x20: /* Unsupported LMP Parameter value */
if (conn->out) {
conn->pkt_type = (hdev->esco_type & SCO_ESCO_MASK) |
(hdev->esco_type & EDR_ESCO_MASK);
@@ -2985,7 +3434,7 @@ static void hci_extended_inquiry_result_evt(struct hci_dev *hdev,
eir_len = eir_get_length(info->data, sizeof(info->data));
mgmt_device_found(hdev, &info->bdaddr, ACL_LINK, 0x00,
info->dev_class, info->rssi, !name_known,
- ssp, info->data, eir_len);
+ ssp, info->data, eir_len, NULL, 0);
}
hci_dev_unlock(hdev);
@@ -3006,6 +3455,12 @@ static void hci_key_refresh_complete_evt(struct hci_dev *hdev,
if (!conn)
goto unlock;
+ /* For BR/EDR the necessary steps are taken through the
+ * auth_complete event.
+ */
+ if (conn->type != LE_LINK)
+ goto unlock;
+
if (!ev->status)
conn->sec_level = conn->pending_sec_level;
@@ -3037,24 +3492,20 @@ unlock:
static u8 hci_get_auth_req(struct hci_conn *conn)
{
- /* If remote requests dedicated bonding follow that lead */
- if (conn->remote_auth == HCI_AT_DEDICATED_BONDING ||
- conn->remote_auth == HCI_AT_DEDICATED_BONDING_MITM) {
- /* If both remote and local IO capabilities allow MITM
- * protection then require it, otherwise don't */
- if (conn->remote_cap == HCI_IO_NO_INPUT_OUTPUT ||
- conn->io_capability == HCI_IO_NO_INPUT_OUTPUT)
- return HCI_AT_DEDICATED_BONDING;
- else
- return HCI_AT_DEDICATED_BONDING_MITM;
- }
-
/* If remote requests no-bonding follow that lead */
if (conn->remote_auth == HCI_AT_NO_BONDING ||
conn->remote_auth == HCI_AT_NO_BONDING_MITM)
return conn->remote_auth | (conn->auth_type & 0x01);
- return conn->auth_type;
+ /* If both remote and local have enough IO capabilities, require
+ * MITM protection
+ */
+ if (conn->remote_cap != HCI_IO_NO_INPUT_OUTPUT &&
+ conn->io_capability != HCI_IO_NO_INPUT_OUTPUT)
+ return conn->remote_auth | 0x01;
+
+ /* No MITM protection possible so ignore remote requirement */
+ return (conn->remote_auth & ~0x01) | (conn->auth_type & 0x01);
}
static void hci_io_capa_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
@@ -3084,8 +3535,25 @@ static void hci_io_capa_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
* to DisplayYesNo as it is not supported by BT spec. */
cp.capability = (conn->io_capability == 0x04) ?
HCI_IO_DISPLAY_YESNO : conn->io_capability;
- conn->auth_type = hci_get_auth_req(conn);
- cp.authentication = conn->auth_type;
+
+ /* If we are initiators, there is no remote information yet */
+ if (conn->remote_auth == 0xff) {
+ cp.authentication = conn->auth_type;
+
+ /* Request MITM protection if our IO caps allow it
+ * except for the no-bonding case.
+ * conn->auth_type is not updated here since
+ * that might cause the user confirmation to be
+ * rejected in case the remote doesn't have the
+ * IO capabilities for MITM.
+ */
+ if (conn->io_capability != HCI_IO_NO_INPUT_OUTPUT &&
+ cp.authentication != HCI_AT_NO_BONDING)
+ cp.authentication |= 0x01;
+ } else {
+ conn->auth_type = hci_get_auth_req(conn);
+ cp.authentication = conn->auth_type;
+ }
if (hci_find_remote_oob_data(hdev, &conn->dst) &&
(conn->out || test_bit(HCI_CONN_REMOTE_OOB, &conn->flags)))
@@ -3153,12 +3621,9 @@ static void hci_user_confirm_request_evt(struct hci_dev *hdev,
rem_mitm = (conn->remote_auth & 0x01);
/* If we require MITM but the remote device can't provide that
- * (it has NoInputNoOutput) then reject the confirmation
- * request. The only exception is when we're dedicated bonding
- * initiators (connect_cfm_cb set) since then we always have the MITM
- * bit set. */
- if (!conn->connect_cfm_cb && loc_mitm &&
- conn->remote_cap == HCI_IO_NO_INPUT_OUTPUT) {
+ * (it has NoInputNoOutput) then reject the confirmation request
+ */
+ if (loc_mitm && conn->remote_cap == HCI_IO_NO_INPUT_OUTPUT) {
BT_DBG("Rejecting request: remote device can't provide MITM");
hci_send_cmd(hdev, HCI_OP_USER_CONFIRM_NEG_REPLY,
sizeof(ev->bdaddr), &ev->bdaddr);
@@ -3171,8 +3636,11 @@ static void hci_user_confirm_request_evt(struct hci_dev *hdev,
/* If we're not the initiators request authorization to
* proceed from user space (mgmt_user_confirm with
- * confirm_hint set to 1). */
- if (!test_bit(HCI_CONN_AUTH_PEND, &conn->flags)) {
+ * confirm_hint set to 1). The exception is if neither
+ * side had MITM in which case we do auto-accept.
+ */
+ if (!test_bit(HCI_CONN_AUTH_PEND, &conn->flags) &&
+ (loc_mitm || rem_mitm)) {
BT_DBG("Confirming auto-accept as acceptor");
confirm_hint = 1;
goto confirm;
@@ -3194,8 +3662,8 @@ static void hci_user_confirm_request_evt(struct hci_dev *hdev,
}
confirm:
- mgmt_user_confirm_request(hdev, &ev->bdaddr, ACL_LINK, 0, ev->passkey,
- confirm_hint);
+ mgmt_user_confirm_request(hdev, &ev->bdaddr, ACL_LINK, 0,
+ le32_to_cpu(ev->passkey), confirm_hint);
unlock:
hci_dev_unlock(hdev);
@@ -3337,20 +3805,36 @@ static void hci_remote_oob_data_request_evt(struct hci_dev *hdev,
data = hci_find_remote_oob_data(hdev, &ev->bdaddr);
if (data) {
- struct hci_cp_remote_oob_data_reply cp;
+ if (test_bit(HCI_SC_ENABLED, &hdev->dev_flags)) {
+ struct hci_cp_remote_oob_ext_data_reply cp;
- bacpy(&cp.bdaddr, &ev->bdaddr);
- memcpy(cp.hash, data->hash, sizeof(cp.hash));
- memcpy(cp.randomizer, data->randomizer, sizeof(cp.randomizer));
+ bacpy(&cp.bdaddr, &ev->bdaddr);
+ memcpy(cp.hash192, data->hash192, sizeof(cp.hash192));
+ memcpy(cp.randomizer192, data->randomizer192,
+ sizeof(cp.randomizer192));
+ memcpy(cp.hash256, data->hash256, sizeof(cp.hash256));
+ memcpy(cp.randomizer256, data->randomizer256,
+ sizeof(cp.randomizer256));
+
+ hci_send_cmd(hdev, HCI_OP_REMOTE_OOB_EXT_DATA_REPLY,
+ sizeof(cp), &cp);
+ } else {
+ struct hci_cp_remote_oob_data_reply cp;
- hci_send_cmd(hdev, HCI_OP_REMOTE_OOB_DATA_REPLY, sizeof(cp),
- &cp);
+ bacpy(&cp.bdaddr, &ev->bdaddr);
+ memcpy(cp.hash, data->hash192, sizeof(cp.hash));
+ memcpy(cp.randomizer, data->randomizer192,
+ sizeof(cp.randomizer));
+
+ hci_send_cmd(hdev, HCI_OP_REMOTE_OOB_DATA_REPLY,
+ sizeof(cp), &cp);
+ }
} else {
struct hci_cp_remote_oob_data_neg_reply cp;
bacpy(&cp.bdaddr, &ev->bdaddr);
- hci_send_cmd(hdev, HCI_OP_REMOTE_OOB_DATA_NEG_REPLY, sizeof(cp),
- &cp);
+ hci_send_cmd(hdev, HCI_OP_REMOTE_OOB_DATA_NEG_REPLY,
+ sizeof(cp), &cp);
}
unlock:
@@ -3484,6 +3968,7 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
{
struct hci_ev_le_conn_complete *ev = (void *) skb->data;
struct hci_conn *conn;
+ struct smp_irk *irk;
BT_DBG("%s status 0x%2.2x", hdev->name, ev->status);
@@ -3499,34 +3984,71 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
conn->dst_type = ev->bdaddr_type;
- /* The advertising parameters for own address type
- * define which source address and source address
- * type this connections has.
- */
- if (bacmp(&conn->src, BDADDR_ANY)) {
- conn->src_type = ADDR_LE_DEV_PUBLIC;
- } else {
- bacpy(&conn->src, &hdev->static_addr);
- conn->src_type = ADDR_LE_DEV_RANDOM;
- }
-
if (ev->role == LE_CONN_ROLE_MASTER) {
conn->out = true;
conn->link_mode |= HCI_LM_MASTER;
}
+
+ /* If we didn't have a hci_conn object previously
+ * but we're in master role this must be something
+ * initiated using a white list. Since white list based
+ * connections are not "first class citizens" we don't
+ * have full tracking of them. Therefore, we go ahead
+ * with a "best effort" approach of determining the
+ * initiator address based on the HCI_PRIVACY flag.
+ */
+ if (conn->out) {
+ conn->resp_addr_type = ev->bdaddr_type;
+ bacpy(&conn->resp_addr, &ev->bdaddr);
+ if (test_bit(HCI_PRIVACY, &hdev->dev_flags)) {
+ conn->init_addr_type = ADDR_LE_DEV_RANDOM;
+ bacpy(&conn->init_addr, &hdev->rpa);
+ } else {
+ hci_copy_identity_address(hdev,
+ &conn->init_addr,
+ &conn->init_addr_type);
+ }
+ }
+ } else {
+ cancel_delayed_work(&conn->le_conn_timeout);
+ }
+
+ if (!conn->out) {
+ /* Set the responder (our side) address type based on
+ * the advertising address type.
+ */
+ conn->resp_addr_type = hdev->adv_addr_type;
+ if (hdev->adv_addr_type == ADDR_LE_DEV_RANDOM)
+ bacpy(&conn->resp_addr, &hdev->random_addr);
+ else
+ bacpy(&conn->resp_addr, &hdev->bdaddr);
+
+ conn->init_addr_type = ev->bdaddr_type;
+ bacpy(&conn->init_addr, &ev->bdaddr);
+ }
+
+ /* Lookup the identity address from the stored connection
+ * address and address type.
+ *
+ * When establishing connections to an identity address, the
+ * connection procedure will store the resolvable random
+ * address first. Now if it can be converted back into the
+ * identity address, start using the identity address from
+ * now on.
+ */
+ irk = hci_get_irk(hdev, &conn->dst, conn->dst_type);
+ if (irk) {
+ bacpy(&conn->dst, &irk->bdaddr);
+ conn->dst_type = irk->addr_type;
}
if (ev->status) {
- mgmt_connect_failed(hdev, &conn->dst, conn->type,
- conn->dst_type, ev->status);
- hci_proto_connect_cfm(conn, ev->status);
- conn->state = BT_CLOSED;
- hci_conn_del(conn);
+ hci_le_conn_failed(conn, ev->status);
goto unlock;
}
if (!test_and_set_bit(HCI_CONN_MGMT_CONNECTED, &conn->flags))
- mgmt_device_connected(hdev, &ev->bdaddr, conn->type,
+ mgmt_device_connected(hdev, &conn->dst, conn->type,
conn->dst_type, 0, NULL, 0, NULL);
conn->sec_level = BT_SECURITY_LOW;
@@ -3540,25 +4062,145 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
hci_proto_connect_cfm(conn, ev->status);
+ hci_pend_le_conn_del(hdev, &conn->dst, conn->dst_type);
+
unlock:
hci_dev_unlock(hdev);
}
+/* This function requires the caller holds hdev->lock */
+static void check_pending_le_conn(struct hci_dev *hdev, bdaddr_t *addr,
+ u8 addr_type)
+{
+ struct hci_conn *conn;
+ struct smp_irk *irk;
+
+ /* If this is a resolvable address, we should resolve it and then
+ * update address and address type variables.
+ */
+ irk = hci_get_irk(hdev, addr, addr_type);
+ if (irk) {
+ addr = &irk->bdaddr;
+ addr_type = irk->addr_type;
+ }
+
+ if (!hci_pend_le_conn_lookup(hdev, addr, addr_type))
+ return;
+
+ conn = hci_connect_le(hdev, addr, addr_type, BT_SECURITY_LOW,
+ HCI_AT_NO_BONDING);
+ if (!IS_ERR(conn))
+ return;
+
+ switch (PTR_ERR(conn)) {
+ case -EBUSY:
+ /* If hci_connect() returns -EBUSY it means there is already
+ * an LE connection attempt going on. Since controllers don't
+ * support more than one connection attempt at the time, we
+ * don't consider this an error case.
+ */
+ break;
+ default:
+ BT_DBG("Failed to connect: err %ld", PTR_ERR(conn));
+ }
+}
+
+static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
+ u8 bdaddr_type, s8 rssi, u8 *data, u8 len)
+{
+ struct discovery_state *d = &hdev->discovery;
+ bool match;
+
+ /* Passive scanning shouldn't trigger any device found events */
+ if (hdev->le_scan_type == LE_SCAN_PASSIVE) {
+ if (type == LE_ADV_IND || type == LE_ADV_DIRECT_IND)
+ check_pending_le_conn(hdev, bdaddr, bdaddr_type);
+ return;
+ }
+
+ /* If there's nothing pending either store the data from this
+ * event or send an immediate device found event if the data
+ * should not be stored for later.
+ */
+ if (!has_pending_adv_report(hdev)) {
+ /* If the report will trigger a SCAN_REQ store it for
+ * later merging.
+ */
+ if (type == LE_ADV_IND || type == LE_ADV_SCAN_IND) {
+ store_pending_adv_report(hdev, bdaddr, bdaddr_type,
+ rssi, data, len);
+ return;
+ }
+
+ mgmt_device_found(hdev, bdaddr, LE_LINK, bdaddr_type, NULL,
+ rssi, 0, 1, data, len, NULL, 0);
+ return;
+ }
+
+ /* Check if the pending report is for the same device as the new one */
+ match = (!bacmp(bdaddr, &d->last_adv_addr) &&
+ bdaddr_type == d->last_adv_addr_type);
+
+ /* If the pending data doesn't match this report or this isn't a
+ * scan response (e.g. we got a duplicate ADV_IND) then force
+ * sending of the pending data.
+ */
+ if (type != LE_ADV_SCAN_RSP || !match) {
+ /* Send out whatever is in the cache, but skip duplicates */
+ if (!match)
+ mgmt_device_found(hdev, &d->last_adv_addr, LE_LINK,
+ d->last_adv_addr_type, NULL,
+ d->last_adv_rssi, 0, 1,
+ d->last_adv_data,
+ d->last_adv_data_len, NULL, 0);
+
+ /* If the new report will trigger a SCAN_REQ store it for
+ * later merging.
+ */
+ if (type == LE_ADV_IND || type == LE_ADV_SCAN_IND) {
+ store_pending_adv_report(hdev, bdaddr, bdaddr_type,
+ rssi, data, len);
+ return;
+ }
+
+ /* The advertising reports cannot be merged, so clear
+ * the pending report and send out a device found event.
+ */
+ clear_pending_adv_report(hdev);
+ mgmt_device_found(hdev, bdaddr, LE_LINK, bdaddr_type, NULL,
+ rssi, 0, 1, data, len, NULL, 0);
+ return;
+ }
+
+ /* If we get here we've got a pending ADV_IND or ADV_SCAN_IND and
+ * the new event is a SCAN_RSP. We can therefore proceed with
+ * sending a merged device found event.
+ */
+ mgmt_device_found(hdev, &d->last_adv_addr, LE_LINK,
+ d->last_adv_addr_type, NULL, rssi, 0, 1, data, len,
+ d->last_adv_data, d->last_adv_data_len);
+ clear_pending_adv_report(hdev);
+}
+
static void hci_le_adv_report_evt(struct hci_dev *hdev, struct sk_buff *skb)
{
u8 num_reports = skb->data[0];
void *ptr = &skb->data[1];
- s8 rssi;
+
+ hci_dev_lock(hdev);
while (num_reports--) {
struct hci_ev_le_advertising_info *ev = ptr;
+ s8 rssi;
rssi = ev->data[ev->length];
- mgmt_device_found(hdev, &ev->bdaddr, LE_LINK, ev->bdaddr_type,
- NULL, rssi, 0, 1, ev->data, ev->length);
+ process_adv_report(hdev, ev->evt_type, &ev->bdaddr,
+ ev->bdaddr_type, rssi, ev->data, ev->length);
ptr += sizeof(*ev) + ev->length + 1;
}
+
+ hci_dev_unlock(hdev);
}
static void hci_le_ltk_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
@@ -3577,7 +4219,7 @@ static void hci_le_ltk_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
if (conn == NULL)
goto not_found;
- ltk = hci_find_ltk(hdev, ev->ediv, ev->random);
+ ltk = hci_find_ltk(hdev, ev->ediv, ev->rand, conn->out);
if (ltk == NULL)
goto not_found;
@@ -3593,7 +4235,13 @@ static void hci_le_ltk_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
hci_send_cmd(hdev, HCI_OP_LE_LTK_REPLY, sizeof(cp), &cp);
- if (ltk->type & HCI_SMP_STK) {
+ /* Ref. Bluetooth Core SPEC pages 1975 and 2004. STK is a
+ * temporary key used to encrypt a connection following
+ * pairing. It is used during the Encrypted Session Setup to
+ * distribute the keys. Later, security can be re-established
+ * using a distributed LTK.
+ */
+ if (ltk->type == HCI_SMP_STK_SLAVE) {
list_del(&ltk->list);
kfree(ltk);
}
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 7552f9e3089..80d25c150a6 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -143,7 +143,7 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb)
if (!skb_copy) {
/* Create a private copy with headroom */
- skb_copy = __pskb_copy(skb, 1, GFP_ATOMIC);
+ skb_copy = __pskb_copy_fclone(skb, 1, GFP_ATOMIC, true);
if (!skb_copy)
continue;
@@ -211,22 +211,22 @@ void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb)
switch (bt_cb(skb)->pkt_type) {
case HCI_COMMAND_PKT:
- opcode = __constant_cpu_to_le16(HCI_MON_COMMAND_PKT);
+ opcode = cpu_to_le16(HCI_MON_COMMAND_PKT);
break;
case HCI_EVENT_PKT:
- opcode = __constant_cpu_to_le16(HCI_MON_EVENT_PKT);
+ opcode = cpu_to_le16(HCI_MON_EVENT_PKT);
break;
case HCI_ACLDATA_PKT:
if (bt_cb(skb)->incoming)
- opcode = __constant_cpu_to_le16(HCI_MON_ACL_RX_PKT);
+ opcode = cpu_to_le16(HCI_MON_ACL_RX_PKT);
else
- opcode = __constant_cpu_to_le16(HCI_MON_ACL_TX_PKT);
+ opcode = cpu_to_le16(HCI_MON_ACL_TX_PKT);
break;
case HCI_SCODATA_PKT:
if (bt_cb(skb)->incoming)
- opcode = __constant_cpu_to_le16(HCI_MON_SCO_RX_PKT);
+ opcode = cpu_to_le16(HCI_MON_SCO_RX_PKT);
else
- opcode = __constant_cpu_to_le16(HCI_MON_SCO_TX_PKT);
+ opcode = cpu_to_le16(HCI_MON_SCO_TX_PKT);
break;
default:
return;
@@ -247,8 +247,8 @@ void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb)
struct hci_mon_hdr *hdr;
/* Create a private copy with headroom */
- skb_copy = __pskb_copy(skb, HCI_MON_HDR_SIZE,
- GFP_ATOMIC);
+ skb_copy = __pskb_copy_fclone(skb, HCI_MON_HDR_SIZE,
+ GFP_ATOMIC, true);
if (!skb_copy)
continue;
@@ -319,7 +319,7 @@ static struct sk_buff *create_monitor_event(struct hci_dev *hdev, int event)
bacpy(&ni->bdaddr, &hdev->bdaddr);
memcpy(ni->name, hdev->name, 8);
- opcode = __constant_cpu_to_le16(HCI_MON_NEW_INDEX);
+ opcode = cpu_to_le16(HCI_MON_NEW_INDEX);
break;
case HCI_DEV_UNREG:
@@ -327,7 +327,7 @@ static struct sk_buff *create_monitor_event(struct hci_dev *hdev, int event)
if (!skb)
return NULL;
- opcode = __constant_cpu_to_le16(HCI_MON_DEL_INDEX);
+ opcode = cpu_to_le16(HCI_MON_DEL_INDEX);
break;
default:
@@ -524,16 +524,7 @@ static int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd,
case HCISETRAW:
if (!capable(CAP_NET_ADMIN))
return -EPERM;
-
- if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks))
- return -EPERM;
-
- if (arg)
- set_bit(HCI_RAW, &hdev->flags);
- else
- clear_bit(HCI_RAW, &hdev->flags);
-
- return 0;
+ return -EOPNOTSUPP;
case HCIGETCONNINFO:
return hci_get_conn_info(hdev, (void __user *) arg);
@@ -716,6 +707,7 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr,
err = hci_dev_open(hdev->id);
if (err) {
clear_bit(HCI_USER_CHANNEL, &hdev->dev_flags);
+ mgmt_index_added(hdev);
hci_dev_put(hdev);
goto done;
}
diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c
index 0b61250cfdf..555982a78a5 100644
--- a/net/bluetooth/hci_sysfs.c
+++ b/net/bluetooth/hci_sysfs.c
@@ -49,14 +49,7 @@ static struct attribute *bt_link_attrs[] = {
NULL
};
-static struct attribute_group bt_link_group = {
- .attrs = bt_link_attrs,
-};
-
-static const struct attribute_group *bt_link_groups[] = {
- &bt_link_group,
- NULL
-};
+ATTRIBUTE_GROUPS(bt_link);
static void bt_link_release(struct device *dev)
{
@@ -182,14 +175,7 @@ static struct attribute *bt_host_attrs[] = {
NULL
};
-static struct attribute_group bt_host_group = {
- .attrs = bt_host_attrs,
-};
-
-static const struct attribute_group *bt_host_groups[] = {
- &bt_host_group,
- NULL
-};
+ATTRIBUTE_GROUPS(bt_host);
static void bt_host_release(struct device *dev)
{
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index d9fb9345144..8181ea4bc2f 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -223,51 +223,6 @@ static void hidp_input_report(struct hidp_session *session, struct sk_buff *skb)
input_sync(dev);
}
-static int hidp_send_report(struct hidp_session *session, struct hid_report *report)
-{
- unsigned char hdr;
- u8 *buf;
- int rsize, ret;
-
- buf = hid_alloc_report_buf(report, GFP_ATOMIC);
- if (!buf)
- return -EIO;
-
- hid_output_report(report, buf);
- hdr = HIDP_TRANS_DATA | HIDP_DATA_RTYPE_OUPUT;
-
- rsize = ((report->size - 1) >> 3) + 1 + (report->id > 0);
- ret = hidp_send_intr_message(session, hdr, buf, rsize);
-
- kfree(buf);
- return ret;
-}
-
-static int hidp_hidinput_event(struct input_dev *dev, unsigned int type,
- unsigned int code, int value)
-{
- struct hid_device *hid = input_get_drvdata(dev);
- struct hidp_session *session = hid->driver_data;
- struct hid_field *field;
- int offset;
-
- BT_DBG("session %p type %d code %d value %d",
- session, type, code, value);
-
- if (type != EV_LED)
- return -1;
-
- offset = hidinput_find_field(hid, type, code, &field);
- if (offset == -1) {
- hid_warn(dev, "event field not found\n");
- return -1;
- }
-
- hid_set_field(field, offset, value);
-
- return hidp_send_report(session, field->report);
-}
-
static int hidp_get_raw_report(struct hid_device *hid,
unsigned char report_number,
unsigned char *data, size_t count,
@@ -353,17 +308,24 @@ err:
return ret;
}
-static int hidp_output_raw_report(struct hid_device *hid, unsigned char *data, size_t count,
- unsigned char report_type)
+static int hidp_set_raw_report(struct hid_device *hid, unsigned char reportnum,
+ unsigned char *data, size_t count,
+ unsigned char report_type)
{
struct hidp_session *session = hid->driver_data;
int ret;
- if (report_type == HID_OUTPUT_REPORT) {
- report_type = HIDP_TRANS_DATA | HIDP_DATA_RTYPE_OUPUT;
- return hidp_send_intr_message(session, report_type,
- data, count);
- } else if (report_type != HID_FEATURE_REPORT) {
+ switch (report_type) {
+ case HID_FEATURE_REPORT:
+ report_type = HIDP_TRANS_SET_REPORT | HIDP_DATA_RTYPE_FEATURE;
+ break;
+ case HID_INPUT_REPORT:
+ report_type = HIDP_TRANS_SET_REPORT | HIDP_DATA_RTYPE_INPUT;
+ break;
+ case HID_OUTPUT_REPORT:
+ report_type = HIDP_TRANS_SET_REPORT | HIDP_DATA_RTYPE_OUPUT;
+ break;
+ default:
return -EINVAL;
}
@@ -371,8 +333,8 @@ static int hidp_output_raw_report(struct hid_device *hid, unsigned char *data, s
return -ERESTARTSYS;
/* Set up our wait, and send the report request to the device. */
+ data[0] = reportnum;
set_bit(HIDP_WAITING_FOR_SEND_ACK, &session->flags);
- report_type = HIDP_TRANS_SET_REPORT | HIDP_DATA_RTYPE_FEATURE;
ret = hidp_send_ctrl_message(session, report_type, data, count);
if (ret)
goto err;
@@ -411,6 +373,29 @@ err:
return ret;
}
+static int hidp_output_report(struct hid_device *hid, __u8 *data, size_t count)
+{
+ struct hidp_session *session = hid->driver_data;
+
+ return hidp_send_intr_message(session,
+ HIDP_TRANS_DATA | HIDP_DATA_RTYPE_OUPUT,
+ data, count);
+}
+
+static int hidp_raw_request(struct hid_device *hid, unsigned char reportnum,
+ __u8 *buf, size_t len, unsigned char rtype,
+ int reqtype)
+{
+ switch (reqtype) {
+ case HID_REQ_GET_REPORT:
+ return hidp_get_raw_report(hid, reportnum, buf, len, rtype);
+ case HID_REQ_SET_REPORT:
+ return hidp_set_raw_report(hid, reportnum, buf, len, rtype);
+ default:
+ return -EIO;
+ }
+}
+
static void hidp_idle_timeout(unsigned long arg)
{
struct hidp_session *session = (struct hidp_session *) arg;
@@ -739,7 +724,8 @@ static struct hid_ll_driver hidp_hid_driver = {
.stop = hidp_stop,
.open = hidp_open,
.close = hidp_close,
- .hidinput_input_event = hidp_hidinput_event,
+ .raw_request = hidp_raw_request,
+ .output_report = hidp_output_report,
};
/* This function sets up the hid device. It does not add it
@@ -781,15 +767,15 @@ static int hidp_setup_hid(struct hidp_session *session,
snprintf(hid->phys, sizeof(hid->phys), "%pMR",
&l2cap_pi(session->ctrl_sock->sk)->chan->src);
+ /* NOTE: Some device modules depend on the dst address being stored in
+ * uniq. Please be aware of this before making changes to this behavior.
+ */
snprintf(hid->uniq, sizeof(hid->uniq), "%pMR",
&l2cap_pi(session->ctrl_sock->sk)->chan->dst);
hid->dev.parent = &session->conn->hcon->dev;
hid->ll_driver = &hidp_hid_driver;
- hid->hid_get_raw_report = hidp_get_raw_report;
- hid->hid_output_raw_report = hidp_output_raw_report;
-
/* True if device is blacklisted in drivers/hid/hid-core.c */
if (hid_ignore(hid)) {
hid_destroy_device(session->hid);
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index b0ad2c752d7..323f23cd2c3 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -42,6 +42,8 @@
#include "amp.h"
#include "6lowpan.h"
+#define LE_FLOWCTL_MAX_CREDITS 65535
+
bool disable_ertm;
static u32 l2cap_feat_mask = L2CAP_FEAT_FIXED_CHAN | L2CAP_FEAT_UCD;
@@ -330,44 +332,20 @@ static inline bool l2cap_seq_list_contains(struct l2cap_seq_list *seq_list,
return seq_list->list[seq & seq_list->mask] != L2CAP_SEQ_LIST_CLEAR;
}
-static u16 l2cap_seq_list_remove(struct l2cap_seq_list *seq_list, u16 seq)
+static inline u16 l2cap_seq_list_pop(struct l2cap_seq_list *seq_list)
{
+ u16 seq = seq_list->head;
u16 mask = seq_list->mask;
- if (seq_list->head == L2CAP_SEQ_LIST_CLEAR) {
- /* In case someone tries to pop the head of an empty list */
- return L2CAP_SEQ_LIST_CLEAR;
- } else if (seq_list->head == seq) {
- /* Head can be removed in constant time */
- seq_list->head = seq_list->list[seq & mask];
- seq_list->list[seq & mask] = L2CAP_SEQ_LIST_CLEAR;
-
- if (seq_list->head == L2CAP_SEQ_LIST_TAIL) {
- seq_list->head = L2CAP_SEQ_LIST_CLEAR;
- seq_list->tail = L2CAP_SEQ_LIST_CLEAR;
- }
- } else {
- /* Walk the list to find the sequence number */
- u16 prev = seq_list->head;
- while (seq_list->list[prev & mask] != seq) {
- prev = seq_list->list[prev & mask];
- if (prev == L2CAP_SEQ_LIST_TAIL)
- return L2CAP_SEQ_LIST_CLEAR;
- }
+ seq_list->head = seq_list->list[seq & mask];
+ seq_list->list[seq & mask] = L2CAP_SEQ_LIST_CLEAR;
- /* Unlink the number from the list and clear it */
- seq_list->list[prev & mask] = seq_list->list[seq & mask];
- seq_list->list[seq & mask] = L2CAP_SEQ_LIST_CLEAR;
- if (seq_list->tail == seq)
- seq_list->tail = prev;
+ if (seq_list->head == L2CAP_SEQ_LIST_TAIL) {
+ seq_list->head = L2CAP_SEQ_LIST_CLEAR;
+ seq_list->tail = L2CAP_SEQ_LIST_CLEAR;
}
- return seq;
-}
-static inline u16 l2cap_seq_list_pop(struct l2cap_seq_list *seq_list)
-{
- /* Remove the head in constant time */
- return l2cap_seq_list_remove(seq_list, seq_list->head);
+ return seq;
}
static void l2cap_seq_list_clear(struct l2cap_seq_list *seq_list)
@@ -493,8 +471,14 @@ void l2cap_chan_set_defaults(struct l2cap_chan *chan)
chan->max_tx = L2CAP_DEFAULT_MAX_TX;
chan->tx_win = L2CAP_DEFAULT_TX_WINDOW;
chan->tx_win_max = L2CAP_DEFAULT_TX_WINDOW;
+ chan->remote_max_tx = chan->max_tx;
+ chan->remote_tx_win = chan->tx_win;
chan->ack_win = L2CAP_DEFAULT_TX_WINDOW;
chan->sec_level = BT_SECURITY_LOW;
+ chan->flush_to = L2CAP_DEFAULT_FLUSH_TO;
+ chan->retrans_timeout = L2CAP_DEFAULT_RETRANS_TO;
+ chan->monitor_timeout = L2CAP_DEFAULT_MONITOR_TO;
+ chan->conf_state = 0;
set_bit(FLAG_FORCE_ACTIVE, &chan->flags);
}
@@ -506,7 +490,7 @@ static void l2cap_le_flowctl_init(struct l2cap_chan *chan)
chan->sdu_len = 0;
chan->tx_credits = 0;
chan->rx_credits = le_max_credits;
- chan->mps = min_t(u16, chan->imtu, L2CAP_LE_DEFAULT_MPS);
+ chan->mps = min_t(u16, chan->imtu, le_default_mps);
skb_queue_head_init(&chan->tx_q);
}
@@ -522,18 +506,10 @@ void __l2cap_chan_add(struct l2cap_conn *conn, struct l2cap_chan *chan)
switch (chan->chan_type) {
case L2CAP_CHAN_CONN_ORIENTED:
- if (conn->hcon->type == LE_LINK) {
- if (chan->dcid == L2CAP_CID_ATT) {
- chan->omtu = L2CAP_DEFAULT_MTU;
- chan->scid = L2CAP_CID_ATT;
- } else {
- chan->scid = l2cap_alloc_cid(conn);
- }
- } else {
- /* Alloc CID for connection-oriented socket */
- chan->scid = l2cap_alloc_cid(conn);
+ /* Alloc CID for connection-oriented socket */
+ chan->scid = l2cap_alloc_cid(conn);
+ if (conn->hcon->type == ACL_LINK)
chan->omtu = L2CAP_DEFAULT_MTU;
- }
break;
case L2CAP_CHAN_CONN_LESS:
@@ -543,11 +519,8 @@ void __l2cap_chan_add(struct l2cap_conn *conn, struct l2cap_chan *chan)
chan->omtu = L2CAP_DEFAULT_MTU;
break;
- case L2CAP_CHAN_CONN_FIX_A2MP:
- chan->scid = L2CAP_CID_A2MP;
- chan->dcid = L2CAP_CID_A2MP;
- chan->omtu = L2CAP_A2MP_DEFAULT_MTU;
- chan->imtu = L2CAP_A2MP_DEFAULT_MTU;
+ case L2CAP_CHAN_FIXED:
+ /* Caller will set CID and CID specific MTU values */
break;
default:
@@ -595,7 +568,7 @@ void l2cap_chan_del(struct l2cap_chan *chan, int err)
chan->conn = NULL;
- if (chan->chan_type != L2CAP_CHAN_CONN_FIX_A2MP)
+ if (chan->scid != L2CAP_CID_A2MP)
hci_conn_drop(conn->hcon);
if (mgr && mgr->bredr_chan == chan)
@@ -642,6 +615,23 @@ void l2cap_chan_del(struct l2cap_chan *chan, int err)
return;
}
+void l2cap_conn_update_id_addr(struct hci_conn *hcon)
+{
+ struct l2cap_conn *conn = hcon->l2cap_data;
+ struct l2cap_chan *chan;
+
+ mutex_lock(&conn->chan_lock);
+
+ list_for_each_entry(chan, &conn->chan_l, list) {
+ l2cap_chan_lock(chan);
+ bacpy(&chan->dst, &hcon->dst);
+ chan->dst_type = bdaddr_type(hcon, hcon->dst_type);
+ l2cap_chan_unlock(chan);
+ }
+
+ mutex_unlock(&conn->chan_lock);
+}
+
static void l2cap_chan_le_connect_reject(struct l2cap_chan *chan)
{
struct l2cap_conn *conn = chan->conn;
@@ -681,7 +671,7 @@ static void l2cap_chan_connect_reject(struct l2cap_chan *chan)
rsp.scid = cpu_to_le16(chan->dcid);
rsp.dcid = cpu_to_le16(chan->scid);
rsp.result = cpu_to_le16(result);
- rsp.status = __constant_cpu_to_le16(L2CAP_CS_NO_INFO);
+ rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO);
l2cap_send_cmd(conn, chan->ident, L2CAP_CONN_RSP, sizeof(rsp), &rsp);
}
@@ -699,10 +689,7 @@ void l2cap_chan_close(struct l2cap_chan *chan, int reason)
case BT_CONNECTED:
case BT_CONFIG:
- /* ATT uses L2CAP_CHAN_CONN_ORIENTED so we must also
- * check for chan->psm.
- */
- if (chan->chan_type == L2CAP_CHAN_CONN_ORIENTED && chan->psm) {
+ if (chan->chan_type == L2CAP_CHAN_CONN_ORIENTED) {
__set_chan_timer(chan, chan->ops->get_sndtimeo(chan));
l2cap_send_disconn_req(chan, reason);
} else
@@ -737,6 +724,7 @@ static inline u8 l2cap_get_auth_type(struct l2cap_chan *chan)
case L2CAP_CHAN_RAW:
switch (chan->sec_level) {
case BT_SECURITY_HIGH:
+ case BT_SECURITY_FIPS:
return HCI_AT_DEDICATED_BONDING_MITM;
case BT_SECURITY_MEDIUM:
return HCI_AT_DEDICATED_BONDING;
@@ -745,21 +733,23 @@ static inline u8 l2cap_get_auth_type(struct l2cap_chan *chan)
}
break;
case L2CAP_CHAN_CONN_LESS:
- if (chan->psm == __constant_cpu_to_le16(L2CAP_PSM_3DSP)) {
+ if (chan->psm == cpu_to_le16(L2CAP_PSM_3DSP)) {
if (chan->sec_level == BT_SECURITY_LOW)
chan->sec_level = BT_SECURITY_SDP;
}
- if (chan->sec_level == BT_SECURITY_HIGH)
+ if (chan->sec_level == BT_SECURITY_HIGH ||
+ chan->sec_level == BT_SECURITY_FIPS)
return HCI_AT_NO_BONDING_MITM;
else
return HCI_AT_NO_BONDING;
break;
case L2CAP_CHAN_CONN_ORIENTED:
- if (chan->psm == __constant_cpu_to_le16(L2CAP_PSM_SDP)) {
+ if (chan->psm == cpu_to_le16(L2CAP_PSM_SDP)) {
if (chan->sec_level == BT_SECURITY_LOW)
chan->sec_level = BT_SECURITY_SDP;
- if (chan->sec_level == BT_SECURITY_HIGH)
+ if (chan->sec_level == BT_SECURITY_HIGH ||
+ chan->sec_level == BT_SECURITY_FIPS)
return HCI_AT_NO_BONDING_MITM;
else
return HCI_AT_NO_BONDING;
@@ -768,6 +758,7 @@ static inline u8 l2cap_get_auth_type(struct l2cap_chan *chan)
default:
switch (chan->sec_level) {
case BT_SECURITY_HIGH:
+ case BT_SECURITY_FIPS:
return HCI_AT_GENERAL_BONDING_MITM;
case BT_SECURITY_MEDIUM:
return HCI_AT_GENERAL_BONDING;
@@ -1288,7 +1279,7 @@ static void l2cap_do_start(struct l2cap_chan *chan)
}
} else {
struct l2cap_info_req req;
- req.type = __constant_cpu_to_le16(L2CAP_IT_FEAT_MASK);
+ req.type = cpu_to_le16(L2CAP_IT_FEAT_MASK);
conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_SENT;
conn->info_ident = l2cap_get_ident(conn);
@@ -1330,7 +1321,7 @@ static void l2cap_send_disconn_req(struct l2cap_chan *chan, int err)
__clear_ack_timer(chan);
}
- if (chan->chan_type == L2CAP_CHAN_CONN_FIX_A2MP) {
+ if (chan->scid == L2CAP_CID_A2MP) {
l2cap_state_change(chan, BT_DISCONN);
return;
}
@@ -1385,18 +1376,18 @@ static void l2cap_conn_start(struct l2cap_conn *conn)
if (l2cap_chan_check_security(chan)) {
if (test_bit(FLAG_DEFER_SETUP, &chan->flags)) {
- rsp.result = __constant_cpu_to_le16(L2CAP_CR_PEND);
- rsp.status = __constant_cpu_to_le16(L2CAP_CS_AUTHOR_PEND);
+ rsp.result = cpu_to_le16(L2CAP_CR_PEND);
+ rsp.status = cpu_to_le16(L2CAP_CS_AUTHOR_PEND);
chan->ops->defer(chan);
} else {
l2cap_state_change(chan, BT_CONFIG);
- rsp.result = __constant_cpu_to_le16(L2CAP_CR_SUCCESS);
- rsp.status = __constant_cpu_to_le16(L2CAP_CS_NO_INFO);
+ rsp.result = cpu_to_le16(L2CAP_CR_SUCCESS);
+ rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO);
}
} else {
- rsp.result = __constant_cpu_to_le16(L2CAP_CR_PEND);
- rsp.status = __constant_cpu_to_le16(L2CAP_CS_AUTHEN_PEND);
+ rsp.result = cpu_to_le16(L2CAP_CR_PEND);
+ rsp.status = cpu_to_le16(L2CAP_CS_AUTHEN_PEND);
}
l2cap_send_cmd(conn, chan->ident, L2CAP_CONN_RSP,
@@ -1493,8 +1484,6 @@ static void l2cap_le_conn_ready(struct l2cap_conn *conn)
if (!chan)
goto clean;
- chan->dcid = L2CAP_CID_ATT;
-
bacpy(&chan->src, &hcon->src);
bacpy(&chan->dst, &hcon->dst);
chan->src_type = bdaddr_type(hcon, hcon->src_type);
@@ -1528,7 +1517,7 @@ static void l2cap_conn_ready(struct l2cap_conn *conn)
l2cap_chan_lock(chan);
- if (chan->chan_type == L2CAP_CHAN_CONN_FIX_A2MP) {
+ if (chan->scid == L2CAP_CID_A2MP) {
l2cap_chan_unlock(chan);
continue;
}
@@ -1546,6 +1535,8 @@ static void l2cap_conn_ready(struct l2cap_conn *conn)
}
mutex_unlock(&conn->chan_lock);
+
+ queue_work(hcon->hdev->workqueue, &conn->pending_rx_work);
}
/* Notify sockets that we cannot guaranty reliability anymore */
@@ -1671,6 +1662,15 @@ static void l2cap_conn_del(struct hci_conn *hcon, int err)
kfree_skb(conn->rx_skb);
+ skb_queue_purge(&conn->pending_rx);
+
+ /* We can not call flush_work(&conn->pending_rx_work) here since we
+ * might block if we are running on a worker from the same workqueue
+ * pending_rx_work is waiting on.
+ */
+ if (work_pending(&conn->pending_rx_work))
+ cancel_work_sync(&conn->pending_rx_work);
+
l2cap_unregister_all_users(conn);
mutex_lock(&conn->chan_lock);
@@ -1718,66 +1718,6 @@ static void security_timeout(struct work_struct *work)
}
}
-static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon)
-{
- struct l2cap_conn *conn = hcon->l2cap_data;
- struct hci_chan *hchan;
-
- if (conn)
- return conn;
-
- hchan = hci_chan_create(hcon);
- if (!hchan)
- return NULL;
-
- conn = kzalloc(sizeof(struct l2cap_conn), GFP_KERNEL);
- if (!conn) {
- hci_chan_del(hchan);
- return NULL;
- }
-
- kref_init(&conn->ref);
- hcon->l2cap_data = conn;
- conn->hcon = hcon;
- hci_conn_get(conn->hcon);
- conn->hchan = hchan;
-
- BT_DBG("hcon %p conn %p hchan %p", hcon, conn, hchan);
-
- switch (hcon->type) {
- case LE_LINK:
- if (hcon->hdev->le_mtu) {
- conn->mtu = hcon->hdev->le_mtu;
- break;
- }
- /* fall through */
- default:
- conn->mtu = hcon->hdev->acl_mtu;
- break;
- }
-
- conn->feat_mask = 0;
-
- if (hcon->type == ACL_LINK)
- conn->hs_enabled = test_bit(HCI_HS_ENABLED,
- &hcon->hdev->dev_flags);
-
- spin_lock_init(&conn->lock);
- mutex_init(&conn->chan_lock);
-
- INIT_LIST_HEAD(&conn->chan_l);
- INIT_LIST_HEAD(&conn->users);
-
- if (hcon->type == LE_LINK)
- INIT_DELAYED_WORK(&conn->security_timer, security_timeout);
- else
- INIT_DELAYED_WORK(&conn->info_timer, l2cap_info_timeout);
-
- conn->disc_reason = HCI_ERROR_REMOTE_USER_TERM;
-
- return conn;
-}
-
static void l2cap_conn_free(struct kref *ref)
{
struct l2cap_conn *conn = container_of(ref, struct l2cap_conn, ref);
@@ -1848,154 +1788,6 @@ static struct l2cap_chan *l2cap_global_chan_by_psm(int state, __le16 psm,
return c1;
}
-static bool is_valid_psm(u16 psm, u8 dst_type)
-{
- if (!psm)
- return false;
-
- if (bdaddr_type_is_le(dst_type))
- return (psm <= 0x00ff);
-
- /* PSM must be odd and lsb of upper byte must be 0 */
- return ((psm & 0x0101) == 0x0001);
-}
-
-int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid,
- bdaddr_t *dst, u8 dst_type)
-{
- struct l2cap_conn *conn;
- struct hci_conn *hcon;
- struct hci_dev *hdev;
- __u8 auth_type;
- int err;
-
- BT_DBG("%pMR -> %pMR (type %u) psm 0x%2.2x", &chan->src, dst,
- dst_type, __le16_to_cpu(psm));
-
- hdev = hci_get_route(dst, &chan->src);
- if (!hdev)
- return -EHOSTUNREACH;
-
- hci_dev_lock(hdev);
-
- l2cap_chan_lock(chan);
-
- if (!is_valid_psm(__le16_to_cpu(psm), dst_type) && !cid &&
- chan->chan_type != L2CAP_CHAN_RAW) {
- err = -EINVAL;
- goto done;
- }
-
- if (chan->chan_type == L2CAP_CHAN_CONN_ORIENTED && !(psm || cid)) {
- err = -EINVAL;
- goto done;
- }
-
- switch (chan->mode) {
- case L2CAP_MODE_BASIC:
- break;
- case L2CAP_MODE_LE_FLOWCTL:
- l2cap_le_flowctl_init(chan);
- break;
- case L2CAP_MODE_ERTM:
- case L2CAP_MODE_STREAMING:
- if (!disable_ertm)
- break;
- /* fall through */
- default:
- err = -ENOTSUPP;
- goto done;
- }
-
- switch (chan->state) {
- case BT_CONNECT:
- case BT_CONNECT2:
- case BT_CONFIG:
- /* Already connecting */
- err = 0;
- goto done;
-
- case BT_CONNECTED:
- /* Already connected */
- err = -EISCONN;
- goto done;
-
- case BT_OPEN:
- case BT_BOUND:
- /* Can connect */
- break;
-
- default:
- err = -EBADFD;
- goto done;
- }
-
- /* Set destination address and psm */
- bacpy(&chan->dst, dst);
- chan->dst_type = dst_type;
-
- chan->psm = psm;
- chan->dcid = cid;
-
- auth_type = l2cap_get_auth_type(chan);
-
- if (bdaddr_type_is_le(dst_type))
- hcon = hci_connect(hdev, LE_LINK, dst, dst_type,
- chan->sec_level, auth_type);
- else
- hcon = hci_connect(hdev, ACL_LINK, dst, dst_type,
- chan->sec_level, auth_type);
-
- if (IS_ERR(hcon)) {
- err = PTR_ERR(hcon);
- goto done;
- }
-
- conn = l2cap_conn_add(hcon);
- if (!conn) {
- hci_conn_drop(hcon);
- err = -ENOMEM;
- goto done;
- }
-
- if (cid && __l2cap_get_chan_by_dcid(conn, cid)) {
- hci_conn_drop(hcon);
- err = -EBUSY;
- goto done;
- }
-
- /* Update source addr of the socket */
- bacpy(&chan->src, &hcon->src);
- chan->src_type = bdaddr_type(hcon, hcon->src_type);
-
- l2cap_chan_unlock(chan);
- l2cap_chan_add(conn, chan);
- l2cap_chan_lock(chan);
-
- /* l2cap_chan_add takes its own ref so we can drop this one */
- hci_conn_drop(hcon);
-
- l2cap_state_change(chan, BT_CONNECT);
- __set_chan_timer(chan, chan->ops->get_sndtimeo(chan));
-
- if (hcon->state == BT_CONNECTED) {
- if (chan->chan_type != L2CAP_CHAN_CONN_ORIENTED) {
- __clear_chan_timer(chan);
- if (l2cap_chan_check_security(chan))
- l2cap_state_change(chan, BT_CONNECTED);
- } else
- l2cap_do_start(chan);
- }
-
- err = 0;
-
-done:
- l2cap_chan_unlock(chan);
- hci_dev_unlock(hdev);
- hci_dev_put(hdev);
- return err;
-}
-
static void l2cap_monitor_timeout(struct work_struct *work)
{
struct l2cap_chan *chan = container_of(work, struct l2cap_chan,
@@ -2654,6 +2446,14 @@ int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len,
if (IS_ERR(skb))
return PTR_ERR(skb);
+ /* Channel lock is released before requesting new skb and then
+ * reacquired thus we need to recheck channel state.
+ */
+ if (chan->state != BT_CONNECTED) {
+ kfree_skb(skb);
+ return -ENOTCONN;
+ }
+
l2cap_do_send(chan, skb);
return len;
}
@@ -2703,6 +2503,14 @@ int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len,
if (IS_ERR(skb))
return PTR_ERR(skb);
+ /* Channel lock is released before requesting new skb and then
+ * reacquired thus we need to recheck channel state.
+ */
+ if (chan->state != BT_CONNECTED) {
+ kfree_skb(skb);
+ return -ENOTCONN;
+ }
+
l2cap_do_send(chan, skb);
err = len;
break;
@@ -3099,9 +2907,9 @@ static struct sk_buff *l2cap_build_cmd(struct l2cap_conn *conn, u8 code,
lh->len = cpu_to_le16(L2CAP_CMD_HDR_SIZE + dlen);
if (conn->hcon->type == LE_LINK)
- lh->cid = __constant_cpu_to_le16(L2CAP_CID_LE_SIGNALING);
+ lh->cid = cpu_to_le16(L2CAP_CID_LE_SIGNALING);
else
- lh->cid = __constant_cpu_to_le16(L2CAP_CID_SIGNALING);
+ lh->cid = cpu_to_le16(L2CAP_CID_SIGNALING);
cmd = (struct l2cap_cmd_hdr *) skb_put(skb, L2CAP_CMD_HDR_SIZE);
cmd->code = code;
@@ -3214,8 +3022,8 @@ static void l2cap_add_opt_efs(void **ptr, struct l2cap_chan *chan)
efs.stype = chan->local_stype;
efs.msdu = cpu_to_le16(chan->local_msdu);
efs.sdu_itime = cpu_to_le32(chan->local_sdu_itime);
- efs.acc_lat = __constant_cpu_to_le32(L2CAP_DEFAULT_ACC_LAT);
- efs.flush_to = __constant_cpu_to_le32(L2CAP_EFS_DEFAULT_FLUSH_TO);
+ efs.acc_lat = cpu_to_le32(L2CAP_DEFAULT_ACC_LAT);
+ efs.flush_to = cpu_to_le32(L2CAP_EFS_DEFAULT_FLUSH_TO);
break;
case L2CAP_MODE_STREAMING:
@@ -3356,8 +3164,8 @@ static void __l2cap_set_ertm_timeouts(struct l2cap_chan *chan,
rfc->retrans_timeout = cpu_to_le16((u16) ertm_to);
rfc->monitor_timeout = rfc->retrans_timeout;
} else {
- rfc->retrans_timeout = __constant_cpu_to_le16(L2CAP_DEFAULT_RETRANS_TO);
- rfc->monitor_timeout = __constant_cpu_to_le16(L2CAP_DEFAULT_MONITOR_TO);
+ rfc->retrans_timeout = cpu_to_le16(L2CAP_DEFAULT_RETRANS_TO);
+ rfc->monitor_timeout = cpu_to_le16(L2CAP_DEFAULT_MONITOR_TO);
}
}
@@ -3489,7 +3297,7 @@ done:
}
req->dcid = cpu_to_le16(chan->dcid);
- req->flags = __constant_cpu_to_le16(0);
+ req->flags = cpu_to_le16(0);
return ptr - data;
}
@@ -3703,7 +3511,7 @@ done:
}
rsp->scid = cpu_to_le16(chan->dcid);
rsp->result = cpu_to_le16(result);
- rsp->flags = __constant_cpu_to_le16(0);
+ rsp->flags = cpu_to_le16(0);
return ptr - data;
}
@@ -3812,7 +3620,7 @@ static int l2cap_parse_conf_rsp(struct l2cap_chan *chan, void *rsp, int len,
}
req->dcid = cpu_to_le16(chan->dcid);
- req->flags = __constant_cpu_to_le16(0);
+ req->flags = cpu_to_le16(0);
return ptr - data;
}
@@ -3843,7 +3651,7 @@ void __l2cap_le_connect_rsp_defer(struct l2cap_chan *chan)
rsp.mtu = cpu_to_le16(chan->imtu);
rsp.mps = cpu_to_le16(chan->mps);
rsp.credits = cpu_to_le16(chan->rx_credits);
- rsp.result = __constant_cpu_to_le16(L2CAP_CR_SUCCESS);
+ rsp.result = cpu_to_le16(L2CAP_CR_SUCCESS);
l2cap_send_cmd(conn, chan->ident, L2CAP_LE_CONN_RSP, sizeof(rsp),
&rsp);
@@ -3858,8 +3666,8 @@ void __l2cap_connect_rsp_defer(struct l2cap_chan *chan)
rsp.scid = cpu_to_le16(chan->dcid);
rsp.dcid = cpu_to_le16(chan->scid);
- rsp.result = __constant_cpu_to_le16(L2CAP_CR_SUCCESS);
- rsp.status = __constant_cpu_to_le16(L2CAP_CS_NO_INFO);
+ rsp.result = cpu_to_le16(L2CAP_CR_SUCCESS);
+ rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO);
if (chan->hs_hcon)
rsp_code = L2CAP_CREATE_CHAN_RSP;
@@ -3888,8 +3696,8 @@ static void l2cap_conf_rfc_get(struct l2cap_chan *chan, void *rsp, int len)
u16 txwin_ext = chan->ack_win;
struct l2cap_conf_rfc rfc = {
.mode = chan->mode,
- .retrans_timeout = __constant_cpu_to_le16(L2CAP_DEFAULT_RETRANS_TO),
- .monitor_timeout = __constant_cpu_to_le16(L2CAP_DEFAULT_MONITOR_TO),
+ .retrans_timeout = cpu_to_le16(L2CAP_DEFAULT_RETRANS_TO),
+ .monitor_timeout = cpu_to_le16(L2CAP_DEFAULT_MONITOR_TO),
.max_pdu_size = cpu_to_le16(chan->imtu),
.txwin_size = min_t(u16, chan->ack_win, L2CAP_DEFAULT_TX_WINDOW),
};
@@ -3980,7 +3788,7 @@ static struct l2cap_chan *l2cap_connect(struct l2cap_conn *conn,
l2cap_chan_lock(pchan);
/* Check if the ACL is secure enough (if not SDP) */
- if (psm != __constant_cpu_to_le16(L2CAP_PSM_SDP) &&
+ if (psm != cpu_to_le16(L2CAP_PSM_SDP) &&
!hci_conn_check_link_mode(conn->hcon)) {
conn->disc_reason = HCI_ERROR_AUTH_FAILURE;
result = L2CAP_CR_SEC_BLOCK;
@@ -4065,7 +3873,7 @@ sendresp:
if (result == L2CAP_CR_PEND && status == L2CAP_CS_NO_INFO) {
struct l2cap_info_req info;
- info.type = __constant_cpu_to_le16(L2CAP_IT_FEAT_MASK);
+ info.type = cpu_to_le16(L2CAP_IT_FEAT_MASK);
conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_SENT;
conn->info_ident = l2cap_get_ident(conn);
@@ -4214,7 +4022,7 @@ static void cmd_reject_invalid_cid(struct l2cap_conn *conn, u8 ident,
{
struct l2cap_cmd_rej_cid rej;
- rej.reason = __constant_cpu_to_le16(L2CAP_REJ_INVALID_CID);
+ rej.reason = cpu_to_le16(L2CAP_REJ_INVALID_CID);
rej.scid = __cpu_to_le16(scid);
rej.dcid = __cpu_to_le16(dcid);
@@ -4546,8 +4354,8 @@ static inline int l2cap_information_req(struct l2cap_conn *conn,
u8 buf[8];
u32 feat_mask = l2cap_feat_mask;
struct l2cap_info_rsp *rsp = (struct l2cap_info_rsp *) buf;
- rsp->type = __constant_cpu_to_le16(L2CAP_IT_FEAT_MASK);
- rsp->result = __constant_cpu_to_le16(L2CAP_IR_SUCCESS);
+ rsp->type = cpu_to_le16(L2CAP_IT_FEAT_MASK);
+ rsp->result = cpu_to_le16(L2CAP_IR_SUCCESS);
if (!disable_ertm)
feat_mask |= L2CAP_FEAT_ERTM | L2CAP_FEAT_STREAMING
| L2CAP_FEAT_FCS;
@@ -4567,15 +4375,15 @@ static inline int l2cap_information_req(struct l2cap_conn *conn,
else
l2cap_fixed_chan[0] &= ~L2CAP_FC_A2MP;
- rsp->type = __constant_cpu_to_le16(L2CAP_IT_FIXED_CHAN);
- rsp->result = __constant_cpu_to_le16(L2CAP_IR_SUCCESS);
+ rsp->type = cpu_to_le16(L2CAP_IT_FIXED_CHAN);
+ rsp->result = cpu_to_le16(L2CAP_IR_SUCCESS);
memcpy(rsp->data, l2cap_fixed_chan, sizeof(l2cap_fixed_chan));
l2cap_send_cmd(conn, cmd->ident, L2CAP_INFO_RSP, sizeof(buf),
buf);
} else {
struct l2cap_info_rsp rsp;
rsp.type = cpu_to_le16(type);
- rsp.result = __constant_cpu_to_le16(L2CAP_IR_NOTSUPP);
+ rsp.result = cpu_to_le16(L2CAP_IR_NOTSUPP);
l2cap_send_cmd(conn, cmd->ident, L2CAP_INFO_RSP, sizeof(rsp),
&rsp);
}
@@ -4620,7 +4428,7 @@ static inline int l2cap_information_rsp(struct l2cap_conn *conn,
if (conn->feat_mask & L2CAP_FEAT_FIXED_CHAN) {
struct l2cap_info_req req;
- req.type = __constant_cpu_to_le16(L2CAP_IT_FIXED_CHAN);
+ req.type = cpu_to_le16(L2CAP_IT_FIXED_CHAN);
conn->info_ident = l2cap_get_ident(conn);
@@ -4714,8 +4522,8 @@ static int l2cap_create_channel_req(struct l2cap_conn *conn,
error:
rsp.dcid = 0;
rsp.scid = cpu_to_le16(scid);
- rsp.result = __constant_cpu_to_le16(L2CAP_CR_BAD_AMP);
- rsp.status = __constant_cpu_to_le16(L2CAP_CS_NO_INFO);
+ rsp.result = cpu_to_le16(L2CAP_CR_BAD_AMP);
+ rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO);
l2cap_send_cmd(conn, cmd->ident, L2CAP_CREATE_CHAN_RSP,
sizeof(rsp), &rsp);
@@ -4779,7 +4587,7 @@ static void l2cap_send_move_chan_cfm_icid(struct l2cap_conn *conn, u16 icid)
BT_DBG("conn %p, icid 0x%4.4x", conn, icid);
cfm.icid = cpu_to_le16(icid);
- cfm.result = __constant_cpu_to_le16(L2CAP_MC_UNCONFIRMED);
+ cfm.result = cpu_to_le16(L2CAP_MC_UNCONFIRMED);
l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_MOVE_CHAN_CFM,
sizeof(cfm), &cfm);
@@ -4962,12 +4770,12 @@ static void l2cap_do_create(struct l2cap_chan *chan, int result,
if (result == L2CAP_CR_SUCCESS) {
/* Send successful response */
- rsp.result = __constant_cpu_to_le16(L2CAP_CR_SUCCESS);
- rsp.status = __constant_cpu_to_le16(L2CAP_CS_NO_INFO);
+ rsp.result = cpu_to_le16(L2CAP_CR_SUCCESS);
+ rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO);
} else {
/* Send negative response */
- rsp.result = __constant_cpu_to_le16(L2CAP_CR_NO_MEM);
- rsp.status = __constant_cpu_to_le16(L2CAP_CS_NO_INFO);
+ rsp.result = cpu_to_le16(L2CAP_CR_NO_MEM);
+ rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO);
}
l2cap_send_cmd(chan->conn, chan->ident, L2CAP_CREATE_CHAN_RSP,
@@ -5095,7 +4903,7 @@ static inline int l2cap_move_channel_req(struct l2cap_conn *conn,
chan = l2cap_get_chan_by_dcid(conn, icid);
if (!chan) {
rsp.icid = cpu_to_le16(icid);
- rsp.result = __constant_cpu_to_le16(L2CAP_MR_NOT_ALLOWED);
+ rsp.result = cpu_to_le16(L2CAP_MR_NOT_ALLOWED);
l2cap_send_cmd(conn, cmd->ident, L2CAP_MOVE_CHAN_RSP,
sizeof(rsp), &rsp);
return 0;
@@ -5439,9 +5247,9 @@ static inline int l2cap_conn_param_update_req(struct l2cap_conn *conn,
err = l2cap_check_conn_param(min, max, latency, to_multiplier);
if (err)
- rsp.result = __constant_cpu_to_le16(L2CAP_CONN_PARAM_REJECTED);
+ rsp.result = cpu_to_le16(L2CAP_CONN_PARAM_REJECTED);
else
- rsp.result = __constant_cpu_to_le16(L2CAP_CONN_PARAM_ACCEPTED);
+ rsp.result = cpu_to_le16(L2CAP_CONN_PARAM_ACCEPTED);
l2cap_send_cmd(conn, cmd->ident, L2CAP_CONN_PARAM_UPDATE_RSP,
sizeof(rsp), &rsp);
@@ -5709,7 +5517,7 @@ static inline int l2cap_le_credits(struct l2cap_conn *conn,
{
struct l2cap_le_credits *pkt;
struct l2cap_chan *chan;
- u16 cid, credits;
+ u16 cid, credits, max_credits;
if (cmd_len != sizeof(*pkt))
return -EPROTO;
@@ -5724,6 +5532,17 @@ static inline int l2cap_le_credits(struct l2cap_conn *conn,
if (!chan)
return -EBADSLT;
+ max_credits = LE_FLOWCTL_MAX_CREDITS - chan->tx_credits;
+ if (credits > max_credits) {
+ BT_ERR("LE credits overflow");
+ l2cap_send_disconn_req(chan, ECONNRESET);
+
+ /* Return 0 so that we don't trigger an unnecessary
+ * command reject packet.
+ */
+ return 0;
+ }
+
chan->tx_credits += credits;
while (chan->tx_credits && !skb_queue_empty(&chan->tx_q)) {
@@ -5770,17 +5589,6 @@ static inline int l2cap_le_sig_cmd(struct l2cap_conn *conn,
{
int err = 0;
- if (!enable_lecoc) {
- switch (cmd->code) {
- case L2CAP_LE_CONN_REQ:
- case L2CAP_LE_CONN_RSP:
- case L2CAP_LE_CREDITS:
- case L2CAP_DISCONN_REQ:
- case L2CAP_DISCONN_RSP:
- return -EINVAL;
- }
- }
-
switch (cmd->code) {
case L2CAP_COMMAND_REJ:
l2cap_le_command_rej(conn, cmd, cmd_len, data);
@@ -5854,7 +5662,7 @@ static inline void l2cap_le_sig_channel(struct l2cap_conn *conn,
BT_ERR("Wrong link type (%d)", err);
- rej.reason = __constant_cpu_to_le16(L2CAP_REJ_NOT_UNDERSTOOD);
+ rej.reason = cpu_to_le16(L2CAP_REJ_NOT_UNDERSTOOD);
l2cap_send_cmd(conn, cmd->ident, L2CAP_COMMAND_REJ,
sizeof(rej), &rej);
}
@@ -5899,7 +5707,7 @@ static inline void l2cap_sig_channel(struct l2cap_conn *conn,
BT_ERR("Wrong link type (%d)", err);
- rej.reason = __constant_cpu_to_le16(L2CAP_REJ_NOT_UNDERSTOOD);
+ rej.reason = cpu_to_le16(L2CAP_REJ_NOT_UNDERSTOOD);
l2cap_send_cmd(conn, cmd.ident, L2CAP_COMMAND_REJ,
sizeof(rej), &rej);
}
@@ -6871,6 +6679,7 @@ static int l2cap_le_data_rcv(struct l2cap_chan *chan, struct sk_buff *skb)
if (!chan->rx_credits) {
BT_ERR("No credits to receive LE L2CAP data");
+ l2cap_send_disconn_req(chan, ECONNRESET);
return -ENOBUFS;
}
@@ -6995,8 +6804,10 @@ static void l2cap_data_channel(struct l2cap_conn *conn, u16 cid,
* But we don't have any other choice. L2CAP doesn't
* provide flow control mechanism. */
- if (chan->imtu < skb->len)
+ if (chan->imtu < skb->len) {
+ BT_ERR("Dropping L2CAP data: receive buffer overflow");
goto drop;
+ }
if (!chan->ops->recv(chan, skb))
goto done;
@@ -7084,9 +6895,16 @@ drop:
static void l2cap_recv_frame(struct l2cap_conn *conn, struct sk_buff *skb)
{
struct l2cap_hdr *lh = (void *) skb->data;
+ struct hci_conn *hcon = conn->hcon;
u16 cid, len;
__le16 psm;
+ if (hcon->state != BT_CONNECTED) {
+ BT_DBG("queueing pending rx skb");
+ skb_queue_tail(&conn->pending_rx, skb);
+ return;
+ }
+
skb_pull(skb, L2CAP_HDR_SIZE);
cid = __le16_to_cpu(lh->cid);
len = __le16_to_cpu(lh->len);
@@ -7132,6 +6950,247 @@ static void l2cap_recv_frame(struct l2cap_conn *conn, struct sk_buff *skb)
}
}
+static void process_pending_rx(struct work_struct *work)
+{
+ struct l2cap_conn *conn = container_of(work, struct l2cap_conn,
+ pending_rx_work);
+ struct sk_buff *skb;
+
+ BT_DBG("");
+
+ while ((skb = skb_dequeue(&conn->pending_rx)))
+ l2cap_recv_frame(conn, skb);
+}
+
+static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon)
+{
+ struct l2cap_conn *conn = hcon->l2cap_data;
+ struct hci_chan *hchan;
+
+ if (conn)
+ return conn;
+
+ hchan = hci_chan_create(hcon);
+ if (!hchan)
+ return NULL;
+
+ conn = kzalloc(sizeof(struct l2cap_conn), GFP_KERNEL);
+ if (!conn) {
+ hci_chan_del(hchan);
+ return NULL;
+ }
+
+ kref_init(&conn->ref);
+ hcon->l2cap_data = conn;
+ conn->hcon = hcon;
+ hci_conn_get(conn->hcon);
+ conn->hchan = hchan;
+
+ BT_DBG("hcon %p conn %p hchan %p", hcon, conn, hchan);
+
+ switch (hcon->type) {
+ case LE_LINK:
+ if (hcon->hdev->le_mtu) {
+ conn->mtu = hcon->hdev->le_mtu;
+ break;
+ }
+ /* fall through */
+ default:
+ conn->mtu = hcon->hdev->acl_mtu;
+ break;
+ }
+
+ conn->feat_mask = 0;
+
+ if (hcon->type == ACL_LINK)
+ conn->hs_enabled = test_bit(HCI_HS_ENABLED,
+ &hcon->hdev->dev_flags);
+
+ spin_lock_init(&conn->lock);
+ mutex_init(&conn->chan_lock);
+
+ INIT_LIST_HEAD(&conn->chan_l);
+ INIT_LIST_HEAD(&conn->users);
+
+ if (hcon->type == LE_LINK)
+ INIT_DELAYED_WORK(&conn->security_timer, security_timeout);
+ else
+ INIT_DELAYED_WORK(&conn->info_timer, l2cap_info_timeout);
+
+ skb_queue_head_init(&conn->pending_rx);
+ INIT_WORK(&conn->pending_rx_work, process_pending_rx);
+
+ conn->disc_reason = HCI_ERROR_REMOTE_USER_TERM;
+
+ return conn;
+}
+
+static bool is_valid_psm(u16 psm, u8 dst_type) {
+ if (!psm)
+ return false;
+
+ if (bdaddr_type_is_le(dst_type))
+ return (psm <= 0x00ff);
+
+ /* PSM must be odd and lsb of upper byte must be 0 */
+ return ((psm & 0x0101) == 0x0001);
+}
+
+int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid,
+ bdaddr_t *dst, u8 dst_type)
+{
+ struct l2cap_conn *conn;
+ struct hci_conn *hcon;
+ struct hci_dev *hdev;
+ __u8 auth_type;
+ int err;
+
+ BT_DBG("%pMR -> %pMR (type %u) psm 0x%2.2x", &chan->src, dst,
+ dst_type, __le16_to_cpu(psm));
+
+ hdev = hci_get_route(dst, &chan->src);
+ if (!hdev)
+ return -EHOSTUNREACH;
+
+ hci_dev_lock(hdev);
+
+ l2cap_chan_lock(chan);
+
+ if (!is_valid_psm(__le16_to_cpu(psm), dst_type) && !cid &&
+ chan->chan_type != L2CAP_CHAN_RAW) {
+ err = -EINVAL;
+ goto done;
+ }
+
+ if (chan->chan_type == L2CAP_CHAN_CONN_ORIENTED && !psm) {
+ err = -EINVAL;
+ goto done;
+ }
+
+ if (chan->chan_type == L2CAP_CHAN_FIXED && !cid) {
+ err = -EINVAL;
+ goto done;
+ }
+
+ switch (chan->mode) {
+ case L2CAP_MODE_BASIC:
+ break;
+ case L2CAP_MODE_LE_FLOWCTL:
+ l2cap_le_flowctl_init(chan);
+ break;
+ case L2CAP_MODE_ERTM:
+ case L2CAP_MODE_STREAMING:
+ if (!disable_ertm)
+ break;
+ /* fall through */
+ default:
+ err = -ENOTSUPP;
+ goto done;
+ }
+
+ switch (chan->state) {
+ case BT_CONNECT:
+ case BT_CONNECT2:
+ case BT_CONFIG:
+ /* Already connecting */
+ err = 0;
+ goto done;
+
+ case BT_CONNECTED:
+ /* Already connected */
+ err = -EISCONN;
+ goto done;
+
+ case BT_OPEN:
+ case BT_BOUND:
+ /* Can connect */
+ break;
+
+ default:
+ err = -EBADFD;
+ goto done;
+ }
+
+ /* Set destination address and psm */
+ bacpy(&chan->dst, dst);
+ chan->dst_type = dst_type;
+
+ chan->psm = psm;
+ chan->dcid = cid;
+
+ auth_type = l2cap_get_auth_type(chan);
+
+ if (bdaddr_type_is_le(dst_type)) {
+ /* Convert from L2CAP channel address type to HCI address type
+ */
+ if (dst_type == BDADDR_LE_PUBLIC)
+ dst_type = ADDR_LE_DEV_PUBLIC;
+ else
+ dst_type = ADDR_LE_DEV_RANDOM;
+
+ hcon = hci_connect_le(hdev, dst, dst_type, chan->sec_level,
+ auth_type);
+ } else {
+ hcon = hci_connect_acl(hdev, dst, chan->sec_level, auth_type);
+ }
+
+ if (IS_ERR(hcon)) {
+ err = PTR_ERR(hcon);
+ goto done;
+ }
+
+ conn = l2cap_conn_add(hcon);
+ if (!conn) {
+ hci_conn_drop(hcon);
+ err = -ENOMEM;
+ goto done;
+ }
+
+ if (cid && __l2cap_get_chan_by_dcid(conn, cid)) {
+ hci_conn_drop(hcon);
+ err = -EBUSY;
+ goto done;
+ }
+
+ /* Update source addr of the socket */
+ bacpy(&chan->src, &hcon->src);
+ chan->src_type = bdaddr_type(hcon, hcon->src_type);
+
+ l2cap_chan_unlock(chan);
+ l2cap_chan_add(conn, chan);
+ l2cap_chan_lock(chan);
+
+ /* l2cap_chan_add takes its own ref so we can drop this one */
+ hci_conn_drop(hcon);
+
+ l2cap_state_change(chan, BT_CONNECT);
+ __set_chan_timer(chan, chan->ops->get_sndtimeo(chan));
+
+ /* Release chan->sport so that it can be reused by other
+ * sockets (as it's only used for listening sockets).
+ */
+ write_lock(&chan_list_lock);
+ chan->sport = 0;
+ write_unlock(&chan_list_lock);
+
+ if (hcon->state == BT_CONNECTED) {
+ if (chan->chan_type != L2CAP_CHAN_CONN_ORIENTED) {
+ __clear_chan_timer(chan);
+ if (l2cap_chan_check_security(chan))
+ l2cap_state_change(chan, BT_CONNECTED);
+ } else
+ l2cap_do_start(chan);
+ }
+
+ err = 0;
+
+done:
+ l2cap_chan_unlock(chan);
+ hci_dev_unlock(hdev);
+ hci_dev_put(hdev);
+ return err;
+}
+
/* ---- L2CAP interface with lower layer (HCI) ---- */
int l2cap_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr)
@@ -7206,7 +7265,8 @@ static inline void l2cap_check_encryption(struct l2cap_chan *chan, u8 encrypt)
if (encrypt == 0x00) {
if (chan->sec_level == BT_SECURITY_MEDIUM) {
__set_chan_timer(chan, L2CAP_ENC_TIMEOUT);
- } else if (chan->sec_level == BT_SECURITY_HIGH)
+ } else if (chan->sec_level == BT_SECURITY_HIGH ||
+ chan->sec_level == BT_SECURITY_FIPS)
l2cap_chan_close(chan, ECONNREFUSED);
} else {
if (chan->sec_level == BT_SECURITY_MEDIUM)
@@ -7226,7 +7286,7 @@ int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt)
if (hcon->type == LE_LINK) {
if (!status && encrypt)
- smp_distribute_keys(conn, 0);
+ smp_distribute_keys(conn);
cancel_delayed_work(&conn->security_timer);
}
@@ -7238,7 +7298,7 @@ int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt)
BT_DBG("chan %p scid 0x%4.4x state %s", chan, chan->scid,
state_to_string(chan->state));
- if (chan->chan_type == L2CAP_CHAN_CONN_FIX_A2MP) {
+ if (chan->scid == L2CAP_CID_A2MP) {
l2cap_chan_unlock(chan);
continue;
}
@@ -7471,9 +7531,9 @@ int __init l2cap_init(void)
l2cap_debugfs = debugfs_create_file("l2cap", 0444, bt_debugfs,
NULL, &l2cap_debugfs_fops);
- debugfs_create_u16("l2cap_le_max_credits", 0466, bt_debugfs,
+ debugfs_create_u16("l2cap_le_max_credits", 0644, bt_debugfs,
&le_max_credits);
- debugfs_create_u16("l2cap_le_default_mps", 0466, bt_debugfs,
+ debugfs_create_u16("l2cap_le_default_mps", 0644, bt_debugfs,
&le_default_mps);
bt_6lowpan_init();
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index d58f76bcebd..e1378693cc9 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -36,8 +36,6 @@
#include "smp.h"
-bool enable_lecoc;
-
static struct bt_sock_list l2cap_sk_list = {
.lock = __RW_LOCK_UNLOCKED(l2cap_sk_list.lock)
};
@@ -101,12 +99,19 @@ static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen)
if (!bdaddr_type_is_valid(la.l2_bdaddr_type))
return -EINVAL;
+ if (la.l2_cid) {
+ /* When the socket gets created it defaults to
+ * CHAN_CONN_ORIENTED, so we need to overwrite the
+ * default here.
+ */
+ chan->chan_type = L2CAP_CHAN_FIXED;
+ chan->omtu = L2CAP_DEFAULT_MTU;
+ }
+
if (bdaddr_type_is_le(la.l2_bdaddr_type)) {
- if (!enable_lecoc && la.l2_psm)
- return -EINVAL;
/* We only allow ATT user space socket */
if (la.l2_cid &&
- la.l2_cid != __constant_cpu_to_le16(L2CAP_CID_ATT))
+ la.l2_cid != cpu_to_le16(L2CAP_CID_ATT))
return -EINVAL;
}
@@ -204,7 +209,7 @@ static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr,
* ATT. Anything else is an invalid combination.
*/
if (chan->scid != L2CAP_CID_ATT ||
- la.l2_cid != __constant_cpu_to_le16(L2CAP_CID_ATT))
+ la.l2_cid != cpu_to_le16(L2CAP_CID_ATT))
return -EINVAL;
/* We don't have the hdev available here to make a
@@ -220,11 +225,9 @@ static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr,
return -EINVAL;
if (bdaddr_type_is_le(la.l2_bdaddr_type)) {
- if (!enable_lecoc && la.l2_psm)
- return -EINVAL;
/* We only allow ATT user space socket */
if (la.l2_cid &&
- la.l2_cid != __constant_cpu_to_le16(L2CAP_CID_ATT))
+ la.l2_cid != cpu_to_le16(L2CAP_CID_ATT))
return -EINVAL;
}
@@ -357,17 +360,21 @@ static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr,
BT_DBG("sock %p, sk %p", sock, sk);
+ if (peer && sk->sk_state != BT_CONNECTED &&
+ sk->sk_state != BT_CONNECT && sk->sk_state != BT_CONNECT2)
+ return -ENOTCONN;
+
memset(la, 0, sizeof(struct sockaddr_l2));
addr->sa_family = AF_BLUETOOTH;
*len = sizeof(struct sockaddr_l2);
+ la->l2_psm = chan->psm;
+
if (peer) {
- la->l2_psm = chan->psm;
bacpy(&la->l2_bdaddr, &chan->dst);
la->l2_cid = cpu_to_le16(chan->dcid);
la->l2_bdaddr_type = chan->dst_type;
} else {
- la->l2_psm = chan->sport;
bacpy(&la->l2_bdaddr, &chan->src);
la->l2_cid = cpu_to_le16(chan->scid);
la->l2_bdaddr_type = chan->src_type;
@@ -432,6 +439,10 @@ static int l2cap_sock_getsockopt_old(struct socket *sock, int optname,
opt = L2CAP_LM_AUTH | L2CAP_LM_ENCRYPT |
L2CAP_LM_SECURE;
break;
+ case BT_SECURITY_FIPS:
+ opt = L2CAP_LM_AUTH | L2CAP_LM_ENCRYPT |
+ L2CAP_LM_SECURE | L2CAP_LM_FIPS;
+ break;
default:
opt = 0;
break;
@@ -445,6 +456,7 @@ static int l2cap_sock_getsockopt_old(struct socket *sock, int optname,
if (put_user(opt, (u32 __user *) optval))
err = -EFAULT;
+
break;
case L2CAP_CONNINFO:
@@ -499,6 +511,7 @@ static int l2cap_sock_getsockopt(struct socket *sock, int level, int optname,
switch (optname) {
case BT_SECURITY:
if (chan->chan_type != L2CAP_CHAN_CONN_ORIENTED &&
+ chan->chan_type != L2CAP_CHAN_FIXED &&
chan->chan_type != L2CAP_CHAN_RAW) {
err = -EINVAL;
break;
@@ -560,11 +573,6 @@ static int l2cap_sock_getsockopt(struct socket *sock, int level, int optname,
break;
case BT_SNDMTU:
- if (!enable_lecoc) {
- err = -EPROTONOSUPPORT;
- break;
- }
-
if (!bdaddr_type_is_le(chan->src_type)) {
err = -EINVAL;
break;
@@ -580,11 +588,6 @@ static int l2cap_sock_getsockopt(struct socket *sock, int level, int optname,
break;
case BT_RCVMTU:
- if (!enable_lecoc) {
- err = -EPROTONOSUPPORT;
- break;
- }
-
if (!bdaddr_type_is_le(chan->src_type)) {
err = -EINVAL;
break;
@@ -699,6 +702,11 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname,
break;
}
+ if (opt & L2CAP_LM_FIPS) {
+ err = -EINVAL;
+ break;
+ }
+
if (opt & L2CAP_LM_AUTH)
chan->sec_level = BT_SECURITY_LOW;
if (opt & L2CAP_LM_ENCRYPT)
@@ -750,6 +758,7 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
switch (optname) {
case BT_SECURITY:
if (chan->chan_type != L2CAP_CHAN_CONN_ORIENTED &&
+ chan->chan_type != L2CAP_CHAN_FIXED &&
chan->chan_type != L2CAP_CHAN_RAW) {
err = -EINVAL;
break;
@@ -778,11 +787,6 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
/*change security for LE channels */
if (chan->scid == L2CAP_CID_ATT) {
- if (!conn->hcon->out) {
- err = -EINVAL;
- break;
- }
-
if (smp_conn_security(conn->hcon, sec.level))
break;
sk->sk_state = BT_CONFIG;
@@ -895,11 +899,6 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
break;
case BT_SNDMTU:
- if (!enable_lecoc) {
- err = -EPROTONOSUPPORT;
- break;
- }
-
if (!bdaddr_type_is_le(chan->src_type)) {
err = -EINVAL;
break;
@@ -912,11 +911,6 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
break;
case BT_RCVMTU:
- if (!enable_lecoc) {
- err = -EPROTONOSUPPORT;
- break;
- }
-
if (!bdaddr_type_is_le(chan->src_type)) {
err = -EINVAL;
break;
@@ -1181,13 +1175,16 @@ static struct l2cap_chan *l2cap_sock_new_connection_cb(struct l2cap_chan *chan)
/* Check for backlog size */
if (sk_acceptq_is_full(parent)) {
BT_DBG("backlog full %d", parent->sk_ack_backlog);
+ release_sock(parent);
return NULL;
}
sk = l2cap_sock_alloc(sock_net(parent), NULL, BTPROTO_L2CAP,
GFP_ATOMIC);
- if (!sk)
+ if (!sk) {
+ release_sock(parent);
return NULL;
+ }
bt_sock_reclassify_lock(sk, BTPROTO_L2CAP);
@@ -1272,7 +1269,7 @@ static void l2cap_sock_teardown_cb(struct l2cap_chan *chan, int err)
if (parent) {
bt_accept_unlink(sk);
- parent->sk_data_ready(parent, 0);
+ parent->sk_data_ready(parent);
} else {
sk->sk_state_change(sk);
}
@@ -1328,7 +1325,7 @@ static void l2cap_sock_ready_cb(struct l2cap_chan *chan)
sk->sk_state_change(sk);
if (parent)
- parent->sk_data_ready(parent, 0);
+ parent->sk_data_ready(parent);
release_sock(sk);
}
@@ -1341,7 +1338,7 @@ static void l2cap_sock_defer_cb(struct l2cap_chan *chan)
parent = bt_sk(sk)->parent;
if (parent)
- parent->sk_data_ready(parent, 0);
+ parent->sk_data_ready(parent);
release_sock(sk);
}
@@ -1449,6 +1446,11 @@ static void l2cap_sock_init(struct sock *sk, struct sock *parent)
chan->tx_credits = pchan->tx_credits;
chan->rx_credits = pchan->rx_credits;
+ if (chan->chan_type == L2CAP_CHAN_FIXED) {
+ chan->scid = pchan->scid;
+ chan->dcid = pchan->scid;
+ }
+
security_sk_clone(parent, sk);
} else {
switch (sk->sk_type) {
@@ -1614,6 +1616,3 @@ void l2cap_cleanup_sockets(void)
bt_sock_unregister(BTPROTO_L2CAP);
proto_unregister(&l2cap_proto);
}
-
-module_param(enable_lecoc, bool, 0644);
-MODULE_PARM_DESC(enable_lecoc, "Enable support for LE CoC");
diff --git a/net/bluetooth/lib.c b/net/bluetooth/lib.c
index b3fbc73516c..941ad7530ed 100644
--- a/net/bluetooth/lib.c
+++ b/net/bluetooth/lib.c
@@ -58,6 +58,7 @@ int bt_to_errno(__u16 code)
return EIO;
case 0x04:
+ case 0x3c:
return EHOSTDOWN;
case 0x05:
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index a03ca3ca91b..af8e0a6243b 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -29,12 +29,13 @@
#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci_core.h>
+#include <net/bluetooth/l2cap.h>
#include <net/bluetooth/mgmt.h>
#include "smp.h"
#define MGMT_VERSION 1
-#define MGMT_REVISION 4
+#define MGMT_REVISION 6
static const u16 mgmt_commands[] = {
MGMT_OP_READ_INDEX_LIST,
@@ -79,6 +80,11 @@ static const u16 mgmt_commands[] = {
MGMT_OP_SET_BREDR,
MGMT_OP_SET_STATIC_ADDRESS,
MGMT_OP_SET_SCAN_PARAMS,
+ MGMT_OP_SET_SECURE_CONN,
+ MGMT_OP_SET_DEBUG_KEYS,
+ MGMT_OP_SET_PRIVACY,
+ MGMT_OP_LOAD_IRKS,
+ MGMT_OP_GET_CONN_INFO,
};
static const u16 mgmt_events[] = {
@@ -103,6 +109,8 @@ static const u16 mgmt_events[] = {
MGMT_EV_DEVICE_UNBLOCKED,
MGMT_EV_DEVICE_UNPAIRED,
MGMT_EV_PASSKEY_NOTIFY,
+ MGMT_EV_NEW_IRK,
+ MGMT_EV_NEW_CSRK,
};
#define CACHE_TIMEOUT msecs_to_jiffies(2 * 1000)
@@ -127,7 +135,7 @@ static u8 mgmt_status_table[] = {
MGMT_STATUS_FAILED, /* Hardware Failure */
MGMT_STATUS_CONNECT_FAILED, /* Page Timeout */
MGMT_STATUS_AUTH_FAILED, /* Authentication Failed */
- MGMT_STATUS_NOT_PAIRED, /* PIN or Key Missing */
+ MGMT_STATUS_AUTH_FAILED, /* PIN or Key Missing */
MGMT_STATUS_NO_RESOURCES, /* Memory Full */
MGMT_STATUS_TIMEOUT, /* Connection Timeout */
MGMT_STATUS_NO_RESOURCES, /* Max Number of Connections */
@@ -207,7 +215,7 @@ static int cmd_status(struct sock *sk, u16 index, u16 cmd, u8 status)
hdr = (void *) skb_put(skb, sizeof(*hdr));
- hdr->opcode = __constant_cpu_to_le16(MGMT_EV_CMD_STATUS);
+ hdr->opcode = cpu_to_le16(MGMT_EV_CMD_STATUS);
hdr->index = cpu_to_le16(index);
hdr->len = cpu_to_le16(sizeof(*ev));
@@ -238,7 +246,7 @@ static int cmd_complete(struct sock *sk, u16 index, u16 cmd, u8 status,
hdr = (void *) skb_put(skb, sizeof(*hdr));
- hdr->opcode = __constant_cpu_to_le16(MGMT_EV_CMD_COMPLETE);
+ hdr->opcode = cpu_to_le16(MGMT_EV_CMD_COMPLETE);
hdr->index = cpu_to_le16(index);
hdr->len = cpu_to_le16(sizeof(*ev) + rp_len);
@@ -264,7 +272,7 @@ static int read_version(struct sock *sk, struct hci_dev *hdev, void *data,
BT_DBG("sock %p", sk);
rp.version = MGMT_VERSION;
- rp.revision = __constant_cpu_to_le16(MGMT_REVISION);
+ rp.revision = cpu_to_le16(MGMT_REVISION);
return cmd_complete(sk, MGMT_INDEX_NONE, MGMT_OP_READ_VERSION, 0, &rp,
sizeof(rp));
@@ -288,8 +296,8 @@ static int read_commands(struct sock *sk, struct hci_dev *hdev, void *data,
if (!rp)
return -ENOMEM;
- rp->num_commands = __constant_cpu_to_le16(num_commands);
- rp->num_events = __constant_cpu_to_le16(num_events);
+ rp->num_commands = cpu_to_le16(num_commands);
+ rp->num_events = cpu_to_le16(num_events);
for (i = 0, opcode = rp->opcodes; i < num_commands; i++, opcode++)
put_unaligned_le16(mgmt_commands[i], opcode);
@@ -363,6 +371,7 @@ static u32 get_supported_settings(struct hci_dev *hdev)
settings |= MGMT_SETTING_POWERED;
settings |= MGMT_SETTING_PAIRABLE;
+ settings |= MGMT_SETTING_DEBUG_KEYS;
if (lmp_bredr_capable(hdev)) {
settings |= MGMT_SETTING_CONNECTABLE;
@@ -376,11 +385,16 @@ static u32 get_supported_settings(struct hci_dev *hdev)
settings |= MGMT_SETTING_SSP;
settings |= MGMT_SETTING_HS;
}
+
+ if (lmp_sc_capable(hdev) ||
+ test_bit(HCI_FORCE_SC, &hdev->dev_flags))
+ settings |= MGMT_SETTING_SECURE_CONN;
}
if (lmp_le_capable(hdev)) {
settings |= MGMT_SETTING_LE;
settings |= MGMT_SETTING_ADVERTISING;
+ settings |= MGMT_SETTING_PRIVACY;
}
return settings;
@@ -423,6 +437,15 @@ static u32 get_current_settings(struct hci_dev *hdev)
if (test_bit(HCI_ADVERTISING, &hdev->dev_flags))
settings |= MGMT_SETTING_ADVERTISING;
+ if (test_bit(HCI_SC_ENABLED, &hdev->dev_flags))
+ settings |= MGMT_SETTING_SECURE_CONN;
+
+ if (test_bit(HCI_DEBUG_KEYS, &hdev->dev_flags))
+ settings |= MGMT_SETTING_DEBUG_KEYS;
+
+ if (test_bit(HCI_PRIVACY, &hdev->dev_flags))
+ settings |= MGMT_SETTING_PRIVACY;
+
return settings;
}
@@ -629,14 +652,8 @@ static u8 create_adv_data(struct hci_dev *hdev, u8 *ptr)
flags |= get_adv_discov_flags(hdev);
- if (test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) {
- if (lmp_le_br_capable(hdev))
- flags |= LE_AD_SIM_LE_BREDR_CTRL;
- if (lmp_host_le_br_capable(hdev))
- flags |= LE_AD_SIM_LE_BREDR_HOST;
- } else {
+ if (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags))
flags |= LE_AD_NO_BREDR;
- }
if (flags) {
BT_DBG("adv flags 0x%02x", flags);
@@ -803,6 +820,64 @@ static void update_class(struct hci_request *req)
hci_req_add(req, HCI_OP_WRITE_CLASS_OF_DEV, sizeof(cod), cod);
}
+static bool get_connectable(struct hci_dev *hdev)
+{
+ struct pending_cmd *cmd;
+
+ /* If there's a pending mgmt command the flag will not yet have
+ * its final value, so check for this first.
+ */
+ cmd = mgmt_pending_find(MGMT_OP_SET_CONNECTABLE, hdev);
+ if (cmd) {
+ struct mgmt_mode *cp = cmd->param;
+ return cp->val;
+ }
+
+ return test_bit(HCI_CONNECTABLE, &hdev->dev_flags);
+}
+
+static void enable_advertising(struct hci_request *req)
+{
+ struct hci_dev *hdev = req->hdev;
+ struct hci_cp_le_set_adv_param cp;
+ u8 own_addr_type, enable = 0x01;
+ bool connectable;
+
+ /* Clear the HCI_ADVERTISING bit temporarily so that the
+ * hci_update_random_address knows that it's safe to go ahead
+ * and write a new random address. The flag will be set back on
+ * as soon as the SET_ADV_ENABLE HCI command completes.
+ */
+ clear_bit(HCI_ADVERTISING, &hdev->dev_flags);
+
+ connectable = get_connectable(hdev);
+
+ /* Set require_privacy to true only when non-connectable
+ * advertising is used. In that case it is fine to use a
+ * non-resolvable private address.
+ */
+ if (hci_update_random_address(req, !connectable, &own_addr_type) < 0)
+ return;
+
+ memset(&cp, 0, sizeof(cp));
+ cp.min_interval = cpu_to_le16(0x0800);
+ cp.max_interval = cpu_to_le16(0x0800);
+ cp.type = connectable ? LE_ADV_IND : LE_ADV_NONCONN_IND;
+ cp.own_address_type = own_addr_type;
+ cp.channel_map = hdev->le_adv_channel_map;
+
+ hci_req_add(req, HCI_OP_LE_SET_ADV_PARAM, sizeof(cp), &cp);
+
+ hci_req_add(req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable);
+}
+
+static void disable_advertising(struct hci_request *req)
+{
+ u8 enable = 0x00;
+
+ hci_req_add(req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable);
+}
+
static void service_cache_off(struct work_struct *work)
{
struct hci_dev *hdev = container_of(work, struct hci_dev,
@@ -824,12 +899,39 @@ static void service_cache_off(struct work_struct *work)
hci_req_run(&req, NULL);
}
+static void rpa_expired(struct work_struct *work)
+{
+ struct hci_dev *hdev = container_of(work, struct hci_dev,
+ rpa_expired.work);
+ struct hci_request req;
+
+ BT_DBG("");
+
+ set_bit(HCI_RPA_EXPIRED, &hdev->dev_flags);
+
+ if (!test_bit(HCI_ADVERTISING, &hdev->dev_flags) ||
+ hci_conn_num(hdev, LE_LINK) > 0)
+ return;
+
+ /* The generation of a new RPA and programming it into the
+ * controller happens in the enable_advertising() function.
+ */
+
+ hci_req_init(&req, hdev);
+
+ disable_advertising(&req);
+ enable_advertising(&req);
+
+ hci_req_run(&req, NULL);
+}
+
static void mgmt_init_hdev(struct sock *sk, struct hci_dev *hdev)
{
if (test_and_set_bit(HCI_MGMT, &hdev->dev_flags))
return;
INIT_DELAYED_WORK(&hdev->service_cache, service_cache_off);
+ INIT_DELAYED_WORK(&hdev->rpa_expired, rpa_expired);
/* Non-mgmt controlled devices get this bit set
* implicitly so that pairing works for them, however
@@ -935,6 +1037,106 @@ static int send_settings_rsp(struct sock *sk, u16 opcode, struct hci_dev *hdev)
sizeof(settings));
}
+static void clean_up_hci_complete(struct hci_dev *hdev, u8 status)
+{
+ BT_DBG("%s status 0x%02x", hdev->name, status);
+
+ if (hci_conn_count(hdev) == 0) {
+ cancel_delayed_work(&hdev->power_off);
+ queue_work(hdev->req_workqueue, &hdev->power_off.work);
+ }
+}
+
+static void hci_stop_discovery(struct hci_request *req)
+{
+ struct hci_dev *hdev = req->hdev;
+ struct hci_cp_remote_name_req_cancel cp;
+ struct inquiry_entry *e;
+
+ switch (hdev->discovery.state) {
+ case DISCOVERY_FINDING:
+ if (test_bit(HCI_INQUIRY, &hdev->flags)) {
+ hci_req_add(req, HCI_OP_INQUIRY_CANCEL, 0, NULL);
+ } else {
+ cancel_delayed_work(&hdev->le_scan_disable);
+ hci_req_add_le_scan_disable(req);
+ }
+
+ break;
+
+ case DISCOVERY_RESOLVING:
+ e = hci_inquiry_cache_lookup_resolve(hdev, BDADDR_ANY,
+ NAME_PENDING);
+ if (!e)
+ return;
+
+ bacpy(&cp.bdaddr, &e->data.bdaddr);
+ hci_req_add(req, HCI_OP_REMOTE_NAME_REQ_CANCEL, sizeof(cp),
+ &cp);
+
+ break;
+
+ default:
+ /* Passive scanning */
+ if (test_bit(HCI_LE_SCAN, &hdev->dev_flags))
+ hci_req_add_le_scan_disable(req);
+ break;
+ }
+}
+
+static int clean_up_hci_state(struct hci_dev *hdev)
+{
+ struct hci_request req;
+ struct hci_conn *conn;
+
+ hci_req_init(&req, hdev);
+
+ if (test_bit(HCI_ISCAN, &hdev->flags) ||
+ test_bit(HCI_PSCAN, &hdev->flags)) {
+ u8 scan = 0x00;
+ hci_req_add(&req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan);
+ }
+
+ if (test_bit(HCI_ADVERTISING, &hdev->dev_flags))
+ disable_advertising(&req);
+
+ hci_stop_discovery(&req);
+
+ list_for_each_entry(conn, &hdev->conn_hash.list, list) {
+ struct hci_cp_disconnect dc;
+ struct hci_cp_reject_conn_req rej;
+
+ switch (conn->state) {
+ case BT_CONNECTED:
+ case BT_CONFIG:
+ dc.handle = cpu_to_le16(conn->handle);
+ dc.reason = 0x15; /* Terminated due to Power Off */
+ hci_req_add(&req, HCI_OP_DISCONNECT, sizeof(dc), &dc);
+ break;
+ case BT_CONNECT:
+ if (conn->type == LE_LINK)
+ hci_req_add(&req, HCI_OP_LE_CREATE_CONN_CANCEL,
+ 0, NULL);
+ else if (conn->type == ACL_LINK)
+ hci_req_add(&req, HCI_OP_CREATE_CONN_CANCEL,
+ 6, &conn->dst);
+ break;
+ case BT_CONNECT2:
+ bacpy(&rej.bdaddr, &conn->dst);
+ rej.reason = 0x15; /* Terminated due to Power Off */
+ if (conn->type == ACL_LINK)
+ hci_req_add(&req, HCI_OP_REJECT_CONN_REQ,
+ sizeof(rej), &rej);
+ else if (conn->type == SCO_LINK)
+ hci_req_add(&req, HCI_OP_REJECT_SYNC_CONN_REQ,
+ sizeof(rej), &rej);
+ break;
+ }
+ }
+
+ return hci_req_run(&req, clean_up_hci_complete);
+}
+
static int set_powered(struct sock *sk, struct hci_dev *hdev, void *data,
u16 len)
{
@@ -978,12 +1180,23 @@ static int set_powered(struct sock *sk, struct hci_dev *hdev, void *data,
goto failed;
}
- if (cp->val)
+ if (cp->val) {
queue_work(hdev->req_workqueue, &hdev->power_on);
- else
- queue_work(hdev->req_workqueue, &hdev->power_off.work);
-
- err = 0;
+ err = 0;
+ } else {
+ /* Disconnect connections, stop scans, etc */
+ err = clean_up_hci_state(hdev);
+ if (!err)
+ queue_delayed_work(hdev->req_workqueue, &hdev->power_off,
+ HCI_POWER_OFF_TIMEOUT);
+
+ /* ENODATA means there were no HCI commands queued */
+ if (err == -ENODATA) {
+ cancel_delayed_work(&hdev->power_off);
+ queue_work(hdev->req_workqueue, &hdev->power_off.work);
+ err = 0;
+ }
+ }
failed:
hci_dev_unlock(hdev);
@@ -1005,7 +1218,7 @@ static int mgmt_event(u16 event, struct hci_dev *hdev, void *data, u16 data_len,
if (hdev)
hdr->index = cpu_to_le16(hdev->id);
else
- hdr->index = __constant_cpu_to_le16(MGMT_INDEX_NONE);
+ hdr->index = cpu_to_le16(MGMT_INDEX_NONE);
hdr->len = cpu_to_le16(data_len);
if (data)
@@ -1317,15 +1530,15 @@ static void write_fast_connectable(struct hci_request *req, bool enable)
type = PAGE_SCAN_TYPE_INTERLACED;
/* 160 msec page scan interval */
- acp.interval = __constant_cpu_to_le16(0x0100);
+ acp.interval = cpu_to_le16(0x0100);
} else {
type = PAGE_SCAN_TYPE_STANDARD; /* default */
/* default 1.28 sec page scan */
- acp.interval = __constant_cpu_to_le16(0x0800);
+ acp.interval = cpu_to_le16(0x0800);
}
- acp.window = __constant_cpu_to_le16(0x0012);
+ acp.window = cpu_to_le16(0x0012);
if (__cpu_to_le16(hdev->page_scan_interval) != acp.interval ||
__cpu_to_le16(hdev->page_scan_window) != acp.window)
@@ -1336,50 +1549,6 @@ static void write_fast_connectable(struct hci_request *req, bool enable)
hci_req_add(req, HCI_OP_WRITE_PAGE_SCAN_TYPE, 1, &type);
}
-static u8 get_adv_type(struct hci_dev *hdev)
-{
- struct pending_cmd *cmd;
- bool connectable;
-
- /* If there's a pending mgmt command the flag will not yet have
- * it's final value, so check for this first.
- */
- cmd = mgmt_pending_find(MGMT_OP_SET_CONNECTABLE, hdev);
- if (cmd) {
- struct mgmt_mode *cp = cmd->param;
- connectable = !!cp->val;
- } else {
- connectable = test_bit(HCI_CONNECTABLE, &hdev->dev_flags);
- }
-
- return connectable ? LE_ADV_IND : LE_ADV_NONCONN_IND;
-}
-
-static void enable_advertising(struct hci_request *req)
-{
- struct hci_dev *hdev = req->hdev;
- struct hci_cp_le_set_adv_param cp;
- u8 enable = 0x01;
-
- memset(&cp, 0, sizeof(cp));
- cp.min_interval = __constant_cpu_to_le16(0x0800);
- cp.max_interval = __constant_cpu_to_le16(0x0800);
- cp.type = get_adv_type(hdev);
- cp.own_address_type = hdev->own_addr_type;
- cp.channel_map = 0x07;
-
- hci_req_add(req, HCI_OP_LE_SET_ADV_PARAM, sizeof(cp), &cp);
-
- hci_req_add(req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable);
-}
-
-static void disable_advertising(struct hci_request *req)
-{
- u8 enable = 0x00;
-
- hci_req_add(req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable);
-}
-
static void set_connectable_complete(struct hci_dev *hdev, u8 status)
{
struct pending_cmd *cmd;
@@ -2065,7 +2234,7 @@ static int remove_uuid(struct sock *sk, struct hci_dev *hdev, void *data,
}
if (memcmp(cp->uuid, bt_uuid_any, 16) == 0) {
- err = hci_uuids_clear(hdev);
+ hci_uuids_clear(hdev);
if (enable_service_cache(hdev)) {
err = cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_UUID,
@@ -2205,6 +2374,7 @@ static int load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data,
{
struct mgmt_cp_load_link_keys *cp = data;
u16 key_count, expected_len;
+ bool changed;
int i;
BT_DBG("request for %s", hdev->name);
@@ -2219,7 +2389,7 @@ static int load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data,
sizeof(struct mgmt_link_key_info);
if (expected_len != len) {
BT_ERR("load_link_keys: expected %u bytes, got %u bytes",
- len, expected_len);
+ expected_len, len);
return cmd_status(sk, hdev->id, MGMT_OP_LOAD_LINK_KEYS,
MGMT_STATUS_INVALID_PARAMS);
}
@@ -2234,7 +2404,7 @@ static int load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data,
for (i = 0; i < key_count; i++) {
struct mgmt_link_key_info *key = &cp->keys[i];
- if (key->addr.type != BDADDR_BREDR)
+ if (key->addr.type != BDADDR_BREDR || key->type > 0x08)
return cmd_status(sk, hdev->id, MGMT_OP_LOAD_LINK_KEYS,
MGMT_STATUS_INVALID_PARAMS);
}
@@ -2244,9 +2414,12 @@ static int load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data,
hci_link_keys_clear(hdev);
if (cp->debug_keys)
- set_bit(HCI_DEBUG_KEYS, &hdev->dev_flags);
+ changed = !test_and_set_bit(HCI_DEBUG_KEYS, &hdev->dev_flags);
else
- clear_bit(HCI_DEBUG_KEYS, &hdev->dev_flags);
+ changed = test_and_clear_bit(HCI_DEBUG_KEYS, &hdev->dev_flags);
+
+ if (changed)
+ new_settings(hdev, NULL);
for (i = 0; i < key_count; i++) {
struct mgmt_link_key_info *key = &cp->keys[i];
@@ -2306,10 +2479,22 @@ static int unpair_device(struct sock *sk, struct hci_dev *hdev, void *data,
goto unlock;
}
- if (cp->addr.type == BDADDR_BREDR)
+ if (cp->addr.type == BDADDR_BREDR) {
err = hci_remove_link_key(hdev, &cp->addr.bdaddr);
- else
- err = hci_remove_ltk(hdev, &cp->addr.bdaddr);
+ } else {
+ u8 addr_type;
+
+ if (cp->addr.type == BDADDR_LE_PUBLIC)
+ addr_type = ADDR_LE_DEV_PUBLIC;
+ else
+ addr_type = ADDR_LE_DEV_RANDOM;
+
+ hci_remove_irk(hdev, &cp->addr.bdaddr, addr_type);
+
+ hci_conn_params_del(hdev, &cp->addr.bdaddr, addr_type);
+
+ err = hci_remove_ltk(hdev, &cp->addr.bdaddr, addr_type);
+ }
if (err < 0) {
err = cmd_complete(sk, hdev->id, MGMT_OP_UNPAIR_DEVICE,
@@ -2633,6 +2818,16 @@ static void pairing_complete(struct pending_cmd *cmd, u8 status)
mgmt_pending_remove(cmd);
}
+void mgmt_smp_complete(struct hci_conn *conn, bool complete)
+{
+ u8 status = complete ? MGMT_STATUS_SUCCESS : MGMT_STATUS_FAILED;
+ struct pending_cmd *cmd;
+
+ cmd = find_pairing(conn);
+ if (cmd)
+ pairing_complete(cmd, status);
+}
+
static void pairing_complete_cb(struct hci_conn *conn, u8 status)
{
struct pending_cmd *cmd;
@@ -2646,7 +2841,7 @@ static void pairing_complete_cb(struct hci_conn *conn, u8 status)
pairing_complete(cmd, mgmt_status(status));
}
-static void le_connect_complete_cb(struct hci_conn *conn, u8 status)
+static void le_pairing_complete_cb(struct hci_conn *conn, u8 status)
{
struct pending_cmd *cmd;
@@ -2692,17 +2887,24 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data,
}
sec_level = BT_SECURITY_MEDIUM;
- if (cp->io_cap == 0x03)
- auth_type = HCI_AT_DEDICATED_BONDING;
- else
- auth_type = HCI_AT_DEDICATED_BONDING_MITM;
+ auth_type = HCI_AT_DEDICATED_BONDING;
- if (cp->addr.type == BDADDR_BREDR)
- conn = hci_connect(hdev, ACL_LINK, &cp->addr.bdaddr,
- cp->addr.type, sec_level, auth_type);
- else
- conn = hci_connect(hdev, LE_LINK, &cp->addr.bdaddr,
- cp->addr.type, sec_level, auth_type);
+ if (cp->addr.type == BDADDR_BREDR) {
+ conn = hci_connect_acl(hdev, &cp->addr.bdaddr, sec_level,
+ auth_type);
+ } else {
+ u8 addr_type;
+
+ /* Convert from L2CAP channel address type to HCI address type
+ */
+ if (cp->addr.type == BDADDR_LE_PUBLIC)
+ addr_type = ADDR_LE_DEV_PUBLIC;
+ else
+ addr_type = ADDR_LE_DEV_RANDOM;
+
+ conn = hci_connect_le(hdev, &cp->addr.bdaddr, addr_type,
+ sec_level, auth_type);
+ }
if (IS_ERR(conn)) {
int status;
@@ -2733,13 +2935,16 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data,
}
/* For LE, just connecting isn't a proof that the pairing finished */
- if (cp->addr.type == BDADDR_BREDR)
+ if (cp->addr.type == BDADDR_BREDR) {
conn->connect_cfm_cb = pairing_complete_cb;
- else
- conn->connect_cfm_cb = le_connect_complete_cb;
+ conn->security_cfm_cb = pairing_complete_cb;
+ conn->disconn_cfm_cb = pairing_complete_cb;
+ } else {
+ conn->connect_cfm_cb = le_pairing_complete_cb;
+ conn->security_cfm_cb = le_pairing_complete_cb;
+ conn->disconn_cfm_cb = le_pairing_complete_cb;
+ }
- conn->security_cfm_cb = pairing_complete_cb;
- conn->disconn_cfm_cb = pairing_complete_cb;
conn->io_capability = cp->io_cap;
cmd->user_data = conn;
@@ -2826,8 +3031,13 @@ static int user_pairing_resp(struct sock *sk, struct hci_dev *hdev,
}
if (addr->type == BDADDR_LE_PUBLIC || addr->type == BDADDR_LE_RANDOM) {
- /* Continue with pairing via SMP */
+ /* Continue with pairing via SMP. The hdev lock must be
+ * released as SMP may try to reacquire it for crypto
+ * purposes.
+ */
+ hci_dev_unlock(hdev);
err = smp_user_confirm_reply(conn, mgmt_op, passkey);
+ hci_dev_lock(hdev);
if (!err)
err = cmd_complete(sk, hdev->id, mgmt_op,
@@ -3071,7 +3281,12 @@ static int read_local_oob_data(struct sock *sk, struct hci_dev *hdev,
goto unlock;
}
- err = hci_send_cmd(hdev, HCI_OP_READ_LOCAL_OOB_DATA, 0, NULL);
+ if (test_bit(HCI_SC_ENABLED, &hdev->dev_flags))
+ err = hci_send_cmd(hdev, HCI_OP_READ_LOCAL_OOB_EXT_DATA,
+ 0, NULL);
+ else
+ err = hci_send_cmd(hdev, HCI_OP_READ_LOCAL_OOB_DATA, 0, NULL);
+
if (err < 0)
mgmt_pending_remove(cmd);
@@ -3083,23 +3298,46 @@ unlock:
static int add_remote_oob_data(struct sock *sk, struct hci_dev *hdev,
void *data, u16 len)
{
- struct mgmt_cp_add_remote_oob_data *cp = data;
- u8 status;
int err;
BT_DBG("%s ", hdev->name);
hci_dev_lock(hdev);
- err = hci_add_remote_oob_data(hdev, &cp->addr.bdaddr, cp->hash,
- cp->randomizer);
- if (err < 0)
- status = MGMT_STATUS_FAILED;
- else
- status = MGMT_STATUS_SUCCESS;
+ if (len == MGMT_ADD_REMOTE_OOB_DATA_SIZE) {
+ struct mgmt_cp_add_remote_oob_data *cp = data;
+ u8 status;
- err = cmd_complete(sk, hdev->id, MGMT_OP_ADD_REMOTE_OOB_DATA, status,
- &cp->addr, sizeof(cp->addr));
+ err = hci_add_remote_oob_data(hdev, &cp->addr.bdaddr,
+ cp->hash, cp->randomizer);
+ if (err < 0)
+ status = MGMT_STATUS_FAILED;
+ else
+ status = MGMT_STATUS_SUCCESS;
+
+ err = cmd_complete(sk, hdev->id, MGMT_OP_ADD_REMOTE_OOB_DATA,
+ status, &cp->addr, sizeof(cp->addr));
+ } else if (len == MGMT_ADD_REMOTE_OOB_EXT_DATA_SIZE) {
+ struct mgmt_cp_add_remote_oob_ext_data *cp = data;
+ u8 status;
+
+ err = hci_add_remote_oob_ext_data(hdev, &cp->addr.bdaddr,
+ cp->hash192,
+ cp->randomizer192,
+ cp->hash256,
+ cp->randomizer256);
+ if (err < 0)
+ status = MGMT_STATUS_FAILED;
+ else
+ status = MGMT_STATUS_SUCCESS;
+
+ err = cmd_complete(sk, hdev->id, MGMT_OP_ADD_REMOTE_OOB_DATA,
+ status, &cp->addr, sizeof(cp->addr));
+ } else {
+ BT_ERR("add_remote_oob_data: invalid length of %u bytes", len);
+ err = cmd_status(sk, hdev->id, MGMT_OP_ADD_REMOTE_OOB_DATA,
+ MGMT_STATUS_INVALID_PARAMS);
+ }
hci_dev_unlock(hdev);
return err;
@@ -3152,6 +3390,8 @@ static int mgmt_start_discovery_failed(struct hci_dev *hdev, u8 status)
static void start_discovery_complete(struct hci_dev *hdev, u8 status)
{
+ unsigned long timeout = 0;
+
BT_DBG("status %d", status);
if (status) {
@@ -3167,13 +3407,11 @@ static void start_discovery_complete(struct hci_dev *hdev, u8 status)
switch (hdev->discovery.type) {
case DISCOV_TYPE_LE:
- queue_delayed_work(hdev->workqueue, &hdev->le_scan_disable,
- DISCOV_LE_TIMEOUT);
+ timeout = msecs_to_jiffies(DISCOV_LE_TIMEOUT);
break;
case DISCOV_TYPE_INTERLEAVED:
- queue_delayed_work(hdev->workqueue, &hdev->le_scan_disable,
- DISCOV_INTERLEAVED_TIMEOUT);
+ timeout = msecs_to_jiffies(hdev->discov_interleaved_timeout);
break;
case DISCOV_TYPE_BREDR:
@@ -3182,6 +3420,11 @@ static void start_discovery_complete(struct hci_dev *hdev, u8 status)
default:
BT_ERR("Invalid discovery type %d", hdev->discovery.type);
}
+
+ if (!timeout)
+ return;
+
+ queue_delayed_work(hdev->workqueue, &hdev->le_scan_disable, timeout);
}
static int start_discovery(struct sock *sk, struct hci_dev *hdev,
@@ -3195,7 +3438,7 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev,
struct hci_request req;
/* General inquiry access code (GIAC) */
u8 lap[3] = { 0x33, 0x8b, 0x9e };
- u8 status;
+ u8 status, own_addr_type;
int err;
BT_DBG("%s", hdev->name);
@@ -3280,18 +3523,31 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev,
goto failed;
}
- if (test_bit(HCI_LE_SCAN, &hdev->dev_flags)) {
+ /* If controller is scanning, it means the background scanning
+ * is running. Thus, we should temporarily stop it in order to
+ * set the discovery scanning parameters.
+ */
+ if (test_bit(HCI_LE_SCAN, &hdev->dev_flags))
+ hci_req_add_le_scan_disable(&req);
+
+ memset(&param_cp, 0, sizeof(param_cp));
+
+ /* All active scans will be done with either a resolvable
+ * private address (when privacy feature has been enabled)
+ * or non-resolvable private address.
+ */
+ err = hci_update_random_address(&req, true, &own_addr_type);
+ if (err < 0) {
err = cmd_status(sk, hdev->id, MGMT_OP_START_DISCOVERY,
- MGMT_STATUS_BUSY);
+ MGMT_STATUS_FAILED);
mgmt_pending_remove(cmd);
goto failed;
}
- memset(&param_cp, 0, sizeof(param_cp));
param_cp.type = LE_SCAN_ACTIVE;
param_cp.interval = cpu_to_le16(DISCOV_LE_SCAN_INT);
param_cp.window = cpu_to_le16(DISCOV_LE_SCAN_WIN);
- param_cp.own_address_type = hdev->own_addr_type;
+ param_cp.own_address_type = own_addr_type;
hci_req_add(&req, HCI_OP_LE_SET_SCAN_PARAM, sizeof(param_cp),
&param_cp);
@@ -3358,10 +3614,7 @@ static int stop_discovery(struct sock *sk, struct hci_dev *hdev, void *data,
{
struct mgmt_cp_stop_discovery *mgmt_cp = data;
struct pending_cmd *cmd;
- struct hci_cp_remote_name_req_cancel cp;
- struct inquiry_entry *e;
struct hci_request req;
- struct hci_cp_le_set_scan_enable enable_cp;
int err;
BT_DBG("%s", hdev->name);
@@ -3390,55 +3643,22 @@ static int stop_discovery(struct sock *sk, struct hci_dev *hdev, void *data,
hci_req_init(&req, hdev);
- switch (hdev->discovery.state) {
- case DISCOVERY_FINDING:
- if (test_bit(HCI_INQUIRY, &hdev->flags)) {
- hci_req_add(&req, HCI_OP_INQUIRY_CANCEL, 0, NULL);
- } else {
- cancel_delayed_work(&hdev->le_scan_disable);
-
- memset(&enable_cp, 0, sizeof(enable_cp));
- enable_cp.enable = LE_SCAN_DISABLE;
- hci_req_add(&req, HCI_OP_LE_SET_SCAN_ENABLE,
- sizeof(enable_cp), &enable_cp);
- }
+ hci_stop_discovery(&req);
- break;
-
- case DISCOVERY_RESOLVING:
- e = hci_inquiry_cache_lookup_resolve(hdev, BDADDR_ANY,
- NAME_PENDING);
- if (!e) {
- mgmt_pending_remove(cmd);
- err = cmd_complete(sk, hdev->id,
- MGMT_OP_STOP_DISCOVERY, 0,
- &mgmt_cp->type,
- sizeof(mgmt_cp->type));
- hci_discovery_set_state(hdev, DISCOVERY_STOPPED);
- goto unlock;
- }
-
- bacpy(&cp.bdaddr, &e->data.bdaddr);
- hci_req_add(&req, HCI_OP_REMOTE_NAME_REQ_CANCEL, sizeof(cp),
- &cp);
-
- break;
-
- default:
- BT_DBG("unknown discovery state %u", hdev->discovery.state);
-
- mgmt_pending_remove(cmd);
- err = cmd_complete(sk, hdev->id, MGMT_OP_STOP_DISCOVERY,
- MGMT_STATUS_FAILED, &mgmt_cp->type,
- sizeof(mgmt_cp->type));
+ err = hci_req_run(&req, stop_discovery_complete);
+ if (!err) {
+ hci_discovery_set_state(hdev, DISCOVERY_STOPPING);
goto unlock;
}
- err = hci_req_run(&req, stop_discovery_complete);
- if (err < 0)
- mgmt_pending_remove(cmd);
- else
- hci_discovery_set_state(hdev, DISCOVERY_STOPPING);
+ mgmt_pending_remove(cmd);
+
+ /* If no HCI commands were sent we're done */
+ if (err == -ENODATA) {
+ err = cmd_complete(sk, hdev->id, MGMT_OP_STOP_DISCOVERY, 0,
+ &mgmt_cp->type, sizeof(mgmt_cp->type));
+ hci_discovery_set_state(hdev, DISCOVERY_STOPPED);
+ }
unlock:
hci_dev_unlock(hdev);
@@ -3457,15 +3677,17 @@ static int confirm_name(struct sock *sk, struct hci_dev *hdev, void *data,
hci_dev_lock(hdev);
if (!hci_discovery_active(hdev)) {
- err = cmd_status(sk, hdev->id, MGMT_OP_CONFIRM_NAME,
- MGMT_STATUS_FAILED);
+ err = cmd_complete(sk, hdev->id, MGMT_OP_CONFIRM_NAME,
+ MGMT_STATUS_FAILED, &cp->addr,
+ sizeof(cp->addr));
goto failed;
}
e = hci_inquiry_cache_lookup_unknown(hdev, &cp->addr.bdaddr);
if (!e) {
- err = cmd_status(sk, hdev->id, MGMT_OP_CONFIRM_NAME,
- MGMT_STATUS_INVALID_PARAMS);
+ err = cmd_complete(sk, hdev->id, MGMT_OP_CONFIRM_NAME,
+ MGMT_STATUS_INVALID_PARAMS, &cp->addr,
+ sizeof(cp->addr));
goto failed;
}
@@ -3754,6 +3976,21 @@ static int set_scan_params(struct sock *sk, struct hci_dev *hdev,
err = cmd_complete(sk, hdev->id, MGMT_OP_SET_SCAN_PARAMS, 0, NULL, 0);
+ /* If background scan is running, restart it so new parameters are
+ * loaded.
+ */
+ if (test_bit(HCI_LE_SCAN, &hdev->dev_flags) &&
+ hdev->discovery.state == DISCOVERY_STOPPED) {
+ struct hci_request req;
+
+ hci_req_init(&req, hdev);
+
+ hci_req_add_le_scan_disable(&req);
+ hci_req_add_le_passive_scan(&req);
+
+ hci_req_run(&req, NULL);
+ }
+
hci_dev_unlock(hdev);
return err;
@@ -3999,15 +4236,269 @@ unlock:
return err;
}
+static int set_secure_conn(struct sock *sk, struct hci_dev *hdev,
+ void *data, u16 len)
+{
+ struct mgmt_mode *cp = data;
+ struct pending_cmd *cmd;
+ u8 val, status;
+ int err;
+
+ BT_DBG("request for %s", hdev->name);
+
+ status = mgmt_bredr_support(hdev);
+ if (status)
+ return cmd_status(sk, hdev->id, MGMT_OP_SET_SECURE_CONN,
+ status);
+
+ if (!lmp_sc_capable(hdev) &&
+ !test_bit(HCI_FORCE_SC, &hdev->dev_flags))
+ return cmd_status(sk, hdev->id, MGMT_OP_SET_SECURE_CONN,
+ MGMT_STATUS_NOT_SUPPORTED);
+
+ if (cp->val != 0x00 && cp->val != 0x01 && cp->val != 0x02)
+ return cmd_status(sk, hdev->id, MGMT_OP_SET_SECURE_CONN,
+ MGMT_STATUS_INVALID_PARAMS);
+
+ hci_dev_lock(hdev);
+
+ if (!hdev_is_powered(hdev)) {
+ bool changed;
+
+ if (cp->val) {
+ changed = !test_and_set_bit(HCI_SC_ENABLED,
+ &hdev->dev_flags);
+ if (cp->val == 0x02)
+ set_bit(HCI_SC_ONLY, &hdev->dev_flags);
+ else
+ clear_bit(HCI_SC_ONLY, &hdev->dev_flags);
+ } else {
+ changed = test_and_clear_bit(HCI_SC_ENABLED,
+ &hdev->dev_flags);
+ clear_bit(HCI_SC_ONLY, &hdev->dev_flags);
+ }
+
+ err = send_settings_rsp(sk, MGMT_OP_SET_SECURE_CONN, hdev);
+ if (err < 0)
+ goto failed;
+
+ if (changed)
+ err = new_settings(hdev, sk);
+
+ goto failed;
+ }
+
+ if (mgmt_pending_find(MGMT_OP_SET_SECURE_CONN, hdev)) {
+ err = cmd_status(sk, hdev->id, MGMT_OP_SET_SECURE_CONN,
+ MGMT_STATUS_BUSY);
+ goto failed;
+ }
+
+ val = !!cp->val;
+
+ if (val == test_bit(HCI_SC_ENABLED, &hdev->dev_flags) &&
+ (cp->val == 0x02) == test_bit(HCI_SC_ONLY, &hdev->dev_flags)) {
+ err = send_settings_rsp(sk, MGMT_OP_SET_SECURE_CONN, hdev);
+ goto failed;
+ }
+
+ cmd = mgmt_pending_add(sk, MGMT_OP_SET_SECURE_CONN, hdev, data, len);
+ if (!cmd) {
+ err = -ENOMEM;
+ goto failed;
+ }
+
+ err = hci_send_cmd(hdev, HCI_OP_WRITE_SC_SUPPORT, 1, &val);
+ if (err < 0) {
+ mgmt_pending_remove(cmd);
+ goto failed;
+ }
+
+ if (cp->val == 0x02)
+ set_bit(HCI_SC_ONLY, &hdev->dev_flags);
+ else
+ clear_bit(HCI_SC_ONLY, &hdev->dev_flags);
+
+failed:
+ hci_dev_unlock(hdev);
+ return err;
+}
+
+static int set_debug_keys(struct sock *sk, struct hci_dev *hdev,
+ void *data, u16 len)
+{
+ struct mgmt_mode *cp = data;
+ bool changed;
+ int err;
+
+ BT_DBG("request for %s", hdev->name);
+
+ if (cp->val != 0x00 && cp->val != 0x01)
+ return cmd_status(sk, hdev->id, MGMT_OP_SET_DEBUG_KEYS,
+ MGMT_STATUS_INVALID_PARAMS);
+
+ hci_dev_lock(hdev);
+
+ if (cp->val)
+ changed = !test_and_set_bit(HCI_DEBUG_KEYS, &hdev->dev_flags);
+ else
+ changed = test_and_clear_bit(HCI_DEBUG_KEYS, &hdev->dev_flags);
+
+ err = send_settings_rsp(sk, MGMT_OP_SET_DEBUG_KEYS, hdev);
+ if (err < 0)
+ goto unlock;
+
+ if (changed)
+ err = new_settings(hdev, sk);
+
+unlock:
+ hci_dev_unlock(hdev);
+ return err;
+}
+
+static int set_privacy(struct sock *sk, struct hci_dev *hdev, void *cp_data,
+ u16 len)
+{
+ struct mgmt_cp_set_privacy *cp = cp_data;
+ bool changed;
+ int err;
+
+ BT_DBG("request for %s", hdev->name);
+
+ if (!lmp_le_capable(hdev))
+ return cmd_status(sk, hdev->id, MGMT_OP_SET_PRIVACY,
+ MGMT_STATUS_NOT_SUPPORTED);
+
+ if (cp->privacy != 0x00 && cp->privacy != 0x01)
+ return cmd_status(sk, hdev->id, MGMT_OP_SET_PRIVACY,
+ MGMT_STATUS_INVALID_PARAMS);
+
+ if (hdev_is_powered(hdev))
+ return cmd_status(sk, hdev->id, MGMT_OP_SET_PRIVACY,
+ MGMT_STATUS_REJECTED);
+
+ hci_dev_lock(hdev);
+
+ /* If user space supports this command it is also expected to
+ * handle IRKs. Therefore, set the HCI_RPA_RESOLVING flag.
+ */
+ set_bit(HCI_RPA_RESOLVING, &hdev->dev_flags);
+
+ if (cp->privacy) {
+ changed = !test_and_set_bit(HCI_PRIVACY, &hdev->dev_flags);
+ memcpy(hdev->irk, cp->irk, sizeof(hdev->irk));
+ set_bit(HCI_RPA_EXPIRED, &hdev->dev_flags);
+ } else {
+ changed = test_and_clear_bit(HCI_PRIVACY, &hdev->dev_flags);
+ memset(hdev->irk, 0, sizeof(hdev->irk));
+ clear_bit(HCI_RPA_EXPIRED, &hdev->dev_flags);
+ }
+
+ err = send_settings_rsp(sk, MGMT_OP_SET_PRIVACY, hdev);
+ if (err < 0)
+ goto unlock;
+
+ if (changed)
+ err = new_settings(hdev, sk);
+
+unlock:
+ hci_dev_unlock(hdev);
+ return err;
+}
+
+static bool irk_is_valid(struct mgmt_irk_info *irk)
+{
+ switch (irk->addr.type) {
+ case BDADDR_LE_PUBLIC:
+ return true;
+
+ case BDADDR_LE_RANDOM:
+ /* Two most significant bits shall be set */
+ if ((irk->addr.bdaddr.b[5] & 0xc0) != 0xc0)
+ return false;
+ return true;
+ }
+
+ return false;
+}
+
+static int load_irks(struct sock *sk, struct hci_dev *hdev, void *cp_data,
+ u16 len)
+{
+ struct mgmt_cp_load_irks *cp = cp_data;
+ u16 irk_count, expected_len;
+ int i, err;
+
+ BT_DBG("request for %s", hdev->name);
+
+ if (!lmp_le_capable(hdev))
+ return cmd_status(sk, hdev->id, MGMT_OP_LOAD_IRKS,
+ MGMT_STATUS_NOT_SUPPORTED);
+
+ irk_count = __le16_to_cpu(cp->irk_count);
+
+ expected_len = sizeof(*cp) + irk_count * sizeof(struct mgmt_irk_info);
+ if (expected_len != len) {
+ BT_ERR("load_irks: expected %u bytes, got %u bytes",
+ expected_len, len);
+ return cmd_status(sk, hdev->id, MGMT_OP_LOAD_IRKS,
+ MGMT_STATUS_INVALID_PARAMS);
+ }
+
+ BT_DBG("%s irk_count %u", hdev->name, irk_count);
+
+ for (i = 0; i < irk_count; i++) {
+ struct mgmt_irk_info *key = &cp->irks[i];
+
+ if (!irk_is_valid(key))
+ return cmd_status(sk, hdev->id,
+ MGMT_OP_LOAD_IRKS,
+ MGMT_STATUS_INVALID_PARAMS);
+ }
+
+ hci_dev_lock(hdev);
+
+ hci_smp_irks_clear(hdev);
+
+ for (i = 0; i < irk_count; i++) {
+ struct mgmt_irk_info *irk = &cp->irks[i];
+ u8 addr_type;
+
+ if (irk->addr.type == BDADDR_LE_PUBLIC)
+ addr_type = ADDR_LE_DEV_PUBLIC;
+ else
+ addr_type = ADDR_LE_DEV_RANDOM;
+
+ hci_add_irk(hdev, &irk->addr.bdaddr, addr_type, irk->val,
+ BDADDR_ANY);
+ }
+
+ set_bit(HCI_RPA_RESOLVING, &hdev->dev_flags);
+
+ err = cmd_complete(sk, hdev->id, MGMT_OP_LOAD_IRKS, 0, NULL, 0);
+
+ hci_dev_unlock(hdev);
+
+ return err;
+}
+
static bool ltk_is_valid(struct mgmt_ltk_info *key)
{
- if (key->authenticated != 0x00 && key->authenticated != 0x01)
- return false;
if (key->master != 0x00 && key->master != 0x01)
return false;
- if (!bdaddr_type_is_le(key->addr.type))
- return false;
- return true;
+
+ switch (key->addr.type) {
+ case BDADDR_LE_PUBLIC:
+ return true;
+
+ case BDADDR_LE_RANDOM:
+ /* Two most significant bits shall be set */
+ if ((key->addr.bdaddr.b[5] & 0xc0) != 0xc0)
+ return false;
+ return true;
+ }
+
+ return false;
}
static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev,
@@ -4029,7 +4520,7 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev,
sizeof(struct mgmt_ltk_info);
if (expected_len != len) {
BT_ERR("load_keys: expected %u bytes, got %u bytes",
- len, expected_len);
+ expected_len, len);
return cmd_status(sk, hdev->id, MGMT_OP_LOAD_LONG_TERM_KEYS,
MGMT_STATUS_INVALID_PARAMS);
}
@@ -4051,7 +4542,7 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev,
for (i = 0; i < key_count; i++) {
struct mgmt_ltk_info *key = &cp->keys[i];
- u8 type, addr_type;
+ u8 type, addr_type, authenticated;
if (key->addr.type == BDADDR_LE_PUBLIC)
addr_type = ADDR_LE_DEV_PUBLIC;
@@ -4063,9 +4554,20 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev,
else
type = HCI_SMP_LTK_SLAVE;
- hci_add_ltk(hdev, &key->addr.bdaddr, addr_type,
- type, 0, key->authenticated, key->val,
- key->enc_size, key->ediv, key->rand);
+ switch (key->type) {
+ case MGMT_LTK_UNAUTHENTICATED:
+ authenticated = 0x00;
+ break;
+ case MGMT_LTK_AUTHENTICATED:
+ authenticated = 0x01;
+ break;
+ default:
+ continue;
+ }
+
+ hci_add_ltk(hdev, &key->addr.bdaddr, addr_type, type,
+ authenticated, key->val, key->enc_size, key->ediv,
+ key->rand);
}
err = cmd_complete(sk, hdev->id, MGMT_OP_LOAD_LONG_TERM_KEYS, 0,
@@ -4076,6 +4578,218 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev,
return err;
}
+struct cmd_conn_lookup {
+ struct hci_conn *conn;
+ bool valid_tx_power;
+ u8 mgmt_status;
+};
+
+static void get_conn_info_complete(struct pending_cmd *cmd, void *data)
+{
+ struct cmd_conn_lookup *match = data;
+ struct mgmt_cp_get_conn_info *cp;
+ struct mgmt_rp_get_conn_info rp;
+ struct hci_conn *conn = cmd->user_data;
+
+ if (conn != match->conn)
+ return;
+
+ cp = (struct mgmt_cp_get_conn_info *) cmd->param;
+
+ memset(&rp, 0, sizeof(rp));
+ bacpy(&rp.addr.bdaddr, &cp->addr.bdaddr);
+ rp.addr.type = cp->addr.type;
+
+ if (!match->mgmt_status) {
+ rp.rssi = conn->rssi;
+
+ if (match->valid_tx_power) {
+ rp.tx_power = conn->tx_power;
+ rp.max_tx_power = conn->max_tx_power;
+ } else {
+ rp.tx_power = HCI_TX_POWER_INVALID;
+ rp.max_tx_power = HCI_TX_POWER_INVALID;
+ }
+ }
+
+ cmd_complete(cmd->sk, cmd->index, MGMT_OP_GET_CONN_INFO,
+ match->mgmt_status, &rp, sizeof(rp));
+
+ hci_conn_drop(conn);
+
+ mgmt_pending_remove(cmd);
+}
+
+static void conn_info_refresh_complete(struct hci_dev *hdev, u8 status)
+{
+ struct hci_cp_read_rssi *cp;
+ struct hci_conn *conn;
+ struct cmd_conn_lookup match;
+ u16 handle;
+
+ BT_DBG("status 0x%02x", status);
+
+ hci_dev_lock(hdev);
+
+ /* TX power data is valid in case request completed successfully,
+ * otherwise we assume it's not valid. At the moment we assume that
+ * either both or none of current and max values are valid to keep code
+ * simple.
+ */
+ match.valid_tx_power = !status;
+
+ /* Commands sent in request are either Read RSSI or Read Transmit Power
+ * Level so we check which one was last sent to retrieve connection
+ * handle. Both commands have handle as first parameter so it's safe to
+ * cast data on the same command struct.
+ *
+ * First command sent is always Read RSSI and we fail only if it fails.
+ * In other case we simply override error to indicate success as we
+ * already remembered if TX power value is actually valid.
+ */
+ cp = hci_sent_cmd_data(hdev, HCI_OP_READ_RSSI);
+ if (!cp) {
+ cp = hci_sent_cmd_data(hdev, HCI_OP_READ_TX_POWER);
+ status = 0;
+ }
+
+ if (!cp) {
+ BT_ERR("invalid sent_cmd in response");
+ goto unlock;
+ }
+
+ handle = __le16_to_cpu(cp->handle);
+ conn = hci_conn_hash_lookup_handle(hdev, handle);
+ if (!conn) {
+ BT_ERR("unknown handle (%d) in response", handle);
+ goto unlock;
+ }
+
+ match.conn = conn;
+ match.mgmt_status = mgmt_status(status);
+
+ /* Cache refresh is complete, now reply for mgmt request for given
+ * connection only.
+ */
+ mgmt_pending_foreach(MGMT_OP_GET_CONN_INFO, hdev,
+ get_conn_info_complete, &match);
+
+unlock:
+ hci_dev_unlock(hdev);
+}
+
+static int get_conn_info(struct sock *sk, struct hci_dev *hdev, void *data,
+ u16 len)
+{
+ struct mgmt_cp_get_conn_info *cp = data;
+ struct mgmt_rp_get_conn_info rp;
+ struct hci_conn *conn;
+ unsigned long conn_info_age;
+ int err = 0;
+
+ BT_DBG("%s", hdev->name);
+
+ memset(&rp, 0, sizeof(rp));
+ bacpy(&rp.addr.bdaddr, &cp->addr.bdaddr);
+ rp.addr.type = cp->addr.type;
+
+ if (!bdaddr_type_is_valid(cp->addr.type))
+ return cmd_complete(sk, hdev->id, MGMT_OP_GET_CONN_INFO,
+ MGMT_STATUS_INVALID_PARAMS,
+ &rp, sizeof(rp));
+
+ hci_dev_lock(hdev);
+
+ if (!hdev_is_powered(hdev)) {
+ err = cmd_complete(sk, hdev->id, MGMT_OP_GET_CONN_INFO,
+ MGMT_STATUS_NOT_POWERED, &rp, sizeof(rp));
+ goto unlock;
+ }
+
+ if (cp->addr.type == BDADDR_BREDR)
+ conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK,
+ &cp->addr.bdaddr);
+ else
+ conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &cp->addr.bdaddr);
+
+ if (!conn || conn->state != BT_CONNECTED) {
+ err = cmd_complete(sk, hdev->id, MGMT_OP_GET_CONN_INFO,
+ MGMT_STATUS_NOT_CONNECTED, &rp, sizeof(rp));
+ goto unlock;
+ }
+
+ /* To avoid client trying to guess when to poll again for information we
+ * calculate conn info age as random value between min/max set in hdev.
+ */
+ conn_info_age = hdev->conn_info_min_age +
+ prandom_u32_max(hdev->conn_info_max_age -
+ hdev->conn_info_min_age);
+
+ /* Query controller to refresh cached values if they are too old or were
+ * never read.
+ */
+ if (time_after(jiffies, conn->conn_info_timestamp +
+ msecs_to_jiffies(conn_info_age)) ||
+ !conn->conn_info_timestamp) {
+ struct hci_request req;
+ struct hci_cp_read_tx_power req_txp_cp;
+ struct hci_cp_read_rssi req_rssi_cp;
+ struct pending_cmd *cmd;
+
+ hci_req_init(&req, hdev);
+ req_rssi_cp.handle = cpu_to_le16(conn->handle);
+ hci_req_add(&req, HCI_OP_READ_RSSI, sizeof(req_rssi_cp),
+ &req_rssi_cp);
+
+ /* For LE links TX power does not change thus we don't need to
+ * query for it once value is known.
+ */
+ if (!bdaddr_type_is_le(cp->addr.type) ||
+ conn->tx_power == HCI_TX_POWER_INVALID) {
+ req_txp_cp.handle = cpu_to_le16(conn->handle);
+ req_txp_cp.type = 0x00;
+ hci_req_add(&req, HCI_OP_READ_TX_POWER,
+ sizeof(req_txp_cp), &req_txp_cp);
+ }
+
+ /* Max TX power needs to be read only once per connection */
+ if (conn->max_tx_power == HCI_TX_POWER_INVALID) {
+ req_txp_cp.handle = cpu_to_le16(conn->handle);
+ req_txp_cp.type = 0x01;
+ hci_req_add(&req, HCI_OP_READ_TX_POWER,
+ sizeof(req_txp_cp), &req_txp_cp);
+ }
+
+ err = hci_req_run(&req, conn_info_refresh_complete);
+ if (err < 0)
+ goto unlock;
+
+ cmd = mgmt_pending_add(sk, MGMT_OP_GET_CONN_INFO, hdev,
+ data, len);
+ if (!cmd) {
+ err = -ENOMEM;
+ goto unlock;
+ }
+
+ hci_conn_hold(conn);
+ cmd->user_data = conn;
+
+ conn->conn_info_timestamp = jiffies;
+ } else {
+ /* Cache is valid, just reply with values cached in hci_conn */
+ rp.rssi = conn->rssi;
+ rp.tx_power = conn->tx_power;
+ rp.max_tx_power = conn->max_tx_power;
+
+ err = cmd_complete(sk, hdev->id, MGMT_OP_GET_CONN_INFO,
+ MGMT_STATUS_SUCCESS, &rp, sizeof(rp));
+ }
+
+unlock:
+ hci_dev_unlock(hdev);
+ return err;
+}
+
static const struct mgmt_handler {
int (*func) (struct sock *sk, struct hci_dev *hdev, void *data,
u16 data_len);
@@ -4115,7 +4829,7 @@ static const struct mgmt_handler {
{ user_passkey_reply, false, MGMT_USER_PASSKEY_REPLY_SIZE },
{ user_passkey_neg_reply, false, MGMT_USER_PASSKEY_NEG_REPLY_SIZE },
{ read_local_oob_data, false, MGMT_READ_LOCAL_OOB_DATA_SIZE },
- { add_remote_oob_data, false, MGMT_ADD_REMOTE_OOB_DATA_SIZE },
+ { add_remote_oob_data, true, MGMT_ADD_REMOTE_OOB_DATA_SIZE },
{ remove_remote_oob_data, false, MGMT_REMOVE_REMOTE_OOB_DATA_SIZE },
{ start_discovery, false, MGMT_START_DISCOVERY_SIZE },
{ stop_discovery, false, MGMT_STOP_DISCOVERY_SIZE },
@@ -4127,6 +4841,11 @@ static const struct mgmt_handler {
{ set_bredr, false, MGMT_SETTING_SIZE },
{ set_static_address, false, MGMT_SET_STATIC_ADDRESS_SIZE },
{ set_scan_params, false, MGMT_SET_SCAN_PARAMS_SIZE },
+ { set_secure_conn, false, MGMT_SETTING_SIZE },
+ { set_debug_keys, false, MGMT_SETTING_SIZE },
+ { set_privacy, false, MGMT_SET_PRIVACY_SIZE },
+ { load_irks, true, MGMT_LOAD_IRKS_SIZE },
+ { get_conn_info, false, MGMT_GET_CONN_INFO_SIZE },
};
@@ -4243,6 +4962,17 @@ void mgmt_index_removed(struct hci_dev *hdev)
mgmt_event(MGMT_EV_INDEX_REMOVED, hdev, NULL, 0, NULL);
}
+/* This function requires the caller holds hdev->lock */
+static void restart_le_auto_conns(struct hci_dev *hdev)
+{
+ struct hci_conn_params *p;
+
+ list_for_each_entry(p, &hdev->le_conn_params, list) {
+ if (p->auto_connect == HCI_AUTO_CONN_ALWAYS)
+ hci_pend_le_conn_add(hdev, &p->addr, p->addr_type);
+ }
+}
+
static void powered_complete(struct hci_dev *hdev, u8 status)
{
struct cmd_lookup match = { NULL, hdev };
@@ -4251,6 +4981,8 @@ static void powered_complete(struct hci_dev *hdev, u8 status)
hci_dev_lock(hdev);
+ restart_le_auto_conns(hdev);
+
mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, &match);
new_settings(hdev, match.sk);
@@ -4292,11 +5024,6 @@ static int powered_update_hci(struct hci_dev *hdev)
}
if (lmp_le_capable(hdev)) {
- /* Set random address to static address if configured */
- if (bacmp(&hdev->static_addr, BDADDR_ANY))
- hci_req_add(&req, HCI_OP_LE_SET_RANDOM_ADDR, 6,
- &hdev->static_addr);
-
/* Make sure the controller has a good default for
* advertising data. This also applies to the case
* where BR/EDR was toggled during the AUTO_OFF phase.
@@ -4422,6 +5149,10 @@ void mgmt_discoverable(struct hci_dev *hdev, u8 discoverable)
if (mgmt_pending_find(MGMT_OP_SET_DISCOVERABLE, hdev))
return;
+ /* Powering off may clear the scan mode - don't let that interfere */
+ if (!discoverable && mgmt_pending_find(MGMT_OP_SET_POWERED, hdev))
+ return;
+
if (discoverable) {
changed = !test_and_set_bit(HCI_DISCOVERABLE, &hdev->dev_flags);
} else {
@@ -4455,6 +5186,10 @@ void mgmt_connectable(struct hci_dev *hdev, u8 connectable)
if (mgmt_pending_find(MGMT_OP_SET_CONNECTABLE, hdev))
return;
+ /* Powering off may clear the scan mode - don't let that interfere */
+ if (!connectable && mgmt_pending_find(MGMT_OP_SET_POWERED, hdev))
+ return;
+
if (connectable)
changed = !test_and_set_bit(HCI_CONNECTABLE, &hdev->dev_flags);
else
@@ -4464,6 +5199,18 @@ void mgmt_connectable(struct hci_dev *hdev, u8 connectable)
new_settings(hdev, NULL);
}
+void mgmt_advertising(struct hci_dev *hdev, u8 advertising)
+{
+ /* Powering off may stop advertising - don't let that interfere */
+ if (!advertising && mgmt_pending_find(MGMT_OP_SET_POWERED, hdev))
+ return;
+
+ if (advertising)
+ set_bit(HCI_ADVERTISING, &hdev->dev_flags);
+ else
+ clear_bit(HCI_ADVERTISING, &hdev->dev_flags);
+}
+
void mgmt_write_scan_failed(struct hci_dev *hdev, u8 scan, u8 status)
{
u8 mgmt_err = mgmt_status(status);
@@ -4494,28 +5241,112 @@ void mgmt_new_link_key(struct hci_dev *hdev, struct link_key *key,
mgmt_event(MGMT_EV_NEW_LINK_KEY, hdev, &ev, sizeof(ev), NULL);
}
-void mgmt_new_ltk(struct hci_dev *hdev, struct smp_ltk *key, u8 persistent)
+static u8 mgmt_ltk_type(struct smp_ltk *ltk)
+{
+ if (ltk->authenticated)
+ return MGMT_LTK_AUTHENTICATED;
+
+ return MGMT_LTK_UNAUTHENTICATED;
+}
+
+void mgmt_new_ltk(struct hci_dev *hdev, struct smp_ltk *key, bool persistent)
{
struct mgmt_ev_new_long_term_key ev;
memset(&ev, 0, sizeof(ev));
- ev.store_hint = persistent;
+ /* Devices using resolvable or non-resolvable random addresses
+ * without providing an identity resolving key don't require
+ * to store long term keys. Their addresses will change the
+ * next time around.
+ *
+ * Only when a remote device provides an identity address
+ * make sure the long term key is stored. If the remote
+ * identity is known, the long term keys are internally
+ * mapped to the identity address. So allow static random
+ * and public addresses here.
+ */
+ if (key->bdaddr_type == ADDR_LE_DEV_RANDOM &&
+ (key->bdaddr.b[5] & 0xc0) != 0xc0)
+ ev.store_hint = 0x00;
+ else
+ ev.store_hint = persistent;
+
bacpy(&ev.key.addr.bdaddr, &key->bdaddr);
ev.key.addr.type = link_to_bdaddr(LE_LINK, key->bdaddr_type);
- ev.key.authenticated = key->authenticated;
+ ev.key.type = mgmt_ltk_type(key);
ev.key.enc_size = key->enc_size;
ev.key.ediv = key->ediv;
+ ev.key.rand = key->rand;
if (key->type == HCI_SMP_LTK)
ev.key.master = 1;
- memcpy(ev.key.rand, key->rand, sizeof(key->rand));
memcpy(ev.key.val, key->val, sizeof(key->val));
mgmt_event(MGMT_EV_NEW_LONG_TERM_KEY, hdev, &ev, sizeof(ev), NULL);
}
+void mgmt_new_irk(struct hci_dev *hdev, struct smp_irk *irk)
+{
+ struct mgmt_ev_new_irk ev;
+
+ memset(&ev, 0, sizeof(ev));
+
+ /* For identity resolving keys from devices that are already
+ * using a public address or static random address, do not
+ * ask for storing this key. The identity resolving key really
+ * is only mandatory for devices using resolvable random
+ * addresses.
+ *
+ * Storing all identity resolving keys has the downside that
+ * they will also be loaded on the next boot of the system. More
+ * identity resolving keys mean more time during scanning is
+ * needed to actually resolve these addresses.
+ */
+ if (bacmp(&irk->rpa, BDADDR_ANY))
+ ev.store_hint = 0x01;
+ else
+ ev.store_hint = 0x00;
+
+ bacpy(&ev.rpa, &irk->rpa);
+ bacpy(&ev.irk.addr.bdaddr, &irk->bdaddr);
+ ev.irk.addr.type = link_to_bdaddr(LE_LINK, irk->addr_type);
+ memcpy(ev.irk.val, irk->val, sizeof(irk->val));
+
+ mgmt_event(MGMT_EV_NEW_IRK, hdev, &ev, sizeof(ev), NULL);
+}
+
+void mgmt_new_csrk(struct hci_dev *hdev, struct smp_csrk *csrk,
+ bool persistent)
+{
+ struct mgmt_ev_new_csrk ev;
+
+ memset(&ev, 0, sizeof(ev));
+
+ /* Devices using resolvable or non-resolvable random addresses
+ * without providing an identity resolving key don't require
+ * to store signature resolving keys. Their addresses will change
+ * the next time around.
+ *
+ * Only when a remote device provides an identity address
+ * make sure the signature resolving key is stored. So allow
+ * static random and public addresses here.
+ */
+ if (csrk->bdaddr_type == ADDR_LE_DEV_RANDOM &&
+ (csrk->bdaddr.b[5] & 0xc0) != 0xc0)
+ ev.store_hint = 0x00;
+ else
+ ev.store_hint = persistent;
+
+ bacpy(&ev.key.addr.bdaddr, &csrk->bdaddr);
+ ev.key.addr.type = link_to_bdaddr(LE_LINK, csrk->bdaddr_type);
+ ev.key.master = csrk->master;
+ memcpy(ev.key.val, csrk->val, sizeof(csrk->val));
+
+ mgmt_event(MGMT_EV_NEW_CSRK, hdev, &ev, sizeof(ev), NULL);
+}
+
static inline u16 eir_append_data(u8 *eir, u16 eir_len, u8 type, u8 *data,
u8 data_len)
{
@@ -4590,11 +5421,29 @@ static void unpair_device_rsp(struct pending_cmd *cmd, void *data)
}
void mgmt_device_disconnected(struct hci_dev *hdev, bdaddr_t *bdaddr,
- u8 link_type, u8 addr_type, u8 reason)
+ u8 link_type, u8 addr_type, u8 reason,
+ bool mgmt_connected)
{
struct mgmt_ev_device_disconnected ev;
+ struct pending_cmd *power_off;
struct sock *sk = NULL;
+ power_off = mgmt_pending_find(MGMT_OP_SET_POWERED, hdev);
+ if (power_off) {
+ struct mgmt_mode *cp = power_off->param;
+
+ /* The connection is still in hci_conn_hash so test for 1
+ * instead of 0 to know if this is the last one.
+ */
+ if (!cp->val && hci_conn_count(hdev) == 1) {
+ cancel_delayed_work(&hdev->power_off);
+ queue_work(hdev->req_workqueue, &hdev->power_off.work);
+ }
+ }
+
+ if (!mgmt_connected)
+ return;
+
if (link_type != ACL_LINK && link_type != LE_LINK)
return;
@@ -4649,6 +5498,20 @@ void mgmt_connect_failed(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type,
u8 addr_type, u8 status)
{
struct mgmt_ev_connect_failed ev;
+ struct pending_cmd *power_off;
+
+ power_off = mgmt_pending_find(MGMT_OP_SET_POWERED, hdev);
+ if (power_off) {
+ struct mgmt_mode *cp = power_off->param;
+
+ /* The connection is still in hci_conn_hash so test for 1
+ * instead of 0 to know if this is the last one.
+ */
+ if (!cp->val && hci_conn_count(hdev) == 1) {
+ cancel_delayed_work(&hdev->power_off);
+ queue_work(hdev->req_workqueue, &hdev->power_off.work);
+ }
+ }
bacpy(&ev.addr.bdaddr, bdaddr);
ev.addr.type = link_to_bdaddr(link_type, addr_type);
@@ -4707,7 +5570,7 @@ void mgmt_pin_code_neg_reply_complete(struct hci_dev *hdev, bdaddr_t *bdaddr,
}
int mgmt_user_confirm_request(struct hci_dev *hdev, bdaddr_t *bdaddr,
- u8 link_type, u8 addr_type, __le32 value,
+ u8 link_type, u8 addr_type, u32 value,
u8 confirm_hint)
{
struct mgmt_ev_user_confirm_request ev;
@@ -4717,7 +5580,7 @@ int mgmt_user_confirm_request(struct hci_dev *hdev, bdaddr_t *bdaddr,
bacpy(&ev.addr.bdaddr, bdaddr);
ev.addr.type = link_to_bdaddr(link_type, addr_type);
ev.confirm_hint = confirm_hint;
- ev.value = value;
+ ev.value = cpu_to_le32(value);
return mgmt_event(MGMT_EV_USER_CONFIRM_REQUEST, hdev, &ev, sizeof(ev),
NULL);
@@ -4910,6 +5773,43 @@ void mgmt_ssp_enable_complete(struct hci_dev *hdev, u8 enable, u8 status)
hci_req_run(&req, NULL);
}
+void mgmt_sc_enable_complete(struct hci_dev *hdev, u8 enable, u8 status)
+{
+ struct cmd_lookup match = { NULL, hdev };
+ bool changed = false;
+
+ if (status) {
+ u8 mgmt_err = mgmt_status(status);
+
+ if (enable) {
+ if (test_and_clear_bit(HCI_SC_ENABLED,
+ &hdev->dev_flags))
+ new_settings(hdev, NULL);
+ clear_bit(HCI_SC_ONLY, &hdev->dev_flags);
+ }
+
+ mgmt_pending_foreach(MGMT_OP_SET_SECURE_CONN, hdev,
+ cmd_status_rsp, &mgmt_err);
+ return;
+ }
+
+ if (enable) {
+ changed = !test_and_set_bit(HCI_SC_ENABLED, &hdev->dev_flags);
+ } else {
+ changed = test_and_clear_bit(HCI_SC_ENABLED, &hdev->dev_flags);
+ clear_bit(HCI_SC_ONLY, &hdev->dev_flags);
+ }
+
+ mgmt_pending_foreach(MGMT_OP_SET_SECURE_CONN, hdev,
+ settings_rsp, &match);
+
+ if (changed)
+ new_settings(hdev, match.sk);
+
+ if (match.sk)
+ sock_put(match.sk);
+}
+
static void sk_lookup(struct pending_cmd *cmd, void *data)
{
struct cmd_lookup *match = data;
@@ -4964,8 +5864,9 @@ void mgmt_set_local_name_complete(struct hci_dev *hdev, u8 *name, u8 status)
cmd ? cmd->sk : NULL);
}
-void mgmt_read_local_oob_data_reply_complete(struct hci_dev *hdev, u8 *hash,
- u8 *randomizer, u8 status)
+void mgmt_read_local_oob_data_complete(struct hci_dev *hdev, u8 *hash192,
+ u8 *randomizer192, u8 *hash256,
+ u8 *randomizer256, u8 status)
{
struct pending_cmd *cmd;
@@ -4979,42 +5880,72 @@ void mgmt_read_local_oob_data_reply_complete(struct hci_dev *hdev, u8 *hash,
cmd_status(cmd->sk, hdev->id, MGMT_OP_READ_LOCAL_OOB_DATA,
mgmt_status(status));
} else {
- struct mgmt_rp_read_local_oob_data rp;
+ if (test_bit(HCI_SC_ENABLED, &hdev->dev_flags) &&
+ hash256 && randomizer256) {
+ struct mgmt_rp_read_local_oob_ext_data rp;
+
+ memcpy(rp.hash192, hash192, sizeof(rp.hash192));
+ memcpy(rp.randomizer192, randomizer192,
+ sizeof(rp.randomizer192));
- memcpy(rp.hash, hash, sizeof(rp.hash));
- memcpy(rp.randomizer, randomizer, sizeof(rp.randomizer));
+ memcpy(rp.hash256, hash256, sizeof(rp.hash256));
+ memcpy(rp.randomizer256, randomizer256,
+ sizeof(rp.randomizer256));
+
+ cmd_complete(cmd->sk, hdev->id,
+ MGMT_OP_READ_LOCAL_OOB_DATA, 0,
+ &rp, sizeof(rp));
+ } else {
+ struct mgmt_rp_read_local_oob_data rp;
- cmd_complete(cmd->sk, hdev->id, MGMT_OP_READ_LOCAL_OOB_DATA,
- 0, &rp, sizeof(rp));
+ memcpy(rp.hash, hash192, sizeof(rp.hash));
+ memcpy(rp.randomizer, randomizer192,
+ sizeof(rp.randomizer));
+
+ cmd_complete(cmd->sk, hdev->id,
+ MGMT_OP_READ_LOCAL_OOB_DATA, 0,
+ &rp, sizeof(rp));
+ }
}
mgmt_pending_remove(cmd);
}
void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type,
- u8 addr_type, u8 *dev_class, s8 rssi, u8 cfm_name, u8
- ssp, u8 *eir, u16 eir_len)
+ u8 addr_type, u8 *dev_class, s8 rssi, u8 cfm_name,
+ u8 ssp, u8 *eir, u16 eir_len, u8 *scan_rsp,
+ u8 scan_rsp_len)
{
char buf[512];
struct mgmt_ev_device_found *ev = (void *) buf;
+ struct smp_irk *irk;
size_t ev_size;
if (!hci_discovery_active(hdev))
return;
- /* Leave 5 bytes for a potential CoD field */
- if (sizeof(*ev) + eir_len + 5 > sizeof(buf))
+ /* Make sure that the buffer is big enough. The 5 extra bytes
+ * are for the potential CoD field.
+ */
+ if (sizeof(*ev) + eir_len + scan_rsp_len + 5 > sizeof(buf))
return;
memset(buf, 0, sizeof(buf));
- bacpy(&ev->addr.bdaddr, bdaddr);
- ev->addr.type = link_to_bdaddr(link_type, addr_type);
+ irk = hci_get_irk(hdev, bdaddr, addr_type);
+ if (irk) {
+ bacpy(&ev->addr.bdaddr, &irk->bdaddr);
+ ev->addr.type = link_to_bdaddr(link_type, irk->addr_type);
+ } else {
+ bacpy(&ev->addr.bdaddr, bdaddr);
+ ev->addr.type = link_to_bdaddr(link_type, addr_type);
+ }
+
ev->rssi = rssi;
if (cfm_name)
- ev->flags |= __constant_cpu_to_le32(MGMT_DEV_FOUND_CONFIRM_NAME);
+ ev->flags |= cpu_to_le32(MGMT_DEV_FOUND_CONFIRM_NAME);
if (!ssp)
- ev->flags |= __constant_cpu_to_le32(MGMT_DEV_FOUND_LEGACY_PAIRING);
+ ev->flags |= cpu_to_le32(MGMT_DEV_FOUND_LEGACY_PAIRING);
if (eir_len > 0)
memcpy(ev->eir, eir, eir_len);
@@ -5023,8 +5954,11 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type,
eir_len = eir_append_data(ev->eir, eir_len, EIR_CLASS_OF_DEV,
dev_class, 3);
- ev->eir_len = cpu_to_le16(eir_len);
- ev_size = sizeof(*ev) + eir_len;
+ if (scan_rsp_len > 0)
+ memcpy(ev->eir + eir_len, scan_rsp, scan_rsp_len);
+
+ ev->eir_len = cpu_to_le16(eir_len + scan_rsp_len);
+ ev_size = sizeof(*ev) + eir_len + scan_rsp_len;
mgmt_event(MGMT_EV_DEVICE_FOUND, hdev, ev, ev_size, NULL);
}
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index facd8a79c03..754b6fe4f74 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -186,9 +186,9 @@ static void rfcomm_l2state_change(struct sock *sk)
rfcomm_schedule();
}
-static void rfcomm_l2data_ready(struct sock *sk, int bytes)
+static void rfcomm_l2data_ready(struct sock *sk)
{
- BT_DBG("%p bytes %d", sk, bytes);
+ BT_DBG("%p", sk);
rfcomm_schedule();
}
@@ -216,6 +216,7 @@ static int rfcomm_check_security(struct rfcomm_dlc *d)
switch (d->sec_level) {
case BT_SECURITY_HIGH:
+ case BT_SECURITY_FIPS:
auth_type = HCI_AT_GENERAL_BONDING_MITM;
break;
case BT_SECURITY_MEDIUM:
@@ -306,7 +307,7 @@ struct rfcomm_dlc *rfcomm_dlc_alloc(gfp_t prio)
setup_timer(&d->timer, rfcomm_dlc_timeout, (unsigned long)d);
skb_queue_head_init(&d->tx_queue);
- spin_lock_init(&d->lock);
+ mutex_init(&d->lock);
atomic_set(&d->refcnt, 1);
rfcomm_dlc_clear_state(d);
@@ -359,6 +360,11 @@ static struct rfcomm_dlc *rfcomm_dlc_get(struct rfcomm_session *s, u8 dlci)
return NULL;
}
+static int rfcomm_check_channel(u8 channel)
+{
+ return channel < 1 || channel > 30;
+}
+
static int __rfcomm_dlc_open(struct rfcomm_dlc *d, bdaddr_t *src, bdaddr_t *dst, u8 channel)
{
struct rfcomm_session *s;
@@ -368,7 +374,7 @@ static int __rfcomm_dlc_open(struct rfcomm_dlc *d, bdaddr_t *src, bdaddr_t *dst,
BT_DBG("dlc %p state %ld %pMR -> %pMR channel %d",
d, d->state, src, dst, channel);
- if (channel < 1 || channel > 30)
+ if (rfcomm_check_channel(channel))
return -EINVAL;
if (d->state != BT_OPEN && d->state != BT_CLOSED)
@@ -425,6 +431,20 @@ int rfcomm_dlc_open(struct rfcomm_dlc *d, bdaddr_t *src, bdaddr_t *dst, u8 chann
return r;
}
+static void __rfcomm_dlc_disconn(struct rfcomm_dlc *d)
+{
+ struct rfcomm_session *s = d->session;
+
+ d->state = BT_DISCONN;
+ if (skb_queue_empty(&d->tx_queue)) {
+ rfcomm_send_disc(s, d->dlci);
+ rfcomm_dlc_set_timer(d, RFCOMM_DISC_TIMEOUT);
+ } else {
+ rfcomm_queue_disc(d);
+ rfcomm_dlc_set_timer(d, RFCOMM_DISC_TIMEOUT * 2);
+ }
+}
+
static int __rfcomm_dlc_close(struct rfcomm_dlc *d, int err)
{
struct rfcomm_session *s = d->session;
@@ -437,32 +457,29 @@ static int __rfcomm_dlc_close(struct rfcomm_dlc *d, int err)
switch (d->state) {
case BT_CONNECT:
case BT_CONFIG:
+ case BT_OPEN:
+ case BT_CONNECT2:
if (test_and_clear_bit(RFCOMM_DEFER_SETUP, &d->flags)) {
set_bit(RFCOMM_AUTH_REJECT, &d->flags);
rfcomm_schedule();
- break;
+ return 0;
}
- /* Fall through */
+ }
+ switch (d->state) {
+ case BT_CONNECT:
case BT_CONNECTED:
- d->state = BT_DISCONN;
- if (skb_queue_empty(&d->tx_queue)) {
- rfcomm_send_disc(s, d->dlci);
- rfcomm_dlc_set_timer(d, RFCOMM_DISC_TIMEOUT);
- } else {
- rfcomm_queue_disc(d);
- rfcomm_dlc_set_timer(d, RFCOMM_DISC_TIMEOUT * 2);
- }
+ __rfcomm_dlc_disconn(d);
break;
- case BT_OPEN:
- case BT_CONNECT2:
- if (test_and_clear_bit(RFCOMM_DEFER_SETUP, &d->flags)) {
- set_bit(RFCOMM_AUTH_REJECT, &d->flags);
- rfcomm_schedule();
+ case BT_CONFIG:
+ if (s->state != BT_BOUND) {
+ __rfcomm_dlc_disconn(d);
break;
}
- /* Fall through */
+ /* if closing a dlc in a session that hasn't been started,
+ * just close and unlink the dlc
+ */
default:
rfcomm_dlc_clear_timer(d);
@@ -513,6 +530,25 @@ no_session:
return r;
}
+struct rfcomm_dlc *rfcomm_dlc_exists(bdaddr_t *src, bdaddr_t *dst, u8 channel)
+{
+ struct rfcomm_session *s;
+ struct rfcomm_dlc *dlc = NULL;
+ u8 dlci;
+
+ if (rfcomm_check_channel(channel))
+ return ERR_PTR(-EINVAL);
+
+ rfcomm_lock();
+ s = rfcomm_session_get(src, dst);
+ if (s) {
+ dlci = __dlci(!s->initiator, channel);
+ dlc = rfcomm_dlc_get(s, dlci);
+ }
+ rfcomm_unlock();
+ return dlc;
+}
+
int rfcomm_dlc_send(struct rfcomm_dlc *d, struct sk_buff *skb)
{
int len = skb->len;
@@ -533,6 +569,20 @@ int rfcomm_dlc_send(struct rfcomm_dlc *d, struct sk_buff *skb)
return len;
}
+void rfcomm_dlc_send_noerror(struct rfcomm_dlc *d, struct sk_buff *skb)
+{
+ int len = skb->len;
+
+ BT_DBG("dlc %p mtu %d len %d", d, d->mtu, len);
+
+ rfcomm_make_uih(skb, d->addr);
+ skb_queue_tail(&d->tx_queue, skb);
+
+ if (d->state == BT_CONNECTED &&
+ !test_bit(RFCOMM_TX_THROTTLED, &d->flags))
+ rfcomm_schedule();
+}
+
void __rfcomm_dlc_throttle(struct rfcomm_dlc *d)
{
BT_DBG("dlc %p state %ld", d, d->state);
@@ -718,7 +768,7 @@ static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src,
bacpy(&addr.l2_bdaddr, dst);
addr.l2_family = AF_BLUETOOTH;
- addr.l2_psm = __constant_cpu_to_le16(RFCOMM_PSM);
+ addr.l2_psm = cpu_to_le16(RFCOMM_PSM);
addr.l2_cid = 0;
addr.l2_bdaddr_type = BDADDR_BREDR;
*err = kernel_connect(sock, (struct sockaddr *) &addr, sizeof(addr), O_NONBLOCK);
@@ -1943,12 +1993,11 @@ static void rfcomm_process_sessions(void)
continue;
}
- if (s->state == BT_LISTEN) {
+ switch (s->state) {
+ case BT_LISTEN:
rfcomm_accept_connection(s);
continue;
- }
- switch (s->state) {
case BT_BOUND:
s = rfcomm_check_connection(s);
break;
@@ -1983,7 +2032,7 @@ static int rfcomm_add_listener(bdaddr_t *ba)
/* Bind socket */
bacpy(&addr.l2_bdaddr, ba);
addr.l2_family = AF_BLUETOOTH;
- addr.l2_psm = __constant_cpu_to_le16(RFCOMM_PSM);
+ addr.l2_psm = cpu_to_le16(RFCOMM_PSM);
addr.l2_cid = 0;
addr.l2_bdaddr_type = BDADDR_BREDR;
err = kernel_bind(sock, (struct sockaddr *) &addr, sizeof(addr));
@@ -2085,7 +2134,8 @@ static void rfcomm_security_cfm(struct hci_conn *conn, u8 status, u8 encrypt)
set_bit(RFCOMM_SEC_PENDING, &d->flags);
rfcomm_dlc_set_timer(d, RFCOMM_AUTH_TIMEOUT);
continue;
- } else if (d->sec_level == BT_SECURITY_HIGH) {
+ } else if (d->sec_level == BT_SECURITY_HIGH ||
+ d->sec_level == BT_SECURITY_FIPS) {
set_bit(RFCOMM_ENC_DROP, &d->flags);
continue;
}
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index 3c2d3e4aa2f..c603a5eb472 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -54,7 +54,7 @@ static void rfcomm_sk_data_ready(struct rfcomm_dlc *d, struct sk_buff *skb)
atomic_add(skb->len, &sk->sk_rmem_alloc);
skb_queue_tail(&sk->sk_receive_queue, skb);
- sk->sk_data_ready(sk, skb->len);
+ sk->sk_data_ready(sk);
if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
rfcomm_dlc_throttle(d);
@@ -84,7 +84,7 @@ static void rfcomm_sk_state_change(struct rfcomm_dlc *d, int err)
sock_set_flag(sk, SOCK_ZAPPED);
bt_accept_unlink(sk);
}
- parent->sk_data_ready(parent, 0);
+ parent->sk_data_ready(parent);
} else {
if (d->state == BT_CONNECTED)
rfcomm_session_getaddr(d->session,
@@ -105,13 +105,18 @@ static void rfcomm_sk_state_change(struct rfcomm_dlc *d, int err)
}
/* ---- Socket functions ---- */
-static struct sock *__rfcomm_get_sock_by_addr(u8 channel, bdaddr_t *src)
+static struct sock *__rfcomm_get_listen_sock_by_addr(u8 channel, bdaddr_t *src)
{
struct sock *sk = NULL;
sk_for_each(sk, &rfcomm_sk_list.head) {
- if (rfcomm_pi(sk)->channel == channel &&
- !bacmp(&rfcomm_pi(sk)->src, src))
+ if (rfcomm_pi(sk)->channel != channel)
+ continue;
+
+ if (bacmp(&rfcomm_pi(sk)->src, src))
+ continue;
+
+ if (sk->sk_state == BT_BOUND || sk->sk_state == BT_LISTEN)
break;
}
@@ -331,6 +336,7 @@ static int rfcomm_sock_bind(struct socket *sock, struct sockaddr *addr, int addr
{
struct sockaddr_rc *sa = (struct sockaddr_rc *) addr;
struct sock *sk = sock->sk;
+ int chan = sa->rc_channel;
int err = 0;
BT_DBG("sk %p %pMR", sk, &sa->rc_bdaddr);
@@ -352,12 +358,12 @@ static int rfcomm_sock_bind(struct socket *sock, struct sockaddr *addr, int addr
write_lock(&rfcomm_sk_list.lock);
- if (sa->rc_channel && __rfcomm_get_sock_by_addr(sa->rc_channel, &sa->rc_bdaddr)) {
+ if (chan && __rfcomm_get_listen_sock_by_addr(chan, &sa->rc_bdaddr)) {
err = -EADDRINUSE;
} else {
/* Save source address */
bacpy(&rfcomm_pi(sk)->src, &sa->rc_bdaddr);
- rfcomm_pi(sk)->channel = sa->rc_channel;
+ rfcomm_pi(sk)->channel = chan;
sk->sk_state = BT_BOUND;
}
@@ -439,7 +445,7 @@ static int rfcomm_sock_listen(struct socket *sock, int backlog)
write_lock(&rfcomm_sk_list.lock);
for (channel = 1; channel < 31; channel++)
- if (!__rfcomm_get_sock_by_addr(channel, src)) {
+ if (!__rfcomm_get_listen_sock_by_addr(channel, src)) {
rfcomm_pi(sk)->channel = channel;
err = 0;
break;
@@ -528,6 +534,10 @@ static int rfcomm_sock_getname(struct socket *sock, struct sockaddr *addr, int *
BT_DBG("sock %p, sk %p", sock, sk);
+ if (peer && sk->sk_state != BT_CONNECTED &&
+ sk->sk_state != BT_CONNECT && sk->sk_state != BT_CONNECT2)
+ return -ENOTCONN;
+
memset(sa, 0, sizeof(*sa));
sa->rc_family = AF_BLUETOOTH;
sa->rc_channel = rfcomm_pi(sk)->channel;
@@ -648,6 +658,11 @@ static int rfcomm_sock_setsockopt_old(struct socket *sock, int optname, char __u
break;
}
+ if (opt & RFCOMM_LM_FIPS) {
+ err = -EINVAL;
+ break;
+ }
+
if (opt & RFCOMM_LM_AUTH)
rfcomm_pi(sk)->sec_level = BT_SECURITY_LOW;
if (opt & RFCOMM_LM_ENCRYPT)
@@ -762,7 +777,11 @@ static int rfcomm_sock_getsockopt_old(struct socket *sock, int optname, char __u
break;
case BT_SECURITY_HIGH:
opt = RFCOMM_LM_AUTH | RFCOMM_LM_ENCRYPT |
- RFCOMM_LM_SECURE;
+ RFCOMM_LM_SECURE;
+ break;
+ case BT_SECURITY_FIPS:
+ opt = RFCOMM_LM_AUTH | RFCOMM_LM_ENCRYPT |
+ RFCOMM_LM_SECURE | RFCOMM_LM_FIPS;
break;
default:
opt = 0;
@@ -774,6 +793,7 @@ static int rfcomm_sock_getsockopt_old(struct socket *sock, int optname, char __u
if (put_user(opt, (u32 __user *) optval))
err = -EFAULT;
+
break;
case RFCOMM_CONNINFO:
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index f9c0980abee..8e385a0ae60 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -40,6 +40,7 @@
#define RFCOMM_TTY_MAJOR 216 /* device node major id of the usb/bluetooth.c driver */
#define RFCOMM_TTY_MINOR 0
+static DEFINE_MUTEX(rfcomm_ioctl_mutex);
static struct tty_driver *rfcomm_tty_driver;
struct rfcomm_dev {
@@ -51,6 +52,8 @@ struct rfcomm_dev {
unsigned long flags;
int err;
+ unsigned long status; /* don't export to userspace */
+
bdaddr_t src;
bdaddr_t dst;
u8 channel;
@@ -58,7 +61,6 @@ struct rfcomm_dev {
uint modem_status;
struct rfcomm_dlc *dlc;
- wait_queue_head_t conn_wait;
struct device *tty_dev;
@@ -68,7 +70,7 @@ struct rfcomm_dev {
};
static LIST_HEAD(rfcomm_dev_list);
-static DEFINE_SPINLOCK(rfcomm_dev_lock);
+static DEFINE_MUTEX(rfcomm_dev_lock);
static void rfcomm_dev_data_ready(struct rfcomm_dlc *dlc, struct sk_buff *skb);
static void rfcomm_dev_state_change(struct rfcomm_dlc *dlc, int err);
@@ -83,10 +85,6 @@ static void rfcomm_dev_destruct(struct tty_port *port)
BT_DBG("dev %p dlc %p", dev, dlc);
- spin_lock(&rfcomm_dev_lock);
- list_del(&dev->list);
- spin_unlock(&rfcomm_dev_lock);
-
rfcomm_dlc_lock(dlc);
/* Detach DLC if it's owned by this dev */
if (dlc->owner == dev)
@@ -95,7 +93,12 @@ static void rfcomm_dev_destruct(struct tty_port *port)
rfcomm_dlc_put(dlc);
- tty_unregister_device(rfcomm_tty_driver, dev->id);
+ if (dev->tty_dev)
+ tty_unregister_device(rfcomm_tty_driver, dev->id);
+
+ mutex_lock(&rfcomm_dev_lock);
+ list_del(&dev->list);
+ mutex_unlock(&rfcomm_dev_lock);
kfree(dev);
@@ -104,60 +107,24 @@ static void rfcomm_dev_destruct(struct tty_port *port)
module_put(THIS_MODULE);
}
-static struct device *rfcomm_get_device(struct rfcomm_dev *dev)
-{
- struct hci_dev *hdev;
- struct hci_conn *conn;
-
- hdev = hci_get_route(&dev->dst, &dev->src);
- if (!hdev)
- return NULL;
-
- conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &dev->dst);
-
- hci_dev_put(hdev);
-
- return conn ? &conn->dev : NULL;
-}
-
/* device-specific initialization: open the dlc */
static int rfcomm_dev_activate(struct tty_port *port, struct tty_struct *tty)
{
struct rfcomm_dev *dev = container_of(port, struct rfcomm_dev, port);
- DEFINE_WAIT(wait);
int err;
err = rfcomm_dlc_open(dev->dlc, &dev->src, &dev->dst, dev->channel);
if (err)
- return err;
-
- while (1) {
- prepare_to_wait(&dev->conn_wait, &wait, TASK_INTERRUPTIBLE);
-
- if (dev->dlc->state == BT_CLOSED) {
- err = -dev->err;
- break;
- }
-
- if (dev->dlc->state == BT_CONNECTED)
- break;
-
- if (signal_pending(current)) {
- err = -ERESTARTSYS;
- break;
- }
-
- tty_unlock(tty);
- schedule();
- tty_lock(tty);
- }
- finish_wait(&dev->conn_wait, &wait);
+ set_bit(TTY_IO_ERROR, &tty->flags);
+ return err;
+}
- if (!err)
- device_move(dev->tty_dev, rfcomm_get_device(dev),
- DPM_ORDER_DEV_AFTER_PARENT);
+/* we block the open until the dlc->state becomes BT_CONNECTED */
+static int rfcomm_dev_carrier_raised(struct tty_port *port)
+{
+ struct rfcomm_dev *dev = container_of(port, struct rfcomm_dev, port);
- return err;
+ return (dev->dlc->state == BT_CONNECTED);
}
/* device-specific cleanup: close the dlc */
@@ -176,9 +143,10 @@ static const struct tty_port_operations rfcomm_port_ops = {
.destruct = rfcomm_dev_destruct,
.activate = rfcomm_dev_activate,
.shutdown = rfcomm_dev_shutdown,
+ .carrier_raised = rfcomm_dev_carrier_raised,
};
-static struct rfcomm_dev *__rfcomm_dev_get(int id)
+static struct rfcomm_dev *__rfcomm_dev_lookup(int id)
{
struct rfcomm_dev *dev;
@@ -193,22 +161,43 @@ static struct rfcomm_dev *rfcomm_dev_get(int id)
{
struct rfcomm_dev *dev;
- spin_lock(&rfcomm_dev_lock);
+ mutex_lock(&rfcomm_dev_lock);
- dev = __rfcomm_dev_get(id);
+ dev = __rfcomm_dev_lookup(id);
- if (dev) {
- if (test_bit(RFCOMM_TTY_RELEASED, &dev->flags))
- dev = NULL;
- else
- tty_port_get(&dev->port);
- }
+ if (dev && !tty_port_get(&dev->port))
+ dev = NULL;
- spin_unlock(&rfcomm_dev_lock);
+ mutex_unlock(&rfcomm_dev_lock);
return dev;
}
+static void rfcomm_reparent_device(struct rfcomm_dev *dev)
+{
+ struct hci_dev *hdev;
+ struct hci_conn *conn;
+
+ hdev = hci_get_route(&dev->dst, &dev->src);
+ if (!hdev)
+ return;
+
+ /* The lookup results are unsafe to access without the
+ * hci device lock (FIXME: why is this not documented?)
+ */
+ hci_dev_lock(hdev);
+ conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &dev->dst);
+
+ /* Just because the acl link is in the hash table is no
+ * guarantee the sysfs device has been added ...
+ */
+ if (conn && device_is_registered(&conn->dev))
+ device_move(dev->tty_dev, &conn->dev, DPM_ORDER_DEV_AFTER_PARENT);
+
+ hci_dev_unlock(hdev);
+ hci_dev_put(hdev);
+}
+
static ssize_t show_address(struct device *tty_dev, struct device_attribute *attr, char *buf)
{
struct rfcomm_dev *dev = dev_get_drvdata(tty_dev);
@@ -224,19 +213,18 @@ static ssize_t show_channel(struct device *tty_dev, struct device_attribute *att
static DEVICE_ATTR(address, S_IRUGO, show_address, NULL);
static DEVICE_ATTR(channel, S_IRUGO, show_channel, NULL);
-static int rfcomm_dev_add(struct rfcomm_dev_req *req, struct rfcomm_dlc *dlc)
+static struct rfcomm_dev *__rfcomm_dev_add(struct rfcomm_dev_req *req,
+ struct rfcomm_dlc *dlc)
{
struct rfcomm_dev *dev, *entry;
struct list_head *head = &rfcomm_dev_list;
int err = 0;
- BT_DBG("id %d channel %d", req->dev_id, req->channel);
-
dev = kzalloc(sizeof(struct rfcomm_dev), GFP_KERNEL);
if (!dev)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
- spin_lock(&rfcomm_dev_lock);
+ mutex_lock(&rfcomm_dev_lock);
if (req->dev_id < 0) {
dev->id = 0;
@@ -282,7 +270,6 @@ static int rfcomm_dev_add(struct rfcomm_dev_req *req, struct rfcomm_dlc *dlc)
tty_port_init(&dev->port);
dev->port.ops = &rfcomm_port_ops;
- init_waitqueue_head(&dev->conn_wait);
skb_queue_head_init(&dev->pending);
@@ -318,22 +305,37 @@ static int rfcomm_dev_add(struct rfcomm_dev_req *req, struct rfcomm_dlc *dlc)
holds reference to this module. */
__module_get(THIS_MODULE);
+ mutex_unlock(&rfcomm_dev_lock);
+ return dev;
+
out:
- spin_unlock(&rfcomm_dev_lock);
+ mutex_unlock(&rfcomm_dev_lock);
+ kfree(dev);
+ return ERR_PTR(err);
+}
- if (err < 0)
- goto free;
+static int rfcomm_dev_add(struct rfcomm_dev_req *req, struct rfcomm_dlc *dlc)
+{
+ struct rfcomm_dev *dev;
+ struct device *tty;
- dev->tty_dev = tty_port_register_device(&dev->port, rfcomm_tty_driver,
+ BT_DBG("id %d channel %d", req->dev_id, req->channel);
+
+ dev = __rfcomm_dev_add(req, dlc);
+ if (IS_ERR(dev)) {
+ rfcomm_dlc_put(dlc);
+ return PTR_ERR(dev);
+ }
+
+ tty = tty_port_register_device(&dev->port, rfcomm_tty_driver,
dev->id, NULL);
- if (IS_ERR(dev->tty_dev)) {
- err = PTR_ERR(dev->tty_dev);
- spin_lock(&rfcomm_dev_lock);
- list_del(&dev->list);
- spin_unlock(&rfcomm_dev_lock);
- goto free;
+ if (IS_ERR(tty)) {
+ tty_port_put(&dev->port);
+ return PTR_ERR(tty);
}
+ dev->tty_dev = tty;
+ rfcomm_reparent_device(dev);
dev_set_drvdata(dev->tty_dev, dev);
if (device_create_file(dev->tty_dev, &dev_attr_address) < 0)
@@ -343,24 +345,23 @@ out:
BT_ERR("Failed to create channel attribute");
return dev->id;
-
-free:
- kfree(dev);
- return err;
}
/* ---- Send buffer ---- */
-static inline unsigned int rfcomm_room(struct rfcomm_dlc *dlc)
+static inline unsigned int rfcomm_room(struct rfcomm_dev *dev)
{
- /* We can't let it be zero, because we don't get a callback
- when tx_credits becomes nonzero, hence we'd never wake up */
- return dlc->mtu * (dlc->tx_credits?:1);
+ struct rfcomm_dlc *dlc = dev->dlc;
+
+ /* Limit the outstanding number of packets not yet sent to 40 */
+ int pending = 40 - atomic_read(&dev->wmem_alloc);
+
+ return max(0, pending) * dlc->mtu;
}
static void rfcomm_wfree(struct sk_buff *skb)
{
struct rfcomm_dev *dev = (void *) skb->sk;
- atomic_sub(skb->truesize, &dev->wmem_alloc);
+ atomic_dec(&dev->wmem_alloc);
if (test_bit(RFCOMM_TTY_ATTACHED, &dev->flags))
tty_port_tty_wakeup(&dev->port);
tty_port_put(&dev->port);
@@ -369,28 +370,24 @@ static void rfcomm_wfree(struct sk_buff *skb)
static void rfcomm_set_owner_w(struct sk_buff *skb, struct rfcomm_dev *dev)
{
tty_port_get(&dev->port);
- atomic_add(skb->truesize, &dev->wmem_alloc);
+ atomic_inc(&dev->wmem_alloc);
skb->sk = (void *) dev;
skb->destructor = rfcomm_wfree;
}
static struct sk_buff *rfcomm_wmalloc(struct rfcomm_dev *dev, unsigned long size, gfp_t priority)
{
- if (atomic_read(&dev->wmem_alloc) < rfcomm_room(dev->dlc)) {
- struct sk_buff *skb = alloc_skb(size, priority);
- if (skb) {
- rfcomm_set_owner_w(skb, dev);
- return skb;
- }
- }
- return NULL;
+ struct sk_buff *skb = alloc_skb(size, priority);
+ if (skb)
+ rfcomm_set_owner_w(skb, dev);
+ return skb;
}
/* ---- Device IOCTLs ---- */
#define NOCAP_FLAGS ((1 << RFCOMM_REUSE_DLC) | (1 << RFCOMM_RELEASE_ONHUP))
-static int rfcomm_create_dev(struct sock *sk, void __user *arg)
+static int __rfcomm_create_dev(struct sock *sk, void __user *arg)
{
struct rfcomm_dev_req req;
struct rfcomm_dlc *dlc;
@@ -412,16 +409,22 @@ static int rfcomm_create_dev(struct sock *sk, void __user *arg)
dlc = rfcomm_pi(sk)->dlc;
rfcomm_dlc_hold(dlc);
} else {
+ /* Validate the channel is unused */
+ dlc = rfcomm_dlc_exists(&req.src, &req.dst, req.channel);
+ if (IS_ERR(dlc))
+ return PTR_ERR(dlc);
+ else if (dlc) {
+ rfcomm_dlc_put(dlc);
+ return -EBUSY;
+ }
dlc = rfcomm_dlc_alloc(GFP_KERNEL);
if (!dlc)
return -ENOMEM;
}
id = rfcomm_dev_add(&req, dlc);
- if (id < 0) {
- rfcomm_dlc_put(dlc);
+ if (id < 0)
return id;
- }
if (req.flags & (1 << RFCOMM_REUSE_DLC)) {
/* DLC is now used by device.
@@ -432,7 +435,7 @@ static int rfcomm_create_dev(struct sock *sk, void __user *arg)
return id;
}
-static int rfcomm_release_dev(void __user *arg)
+static int __rfcomm_release_dev(void __user *arg)
{
struct rfcomm_dev_req req;
struct rfcomm_dev *dev;
@@ -452,6 +455,12 @@ static int rfcomm_release_dev(void __user *arg)
return -EPERM;
}
+ /* only release once */
+ if (test_and_set_bit(RFCOMM_DEV_RELEASED, &dev->status)) {
+ tty_port_put(&dev->port);
+ return -EALREADY;
+ }
+
if (req.flags & (1 << RFCOMM_HANGUP_NOW))
rfcomm_dlc_close(dev->dlc, 0);
@@ -462,14 +471,35 @@ static int rfcomm_release_dev(void __user *arg)
tty_kref_put(tty);
}
- if (!test_bit(RFCOMM_RELEASE_ONHUP, &dev->flags) &&
- !test_and_set_bit(RFCOMM_TTY_RELEASED, &dev->flags))
+ if (!test_bit(RFCOMM_TTY_OWNED, &dev->status))
tty_port_put(&dev->port);
tty_port_put(&dev->port);
return 0;
}
+static int rfcomm_create_dev(struct sock *sk, void __user *arg)
+{
+ int ret;
+
+ mutex_lock(&rfcomm_ioctl_mutex);
+ ret = __rfcomm_create_dev(sk, arg);
+ mutex_unlock(&rfcomm_ioctl_mutex);
+
+ return ret;
+}
+
+static int rfcomm_release_dev(void __user *arg)
+{
+ int ret;
+
+ mutex_lock(&rfcomm_ioctl_mutex);
+ ret = __rfcomm_release_dev(arg);
+ mutex_unlock(&rfcomm_ioctl_mutex);
+
+ return ret;
+}
+
static int rfcomm_get_dev_list(void __user *arg)
{
struct rfcomm_dev *dev;
@@ -494,10 +524,10 @@ static int rfcomm_get_dev_list(void __user *arg)
di = dl->dev_info;
- spin_lock(&rfcomm_dev_lock);
+ mutex_lock(&rfcomm_dev_lock);
list_for_each_entry(dev, &rfcomm_dev_list, list) {
- if (test_bit(RFCOMM_TTY_RELEASED, &dev->flags))
+ if (!tty_port_get(&dev->port))
continue;
(di + n)->id = dev->id;
(di + n)->flags = dev->flags;
@@ -505,11 +535,12 @@ static int rfcomm_get_dev_list(void __user *arg)
(di + n)->channel = dev->channel;
bacpy(&(di + n)->src, &dev->src);
bacpy(&(di + n)->dst, &dev->dst);
+ tty_port_put(&dev->port);
if (++n >= dev_num)
break;
}
- spin_unlock(&rfcomm_dev_lock);
+ mutex_unlock(&rfcomm_dev_lock);
dl->dev_num = n;
size = sizeof(*dl) + n * sizeof(*di);
@@ -601,9 +632,11 @@ static void rfcomm_dev_state_change(struct rfcomm_dlc *dlc, int err)
BT_DBG("dlc %p dev %p err %d", dlc, dev, err);
dev->err = err;
- wake_up_interruptible(&dev->conn_wait);
+ if (dlc->state == BT_CONNECTED) {
+ rfcomm_reparent_device(dev);
- if (dlc->state == BT_CLOSED)
+ wake_up_interruptible(&dev->port.open_wait);
+ } else if (dlc->state == BT_CLOSED)
tty_port_tty_hangup(&dev->port, false);
}
@@ -703,8 +736,10 @@ static int rfcomm_tty_install(struct tty_driver *driver, struct tty_struct *tty)
* when the last process closes the tty. The behaviour is expected by
* userspace.
*/
- if (test_bit(RFCOMM_RELEASE_ONHUP, &dev->flags))
+ if (test_bit(RFCOMM_RELEASE_ONHUP, &dev->flags)) {
+ set_bit(RFCOMM_TTY_OWNED, &dev->status);
tty_port_put(&dev->port);
+ }
return 0;
}
@@ -750,7 +785,7 @@ static int rfcomm_tty_write(struct tty_struct *tty, const unsigned char *buf, in
struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data;
struct rfcomm_dlc *dlc = dev->dlc;
struct sk_buff *skb;
- int err = 0, sent = 0, size;
+ int sent = 0, size;
BT_DBG("tty %p count %d", tty, count);
@@ -758,7 +793,6 @@ static int rfcomm_tty_write(struct tty_struct *tty, const unsigned char *buf, in
size = min_t(uint, count, dlc->mtu);
skb = rfcomm_wmalloc(dev, size + RFCOMM_SKB_RESERVE, GFP_ATOMIC);
-
if (!skb)
break;
@@ -766,32 +800,24 @@ static int rfcomm_tty_write(struct tty_struct *tty, const unsigned char *buf, in
memcpy(skb_put(skb, size), buf + sent, size);
- err = rfcomm_dlc_send(dlc, skb);
- if (err < 0) {
- kfree_skb(skb);
- break;
- }
+ rfcomm_dlc_send_noerror(dlc, skb);
sent += size;
count -= size;
}
- return sent ? sent : err;
+ return sent;
}
static int rfcomm_tty_write_room(struct tty_struct *tty)
{
struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data;
- int room;
+ int room = 0;
- BT_DBG("tty %p", tty);
-
- if (!dev || !dev->dlc)
- return 0;
+ if (dev && dev->dlc)
+ room = rfcomm_room(dev);
- room = rfcomm_room(dev->dlc) - atomic_read(&dev->wmem_alloc);
- if (room < 0)
- room = 0;
+ BT_DBG("tty %p room %d", tty, room);
return room;
}
@@ -1125,7 +1151,7 @@ int __init rfcomm_init_ttys(void)
rfcomm_tty_driver->subtype = SERIAL_TYPE_NORMAL;
rfcomm_tty_driver->flags = TTY_DRIVER_REAL_RAW | TTY_DRIVER_DYNAMIC_DEV;
rfcomm_tty_driver->init_termios = tty_std_termios;
- rfcomm_tty_driver->init_termios.c_cflag = B9600 | CS8 | CREAD | HUPCL | CLOCAL;
+ rfcomm_tty_driver->init_termios.c_cflag = B9600 | CS8 | CREAD | HUPCL;
rfcomm_tty_driver->init_termios.c_lflag &= ~ICANON;
tty_set_operations(rfcomm_tty_driver, &rfcomm_ops);
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 24fa3964b3c..c06dbd3938e 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -676,20 +676,20 @@ static void sco_conn_defer_accept(struct hci_conn *conn, u16 setting)
bacpy(&cp.bdaddr, &conn->dst);
cp.pkt_type = cpu_to_le16(conn->pkt_type);
- cp.tx_bandwidth = __constant_cpu_to_le32(0x00001f40);
- cp.rx_bandwidth = __constant_cpu_to_le32(0x00001f40);
+ cp.tx_bandwidth = cpu_to_le32(0x00001f40);
+ cp.rx_bandwidth = cpu_to_le32(0x00001f40);
cp.content_format = cpu_to_le16(setting);
switch (setting & SCO_AIRMODE_MASK) {
case SCO_AIRMODE_TRANSP:
if (conn->pkt_type & ESCO_2EV3)
- cp.max_latency = __constant_cpu_to_le16(0x0008);
+ cp.max_latency = cpu_to_le16(0x0008);
else
- cp.max_latency = __constant_cpu_to_le16(0x000D);
+ cp.max_latency = cpu_to_le16(0x000D);
cp.retrans_effort = 0x02;
break;
case SCO_AIRMODE_CVSD:
- cp.max_latency = __constant_cpu_to_le16(0xffff);
+ cp.max_latency = cpu_to_le16(0xffff);
cp.retrans_effort = 0xff;
break;
}
@@ -1024,7 +1024,7 @@ static void sco_conn_ready(struct sco_conn *conn)
sk->sk_state = BT_CONNECTED;
/* Wake up parent */
- parent->sk_data_ready(parent, 1);
+ parent->sk_data_ready(parent);
bh_unlock_sock(parent);
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index 45007362683..e33a982161c 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -35,14 +35,41 @@
#define AUTH_REQ_MASK 0x07
-static inline void swap128(u8 src[16], u8 dst[16])
+#define SMP_FLAG_TK_VALID 1
+#define SMP_FLAG_CFM_PENDING 2
+#define SMP_FLAG_MITM_AUTH 3
+#define SMP_FLAG_COMPLETE 4
+#define SMP_FLAG_INITIATOR 5
+
+struct smp_chan {
+ struct l2cap_conn *conn;
+ u8 preq[7]; /* SMP Pairing Request */
+ u8 prsp[7]; /* SMP Pairing Response */
+ u8 prnd[16]; /* SMP Pairing Random (local) */
+ u8 rrnd[16]; /* SMP Pairing Random (remote) */
+ u8 pcnf[16]; /* SMP Pairing Confirm */
+ u8 tk[16]; /* SMP Temporary Key */
+ u8 enc_key_size;
+ u8 remote_key_dist;
+ bdaddr_t id_addr;
+ u8 id_addr_type;
+ u8 irk[16];
+ struct smp_csrk *csrk;
+ struct smp_csrk *slave_csrk;
+ struct smp_ltk *ltk;
+ struct smp_ltk *slave_ltk;
+ struct smp_irk *remote_irk;
+ unsigned long flags;
+};
+
+static inline void swap128(const u8 src[16], u8 dst[16])
{
int i;
for (i = 0; i < 16; i++)
dst[15 - i] = src[i];
}
-static inline void swap56(u8 src[7], u8 dst[7])
+static inline void swap56(const u8 src[7], u8 dst[7])
{
int i;
for (i = 0; i < 7; i++)
@@ -53,6 +80,7 @@ static int smp_e(struct crypto_blkcipher *tfm, const u8 *k, u8 *r)
{
struct blkcipher_desc desc;
struct scatterlist sg;
+ uint8_t tmp[16], data[16];
int err;
if (tfm == NULL) {
@@ -63,21 +91,89 @@ static int smp_e(struct crypto_blkcipher *tfm, const u8 *k, u8 *r)
desc.tfm = tfm;
desc.flags = 0;
- err = crypto_blkcipher_setkey(tfm, k, 16);
+ /* The most significant octet of key corresponds to k[0] */
+ swap128(k, tmp);
+
+ err = crypto_blkcipher_setkey(tfm, tmp, 16);
if (err) {
BT_ERR("cipher setkey failed: %d", err);
return err;
}
- sg_init_one(&sg, r, 16);
+ /* Most significant octet of plaintextData corresponds to data[0] */
+ swap128(r, data);
+
+ sg_init_one(&sg, data, 16);
err = crypto_blkcipher_encrypt(&desc, &sg, &sg, 16);
if (err)
BT_ERR("Encrypt data error %d", err);
+ /* Most significant octet of encryptedData corresponds to data[0] */
+ swap128(data, r);
+
return err;
}
+static int smp_ah(struct crypto_blkcipher *tfm, u8 irk[16], u8 r[3], u8 res[3])
+{
+ u8 _res[16];
+ int err;
+
+ /* r' = padding || r */
+ memcpy(_res, r, 3);
+ memset(_res + 3, 0, 13);
+
+ err = smp_e(tfm, irk, _res);
+ if (err) {
+ BT_ERR("Encrypt error");
+ return err;
+ }
+
+ /* The output of the random address function ah is:
+ * ah(k, r) = e(k, r') mod 2^24
+ * The output of the security function e is then truncated to 24 bits
+ * by taking the least significant 24 bits of the output of e as the
+ * result of ah.
+ */
+ memcpy(res, _res, 3);
+
+ return 0;
+}
+
+bool smp_irk_matches(struct crypto_blkcipher *tfm, u8 irk[16],
+ bdaddr_t *bdaddr)
+{
+ u8 hash[3];
+ int err;
+
+ BT_DBG("RPA %pMR IRK %*phN", bdaddr, 16, irk);
+
+ err = smp_ah(tfm, irk, &bdaddr->b[3], hash);
+ if (err)
+ return false;
+
+ return !memcmp(bdaddr->b, hash, 3);
+}
+
+int smp_generate_rpa(struct crypto_blkcipher *tfm, u8 irk[16], bdaddr_t *rpa)
+{
+ int err;
+
+ get_random_bytes(&rpa->b[3], 3);
+
+ rpa->b[5] &= 0x3f; /* Clear two most significant bits */
+ rpa->b[5] |= 0x40; /* Set second most significant bit */
+
+ err = smp_ah(tfm, irk, &rpa->b[3], rpa->b);
+ if (err < 0)
+ return err;
+
+ BT_DBG("RPA %pMR", rpa);
+
+ return 0;
+}
+
static int smp_c1(struct crypto_blkcipher *tfm, u8 k[16], u8 r[16],
u8 preq[7], u8 pres[7], u8 _iat, bdaddr_t *ia,
u8 _rat, bdaddr_t *ra, u8 res[16])
@@ -88,16 +184,15 @@ static int smp_c1(struct crypto_blkcipher *tfm, u8 k[16], u8 r[16],
memset(p1, 0, 16);
/* p1 = pres || preq || _rat || _iat */
- swap56(pres, p1);
- swap56(preq, p1 + 7);
- p1[14] = _rat;
- p1[15] = _iat;
-
- memset(p2, 0, 16);
+ p1[0] = _iat;
+ p1[1] = _rat;
+ memcpy(p1 + 2, preq, 7);
+ memcpy(p1 + 9, pres, 7);
/* p2 = padding || ia || ra */
- baswap((bdaddr_t *) (p2 + 4), ia);
- baswap((bdaddr_t *) (p2 + 10), ra);
+ memcpy(p2, ra, 6);
+ memcpy(p2 + 6, ia, 6);
+ memset(p2 + 12, 0, 4);
/* res = r XOR p1 */
u128_xor((u128 *) res, (u128 *) r, (u128 *) p1);
@@ -126,8 +221,8 @@ static int smp_s1(struct crypto_blkcipher *tfm, u8 k[16], u8 r1[16],
int err;
/* Just least significant octets from r1 and r2 are considered */
- memcpy(_r, r1 + 8, 8);
- memcpy(_r + 8, r2 + 8, 8);
+ memcpy(_r, r2, 8);
+ memcpy(_r + 8, r1, 8);
err = smp_e(tfm, k, _r);
if (err)
@@ -154,7 +249,7 @@ static struct sk_buff *smp_build_cmd(struct l2cap_conn *conn, u8 code,
lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE);
lh->len = cpu_to_le16(sizeof(code) + dlen);
- lh->cid = __constant_cpu_to_le16(L2CAP_CID_SMP);
+ lh->cid = cpu_to_le16(L2CAP_CID_SMP);
memcpy(skb_put(skb, sizeof(code)), &code, sizeof(code));
@@ -203,31 +298,45 @@ static void build_pairing_cmd(struct l2cap_conn *conn,
struct smp_cmd_pairing *req,
struct smp_cmd_pairing *rsp, __u8 authreq)
{
- u8 dist_keys = 0;
+ struct smp_chan *smp = conn->smp_chan;
+ struct hci_conn *hcon = conn->hcon;
+ struct hci_dev *hdev = hcon->hdev;
+ u8 local_dist = 0, remote_dist = 0;
if (test_bit(HCI_PAIRABLE, &conn->hcon->hdev->dev_flags)) {
- dist_keys = SMP_DIST_ENC_KEY;
+ local_dist = SMP_DIST_ENC_KEY | SMP_DIST_SIGN;
+ remote_dist = SMP_DIST_ENC_KEY | SMP_DIST_SIGN;
authreq |= SMP_AUTH_BONDING;
} else {
authreq &= ~SMP_AUTH_BONDING;
}
+ if (test_bit(HCI_RPA_RESOLVING, &hdev->dev_flags))
+ remote_dist |= SMP_DIST_ID_KEY;
+
+ if (test_bit(HCI_PRIVACY, &hdev->dev_flags))
+ local_dist |= SMP_DIST_ID_KEY;
+
if (rsp == NULL) {
req->io_capability = conn->hcon->io_capability;
req->oob_flag = SMP_OOB_NOT_PRESENT;
req->max_key_size = SMP_MAX_ENC_KEY_SIZE;
- req->init_key_dist = 0;
- req->resp_key_dist = dist_keys;
+ req->init_key_dist = local_dist;
+ req->resp_key_dist = remote_dist;
req->auth_req = (authreq & AUTH_REQ_MASK);
+
+ smp->remote_key_dist = remote_dist;
return;
}
rsp->io_capability = conn->hcon->io_capability;
rsp->oob_flag = SMP_OOB_NOT_PRESENT;
rsp->max_key_size = SMP_MAX_ENC_KEY_SIZE;
- rsp->init_key_dist = 0;
- rsp->resp_key_dist = req->resp_key_dist & dist_keys;
+ rsp->init_key_dist = req->init_key_dist & remote_dist;
+ rsp->resp_key_dist = req->resp_key_dist & local_dist;
rsp->auth_req = (authreq & AUTH_REQ_MASK);
+
+ smp->remote_key_dist = rsp->init_key_dist;
}
static u8 check_enc_key_size(struct l2cap_conn *conn, __u8 max_key_size)
@@ -276,6 +385,16 @@ static const u8 gen_method[5][5] = {
{ CFM_PASSKEY, CFM_PASSKEY, REQ_PASSKEY, JUST_WORKS, OVERLAP },
};
+static u8 get_auth_method(struct smp_chan *smp, u8 local_io, u8 remote_io)
+{
+ /* If either side has unknown io_caps, use JUST WORKS */
+ if (local_io > SMP_IO_KEYBOARD_DISPLAY ||
+ remote_io > SMP_IO_KEYBOARD_DISPLAY)
+ return JUST_WORKS;
+
+ return gen_method[remote_io][local_io];
+}
+
static int tk_request(struct l2cap_conn *conn, u8 remote_oob, u8 auth,
u8 local_io, u8 remote_io)
{
@@ -287,33 +406,34 @@ static int tk_request(struct l2cap_conn *conn, u8 remote_oob, u8 auth,
/* Initialize key for JUST WORKS */
memset(smp->tk, 0, sizeof(smp->tk));
- clear_bit(SMP_FLAG_TK_VALID, &smp->smp_flags);
+ clear_bit(SMP_FLAG_TK_VALID, &smp->flags);
BT_DBG("tk_request: auth:%d lcl:%d rem:%d", auth, local_io, remote_io);
/* If neither side wants MITM, use JUST WORKS */
- /* If either side has unknown io_caps, use JUST WORKS */
/* Otherwise, look up method from the table */
- if (!(auth & SMP_AUTH_MITM) ||
- local_io > SMP_IO_KEYBOARD_DISPLAY ||
- remote_io > SMP_IO_KEYBOARD_DISPLAY)
+ if (!(auth & SMP_AUTH_MITM))
method = JUST_WORKS;
else
- method = gen_method[remote_io][local_io];
+ method = get_auth_method(smp, local_io, remote_io);
/* If not bonding, don't ask user to confirm a Zero TK */
if (!(auth & SMP_AUTH_BONDING) && method == JUST_CFM)
method = JUST_WORKS;
+ /* Don't confirm locally initiated pairing attempts */
+ if (method == JUST_CFM && test_bit(SMP_FLAG_INITIATOR, &smp->flags))
+ method = JUST_WORKS;
+
/* If Just Works, Continue with Zero TK */
if (method == JUST_WORKS) {
- set_bit(SMP_FLAG_TK_VALID, &smp->smp_flags);
+ set_bit(SMP_FLAG_TK_VALID, &smp->flags);
return 0;
}
/* Not Just Works/Confirm results in MITM Authentication */
if (method != JUST_CFM)
- set_bit(SMP_FLAG_MITM_AUTH, &smp->smp_flags);
+ set_bit(SMP_FLAG_MITM_AUTH, &smp->flags);
/* If both devices have Keyoard-Display I/O, the master
* Confirms and the slave Enters the passkey.
@@ -325,16 +445,14 @@ static int tk_request(struct l2cap_conn *conn, u8 remote_oob, u8 auth,
method = REQ_PASSKEY;
}
- /* Generate random passkey. Not valid until confirmed. */
+ /* Generate random passkey. */
if (method == CFM_PASSKEY) {
- u8 key[16];
-
- memset(key, 0, sizeof(key));
+ memset(smp->tk, 0, sizeof(smp->tk));
get_random_bytes(&passkey, sizeof(passkey));
passkey %= 1000000;
- put_unaligned_le32(passkey, key);
- swap128(key, smp->tk);
+ put_unaligned_le32(passkey, smp->tk);
BT_DBG("PassKey: %d", passkey);
+ set_bit(SMP_FLAG_TK_VALID, &smp->flags);
}
hci_dev_lock(hcon->hdev);
@@ -342,141 +460,120 @@ static int tk_request(struct l2cap_conn *conn, u8 remote_oob, u8 auth,
if (method == REQ_PASSKEY)
ret = mgmt_user_passkey_request(hcon->hdev, &hcon->dst,
hcon->type, hcon->dst_type);
- else
+ else if (method == JUST_CFM)
ret = mgmt_user_confirm_request(hcon->hdev, &hcon->dst,
hcon->type, hcon->dst_type,
- cpu_to_le32(passkey), 0);
+ passkey, 1);
+ else
+ ret = mgmt_user_passkey_notify(hcon->hdev, &hcon->dst,
+ hcon->type, hcon->dst_type,
+ passkey, 0);
hci_dev_unlock(hcon->hdev);
return ret;
}
-static void confirm_work(struct work_struct *work)
+static u8 smp_confirm(struct smp_chan *smp)
{
- struct smp_chan *smp = container_of(work, struct smp_chan, confirm);
struct l2cap_conn *conn = smp->conn;
- struct crypto_blkcipher *tfm;
+ struct hci_dev *hdev = conn->hcon->hdev;
+ struct crypto_blkcipher *tfm = hdev->tfm_aes;
struct smp_cmd_pairing_confirm cp;
int ret;
- u8 res[16], reason;
BT_DBG("conn %p", conn);
- tfm = crypto_alloc_blkcipher("ecb(aes)", 0, CRYPTO_ALG_ASYNC);
- if (IS_ERR(tfm)) {
- reason = SMP_UNSPECIFIED;
- goto error;
- }
+ /* Ensure exclusive access to hdev->tfm_aes */
+ hci_dev_lock(hdev);
- smp->tfm = tfm;
+ ret = smp_c1(tfm, smp->tk, smp->prnd, smp->preq, smp->prsp,
+ conn->hcon->init_addr_type, &conn->hcon->init_addr,
+ conn->hcon->resp_addr_type, &conn->hcon->resp_addr,
+ cp.confirm_val);
- if (conn->hcon->out)
- ret = smp_c1(tfm, smp->tk, smp->prnd, smp->preq, smp->prsp,
- conn->hcon->src_type, &conn->hcon->src,
- conn->hcon->dst_type, &conn->hcon->dst, res);
- else
- ret = smp_c1(tfm, smp->tk, smp->prnd, smp->preq, smp->prsp,
- conn->hcon->dst_type, &conn->hcon->dst,
- conn->hcon->src_type, &conn->hcon->src, res);
- if (ret) {
- reason = SMP_UNSPECIFIED;
- goto error;
- }
+ hci_dev_unlock(hdev);
- clear_bit(SMP_FLAG_CFM_PENDING, &smp->smp_flags);
+ if (ret)
+ return SMP_UNSPECIFIED;
- swap128(res, cp.confirm_val);
- smp_send_cmd(smp->conn, SMP_CMD_PAIRING_CONFIRM, sizeof(cp), &cp);
+ clear_bit(SMP_FLAG_CFM_PENDING, &smp->flags);
- return;
+ smp_send_cmd(smp->conn, SMP_CMD_PAIRING_CONFIRM, sizeof(cp), &cp);
-error:
- smp_failure(conn, reason);
+ return 0;
}
-static void random_work(struct work_struct *work)
+static u8 smp_random(struct smp_chan *smp)
{
- struct smp_chan *smp = container_of(work, struct smp_chan, random);
struct l2cap_conn *conn = smp->conn;
struct hci_conn *hcon = conn->hcon;
- struct crypto_blkcipher *tfm = smp->tfm;
- u8 reason, confirm[16], res[16], key[16];
+ struct hci_dev *hdev = hcon->hdev;
+ struct crypto_blkcipher *tfm = hdev->tfm_aes;
+ u8 confirm[16];
int ret;
- if (IS_ERR_OR_NULL(tfm)) {
- reason = SMP_UNSPECIFIED;
- goto error;
- }
+ if (IS_ERR_OR_NULL(tfm))
+ return SMP_UNSPECIFIED;
BT_DBG("conn %p %s", conn, conn->hcon->out ? "master" : "slave");
- if (hcon->out)
- ret = smp_c1(tfm, smp->tk, smp->rrnd, smp->preq, smp->prsp,
- hcon->src_type, &hcon->src,
- hcon->dst_type, &hcon->dst, res);
- else
- ret = smp_c1(tfm, smp->tk, smp->rrnd, smp->preq, smp->prsp,
- hcon->dst_type, &hcon->dst,
- hcon->src_type, &hcon->src, res);
- if (ret) {
- reason = SMP_UNSPECIFIED;
- goto error;
- }
+ /* Ensure exclusive access to hdev->tfm_aes */
+ hci_dev_lock(hdev);
+
+ ret = smp_c1(tfm, smp->tk, smp->rrnd, smp->preq, smp->prsp,
+ hcon->init_addr_type, &hcon->init_addr,
+ hcon->resp_addr_type, &hcon->resp_addr, confirm);
+
+ hci_dev_unlock(hdev);
- swap128(res, confirm);
+ if (ret)
+ return SMP_UNSPECIFIED;
if (memcmp(smp->pcnf, confirm, sizeof(smp->pcnf)) != 0) {
BT_ERR("Pairing failed (confirmation values mismatch)");
- reason = SMP_CONFIRM_FAILED;
- goto error;
+ return SMP_CONFIRM_FAILED;
}
if (hcon->out) {
- u8 stk[16], rand[8];
- __le16 ediv;
-
- memset(rand, 0, sizeof(rand));
- ediv = 0;
+ u8 stk[16];
+ __le64 rand = 0;
+ __le16 ediv = 0;
- smp_s1(tfm, smp->tk, smp->rrnd, smp->prnd, key);
- swap128(key, stk);
+ smp_s1(tfm, smp->tk, smp->rrnd, smp->prnd, stk);
memset(stk + smp->enc_key_size, 0,
SMP_MAX_ENC_KEY_SIZE - smp->enc_key_size);
- if (test_and_set_bit(HCI_CONN_ENCRYPT_PEND, &hcon->flags)) {
- reason = SMP_UNSPECIFIED;
- goto error;
- }
+ if (test_and_set_bit(HCI_CONN_ENCRYPT_PEND, &hcon->flags))
+ return SMP_UNSPECIFIED;
hci_le_start_enc(hcon, ediv, rand, stk);
hcon->enc_key_size = smp->enc_key_size;
} else {
- u8 stk[16], r[16], rand[8];
- __le16 ediv;
+ u8 stk[16], auth;
+ __le64 rand = 0;
+ __le16 ediv = 0;
- memset(rand, 0, sizeof(rand));
- ediv = 0;
+ smp_send_cmd(conn, SMP_CMD_PAIRING_RANDOM, sizeof(smp->prnd),
+ smp->prnd);
- swap128(smp->prnd, r);
- smp_send_cmd(conn, SMP_CMD_PAIRING_RANDOM, sizeof(r), r);
-
- smp_s1(tfm, smp->tk, smp->prnd, smp->rrnd, key);
- swap128(key, stk);
+ smp_s1(tfm, smp->tk, smp->prnd, smp->rrnd, stk);
memset(stk + smp->enc_key_size, 0,
SMP_MAX_ENC_KEY_SIZE - smp->enc_key_size);
+ if (hcon->pending_sec_level == BT_SECURITY_HIGH)
+ auth = 1;
+ else
+ auth = 0;
+
hci_add_ltk(hcon->hdev, &hcon->dst, hcon->dst_type,
- HCI_SMP_STK_SLAVE, 0, 0, stk, smp->enc_key_size,
+ HCI_SMP_STK_SLAVE, auth, stk, smp->enc_key_size,
ediv, rand);
}
- return;
-
-error:
- smp_failure(conn, reason);
+ return 0;
}
static struct smp_chan *smp_chan_create(struct l2cap_conn *conn)
@@ -487,9 +584,6 @@ static struct smp_chan *smp_chan_create(struct l2cap_conn *conn)
if (!smp)
return NULL;
- INIT_WORK(&smp->confirm, confirm_work);
- INIT_WORK(&smp->random, random_work);
-
smp->conn = conn;
conn->smp_chan = smp;
conn->hcon->smp_conn = conn;
@@ -502,11 +596,33 @@ static struct smp_chan *smp_chan_create(struct l2cap_conn *conn)
void smp_chan_destroy(struct l2cap_conn *conn)
{
struct smp_chan *smp = conn->smp_chan;
+ bool complete;
BUG_ON(!smp);
- if (smp->tfm)
- crypto_free_blkcipher(smp->tfm);
+ complete = test_bit(SMP_FLAG_COMPLETE, &smp->flags);
+ mgmt_smp_complete(conn->hcon, complete);
+
+ kfree(smp->csrk);
+ kfree(smp->slave_csrk);
+
+ /* If pairing failed clean up any keys we might have */
+ if (!complete) {
+ if (smp->ltk) {
+ list_del(&smp->ltk->list);
+ kfree(smp->ltk);
+ }
+
+ if (smp->slave_ltk) {
+ list_del(&smp->slave_ltk->list);
+ kfree(smp->slave_ltk);
+ }
+
+ if (smp->remote_irk) {
+ list_del(&smp->remote_irk->list);
+ kfree(smp->remote_irk);
+ }
+ }
kfree(smp);
conn->smp_chan = NULL;
@@ -519,7 +635,6 @@ int smp_user_confirm_reply(struct hci_conn *hcon, u16 mgmt_op, __le32 passkey)
struct l2cap_conn *conn = hcon->smp_conn;
struct smp_chan *smp;
u32 value;
- u8 key[16];
BT_DBG("");
@@ -531,13 +646,12 @@ int smp_user_confirm_reply(struct hci_conn *hcon, u16 mgmt_op, __le32 passkey)
switch (mgmt_op) {
case MGMT_OP_USER_PASSKEY_REPLY:
value = le32_to_cpu(passkey);
- memset(key, 0, sizeof(key));
+ memset(smp->tk, 0, sizeof(smp->tk));
BT_DBG("PassKey: %d", value);
- put_unaligned_le32(value, key);
- swap128(key, smp->tk);
+ put_unaligned_le32(value, smp->tk);
/* Fall Through */
case MGMT_OP_USER_CONFIRM_REPLY:
- set_bit(SMP_FLAG_TK_VALID, &smp->smp_flags);
+ set_bit(SMP_FLAG_TK_VALID, &smp->flags);
break;
case MGMT_OP_USER_PASSKEY_NEG_REPLY:
case MGMT_OP_USER_CONFIRM_NEG_REPLY:
@@ -549,8 +663,11 @@ int smp_user_confirm_reply(struct hci_conn *hcon, u16 mgmt_op, __le32 passkey)
}
/* If it is our turn to send Pairing Confirm, do so now */
- if (test_bit(SMP_FLAG_CFM_PENDING, &smp->smp_flags))
- queue_work(hcon->hdev->workqueue, &smp->confirm);
+ if (test_bit(SMP_FLAG_CFM_PENDING, &smp->flags)) {
+ u8 rsp = smp_confirm(smp);
+ if (rsp)
+ smp_failure(conn, rsp);
+ }
return 0;
}
@@ -559,12 +676,14 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb)
{
struct smp_cmd_pairing rsp, *req = (void *) skb->data;
struct smp_chan *smp;
- u8 key_size;
- u8 auth = SMP_AUTH_NONE;
+ u8 key_size, auth, sec_level;
int ret;
BT_DBG("conn %p", conn);
+ if (skb->len < sizeof(*req))
+ return SMP_INVALID_PARAMS;
+
if (conn->hcon->link_mode & HCI_LM_MASTER)
return SMP_CMD_NOTSUPP;
@@ -581,10 +700,21 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb)
skb_pull(skb, sizeof(*req));
/* We didn't start the pairing, so match remote */
- if (req->auth_req & SMP_AUTH_BONDING)
- auth = req->auth_req;
+ auth = req->auth_req;
+
+ sec_level = authreq_to_seclevel(auth);
+ if (sec_level > conn->hcon->pending_sec_level)
+ conn->hcon->pending_sec_level = sec_level;
- conn->hcon->pending_sec_level = authreq_to_seclevel(auth);
+ /* If we need MITM check that it can be achieved */
+ if (conn->hcon->pending_sec_level >= BT_SECURITY_HIGH) {
+ u8 method;
+
+ method = get_auth_method(smp, conn->hcon->io_capability,
+ req->io_capability);
+ if (method == JUST_WORKS || method == JUST_CFM)
+ return SMP_AUTH_REQUIREMENTS;
+ }
build_pairing_cmd(conn, req, &rsp, auth);
@@ -604,6 +734,8 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb)
if (ret)
return SMP_UNSPECIFIED;
+ clear_bit(SMP_FLAG_INITIATOR, &smp->flags);
+
return 0;
}
@@ -611,12 +743,14 @@ static u8 smp_cmd_pairing_rsp(struct l2cap_conn *conn, struct sk_buff *skb)
{
struct smp_cmd_pairing *req, *rsp = (void *) skb->data;
struct smp_chan *smp = conn->smp_chan;
- struct hci_dev *hdev = conn->hcon->hdev;
u8 key_size, auth = SMP_AUTH_NONE;
int ret;
BT_DBG("conn %p", conn);
+ if (skb->len < sizeof(*rsp))
+ return SMP_INVALID_PARAMS;
+
if (!(conn->hcon->link_mode & HCI_LM_MASTER))
return SMP_CMD_NOTSUPP;
@@ -628,11 +762,26 @@ static u8 smp_cmd_pairing_rsp(struct l2cap_conn *conn, struct sk_buff *skb)
if (check_enc_key_size(conn, key_size))
return SMP_ENC_KEY_SIZE;
+ /* If we need MITM check that it can be achieved */
+ if (conn->hcon->pending_sec_level >= BT_SECURITY_HIGH) {
+ u8 method;
+
+ method = get_auth_method(smp, req->io_capability,
+ rsp->io_capability);
+ if (method == JUST_WORKS || method == JUST_CFM)
+ return SMP_AUTH_REQUIREMENTS;
+ }
+
get_random_bytes(smp->prnd, sizeof(smp->prnd));
smp->prsp[0] = SMP_CMD_PAIRING_RSP;
memcpy(&smp->prsp[1], rsp, sizeof(*rsp));
+ /* Update remote key distribution in case the remote cleared
+ * some bits that we had enabled in our request.
+ */
+ smp->remote_key_dist &= rsp->resp_key_dist;
+
if ((req->auth_req & SMP_AUTH_BONDING) &&
(rsp->auth_req & SMP_AUTH_BONDING))
auth = SMP_AUTH_BONDING;
@@ -643,13 +792,11 @@ static u8 smp_cmd_pairing_rsp(struct l2cap_conn *conn, struct sk_buff *skb)
if (ret)
return SMP_UNSPECIFIED;
- set_bit(SMP_FLAG_CFM_PENDING, &smp->smp_flags);
+ set_bit(SMP_FLAG_CFM_PENDING, &smp->flags);
/* Can't compose response until we have been confirmed */
- if (!test_bit(SMP_FLAG_TK_VALID, &smp->smp_flags))
- return 0;
-
- queue_work(hdev->workqueue, &smp->confirm);
+ if (test_bit(SMP_FLAG_TK_VALID, &smp->flags))
+ return smp_confirm(smp);
return 0;
}
@@ -657,24 +804,22 @@ static u8 smp_cmd_pairing_rsp(struct l2cap_conn *conn, struct sk_buff *skb)
static u8 smp_cmd_pairing_confirm(struct l2cap_conn *conn, struct sk_buff *skb)
{
struct smp_chan *smp = conn->smp_chan;
- struct hci_dev *hdev = conn->hcon->hdev;
BT_DBG("conn %p %s", conn, conn->hcon->out ? "master" : "slave");
+ if (skb->len < sizeof(smp->pcnf))
+ return SMP_INVALID_PARAMS;
+
memcpy(smp->pcnf, skb->data, sizeof(smp->pcnf));
skb_pull(skb, sizeof(smp->pcnf));
- if (conn->hcon->out) {
- u8 random[16];
-
- swap128(smp->prnd, random);
- smp_send_cmd(conn, SMP_CMD_PAIRING_RANDOM, sizeof(random),
- random);
- } else if (test_bit(SMP_FLAG_TK_VALID, &smp->smp_flags)) {
- queue_work(hdev->workqueue, &smp->confirm);
- } else {
- set_bit(SMP_FLAG_CFM_PENDING, &smp->smp_flags);
- }
+ if (conn->hcon->out)
+ smp_send_cmd(conn, SMP_CMD_PAIRING_RANDOM, sizeof(smp->prnd),
+ smp->prnd);
+ else if (test_bit(SMP_FLAG_TK_VALID, &smp->flags))
+ return smp_confirm(smp);
+ else
+ set_bit(SMP_FLAG_CFM_PENDING, &smp->flags);
return 0;
}
@@ -682,16 +827,16 @@ static u8 smp_cmd_pairing_confirm(struct l2cap_conn *conn, struct sk_buff *skb)
static u8 smp_cmd_pairing_random(struct l2cap_conn *conn, struct sk_buff *skb)
{
struct smp_chan *smp = conn->smp_chan;
- struct hci_dev *hdev = conn->hcon->hdev;
BT_DBG("conn %p", conn);
- swap128(skb->data, smp->rrnd);
- skb_pull(skb, sizeof(smp->rrnd));
+ if (skb->len < sizeof(smp->rrnd))
+ return SMP_INVALID_PARAMS;
- queue_work(hdev->workqueue, &smp->random);
+ memcpy(smp->rrnd, skb->data, sizeof(smp->rrnd));
+ skb_pull(skb, sizeof(smp->rrnd));
- return 0;
+ return smp_random(smp);
}
static u8 smp_ltk_encrypt(struct l2cap_conn *conn, u8 sec_level)
@@ -699,7 +844,8 @@ static u8 smp_ltk_encrypt(struct l2cap_conn *conn, u8 sec_level)
struct smp_ltk *key;
struct hci_conn *hcon = conn->hcon;
- key = hci_find_ltk_by_addr(hcon->hdev, &hcon->dst, hcon->dst_type);
+ key = hci_find_ltk_by_addr(hcon->hdev, &hcon->dst, hcon->dst_type,
+ hcon->out);
if (!key)
return 0;
@@ -721,13 +867,19 @@ static u8 smp_cmd_security_req(struct l2cap_conn *conn, struct sk_buff *skb)
struct smp_cmd_pairing cp;
struct hci_conn *hcon = conn->hcon;
struct smp_chan *smp;
+ u8 sec_level;
BT_DBG("conn %p", conn);
+ if (skb->len < sizeof(*rp))
+ return SMP_INVALID_PARAMS;
+
if (!(conn->hcon->link_mode & HCI_LM_MASTER))
return SMP_CMD_NOTSUPP;
- hcon->pending_sec_level = authreq_to_seclevel(rp->auth_req);
+ sec_level = authreq_to_seclevel(rp->auth_req);
+ if (sec_level > hcon->pending_sec_level)
+ hcon->pending_sec_level = sec_level;
if (smp_ltk_encrypt(conn, hcon->pending_sec_level))
return 0;
@@ -747,6 +899,8 @@ static u8 smp_cmd_security_req(struct l2cap_conn *conn, struct sk_buff *skb)
smp_send_cmd(conn, SMP_CMD_PAIRING_REQ, sizeof(cp), &cp);
+ clear_bit(SMP_FLAG_INITIATOR, &smp->flags);
+
return 0;
}
@@ -764,20 +918,27 @@ bool smp_sufficient_security(struct hci_conn *hcon, u8 sec_level)
int smp_conn_security(struct hci_conn *hcon, __u8 sec_level)
{
struct l2cap_conn *conn = hcon->l2cap_data;
- struct smp_chan *smp = conn->smp_chan;
+ struct smp_chan *smp;
__u8 authreq;
BT_DBG("conn %p hcon %p level 0x%2.2x", conn, hcon, sec_level);
+ /* This may be NULL if there's an unexpected disconnection */
+ if (!conn)
+ return 1;
+
if (!test_bit(HCI_LE_ENABLED, &hcon->hdev->dev_flags))
return 1;
if (smp_sufficient_security(hcon, sec_level))
return 1;
+ if (sec_level > hcon->pending_sec_level)
+ hcon->pending_sec_level = sec_level;
+
if (hcon->link_mode & HCI_LM_MASTER)
- if (smp_ltk_encrypt(conn, sec_level))
- goto done;
+ if (smp_ltk_encrypt(conn, hcon->pending_sec_level))
+ return 0;
if (test_and_set_bit(HCI_CONN_LE_SMP_PEND, &hcon->flags))
return 0;
@@ -788,6 +949,13 @@ int smp_conn_security(struct hci_conn *hcon, __u8 sec_level)
authreq = seclevel_to_authreq(sec_level);
+ /* Require MITM if IO Capability allows or the security level
+ * requires it.
+ */
+ if (hcon->io_capability != HCI_IO_NO_INPUT_OUTPUT ||
+ hcon->pending_sec_level > BT_SECURITY_MEDIUM)
+ authreq |= SMP_AUTH_MITM;
+
if (hcon->link_mode & HCI_LM_MASTER) {
struct smp_cmd_pairing cp;
@@ -802,8 +970,7 @@ int smp_conn_security(struct hci_conn *hcon, __u8 sec_level)
smp_send_cmd(conn, SMP_CMD_SECURITY_REQ, sizeof(cp), &cp);
}
-done:
- hcon->pending_sec_level = sec_level;
+ set_bit(SMP_FLAG_INITIATOR, &smp->flags);
return 0;
}
@@ -813,6 +980,15 @@ static int smp_cmd_encrypt_info(struct l2cap_conn *conn, struct sk_buff *skb)
struct smp_cmd_encrypt_info *rp = (void *) skb->data;
struct smp_chan *smp = conn->smp_chan;
+ BT_DBG("conn %p", conn);
+
+ if (skb->len < sizeof(*rp))
+ return SMP_INVALID_PARAMS;
+
+ /* Ignore this PDU if it wasn't requested */
+ if (!(smp->remote_key_dist & SMP_DIST_ENC_KEY))
+ return 0;
+
skb_pull(skb, sizeof(*rp));
memcpy(smp->tk, rp->ltk, sizeof(smp->tk));
@@ -826,16 +1002,138 @@ static int smp_cmd_master_ident(struct l2cap_conn *conn, struct sk_buff *skb)
struct smp_chan *smp = conn->smp_chan;
struct hci_dev *hdev = conn->hcon->hdev;
struct hci_conn *hcon = conn->hcon;
+ struct smp_ltk *ltk;
u8 authenticated;
+ BT_DBG("conn %p", conn);
+
+ if (skb->len < sizeof(*rp))
+ return SMP_INVALID_PARAMS;
+
+ /* Ignore this PDU if it wasn't requested */
+ if (!(smp->remote_key_dist & SMP_DIST_ENC_KEY))
+ return 0;
+
+ /* Mark the information as received */
+ smp->remote_key_dist &= ~SMP_DIST_ENC_KEY;
+
skb_pull(skb, sizeof(*rp));
hci_dev_lock(hdev);
authenticated = (hcon->sec_level == BT_SECURITY_HIGH);
- hci_add_ltk(hdev, &hcon->dst, hcon->dst_type, HCI_SMP_LTK, 1,
- authenticated, smp->tk, smp->enc_key_size,
- rp->ediv, rp->rand);
- smp_distribute_keys(conn, 1);
+ ltk = hci_add_ltk(hdev, &hcon->dst, hcon->dst_type, HCI_SMP_LTK,
+ authenticated, smp->tk, smp->enc_key_size,
+ rp->ediv, rp->rand);
+ smp->ltk = ltk;
+ if (!(smp->remote_key_dist & SMP_DIST_ID_KEY))
+ smp_distribute_keys(conn);
+ hci_dev_unlock(hdev);
+
+ return 0;
+}
+
+static int smp_cmd_ident_info(struct l2cap_conn *conn, struct sk_buff *skb)
+{
+ struct smp_cmd_ident_info *info = (void *) skb->data;
+ struct smp_chan *smp = conn->smp_chan;
+
+ BT_DBG("");
+
+ if (skb->len < sizeof(*info))
+ return SMP_INVALID_PARAMS;
+
+ /* Ignore this PDU if it wasn't requested */
+ if (!(smp->remote_key_dist & SMP_DIST_ID_KEY))
+ return 0;
+
+ skb_pull(skb, sizeof(*info));
+
+ memcpy(smp->irk, info->irk, 16);
+
+ return 0;
+}
+
+static int smp_cmd_ident_addr_info(struct l2cap_conn *conn,
+ struct sk_buff *skb)
+{
+ struct smp_cmd_ident_addr_info *info = (void *) skb->data;
+ struct smp_chan *smp = conn->smp_chan;
+ struct hci_conn *hcon = conn->hcon;
+ bdaddr_t rpa;
+
+ BT_DBG("");
+
+ if (skb->len < sizeof(*info))
+ return SMP_INVALID_PARAMS;
+
+ /* Ignore this PDU if it wasn't requested */
+ if (!(smp->remote_key_dist & SMP_DIST_ID_KEY))
+ return 0;
+
+ /* Mark the information as received */
+ smp->remote_key_dist &= ~SMP_DIST_ID_KEY;
+
+ skb_pull(skb, sizeof(*info));
+
+ /* Strictly speaking the Core Specification (4.1) allows sending
+ * an empty address which would force us to rely on just the IRK
+ * as "identity information". However, since such
+ * implementations are not known of and in order to not over
+ * complicate our implementation, simply pretend that we never
+ * received an IRK for such a device.
+ */
+ if (!bacmp(&info->bdaddr, BDADDR_ANY)) {
+ BT_ERR("Ignoring IRK with no identity address");
+ smp_distribute_keys(conn);
+ return 0;
+ }
+
+ bacpy(&smp->id_addr, &info->bdaddr);
+ smp->id_addr_type = info->addr_type;
+
+ if (hci_bdaddr_is_rpa(&hcon->dst, hcon->dst_type))
+ bacpy(&rpa, &hcon->dst);
+ else
+ bacpy(&rpa, BDADDR_ANY);
+
+ smp->remote_irk = hci_add_irk(conn->hcon->hdev, &smp->id_addr,
+ smp->id_addr_type, smp->irk, &rpa);
+
+ smp_distribute_keys(conn);
+
+ return 0;
+}
+
+static int smp_cmd_sign_info(struct l2cap_conn *conn, struct sk_buff *skb)
+{
+ struct smp_cmd_sign_info *rp = (void *) skb->data;
+ struct smp_chan *smp = conn->smp_chan;
+ struct hci_dev *hdev = conn->hcon->hdev;
+ struct smp_csrk *csrk;
+
+ BT_DBG("conn %p", conn);
+
+ if (skb->len < sizeof(*rp))
+ return SMP_INVALID_PARAMS;
+
+ /* Ignore this PDU if it wasn't requested */
+ if (!(smp->remote_key_dist & SMP_DIST_SIGN))
+ return 0;
+
+ /* Mark the information as received */
+ smp->remote_key_dist &= ~SMP_DIST_SIGN;
+
+ skb_pull(skb, sizeof(*rp));
+
+ hci_dev_lock(hdev);
+ csrk = kzalloc(sizeof(*csrk), GFP_KERNEL);
+ if (csrk) {
+ csrk->master = 0x01;
+ memcpy(csrk->val, rp->csrk, sizeof(csrk->val));
+ }
+ smp->csrk = csrk;
+ if (!(smp->remote_key_dist & SMP_DIST_SIGN))
+ smp_distribute_keys(conn);
hci_dev_unlock(hdev);
return 0;
@@ -915,10 +1213,15 @@ int smp_sig_channel(struct l2cap_conn *conn, struct sk_buff *skb)
break;
case SMP_CMD_IDENT_INFO:
+ reason = smp_cmd_ident_info(conn, skb);
+ break;
+
case SMP_CMD_IDENT_ADDR_INFO:
+ reason = smp_cmd_ident_addr_info(conn, skb);
+ break;
+
case SMP_CMD_SIGN_INFO:
- /* Just ignored */
- reason = 0;
+ reason = smp_cmd_sign_info(conn, skb);
break;
default:
@@ -937,26 +1240,78 @@ done:
return err;
}
-int smp_distribute_keys(struct l2cap_conn *conn, __u8 force)
+static void smp_notify_keys(struct l2cap_conn *conn)
+{
+ struct smp_chan *smp = conn->smp_chan;
+ struct hci_conn *hcon = conn->hcon;
+ struct hci_dev *hdev = hcon->hdev;
+ struct smp_cmd_pairing *req = (void *) &smp->preq[1];
+ struct smp_cmd_pairing *rsp = (void *) &smp->prsp[1];
+ bool persistent;
+
+ if (smp->remote_irk) {
+ mgmt_new_irk(hdev, smp->remote_irk);
+ /* Now that user space can be considered to know the
+ * identity address track the connection based on it
+ * from now on.
+ */
+ bacpy(&hcon->dst, &smp->remote_irk->bdaddr);
+ hcon->dst_type = smp->remote_irk->addr_type;
+ l2cap_conn_update_id_addr(hcon);
+ }
+
+ /* The LTKs and CSRKs should be persistent only if both sides
+ * had the bonding bit set in their authentication requests.
+ */
+ persistent = !!((req->auth_req & rsp->auth_req) & SMP_AUTH_BONDING);
+
+ if (smp->csrk) {
+ smp->csrk->bdaddr_type = hcon->dst_type;
+ bacpy(&smp->csrk->bdaddr, &hcon->dst);
+ mgmt_new_csrk(hdev, smp->csrk, persistent);
+ }
+
+ if (smp->slave_csrk) {
+ smp->slave_csrk->bdaddr_type = hcon->dst_type;
+ bacpy(&smp->slave_csrk->bdaddr, &hcon->dst);
+ mgmt_new_csrk(hdev, smp->slave_csrk, persistent);
+ }
+
+ if (smp->ltk) {
+ smp->ltk->bdaddr_type = hcon->dst_type;
+ bacpy(&smp->ltk->bdaddr, &hcon->dst);
+ mgmt_new_ltk(hdev, smp->ltk, persistent);
+ }
+
+ if (smp->slave_ltk) {
+ smp->slave_ltk->bdaddr_type = hcon->dst_type;
+ bacpy(&smp->slave_ltk->bdaddr, &hcon->dst);
+ mgmt_new_ltk(hdev, smp->slave_ltk, persistent);
+ }
+}
+
+int smp_distribute_keys(struct l2cap_conn *conn)
{
struct smp_cmd_pairing *req, *rsp;
struct smp_chan *smp = conn->smp_chan;
+ struct hci_conn *hcon = conn->hcon;
+ struct hci_dev *hdev = hcon->hdev;
__u8 *keydist;
- BT_DBG("conn %p force %d", conn, force);
+ BT_DBG("conn %p", conn);
- if (!test_bit(HCI_CONN_LE_SMP_PEND, &conn->hcon->flags))
+ if (!test_bit(HCI_CONN_LE_SMP_PEND, &hcon->flags))
return 0;
rsp = (void *) &smp->prsp[1];
/* The responder sends its keys first */
- if (!force && conn->hcon->out && (rsp->resp_key_dist & 0x07))
+ if (hcon->out && (smp->remote_key_dist & 0x07))
return 0;
req = (void *) &smp->preq[1];
- if (conn->hcon->out) {
+ if (hcon->out) {
keydist = &rsp->init_key_dist;
*keydist &= req->init_key_dist;
} else {
@@ -964,28 +1319,30 @@ int smp_distribute_keys(struct l2cap_conn *conn, __u8 force)
*keydist &= req->resp_key_dist;
}
-
BT_DBG("keydist 0x%x", *keydist);
if (*keydist & SMP_DIST_ENC_KEY) {
struct smp_cmd_encrypt_info enc;
struct smp_cmd_master_ident ident;
- struct hci_conn *hcon = conn->hcon;
+ struct smp_ltk *ltk;
u8 authenticated;
__le16 ediv;
+ __le64 rand;
get_random_bytes(enc.ltk, sizeof(enc.ltk));
get_random_bytes(&ediv, sizeof(ediv));
- get_random_bytes(ident.rand, sizeof(ident.rand));
+ get_random_bytes(&rand, sizeof(rand));
smp_send_cmd(conn, SMP_CMD_ENCRYPT_INFO, sizeof(enc), &enc);
authenticated = hcon->sec_level == BT_SECURITY_HIGH;
- hci_add_ltk(hcon->hdev, &hcon->dst, hcon->dst_type,
- HCI_SMP_LTK_SLAVE, 1, authenticated,
- enc.ltk, smp->enc_key_size, ediv, ident.rand);
+ ltk = hci_add_ltk(hdev, &hcon->dst, hcon->dst_type,
+ HCI_SMP_LTK_SLAVE, authenticated, enc.ltk,
+ smp->enc_key_size, ediv, rand);
+ smp->slave_ltk = ltk;
ident.ediv = ediv;
+ ident.rand = rand;
smp_send_cmd(conn, SMP_CMD_MASTER_IDENT, sizeof(ident), &ident);
@@ -996,14 +1353,18 @@ int smp_distribute_keys(struct l2cap_conn *conn, __u8 force)
struct smp_cmd_ident_addr_info addrinfo;
struct smp_cmd_ident_info idinfo;
- /* Send a dummy key */
- get_random_bytes(idinfo.irk, sizeof(idinfo.irk));
+ memcpy(idinfo.irk, hdev->irk, sizeof(idinfo.irk));
smp_send_cmd(conn, SMP_CMD_IDENT_INFO, sizeof(idinfo), &idinfo);
- /* Just public address */
- memset(&addrinfo, 0, sizeof(addrinfo));
- bacpy(&addrinfo.bdaddr, &conn->hcon->src);
+ /* The hci_conn contains the local identity address
+ * after the connection has been established.
+ *
+ * This is true even when the connection has been
+ * established using a resolvable random address.
+ */
+ bacpy(&addrinfo.bdaddr, &hcon->src);
+ addrinfo.addr_type = hcon->src_type;
smp_send_cmd(conn, SMP_CMD_IDENT_ADDR_INFO, sizeof(addrinfo),
&addrinfo);
@@ -1013,20 +1374,33 @@ int smp_distribute_keys(struct l2cap_conn *conn, __u8 force)
if (*keydist & SMP_DIST_SIGN) {
struct smp_cmd_sign_info sign;
+ struct smp_csrk *csrk;
- /* Send a dummy key */
+ /* Generate a new random key */
get_random_bytes(sign.csrk, sizeof(sign.csrk));
+ csrk = kzalloc(sizeof(*csrk), GFP_KERNEL);
+ if (csrk) {
+ csrk->master = 0x00;
+ memcpy(csrk->val, sign.csrk, sizeof(csrk->val));
+ }
+ smp->slave_csrk = csrk;
+
smp_send_cmd(conn, SMP_CMD_SIGN_INFO, sizeof(sign), &sign);
*keydist &= ~SMP_DIST_SIGN;
}
- if (conn->hcon->out || force) {
- clear_bit(HCI_CONN_LE_SMP_PEND, &conn->hcon->flags);
- cancel_delayed_work_sync(&conn->security_timer);
- smp_chan_destroy(conn);
- }
+ /* If there are still keys to be received wait for them */
+ if ((smp->remote_key_dist & 0x07))
+ return 0;
+
+ clear_bit(HCI_CONN_LE_SMP_PEND, &hcon->flags);
+ cancel_delayed_work_sync(&conn->security_timer);
+ set_bit(SMP_FLAG_COMPLETE, &smp->flags);
+ smp_notify_keys(conn);
+
+ smp_chan_destroy(conn);
return 0;
}
diff --git a/net/bluetooth/smp.h b/net/bluetooth/smp.h
index a700bcb490d..5a8dc36460a 100644
--- a/net/bluetooth/smp.h
+++ b/net/bluetooth/smp.h
@@ -78,7 +78,7 @@ struct smp_cmd_encrypt_info {
#define SMP_CMD_MASTER_IDENT 0x07
struct smp_cmd_master_ident {
__le16 ediv;
- __u8 rand[8];
+ __le64 rand;
} __packed;
#define SMP_CMD_IDENT_INFO 0x08
@@ -111,37 +111,22 @@ struct smp_cmd_security_req {
#define SMP_CMD_NOTSUPP 0x07
#define SMP_UNSPECIFIED 0x08
#define SMP_REPEATED_ATTEMPTS 0x09
+#define SMP_INVALID_PARAMS 0x0a
#define SMP_MIN_ENC_KEY_SIZE 7
#define SMP_MAX_ENC_KEY_SIZE 16
-#define SMP_FLAG_TK_VALID 1
-#define SMP_FLAG_CFM_PENDING 2
-#define SMP_FLAG_MITM_AUTH 3
-
-struct smp_chan {
- struct l2cap_conn *conn;
- u8 preq[7]; /* SMP Pairing Request */
- u8 prsp[7]; /* SMP Pairing Response */
- u8 prnd[16]; /* SMP Pairing Random (local) */
- u8 rrnd[16]; /* SMP Pairing Random (remote) */
- u8 pcnf[16]; /* SMP Pairing Confirm */
- u8 tk[16]; /* SMP Temporary Key */
- u8 enc_key_size;
- unsigned long smp_flags;
- struct crypto_blkcipher *tfm;
- struct work_struct confirm;
- struct work_struct random;
-
-};
-
/* SMP Commands */
bool smp_sufficient_security(struct hci_conn *hcon, u8 sec_level);
int smp_conn_security(struct hci_conn *hcon, __u8 sec_level);
int smp_sig_channel(struct l2cap_conn *conn, struct sk_buff *skb);
-int smp_distribute_keys(struct l2cap_conn *conn, __u8 force);
+int smp_distribute_keys(struct l2cap_conn *conn);
int smp_user_confirm_reply(struct hci_conn *conn, u16 mgmt_op, __le32 passkey);
void smp_chan_destroy(struct l2cap_conn *conn);
+bool smp_irk_matches(struct crypto_blkcipher *tfm, u8 irk[16],
+ bdaddr_t *bdaddr);
+int smp_generate_rpa(struct crypto_blkcipher *tfm, u8 irk[16], bdaddr_t *rpa);
+
#endif /* __SMP_H */
diff --git a/net/bridge/Makefile b/net/bridge/Makefile
index e85498b2f16..8590b942bff 100644
--- a/net/bridge/Makefile
+++ b/net/bridge/Makefile
@@ -5,7 +5,7 @@
obj-$(CONFIG_BRIDGE) += bridge.o
bridge-y := br.o br_device.o br_fdb.o br_forward.o br_if.o br_input.o \
- br_ioctl.o br_notify.o br_stp.o br_stp_bpdu.o \
+ br_ioctl.o br_stp.o br_stp_bpdu.o \
br_stp_if.o br_stp_timer.o br_netlink.o
bridge-$(CONFIG_SYSFS) += br_sysfs_if.o br_sysfs_br.o
@@ -16,4 +16,4 @@ bridge-$(CONFIG_BRIDGE_IGMP_SNOOPING) += br_multicast.o br_mdb.o
bridge-$(CONFIG_BRIDGE_VLAN_FILTERING) += br_vlan.o
-obj-$(CONFIG_BRIDGE_NF_EBTABLES) += netfilter/
+obj-$(CONFIG_NETFILTER) += netfilter/
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 19311aafcf5..1a755a1e541 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -22,6 +22,104 @@
#include "br_private.h"
+/*
+ * Handle changes in state of network devices enslaved to a bridge.
+ *
+ * Note: don't care about up/down if bridge itself is down, because
+ * port state is checked when bridge is brought up.
+ */
+static int br_device_event(struct notifier_block *unused, unsigned long event, void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct net_bridge_port *p;
+ struct net_bridge *br;
+ bool changed_addr;
+ int err;
+
+ /* register of bridge completed, add sysfs entries */
+ if ((dev->priv_flags & IFF_EBRIDGE) && event == NETDEV_REGISTER) {
+ br_sysfs_addbr(dev);
+ return NOTIFY_DONE;
+ }
+
+ /* not a port of a bridge */
+ p = br_port_get_rtnl(dev);
+ if (!p)
+ return NOTIFY_DONE;
+
+ br = p->br;
+
+ switch (event) {
+ case NETDEV_CHANGEMTU:
+ dev_set_mtu(br->dev, br_min_mtu(br));
+ break;
+
+ case NETDEV_CHANGEADDR:
+ spin_lock_bh(&br->lock);
+ br_fdb_changeaddr(p, dev->dev_addr);
+ changed_addr = br_stp_recalculate_bridge_id(br);
+ spin_unlock_bh(&br->lock);
+
+ if (changed_addr)
+ call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev);
+
+ break;
+
+ case NETDEV_CHANGE:
+ br_port_carrier_check(p);
+ break;
+
+ case NETDEV_FEAT_CHANGE:
+ netdev_update_features(br->dev);
+ break;
+
+ case NETDEV_DOWN:
+ spin_lock_bh(&br->lock);
+ if (br->dev->flags & IFF_UP)
+ br_stp_disable_port(p);
+ spin_unlock_bh(&br->lock);
+ break;
+
+ case NETDEV_UP:
+ if (netif_running(br->dev) && netif_oper_up(dev)) {
+ spin_lock_bh(&br->lock);
+ br_stp_enable_port(p);
+ spin_unlock_bh(&br->lock);
+ }
+ break;
+
+ case NETDEV_UNREGISTER:
+ br_del_if(br, dev);
+ break;
+
+ case NETDEV_CHANGENAME:
+ err = br_sysfs_renameif(p);
+ if (err)
+ return notifier_from_errno(err);
+ break;
+
+ case NETDEV_PRE_TYPE_CHANGE:
+ /* Forbid underlying device to change its type. */
+ return NOTIFY_BAD;
+
+ case NETDEV_RESEND_IGMP:
+ /* Propagate to master device */
+ call_netdevice_notifiers(event, br->dev);
+ break;
+ }
+
+ /* Events that may cause spanning tree to refresh */
+ if (event == NETDEV_CHANGEADDR || event == NETDEV_UP ||
+ event == NETDEV_CHANGE || event == NETDEV_DOWN)
+ br_ifinfo_notify(RTM_NEWLINK, p);
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block br_device_notifier = {
+ .notifier_call = br_device_event
+};
+
static void __net_exit br_net_exit(struct net *net)
{
struct net_device *dev;
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 8fe8b71b487..568cccd39a3 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -88,18 +88,11 @@ out:
static int br_dev_init(struct net_device *dev)
{
struct net_bridge *br = netdev_priv(dev);
- int i;
- br->stats = alloc_percpu(struct pcpu_sw_netstats);
+ br->stats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
if (!br->stats)
return -ENOMEM;
- for_each_possible_cpu(i) {
- struct pcpu_sw_netstats *br_dev_stats;
- br_dev_stats = per_cpu_ptr(br->stats, i);
- u64_stats_init(&br_dev_stats->syncp);
- }
-
return 0;
}
@@ -119,6 +112,12 @@ static void br_dev_set_multicast_list(struct net_device *dev)
{
}
+static void br_dev_change_rx_flags(struct net_device *dev, int change)
+{
+ if (change & IFF_PROMISC)
+ br_manage_promisc(netdev_priv(dev));
+}
+
static int br_dev_stop(struct net_device *dev)
{
struct net_bridge *br = netdev_priv(dev);
@@ -143,9 +142,9 @@ static struct rtnl_link_stats64 *br_get_stats64(struct net_device *dev,
const struct pcpu_sw_netstats *bstats
= per_cpu_ptr(br->stats, cpu);
do {
- start = u64_stats_fetch_begin_bh(&bstats->syncp);
+ start = u64_stats_fetch_begin_irq(&bstats->syncp);
memcpy(&tmp, bstats, sizeof(tmp));
- } while (u64_stats_fetch_retry_bh(&bstats->syncp, start));
+ } while (u64_stats_fetch_retry_irq(&bstats->syncp, start));
sum.tx_bytes += tmp.tx_bytes;
sum.tx_packets += tmp.tx_packets;
sum.rx_bytes += tmp.rx_bytes;
@@ -225,16 +224,16 @@ static void br_netpoll_cleanup(struct net_device *dev)
br_netpoll_disable(p);
}
-static int __br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp)
+static int __br_netpoll_enable(struct net_bridge_port *p)
{
struct netpoll *np;
int err;
- np = kzalloc(sizeof(*p->np), gfp);
+ np = kzalloc(sizeof(*p->np), GFP_KERNEL);
if (!np)
return -ENOMEM;
- err = __netpoll_setup(np, p->dev, gfp);
+ err = __netpoll_setup(np, p->dev);
if (err) {
kfree(np);
return err;
@@ -244,16 +243,15 @@ static int __br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp)
return err;
}
-int br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp)
+int br_netpoll_enable(struct net_bridge_port *p)
{
if (!p->br->dev->npinfo)
return 0;
- return __br_netpoll_enable(p, gfp);
+ return __br_netpoll_enable(p);
}
-static int br_netpoll_setup(struct net_device *dev, struct netpoll_info *ni,
- gfp_t gfp)
+static int br_netpoll_setup(struct net_device *dev, struct netpoll_info *ni)
{
struct net_bridge *br = netdev_priv(dev);
struct net_bridge_port *p;
@@ -262,7 +260,7 @@ static int br_netpoll_setup(struct net_device *dev, struct netpoll_info *ni,
list_for_each_entry(p, &br->port_list, list) {
if (!p->dev)
continue;
- err = __br_netpoll_enable(p, gfp);
+ err = __br_netpoll_enable(p);
if (err)
goto fail;
}
@@ -317,6 +315,7 @@ static const struct net_device_ops br_netdev_ops = {
.ndo_get_stats64 = br_get_stats64,
.ndo_set_mac_address = br_set_mac_address,
.ndo_set_rx_mode = br_dev_set_multicast_list,
+ .ndo_change_rx_flags = br_dev_change_rx_flags,
.ndo_change_mtu = br_change_mtu,
.ndo_do_ioctl = br_dev_ioctl,
#ifdef CONFIG_NET_POLL_CONTROLLER
@@ -356,14 +355,15 @@ void br_dev_setup(struct net_device *dev)
dev->netdev_ops = &br_netdev_ops;
dev->destructor = br_dev_free;
- SET_ETHTOOL_OPS(dev, &br_ethtool_ops);
+ dev->ethtool_ops = &br_ethtool_ops;
SET_NETDEV_DEVTYPE(dev, &br_type);
dev->tx_queue_len = 0;
dev->priv_flags = IFF_EBRIDGE;
dev->features = COMMON_FEATURES | NETIF_F_LLTX | NETIF_F_NETNS_LOCAL |
- NETIF_F_HW_VLAN_CTAG_TX;
- dev->hw_features = COMMON_FEATURES | NETIF_F_HW_VLAN_CTAG_TX;
+ NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
+ dev->hw_features = COMMON_FEATURES | NETIF_F_HW_VLAN_CTAG_TX |
+ NETIF_F_HW_VLAN_STAG_TX;
dev->vlan_features = COMMON_FEATURES;
br->dev = dev;
@@ -374,10 +374,11 @@ void br_dev_setup(struct net_device *dev)
br->bridge_id.prio[0] = 0x80;
br->bridge_id.prio[1] = 0x00;
- memcpy(br->group_addr, eth_reserved_addr_base, ETH_ALEN);
+ ether_addr_copy(br->group_addr, eth_reserved_addr_base);
br->stp_enabled = BR_NO_STP;
br->group_fwd_mask = BR_GROUPFWD_DEFAULT;
+ br->group_fwd_mask_required = BR_GROUPFWD_DEFAULT;
br->designated_root = br->bridge_id;
br->bridge_max_age = br->max_age = 20 * HZ;
@@ -388,4 +389,5 @@ void br_dev_setup(struct net_device *dev)
br_netfilter_rtable_init(br);
br_stp_timer_init(br);
br_multicast_init(br);
+ br_vlan_init(br);
}
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 9203d5a1943..b524c36c127 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -85,8 +85,58 @@ static void fdb_rcu_free(struct rcu_head *head)
kmem_cache_free(br_fdb_cache, ent);
}
+/* When a static FDB entry is added, the mac address from the entry is
+ * added to the bridge private HW address list and all required ports
+ * are then updated with the new information.
+ * Called under RTNL.
+ */
+static void fdb_add_hw(struct net_bridge *br, const unsigned char *addr)
+{
+ int err;
+ struct net_bridge_port *p, *tmp;
+
+ ASSERT_RTNL();
+
+ list_for_each_entry(p, &br->port_list, list) {
+ if (!br_promisc_port(p)) {
+ err = dev_uc_add(p->dev, addr);
+ if (err)
+ goto undo;
+ }
+ }
+
+ return;
+undo:
+ list_for_each_entry(tmp, &br->port_list, list) {
+ if (tmp == p)
+ break;
+ if (!br_promisc_port(tmp))
+ dev_uc_del(tmp->dev, addr);
+ }
+}
+
+/* When a static FDB entry is deleted, the HW address from that entry is
+ * also removed from the bridge private HW address list and all the
+ * ports are updated with the needed information.
+ * Called under RTNL.
+ */
+static void fdb_del_hw(struct net_bridge *br, const unsigned char *addr)
+{
+ struct net_bridge_port *p;
+
+ ASSERT_RTNL();
+
+ list_for_each_entry(p, &br->port_list, list) {
+ if (!br_promisc_port(p))
+ dev_uc_del(p->dev, addr);
+ }
+}
+
static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f)
{
+ if (f->is_static)
+ fdb_del_hw(br, f->addr.addr);
+
hlist_del_rcu(&f->hlist);
fdb_notify(br, f, RTM_DELNEIGH);
call_rcu(&f->rcu, fdb_rcu_free);
@@ -466,6 +516,7 @@ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
return -ENOMEM;
fdb->is_local = fdb->is_static = 1;
+ fdb_add_hw(br, addr);
fdb_notify(br, fdb, RTM_NEWNEIGH);
return 0;
}
@@ -487,6 +538,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
{
struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)];
struct net_bridge_fdb_entry *fdb;
+ bool fdb_modified = false;
/* some users want to always flood. */
if (hold_time(br) == 0)
@@ -507,10 +559,15 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
source->dev->name);
} else {
/* fastpath: update of existing entry */
- fdb->dst = source;
+ if (unlikely(source != fdb->dst)) {
+ fdb->dst = source;
+ fdb_modified = true;
+ }
fdb->updated = jiffies;
if (unlikely(added_by_user))
fdb->added_by_user = 1;
+ if (unlikely(fdb_modified))
+ fdb_notify(br, fdb, RTM_NEWNEIGH);
}
} else {
spin_lock(&br->hash_lock);
@@ -565,6 +622,8 @@ static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br,
if (nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->addr))
goto nla_put_failure;
+ if (nla_put_u32(skb, NDA_MASTER, br->dev->ifindex))
+ goto nla_put_failure;
ci.ndm_used = jiffies_to_clock_t(now - fdb->used);
ci.ndm_confirmed = 0;
ci.ndm_updated = jiffies_to_clock_t(now - fdb->updated);
@@ -586,6 +645,7 @@ static inline size_t fdb_nlmsg_size(void)
{
return NLMSG_ALIGN(sizeof(struct ndmsg))
+ nla_total_size(ETH_ALEN) /* NDA_LLADDR */
+ + nla_total_size(sizeof(u32)) /* NDA_MASTER */
+ nla_total_size(sizeof(u16)) /* NDA_VLAN */
+ nla_total_size(sizeof(struct nda_cacheinfo));
}
@@ -678,13 +738,25 @@ static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr,
}
if (fdb_to_nud(fdb) != state) {
- if (state & NUD_PERMANENT)
- fdb->is_local = fdb->is_static = 1;
- else if (state & NUD_NOARP) {
+ if (state & NUD_PERMANENT) {
+ fdb->is_local = 1;
+ if (!fdb->is_static) {
+ fdb->is_static = 1;
+ fdb_add_hw(br, addr);
+ }
+ } else if (state & NUD_NOARP) {
+ fdb->is_local = 0;
+ if (!fdb->is_static) {
+ fdb->is_static = 1;
+ fdb_add_hw(br, addr);
+ }
+ } else {
fdb->is_local = 0;
- fdb->is_static = 1;
- } else
- fdb->is_local = fdb->is_static = 0;
+ if (fdb->is_static) {
+ fdb->is_static = 0;
+ fdb_del_hw(br, addr);
+ }
+ }
modified = true;
}
@@ -874,3 +946,59 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
out:
return err;
}
+
+int br_fdb_sync_static(struct net_bridge *br, struct net_bridge_port *p)
+{
+ struct net_bridge_fdb_entry *fdb, *tmp;
+ int i;
+ int err;
+
+ ASSERT_RTNL();
+
+ for (i = 0; i < BR_HASH_SIZE; i++) {
+ hlist_for_each_entry(fdb, &br->hash[i], hlist) {
+ /* We only care for static entries */
+ if (!fdb->is_static)
+ continue;
+
+ err = dev_uc_add(p->dev, fdb->addr.addr);
+ if (err)
+ goto rollback;
+ }
+ }
+ return 0;
+
+rollback:
+ for (i = 0; i < BR_HASH_SIZE; i++) {
+ hlist_for_each_entry(tmp, &br->hash[i], hlist) {
+ /* If we reached the fdb that failed, we can stop */
+ if (tmp == fdb)
+ break;
+
+ /* We only care for static entries */
+ if (!tmp->is_static)
+ continue;
+
+ dev_uc_del(p->dev, tmp->addr.addr);
+ }
+ }
+ return err;
+}
+
+void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p)
+{
+ struct net_bridge_fdb_entry *fdb;
+ int i;
+
+ ASSERT_RTNL();
+
+ for (i = 0; i < BR_HASH_SIZE; i++) {
+ hlist_for_each_entry_rcu(fdb, &br->hash[i], hlist) {
+ /* We only care for static entries */
+ if (!fdb->is_static)
+ continue;
+
+ dev_uc_del(p->dev, fdb->addr.addr);
+ }
+ }
+}
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index d3409e6b545..056b67b0e27 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -35,16 +35,11 @@ static inline int should_deliver(const struct net_bridge_port *p,
p->state == BR_STATE_FORWARDING;
}
-static inline unsigned int packet_length(const struct sk_buff *skb)
-{
- return skb->len - (skb->protocol == htons(ETH_P_8021Q) ? VLAN_HLEN : 0);
-}
-
int br_dev_queue_push_xmit(struct sk_buff *skb)
{
/* ip_fragment doesn't copy the MAC header */
if (nf_bridge_maybe_copy_header(skb) ||
- (packet_length(skb) > skb->dev->mtu && !skb_is_gso(skb))) {
+ !is_skb_forwardable(skb->dev, skb)) {
kfree_skb(skb);
} else {
skb_push(skb, ETH_HLEN);
@@ -71,7 +66,7 @@ static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb)
skb->dev = to->dev;
if (unlikely(netpoll_tx_running(to->br->dev))) {
- if (packet_length(skb) > skb->dev->mtu && !skb_is_gso(skb))
+ if (!is_skb_forwardable(skb->dev, skb))
kfree_skb(skb);
else {
skb_push(skb, ETH_HLEN);
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 54d207d3a31..3eca3fdf8fe 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -85,6 +85,111 @@ void br_port_carrier_check(struct net_bridge_port *p)
spin_unlock_bh(&br->lock);
}
+static void br_port_set_promisc(struct net_bridge_port *p)
+{
+ int err = 0;
+
+ if (br_promisc_port(p))
+ return;
+
+ err = dev_set_promiscuity(p->dev, 1);
+ if (err)
+ return;
+
+ br_fdb_unsync_static(p->br, p);
+ p->flags |= BR_PROMISC;
+}
+
+static void br_port_clear_promisc(struct net_bridge_port *p)
+{
+ int err;
+
+ /* Check if the port is already non-promisc or if it doesn't
+ * support UNICAST filtering. Without unicast filtering support
+ * we'll end up re-enabling promisc mode anyway, so just check for
+ * it here.
+ */
+ if (!br_promisc_port(p) || !(p->dev->priv_flags & IFF_UNICAST_FLT))
+ return;
+
+ /* Since we'll be clearing the promisc mode, program the port
+ * first so that we don't have interruption in traffic.
+ */
+ err = br_fdb_sync_static(p->br, p);
+ if (err)
+ return;
+
+ dev_set_promiscuity(p->dev, -1);
+ p->flags &= ~BR_PROMISC;
+}
+
+/* When a port is added or removed or when certain port flags
+ * change, this function is called to automatically manage
+ * promiscuity setting of all the bridge ports. We are always called
+ * under RTNL so can skip using rcu primitives.
+ */
+void br_manage_promisc(struct net_bridge *br)
+{
+ struct net_bridge_port *p;
+ bool set_all = false;
+
+ /* If vlan filtering is disabled or bridge interface is placed
+ * into promiscuous mode, place all ports in promiscuous mode.
+ */
+ if ((br->dev->flags & IFF_PROMISC) || !br_vlan_enabled(br))
+ set_all = true;
+
+ list_for_each_entry(p, &br->port_list, list) {
+ if (set_all) {
+ br_port_set_promisc(p);
+ } else {
+ /* If the number of auto-ports is <= 1, then all other
+ * ports will have their output configuration
+ * statically specified through fdbs. Since ingress
+ * on the auto-port becomes forwarding/egress to other
+ * ports and egress configuration is statically known,
+ * we can say that ingress configuration of the
+ * auto-port is also statically known.
+ * This lets us disable promiscuous mode and write
+ * this config to hw.
+ */
+ if (br->auto_cnt == 0 ||
+ (br->auto_cnt == 1 && br_auto_port(p)))
+ br_port_clear_promisc(p);
+ else
+ br_port_set_promisc(p);
+ }
+ }
+}
+
+static void nbp_update_port_count(struct net_bridge *br)
+{
+ struct net_bridge_port *p;
+ u32 cnt = 0;
+
+ list_for_each_entry(p, &br->port_list, list) {
+ if (br_auto_port(p))
+ cnt++;
+ }
+ if (br->auto_cnt != cnt) {
+ br->auto_cnt = cnt;
+ br_manage_promisc(br);
+ }
+}
+
+static void nbp_delete_promisc(struct net_bridge_port *p)
+{
+ /* If port is currently promiscuous, unset promiscuity.
+ * Otherwise, it is a static port so remove all addresses
+ * from it.
+ */
+ dev_set_allmulti(p->dev, -1);
+ if (br_promisc_port(p))
+ dev_set_promiscuity(p->dev, -1);
+ else
+ br_fdb_unsync_static(p->br, p);
+}
+
static void release_nbp(struct kobject *kobj)
{
struct net_bridge_port *p
@@ -133,7 +238,7 @@ static void del_nbp(struct net_bridge_port *p)
sysfs_remove_link(br->ifobj, p->dev->name);
- dev_set_promiscuity(dev, -1);
+ nbp_delete_promisc(p);
spin_lock_bh(&br->lock);
br_stp_disable_port(p);
@@ -141,10 +246,11 @@ static void del_nbp(struct net_bridge_port *p)
br_ifinfo_notify(RTM_DELLINK, p);
+ list_del_rcu(&p->list);
+
nbp_vlan_flush(p);
br_fdb_delete_by_port(br, p, 1);
-
- list_del_rcu(&p->list);
+ nbp_update_port_count(br);
dev->priv_flags &= ~IFF_BRIDGE_PORT;
@@ -353,7 +459,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
call_netdevice_notifiers(NETDEV_JOIN, dev);
- err = dev_set_promiscuity(dev, 1);
+ err = dev_set_allmulti(dev, 1);
if (err)
goto put_back;
@@ -366,7 +472,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
if (err)
goto err2;
- err = br_netpoll_enable(p, GFP_KERNEL);
+ err = br_netpoll_enable(p);
if (err)
goto err3;
@@ -384,6 +490,8 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
list_add_rcu(&p->list, &br->port_list);
+ nbp_update_port_count(br);
+
netdev_update_features(br->dev);
if (br->dev->needed_headroom < dev->needed_headroom)
@@ -421,7 +529,7 @@ err2:
kobject_put(&p->kobj);
p = NULL; /* kobject_put frees */
err1:
- dev_set_promiscuity(dev, -1);
+ dev_set_allmulti(dev, -1);
put_back:
dev_put(dev);
kfree(p);
@@ -455,3 +563,11 @@ int br_del_if(struct net_bridge *br, struct net_device *dev)
return 0;
}
+
+/* Hook to run after the flags of port @p have been modified.
+ * @mask holds the bits that changed (br_setport() passes
+ * old_flags ^ p->flags). Only a change to BR_LEARNING or BR_FLOOD
+ * (BR_AUTO_MASK) matters here: it triggers a recount of the bridge's
+ * auto-ports. Any other change is ignored.
+ */
+void br_port_flags_change(struct net_bridge_port *p, unsigned long mask)
+{
+ struct net_bridge *br = p->br;
+
+ if (mask & BR_AUTO_MASK)
+ nbp_update_port_count(br);
+}
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index d0cca3c65f0..366c4364907 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -73,7 +73,7 @@ int br_handle_frame_finish(struct sk_buff *skb)
goto drop;
if (!br_allowed_ingress(p->br, nbp_get_vlan_info(p), skb, &vid))
- goto drop;
+ goto out;
/* insert into forwarding database after filtering to avoid spoofing */
br = p->br;
@@ -147,8 +147,8 @@ static int br_handle_local_finish(struct sk_buff *skb)
struct net_bridge_port *p = br_port_get_rcu(skb->dev);
u16 vid = 0;
- br_vlan_get_tag(skb, &vid);
- if (p->flags & BR_LEARNING)
+ /* check if vlan is allowed, to avoid spoofing */
+ if (p->flags & BR_LEARNING && br_should_learn(p, skb, &vid))
br_fdb_update(p->br, p, eth_hdr(skb)->h_source, vid, false);
return 0; /* process further */
}
@@ -177,6 +177,8 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
p = br_port_get_rcu(skb->dev);
if (unlikely(is_link_local_ether_addr(dest))) {
+ u16 fwd_mask = p->br->group_fwd_mask_required;
+
/*
* See IEEE 802.1D Table 7-10 Reserved addresses
*
@@ -194,7 +196,8 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
case 0x00: /* Bridge Group Address */
/* If STP is turned off,
then must forward to keep loop detection */
- if (p->br->stp_enabled == BR_NO_STP)
+ if (p->br->stp_enabled == BR_NO_STP ||
+ fwd_mask & (1u << dest[5]))
goto forward;
break;
@@ -203,7 +206,8 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
default:
/* Allow selective forwarding for most other protocols */
- if (p->br->group_fwd_mask & (1u << dest[5]))
+ fwd_mask |= p->br->group_fwd_mask;
+ if (fwd_mask & (1u << dest[5]))
goto forward;
}
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index b7b1914dfa2..5df05269d17 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -418,13 +418,13 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry)
ip.proto = entry->addr.proto;
if (ip.proto == htons(ETH_P_IP)) {
- if (timer_pending(&br->ip4_querier.timer))
+ if (timer_pending(&br->ip4_other_query.timer))
return -EBUSY;
ip.u.ip4 = entry->addr.u.ip4;
#if IS_ENABLED(CONFIG_IPV6)
} else {
- if (timer_pending(&br->ip6_querier.timer))
+ if (timer_pending(&br->ip6_other_query.timer))
return -EBUSY;
ip.u.ip6 = entry->addr.u.ip6;
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 93067ecdb9a..abfa0b65a11 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -11,6 +11,7 @@
*/
#include <linux/err.h>
+#include <linux/export.h>
#include <linux/if_ether.h>
#include <linux/igmp.h>
#include <linux/jhash.h>
@@ -35,7 +36,7 @@
#include "br_private.h"
static void br_multicast_start_querier(struct net_bridge *br,
- struct bridge_mcast_query *query);
+ struct bridge_mcast_own_query *query);
unsigned int br_mdb_rehash_seq;
static inline int br_ip_equal(const struct br_ip *a, const struct br_ip *b)
@@ -363,7 +364,7 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br,
skb_reset_mac_header(skb);
eth = eth_hdr(skb);
- memcpy(eth->h_source, br->dev->dev_addr, ETH_ALEN);
+ ether_addr_copy(eth->h_source, br->dev->dev_addr);
eth->h_dest[0] = 1;
eth->h_dest[1] = 0;
eth->h_dest[2] = 0x5e;
@@ -433,7 +434,7 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
skb_reset_mac_header(skb);
eth = eth_hdr(skb);
- memcpy(eth->h_source, br->dev->dev_addr, ETH_ALEN);
+ ether_addr_copy(eth->h_source, br->dev->dev_addr);
eth->h_proto = htons(ETH_P_IPV6);
skb_put(skb, sizeof(*eth));
@@ -761,7 +762,7 @@ static void br_multicast_local_router_expired(unsigned long data)
}
static void br_multicast_querier_expired(struct net_bridge *br,
- struct bridge_mcast_query *query)
+ struct bridge_mcast_own_query *query)
{
spin_lock(&br->multicast_lock);
if (!netif_running(br->dev) || br->multicast_disabled)
@@ -777,7 +778,7 @@ static void br_ip4_multicast_querier_expired(unsigned long data)
{
struct net_bridge *br = (void *)data;
- br_multicast_querier_expired(br, &br->ip4_query);
+ br_multicast_querier_expired(br, &br->ip4_own_query);
}
#if IS_ENABLED(CONFIG_IPV6)
@@ -785,10 +786,22 @@ static void br_ip6_multicast_querier_expired(unsigned long data)
{
struct net_bridge *br = (void *)data;
- br_multicast_querier_expired(br, &br->ip6_query);
+ br_multicast_querier_expired(br, &br->ip6_own_query);
}
#endif
+/* Record the IP source address found in @skb as the address of the
+ * selected querier of @br. @ip->proto chooses the slot: ETH_P_IP stores
+ * the IPv4 source address in br->ip4_querier, anything else stores the
+ * IPv6 source address in br->ip6_querier (only when IPv6 is enabled).
+ * The querier's port pointer is not touched here.
+ * Called from __br_multicast_send_query() on the branch that hands the
+ * query to netif_rx().
+ */
+static void br_multicast_select_own_querier(struct net_bridge *br,
+ struct br_ip *ip,
+ struct sk_buff *skb)
+{
+ if (ip->proto == htons(ETH_P_IP))
+ br->ip4_querier.addr.u.ip4 = ip_hdr(skb)->saddr;
+#if IS_ENABLED(CONFIG_IPV6)
+ else
+ br->ip6_querier.addr.u.ip6 = ipv6_hdr(skb)->saddr;
+#endif
+}
+
static void __br_multicast_send_query(struct net_bridge *br,
struct net_bridge_port *port,
struct br_ip *ip)
@@ -804,17 +817,19 @@ static void __br_multicast_send_query(struct net_bridge *br,
skb->dev = port->dev;
NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev,
dev_queue_xmit);
- } else
+ } else {
+ br_multicast_select_own_querier(br, ip, skb);
netif_rx(skb);
+ }
}
static void br_multicast_send_query(struct net_bridge *br,
struct net_bridge_port *port,
- struct bridge_mcast_query *query)
+ struct bridge_mcast_own_query *own_query)
{
unsigned long time;
struct br_ip br_group;
- struct bridge_mcast_querier *querier = NULL;
+ struct bridge_mcast_other_query *other_query = NULL;
if (!netif_running(br->dev) || br->multicast_disabled ||
!br->multicast_querier)
@@ -822,31 +837,32 @@ static void br_multicast_send_query(struct net_bridge *br,
memset(&br_group.u, 0, sizeof(br_group.u));
- if (port ? (query == &port->ip4_query) :
- (query == &br->ip4_query)) {
- querier = &br->ip4_querier;
+ if (port ? (own_query == &port->ip4_own_query) :
+ (own_query == &br->ip4_own_query)) {
+ other_query = &br->ip4_other_query;
br_group.proto = htons(ETH_P_IP);
#if IS_ENABLED(CONFIG_IPV6)
} else {
- querier = &br->ip6_querier;
+ other_query = &br->ip6_other_query;
br_group.proto = htons(ETH_P_IPV6);
#endif
}
- if (!querier || timer_pending(&querier->timer))
+ if (!other_query || timer_pending(&other_query->timer))
return;
__br_multicast_send_query(br, port, &br_group);
time = jiffies;
- time += query->startup_sent < br->multicast_startup_query_count ?
+ time += own_query->startup_sent < br->multicast_startup_query_count ?
br->multicast_startup_query_interval :
br->multicast_query_interval;
- mod_timer(&query->timer, time);
+ mod_timer(&own_query->timer, time);
}
-static void br_multicast_port_query_expired(struct net_bridge_port *port,
- struct bridge_mcast_query *query)
+static void
+br_multicast_port_query_expired(struct net_bridge_port *port,
+ struct bridge_mcast_own_query *query)
{
struct net_bridge *br = port->br;
@@ -868,7 +884,7 @@ static void br_ip4_multicast_port_query_expired(unsigned long data)
{
struct net_bridge_port *port = (void *)data;
- br_multicast_port_query_expired(port, &port->ip4_query);
+ br_multicast_port_query_expired(port, &port->ip4_own_query);
}
#if IS_ENABLED(CONFIG_IPV6)
@@ -876,7 +892,7 @@ static void br_ip6_multicast_port_query_expired(unsigned long data)
{
struct net_bridge_port *port = (void *)data;
- br_multicast_port_query_expired(port, &port->ip6_query);
+ br_multicast_port_query_expired(port, &port->ip6_own_query);
}
#endif
@@ -886,11 +902,11 @@ void br_multicast_add_port(struct net_bridge_port *port)
setup_timer(&port->multicast_router_timer, br_multicast_router_expired,
(unsigned long)port);
- setup_timer(&port->ip4_query.timer, br_ip4_multicast_port_query_expired,
- (unsigned long)port);
+ setup_timer(&port->ip4_own_query.timer,
+ br_ip4_multicast_port_query_expired, (unsigned long)port);
#if IS_ENABLED(CONFIG_IPV6)
- setup_timer(&port->ip6_query.timer, br_ip6_multicast_port_query_expired,
- (unsigned long)port);
+ setup_timer(&port->ip6_own_query.timer,
+ br_ip6_multicast_port_query_expired, (unsigned long)port);
#endif
}
@@ -899,7 +915,7 @@ void br_multicast_del_port(struct net_bridge_port *port)
del_timer_sync(&port->multicast_router_timer);
}
-static void br_multicast_enable(struct bridge_mcast_query *query)
+static void br_multicast_enable(struct bridge_mcast_own_query *query)
{
query->startup_sent = 0;
@@ -916,9 +932,9 @@ void br_multicast_enable_port(struct net_bridge_port *port)
if (br->multicast_disabled || !netif_running(br->dev))
goto out;
- br_multicast_enable(&port->ip4_query);
+ br_multicast_enable(&port->ip4_own_query);
#if IS_ENABLED(CONFIG_IPV6)
- br_multicast_enable(&port->ip6_query);
+ br_multicast_enable(&port->ip6_own_query);
#endif
out:
@@ -938,9 +954,9 @@ void br_multicast_disable_port(struct net_bridge_port *port)
if (!hlist_unhashed(&port->rlist))
hlist_del_init_rcu(&port->rlist);
del_timer(&port->multicast_router_timer);
- del_timer(&port->ip4_query.timer);
+ del_timer(&port->ip4_own_query.timer);
#if IS_ENABLED(CONFIG_IPV6)
- del_timer(&port->ip6_query.timer);
+ del_timer(&port->ip6_own_query.timer);
#endif
spin_unlock(&br->multicast_lock);
}
@@ -1064,15 +1080,80 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br,
}
#endif
+/* Decide whether the sender @saddr, seen on @port, becomes the selected
+ * IPv4 querier of @br.
+ *
+ * The sender is accepted when any of the following holds:
+ *  - neither the own-query nor the other-query IPv4 timer is pending
+ *  - no querier address is recorded yet (stored address is 0)
+ *  - @saddr, compared in host byte order, is lower than or equal to
+ *    the recorded querier address
+ *
+ * On acceptance the address and port are stored in br->ip4_querier and
+ * true is returned; otherwise nothing is changed and false is returned.
+ */
+static bool br_ip4_multicast_select_querier(struct net_bridge *br,
+ struct net_bridge_port *port,
+ __be32 saddr)
+{
+ if (!timer_pending(&br->ip4_own_query.timer) &&
+ !timer_pending(&br->ip4_other_query.timer))
+ goto update;
+
+ if (!br->ip4_querier.addr.u.ip4)
+ goto update;
+
+ if (ntohl(saddr) <= ntohl(br->ip4_querier.addr.u.ip4))
+ goto update;
+
+ return false;
+
+update:
+ br->ip4_querier.addr.u.ip4 = saddr;
+
+ /* update protected by general multicast_lock by caller */
+ rcu_assign_pointer(br->ip4_querier.port, port);
+
+ return true;
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+/* Decide whether the sender @saddr, seen on @port, becomes the selected
+ * IPv6 querier of @br.
+ *
+ * The sender is accepted when neither the own-query nor the other-query
+ * IPv6 timer is pending, or when ipv6_addr_cmp() ranks @saddr lower than
+ * or equal to the recorded querier address. Unlike the IPv4 variant there
+ * is no separate test for an unset recorded address.
+ *
+ * On acceptance the address and port are stored in br->ip6_querier and
+ * true is returned; otherwise nothing is changed and false is returned.
+ */
+static bool br_ip6_multicast_select_querier(struct net_bridge *br,
+ struct net_bridge_port *port,
+ struct in6_addr *saddr)
+{
+ if (!timer_pending(&br->ip6_own_query.timer) &&
+ !timer_pending(&br->ip6_other_query.timer))
+ goto update;
+
+ if (ipv6_addr_cmp(saddr, &br->ip6_querier.addr.u.ip6) <= 0)
+ goto update;
+
+ return false;
+
+update:
+ br->ip6_querier.addr.u.ip6 = *saddr;
+
+ /* update protected by general multicast_lock by caller */
+ rcu_assign_pointer(br->ip6_querier.port, port);
+
+ return true;
+}
+#endif
+
+/* Protocol dispatcher for querier selection: forwards to the IPv4 or
+ * (when enabled) IPv6 helper according to @saddr->proto and returns its
+ * verdict. Any other protocol value selects nothing and yields false.
+ */
+static bool br_multicast_select_querier(struct net_bridge *br,
+ struct net_bridge_port *port,
+ struct br_ip *saddr)
+{
+ switch (saddr->proto) {
+ case htons(ETH_P_IP):
+ return br_ip4_multicast_select_querier(br, port, saddr->u.ip4);
+#if IS_ENABLED(CONFIG_IPV6)
+ case htons(ETH_P_IPV6):
+ return br_ip6_multicast_select_querier(br, port, &saddr->u.ip6);
+#endif
+ }
+
+ return false;
+}
+
+
+/* (Re)arm the other-querier timer @query so that it fires
+ * br->multicast_querier_interval from now. If the timer was not already
+ * pending, query->delay_time is first set to jiffies + @max_delay.
+ */
 static void
-br_multicast_update_querier_timer(struct net_bridge *br,
- struct bridge_mcast_querier *querier,
- unsigned long max_delay)
+br_multicast_update_query_timer(struct net_bridge *br,
+ struct bridge_mcast_other_query *query,
+ unsigned long max_delay)
 {
- if (!timer_pending(&querier->timer))
- querier->delay_time = jiffies + max_delay;
+ if (!timer_pending(&query->timer))
+ query->delay_time = jiffies + max_delay;
- mod_timer(&querier->timer, jiffies + br->multicast_querier_interval);
+ mod_timer(&query->timer, jiffies + br->multicast_querier_interval);
 }
/*
@@ -1125,16 +1206,14 @@ timer:
+/* Handle a query from @saddr received on @port. If the sender is not
+ * accepted as the selected querier (br_multicast_select_querier() returns
+ * false) nothing else happens. Otherwise the other-querier timer @query is
+ * refreshed with @max_delay and @port is marked as a router port.
+ */
 static void br_multicast_query_received(struct net_bridge *br,
 struct net_bridge_port *port,
- struct bridge_mcast_querier *querier,
- int saddr,
- bool is_general_query,
+ struct bridge_mcast_other_query *query,
+ struct br_ip *saddr,
 unsigned long max_delay)
 {
- if (saddr && is_general_query)
- br_multicast_update_querier_timer(br, querier, max_delay);
- else if (timer_pending(&querier->timer))
+ if (!br_multicast_select_querier(br, port, saddr))
 return;
+ br_multicast_update_query_timer(br, query, max_delay);
 br_multicast_mark_router(br, port);
 }
@@ -1149,6 +1228,7 @@ static int br_ip4_multicast_query(struct net_bridge *br,
struct igmpv3_query *ih3;
struct net_bridge_port_group *p;
struct net_bridge_port_group __rcu **pp;
+ struct br_ip saddr;
unsigned long max_delay;
unsigned long now = jiffies;
__be32 group;
@@ -1190,11 +1270,14 @@ static int br_ip4_multicast_query(struct net_bridge *br,
goto out;
}
- br_multicast_query_received(br, port, &br->ip4_querier, !!iph->saddr,
- !group, max_delay);
+ if (!group) {
+ saddr.proto = htons(ETH_P_IP);
+ saddr.u.ip4 = iph->saddr;
- if (!group)
+ br_multicast_query_received(br, port, &br->ip4_other_query,
+ &saddr, max_delay);
goto out;
+ }
mp = br_mdb_ip4_get(mlock_dereference(br->mdb, br), group, vid);
if (!mp)
@@ -1234,6 +1317,7 @@ static int br_ip6_multicast_query(struct net_bridge *br,
struct mld2_query *mld2q;
struct net_bridge_port_group *p;
struct net_bridge_port_group __rcu **pp;
+ struct br_ip saddr;
unsigned long max_delay;
unsigned long now = jiffies;
const struct in6_addr *group = NULL;
@@ -1282,12 +1366,16 @@ static int br_ip6_multicast_query(struct net_bridge *br,
goto out;
}
- br_multicast_query_received(br, port, &br->ip6_querier,
- !ipv6_addr_any(&ip6h->saddr),
- is_general_query, max_delay);
+ if (is_general_query) {
+ saddr.proto = htons(ETH_P_IPV6);
+ saddr.u.ip6 = ip6h->saddr;
- if (!group)
+ br_multicast_query_received(br, port, &br->ip6_other_query,
+ &saddr, max_delay);
+ goto out;
+ } else if (!group) {
goto out;
+ }
mp = br_mdb_ip6_get(mlock_dereference(br->mdb, br), group, vid);
if (!mp)
@@ -1315,11 +1403,12 @@ out:
}
#endif
-static void br_multicast_leave_group(struct net_bridge *br,
- struct net_bridge_port *port,
- struct br_ip *group,
- struct bridge_mcast_querier *querier,
- struct bridge_mcast_query *query)
+static void
+br_multicast_leave_group(struct net_bridge *br,
+ struct net_bridge_port *port,
+ struct br_ip *group,
+ struct bridge_mcast_other_query *other_query,
+ struct bridge_mcast_own_query *own_query)
{
struct net_bridge_mdb_htable *mdb;
struct net_bridge_mdb_entry *mp;
@@ -1330,7 +1419,7 @@ static void br_multicast_leave_group(struct net_bridge *br,
spin_lock(&br->multicast_lock);
if (!netif_running(br->dev) ||
(port && port->state == BR_STATE_DISABLED) ||
- timer_pending(&querier->timer))
+ timer_pending(&other_query->timer))
goto out;
mdb = mlock_dereference(br->mdb, br);
@@ -1344,7 +1433,7 @@ static void br_multicast_leave_group(struct net_bridge *br,
time = jiffies + br->multicast_last_member_count *
br->multicast_last_member_interval;
- mod_timer(&query->timer, time);
+ mod_timer(&own_query->timer, time);
for (p = mlock_dereference(mp->ports, br);
p != NULL;
@@ -1425,17 +1514,19 @@ static void br_ip4_multicast_leave_group(struct net_bridge *br,
__u16 vid)
{
struct br_ip br_group;
- struct bridge_mcast_query *query = port ? &port->ip4_query :
- &br->ip4_query;
+ struct bridge_mcast_own_query *own_query;
if (ipv4_is_local_multicast(group))
return;
+ own_query = port ? &port->ip4_own_query : &br->ip4_own_query;
+
br_group.u.ip4 = group;
br_group.proto = htons(ETH_P_IP);
br_group.vid = vid;
- br_multicast_leave_group(br, port, &br_group, &br->ip4_querier, query);
+ br_multicast_leave_group(br, port, &br_group, &br->ip4_other_query,
+ own_query);
}
#if IS_ENABLED(CONFIG_IPV6)
@@ -1445,18 +1536,19 @@ static void br_ip6_multicast_leave_group(struct net_bridge *br,
__u16 vid)
{
struct br_ip br_group;
- struct bridge_mcast_query *query = port ? &port->ip6_query :
- &br->ip6_query;
-
+ struct bridge_mcast_own_query *own_query;
if (ipv6_addr_is_ll_all_nodes(group))
return;
+ own_query = port ? &port->ip6_own_query : &br->ip6_own_query;
+
br_group.u.ip6 = *group;
br_group.proto = htons(ETH_P_IPV6);
br_group.vid = vid;
- br_multicast_leave_group(br, port, &br_group, &br->ip6_querier, query);
+ br_multicast_leave_group(br, port, &br_group, &br->ip6_other_query,
+ own_query);
}
#endif
@@ -1723,12 +1815,14 @@ int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port,
}
+/* Expiry handler body shared by the bridge's own IPv4/IPv6 query timers.
+ * @query:   the own-query state whose timer fired
+ * @querier: the selected-querier record of the same protocol
+ *
+ * Under br->multicast_lock: counts one more startup query while the
+ * startup quota is not used up, forgets the port recorded for the selected
+ * querier, and sends the next query with a NULL port.
+ */
 static void br_multicast_query_expired(struct net_bridge *br,
- struct bridge_mcast_query *query)
+ struct bridge_mcast_own_query *query,
+ struct bridge_mcast_querier *querier)
 {
 spin_lock(&br->multicast_lock);
 if (query->startup_sent < br->multicast_startup_query_count)
 query->startup_sent++;
+ /* Clear the recorded port, not the local @querier pointer: assigning
+ * NULL to the parameter itself would leave querier->port untouched.
+ */
+ rcu_assign_pointer(querier->port, NULL);
 br_multicast_send_query(br, NULL, query);
 spin_unlock(&br->multicast_lock);
 }
@@ -1737,7 +1831,7 @@ static void br_ip4_multicast_query_expired(unsigned long data)
{
struct net_bridge *br = (void *)data;
- br_multicast_query_expired(br, &br->ip4_query);
+ br_multicast_query_expired(br, &br->ip4_own_query, &br->ip4_querier);
}
#if IS_ENABLED(CONFIG_IPV6)
@@ -1745,7 +1839,7 @@ static void br_ip6_multicast_query_expired(unsigned long data)
{
struct net_bridge *br = (void *)data;
- br_multicast_query_expired(br, &br->ip6_query);
+ br_multicast_query_expired(br, &br->ip6_own_query, &br->ip6_querier);
}
#endif
@@ -1767,28 +1861,30 @@ void br_multicast_init(struct net_bridge *br)
br->multicast_querier_interval = 255 * HZ;
br->multicast_membership_interval = 260 * HZ;
- br->ip4_querier.delay_time = 0;
+ br->ip4_other_query.delay_time = 0;
+ br->ip4_querier.port = NULL;
#if IS_ENABLED(CONFIG_IPV6)
- br->ip6_querier.delay_time = 0;
+ br->ip6_other_query.delay_time = 0;
+ br->ip6_querier.port = NULL;
#endif
spin_lock_init(&br->multicast_lock);
setup_timer(&br->multicast_router_timer,
br_multicast_local_router_expired, 0);
- setup_timer(&br->ip4_querier.timer, br_ip4_multicast_querier_expired,
- (unsigned long)br);
- setup_timer(&br->ip4_query.timer, br_ip4_multicast_query_expired,
+ setup_timer(&br->ip4_other_query.timer,
+ br_ip4_multicast_querier_expired, (unsigned long)br);
+ setup_timer(&br->ip4_own_query.timer, br_ip4_multicast_query_expired,
(unsigned long)br);
#if IS_ENABLED(CONFIG_IPV6)
- setup_timer(&br->ip6_querier.timer, br_ip6_multicast_querier_expired,
- (unsigned long)br);
- setup_timer(&br->ip6_query.timer, br_ip6_multicast_query_expired,
+ setup_timer(&br->ip6_other_query.timer,
+ br_ip6_multicast_querier_expired, (unsigned long)br);
+ setup_timer(&br->ip6_own_query.timer, br_ip6_multicast_query_expired,
(unsigned long)br);
#endif
}
static void __br_multicast_open(struct net_bridge *br,
- struct bridge_mcast_query *query)
+ struct bridge_mcast_own_query *query)
{
query->startup_sent = 0;
@@ -1800,9 +1896,9 @@ static void __br_multicast_open(struct net_bridge *br,
void br_multicast_open(struct net_bridge *br)
{
- __br_multicast_open(br, &br->ip4_query);
+ __br_multicast_open(br, &br->ip4_own_query);
#if IS_ENABLED(CONFIG_IPV6)
- __br_multicast_open(br, &br->ip6_query);
+ __br_multicast_open(br, &br->ip6_own_query);
#endif
}
@@ -1815,11 +1911,11 @@ void br_multicast_stop(struct net_bridge *br)
int i;
del_timer_sync(&br->multicast_router_timer);
- del_timer_sync(&br->ip4_querier.timer);
- del_timer_sync(&br->ip4_query.timer);
+ del_timer_sync(&br->ip4_other_query.timer);
+ del_timer_sync(&br->ip4_own_query.timer);
#if IS_ENABLED(CONFIG_IPV6)
- del_timer_sync(&br->ip6_querier.timer);
- del_timer_sync(&br->ip6_query.timer);
+ del_timer_sync(&br->ip6_other_query.timer);
+ del_timer_sync(&br->ip6_own_query.timer);
#endif
spin_lock_bh(&br->multicast_lock);
@@ -1923,7 +2019,7 @@ unlock:
}
static void br_multicast_start_querier(struct net_bridge *br,
- struct bridge_mcast_query *query)
+ struct bridge_mcast_own_query *query)
{
struct net_bridge_port *port;
@@ -1934,11 +2030,11 @@ static void br_multicast_start_querier(struct net_bridge *br,
port->state == BR_STATE_BLOCKING)
continue;
- if (query == &br->ip4_query)
- br_multicast_enable(&port->ip4_query);
+ if (query == &br->ip4_own_query)
+ br_multicast_enable(&port->ip4_own_query);
#if IS_ENABLED(CONFIG_IPV6)
else
- br_multicast_enable(&port->ip6_query);
+ br_multicast_enable(&port->ip6_own_query);
#endif
}
}
@@ -1974,9 +2070,9 @@ rollback:
goto rollback;
}
- br_multicast_start_querier(br, &br->ip4_query);
+ br_multicast_start_querier(br, &br->ip4_own_query);
#if IS_ENABLED(CONFIG_IPV6)
- br_multicast_start_querier(br, &br->ip6_query);
+ br_multicast_start_querier(br, &br->ip6_own_query);
#endif
unlock:
@@ -2001,16 +2097,16 @@ int br_multicast_set_querier(struct net_bridge *br, unsigned long val)
max_delay = br->multicast_query_response_interval;
- if (!timer_pending(&br->ip4_querier.timer))
- br->ip4_querier.delay_time = jiffies + max_delay;
+ if (!timer_pending(&br->ip4_other_query.timer))
+ br->ip4_other_query.delay_time = jiffies + max_delay;
- br_multicast_start_querier(br, &br->ip4_query);
+ br_multicast_start_querier(br, &br->ip4_own_query);
#if IS_ENABLED(CONFIG_IPV6)
- if (!timer_pending(&br->ip6_querier.timer))
- br->ip6_querier.delay_time = jiffies + max_delay;
+ if (!timer_pending(&br->ip6_other_query.timer))
+ br->ip6_other_query.delay_time = jiffies + max_delay;
- br_multicast_start_querier(br, &br->ip6_query);
+ br_multicast_start_querier(br, &br->ip6_own_query);
#endif
unlock:
@@ -2061,3 +2157,109 @@ unlock:
return err;
}
+
+/**
+ * br_multicast_list_adjacent - Returns snooped multicast addresses
+ * @dev: The bridge port adjacent to which to retrieve addresses
+ * @br_ip_list: The list to store found, snooped multicast IP addresses in
+ *
+ * Creates a list of IP addresses (struct br_ip_list) sensed by the multicast
+ * snooping feature on all bridge ports of dev's bridge device, excluding
+ * the addresses from dev itself.
+ *
+ * Returns the number of items added to br_ip_list.
+ *
+ * Notes:
+ * - br_ip_list needs to be initialized by caller
+ * - br_ip_list might contain duplicates in the end
+ * (needs to be taken care of by caller)
+ * - br_ip_list needs to be freed by caller
+ * - returns 0 without touching br_ip_list when br_ip_list is NULL or dev
+ *   is not a bridge port
+ * - entries are allocated with GFP_ATOMIC because the walk runs inside an
+ *   RCU read-side critical section
+ * - if an allocation fails the walk stops early; entries added up to that
+ *   point stay on br_ip_list and are included in the returned count
+ */
+int br_multicast_list_adjacent(struct net_device *dev,
+ struct list_head *br_ip_list)
+{
+ struct net_bridge *br;
+ struct net_bridge_port *port;
+ struct net_bridge_port_group *group;
+ struct br_ip_list *entry;
+ int count = 0;
+
+ rcu_read_lock();
+ if (!br_ip_list || !br_port_exists(dev))
+ goto unlock;
+
+ port = br_port_get_rcu(dev);
+ if (!port || !port->br)
+ goto unlock;
+
+ br = port->br;
+
+ list_for_each_entry_rcu(port, &br->port_list, list) {
+ if (!port->dev || port->dev == dev)
+ continue;
+
+ hlist_for_each_entry_rcu(group, &port->mglist, mglist) {
+ entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
+ if (!entry)
+ goto unlock;
+
+ entry->addr = group->addr;
+ list_add(&entry->list, br_ip_list);
+ count++;
+ }
+ }
+
+unlock:
+ rcu_read_unlock();
+ return count;
+}
+EXPORT_SYMBOL_GPL(br_multicast_list_adjacent);
+
+/**
+ * br_multicast_has_querier_adjacent - Checks for a querier behind a bridge port
+ * @dev: The bridge port adjacent to which to check for a querier
+ * @proto: The protocol family to check for: IGMP -> ETH_P_IP, MLD -> ETH_P_IPV6
+ *
+ * Checks whether the given interface has a bridge on top and if so returns
+ * true if a selected querier is behind one of the other ports of this
+ * bridge. Otherwise returns false.
+ *
+ * Notes:
+ * - @proto is compared directly against ETH_P_IP / ETH_P_IPV6, so it is
+ *   expected in host byte order
+ * - false is also returned when the other-querier timer for @proto is not
+ *   pending, when the port recorded for the selected querier is @dev's own
+ *   port, or when @proto is not one of the handled values (ETH_P_IPV6 is
+ *   only handled when IPv6 support is enabled)
+ */
+bool br_multicast_has_querier_adjacent(struct net_device *dev, int proto)
+{
+ struct net_bridge *br;
+ struct net_bridge_port *port;
+ bool ret = false;
+
+ rcu_read_lock();
+ if (!br_port_exists(dev))
+ goto unlock;
+
+ port = br_port_get_rcu(dev);
+ if (!port || !port->br)
+ goto unlock;
+
+ br = port->br;
+
+ switch (proto) {
+ case ETH_P_IP:
+ if (!timer_pending(&br->ip4_other_query.timer) ||
+ rcu_dereference(br->ip4_querier.port) == port)
+ goto unlock;
+ break;
+#if IS_ENABLED(CONFIG_IPV6)
+ case ETH_P_IPV6:
+ if (!timer_pending(&br->ip6_other_query.timer) ||
+ rcu_dereference(br->ip6_querier.port) == port)
+ goto unlock;
+ break;
+#endif
+ default:
+ goto unlock;
+ }
+
+ ret = true;
+unlock:
+ rcu_read_unlock();
+ return ret;
+}
+EXPORT_SYMBOL_GPL(br_multicast_has_querier_adjacent);
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index b008c59a92c..a615264cf01 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -167,7 +167,7 @@ void br_netfilter_rtable_init(struct net_bridge *br)
rt->dst.dev = br->dev;
rt->dst.path = &rt->dst;
dst_init_metrics(&rt->dst, br_dst_default_metrics, true);
- rt->dst.flags = DST_NOXFRM | DST_NOPEER | DST_FAKE_RTABLE;
+ rt->dst.flags = DST_NOXFRM | DST_FAKE_RTABLE;
rt->dst.ops = &fake_dst_ops;
}
@@ -506,7 +506,7 @@ bridged_dnat:
1);
return 0;
}
- memcpy(eth_hdr(skb)->h_dest, dev->dev_addr, ETH_ALEN);
+ ether_addr_copy(eth_hdr(skb)->h_dest, dev->dev_addr);
skb->pkt_type = PACKET_HOST;
}
} else {
@@ -535,7 +535,7 @@ static struct net_device *brnf_get_logical_dev(struct sk_buff *skb, const struct
if (brnf_pass_vlan_indev == 0 || !vlan_tx_tag_present(skb))
return br;
- vlan = __vlan_find_dev_deep(br, skb->vlan_proto,
+ vlan = __vlan_find_dev_deep_rcu(br, skb->vlan_proto,
vlan_tx_tag_get(skb) & VLAN_VID_MASK);
return vlan ? vlan : br;
@@ -859,12 +859,12 @@ static unsigned int br_nf_forward_arp(const struct nf_hook_ops *ops,
return NF_STOLEN;
}
-#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV4)
+#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
static int br_nf_dev_queue_xmit(struct sk_buff *skb)
{
int ret;
- if (skb->nfct != NULL && skb->protocol == htons(ETH_P_IP) &&
+ if (skb->protocol == htons(ETH_P_IP) &&
skb->len + nf_bridge_mtu_reduction(skb) > skb->dev->mtu &&
!skb_is_gso(skb)) {
if (br_parse_ip_options(skb))
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index e74b6d530cb..26edb518b83 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -328,6 +328,7 @@ static void br_set_port_flag(struct net_bridge_port *p, struct nlattr *tb[],
static int br_setport(struct net_bridge_port *p, struct nlattr *tb[])
{
int err;
+ unsigned long old_flags = p->flags;
br_set_port_flag(p, tb, IFLA_BRPORT_MODE, BR_HAIRPIN_MODE);
br_set_port_flag(p, tb, IFLA_BRPORT_GUARD, BR_BPDU_GUARD);
@@ -353,6 +354,8 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[])
if (err)
return err;
}
+
+ br_port_flags_change(p, old_flags ^ p->flags);
return 0;
}
@@ -445,6 +448,20 @@ static int br_validate(struct nlattr *tb[], struct nlattr *data[])
return 0;
}
+/* .newlink handler of br_link_ops: creates a bridge device via rtnetlink.
+ * If the request carries IFLA_ADDRESS, that address is applied through
+ * br_stp_change_bridge_id() under br->lock before the device is
+ * registered. @src_net and @data are not used.
+ * Returns the result of register_netdevice().
+ */
+static int br_dev_newlink(struct net *src_net, struct net_device *dev,
+ struct nlattr *tb[], struct nlattr *data[])
+{
+ struct net_bridge *br = netdev_priv(dev);
+
+ if (tb[IFLA_ADDRESS]) {
+ spin_lock_bh(&br->lock);
+ br_stp_change_bridge_id(br, nla_data(tb[IFLA_ADDRESS]));
+ spin_unlock_bh(&br->lock);
+ }
+
+ return register_netdevice(dev);
+}
+
static size_t br_get_link_af_size(const struct net_device *dev)
{
struct net_port_vlans *pv;
@@ -473,6 +490,7 @@ struct rtnl_link_ops br_link_ops __read_mostly = {
.priv_size = sizeof(struct net_bridge),
.setup = br_dev_setup,
.validate = br_validate,
+ .newlink = br_dev_newlink,
.dellink = br_dev_delete,
};
diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c
deleted file mode 100644
index 2998dd1769a..00000000000
--- a/net/bridge/br_notify.c
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- * Device event handling
- * Linux ethernet bridge
- *
- * Authors:
- * Lennert Buytenhek <buytenh@gnu.org>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/kernel.h>
-#include <linux/rtnetlink.h>
-#include <net/net_namespace.h>
-
-#include "br_private.h"
-
-static int br_device_event(struct notifier_block *unused, unsigned long event, void *ptr);
-
-struct notifier_block br_device_notifier = {
- .notifier_call = br_device_event
-};
-
-/*
- * Handle changes in state of network devices enslaved to a bridge.
- *
- * Note: don't care about up/down if bridge itself is down, because
- * port state is checked when bridge is brought up.
- */
-static int br_device_event(struct notifier_block *unused, unsigned long event, void *ptr)
-{
- struct net_device *dev = netdev_notifier_info_to_dev(ptr);
- struct net_bridge_port *p;
- struct net_bridge *br;
- bool changed_addr;
- int err;
-
- /* register of bridge completed, add sysfs entries */
- if ((dev->priv_flags & IFF_EBRIDGE) && event == NETDEV_REGISTER) {
- br_sysfs_addbr(dev);
- return NOTIFY_DONE;
- }
-
- /* not a port of a bridge */
- p = br_port_get_rtnl(dev);
- if (!p)
- return NOTIFY_DONE;
-
- br = p->br;
-
- switch (event) {
- case NETDEV_CHANGEMTU:
- dev_set_mtu(br->dev, br_min_mtu(br));
- break;
-
- case NETDEV_CHANGEADDR:
- spin_lock_bh(&br->lock);
- br_fdb_changeaddr(p, dev->dev_addr);
- changed_addr = br_stp_recalculate_bridge_id(br);
- spin_unlock_bh(&br->lock);
-
- if (changed_addr)
- call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev);
-
- break;
-
- case NETDEV_CHANGE:
- br_port_carrier_check(p);
- break;
-
- case NETDEV_FEAT_CHANGE:
- netdev_update_features(br->dev);
- break;
-
- case NETDEV_DOWN:
- spin_lock_bh(&br->lock);
- if (br->dev->flags & IFF_UP)
- br_stp_disable_port(p);
- spin_unlock_bh(&br->lock);
- break;
-
- case NETDEV_UP:
- if (netif_running(br->dev) && netif_oper_up(dev)) {
- spin_lock_bh(&br->lock);
- br_stp_enable_port(p);
- spin_unlock_bh(&br->lock);
- }
- break;
-
- case NETDEV_UNREGISTER:
- br_del_if(br, dev);
- break;
-
- case NETDEV_CHANGENAME:
- err = br_sysfs_renameif(p);
- if (err)
- return notifier_from_errno(err);
- break;
-
- case NETDEV_PRE_TYPE_CHANGE:
- /* Forbid underlaying device to change its type. */
- return NOTIFY_BAD;
-
- case NETDEV_RESEND_IGMP:
- /* Propagate to master device */
- call_netdevice_notifiers(event, br->dev);
- break;
- }
-
- /* Events that may cause spanning tree to refresh */
- if (event == NETDEV_CHANGEADDR || event == NETDEV_UP ||
- event == NETDEV_CHANGE || event == NETDEV_DOWN)
- br_ifinfo_notify(RTM_NEWLINK, p);
-
- return NOTIFY_DONE;
-}
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 3ba11bc99b6..23caf5b0309 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -35,6 +35,8 @@
#define BR_GROUPFWD_DEFAULT 0
/* Don't allow forwarding control protocols like STP and LLDP */
#define BR_GROUPFWD_RESTRICTED 0x4007u
+/* The Nearest Customer Bridge Group Address, 01-80-C2-00-00-[00,0B,0C,0D,0F] */
+#define BR_GROUPFWD_8021AD 0xB801u
/* Path to usermode spanning tree program */
#define BR_STP_PROG "/sbin/bridge-stp"
@@ -46,38 +48,32 @@ typedef __u16 port_id;
struct bridge_id
{
unsigned char prio[2];
- unsigned char addr[6];
+ unsigned char addr[ETH_ALEN];
};
struct mac_addr
{
- unsigned char addr[6];
-};
-
-struct br_ip
-{
- union {
- __be32 ip4;
-#if IS_ENABLED(CONFIG_IPV6)
- struct in6_addr ip6;
-#endif
- } u;
- __be16 proto;
- __u16 vid;
+ unsigned char addr[ETH_ALEN];
};
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
/* our own querier */
-struct bridge_mcast_query {
+struct bridge_mcast_own_query {
struct timer_list timer;
u32 startup_sent;
};
/* other querier */
-struct bridge_mcast_querier {
+struct bridge_mcast_other_query {
struct timer_list timer;
unsigned long delay_time;
};
+
+/* selected querier */
+struct bridge_mcast_querier {
+ struct br_ip addr;
+ struct net_bridge_port __rcu *port;
+};
#endif
struct net_port_vlans {
@@ -174,11 +170,13 @@ struct net_bridge_port
#define BR_ADMIN_COST 0x00000010
#define BR_LEARNING 0x00000020
#define BR_FLOOD 0x00000040
+#define BR_AUTO_MASK (BR_FLOOD | BR_LEARNING)
+#define BR_PROMISC 0x00000080
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
- struct bridge_mcast_query ip4_query;
+ struct bridge_mcast_own_query ip4_own_query;
#if IS_ENABLED(CONFIG_IPV6)
- struct bridge_mcast_query ip6_query;
+ struct bridge_mcast_own_query ip6_own_query;
#endif /* IS_ENABLED(CONFIG_IPV6) */
unsigned char multicast_router;
struct timer_list multicast_router_timer;
@@ -198,6 +196,9 @@ struct net_bridge_port
#endif
};
+#define br_auto_port(p) ((p)->flags & BR_AUTO_MASK)
+#define br_promisc_port(p) ((p)->flags & BR_PROMISC)
+
#define br_port_exists(dev) (dev->priv_flags & IFF_BRIDGE_PORT)
static inline struct net_bridge_port *br_port_get_rcu(const struct net_device *dev)
@@ -227,6 +228,7 @@ struct net_bridge
bool nf_call_arptables;
#endif
u16 group_fwd_mask;
+ u16 group_fwd_mask_required;
/* STP */
bridge_id designated_root;
@@ -241,6 +243,7 @@ struct net_bridge
unsigned long bridge_forward_delay;
u8 group_addr[ETH_ALEN];
+ bool group_addr_set;
u16 root_port;
enum {
@@ -277,11 +280,13 @@ struct net_bridge
struct hlist_head router_list;
struct timer_list multicast_router_timer;
+ struct bridge_mcast_other_query ip4_other_query;
+ struct bridge_mcast_own_query ip4_own_query;
struct bridge_mcast_querier ip4_querier;
- struct bridge_mcast_query ip4_query;
#if IS_ENABLED(CONFIG_IPV6)
+ struct bridge_mcast_other_query ip6_other_query;
+ struct bridge_mcast_own_query ip6_own_query;
struct bridge_mcast_querier ip6_querier;
- struct bridge_mcast_query ip6_query;
#endif /* IS_ENABLED(CONFIG_IPV6) */
#endif
@@ -290,8 +295,10 @@ struct net_bridge
struct timer_list topology_change_timer;
struct timer_list gc_timer;
struct kobject *ifobj;
+ u32 auto_cnt;
#ifdef CONFIG_BRIDGE_VLAN_FILTERING
u8 vlan_enabled;
+ __be16 vlan_proto;
struct net_port_vlans __rcu *vlan_info;
#endif
};
@@ -327,8 +334,6 @@ struct br_input_skb_cb {
#define br_debug(br, format, args...) \
pr_debug("%s: " format, (br)->dev->name, ##args)
-extern struct notifier_block br_device_notifier;
-
/* called under bridge lock */
static inline int br_is_root_bridge(const struct net_bridge *br)
{
@@ -349,7 +354,7 @@ static inline void br_netpoll_send_skb(const struct net_bridge_port *p,
netpoll_send_skb(np, skb);
}
-int br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp);
+int br_netpoll_enable(struct net_bridge_port *p);
void br_netpoll_disable(struct net_bridge_port *p);
#else
static inline void br_netpoll_send_skb(const struct net_bridge_port *p,
@@ -357,7 +362,7 @@ static inline void br_netpoll_send_skb(const struct net_bridge_port *p,
{
}
-static inline int br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp)
+static inline int br_netpoll_enable(struct net_bridge_port *p)
{
return 0;
}
@@ -395,6 +400,8 @@ int br_fdb_add(struct ndmsg *nlh, struct nlattr *tb[], struct net_device *dev,
const unsigned char *addr, u16 nlh_flags);
int br_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
struct net_device *dev, int idx);
+int br_fdb_sync_static(struct net_bridge *br, struct net_bridge_port *p);
+void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p);
/* br_forward.c */
void br_deliver(const struct net_bridge_port *to, struct sk_buff *skb);
@@ -415,6 +422,8 @@ int br_del_if(struct net_bridge *br, struct net_device *dev);
int br_min_mtu(const struct net_bridge *br);
netdev_features_t br_features_recompute(struct net_bridge *br,
netdev_features_t features);
+void br_port_flags_change(struct net_bridge_port *port, unsigned long mask);
+void br_manage_promisc(struct net_bridge *br);
/* br_input.c */
int br_handle_frame_finish(struct sk_buff *skb);
@@ -485,7 +494,7 @@ static inline bool br_multicast_is_router(struct net_bridge *br)
static inline bool
__br_multicast_querier_exists(struct net_bridge *br,
- struct bridge_mcast_querier *querier)
+ struct bridge_mcast_other_query *querier)
{
return time_is_before_jiffies(querier->delay_time) &&
(br->multicast_querier || timer_pending(&querier->timer));
@@ -496,10 +505,10 @@ static inline bool br_multicast_querier_exists(struct net_bridge *br,
{
switch (eth->h_proto) {
case (htons(ETH_P_IP)):
- return __br_multicast_querier_exists(br, &br->ip4_querier);
+ return __br_multicast_querier_exists(br, &br->ip4_other_query);
#if IS_ENABLED(CONFIG_IPV6)
case (htons(ETH_P_IPV6)):
- return __br_multicast_querier_exists(br, &br->ip6_querier);
+ return __br_multicast_querier_exists(br, &br->ip6_other_query);
#endif
default:
return false;
@@ -581,6 +590,7 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v,
struct sk_buff *skb, u16 *vid);
bool br_allowed_egress(struct net_bridge *br, const struct net_port_vlans *v,
const struct sk_buff *skb);
+bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid);
struct sk_buff *br_handle_vlan(struct net_bridge *br,
const struct net_port_vlans *v,
struct sk_buff *skb);
@@ -588,7 +598,10 @@ int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags);
int br_vlan_delete(struct net_bridge *br, u16 vid);
void br_vlan_flush(struct net_bridge *br);
bool br_vlan_find(struct net_bridge *br, u16 vid);
+void br_recalculate_fwd_mask(struct net_bridge *br);
int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val);
+int br_vlan_set_proto(struct net_bridge *br, unsigned long val);
+void br_vlan_init(struct net_bridge *br);
int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags);
int nbp_vlan_delete(struct net_bridge_port *port, u16 vid);
void nbp_vlan_flush(struct net_bridge_port *port);
@@ -632,6 +645,10 @@ static inline u16 br_get_pvid(const struct net_port_vlans *v)
return v->pvid ?: VLAN_N_VID;
}
+static inline int br_vlan_enabled(struct net_bridge *br)
+{
+ return br->vlan_enabled;
+}
#else
static inline bool br_allowed_ingress(struct net_bridge *br,
struct net_port_vlans *v,
@@ -648,6 +665,12 @@ static inline bool br_allowed_egress(struct net_bridge *br,
return true;
}
+static inline bool br_should_learn(struct net_bridge_port *p,
+ struct sk_buff *skb, u16 *vid)
+{
+ return true;
+}
+
static inline struct sk_buff *br_handle_vlan(struct net_bridge *br,
const struct net_port_vlans *v,
struct sk_buff *skb)
@@ -674,6 +697,14 @@ static inline bool br_vlan_find(struct net_bridge *br, u16 vid)
return false;
}
+static inline void br_recalculate_fwd_mask(struct net_bridge *br)
+{
+}
+
+static inline void br_vlan_init(struct net_bridge *br)
+{
+}
+
static inline int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags)
{
return -EOPNOTSUPP;
@@ -712,6 +743,11 @@ static inline u16 br_get_pvid(const struct net_port_vlans *v)
{
return VLAN_N_VID; /* Returns invalid vid */
}
+
+static inline int br_vlan_enabled(struct net_bridge *br)
+{
+ return 0;
+}
#endif
/* br_netfilter.c */
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index 8dac65552f1..c9e2572b15f 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -312,10 +312,19 @@ static ssize_t group_addr_store(struct device *d,
new_addr[5] == 3) /* 802.1X PAE address */
return -EINVAL;
+ if (!rtnl_trylock())
+ return restart_syscall();
+
spin_lock_bh(&br->lock);
for (i = 0; i < 6; i++)
br->group_addr[i] = new_addr[i];
spin_unlock_bh(&br->lock);
+
+ br->group_addr_set = true;
+ br_recalculate_fwd_mask(br);
+
+ rtnl_unlock();
+
return len;
}
@@ -700,6 +709,22 @@ static ssize_t vlan_filtering_store(struct device *d,
return store_bridge_parm(d, buf, len, br_vlan_filter_toggle);
}
static DEVICE_ATTR_RW(vlan_filtering);
+
+static ssize_t vlan_protocol_show(struct device *d,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct net_bridge *br = to_bridge(d);
+ return sprintf(buf, "%#06x\n", ntohs(br->vlan_proto));
+}
+
+static ssize_t vlan_protocol_store(struct device *d,
+ struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ return store_bridge_parm(d, buf, len, br_vlan_set_proto);
+}
+static DEVICE_ATTR_RW(vlan_protocol);
#endif
static struct attribute *bridge_attrs[] = {
@@ -745,6 +770,7 @@ static struct attribute *bridge_attrs[] = {
#endif
#ifdef CONFIG_BRIDGE_VLAN_FILTERING
&dev_attr_vlan_filtering.attr,
+ &dev_attr_vlan_protocol.attr,
#endif
NULL
};
diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
index dd595bd7fa8..e561cd59b8a 100644
--- a/net/bridge/br_sysfs_if.c
+++ b/net/bridge/br_sysfs_if.c
@@ -41,20 +41,30 @@ static ssize_t show_##_name(struct net_bridge_port *p, char *buf) \
} \
static int store_##_name(struct net_bridge_port *p, unsigned long v) \
{ \
- unsigned long flags = p->flags; \
- if (v) \
- flags |= _mask; \
- else \
- flags &= ~_mask; \
- if (flags != p->flags) { \
- p->flags = flags; \
- br_ifinfo_notify(RTM_NEWLINK, p); \
- } \
- return 0; \
+ return store_flag(p, v, _mask); \
} \
static BRPORT_ATTR(_name, S_IRUGO | S_IWUSR, \
show_##_name, store_##_name)
+static int store_flag(struct net_bridge_port *p, unsigned long v,
+ unsigned long mask)
+{
+ unsigned long flags;
+
+ flags = p->flags;
+
+ if (v)
+ flags |= mask;
+ else
+ flags &= ~mask;
+
+ if (flags != p->flags) {
+ p->flags = flags;
+ br_port_flags_change(p, mask);
+ br_ifinfo_notify(RTM_NEWLINK, p);
+ }
+ return 0;
+}
static ssize_t show_path_cost(struct net_bridge_port *p, char *buf)
{
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index f23c74b3a95..2b2774fe070 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -60,7 +60,7 @@ static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags)
* that ever changes this code will allow tagged
* traffic to enter the bridge.
*/
- err = vlan_vid_add(dev, htons(ETH_P_8021Q), vid);
+ err = vlan_vid_add(dev, br->vlan_proto, vid);
if (err)
return err;
}
@@ -80,7 +80,7 @@ static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags)
out_filt:
if (p)
- vlan_vid_del(dev, htons(ETH_P_8021Q), vid);
+ vlan_vid_del(dev, br->vlan_proto, vid);
return err;
}
@@ -92,16 +92,18 @@ static int __vlan_del(struct net_port_vlans *v, u16 vid)
__vlan_delete_pvid(v, vid);
clear_bit(vid, v->untagged_bitmap);
- if (v->port_idx)
- vlan_vid_del(v->parent.port->dev, htons(ETH_P_8021Q), vid);
+ if (v->port_idx) {
+ struct net_bridge_port *p = v->parent.port;
+ vlan_vid_del(p->dev, p->br->vlan_proto, vid);
+ }
clear_bit(vid, v->vlan_bitmap);
v->num_vlans--;
if (bitmap_empty(v->vlan_bitmap, VLAN_N_VID)) {
if (v->port_idx)
- rcu_assign_pointer(v->parent.port->vlan_info, NULL);
+ RCU_INIT_POINTER(v->parent.port->vlan_info, NULL);
else
- rcu_assign_pointer(v->parent.br->vlan_info, NULL);
+ RCU_INIT_POINTER(v->parent.br->vlan_info, NULL);
kfree_rcu(v, rcu);
}
return 0;
@@ -113,9 +115,9 @@ static void __vlan_flush(struct net_port_vlans *v)
v->pvid = 0;
bitmap_zero(v->vlan_bitmap, VLAN_N_VID);
if (v->port_idx)
- rcu_assign_pointer(v->parent.port->vlan_info, NULL);
+ RCU_INIT_POINTER(v->parent.port->vlan_info, NULL);
else
- rcu_assign_pointer(v->parent.br->vlan_info, NULL);
+ RCU_INIT_POINTER(v->parent.br->vlan_info, NULL);
kfree_rcu(v, rcu);
}
@@ -158,7 +160,8 @@ out:
bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v,
struct sk_buff *skb, u16 *vid)
{
- int err;
+ bool tagged;
+ __be16 proto;
/* If VLAN filtering is disabled on the bridge, all packets are
* permitted.
@@ -170,21 +173,43 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v,
* rejected.
*/
if (!v)
- return false;
+ goto drop;
+
+ proto = br->vlan_proto;
/* If vlan tx offload is disabled on bridge device and frame was
* sent from vlan device on the bridge device, it does not have
* HW accelerated vlan tag.
*/
if (unlikely(!vlan_tx_tag_present(skb) &&
- (skb->protocol == htons(ETH_P_8021Q) ||
- skb->protocol == htons(ETH_P_8021AD)))) {
+ skb->protocol == proto)) {
skb = vlan_untag(skb);
if (unlikely(!skb))
return false;
}
- err = br_vlan_get_tag(skb, vid);
+ if (!br_vlan_get_tag(skb, vid)) {
+ /* Tagged frame */
+ if (skb->vlan_proto != proto) {
+ /* Protocol-mismatch, empty out vlan_tci for new tag */
+ skb_push(skb, ETH_HLEN);
+ skb = __vlan_put_tag(skb, skb->vlan_proto,
+ vlan_tx_tag_get(skb));
+ if (unlikely(!skb))
+ return false;
+
+ skb_pull(skb, ETH_HLEN);
+ skb_reset_mac_len(skb);
+ *vid = 0;
+ tagged = false;
+ } else {
+ tagged = true;
+ }
+ } else {
+ /* Untagged frame */
+ tagged = false;
+ }
+
if (!*vid) {
u16 pvid = br_get_pvid(v);
@@ -193,15 +218,15 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v,
* vlan untagged or priority-tagged traffic belongs to.
*/
if (pvid == VLAN_N_VID)
- return false;
+ goto drop;
/* PVID is set on this port. Any untagged or priority-tagged
* ingress frame is considered to belong to this vlan.
*/
*vid = pvid;
- if (likely(err))
+ if (likely(!tagged))
/* Untagged Frame. */
- __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), pvid);
+ __vlan_hwaccel_put_tag(skb, proto, pvid);
else
/* Priority-tagged Frame.
* At this point, We know that skb->vlan_tci had
@@ -216,7 +241,8 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v,
/* Frame had a valid vlan tag. See if vlan is allowed */
if (test_bit(*vid, v->vlan_bitmap))
return true;
-
+drop:
+ kfree_skb(skb);
return false;
}
@@ -240,6 +266,36 @@ bool br_allowed_egress(struct net_bridge *br,
return false;
}
+/* Called under RCU */
+bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid)
+{
+ struct net_bridge *br = p->br;
+ struct net_port_vlans *v;
+
+ if (!br->vlan_enabled)
+ return true;
+
+ v = rcu_dereference(p->vlan_info);
+ if (!v)
+ return false;
+
+ if (!br_vlan_get_tag(skb, vid) && skb->vlan_proto != br->vlan_proto)
+ *vid = 0;
+
+ if (!*vid) {
+ *vid = br_get_pvid(v);
+ if (*vid == VLAN_N_VID)
+ return false;
+
+ return true;
+ }
+
+ if (test_bit(*vid, v->vlan_bitmap))
+ return true;
+
+ return false;
+}
+
/* Must be protected by RTNL.
* Must be called with vid in range from 1 to 4094 inclusive.
*/
@@ -322,6 +378,33 @@ out:
return found;
}
+/* Must be protected by RTNL. */
+static void recalculate_group_addr(struct net_bridge *br)
+{
+ if (br->group_addr_set)
+ return;
+
+ spin_lock_bh(&br->lock);
+ if (!br->vlan_enabled || br->vlan_proto == htons(ETH_P_8021Q)) {
+ /* Bridge Group Address */
+ br->group_addr[5] = 0x00;
+ } else { /* vlan_enabled && ETH_P_8021AD */
+ /* Provider Bridge Group Address */
+ br->group_addr[5] = 0x08;
+ }
+ spin_unlock_bh(&br->lock);
+}
+
+/* Must be protected by RTNL. */
+void br_recalculate_fwd_mask(struct net_bridge *br)
+{
+ if (!br->vlan_enabled || br->vlan_proto == htons(ETH_P_8021Q))
+ br->group_fwd_mask_required = BR_GROUPFWD_DEFAULT;
+ else /* vlan_enabled && ETH_P_8021AD */
+ br->group_fwd_mask_required = BR_GROUPFWD_8021AD &
+ ~(1u << br->group_addr[5]);
+}
+
int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val)
{
if (!rtnl_trylock())
@@ -331,12 +414,88 @@ int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val)
goto unlock;
br->vlan_enabled = val;
+ br_manage_promisc(br);
+ recalculate_group_addr(br);
+ br_recalculate_fwd_mask(br);
unlock:
rtnl_unlock();
return 0;
}
+int br_vlan_set_proto(struct net_bridge *br, unsigned long val)
+{
+ int err = 0;
+ struct net_bridge_port *p;
+ struct net_port_vlans *pv;
+ __be16 proto, oldproto;
+ u16 vid, errvid;
+
+ if (val != ETH_P_8021Q && val != ETH_P_8021AD)
+ return -EPROTONOSUPPORT;
+
+ if (!rtnl_trylock())
+ return restart_syscall();
+
+ proto = htons(val);
+ if (br->vlan_proto == proto)
+ goto unlock;
+
+ /* Add VLANs for the new proto to the device filter. */
+ list_for_each_entry(p, &br->port_list, list) {
+ pv = rtnl_dereference(p->vlan_info);
+ if (!pv)
+ continue;
+
+ for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) {
+ err = vlan_vid_add(p->dev, proto, vid);
+ if (err)
+ goto err_filt;
+ }
+ }
+
+ oldproto = br->vlan_proto;
+ br->vlan_proto = proto;
+
+ recalculate_group_addr(br);
+ br_recalculate_fwd_mask(br);
+
+ /* Delete VLANs for the old proto from the device filter. */
+ list_for_each_entry(p, &br->port_list, list) {
+ pv = rtnl_dereference(p->vlan_info);
+ if (!pv)
+ continue;
+
+ for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID)
+ vlan_vid_del(p->dev, oldproto, vid);
+ }
+
+unlock:
+ rtnl_unlock();
+ return err;
+
+err_filt:
+ errvid = vid;
+ for_each_set_bit(vid, pv->vlan_bitmap, errvid)
+ vlan_vid_del(p->dev, proto, vid);
+
+ list_for_each_entry_continue_reverse(p, &br->port_list, list) {
+ pv = rtnl_dereference(p->vlan_info);
+ if (!pv)
+ continue;
+
+ for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID)
+ vlan_vid_del(p->dev, proto, vid);
+ }
+
+ goto unlock;
+}
+
+void br_vlan_init(struct net_bridge *br)
+{
+ br->vlan_proto = htons(ETH_P_8021Q);
+}
+
/* Must be protected by RTNL.
* Must be called with vid in range from 1 to 4094 inclusive.
*/
@@ -403,7 +562,7 @@ void nbp_vlan_flush(struct net_bridge_port *port)
return;
for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID)
- vlan_vid_del(port->dev, htons(ETH_P_8021Q), vid);
+ vlan_vid_del(port->dev, port->br->vlan_proto, vid);
__vlan_flush(pv);
}
diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig
index 5ca74a0e595..629dc77874a 100644
--- a/net/bridge/netfilter/Kconfig
+++ b/net/bridge/netfilter/Kconfig
@@ -2,14 +2,23 @@
# Bridge netfilter configuration
#
#
-config NF_TABLES_BRIDGE
- depends on NF_TABLES
+menuconfig NF_TABLES_BRIDGE
+ depends on BRIDGE && NETFILTER && NF_TABLES
tristate "Ethernet Bridge nf_tables support"
+if NF_TABLES_BRIDGE
+
+config NFT_BRIDGE_META
+ tristate "Netfilter nf_table bridge meta support"
+ depends on NFT_META
+ help
+ Add support for bridge dedicated meta key.
+
+endif # NF_TABLES_BRIDGE
+
menuconfig BRIDGE_NF_EBTABLES
tristate "Ethernet Bridge tables (ebtables) support"
- depends on BRIDGE && NETFILTER
- select NETFILTER_XTABLES
+ depends on BRIDGE && NETFILTER && NETFILTER_XTABLES
help
ebtables is a general, extensible frame/packet identification
framework. Say 'Y' or 'M' here if you want to do Ethernet
diff --git a/net/bridge/netfilter/Makefile b/net/bridge/netfilter/Makefile
index ea7629f58b3..6f2f3943d66 100644
--- a/net/bridge/netfilter/Makefile
+++ b/net/bridge/netfilter/Makefile
@@ -3,6 +3,7 @@
#
obj-$(CONFIG_NF_TABLES_BRIDGE) += nf_tables_bridge.o
+obj-$(CONFIG_NFT_BRIDGE_META) += nft_meta_bridge.o
obj-$(CONFIG_BRIDGE_NF_EBTABLES) += ebtables.o
diff --git a/net/bridge/netfilter/ebt_among.c b/net/bridge/netfilter/ebt_among.c
index 3fb3c848aff..9024283d2bc 100644
--- a/net/bridge/netfilter/ebt_among.c
+++ b/net/bridge/netfilter/ebt_among.c
@@ -28,7 +28,7 @@ static bool ebt_mac_wormhash_contains(const struct ebt_mac_wormhash *wh,
uint32_t cmp[2] = { 0, 0 };
int key = ((const unsigned char *)mac)[5];
- memcpy(((char *) cmp) + 2, mac, ETH_ALEN);
+ ether_addr_copy(((char *) cmp) + 2, mac);
start = wh->table[key];
limit = wh->table[key + 1];
if (ip) {
diff --git a/net/bridge/netfilter/ebt_dnat.c b/net/bridge/netfilter/ebt_dnat.c
index c59f7bfae6e..4e0b0c35932 100644
--- a/net/bridge/netfilter/ebt_dnat.c
+++ b/net/bridge/netfilter/ebt_dnat.c
@@ -22,7 +22,7 @@ ebt_dnat_tg(struct sk_buff *skb, const struct xt_action_param *par)
if (!skb_make_writable(skb, 0))
return EBT_DROP;
- memcpy(eth_hdr(skb)->h_dest, info->mac, ETH_ALEN);
+ ether_addr_copy(eth_hdr(skb)->h_dest, info->mac);
return info->target;
}
diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c
index 46624bb6d9b..203964997a5 100644
--- a/net/bridge/netfilter/ebt_redirect.c
+++ b/net/bridge/netfilter/ebt_redirect.c
@@ -25,10 +25,10 @@ ebt_redirect_tg(struct sk_buff *skb, const struct xt_action_param *par)
if (par->hooknum != NF_BR_BROUTING)
/* rcu_read_lock()ed by nf_hook_slow */
- memcpy(eth_hdr(skb)->h_dest,
- br_port_get_rcu(par->in)->br->dev->dev_addr, ETH_ALEN);
+ ether_addr_copy(eth_hdr(skb)->h_dest,
+ br_port_get_rcu(par->in)->br->dev->dev_addr);
else
- memcpy(eth_hdr(skb)->h_dest, par->in->dev_addr, ETH_ALEN);
+ ether_addr_copy(eth_hdr(skb)->h_dest, par->in->dev_addr);
skb->pkt_type = PACKET_HOST;
return info->target;
}
diff --git a/net/bridge/netfilter/ebt_snat.c b/net/bridge/netfilter/ebt_snat.c
index 0f6b118d6cb..e56ccd060d2 100644
--- a/net/bridge/netfilter/ebt_snat.c
+++ b/net/bridge/netfilter/ebt_snat.c
@@ -24,7 +24,7 @@ ebt_snat_tg(struct sk_buff *skb, const struct xt_action_param *par)
if (!skb_make_writable(skb, 0))
return EBT_DROP;
- memcpy(eth_hdr(skb)->h_source, info->mac, ETH_ALEN);
+ ether_addr_copy(eth_hdr(skb)->h_source, info->mac);
if (!(info->target & NAT_ARP_BIT) &&
eth_hdr(skb)->h_proto == htons(ETH_P_ARP)) {
const struct arphdr *ap;
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 0e474b13463..1059ed3bc25 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1044,10 +1044,9 @@ static int do_replace_finish(struct net *net, struct ebt_replace *repl,
if (repl->num_counters &&
copy_to_user(repl->counters, counterstmp,
repl->num_counters * sizeof(struct ebt_counter))) {
- ret = -EFAULT;
+ /* Silent error, can't fail, new table is already in place */
+ net_warn_ratelimited("ebtables: counters copy to user failed while replacing table\n");
}
- else
- ret = 0;
/* decrease module count and free resources */
EBT_ENTRY_ITERATE(table->entries, table->entries_size,
diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c
new file mode 100644
index 00000000000..4f02109d708
--- /dev/null
+++ b/net/bridge/netfilter/nft_meta_bridge.c
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2014 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nft_meta.h>
+
+#include "../br_private.h"
+
+static void nft_meta_bridge_get_eval(const struct nft_expr *expr,
+ struct nft_data data[NFT_REG_MAX + 1],
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_meta *priv = nft_expr_priv(expr);
+ const struct net_device *in = pkt->in, *out = pkt->out;
+ struct nft_data *dest = &data[priv->dreg];
+ const struct net_bridge_port *p;
+
+ switch (priv->key) {
+ case NFT_META_BRI_IIFNAME:
+ if (in == NULL || (p = br_port_get_rcu(in)) == NULL)
+ goto err;
+ break;
+ case NFT_META_BRI_OIFNAME:
+ if (out == NULL || (p = br_port_get_rcu(out)) == NULL)
+ goto err;
+ break;
+ default:
+ goto out;
+ }
+
+ strncpy((char *)dest->data, p->br->dev->name, sizeof(dest->data));
+ return;
+out:
+ return nft_meta_get_eval(expr, data, pkt);
+err:
+ data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+}
+
+static int nft_meta_bridge_get_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_meta *priv = nft_expr_priv(expr);
+ int err;
+
+ priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY]));
+ switch (priv->key) {
+ case NFT_META_BRI_IIFNAME:
+ case NFT_META_BRI_OIFNAME:
+ break;
+ default:
+ return nft_meta_get_init(ctx, expr, tb);
+ }
+
+ priv->dreg = ntohl(nla_get_be32(tb[NFTA_META_DREG]));
+ err = nft_validate_output_register(priv->dreg);
+ if (err < 0)
+ return err;
+
+ err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
+ if (err < 0)
+ return err;
+
+ return 0;
+}
+
+static struct nft_expr_type nft_meta_bridge_type;
+static const struct nft_expr_ops nft_meta_bridge_get_ops = {
+ .type = &nft_meta_bridge_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)),
+ .eval = nft_meta_bridge_get_eval,
+ .init = nft_meta_bridge_get_init,
+ .dump = nft_meta_get_dump,
+};
+
+static const struct nft_expr_ops nft_meta_bridge_set_ops = {
+ .type = &nft_meta_bridge_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)),
+ .eval = nft_meta_set_eval,
+ .init = nft_meta_set_init,
+ .dump = nft_meta_set_dump,
+};
+
+static const struct nft_expr_ops *
+nft_meta_bridge_select_ops(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[])
+{
+ if (tb[NFTA_META_KEY] == NULL)
+ return ERR_PTR(-EINVAL);
+
+ if (tb[NFTA_META_DREG] && tb[NFTA_META_SREG])
+ return ERR_PTR(-EINVAL);
+
+ if (tb[NFTA_META_DREG])
+ return &nft_meta_bridge_get_ops;
+
+ if (tb[NFTA_META_SREG])
+ return &nft_meta_bridge_set_ops;
+
+ return ERR_PTR(-EINVAL);
+}
+
+static struct nft_expr_type nft_meta_bridge_type __read_mostly = {
+ .family = NFPROTO_BRIDGE,
+ .name = "meta",
+ .select_ops = &nft_meta_bridge_select_ops,
+ .policy = nft_meta_policy,
+ .maxattr = NFTA_META_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_meta_bridge_module_init(void)
+{
+ return nft_register_expr(&nft_meta_bridge_type);
+}
+
+static void __exit nft_meta_bridge_module_exit(void)
+{
+ nft_unregister_expr(&nft_meta_bridge_type);
+}
+
+module_init(nft_meta_bridge_module_init);
+module_exit(nft_meta_bridge_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Tomasz Bursztyka <tomasz.bursztyka@linux.intel.com>");
+MODULE_ALIAS_NFT_AF_EXPR(AF_BRIDGE, "meta");
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index d6be3edb7a4..e8437094d15 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -124,7 +124,6 @@ static void caif_flow_ctrl(struct sock *sk, int mode)
static int caif_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
int err;
- int skb_len;
unsigned long flags;
struct sk_buff_head *list = &sk->sk_receive_queue;
struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
@@ -153,14 +152,13 @@ static int caif_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
* may be freed by other threads of control pulling packets
* from the queue.
*/
- skb_len = skb->len;
spin_lock_irqsave(&list->lock, flags);
if (!sock_flag(sk, SOCK_DEAD))
__skb_queue_tail(list, skb);
spin_unlock_irqrestore(&list->lock, flags);
if (!sock_flag(sk, SOCK_DEAD))
- sk->sk_data_ready(sk, skb_len);
+ sk->sk_data_ready(sk);
else
kfree_skb(skb);
return 0;
diff --git a/net/can/af_can.c b/net/can/af_can.c
index a27f8aad9e9..ce82337521f 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -338,6 +338,29 @@ static struct dev_rcv_lists *find_dev_rcv_lists(struct net_device *dev)
}
/**
+ * effhash - hash function for 29 bit CAN identifier reduction
+ * @can_id: 29 bit CAN identifier
+ *
+ * Description:
+ * To reduce the linear traversal in one linked list of _single_ EFF CAN
+ * frame subscriptions the 29 bit identifier is mapped to 10 bits.
+ * (see CAN_EFF_RCV_HASH_BITS definition)
+ *
+ * Return:
+ * Hash value from 0x000 - 0x3FF ( enforced by CAN_EFF_RCV_HASH_BITS mask )
+ */
+static unsigned int effhash(canid_t can_id)
+{
+ unsigned int hash;
+
+ hash = can_id;
+ hash ^= can_id >> CAN_EFF_RCV_HASH_BITS;
+ hash ^= can_id >> (2 * CAN_EFF_RCV_HASH_BITS);
+
+ return hash & ((1 << CAN_EFF_RCV_HASH_BITS) - 1);
+}
+
+/**
* find_rcv_list - determine optimal filterlist inside device filter struct
* @can_id: pointer to CAN identifier of a given can_filter
* @mask: pointer to CAN mask of a given can_filter
@@ -400,10 +423,8 @@ static struct hlist_head *find_rcv_list(canid_t *can_id, canid_t *mask,
!(*can_id & CAN_RTR_FLAG)) {
if (*can_id & CAN_EFF_FLAG) {
- if (*mask == (CAN_EFF_MASK | CAN_EFF_RTR_FLAGS)) {
- /* RFC: a future use-case for hash-tables? */
- return &d->rx[RX_EFF];
- }
+ if (*mask == (CAN_EFF_MASK | CAN_EFF_RTR_FLAGS))
+ return &d->rx_eff[effhash(*can_id)];
} else {
if (*mask == (CAN_SFF_MASK | CAN_EFF_RTR_FLAGS))
return &d->rx_sff[*can_id];
@@ -632,7 +653,7 @@ static int can_rcv_filter(struct dev_rcv_lists *d, struct sk_buff *skb)
return matches;
if (can_id & CAN_EFF_FLAG) {
- hlist_for_each_entry_rcu(r, &d->rx[RX_EFF], list) {
+ hlist_for_each_entry_rcu(r, &d->rx_eff[effhash(can_id)], list) {
if (r->can_id == can_id) {
deliver(skb, r);
matches++;
diff --git a/net/can/af_can.h b/net/can/af_can.h
index 6de58b40535..fca0fe9fc45 100644
--- a/net/can/af_can.h
+++ b/net/can/af_can.h
@@ -59,12 +59,17 @@ struct receiver {
char *ident;
};
-enum { RX_ERR, RX_ALL, RX_FIL, RX_INV, RX_EFF, RX_MAX };
+#define CAN_SFF_RCV_ARRAY_SZ (1 << CAN_SFF_ID_BITS)
+#define CAN_EFF_RCV_HASH_BITS 10
+#define CAN_EFF_RCV_ARRAY_SZ (1 << CAN_EFF_RCV_HASH_BITS)
+
+enum { RX_ERR, RX_ALL, RX_FIL, RX_INV, RX_MAX };
/* per device receive filters linked at dev->ml_priv */
struct dev_rcv_lists {
struct hlist_head rx[RX_MAX];
- struct hlist_head rx_sff[0x800];
+ struct hlist_head rx_sff[CAN_SFF_RCV_ARRAY_SZ];
+ struct hlist_head rx_eff[CAN_EFF_RCV_ARRAY_SZ];
int remove_on_zero_entries;
int entries;
};
diff --git a/net/can/gw.c b/net/can/gw.c
index ac31891967d..050a2110d43 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -804,7 +804,7 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh)
u8 limhops = 0;
int err = 0;
- if (!capable(CAP_NET_ADMIN))
+ if (!netlink_capable(skb, CAP_NET_ADMIN))
return -EPERM;
if (nlmsg_len(nlh) < sizeof(*r))
@@ -893,7 +893,7 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh)
u8 limhops = 0;
int err = 0;
- if (!capable(CAP_NET_ADMIN))
+ if (!netlink_capable(skb, CAP_NET_ADMIN))
return -EPERM;
if (nlmsg_len(nlh) < sizeof(*r))
diff --git a/net/can/proc.c b/net/can/proc.c
index b543470c8f8..1a19b985a86 100644
--- a/net/can/proc.c
+++ b/net/can/proc.c
@@ -80,7 +80,6 @@ static const char rx_list_name[][8] = {
[RX_ALL] = "rx_all",
[RX_FIL] = "rx_fil",
[RX_INV] = "rx_inv",
- [RX_EFF] = "rx_eff",
};
/*
@@ -389,25 +388,26 @@ static const struct file_operations can_rcvlist_proc_fops = {
.release = single_release,
};
-static inline void can_rcvlist_sff_proc_show_one(struct seq_file *m,
- struct net_device *dev,
- struct dev_rcv_lists *d)
+static inline void can_rcvlist_proc_show_array(struct seq_file *m,
+ struct net_device *dev,
+ struct hlist_head *rcv_array,
+ unsigned int rcv_array_sz)
{
- int i;
+ unsigned int i;
int all_empty = 1;
/* check whether at least one list is non-empty */
- for (i = 0; i < 0x800; i++)
- if (!hlist_empty(&d->rx_sff[i])) {
+ for (i = 0; i < rcv_array_sz; i++)
+ if (!hlist_empty(&rcv_array[i])) {
all_empty = 0;
break;
}
if (!all_empty) {
can_print_recv_banner(m);
- for (i = 0; i < 0x800; i++) {
- if (!hlist_empty(&d->rx_sff[i]))
- can_print_rcvlist(m, &d->rx_sff[i], dev);
+ for (i = 0; i < rcv_array_sz; i++) {
+ if (!hlist_empty(&rcv_array[i]))
+ can_print_rcvlist(m, &rcv_array[i], dev);
}
} else
seq_printf(m, " (%s: no entry)\n", DNAME(dev));
@@ -425,12 +425,15 @@ static int can_rcvlist_sff_proc_show(struct seq_file *m, void *v)
/* sff receive list for 'all' CAN devices (dev == NULL) */
d = &can_rx_alldev_list;
- can_rcvlist_sff_proc_show_one(m, NULL, d);
+ can_rcvlist_proc_show_array(m, NULL, d->rx_sff, ARRAY_SIZE(d->rx_sff));
/* sff receive list for registered CAN devices */
for_each_netdev_rcu(&init_net, dev) {
- if (dev->type == ARPHRD_CAN && dev->ml_priv)
- can_rcvlist_sff_proc_show_one(m, dev, dev->ml_priv);
+ if (dev->type == ARPHRD_CAN && dev->ml_priv) {
+ d = dev->ml_priv;
+ can_rcvlist_proc_show_array(m, dev, d->rx_sff,
+ ARRAY_SIZE(d->rx_sff));
+ }
}
rcu_read_unlock();
@@ -452,6 +455,49 @@ static const struct file_operations can_rcvlist_sff_proc_fops = {
.release = single_release,
};
+
+static int can_rcvlist_eff_proc_show(struct seq_file *m, void *v)
+{
+ struct net_device *dev;
+ struct dev_rcv_lists *d;
+
+ /*
+ * RX_EFF: seq_file show routine for the rx_eff receive lists.
+ * Prints a heading, then (under rcu_read_lock()) the rx_eff array
+ * of the 'all devices' list followed by the rx_eff array of every
+ * ARPHRD_CAN netdev in init_net that has ml_priv set.
+ * Always returns 0.
+ */
+ seq_puts(m, "\nreceive list 'rx_eff':\n");
+
+ rcu_read_lock();
+
+ /* eff receive list for 'all' CAN devices (dev == NULL) */
+ d = &can_rx_alldev_list;
+ can_rcvlist_proc_show_array(m, NULL, d->rx_eff, ARRAY_SIZE(d->rx_eff));
+
+ /* eff receive list for registered CAN devices */
+ for_each_netdev_rcu(&init_net, dev) {
+ if (dev->type == ARPHRD_CAN && dev->ml_priv) {
+ d = dev->ml_priv;
+ can_rcvlist_proc_show_array(m, dev, d->rx_eff,
+ ARRAY_SIZE(d->rx_eff));
+ }
+ }
+
+ rcu_read_unlock();
+
+ seq_putc(m, '\n');
+ return 0;
+}
+
+static int can_rcvlist_eff_proc_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, can_rcvlist_eff_proc_show, NULL);
+}
+
+static const struct file_operations can_rcvlist_eff_proc_fops = {
+ .owner = THIS_MODULE,
+ .open = can_rcvlist_eff_proc_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
/*
* proc utility functions
*/
@@ -491,8 +537,8 @@ void can_init_proc(void)
&can_rcvlist_proc_fops, (void *)RX_FIL);
pde_rcvlist_inv = proc_create_data(CAN_PROC_RCVLIST_INV, 0644, can_dir,
&can_rcvlist_proc_fops, (void *)RX_INV);
- pde_rcvlist_eff = proc_create_data(CAN_PROC_RCVLIST_EFF, 0644, can_dir,
- &can_rcvlist_proc_fops, (void *)RX_EFF);
+ pde_rcvlist_eff = proc_create(CAN_PROC_RCVLIST_EFF, 0644, can_dir,
+ &can_rcvlist_eff_proc_fops);
pde_rcvlist_sff = proc_create(CAN_PROC_RCVLIST_SFF, 0644, can_dir,
&can_rcvlist_sff_proc_fops);
}
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 67d7721d237..1675021d8c1 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -72,6 +72,8 @@ const char *ceph_msg_type_name(int type)
case CEPH_MSG_MON_SUBSCRIBE_ACK: return "mon_subscribe_ack";
case CEPH_MSG_STATFS: return "statfs";
case CEPH_MSG_STATFS_REPLY: return "statfs_reply";
+ case CEPH_MSG_MON_GET_VERSION: return "mon_get_version";
+ case CEPH_MSG_MON_GET_VERSION_REPLY: return "mon_get_version_reply";
case CEPH_MSG_MDS_MAP: return "mds_map";
case CEPH_MSG_CLIENT_SESSION: return "client_session";
case CEPH_MSG_CLIENT_RECONNECT: return "client_reconnect";
diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c
index b703790b4e4..a1ef53c0441 100644
--- a/net/ceph/crush/mapper.c
+++ b/net/ceph/crush/mapper.c
@@ -292,10 +292,12 @@ static int is_out(const struct crush_map *map,
* @outpos: our position in that vector
* @tries: number of attempts to make
* @recurse_tries: number of attempts to have recursive chooseleaf make
- * @local_tries: localized retries
- * @local_fallback_tries: localized fallback retries
+ * @local_retries: localized retries
+ * @local_fallback_retries: localized fallback retries
* @recurse_to_leaf: true if we want one device under each item of given type (chooseleaf instead of choose)
+ * @vary_r: pass r to recursive calls
* @out2: second output vector for leaf items (if @recurse_to_leaf)
+ * @parent_r: r value passed from the parent
*/
static int crush_choose_firstn(const struct crush_map *map,
struct crush_bucket *bucket,
@@ -304,10 +306,12 @@ static int crush_choose_firstn(const struct crush_map *map,
int *out, int outpos,
unsigned int tries,
unsigned int recurse_tries,
- unsigned int local_tries,
- unsigned int local_fallback_tries,
+ unsigned int local_retries,
+ unsigned int local_fallback_retries,
int recurse_to_leaf,
- int *out2)
+ unsigned int vary_r,
+ int *out2,
+ int parent_r)
{
int rep;
unsigned int ftotal, flocal;
@@ -319,8 +323,11 @@ static int crush_choose_firstn(const struct crush_map *map,
int itemtype;
int collide, reject;
- dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d\n", recurse_to_leaf ? "_LEAF" : "",
- bucket->id, x, outpos, numrep);
+ dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d tries %d recurse_tries %d local_retries %d local_fallback_retries %d parent_r %d\n",
+ recurse_to_leaf ? "_LEAF" : "",
+ bucket->id, x, outpos, numrep,
+ tries, recurse_tries, local_retries, local_fallback_retries,
+ parent_r);
for (rep = outpos; rep < numrep; rep++) {
/* keep trying until we get a non-out, non-colliding item */
@@ -335,7 +342,7 @@ static int crush_choose_firstn(const struct crush_map *map,
do {
collide = 0;
retry_bucket = 0;
- r = rep;
+ r = rep + parent_r;
/* r' = r + f_total */
r += ftotal;
@@ -344,9 +351,9 @@ static int crush_choose_firstn(const struct crush_map *map,
reject = 1;
goto reject;
}
- if (local_fallback_tries > 0 &&
+ if (local_fallback_retries > 0 &&
flocal >= (in->size>>1) &&
- flocal > local_fallback_tries)
+ flocal > local_fallback_retries)
item = bucket_perm_choose(in, x, r);
else
item = crush_bucket_choose(in, x, r);
@@ -387,16 +394,23 @@ static int crush_choose_firstn(const struct crush_map *map,
reject = 0;
if (!collide && recurse_to_leaf) {
if (item < 0) {
+ int sub_r;
+ if (vary_r)
+ sub_r = r >> (vary_r-1);
+ else
+ sub_r = 0;
if (crush_choose_firstn(map,
map->buckets[-1-item],
weight, weight_max,
x, outpos+1, 0,
out2, outpos,
recurse_tries, 0,
- local_tries,
- local_fallback_tries,
+ local_retries,
+ local_fallback_retries,
0,
- NULL) <= outpos)
+ vary_r,
+ NULL,
+ sub_r) <= outpos)
/* didn't get leaf */
reject = 1;
} else {
@@ -420,14 +434,14 @@ reject:
ftotal++;
flocal++;
- if (collide && flocal <= local_tries)
+ if (collide && flocal <= local_retries)
/* retry locally a few times */
retry_bucket = 1;
- else if (local_fallback_tries > 0 &&
- flocal <= in->size + local_fallback_tries)
+ else if (local_fallback_retries > 0 &&
+ flocal <= in->size + local_fallback_retries)
/* exhaustive bucket search */
retry_bucket = 1;
- else if (ftotal <= tries)
+ else if (ftotal < tries)
/* then retry descent */
retry_descent = 1;
else
@@ -640,10 +654,20 @@ int crush_do_rule(const struct crush_map *map,
__u32 step;
int i, j;
int numrep;
- int choose_tries = map->choose_total_tries;
- int choose_local_tries = map->choose_local_tries;
- int choose_local_fallback_tries = map->choose_local_fallback_tries;
+ /*
+ * the original choose_total_tries value was off by one (it
+ * counted "retries" and not "tries"). add one.
+ */
+ int choose_tries = map->choose_total_tries + 1;
int choose_leaf_tries = 0;
+ /*
+ * the local tries values were counted as "retries", though,
+ * and need no adjustment
+ */
+ int choose_local_retries = map->choose_local_tries;
+ int choose_local_fallback_retries = map->choose_local_fallback_tries;
+
+ int vary_r = map->chooseleaf_vary_r;
if ((__u32)ruleno >= map->max_rules) {
dprintk(" bad ruleno %d\n", ruleno);
@@ -676,13 +700,18 @@ int crush_do_rule(const struct crush_map *map,
break;
case CRUSH_RULE_SET_CHOOSE_LOCAL_TRIES:
- if (curstep->arg1 > 0)
- choose_local_tries = curstep->arg1;
+ if (curstep->arg1 >= 0)
+ choose_local_retries = curstep->arg1;
break;
case CRUSH_RULE_SET_CHOOSE_LOCAL_FALLBACK_TRIES:
- if (curstep->arg1 > 0)
- choose_local_fallback_tries = curstep->arg1;
+ if (curstep->arg1 >= 0)
+ choose_local_fallback_retries = curstep->arg1;
+ break;
+
+ case CRUSH_RULE_SET_CHOOSELEAF_VARY_R:
+ if (curstep->arg1 >= 0)
+ vary_r = curstep->arg1;
break;
case CRUSH_RULE_CHOOSELEAF_FIRSTN:
@@ -734,10 +763,12 @@ int crush_do_rule(const struct crush_map *map,
o+osize, j,
choose_tries,
recurse_tries,
- choose_local_tries,
- choose_local_fallback_tries,
+ choose_local_retries,
+ choose_local_fallback_retries,
recurse_to_leaf,
- c+osize);
+ vary_r,
+ c+osize,
+ 0);
} else {
crush_choose_indep(
map,
diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c
index 258a382e75e..d1a62c69a9f 100644
--- a/net/ceph/debugfs.c
+++ b/net/ceph/debugfs.c
@@ -53,34 +53,55 @@ static int osdmap_show(struct seq_file *s, void *p)
{
int i;
struct ceph_client *client = s->private;
+ struct ceph_osdmap *map = client->osdc.osdmap;
struct rb_node *n;
- if (client->osdc.osdmap == NULL)
+ if (map == NULL)
return 0;
- seq_printf(s, "epoch %d\n", client->osdc.osdmap->epoch);
+
+ seq_printf(s, "epoch %d\n", map->epoch);
seq_printf(s, "flags%s%s\n",
- (client->osdc.osdmap->flags & CEPH_OSDMAP_NEARFULL) ?
- " NEARFULL" : "",
- (client->osdc.osdmap->flags & CEPH_OSDMAP_FULL) ?
- " FULL" : "");
- for (n = rb_first(&client->osdc.osdmap->pg_pools); n; n = rb_next(n)) {
+ (map->flags & CEPH_OSDMAP_NEARFULL) ? " NEARFULL" : "",
+ (map->flags & CEPH_OSDMAP_FULL) ? " FULL" : "");
+
+ for (n = rb_first(&map->pg_pools); n; n = rb_next(n)) {
struct ceph_pg_pool_info *pool =
rb_entry(n, struct ceph_pg_pool_info, node);
- seq_printf(s, "pg_pool %llu pg_num %d / %d\n",
- (unsigned long long)pool->id, pool->pg_num,
- pool->pg_num_mask);
+
+ seq_printf(s, "pool %lld pg_num %u (%d) read_tier %lld write_tier %lld\n",
+ pool->id, pool->pg_num, pool->pg_num_mask,
+ pool->read_tier, pool->write_tier);
}
- for (i = 0; i < client->osdc.osdmap->max_osd; i++) {
- struct ceph_entity_addr *addr =
- &client->osdc.osdmap->osd_addr[i];
- int state = client->osdc.osdmap->osd_state[i];
+ for (i = 0; i < map->max_osd; i++) {
+ struct ceph_entity_addr *addr = &map->osd_addr[i];
+ int state = map->osd_state[i];
char sb[64];
- seq_printf(s, "\tosd%d\t%s\t%3d%%\t(%s)\n",
+ seq_printf(s, "osd%d\t%s\t%3d%%\t(%s)\t%3d%%\n",
i, ceph_pr_addr(&addr->in_addr),
- ((client->osdc.osdmap->osd_weight[i]*100) >> 16),
- ceph_osdmap_state_str(sb, sizeof(sb), state));
+ ((map->osd_weight[i]*100) >> 16),
+ ceph_osdmap_state_str(sb, sizeof(sb), state),
+ ((ceph_get_primary_affinity(map, i)*100) >> 16));
+ }
+ for (n = rb_first(&map->pg_temp); n; n = rb_next(n)) {
+ struct ceph_pg_mapping *pg =
+ rb_entry(n, struct ceph_pg_mapping, node);
+
+ seq_printf(s, "pg_temp %llu.%x [", pg->pgid.pool,
+ pg->pgid.seed);
+ for (i = 0; i < pg->pg_temp.len; i++)
+ seq_printf(s, "%s%d", (i == 0 ? "" : ","),
+ pg->pg_temp.osds[i]);
+ seq_printf(s, "]\n");
}
+ for (n = rb_first(&map->primary_temp); n; n = rb_next(n)) {
+ struct ceph_pg_mapping *pg =
+ rb_entry(n, struct ceph_pg_mapping, node);
+
+ seq_printf(s, "primary_temp %llu.%x %d\n", pg->pgid.pool,
+ pg->pgid.seed, pg->primary_temp.osd);
+ }
+
return 0;
}
@@ -105,9 +126,13 @@ static int monc_show(struct seq_file *s, void *p)
req = rb_entry(rp, struct ceph_mon_generic_request, node);
op = le16_to_cpu(req->request->hdr.type);
+ /* one pending request per output line: every format ends in '\n' */
if (op == CEPH_MSG_STATFS)
- seq_printf(s, "%lld statfs\n", req->tid);
+ seq_printf(s, "%llu statfs\n", req->tid);
+ else if (op == CEPH_MSG_POOLOP)
+ seq_printf(s, "%llu poolop\n", req->tid);
+ else if (op == CEPH_MSG_MON_GET_VERSION)
+ seq_printf(s, "%llu mon_get_version\n", req->tid);
else
- seq_printf(s, "%lld unknown\n", req->tid);
+ seq_printf(s, "%llu unknown\n", req->tid);
}
mutex_unlock(&monc->mutex);
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 30efc5c1862..1948d592aa5 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -383,7 +383,7 @@ static void con_sock_state_closed(struct ceph_connection *con)
*/
/* data available on socket, or listen socket received a connect */
-static void ceph_sock_data_ready(struct sock *sk, int count_unused)
+static void ceph_sock_data_ready(struct sock *sk)
{
struct ceph_connection *con = sk->sk_user_data;
if (atomic_read(&con->msgr->stopping)) {
@@ -557,7 +557,7 @@ static int ceph_tcp_sendmsg(struct socket *sock, struct kvec *iov,
return r;
}
-static int ceph_tcp_sendpage(struct socket *sock, struct page *page,
+static int __ceph_tcp_sendpage(struct socket *sock, struct page *page,
int offset, size_t size, bool more)
{
int flags = MSG_DONTWAIT | MSG_NOSIGNAL | (more ? MSG_MORE : MSG_EOR);
@@ -570,6 +570,24 @@ static int ceph_tcp_sendpage(struct socket *sock, struct page *page,
return ret;
}
+static int ceph_tcp_sendpage(struct socket *sock, struct page *page,
+ int offset, size_t size, bool more)
+{
+ int ret;
+ struct kvec iov;
+
+ /* sendpage cannot properly handle pages with page_count == 0,
+ * so fall back to sendmsg in that case: the page is mapped with
+ * kmap(), @size bytes starting at @offset are handed to
+ * ceph_tcp_sendmsg() as a single kvec, and the page is unmapped
+ * again.  Pages with page_count >= 1 take the regular
+ * __ceph_tcp_sendpage() path.  The return value is whatever the
+ * chosen helper returned. */
+ if (page_count(page) >= 1)
+ return __ceph_tcp_sendpage(sock, page, offset, size, more);
+
+ iov.iov_base = kmap(page) + offset;
+ iov.iov_len = size;
+ ret = ceph_tcp_sendmsg(sock, &iov, 1, size, more);
+ kunmap(page);
+
+ return ret;
+}
/*
* Shutdown/close the socket for the given connection.
@@ -919,6 +937,9 @@ static bool ceph_msg_data_pages_advance(struct ceph_msg_data_cursor *cursor,
if (!bytes || cursor->page_offset)
return false; /* more bytes to process in the current page */
+ if (!cursor->resid)
+ return false; /* no more data */
+
/* Move on to the next page; offset is already at 0 */
BUG_ON(cursor->page_index >= cursor->page_count);
@@ -1004,6 +1025,9 @@ static bool ceph_msg_data_pagelist_advance(struct ceph_msg_data_cursor *cursor,
if (!bytes || cursor->offset & ~PAGE_MASK)
return false; /* more bytes to process in the current page */
+ if (!cursor->resid)
+ return false; /* no more data */
+
/* Move on to the next page */
BUG_ON(list_is_last(&cursor->page->lru, &pagelist->head));
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index 2ac9ef35110..067d3af2eaf 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -296,6 +296,33 @@ void ceph_monc_request_next_osdmap(struct ceph_mon_client *monc)
__send_subscribe(monc);
mutex_unlock(&monc->mutex);
}
+EXPORT_SYMBOL(ceph_monc_request_next_osdmap);
+
+/*
+ * Wait until the mon client has an osdmap of at least @epoch.
+ *
+ * @timeout is in jiffies; 0 disables the deadline check.  Returns 0 once
+ * monc->have_osdmap >= @epoch, -ETIMEDOUT if @timeout jiffies have passed
+ * since the call started, or the negative value returned by
+ * wait_event_interruptible_timeout() when the sleep is interrupted.
+ */
+int ceph_monc_wait_osdmap(struct ceph_mon_client *monc, u32 epoch,
+			  unsigned long timeout)
+{
+	unsigned long started = jiffies;
+	long ret;	/* wait_event_interruptible_timeout() yields a long */
+
+	mutex_lock(&monc->mutex);
+	while (monc->have_osdmap < epoch) {
+		mutex_unlock(&monc->mutex);
+
+		if (timeout != 0 && time_after_eq(jiffies, started + timeout))
+			return -ETIMEDOUT;
+
+		/*
+		 * Passing a literal 0 makes the wait return immediately
+		 * while the condition is false, which would turn this loop
+		 * into a busy spin.  "No deadline" therefore has to be
+		 * expressed as MAX_SCHEDULE_TIMEOUT.
+		 */
+		ret = wait_event_interruptible_timeout(monc->client->auth_wq,
+				monc->have_osdmap >= epoch,
+				timeout ? timeout : MAX_SCHEDULE_TIMEOUT);
+		if (ret < 0)
+			return ret;
+
+		mutex_lock(&monc->mutex);
+	}
+
+	mutex_unlock(&monc->mutex);
+	return 0;
+}
+EXPORT_SYMBOL(ceph_monc_wait_osdmap);
/*
*
@@ -477,14 +504,13 @@ static struct ceph_msg *get_generic_reply(struct ceph_connection *con,
return m;
}
-static int do_generic_request(struct ceph_mon_client *monc,
- struct ceph_mon_generic_request *req)
+static int __do_generic_request(struct ceph_mon_client *monc, u64 tid,
+ struct ceph_mon_generic_request *req)
{
int err;
/* register request */
- mutex_lock(&monc->mutex);
- req->tid = ++monc->last_tid;
+ req->tid = tid != 0 ? tid : ++monc->last_tid;
req->request->hdr.tid = cpu_to_le64(req->tid);
__insert_generic_request(monc, req);
monc->num_generic_requests++;
@@ -496,13 +522,24 @@ static int do_generic_request(struct ceph_mon_client *monc,
mutex_lock(&monc->mutex);
rb_erase(&req->node, &monc->generic_request_tree);
monc->num_generic_requests--;
- mutex_unlock(&monc->mutex);
if (!err)
err = req->result;
return err;
}
+static int do_generic_request(struct ceph_mon_client *monc,
+ struct ceph_mon_generic_request *req)
+{
+ int err;
+
+ mutex_lock(&monc->mutex);
+ err = __do_generic_request(monc, 0, req);
+ mutex_unlock(&monc->mutex);
+
+ return err;
+}
+
/*
* statfs
*/
@@ -579,6 +616,96 @@ out:
}
EXPORT_SYMBOL(ceph_monc_do_statfs);
+/*
+ * Handle a CEPH_MSG_MON_GET_VERSION_REPLY.
+ *
+ * The payload starts with two u64s: the handle, then the value that is
+ * copied into the matching request's buffer.  A non-zero header tid must
+ * equal the handle.  The pending generic request is looked up by handle
+ * under monc->mutex, filled in, and completed after the mutex is dropped.
+ * Replies that are too short, or whose tid and handle disagree, are
+ * logged and dumped.
+ */
+static void handle_get_version_reply(struct ceph_mon_client *monc,
+				     struct ceph_msg *msg)
+{
+	struct ceph_mon_generic_request *req;
+	u64 tid = le64_to_cpu(msg->hdr.tid);
+	void *p = msg->front.iov_base;
+	/*
+	 * Bound decoding by the front length of this message, not by
+	 * front_alloc_len: the reply buffer is preallocated (1024 bytes)
+	 * and anything past the payload is not part of the reply.
+	 * NOTE(review): assumes front.iov_len holds the received front
+	 * length at dispatch time - confirm against the messenger.
+	 */
+	void *end = p + msg->front.iov_len;
+	u64 handle;
+
+	dout("%s %p tid %llu\n", __func__, msg, tid);
+
+	ceph_decode_need(&p, end, 2*sizeof(u64), bad);
+	handle = ceph_decode_64(&p);
+	if (tid != 0 && tid != handle)
+		goto bad;
+
+	mutex_lock(&monc->mutex);
+	req = __lookup_generic_req(monc, handle);
+	if (req) {
+		*(u64 *)req->buf = ceph_decode_64(&p);
+		req->result = 0;
+		/* hold a reference across the unlock until completion */
+		get_generic_request(req);
+	}
+	mutex_unlock(&monc->mutex);
+	if (req) {
+		complete_all(&req->completion);
+		put_generic_request(req);
+	}
+
+	return;
+bad:
+	pr_err("corrupt mon_get_version reply\n");
+	ceph_msg_dump(msg);
+}
+
+/*
+ * Send MMonGetVersion and wait for the reply.
+ *
+ * @what: one of "mdsmap", "osdmap" or "monmap"
+ * @newest: installed as the request's result buffer (req->buf, with
+ *          buf_len = sizeof(*newest))
+ *
+ * The request front is encoded as a 64-bit handle (the freshly allocated
+ * tid) followed by @what as a string; tid allocation, encoding and
+ * __do_generic_request() all run with monc->mutex held.
+ *
+ * Returns -ENOMEM if the request struct or either message cannot be
+ * allocated, otherwise the value returned by __do_generic_request().
+ * The local reference on the request is dropped on every path after
+ * the allocation of the request struct.
+ */
+int ceph_monc_do_get_version(struct ceph_mon_client *monc, const char *what,
+ u64 *newest)
+{
+ struct ceph_mon_generic_request *req;
+ void *p, *end;
+ u64 tid;
+ int err;
+
+ req = kzalloc(sizeof(*req), GFP_NOFS);
+ if (!req)
+ return -ENOMEM;
+
+ kref_init(&req->kref);
+ req->buf = newest;
+ req->buf_len = sizeof(*newest);
+ init_completion(&req->completion);
+
+ req->request = ceph_msg_new(CEPH_MSG_MON_GET_VERSION,
+ sizeof(u64) + sizeof(u32) + strlen(what),
+ GFP_NOFS, true);
+ if (!req->request) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ req->reply = ceph_msg_new(CEPH_MSG_MON_GET_VERSION_REPLY, 1024,
+ GFP_NOFS, true);
+ if (!req->reply) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ p = req->request->front.iov_base;
+ end = p + req->request->front_alloc_len;
+
+ /* fill out request */
+ mutex_lock(&monc->mutex);
+ tid = ++monc->last_tid;
+ ceph_encode_64(&p, tid); /* handle */
+ ceph_encode_string(&p, end, what, strlen(what));
+
+ err = __do_generic_request(monc, tid, req);
+
+ mutex_unlock(&monc->mutex);
+out:
+ kref_put(&req->kref, release_generic_request);
+ return err;
+}
+EXPORT_SYMBOL(ceph_monc_do_get_version);
+
/*
* pool ops
*/
@@ -981,6 +1108,10 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg)
handle_statfs_reply(monc, msg);
break;
+ case CEPH_MSG_MON_GET_VERSION_REPLY:
+ handle_get_version_reply(monc, msg);
+ break;
+
case CEPH_MSG_POOLOP_REPLY:
handle_poolop_reply(monc, msg);
break;
@@ -1029,6 +1160,15 @@ static struct ceph_msg *mon_alloc_msg(struct ceph_connection *con,
case CEPH_MSG_AUTH_REPLY:
m = ceph_msg_get(monc->m_auth_reply);
break;
+ case CEPH_MSG_MON_GET_VERSION_REPLY:
+ if (le64_to_cpu(hdr->tid) != 0)
+ return get_generic_reply(con, hdr, skip);
+
+ /*
+ * Older OSDs don't set reply tid even if the original
+ * request had a non-zero tid. Work around this weirdness
+ * by falling through to the allocate case.
+ */
case CEPH_MSG_MON_MAP:
case CEPH_MSG_MDS_MAP:
case CEPH_MSG_OSD_MAP:
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 0676f2b199d..05be0c18169 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -436,6 +436,7 @@ static bool osd_req_opcode_valid(u16 opcode)
case CEPH_OSD_OP_OMAPCLEAR:
case CEPH_OSD_OP_OMAPRMKEYS:
case CEPH_OSD_OP_OMAP_CMP:
+ case CEPH_OSD_OP_SETALLOCHINT:
case CEPH_OSD_OP_CLONERANGE:
case CEPH_OSD_OP_ASSERT_SRC_VERSION:
case CEPH_OSD_OP_SRC_CMPXATTR:
@@ -591,6 +592,26 @@ void osd_req_op_watch_init(struct ceph_osd_request *osd_req,
}
EXPORT_SYMBOL(osd_req_op_watch_init);
+void osd_req_op_alloc_hint_init(struct ceph_osd_request *osd_req,
+ unsigned int which,
+ u64 expected_object_size,
+ u64 expected_write_size)
+{
+ struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which,
+ CEPH_OSD_OP_SETALLOCHINT);
+
+ op->alloc_hint.expected_object_size = expected_object_size;
+ op->alloc_hint.expected_write_size = expected_write_size;
+
+ /*
+ * CEPH_OSD_OP_SETALLOCHINT op is advisory and therefore deemed
+ * not worth a feature bit. Set FAILOK per-op flag to make
+ * sure older osds don't trip over an unsupported opcode.
+ */
+ op->flags |= CEPH_OSD_OP_FLAG_FAILOK;
+}
+EXPORT_SYMBOL(osd_req_op_alloc_hint_init);
+
static void ceph_osdc_msg_data_add(struct ceph_msg *msg,
struct ceph_osd_data *osd_data)
{
@@ -681,6 +702,12 @@ static u64 osd_req_encode_op(struct ceph_osd_request *req,
dst->watch.ver = cpu_to_le64(src->watch.ver);
dst->watch.flag = src->watch.flag;
break;
+ case CEPH_OSD_OP_SETALLOCHINT:
+ dst->alloc_hint.expected_object_size =
+ cpu_to_le64(src->alloc_hint.expected_object_size);
+ dst->alloc_hint.expected_write_size =
+ cpu_to_le64(src->alloc_hint.expected_write_size);
+ break;
default:
pr_err("unsupported osd opcode %s\n",
ceph_osd_op_name(src->op));
@@ -688,7 +715,9 @@ static u64 osd_req_encode_op(struct ceph_osd_request *req,
return 0;
}
+
dst->op = cpu_to_le16(src->op);
+ dst->flags = cpu_to_le32(src->flags);
dst->payload_len = cpu_to_le32(src->payload_len);
return request_data_len;
@@ -1304,7 +1333,7 @@ static int __map_request(struct ceph_osd_client *osdc,
{
struct ceph_pg pgid;
int acting[CEPH_PG_MAX_SIZE];
- int o = -1, num = 0;
+ int num, o;
int err;
bool was_paused;
@@ -1317,11 +1346,9 @@ static int __map_request(struct ceph_osd_client *osdc,
}
req->r_pgid = pgid;
- err = ceph_calc_pg_acting(osdc->osdmap, pgid, acting);
- if (err > 0) {
- o = acting[0];
- num = err;
- }
+ num = ceph_calc_pg_acting(osdc->osdmap, pgid, acting, &o);
+ if (num < 0)
+ num = 0;
was_paused = req->r_paused;
req->r_paused = __req_should_be_paused(osdc, req);
@@ -2033,7 +2060,7 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg)
int skipped_map = 0;
dout("taking full map %u len %d\n", epoch, maplen);
- newmap = osdmap_decode(&p, p+maplen);
+ newmap = ceph_osdmap_decode(&p, p+maplen);
if (IS_ERR(newmap)) {
err = PTR_ERR(newmap);
goto bad;
@@ -2082,7 +2109,6 @@ bad:
pr_err("osdc handle_map corrupt msg\n");
ceph_msg_dump(msg);
up_write(&osdc->map_sem);
- return;
}
/*
@@ -2281,7 +2307,6 @@ done_err:
bad:
pr_err("osdc handle_watch_notify corrupt msg\n");
- return;
}
/*
@@ -2466,7 +2491,7 @@ EXPORT_SYMBOL(ceph_osdc_sync);
* Call all pending notify callbacks - for use after a watch is
* unregistered, to make sure no more callbacks for it will be invoked
*/
-extern void ceph_osdc_flush_notifies(struct ceph_osd_client *osdc)
+void ceph_osdc_flush_notifies(struct ceph_osd_client *osdc)
{
flush_workqueue(osdc->notify_wq);
}
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index aade4a5c1c0..c547e46084d 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -329,6 +329,11 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
dout("crush decode tunable chooseleaf_descend_once = %d",
c->chooseleaf_descend_once);
+ ceph_decode_need(p, end, sizeof(u8), done);
+ c->chooseleaf_vary_r = ceph_decode_8(p);
+ dout("crush decode tunable chooseleaf_vary_r = %d",
+ c->chooseleaf_vary_r);
+
done:
dout("crush_decode success\n");
return c;
@@ -343,7 +348,7 @@ bad:
/*
* rbtree of pg_mapping for handling pg_temp (explicit mapping of pgid
- * to a set of osds)
+ * to a set of osds) and primary_temp (explicit primary setting)
*/
static int pgid_cmp(struct ceph_pg l, struct ceph_pg r)
{
@@ -506,7 +511,7 @@ static void __remove_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *pi)
kfree(pi);
}
-static int __decode_pool(void **p, void *end, struct ceph_pg_pool_info *pi)
+static int decode_pool(void **p, void *end, struct ceph_pg_pool_info *pi)
{
u8 ev, cv;
unsigned len, num;
@@ -587,7 +592,7 @@ bad:
return -EINVAL;
}
-static int __decode_pool_names(void **p, void *end, struct ceph_osdmap *map)
+static int decode_pool_names(void **p, void *end, struct ceph_osdmap *map)
{
struct ceph_pg_pool_info *pi;
u32 num, len;
@@ -633,6 +638,13 @@ void ceph_osdmap_destroy(struct ceph_osdmap *map)
rb_erase(&pg->node, &map->pg_temp);
kfree(pg);
}
+ while (!RB_EMPTY_ROOT(&map->primary_temp)) {
+ struct ceph_pg_mapping *pg =
+ rb_entry(rb_first(&map->primary_temp),
+ struct ceph_pg_mapping, node);
+ rb_erase(&pg->node, &map->primary_temp);
+ kfree(pg);
+ }
while (!RB_EMPTY_ROOT(&map->pg_pools)) {
struct ceph_pg_pool_info *pi =
rb_entry(rb_first(&map->pg_pools),
@@ -642,186 +654,516 @@ void ceph_osdmap_destroy(struct ceph_osdmap *map)
kfree(map->osd_state);
kfree(map->osd_weight);
kfree(map->osd_addr);
+ kfree(map->osd_primary_affinity);
kfree(map);
}
/*
- * adjust max osd value. reallocate arrays.
+ * Adjust max_osd value, (re)allocate arrays.
+ *
+ * The new elements are properly initialized.
+ *
+ * Each array pointer in @map is updated as soon as its krealloc()
+ * succeeds: krealloc() may free the old buffer, so keeping the old
+ * pointer in @map across a later allocation failure would leave it
+ * dangling (and freed again when the map is destroyed).
+ *
+ * Returns 0 on success or -ENOMEM; on failure map->max_osd is unchanged
+ * and every array in @map still points at valid memory.
*/
static int osdmap_set_max_osd(struct ceph_osdmap *map, int max)
{
u8 *state;
- struct ceph_entity_addr *addr;
u32 *weight;
+ struct ceph_entity_addr *addr;
+ int i;
- state = kcalloc(max, sizeof(*state), GFP_NOFS);
- addr = kcalloc(max, sizeof(*addr), GFP_NOFS);
- weight = kcalloc(max, sizeof(*weight), GFP_NOFS);
- if (state == NULL || addr == NULL || weight == NULL) {
- kfree(state);
- kfree(addr);
- kfree(weight);
- return -ENOMEM;
- }
+ state = krealloc(map->osd_state, max*sizeof(*state), GFP_NOFS);
+ if (!state)
+ return -ENOMEM;
+ map->osd_state = state;
+
+ weight = krealloc(map->osd_weight, max*sizeof(*weight), GFP_NOFS);
+ if (!weight)
+ return -ENOMEM;
+ map->osd_weight = weight;
+
+ addr = krealloc(map->osd_addr, max*sizeof(*addr), GFP_NOFS);
+ if (!addr)
+ return -ENOMEM;
+ map->osd_addr = addr;
- /* copy old? */
- if (map->osd_state) {
- memcpy(state, map->osd_state, map->max_osd*sizeof(*state));
- memcpy(addr, map->osd_addr, map->max_osd*sizeof(*addr));
- memcpy(weight, map->osd_weight, map->max_osd*sizeof(*weight));
- kfree(map->osd_state);
- kfree(map->osd_addr);
- kfree(map->osd_weight);
+ /* map->max_osd is still the old size here: init only the new tail */
+ for (i = map->max_osd; i < max; i++) {
+ state[i] = 0;
+ weight[i] = CEPH_OSD_OUT;
+ memset(addr + i, 0, sizeof(*addr));
}
- map->osd_state = state;
- map->osd_weight = weight;
- map->osd_addr = addr;
+
+ if (map->osd_primary_affinity) {
+ u32 *affinity;
+
+ affinity = krealloc(map->osd_primary_affinity,
+ max*sizeof(*affinity), GFP_NOFS);
+ if (!affinity)
+ return -ENOMEM;
+
+ for (i = map->max_osd; i < max; i++)
+ affinity[i] = CEPH_OSD_DEFAULT_PRIMARY_AFFINITY;
+
+ map->osd_primary_affinity = affinity;
+ }
+
map->max_osd = max;
+
return 0;
}
+#define OSDMAP_WRAPPER_COMPAT_VER 7
+#define OSDMAP_CLIENT_DATA_COMPAT_VER 1
+
/*
- * decode a full map.
+ * Return 0 or error. On success, *v is set to 0 for old (v6) osdmaps,
+ * to struct_v of the client_data section for new (v7 and above)
+ * osdmaps.
*/
-struct ceph_osdmap *osdmap_decode(void **p, void *end)
+static int get_osdmap_client_data_v(void **p, void *end,
+ const char *prefix, u8 *v)
{
- struct ceph_osdmap *map;
- u16 version;
- u32 len, max, i;
- int err = -EINVAL;
- void *start = *p;
- struct ceph_pg_pool_info *pi;
+ u8 struct_v;
+
+ ceph_decode_8_safe(p, end, struct_v, e_inval);
+ if (struct_v >= 7) {
+ u8 struct_compat;
+
+ ceph_decode_8_safe(p, end, struct_compat, e_inval);
+ if (struct_compat > OSDMAP_WRAPPER_COMPAT_VER) {
+ pr_warning("got v %d cv %d > %d of %s ceph_osdmap\n",
+ struct_v, struct_compat,
+ OSDMAP_WRAPPER_COMPAT_VER, prefix);
+ return -EINVAL;
+ }
+ *p += 4; /* ignore wrapper struct_len */
+
+ ceph_decode_8_safe(p, end, struct_v, e_inval);
+ ceph_decode_8_safe(p, end, struct_compat, e_inval);
+ if (struct_compat > OSDMAP_CLIENT_DATA_COMPAT_VER) {
+ pr_warning("got v %d cv %d > %d of %s ceph_osdmap client data\n",
+ struct_v, struct_compat,
+ OSDMAP_CLIENT_DATA_COMPAT_VER, prefix);
+ return -EINVAL;
+ }
+ *p += 4; /* ignore client data struct_len */
+ } else {
+ u16 version;
+
+ *p -= 1;
+ ceph_decode_16_safe(p, end, version, e_inval);
+ if (version < 6) {
+ pr_warning("got v %d < 6 of %s ceph_osdmap\n", version,
+ prefix);
+ return -EINVAL;
+ }
- dout("osdmap_decode %p to %p len %d\n", *p, end, (int)(end - *p));
+ /* old osdmap encoding */
+ struct_v = 0;
+ }
- map = kzalloc(sizeof(*map), GFP_NOFS);
- if (map == NULL)
- return ERR_PTR(-ENOMEM);
- map->pg_temp = RB_ROOT;
+ *v = struct_v;
+ return 0;
- ceph_decode_16_safe(p, end, version, bad);
- if (version > 6) {
- pr_warning("got unknown v %d > 6 of osdmap\n", version);
- goto bad;
+e_inval:
+ return -EINVAL;
+}
+
+static int __decode_pools(void **p, void *end, struct ceph_osdmap *map,
+ bool incremental)
+{
+ u32 n;
+
+ ceph_decode_32_safe(p, end, n, e_inval);
+ while (n--) {
+ struct ceph_pg_pool_info *pi;
+ u64 pool;
+ int ret;
+
+ ceph_decode_64_safe(p, end, pool, e_inval);
+
+ pi = __lookup_pg_pool(&map->pg_pools, pool);
+ if (!incremental || !pi) {
+ pi = kzalloc(sizeof(*pi), GFP_NOFS);
+ if (!pi)
+ return -ENOMEM;
+
+ pi->id = pool;
+
+ ret = __insert_pg_pool(&map->pg_pools, pi);
+ if (ret) {
+ kfree(pi);
+ return ret;
+ }
+ }
+
+ ret = decode_pool(p, end, pi);
+ if (ret)
+ return ret;
}
- if (version < 6) {
- pr_warning("got old v %d < 6 of osdmap\n", version);
- goto bad;
+
+ return 0;
+
+e_inval:
+ return -EINVAL;
+}
+
+static int decode_pools(void **p, void *end, struct ceph_osdmap *map)
+{
+ return __decode_pools(p, end, map, false);
+}
+
+static int decode_new_pools(void **p, void *end, struct ceph_osdmap *map)
+{
+ return __decode_pools(p, end, map, true);
+}
+
+static int __decode_pg_temp(void **p, void *end, struct ceph_osdmap *map,
+ bool incremental)
+{
+ u32 n;
+
+ ceph_decode_32_safe(p, end, n, e_inval);
+ while (n--) {
+ struct ceph_pg pgid;
+ u32 len, i;
+ int ret;
+
+ ret = ceph_decode_pgid(p, end, &pgid);
+ if (ret)
+ return ret;
+
+ ceph_decode_32_safe(p, end, len, e_inval);
+
+ ret = __remove_pg_mapping(&map->pg_temp, pgid);
+ BUG_ON(!incremental && ret != -ENOENT);
+
+ if (!incremental || len > 0) {
+ struct ceph_pg_mapping *pg;
+
+ ceph_decode_need(p, end, len*sizeof(u32), e_inval);
+
+ if (len > (UINT_MAX - sizeof(*pg)) / sizeof(u32))
+ return -EINVAL;
+
+ pg = kzalloc(sizeof(*pg) + len*sizeof(u32), GFP_NOFS);
+ if (!pg)
+ return -ENOMEM;
+
+ pg->pgid = pgid;
+ pg->pg_temp.len = len;
+ for (i = 0; i < len; i++)
+ pg->pg_temp.osds[i] = ceph_decode_32(p);
+
+ ret = __insert_pg_mapping(pg, &map->pg_temp);
+ if (ret) {
+ kfree(pg);
+ return ret;
+ }
+ }
+ }
+
+ return 0;
+
+e_inval:
+ return -EINVAL;
+}
+
+static int decode_pg_temp(void **p, void *end, struct ceph_osdmap *map)
+{
+ return __decode_pg_temp(p, end, map, false);
+}
+
+static int decode_new_pg_temp(void **p, void *end, struct ceph_osdmap *map)
+{
+ return __decode_pg_temp(p, end, map, true);
+}
+
+static int __decode_primary_temp(void **p, void *end, struct ceph_osdmap *map,
+ bool incremental)
+{
+ u32 n;
+
+ ceph_decode_32_safe(p, end, n, e_inval);
+ while (n--) {
+ struct ceph_pg pgid;
+ u32 osd;
+ int ret;
+
+ ret = ceph_decode_pgid(p, end, &pgid);
+ if (ret)
+ return ret;
+
+ ceph_decode_32_safe(p, end, osd, e_inval);
+
+ ret = __remove_pg_mapping(&map->primary_temp, pgid);
+ BUG_ON(!incremental && ret != -ENOENT);
+
+ if (!incremental || osd != (u32)-1) {
+ struct ceph_pg_mapping *pg;
+
+ pg = kzalloc(sizeof(*pg), GFP_NOFS);
+ if (!pg)
+ return -ENOMEM;
+
+ pg->pgid = pgid;
+ pg->primary_temp.osd = osd;
+
+ ret = __insert_pg_mapping(pg, &map->primary_temp);
+ if (ret) {
+ kfree(pg);
+ return ret;
+ }
+ }
}
- ceph_decode_need(p, end, 2*sizeof(u64)+6*sizeof(u32), bad);
+ return 0;
+
+e_inval:
+ return -EINVAL;
+}
+
+static int decode_primary_temp(void **p, void *end, struct ceph_osdmap *map)
+{
+ return __decode_primary_temp(p, end, map, false);
+}
+
+static int decode_new_primary_temp(void **p, void *end,
+ struct ceph_osdmap *map)
+{
+ return __decode_primary_temp(p, end, map, true);
+}
+
+u32 ceph_get_primary_affinity(struct ceph_osdmap *map, int osd)
+{
+ BUG_ON(osd >= map->max_osd);
+
+ if (!map->osd_primary_affinity)
+ return CEPH_OSD_DEFAULT_PRIMARY_AFFINITY;
+
+ return map->osd_primary_affinity[osd];
+}
+
+static int set_primary_affinity(struct ceph_osdmap *map, int osd, u32 aff)
+{
+ BUG_ON(osd >= map->max_osd);
+
+ if (!map->osd_primary_affinity) {
+ int i;
+
+ map->osd_primary_affinity = kmalloc(map->max_osd*sizeof(u32),
+ GFP_NOFS);
+ if (!map->osd_primary_affinity)
+ return -ENOMEM;
+
+ for (i = 0; i < map->max_osd; i++)
+ map->osd_primary_affinity[i] =
+ CEPH_OSD_DEFAULT_PRIMARY_AFFINITY;
+ }
+
+ map->osd_primary_affinity[osd] = aff;
+
+ return 0;
+}
+
+static int decode_primary_affinity(void **p, void *end,
+ struct ceph_osdmap *map)
+{
+ u32 len, i;
+
+ ceph_decode_32_safe(p, end, len, e_inval);
+ if (len == 0) {
+ kfree(map->osd_primary_affinity);
+ map->osd_primary_affinity = NULL;
+ return 0;
+ }
+ if (len != map->max_osd)
+ goto e_inval;
+
+ ceph_decode_need(p, end, map->max_osd*sizeof(u32), e_inval);
+
+ for (i = 0; i < map->max_osd; i++) {
+ int ret;
+
+ ret = set_primary_affinity(map, i, ceph_decode_32(p));
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+
+e_inval:
+ return -EINVAL;
+}
+
+/*
+ * Decode the per-osd primary-affinity updates: a u32 count followed by
+ * that many (osd, affinity) u32 pairs, each applied with
+ * set_primary_affinity().
+ *
+ * Returns 0 on success, -EINVAL if the buffer is too short or an osd id
+ * is out of range, or the error returned by set_primary_affinity().
+ */
+static int decode_new_primary_affinity(void **p, void *end,
+				       struct ceph_osdmap *map)
+{
+	u32 n;
+
+	ceph_decode_32_safe(p, end, n, e_inval);
+	while (n--) {
+		u32 osd, aff;
+		int ret;
+
+		ceph_decode_32_safe(p, end, osd, e_inval);
+		ceph_decode_32_safe(p, end, aff, e_inval);
+
+		/*
+		 * osd comes straight out of the decoded message, and
+		 * set_primary_affinity() does BUG_ON(osd >= map->max_osd).
+		 * Reject an out-of-range id as a decode error instead of
+		 * letting a corrupt map bring the kernel down.
+		 */
+		if (osd >= map->max_osd)
+			goto e_inval;
+
+		ret = set_primary_affinity(map, osd, aff);
+		if (ret)
+			return ret;
+
+		pr_info("osd%d primary-affinity 0x%x\n", osd, aff);
+	}
+
+	return 0;
+
+e_inval:
+	return -EINVAL;
+}
+
+/*
+ * decode a full map.
+ */
+static int osdmap_decode(void **p, void *end, struct ceph_osdmap *map)
+{
+ u8 struct_v;
+ u32 epoch = 0;
+ void *start = *p;
+ u32 max;
+ u32 len, i;
+ int err;
+
+ dout("%s %p to %p len %d\n", __func__, *p, end, (int)(end - *p));
+
+ err = get_osdmap_client_data_v(p, end, "full", &struct_v);
+ if (err)
+ goto bad;
+
+ /* fsid, epoch, created, modified */
+ ceph_decode_need(p, end, sizeof(map->fsid) + sizeof(u32) +
+ sizeof(map->created) + sizeof(map->modified), e_inval);
ceph_decode_copy(p, &map->fsid, sizeof(map->fsid));
- map->epoch = ceph_decode_32(p);
+ epoch = map->epoch = ceph_decode_32(p);
ceph_decode_copy(p, &map->created, sizeof(map->created));
ceph_decode_copy(p, &map->modified, sizeof(map->modified));
- ceph_decode_32_safe(p, end, max, bad);
- while (max--) {
- ceph_decode_need(p, end, 8 + 2, bad);
- err = -ENOMEM;
- pi = kzalloc(sizeof(*pi), GFP_NOFS);
- if (!pi)
- goto bad;
- pi->id = ceph_decode_64(p);
- err = __decode_pool(p, end, pi);
- if (err < 0) {
- kfree(pi);
- goto bad;
- }
- __insert_pg_pool(&map->pg_pools, pi);
- }
+ /* pools */
+ err = decode_pools(p, end, map);
+ if (err)
+ goto bad;
- err = __decode_pool_names(p, end, map);
- if (err < 0) {
- dout("fail to decode pool names");
+ /* pool_name */
+ err = decode_pool_names(p, end, map);
+ if (err)
goto bad;
- }
- ceph_decode_32_safe(p, end, map->pool_max, bad);
+ ceph_decode_32_safe(p, end, map->pool_max, e_inval);
- ceph_decode_32_safe(p, end, map->flags, bad);
+ ceph_decode_32_safe(p, end, map->flags, e_inval);
- max = ceph_decode_32(p);
+ /* max_osd */
+ ceph_decode_32_safe(p, end, max, e_inval);
/* (re)alloc osd arrays */
err = osdmap_set_max_osd(map, max);
- if (err < 0)
+ if (err)
goto bad;
- dout("osdmap_decode max_osd = %d\n", map->max_osd);
- /* osds */
- err = -EINVAL;
+ /* osd_state, osd_weight, osd_addrs->client_addr */
ceph_decode_need(p, end, 3*sizeof(u32) +
map->max_osd*(1 + sizeof(*map->osd_weight) +
- sizeof(*map->osd_addr)), bad);
- *p += 4; /* skip length field (should match max) */
+ sizeof(*map->osd_addr)), e_inval);
+
+ if (ceph_decode_32(p) != map->max_osd)
+ goto e_inval;
+
ceph_decode_copy(p, map->osd_state, map->max_osd);
- *p += 4; /* skip length field (should match max) */
+ if (ceph_decode_32(p) != map->max_osd)
+ goto e_inval;
+
for (i = 0; i < map->max_osd; i++)
map->osd_weight[i] = ceph_decode_32(p);
- *p += 4; /* skip length field (should match max) */
+ if (ceph_decode_32(p) != map->max_osd)
+ goto e_inval;
+
ceph_decode_copy(p, map->osd_addr, map->max_osd*sizeof(*map->osd_addr));
for (i = 0; i < map->max_osd; i++)
ceph_decode_addr(&map->osd_addr[i]);
/* pg_temp */
- ceph_decode_32_safe(p, end, len, bad);
- for (i = 0; i < len; i++) {
- int n, j;
- struct ceph_pg pgid;
- struct ceph_pg_mapping *pg;
+ err = decode_pg_temp(p, end, map);
+ if (err)
+ goto bad;
- err = ceph_decode_pgid(p, end, &pgid);
+ /* primary_temp */
+ if (struct_v >= 1) {
+ err = decode_primary_temp(p, end, map);
if (err)
goto bad;
- ceph_decode_need(p, end, sizeof(u32), bad);
- n = ceph_decode_32(p);
- err = -EINVAL;
- if (n > (UINT_MAX - sizeof(*pg)) / sizeof(u32))
- goto bad;
- ceph_decode_need(p, end, n * sizeof(u32), bad);
- err = -ENOMEM;
- pg = kmalloc(sizeof(*pg) + n*sizeof(u32), GFP_NOFS);
- if (!pg)
- goto bad;
- pg->pgid = pgid;
- pg->len = n;
- for (j = 0; j < n; j++)
- pg->osds[j] = ceph_decode_32(p);
+ }
- err = __insert_pg_mapping(pg, &map->pg_temp);
+ /* primary_affinity */
+ if (struct_v >= 2) {
+ err = decode_primary_affinity(p, end, map);
if (err)
goto bad;
- dout(" added pg_temp %lld.%x len %d\n", pgid.pool, pgid.seed,
- len);
+ } else {
+ /* XXX can this happen? */
+ kfree(map->osd_primary_affinity);
+ map->osd_primary_affinity = NULL;
}
/* crush */
- ceph_decode_32_safe(p, end, len, bad);
- dout("osdmap_decode crush len %d from off 0x%x\n", len,
- (int)(*p - start));
- ceph_decode_need(p, end, len, bad);
- map->crush = crush_decode(*p, end);
- *p += len;
+ ceph_decode_32_safe(p, end, len, e_inval);
+ map->crush = crush_decode(*p, min(*p + len, end));
if (IS_ERR(map->crush)) {
err = PTR_ERR(map->crush);
map->crush = NULL;
goto bad;
}
+ *p += len;
- /* ignore the rest of the map */
+ /* ignore the rest */
*p = end;
- dout("osdmap_decode done %p %p\n", *p, end);
- return map;
+ dout("full osdmap epoch %d max_osd %d\n", map->epoch, map->max_osd);
+ return 0;
+e_inval:
+ err = -EINVAL;
bad:
- dout("osdmap_decode fail err %d\n", err);
- ceph_osdmap_destroy(map);
- return ERR_PTR(err);
+ pr_err("corrupt full osdmap (%d) epoch %d off %d (%p of %p-%p)\n",
+ err, epoch, (int)(*p - start), *p, start, end);
+ print_hex_dump(KERN_DEBUG, "osdmap: ",
+ DUMP_PREFIX_OFFSET, 16, 1,
+ start, end - start, true);
+ return err;
+}
+
+/*
+ * Allocate and decode a full map.
+ */
+struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end)
+{
+ struct ceph_osdmap *map;
+ int ret;
+
+ map = kzalloc(sizeof(*map), GFP_NOFS);
+ if (!map)
+ return ERR_PTR(-ENOMEM);
+
+ map->pg_temp = RB_ROOT;
+ map->primary_temp = RB_ROOT;
+ mutex_init(&map->crush_scratch_mutex);
+
+ ret = osdmap_decode(p, end, map);
+ if (ret) {
+ ceph_osdmap_destroy(map);
+ return ERR_PTR(ret);
+ }
+
+ return map;
}
/*
@@ -840,17 +1182,18 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
__s64 new_pool_max;
__s32 new_flags, max;
void *start = *p;
- int err = -EINVAL;
- u16 version;
+ int err;
+ u8 struct_v;
+
+ dout("%s %p to %p len %d\n", __func__, *p, end, (int)(end - *p));
- ceph_decode_16_safe(p, end, version, bad);
- if (version != 6) {
- pr_warning("got unknown v %d != 6 of inc osdmap\n", version);
+ err = get_osdmap_client_data_v(p, end, "inc", &struct_v);
+ if (err)
goto bad;
- }
- ceph_decode_need(p, end, sizeof(fsid)+sizeof(modified)+2*sizeof(u32),
- bad);
+ /* fsid, epoch, modified, new_pool_max, new_flags */
+ ceph_decode_need(p, end, sizeof(fsid) + sizeof(u32) + sizeof(modified) +
+ sizeof(u64) + sizeof(u32), e_inval);
ceph_decode_copy(p, &fsid, sizeof(fsid));
epoch = ceph_decode_32(p);
BUG_ON(epoch != map->epoch+1);
@@ -859,21 +1202,22 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
new_flags = ceph_decode_32(p);
/* full map? */
- ceph_decode_32_safe(p, end, len, bad);
+ ceph_decode_32_safe(p, end, len, e_inval);
if (len > 0) {
dout("apply_incremental full map len %d, %p to %p\n",
len, *p, end);
- return osdmap_decode(p, min(*p+len, end));
+ return ceph_osdmap_decode(p, min(*p+len, end));
}
/* new crush? */
- ceph_decode_32_safe(p, end, len, bad);
+ ceph_decode_32_safe(p, end, len, e_inval);
if (len > 0) {
- dout("apply_incremental new crush map len %d, %p to %p\n",
- len, *p, end);
newcrush = crush_decode(*p, min(*p+len, end));
- if (IS_ERR(newcrush))
- return ERR_CAST(newcrush);
+ if (IS_ERR(newcrush)) {
+ err = PTR_ERR(newcrush);
+ newcrush = NULL;
+ goto bad;
+ }
*p += len;
}
@@ -883,13 +1227,11 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
if (new_pool_max >= 0)
map->pool_max = new_pool_max;
- ceph_decode_need(p, end, 5*sizeof(u32), bad);
-
/* new max? */
- max = ceph_decode_32(p);
+ ceph_decode_32_safe(p, end, max, e_inval);
if (max >= 0) {
err = osdmap_set_max_osd(map, max);
- if (err < 0)
+ if (err)
goto bad;
}
@@ -902,51 +1244,34 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
newcrush = NULL;
}
- /* new_pool */
- ceph_decode_32_safe(p, end, len, bad);
- while (len--) {
- struct ceph_pg_pool_info *pi;
+ /* new_pools */
+ err = decode_new_pools(p, end, map);
+ if (err)
+ goto bad;
- ceph_decode_64_safe(p, end, pool, bad);
- pi = __lookup_pg_pool(&map->pg_pools, pool);
- if (!pi) {
- pi = kzalloc(sizeof(*pi), GFP_NOFS);
- if (!pi) {
- err = -ENOMEM;
- goto bad;
- }
- pi->id = pool;
- __insert_pg_pool(&map->pg_pools, pi);
- }
- err = __decode_pool(p, end, pi);
- if (err < 0)
- goto bad;
- }
- if (version >= 5) {
- err = __decode_pool_names(p, end, map);
- if (err < 0)
- goto bad;
- }
+ /* new_pool_names */
+ err = decode_pool_names(p, end, map);
+ if (err)
+ goto bad;
/* old_pool */
- ceph_decode_32_safe(p, end, len, bad);
+ ceph_decode_32_safe(p, end, len, e_inval);
while (len--) {
struct ceph_pg_pool_info *pi;
- ceph_decode_64_safe(p, end, pool, bad);
+ ceph_decode_64_safe(p, end, pool, e_inval);
pi = __lookup_pg_pool(&map->pg_pools, pool);
if (pi)
__remove_pg_pool(&map->pg_pools, pi);
}
/* new_up */
- err = -EINVAL;
- ceph_decode_32_safe(p, end, len, bad);
+ ceph_decode_32_safe(p, end, len, e_inval);
while (len--) {
u32 osd;
struct ceph_entity_addr addr;
- ceph_decode_32_safe(p, end, osd, bad);
- ceph_decode_copy_safe(p, end, &addr, sizeof(addr), bad);
+ ceph_decode_32_safe(p, end, osd, e_inval);
+ ceph_decode_copy_safe(p, end, &addr, sizeof(addr), e_inval);
ceph_decode_addr(&addr);
pr_info("osd%d up\n", osd);
BUG_ON(osd >= map->max_osd);
@@ -955,11 +1280,11 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
}
/* new_state */
- ceph_decode_32_safe(p, end, len, bad);
+ ceph_decode_32_safe(p, end, len, e_inval);
while (len--) {
u32 osd;
u8 xorstate;
- ceph_decode_32_safe(p, end, osd, bad);
+ ceph_decode_32_safe(p, end, osd, e_inval);
xorstate = **(u8 **)p;
(*p)++; /* clean flag */
if (xorstate == 0)
@@ -971,10 +1296,10 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
}
/* new_weight */
- ceph_decode_32_safe(p, end, len, bad);
+ ceph_decode_32_safe(p, end, len, e_inval);
while (len--) {
u32 osd, off;
- ceph_decode_need(p, end, sizeof(u32)*2, bad);
+ ceph_decode_need(p, end, sizeof(u32)*2, e_inval);
osd = ceph_decode_32(p);
off = ceph_decode_32(p);
pr_info("osd%d weight 0x%x %s\n", osd, off,
@@ -985,56 +1310,35 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
}
/* new_pg_temp */
- ceph_decode_32_safe(p, end, len, bad);
- while (len--) {
- struct ceph_pg_mapping *pg;
- int j;
- struct ceph_pg pgid;
- u32 pglen;
+ err = decode_new_pg_temp(p, end, map);
+ if (err)
+ goto bad;
- err = ceph_decode_pgid(p, end, &pgid);
+ /* new_primary_temp */
+ if (struct_v >= 1) {
+ err = decode_new_primary_temp(p, end, map);
if (err)
goto bad;
- ceph_decode_need(p, end, sizeof(u32), bad);
- pglen = ceph_decode_32(p);
- if (pglen) {
- ceph_decode_need(p, end, pglen*sizeof(u32), bad);
-
- /* removing existing (if any) */
- (void) __remove_pg_mapping(&map->pg_temp, pgid);
+ }
- /* insert */
- err = -EINVAL;
- if (pglen > (UINT_MAX - sizeof(*pg)) / sizeof(u32))
- goto bad;
- err = -ENOMEM;
- pg = kmalloc(sizeof(*pg) + sizeof(u32)*pglen, GFP_NOFS);
- if (!pg)
- goto bad;
- pg->pgid = pgid;
- pg->len = pglen;
- for (j = 0; j < pglen; j++)
- pg->osds[j] = ceph_decode_32(p);
- err = __insert_pg_mapping(pg, &map->pg_temp);
- if (err) {
- kfree(pg);
- goto bad;
- }
- dout(" added pg_temp %lld.%x len %d\n", pgid.pool,
- pgid.seed, pglen);
- } else {
- /* remove */
- __remove_pg_mapping(&map->pg_temp, pgid);
- }
+ /* new_primary_affinity */
+ if (struct_v >= 2) {
+ err = decode_new_primary_affinity(p, end, map);
+ if (err)
+ goto bad;
}
/* ignore the rest */
*p = end;
+
+ dout("inc osdmap epoch %d max_osd %d\n", map->epoch, map->max_osd);
return map;
+e_inval:
+ err = -EINVAL;
bad:
- pr_err("corrupt inc osdmap epoch %d off %d (%p of %p-%p)\n",
- epoch, (int)(*p - start), *p, start, end);
+ pr_err("corrupt inc osdmap (%d) epoch %d off %d (%p of %p-%p)\n",
+ err, epoch, (int)(*p - start), *p, start, end);
print_hex_dump(KERN_DEBUG, "osdmap: ",
DUMP_PREFIX_OFFSET, 16, 1,
start, end - start, true);
@@ -1142,61 +1446,250 @@ int ceph_oloc_oid_to_pg(struct ceph_osdmap *osdmap,
}
EXPORT_SYMBOL(ceph_oloc_oid_to_pg);
-static int crush_do_rule_ary(const struct crush_map *map, int ruleno, int x,
- int *result, int result_max,
- const __u32 *weight, int weight_max)
+static int do_crush(struct ceph_osdmap *map, int ruleno, int x,
+ int *result, int result_max,
+ const __u32 *weight, int weight_max)
{
- int scratch[result_max * 3];
+ int r;
+
+ BUG_ON(result_max > CEPH_PG_MAX_SIZE);
- return crush_do_rule(map, ruleno, x, result, result_max,
- weight, weight_max, scratch);
+ mutex_lock(&map->crush_scratch_mutex);
+ r = crush_do_rule(map->crush, ruleno, x, result, result_max,
+ weight, weight_max, map->crush_scratch_ary);
+ mutex_unlock(&map->crush_scratch_mutex);
+
+ return r;
}
/*
- * Calculate raw osd vector for the given pgid. Return pointer to osd
- * array, or NULL on failure.
+ * Calculate raw (crush) set for given pgid.
+ *
+ * Return raw set length, or error.
*/
-static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
- int *osds, int *num)
+static int pg_to_raw_osds(struct ceph_osdmap *osdmap,
+ struct ceph_pg_pool_info *pool,
+ struct ceph_pg pgid, u32 pps, int *osds)
{
- struct ceph_pg_mapping *pg;
- struct ceph_pg_pool_info *pool;
int ruleno;
- int r;
- u32 pps;
+ int len;
- pool = __lookup_pg_pool(&osdmap->pg_pools, pgid.pool);
- if (!pool)
- return NULL;
+ /* crush */
+ ruleno = crush_find_rule(osdmap->crush, pool->crush_ruleset,
+ pool->type, pool->size);
+ if (ruleno < 0) {
+ pr_err("no crush rule: pool %lld ruleset %d type %d size %d\n",
+ pgid.pool, pool->crush_ruleset, pool->type,
+ pool->size);
+ return -ENOENT;
+ }
- /* pg_temp? */
+ len = do_crush(osdmap, ruleno, pps, osds,
+ min_t(int, pool->size, CEPH_PG_MAX_SIZE),
+ osdmap->osd_weight, osdmap->max_osd);
+ if (len < 0) {
+ pr_err("error %d from crush rule %d: pool %lld ruleset %d type %d size %d\n",
+ len, ruleno, pgid.pool, pool->crush_ruleset,
+ pool->type, pool->size);
+ return len;
+ }
+
+ return len;
+}
+
+/*
+ * Given raw set, calculate up set and up primary.
+ *
+ * Return up set length. *primary is set to up primary osd id, or -1
+ * if up set is empty.
+ */
+static int raw_to_up_osds(struct ceph_osdmap *osdmap,
+ struct ceph_pg_pool_info *pool,
+ int *osds, int len, int *primary)
+{
+ int up_primary = -1;
+ int i;
+
+ if (ceph_can_shift_osds(pool)) {
+ int removed = 0;
+
+ for (i = 0; i < len; i++) {
+ if (ceph_osd_is_down(osdmap, osds[i])) {
+ removed++;
+ continue;
+ }
+ if (removed)
+ osds[i - removed] = osds[i];
+ }
+
+ len -= removed;
+ if (len > 0)
+ up_primary = osds[0];
+ } else {
+ for (i = len - 1; i >= 0; i--) {
+ if (ceph_osd_is_down(osdmap, osds[i]))
+ osds[i] = CRUSH_ITEM_NONE;
+ else
+ up_primary = osds[i];
+ }
+ }
+
+ *primary = up_primary;
+ return len;
+}
+
+static void apply_primary_affinity(struct ceph_osdmap *osdmap, u32 pps,
+ struct ceph_pg_pool_info *pool,
+ int *osds, int len, int *primary)
+{
+ int i;
+ int pos = -1;
+
+ /*
+ * Do we have any non-default primary_affinity values for these
+ * osds?
+ */
+ if (!osdmap->osd_primary_affinity)
+ return;
+
+ for (i = 0; i < len; i++) {
+ int osd = osds[i];
+
+ if (osd != CRUSH_ITEM_NONE &&
+ osdmap->osd_primary_affinity[osd] !=
+ CEPH_OSD_DEFAULT_PRIMARY_AFFINITY) {
+ break;
+ }
+ }
+ if (i == len)
+ return;
+
+ /*
+ * Pick the primary. Feed both the seed (for the pg) and the
+ * osd into the hash/rng so that a proportional fraction of an
+ * osd's pgs get rejected as primary.
+ */
+ for (i = 0; i < len; i++) {
+ int osd = osds[i];
+ u32 aff;
+
+ if (osd == CRUSH_ITEM_NONE)
+ continue;
+
+ aff = osdmap->osd_primary_affinity[osd];
+ if (aff < CEPH_OSD_MAX_PRIMARY_AFFINITY &&
+ (crush_hash32_2(CRUSH_HASH_RJENKINS1,
+ pps, osd) >> 16) >= aff) {
+ /*
+ * We chose not to use this primary. Note it
+ * anyway as a fallback in case we don't pick
+ * anyone else, but keep looking.
+ */
+ if (pos < 0)
+ pos = i;
+ } else {
+ pos = i;
+ break;
+ }
+ }
+ if (pos < 0)
+ return;
+
+ *primary = osds[pos];
+
+ if (ceph_can_shift_osds(pool) && pos > 0) {
+ /* move the new primary to the front */
+ for (i = pos; i > 0; i--)
+ osds[i] = osds[i - 1];
+ osds[0] = *primary;
+ }
+}
+
+/*
+ * Given up set, apply pg_temp and primary_temp mappings.
+ *
+ * Return acting set length. *primary is set to acting primary osd id,
+ * or -1 if acting set is empty.
+ */
+static int apply_temps(struct ceph_osdmap *osdmap,
+ struct ceph_pg_pool_info *pool, struct ceph_pg pgid,
+ int *osds, int len, int *primary)
+{
+ struct ceph_pg_mapping *pg;
+ int temp_len;
+ int temp_primary;
+ int i;
+
+ /* raw_pg -> pg */
pgid.seed = ceph_stable_mod(pgid.seed, pool->pg_num,
pool->pg_num_mask);
+
+ /* pg_temp? */
pg = __lookup_pg_mapping(&osdmap->pg_temp, pgid);
if (pg) {
- *num = pg->len;
- return pg->osds;
+ temp_len = 0;
+ temp_primary = -1;
+
+ for (i = 0; i < pg->pg_temp.len; i++) {
+ if (ceph_osd_is_down(osdmap, pg->pg_temp.osds[i])) {
+ if (ceph_can_shift_osds(pool))
+ continue;
+ else
+ osds[temp_len++] = CRUSH_ITEM_NONE;
+ } else {
+ osds[temp_len++] = pg->pg_temp.osds[i];
+ }
+ }
+
+ /* apply pg_temp's primary */
+ for (i = 0; i < temp_len; i++) {
+ if (osds[i] != CRUSH_ITEM_NONE) {
+ temp_primary = osds[i];
+ break;
+ }
+ }
+ } else {
+ temp_len = len;
+ temp_primary = *primary;
}
- /* crush */
- ruleno = crush_find_rule(osdmap->crush, pool->crush_ruleset,
- pool->type, pool->size);
- if (ruleno < 0) {
- pr_err("no crush rule pool %lld ruleset %d type %d size %d\n",
- pgid.pool, pool->crush_ruleset, pool->type,
- pool->size);
- return NULL;
+ /* primary_temp? */
+ pg = __lookup_pg_mapping(&osdmap->primary_temp, pgid);
+ if (pg)
+ temp_primary = pg->primary_temp.osd;
+
+ *primary = temp_primary;
+ return temp_len;
+}
+
+/*
+ * Calculate acting set for given pgid.
+ *
+ * Return acting set length, or error. *primary is set to acting
+ * primary osd id, or -1 if acting set is empty or on error.
+ */
+int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
+ int *osds, int *primary)
+{
+ struct ceph_pg_pool_info *pool;
+ u32 pps;
+ int len;
+
+ pool = __lookup_pg_pool(&osdmap->pg_pools, pgid.pool);
+ if (!pool) {
+ *primary = -1;
+ return -ENOENT;
}
if (pool->flags & CEPH_POOL_FLAG_HASHPSPOOL) {
- /* hash pool id and seed sothat pool PGs do not overlap */
+ /* hash pool id and seed so that pool PGs do not overlap */
pps = crush_hash32_2(CRUSH_HASH_RJENKINS1,
ceph_stable_mod(pgid.seed, pool->pgp_num,
pool->pgp_num_mask),
pgid.pool);
} else {
/*
- * legacy ehavior: add ps and pool together. this is
+ * legacy behavior: add ps and pool together. this is
* not a great approach because the PGs from each pool
* will overlap on top of each other: 0.5 == 1.4 ==
* 2.3 == ...
@@ -1205,38 +1698,20 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
pool->pgp_num_mask) +
(unsigned)pgid.pool;
}
- r = crush_do_rule_ary(osdmap->crush, ruleno, pps,
- osds, min_t(int, pool->size, *num),
- osdmap->osd_weight, osdmap->max_osd);
- if (r < 0) {
- pr_err("error %d from crush rule: pool %lld ruleset %d type %d"
- " size %d\n", r, pgid.pool, pool->crush_ruleset,
- pool->type, pool->size);
- return NULL;
+
+ len = pg_to_raw_osds(osdmap, pool, pgid, pps, osds);
+ if (len < 0) {
+ *primary = -1;
+ return len;
}
- *num = r;
- return osds;
-}
-/*
- * Return acting set for given pgid.
- */
-int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
- int *acting)
-{
- int rawosds[CEPH_PG_MAX_SIZE], *osds;
- int i, o, num = CEPH_PG_MAX_SIZE;
+ len = raw_to_up_osds(osdmap, pool, osds, len, primary);
- osds = calc_pg_raw(osdmap, pgid, rawosds, &num);
- if (!osds)
- return -1;
+ apply_primary_affinity(osdmap, pps, pool, osds, len, primary);
- /* primary is first up osd */
- o = 0;
- for (i = 0; i < num; i++)
- if (ceph_osd_is_up(osdmap, osds[i]))
- acting[o++] = osds[i];
- return o;
+ len = apply_temps(osdmap, pool, pgid, osds, len, primary);
+
+ return len;
}
/*
@@ -1244,17 +1719,11 @@ int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
*/
int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid)
{
- int rawosds[CEPH_PG_MAX_SIZE], *osds;
- int i, num = CEPH_PG_MAX_SIZE;
+ int osds[CEPH_PG_MAX_SIZE];
+ int primary;
- osds = calc_pg_raw(osdmap, pgid, rawosds, &num);
- if (!osds)
- return -1;
+ ceph_calc_pg_acting(osdmap, pgid, osds, &primary);
- /* primary is first up osd */
- for (i = 0; i < num; i++)
- if (ceph_osd_is_up(osdmap, osds[i]))
- return osds[i];
- return -1;
+ return primary;
}
EXPORT_SYMBOL(ceph_calc_pg_primary);
diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c
index 815a2249cfa..555013034f7 100644
--- a/net/ceph/pagevec.c
+++ b/net/ceph/pagevec.c
@@ -53,7 +53,10 @@ void ceph_put_page_vector(struct page **pages, int num_pages, bool dirty)
set_page_dirty_lock(pages[i]);
put_page(pages[i]);
}
- kfree(pages);
+ if (is_vmalloc_addr(pages))
+ vfree(pages);
+ else
+ kfree(pages);
}
EXPORT_SYMBOL(ceph_put_page_vector);
@@ -165,36 +168,6 @@ void ceph_copy_from_page_vector(struct page **pages,
EXPORT_SYMBOL(ceph_copy_from_page_vector);
/*
- * copy user data from a page vector into a user pointer
- */
-int ceph_copy_page_vector_to_user(struct page **pages,
- void __user *data,
- loff_t off, size_t len)
-{
- int i = 0;
- int po = off & ~PAGE_CACHE_MASK;
- int left = len;
- int l, bad;
-
- while (left > 0) {
- l = min_t(int, left, PAGE_CACHE_SIZE-po);
- bad = copy_to_user(data, page_address(pages[i]) + po, l);
- if (bad == l)
- return -EFAULT;
- data += l - bad;
- left -= l - bad;
- if (po) {
- po += l - bad;
- if (po == PAGE_CACHE_SIZE)
- po = 0;
- }
- i++;
- }
- return len;
-}
-EXPORT_SYMBOL(ceph_copy_page_vector_to_user);
-
-/*
* Zero an extent within a page vector. Offset is relative to the
* start of the first page.
*/
diff --git a/net/compat.c b/net/compat.c
index 9a76eaf6318..bc8aeefddf3 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -85,7 +85,7 @@ int verify_compat_iovec(struct msghdr *kern_msg, struct iovec *kern_iov,
{
int tot_len;
- if (kern_msg->msg_namelen) {
+ if (kern_msg->msg_name && kern_msg->msg_namelen) {
if (mode == VERIFY_READ) {
int err = move_addr_to_kernel(kern_msg->msg_name,
kern_msg->msg_namelen,
@@ -93,10 +93,11 @@ int verify_compat_iovec(struct msghdr *kern_msg, struct iovec *kern_iov,
if (err < 0)
return err;
}
- if (kern_msg->msg_name)
- kern_msg->msg_name = kern_address;
- } else
+ kern_msg->msg_name = kern_address;
+ } else {
kern_msg->msg_name = NULL;
+ kern_msg->msg_namelen = 0;
+ }
tot_len = iov_from_user_compat_to_kern(kern_iov,
(struct compat_iovec __user *)kern_msg->msg_iov,
diff --git a/net/core/Makefile b/net/core/Makefile
index 9628c20acff..71093d94ad2 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -9,7 +9,7 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \
neighbour.o rtnetlink.o utils.o link_watch.o filter.o \
- sock_diag.o dev_ioctl.o
+ sock_diag.o dev_ioctl.o tso.o
obj-$(CONFIG_XFRM) += flow.o
obj-y += net-sysfs.o
@@ -21,5 +21,6 @@ obj-$(CONFIG_FIB_RULES) += fib_rules.o
obj-$(CONFIG_TRACEPOINTS) += net-traces.o
obj-$(CONFIG_NET_DROP_MONITOR) += drop_monitor.o
obj-$(CONFIG_NETWORK_PHY_TIMESTAMPING) += timestamping.o
+obj-$(CONFIG_NET_PTP_CLASSIFY) += ptp_classifier.o
obj-$(CONFIG_CGROUP_NET_PRIO) += netprio_cgroup.o
obj-$(CONFIG_CGROUP_NET_CLASSID) += netclassid_cgroup.o
diff --git a/net/core/datagram.c b/net/core/datagram.c
index a16ed7bbe37..488dd1a825c 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -740,17 +740,37 @@ __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len)
sum = csum_fold(skb_checksum(skb, 0, len, skb->csum));
if (likely(!sum)) {
- if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
+ if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
+ !skb->csum_complete_sw)
netdev_rx_csum_fault(skb->dev);
- skb->ip_summed = CHECKSUM_UNNECESSARY;
}
+ skb->csum_valid = !sum;
return sum;
}
EXPORT_SYMBOL(__skb_checksum_complete_head);
__sum16 __skb_checksum_complete(struct sk_buff *skb)
{
- return __skb_checksum_complete_head(skb, skb->len);
+ __wsum csum;
+ __sum16 sum;
+
+ csum = skb_checksum(skb, 0, skb->len, 0);
+
+ /* skb->csum holds pseudo checksum */
+ sum = csum_fold(csum_add(skb->csum, csum));
+ if (likely(!sum)) {
+ if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
+ !skb->csum_complete_sw)
+ netdev_rx_csum_fault(skb->dev);
+ }
+
+ /* Save full packet checksum */
+ skb->csum = csum;
+ skb->ip_summed = CHECKSUM_COMPLETE;
+ skb->csum_complete_sw = 1;
+ skb->csum_valid = !sum;
+
+ return sum;
}
EXPORT_SYMBOL(__skb_checksum_complete);
diff --git a/net/core/dev.c b/net/core/dev.c
index bc3c89792b0..367a586d0c8 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -148,6 +148,9 @@ struct list_head ptype_all __read_mostly; /* Taps */
static struct list_head offload_base __read_mostly;
static int netif_rx_internal(struct sk_buff *skb);
+static int call_netdevice_notifiers_info(unsigned long val,
+ struct net_device *dev,
+ struct netdev_notifier_info *info);
/*
* The @dev_base_head list is protected by @dev_base_lock and the rtnl
@@ -1207,7 +1210,11 @@ EXPORT_SYMBOL(netdev_features_change);
void netdev_state_change(struct net_device *dev)
{
if (dev->flags & IFF_UP) {
- call_netdevice_notifiers(NETDEV_CHANGE, dev);
+ struct netdev_notifier_change_info change_info;
+
+ change_info.flags_changed = 0;
+ call_netdevice_notifiers_info(NETDEV_CHANGE, dev,
+ &change_info.info);
rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL);
}
}
@@ -1245,7 +1252,7 @@ static int __dev_open(struct net_device *dev)
* If we don't do this there is a chance ndo_poll_controller
* or ndo_poll may be running while we open the device
*/
- netpoll_rx_disable(dev);
+ netpoll_poll_disable(dev);
ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev);
ret = notifier_to_errno(ret);
@@ -1260,7 +1267,7 @@ static int __dev_open(struct net_device *dev)
if (!ret && ops->ndo_open)
ret = ops->ndo_open(dev);
- netpoll_rx_enable(dev);
+ netpoll_poll_enable(dev);
if (ret)
clear_bit(__LINK_STATE_START, &dev->state);
@@ -1313,6 +1320,9 @@ static int __dev_close_many(struct list_head *head)
might_sleep();
list_for_each_entry(dev, head, close_list) {
+ /* Temporarily disable netpoll until the interface is down */
+ netpoll_poll_disable(dev);
+
call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
clear_bit(__LINK_STATE_START, &dev->state);
@@ -1323,7 +1333,7 @@ static int __dev_close_many(struct list_head *head)
* dev->stop() will invoke napi_disable() on all of it's
* napi_struct instances on this device.
*/
- smp_mb__after_clear_bit(); /* Commit netif_running(). */
+ smp_mb__after_atomic(); /* Commit netif_running(). */
}
dev_deactivate_many(head);
@@ -1343,6 +1353,7 @@ static int __dev_close_many(struct list_head *head)
dev->flags &= ~IFF_UP;
net_dmaengine_put();
+ netpoll_poll_enable(dev);
}
return 0;
@@ -1353,14 +1364,10 @@ static int __dev_close(struct net_device *dev)
int retval;
LIST_HEAD(single);
- /* Temporarily disable netpoll until the interface is down */
- netpoll_rx_disable(dev);
-
list_add(&dev->close_list, &single);
retval = __dev_close_many(&single);
list_del(&single);
- netpoll_rx_enable(dev);
return retval;
}
@@ -1398,14 +1405,9 @@ int dev_close(struct net_device *dev)
if (dev->flags & IFF_UP) {
LIST_HEAD(single);
- /* Block netpoll rx while the interface is going down */
- netpoll_rx_disable(dev);
-
list_add(&dev->close_list, &single);
dev_close_many(&single);
list_del(&single);
-
- netpoll_rx_enable(dev);
}
return 0;
}
@@ -1645,8 +1647,7 @@ static inline void net_timestamp_set(struct sk_buff *skb)
__net_timestamp(SKB); \
} \
-static inline bool is_skb_forwardable(struct net_device *dev,
- struct sk_buff *skb)
+bool is_skb_forwardable(struct net_device *dev, struct sk_buff *skb)
{
unsigned int len;
@@ -1665,6 +1666,30 @@ static inline bool is_skb_forwardable(struct net_device *dev,
return false;
}
+EXPORT_SYMBOL_GPL(is_skb_forwardable);
+
+int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
+{
+ if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
+ if (skb_copy_ubufs(skb, GFP_ATOMIC)) {
+ atomic_long_inc(&dev->rx_dropped);
+ kfree_skb(skb);
+ return NET_RX_DROP;
+ }
+ }
+
+ if (unlikely(!is_skb_forwardable(dev, skb))) {
+ atomic_long_inc(&dev->rx_dropped);
+ kfree_skb(skb);
+ return NET_RX_DROP;
+ }
+
+ skb_scrub_packet(skb, true);
+ skb->protocol = eth_type_trans(skb, dev);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(__dev_forward_skb);
/**
* dev_forward_skb - loopback an skb to another netif
@@ -1686,24 +1711,7 @@ static inline bool is_skb_forwardable(struct net_device *dev,
*/
int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
{
- if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
- if (skb_copy_ubufs(skb, GFP_ATOMIC)) {
- atomic_long_inc(&dev->rx_dropped);
- kfree_skb(skb);
- return NET_RX_DROP;
- }
- }
-
- if (unlikely(!is_skb_forwardable(dev, skb))) {
- atomic_long_inc(&dev->rx_dropped);
- kfree_skb(skb);
- return NET_RX_DROP;
- }
-
- skb_scrub_packet(skb, true);
- skb->protocol = eth_type_trans(skb, dev);
-
- return netif_rx_internal(skb);
+ return __dev_forward_skb(dev, skb) ?: netif_rx_internal(skb);
}
EXPORT_SYMBOL_GPL(dev_forward_skb);
@@ -2288,8 +2296,8 @@ EXPORT_SYMBOL(skb_checksum_help);
__be16 skb_network_protocol(struct sk_buff *skb, int *depth)
{
+ unsigned int vlan_depth = skb->mac_len;
__be16 type = skb->protocol;
- int vlan_depth = ETH_HLEN;
/* Tunnel gso handlers can set protocol to ethernet. */
if (type == htons(ETH_P_TEB)) {
@@ -2302,15 +2310,30 @@ __be16 skb_network_protocol(struct sk_buff *skb, int *depth)
type = eth->h_proto;
}
- while (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) {
- struct vlan_hdr *vh;
+ /* if skb->protocol is 802.1Q/AD then the header should already be
+ * present at mac_len - VLAN_HLEN (if mac_len > 0), or at
+ * ETH_HLEN otherwise
+ */
+ if (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) {
+ if (vlan_depth) {
+ if (unlikely(WARN_ON(vlan_depth < VLAN_HLEN)))
+ return 0;
+ vlan_depth -= VLAN_HLEN;
+ } else {
+ vlan_depth = ETH_HLEN;
+ }
+ do {
+ struct vlan_hdr *vh;
- if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN)))
- return 0;
+ if (unlikely(!pskb_may_pull(skb,
+ vlan_depth + VLAN_HLEN)))
+ return 0;
- vh = (struct vlan_hdr *)(skb->data + vlan_depth);
- type = vh->h_vlan_encapsulated_proto;
- vlan_depth += VLAN_HLEN;
+ vh = (struct vlan_hdr *)(skb->data + vlan_depth);
+ type = vh->h_vlan_encapsulated_proto;
+ vlan_depth += VLAN_HLEN;
+ } while (type == htons(ETH_P_8021Q) ||
+ type == htons(ETH_P_8021AD));
}
*depth = vlan_depth;
@@ -2423,7 +2446,7 @@ EXPORT_SYMBOL(netdev_rx_csum_fault);
* 2. No high memory really exists on this machine.
*/
-static int illegal_highdma(const struct net_device *dev, struct sk_buff *skb)
+static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
{
#ifdef CONFIG_HIGHMEM
int i;
@@ -2497,39 +2520,63 @@ static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features)
return 0;
}
+/* If MPLS offload request, verify we are testing hardware MPLS features
+ * instead of standard features for the netdev.
+ */
+#ifdef CONFIG_NET_MPLS_GSO
+static netdev_features_t net_mpls_features(struct sk_buff *skb,
+ netdev_features_t features,
+ __be16 type)
+{
+ if (type == htons(ETH_P_MPLS_UC) || type == htons(ETH_P_MPLS_MC))
+ features &= skb->dev->mpls_features;
+
+ return features;
+}
+#else
+static netdev_features_t net_mpls_features(struct sk_buff *skb,
+ netdev_features_t features,
+ __be16 type)
+{
+ return features;
+}
+#endif
+
static netdev_features_t harmonize_features(struct sk_buff *skb,
- const struct net_device *dev,
- netdev_features_t features)
+ netdev_features_t features)
{
int tmp;
+ __be16 type;
+
+ type = skb_network_protocol(skb, &tmp);
+ features = net_mpls_features(skb, features, type);
if (skb->ip_summed != CHECKSUM_NONE &&
- !can_checksum_protocol(features, skb_network_protocol(skb, &tmp))) {
+ !can_checksum_protocol(features, type)) {
features &= ~NETIF_F_ALL_CSUM;
- } else if (illegal_highdma(dev, skb)) {
+ } else if (illegal_highdma(skb->dev, skb)) {
features &= ~NETIF_F_SG;
}
return features;
}
-netdev_features_t netif_skb_dev_features(struct sk_buff *skb,
- const struct net_device *dev)
+netdev_features_t netif_skb_features(struct sk_buff *skb)
{
__be16 protocol = skb->protocol;
- netdev_features_t features = dev->features;
+ netdev_features_t features = skb->dev->features;
- if (skb_shinfo(skb)->gso_segs > dev->gso_max_segs)
+ if (skb_shinfo(skb)->gso_segs > skb->dev->gso_max_segs)
features &= ~NETIF_F_GSO_MASK;
if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) {
struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
protocol = veh->h_vlan_encapsulated_proto;
} else if (!vlan_tx_tag_present(skb)) {
- return harmonize_features(skb, dev, features);
+ return harmonize_features(skb, features);
}
- features &= (dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX |
+ features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX |
NETIF_F_HW_VLAN_STAG_TX);
if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD))
@@ -2537,9 +2584,9 @@ netdev_features_t netif_skb_dev_features(struct sk_buff *skb,
NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_CTAG_TX |
NETIF_F_HW_VLAN_STAG_TX;
- return harmonize_features(skb, dev, features);
+ return harmonize_features(skb, features);
}
-EXPORT_SYMBOL(netif_skb_dev_features);
+EXPORT_SYMBOL(netif_skb_features);
int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
struct netdev_queue *txq)
@@ -2885,6 +2932,7 @@ recursion_alert:
rc = -ENETDOWN;
rcu_read_unlock_bh();
+ atomic_long_inc(&dev->tx_dropped);
kfree_skb(skb);
return rc;
out:
@@ -2957,7 +3005,7 @@ set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
flow_table = rcu_dereference(rxqueue->rps_flow_table);
if (!flow_table)
goto out;
- flow_id = skb->rxhash & flow_table->mask;
+ flow_id = skb_get_hash(skb) & flow_table->mask;
rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb,
rxq_index, flow_id);
if (rc < 0)
@@ -2991,6 +3039,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
struct rps_sock_flow_table *sock_flow_table;
int cpu = -1;
u16 tcpu;
+ u32 hash;
if (skb_rx_queue_recorded(skb)) {
u16 index = skb_get_rx_queue(skb);
@@ -3019,7 +3068,8 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
}
skb_reset_network_header(skb);
- if (!skb_get_hash(skb))
+ hash = skb_get_hash(skb);
+ if (!hash)
goto done;
flow_table = rcu_dereference(rxqueue->rps_flow_table);
@@ -3028,11 +3078,10 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
u16 next_cpu;
struct rps_dev_flow *rflow;
- rflow = &flow_table->flows[skb->rxhash & flow_table->mask];
+ rflow = &flow_table->flows[hash & flow_table->mask];
tcpu = rflow->cpu;
- next_cpu = sock_flow_table->ents[skb->rxhash &
- sock_flow_table->mask];
+ next_cpu = sock_flow_table->ents[hash & sock_flow_table->mask];
/*
* If the desired CPU (where last recvmsg was done) is
@@ -3061,7 +3110,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
}
if (map) {
- tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32];
+ tcpu = map->cpus[((u64) hash * map->len) >> 32];
if (cpu_online(tcpu)) {
cpu = tcpu;
@@ -3236,10 +3285,6 @@ static int netif_rx_internal(struct sk_buff *skb)
{
int ret;
- /* if netpoll wants it, pretend we never saw it */
- if (netpoll_rx(skb))
- return NET_RX_DROP;
-
net_timestamp_check(netdev_tstamp_prequeue, skb);
trace_netif_rx(skb);
@@ -3350,7 +3395,7 @@ static void net_tx_action(struct softirq_action *h)
root_lock = qdisc_lock(q);
if (spin_trylock(root_lock)) {
- smp_mb__before_clear_bit();
+ smp_mb__before_atomic();
clear_bit(__QDISC_STATE_SCHED,
&q->state);
qdisc_run(q);
@@ -3360,7 +3405,7 @@ static void net_tx_action(struct softirq_action *h)
&q->state)) {
__netif_reschedule(q);
} else {
- smp_mb__before_clear_bit();
+ smp_mb__before_atomic();
clear_bit(__QDISC_STATE_SCHED,
&q->state);
}
@@ -3446,7 +3491,7 @@ out:
* @rx_handler: receive handler to register
* @rx_handler_data: data pointer that is used by rx handler
*
- * Register a receive hander for a device. This handler will then be
+ * Register a receive handler for a device. This handler will then be
* called from __netif_receive_skb. A negative errno code is returned
* on a failure.
*
@@ -3500,11 +3545,11 @@ EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
static bool skb_pfmemalloc_protocol(struct sk_buff *skb)
{
switch (skb->protocol) {
- case __constant_htons(ETH_P_ARP):
- case __constant_htons(ETH_P_IP):
- case __constant_htons(ETH_P_IPV6):
- case __constant_htons(ETH_P_8021Q):
- case __constant_htons(ETH_P_8021AD):
+ case htons(ETH_P_ARP):
+ case htons(ETH_P_IP):
+ case htons(ETH_P_IPV6):
+ case htons(ETH_P_8021Q):
+ case htons(ETH_P_8021AD):
return true;
default:
return false;
@@ -3525,10 +3570,6 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
trace_netif_receive_skb(skb);
- /* if we've gotten here through NAPI, check netpoll */
- if (netpoll_receive_skb(skb))
- goto out;
-
orig_dev = skb->dev;
skb_reset_network_header(skb);
@@ -3655,7 +3696,6 @@ drop:
unlock:
rcu_read_unlock();
-out:
return ret;
}
@@ -3845,10 +3885,10 @@ static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb)
diffs |= p->vlan_tci ^ skb->vlan_tci;
if (maclen == ETH_HLEN)
diffs |= compare_ether_header(skb_mac_header(p),
- skb_gro_mac_header(skb));
+ skb_mac_header(skb));
else if (!diffs)
diffs = memcmp(skb_mac_header(p),
- skb_gro_mac_header(skb),
+ skb_mac_header(skb),
maclen);
NAPI_GRO_CB(p)->same_flow = !diffs;
}
@@ -3871,6 +3911,27 @@ static void skb_gro_reset_offset(struct sk_buff *skb)
}
}
+static void gro_pull_from_frag0(struct sk_buff *skb, int grow)
+{
+ struct skb_shared_info *pinfo = skb_shinfo(skb);
+
+ BUG_ON(skb->end - skb->tail < grow);
+
+ memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow);
+
+ skb->data_len -= grow;
+ skb->tail += grow;
+
+ pinfo->frags[0].page_offset += grow;
+ skb_frag_size_sub(&pinfo->frags[0], grow);
+
+ if (unlikely(!skb_frag_size(&pinfo->frags[0]))) {
+ skb_frag_unref(skb, 0);
+ memmove(pinfo->frags, pinfo->frags + 1,
+ --pinfo->nr_frags * sizeof(pinfo->frags[0]));
+ }
+}
+
static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
{
struct sk_buff **pp = NULL;
@@ -3879,14 +3940,14 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
struct list_head *head = &offload_base;
int same_flow;
enum gro_result ret;
+ int grow;
- if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb))
+ if (!(skb->dev->features & NETIF_F_GRO))
goto normal;
if (skb_is_gso(skb) || skb_has_frag_list(skb))
goto normal;
- skb_gro_reset_offset(skb);
gro_list_prepare(napi, skb);
NAPI_GRO_CB(skb)->csum = skb->csum; /* Needed for CHECKSUM_COMPLETE */
@@ -3944,33 +4005,16 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
}
NAPI_GRO_CB(skb)->count = 1;
NAPI_GRO_CB(skb)->age = jiffies;
+ NAPI_GRO_CB(skb)->last = skb;
skb_shinfo(skb)->gso_size = skb_gro_len(skb);
skb->next = napi->gro_list;
napi->gro_list = skb;
ret = GRO_HELD;
pull:
- if (skb_headlen(skb) < skb_gro_offset(skb)) {
- int grow = skb_gro_offset(skb) - skb_headlen(skb);
-
- BUG_ON(skb->end - skb->tail < grow);
-
- memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow);
-
- skb->tail += grow;
- skb->data_len -= grow;
-
- skb_shinfo(skb)->frags[0].page_offset += grow;
- skb_frag_size_sub(&skb_shinfo(skb)->frags[0], grow);
-
- if (unlikely(!skb_frag_size(&skb_shinfo(skb)->frags[0]))) {
- skb_frag_unref(skb, 0);
- memmove(skb_shinfo(skb)->frags,
- skb_shinfo(skb)->frags + 1,
- --skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t));
- }
- }
-
+ grow = skb_gro_offset(skb) - skb_headlen(skb);
+ if (grow > 0)
+ gro_pull_from_frag0(skb, grow);
ok:
return ret;
@@ -4038,6 +4082,8 @@ gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
{
trace_napi_gro_receive_entry(skb);
+ skb_gro_reset_offset(skb);
+
return napi_skb_finish(dev_gro_receive(napi, skb), skb);
}
EXPORT_SYMBOL(napi_gro_receive);
@@ -4050,6 +4096,9 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
skb->vlan_tci = 0;
skb->dev = napi->dev;
skb->skb_iif = 0;
+ skb->encapsulation = 0;
+ skb_shinfo(skb)->gso_type = 0;
+ skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
napi->skb = skb;
}
@@ -4066,12 +4115,16 @@ struct sk_buff *napi_get_frags(struct napi_struct *napi)
}
EXPORT_SYMBOL(napi_get_frags);
-static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb,
- gro_result_t ret)
+static gro_result_t napi_frags_finish(struct napi_struct *napi,
+ struct sk_buff *skb,
+ gro_result_t ret)
{
switch (ret) {
case GRO_NORMAL:
- if (netif_receive_skb_internal(skb))
+ case GRO_HELD:
+ __skb_push(skb, ETH_HLEN);
+ skb->protocol = eth_type_trans(skb, skb->dev);
+ if (ret == GRO_NORMAL && netif_receive_skb_internal(skb))
ret = GRO_DROP;
break;
@@ -4080,7 +4133,6 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *
napi_reuse_skb(napi, skb);
break;
- case GRO_HELD:
case GRO_MERGED:
break;
}
@@ -4088,17 +4140,41 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *
return ret;
}
+/* Upper GRO stack assumes network header starts at gro_offset=0
+ * Drivers could call both napi_gro_frags() and napi_gro_receive()
+ * We copy ethernet header into skb->data to have a common layout.
+ */
static struct sk_buff *napi_frags_skb(struct napi_struct *napi)
{
struct sk_buff *skb = napi->skb;
+ const struct ethhdr *eth;
+ unsigned int hlen = sizeof(*eth);
napi->skb = NULL;
- if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr)))) {
- napi_reuse_skb(napi, skb);
- return NULL;
+ skb_reset_mac_header(skb);
+ skb_gro_reset_offset(skb);
+
+ eth = skb_gro_header_fast(skb, 0);
+ if (unlikely(skb_gro_header_hard(skb, hlen))) {
+ eth = skb_gro_header_slow(skb, hlen, 0);
+ if (unlikely(!eth)) {
+ napi_reuse_skb(napi, skb);
+ return NULL;
+ }
+ } else {
+ gro_pull_from_frag0(skb, hlen);
+ NAPI_GRO_CB(skb)->frag0 += hlen;
+ NAPI_GRO_CB(skb)->frag0_len -= hlen;
}
- skb->protocol = eth_type_trans(skb, skb->dev);
+ __skb_pull(skb, hlen);
+
+ /*
+ * This works because the only protocols we care about don't require
+ * special handling.
+ * We'll fix it up properly in napi_frags_finish()
+ */
+ skb->protocol = eth->h_proto;
return skb;
}
@@ -4160,9 +4236,8 @@ static int process_backlog(struct napi_struct *napi, int quota)
#endif
napi->weight = weight_p;
local_irq_disable();
- while (work < quota) {
+ while (1) {
struct sk_buff *skb;
- unsigned int qlen;
while ((skb = __skb_dequeue(&sd->process_queue))) {
local_irq_enable();
@@ -4176,24 +4251,24 @@ static int process_backlog(struct napi_struct *napi, int quota)
}
rps_lock(sd);
- qlen = skb_queue_len(&sd->input_pkt_queue);
- if (qlen)
- skb_queue_splice_tail_init(&sd->input_pkt_queue,
- &sd->process_queue);
-
- if (qlen < quota - work) {
+ if (skb_queue_empty(&sd->input_pkt_queue)) {
/*
* Inline a custom version of __napi_complete().
* only current cpu owns and manipulates this napi,
- * and NAPI_STATE_SCHED is the only possible flag set on backlog.
- * we can use a plain write instead of clear_bit(),
+ * and NAPI_STATE_SCHED is the only possible flag set
+ * on backlog.
+ * We can use a plain write instead of clear_bit(),
* and we dont need an smp_mb() memory barrier.
*/
list_del(&napi->poll_list);
napi->state = 0;
+ rps_unlock(sd);
- quota = work + qlen;
+ break;
}
+
+ skb_queue_splice_tail_init(&sd->input_pkt_queue,
+ &sd->process_queue);
rps_unlock(sd);
}
local_irq_enable();
@@ -4223,7 +4298,7 @@ void __napi_complete(struct napi_struct *n)
BUG_ON(n->gro_list);
list_del(&n->poll_list);
- smp_mb__before_clear_bit();
+ smp_mb__before_atomic();
clear_bit(NAPI_STATE_SCHED, &n->state);
}
EXPORT_SYMBOL(__napi_complete);
@@ -4522,6 +4597,32 @@ void *netdev_adjacent_get_private(struct list_head *adj_list)
EXPORT_SYMBOL(netdev_adjacent_get_private);
/**
+ * netdev_upper_get_next_dev_rcu - Get the next dev from upper list
+ * @dev: device
+ * @iter: list_head ** of the current position
+ *
+ * Gets the next device from the dev's upper list, starting from iter
+ * position. The caller must hold RCU read lock.
+ */
+struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev,
+ struct list_head **iter)
+{
+ struct netdev_adjacent *upper;
+
+ WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_rtnl_is_held());
+
+ upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
+
+ if (&upper->list == &dev->adj_list.upper)
+ return NULL;
+
+ *iter = &upper->list;
+
+ return upper->dev;
+}
+EXPORT_SYMBOL(netdev_upper_get_next_dev_rcu);
+
+/**
* netdev_all_upper_get_next_dev_rcu - Get the next dev from upper list
* @dev: device
* @iter: list_head ** of the current position
@@ -4568,8 +4669,7 @@ void *netdev_lower_get_next_private(struct net_device *dev,
if (&lower->list == &dev->adj_list.lower)
return NULL;
- if (iter)
- *iter = lower->list.next;
+ *iter = lower->list.next;
return lower->private;
}
@@ -4597,14 +4697,39 @@ void *netdev_lower_get_next_private_rcu(struct net_device *dev,
if (&lower->list == &dev->adj_list.lower)
return NULL;
- if (iter)
- *iter = &lower->list;
+ *iter = &lower->list;
return lower->private;
}
EXPORT_SYMBOL(netdev_lower_get_next_private_rcu);
/**
+ * netdev_lower_get_next - Get the next device from the lower neighbour
+ * list
+ * @dev: device
+ * @iter: list_head ** of the current position
+ *
+ * Gets the next netdev_adjacent from the dev's lower neighbour
+ * list, starting from iter position. The caller must hold RTNL lock or
+ * its own locking that guarantees that the neighbour lower
+ * list will remain unchanged.
+ */
+void *netdev_lower_get_next(struct net_device *dev, struct list_head **iter)
+{
+ struct netdev_adjacent *lower;
+
+ lower = list_entry((*iter)->next, struct netdev_adjacent, list);
+
+ if (&lower->list == &dev->adj_list.lower)
+ return NULL;
+
+ *iter = &lower->list;
+
+ return lower->dev;
+}
+EXPORT_SYMBOL(netdev_lower_get_next);
+
+/**
* netdev_lower_get_first_private_rcu - Get the first ->private from the
* lower neighbour list, RCU
* variant
@@ -5054,6 +5179,30 @@ void *netdev_lower_dev_get_private(struct net_device *dev,
}
EXPORT_SYMBOL(netdev_lower_dev_get_private);
+
+int dev_get_nest_level(struct net_device *dev,
+ bool (*type_check)(struct net_device *dev))
+{
+ struct net_device *lower = NULL;
+ struct list_head *iter;
+ int max_nest = -1;
+ int nest;
+
+ ASSERT_RTNL();
+
+ netdev_for_each_lower_dev(dev, lower, iter) {
+ nest = dev_get_nest_level(lower, type_check);
+ if (max_nest < nest)
+ max_nest = nest;
+ }
+
+ if (type_check(dev))
+ max_nest++;
+
+ return max_nest;
+}
+EXPORT_SYMBOL(dev_get_nest_level);
+
static void dev_change_rx_flags(struct net_device *dev, int flags)
{
const struct net_device_ops *ops = dev->netdev_ops;
@@ -5523,7 +5672,7 @@ static int dev_new_index(struct net *net)
/* Delayed registration/unregisteration */
static LIST_HEAD(net_todo_list);
-static DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq);
+DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq);
static void net_set_todo(struct net_device *dev)
{
@@ -5580,10 +5729,6 @@ static void rollback_registered_many(struct list_head *head)
*/
call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
- if (!dev->rtnl_link_ops ||
- dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
- rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL);
-
/*
* Flush the unicast and multicast chains
*/
@@ -5593,6 +5738,10 @@ static void rollback_registered_many(struct list_head *head)
if (dev->netdev_ops->ndo_uninit)
dev->netdev_ops->ndo_uninit(dev);
+ if (!dev->rtnl_link_ops ||
+ dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
+ rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL);
+
/* Notifier chain MUST detach us all upper devices. */
WARN_ON(netdev_has_any_upper_dev(dev));
@@ -5676,6 +5825,13 @@ static netdev_features_t netdev_fix_features(struct net_device *dev,
}
}
+#ifdef CONFIG_NET_RX_BUSY_POLL
+ if (dev->netdev_ops->ndo_busy_poll)
+ features |= NETIF_F_BUSY_POLL;
+ else
+#endif
+ features &= ~NETIF_F_BUSY_POLL;
+
return features;
}
@@ -5811,10 +5967,7 @@ static void netdev_init_one_queue(struct net_device *dev,
static void netif_free_tx_queues(struct net_device *dev)
{
- if (is_vmalloc_addr(dev->_tx))
- vfree(dev->_tx);
- else
- kfree(dev->_tx);
+ kvfree(dev->_tx);
}
static int netif_alloc_netdev_queues(struct net_device *dev)
@@ -6251,6 +6404,7 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
netdev_stats_to_stats64(storage, &dev->stats);
}
storage->rx_dropped += atomic_long_read(&dev->rx_dropped);
+ storage->tx_dropped += atomic_long_read(&dev->tx_dropped);
return storage;
}
EXPORT_SYMBOL(dev_get_stats);
@@ -6287,10 +6441,7 @@ void netdev_freemem(struct net_device *dev)
{
char *addr = (char *)dev - dev->padded;
- if (is_vmalloc_addr(addr))
- vfree(addr);
- else
- kfree(addr);
+ kvfree(addr);
}
/**
@@ -6395,11 +6546,6 @@ free_all:
free_pcpu:
free_percpu(dev->pcpu_refcnt);
- netif_free_tx_queues(dev);
-#ifdef CONFIG_SYSFS
- kfree(dev->_rx);
-#endif
-
free_dev:
netdev_freemem(dev);
return NULL;
@@ -6496,6 +6642,9 @@ EXPORT_SYMBOL(unregister_netdevice_queue);
/**
* unregister_netdevice_many - unregister many devices
* @head: list of devices
+ *
+ * Note: As most callers use a stack allocated list_head,
+ * we force a list_del() to make sure stack won't be corrupted later.
*/
void unregister_netdevice_many(struct list_head *head)
{
@@ -6505,6 +6654,7 @@ void unregister_netdevice_many(struct list_head *head)
rollback_registered_many(head);
list_for_each_entry(dev, head, unreg_list)
net_set_todo(dev);
+ list_del(head);
}
}
EXPORT_SYMBOL(unregister_netdevice_many);
@@ -6960,7 +7110,6 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list)
}
}
unregister_netdevice_many(&dev_kill_list);
- list_del(&dev_kill_list);
rtnl_unlock();
}
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index 329d5794e7d..b6b230600b9 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -225,6 +225,91 @@ void __hw_addr_unsync(struct netdev_hw_addr_list *to_list,
}
EXPORT_SYMBOL(__hw_addr_unsync);
+/**
+ * __hw_addr_sync_dev - Synchronize device's multicast list
+ * @list: address list to synchronize
+ * @dev: device to sync
+ * @sync: function to call if address should be added
+ * @unsync: function to call if address should be removed
+ *
+ * This function is intended to be called from the ndo_set_rx_mode
+ * function of devices that require explicit address add/remove
+ * notifications. The unsync function may be NULL in which case
+ * the addresses requiring removal will simply be removed without
+ * any notification to the device.
+ **/
+int __hw_addr_sync_dev(struct netdev_hw_addr_list *list,
+ struct net_device *dev,
+ int (*sync)(struct net_device *, const unsigned char *),
+ int (*unsync)(struct net_device *,
+ const unsigned char *))
+{
+ struct netdev_hw_addr *ha, *tmp;
+ int err;
+
+ /* first go through and flush out any stale entries */
+ list_for_each_entry_safe(ha, tmp, &list->list, list) {
+ if (!ha->sync_cnt || ha->refcount != 1)
+ continue;
+
+ /* if unsync is defined and fails defer unsyncing address */
+ if (unsync && unsync(dev, ha->addr))
+ continue;
+
+ ha->sync_cnt--;
+ __hw_addr_del_entry(list, ha, false, false);
+ }
+
+ /* go through and sync new entries to the list */
+ list_for_each_entry_safe(ha, tmp, &list->list, list) {
+ if (ha->sync_cnt)
+ continue;
+
+ err = sync(dev, ha->addr);
+ if (err)
+ return err;
+
+ ha->sync_cnt++;
+ ha->refcount++;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(__hw_addr_sync_dev);
+
+/**
+ * __hw_addr_unsync_dev - Remove synchronized addresses from device
+ * @list: address list to remove synchronized addresses from
+ * @dev: device to sync
+ * @unsync: function to call if address should be removed
+ *
+ * Remove all addresses that were added to the device by __hw_addr_sync_dev().
+ * This function is intended to be called from the ndo_stop or ndo_open
+ * functions on devices that require explicit address add/remove
+ * notifications. If the unsync function pointer is NULL then this function
+ * can be used to just reset the sync_cnt for the addresses in the list.
+ **/
+void __hw_addr_unsync_dev(struct netdev_hw_addr_list *list,
+ struct net_device *dev,
+ int (*unsync)(struct net_device *,
+ const unsigned char *))
+{
+ struct netdev_hw_addr *ha, *tmp;
+
+ list_for_each_entry_safe(ha, tmp, &list->list, list) {
+ if (!ha->sync_cnt)
+ continue;
+
+ /* if unsync is defined and fails defer unsyncing address */
+ if (unsync && unsync(dev, ha->addr))
+ continue;
+
+ ha->sync_cnt--;
+ __hw_addr_del_entry(list, ha, false, false);
+ }
+}
+EXPORT_SYMBOL(__hw_addr_unsync_dev);
+
static void __hw_addr_flush(struct netdev_hw_addr_list *list)
{
struct netdev_hw_addr *ha, *tmp;
diff --git a/net/core/dst.c b/net/core/dst.c
index ca4231ec734..a028409ee43 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -142,12 +142,12 @@ loop:
mutex_unlock(&dst_gc_mutex);
}
-int dst_discard(struct sk_buff *skb)
+int dst_discard_sk(struct sock *sk, struct sk_buff *skb)
{
kfree_skb(skb);
return 0;
}
-EXPORT_SYMBOL(dst_discard);
+EXPORT_SYMBOL(dst_discard_sk);
const u32 dst_default_metrics[RTAX_MAX + 1] = {
/* This initializer is needed to force linker to place this variable
@@ -184,7 +184,7 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
dst->xfrm = NULL;
#endif
dst->input = dst_discard;
- dst->output = dst_discard;
+ dst->output = dst_discard_sk;
dst->error = 0;
dst->obsolete = initial_obsolete;
dst->header_len = 0;
@@ -209,8 +209,10 @@ static void ___dst_free(struct dst_entry *dst)
/* The first case (dev==NULL) is required, when
protocol module is unloaded.
*/
- if (dst->dev == NULL || !(dst->dev->flags&IFF_UP))
- dst->input = dst->output = dst_discard;
+ if (dst->dev == NULL || !(dst->dev->flags&IFF_UP)) {
+ dst->input = dst_discard;
+ dst->output = dst_discard_sk;
+ }
dst->obsolete = DST_OBSOLETE_DEAD;
}
@@ -267,6 +269,15 @@ again:
}
EXPORT_SYMBOL(dst_destroy);
+static void dst_destroy_rcu(struct rcu_head *head)
+{
+ struct dst_entry *dst = container_of(head, struct dst_entry, rcu_head);
+
+ dst = dst_destroy(dst);
+ if (dst)
+ __dst_free(dst);
+}
+
void dst_release(struct dst_entry *dst)
{
if (dst) {
@@ -274,11 +285,8 @@ void dst_release(struct dst_entry *dst)
newrefcnt = atomic_dec_return(&dst->__refcnt);
WARN_ON(newrefcnt < 0);
- if (unlikely(dst->flags & DST_NOCACHE) && !newrefcnt) {
- dst = dst_destroy(dst);
- if (dst)
- __dst_free(dst);
- }
+ if (unlikely(dst->flags & DST_NOCACHE) && !newrefcnt)
+ call_rcu(&dst->rcu_head, dst_destroy_rcu);
}
}
EXPORT_SYMBOL(dst_release);
@@ -361,7 +369,8 @@ static void dst_ifdown(struct dst_entry *dst, struct net_device *dev,
return;
if (!unregister) {
- dst->input = dst->output = dst_discard;
+ dst->input = dst_discard;
+ dst->output = dst_discard_sk;
} else {
dst->dev = dev_net(dst->dev)->loopback_dev;
dev_hold(dst->dev);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 30071dec287..17cb912793f 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -97,6 +97,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
[NETIF_F_RXFCS_BIT] = "rx-fcs",
[NETIF_F_RXALL_BIT] = "rx-all",
[NETIF_F_HW_L2FW_DOFFLOAD_BIT] = "l2-fwd-offload",
+ [NETIF_F_BUSY_POLL_BIT] = "busy-poll",
};
static int ethtool_get_features(struct net_device *dev, void __user *useraddr)
@@ -556,6 +557,23 @@ err_out:
return ret;
}
+static int ethtool_copy_validate_indir(u32 *indir, void __user *useraddr,
+ struct ethtool_rxnfc *rx_rings,
+ u32 size)
+{
+ int i;
+
+ if (copy_from_user(indir, useraddr, size * sizeof(indir[0])))
+ return -EFAULT;
+
+ /* Validate ring indices */
+ for (i = 0; i < size; i++)
+ if (indir[i] >= rx_rings->data)
+ return -EINVAL;
+
+ return 0;
+}
+
static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev,
void __user *useraddr)
{
@@ -564,7 +582,7 @@ static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev,
int ret;
if (!dev->ethtool_ops->get_rxfh_indir_size ||
- !dev->ethtool_ops->get_rxfh_indir)
+ !dev->ethtool_ops->get_rxfh)
return -EOPNOTSUPP;
dev_size = dev->ethtool_ops->get_rxfh_indir_size(dev);
if (dev_size == 0)
@@ -590,7 +608,7 @@ static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev,
if (!indir)
return -ENOMEM;
- ret = dev->ethtool_ops->get_rxfh_indir(dev, indir);
+ ret = dev->ethtool_ops->get_rxfh(dev, indir, NULL);
if (ret)
goto out;
@@ -612,8 +630,9 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev,
u32 *indir;
const struct ethtool_ops *ops = dev->ethtool_ops;
int ret;
+ u32 ringidx_offset = offsetof(struct ethtool_rxfh_indir, ring_index[0]);
- if (!ops->get_rxfh_indir_size || !ops->set_rxfh_indir ||
+ if (!ops->get_rxfh_indir_size || !ops->set_rxfh ||
!ops->get_rxnfc)
return -EOPNOTSUPP;
@@ -642,28 +661,184 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev,
for (i = 0; i < dev_size; i++)
indir[i] = ethtool_rxfh_indir_default(i, rx_rings.data);
} else {
- if (copy_from_user(indir,
- useraddr +
- offsetof(struct ethtool_rxfh_indir,
- ring_index[0]),
- dev_size * sizeof(indir[0]))) {
+ ret = ethtool_copy_validate_indir(indir,
+ useraddr + ringidx_offset,
+ &rx_rings,
+ dev_size);
+ if (ret)
+ goto out;
+ }
+
+ ret = ops->set_rxfh(dev, indir, NULL);
+
+out:
+ kfree(indir);
+ return ret;
+}
+
+static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev,
+ void __user *useraddr)
+{
+ int ret;
+ const struct ethtool_ops *ops = dev->ethtool_ops;
+ u32 user_indir_size, user_key_size;
+ u32 dev_indir_size = 0, dev_key_size = 0;
+ struct ethtool_rxfh rxfh;
+ u32 total_size;
+ u32 indir_bytes;
+ u32 *indir = NULL;
+ u8 *hkey = NULL;
+ u8 *rss_config;
+
+ if (!(dev->ethtool_ops->get_rxfh_indir_size ||
+ dev->ethtool_ops->get_rxfh_key_size) ||
+ !dev->ethtool_ops->get_rxfh)
+ return -EOPNOTSUPP;
+
+ if (ops->get_rxfh_indir_size)
+ dev_indir_size = ops->get_rxfh_indir_size(dev);
+ if (ops->get_rxfh_key_size)
+ dev_key_size = ops->get_rxfh_key_size(dev);
+
+ if ((dev_key_size + dev_indir_size) == 0)
+ return -EOPNOTSUPP;
+
+ if (copy_from_user(&rxfh, useraddr, sizeof(rxfh)))
+ return -EFAULT;
+ user_indir_size = rxfh.indir_size;
+ user_key_size = rxfh.key_size;
+
+ /* Check that reserved fields are 0 for now */
+ if (rxfh.rss_context || rxfh.rsvd[0] || rxfh.rsvd[1])
+ return -EINVAL;
+
+ rxfh.indir_size = dev_indir_size;
+ rxfh.key_size = dev_key_size;
+ if (copy_to_user(useraddr, &rxfh, sizeof(rxfh)))
+ return -EFAULT;
+
+ /* If the user buffer size is 0, this is just a query for the
+ * device table size and key size. Otherwise, if the User size is
+ * not equal to device table size or key size it's an error.
+ */
+ if (!user_indir_size && !user_key_size)
+ return 0;
+
+ if ((user_indir_size && (user_indir_size != dev_indir_size)) ||
+ (user_key_size && (user_key_size != dev_key_size)))
+ return -EINVAL;
+
+ indir_bytes = user_indir_size * sizeof(indir[0]);
+ total_size = indir_bytes + user_key_size;
+ rss_config = kzalloc(total_size, GFP_USER);
+ if (!rss_config)
+ return -ENOMEM;
+
+ if (user_indir_size)
+ indir = (u32 *)rss_config;
+
+ if (user_key_size)
+ hkey = rss_config + indir_bytes;
+
+ ret = dev->ethtool_ops->get_rxfh(dev, indir, hkey);
+ if (!ret) {
+ if (copy_to_user(useraddr +
+ offsetof(struct ethtool_rxfh, rss_config[0]),
+ rss_config, total_size))
ret = -EFAULT;
+ }
+
+ kfree(rss_config);
+
+ return ret;
+}
+
+static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
+ void __user *useraddr)
+{
+ int ret;
+ const struct ethtool_ops *ops = dev->ethtool_ops;
+ struct ethtool_rxnfc rx_rings;
+ struct ethtool_rxfh rxfh;
+ u32 dev_indir_size = 0, dev_key_size = 0, i;
+ u32 *indir = NULL, indir_bytes = 0;
+ u8 *hkey = NULL;
+ u8 *rss_config;
+ u32 rss_cfg_offset = offsetof(struct ethtool_rxfh, rss_config[0]);
+
+ if (!(ops->get_rxfh_indir_size || ops->get_rxfh_key_size) ||
+ !ops->get_rxnfc || !ops->set_rxfh)
+ return -EOPNOTSUPP;
+
+ if (ops->get_rxfh_indir_size)
+ dev_indir_size = ops->get_rxfh_indir_size(dev);
+ if (ops->get_rxfh_key_size)
+ dev_key_size = dev->ethtool_ops->get_rxfh_key_size(dev);
+ if ((dev_key_size + dev_indir_size) == 0)
+ return -EOPNOTSUPP;
+
+ if (copy_from_user(&rxfh, useraddr, sizeof(rxfh)))
+ return -EFAULT;
+
+ /* Check that reserved fields are 0 for now */
+ if (rxfh.rss_context || rxfh.rsvd[0] || rxfh.rsvd[1])
+ return -EINVAL;
+
+ /* If either indir or hash key is valid, proceed further.
+ * It is not valid to request that both be unchanged.
+ */
+ if ((rxfh.indir_size &&
+ rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE &&
+ rxfh.indir_size != dev_indir_size) ||
+ (rxfh.key_size && (rxfh.key_size != dev_key_size)) ||
+ (rxfh.indir_size == ETH_RXFH_INDIR_NO_CHANGE &&
+ rxfh.key_size == 0))
+ return -EINVAL;
+
+ if (rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE)
+ indir_bytes = dev_indir_size * sizeof(indir[0]);
+
+ rss_config = kzalloc(indir_bytes + rxfh.key_size, GFP_USER);
+ if (!rss_config)
+ return -ENOMEM;
+
+ rx_rings.cmd = ETHTOOL_GRXRINGS;
+ ret = ops->get_rxnfc(dev, &rx_rings, NULL);
+ if (ret)
+ goto out;
+
+ /* rxfh.indir_size == 0 means reset the indir table to default.
+ * rxfh.indir_size == ETH_RXFH_INDIR_NO_CHANGE means leave it unchanged.
+ */
+ if (rxfh.indir_size &&
+ rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE) {
+ indir = (u32 *)rss_config;
+ ret = ethtool_copy_validate_indir(indir,
+ useraddr + rss_cfg_offset,
+ &rx_rings,
+ rxfh.indir_size);
+ if (ret)
goto out;
- }
+ } else if (rxfh.indir_size == 0) {
+ indir = (u32 *)rss_config;
+ for (i = 0; i < dev_indir_size; i++)
+ indir[i] = ethtool_rxfh_indir_default(i, rx_rings.data);
+ }
- /* Validate ring indices */
- for (i = 0; i < dev_size; i++) {
- if (indir[i] >= rx_rings.data) {
- ret = -EINVAL;
- goto out;
- }
+ if (rxfh.key_size) {
+ hkey = rss_config + indir_bytes;
+ if (copy_from_user(hkey,
+ useraddr + rss_cfg_offset + indir_bytes,
+ rxfh.key_size)) {
+ ret = -EFAULT;
+ goto out;
}
}
- ret = ops->set_rxfh_indir(dev, indir);
+ ret = ops->set_rxfh(dev, indir, hkey);
out:
- kfree(indir);
+ kfree(rss_config);
return ret;
}
@@ -1490,6 +1665,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
case ETHTOOL_GRXCLSRULE:
case ETHTOOL_GRXCLSRLALL:
case ETHTOOL_GRXFHINDIR:
+ case ETHTOOL_GRSSH:
case ETHTOOL_GFEATURES:
case ETHTOOL_GCHANNELS:
case ETHTOOL_GET_TS_INFO:
@@ -1627,6 +1803,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
case ETHTOOL_SRXFHINDIR:
rc = ethtool_set_rxfh_indir(dev, useraddr);
break;
+ case ETHTOOL_GRSSH:
+ rc = ethtool_get_rxfh(dev, useraddr);
+ break;
+ case ETHTOOL_SRSSH:
+ rc = ethtool_set_rxfh(dev, useraddr);
+ break;
case ETHTOOL_GFEATURES:
rc = ethtool_get_features(dev, useraddr);
break;
diff --git a/net/core/filter.c b/net/core/filter.c
index ad30d626a5b..1dbf6462f76 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1,11 +1,16 @@
/*
* Linux Socket Filter - Kernel level socket filtering
*
- * Author:
- * Jay Schulist <jschlst@samba.org>
+ * Based on the design of the Berkeley Packet Filter. The new
+ * internal format has been designed by PLUMgrid:
*
- * Based on the design of:
- * - The Berkeley Packet Filter
+ * Copyright (c) 2011 - 2014 PLUMgrid, http://plumgrid.com
+ *
+ * Authors:
+ *
+ * Jay Schulist <jschlst@samba.org>
+ * Alexei Starovoitov <ast@plumgrid.com>
+ * Daniel Borkmann <dborkman@redhat.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -40,6 +45,27 @@
#include <linux/seccomp.h>
#include <linux/if_vlan.h>
+/* Registers */
+#define BPF_R0 regs[BPF_REG_0]
+#define BPF_R1 regs[BPF_REG_1]
+#define BPF_R2 regs[BPF_REG_2]
+#define BPF_R3 regs[BPF_REG_3]
+#define BPF_R4 regs[BPF_REG_4]
+#define BPF_R5 regs[BPF_REG_5]
+#define BPF_R6 regs[BPF_REG_6]
+#define BPF_R7 regs[BPF_REG_7]
+#define BPF_R8 regs[BPF_REG_8]
+#define BPF_R9 regs[BPF_REG_9]
+#define BPF_R10 regs[BPF_REG_10]
+
+/* Named registers */
+#define DST regs[insn->dst_reg]
+#define SRC regs[insn->src_reg]
+#define FP regs[BPF_REG_FP]
+#define ARG1 regs[BPF_REG_ARG1]
+#define CTX regs[BPF_REG_CTX]
+#define IMM insn->imm
+
/* No hurry in this branch
*
* Exported for the bpf jit load helper.
@@ -52,9 +78,9 @@ void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, uns
ptr = skb_network_header(skb) + k - SKF_NET_OFF;
else if (k >= SKF_LL_OFF)
ptr = skb_mac_header(skb) + k - SKF_LL_OFF;
-
if (ptr >= skb->head && ptr + size <= skb_tail_pointer(skb))
return ptr;
+
return NULL;
}
@@ -63,6 +89,7 @@ static inline void *load_pointer(const struct sk_buff *skb, int k,
{
if (k >= 0)
return skb_header_pointer(skb, k, size, buffer);
+
return bpf_internal_load_pointer_neg_helper(skb, k, size);
}
@@ -108,304 +135,960 @@ int sk_filter(struct sock *sk, struct sk_buff *skb)
}
EXPORT_SYMBOL(sk_filter);
+/* Base function for offset calculation. Needs to go into .text section,
+ * therefore keeping it non-static as well; will also be used by JITs
+ * anyway later on, so do not let the compiler omit it.
+ */
+noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+ return 0;
+}
+
/**
- * sk_run_filter - run a filter on a socket
- * @skb: buffer to run the filter on
- * @fentry: filter to apply
+ * __sk_run_filter - run a filter on a given context
+ * @ctx: buffer to run the filter on
+ * @insn: filter to apply
*
- * Decode and apply filter instructions to the skb->data.
- * Return length to keep, 0 for none. @skb is the data we are
- * filtering, @filter is the array of filter instructions.
- * Because all jumps are guaranteed to be before last instruction,
- * and last instruction guaranteed to be a RET, we dont need to check
- * flen. (We used to pass to this function the length of filter)
+ * Decode and apply filter instructions to the skb->data. Return length to
+ * keep, 0 for none. @ctx is the data we are operating on, @insn is the
+ * array of filter instructions.
*/
-unsigned int sk_run_filter(const struct sk_buff *skb,
- const struct sock_filter *fentry)
+static unsigned int __sk_run_filter(void *ctx, const struct sock_filter_int *insn)
{
+ u64 stack[MAX_BPF_STACK / sizeof(u64)];
+ u64 regs[MAX_BPF_REG], tmp;
+ static const void *jumptable[256] = {
+ [0 ... 255] = &&default_label,
+ /* Now overwrite non-defaults ... */
+ /* 32 bit ALU operations */
+ [BPF_ALU | BPF_ADD | BPF_X] = &&ALU_ADD_X,
+ [BPF_ALU | BPF_ADD | BPF_K] = &&ALU_ADD_K,
+ [BPF_ALU | BPF_SUB | BPF_X] = &&ALU_SUB_X,
+ [BPF_ALU | BPF_SUB | BPF_K] = &&ALU_SUB_K,
+ [BPF_ALU | BPF_AND | BPF_X] = &&ALU_AND_X,
+ [BPF_ALU | BPF_AND | BPF_K] = &&ALU_AND_K,
+ [BPF_ALU | BPF_OR | BPF_X] = &&ALU_OR_X,
+ [BPF_ALU | BPF_OR | BPF_K] = &&ALU_OR_K,
+ [BPF_ALU | BPF_LSH | BPF_X] = &&ALU_LSH_X,
+ [BPF_ALU | BPF_LSH | BPF_K] = &&ALU_LSH_K,
+ [BPF_ALU | BPF_RSH | BPF_X] = &&ALU_RSH_X,
+ [BPF_ALU | BPF_RSH | BPF_K] = &&ALU_RSH_K,
+ [BPF_ALU | BPF_XOR | BPF_X] = &&ALU_XOR_X,
+ [BPF_ALU | BPF_XOR | BPF_K] = &&ALU_XOR_K,
+ [BPF_ALU | BPF_MUL | BPF_X] = &&ALU_MUL_X,
+ [BPF_ALU | BPF_MUL | BPF_K] = &&ALU_MUL_K,
+ [BPF_ALU | BPF_MOV | BPF_X] = &&ALU_MOV_X,
+ [BPF_ALU | BPF_MOV | BPF_K] = &&ALU_MOV_K,
+ [BPF_ALU | BPF_DIV | BPF_X] = &&ALU_DIV_X,
+ [BPF_ALU | BPF_DIV | BPF_K] = &&ALU_DIV_K,
+ [BPF_ALU | BPF_MOD | BPF_X] = &&ALU_MOD_X,
+ [BPF_ALU | BPF_MOD | BPF_K] = &&ALU_MOD_K,
+ [BPF_ALU | BPF_NEG] = &&ALU_NEG,
+ [BPF_ALU | BPF_END | BPF_TO_BE] = &&ALU_END_TO_BE,
+ [BPF_ALU | BPF_END | BPF_TO_LE] = &&ALU_END_TO_LE,
+ /* 64 bit ALU operations */
+ [BPF_ALU64 | BPF_ADD | BPF_X] = &&ALU64_ADD_X,
+ [BPF_ALU64 | BPF_ADD | BPF_K] = &&ALU64_ADD_K,
+ [BPF_ALU64 | BPF_SUB | BPF_X] = &&ALU64_SUB_X,
+ [BPF_ALU64 | BPF_SUB | BPF_K] = &&ALU64_SUB_K,
+ [BPF_ALU64 | BPF_AND | BPF_X] = &&ALU64_AND_X,
+ [BPF_ALU64 | BPF_AND | BPF_K] = &&ALU64_AND_K,
+ [BPF_ALU64 | BPF_OR | BPF_X] = &&ALU64_OR_X,
+ [BPF_ALU64 | BPF_OR | BPF_K] = &&ALU64_OR_K,
+ [BPF_ALU64 | BPF_LSH | BPF_X] = &&ALU64_LSH_X,
+ [BPF_ALU64 | BPF_LSH | BPF_K] = &&ALU64_LSH_K,
+ [BPF_ALU64 | BPF_RSH | BPF_X] = &&ALU64_RSH_X,
+ [BPF_ALU64 | BPF_RSH | BPF_K] = &&ALU64_RSH_K,
+ [BPF_ALU64 | BPF_XOR | BPF_X] = &&ALU64_XOR_X,
+ [BPF_ALU64 | BPF_XOR | BPF_K] = &&ALU64_XOR_K,
+ [BPF_ALU64 | BPF_MUL | BPF_X] = &&ALU64_MUL_X,
+ [BPF_ALU64 | BPF_MUL | BPF_K] = &&ALU64_MUL_K,
+ [BPF_ALU64 | BPF_MOV | BPF_X] = &&ALU64_MOV_X,
+ [BPF_ALU64 | BPF_MOV | BPF_K] = &&ALU64_MOV_K,
+ [BPF_ALU64 | BPF_ARSH | BPF_X] = &&ALU64_ARSH_X,
+ [BPF_ALU64 | BPF_ARSH | BPF_K] = &&ALU64_ARSH_K,
+ [BPF_ALU64 | BPF_DIV | BPF_X] = &&ALU64_DIV_X,
+ [BPF_ALU64 | BPF_DIV | BPF_K] = &&ALU64_DIV_K,
+ [BPF_ALU64 | BPF_MOD | BPF_X] = &&ALU64_MOD_X,
+ [BPF_ALU64 | BPF_MOD | BPF_K] = &&ALU64_MOD_K,
+ [BPF_ALU64 | BPF_NEG] = &&ALU64_NEG,
+ /* Call instruction */
+ [BPF_JMP | BPF_CALL] = &&JMP_CALL,
+ /* Jumps */
+ [BPF_JMP | BPF_JA] = &&JMP_JA,
+ [BPF_JMP | BPF_JEQ | BPF_X] = &&JMP_JEQ_X,
+ [BPF_JMP | BPF_JEQ | BPF_K] = &&JMP_JEQ_K,
+ [BPF_JMP | BPF_JNE | BPF_X] = &&JMP_JNE_X,
+ [BPF_JMP | BPF_JNE | BPF_K] = &&JMP_JNE_K,
+ [BPF_JMP | BPF_JGT | BPF_X] = &&JMP_JGT_X,
+ [BPF_JMP | BPF_JGT | BPF_K] = &&JMP_JGT_K,
+ [BPF_JMP | BPF_JGE | BPF_X] = &&JMP_JGE_X,
+ [BPF_JMP | BPF_JGE | BPF_K] = &&JMP_JGE_K,
+ [BPF_JMP | BPF_JSGT | BPF_X] = &&JMP_JSGT_X,
+ [BPF_JMP | BPF_JSGT | BPF_K] = &&JMP_JSGT_K,
+ [BPF_JMP | BPF_JSGE | BPF_X] = &&JMP_JSGE_X,
+ [BPF_JMP | BPF_JSGE | BPF_K] = &&JMP_JSGE_K,
+ [BPF_JMP | BPF_JSET | BPF_X] = &&JMP_JSET_X,
+ [BPF_JMP | BPF_JSET | BPF_K] = &&JMP_JSET_K,
+ /* Program return */
+ [BPF_JMP | BPF_EXIT] = &&JMP_EXIT,
+ /* Store instructions */
+ [BPF_STX | BPF_MEM | BPF_B] = &&STX_MEM_B,
+ [BPF_STX | BPF_MEM | BPF_H] = &&STX_MEM_H,
+ [BPF_STX | BPF_MEM | BPF_W] = &&STX_MEM_W,
+ [BPF_STX | BPF_MEM | BPF_DW] = &&STX_MEM_DW,
+ [BPF_STX | BPF_XADD | BPF_W] = &&STX_XADD_W,
+ [BPF_STX | BPF_XADD | BPF_DW] = &&STX_XADD_DW,
+ [BPF_ST | BPF_MEM | BPF_B] = &&ST_MEM_B,
+ [BPF_ST | BPF_MEM | BPF_H] = &&ST_MEM_H,
+ [BPF_ST | BPF_MEM | BPF_W] = &&ST_MEM_W,
+ [BPF_ST | BPF_MEM | BPF_DW] = &&ST_MEM_DW,
+ /* Load instructions */
+ [BPF_LDX | BPF_MEM | BPF_B] = &&LDX_MEM_B,
+ [BPF_LDX | BPF_MEM | BPF_H] = &&LDX_MEM_H,
+ [BPF_LDX | BPF_MEM | BPF_W] = &&LDX_MEM_W,
+ [BPF_LDX | BPF_MEM | BPF_DW] = &&LDX_MEM_DW,
+ [BPF_LD | BPF_ABS | BPF_W] = &&LD_ABS_W,
+ [BPF_LD | BPF_ABS | BPF_H] = &&LD_ABS_H,
+ [BPF_LD | BPF_ABS | BPF_B] = &&LD_ABS_B,
+ [BPF_LD | BPF_IND | BPF_W] = &&LD_IND_W,
+ [BPF_LD | BPF_IND | BPF_H] = &&LD_IND_H,
+ [BPF_LD | BPF_IND | BPF_B] = &&LD_IND_B,
+ };
void *ptr;
- u32 A = 0; /* Accumulator */
- u32 X = 0; /* Index Register */
- u32 mem[BPF_MEMWORDS]; /* Scratch Memory Store */
- u32 tmp;
- int k;
+ int off;
- /*
- * Process array of filter instructions.
- */
- for (;; fentry++) {
-#if defined(CONFIG_X86_32)
-#define K (fentry->k)
-#else
- const u32 K = fentry->k;
-#endif
+#define CONT ({ insn++; goto select_insn; })
+#define CONT_JMP ({ insn++; goto select_insn; })
- switch (fentry->code) {
- case BPF_S_ALU_ADD_X:
- A += X;
- continue;
- case BPF_S_ALU_ADD_K:
- A += K;
- continue;
- case BPF_S_ALU_SUB_X:
- A -= X;
- continue;
- case BPF_S_ALU_SUB_K:
- A -= K;
- continue;
- case BPF_S_ALU_MUL_X:
- A *= X;
- continue;
- case BPF_S_ALU_MUL_K:
- A *= K;
- continue;
- case BPF_S_ALU_DIV_X:
- if (X == 0)
- return 0;
- A /= X;
- continue;
- case BPF_S_ALU_DIV_K:
- A /= K;
- continue;
- case BPF_S_ALU_MOD_X:
- if (X == 0)
- return 0;
- A %= X;
- continue;
- case BPF_S_ALU_MOD_K:
- A %= K;
- continue;
- case BPF_S_ALU_AND_X:
- A &= X;
- continue;
- case BPF_S_ALU_AND_K:
- A &= K;
- continue;
- case BPF_S_ALU_OR_X:
- A |= X;
- continue;
- case BPF_S_ALU_OR_K:
- A |= K;
- continue;
- case BPF_S_ANC_ALU_XOR_X:
- case BPF_S_ALU_XOR_X:
- A ^= X;
- continue;
- case BPF_S_ALU_XOR_K:
- A ^= K;
- continue;
- case BPF_S_ALU_LSH_X:
- A <<= X;
- continue;
- case BPF_S_ALU_LSH_K:
- A <<= K;
- continue;
- case BPF_S_ALU_RSH_X:
- A >>= X;
- continue;
- case BPF_S_ALU_RSH_K:
- A >>= K;
- continue;
- case BPF_S_ALU_NEG:
- A = -A;
- continue;
- case BPF_S_JMP_JA:
- fentry += K;
- continue;
- case BPF_S_JMP_JGT_K:
- fentry += (A > K) ? fentry->jt : fentry->jf;
- continue;
- case BPF_S_JMP_JGE_K:
- fentry += (A >= K) ? fentry->jt : fentry->jf;
- continue;
- case BPF_S_JMP_JEQ_K:
- fentry += (A == K) ? fentry->jt : fentry->jf;
- continue;
- case BPF_S_JMP_JSET_K:
- fentry += (A & K) ? fentry->jt : fentry->jf;
- continue;
- case BPF_S_JMP_JGT_X:
- fentry += (A > X) ? fentry->jt : fentry->jf;
- continue;
- case BPF_S_JMP_JGE_X:
- fentry += (A >= X) ? fentry->jt : fentry->jf;
- continue;
- case BPF_S_JMP_JEQ_X:
- fentry += (A == X) ? fentry->jt : fentry->jf;
- continue;
- case BPF_S_JMP_JSET_X:
- fentry += (A & X) ? fentry->jt : fentry->jf;
- continue;
- case BPF_S_LD_W_ABS:
- k = K;
-load_w:
- ptr = load_pointer(skb, k, 4, &tmp);
- if (ptr != NULL) {
- A = get_unaligned_be32(ptr);
- continue;
- }
+ FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)];
+ ARG1 = (u64) (unsigned long) ctx;
+
+ /* Registers used in classic BPF programs need to be reset first. */
+ regs[BPF_REG_A] = 0;
+ regs[BPF_REG_X] = 0;
+
+select_insn:
+ goto *jumptable[insn->code];
+
+ /* ALU */
+#define ALU(OPCODE, OP) \
+ ALU64_##OPCODE##_X: \
+ DST = DST OP SRC; \
+ CONT; \
+ ALU_##OPCODE##_X: \
+ DST = (u32) DST OP (u32) SRC; \
+ CONT; \
+ ALU64_##OPCODE##_K: \
+ DST = DST OP IMM; \
+ CONT; \
+ ALU_##OPCODE##_K: \
+ DST = (u32) DST OP (u32) IMM; \
+ CONT;
+
+ ALU(ADD, +)
+ ALU(SUB, -)
+ ALU(AND, &)
+ ALU(OR, |)
+ ALU(LSH, <<)
+ ALU(RSH, >>)
+ ALU(XOR, ^)
+ ALU(MUL, *)
+#undef ALU
+ ALU_NEG:
+ DST = (u32) -DST;
+ CONT;
+ ALU64_NEG:
+ DST = -DST;
+ CONT;
+ ALU_MOV_X:
+ DST = (u32) SRC;
+ CONT;
+ ALU_MOV_K:
+ DST = (u32) IMM;
+ CONT;
+ ALU64_MOV_X:
+ DST = SRC;
+ CONT;
+ ALU64_MOV_K:
+ DST = IMM;
+ CONT;
+ ALU64_ARSH_X:
+ (*(s64 *) &DST) >>= SRC;
+ CONT;
+ ALU64_ARSH_K:
+ (*(s64 *) &DST) >>= IMM;
+ CONT;
+ ALU64_MOD_X:
+ if (unlikely(SRC == 0))
return 0;
- case BPF_S_LD_H_ABS:
- k = K;
-load_h:
- ptr = load_pointer(skb, k, 2, &tmp);
- if (ptr != NULL) {
- A = get_unaligned_be16(ptr);
- continue;
- }
+ tmp = DST;
+ DST = do_div(tmp, SRC);
+ CONT;
+ ALU_MOD_X:
+ if (unlikely(SRC == 0))
return 0;
- case BPF_S_LD_B_ABS:
- k = K;
-load_b:
- ptr = load_pointer(skb, k, 1, &tmp);
- if (ptr != NULL) {
- A = *(u8 *)ptr;
- continue;
- }
+ tmp = (u32) DST;
+ DST = do_div(tmp, (u32) SRC);
+ CONT;
+ ALU64_MOD_K:
+ tmp = DST;
+ DST = do_div(tmp, IMM);
+ CONT;
+ ALU_MOD_K:
+ tmp = (u32) DST;
+ DST = do_div(tmp, (u32) IMM);
+ CONT;
+ ALU64_DIV_X:
+ if (unlikely(SRC == 0))
return 0;
- case BPF_S_LD_W_LEN:
- A = skb->len;
- continue;
- case BPF_S_LDX_W_LEN:
- X = skb->len;
- continue;
- case BPF_S_LD_W_IND:
- k = X + K;
- goto load_w;
- case BPF_S_LD_H_IND:
- k = X + K;
- goto load_h;
- case BPF_S_LD_B_IND:
- k = X + K;
- goto load_b;
- case BPF_S_LDX_B_MSH:
- ptr = load_pointer(skb, K, 1, &tmp);
- if (ptr != NULL) {
- X = (*(u8 *)ptr & 0xf) << 2;
- continue;
- }
+ do_div(DST, SRC);
+ CONT;
+ ALU_DIV_X:
+ if (unlikely(SRC == 0))
return 0;
- case BPF_S_LD_IMM:
- A = K;
- continue;
- case BPF_S_LDX_IMM:
- X = K;
- continue;
- case BPF_S_LD_MEM:
- A = mem[K];
- continue;
- case BPF_S_LDX_MEM:
- X = mem[K];
- continue;
- case BPF_S_MISC_TAX:
- X = A;
- continue;
- case BPF_S_MISC_TXA:
- A = X;
- continue;
- case BPF_S_RET_K:
- return K;
- case BPF_S_RET_A:
- return A;
- case BPF_S_ST:
- mem[K] = A;
- continue;
- case BPF_S_STX:
- mem[K] = X;
- continue;
- case BPF_S_ANC_PROTOCOL:
- A = ntohs(skb->protocol);
- continue;
- case BPF_S_ANC_PKTTYPE:
- A = skb->pkt_type;
- continue;
- case BPF_S_ANC_IFINDEX:
- if (!skb->dev)
- return 0;
- A = skb->dev->ifindex;
- continue;
- case BPF_S_ANC_MARK:
- A = skb->mark;
- continue;
- case BPF_S_ANC_QUEUE:
- A = skb->queue_mapping;
- continue;
- case BPF_S_ANC_HATYPE:
- if (!skb->dev)
- return 0;
- A = skb->dev->type;
- continue;
- case BPF_S_ANC_RXHASH:
- A = skb->rxhash;
- continue;
- case BPF_S_ANC_CPU:
- A = raw_smp_processor_id();
- continue;
- case BPF_S_ANC_VLAN_TAG:
- A = vlan_tx_tag_get(skb);
- continue;
- case BPF_S_ANC_VLAN_TAG_PRESENT:
- A = !!vlan_tx_tag_present(skb);
- continue;
- case BPF_S_ANC_PAY_OFFSET:
- A = __skb_get_poff(skb);
- continue;
- case BPF_S_ANC_NLATTR: {
- struct nlattr *nla;
-
- if (skb_is_nonlinear(skb))
- return 0;
- if (A > skb->len - sizeof(struct nlattr))
- return 0;
-
- nla = nla_find((struct nlattr *)&skb->data[A],
- skb->len - A, X);
- if (nla)
- A = (void *)nla - (void *)skb->data;
- else
- A = 0;
- continue;
+ tmp = (u32) DST;
+ do_div(tmp, (u32) SRC);
+ DST = (u32) tmp;
+ CONT;
+ ALU64_DIV_K:
+ do_div(DST, IMM);
+ CONT;
+ ALU_DIV_K:
+ tmp = (u32) DST;
+ do_div(tmp, (u32) IMM);
+ DST = (u32) tmp;
+ CONT;
+ ALU_END_TO_BE:
+ switch (IMM) {
+ case 16:
+ DST = (__force u16) cpu_to_be16(DST);
+ break;
+ case 32:
+ DST = (__force u32) cpu_to_be32(DST);
+ break;
+ case 64:
+ DST = (__force u64) cpu_to_be64(DST);
+ break;
+ }
+ CONT;
+ ALU_END_TO_LE:
+ switch (IMM) {
+ case 16:
+ DST = (__force u16) cpu_to_le16(DST);
+ break;
+ case 32:
+ DST = (__force u32) cpu_to_le32(DST);
+ break;
+ case 64:
+ DST = (__force u64) cpu_to_le64(DST);
+ break;
+ }
+ CONT;
+
+ /* CALL */
+ JMP_CALL:
+ /* Function call scratches BPF_R1-BPF_R5 registers,
+ * preserves BPF_R6-BPF_R9, and stores return value
+ * into BPF_R0.
+ */
+ BPF_R0 = (__bpf_call_base + insn->imm)(BPF_R1, BPF_R2, BPF_R3,
+ BPF_R4, BPF_R5);
+ CONT;
+
+ /* JMP */
+ JMP_JA:
+ insn += insn->off;
+ CONT;
+ JMP_JEQ_X:
+ if (DST == SRC) {
+ insn += insn->off;
+ CONT_JMP;
+ }
+ CONT;
+ JMP_JEQ_K:
+ if (DST == IMM) {
+ insn += insn->off;
+ CONT_JMP;
+ }
+ CONT;
+ JMP_JNE_X:
+ if (DST != SRC) {
+ insn += insn->off;
+ CONT_JMP;
+ }
+ CONT;
+ JMP_JNE_K:
+ if (DST != IMM) {
+ insn += insn->off;
+ CONT_JMP;
+ }
+ CONT;
+ JMP_JGT_X:
+ if (DST > SRC) {
+ insn += insn->off;
+ CONT_JMP;
}
- case BPF_S_ANC_NLATTR_NEST: {
- struct nlattr *nla;
-
- if (skb_is_nonlinear(skb))
- return 0;
- if (A > skb->len - sizeof(struct nlattr))
- return 0;
-
- nla = (struct nlattr *)&skb->data[A];
- if (nla->nla_len > A - skb->len)
- return 0;
-
- nla = nla_find_nested(nla, X);
- if (nla)
- A = (void *)nla - (void *)skb->data;
- else
- A = 0;
- continue;
+ CONT;
+ JMP_JGT_K:
+ if (DST > IMM) {
+ insn += insn->off;
+ CONT_JMP;
}
-#ifdef CONFIG_SECCOMP_FILTER
- case BPF_S_ANC_SECCOMP_LD_W:
- A = seccomp_bpf_load(fentry->k);
- continue;
+ CONT;
+ JMP_JGE_X:
+ if (DST >= SRC) {
+ insn += insn->off;
+ CONT_JMP;
+ }
+ CONT;
+ JMP_JGE_K:
+ if (DST >= IMM) {
+ insn += insn->off;
+ CONT_JMP;
+ }
+ CONT;
+ JMP_JSGT_X:
+ if (((s64) DST) > ((s64) SRC)) {
+ insn += insn->off;
+ CONT_JMP;
+ }
+ CONT;
+ JMP_JSGT_K:
+ if (((s64) DST) > ((s64) IMM)) {
+ insn += insn->off;
+ CONT_JMP;
+ }
+ CONT;
+ JMP_JSGE_X:
+ if (((s64) DST) >= ((s64) SRC)) {
+ insn += insn->off;
+ CONT_JMP;
+ }
+ CONT;
+ JMP_JSGE_K:
+ if (((s64) DST) >= ((s64) IMM)) {
+ insn += insn->off;
+ CONT_JMP;
+ }
+ CONT;
+ JMP_JSET_X:
+ if (DST & SRC) {
+ insn += insn->off;
+ CONT_JMP;
+ }
+ CONT;
+ JMP_JSET_K:
+ if (DST & IMM) {
+ insn += insn->off;
+ CONT_JMP;
+ }
+ CONT;
+ JMP_EXIT:
+ return BPF_R0;
+
+ /* STX, ST and LDX */
+#define LDST(SIZEOP, SIZE) \
+ STX_MEM_##SIZEOP: \
+ *(SIZE *)(unsigned long) (DST + insn->off) = SRC; \
+ CONT; \
+ ST_MEM_##SIZEOP: \
+ *(SIZE *)(unsigned long) (DST + insn->off) = IMM; \
+ CONT; \
+ LDX_MEM_##SIZEOP: \
+ DST = *(SIZE *)(unsigned long) (SRC + insn->off); \
+ CONT;
+
+ LDST(B, u8)
+ LDST(H, u16)
+ LDST(W, u32)
+ LDST(DW, u64)
+#undef LDST
+ STX_XADD_W: /* lock xadd *(u32 *)(dst_reg + off16) += src_reg */
+ atomic_add((u32) SRC, (atomic_t *)(unsigned long)
+ (DST + insn->off));
+ CONT;
+ STX_XADD_DW: /* lock xadd *(u64 *)(dst_reg + off16) += src_reg */
+ atomic64_add((u64) SRC, (atomic64_t *)(unsigned long)
+ (DST + insn->off));
+ CONT;
+ LD_ABS_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + imm32)) */
+ off = IMM;
+load_word:
+ /* BPF_LD + BPF_ABS and BPF_LD + BPF_IND insns are
+ * only appearing in the programs where ctx ==
+ * skb. All programs keep 'ctx' in regs[BPF_REG_CTX]
+ * == BPF_R6, sk_convert_filter() saves it in BPF_R6,
+ * internal BPF verifier will check that BPF_R6 ==
+ * ctx.
+ *
+ * BPF_ABS and BPF_IND are wrappers of function calls,
+ * so they scratch BPF_R1-BPF_R5 registers, preserve
+ * BPF_R6-BPF_R9, and store return value into BPF_R0.
+ *
+ * Implicit input:
+ * ctx == skb == BPF_R6 == CTX
+ *
+ * Explicit input:
+ * SRC == any register
+ * IMM == 32-bit immediate
+ *
+ * Output:
+ * BPF_R0 - 8/16/32-bit skb data converted to cpu endianness
+ */
+
+ ptr = load_pointer((struct sk_buff *) (unsigned long) CTX, off, 4, &tmp);
+ if (likely(ptr != NULL)) {
+ BPF_R0 = get_unaligned_be32(ptr);
+ CONT;
+ }
+
+ return 0;
+ LD_ABS_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + imm32)) */
+ off = IMM;
+load_half:
+ ptr = load_pointer((struct sk_buff *) (unsigned long) CTX, off, 2, &tmp);
+ if (likely(ptr != NULL)) {
+ BPF_R0 = get_unaligned_be16(ptr);
+ CONT;
+ }
+
+ return 0;
+ LD_ABS_B: /* BPF_R0 = *(u8 *) (skb->data + imm32) */
+ off = IMM;
+load_byte:
+ ptr = load_pointer((struct sk_buff *) (unsigned long) CTX, off, 1, &tmp);
+ if (likely(ptr != NULL)) {
+ BPF_R0 = *(u8 *)ptr;
+ CONT;
+ }
+
+ return 0;
+ LD_IND_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + src_reg + imm32)) */
+ off = IMM + SRC;
+ goto load_word;
+ LD_IND_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + src_reg + imm32)) */
+ off = IMM + SRC;
+ goto load_half;
+ LD_IND_B: /* BPF_R0 = *(u8 *) (skb->data + src_reg + imm32) */
+ off = IMM + SRC;
+ goto load_byte;
+
+ default_label:
+ /* If we ever reach this, we have a bug somewhere. */
+ WARN_RATELIMIT(1, "unknown opcode %02x\n", insn->code);
+ return 0;
+}
+
+/* Helper to find the offset of pkt_type in sk_buff structure. We want
+ * to make sure it's still a 3-bit field starting at a byte boundary;
+ * taken from arch/x86/net/bpf_jit_comp.c.
+ */
+#ifdef __BIG_ENDIAN_BITFIELD
+#define PKT_TYPE_MAX (7 << 5)
+#else
+#define PKT_TYPE_MAX 7
+#endif
+static unsigned int pkt_type_offset(void)
+{
+ struct sk_buff skb_probe = { .pkt_type = ~0, };
+ u8 *ct = (u8 *) &skb_probe;
+ unsigned int off;
+
+ for (off = 0; off < sizeof(struct sk_buff); off++) {
+ if (ct[off] == PKT_TYPE_MAX)
+ return off;
+ }
+
+ pr_err_once("Please fix %s, as pkt_type couldn't be found!\n", __func__);
+ return -1;
+}
+
+static u64 __skb_get_pay_offset(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
+{
+ return __skb_get_poff((struct sk_buff *)(unsigned long) ctx);
+}
+
+static u64 __skb_get_nlattr(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
+{
+ struct sk_buff *skb = (struct sk_buff *)(unsigned long) ctx;
+ struct nlattr *nla;
+
+ if (skb_is_nonlinear(skb))
+ return 0;
+
+ if (skb->len < sizeof(struct nlattr))
+ return 0;
+
+ if (a > skb->len - sizeof(struct nlattr))
+ return 0;
+
+ nla = nla_find((struct nlattr *) &skb->data[a], skb->len - a, x);
+ if (nla)
+ return (void *) nla - (void *) skb->data;
+
+ return 0;
+}
+
+static u64 __skb_get_nlattr_nest(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
+{
+ struct sk_buff *skb = (struct sk_buff *)(unsigned long) ctx;
+ struct nlattr *nla;
+
+ if (skb_is_nonlinear(skb))
+ return 0;
+
+ if (skb->len < sizeof(struct nlattr))
+ return 0;
+
+ if (a > skb->len - sizeof(struct nlattr))
+ return 0;
+
+ nla = (struct nlattr *) &skb->data[a];
+ if (nla->nla_len > skb->len - a)
+ return 0;
+
+ nla = nla_find_nested(nla, x);
+ if (nla)
+ return (void *) nla - (void *) skb->data;
+
+ return 0;
+}
+
+static u64 __get_raw_cpu_id(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
+{
+ return raw_smp_processor_id();
+}
+
+/* note that this only generates 32-bit random numbers */
+static u64 __get_random_u32(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
+{
+ return prandom_u32();
+}
+
+static bool convert_bpf_extensions(struct sock_filter *fp,
+ struct sock_filter_int **insnp)
+{
+ struct sock_filter_int *insn = *insnp;
+
+ switch (fp->k) {
+ case SKF_AD_OFF + SKF_AD_PROTOCOL:
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2);
+
+ /* A = *(u16 *) (CTX + offsetof(protocol)) */
+ *insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
+ offsetof(struct sk_buff, protocol));
+ /* A = ntohs(A) [emitting a nop or swap16] */
+ *insn = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, 16);
+ break;
+
+ case SKF_AD_OFF + SKF_AD_PKTTYPE:
+ *insn = BPF_LDX_MEM(BPF_B, BPF_REG_A, BPF_REG_CTX,
+ pkt_type_offset());
+ if (insn->off < 0)
+ return false;
+ insn++;
+ *insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, PKT_TYPE_MAX);
+#ifdef __BIG_ENDIAN_BITFIELD
+ insn++;
+ *insn = BPF_ALU32_IMM(BPF_RSH, BPF_REG_A, 5);
#endif
+ break;
+
+ case SKF_AD_OFF + SKF_AD_IFINDEX:
+ case SKF_AD_OFF + SKF_AD_HATYPE:
+ BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
+ BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2);
+ BUILD_BUG_ON(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, dev)) < 0);
+
+ *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, dev)),
+ BPF_REG_TMP, BPF_REG_CTX,
+ offsetof(struct sk_buff, dev));
+ /* if (tmp != 0) goto pc + 1 */
+ *insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_TMP, 0, 1);
+ *insn++ = BPF_EXIT_INSN();
+ if (fp->k == SKF_AD_OFF + SKF_AD_IFINDEX)
+ *insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_TMP,
+ offsetof(struct net_device, ifindex));
+ else
+ *insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_TMP,
+ offsetof(struct net_device, type));
+ break;
+
+ case SKF_AD_OFF + SKF_AD_MARK:
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
+
+ *insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX,
+ offsetof(struct sk_buff, mark));
+ break;
+
+ case SKF_AD_OFF + SKF_AD_RXHASH:
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
+
+ *insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX,
+ offsetof(struct sk_buff, hash));
+ break;
+
+ case SKF_AD_OFF + SKF_AD_QUEUE:
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2);
+
+ *insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
+ offsetof(struct sk_buff, queue_mapping));
+ break;
+
+ case SKF_AD_OFF + SKF_AD_VLAN_TAG:
+ case SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT:
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
+ BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000);
+
+ /* A = *(u16 *) (CTX + offsetof(vlan_tci)) */
+ *insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
+ offsetof(struct sk_buff, vlan_tci));
+ if (fp->k == SKF_AD_OFF + SKF_AD_VLAN_TAG) {
+ *insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A,
+ ~VLAN_TAG_PRESENT);
+ } else {
+ /* A >>= 12 */
+ *insn++ = BPF_ALU32_IMM(BPF_RSH, BPF_REG_A, 12);
+ /* A &= 1 */
+ *insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 1);
+ }
+ break;
+
+ case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
+ case SKF_AD_OFF + SKF_AD_NLATTR:
+ case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
+ case SKF_AD_OFF + SKF_AD_CPU:
+ case SKF_AD_OFF + SKF_AD_RANDOM:
+ /* arg1 = CTX */
+ *insn++ = BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX);
+ /* arg2 = A */
+ *insn++ = BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_A);
+ /* arg3 = X */
+ *insn++ = BPF_MOV64_REG(BPF_REG_ARG3, BPF_REG_X);
+ /* Emit call(arg1=CTX, arg2=A, arg3=X) */
+ switch (fp->k) {
+ case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
+ *insn = BPF_EMIT_CALL(__skb_get_pay_offset);
+ break;
+ case SKF_AD_OFF + SKF_AD_NLATTR:
+ *insn = BPF_EMIT_CALL(__skb_get_nlattr);
+ break;
+ case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
+ *insn = BPF_EMIT_CALL(__skb_get_nlattr_nest);
+ break;
+ case SKF_AD_OFF + SKF_AD_CPU:
+ *insn = BPF_EMIT_CALL(__get_raw_cpu_id);
+ break;
+ case SKF_AD_OFF + SKF_AD_RANDOM:
+ *insn = BPF_EMIT_CALL(__get_random_u32);
+ break;
+ }
+ break;
+
+ case SKF_AD_OFF + SKF_AD_ALU_XOR_X:
+ /* A ^= X */
+ *insn = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_X);
+ break;
+
+ default:
+ /* This is just a dummy call to avoid letting the compiler
+ * evict __bpf_call_base() as an optimization. Placed here
+ * where no-one bothers.
+ */
+ BUG_ON(__bpf_call_base(0, 0, 0, 0, 0) != 0);
+ return false;
+ }
+
+ *insnp = insn;
+ return true;
+}
+
+/**
+ * sk_convert_filter - convert filter program
+ * @prog: the user passed filter program
+ * @len: the length of the user passed filter program
+ * @new_prog: buffer where converted program will be stored
+ * @new_len: pointer to store length of converted program
+ *
+ * Remap 'sock_filter' style BPF instruction set to 'sock_filter_int' style.
+ * Conversion workflow:
+ *
+ * 1) First pass for calculating the new program length:
+ * sk_convert_filter(old_prog, old_len, NULL, &new_len)
+ *
+ * 2) 2nd pass to remap in two passes: 1st pass finds new
+ * jump offsets, 2nd pass remapping:
+ * new_prog = kmalloc(sizeof(struct sock_filter_int) * new_len);
+ * sk_convert_filter(old_prog, old_len, new_prog, &new_len);
+ *
+ * User BPF's register A is mapped to our BPF register 0, user BPF
+ * register X is mapped to BPF register 7; frame pointer is always
+ * register 10; Context 'void *ctx' is passed in register 1 and kept
+ * in register 6, that is, for socket filters: ctx == 'struct sk_buff *',
+ * for seccomp: ctx == 'struct seccomp_data *'.
+ */
+int sk_convert_filter(struct sock_filter *prog, int len,
+ struct sock_filter_int *new_prog, int *new_len)
+{
+ int new_flen = 0, pass = 0, target, i;
+ struct sock_filter_int *new_insn;
+ struct sock_filter *fp;
+ int *addrs = NULL;
+ u8 bpf_src;
+
+ BUILD_BUG_ON(BPF_MEMWORDS * sizeof(u32) > MAX_BPF_STACK);
+ BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
+
+ if (len <= 0 || len > BPF_MAXINSNS)
+ return -EINVAL;
+
+ if (new_prog) {
+ addrs = kcalloc(len, sizeof(*addrs), GFP_KERNEL);
+ if (!addrs)
+ return -ENOMEM;
+ }
+
+do_pass:
+ new_insn = new_prog;
+ fp = prog;
+
+ if (new_insn)
+ *new_insn = BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1);
+ new_insn++;
+
+ for (i = 0; i < len; fp++, i++) {
+ struct sock_filter_int tmp_insns[6] = { };
+ struct sock_filter_int *insn = tmp_insns;
+
+ if (addrs)
+ addrs[i] = new_insn - new_prog;
+
+ switch (fp->code) {
+ /* All arithmetic insns and skb loads map as-is. */
+ case BPF_ALU | BPF_ADD | BPF_X:
+ case BPF_ALU | BPF_ADD | BPF_K:
+ case BPF_ALU | BPF_SUB | BPF_X:
+ case BPF_ALU | BPF_SUB | BPF_K:
+ case BPF_ALU | BPF_AND | BPF_X:
+ case BPF_ALU | BPF_AND | BPF_K:
+ case BPF_ALU | BPF_OR | BPF_X:
+ case BPF_ALU | BPF_OR | BPF_K:
+ case BPF_ALU | BPF_LSH | BPF_X:
+ case BPF_ALU | BPF_LSH | BPF_K:
+ case BPF_ALU | BPF_RSH | BPF_X:
+ case BPF_ALU | BPF_RSH | BPF_K:
+ case BPF_ALU | BPF_XOR | BPF_X:
+ case BPF_ALU | BPF_XOR | BPF_K:
+ case BPF_ALU | BPF_MUL | BPF_X:
+ case BPF_ALU | BPF_MUL | BPF_K:
+ case BPF_ALU | BPF_DIV | BPF_X:
+ case BPF_ALU | BPF_DIV | BPF_K:
+ case BPF_ALU | BPF_MOD | BPF_X:
+ case BPF_ALU | BPF_MOD | BPF_K:
+ case BPF_ALU | BPF_NEG:
+ case BPF_LD | BPF_ABS | BPF_W:
+ case BPF_LD | BPF_ABS | BPF_H:
+ case BPF_LD | BPF_ABS | BPF_B:
+ case BPF_LD | BPF_IND | BPF_W:
+ case BPF_LD | BPF_IND | BPF_H:
+ case BPF_LD | BPF_IND | BPF_B:
+ /* Check for overloaded BPF extension and
+ * directly convert it if found, otherwise
+ * just move on with mapping.
+ */
+ if (BPF_CLASS(fp->code) == BPF_LD &&
+ BPF_MODE(fp->code) == BPF_ABS &&
+ convert_bpf_extensions(fp, &insn))
+ break;
+
+ *insn = BPF_RAW_INSN(fp->code, BPF_REG_A, BPF_REG_X, 0, fp->k);
+ break;
+
+ /* Jump transformation cannot use BPF block macros
+ * everywhere as offset calculation and target updates
+ * require a bit more work than the rest, i.e. jump
+ * opcodes map as-is, but offsets need adjustment.
+ */
+
+#define BPF_EMIT_JMP \
+ do { \
+ if (target >= len || target < 0) \
+ goto err; \
+ insn->off = addrs ? addrs[target] - addrs[i] - 1 : 0; \
+ /* Adjust pc relative offset for 2nd or 3rd insn. */ \
+ insn->off -= insn - tmp_insns; \
+ } while (0)
+
+ case BPF_JMP | BPF_JA:
+ target = i + fp->k + 1;
+ insn->code = fp->code;
+ BPF_EMIT_JMP;
+ break;
+
+ case BPF_JMP | BPF_JEQ | BPF_K:
+ case BPF_JMP | BPF_JEQ | BPF_X:
+ case BPF_JMP | BPF_JSET | BPF_K:
+ case BPF_JMP | BPF_JSET | BPF_X:
+ case BPF_JMP | BPF_JGT | BPF_K:
+ case BPF_JMP | BPF_JGT | BPF_X:
+ case BPF_JMP | BPF_JGE | BPF_K:
+ case BPF_JMP | BPF_JGE | BPF_X:
+ if (BPF_SRC(fp->code) == BPF_K && (int) fp->k < 0) {
+ /* BPF immediates are signed, zero extend
+ * immediate into tmp register and use it
+ * in compare insn.
+ */
+ *insn++ = BPF_MOV32_IMM(BPF_REG_TMP, fp->k);
+
+ insn->dst_reg = BPF_REG_A;
+ insn->src_reg = BPF_REG_TMP;
+ bpf_src = BPF_X;
+ } else {
+ insn->dst_reg = BPF_REG_A;
+ insn->src_reg = BPF_REG_X;
+ insn->imm = fp->k;
+ bpf_src = BPF_SRC(fp->code);
+ }
+
+ /* Common case where 'jump_false' is next insn. */
+ if (fp->jf == 0) {
+ insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
+ target = i + fp->jt + 1;
+ BPF_EMIT_JMP;
+ break;
+ }
+
+ /* Convert JEQ into JNE when 'jump_true' is next insn. */
+ if (fp->jt == 0 && BPF_OP(fp->code) == BPF_JEQ) {
+ insn->code = BPF_JMP | BPF_JNE | bpf_src;
+ target = i + fp->jf + 1;
+ BPF_EMIT_JMP;
+ break;
+ }
+
+ /* Other jumps are mapped into two insns: Jxx and JA. */
+ target = i + fp->jt + 1;
+ insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
+ BPF_EMIT_JMP;
+ insn++;
+
+ insn->code = BPF_JMP | BPF_JA;
+ target = i + fp->jf + 1;
+ BPF_EMIT_JMP;
+ break;
+
+ /* ldxb 4 * ([14] & 0xf) is remapped into 6 insns. */
+ case BPF_LDX | BPF_MSH | BPF_B:
+ /* tmp = A */
+ *insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_A);
+ /* A = BPF_R0 = *(u8 *) (skb->data + K) */
+ *insn++ = BPF_LD_ABS(BPF_B, fp->k);
+ /* A &= 0xf */
+ *insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 0xf);
+ /* A <<= 2 */
+ *insn++ = BPF_ALU32_IMM(BPF_LSH, BPF_REG_A, 2);
+ /* X = A */
+ *insn++ = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
+ /* A = tmp */
+ *insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_TMP);
+ break;
+
+ /* RET_K, RET_A are remapped into 2 insns. */
+ case BPF_RET | BPF_A:
+ case BPF_RET | BPF_K:
+ *insn++ = BPF_MOV32_RAW(BPF_RVAL(fp->code) == BPF_K ?
+ BPF_K : BPF_X, BPF_REG_0,
+ BPF_REG_A, fp->k);
+ *insn = BPF_EXIT_INSN();
+ break;
+
+ /* Store to stack. */
+ case BPF_ST:
+ case BPF_STX:
+ *insn = BPF_STX_MEM(BPF_W, BPF_REG_FP, BPF_CLASS(fp->code) ==
+ BPF_ST ? BPF_REG_A : BPF_REG_X,
+ -(BPF_MEMWORDS - fp->k) * 4);
+ break;
+
+ /* Load from stack. */
+ case BPF_LD | BPF_MEM:
+ case BPF_LDX | BPF_MEM:
+ *insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ?
+ BPF_REG_A : BPF_REG_X, BPF_REG_FP,
+ -(BPF_MEMWORDS - fp->k) * 4);
+ break;
+
+ /* A = K or X = K */
+ case BPF_LD | BPF_IMM:
+ case BPF_LDX | BPF_IMM:
+ *insn = BPF_MOV32_IMM(BPF_CLASS(fp->code) == BPF_LD ?
+ BPF_REG_A : BPF_REG_X, fp->k);
+ break;
+
+ /* X = A */
+ case BPF_MISC | BPF_TAX:
+ *insn = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
+ break;
+
+ /* A = X */
+ case BPF_MISC | BPF_TXA:
+ *insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_X);
+ break;
+
+ /* A = skb->len or X = skb->len */
+ case BPF_LD | BPF_W | BPF_LEN:
+ case BPF_LDX | BPF_W | BPF_LEN:
+ *insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ?
+ BPF_REG_A : BPF_REG_X, BPF_REG_CTX,
+ offsetof(struct sk_buff, len));
+ break;
+
+ /* Access seccomp_data fields. */
+ case BPF_LDX | BPF_ABS | BPF_W:
+ /* A = *(u32 *) (ctx + K) */
+ *insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX, fp->k);
+ break;
+
+ /* Unknown instruction. */
default:
- WARN_RATELIMIT(1, "Unknown code:%u jt:%u tf:%u k:%u\n",
- fentry->code, fentry->jt,
- fentry->jf, fentry->k);
- return 0;
+ goto err;
}
+
+ insn++;
+ if (new_prog)
+ memcpy(new_insn, tmp_insns,
+ sizeof(*insn) * (insn - tmp_insns));
+ new_insn += insn - tmp_insns;
+ }
+
+ if (!new_prog) {
+ /* Only calculating new length. */
+ *new_len = new_insn - new_prog;
+ return 0;
}
+ pass++;
+ if (new_flen != new_insn - new_prog) {
+ new_flen = new_insn - new_prog;
+ if (pass > 2)
+ goto err;
+ goto do_pass;
+ }
+
+ kfree(addrs);
+ BUG_ON(*new_len != new_flen);
return 0;
+err:
+ kfree(addrs);
+ return -EINVAL;
}
-EXPORT_SYMBOL(sk_run_filter);
-/*
- * Security :
+/* Security:
+ *
* A BPF program is able to use 16 cells of memory to store intermediate
- * values (check u32 mem[BPF_MEMWORDS] in sk_run_filter())
+ * values (check u32 mem[BPF_MEMWORDS] in sk_run_filter()).
+ *
* As we dont want to clear mem[] array for each packet going through
* sk_run_filter(), we check that filter loaded by user never try to read
* a cell if not previously written, and we check all branches to be sure
@@ -413,44 +1096,46 @@ EXPORT_SYMBOL(sk_run_filter);
*/
static int check_load_and_stores(struct sock_filter *filter, int flen)
{
- u16 *masks, memvalid = 0; /* one bit per cell, 16 cells */
+ u16 *masks, memvalid = 0; /* One bit per cell, 16 cells */
int pc, ret = 0;
BUILD_BUG_ON(BPF_MEMWORDS > 16);
- masks = kmalloc(flen * sizeof(*masks), GFP_KERNEL);
+
+ masks = kmalloc_array(flen, sizeof(*masks), GFP_KERNEL);
if (!masks)
return -ENOMEM;
+
memset(masks, 0xff, flen * sizeof(*masks));
for (pc = 0; pc < flen; pc++) {
memvalid &= masks[pc];
switch (filter[pc].code) {
- case BPF_S_ST:
- case BPF_S_STX:
+ case BPF_ST:
+ case BPF_STX:
memvalid |= (1 << filter[pc].k);
break;
- case BPF_S_LD_MEM:
- case BPF_S_LDX_MEM:
+ case BPF_LD | BPF_MEM:
+ case BPF_LDX | BPF_MEM:
if (!(memvalid & (1 << filter[pc].k))) {
ret = -EINVAL;
goto error;
}
break;
- case BPF_S_JMP_JA:
- /* a jump must set masks on target */
+ case BPF_JMP | BPF_JA:
+ /* A jump must set masks on target */
masks[pc + 1 + filter[pc].k] &= memvalid;
memvalid = ~0;
break;
- case BPF_S_JMP_JEQ_K:
- case BPF_S_JMP_JEQ_X:
- case BPF_S_JMP_JGE_K:
- case BPF_S_JMP_JGE_X:
- case BPF_S_JMP_JGT_K:
- case BPF_S_JMP_JGT_X:
- case BPF_S_JMP_JSET_X:
- case BPF_S_JMP_JSET_K:
- /* a jump must set masks on targets */
+ case BPF_JMP | BPF_JEQ | BPF_K:
+ case BPF_JMP | BPF_JEQ | BPF_X:
+ case BPF_JMP | BPF_JGE | BPF_K:
+ case BPF_JMP | BPF_JGE | BPF_X:
+ case BPF_JMP | BPF_JGT | BPF_K:
+ case BPF_JMP | BPF_JGT | BPF_X:
+ case BPF_JMP | BPF_JSET | BPF_K:
+ case BPF_JMP | BPF_JSET | BPF_X:
+ /* A jump must set masks on targets */
masks[pc + 1 + filter[pc].jt] &= memvalid;
masks[pc + 1 + filter[pc].jf] &= memvalid;
memvalid = ~0;
@@ -462,6 +1147,72 @@ error:
return ret;
}
+static bool chk_code_allowed(u16 code_to_probe)
+{
+ static const bool codes[] = {
+ /* 32 bit ALU operations */
+ [BPF_ALU | BPF_ADD | BPF_K] = true,
+ [BPF_ALU | BPF_ADD | BPF_X] = true,
+ [BPF_ALU | BPF_SUB | BPF_K] = true,
+ [BPF_ALU | BPF_SUB | BPF_X] = true,
+ [BPF_ALU | BPF_MUL | BPF_K] = true,
+ [BPF_ALU | BPF_MUL | BPF_X] = true,
+ [BPF_ALU | BPF_DIV | BPF_K] = true,
+ [BPF_ALU | BPF_DIV | BPF_X] = true,
+ [BPF_ALU | BPF_MOD | BPF_K] = true,
+ [BPF_ALU | BPF_MOD | BPF_X] = true,
+ [BPF_ALU | BPF_AND | BPF_K] = true,
+ [BPF_ALU | BPF_AND | BPF_X] = true,
+ [BPF_ALU | BPF_OR | BPF_K] = true,
+ [BPF_ALU | BPF_OR | BPF_X] = true,
+ [BPF_ALU | BPF_XOR | BPF_K] = true,
+ [BPF_ALU | BPF_XOR | BPF_X] = true,
+ [BPF_ALU | BPF_LSH | BPF_K] = true,
+ [BPF_ALU | BPF_LSH | BPF_X] = true,
+ [BPF_ALU | BPF_RSH | BPF_K] = true,
+ [BPF_ALU | BPF_RSH | BPF_X] = true,
+ [BPF_ALU | BPF_NEG] = true,
+ /* Load instructions */
+ [BPF_LD | BPF_W | BPF_ABS] = true,
+ [BPF_LD | BPF_H | BPF_ABS] = true,
+ [BPF_LD | BPF_B | BPF_ABS] = true,
+ [BPF_LD | BPF_W | BPF_LEN] = true,
+ [BPF_LD | BPF_W | BPF_IND] = true,
+ [BPF_LD | BPF_H | BPF_IND] = true,
+ [BPF_LD | BPF_B | BPF_IND] = true,
+ [BPF_LD | BPF_IMM] = true,
+ [BPF_LD | BPF_MEM] = true,
+ [BPF_LDX | BPF_W | BPF_LEN] = true,
+ [BPF_LDX | BPF_B | BPF_MSH] = true,
+ [BPF_LDX | BPF_IMM] = true,
+ [BPF_LDX | BPF_MEM] = true,
+ /* Store instructions */
+ [BPF_ST] = true,
+ [BPF_STX] = true,
+ /* Misc instructions */
+ [BPF_MISC | BPF_TAX] = true,
+ [BPF_MISC | BPF_TXA] = true,
+ /* Return instructions */
+ [BPF_RET | BPF_K] = true,
+ [BPF_RET | BPF_A] = true,
+ /* Jump instructions */
+ [BPF_JMP | BPF_JA] = true,
+ [BPF_JMP | BPF_JEQ | BPF_K] = true,
+ [BPF_JMP | BPF_JEQ | BPF_X] = true,
+ [BPF_JMP | BPF_JGE | BPF_K] = true,
+ [BPF_JMP | BPF_JGE | BPF_X] = true,
+ [BPF_JMP | BPF_JGT | BPF_K] = true,
+ [BPF_JMP | BPF_JGT | BPF_X] = true,
+ [BPF_JMP | BPF_JSET | BPF_K] = true,
+ [BPF_JMP | BPF_JSET | BPF_X] = true,
+ };
+
+ if (code_to_probe >= ARRAY_SIZE(codes))
+ return false;
+
+ return codes[code_to_probe];
+}
+
/**
* sk_chk_filter - verify socket filter code
* @filter: filter to verify
@@ -478,187 +1229,303 @@ error:
*/
int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
{
- /*
- * Valid instructions are initialized to non-0.
- * Invalid instructions are initialized to 0.
- */
- static const u8 codes[] = {
- [BPF_ALU|BPF_ADD|BPF_K] = BPF_S_ALU_ADD_K,
- [BPF_ALU|BPF_ADD|BPF_X] = BPF_S_ALU_ADD_X,
- [BPF_ALU|BPF_SUB|BPF_K] = BPF_S_ALU_SUB_K,
- [BPF_ALU|BPF_SUB|BPF_X] = BPF_S_ALU_SUB_X,
- [BPF_ALU|BPF_MUL|BPF_K] = BPF_S_ALU_MUL_K,
- [BPF_ALU|BPF_MUL|BPF_X] = BPF_S_ALU_MUL_X,
- [BPF_ALU|BPF_DIV|BPF_X] = BPF_S_ALU_DIV_X,
- [BPF_ALU|BPF_MOD|BPF_K] = BPF_S_ALU_MOD_K,
- [BPF_ALU|BPF_MOD|BPF_X] = BPF_S_ALU_MOD_X,
- [BPF_ALU|BPF_AND|BPF_K] = BPF_S_ALU_AND_K,
- [BPF_ALU|BPF_AND|BPF_X] = BPF_S_ALU_AND_X,
- [BPF_ALU|BPF_OR|BPF_K] = BPF_S_ALU_OR_K,
- [BPF_ALU|BPF_OR|BPF_X] = BPF_S_ALU_OR_X,
- [BPF_ALU|BPF_XOR|BPF_K] = BPF_S_ALU_XOR_K,
- [BPF_ALU|BPF_XOR|BPF_X] = BPF_S_ALU_XOR_X,
- [BPF_ALU|BPF_LSH|BPF_K] = BPF_S_ALU_LSH_K,
- [BPF_ALU|BPF_LSH|BPF_X] = BPF_S_ALU_LSH_X,
- [BPF_ALU|BPF_RSH|BPF_K] = BPF_S_ALU_RSH_K,
- [BPF_ALU|BPF_RSH|BPF_X] = BPF_S_ALU_RSH_X,
- [BPF_ALU|BPF_NEG] = BPF_S_ALU_NEG,
- [BPF_LD|BPF_W|BPF_ABS] = BPF_S_LD_W_ABS,
- [BPF_LD|BPF_H|BPF_ABS] = BPF_S_LD_H_ABS,
- [BPF_LD|BPF_B|BPF_ABS] = BPF_S_LD_B_ABS,
- [BPF_LD|BPF_W|BPF_LEN] = BPF_S_LD_W_LEN,
- [BPF_LD|BPF_W|BPF_IND] = BPF_S_LD_W_IND,
- [BPF_LD|BPF_H|BPF_IND] = BPF_S_LD_H_IND,
- [BPF_LD|BPF_B|BPF_IND] = BPF_S_LD_B_IND,
- [BPF_LD|BPF_IMM] = BPF_S_LD_IMM,
- [BPF_LDX|BPF_W|BPF_LEN] = BPF_S_LDX_W_LEN,
- [BPF_LDX|BPF_B|BPF_MSH] = BPF_S_LDX_B_MSH,
- [BPF_LDX|BPF_IMM] = BPF_S_LDX_IMM,
- [BPF_MISC|BPF_TAX] = BPF_S_MISC_TAX,
- [BPF_MISC|BPF_TXA] = BPF_S_MISC_TXA,
- [BPF_RET|BPF_K] = BPF_S_RET_K,
- [BPF_RET|BPF_A] = BPF_S_RET_A,
- [BPF_ALU|BPF_DIV|BPF_K] = BPF_S_ALU_DIV_K,
- [BPF_LD|BPF_MEM] = BPF_S_LD_MEM,
- [BPF_LDX|BPF_MEM] = BPF_S_LDX_MEM,
- [BPF_ST] = BPF_S_ST,
- [BPF_STX] = BPF_S_STX,
- [BPF_JMP|BPF_JA] = BPF_S_JMP_JA,
- [BPF_JMP|BPF_JEQ|BPF_K] = BPF_S_JMP_JEQ_K,
- [BPF_JMP|BPF_JEQ|BPF_X] = BPF_S_JMP_JEQ_X,
- [BPF_JMP|BPF_JGE|BPF_K] = BPF_S_JMP_JGE_K,
- [BPF_JMP|BPF_JGE|BPF_X] = BPF_S_JMP_JGE_X,
- [BPF_JMP|BPF_JGT|BPF_K] = BPF_S_JMP_JGT_K,
- [BPF_JMP|BPF_JGT|BPF_X] = BPF_S_JMP_JGT_X,
- [BPF_JMP|BPF_JSET|BPF_K] = BPF_S_JMP_JSET_K,
- [BPF_JMP|BPF_JSET|BPF_X] = BPF_S_JMP_JSET_X,
- };
- int pc;
bool anc_found;
+ int pc;
if (flen == 0 || flen > BPF_MAXINSNS)
return -EINVAL;
- /* check the filter code now */
+ /* Check the filter code now */
for (pc = 0; pc < flen; pc++) {
struct sock_filter *ftest = &filter[pc];
- u16 code = ftest->code;
- if (code >= ARRAY_SIZE(codes))
- return -EINVAL;
- code = codes[code];
- if (!code)
+ /* May we actually operate on this code? */
+ if (!chk_code_allowed(ftest->code))
return -EINVAL;
+
/* Some instructions need special checks */
- switch (code) {
- case BPF_S_ALU_DIV_K:
- case BPF_S_ALU_MOD_K:
- /* check for division by zero */
+ switch (ftest->code) {
+ case BPF_ALU | BPF_DIV | BPF_K:
+ case BPF_ALU | BPF_MOD | BPF_K:
+ /* Check for division by zero */
if (ftest->k == 0)
return -EINVAL;
break;
- case BPF_S_LD_MEM:
- case BPF_S_LDX_MEM:
- case BPF_S_ST:
- case BPF_S_STX:
- /* check for invalid memory addresses */
+ case BPF_LD | BPF_MEM:
+ case BPF_LDX | BPF_MEM:
+ case BPF_ST:
+ case BPF_STX:
+ /* Check for invalid memory addresses */
if (ftest->k >= BPF_MEMWORDS)
return -EINVAL;
break;
- case BPF_S_JMP_JA:
- /*
- * Note, the large ftest->k might cause loops.
+ case BPF_JMP | BPF_JA:
+ /* Note, the large ftest->k might cause loops.
* Compare this with conditional jumps below,
* where offsets are limited. --ANK (981016)
*/
- if (ftest->k >= (unsigned int)(flen-pc-1))
+ if (ftest->k >= (unsigned int)(flen - pc - 1))
return -EINVAL;
break;
- case BPF_S_JMP_JEQ_K:
- case BPF_S_JMP_JEQ_X:
- case BPF_S_JMP_JGE_K:
- case BPF_S_JMP_JGE_X:
- case BPF_S_JMP_JGT_K:
- case BPF_S_JMP_JGT_X:
- case BPF_S_JMP_JSET_X:
- case BPF_S_JMP_JSET_K:
- /* for conditionals both must be safe */
+ case BPF_JMP | BPF_JEQ | BPF_K:
+ case BPF_JMP | BPF_JEQ | BPF_X:
+ case BPF_JMP | BPF_JGE | BPF_K:
+ case BPF_JMP | BPF_JGE | BPF_X:
+ case BPF_JMP | BPF_JGT | BPF_K:
+ case BPF_JMP | BPF_JGT | BPF_X:
+ case BPF_JMP | BPF_JSET | BPF_K:
+ case BPF_JMP | BPF_JSET | BPF_X:
+ /* Both conditionals must be safe */
if (pc + ftest->jt + 1 >= flen ||
pc + ftest->jf + 1 >= flen)
return -EINVAL;
break;
- case BPF_S_LD_W_ABS:
- case BPF_S_LD_H_ABS:
- case BPF_S_LD_B_ABS:
+ case BPF_LD | BPF_W | BPF_ABS:
+ case BPF_LD | BPF_H | BPF_ABS:
+ case BPF_LD | BPF_B | BPF_ABS:
anc_found = false;
-#define ANCILLARY(CODE) case SKF_AD_OFF + SKF_AD_##CODE: \
- code = BPF_S_ANC_##CODE; \
- anc_found = true; \
- break
- switch (ftest->k) {
- ANCILLARY(PROTOCOL);
- ANCILLARY(PKTTYPE);
- ANCILLARY(IFINDEX);
- ANCILLARY(NLATTR);
- ANCILLARY(NLATTR_NEST);
- ANCILLARY(MARK);
- ANCILLARY(QUEUE);
- ANCILLARY(HATYPE);
- ANCILLARY(RXHASH);
- ANCILLARY(CPU);
- ANCILLARY(ALU_XOR_X);
- ANCILLARY(VLAN_TAG);
- ANCILLARY(VLAN_TAG_PRESENT);
- ANCILLARY(PAY_OFFSET);
- }
-
- /* ancillary operation unknown or unsupported */
+ if (bpf_anc_helper(ftest) & BPF_ANC)
+ anc_found = true;
+ /* Ancillary operation unknown or unsupported */
if (anc_found == false && ftest->k >= SKF_AD_OFF)
return -EINVAL;
}
- ftest->code = code;
}
- /* last instruction must be a RET code */
+ /* Last instruction must be a RET code */
switch (filter[flen - 1].code) {
- case BPF_S_RET_K:
- case BPF_S_RET_A:
+ case BPF_RET | BPF_K:
+ case BPF_RET | BPF_A:
return check_load_and_stores(filter, flen);
}
+
return -EINVAL;
}
EXPORT_SYMBOL(sk_chk_filter);
+/* Stash a copy of the filter program held in fp->insns in fp->orig_prog.
+ *
+ * Returns 0 on success or -ENOMEM if either allocation fails. On failure
+ * fp->orig_prog is left NULL, so sk_release_orig_filter() will not free
+ * the already-released buffer a second time.
+ */
+static int sk_store_orig_filter(struct sk_filter *fp,
+ const struct sock_fprog *fprog)
+{
+ unsigned int fsize = sk_filter_proglen(fprog);
+ struct sock_fprog_kern *fkprog;
+
+ fp->orig_prog = kmalloc(sizeof(*fkprog), GFP_KERNEL);
+ if (!fp->orig_prog)
+ return -ENOMEM;
+
+ fkprog = fp->orig_prog;
+ fkprog->len = fprog->len;
+ fkprog->filter = kmemdup(fp->insns, fsize, GFP_KERNEL);
+ if (!fkprog->filter) {
+ kfree(fp->orig_prog);
+ /* Callers release fp on error; do not leave a dangling pointer. */
+ fp->orig_prog = NULL;
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void sk_release_orig_filter(struct sk_filter *fp)
+{
+ struct sock_fprog_kern *fprog = fp->orig_prog;
+
+ if (fprog) {
+ kfree(fprog->filter);
+ kfree(fprog);
+ }
+}
+
/**
* sk_filter_release_rcu - Release a socket filter by rcu_head
* @rcu: rcu_head that contains the sk_filter to free
*/
-void sk_filter_release_rcu(struct rcu_head *rcu)
+static void sk_filter_release_rcu(struct rcu_head *rcu)
{
struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);
+ sk_release_orig_filter(fp);
+ sk_filter_free(fp);
+}
+
+/**
+ * sk_filter_release - release a socket filter
+ * @fp: filter to remove
+ *
+ * Remove a filter from a socket and release its resources.
+ */
+static void sk_filter_release(struct sk_filter *fp)
+{
+ if (atomic_dec_and_test(&fp->refcnt))
+ call_rcu(&fp->rcu, sk_filter_release_rcu);
+}
+
+void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp)
+{
+ atomic_sub(sk_filter_size(fp->len), &sk->sk_omem_alloc);
+ sk_filter_release(fp);
+}
+
+void sk_filter_charge(struct sock *sk, struct sk_filter *fp)
+{
+ atomic_inc(&fp->refcnt);
+ atomic_add(sk_filter_size(fp->len), &sk->sk_omem_alloc);
+}
+
+static struct sk_filter *__sk_migrate_realloc(struct sk_filter *fp,
+ struct sock *sk,
+ unsigned int len)
+{
+ struct sk_filter *fp_new;
+
+ if (sk == NULL)
+ return krealloc(fp, len, GFP_KERNEL);
+
+ fp_new = sock_kmalloc(sk, len, GFP_KERNEL);
+ if (fp_new) {
+ *fp_new = *fp;
+ /* As we're keeping orig_prog in fp_new along,
+ * we need to make sure we're not evicting it
+ * from the old fp.
+ */
+ fp->orig_prog = NULL;
+ sk_filter_uncharge(sk, fp);
+ }
+
+ return fp_new;
+}
+
+static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp,
+ struct sock *sk)
+{
+ struct sock_filter *old_prog;
+ struct sk_filter *old_fp;
+ int err, new_len, old_len = fp->len;
+
+ /* We are free to overwrite insns et al right here as it
+ * won't be used at this point in time anymore internally
+ * after the migration to the internal BPF instruction
+ * representation.
+ */
+ BUILD_BUG_ON(sizeof(struct sock_filter) !=
+ sizeof(struct sock_filter_int));
+
+ /* Conversion cannot happen on overlapping memory areas,
+ * so we need to keep the user BPF around until the 2nd
+ * pass. At this time, the user BPF is stored in fp->insns.
+ */
+ old_prog = kmemdup(fp->insns, old_len * sizeof(struct sock_filter),
+ GFP_KERNEL);
+ if (!old_prog) {
+ err = -ENOMEM;
+ goto out_err;
+ }
+
+ /* 1st pass: calculate the new program length. */
+ err = sk_convert_filter(old_prog, old_len, NULL, &new_len);
+ if (err)
+ goto out_err_free;
+
+ /* Expand fp for appending the new filter representation. */
+ old_fp = fp;
+ fp = __sk_migrate_realloc(old_fp, sk, sk_filter_size(new_len));
+ if (!fp) {
+ /* The old_fp is still around in case we couldn't
+ * allocate new memory, so uncharge on that one.
+ */
+ fp = old_fp;
+ err = -ENOMEM;
+ goto out_err_free;
+ }
+
+ fp->len = new_len;
+
+ /* 2nd pass: remap sock_filter insns into sock_filter_int insns. */
+ err = sk_convert_filter(old_prog, old_len, fp->insnsi, &new_len);
+ if (err)
+ /* 2nd sk_convert_filter() can fail only if it fails
+ * to allocate memory, remapping must succeed. Note,
+ * that at this time old_fp has already been released
+ * by __sk_migrate_realloc().
+ */
+ goto out_err_free;
+
+ sk_filter_select_runtime(fp);
+
+ kfree(old_prog);
+ return fp;
+
+out_err_free:
+ kfree(old_prog);
+out_err:
+ /* Rollback filter setup. */
+ if (sk != NULL)
+ sk_filter_uncharge(sk, fp);
+ else
+ kfree(fp);
+ return ERR_PTR(err);
+}
+
+void __weak bpf_int_jit_compile(struct sk_filter *prog)
+{
+}
+
+/**
+ * sk_filter_select_runtime - select execution runtime for BPF program
+ * @fp: sk_filter populated with internal BPF program
+ *
+ * Try to JIT the internal BPF program; if JIT is not available, select the
+ * interpreter. The BPF program will be executed via the SK_RUN_FILTER() macro.
+ */
+void sk_filter_select_runtime(struct sk_filter *fp)
+{
+ fp->bpf_func = (void *) __sk_run_filter;
+
+ /* Probe if internal BPF can be JITed */
+ bpf_int_jit_compile(fp);
+}
+EXPORT_SYMBOL_GPL(sk_filter_select_runtime);
+
+/* free internal BPF program */
+void sk_filter_free(struct sk_filter *fp)
+{
bpf_jit_free(fp);
}
-EXPORT_SYMBOL(sk_filter_release_rcu);
+EXPORT_SYMBOL_GPL(sk_filter_free);
-static int __sk_prepare_filter(struct sk_filter *fp)
+static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp,
+ struct sock *sk)
{
int err;
- fp->bpf_func = sk_run_filter;
+ fp->bpf_func = NULL;
+ fp->jited = 0;
err = sk_chk_filter(fp->insns, fp->len);
- if (err)
- return err;
+ if (err) {
+ if (sk != NULL)
+ sk_filter_uncharge(sk, fp);
+ else
+ kfree(fp);
+ return ERR_PTR(err);
+ }
+ /* Probe if we can JIT compile the filter and if so, do
+ * the compilation of the filter.
+ */
bpf_jit_compile(fp);
- return 0;
+
+ /* JIT compiler couldn't process this filter, so do the
+ * internal BPF translation for the optimized interpreter.
+ */
+ if (!fp->jited)
+ fp = __sk_migrate_filter(fp, sk);
+
+ return fp;
}
/**
* sk_unattached_filter_create - create an unattached filter
- * @fprog: the filter program
* @pfp: the unattached filter that is created
+ * @fprog: the filter program
*
* Create a filter independent of any socket. We first run some
* sanity checks on it to make sure it does not explode on us later.
@@ -666,11 +1533,10 @@ static int __sk_prepare_filter(struct sk_filter *fp)
* a negative errno code is returned. On success the return is zero.
*/
int sk_unattached_filter_create(struct sk_filter **pfp,
- struct sock_fprog *fprog)
+ struct sock_fprog_kern *fprog)
{
+ unsigned int fsize = sk_filter_proglen(fprog);
struct sk_filter *fp;
- unsigned int fsize = sizeof(struct sock_filter) * fprog->len;
- int err;
/* Make sure new filter is there and in the right amounts. */
if (fprog->filter == NULL)
@@ -679,20 +1545,26 @@ int sk_unattached_filter_create(struct sk_filter **pfp,
fp = kmalloc(sk_filter_size(fprog->len), GFP_KERNEL);
if (!fp)
return -ENOMEM;
+
memcpy(fp->insns, fprog->filter, fsize);
atomic_set(&fp->refcnt, 1);
fp->len = fprog->len;
+ /* Since unattached filters are not copied back to user
+ * space through sk_get_filter(), we do not need to hold
+ * a copy here, and can spare us the work.
+ */
+ fp->orig_prog = NULL;
- err = __sk_prepare_filter(fp);
- if (err)
- goto free_mem;
+ /* __sk_prepare_filter() already takes care of uncharging
+ * memory in case something goes wrong.
+ */
+ fp = __sk_prepare_filter(fp, NULL);
+ if (IS_ERR(fp))
+ return PTR_ERR(fp);
*pfp = fp;
return 0;
-free_mem:
- kfree(fp);
- return err;
}
EXPORT_SYMBOL_GPL(sk_unattached_filter_create);
@@ -715,7 +1587,7 @@ EXPORT_SYMBOL_GPL(sk_unattached_filter_destroy);
int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
{
struct sk_filter *fp, *old_fp;
- unsigned int fsize = sizeof(struct sock_filter) * fprog->len;
+ unsigned int fsize = sk_filter_proglen(fprog);
unsigned int sk_fsize = sk_filter_size(fprog->len);
int err;
@@ -729,6 +1601,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
fp = sock_kmalloc(sk, sk_fsize, GFP_KERNEL);
if (!fp)
return -ENOMEM;
+
if (copy_from_user(fp->insns, fprog->filter, fsize)) {
sock_kfree_s(sk, fp, sk_fsize);
return -EFAULT;
@@ -737,18 +1610,26 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
atomic_set(&fp->refcnt, 1);
fp->len = fprog->len;
- err = __sk_prepare_filter(fp);
+ err = sk_store_orig_filter(fp, fprog);
if (err) {
sk_filter_uncharge(sk, fp);
- return err;
+ return -ENOMEM;
}
+ /* __sk_prepare_filter() already takes care of uncharging
+ * memory in case something goes wrong.
+ */
+ fp = __sk_prepare_filter(fp, sk);
+ if (IS_ERR(fp))
+ return PTR_ERR(fp);
+
old_fp = rcu_dereference_protected(sk->sk_filter,
sock_owned_by_user(sk));
rcu_assign_pointer(sk->sk_filter, fp);
if (old_fp)
sk_filter_uncharge(sk, old_fp);
+
return 0;
}
EXPORT_SYMBOL_GPL(sk_attach_filter);
@@ -768,116 +1649,46 @@ int sk_detach_filter(struct sock *sk)
sk_filter_uncharge(sk, filter);
ret = 0;
}
+
return ret;
}
EXPORT_SYMBOL_GPL(sk_detach_filter);
-void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to)
-{
- static const u16 decodes[] = {
- [BPF_S_ALU_ADD_K] = BPF_ALU|BPF_ADD|BPF_K,
- [BPF_S_ALU_ADD_X] = BPF_ALU|BPF_ADD|BPF_X,
- [BPF_S_ALU_SUB_K] = BPF_ALU|BPF_SUB|BPF_K,
- [BPF_S_ALU_SUB_X] = BPF_ALU|BPF_SUB|BPF_X,
- [BPF_S_ALU_MUL_K] = BPF_ALU|BPF_MUL|BPF_K,
- [BPF_S_ALU_MUL_X] = BPF_ALU|BPF_MUL|BPF_X,
- [BPF_S_ALU_DIV_X] = BPF_ALU|BPF_DIV|BPF_X,
- [BPF_S_ALU_MOD_K] = BPF_ALU|BPF_MOD|BPF_K,
- [BPF_S_ALU_MOD_X] = BPF_ALU|BPF_MOD|BPF_X,
- [BPF_S_ALU_AND_K] = BPF_ALU|BPF_AND|BPF_K,
- [BPF_S_ALU_AND_X] = BPF_ALU|BPF_AND|BPF_X,
- [BPF_S_ALU_OR_K] = BPF_ALU|BPF_OR|BPF_K,
- [BPF_S_ALU_OR_X] = BPF_ALU|BPF_OR|BPF_X,
- [BPF_S_ALU_XOR_K] = BPF_ALU|BPF_XOR|BPF_K,
- [BPF_S_ALU_XOR_X] = BPF_ALU|BPF_XOR|BPF_X,
- [BPF_S_ALU_LSH_K] = BPF_ALU|BPF_LSH|BPF_K,
- [BPF_S_ALU_LSH_X] = BPF_ALU|BPF_LSH|BPF_X,
- [BPF_S_ALU_RSH_K] = BPF_ALU|BPF_RSH|BPF_K,
- [BPF_S_ALU_RSH_X] = BPF_ALU|BPF_RSH|BPF_X,
- [BPF_S_ALU_NEG] = BPF_ALU|BPF_NEG,
- [BPF_S_LD_W_ABS] = BPF_LD|BPF_W|BPF_ABS,
- [BPF_S_LD_H_ABS] = BPF_LD|BPF_H|BPF_ABS,
- [BPF_S_LD_B_ABS] = BPF_LD|BPF_B|BPF_ABS,
- [BPF_S_ANC_PROTOCOL] = BPF_LD|BPF_B|BPF_ABS,
- [BPF_S_ANC_PKTTYPE] = BPF_LD|BPF_B|BPF_ABS,
- [BPF_S_ANC_IFINDEX] = BPF_LD|BPF_B|BPF_ABS,
- [BPF_S_ANC_NLATTR] = BPF_LD|BPF_B|BPF_ABS,
- [BPF_S_ANC_NLATTR_NEST] = BPF_LD|BPF_B|BPF_ABS,
- [BPF_S_ANC_MARK] = BPF_LD|BPF_B|BPF_ABS,
- [BPF_S_ANC_QUEUE] = BPF_LD|BPF_B|BPF_ABS,
- [BPF_S_ANC_HATYPE] = BPF_LD|BPF_B|BPF_ABS,
- [BPF_S_ANC_RXHASH] = BPF_LD|BPF_B|BPF_ABS,
- [BPF_S_ANC_CPU] = BPF_LD|BPF_B|BPF_ABS,
- [BPF_S_ANC_ALU_XOR_X] = BPF_LD|BPF_B|BPF_ABS,
- [BPF_S_ANC_SECCOMP_LD_W] = BPF_LD|BPF_B|BPF_ABS,
- [BPF_S_ANC_VLAN_TAG] = BPF_LD|BPF_B|BPF_ABS,
- [BPF_S_ANC_VLAN_TAG_PRESENT] = BPF_LD|BPF_B|BPF_ABS,
- [BPF_S_ANC_PAY_OFFSET] = BPF_LD|BPF_B|BPF_ABS,
- [BPF_S_LD_W_LEN] = BPF_LD|BPF_W|BPF_LEN,
- [BPF_S_LD_W_IND] = BPF_LD|BPF_W|BPF_IND,
- [BPF_S_LD_H_IND] = BPF_LD|BPF_H|BPF_IND,
- [BPF_S_LD_B_IND] = BPF_LD|BPF_B|BPF_IND,
- [BPF_S_LD_IMM] = BPF_LD|BPF_IMM,
- [BPF_S_LDX_W_LEN] = BPF_LDX|BPF_W|BPF_LEN,
- [BPF_S_LDX_B_MSH] = BPF_LDX|BPF_B|BPF_MSH,
- [BPF_S_LDX_IMM] = BPF_LDX|BPF_IMM,
- [BPF_S_MISC_TAX] = BPF_MISC|BPF_TAX,
- [BPF_S_MISC_TXA] = BPF_MISC|BPF_TXA,
- [BPF_S_RET_K] = BPF_RET|BPF_K,
- [BPF_S_RET_A] = BPF_RET|BPF_A,
- [BPF_S_ALU_DIV_K] = BPF_ALU|BPF_DIV|BPF_K,
- [BPF_S_LD_MEM] = BPF_LD|BPF_MEM,
- [BPF_S_LDX_MEM] = BPF_LDX|BPF_MEM,
- [BPF_S_ST] = BPF_ST,
- [BPF_S_STX] = BPF_STX,
- [BPF_S_JMP_JA] = BPF_JMP|BPF_JA,
- [BPF_S_JMP_JEQ_K] = BPF_JMP|BPF_JEQ|BPF_K,
- [BPF_S_JMP_JEQ_X] = BPF_JMP|BPF_JEQ|BPF_X,
- [BPF_S_JMP_JGE_K] = BPF_JMP|BPF_JGE|BPF_K,
- [BPF_S_JMP_JGE_X] = BPF_JMP|BPF_JGE|BPF_X,
- [BPF_S_JMP_JGT_K] = BPF_JMP|BPF_JGT|BPF_K,
- [BPF_S_JMP_JGT_X] = BPF_JMP|BPF_JGT|BPF_X,
- [BPF_S_JMP_JSET_K] = BPF_JMP|BPF_JSET|BPF_K,
- [BPF_S_JMP_JSET_X] = BPF_JMP|BPF_JSET|BPF_X,
- };
- u16 code;
-
- code = filt->code;
-
- to->code = decodes[code];
- to->jt = filt->jt;
- to->jf = filt->jf;
- to->k = filt->k;
-}
-
-int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, unsigned int len)
+int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf,
+ unsigned int len)
{
+ struct sock_fprog_kern *fprog;
struct sk_filter *filter;
- int i, ret;
+ int ret = 0;
lock_sock(sk);
filter = rcu_dereference_protected(sk->sk_filter,
- sock_owned_by_user(sk));
- ret = 0;
+ sock_owned_by_user(sk));
if (!filter)
goto out;
- ret = filter->len;
+
+ /* We're copying the filter that has been originally attached,
+ * so no conversion/decode needed anymore.
+ */
+ fprog = filter->orig_prog;
+
+ ret = fprog->len;
if (!len)
+ /* User space only enquires number of filter blocks. */
goto out;
+
ret = -EINVAL;
- if (len < filter->len)
+ if (len < fprog->len)
goto out;
ret = -EFAULT;
- for (i = 0; i < filter->len; i++) {
- struct sock_filter fb;
-
- sk_decode_filter(&filter->insns[i], &fb);
- if (copy_to_user(&ubuf[i], &fb, sizeof(fb)))
- goto out;
- }
+ if (copy_to_user(ubuf, fprog->filter, sk_filter_proglen(fprog)))
+ goto out;
- ret = filter->len;
+ /* Instead of bytes, the API requests to return the number
+ * of filter blocks.
+ */
+ ret = fprog->len;
out:
release_sock(sk);
return ret;
diff --git a/net/core/flow.c b/net/core/flow.c
index dfa602ceb8c..a0348fde1fd 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -24,6 +24,7 @@
#include <net/flow.h>
#include <linux/atomic.h>
#include <linux/security.h>
+#include <net/net_namespace.h>
struct flow_cache_entry {
union {
@@ -38,37 +39,14 @@ struct flow_cache_entry {
struct flow_cache_object *object;
};
-struct flow_cache_percpu {
- struct hlist_head *hash_table;
- int hash_count;
- u32 hash_rnd;
- int hash_rnd_recalc;
- struct tasklet_struct flush_tasklet;
-};
-
struct flow_flush_info {
struct flow_cache *cache;
atomic_t cpuleft;
struct completion completion;
};
-struct flow_cache {
- u32 hash_shift;
- struct flow_cache_percpu __percpu *percpu;
- struct notifier_block hotcpu_notifier;
- int low_watermark;
- int high_watermark;
- struct timer_list rnd_timer;
-};
-
-atomic_t flow_cache_genid = ATOMIC_INIT(0);
-EXPORT_SYMBOL(flow_cache_genid);
-static struct flow_cache flow_cache_global;
static struct kmem_cache *flow_cachep __read_mostly;
-static DEFINE_SPINLOCK(flow_cache_gc_lock);
-static LIST_HEAD(flow_cache_gc_list);
-
#define flow_cache_hash_size(cache) (1 << (cache)->hash_shift)
#define FLOW_HASH_RND_PERIOD (10 * 60 * HZ)
@@ -84,16 +62,18 @@ static void flow_cache_new_hashrnd(unsigned long arg)
add_timer(&fc->rnd_timer);
}
-static int flow_entry_valid(struct flow_cache_entry *fle)
+static int flow_entry_valid(struct flow_cache_entry *fle,
+ struct netns_xfrm *xfrm)
{
- if (atomic_read(&flow_cache_genid) != fle->genid)
+ if (atomic_read(&xfrm->flow_cache_genid) != fle->genid)
return 0;
if (fle->object && !fle->object->ops->check(fle->object))
return 0;
return 1;
}
-static void flow_entry_kill(struct flow_cache_entry *fle)
+static void flow_entry_kill(struct flow_cache_entry *fle,
+ struct netns_xfrm *xfrm)
{
if (fle->object)
fle->object->ops->delete(fle->object);
@@ -104,26 +84,28 @@ static void flow_cache_gc_task(struct work_struct *work)
{
struct list_head gc_list;
struct flow_cache_entry *fce, *n;
+ struct netns_xfrm *xfrm = container_of(work, struct netns_xfrm,
+ flow_cache_gc_work);
INIT_LIST_HEAD(&gc_list);
- spin_lock_bh(&flow_cache_gc_lock);
- list_splice_tail_init(&flow_cache_gc_list, &gc_list);
- spin_unlock_bh(&flow_cache_gc_lock);
+ spin_lock_bh(&xfrm->flow_cache_gc_lock);
+ list_splice_tail_init(&xfrm->flow_cache_gc_list, &gc_list);
+ spin_unlock_bh(&xfrm->flow_cache_gc_lock);
list_for_each_entry_safe(fce, n, &gc_list, u.gc_list)
- flow_entry_kill(fce);
+ flow_entry_kill(fce, xfrm);
}
-static DECLARE_WORK(flow_cache_gc_work, flow_cache_gc_task);
static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp,
- int deleted, struct list_head *gc_list)
+ int deleted, struct list_head *gc_list,
+ struct netns_xfrm *xfrm)
{
if (deleted) {
fcp->hash_count -= deleted;
- spin_lock_bh(&flow_cache_gc_lock);
- list_splice_tail(gc_list, &flow_cache_gc_list);
- spin_unlock_bh(&flow_cache_gc_lock);
- schedule_work(&flow_cache_gc_work);
+ spin_lock_bh(&xfrm->flow_cache_gc_lock);
+ list_splice_tail(gc_list, &xfrm->flow_cache_gc_list);
+ spin_unlock_bh(&xfrm->flow_cache_gc_lock);
+ schedule_work(&xfrm->flow_cache_gc_work);
}
}
@@ -135,6 +117,8 @@ static void __flow_cache_shrink(struct flow_cache *fc,
struct hlist_node *tmp;
LIST_HEAD(gc_list);
int i, deleted = 0;
+ struct netns_xfrm *xfrm = container_of(fc, struct netns_xfrm,
+ flow_cache_global);
for (i = 0; i < flow_cache_hash_size(fc); i++) {
int saved = 0;
@@ -142,7 +126,7 @@ static void __flow_cache_shrink(struct flow_cache *fc,
hlist_for_each_entry_safe(fle, tmp,
&fcp->hash_table[i], u.hlist) {
if (saved < shrink_to &&
- flow_entry_valid(fle)) {
+ flow_entry_valid(fle, xfrm)) {
saved++;
} else {
deleted++;
@@ -152,7 +136,7 @@ static void __flow_cache_shrink(struct flow_cache *fc,
}
}
- flow_cache_queue_garbage(fcp, deleted, &gc_list);
+ flow_cache_queue_garbage(fcp, deleted, &gc_list, xfrm);
}
static void flow_cache_shrink(struct flow_cache *fc,
@@ -208,7 +192,7 @@ struct flow_cache_object *
flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir,
flow_resolve_t resolver, void *ctx)
{
- struct flow_cache *fc = &flow_cache_global;
+ struct flow_cache *fc = &net->xfrm.flow_cache_global;
struct flow_cache_percpu *fcp;
struct flow_cache_entry *fle, *tfle;
struct flow_cache_object *flo;
@@ -258,7 +242,7 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir,
hlist_add_head(&fle->u.hlist, &fcp->hash_table[hash]);
fcp->hash_count++;
}
- } else if (likely(fle->genid == atomic_read(&flow_cache_genid))) {
+ } else if (likely(fle->genid == atomic_read(&net->xfrm.flow_cache_genid))) {
flo = fle->object;
if (!flo)
goto ret_object;
@@ -279,7 +263,7 @@ nocache:
}
flo = resolver(net, key, family, dir, flo, ctx);
if (fle) {
- fle->genid = atomic_read(&flow_cache_genid);
+ fle->genid = atomic_read(&net->xfrm.flow_cache_genid);
if (!IS_ERR(flo))
fle->object = flo;
else
@@ -303,12 +287,14 @@ static void flow_cache_flush_tasklet(unsigned long data)
struct hlist_node *tmp;
LIST_HEAD(gc_list);
int i, deleted = 0;
+ struct netns_xfrm *xfrm = container_of(fc, struct netns_xfrm,
+ flow_cache_global);
fcp = this_cpu_ptr(fc->percpu);
for (i = 0; i < flow_cache_hash_size(fc); i++) {
hlist_for_each_entry_safe(fle, tmp,
&fcp->hash_table[i], u.hlist) {
- if (flow_entry_valid(fle))
+ if (flow_entry_valid(fle, xfrm))
continue;
deleted++;
@@ -317,7 +303,7 @@ static void flow_cache_flush_tasklet(unsigned long data)
}
}
- flow_cache_queue_garbage(fcp, deleted, &gc_list);
+ flow_cache_queue_garbage(fcp, deleted, &gc_list, xfrm);
if (atomic_dec_and_test(&info->cpuleft))
complete(&info->completion);
@@ -351,10 +337,9 @@ static void flow_cache_flush_per_cpu(void *data)
tasklet_schedule(tasklet);
}
-void flow_cache_flush(void)
+void flow_cache_flush(struct net *net)
{
struct flow_flush_info info;
- static DEFINE_MUTEX(flow_flush_sem);
cpumask_var_t mask;
int i, self;
@@ -365,8 +350,8 @@ void flow_cache_flush(void)
/* Don't want cpus going down or up during this. */
get_online_cpus();
- mutex_lock(&flow_flush_sem);
- info.cache = &flow_cache_global;
+ mutex_lock(&net->xfrm.flow_flush_sem);
+ info.cache = &net->xfrm.flow_cache_global;
for_each_online_cpu(i)
if (!flow_cache_percpu_empty(info.cache, i))
cpumask_set_cpu(i, mask);
@@ -386,21 +371,23 @@ void flow_cache_flush(void)
wait_for_completion(&info.completion);
done:
- mutex_unlock(&flow_flush_sem);
+ mutex_unlock(&net->xfrm.flow_flush_sem);
put_online_cpus();
free_cpumask_var(mask);
}
static void flow_cache_flush_task(struct work_struct *work)
{
- flow_cache_flush();
-}
+ /* @work is the flow_cache_flush_work member queued by
+ * flow_cache_flush_deferred(), so resolve the enclosing netns_xfrm
+ * through that member rather than through flow_cache_gc_work.
+ */
+ struct netns_xfrm *xfrm = container_of(work, struct netns_xfrm,
+ flow_cache_flush_work);
+ struct net *net = container_of(xfrm, struct net, xfrm);
-static DECLARE_WORK(flow_cache_flush_work, flow_cache_flush_task);
+ flow_cache_flush(net);
+}
-void flow_cache_flush_deferred(void)
+void flow_cache_flush_deferred(struct net *net)
{
- schedule_work(&flow_cache_flush_work);
+ schedule_work(&net->xfrm.flow_cache_flush_work);
}
static int flow_cache_cpu_prepare(struct flow_cache *fc, int cpu)
@@ -425,7 +412,8 @@ static int flow_cache_cpu(struct notifier_block *nfb,
unsigned long action,
void *hcpu)
{
- struct flow_cache *fc = container_of(nfb, struct flow_cache, hotcpu_notifier);
+ struct flow_cache *fc = container_of(nfb, struct flow_cache,
+ hotcpu_notifier);
int res, cpu = (unsigned long) hcpu;
struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu);
@@ -444,9 +432,20 @@ static int flow_cache_cpu(struct notifier_block *nfb,
return NOTIFY_OK;
}
-static int __init flow_cache_init(struct flow_cache *fc)
+int flow_cache_init(struct net *net)
{
int i;
+ struct flow_cache *fc = &net->xfrm.flow_cache_global;
+
+ if (!flow_cachep)
+ flow_cachep = kmem_cache_create("flow_cache",
+ sizeof(struct flow_cache_entry),
+ 0, SLAB_PANIC, NULL);
+ spin_lock_init(&net->xfrm.flow_cache_gc_lock);
+ INIT_LIST_HEAD(&net->xfrm.flow_cache_gc_list);
+ INIT_WORK(&net->xfrm.flow_cache_gc_work, flow_cache_gc_task);
+ INIT_WORK(&net->xfrm.flow_cache_flush_work, flow_cache_flush_task);
+ mutex_init(&net->xfrm.flow_flush_sem);
fc->hash_shift = 10;
fc->low_watermark = 2 * flow_cache_hash_size(fc);
@@ -456,6 +455,8 @@ static int __init flow_cache_init(struct flow_cache *fc)
if (!fc->percpu)
return -ENOMEM;
+ cpu_notifier_register_begin();
+
for_each_online_cpu(i) {
if (flow_cache_cpu_prepare(fc, i))
goto err;
@@ -463,7 +464,9 @@ static int __init flow_cache_init(struct flow_cache *fc)
fc->hotcpu_notifier = (struct notifier_block){
.notifier_call = flow_cache_cpu,
};
- register_hotcpu_notifier(&fc->hotcpu_notifier);
+ __register_hotcpu_notifier(&fc->hotcpu_notifier);
+
+ cpu_notifier_register_done();
setup_timer(&fc->rnd_timer, flow_cache_new_hashrnd,
(unsigned long) fc);
@@ -479,19 +482,30 @@ err:
fcp->hash_table = NULL;
}
+ cpu_notifier_register_done();
+
free_percpu(fc->percpu);
fc->percpu = NULL;
return -ENOMEM;
}
+EXPORT_SYMBOL(flow_cache_init);
-static int __init flow_cache_init_global(void)
+void flow_cache_fini(struct net *net)
{
- flow_cachep = kmem_cache_create("flow_cache",
- sizeof(struct flow_cache_entry),
- 0, SLAB_PANIC, NULL);
+ int i;
+ struct flow_cache *fc = &net->xfrm.flow_cache_global;
- return flow_cache_init(&flow_cache_global);
-}
+ del_timer_sync(&fc->rnd_timer);
+ unregister_hotcpu_notifier(&fc->hotcpu_notifier);
+
+ for_each_possible_cpu(i) {
+ struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, i);
+ kfree(fcp->hash_table);
+ fcp->hash_table = NULL;
+ }
-module_init(flow_cache_init_global);
+ free_percpu(fc->percpu);
+ fc->percpu = NULL;
+}
+EXPORT_SYMBOL(flow_cache_fini);
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index e29e810663d..107ed12a532 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -61,7 +61,7 @@ bool skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow)
again:
switch (proto) {
- case __constant_htons(ETH_P_IP): {
+ case htons(ETH_P_IP): {
const struct iphdr *iph;
struct iphdr _iph;
ip:
@@ -77,7 +77,7 @@ ip:
iph_to_flow_copy_addrs(flow, iph);
break;
}
- case __constant_htons(ETH_P_IPV6): {
+ case htons(ETH_P_IPV6): {
const struct ipv6hdr *iph;
struct ipv6hdr _iph;
ipv6:
@@ -91,8 +91,8 @@ ipv6:
nhoff += sizeof(struct ipv6hdr);
break;
}
- case __constant_htons(ETH_P_8021AD):
- case __constant_htons(ETH_P_8021Q): {
+ case htons(ETH_P_8021AD):
+ case htons(ETH_P_8021Q): {
const struct vlan_hdr *vlan;
struct vlan_hdr _vlan;
@@ -104,7 +104,7 @@ ipv6:
nhoff += sizeof(*vlan);
goto again;
}
- case __constant_htons(ETH_P_PPP_SES): {
+ case htons(ETH_P_PPP_SES): {
struct {
struct pppoe_hdr hdr;
__be16 proto;
@@ -115,9 +115,9 @@ ipv6:
proto = hdr->proto;
nhoff += PPPOE_SES_HLEN;
switch (proto) {
- case __constant_htons(PPP_IP):
+ case htons(PPP_IP):
goto ip;
- case __constant_htons(PPP_IPV6):
+ case htons(PPP_IPV6):
goto ipv6;
default:
return false;
@@ -203,8 +203,8 @@ static __always_inline u32 __flow_hash_1word(u32 a)
/*
* __skb_get_hash: calculate a flow hash based on src/dst addresses
- * and src/dst port numbers. Sets rxhash in skb to non-zero hash value
- * on success, zero indicates no valid hash. Also, sets l4_rxhash in skb
+ * and src/dst port numbers. Sets hash in skb to non-zero hash value
+ * on success, zero indicates no valid hash. Also, sets l4_hash in skb
* if hash is a canonical 4-tuple hash over transport ports.
*/
void __skb_get_hash(struct sk_buff *skb)
@@ -216,7 +216,7 @@ void __skb_get_hash(struct sk_buff *skb)
return;
if (keys.ports)
- skb->l4_rxhash = 1;
+ skb->l4_hash = 1;
/* get a consistent hash (same value on both flow directions) */
if (((__force u32)keys.dst < (__force u32)keys.src) ||
@@ -232,7 +232,7 @@ void __skb_get_hash(struct sk_buff *skb)
if (!hash)
hash = 1;
- skb->rxhash = hash;
+ skb->hash = hash;
}
EXPORT_SYMBOL(__skb_get_hash);
@@ -344,7 +344,7 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
hash = skb->sk->sk_hash;
else
hash = (__force u16) skb->protocol ^
- skb->rxhash;
+ skb->hash;
hash = __flow_hash_1word(hash);
queue_index = map->queues[
((u64)hash * map->len) >> 32];
diff --git a/net/core/iovec.c b/net/core/iovec.c
index b61869429f4..e1ec45ab1e6 100644
--- a/net/core/iovec.c
+++ b/net/core/iovec.c
@@ -39,7 +39,7 @@ int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr_storage *a
{
int size, ct, err;
- if (m->msg_namelen) {
+ if (m->msg_name && m->msg_namelen) {
if (mode == VERIFY_READ) {
void __user *namep;
namep = (void __user __force *) m->msg_name;
@@ -48,10 +48,10 @@ int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr_storage *a
if (err < 0)
return err;
}
- if (m->msg_name)
- m->msg_name = address;
+ m->msg_name = address;
} else {
m->msg_name = NULL;
+ m->msg_namelen = 0;
}
size = m->msg_iovlen * sizeof(struct iovec);
@@ -75,61 +75,6 @@ int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr_storage *a
}
/*
- * Copy kernel to iovec. Returns -EFAULT on error.
- */
-
-int memcpy_toiovecend(const struct iovec *iov, unsigned char *kdata,
- int offset, int len)
-{
- int copy;
- for (; len > 0; ++iov) {
- /* Skip over the finished iovecs */
- if (unlikely(offset >= iov->iov_len)) {
- offset -= iov->iov_len;
- continue;
- }
- copy = min_t(unsigned int, iov->iov_len - offset, len);
- if (copy_to_user(iov->iov_base + offset, kdata, copy))
- return -EFAULT;
- offset = 0;
- kdata += copy;
- len -= copy;
- }
-
- return 0;
-}
-EXPORT_SYMBOL(memcpy_toiovecend);
-
-/*
- * Copy iovec to kernel. Returns -EFAULT on error.
- */
-
-int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov,
- int offset, int len)
-{
- /* Skip over the finished iovecs */
- while (offset >= iov->iov_len) {
- offset -= iov->iov_len;
- iov++;
- }
-
- while (len > 0) {
- u8 __user *base = iov->iov_base + offset;
- int copy = min_t(unsigned int, len, iov->iov_len - offset);
-
- offset = 0;
- if (copy_from_user(kdata, base, copy))
- return -EFAULT;
- len -= copy;
- kdata += copy;
- iov++;
- }
-
- return 0;
-}
-EXPORT_SYMBOL(memcpy_fromiovecend);
-
-/*
* And now for the all-in-one: copy and checksum from a user iovec
* directly to a datagram
* Calls to csum_partial but the last must be in 32 bit chunks
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index 9c3a839322b..bd0767e6b2b 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -147,7 +147,7 @@ static void linkwatch_do_dev(struct net_device *dev)
* Make sure the above read is complete since it can be
* rewritten as soon as we clear the bit below.
*/
- smp_mb__before_clear_bit();
+ smp_mb__before_atomic();
/* We are about to handle this device,
* so new events can be accepted
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index e16129019c6..ef31fef25e5 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -836,10 +836,10 @@ out:
static __inline__ int neigh_max_probes(struct neighbour *n)
{
struct neigh_parms *p = n->parms;
- return (n->nud_state & NUD_PROBE) ?
- NEIGH_VAR(p, UCAST_PROBES) :
- NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) +
- NEIGH_VAR(p, MCAST_PROBES);
+ int max_probes = NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES);
+ if (!(n->nud_state & NUD_PROBE))
+ max_probes += NEIGH_VAR(p, MCAST_PROBES);
+ return max_probes;
}
static void neigh_invalidate(struct neighbour *neigh)
@@ -945,6 +945,7 @@ static void neigh_timer_handler(unsigned long arg)
neigh->nud_state = NUD_FAILED;
notify = 1;
neigh_invalidate(neigh);
+ goto out;
}
if (neigh->nud_state & NUD_IN_TIMER) {
@@ -1247,8 +1248,8 @@ void __neigh_set_probe_once(struct neighbour *neigh)
neigh->updated = jiffies;
if (!(neigh->nud_state & NUD_FAILED))
return;
- neigh->nud_state = NUD_PROBE;
- atomic_set(&neigh->probes, NEIGH_VAR(neigh->parms, UCAST_PROBES));
+ neigh->nud_state = NUD_INCOMPLETE;
+ atomic_set(&neigh->probes, neigh_max_probes(neigh));
neigh_add_timer(neigh,
jiffies + NEIGH_VAR(neigh->parms, RETRANS_TIME));
}
@@ -2248,7 +2249,7 @@ static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
ndm->ndm_pad1 = 0;
ndm->ndm_pad2 = 0;
ndm->ndm_flags = pn->flags | NTF_PROXY;
- ndm->ndm_type = NDA_DST;
+ ndm->ndm_type = RTN_UNICAST;
ndm->ndm_ifindex = pn->dev->ifindex;
ndm->ndm_state = NUD_NONE;
@@ -3058,11 +3059,12 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
} else {
+ struct neigh_table *tbl = p->tbl;
dev_name_source = "default";
- t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = (int *)(p + 1);
- t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = (int *)(p + 1) + 1;
- t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = (int *)(p + 1) + 2;
- t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = (int *)(p + 1) + 3;
+ t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = &tbl->gc_interval;
+ t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = &tbl->gc_thresh1;
+ t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = &tbl->gc_thresh2;
+ t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = &tbl->gc_thresh3;
}
if (handler) {
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 93886246a0b..1cac29ebb05 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -104,6 +104,7 @@ static ssize_t netdev_store(struct device *dev, struct device_attribute *attr,
}
NETDEVICE_SHOW_RO(dev_id, fmt_hex);
+NETDEVICE_SHOW_RO(dev_port, fmt_dec);
NETDEVICE_SHOW_RO(addr_assign_type, fmt_dec);
NETDEVICE_SHOW_RO(addr_len, fmt_dec);
NETDEVICE_SHOW_RO(iflink, fmt_dec);
@@ -252,6 +253,16 @@ static ssize_t operstate_show(struct device *dev,
}
static DEVICE_ATTR_RO(operstate);
+static ssize_t carrier_changes_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct net_device *netdev = to_net_dev(dev);
+ return sprintf(buf, fmt_dec,
+ atomic_read(&netdev->carrier_changes));
+}
+static DEVICE_ATTR_RO(carrier_changes);
+
/* read-write attributes */
static int change_mtu(struct net_device *net, unsigned long new_mtu)
@@ -373,6 +384,7 @@ static struct attribute *net_class_attrs[] = {
&dev_attr_netdev_group.attr,
&dev_attr_type.attr,
&dev_attr_dev_id.attr,
+ &dev_attr_dev_port.attr,
&dev_attr_iflink.attr,
&dev_attr_ifindex.attr,
&dev_attr_addr_assign_type.attr,
@@ -384,6 +396,7 @@ static struct attribute *net_class_attrs[] = {
&dev_attr_duplex.attr,
&dev_attr_dormant.attr,
&dev_attr_operstate.attr,
+ &dev_attr_carrier_changes.attr,
&dev_attr_ifalias.attr,
&dev_attr_carrier.attr,
&dev_attr_mtu.attr,
@@ -789,7 +802,7 @@ exit:
kobject_put(kobj);
return error;
}
-#endif /* CONFIG_SYFS */
+#endif /* CONFIG_SYSFS */
int
net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
@@ -996,15 +1009,12 @@ static struct attribute_group dql_group = {
#endif /* CONFIG_BQL */
#ifdef CONFIG_XPS
-static inline unsigned int get_netdev_queue_index(struct netdev_queue *queue)
+static unsigned int get_netdev_queue_index(struct netdev_queue *queue)
{
struct net_device *dev = queue->dev;
- int i;
-
- for (i = 0; i < dev->num_tx_queues; i++)
- if (queue == &dev->_tx[i])
- break;
+ unsigned int i;
+ i = queue - dev->_tx;
BUG_ON(i >= dev->num_tx_queues);
return i;
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 81d3a9a0845..85b62691f4f 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -24,7 +24,7 @@
static LIST_HEAD(pernet_list);
static struct list_head *first_device = &pernet_list;
-static DEFINE_MUTEX(net_mutex);
+DEFINE_MUTEX(net_mutex);
LIST_HEAD(net_namespace_list);
EXPORT_SYMBOL_GPL(net_namespace_list);
@@ -273,7 +273,7 @@ static void cleanup_net(struct work_struct *work)
{
const struct pernet_operations *ops;
struct net *net, *tmp;
- LIST_HEAD(net_kill_list);
+ struct list_head net_kill_list;
LIST_HEAD(net_exit_list);
/* Atomically snapshot the list of namespaces to cleanup */
diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c
index 719efd54166..30d903b19c6 100644
--- a/net/core/netclassid_cgroup.c
+++ b/net/core/netclassid_cgroup.c
@@ -23,7 +23,7 @@ static inline struct cgroup_cls_state *css_cls_state(struct cgroup_subsys_state
struct cgroup_cls_state *task_cls_state(struct task_struct *p)
{
- return css_cls_state(task_css(p, net_cls_subsys_id));
+ return css_cls_state(task_css(p, net_cls_cgrp_id));
}
EXPORT_SYMBOL_GPL(task_cls_state);
@@ -42,7 +42,7 @@ cgrp_css_alloc(struct cgroup_subsys_state *parent_css)
static int cgrp_css_online(struct cgroup_subsys_state *css)
{
struct cgroup_cls_state *cs = css_cls_state(css);
- struct cgroup_cls_state *parent = css_cls_state(css_parent(css));
+ struct cgroup_cls_state *parent = css_cls_state(css->parent);
if (parent)
cs->classid = parent->classid;
@@ -73,7 +73,7 @@ static void cgrp_attach(struct cgroup_subsys_state *css,
void *v = (void *)(unsigned long)cs->classid;
struct task_struct *p;
- cgroup_taskset_for_each(p, css, tset) {
+ cgroup_taskset_for_each(p, tset) {
task_lock(p);
iterate_fd(p->files, 0, update_classid, v);
task_unlock(p);
@@ -102,19 +102,10 @@ static struct cftype ss_files[] = {
{ } /* terminate */
};
-struct cgroup_subsys net_cls_subsys = {
- .name = "net_cls",
+struct cgroup_subsys net_cls_cgrp_subsys = {
.css_alloc = cgrp_css_alloc,
.css_online = cgrp_css_online,
.css_free = cgrp_css_free,
.attach = cgrp_attach,
- .subsys_id = net_cls_subsys_id,
.base_cftypes = ss_files,
- .module = THIS_MODULE,
};
-
-static int __init init_netclassid_cgroup(void)
-{
- return cgroup_load_subsys(&net_cls_subsys);
-}
-__initcall(init_netclassid_cgroup);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index df9e6b1a975..e33937fb32a 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -46,13 +46,9 @@
static struct sk_buff_head skb_pool;
-static atomic_t trapped;
-
DEFINE_STATIC_SRCU(netpoll_srcu);
#define USEC_PER_POLL 50
-#define NETPOLL_RX_ENABLED 1
-#define NETPOLL_RX_DROP 2
#define MAX_SKB_SIZE \
(sizeof(struct ethhdr) + \
@@ -61,7 +57,6 @@ DEFINE_STATIC_SRCU(netpoll_srcu);
MAX_UDP_CHUNK)
static void zap_completion_queue(void);
-static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo);
static void netpoll_async_cleanup(struct work_struct *work);
static unsigned int carrier_timeout = 4;
@@ -74,6 +69,37 @@ module_param(carrier_timeout, uint, 0644);
#define np_notice(np, fmt, ...) \
pr_notice("%s: " fmt, np->name, ##__VA_ARGS__)
+static int netpoll_start_xmit(struct sk_buff *skb, struct net_device *dev,
+ struct netdev_queue *txq)
+{
+ const struct net_device_ops *ops = dev->netdev_ops;
+ int status = NETDEV_TX_OK;
+ netdev_features_t features;
+
+ features = netif_skb_features(skb);
+
+ if (vlan_tx_tag_present(skb) &&
+ !vlan_hw_offload_capable(features, skb->vlan_proto)) {
+ skb = __vlan_put_tag(skb, skb->vlan_proto,
+ vlan_tx_tag_get(skb));
+ if (unlikely(!skb)) {
+ /* This is actually a packet drop, but we
+ * don't want the code that calls this
+ * function to try and operate on a NULL skb.
+ */
+ goto out;
+ }
+ skb->vlan_tci = 0;
+ }
+
+ status = ops->ndo_start_xmit(skb, dev);
+ if (status == NETDEV_TX_OK)
+ txq_trans_update(txq);
+
+out:
+ return status;
+}
+
static void queue_process(struct work_struct *work)
{
struct netpoll_info *npinfo =
@@ -83,51 +109,31 @@ static void queue_process(struct work_struct *work)
while ((skb = skb_dequeue(&npinfo->txq))) {
struct net_device *dev = skb->dev;
- const struct net_device_ops *ops = dev->netdev_ops;
struct netdev_queue *txq;
if (!netif_device_present(dev) || !netif_running(dev)) {
- __kfree_skb(skb);
+ kfree_skb(skb);
continue;
}
txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
local_irq_save(flags);
- __netif_tx_lock(txq, smp_processor_id());
+ HARD_TX_LOCK(dev, txq, smp_processor_id());
if (netif_xmit_frozen_or_stopped(txq) ||
- ops->ndo_start_xmit(skb, dev) != NETDEV_TX_OK) {
+ netpoll_start_xmit(skb, dev, txq) != NETDEV_TX_OK) {
skb_queue_head(&npinfo->txq, skb);
- __netif_tx_unlock(txq);
+ HARD_TX_UNLOCK(dev, txq);
local_irq_restore(flags);
schedule_delayed_work(&npinfo->tx_work, HZ/10);
return;
}
- __netif_tx_unlock(txq);
+ HARD_TX_UNLOCK(dev, txq);
local_irq_restore(flags);
}
}
-static __sum16 checksum_udp(struct sk_buff *skb, struct udphdr *uh,
- unsigned short ulen, __be32 saddr, __be32 daddr)
-{
- __wsum psum;
-
- if (uh->check == 0 || skb_csum_unnecessary(skb))
- return 0;
-
- psum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0);
-
- if (skb->ip_summed == CHECKSUM_COMPLETE &&
- !csum_fold(csum_add(psum, skb->csum)))
- return 0;
-
- skb->csum = psum;
-
- return __skb_checksum_complete(skb);
-}
-
/*
* Check whether delayed processing was scheduled for our NIC. If so,
* we attempt to grab the poll lock and use ->poll() to pump the card.
@@ -138,14 +144,8 @@ static __sum16 checksum_udp(struct sk_buff *skb, struct udphdr *uh,
* trylock here and interrupts are already disabled in the softirq
* case. Further, we test the poll_owner to avoid recursion on UP
* systems where the lock doesn't exist.
- *
- * In cases where there is bi-directional communications, reading only
- * one message at a time can lead to packets being dropped by the
- * network adapter, forcing superfluous retries and possibly timeouts.
- * Thus, we set our budget to greater than 1.
*/
-static int poll_one_napi(struct netpoll_info *npinfo,
- struct napi_struct *napi, int budget)
+static int poll_one_napi(struct napi_struct *napi, int budget)
{
int work;
@@ -156,52 +156,35 @@ static int poll_one_napi(struct netpoll_info *npinfo,
if (!test_bit(NAPI_STATE_SCHED, &napi->state))
return budget;
- npinfo->rx_flags |= NETPOLL_RX_DROP;
- atomic_inc(&trapped);
set_bit(NAPI_STATE_NPSVC, &napi->state);
work = napi->poll(napi, budget);
+ WARN_ONCE(work > budget, "%pF exceeded budget in poll\n", napi->poll);
trace_napi_poll(napi);
clear_bit(NAPI_STATE_NPSVC, &napi->state);
- atomic_dec(&trapped);
- npinfo->rx_flags &= ~NETPOLL_RX_DROP;
return budget - work;
}
-static void poll_napi(struct net_device *dev)
+static void poll_napi(struct net_device *dev, int budget)
{
struct napi_struct *napi;
- int budget = 16;
list_for_each_entry(napi, &dev->napi_list, dev_list) {
if (napi->poll_owner != smp_processor_id() &&
spin_trylock(&napi->poll_lock)) {
- budget = poll_one_napi(rcu_dereference_bh(dev->npinfo),
- napi, budget);
+ budget = poll_one_napi(napi, budget);
spin_unlock(&napi->poll_lock);
-
- if (!budget)
- break;
}
}
}
-static void service_neigh_queue(struct netpoll_info *npi)
-{
- if (npi) {
- struct sk_buff *skb;
-
- while ((skb = skb_dequeue(&npi->neigh_tx)))
- netpoll_neigh_reply(skb, npi);
- }
-}
-
static void netpoll_poll_dev(struct net_device *dev)
{
const struct net_device_ops *ops;
struct netpoll_info *ni = rcu_dereference_bh(dev->npinfo);
+ int budget = 0;
/* Don't do any rx activity if the dev_lock mutex is held
* the dev_open/close paths use this to block netpoll activity
@@ -224,31 +207,14 @@ static void netpoll_poll_dev(struct net_device *dev)
/* Process pending work on NIC */
ops->ndo_poll_controller(dev);
- poll_napi(dev);
+ poll_napi(dev, budget);
up(&ni->dev_lock);
- if (dev->flags & IFF_SLAVE) {
- if (ni) {
- struct net_device *bond_dev;
- struct sk_buff *skb;
- struct netpoll_info *bond_ni;
-
- bond_dev = netdev_master_upper_dev_get_rcu(dev);
- bond_ni = rcu_dereference_bh(bond_dev->npinfo);
- while ((skb = skb_dequeue(&ni->neigh_tx))) {
- skb->dev = bond_dev;
- skb_queue_tail(&bond_ni->neigh_tx, skb);
- }
- }
- }
-
- service_neigh_queue(ni);
-
zap_completion_queue();
}
-void netpoll_rx_disable(struct net_device *dev)
+void netpoll_poll_disable(struct net_device *dev)
{
struct netpoll_info *ni;
int idx;
@@ -259,9 +225,9 @@ void netpoll_rx_disable(struct net_device *dev)
down(&ni->dev_lock);
srcu_read_unlock(&netpoll_srcu, idx);
}
-EXPORT_SYMBOL(netpoll_rx_disable);
+EXPORT_SYMBOL(netpoll_poll_disable);
-void netpoll_rx_enable(struct net_device *dev)
+void netpoll_poll_enable(struct net_device *dev)
{
struct netpoll_info *ni;
rcu_read_lock();
@@ -270,7 +236,7 @@ void netpoll_rx_enable(struct net_device *dev)
up(&ni->dev_lock);
rcu_read_unlock();
}
-EXPORT_SYMBOL(netpoll_rx_enable);
+EXPORT_SYMBOL(netpoll_poll_enable);
static void refill_skbs(void)
{
@@ -304,7 +270,7 @@ static void zap_completion_queue(void)
while (clist != NULL) {
struct sk_buff *skb = clist;
clist = clist->next;
- if (skb->destructor) {
+ if (!skb_irq_freeable(skb)) {
atomic_inc(&skb->users);
dev_kfree_skb_any(skb); /* put this one back */
} else {
@@ -359,7 +325,6 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
{
int status = NETDEV_TX_BUSY;
unsigned long tries;
- const struct net_device_ops *ops = dev->netdev_ops;
/* It is up to the caller to keep npinfo alive. */
struct netpoll_info *npinfo;
@@ -367,7 +332,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
npinfo = rcu_dereference_bh(np->dev->npinfo);
if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) {
- __kfree_skb(skb);
+ dev_kfree_skb_irq(skb);
return;
}
@@ -380,29 +345,11 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
/* try until next clock tick */
for (tries = jiffies_to_usecs(1)/USEC_PER_POLL;
tries > 0; --tries) {
- if (__netif_tx_trylock(txq)) {
- if (!netif_xmit_stopped(txq)) {
- if (vlan_tx_tag_present(skb) &&
- !vlan_hw_offload_capable(netif_skb_features(skb),
- skb->vlan_proto)) {
- skb = __vlan_put_tag(skb, skb->vlan_proto, vlan_tx_tag_get(skb));
- if (unlikely(!skb)) {
- /* This is actually a packet drop, but we
- * don't want the code at the end of this
- * function to try and re-queue a NULL skb.
- */
- status = NETDEV_TX_OK;
- goto unlock_txq;
- }
- skb->vlan_tci = 0;
- }
-
- status = ops->ndo_start_xmit(skb, dev);
- if (status == NETDEV_TX_OK)
- txq_trans_update(txq);
- }
- unlock_txq:
- __netif_tx_unlock(txq);
+ if (HARD_TX_TRYLOCK(dev, txq)) {
+ if (!netif_xmit_stopped(txq))
+ status = netpoll_start_xmit(skb, dev, txq);
+
+ HARD_TX_UNLOCK(dev, txq);
if (status == NETDEV_TX_OK)
break;
@@ -417,7 +364,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
WARN_ONCE(!irqs_disabled(),
"netpoll_send_skb_on_dev(): %s enabled interrupts in poll (%pF)\n",
- dev->name, ops->ndo_start_xmit);
+ dev->name, dev->netdev_ops->ndo_start_xmit);
}
@@ -529,384 +476,6 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
}
EXPORT_SYMBOL(netpoll_send_udp);
-static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo)
-{
- int size, type = ARPOP_REPLY;
- __be32 sip, tip;
- unsigned char *sha;
- struct sk_buff *send_skb;
- struct netpoll *np, *tmp;
- unsigned long flags;
- int hlen, tlen;
- int hits = 0, proto;
-
- if (list_empty(&npinfo->rx_np))
- return;
-
- /* Before checking the packet, we do some early
- inspection whether this is interesting at all */
- spin_lock_irqsave(&npinfo->rx_lock, flags);
- list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
- if (np->dev == skb->dev)
- hits++;
- }
- spin_unlock_irqrestore(&npinfo->rx_lock, flags);
-
- /* No netpoll struct is using this dev */
- if (!hits)
- return;
-
- proto = ntohs(eth_hdr(skb)->h_proto);
- if (proto == ETH_P_ARP) {
- struct arphdr *arp;
- unsigned char *arp_ptr;
- /* No arp on this interface */
- if (skb->dev->flags & IFF_NOARP)
- return;
-
- if (!pskb_may_pull(skb, arp_hdr_len(skb->dev)))
- return;
-
- skb_reset_network_header(skb);
- skb_reset_transport_header(skb);
- arp = arp_hdr(skb);
-
- if ((arp->ar_hrd != htons(ARPHRD_ETHER) &&
- arp->ar_hrd != htons(ARPHRD_IEEE802)) ||
- arp->ar_pro != htons(ETH_P_IP) ||
- arp->ar_op != htons(ARPOP_REQUEST))
- return;
-
- arp_ptr = (unsigned char *)(arp+1);
- /* save the location of the src hw addr */
- sha = arp_ptr;
- arp_ptr += skb->dev->addr_len;
- memcpy(&sip, arp_ptr, 4);
- arp_ptr += 4;
- /* If we actually cared about dst hw addr,
- it would get copied here */
- arp_ptr += skb->dev->addr_len;
- memcpy(&tip, arp_ptr, 4);
-
- /* Should we ignore arp? */
- if (ipv4_is_loopback(tip) || ipv4_is_multicast(tip))
- return;
-
- size = arp_hdr_len(skb->dev);
-
- spin_lock_irqsave(&npinfo->rx_lock, flags);
- list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
- if (tip != np->local_ip.ip)
- continue;
-
- hlen = LL_RESERVED_SPACE(np->dev);
- tlen = np->dev->needed_tailroom;
- send_skb = find_skb(np, size + hlen + tlen, hlen);
- if (!send_skb)
- continue;
-
- skb_reset_network_header(send_skb);
- arp = (struct arphdr *) skb_put(send_skb, size);
- send_skb->dev = skb->dev;
- send_skb->protocol = htons(ETH_P_ARP);
-
- /* Fill the device header for the ARP frame */
- if (dev_hard_header(send_skb, skb->dev, ETH_P_ARP,
- sha, np->dev->dev_addr,
- send_skb->len) < 0) {
- kfree_skb(send_skb);
- continue;
- }
-
- /*
- * Fill out the arp protocol part.
- *
- * we only support ethernet device type,
- * which (according to RFC 1390) should
- * always equal 1 (Ethernet).
- */
-
- arp->ar_hrd = htons(np->dev->type);
- arp->ar_pro = htons(ETH_P_IP);
- arp->ar_hln = np->dev->addr_len;
- arp->ar_pln = 4;
- arp->ar_op = htons(type);
-
- arp_ptr = (unsigned char *)(arp + 1);
- memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len);
- arp_ptr += np->dev->addr_len;
- memcpy(arp_ptr, &tip, 4);
- arp_ptr += 4;
- memcpy(arp_ptr, sha, np->dev->addr_len);
- arp_ptr += np->dev->addr_len;
- memcpy(arp_ptr, &sip, 4);
-
- netpoll_send_skb(np, send_skb);
-
- /* If there are several rx_skb_hooks for the same
- * address we're fine by sending a single reply
- */
- break;
- }
- spin_unlock_irqrestore(&npinfo->rx_lock, flags);
- } else if( proto == ETH_P_IPV6) {
-#if IS_ENABLED(CONFIG_IPV6)
- struct nd_msg *msg;
- u8 *lladdr = NULL;
- struct ipv6hdr *hdr;
- struct icmp6hdr *icmp6h;
- const struct in6_addr *saddr;
- const struct in6_addr *daddr;
- struct inet6_dev *in6_dev = NULL;
- struct in6_addr *target;
-
- in6_dev = in6_dev_get(skb->dev);
- if (!in6_dev || !in6_dev->cnf.accept_ra)
- return;
-
- if (!pskb_may_pull(skb, skb->len))
- return;
-
- msg = (struct nd_msg *)skb_transport_header(skb);
-
- __skb_push(skb, skb->data - skb_transport_header(skb));
-
- if (ipv6_hdr(skb)->hop_limit != 255)
- return;
- if (msg->icmph.icmp6_code != 0)
- return;
- if (msg->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION)
- return;
-
- saddr = &ipv6_hdr(skb)->saddr;
- daddr = &ipv6_hdr(skb)->daddr;
-
- size = sizeof(struct icmp6hdr) + sizeof(struct in6_addr);
-
- spin_lock_irqsave(&npinfo->rx_lock, flags);
- list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
- if (!ipv6_addr_equal(daddr, &np->local_ip.in6))
- continue;
-
- hlen = LL_RESERVED_SPACE(np->dev);
- tlen = np->dev->needed_tailroom;
- send_skb = find_skb(np, size + hlen + tlen, hlen);
- if (!send_skb)
- continue;
-
- send_skb->protocol = htons(ETH_P_IPV6);
- send_skb->dev = skb->dev;
-
- skb_reset_network_header(send_skb);
- hdr = (struct ipv6hdr *) skb_put(send_skb, sizeof(struct ipv6hdr));
- *(__be32*)hdr = htonl(0x60000000);
- hdr->payload_len = htons(size);
- hdr->nexthdr = IPPROTO_ICMPV6;
- hdr->hop_limit = 255;
- hdr->saddr = *saddr;
- hdr->daddr = *daddr;
-
- icmp6h = (struct icmp6hdr *) skb_put(send_skb, sizeof(struct icmp6hdr));
- icmp6h->icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT;
- icmp6h->icmp6_router = 0;
- icmp6h->icmp6_solicited = 1;
-
- target = (struct in6_addr *) skb_put(send_skb, sizeof(struct in6_addr));
- *target = msg->target;
- icmp6h->icmp6_cksum = csum_ipv6_magic(saddr, daddr, size,
- IPPROTO_ICMPV6,
- csum_partial(icmp6h,
- size, 0));
-
- if (dev_hard_header(send_skb, skb->dev, ETH_P_IPV6,
- lladdr, np->dev->dev_addr,
- send_skb->len) < 0) {
- kfree_skb(send_skb);
- continue;
- }
-
- netpoll_send_skb(np, send_skb);
-
- /* If there are several rx_skb_hooks for the same
- * address, we're fine by sending a single reply
- */
- break;
- }
- spin_unlock_irqrestore(&npinfo->rx_lock, flags);
-#endif
- }
-}
-
-static bool pkt_is_ns(struct sk_buff *skb)
-{
- struct nd_msg *msg;
- struct ipv6hdr *hdr;
-
- if (skb->protocol != htons(ETH_P_IPV6))
- return false;
- if (!pskb_may_pull(skb, sizeof(struct ipv6hdr) + sizeof(struct nd_msg)))
- return false;
-
- msg = (struct nd_msg *)skb_transport_header(skb);
- __skb_push(skb, skb->data - skb_transport_header(skb));
- hdr = ipv6_hdr(skb);
-
- if (hdr->nexthdr != IPPROTO_ICMPV6)
- return false;
- if (hdr->hop_limit != 255)
- return false;
- if (msg->icmph.icmp6_code != 0)
- return false;
- if (msg->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION)
- return false;
-
- return true;
-}
-
-int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo)
-{
- int proto, len, ulen, data_len;
- int hits = 0, offset;
- const struct iphdr *iph;
- struct udphdr *uh;
- struct netpoll *np, *tmp;
- uint16_t source;
-
- if (list_empty(&npinfo->rx_np))
- goto out;
-
- if (skb->dev->type != ARPHRD_ETHER)
- goto out;
-
- /* check if netpoll clients need ARP */
- if (skb->protocol == htons(ETH_P_ARP) && atomic_read(&trapped)) {
- skb_queue_tail(&npinfo->neigh_tx, skb);
- return 1;
- } else if (pkt_is_ns(skb) && atomic_read(&trapped)) {
- skb_queue_tail(&npinfo->neigh_tx, skb);
- return 1;
- }
-
- if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
- skb = vlan_untag(skb);
- if (unlikely(!skb))
- goto out;
- }
-
- proto = ntohs(eth_hdr(skb)->h_proto);
- if (proto != ETH_P_IP && proto != ETH_P_IPV6)
- goto out;
- if (skb->pkt_type == PACKET_OTHERHOST)
- goto out;
- if (skb_shared(skb))
- goto out;
-
- if (proto == ETH_P_IP) {
- if (!pskb_may_pull(skb, sizeof(struct iphdr)))
- goto out;
- iph = (struct iphdr *)skb->data;
- if (iph->ihl < 5 || iph->version != 4)
- goto out;
- if (!pskb_may_pull(skb, iph->ihl*4))
- goto out;
- iph = (struct iphdr *)skb->data;
- if (ip_fast_csum((u8 *)iph, iph->ihl) != 0)
- goto out;
-
- len = ntohs(iph->tot_len);
- if (skb->len < len || len < iph->ihl*4)
- goto out;
-
- /*
- * Our transport medium may have padded the buffer out.
- * Now We trim to the true length of the frame.
- */
- if (pskb_trim_rcsum(skb, len))
- goto out;
-
- iph = (struct iphdr *)skb->data;
- if (iph->protocol != IPPROTO_UDP)
- goto out;
-
- len -= iph->ihl*4;
- uh = (struct udphdr *)(((char *)iph) + iph->ihl*4);
- offset = (unsigned char *)(uh + 1) - skb->data;
- ulen = ntohs(uh->len);
- data_len = skb->len - offset;
- source = ntohs(uh->source);
-
- if (ulen != len)
- goto out;
- if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr))
- goto out;
- list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
- if (np->local_ip.ip && np->local_ip.ip != iph->daddr)
- continue;
- if (np->remote_ip.ip && np->remote_ip.ip != iph->saddr)
- continue;
- if (np->local_port && np->local_port != ntohs(uh->dest))
- continue;
-
- np->rx_skb_hook(np, source, skb, offset, data_len);
- hits++;
- }
- } else {
-#if IS_ENABLED(CONFIG_IPV6)
- const struct ipv6hdr *ip6h;
-
- if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
- goto out;
- ip6h = (struct ipv6hdr *)skb->data;
- if (ip6h->version != 6)
- goto out;
- len = ntohs(ip6h->payload_len);
- if (!len)
- goto out;
- if (len + sizeof(struct ipv6hdr) > skb->len)
- goto out;
- if (pskb_trim_rcsum(skb, len + sizeof(struct ipv6hdr)))
- goto out;
- ip6h = ipv6_hdr(skb);
- if (!pskb_may_pull(skb, sizeof(struct udphdr)))
- goto out;
- uh = udp_hdr(skb);
- offset = (unsigned char *)(uh + 1) - skb->data;
- ulen = ntohs(uh->len);
- data_len = skb->len - offset;
- source = ntohs(uh->source);
- if (ulen != skb->len)
- goto out;
- if (udp6_csum_init(skb, uh, IPPROTO_UDP))
- goto out;
- list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
- if (!ipv6_addr_equal(&np->local_ip.in6, &ip6h->daddr))
- continue;
- if (!ipv6_addr_equal(&np->remote_ip.in6, &ip6h->saddr))
- continue;
- if (np->local_port && np->local_port != ntohs(uh->dest))
- continue;
-
- np->rx_skb_hook(np, source, skb, offset, data_len);
- hits++;
- }
-#endif
- }
-
- if (!hits)
- goto out;
-
- kfree_skb(skb);
- return 1;
-
-out:
- if (atomic_read(&trapped)) {
- kfree_skb(skb);
- return 1;
- }
-
- return 0;
-}
-
void netpoll_print_options(struct netpoll *np)
{
np_info(np, "local port %d\n", np->local_port);
@@ -1026,11 +595,10 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
}
EXPORT_SYMBOL(netpoll_parse_options);
-int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp)
+int __netpoll_setup(struct netpoll *np, struct net_device *ndev)
{
struct netpoll_info *npinfo;
const struct net_device_ops *ops;
- unsigned long flags;
int err;
np->dev = ndev;
@@ -1046,18 +614,13 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp)
}
if (!ndev->npinfo) {
- npinfo = kmalloc(sizeof(*npinfo), gfp);
+ npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL);
if (!npinfo) {
err = -ENOMEM;
goto out;
}
- npinfo->rx_flags = 0;
- INIT_LIST_HEAD(&npinfo->rx_np);
-
- spin_lock_init(&npinfo->rx_lock);
sema_init(&npinfo->dev_lock, 1);
- skb_queue_head_init(&npinfo->neigh_tx);
skb_queue_head_init(&npinfo->txq);
INIT_DELAYED_WORK(&npinfo->tx_work, queue_process);
@@ -1065,7 +628,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp)
ops = np->dev->netdev_ops;
if (ops->ndo_netpoll_setup) {
- err = ops->ndo_netpoll_setup(ndev, npinfo, gfp);
+ err = ops->ndo_netpoll_setup(ndev, npinfo);
if (err)
goto free_npinfo;
}
@@ -1076,13 +639,6 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp)
npinfo->netpoll = np;
- if (np->rx_skb_hook) {
- spin_lock_irqsave(&npinfo->rx_lock, flags);
- npinfo->rx_flags |= NETPOLL_RX_ENABLED;
- list_add_tail(&np->rx, &npinfo->rx_np);
- spin_unlock_irqrestore(&npinfo->rx_lock, flags);
- }
-
/* last thing to do is link it to the net device structure */
rcu_assign_pointer(ndev->npinfo, npinfo);
@@ -1204,7 +760,7 @@ int netpoll_setup(struct netpoll *np)
/* fill up the skb queue */
refill_skbs();
- err = __netpoll_setup(np, ndev, GFP_KERNEL);
+ err = __netpoll_setup(np, ndev);
if (err)
goto put;
@@ -1231,7 +787,6 @@ static void rcu_cleanup_netpoll_info(struct rcu_head *rcu_head)
struct netpoll_info *npinfo =
container_of(rcu_head, struct netpoll_info, rcu);
- skb_queue_purge(&npinfo->neigh_tx);
skb_queue_purge(&npinfo->txq);
/* we can't call cancel_delayed_work_sync here, as we are in softirq */
@@ -1247,7 +802,6 @@ static void rcu_cleanup_netpoll_info(struct rcu_head *rcu_head)
void __netpoll_cleanup(struct netpoll *np)
{
struct netpoll_info *npinfo;
- unsigned long flags;
/* rtnl_dereference would be preferable here but
* rcu_cleanup_netpoll path can put us in here safely without
@@ -1257,14 +811,6 @@ void __netpoll_cleanup(struct netpoll *np)
if (!npinfo)
return;
- if (!list_empty(&npinfo->rx_np)) {
- spin_lock_irqsave(&npinfo->rx_lock, flags);
- list_del(&np->rx);
- if (list_empty(&npinfo->rx_np))
- npinfo->rx_flags &= ~NETPOLL_RX_ENABLED;
- spin_unlock_irqrestore(&npinfo->rx_lock, flags);
- }
-
synchronize_srcu(&netpoll_srcu);
if (atomic_dec_and_test(&npinfo->refcnt)) {
@@ -1274,7 +820,7 @@ void __netpoll_cleanup(struct netpoll *np)
if (ops->ndo_netpoll_cleanup)
ops->ndo_netpoll_cleanup(np->dev);
- rcu_assign_pointer(np->dev->npinfo, NULL);
+ RCU_INIT_POINTER(np->dev->npinfo, NULL);
call_rcu_bh(&npinfo->rcu, rcu_cleanup_netpoll_info);
}
}
@@ -1308,18 +854,3 @@ out:
rtnl_unlock();
}
EXPORT_SYMBOL(netpoll_cleanup);
-
-int netpoll_trap(void)
-{
- return atomic_read(&trapped);
-}
-EXPORT_SYMBOL(netpoll_trap);
-
-void netpoll_set_trap(int trap)
-{
- if (trap)
- atomic_inc(&trapped);
- else
- atomic_dec(&trapped);
-}
-EXPORT_SYMBOL(netpoll_set_trap);
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index 9043caedcd0..2f385b9bccc 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -140,7 +140,7 @@ cgrp_css_alloc(struct cgroup_subsys_state *parent_css)
static int cgrp_css_online(struct cgroup_subsys_state *css)
{
- struct cgroup_subsys_state *parent_css = css_parent(css);
+ struct cgroup_subsys_state *parent_css = css->parent;
struct net_device *dev;
int ret = 0;
@@ -185,15 +185,15 @@ static int read_priomap(struct seq_file *sf, void *v)
return 0;
}
-static int write_priomap(struct cgroup_subsys_state *css, struct cftype *cft,
- const char *buffer)
+static ssize_t write_priomap(struct kernfs_open_file *of,
+ char *buf, size_t nbytes, loff_t off)
{
char devname[IFNAMSIZ + 1];
struct net_device *dev;
u32 prio;
int ret;
- if (sscanf(buffer, "%"__stringify(IFNAMSIZ)"s %u", devname, &prio) != 2)
+ if (sscanf(buf, "%"__stringify(IFNAMSIZ)"s %u", devname, &prio) != 2)
return -EINVAL;
dev = dev_get_by_name(&init_net, devname);
@@ -202,11 +202,11 @@ static int write_priomap(struct cgroup_subsys_state *css, struct cftype *cft,
rtnl_lock();
- ret = netprio_set_prio(css, dev, prio);
+ ret = netprio_set_prio(of_css(of), dev, prio);
rtnl_unlock();
dev_put(dev);
- return ret;
+ return ret ?: nbytes;
}
static int update_netprio(const void *v, struct file *file, unsigned n)
@@ -224,7 +224,7 @@ static void net_prio_attach(struct cgroup_subsys_state *css,
struct task_struct *p;
void *v = (void *)(unsigned long)css->cgroup->id;
- cgroup_taskset_for_each(p, css, tset) {
+ cgroup_taskset_for_each(p, tset) {
task_lock(p);
iterate_fd(p->files, 0, update_netprio, v);
task_unlock(p);
@@ -239,20 +239,17 @@ static struct cftype ss_files[] = {
{
.name = "ifpriomap",
.seq_show = read_priomap,
- .write_string = write_priomap,
+ .write = write_priomap,
},
{ } /* terminate */
};
-struct cgroup_subsys net_prio_subsys = {
- .name = "net_prio",
+struct cgroup_subsys net_prio_cgrp_subsys = {
.css_alloc = cgrp_css_alloc,
.css_online = cgrp_css_online,
.css_free = cgrp_css_free,
.attach = net_prio_attach,
- .subsys_id = net_prio_subsys_id,
.base_cftypes = ss_files,
- .module = THIS_MODULE,
};
static int netprio_device_event(struct notifier_block *unused,
@@ -283,37 +280,9 @@ static struct notifier_block netprio_device_notifier = {
static int __init init_cgroup_netprio(void)
{
- int ret;
-
- ret = cgroup_load_subsys(&net_prio_subsys);
- if (ret)
- goto out;
-
register_netdevice_notifier(&netprio_device_notifier);
-
-out:
- return ret;
-}
-
-static void __exit exit_cgroup_netprio(void)
-{
- struct netprio_map *old;
- struct net_device *dev;
-
- unregister_netdevice_notifier(&netprio_device_notifier);
-
- cgroup_unload_subsys(&net_prio_subsys);
-
- rtnl_lock();
- for_each_netdev(&init_net, dev) {
- old = rtnl_dereference(dev->priomap);
- RCU_INIT_POINTER(dev->priomap, NULL);
- if (old)
- kfree_rcu(old, rcu);
- }
- rtnl_unlock();
+ return 0;
}
-module_init(init_cgroup_netprio);
-module_exit(exit_cgroup_netprio);
+subsys_initcall(init_cgroup_netprio);
MODULE_LICENSE("GPL v2");
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index fdac61cac1b..fc17a9d309a 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -476,23 +476,22 @@ static int pgctrl_show(struct seq_file *seq, void *v)
static ssize_t pgctrl_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
- int err = 0;
char data[128];
struct pktgen_net *pn = net_generic(current->nsproxy->net_ns, pg_net_id);
- if (!capable(CAP_NET_ADMIN)) {
- err = -EPERM;
- goto out;
- }
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ if (count == 0)
+ return -EINVAL;
if (count > sizeof(data))
count = sizeof(data);
- if (copy_from_user(data, buf, count)) {
- err = -EFAULT;
- goto out;
- }
- data[count - 1] = 0; /* Make string */
+ if (copy_from_user(data, buf, count))
+ return -EFAULT;
+
+ data[count - 1] = 0; /* Strip trailing '\n' and terminate string */
if (!strcmp(data, "stop"))
pktgen_stop_all_threads_ifs(pn);
@@ -506,10 +505,7 @@ static ssize_t pgctrl_write(struct file *file, const char __user *buf,
else
pr_warning("Unknown command: %s\n", data);
- err = count;
-
-out:
- return err;
+ return count;
}
static int pgctrl_open(struct inode *inode, struct file *file)
@@ -577,7 +573,7 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
is_zero_ether_addr(pkt_dev->src_mac) ?
pkt_dev->odev->dev_addr : pkt_dev->src_mac);
- seq_printf(seq, "dst_mac: ");
+ seq_puts(seq, "dst_mac: ");
seq_printf(seq, "%pM\n", pkt_dev->dst_mac);
seq_printf(seq,
@@ -592,7 +588,7 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
if (pkt_dev->nr_labels) {
unsigned int i;
- seq_printf(seq, " mpls: ");
+ seq_puts(seq, " mpls: ");
for (i = 0; i < pkt_dev->nr_labels; i++)
seq_printf(seq, "%08x%s", ntohl(pkt_dev->labels[i]),
i == pkt_dev->nr_labels-1 ? "\n" : ", ");
@@ -617,67 +613,67 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
if (pkt_dev->node >= 0)
seq_printf(seq, " node: %d\n", pkt_dev->node);
- seq_printf(seq, " Flags: ");
+ seq_puts(seq, " Flags: ");
if (pkt_dev->flags & F_IPV6)
- seq_printf(seq, "IPV6 ");
+ seq_puts(seq, "IPV6 ");
if (pkt_dev->flags & F_IPSRC_RND)
- seq_printf(seq, "IPSRC_RND ");
+ seq_puts(seq, "IPSRC_RND ");
if (pkt_dev->flags & F_IPDST_RND)
- seq_printf(seq, "IPDST_RND ");
+ seq_puts(seq, "IPDST_RND ");
if (pkt_dev->flags & F_TXSIZE_RND)
- seq_printf(seq, "TXSIZE_RND ");
+ seq_puts(seq, "TXSIZE_RND ");
if (pkt_dev->flags & F_UDPSRC_RND)
- seq_printf(seq, "UDPSRC_RND ");
+ seq_puts(seq, "UDPSRC_RND ");
if (pkt_dev->flags & F_UDPDST_RND)
- seq_printf(seq, "UDPDST_RND ");
+ seq_puts(seq, "UDPDST_RND ");
if (pkt_dev->flags & F_UDPCSUM)
- seq_printf(seq, "UDPCSUM ");
+ seq_puts(seq, "UDPCSUM ");
if (pkt_dev->flags & F_MPLS_RND)
- seq_printf(seq, "MPLS_RND ");
+ seq_puts(seq, "MPLS_RND ");
if (pkt_dev->flags & F_QUEUE_MAP_RND)
- seq_printf(seq, "QUEUE_MAP_RND ");
+ seq_puts(seq, "QUEUE_MAP_RND ");
if (pkt_dev->flags & F_QUEUE_MAP_CPU)
- seq_printf(seq, "QUEUE_MAP_CPU ");
+ seq_puts(seq, "QUEUE_MAP_CPU ");
if (pkt_dev->cflows) {
if (pkt_dev->flags & F_FLOW_SEQ)
- seq_printf(seq, "FLOW_SEQ "); /*in sequence flows*/
+ seq_puts(seq, "FLOW_SEQ "); /*in sequence flows*/
else
- seq_printf(seq, "FLOW_RND ");
+ seq_puts(seq, "FLOW_RND ");
}
#ifdef CONFIG_XFRM
if (pkt_dev->flags & F_IPSEC_ON) {
- seq_printf(seq, "IPSEC ");
+ seq_puts(seq, "IPSEC ");
if (pkt_dev->spi)
seq_printf(seq, "spi:%u", pkt_dev->spi);
}
#endif
if (pkt_dev->flags & F_MACSRC_RND)
- seq_printf(seq, "MACSRC_RND ");
+ seq_puts(seq, "MACSRC_RND ");
if (pkt_dev->flags & F_MACDST_RND)
- seq_printf(seq, "MACDST_RND ");
+ seq_puts(seq, "MACDST_RND ");
if (pkt_dev->flags & F_VID_RND)
- seq_printf(seq, "VID_RND ");
+ seq_puts(seq, "VID_RND ");
if (pkt_dev->flags & F_SVID_RND)
- seq_printf(seq, "SVID_RND ");
+ seq_puts(seq, "SVID_RND ");
if (pkt_dev->flags & F_NODE)
- seq_printf(seq, "NODE_ALLOC ");
+ seq_puts(seq, "NODE_ALLOC ");
seq_puts(seq, "\n");
@@ -720,7 +716,7 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
if (pkt_dev->result[0])
seq_printf(seq, "Result: %s\n", pkt_dev->result);
else
- seq_printf(seq, "Result: Idle\n");
+ seq_puts(seq, "Result: Idle\n");
return 0;
}
@@ -1251,7 +1247,13 @@ static ssize_t pktgen_if_write(struct file *file,
"Flag -:%s:- unknown\nAvailable flags, (prepend ! to un-set flag):\n%s",
f,
"IPSRC_RND, IPDST_RND, UDPSRC_RND, UDPDST_RND, "
- "MACSRC_RND, MACDST_RND, TXSIZE_RND, IPV6, MPLS_RND, VID_RND, SVID_RND, FLOW_SEQ, IPSEC, NODE_ALLOC\n");
+ "MACSRC_RND, MACDST_RND, TXSIZE_RND, IPV6, "
+ "MPLS_RND, VID_RND, SVID_RND, FLOW_SEQ, "
+ "QUEUE_MAP_RND, QUEUE_MAP_CPU, UDPCSUM, "
+#ifdef CONFIG_XFRM
+ "IPSEC, "
+#endif
+ "NODE_ALLOC\n");
return count;
}
sprintf(pg_result, "OK: flags=0x%x", pkt_dev->flags);
@@ -1733,14 +1735,14 @@ static int pktgen_thread_show(struct seq_file *seq, void *v)
BUG_ON(!t);
- seq_printf(seq, "Running: ");
+ seq_puts(seq, "Running: ");
if_lock(t);
list_for_each_entry(pkt_dev, &t->if_list, list)
if (pkt_dev->running)
seq_printf(seq, "%s ", pkt_dev->odevname);
- seq_printf(seq, "\nStopped: ");
+ seq_puts(seq, "\nStopped: ");
list_for_each_entry(pkt_dev, &t->if_list, list)
if (!pkt_dev->running)
@@ -1749,7 +1751,7 @@ static int pktgen_thread_show(struct seq_file *seq, void *v)
if (t->result[0])
seq_printf(seq, "\nResult: %s\n", t->result);
else
- seq_printf(seq, "\nResult: NA\n");
+ seq_puts(seq, "\nResult: NA\n");
if_unlock(t);
@@ -3336,9 +3338,11 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
queue_map = skb_get_queue_mapping(pkt_dev->skb);
txq = netdev_get_tx_queue(odev, queue_map);
- __netif_tx_lock_bh(txq);
+ local_bh_disable();
- if (unlikely(netif_xmit_frozen_or_stopped(txq))) {
+ HARD_TX_LOCK(odev, txq, smp_processor_id());
+
+ if (unlikely(netif_xmit_frozen_or_drv_stopped(txq))) {
ret = NETDEV_TX_BUSY;
pkt_dev->last_ok = 0;
goto unlock;
@@ -3372,7 +3376,9 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
pkt_dev->last_ok = 0;
}
unlock:
- __netif_tx_unlock_bh(txq);
+ HARD_TX_UNLOCK(odev, txq);
+
+ local_bh_enable();
/* If pkt_dev->count is zero, then run forever */
if ((pkt_dev->count != 0) && (pkt_dev->sofar >= pkt_dev->count)) {
diff --git a/net/core/ptp_classifier.c b/net/core/ptp_classifier.c
new file mode 100644
index 00000000000..d3027a73fd4
--- /dev/null
+++ b/net/core/ptp_classifier.c
@@ -0,0 +1,141 @@
+/* PTP classifier
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+/* The below program is the bpf_asm (tools/net/) representation of
+ * the opcode array in the ptp_filter structure.
+ *
+ * For convenience, this can easily be altered and reviewed with
+ * bpf_asm and bpf_dbg, e.g. `./bpf_asm -c prog` where prog is a
+ * simple file containing the below program:
+ *
+ * ldh [12] ; load ethertype
+ *
+ * ; PTP over UDP over IPv4 over Ethernet
+ * test_ipv4:
+ * jneq #0x800, test_ipv6 ; ETH_P_IP ?
+ * ldb [23] ; load proto
+ * jneq #17, drop_ipv4 ; IPPROTO_UDP ?
+ * ldh [20] ; load frag offset field
+ * jset #0x1fff, drop_ipv4 ; don't allow fragments
+ * ldxb 4*([14]&0xf) ; load IP header len
+ * ldh [x + 16] ; load UDP dst port
+ * jneq #319, drop_ipv4 ; is port PTP_EV_PORT ?
+ * ldh [x + 22] ; load payload
+ * and #0xf ; mask PTP_CLASS_VMASK
+ * or #0x10 ; PTP_CLASS_IPV4
+ * ret a ; return PTP class
+ * drop_ipv4: ret #0x0 ; PTP_CLASS_NONE
+ *
+ * ; PTP over UDP over IPv6 over Ethernet
+ * test_ipv6:
+ * jneq #0x86dd, test_8021q ; ETH_P_IPV6 ?
+ * ldb [20] ; load proto
+ * jneq #17, drop_ipv6 ; IPPROTO_UDP ?
+ * ldh [56] ; load UDP dst port
+ * jneq #319, drop_ipv6 ; is port PTP_EV_PORT ?
+ * ldh [62] ; load payload
+ * and #0xf ; mask PTP_CLASS_VMASK
+ * or #0x20 ; PTP_CLASS_IPV6
+ * ret a ; return PTP class
+ * drop_ipv6: ret #0x0 ; PTP_CLASS_NONE
+ *
+ * ; PTP over 802.1Q over Ethernet
+ * test_8021q:
+ * jneq #0x8100, test_ieee1588 ; ETH_P_8021Q ?
+ * ldh [16] ; load inner type
+ * jneq #0x88f7, drop_ieee1588 ; ETH_P_1588 ?
+ * ldb [18] ; load payload
+ * and #0x8 ; as we don't have ports here, test
+ * jneq #0x0, drop_ieee1588 ; for PTP_GEN_BIT and drop these
+ * ldh [18] ; reload payload
+ * and #0xf ; mask PTP_CLASS_VMASK
+ * or #0x40 ; PTP_CLASS_V2_VLAN
+ * ret a ; return PTP class
+ *
+ * ; PTP over Ethernet
+ * test_ieee1588:
+ * jneq #0x88f7, drop_ieee1588 ; ETH_P_1588 ?
+ * ldb [14] ; load payload
+ * and #0x8 ; as we don't have ports here, test
+ * jneq #0x0, drop_ieee1588 ; for PTP_GEN_BIT and drop these
+ * ldh [14] ; reload payload
+ * and #0xf ; mask PTP_CLASS_VMASK
+ * or #0x30 ; PTP_CLASS_L2
+ * ret a ; return PTP class
+ * drop_ieee1588: ret #0x0 ; PTP_CLASS_NONE
+ */
+
+#include <linux/skbuff.h>
+#include <linux/filter.h>
+#include <linux/ptp_classify.h>
+
+static struct sk_filter *ptp_insns __read_mostly;
+
+unsigned int ptp_classify_raw(const struct sk_buff *skb)
+{
+ return SK_RUN_FILTER(ptp_insns, skb); /* value is whatever the program's "ret" yields: a PTP class, or 0x0 (PTP_CLASS_NONE) on the drop_* paths of the listing above */
+}
+EXPORT_SYMBOL_GPL(ptp_classify_raw);
+
+void __init ptp_classifier_init(void)
+{
+ static struct sock_filter ptp_filter[] __initdata = { /* one row per instruction of the bpf_asm listing in the file header, same order */
+ { 0x28, 0, 0, 0x0000000c }, /* ldh [12] ; load ethertype */
+ { 0x15, 0, 12, 0x00000800 }, /* test_ipv4: jneq #0x800, test_ipv6 */
+ { 0x30, 0, 0, 0x00000017 }, /* ldb [23] ; load proto */
+ { 0x15, 0, 9, 0x00000011 }, /* jneq #17, drop_ipv4 */
+ { 0x28, 0, 0, 0x00000014 }, /* ldh [20] ; load frag offset field */
+ { 0x45, 7, 0, 0x00001fff }, /* jset #0x1fff, drop_ipv4 */
+ { 0xb1, 0, 0, 0x0000000e }, /* ldxb 4*([14]&0xf) ; load IP header len */
+ { 0x48, 0, 0, 0x00000010 }, /* ldh [x + 16] ; load UDP dst port */
+ { 0x15, 0, 4, 0x0000013f }, /* jneq #319, drop_ipv4 */
+ { 0x48, 0, 0, 0x00000016 }, /* ldh [x + 22] ; load payload */
+ { 0x54, 0, 0, 0x0000000f }, /* and #0xf */
+ { 0x44, 0, 0, 0x00000010 }, /* or #0x10 ; PTP_CLASS_IPV4 */
+ { 0x16, 0, 0, 0x00000000 }, /* ret a */
+ { 0x06, 0, 0, 0x00000000 }, /* drop_ipv4: ret #0x0 */
+ { 0x15, 0, 9, 0x000086dd }, /* test_ipv6: jneq #0x86dd, test_8021q */
+ { 0x30, 0, 0, 0x00000014 }, /* ldb [20] ; load proto */
+ { 0x15, 0, 6, 0x00000011 }, /* jneq #17, drop_ipv6 */
+ { 0x28, 0, 0, 0x00000038 }, /* ldh [56] ; load UDP dst port */
+ { 0x15, 0, 4, 0x0000013f }, /* jneq #319, drop_ipv6 */
+ { 0x28, 0, 0, 0x0000003e }, /* ldh [62] ; load payload */
+ { 0x54, 0, 0, 0x0000000f }, /* and #0xf */
+ { 0x44, 0, 0, 0x00000020 }, /* or #0x20 ; PTP_CLASS_IPV6 */
+ { 0x16, 0, 0, 0x00000000 }, /* ret a */
+ { 0x06, 0, 0, 0x00000000 }, /* drop_ipv6: ret #0x0 */
+ { 0x15, 0, 9, 0x00008100 }, /* test_8021q: jneq #0x8100, test_ieee1588 */
+ { 0x28, 0, 0, 0x00000010 }, /* ldh [16] ; load inner type */
+ { 0x15, 0, 15, 0x000088f7 }, /* jneq #0x88f7, drop_ieee1588 */
+ { 0x30, 0, 0, 0x00000012 }, /* ldb [18] ; load payload */
+ { 0x54, 0, 0, 0x00000008 }, /* and #0x8 ; isolate PTP_GEN_BIT */
+ { 0x15, 0, 12, 0x00000000 }, /* jneq #0x0, drop_ieee1588 */
+ { 0x28, 0, 0, 0x00000012 }, /* ldh [18] ; reload payload */
+ { 0x54, 0, 0, 0x0000000f }, /* and #0xf */
+ { 0x44, 0, 0, 0x00000040 }, /* or #0x40 */
+ { 0x16, 0, 0, 0x00000000 }, /* ret a */
+ { 0x15, 0, 7, 0x000088f7 }, /* test_ieee1588: jneq #0x88f7, drop_ieee1588 */
+ { 0x30, 0, 0, 0x0000000e }, /* ldb [14] ; load payload */
+ { 0x54, 0, 0, 0x00000008 }, /* and #0x8 ; isolate PTP_GEN_BIT */
+ { 0x15, 0, 4, 0x00000000 }, /* jneq #0x0, drop_ieee1588 */
+ { 0x28, 0, 0, 0x0000000e }, /* ldh [14] ; reload payload */
+ { 0x54, 0, 0, 0x0000000f }, /* and #0xf */
+ { 0x44, 0, 0, 0x00000030 }, /* or #0x30 ; PTP_CLASS_L2 */
+ { 0x16, 0, 0, 0x00000000 }, /* ret a */
+ { 0x06, 0, 0, 0x00000000 }, /* drop_ieee1588: ret #0x0 */
+ };
+ struct sock_fprog_kern ptp_prog = { /* pairs the opcode array with its element count */
+ .len = ARRAY_SIZE(ptp_filter), .filter = ptp_filter,
+ };
+
+ BUG_ON(sk_unattached_filter_create(&ptp_insns, &ptp_prog)); /* a non-zero return here is treated as fatal; on success ptp_insns is what ptp_classify_raw() runs */
+}
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index 4425148d2b5..467f326126e 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -221,5 +221,4 @@ void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req,
out:
spin_unlock_bh(&fastopenq->lock);
sock_put(lsk);
- return;
}
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 120eecc0f5a..1063996f831 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -353,15 +353,46 @@ void __rtnl_link_unregister(struct rtnl_link_ops *ops)
}
EXPORT_SYMBOL_GPL(__rtnl_link_unregister);
+/* Return with the rtnl_lock held when there are no network
+ * devices unregistering in any network namespace.
+ */
+static void rtnl_lock_unregistering_all(void)
+{
+ struct net *net;
+ bool unregistering;
+ DEFINE_WAIT(wait);
+
+ for (;;) {
+ prepare_to_wait(&netdev_unregistering_wq, &wait,
+ TASK_UNINTERRUPTIBLE); /* get on the wait queue before the counters are inspected */
+ unregistering = false;
+ rtnl_lock(); /* the per-namespace counters below are read with rtnl held */
+ for_each_net(net) {
+ if (net->dev_unreg_count > 0) {
+ unregistering = true;
+ break; /* one busy namespace is enough; stop scanning */
+ }
+ }
+ if (!unregistering)
+ break; /* leave the loop with rtnl_lock still held, as promised to the caller */
+ __rtnl_unlock(); /* release rtnl before sleeping */
+ schedule(); /* sleep, then redo the whole check from the top */
+ }
+ finish_wait(&netdev_unregistering_wq, &wait); /* reached only via the break above, i.e. with rtnl held */
+}
+
/**
* rtnl_link_unregister - Unregister rtnl_link_ops from rtnetlink.
* @ops: struct rtnl_link_ops * to unregister
*/
void rtnl_link_unregister(struct rtnl_link_ops *ops)
{
- rtnl_lock();
+ /* Close the race with cleanup_net() */
+ mutex_lock(&net_mutex);
+ rtnl_lock_unregistering_all();
__rtnl_link_unregister(ops);
rtnl_unlock();
+ mutex_unlock(&net_mutex);
}
EXPORT_SYMBOL_GPL(rtnl_link_unregister);
@@ -767,14 +798,15 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev,
size += num_vfs *
(nla_total_size(sizeof(struct ifla_vf_mac)) +
nla_total_size(sizeof(struct ifla_vf_vlan)) +
- nla_total_size(sizeof(struct ifla_vf_tx_rate)) +
- nla_total_size(sizeof(struct ifla_vf_spoofchk)));
+ nla_total_size(sizeof(struct ifla_vf_spoofchk)) +
+ nla_total_size(sizeof(struct ifla_vf_rate)));
return size;
} else
return 0;
}
-static size_t rtnl_port_size(const struct net_device *dev)
+static size_t rtnl_port_size(const struct net_device *dev,
+ u32 ext_filter_mask)
{
size_t port_size = nla_total_size(4) /* PORT_VF */
+ nla_total_size(PORT_PROFILE_MAX) /* PORT_PROFILE */
@@ -790,7 +822,8 @@ static size_t rtnl_port_size(const struct net_device *dev)
size_t port_self_size = nla_total_size(sizeof(struct nlattr))
+ port_size;
- if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent)
+ if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent ||
+ !(ext_filter_mask & RTEXT_FILTER_VF))
return 0;
if (dev_num_vf(dev->dev.parent))
return port_self_size + vf_ports_size +
@@ -822,10 +855,11 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
+ nla_total_size(4) /* IFLA_NUM_RX_QUEUES */
+ nla_total_size(1) /* IFLA_OPERSTATE */
+ nla_total_size(1) /* IFLA_LINKMODE */
+ + nla_total_size(4) /* IFLA_CARRIER_CHANGES */
+ nla_total_size(ext_filter_mask
& RTEXT_FILTER_VF ? 4 : 0) /* IFLA_NUM_VF */
+ rtnl_vfinfo_size(dev, ext_filter_mask) /* IFLA_VFINFO_LIST */
- + rtnl_port_size(dev) /* IFLA_VF_PORTS + IFLA_PORT_SELF */
+ + rtnl_port_size(dev, ext_filter_mask) /* IFLA_VF_PORTS + IFLA_PORT_SELF */
+ rtnl_link_get_size(dev) /* IFLA_LINKINFO */
+ rtnl_link_get_af_size(dev) /* IFLA_AF_SPEC */
+ nla_total_size(MAX_PHYS_PORT_ID_LEN); /* IFLA_PHYS_PORT_ID */
@@ -887,11 +921,13 @@ static int rtnl_port_self_fill(struct sk_buff *skb, struct net_device *dev)
return 0;
}
-static int rtnl_port_fill(struct sk_buff *skb, struct net_device *dev)
+static int rtnl_port_fill(struct sk_buff *skb, struct net_device *dev,
+ u32 ext_filter_mask)
{
int err;
- if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent)
+ if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent ||
+ !(ext_filter_mask & RTEXT_FILTER_VF))
return 0;
err = rtnl_port_self_fill(skb, dev);
@@ -970,7 +1006,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
(dev->qdisc &&
nla_put_string(skb, IFLA_QDISC, dev->qdisc->ops->id)) ||
(dev->ifalias &&
- nla_put_string(skb, IFLA_IFALIAS, dev->ifalias)))
+ nla_put_string(skb, IFLA_IFALIAS, dev->ifalias)) ||
+ nla_put_u32(skb, IFLA_CARRIER_CHANGES,
+ atomic_read(&dev->carrier_changes)))
goto nla_put_failure;
if (1) {
@@ -1027,6 +1065,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
struct ifla_vf_info ivi;
struct ifla_vf_mac vf_mac;
struct ifla_vf_vlan vf_vlan;
+ struct ifla_vf_rate vf_rate;
struct ifla_vf_tx_rate vf_tx_rate;
struct ifla_vf_spoofchk vf_spoofchk;
struct ifla_vf_link_state vf_linkstate;
@@ -1047,6 +1086,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
break;
vf_mac.vf =
vf_vlan.vf =
+ vf_rate.vf =
vf_tx_rate.vf =
vf_spoofchk.vf =
vf_linkstate.vf = ivi.vf;
@@ -1054,7 +1094,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac));
vf_vlan.vlan = ivi.vlan;
vf_vlan.qos = ivi.qos;
- vf_tx_rate.rate = ivi.tx_rate;
+ vf_tx_rate.rate = ivi.max_tx_rate;
+ vf_rate.min_tx_rate = ivi.min_tx_rate;
+ vf_rate.max_tx_rate = ivi.max_tx_rate;
vf_spoofchk.setting = ivi.spoofchk;
vf_linkstate.link_state = ivi.linkstate;
vf = nla_nest_start(skb, IFLA_VF_INFO);
@@ -1064,6 +1106,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
}
if (nla_put(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac) ||
nla_put(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan) ||
+ nla_put(skb, IFLA_VF_RATE, sizeof(vf_rate),
+ &vf_rate) ||
nla_put(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate),
&vf_tx_rate) ||
nla_put(skb, IFLA_VF_SPOOFCHK, sizeof(vf_spoofchk),
@@ -1076,7 +1120,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
nla_nest_end(skb, vfinfo);
}
- if (rtnl_port_fill(skb, dev))
+ if (rtnl_port_fill(skb, dev, ext_filter_mask))
goto nla_put_failure;
if (dev->rtnl_link_ops || rtnl_have_link_slave_info(dev)) {
@@ -1121,56 +1165,7 @@ nla_put_failure:
return -EMSGSIZE;
}
-static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
-{
- struct net *net = sock_net(skb->sk);
- int h, s_h;
- int idx = 0, s_idx;
- struct net_device *dev;
- struct hlist_head *head;
- struct nlattr *tb[IFLA_MAX+1];
- u32 ext_filter_mask = 0;
-
- s_h = cb->args[0];
- s_idx = cb->args[1];
-
- rcu_read_lock();
- cb->seq = net->dev_base_seq;
-
- if (nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX,
- ifla_policy) >= 0) {
-
- if (tb[IFLA_EXT_MASK])
- ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
- }
-
- for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
- idx = 0;
- head = &net->dev_index_head[h];
- hlist_for_each_entry_rcu(dev, head, index_hlist) {
- if (idx < s_idx)
- goto cont;
- if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, 0,
- NLM_F_MULTI,
- ext_filter_mask) <= 0)
- goto out;
-
- nl_dump_check_consistent(cb, nlmsg_hdr(skb));
-cont:
- idx++;
- }
- }
-out:
- rcu_read_unlock();
- cb->args[1] = idx;
- cb->args[0] = h;
-
- return skb->len;
-}
-
-const struct nla_policy ifla_policy[IFLA_MAX+1] = {
+static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_IFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ-1 },
[IFLA_ADDRESS] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
[IFLA_BROADCAST] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
@@ -1196,8 +1191,8 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_NUM_TX_QUEUES] = { .type = NLA_U32 },
[IFLA_NUM_RX_QUEUES] = { .type = NLA_U32 },
[IFLA_PHYS_PORT_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_PORT_ID_LEN },
+ [IFLA_CARRIER_CHANGES] = { .type = NLA_U32 }, /* ignored */
};
-EXPORT_SYMBOL(ifla_policy);
static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
[IFLA_INFO_KIND] = { .type = NLA_STRING },
@@ -1219,6 +1214,10 @@ static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = {
.len = sizeof(struct ifla_vf_tx_rate) },
[IFLA_VF_SPOOFCHK] = { .type = NLA_BINARY,
.len = sizeof(struct ifla_vf_spoofchk) },
+ [IFLA_VF_RATE] = { .type = NLA_BINARY,
+ .len = sizeof(struct ifla_vf_rate) },
+ [IFLA_VF_LINK_STATE] = { .type = NLA_BINARY,
+ .len = sizeof(struct ifla_vf_link_state) },
};
static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = {
@@ -1235,6 +1234,78 @@ static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = {
[IFLA_PORT_RESPONSE] = { .type = NLA_U16, },
};
+static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct net *net = sock_net(skb->sk);
+ int h, s_h;
+ int idx = 0, s_idx;
+ struct net_device *dev;
+ struct hlist_head *head;
+ struct nlattr *tb[IFLA_MAX+1];
+ u32 ext_filter_mask = 0;
+ int err;
+ int hdrlen;
+
+ s_h = cb->args[0]; /* hash bucket to start from; written back at "out" below */
+ s_idx = cb->args[1]; /* position inside that bucket to start from */
+
+ rcu_read_lock();
+ cb->seq = net->dev_base_seq;
+
+ /* A hack to preserve kernel<->userspace interface.
+ * The correct header is ifinfomsg. It is consistent with rtnl_getlink.
+ * However, before Linux v3.9 the code here assumed rtgenmsg and that's
+ * what iproute2 < v3.9.0 used.
+ * We can detect the old iproute2. Even including the IFLA_EXT_MASK
+ * attribute, its netlink message is shorter than struct ifinfomsg.
+ */
+ hdrlen = nlmsg_len(cb->nlh) < sizeof(struct ifinfomsg) ?
+ sizeof(struct rtgenmsg) : sizeof(struct ifinfomsg);
+
+ if (nlmsg_parse(cb->nlh, hdrlen, tb, IFLA_MAX, ifla_policy) >= 0) { /* a parse failure is not reported: ext_filter_mask just stays 0 */
+
+ if (tb[IFLA_EXT_MASK])
+ ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
+ }
+
+ for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { /* s_idx only applies to the first bucket visited */
+ idx = 0;
+ head = &net->dev_index_head[h];
+ hlist_for_each_entry_rcu(dev, head, index_hlist) {
+ if (idx < s_idx)
+ goto cont; /* skip entries before the start position, but still count them */
+ err = rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, 0,
+ NLM_F_MULTI,
+ ext_filter_mask);
+ /* If we ran out of room on the first message,
+ * we're in trouble
+ */
+ WARN_ON((err == -EMSGSIZE) && (skb->len == 0));
+
+ if (err <= 0)
+ goto out; /* idx is not advanced, so this device is the saved position */
+
+ nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+cont:
+ idx++;
+ }
+ }
+out:
+ rcu_read_unlock();
+ cb->args[1] = idx; /* save the cursor read back as s_idx / s_h at the top */
+ cb->args[0] = h;
+
+ return skb->len; /* err from rtnl_fill_ifinfo() is not propagated; the skb length is returned on every path */
+}
+
+int rtnl_nla_parse_ifla(struct nlattr **tb, const struct nlattr *head, int len)
+{
+ return nla_parse(tb, IFLA_MAX, head, len, ifla_policy); /* exported wrapper: this same patch makes ifla_policy static and drops its EXPORT_SYMBOL */
+}
+EXPORT_SYMBOL(rtnl_nla_parse_ifla);
+
struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[])
{
struct net *net;
@@ -1316,11 +1387,29 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr *attr)
}
case IFLA_VF_TX_RATE: {
struct ifla_vf_tx_rate *ivt;
+ struct ifla_vf_info ivf;
+ ivt = nla_data(vf);
+ err = -EOPNOTSUPP;
+ if (ops->ndo_get_vf_config)
+ err = ops->ndo_get_vf_config(dev, ivt->vf,
+ &ivf);
+ if (err)
+ break;
+ err = -EOPNOTSUPP;
+ if (ops->ndo_set_vf_rate)
+ err = ops->ndo_set_vf_rate(dev, ivt->vf,
+ ivf.min_tx_rate,
+ ivt->rate);
+ break;
+ }
+ case IFLA_VF_RATE: {
+ struct ifla_vf_rate *ivt;
ivt = nla_data(vf);
err = -EOPNOTSUPP;
- if (ops->ndo_set_vf_tx_rate)
- err = ops->ndo_set_vf_tx_rate(dev, ivt->vf,
- ivt->rate);
+ if (ops->ndo_set_vf_rate)
+ err = ops->ndo_set_vf_rate(dev, ivt->vf,
+ ivt->min_tx_rate,
+ ivt->max_tx_rate);
break;
}
case IFLA_VF_SPOOFCHK: {
@@ -1386,7 +1475,8 @@ static int do_set_master(struct net_device *dev, int ifindex)
return 0;
}
-static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
+static int do_setlink(const struct sk_buff *skb,
+ struct net_device *dev, struct ifinfomsg *ifm,
struct nlattr **tb, char *ifname, int modified)
{
const struct net_device_ops *ops = dev->netdev_ops;
@@ -1398,7 +1488,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
err = PTR_ERR(net);
goto errout;
}
- if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) {
+ if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) {
err = -EPERM;
goto errout;
}
@@ -1652,7 +1742,7 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh)
if (err < 0)
goto errout;
- err = do_setlink(dev, ifm, tb, ifname, 0);
+ err = do_setlink(skb, dev, ifm, tb, ifname, 0);
errout:
return err;
}
@@ -1692,7 +1782,6 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh)
ops->dellink(dev, &list_kill);
unregister_netdevice_many(&list_kill);
- list_del(&list_kill);
return 0;
}
@@ -1769,7 +1858,8 @@ err:
}
EXPORT_SYMBOL(rtnl_create_link);
-static int rtnl_group_changelink(struct net *net, int group,
+static int rtnl_group_changelink(const struct sk_buff *skb,
+ struct net *net, int group,
struct ifinfomsg *ifm,
struct nlattr **tb)
{
@@ -1778,7 +1868,7 @@ static int rtnl_group_changelink(struct net *net, int group,
for_each_netdev(net, dev) {
if (dev->group == group) {
- err = do_setlink(dev, ifm, tb, NULL, 0);
+ err = do_setlink(skb, dev, ifm, tb, NULL, 0);
if (err < 0)
return err;
}
@@ -1920,12 +2010,12 @@ replay:
modified = 1;
}
- return do_setlink(dev, ifm, tb, ifname, modified);
+ return do_setlink(skb, dev, ifm, tb, ifname, modified);
}
if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
if (ifm->ifi_index == 0 && tb[IFLA_GROUP])
- return rtnl_group_changelink(net,
+ return rtnl_group_changelink(skb, net,
nla_get_u32(tb[IFLA_GROUP]),
ifm, tb);
return -ENODEV;
@@ -1966,11 +2056,15 @@ replay:
if (ops->newlink) {
err = ops->newlink(net, dev, tb, data);
/* Drivers should call free_netdev() in ->destructor
- * and unregister it on failure so that device could be
- * finally freed in rtnl_unlock.
+ * and unregister it on failure after registration
+ * so that device could be finally freed in rtnl_unlock.
*/
- if (err < 0)
+ if (err < 0) {
+ /* If device is not registered at all, free it now */
+ if (dev->reg_state == NETREG_UNINITIALIZED)
+ free_netdev(dev);
goto out;
+ }
} else {
err = register_netdevice(dev);
if (err < 0) {
@@ -2042,9 +2136,13 @@ static u16 rtnl_calcit(struct sk_buff *skb, struct nlmsghdr *nlh)
struct nlattr *tb[IFLA_MAX+1];
u32 ext_filter_mask = 0;
u16 min_ifinfo_dump_size = 0;
+ int hdrlen;
+
+ /* Same kernel<->userspace interface hack as in rtnl_dump_ifinfo. */
+ hdrlen = nlmsg_len(nlh) < sizeof(struct ifinfomsg) ?
+ sizeof(struct rtgenmsg) : sizeof(struct ifinfomsg);
- if (nlmsg_parse(nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX,
- ifla_policy) >= 0) {
+ if (nlmsg_parse(nlh, hdrlen, tb, IFLA_MAX, ifla_policy) >= 0) {
if (tb[IFLA_EXT_MASK])
ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
}
@@ -2312,7 +2410,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh)
int err = -EINVAL;
__u8 *addr;
- if (!capable(CAP_NET_ADMIN))
+ if (!netlink_capable(skb, CAP_NET_ADMIN))
return -EPERM;
err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
@@ -2764,7 +2862,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
sz_idx = type>>2;
kind = type&3;
- if (kind != 2 && !ns_capable(net->user_ns, CAP_NET_ADMIN))
+ if (kind != 2 && !netlink_net_capable(skb, CAP_NET_ADMIN))
return -EPERM;
if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) {
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index 897da56f3af..ba71212f025 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -85,31 +85,6 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral);
#endif
#ifdef CONFIG_INET
-__u32 secure_ip_id(__be32 daddr)
-{
- u32 hash[MD5_DIGEST_WORDS];
-
- net_secret_init();
- hash[0] = (__force __u32) daddr;
- hash[1] = net_secret[13];
- hash[2] = net_secret[14];
- hash[3] = net_secret[15];
-
- md5_transform(hash, net_secret);
-
- return hash[0];
-}
-
-__u32 secure_ipv6_id(const __be32 daddr[4])
-{
- __u32 hash[4];
-
- net_secret_init();
- memcpy(hash, daddr, 16);
- md5_transform(hash, net_secret);
-
- return hash[0];
-}
__u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
__be16 sport, __be16 dport)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 90b96a11b97..c1a33033cbe 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -689,12 +689,15 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
new->ooo_okay = old->ooo_okay;
new->no_fcs = old->no_fcs;
new->encapsulation = old->encapsulation;
+ new->encap_hdr_csum = old->encap_hdr_csum;
+ new->csum_valid = old->csum_valid;
+ new->csum_complete_sw = old->csum_complete_sw;
#ifdef CONFIG_XFRM
new->sp = secpath_get(old->sp);
#endif
memcpy(new->cb, old->cb, sizeof(old->cb));
new->csum = old->csum;
- new->local_df = old->local_df;
+ new->ignore_df = old->ignore_df;
new->pkt_type = old->pkt_type;
new->ip_summed = old->ip_summed;
skb_copy_queue_mapping(new, old);
@@ -951,10 +954,13 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
EXPORT_SYMBOL(skb_copy);
/**
- * __pskb_copy - create copy of an sk_buff with private head.
+ * __pskb_copy_fclone - create copy of an sk_buff with private head.
* @skb: buffer to copy
* @headroom: headroom of new skb
* @gfp_mask: allocation priority
+ * @fclone: if true allocate the copy of the skb from the fclone
+ * cache instead of the head cache; it is recommended to set this
+ * to true for the cases where the copy will likely be cloned
*
* Make a copy of both an &sk_buff and part of its data, located
* in header. Fragmented data remain shared. This is used when
@@ -964,11 +970,12 @@ EXPORT_SYMBOL(skb_copy);
* The returned buffer has a reference count of 1.
*/
-struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask)
+struct sk_buff *__pskb_copy_fclone(struct sk_buff *skb, int headroom,
+ gfp_t gfp_mask, bool fclone)
{
unsigned int size = skb_headlen(skb) + headroom;
- struct sk_buff *n = __alloc_skb(size, gfp_mask,
- skb_alloc_rx_flag(skb), NUMA_NO_NODE);
+ int flags = skb_alloc_rx_flag(skb) | (fclone ? SKB_ALLOC_FCLONE : 0);
+ struct sk_buff *n = __alloc_skb(size, gfp_mask, flags, NUMA_NO_NODE);
if (!n)
goto out;
@@ -1008,7 +1015,7 @@ struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask)
out:
return n;
}
-EXPORT_SYMBOL(__pskb_copy);
+EXPORT_SYMBOL(__pskb_copy_fclone);
/**
* pskb_expand_head - reallocate header of &sk_buff
@@ -2881,12 +2888,14 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
int pos;
int dummy;
+ __skb_push(head_skb, doffset);
proto = skb_network_protocol(head_skb, &dummy);
if (unlikely(!proto))
return ERR_PTR(-EINVAL);
- csum = !!can_checksum_protocol(features, proto);
- __skb_push(head_skb, doffset);
+ csum = !head_skb->encap_hdr_csum &&
+ !!can_checksum_protocol(features, proto);
+
headroom = skb_headroom(head_skb);
pos = skb_headlen(head_skb);
@@ -2983,6 +2992,8 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
nskb->csum = skb_copy_and_csum_bits(head_skb, offset,
skb_put(nskb, len),
len, 0);
+ SKB_GSO_CB(nskb)->csum_start =
+ skb_headroom(nskb) + doffset;
continue;
}
@@ -3052,6 +3063,8 @@ perform_csum_check:
nskb->csum = skb_checksum(nskb, doffset,
nskb->len - doffset, 0);
nskb->ip_summed = CHECKSUM_NONE;
+ SKB_GSO_CB(nskb)->csum_start =
+ skb_headroom(nskb) + doffset;
}
} while ((offset += len) < head_skb->len);
@@ -3076,7 +3089,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
if (unlikely(p->len + len >= 65536))
return -E2BIG;
- lp = NAPI_GRO_CB(p)->last ?: p;
+ lp = NAPI_GRO_CB(p)->last;
pinfo = skb_shinfo(lp);
if (headlen <= offset) {
@@ -3192,7 +3205,7 @@ merge:
__skb_pull(skb, offset);
- if (!NAPI_GRO_CB(p)->last)
+ if (NAPI_GRO_CB(p)->last == p)
skb_shinfo(p)->frag_list = skb;
else
NAPI_GRO_CB(p)->last->next = skb;
@@ -3300,6 +3313,32 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
return elt;
}
+/* As compared with skb_to_sgvec, skb_to_sgvec_nomark only maps the skb to the
+ * given sglist without marking the sg that contains the last skb data as the
+ * end, so the caller can manipulate the sg list at will when appending new
+ * data after the first call, without calling sg_unmark_end to expand it.
+ *
+ * Scenario to use skb_to_sgvec_nomark:
+ * 1. sg_init_table
+ * 2. skb_to_sgvec_nomark(payload1)
+ * 3. skb_to_sgvec_nomark(payload2)
+ *
+ * This is equivalent to:
+ * 1. sg_init_table
+ * 2. skb_to_sgvec(payload1)
+ * 3. sg_unmark_end
+ * 4. skb_to_sgvec(payload2)
+ *
+ * When mapping multiple payloads conditionally, skb_to_sgvec_nomark
+ * is preferable.
+ */
+int skb_to_sgvec_nomark(struct sk_buff *skb, struct scatterlist *sg,
+ int offset, int len)
+{
+ return __skb_to_sgvec(skb, sg, offset, len);
+}
+EXPORT_SYMBOL_GPL(skb_to_sgvec_nomark);
+
int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
{
int nsg = __skb_to_sgvec(skb, sg, offset, len);
@@ -3432,8 +3471,6 @@ static void sock_rmem_free(struct sk_buff *skb)
*/
int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
{
- int len = skb->len;
-
if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
(unsigned int)sk->sk_rcvbuf)
return -ENOMEM;
@@ -3448,7 +3485,7 @@ int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
skb_queue_tail(&sk->sk_error_queue, skb);
if (!sock_flag(sk, SOCK_DEAD))
- sk->sk_data_ready(sk, len);
+ sk->sk_data_ready(sk);
return 0;
}
EXPORT_SYMBOL(sock_queue_err_skb);
@@ -3562,15 +3599,47 @@ static int skb_maybe_pull_tail(struct sk_buff *skb, unsigned int len,
return 0;
}
+#define MAX_TCP_HDR_LEN (15 * 4)
+
+static __sum16 *skb_checksum_setup_ip(struct sk_buff *skb,
+ typeof(IPPROTO_IP) proto,
+ unsigned int off)
+{
+ switch (proto) {
+ int err;
+
+ case IPPROTO_TCP:
+ err = skb_maybe_pull_tail(skb, off + sizeof(struct tcphdr),
+ off + MAX_TCP_HDR_LEN);
+ if (!err && !skb_partial_csum_set(skb, off,
+ offsetof(struct tcphdr,
+ check)))
+ err = -EPROTO;
+ return err ? ERR_PTR(err) : &tcp_hdr(skb)->check;
+
+ case IPPROTO_UDP:
+ err = skb_maybe_pull_tail(skb, off + sizeof(struct udphdr),
+ off + sizeof(struct udphdr));
+ if (!err && !skb_partial_csum_set(skb, off,
+ offsetof(struct udphdr,
+ check)))
+ err = -EPROTO;
+ return err ? ERR_PTR(err) : &udp_hdr(skb)->check;
+ }
+
+ return ERR_PTR(-EPROTO);
+}
+
/* This value should be large enough to cover a tagged ethernet header plus
* maximally sized IP and TCP or UDP headers.
*/
#define MAX_IP_HDR_LEN 128
-static int skb_checksum_setup_ip(struct sk_buff *skb, bool recalculate)
+static int skb_checksum_setup_ipv4(struct sk_buff *skb, bool recalculate)
{
unsigned int off;
bool fragment;
+ __sum16 *csum;
int err;
fragment = false;
@@ -3591,51 +3660,15 @@ static int skb_checksum_setup_ip(struct sk_buff *skb, bool recalculate)
if (fragment)
goto out;
- switch (ip_hdr(skb)->protocol) {
- case IPPROTO_TCP:
- err = skb_maybe_pull_tail(skb,
- off + sizeof(struct tcphdr),
- MAX_IP_HDR_LEN);
- if (err < 0)
- goto out;
-
- if (!skb_partial_csum_set(skb, off,
- offsetof(struct tcphdr, check))) {
- err = -EPROTO;
- goto out;
- }
-
- if (recalculate)
- tcp_hdr(skb)->check =
- ~csum_tcpudp_magic(ip_hdr(skb)->saddr,
- ip_hdr(skb)->daddr,
- skb->len - off,
- IPPROTO_TCP, 0);
- break;
- case IPPROTO_UDP:
- err = skb_maybe_pull_tail(skb,
- off + sizeof(struct udphdr),
- MAX_IP_HDR_LEN);
- if (err < 0)
- goto out;
-
- if (!skb_partial_csum_set(skb, off,
- offsetof(struct udphdr, check))) {
- err = -EPROTO;
- goto out;
- }
-
- if (recalculate)
- udp_hdr(skb)->check =
- ~csum_tcpudp_magic(ip_hdr(skb)->saddr,
- ip_hdr(skb)->daddr,
- skb->len - off,
- IPPROTO_UDP, 0);
- break;
- default:
- goto out;
- }
+ csum = skb_checksum_setup_ip(skb, ip_hdr(skb)->protocol, off);
+ if (IS_ERR(csum))
+ return PTR_ERR(csum);
+ if (recalculate)
+ *csum = ~csum_tcpudp_magic(ip_hdr(skb)->saddr,
+ ip_hdr(skb)->daddr,
+ skb->len - off,
+ ip_hdr(skb)->protocol, 0);
err = 0;
out:
@@ -3658,6 +3691,7 @@ static int skb_checksum_setup_ipv6(struct sk_buff *skb, bool recalculate)
unsigned int len;
bool fragment;
bool done;
+ __sum16 *csum;
fragment = false;
done = false;
@@ -3735,51 +3769,14 @@ static int skb_checksum_setup_ipv6(struct sk_buff *skb, bool recalculate)
if (!done || fragment)
goto out;
- switch (nexthdr) {
- case IPPROTO_TCP:
- err = skb_maybe_pull_tail(skb,
- off + sizeof(struct tcphdr),
- MAX_IPV6_HDR_LEN);
- if (err < 0)
- goto out;
-
- if (!skb_partial_csum_set(skb, off,
- offsetof(struct tcphdr, check))) {
- err = -EPROTO;
- goto out;
- }
-
- if (recalculate)
- tcp_hdr(skb)->check =
- ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
- &ipv6_hdr(skb)->daddr,
- skb->len - off,
- IPPROTO_TCP, 0);
- break;
- case IPPROTO_UDP:
- err = skb_maybe_pull_tail(skb,
- off + sizeof(struct udphdr),
- MAX_IPV6_HDR_LEN);
- if (err < 0)
- goto out;
-
- if (!skb_partial_csum_set(skb, off,
- offsetof(struct udphdr, check))) {
- err = -EPROTO;
- goto out;
- }
-
- if (recalculate)
- udp_hdr(skb)->check =
- ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
- &ipv6_hdr(skb)->daddr,
- skb->len - off,
- IPPROTO_UDP, 0);
- break;
- default:
- goto out;
- }
+ csum = skb_checksum_setup_ip(skb, nexthdr, off);
+ if (IS_ERR(csum))
+ return PTR_ERR(csum);
+ if (recalculate)
+ *csum = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+ &ipv6_hdr(skb)->daddr,
+ skb->len - off, nexthdr, 0);
err = 0;
out:
@@ -3797,7 +3794,7 @@ int skb_checksum_setup(struct sk_buff *skb, bool recalculate)
switch (skb->protocol) {
case htons(ETH_P_IP):
- err = skb_checksum_setup_ip(skb, recalculate);
+ err = skb_checksum_setup_ipv4(skb, recalculate);
break;
case htons(ETH_P_IPV6):
@@ -3929,7 +3926,7 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet)
skb->tstamp.tv64 = 0;
skb->pkt_type = PACKET_HOST;
skb->skb_iif = 0;
- skb->local_df = 0;
+ skb->ignore_df = 0;
skb_dst_drop(skb);
skb->mark = 0;
secpath_reset(skb);
@@ -3951,12 +3948,14 @@ EXPORT_SYMBOL_GPL(skb_scrub_packet);
unsigned int skb_gso_transport_seglen(const struct sk_buff *skb)
{
const struct skb_shared_info *shinfo = skb_shinfo(skb);
- unsigned int hdr_len;
if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
- hdr_len = tcp_hdrlen(skb);
- else
- hdr_len = sizeof(struct udphdr);
- return hdr_len + shinfo->gso_size;
+ return tcp_hdrlen(skb) + shinfo->gso_size;
+
+ /* UFO sets gso_size to the size of the fragmentation
+ * payload, i.e. the size of the L4 (UDP) header is already
+ * accounted for.
+ */
+ return shinfo->gso_size;
}
EXPORT_SYMBOL_GPL(skb_gso_transport_seglen);
diff --git a/net/core/sock.c b/net/core/sock.c
index c0fc6bdad1e..026e01f7027 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -145,6 +145,55 @@
static DEFINE_MUTEX(proto_list_mutex);
static LIST_HEAD(proto_list);
+/**
+ * sk_ns_capable - General socket capability test
+ * @sk: Socket to use a capability on or through
+ * @user_ns: The user namespace of the capability to use
+ * @cap: The capability to use
+ *
+ * Test to see if the opener of the socket had, when the socket was
+ * created, and the current process has, the capability @cap in the user
+ * namespace @user_ns.
+ */
+bool sk_ns_capable(const struct sock *sk,
+ struct user_namespace *user_ns, int cap)
+{
+ return file_ns_capable(sk->sk_socket->file, user_ns, cap) &&
+ ns_capable(user_ns, cap);
+}
+EXPORT_SYMBOL(sk_ns_capable);
+
+/**
+ * sk_capable - Socket global capability test
+ * @sk: Socket to use a capability on or through
+ * @cap: The global capability to use
+ *
+ * Test to see if the opener of the socket had, when the socket was
+ * created, and the current process has, the capability @cap in all user
+ * namespaces.
+ */
+bool sk_capable(const struct sock *sk, int cap)
+{
+ return sk_ns_capable(sk, &init_user_ns, cap);
+}
+EXPORT_SYMBOL(sk_capable);
+
+/**
+ * sk_net_capable - Network namespace socket capability test
+ * @sk: Socket to use a capability on or through
+ * @cap: The capability to use
+ *
+ * Test to see if the opener of the socket had, when the socket was created,
+ * and the current process has, the capability @cap over the network namespace
+ * the socket is a member of.
+ */
+bool sk_net_capable(const struct sock *sk, int cap)
+{
+ return sk_ns_capable(sk, sock_net(sk)->user_ns, cap);
+}
+EXPORT_SYMBOL(sk_net_capable);
+
+
#ifdef CONFIG_MEMCG_KMEM
int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
{
@@ -428,7 +477,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
spin_unlock_irqrestore(&list->lock, flags);
if (!sock_flag(sk, SOCK_DEAD))
- sk->sk_data_ready(sk, skb_len);
+ sk->sk_data_ready(sk);
return 0;
}
EXPORT_SYMBOL(sock_queue_rcv_skb);
@@ -735,7 +784,7 @@ set_rcvbuf:
break;
case SO_NO_CHECK:
- sk->sk_no_check = valbool;
+ sk->sk_no_check_tx = valbool;
break;
case SO_PRIORITY:
@@ -1015,7 +1064,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
break;
case SO_NO_CHECK:
- v.val = sk->sk_no_check;
+ v.val = sk->sk_no_check_tx;
break;
case SO_PRIORITY:
@@ -2196,7 +2245,7 @@ static void sock_def_error_report(struct sock *sk)
rcu_read_unlock();
}
-static void sock_def_readable(struct sock *sk, int len)
+static void sock_def_readable(struct sock *sk)
{
struct socket_wq *wq;
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index a0e9cf6379d..a4216a4c957 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -49,38 +49,35 @@ int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attrtype)
}
EXPORT_SYMBOL_GPL(sock_diag_put_meminfo);
-int sock_diag_put_filterinfo(struct user_namespace *user_ns, struct sock *sk,
+int sock_diag_put_filterinfo(bool may_report_filterinfo, struct sock *sk,
struct sk_buff *skb, int attrtype)
{
- struct nlattr *attr;
+ struct sock_fprog_kern *fprog;
struct sk_filter *filter;
- unsigned int len;
+ struct nlattr *attr;
+ unsigned int flen;
int err = 0;
- if (!ns_capable(user_ns, CAP_NET_ADMIN)) {
+ if (!may_report_filterinfo) {
nla_reserve(skb, attrtype, 0);
return 0;
}
rcu_read_lock();
-
filter = rcu_dereference(sk->sk_filter);
- len = filter ? filter->len * sizeof(struct sock_filter) : 0;
+ if (!filter)
+ goto out;
- attr = nla_reserve(skb, attrtype, len);
+ fprog = filter->orig_prog;
+ flen = sk_filter_proglen(fprog);
+
+ attr = nla_reserve(skb, attrtype, flen);
if (attr == NULL) {
err = -EMSGSIZE;
goto out;
}
- if (filter) {
- struct sock_filter *fb = (struct sock_filter *)nla_data(attr);
- int i;
-
- for (i = 0; i < filter->len; i++, fb++)
- sk_decode_filter(&filter->insns[i], fb);
- }
-
+ memcpy(nla_data(attr), fprog->filter, flen);
out:
rcu_read_unlock();
return err;
diff --git a/net/core/timestamping.c b/net/core/timestamping.c
index 661b5a40ec1..6521dfd8b7c 100644
--- a/net/core/timestamping.c
+++ b/net/core/timestamping.c
@@ -23,16 +23,11 @@
#include <linux/skbuff.h>
#include <linux/export.h>
-static struct sock_filter ptp_filter[] = {
- PTP_FILTER
-};
-
static unsigned int classify(const struct sk_buff *skb)
{
- if (likely(skb->dev &&
- skb->dev->phydev &&
+ if (likely(skb->dev && skb->dev->phydev &&
skb->dev->phydev->drv))
- return sk_run_filter(skb, ptp_filter);
+ return ptp_classify_raw(skb);
else
return PTP_CLASS_NONE;
}
@@ -60,11 +55,13 @@ void skb_clone_tx_timestamp(struct sk_buff *skb)
if (likely(phydev->drv->txtstamp)) {
if (!atomic_inc_not_zero(&sk->sk_refcnt))
return;
+
clone = skb_clone(skb, GFP_ATOMIC);
if (!clone) {
sock_put(sk);
return;
}
+
clone->sk = sk;
phydev->drv->txtstamp(phydev, clone, type);
}
@@ -89,12 +86,15 @@ void skb_complete_tx_timestamp(struct sk_buff *skb,
}
*skb_hwtstamps(skb) = *hwtstamps;
+
serr = SKB_EXT_ERR(skb);
memset(serr, 0, sizeof(*serr));
serr->ee.ee_errno = ENOMSG;
serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
skb->sk = NULL;
+
err = sock_queue_err_skb(sk, skb);
+
sock_put(sk);
if (err)
kfree_skb(skb);
@@ -132,8 +132,3 @@ bool skb_defer_rx_timestamp(struct sk_buff *skb)
return false;
}
EXPORT_SYMBOL_GPL(skb_defer_rx_timestamp);
-
-void __init skb_timestamping_init(void)
-{
- BUG_ON(sk_chk_filter(ptp_filter, ARRAY_SIZE(ptp_filter)));
-}
diff --git a/net/core/tso.c b/net/core/tso.c
new file mode 100644
index 00000000000..8c3203c585b
--- /dev/null
+++ b/net/core/tso.c
@@ -0,0 +1,77 @@
+#include <linux/export.h>
+#include <net/ip.h>
+#include <net/tso.h>
+
+/* Calculate expected number of TX descriptors */
+int tso_count_descs(struct sk_buff *skb)
+{
+ /* The Marvell Way */
+ return skb_shinfo(skb)->gso_segs * 2 + skb_shinfo(skb)->nr_frags;
+}
+EXPORT_SYMBOL(tso_count_descs);
+
+void tso_build_hdr(struct sk_buff *skb, char *hdr, struct tso_t *tso,
+ int size, bool is_last)
+{
+ struct iphdr *iph;
+ struct tcphdr *tcph;
+ int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
+ int mac_hdr_len = skb_network_offset(skb);
+
+ memcpy(hdr, skb->data, hdr_len);
+ iph = (struct iphdr *)(hdr + mac_hdr_len);
+ iph->id = htons(tso->ip_id);
+ iph->tot_len = htons(size + hdr_len - mac_hdr_len);
+ tcph = (struct tcphdr *)(hdr + skb_transport_offset(skb));
+ tcph->seq = htonl(tso->tcp_seq);
+ tso->ip_id++;
+
+ if (!is_last) {
+ /* Clear all special flags for all but the last packet */
+ tcph->psh = 0;
+ tcph->fin = 0;
+ tcph->rst = 0;
+ }
+}
+EXPORT_SYMBOL(tso_build_hdr);
+
+void tso_build_data(struct sk_buff *skb, struct tso_t *tso, int size)
+{
+ tso->tcp_seq += size;
+ tso->size -= size;
+ tso->data += size;
+
+ if ((tso->size == 0) &&
+ (tso->next_frag_idx < skb_shinfo(skb)->nr_frags)) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[tso->next_frag_idx];
+
+ /* Move to next segment */
+ tso->size = frag->size;
+ tso->data = page_address(frag->page.p) + frag->page_offset;
+ tso->next_frag_idx++;
+ }
+}
+EXPORT_SYMBOL(tso_build_data);
+
+void tso_start(struct sk_buff *skb, struct tso_t *tso)
+{
+ int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
+
+ tso->ip_id = ntohs(ip_hdr(skb)->id);
+ tso->tcp_seq = ntohl(tcp_hdr(skb)->seq);
+ tso->next_frag_idx = 0;
+
+ /* Build first data */
+ tso->size = skb_headlen(skb) - hdr_len;
+ tso->data = skb->data + hdr_len;
+ if ((tso->size == 0) &&
+ (tso->next_frag_idx < skb_shinfo(skb)->nr_frags)) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[tso->next_frag_idx];
+
+ /* Move to next segment */
+ tso->size = frag->size;
+ tso->data = page_address(frag->page.p) + frag->page_offset;
+ tso->next_frag_idx++;
+ }
+}
+EXPORT_SYMBOL(tso_start);
diff --git a/net/core/utils.c b/net/core/utils.c
index 2f737bf90b3..eed34338736 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -348,8 +348,8 @@ static void __net_random_once_deferred(struct work_struct *w)
{
struct __net_random_once_work *work =
container_of(w, struct __net_random_once_work, work);
- if (!static_key_enabled(work->key))
- static_key_slow_inc(work->key);
+ BUG_ON(!static_key_enabled(work->key));
+ static_key_slow_dec(work->key);
kfree(work);
}
@@ -367,7 +367,7 @@ static void __net_random_once_disable_jump(struct static_key *key)
}
bool __net_get_random_once(void *buf, int nbytes, bool *done,
- struct static_key *done_key)
+ struct static_key *once_key)
{
static DEFINE_SPINLOCK(lock);
unsigned long flags;
@@ -382,7 +382,7 @@ bool __net_get_random_once(void *buf, int nbytes, bool *done,
*done = true;
spin_unlock_irqrestore(&lock, flags);
- __net_random_once_disable_jump(done_key);
+ __net_random_once_disable_jump(once_key);
return true;
}
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index 55364440267..f8b98d89c28 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -1669,7 +1669,7 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh)
struct nlmsghdr *reply_nlh = NULL;
const struct reply_func *fn;
- if ((nlh->nlmsg_type == RTM_SETDCB) && !capable(CAP_NET_ADMIN))
+ if ((nlh->nlmsg_type == RTM_SETDCB) && !netlink_capable(skb, CAP_NET_ADMIN))
return -EPERM;
ret = nlmsg_parse(nlh, sizeof(*dcb), tb, DCB_ATTR_MAX,
diff --git a/net/dccp/input.c b/net/dccp/input.c
index 14cdafad7a9..3c8ec7d4a34 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -28,7 +28,7 @@ static void dccp_enqueue_skb(struct sock *sk, struct sk_buff *skb)
__skb_pull(skb, dccp_hdr(skb)->dccph_doff * 4);
__skb_queue_tail(&sk->sk_receive_queue, skb);
skb_set_owner_r(skb, sk);
- sk->sk_data_ready(sk, 0);
+ sk->sk_data_ready(sk);
}
static void dccp_fin(struct sock *sk, struct sk_buff *skb)
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 22b5d818b20..6ca645c4b48 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -1024,7 +1024,6 @@ static struct inet_protosw dccp_v4_protosw = {
.protocol = IPPROTO_DCCP,
.prot = &dccp_v4_prot,
.ops = &inet_dccp_ops,
- .no_check = 0,
.flags = INET_PROTOSW_ICSK,
};
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 9e2f78bc155..c69eb9c4fbb 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -237,7 +237,7 @@ int dccp_child_process(struct sock *parent, struct sock *child,
/* Wakeup parent, send SIGIO */
if (state == DCCP_RESPOND && child->sk_state != state)
- parent->sk_data_ready(parent, 0);
+ parent->sk_data_ready(parent);
} else {
/* Alas, it is possible again, because we do lookup
* in main socket hash table and lock on listening
diff --git a/net/dccp/output.c b/net/dccp/output.c
index 8876078859d..0248e8a3460 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -138,7 +138,7 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb)
DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
- err = icsk->icsk_af_ops->queue_xmit(skb, &inet->cork.fl);
+ err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl);
return net_xmit_eval(err);
}
return -ENOBUFS;
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index eb892b4f481..de2c1e71930 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -1084,14 +1084,15 @@ EXPORT_SYMBOL_GPL(dccp_shutdown);
static inline int dccp_mib_init(void)
{
- return snmp_mib_init((void __percpu **)dccp_statistics,
- sizeof(struct dccp_mib),
- __alignof__(struct dccp_mib));
+ dccp_statistics = alloc_percpu(struct dccp_mib);
+ if (!dccp_statistics)
+ return -ENOMEM;
+ return 0;
}
static inline void dccp_mib_exit(void)
{
- snmp_mib_free((void __percpu **)dccp_statistics);
+ free_percpu(dccp_statistics);
}
static int thash_entries;
diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c
index 607ab71b5a0..53731e45403 100644
--- a/net/dccp/sysctl.c
+++ b/net/dccp/sysctl.c
@@ -20,6 +20,7 @@
/* Boundary values */
static int zero = 0,
+ one = 1,
u8_max = 0xFF;
static unsigned long seqw_min = DCCPF_SEQ_WMIN,
seqw_max = 0xFFFFFFFF; /* maximum on 32 bit */
@@ -58,7 +59,7 @@ static struct ctl_table dccp_default_table[] = {
.maxlen = sizeof(sysctl_dccp_request_retries),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .extra1 = &one,
.extra2 = &u8_max,
},
{
diff --git a/net/dccp/timer.c b/net/dccp/timer.c
index 16f0b223102..1cd46a345cb 100644
--- a/net/dccp/timer.c
+++ b/net/dccp/timer.c
@@ -280,7 +280,7 @@ static ktime_t dccp_timestamp_seed;
*/
u32 dccp_timestamp(void)
{
- s64 delta = ktime_us_delta(ktime_get_real(), dccp_timestamp_seed);
+ u64 delta = (u64)ktime_us_delta(ktime_get_real(), dccp_timestamp_seed);
do_div(delta, 10);
return delta;
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 4c04848953b..ae011b46c07 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -481,7 +481,7 @@ static struct sock *dn_alloc_sock(struct net *net, struct socket *sock, gfp_t gf
sk->sk_backlog_rcv = dn_nsp_backlog_rcv;
sk->sk_destruct = dn_destruct;
- sk->sk_no_check = 1;
+ sk->sk_no_check_tx = 1;
sk->sk_family = PF_DECnet;
sk->sk_protocol = 0;
sk->sk_allocation = gfp;
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index a603823a3e2..3b726f31c64 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -574,7 +574,7 @@ static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
struct dn_ifaddr __rcu **ifap;
int err = -EINVAL;
- if (!capable(CAP_NET_ADMIN))
+ if (!netlink_capable(skb, CAP_NET_ADMIN))
return -EPERM;
if (!net_eq(net, &init_net))
@@ -618,7 +618,7 @@ static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
struct dn_ifaddr *ifa;
int err;
- if (!capable(CAP_NET_ADMIN))
+ if (!netlink_capable(skb, CAP_NET_ADMIN))
return -EPERM;
if (!net_eq(net, &init_net))
diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c
index 57dc159245e..d332aefb084 100644
--- a/net/decnet/dn_fib.c
+++ b/net/decnet/dn_fib.c
@@ -505,7 +505,7 @@ static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
struct nlattr *attrs[RTA_MAX+1];
int err;
- if (!capable(CAP_NET_ADMIN))
+ if (!netlink_capable(skb, CAP_NET_ADMIN))
return -EPERM;
if (!net_eq(net, &init_net))
@@ -530,7 +530,7 @@ static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
struct nlattr *attrs[RTA_MAX+1];
int err;
- if (!capable(CAP_NET_ADMIN))
+ if (!netlink_capable(skb, CAP_NET_ADMIN))
return -EPERM;
if (!net_eq(net, &init_net))
diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c
index c344163e6ac..fe5f01485d3 100644
--- a/net/decnet/dn_nsp_in.c
+++ b/net/decnet/dn_nsp_in.c
@@ -585,7 +585,6 @@ out:
static __inline__ int dn_queue_skb(struct sock *sk, struct sk_buff *skb, int sig, struct sk_buff_head *queue)
{
int err;
- int skb_len;
/* Cast skb->rcvbuf to unsigned... It's pointless, but reduces
number of warnings when compiling with -W --ANK
@@ -600,12 +599,11 @@ static __inline__ int dn_queue_skb(struct sock *sk, struct sk_buff *skb, int sig
if (err)
goto out;
- skb_len = skb->len;
skb_set_owner_r(skb, sk);
skb_queue_tail(queue, skb);
if (!sock_flag(sk, SOCK_DEAD))
- sk->sk_data_ready(sk, skb_len);
+ sk->sk_data_ready(sk);
out:
return err;
}
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index ce0cbbfe0f4..daccc4a36d8 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -752,7 +752,7 @@ static int dn_to_neigh_output(struct sk_buff *skb)
return n->output(n, skb);
}
-static int dn_output(struct sk_buff *skb)
+static int dn_output(struct sock *sk, struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
struct dn_route *rt = (struct dn_route *)dst;
@@ -838,6 +838,18 @@ drop:
* Used to catch bugs. This should never normally get
* called.
*/
+static int dn_rt_bug_sk(struct sock *sk, struct sk_buff *skb)
+{
+ struct dn_skb_cb *cb = DN_SKB_CB(skb);
+
+ net_dbg_ratelimited("dn_rt_bug: skb from:%04x to:%04x\n",
+ le16_to_cpu(cb->src), le16_to_cpu(cb->dst));
+
+ kfree_skb(skb);
+
+ return NET_RX_DROP;
+}
+
static int dn_rt_bug(struct sk_buff *skb)
{
struct dn_skb_cb *cb = DN_SKB_CB(skb);
@@ -1463,7 +1475,7 @@ make_route:
rt->n = neigh;
rt->dst.lastuse = jiffies;
- rt->dst.output = dn_rt_bug;
+ rt->dst.output = dn_rt_bug_sk;
switch (res.type) {
case RTN_UNICAST:
rt->dst.input = dn_forward;
diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c
index e83015cecfa..e4d9560a910 100644
--- a/net/decnet/netfilter/dn_rtmsg.c
+++ b/net/decnet/netfilter/dn_rtmsg.c
@@ -107,7 +107,7 @@ static inline void dnrmg_receive_user_skb(struct sk_buff *skb)
if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len)
return;
- if (!capable(CAP_NET_ADMIN))
+ if (!netlink_capable(skb, CAP_NET_ADMIN))
RCV_SKB_FAIL(-EPERM);
/* Eventually we might send routing messages too */
diff --git a/net/dns_resolver/dns_query.c b/net/dns_resolver/dns_query.c
index e7b6d53eef8..dd8696a3dbe 100644
--- a/net/dns_resolver/dns_query.c
+++ b/net/dns_resolver/dns_query.c
@@ -93,8 +93,8 @@ int dns_query(const char *type, const char *name, size_t namelen,
}
if (!namelen)
- namelen = strlen(name);
- if (namelen < 3)
+ namelen = strnlen(name, 256);
+ if (namelen < 3 || namelen > 255)
return -EINVAL;
desclen += namelen + 1;
@@ -149,7 +149,9 @@ int dns_query(const char *type, const char *name, size_t namelen,
if (!*_result)
goto put;
- memcpy(*_result, upayload->data, len + 1);
+ memcpy(*_result, upayload->data, len);
+ (*_result)[len] = '\0';
+
if (_expiry)
*_expiry = rkey->expiry;
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 0eb5d5e76df..5db37cef50a 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -406,8 +406,9 @@ static int dsa_of_probe(struct platform_device *pdev)
goto out_free;
}
- chip_index = 0;
+ chip_index = -1;
for_each_available_child_of_node(np, child) {
+ chip_index++;
cd = &pd->chip[chip_index];
cd->mii_bus = &mdio_bus->dev;
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 02c0e1716f6..64c5af0a10d 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -346,7 +346,7 @@ dsa_slave_create(struct dsa_switch *ds, struct device *parent,
return slave_dev;
slave_dev->features = master->vlan_features;
- SET_ETHTOOL_OPS(slave_dev, &dsa_slave_ethtool_ops);
+ slave_dev->ethtool_ops = &dsa_slave_ethtool_ops;
eth_hw_addr_inherit(slave_dev, master);
slave_dev->tx_queue_len = 0;
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index cac505f166d..e5302b7f7ca 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -209,7 +209,7 @@ static int slave_xmit(struct sk_buff *skb, struct hsr_priv *hsr_priv,
/* Address substitution (IEC62439-3 pp 26, 50): replace mac
* address of outgoing frame with that of the outgoing slave's.
*/
- memcpy(hsr_ethhdr->ethhdr.h_source, skb->dev->dev_addr, ETH_ALEN);
+ ether_addr_copy(hsr_ethhdr->ethhdr.h_source, skb->dev->dev_addr);
return dev_queue_xmit(skb);
}
@@ -346,7 +346,7 @@ static void send_hsr_supervision_frame(struct net_device *hsr_dev, u8 type)
/* Payload: MacAddressA */
hsr_sp = (typeof(hsr_sp)) skb_put(skb, sizeof(*hsr_sp));
- memcpy(hsr_sp->MacAddressA, hsr_dev->dev_addr, ETH_ALEN);
+ ether_addr_copy(hsr_sp->MacAddressA, hsr_dev->dev_addr);
dev_queue_xmit(skb);
return;
@@ -493,7 +493,7 @@ static int check_slave_ok(struct net_device *dev)
/* Default multicast address for HSR Supervision frames */
-static const unsigned char def_multicast_addr[ETH_ALEN] = {
+static const unsigned char def_multicast_addr[ETH_ALEN] __aligned(2) = {
0x01, 0x15, 0x4e, 0x00, 0x01, 0x00
};
@@ -519,7 +519,7 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
hsr_priv->announce_timer.function = hsr_announce;
hsr_priv->announce_timer.data = (unsigned long) hsr_priv;
- memcpy(hsr_priv->sup_multicast_addr, def_multicast_addr, ETH_ALEN);
+ ether_addr_copy(hsr_priv->sup_multicast_addr, def_multicast_addr);
hsr_priv->sup_multicast_addr[ETH_ALEN - 1] = multicast_spec;
/* FIXME: should I modify the value of these?
@@ -547,7 +547,7 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
hsr_dev->features |= NETIF_F_VLAN_CHALLENGED;
/* Set hsr_dev's MAC address to that of mac_slave1 */
- memcpy(hsr_dev->dev_addr, hsr_priv->slave[0]->dev_addr, ETH_ALEN);
+ ether_addr_copy(hsr_dev->dev_addr, hsr_priv->slave[0]->dev_addr);
/* Set required header length */
for (i = 0; i < HSR_MAX_SLAVE; i++) {
diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c
index 7ae0d7f6dbd..83e58449366 100644
--- a/net/hsr/hsr_framereg.c
+++ b/net/hsr/hsr_framereg.c
@@ -108,8 +108,8 @@ int hsr_create_self_node(struct list_head *self_node_db,
if (!node)
return -ENOMEM;
- memcpy(node->MacAddressA, addr_a, ETH_ALEN);
- memcpy(node->MacAddressB, addr_b, ETH_ALEN);
+ ether_addr_copy(node->MacAddressA, addr_a);
+ ether_addr_copy(node->MacAddressB, addr_b);
rcu_read_lock();
oldnode = list_first_or_null_rcu(self_node_db,
@@ -199,7 +199,7 @@ struct node_entry *hsr_merge_node(struct hsr_priv *hsr_priv,
/* Node is known, but frame was received from an unknown
* address. Node is PICS_SUBS capable; merge its AddrB.
*/
- memcpy(node->MacAddressB, hsr_ethsup->ethhdr.h_source, ETH_ALEN);
+ ether_addr_copy(node->MacAddressB, hsr_ethsup->ethhdr.h_source);
node->AddrB_if = dev_idx;
return node;
}
@@ -208,8 +208,8 @@ struct node_entry *hsr_merge_node(struct hsr_priv *hsr_priv,
if (!node)
return NULL;
- memcpy(node->MacAddressA, hsr_sp->MacAddressA, ETH_ALEN);
- memcpy(node->MacAddressB, hsr_ethsup->ethhdr.h_source, ETH_ALEN);
+ ether_addr_copy(node->MacAddressA, hsr_sp->MacAddressA);
+ ether_addr_copy(node->MacAddressB, hsr_ethsup->ethhdr.h_source);
if (!ether_addr_equal(hsr_sp->MacAddressA, hsr_ethsup->ethhdr.h_source))
node->AddrB_if = dev_idx;
else
@@ -250,7 +250,7 @@ void hsr_addr_subst_source(struct hsr_priv *hsr_priv, struct sk_buff *skb)
rcu_read_lock();
node = find_node_by_AddrB(&hsr_priv->node_db, ethhdr->h_source);
if (node)
- memcpy(ethhdr->h_source, node->MacAddressA, ETH_ALEN);
+ ether_addr_copy(ethhdr->h_source, node->MacAddressA);
rcu_read_unlock();
}
@@ -272,7 +272,7 @@ void hsr_addr_subst_dest(struct hsr_priv *hsr_priv, struct ethhdr *ethhdr,
rcu_read_lock();
node = find_node_by_AddrA(&hsr_priv->node_db, ethhdr->h_dest);
if (node && (node->AddrB_if == dev_idx))
- memcpy(ethhdr->h_dest, node->MacAddressB, ETH_ALEN);
+ ether_addr_copy(ethhdr->h_dest, node->MacAddressB);
rcu_read_unlock();
}
@@ -428,13 +428,13 @@ void *hsr_get_next_node(struct hsr_priv *hsr_priv, void *_pos,
node = list_first_or_null_rcu(&hsr_priv->node_db,
struct node_entry, mac_list);
if (node)
- memcpy(addr, node->MacAddressA, ETH_ALEN);
+ ether_addr_copy(addr, node->MacAddressA);
return node;
}
node = _pos;
list_for_each_entry_continue_rcu(node, &hsr_priv->node_db, mac_list) {
- memcpy(addr, node->MacAddressA, ETH_ALEN);
+ ether_addr_copy(addr, node->MacAddressA);
return node;
}
@@ -462,7 +462,7 @@ int hsr_get_node_data(struct hsr_priv *hsr_priv,
return -ENOENT; /* No such entry */
}
- memcpy(addr_b, node->MacAddressB, ETH_ALEN);
+ ether_addr_copy(addr_b, node->MacAddressB);
tdiff = jiffies - node->time_in[HSR_DEV_SLAVE_A];
if (node->time_in_stale[HSR_DEV_SLAVE_A])
diff --git a/net/hsr/hsr_main.c b/net/hsr/hsr_main.c
index af68dd83a4e..3fee5218a69 100644
--- a/net/hsr/hsr_main.c
+++ b/net/hsr/hsr_main.c
@@ -138,8 +138,8 @@ static int hsr_netdev_notify(struct notifier_block *nb, unsigned long event,
break;
if (dev == hsr_priv->slave[0])
- memcpy(hsr_priv->dev->dev_addr,
- hsr_priv->slave[0]->dev_addr, ETH_ALEN);
+ ether_addr_copy(hsr_priv->dev->dev_addr,
+ hsr_priv->slave[0]->dev_addr);
/* Make sure we recognize frames from ourselves in hsr_rcv() */
res = hsr_create_self_node(&hsr_priv->self_node_db,
@@ -459,7 +459,7 @@ static int __init hsr_init(void)
static void __exit hsr_exit(void)
{
unregister_netdevice_notifier(&hsr_nb);
- del_timer(&prune_timer);
+ del_timer_sync(&prune_timer);
hsr_netlink_exit();
dev_remove_pack(&hsr_pt);
}
diff --git a/net/ieee802154/6lowpan.h b/net/ieee802154/6lowpan.h
deleted file mode 100644
index 2b835db3bda..00000000000
--- a/net/ieee802154/6lowpan.h
+++ /dev/null
@@ -1,319 +0,0 @@
-/*
- * Copyright 2011, Siemens AG
- * written by Alexander Smirnov <alex.bluesman.smirnov@gmail.com>
- */
-
-/*
- * Based on patches from Jon Smirl <jonsmirl@gmail.com>
- * Copyright (c) 2011 Jon Smirl <jonsmirl@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-/* Jon's code is based on 6lowpan implementation for Contiki which is:
- * Copyright (c) 2008, Swedish Institute of Computer Science.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the Institute nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#ifndef __6LOWPAN_H__
-#define __6LOWPAN_H__
-
-#define UIP_802154_SHORTADDR_LEN 2 /* compressed ipv6 address length */
-#define UIP_IPH_LEN 40 /* ipv6 fixed header size */
-#define UIP_PROTO_UDP 17 /* ipv6 next header value for UDP */
-#define UIP_FRAGH_LEN 8 /* ipv6 fragment header size */
-
-/*
- * ipv6 address based on mac
- * second bit-flip (Universe/Local) is done according RFC2464
- */
-#define is_addr_mac_addr_based(a, m) \
- ((((a)->s6_addr[8]) == (((m)[0]) ^ 0x02)) && \
- (((a)->s6_addr[9]) == (m)[1]) && \
- (((a)->s6_addr[10]) == (m)[2]) && \
- (((a)->s6_addr[11]) == (m)[3]) && \
- (((a)->s6_addr[12]) == (m)[4]) && \
- (((a)->s6_addr[13]) == (m)[5]) && \
- (((a)->s6_addr[14]) == (m)[6]) && \
- (((a)->s6_addr[15]) == (m)[7]))
-
-/* ipv6 address is unspecified */
-#define is_addr_unspecified(a) \
- ((((a)->s6_addr32[0]) == 0) && \
- (((a)->s6_addr32[1]) == 0) && \
- (((a)->s6_addr32[2]) == 0) && \
- (((a)->s6_addr32[3]) == 0))
-
-/* compare ipv6 addresses prefixes */
-#define ipaddr_prefixcmp(addr1, addr2, length) \
- (memcmp(addr1, addr2, length >> 3) == 0)
-
-/* local link, i.e. FE80::/10 */
-#define is_addr_link_local(a) (((a)->s6_addr16[0]) == htons(0xFE80))
-
-/*
- * check whether we can compress the IID to 16 bits,
- * it's possible for unicast adresses with first 49 bits are zero only.
- */
-#define lowpan_is_iid_16_bit_compressable(a) \
- ((((a)->s6_addr16[4]) == 0) && \
- (((a)->s6_addr[10]) == 0) && \
- (((a)->s6_addr[11]) == 0xff) && \
- (((a)->s6_addr[12]) == 0xfe) && \
- (((a)->s6_addr[13]) == 0))
-
-/* multicast address */
-#define is_addr_mcast(a) (((a)->s6_addr[0]) == 0xFF)
-
-/* check whether the 112-bit gid of the multicast address is mappable to: */
-
-/* 9 bits, for FF02::1 (all nodes) and FF02::2 (all routers) addresses only. */
-#define lowpan_is_mcast_addr_compressable(a) \
- ((((a)->s6_addr16[1]) == 0) && \
- (((a)->s6_addr16[2]) == 0) && \
- (((a)->s6_addr16[3]) == 0) && \
- (((a)->s6_addr16[4]) == 0) && \
- (((a)->s6_addr16[5]) == 0) && \
- (((a)->s6_addr16[6]) == 0) && \
- (((a)->s6_addr[14]) == 0) && \
- ((((a)->s6_addr[15]) == 1) || (((a)->s6_addr[15]) == 2)))
-
-/* 48 bits, FFXX::00XX:XXXX:XXXX */
-#define lowpan_is_mcast_addr_compressable48(a) \
- ((((a)->s6_addr16[1]) == 0) && \
- (((a)->s6_addr16[2]) == 0) && \
- (((a)->s6_addr16[3]) == 0) && \
- (((a)->s6_addr16[4]) == 0) && \
- (((a)->s6_addr[10]) == 0))
-
-/* 32 bits, FFXX::00XX:XXXX */
-#define lowpan_is_mcast_addr_compressable32(a) \
- ((((a)->s6_addr16[1]) == 0) && \
- (((a)->s6_addr16[2]) == 0) && \
- (((a)->s6_addr16[3]) == 0) && \
- (((a)->s6_addr16[4]) == 0) && \
- (((a)->s6_addr16[5]) == 0) && \
- (((a)->s6_addr[12]) == 0))
-
-/* 8 bits, FF02::00XX */
-#define lowpan_is_mcast_addr_compressable8(a) \
- ((((a)->s6_addr[1]) == 2) && \
- (((a)->s6_addr16[1]) == 0) && \
- (((a)->s6_addr16[2]) == 0) && \
- (((a)->s6_addr16[3]) == 0) && \
- (((a)->s6_addr16[4]) == 0) && \
- (((a)->s6_addr16[5]) == 0) && \
- (((a)->s6_addr16[6]) == 0) && \
- (((a)->s6_addr[14]) == 0))
-
-#define lowpan_is_addr_broadcast(a) \
- ((((a)[0]) == 0xFF) && \
- (((a)[1]) == 0xFF) && \
- (((a)[2]) == 0xFF) && \
- (((a)[3]) == 0xFF) && \
- (((a)[4]) == 0xFF) && \
- (((a)[5]) == 0xFF) && \
- (((a)[6]) == 0xFF) && \
- (((a)[7]) == 0xFF))
-
-#define LOWPAN_DISPATCH_IPV6 0x41 /* 01000001 = 65 */
-#define LOWPAN_DISPATCH_HC1 0x42 /* 01000010 = 66 */
-#define LOWPAN_DISPATCH_IPHC 0x60 /* 011xxxxx = ... */
-#define LOWPAN_DISPATCH_FRAG1 0xc0 /* 11000xxx */
-#define LOWPAN_DISPATCH_FRAGN 0xe0 /* 11100xxx */
-
-#define LOWPAN_DISPATCH_MASK 0xf8 /* 11111000 */
-
-#define LOWPAN_FRAG_TIMEOUT (HZ * 60) /* time-out 60 sec */
-
-#define LOWPAN_FRAG1_HEAD_SIZE 0x4
-#define LOWPAN_FRAGN_HEAD_SIZE 0x5
-
-/*
- * According IEEE802.15.4 standard:
- * - MTU is 127 octets
- * - maximum MHR size is 37 octets
- * - MFR size is 2 octets
- *
- * so minimal payload size that we may guarantee is:
- * MTU - MHR - MFR = 88 octets
- */
-#define LOWPAN_FRAG_SIZE 88
-
-/*
- * Values of fields within the IPHC encoding first byte
- * (C stands for compressed and I for inline)
- */
-#define LOWPAN_IPHC_TF 0x18
-
-#define LOWPAN_IPHC_FL_C 0x10
-#define LOWPAN_IPHC_TC_C 0x08
-#define LOWPAN_IPHC_NH_C 0x04
-#define LOWPAN_IPHC_TTL_1 0x01
-#define LOWPAN_IPHC_TTL_64 0x02
-#define LOWPAN_IPHC_TTL_255 0x03
-#define LOWPAN_IPHC_TTL_I 0x00
-
-
-/* Values of fields within the IPHC encoding second byte */
-#define LOWPAN_IPHC_CID 0x80
-
-#define LOWPAN_IPHC_ADDR_00 0x00
-#define LOWPAN_IPHC_ADDR_01 0x01
-#define LOWPAN_IPHC_ADDR_02 0x02
-#define LOWPAN_IPHC_ADDR_03 0x03
-
-#define LOWPAN_IPHC_SAC 0x40
-#define LOWPAN_IPHC_SAM 0x30
-
-#define LOWPAN_IPHC_SAM_BIT 4
-
-#define LOWPAN_IPHC_M 0x08
-#define LOWPAN_IPHC_DAC 0x04
-#define LOWPAN_IPHC_DAM_00 0x00
-#define LOWPAN_IPHC_DAM_01 0x01
-#define LOWPAN_IPHC_DAM_10 0x02
-#define LOWPAN_IPHC_DAM_11 0x03
-
-#define LOWPAN_IPHC_DAM_BIT 0
-/*
- * LOWPAN_UDP encoding (works together with IPHC)
- */
-#define LOWPAN_NHC_UDP_MASK 0xF8
-#define LOWPAN_NHC_UDP_ID 0xF0
-#define LOWPAN_NHC_UDP_CHECKSUMC 0x04
-#define LOWPAN_NHC_UDP_CHECKSUMI 0x00
-
-#define LOWPAN_NHC_UDP_4BIT_PORT 0xF0B0
-#define LOWPAN_NHC_UDP_4BIT_MASK 0xFFF0
-#define LOWPAN_NHC_UDP_8BIT_PORT 0xF000
-#define LOWPAN_NHC_UDP_8BIT_MASK 0xFF00
-
-/* values for port compression, _with checksum_ ie bit 5 set to 0 */
-#define LOWPAN_NHC_UDP_CS_P_00 0xF0 /* all inline */
-#define LOWPAN_NHC_UDP_CS_P_01 0xF1 /* source 16bit inline,
- dest = 0xF0 + 8 bit inline */
-#define LOWPAN_NHC_UDP_CS_P_10 0xF2 /* source = 0xF0 + 8bit inline,
- dest = 16 bit inline */
-#define LOWPAN_NHC_UDP_CS_P_11 0xF3 /* source & dest = 0xF0B + 4bit inline */
-#define LOWPAN_NHC_UDP_CS_C 0x04 /* checksum elided */
-
-#ifdef DEBUG
-/* print data in line */
-static inline void raw_dump_inline(const char *caller, char *msg,
- unsigned char *buf, int len)
-{
- if (msg)
- pr_debug("%s():%s: ", caller, msg);
-
- print_hex_dump_debug("", DUMP_PREFIX_NONE, 16, 1, buf, len, false);
-}
-
-/* print data in a table format:
- *
- * addr: xx xx xx xx xx xx
- * addr: xx xx xx xx xx xx
- * ...
- */
-static inline void raw_dump_table(const char *caller, char *msg,
- unsigned char *buf, int len)
-{
- if (msg)
- pr_debug("%s():%s:\n", caller, msg);
-
- print_hex_dump_debug("\t", DUMP_PREFIX_OFFSET, 16, 1, buf, len, false);
-}
-#else
-static inline void raw_dump_table(const char *caller, char *msg,
- unsigned char *buf, int len) { }
-static inline void raw_dump_inline(const char *caller, char *msg,
- unsigned char *buf, int len) { }
-#endif
-
-static inline int lowpan_fetch_skb_u8(struct sk_buff *skb, u8 *val)
-{
- if (unlikely(!pskb_may_pull(skb, 1)))
- return -EINVAL;
-
- *val = skb->data[0];
- skb_pull(skb, 1);
-
- return 0;
-}
-
-static inline int lowpan_fetch_skb_u16(struct sk_buff *skb, u16 *val)
-{
- if (unlikely(!pskb_may_pull(skb, 2)))
- return -EINVAL;
-
- *val = (skb->data[0] << 8) | skb->data[1];
- skb_pull(skb, 2);
-
- return 0;
-}
-
-static inline bool lowpan_fetch_skb(struct sk_buff *skb,
- void *data, const unsigned int len)
-{
- if (unlikely(!pskb_may_pull(skb, len)))
- return true;
-
- skb_copy_from_linear_data(skb, data, len);
- skb_pull(skb, len);
-
- return false;
-}
-
-static inline void lowpan_push_hc_data(u8 **hc_ptr, const void *data,
- const size_t len)
-{
- memcpy(*hc_ptr, data, len);
- *hc_ptr += len;
-}
-
-typedef int (*skb_delivery_cb)(struct sk_buff *skb, struct net_device *dev);
-
-int lowpan_process_data(struct sk_buff *skb, struct net_device *dev,
- const u8 *saddr, const u8 saddr_type, const u8 saddr_len,
- const u8 *daddr, const u8 daddr_type, const u8 daddr_len,
- u8 iphc0, u8 iphc1, skb_delivery_cb skb_deliver);
-int lowpan_header_compress(struct sk_buff *skb, struct net_device *dev,
- unsigned short type, const void *_daddr,
- const void *_saddr, unsigned int len);
-
-#endif /* __6LOWPAN_H__ */
diff --git a/net/ieee802154/6lowpan_iphc.c b/net/ieee802154/6lowpan_iphc.c
index 860aa2d445b..211b5686d71 100644
--- a/net/ieee802154/6lowpan_iphc.c
+++ b/net/ieee802154/6lowpan_iphc.c
@@ -54,11 +54,10 @@
#include <linux/if_arp.h>
#include <linux/module.h>
#include <linux/netdevice.h>
+#include <net/6lowpan.h>
#include <net/ipv6.h>
#include <net/af_ieee802154.h>
-#include "6lowpan.h"
-
/*
* Uncompress address function for source and
* destination address(non-multicast).
diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan_rtnl.c
index 8edfea5da57..fe6bd7a7108 100644
--- a/net/ieee802154/6lowpan.c
+++ b/net/ieee802154/6lowpan_rtnl.c
@@ -1,10 +1,8 @@
-/*
- * Copyright 2011, Siemens AG
+/* Copyright 2011, Siemens AG
* written by Alexander Smirnov <alex.bluesman.smirnov@gmail.com>
*/
-/*
- * Based on patches from Jon Smirl <jonsmirl@gmail.com>
+/* Based on patches from Jon Smirl <jonsmirl@gmail.com>
* Copyright (c) 2011 Jon Smirl <jonsmirl@gmail.com>
*
* This program is free software; you can redistribute it and/or modify
@@ -15,10 +13,6 @@
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
/* Jon's code is based on 6lowpan implementation for Contiki which is:
@@ -58,9 +52,10 @@
#include <net/af_ieee802154.h>
#include <net/ieee802154.h>
#include <net/ieee802154_netdev.h>
+#include <net/6lowpan.h>
#include <net/ipv6.h>
-#include "6lowpan.h"
+#include "reassembly.h"
static LIST_HEAD(lowpan_devices);
@@ -68,7 +63,7 @@ static LIST_HEAD(lowpan_devices);
struct lowpan_dev_info {
struct net_device *real_dev; /* real WPAN device ptr */
struct mutex dev_list_mtx; /* mutex for list ops */
- unsigned short fragment_tag;
+ __be16 fragment_tag;
};
struct lowpan_dev_record {
@@ -76,18 +71,6 @@ struct lowpan_dev_record {
struct list_head list;
};
-struct lowpan_fragment {
- struct sk_buff *skb; /* skb to be assembled */
- u16 length; /* length to be assemled */
- u32 bytes_rcv; /* bytes received */
- u16 tag; /* current fragment tag */
- struct timer_list timer; /* assembling timer */
- struct list_head list; /* fragments list */
-};
-
-static LIST_HEAD(lowpan_fragments);
-static DEFINE_SPINLOCK(flist_lock);
-
static inline struct
lowpan_dev_info *lowpan_dev_info(const struct net_device *dev)
{
@@ -109,6 +92,7 @@ static int lowpan_header_create(struct sk_buff *skb,
const u8 *saddr = _saddr;
const u8 *daddr = _daddr;
struct ieee802154_addr sa, da;
+ struct ieee802154_mac_cb *cb = mac_cb_init(skb);
/* TODO:
* if this package isn't ipv6 one, where should it be routed?
@@ -124,44 +108,39 @@ static int lowpan_header_create(struct sk_buff *skb,
lowpan_header_compress(skb, dev, type, daddr, saddr, len);
- /*
- * NOTE1: I'm still unsure about the fact that compression and WPAN
+ /* NOTE1: I'm still unsure about the fact that compression and WPAN
* header are created here and not later in the xmit. So wait for
* an opinion of net maintainers.
*/
- /*
- * NOTE2: to be absolutely correct, we must derive PANid information
+ /* NOTE2: to be absolutely correct, we must derive PANid information
* from MAC subif of the 'dev' and 'real_dev' network devices, but
* this isn't implemented in mainline yet, so currently we assign 0xff
*/
- mac_cb(skb)->flags = IEEE802154_FC_TYPE_DATA;
- mac_cb(skb)->seq = ieee802154_mlme_ops(dev)->get_dsn(dev);
+ cb->type = IEEE802154_FC_TYPE_DATA;
/* prepare wpan address data */
- sa.addr_type = IEEE802154_ADDR_LONG;
+ sa.mode = IEEE802154_ADDR_LONG;
sa.pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev);
+ sa.extended_addr = ieee802154_devaddr_from_raw(saddr);
- memcpy(&(sa.hwaddr), saddr, 8);
/* intra-PAN communications */
- da.pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev);
+ da.pan_id = sa.pan_id;
- /*
- * if the destination address is the broadcast address, use the
+ /* if the destination address is the broadcast address, use the
* corresponding short address
*/
if (lowpan_is_addr_broadcast(daddr)) {
- da.addr_type = IEEE802154_ADDR_SHORT;
- da.short_addr = IEEE802154_ADDR_BROADCAST;
+ da.mode = IEEE802154_ADDR_SHORT;
+ da.short_addr = cpu_to_le16(IEEE802154_ADDR_BROADCAST);
} else {
- da.addr_type = IEEE802154_ADDR_LONG;
- memcpy(&(da.hwaddr), daddr, IEEE802154_ADDR_LEN);
-
- /* request acknowledgment */
- mac_cb(skb)->flags |= MAC_CB_FLAG_ACKREQ;
+ da.mode = IEEE802154_ADDR_LONG;
+ da.extended_addr = ieee802154_devaddr_from_raw(daddr);
}
+ cb->ackreq = !lowpan_is_addr_broadcast(daddr);
+
return dev_hard_header(skb, lowpan_dev_info(dev)->real_dev,
- type, (void *)&da, (void *)&sa, skb->len);
+ type, (void *)&da, (void *)&sa, 0);
}
static int lowpan_give_skb_to_devices(struct sk_buff *skb,
@@ -188,73 +167,11 @@ static int lowpan_give_skb_to_devices(struct sk_buff *skb,
return stat;
}
-static void lowpan_fragment_timer_expired(unsigned long entry_addr)
-{
- struct lowpan_fragment *entry = (struct lowpan_fragment *)entry_addr;
-
- pr_debug("timer expired for frame with tag %d\n", entry->tag);
-
- list_del(&entry->list);
- dev_kfree_skb(entry->skb);
- kfree(entry);
-}
-
-static struct lowpan_fragment *
-lowpan_alloc_new_frame(struct sk_buff *skb, u16 len, u16 tag)
-{
- struct lowpan_fragment *frame;
-
- frame = kzalloc(sizeof(struct lowpan_fragment),
- GFP_ATOMIC);
- if (!frame)
- goto frame_err;
-
- INIT_LIST_HEAD(&frame->list);
-
- frame->length = len;
- frame->tag = tag;
-
- /* allocate buffer for frame assembling */
- frame->skb = netdev_alloc_skb_ip_align(skb->dev, frame->length +
- sizeof(struct ipv6hdr));
-
- if (!frame->skb)
- goto skb_err;
-
- frame->skb->priority = skb->priority;
-
- /* reserve headroom for uncompressed ipv6 header */
- skb_reserve(frame->skb, sizeof(struct ipv6hdr));
- skb_put(frame->skb, frame->length);
-
- /* copy the first control block to keep a
- * trace of the link-layer addresses in case
- * of a link-local compressed address
- */
- memcpy(frame->skb->cb, skb->cb, sizeof(skb->cb));
-
- init_timer(&frame->timer);
- /* time out is the same as for ipv6 - 60 sec */
- frame->timer.expires = jiffies + LOWPAN_FRAG_TIMEOUT;
- frame->timer.data = (unsigned long)frame;
- frame->timer.function = lowpan_fragment_timer_expired;
-
- add_timer(&frame->timer);
-
- list_add_tail(&frame->list, &lowpan_fragments);
-
- return frame;
-
-skb_err:
- kfree(frame);
-frame_err:
- return NULL;
-}
-
-static int process_data(struct sk_buff *skb)
+static int process_data(struct sk_buff *skb, const struct ieee802154_hdr *hdr)
{
u8 iphc0, iphc1;
- const struct ieee802154_addr *_saddr, *_daddr;
+ struct ieee802154_addr_sa sa, da;
+ void *sap, *dap;
raw_dump_table(__func__, "raw skb data dump", skb->data, skb->len);
/* at least two bytes will be used for the encoding */
@@ -264,108 +181,27 @@ static int process_data(struct sk_buff *skb)
if (lowpan_fetch_skb_u8(skb, &iphc0))
goto drop;
- /* fragments assembling */
- switch (iphc0 & LOWPAN_DISPATCH_MASK) {
- case LOWPAN_DISPATCH_FRAG1:
- case LOWPAN_DISPATCH_FRAGN:
- {
- struct lowpan_fragment *frame;
- /* slen stores the rightmost 8 bits of the 11 bits length */
- u8 slen, offset = 0;
- u16 len, tag;
- bool found = false;
-
- if (lowpan_fetch_skb_u8(skb, &slen) || /* frame length */
- lowpan_fetch_skb_u16(skb, &tag)) /* fragment tag */
- goto drop;
-
- /* adds the 3 MSB to the 8 LSB to retrieve the 11 bits length */
- len = ((iphc0 & 7) << 8) | slen;
-
- if ((iphc0 & LOWPAN_DISPATCH_MASK) == LOWPAN_DISPATCH_FRAG1) {
- pr_debug("%s received a FRAG1 packet (tag: %d, "
- "size of the entire IP packet: %d)",
- __func__, tag, len);
- } else { /* FRAGN */
- if (lowpan_fetch_skb_u8(skb, &offset))
- goto unlock_and_drop;
- pr_debug("%s received a FRAGN packet (tag: %d, "
- "size of the entire IP packet: %d, "
- "offset: %d)", __func__, tag, len, offset * 8);
- }
-
- /*
- * check if frame assembling with the same tag is
- * already in progress
- */
- spin_lock_bh(&flist_lock);
-
- list_for_each_entry(frame, &lowpan_fragments, list)
- if (frame->tag == tag) {
- found = true;
- break;
- }
-
- /* alloc new frame structure */
- if (!found) {
- pr_debug("%s first fragment received for tag %d, "
- "begin packet reassembly", __func__, tag);
- frame = lowpan_alloc_new_frame(skb, len, tag);
- if (!frame)
- goto unlock_and_drop;
- }
-
- /* if payload fits buffer, copy it */
- if (likely((offset * 8 + skb->len) <= frame->length))
- skb_copy_to_linear_data_offset(frame->skb, offset * 8,
- skb->data, skb->len);
- else
- goto unlock_and_drop;
-
- frame->bytes_rcv += skb->len;
-
- /* frame assembling complete */
- if ((frame->bytes_rcv == frame->length) &&
- frame->timer.expires > jiffies) {
- /* if timer haven't expired - first of all delete it */
- del_timer_sync(&frame->timer);
- list_del(&frame->list);
- spin_unlock_bh(&flist_lock);
-
- pr_debug("%s successfully reassembled fragment "
- "(tag %d)", __func__, tag);
-
- dev_kfree_skb(skb);
- skb = frame->skb;
- kfree(frame);
-
- if (lowpan_fetch_skb_u8(skb, &iphc0))
- goto drop;
-
- break;
- }
- spin_unlock_bh(&flist_lock);
-
- return kfree_skb(skb), 0;
- }
- default:
- break;
- }
-
if (lowpan_fetch_skb_u8(skb, &iphc1))
goto drop;
- _saddr = &mac_cb(skb)->sa;
- _daddr = &mac_cb(skb)->da;
+ ieee802154_addr_to_sa(&sa, &hdr->source);
+ ieee802154_addr_to_sa(&da, &hdr->dest);
+
+ if (sa.addr_type == IEEE802154_ADDR_SHORT)
+ sap = &sa.short_addr;
+ else
+ sap = &sa.hwaddr;
- return lowpan_process_data(skb, skb->dev, (u8 *)_saddr->hwaddr,
- _saddr->addr_type, IEEE802154_ADDR_LEN,
- (u8 *)_daddr->hwaddr, _daddr->addr_type,
- IEEE802154_ADDR_LEN, iphc0, iphc1,
- lowpan_give_skb_to_devices);
+ if (da.addr_type == IEEE802154_ADDR_SHORT)
+ dap = &da.short_addr;
+ else
+ dap = &da.hwaddr;
+
+ return lowpan_process_data(skb, skb->dev, sap, sa.addr_type,
+ IEEE802154_ADDR_LEN, dap, da.addr_type,
+ IEEE802154_ADDR_LEN, iphc0, iphc1,
+ lowpan_give_skb_to_devices);
-unlock_and_drop:
- spin_unlock_bh(&flist_lock);
drop:
kfree_skb(skb);
return -EINVAL;
@@ -384,122 +220,149 @@ static int lowpan_set_address(struct net_device *dev, void *p)
return 0;
}
-static int
-lowpan_fragment_xmit(struct sk_buff *skb, u8 *head,
- int mlen, int plen, int offset, int type)
+/* Allocate an skb for one fragment on the real WPAN device and build its
+ * link-layer header from the dest/source addresses of master_hdr.
+ *
+ * Returns the new skb, or an ERR_PTR() carrying a negative errno:
+ * -ENOMEM if alloc_skb() fails, otherwise the error from dev_hard_header().
+ */
+static struct sk_buff*
+lowpan_alloc_frag(struct sk_buff *skb, int size,
+ const struct ieee802154_hdr *master_hdr)
{
+ struct net_device *real_dev = lowpan_dev_info(skb->dev)->real_dev;
struct sk_buff *frag;
- int hlen;
-
- hlen = (type == LOWPAN_DISPATCH_FRAG1) ?
- LOWPAN_FRAG1_HEAD_SIZE : LOWPAN_FRAGN_HEAD_SIZE;
-
- raw_dump_inline(__func__, "6lowpan fragment header", head, hlen);
+ int rc;
+
+ frag = alloc_skb(real_dev->hard_header_len +
+ real_dev->needed_tailroom + size,
+ GFP_ATOMIC);
+
+ if (likely(frag)) {
+ frag->dev = real_dev;
+ frag->priority = skb->priority;
+ skb_reserve(frag, real_dev->hard_header_len);
+ skb_reset_network_header(frag);
+ *mac_cb(frag) = *mac_cb(skb);
+
+ rc = dev_hard_header(frag, real_dev, 0, &master_hdr->dest,
+ &master_hdr->source, size);
+ if (rc < 0) {
+ kfree_skb(frag);
+ /* rc is already negative; negating it would defeat IS_ERR() */
+ return ERR_PTR(rc);
+ }
+ } else {
+ /* ERR_PTR() needs a negative errno for IS_ERR() to detect it */
+ frag = ERR_PTR(-ENOMEM);
+ }
- frag = netdev_alloc_skb(skb->dev,
- hlen + mlen + plen + IEEE802154_MFR_SIZE);
- if (!frag)
- return -ENOMEM;
+ return frag;
+}
- frag->priority = skb->priority;
+/* Build one fragment (frag_hdr followed by len bytes taken at offset from
+ * skb's network header) and queue it for transmission.
+ *
+ * Returns the dev_queue_xmit() result, or a negative errno if the fragment
+ * skb could not be set up.
+ */
+static int
+lowpan_xmit_fragment(struct sk_buff *skb, const struct ieee802154_hdr *wpan_hdr,
+ u8 *frag_hdr, int frag_hdrlen,
+ int offset, int len)
+{
+ struct sk_buff *frag;
- /* copy header, MFR and payload */
- skb_put(frag, mlen);
- skb_copy_to_linear_data(frag, skb_mac_header(skb), mlen);
+ raw_dump_inline(__func__, " fragment header", frag_hdr, frag_hdrlen);
- skb_put(frag, hlen);
- skb_copy_to_linear_data_offset(frag, mlen, head, hlen);
+ frag = lowpan_alloc_frag(skb, frag_hdrlen + len, wpan_hdr);
+ if (IS_ERR(frag))
+ return PTR_ERR(frag); /* negative errno, so callers testing rc < 0 see it */
- skb_put(frag, plen);
- skb_copy_to_linear_data_offset(frag, mlen + hlen,
- skb_network_header(skb) + offset, plen);
+ memcpy(skb_put(frag, frag_hdrlen), frag_hdr, frag_hdrlen);
+ memcpy(skb_put(frag, len), skb_network_header(skb) + offset, len);
- raw_dump_table(__func__, " raw fragment dump", frag->data, frag->len);
+ raw_dump_table(__func__, " fragment dump", frag->data, frag->len);
return dev_queue_xmit(frag);
}
static int
-lowpan_skb_fragmentation(struct sk_buff *skb, struct net_device *dev)
+lowpan_xmit_fragmented(struct sk_buff *skb, struct net_device *dev,
+ const struct ieee802154_hdr *wpan_hdr)
{
- int err, header_length, payload_length, tag, offset = 0;
- u8 head[5];
-
- header_length = skb->mac_len;
- payload_length = skb->len - header_length;
- tag = lowpan_dev_info(dev)->fragment_tag++;
+ u16 dgram_size, dgram_offset;
+ __be16 frag_tag;
+ u8 frag_hdr[5];
+ int frag_cap, frag_len, payload_cap, rc;
+ int skb_unprocessed, skb_offset;
- /* first fragment header */
- head[0] = LOWPAN_DISPATCH_FRAG1 | ((payload_length >> 8) & 0x7);
- head[1] = payload_length & 0xff;
- head[2] = tag >> 8;
- head[3] = tag & 0xff;
+ dgram_size = lowpan_uncompress_size(skb, &dgram_offset) -
+ skb->mac_len;
+ frag_tag = lowpan_dev_info(dev)->fragment_tag++;
- err = lowpan_fragment_xmit(skb, head, header_length, LOWPAN_FRAG_SIZE,
- 0, LOWPAN_DISPATCH_FRAG1);
+ frag_hdr[0] = LOWPAN_DISPATCH_FRAG1 | ((dgram_size >> 8) & 0x07);
+ frag_hdr[1] = dgram_size & 0xff;
+ memcpy(frag_hdr + 2, &frag_tag, sizeof(frag_tag));
- if (err) {
- pr_debug("%s unable to send FRAG1 packet (tag: %d)",
- __func__, tag);
- goto exit;
- }
+ payload_cap = ieee802154_max_payload(wpan_hdr);
- offset = LOWPAN_FRAG_SIZE;
+ frag_len = round_down(payload_cap - LOWPAN_FRAG1_HEAD_SIZE -
+ skb_network_header_len(skb), 8);
- /* next fragment header */
- head[0] &= ~LOWPAN_DISPATCH_FRAG1;
- head[0] |= LOWPAN_DISPATCH_FRAGN;
+ skb_offset = skb_network_header_len(skb);
+ skb_unprocessed = skb->len - skb->mac_len - skb_offset;
- while (payload_length - offset > 0) {
- int len = LOWPAN_FRAG_SIZE;
-
- head[4] = offset / 8;
-
- if (payload_length - offset < len)
- len = payload_length - offset;
+ rc = lowpan_xmit_fragment(skb, wpan_hdr, frag_hdr,
+ LOWPAN_FRAG1_HEAD_SIZE, 0,
+ frag_len + skb_network_header_len(skb));
+ if (rc) {
+ pr_debug("%s unable to send FRAG1 packet (tag: %d)",
+ __func__, frag_tag);
+ goto err;
+ }
- err = lowpan_fragment_xmit(skb, head, header_length,
- len, offset, LOWPAN_DISPATCH_FRAGN);
- if (err) {
- pr_debug("%s unable to send a subsequent FRAGN packet "
- "(tag: %d, offset: %d", __func__, tag, offset);
- goto exit;
+ frag_hdr[0] &= ~LOWPAN_DISPATCH_FRAG1;
+ frag_hdr[0] |= LOWPAN_DISPATCH_FRAGN;
+ frag_cap = round_down(payload_cap - LOWPAN_FRAGN_HEAD_SIZE, 8);
+
+ do {
+ dgram_offset += frag_len;
+ skb_offset += frag_len;
+ skb_unprocessed -= frag_len;
+ frag_len = min(frag_cap, skb_unprocessed);
+
+ frag_hdr[4] = dgram_offset >> 3;
+
+ rc = lowpan_xmit_fragment(skb, wpan_hdr, frag_hdr,
+ LOWPAN_FRAGN_HEAD_SIZE, skb_offset,
+ frag_len);
+ if (rc) {
+ pr_debug("%s unable to send a FRAGN packet. (tag: %d, offset: %d)\n",
+ __func__, frag_tag, skb_offset);
+ goto err;
}
+ } while (skb_unprocessed > frag_cap);
- offset += len;
- }
+ consume_skb(skb);
+ return NET_XMIT_SUCCESS;
-exit:
- return err;
+err:
+ kfree_skb(skb);
+ return rc;
}
static netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *dev)
{
- int err = -1;
+ struct ieee802154_hdr wpan_hdr;
+ int max_single;
pr_debug("package xmit\n");
- skb->dev = lowpan_dev_info(dev)->real_dev;
- if (skb->dev == NULL) {
- pr_debug("ERROR: no real wpan device found\n");
- goto error;
+ if (ieee802154_hdr_peek(skb, &wpan_hdr) < 0) {
+ kfree_skb(skb);
+ return NET_XMIT_DROP;
}
- /* Send directly if less than the MTU minus the 2 checksum bytes. */
- if (skb->len <= IEEE802154_MTU - IEEE802154_MFR_SIZE) {
- err = dev_queue_xmit(skb);
- goto out;
- }
+ max_single = ieee802154_max_payload(&wpan_hdr);
- pr_debug("frame is too big, fragmentation is needed\n");
- err = lowpan_skb_fragmentation(skb, dev);
-error:
- dev_kfree_skb(skb);
-out:
- if (err)
- pr_debug("ERROR: xmit failed\n");
+ if (skb_tail_pointer(skb) - skb_network_header(skb) <= max_single) {
+ skb->dev = lowpan_dev_info(dev)->real_dev;
+ return dev_queue_xmit(skb);
+ } else {
+ netdev_tx_t rc;
+
+ pr_debug("frame is too big, fragmentation is needed\n");
+ rc = lowpan_xmit_fragmented(skb, dev, &wpan_hdr);
- return (err < 0) ? NET_XMIT_DROP : err;
+ return rc < 0 ? NET_XMIT_DROP : rc;
+ }
}
static struct wpan_phy *lowpan_get_phy(const struct net_device *dev)
@@ -508,13 +371,13 @@ static struct wpan_phy *lowpan_get_phy(const struct net_device *dev)
return ieee802154_mlme_ops(real_dev)->get_phy(real_dev);
}
-static u16 lowpan_get_pan_id(const struct net_device *dev)
+static __le16 lowpan_get_pan_id(const struct net_device *dev)
{
struct net_device *real_dev = lowpan_dev_info(dev)->real_dev;
return ieee802154_mlme_ops(real_dev)->get_pan_id(real_dev);
}
-static u16 lowpan_get_short_addr(const struct net_device *dev)
+static __le16 lowpan_get_short_addr(const struct net_device *dev)
{
struct net_device *real_dev = lowpan_dev_info(dev)->real_dev;
return ieee802154_mlme_ops(real_dev)->get_short_addr(real_dev);
@@ -593,45 +456,55 @@ static int lowpan_validate(struct nlattr *tb[], struct nlattr *data[])
static int lowpan_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt, struct net_device *orig_dev)
{
- struct sk_buff *local_skb;
+ struct ieee802154_hdr hdr;
+ int ret;
- if (!netif_running(dev))
+ skb = skb_share_check(skb, GFP_ATOMIC);
+ if (!skb)
goto drop;
+ if (!netif_running(dev))
+ goto drop_skb;
+
if (dev->type != ARPHRD_IEEE802154)
- goto drop;
+ goto drop_skb;
+
+ if (ieee802154_hdr_peek_addrs(skb, &hdr) < 0)
+ goto drop_skb;
/* check that it's our buffer */
if (skb->data[0] == LOWPAN_DISPATCH_IPV6) {
- /* Copy the packet so that the IPv6 header is
- * properly aligned.
- */
- local_skb = skb_copy_expand(skb, NET_SKB_PAD - 1,
- skb_tailroom(skb), GFP_ATOMIC);
- if (!local_skb)
- goto drop;
-
- local_skb->protocol = htons(ETH_P_IPV6);
- local_skb->pkt_type = PACKET_HOST;
+ skb->protocol = htons(ETH_P_IPV6);
+ skb->pkt_type = PACKET_HOST;
/* Pull off the 1-byte of 6lowpan header. */
- skb_pull(local_skb, 1);
+ skb_pull(skb, 1);
- lowpan_give_skb_to_devices(local_skb, NULL);
-
- kfree_skb(local_skb);
- kfree_skb(skb);
+ ret = lowpan_give_skb_to_devices(skb, NULL);
+ if (ret == NET_RX_DROP)
+ goto drop;
} else {
switch (skb->data[0] & 0xe0) {
case LOWPAN_DISPATCH_IPHC: /* ipv6 datagram */
+ ret = process_data(skb, &hdr);
+ if (ret == NET_RX_DROP)
+ goto drop;
+ break;
case LOWPAN_DISPATCH_FRAG1: /* first fragment header */
+ ret = lowpan_frag_rcv(skb, LOWPAN_DISPATCH_FRAG1);
+ if (ret == 1) {
+ ret = process_data(skb, &hdr);
+ if (ret == NET_RX_DROP)
+ goto drop;
+ }
+ break;
case LOWPAN_DISPATCH_FRAGN: /* next fragments headers */
- local_skb = skb_clone(skb, GFP_ATOMIC);
- if (!local_skb)
- goto drop;
- process_data(local_skb);
-
- kfree_skb(skb);
+ ret = lowpan_frag_rcv(skb, LOWPAN_DISPATCH_FRAGN);
+ if (ret == 1) {
+ ret = process_data(skb, &hdr);
+ if (ret == NET_RX_DROP)
+ goto drop;
+ }
break;
default:
break;
@@ -639,9 +512,9 @@ static int lowpan_rcv(struct sk_buff *skb, struct net_device *dev,
}
return NET_RX_SUCCESS;
-
-drop:
+drop_skb:
kfree_skb(skb);
+drop:
return NET_RX_DROP;
}
@@ -665,10 +538,9 @@ static int lowpan_newlink(struct net *src_net, struct net_device *dev,
}
lowpan_dev_info(dev)->real_dev = real_dev;
- lowpan_dev_info(dev)->fragment_tag = 0;
mutex_init(&lowpan_dev_info(dev)->dev_list_mtx);
- entry = kzalloc(sizeof(struct lowpan_dev_record), GFP_KERNEL);
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
if (!entry) {
dev_put(real_dev);
lowpan_dev_info(dev)->real_dev = NULL;
@@ -761,7 +633,7 @@ static struct notifier_block lowpan_dev_notifier = {
};
static struct packet_type lowpan_packet_type = {
- .type = __constant_htons(ETH_P_IEEE802154),
+ .type = htons(ETH_P_IEEE802154),
.func = lowpan_rcv,
};
@@ -769,43 +641,40 @@ static int __init lowpan_init_module(void)
{
int err = 0;
- err = lowpan_netlink_init();
+ err = lowpan_net_frag_init();
if (err < 0)
goto out;
+ err = lowpan_netlink_init();
+ if (err < 0)
+ goto out_frag;
+
dev_add_pack(&lowpan_packet_type);
err = register_netdevice_notifier(&lowpan_dev_notifier);
- if (err < 0) {
- dev_remove_pack(&lowpan_packet_type);
- lowpan_netlink_fini();
- }
+ if (err < 0)
+ goto out_pack;
+
+ return 0;
+
+out_pack:
+ dev_remove_pack(&lowpan_packet_type);
+ lowpan_netlink_fini();
+out_frag:
+ lowpan_net_frag_exit();
out:
return err;
}
static void __exit lowpan_cleanup_module(void)
{
- struct lowpan_fragment *frame, *tframe;
-
lowpan_netlink_fini();
dev_remove_pack(&lowpan_packet_type);
- unregister_netdevice_notifier(&lowpan_dev_notifier);
+ lowpan_net_frag_exit();
- /* Now 6lowpan packet_type is removed, so no new fragments are
- * expected on RX, therefore that's the time to clean incomplete
- * fragments.
- */
- spin_lock_bh(&flist_lock);
- list_for_each_entry_safe(frame, tframe, &lowpan_fragments, list) {
- del_timer_sync(&frame->timer);
- list_del(&frame->list);
- dev_kfree_skb(frame->skb);
- kfree(frame);
- }
- spin_unlock_bh(&flist_lock);
+ unregister_netdevice_notifier(&lowpan_dev_notifier);
}
module_init(lowpan_init_module);
diff --git a/net/ieee802154/Kconfig b/net/ieee802154/Kconfig
index 9c9879d5ea6..8af1330b313 100644
--- a/net/ieee802154/Kconfig
+++ b/net/ieee802154/Kconfig
@@ -15,7 +15,7 @@ config IEEE802154_6LOWPAN
depends on IEEE802154 && IPV6
select 6LOWPAN_IPHC
---help---
- IPv6 compression over IEEE 802.15.4.
+ IPv6 compression over IEEE 802.15.4.
config 6LOWPAN_IPHC
tristate
diff --git a/net/ieee802154/Makefile b/net/ieee802154/Makefile
index e8f05885ced..bf1b51497a4 100644
--- a/net/ieee802154/Makefile
+++ b/net/ieee802154/Makefile
@@ -2,5 +2,9 @@ obj-$(CONFIG_IEEE802154) += ieee802154.o af_802154.o
obj-$(CONFIG_IEEE802154_6LOWPAN) += 6lowpan.o
obj-$(CONFIG_6LOWPAN_IPHC) += 6lowpan_iphc.o
-ieee802154-y := netlink.o nl-mac.o nl-phy.o nl_policy.o wpan-class.o
+6lowpan-y := 6lowpan_rtnl.o reassembly.o
+ieee802154-y := netlink.o nl-mac.o nl-phy.o nl_policy.o wpan-class.o \
+ header_ops.o
af_802154-y := af_ieee802154.o raw.o dgram.o
+
+ccflags-y += -D__CHECK_ENDIAN__
diff --git a/net/ieee802154/af802154.h b/net/ieee802154/af802154.h
index b1ec5253752..8330a09bfc9 100644
--- a/net/ieee802154/af802154.h
+++ b/net/ieee802154/af802154.h
@@ -25,12 +25,13 @@
#define AF802154_H
struct sk_buff;
-struct net_devce;
+struct net_device;
+struct ieee802154_addr;
extern struct proto ieee802154_raw_prot;
extern struct proto ieee802154_dgram_prot;
void ieee802154_raw_deliver(struct net_device *dev, struct sk_buff *skb);
int ieee802154_dgram_deliver(struct net_device *dev, struct sk_buff *skb);
struct net_device *ieee802154_get_dev(struct net *net,
- struct ieee802154_addr *addr);
+ const struct ieee802154_addr *addr);
#endif
diff --git a/net/ieee802154/af_ieee802154.c b/net/ieee802154/af_ieee802154.c
index 40e606f3788..351d9a94ec2 100644
--- a/net/ieee802154/af_ieee802154.c
+++ b/net/ieee802154/af_ieee802154.c
@@ -43,25 +43,27 @@
/*
* Utility function for families
*/
-struct net_device *ieee802154_get_dev(struct net *net,
- struct ieee802154_addr *addr)
+struct net_device*
+ieee802154_get_dev(struct net *net, const struct ieee802154_addr *addr)
{
struct net_device *dev = NULL;
struct net_device *tmp;
- u16 pan_id, short_addr;
+ __le16 pan_id, short_addr;
+ u8 hwaddr[IEEE802154_ADDR_LEN];
- switch (addr->addr_type) {
+ switch (addr->mode) {
case IEEE802154_ADDR_LONG:
+ ieee802154_devaddr_to_raw(hwaddr, addr->extended_addr);
rcu_read_lock();
- dev = dev_getbyhwaddr_rcu(net, ARPHRD_IEEE802154, addr->hwaddr);
+ dev = dev_getbyhwaddr_rcu(net, ARPHRD_IEEE802154, hwaddr);
if (dev)
dev_hold(dev);
rcu_read_unlock();
break;
case IEEE802154_ADDR_SHORT:
- if (addr->pan_id == 0xffff ||
- addr->short_addr == IEEE802154_ADDR_UNDEF ||
- addr->short_addr == 0xffff)
+ if (addr->pan_id == cpu_to_le16(IEEE802154_PANID_BROADCAST) ||
+ addr->short_addr == cpu_to_le16(IEEE802154_ADDR_UNDEF) ||
+ addr->short_addr == cpu_to_le16(IEEE802154_ADDR_BROADCAST))
break;
rtnl_lock();
@@ -86,7 +88,7 @@ struct net_device *ieee802154_get_dev(struct net *net,
break;
default:
pr_warning("Unsupported ieee802154 address type: %d\n",
- addr->addr_type);
+ addr->mode);
break;
}
@@ -326,7 +328,7 @@ drop:
static struct packet_type ieee802154_packet_type = {
- .type = __constant_htons(ETH_P_IEEE802154),
+ .type = htons(ETH_P_IEEE802154),
.func = ieee802154_rcv,
};
diff --git a/net/ieee802154/dgram.c b/net/ieee802154/dgram.c
index 1846c1fe0d0..4f0ed878019 100644
--- a/net/ieee802154/dgram.c
+++ b/net/ieee802154/dgram.c
@@ -21,6 +21,7 @@
* Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
*/
+#include <linux/capability.h>
#include <linux/net.h>
#include <linux/module.h>
#include <linux/if_arp.h>
@@ -45,7 +46,12 @@ struct dgram_sock {
struct ieee802154_addr dst_addr;
unsigned int bound:1;
+ unsigned int connected:1;
unsigned int want_ack:1;
+ unsigned int secen:1;
+ unsigned int secen_override:1;
+ unsigned int seclevel:3;
+ unsigned int seclevel_override:1;
};
static inline struct dgram_sock *dgram_sk(const struct sock *sk)
@@ -73,10 +79,7 @@ static int dgram_init(struct sock *sk)
{
struct dgram_sock *ro = dgram_sk(sk);
- ro->dst_addr.addr_type = IEEE802154_ADDR_LONG;
- ro->dst_addr.pan_id = 0xffff;
ro->want_ack = 1;
- memset(&ro->dst_addr.hwaddr, 0xff, sizeof(ro->dst_addr.hwaddr));
return 0;
}
@@ -88,6 +91,7 @@ static void dgram_close(struct sock *sk, long timeout)
static int dgram_bind(struct sock *sk, struct sockaddr *uaddr, int len)
{
struct sockaddr_ieee802154 *addr = (struct sockaddr_ieee802154 *)uaddr;
+ struct ieee802154_addr haddr;
struct dgram_sock *ro = dgram_sk(sk);
int err = -EINVAL;
struct net_device *dev;
@@ -102,7 +106,8 @@ static int dgram_bind(struct sock *sk, struct sockaddr *uaddr, int len)
if (addr->family != AF_IEEE802154)
goto out;
- dev = ieee802154_get_dev(sock_net(sk), &addr->addr);
+ ieee802154_addr_from_sa(&haddr, &addr->addr);
+ dev = ieee802154_get_dev(sock_net(sk), &haddr);
if (!dev) {
err = -ENODEV;
goto out;
@@ -113,7 +118,7 @@ static int dgram_bind(struct sock *sk, struct sockaddr *uaddr, int len)
goto out_put;
}
- memcpy(&ro->src_addr, &addr->addr, sizeof(struct ieee802154_addr));
+ ro->src_addr = haddr;
ro->bound = 1;
err = 0;
@@ -149,8 +154,7 @@ static int dgram_ioctl(struct sock *sk, int cmd, unsigned long arg)
* of this packet since that is all
* that will be read.
*/
- /* FIXME: parse the header for more correct value */
- amount = skb->len - (3+8+8);
+ amount = skb->len - ieee802154_hdr_length(skb);
}
spin_unlock_bh(&sk->sk_receive_queue.lock);
return put_user(amount, (int __user *)arg);
@@ -181,7 +185,8 @@ static int dgram_connect(struct sock *sk, struct sockaddr *uaddr,
goto out;
}
- memcpy(&ro->dst_addr, &addr->addr, sizeof(struct ieee802154_addr));
+ ieee802154_addr_from_sa(&ro->dst_addr, &addr->addr);
+ ro->connected = 1;
out:
release_sock(sk);
@@ -193,10 +198,7 @@ static int dgram_disconnect(struct sock *sk, int flags)
struct dgram_sock *ro = dgram_sk(sk);
lock_sock(sk);
-
- ro->dst_addr.addr_type = IEEE802154_ADDR_LONG;
- memset(&ro->dst_addr.hwaddr, 0xff, sizeof(ro->dst_addr.hwaddr));
-
+ ro->connected = 0;
release_sock(sk);
return 0;
@@ -208,7 +210,9 @@ static int dgram_sendmsg(struct kiocb *iocb, struct sock *sk,
struct net_device *dev;
unsigned int mtu;
struct sk_buff *skb;
+ struct ieee802154_mac_cb *cb;
struct dgram_sock *ro = dgram_sk(sk);
+ struct ieee802154_addr dst_addr;
int hlen, tlen;
int err;
@@ -217,6 +221,11 @@ static int dgram_sendmsg(struct kiocb *iocb, struct sock *sk,
return -EOPNOTSUPP;
}
+ if (!ro->connected && !msg->msg_name)
+ return -EDESTADDRREQ;
+ else if (ro->connected && msg->msg_name)
+ return -EISCONN;
+
if (!ro->bound)
dev = dev_getfirstbyhwtype(sock_net(sk), ARPHRD_IEEE802154);
else
@@ -232,7 +241,7 @@ static int dgram_sendmsg(struct kiocb *iocb, struct sock *sk,
if (size > mtu) {
pr_debug("size = %Zu, mtu = %u\n", size, mtu);
- err = -EINVAL;
+ err = -EMSGSIZE;
goto out_dev;
}
@@ -248,18 +257,28 @@ static int dgram_sendmsg(struct kiocb *iocb, struct sock *sk,
skb_reset_network_header(skb);
- mac_cb(skb)->flags = IEEE802154_FC_TYPE_DATA;
- if (ro->want_ack)
- mac_cb(skb)->flags |= MAC_CB_FLAG_ACKREQ;
+ cb = mac_cb_init(skb);
+ cb->type = IEEE802154_FC_TYPE_DATA;
+ cb->ackreq = ro->want_ack;
+
+ if (msg->msg_name) {
+ DECLARE_SOCKADDR(struct sockaddr_ieee802154*, daddr, msg->msg_name);
+
+ ieee802154_addr_from_sa(&dst_addr, &daddr->addr);
+ } else {
+ dst_addr = ro->dst_addr;
+ }
+
+ cb->secen = ro->secen;
+ cb->secen_override = ro->secen_override;
+ cb->seclevel = ro->seclevel;
+ cb->seclevel_override = ro->seclevel_override;
- mac_cb(skb)->seq = ieee802154_mlme_ops(dev)->get_dsn(dev);
- err = dev_hard_header(skb, dev, ETH_P_IEEE802154, &ro->dst_addr,
- ro->bound ? &ro->src_addr : NULL, size);
+ err = dev_hard_header(skb, dev, ETH_P_IEEE802154, &dst_addr,
+ ro->bound ? &ro->src_addr : NULL, size);
if (err < 0)
goto out_skb;
- skb_reset_mac_header(skb);
-
err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
if (err < 0)
goto out_skb;
@@ -312,7 +331,7 @@ static int dgram_recvmsg(struct kiocb *iocb, struct sock *sk,
if (saddr) {
saddr->family = AF_IEEE802154;
- saddr->addr = mac_cb(skb)->sa;
+ ieee802154_addr_to_sa(&saddr->addr, &mac_cb(skb)->source);
*addr_len = sizeof(*saddr);
}
@@ -328,6 +347,10 @@ out:
static int dgram_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
+ skb = skb_share_check(skb, GFP_ATOMIC);
+ if (!skb)
+ return NET_RX_DROP;
+
if (sock_queue_rcv_skb(sk, skb) < 0) {
kfree_skb(skb);
return NET_RX_DROP;
@@ -336,40 +359,43 @@ static int dgram_rcv_skb(struct sock *sk, struct sk_buff *skb)
return NET_RX_SUCCESS;
}
-static inline int ieee802154_match_sock(u8 *hw_addr, u16 pan_id,
- u16 short_addr, struct dgram_sock *ro)
+static inline bool
+ieee802154_match_sock(__le64 hw_addr, __le16 pan_id, __le16 short_addr,
+ struct dgram_sock *ro)
{
if (!ro->bound)
- return 1;
+ return true;
- if (ro->src_addr.addr_type == IEEE802154_ADDR_LONG &&
- !memcmp(ro->src_addr.hwaddr, hw_addr, IEEE802154_ADDR_LEN))
- return 1;
+ if (ro->src_addr.mode == IEEE802154_ADDR_LONG &&
+ hw_addr == ro->src_addr.extended_addr)
+ return true;
- if (ro->src_addr.addr_type == IEEE802154_ADDR_SHORT &&
- pan_id == ro->src_addr.pan_id &&
- short_addr == ro->src_addr.short_addr)
- return 1;
+ if (ro->src_addr.mode == IEEE802154_ADDR_SHORT &&
+ pan_id == ro->src_addr.pan_id &&
+ short_addr == ro->src_addr.short_addr)
+ return true;
- return 0;
+ return false;
}
int ieee802154_dgram_deliver(struct net_device *dev, struct sk_buff *skb)
{
struct sock *sk, *prev = NULL;
int ret = NET_RX_SUCCESS;
- u16 pan_id, short_addr;
+ __le16 pan_id, short_addr;
+ __le64 hw_addr;
/* Data frame processing */
BUG_ON(dev->type != ARPHRD_IEEE802154);
pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev);
short_addr = ieee802154_mlme_ops(dev)->get_short_addr(dev);
+ hw_addr = ieee802154_devaddr_from_raw(dev->dev_addr);
read_lock(&dgram_lock);
sk_for_each(sk, &dgram_head) {
- if (ieee802154_match_sock(dev->dev_addr, pan_id, short_addr,
- dgram_sk(sk))) {
+ if (ieee802154_match_sock(hw_addr, pan_id, short_addr,
+ dgram_sk(sk))) {
if (prev) {
struct sk_buff *clone;
clone = skb_clone(skb, GFP_ATOMIC);
@@ -411,6 +437,20 @@ static int dgram_getsockopt(struct sock *sk, int level, int optname,
case WPAN_WANTACK:
val = ro->want_ack;
break;
+ case WPAN_SECURITY:
+ if (!ro->secen_override)
+ val = WPAN_SECURITY_DEFAULT;
+ else if (ro->secen)
+ val = WPAN_SECURITY_ON;
+ else
+ val = WPAN_SECURITY_OFF;
+ break;
+ case WPAN_SECURITY_LEVEL:
+ if (!ro->seclevel_override)
+ val = WPAN_SECURITY_LEVEL_DEFAULT;
+ else
+ val = ro->seclevel;
+ break;
default:
return -ENOPROTOOPT;
}
@@ -426,6 +466,7 @@ static int dgram_setsockopt(struct sock *sk, int level, int optname,
char __user *optval, unsigned int optlen)
{
struct dgram_sock *ro = dgram_sk(sk);
+ struct net *net = sock_net(sk);
int val;
int err = 0;
@@ -441,6 +482,47 @@ static int dgram_setsockopt(struct sock *sk, int level, int optname,
case WPAN_WANTACK:
ro->want_ack = !!val;
break;
+ case WPAN_SECURITY:
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN) &&
+ !ns_capable(net->user_ns, CAP_NET_RAW)) {
+ err = -EPERM;
+ break;
+ }
+
+ switch (val) {
+ case WPAN_SECURITY_DEFAULT:
+ ro->secen_override = 0;
+ break;
+ case WPAN_SECURITY_ON:
+ ro->secen_override = 1;
+ ro->secen = 1;
+ break;
+ case WPAN_SECURITY_OFF:
+ ro->secen_override = 1;
+ ro->secen = 0;
+ break;
+ default:
+ err = -EINVAL;
+ break;
+ }
+ break;
+ case WPAN_SECURITY_LEVEL:
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN) &&
+ !ns_capable(net->user_ns, CAP_NET_RAW)) {
+ err = -EPERM;
+ break;
+ }
+
+ if (val < WPAN_SECURITY_LEVEL_DEFAULT ||
+ val > IEEE802154_SCF_SECLEVEL_ENC_MIC128) {
+ err = -EINVAL;
+ } else if (val == WPAN_SECURITY_LEVEL_DEFAULT) {
+ ro->seclevel_override = 0;
+ } else {
+ ro->seclevel_override = 1;
+ ro->seclevel = val;
+ }
+ break;
default:
err = -ENOPROTOOPT;
break;
diff --git a/net/ieee802154/header_ops.c b/net/ieee802154/header_ops.c
new file mode 100644
index 00000000000..c09294e39ca
--- /dev/null
+++ b/net/ieee802154/header_ops.c
@@ -0,0 +1,325 @@
+/*
+ * Copyright (C) 2014 Fraunhofer ITWM
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Written by:
+ * Phoebe Buckheister <phoebe.buckheister@itwm.fraunhofer.de>
+ */
+
+#include <net/mac802154.h>
+#include <net/ieee802154.h>
+#include <net/ieee802154_netdev.h>
+
+static int
+ieee802154_hdr_push_addr(u8 *buf, const struct ieee802154_addr *addr,
+ bool omit_pan)
+{
+ int pos = 0;
+
+ if (addr->mode == IEEE802154_ADDR_NONE)
+ return 0;
+
+ if (!omit_pan) {
+ memcpy(buf + pos, &addr->pan_id, 2);
+ pos += 2;
+ }
+
+ switch (addr->mode) {
+ case IEEE802154_ADDR_SHORT:
+ memcpy(buf + pos, &addr->short_addr, 2);
+ pos += 2;
+ break;
+
+ case IEEE802154_ADDR_LONG:
+ memcpy(buf + pos, &addr->extended_addr, IEEE802154_ADDR_LEN);
+ pos += IEEE802154_ADDR_LEN;
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ return pos;
+}
+
+static int
+ieee802154_hdr_push_sechdr(u8 *buf, const struct ieee802154_sechdr *hdr)
+{
+ int pos = 5;
+
+ memcpy(buf, hdr, 1);
+ memcpy(buf + 1, &hdr->frame_counter, 4);
+
+ switch (hdr->key_id_mode) {
+ case IEEE802154_SCF_KEY_IMPLICIT:
+ return pos;
+
+ case IEEE802154_SCF_KEY_INDEX:
+ break;
+
+ case IEEE802154_SCF_KEY_SHORT_INDEX:
+ memcpy(buf + pos, &hdr->short_src, 4);
+ pos += 4;
+ break;
+
+ case IEEE802154_SCF_KEY_HW_INDEX:
+ memcpy(buf + pos, &hdr->extended_src, IEEE802154_ADDR_LEN);
+ pos += IEEE802154_ADDR_LEN;
+ break;
+ }
+
+ buf[pos++] = hdr->key_id;
+
+ return pos;
+}
+
+int
+ieee802154_hdr_push(struct sk_buff *skb, const struct ieee802154_hdr *hdr)
+{
+ u8 buf[MAC802154_FRAME_HARD_HEADER_LEN];
+ int pos = 2;
+ int rc;
+ struct ieee802154_hdr_fc fc = hdr->fc;
+
+ buf[pos++] = hdr->seq;
+
+ fc.dest_addr_mode = hdr->dest.mode;
+
+ rc = ieee802154_hdr_push_addr(buf + pos, &hdr->dest, false);
+ if (rc < 0)
+ return -EINVAL;
+ pos += rc;
+
+ fc.source_addr_mode = hdr->source.mode;
+
+ if (hdr->source.pan_id == hdr->dest.pan_id &&
+ hdr->dest.mode != IEEE802154_ADDR_NONE)
+ fc.intra_pan = true;
+
+ rc = ieee802154_hdr_push_addr(buf + pos, &hdr->source, fc.intra_pan);
+ if (rc < 0)
+ return -EINVAL;
+ pos += rc;
+
+ if (fc.security_enabled) {
+ fc.version = 1;
+
+ rc = ieee802154_hdr_push_sechdr(buf + pos, &hdr->sec);
+ if (rc < 0)
+ return -EINVAL;
+
+ pos += rc;
+ }
+
+ memcpy(buf, &fc, 2);
+
+ memcpy(skb_push(skb, pos), buf, pos);
+
+ return pos;
+}
+EXPORT_SYMBOL_GPL(ieee802154_hdr_push);
+
+static int
+ieee802154_hdr_get_addr(const u8 *buf, int mode, bool omit_pan,
+ struct ieee802154_addr *addr)
+{
+ int pos = 0;
+
+ addr->mode = mode;
+
+ if (mode == IEEE802154_ADDR_NONE)
+ return 0;
+
+ if (!omit_pan) {
+ memcpy(&addr->pan_id, buf + pos, 2);
+ pos += 2;
+ }
+
+ if (mode == IEEE802154_ADDR_SHORT) {
+ memcpy(&addr->short_addr, buf + pos, 2);
+ return pos + 2;
+ } else {
+ memcpy(&addr->extended_addr, buf + pos, IEEE802154_ADDR_LEN);
+ return pos + IEEE802154_ADDR_LEN;
+ }
+}
+
+static int ieee802154_hdr_addr_len(int mode, bool omit_pan)
+{
+ int pan_len = omit_pan ? 0 : 2;
+
+ switch (mode) {
+ case IEEE802154_ADDR_NONE: return 0;
+ case IEEE802154_ADDR_SHORT: return 2 + pan_len;
+ case IEEE802154_ADDR_LONG: return IEEE802154_ADDR_LEN + pan_len;
+ default: return -EINVAL;
+ }
+}
+
+static int
+ieee802154_hdr_get_sechdr(const u8 *buf, struct ieee802154_sechdr *hdr)
+{
+ int pos = 5;
+
+ memcpy(hdr, buf, 1);
+ memcpy(&hdr->frame_counter, buf + 1, 4);
+
+ switch (hdr->key_id_mode) {
+ case IEEE802154_SCF_KEY_IMPLICIT:
+ return pos;
+
+ case IEEE802154_SCF_KEY_INDEX:
+ break;
+
+ case IEEE802154_SCF_KEY_SHORT_INDEX:
+ memcpy(&hdr->short_src, buf + pos, 4);
+ pos += 4;
+ break;
+
+ case IEEE802154_SCF_KEY_HW_INDEX:
+ memcpy(&hdr->extended_src, buf + pos, IEEE802154_ADDR_LEN);
+ pos += IEEE802154_ADDR_LEN;
+ break;
+ }
+
+ hdr->key_id = buf[pos++];
+
+ return pos;
+}
+
+static int ieee802154_sechdr_lengths[4] = {
+ [IEEE802154_SCF_KEY_IMPLICIT] = 5,
+ [IEEE802154_SCF_KEY_INDEX] = 6,
+ [IEEE802154_SCF_KEY_SHORT_INDEX] = 10,
+ [IEEE802154_SCF_KEY_HW_INDEX] = 14,
+};
+
+static int ieee802154_hdr_sechdr_len(u8 sc)
+{
+ return ieee802154_sechdr_lengths[IEEE802154_SCF_KEY_ID_MODE(sc)];
+}
+
+static int ieee802154_hdr_minlen(const struct ieee802154_hdr *hdr)
+{
+ int dlen, slen;
+
+ dlen = ieee802154_hdr_addr_len(hdr->fc.dest_addr_mode, false);
+ slen = ieee802154_hdr_addr_len(hdr->fc.source_addr_mode,
+ hdr->fc.intra_pan);
+
+ if (slen < 0 || dlen < 0)
+ return -EINVAL;
+
+ return 3 + dlen + slen + hdr->fc.security_enabled;
+}
+
+static int
+ieee802154_hdr_get_addrs(const u8 *buf, struct ieee802154_hdr *hdr)
+{
+ int pos = 0;
+
+ pos += ieee802154_hdr_get_addr(buf + pos, hdr->fc.dest_addr_mode,
+ false, &hdr->dest);
+ pos += ieee802154_hdr_get_addr(buf + pos, hdr->fc.source_addr_mode,
+ hdr->fc.intra_pan, &hdr->source);
+
+ if (hdr->fc.intra_pan)
+ hdr->source.pan_id = hdr->dest.pan_id;
+
+ return pos;
+}
+
+int
+ieee802154_hdr_pull(struct sk_buff *skb, struct ieee802154_hdr *hdr)
+{
+ int pos = 3, rc;
+
+ if (!pskb_may_pull(skb, 3))
+ return -EINVAL;
+
+ memcpy(hdr, skb->data, 3);
+
+ rc = ieee802154_hdr_minlen(hdr);
+ if (rc < 0 || !pskb_may_pull(skb, rc))
+ return -EINVAL;
+
+ pos += ieee802154_hdr_get_addrs(skb->data + pos, hdr);
+
+ if (hdr->fc.security_enabled) {
+ int want = pos + ieee802154_hdr_sechdr_len(skb->data[pos]);
+
+ if (!pskb_may_pull(skb, want))
+ return -EINVAL;
+
+ pos += ieee802154_hdr_get_sechdr(skb->data + pos, &hdr->sec);
+ }
+
+ skb_pull(skb, pos);
+ return pos;
+}
+EXPORT_SYMBOL_GPL(ieee802154_hdr_pull);
+
+int
+ieee802154_hdr_peek_addrs(const struct sk_buff *skb, struct ieee802154_hdr *hdr)
+{
+ const u8 *buf = skb_mac_header(skb);
+ int pos = 3, rc;
+
+ if (buf + 3 > skb_tail_pointer(skb))
+ return -EINVAL;
+
+ memcpy(hdr, buf, 3);
+
+ rc = ieee802154_hdr_minlen(hdr);
+ if (rc < 0 || buf + rc > skb_tail_pointer(skb))
+ return -EINVAL;
+
+ pos += ieee802154_hdr_get_addrs(buf + pos, hdr);
+ return pos;
+}
+EXPORT_SYMBOL_GPL(ieee802154_hdr_peek_addrs);
+
+int
+ieee802154_hdr_peek(const struct sk_buff *skb, struct ieee802154_hdr *hdr)
+{
+ const u8 *buf = skb_mac_header(skb);
+ int pos;
+
+ pos = ieee802154_hdr_peek_addrs(skb, hdr);
+ if (pos < 0)
+ return -EINVAL;
+
+ if (hdr->fc.security_enabled) {
+ u8 key_id_mode = IEEE802154_SCF_KEY_ID_MODE(*(buf + pos));
+ int want = pos + ieee802154_sechdr_lengths[key_id_mode];
+
+ if (buf + want > skb_tail_pointer(skb))
+ return -EINVAL;
+
+ pos += ieee802154_hdr_get_sechdr(buf + pos, &hdr->sec);
+ }
+
+ return pos;
+}
+EXPORT_SYMBOL_GPL(ieee802154_hdr_peek);
+
+int ieee802154_max_payload(const struct ieee802154_hdr *hdr)
+{
+ int hlen = ieee802154_hdr_minlen(hdr);
+
+ if (hdr->fc.security_enabled) {
+ hlen += ieee802154_sechdr_lengths[hdr->sec.key_id_mode] - 1;
+ hlen += ieee802154_sechdr_authtag_len(&hdr->sec);
+ }
+
+ return IEEE802154_MTU - hlen - IEEE802154_MFR_SIZE;
+}
+EXPORT_SYMBOL_GPL(ieee802154_max_payload);
diff --git a/net/ieee802154/ieee802154.h b/net/ieee802154/ieee802154.h
index cee4425b995..8b83a231299 100644
--- a/net/ieee802154/ieee802154.h
+++ b/net/ieee802154/ieee802154.h
@@ -66,5 +66,25 @@ int ieee802154_scan_req(struct sk_buff *skb, struct genl_info *info);
int ieee802154_start_req(struct sk_buff *skb, struct genl_info *info);
int ieee802154_list_iface(struct sk_buff *skb, struct genl_info *info);
int ieee802154_dump_iface(struct sk_buff *skb, struct netlink_callback *cb);
+int ieee802154_set_macparams(struct sk_buff *skb, struct genl_info *info);
+
+int ieee802154_llsec_getparams(struct sk_buff *skb, struct genl_info *info);
+int ieee802154_llsec_setparams(struct sk_buff *skb, struct genl_info *info);
+int ieee802154_llsec_add_key(struct sk_buff *skb, struct genl_info *info);
+int ieee802154_llsec_del_key(struct sk_buff *skb, struct genl_info *info);
+int ieee802154_llsec_dump_keys(struct sk_buff *skb,
+ struct netlink_callback *cb);
+int ieee802154_llsec_add_dev(struct sk_buff *skb, struct genl_info *info);
+int ieee802154_llsec_del_dev(struct sk_buff *skb, struct genl_info *info);
+int ieee802154_llsec_dump_devs(struct sk_buff *skb,
+ struct netlink_callback *cb);
+int ieee802154_llsec_add_devkey(struct sk_buff *skb, struct genl_info *info);
+int ieee802154_llsec_del_devkey(struct sk_buff *skb, struct genl_info *info);
+int ieee802154_llsec_dump_devkeys(struct sk_buff *skb,
+ struct netlink_callback *cb);
+int ieee802154_llsec_add_seclevel(struct sk_buff *skb, struct genl_info *info);
+int ieee802154_llsec_del_seclevel(struct sk_buff *skb, struct genl_info *info);
+int ieee802154_llsec_dump_seclevels(struct sk_buff *skb,
+ struct netlink_callback *cb);
#endif
diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c
index 43f1b2bf469..26efcf4fd2f 100644
--- a/net/ieee802154/netlink.c
+++ b/net/ieee802154/netlink.c
@@ -123,6 +123,27 @@ static const struct genl_ops ieee8021154_ops[] = {
IEEE802154_OP(IEEE802154_START_REQ, ieee802154_start_req),
IEEE802154_DUMP(IEEE802154_LIST_IFACE, ieee802154_list_iface,
ieee802154_dump_iface),
+ IEEE802154_OP(IEEE802154_SET_MACPARAMS, ieee802154_set_macparams),
+ IEEE802154_OP(IEEE802154_LLSEC_GETPARAMS, ieee802154_llsec_getparams),
+ IEEE802154_OP(IEEE802154_LLSEC_SETPARAMS, ieee802154_llsec_setparams),
+ IEEE802154_DUMP(IEEE802154_LLSEC_LIST_KEY, NULL,
+ ieee802154_llsec_dump_keys),
+ IEEE802154_OP(IEEE802154_LLSEC_ADD_KEY, ieee802154_llsec_add_key),
+ IEEE802154_OP(IEEE802154_LLSEC_DEL_KEY, ieee802154_llsec_del_key),
+ IEEE802154_DUMP(IEEE802154_LLSEC_LIST_DEV, NULL,
+ ieee802154_llsec_dump_devs),
+ IEEE802154_OP(IEEE802154_LLSEC_ADD_DEV, ieee802154_llsec_add_dev),
+ IEEE802154_OP(IEEE802154_LLSEC_DEL_DEV, ieee802154_llsec_del_dev),
+ IEEE802154_DUMP(IEEE802154_LLSEC_LIST_DEVKEY, NULL,
+ ieee802154_llsec_dump_devkeys),
+ IEEE802154_OP(IEEE802154_LLSEC_ADD_DEVKEY, ieee802154_llsec_add_devkey),
+ IEEE802154_OP(IEEE802154_LLSEC_DEL_DEVKEY, ieee802154_llsec_del_devkey),
+ IEEE802154_DUMP(IEEE802154_LLSEC_LIST_SECLEVEL, NULL,
+ ieee802154_llsec_dump_seclevels),
+ IEEE802154_OP(IEEE802154_LLSEC_ADD_SECLEVEL,
+ ieee802154_llsec_add_seclevel),
+ IEEE802154_OP(IEEE802154_LLSEC_DEL_SECLEVEL,
+ ieee802154_llsec_del_seclevel),
};
static const struct genl_multicast_group ieee802154_mcgrps[] = {
diff --git a/net/ieee802154/nl-mac.c b/net/ieee802154/nl-mac.c
index ba5c1e002f3..a3281b8bfd5 100644
--- a/net/ieee802154/nl-mac.c
+++ b/net/ieee802154/nl-mac.c
@@ -39,6 +39,26 @@
#include "ieee802154.h"
+static int nla_put_hwaddr(struct sk_buff *msg, int type, __le64 hwaddr)
+{
+ return nla_put_u64(msg, type, swab64((__force u64)hwaddr));
+}
+
+static __le64 nla_get_hwaddr(const struct nlattr *nla)
+{
+ return ieee802154_devaddr_from_raw(nla_data(nla));
+}
+
+static int nla_put_shortaddr(struct sk_buff *msg, int type, __le16 addr)
+{
+ return nla_put_u16(msg, type, le16_to_cpu(addr));
+}
+
+static __le16 nla_get_shortaddr(const struct nlattr *nla)
+{
+ return cpu_to_le16(nla_get_u16(nla));
+}
+
int ieee802154_nl_assoc_indic(struct net_device *dev,
struct ieee802154_addr *addr, u8 cap)
{
@@ -46,7 +66,7 @@ int ieee802154_nl_assoc_indic(struct net_device *dev,
pr_debug("%s\n", __func__);
- if (addr->addr_type != IEEE802154_ADDR_LONG) {
+ if (addr->mode != IEEE802154_ADDR_LONG) {
pr_err("%s: received non-long source address!\n", __func__);
return -EINVAL;
}
@@ -59,8 +79,8 @@ int ieee802154_nl_assoc_indic(struct net_device *dev,
nla_put_u32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex) ||
nla_put(msg, IEEE802154_ATTR_HW_ADDR, IEEE802154_ADDR_LEN,
dev->dev_addr) ||
- nla_put(msg, IEEE802154_ATTR_SRC_HW_ADDR, IEEE802154_ADDR_LEN,
- addr->hwaddr) ||
+ nla_put_hwaddr(msg, IEEE802154_ATTR_SRC_HW_ADDR,
+ addr->extended_addr) ||
nla_put_u8(msg, IEEE802154_ATTR_CAPABILITY, cap))
goto nla_put_failure;
@@ -72,7 +92,7 @@ nla_put_failure:
}
EXPORT_SYMBOL(ieee802154_nl_assoc_indic);
-int ieee802154_nl_assoc_confirm(struct net_device *dev, u16 short_addr,
+int ieee802154_nl_assoc_confirm(struct net_device *dev, __le16 short_addr,
u8 status)
{
struct sk_buff *msg;
@@ -87,7 +107,7 @@ int ieee802154_nl_assoc_confirm(struct net_device *dev, u16 short_addr,
nla_put_u32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex) ||
nla_put(msg, IEEE802154_ATTR_HW_ADDR, IEEE802154_ADDR_LEN,
dev->dev_addr) ||
- nla_put_u16(msg, IEEE802154_ATTR_SHORT_ADDR, short_addr) ||
+ nla_put_shortaddr(msg, IEEE802154_ATTR_SHORT_ADDR, short_addr) ||
nla_put_u8(msg, IEEE802154_ATTR_STATUS, status))
goto nla_put_failure;
return ieee802154_nl_mcast(msg, IEEE802154_COORD_MCGRP);
@@ -114,13 +134,13 @@ int ieee802154_nl_disassoc_indic(struct net_device *dev,
nla_put(msg, IEEE802154_ATTR_HW_ADDR, IEEE802154_ADDR_LEN,
dev->dev_addr))
goto nla_put_failure;
- if (addr->addr_type == IEEE802154_ADDR_LONG) {
- if (nla_put(msg, IEEE802154_ATTR_SRC_HW_ADDR, IEEE802154_ADDR_LEN,
- addr->hwaddr))
+ if (addr->mode == IEEE802154_ADDR_LONG) {
+ if (nla_put_hwaddr(msg, IEEE802154_ATTR_SRC_HW_ADDR,
+ addr->extended_addr))
goto nla_put_failure;
} else {
- if (nla_put_u16(msg, IEEE802154_ATTR_SRC_SHORT_ADDR,
- addr->short_addr))
+ if (nla_put_shortaddr(msg, IEEE802154_ATTR_SRC_SHORT_ADDR,
+ addr->short_addr))
goto nla_put_failure;
}
if (nla_put_u8(msg, IEEE802154_ATTR_REASON, reason))
@@ -157,8 +177,8 @@ nla_put_failure:
}
EXPORT_SYMBOL(ieee802154_nl_disassoc_confirm);
-int ieee802154_nl_beacon_indic(struct net_device *dev,
- u16 panid, u16 coord_addr)
+int ieee802154_nl_beacon_indic(struct net_device *dev, __le16 panid,
+ __le16 coord_addr)
{
struct sk_buff *msg;
@@ -172,8 +192,9 @@ int ieee802154_nl_beacon_indic(struct net_device *dev,
nla_put_u32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex) ||
nla_put(msg, IEEE802154_ATTR_HW_ADDR, IEEE802154_ADDR_LEN,
dev->dev_addr) ||
- nla_put_u16(msg, IEEE802154_ATTR_COORD_SHORT_ADDR, coord_addr) ||
- nla_put_u16(msg, IEEE802154_ATTR_COORD_PAN_ID, panid))
+ nla_put_shortaddr(msg, IEEE802154_ATTR_COORD_SHORT_ADDR,
+ coord_addr) ||
+ nla_put_shortaddr(msg, IEEE802154_ATTR_COORD_PAN_ID, panid))
goto nla_put_failure;
return ieee802154_nl_mcast(msg, IEEE802154_COORD_MCGRP);
@@ -243,6 +264,8 @@ static int ieee802154_nl_fill_iface(struct sk_buff *msg, u32 portid,
{
void *hdr;
struct wpan_phy *phy;
+ struct ieee802154_mlme_ops *ops;
+ __le16 short_addr, pan_id;
pr_debug("%s\n", __func__);
@@ -251,19 +274,45 @@ static int ieee802154_nl_fill_iface(struct sk_buff *msg, u32 portid,
if (!hdr)
goto out;
- phy = ieee802154_mlme_ops(dev)->get_phy(dev);
+ ops = ieee802154_mlme_ops(dev);
+ phy = ops->get_phy(dev);
BUG_ON(!phy);
+ short_addr = ops->get_short_addr(dev);
+ pan_id = ops->get_pan_id(dev);
+
if (nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name) ||
nla_put_string(msg, IEEE802154_ATTR_PHY_NAME, wpan_phy_name(phy)) ||
nla_put_u32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex) ||
nla_put(msg, IEEE802154_ATTR_HW_ADDR, IEEE802154_ADDR_LEN,
dev->dev_addr) ||
- nla_put_u16(msg, IEEE802154_ATTR_SHORT_ADDR,
- ieee802154_mlme_ops(dev)->get_short_addr(dev)) ||
- nla_put_u16(msg, IEEE802154_ATTR_PAN_ID,
- ieee802154_mlme_ops(dev)->get_pan_id(dev)))
+ nla_put_shortaddr(msg, IEEE802154_ATTR_SHORT_ADDR, short_addr) ||
+ nla_put_shortaddr(msg, IEEE802154_ATTR_PAN_ID, pan_id))
goto nla_put_failure;
+
+ if (ops->get_mac_params) {
+ struct ieee802154_mac_params params;
+
+ ops->get_mac_params(dev, &params);
+
+ if (nla_put_s8(msg, IEEE802154_ATTR_TXPOWER,
+ params.transmit_power) ||
+ nla_put_u8(msg, IEEE802154_ATTR_LBT_ENABLED, params.lbt) ||
+ nla_put_u8(msg, IEEE802154_ATTR_CCA_MODE,
+ params.cca_mode) ||
+ nla_put_s32(msg, IEEE802154_ATTR_CCA_ED_LEVEL,
+ params.cca_ed_level) ||
+ nla_put_u8(msg, IEEE802154_ATTR_CSMA_RETRIES,
+ params.csma_retries) ||
+ nla_put_u8(msg, IEEE802154_ATTR_CSMA_MIN_BE,
+ params.min_be) ||
+ nla_put_u8(msg, IEEE802154_ATTR_CSMA_MAX_BE,
+ params.max_be) ||
+ nla_put_s8(msg, IEEE802154_ATTR_FRAME_RETRIES,
+ params.frame_retries))
+ goto nla_put_failure;
+ }
+
wpan_phy_put(phy);
return genlmsg_end(msg, hdr);
@@ -322,16 +371,16 @@ int ieee802154_associate_req(struct sk_buff *skb, struct genl_info *info)
goto out;
if (info->attrs[IEEE802154_ATTR_COORD_HW_ADDR]) {
- addr.addr_type = IEEE802154_ADDR_LONG;
- nla_memcpy(addr.hwaddr,
- info->attrs[IEEE802154_ATTR_COORD_HW_ADDR],
- IEEE802154_ADDR_LEN);
+ addr.mode = IEEE802154_ADDR_LONG;
+ addr.extended_addr = nla_get_hwaddr(
+ info->attrs[IEEE802154_ATTR_COORD_HW_ADDR]);
} else {
- addr.addr_type = IEEE802154_ADDR_SHORT;
- addr.short_addr = nla_get_u16(
+ addr.mode = IEEE802154_ADDR_SHORT;
+ addr.short_addr = nla_get_shortaddr(
info->attrs[IEEE802154_ATTR_COORD_SHORT_ADDR]);
}
- addr.pan_id = nla_get_u16(info->attrs[IEEE802154_ATTR_COORD_PAN_ID]);
+ addr.pan_id = nla_get_shortaddr(
+ info->attrs[IEEE802154_ATTR_COORD_PAN_ID]);
if (info->attrs[IEEE802154_ATTR_PAGE])
page = nla_get_u8(info->attrs[IEEE802154_ATTR_PAGE]);
@@ -365,14 +414,13 @@ int ieee802154_associate_resp(struct sk_buff *skb, struct genl_info *info)
if (!ieee802154_mlme_ops(dev)->assoc_resp)
goto out;
- addr.addr_type = IEEE802154_ADDR_LONG;
- nla_memcpy(addr.hwaddr, info->attrs[IEEE802154_ATTR_DEST_HW_ADDR],
- IEEE802154_ADDR_LEN);
+ addr.mode = IEEE802154_ADDR_LONG;
+ addr.extended_addr = nla_get_hwaddr(
+ info->attrs[IEEE802154_ATTR_DEST_HW_ADDR]);
addr.pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev);
-
ret = ieee802154_mlme_ops(dev)->assoc_resp(dev, &addr,
- nla_get_u16(info->attrs[IEEE802154_ATTR_DEST_SHORT_ADDR]),
+ nla_get_shortaddr(info->attrs[IEEE802154_ATTR_DEST_SHORT_ADDR]),
nla_get_u8(info->attrs[IEEE802154_ATTR_STATUS]));
out:
@@ -398,13 +446,12 @@ int ieee802154_disassociate_req(struct sk_buff *skb, struct genl_info *info)
goto out;
if (info->attrs[IEEE802154_ATTR_DEST_HW_ADDR]) {
- addr.addr_type = IEEE802154_ADDR_LONG;
- nla_memcpy(addr.hwaddr,
- info->attrs[IEEE802154_ATTR_DEST_HW_ADDR],
- IEEE802154_ADDR_LEN);
+ addr.mode = IEEE802154_ADDR_LONG;
+ addr.extended_addr = nla_get_hwaddr(
+ info->attrs[IEEE802154_ATTR_DEST_HW_ADDR]);
} else {
- addr.addr_type = IEEE802154_ADDR_SHORT;
- addr.short_addr = nla_get_u16(
+ addr.mode = IEEE802154_ADDR_SHORT;
+ addr.short_addr = nla_get_shortaddr(
info->attrs[IEEE802154_ATTR_DEST_SHORT_ADDR]);
}
addr.pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev);
@@ -449,10 +496,11 @@ int ieee802154_start_req(struct sk_buff *skb, struct genl_info *info)
if (!ieee802154_mlme_ops(dev)->start_req)
goto out;
- addr.addr_type = IEEE802154_ADDR_SHORT;
- addr.short_addr = nla_get_u16(
+ addr.mode = IEEE802154_ADDR_SHORT;
+ addr.short_addr = nla_get_shortaddr(
info->attrs[IEEE802154_ATTR_COORD_SHORT_ADDR]);
- addr.pan_id = nla_get_u16(info->attrs[IEEE802154_ATTR_COORD_PAN_ID]);
+ addr.pan_id = nla_get_shortaddr(
+ info->attrs[IEEE802154_ATTR_COORD_PAN_ID]);
channel = nla_get_u8(info->attrs[IEEE802154_ATTR_CHANNEL]);
bcn_ord = nla_get_u8(info->attrs[IEEE802154_ATTR_BCN_ORD]);
@@ -467,7 +515,7 @@ int ieee802154_start_req(struct sk_buff *skb, struct genl_info *info)
page = 0;
- if (addr.short_addr == IEEE802154_ADDR_BROADCAST) {
+ if (addr.short_addr == cpu_to_le16(IEEE802154_ADDR_BROADCAST)) {
ieee802154_nl_start_confirm(dev, IEEE802154_NO_SHORT_ADDRESS);
dev_put(dev);
return -EINVAL;
@@ -577,3 +625,902 @@ cont:
return skb->len;
}
+/*
+ * Genetlink handler: update the MAC parameters of the device named in @info.
+ *
+ * Reads the current parameters with ops->get_mac_params(), overwrites every
+ * field whose attribute is present in the request and hands the result to
+ * ops->set_mac_params().
+ *
+ * Returns -ENODEV when no device is found, -EOPNOTSUPP when a required MLME
+ * or PHY callback is missing, -EBUSY while the interface is running, -EINVAL
+ * when none of the checked attributes is present, and otherwise whatever
+ * ops->set_mac_params() returns.
+ */
+int ieee802154_set_macparams(struct sk_buff *skb, struct genl_info *info)
+{
+ struct net_device *dev = NULL;
+ struct ieee802154_mlme_ops *ops;
+ struct ieee802154_mac_params params;
+ struct wpan_phy *phy;
+ int rc = -EINVAL;
+
+ pr_debug("%s\n", __func__);
+
+ dev = ieee802154_nl_get_dev(info);
+ if (!dev)
+ return -ENODEV;
+
+ ops = ieee802154_mlme_ops(dev);
+
+ if (!ops->get_mac_params || !ops->set_mac_params) {
+ rc = -EOPNOTSUPP;
+ goto out;
+ }
+
+ if (netif_running(dev)) { /* a running interface is refused with -EBUSY */
+ rc = -EBUSY;
+ goto out;
+ }
+
+ if (!info->attrs[IEEE802154_ATTR_LBT_ENABLED] &&
+ !info->attrs[IEEE802154_ATTR_CCA_MODE] &&
+ !info->attrs[IEEE802154_ATTR_CCA_ED_LEVEL] &&
+ !info->attrs[IEEE802154_ATTR_CSMA_RETRIES] &&
+ !info->attrs[IEEE802154_ATTR_CSMA_MIN_BE] &&
+ !info->attrs[IEEE802154_ATTR_CSMA_MAX_BE] &&
+ !info->attrs[IEEE802154_ATTR_FRAME_RETRIES])
+ goto out; /* rc is still -EINVAL; NOTE(review): TXPOWER is applied below but is not part of this check, so a request carrying only TXPOWER ends here */
+
+ phy = ops->get_phy(dev); /* released with wpan_phy_put() on every path below */
+
+ if ((!phy->set_lbt && info->attrs[IEEE802154_ATTR_LBT_ENABLED]) || /* each requested setting needs its PHY callback */
+ (!phy->set_cca_mode && info->attrs[IEEE802154_ATTR_CCA_MODE]) ||
+ (!phy->set_cca_ed_level &&
+ info->attrs[IEEE802154_ATTR_CCA_ED_LEVEL]) ||
+ (!phy->set_csma_params &&
+ (info->attrs[IEEE802154_ATTR_CSMA_RETRIES] ||
+ info->attrs[IEEE802154_ATTR_CSMA_MIN_BE] ||
+ info->attrs[IEEE802154_ATTR_CSMA_MAX_BE])) ||
+ (!phy->set_frame_retries &&
+ info->attrs[IEEE802154_ATTR_FRAME_RETRIES])) {
+ rc = -EOPNOTSUPP;
+ goto out_phy;
+ }
+
+ ops->get_mac_params(dev, &params); /* start from the current values so absent attributes keep them */
+
+ if (info->attrs[IEEE802154_ATTR_TXPOWER])
+ params.transmit_power = nla_get_s8(info->attrs[IEEE802154_ATTR_TXPOWER]);
+
+ if (info->attrs[IEEE802154_ATTR_LBT_ENABLED])
+ params.lbt = nla_get_u8(info->attrs[IEEE802154_ATTR_LBT_ENABLED]);
+
+ if (info->attrs[IEEE802154_ATTR_CCA_MODE])
+ params.cca_mode = nla_get_u8(info->attrs[IEEE802154_ATTR_CCA_MODE]);
+
+ if (info->attrs[IEEE802154_ATTR_CCA_ED_LEVEL])
+ params.cca_ed_level = nla_get_s32(info->attrs[IEEE802154_ATTR_CCA_ED_LEVEL]);
+
+ if (info->attrs[IEEE802154_ATTR_CSMA_RETRIES])
+ params.csma_retries = nla_get_u8(info->attrs[IEEE802154_ATTR_CSMA_RETRIES]);
+
+ if (info->attrs[IEEE802154_ATTR_CSMA_MIN_BE])
+ params.min_be = nla_get_u8(info->attrs[IEEE802154_ATTR_CSMA_MIN_BE]);
+
+ if (info->attrs[IEEE802154_ATTR_CSMA_MAX_BE])
+ params.max_be = nla_get_u8(info->attrs[IEEE802154_ATTR_CSMA_MAX_BE]);
+
+ if (info->attrs[IEEE802154_ATTR_FRAME_RETRIES])
+ params.frame_retries = nla_get_s8(info->attrs[IEEE802154_ATTR_FRAME_RETRIES]);
+
+ rc = ops->set_mac_params(dev, &params);
+
+ wpan_phy_put(phy);
+ dev_put(dev);
+ return rc;
+
+out_phy:
+ wpan_phy_put(phy);
+out:
+ dev_put(dev);
+ return rc;
+}
+
+
+
+/*
+ * Fill @desc with the key identifier described by the netlink attributes in
+ * @info.
+ *
+ * @desc is zeroed first. LLSEC_KEY_MODE is always required. An implicit key
+ * additionally needs PAN_ID plus either SHORT_ADDR or HW_ADDR; every other
+ * mode needs LLSEC_KEY_ID, and the short/hw index modes also need the
+ * matching LLSEC_KEY_SOURCE_* attribute.
+ *
+ * Returns 0 on success or -EINVAL when a required attribute is missing.
+ */
+static int
+ieee802154_llsec_parse_key_id(struct genl_info *info,
+ struct ieee802154_llsec_key_id *desc)
+{
+ memset(desc, 0, sizeof(*desc));
+
+ if (!info->attrs[IEEE802154_ATTR_LLSEC_KEY_MODE])
+ return -EINVAL;
+
+ desc->mode = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_KEY_MODE]);
+
+ if (desc->mode == IEEE802154_SCF_KEY_IMPLICIT) {
+ /* Both the PAN id and one of the two address forms are read
+ * unconditionally below, so the request must carry both;
+ * accepting either one alone would hand a NULL attribute to
+ * the nla_get_*() helpers.
+ */
+ if (!info->attrs[IEEE802154_ATTR_PAN_ID] ||
+ !(info->attrs[IEEE802154_ATTR_SHORT_ADDR] ||
+ info->attrs[IEEE802154_ATTR_HW_ADDR]))
+ return -EINVAL;
+
+ desc->device_addr.pan_id = nla_get_shortaddr(info->attrs[IEEE802154_ATTR_PAN_ID]);
+
+ /* the short address wins when both forms are supplied */
+ if (info->attrs[IEEE802154_ATTR_SHORT_ADDR]) {
+ desc->device_addr.mode = IEEE802154_ADDR_SHORT;
+ desc->device_addr.short_addr = nla_get_shortaddr(info->attrs[IEEE802154_ATTR_SHORT_ADDR]);
+ } else {
+ desc->device_addr.mode = IEEE802154_ADDR_LONG;
+ desc->device_addr.extended_addr = nla_get_hwaddr(info->attrs[IEEE802154_ATTR_HW_ADDR]);
+ }
+ }
+
+ if (desc->mode != IEEE802154_SCF_KEY_IMPLICIT &&
+ !info->attrs[IEEE802154_ATTR_LLSEC_KEY_ID])
+ return -EINVAL;
+
+ if (desc->mode == IEEE802154_SCF_KEY_SHORT_INDEX &&
+ !info->attrs[IEEE802154_ATTR_LLSEC_KEY_SOURCE_SHORT])
+ return -EINVAL;
+
+ if (desc->mode == IEEE802154_SCF_KEY_HW_INDEX &&
+ !info->attrs[IEEE802154_ATTR_LLSEC_KEY_SOURCE_EXTENDED])
+ return -EINVAL;
+
+ if (desc->mode != IEEE802154_SCF_KEY_IMPLICIT)
+ desc->id = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_KEY_ID]);
+
+ switch (desc->mode) {
+ case IEEE802154_SCF_KEY_SHORT_INDEX:
+ {
+ /* the attribute is in host order, the descriptor keeps le32 */
+ u32 source = nla_get_u32(info->attrs[IEEE802154_ATTR_LLSEC_KEY_SOURCE_SHORT]);
+ desc->short_source = cpu_to_le32(source);
+ break;
+ }
+ case IEEE802154_SCF_KEY_HW_INDEX:
+ desc->extended_source = nla_get_hwaddr(info->attrs[IEEE802154_ATTR_LLSEC_KEY_SOURCE_EXTENDED]);
+ break;
+ }
+
+ return 0;
+}
+/*
+ * Append the netlink attributes describing key identifier @desc to @msg.
+ *
+ * Which attributes are written depends on desc->mode, using the same
+ * attribute ids that ieee802154_llsec_parse_key_id() reads.
+ *
+ * Returns 0 on success or -EMSGSIZE as soon as one attribute cannot be added.
+ */
+static int
+ieee802154_llsec_fill_key_id(struct sk_buff *msg,
+ const struct ieee802154_llsec_key_id *desc)
+{
+ if (nla_put_u8(msg, IEEE802154_ATTR_LLSEC_KEY_MODE, desc->mode))
+ return -EMSGSIZE;
+
+ if (desc->mode == IEEE802154_SCF_KEY_IMPLICIT) { /* implicit keys are described by a device address */
+ if (nla_put_shortaddr(msg, IEEE802154_ATTR_PAN_ID,
+ desc->device_addr.pan_id))
+ return -EMSGSIZE;
+
+ if (desc->device_addr.mode == IEEE802154_ADDR_SHORT &&
+ nla_put_shortaddr(msg, IEEE802154_ATTR_SHORT_ADDR,
+ desc->device_addr.short_addr))
+ return -EMSGSIZE;
+
+ if (desc->device_addr.mode == IEEE802154_ADDR_LONG &&
+ nla_put_hwaddr(msg, IEEE802154_ATTR_HW_ADDR,
+ desc->device_addr.extended_addr))
+ return -EMSGSIZE;
+ }
+
+ if (desc->mode != IEEE802154_SCF_KEY_IMPLICIT && /* every non-implicit mode carries a key id */
+ nla_put_u8(msg, IEEE802154_ATTR_LLSEC_KEY_ID, desc->id))
+ return -EMSGSIZE;
+
+ if (desc->mode == IEEE802154_SCF_KEY_SHORT_INDEX &&
+ nla_put_u32(msg, IEEE802154_ATTR_LLSEC_KEY_SOURCE_SHORT,
+ le32_to_cpu(desc->short_source))) /* stored as le32, sent in host order */
+ return -EMSGSIZE;
+
+ if (desc->mode == IEEE802154_SCF_KEY_HW_INDEX &&
+ nla_put_hwaddr(msg, IEEE802154_ATTR_LLSEC_KEY_SOURCE_EXTENDED,
+ desc->extended_source))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+/*
+ * Genetlink handler: report the link-layer security parameters of the device
+ * named in @info.
+ *
+ * Builds an IEEE802154_LLSEC_GETPARAMS message holding the device name and
+ * index, the enabled flag, the outgoing security level, the frame counter
+ * and the outgoing key identifier, and sends it with ieee802154_nl_reply().
+ *
+ * Returns -ENODEV when no device is found, -EOPNOTSUPP when the device has no
+ * llsec ops, -ENOBUFS when the reply cannot be allocated or filled, a
+ * negative value from ops->llsec->get_params(), or the result of
+ * ieee802154_nl_reply().
+ */
+int ieee802154_llsec_getparams(struct sk_buff *skb, struct genl_info *info)
+{
+ struct sk_buff *msg;
+ struct net_device *dev = NULL;
+ int rc = -ENOBUFS;
+ struct ieee802154_mlme_ops *ops;
+ void *hdr;
+ struct ieee802154_llsec_params params;
+
+ pr_debug("%s\n", __func__);
+
+ dev = ieee802154_nl_get_dev(info);
+ if (!dev)
+ return -ENODEV;
+
+ ops = ieee802154_mlme_ops(dev);
+ if (!ops->llsec) {
+ rc = -EOPNOTSUPP;
+ goto out_dev;
+ }
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ goto out_dev;
+
+ hdr = genlmsg_put(msg, 0, info->snd_seq, &nl802154_family, 0,
+ IEEE802154_LLSEC_GETPARAMS);
+ if (!hdr)
+ goto out_free;
+
+ rc = ops->llsec->get_params(dev, &params);
+ if (rc < 0)
+ goto out_free;
+
+ /* get_params() left a non-negative value in rc; put the error code
+ * back so a failed nla_put below is not reported as success while no
+ * reply is sent
+ */
+ rc = -ENOBUFS;
+
+ if (nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name) ||
+ nla_put_u32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex) ||
+ nla_put_u8(msg, IEEE802154_ATTR_LLSEC_ENABLED, params.enabled) ||
+ nla_put_u8(msg, IEEE802154_ATTR_LLSEC_SECLEVEL, params.out_level) ||
+ nla_put_u32(msg, IEEE802154_ATTR_LLSEC_FRAME_COUNTER,
+ be32_to_cpu(params.frame_counter)) ||
+ ieee802154_llsec_fill_key_id(msg, &params.out_key))
+ goto out_free;
+
+ dev_put(dev);
+
+ return ieee802154_nl_reply(msg, info);
+out_free:
+ nlmsg_free(msg);
+out_dev:
+ dev_put(dev);
+ return rc;
+}
+/*
+ * Genetlink handler: change the link-layer security parameters of the device
+ * named in @info.
+ *
+ * Only the fields whose attribute is present are written into the local
+ * params structure; the matching IEEE802154_LLSEC_PARAM_* bits are collected
+ * in a mask that is passed to ops->llsec->set_params() next to it.
+ *
+ * Returns -ENODEV when no device is found, -EINVAL for a request without any
+ * of the checked attributes, a security level above 7 or an unparsable key
+ * id, -EOPNOTSUPP when the device has no llsec ops, and otherwise the result
+ * of ops->llsec->set_params().
+ */
+int ieee802154_llsec_setparams(struct sk_buff *skb, struct genl_info *info)
+{
+ struct net_device *dev = NULL;
+ int rc = -EINVAL;
+ struct ieee802154_mlme_ops *ops;
+ struct ieee802154_llsec_params params; /* left uninitialised; only fields flagged in 'changed' are filled in */
+ int changed = 0; /* mask of IEEE802154_LLSEC_PARAM_* bits for the fields set below */
+
+ pr_debug("%s\n", __func__);
+
+ dev = ieee802154_nl_get_dev(info);
+ if (!dev)
+ return -ENODEV;
+
+ if (!info->attrs[IEEE802154_ATTR_LLSEC_ENABLED] &&
+ !info->attrs[IEEE802154_ATTR_LLSEC_KEY_MODE] &&
+ !info->attrs[IEEE802154_ATTR_LLSEC_SECLEVEL])
+ goto out; /* rc is still -EINVAL; NOTE(review): FRAME_COUNTER is not part of this check, so a request carrying only that attribute ends here */
+
+ ops = ieee802154_mlme_ops(dev);
+ if (!ops->llsec) {
+ rc = -EOPNOTSUPP;
+ goto out;
+ }
+
+ if (info->attrs[IEEE802154_ATTR_LLSEC_SECLEVEL] &&
+ nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_SECLEVEL]) > 7)
+ goto out; /* levels above 7 are rejected with -EINVAL */
+
+ if (info->attrs[IEEE802154_ATTR_LLSEC_ENABLED]) {
+ params.enabled = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_ENABLED]);
+ changed |= IEEE802154_LLSEC_PARAM_ENABLED;
+ }
+
+ if (info->attrs[IEEE802154_ATTR_LLSEC_KEY_MODE]) {
+ if (ieee802154_llsec_parse_key_id(info, &params.out_key))
+ goto out;
+
+ changed |= IEEE802154_LLSEC_PARAM_OUT_KEY;
+ }
+
+ if (info->attrs[IEEE802154_ATTR_LLSEC_SECLEVEL]) {
+ params.out_level = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_SECLEVEL]);
+ changed |= IEEE802154_LLSEC_PARAM_OUT_LEVEL;
+ }
+
+ if (info->attrs[IEEE802154_ATTR_LLSEC_FRAME_COUNTER]) {
+ u32 fc = nla_get_u32(info->attrs[IEEE802154_ATTR_LLSEC_FRAME_COUNTER]);
+
+ params.frame_counter = cpu_to_be32(fc); /* attribute is host order, the params field is be32 */
+ changed |= IEEE802154_LLSEC_PARAM_FRAME_COUNTER;
+ }
+
+ rc = ops->llsec->set_params(dev, &params, changed);
+
+ dev_put(dev);
+
+ return rc;
+out:
+ dev_put(dev);
+ return rc;
+}
+
+
+/* State handed by ieee802154_llsec_dump_table() to one per-table dump step. */
+struct llsec_dump_data {
+ struct sk_buff *skb; /* dump skb the step appends messages to */
+ int s_idx, s_idx2; /* entry offsets the step starts from, seeded from cb->args[1] and cb->args[2] */
+ int portid; /* port id of the requesting socket */
+ int nlmsg_seq; /* sequence number of the dump request */
+ struct net_device *dev; /* device whose table is being dumped */
+ struct ieee802154_mlme_ops *ops; /* MLME ops of @dev */
+ struct ieee802154_llsec_table *table; /* filled by ops->llsec->get_table() */
+};
+
+/*
+ * Common driver for the llsec table dumps.
+ *
+ * Walks the network devices of the requesting socket's namespace, skipping
+ * those that are not ARPHRD_IEEE802154 or have no llsec ops, and runs @step
+ * on each remaining device with its llsec table locked.
+ *
+ * Resume state lives in @cb: args[0] is the index of the device to continue
+ * with, args[1] and args[2] are the entry offsets inside that device which
+ * @step maintains in data.s_idx and data.s_idx2.
+ *
+ * Returns skb->len.
+ */
+static int
+ieee802154_llsec_dump_table(struct sk_buff *skb, struct netlink_callback *cb,
+ int (*step)(struct llsec_dump_data*))
+{
+ struct net *net = sock_net(skb->sk);
+ struct net_device *dev;
+ struct llsec_dump_data data;
+ int idx = 0;
+ int first_dev = cb->args[0];
+ int rc;
+
+ for_each_netdev(net, dev) {
+ if (idx < first_dev || dev->type != ARPHRD_IEEE802154)
+ goto skip;
+
+ data.ops = ieee802154_mlme_ops(dev);
+ if (!data.ops->llsec)
+ goto skip;
+
+ data.skb = skb;
+ data.s_idx = cb->args[1];
+ data.s_idx2 = cb->args[2];
+ data.dev = dev;
+ data.portid = NETLINK_CB(cb->skb).portid;
+ data.nlmsg_seq = cb->nlh->nlmsg_seq;
+
+ data.ops->llsec->lock_table(dev);
+ data.ops->llsec->get_table(data.dev, &data.table);
+ rc = step(&data);
+ data.ops->llsec->unlock_table(dev);
+
+ if (rc < 0) {
+ /* Out of room: remember the position inside this
+ * device so the next pass continues from it instead
+ * of emitting the same entries again.
+ */
+ cb->args[1] = data.s_idx;
+ cb->args[2] = data.s_idx2;
+ break;
+ }
+
+ /* device finished: the next one starts at its first entry */
+ cb->args[1] = 0;
+ cb->args[2] = 0;
+
+skip:
+ idx++;
+ }
+ cb->args[0] = idx;
+
+ return skb->len;
+}
+/*
+ * Look up the device named in @info and run @fn on it.
+ *
+ * Returns -ENODEV when no device is found, -EOPNOTSUPP when the device's MLME
+ * ops have no llsec member, and otherwise the result of @fn. The device is
+ * released with dev_put() before returning.
+ */
+static int
+ieee802154_nl_llsec_change(struct sk_buff *skb, struct genl_info *info,
+ int (*fn)(struct net_device*, struct genl_info*))
+{
+ struct net_device *dev = NULL;
+ int rc = -EINVAL;
+
+ dev = ieee802154_nl_get_dev(info);
+ if (!dev)
+ return -ENODEV;
+
+ if (!ieee802154_mlme_ops(dev)->llsec)
+ rc = -EOPNOTSUPP;
+ else
+ rc = fn(dev, info);
+
+ dev_put(dev);
+ return rc;
+}
+
+
+/*
+ * Build @key from the netlink attributes in @info.
+ *
+ * @key is zeroed first. KEY_USAGE_FRAME_TYPES and KEY_BYTES are mandatory;
+ * KEY_USAGE_COMMANDS becomes mandatory when the frame-type mask has the
+ * IEEE802154_FC_TYPE_MAC_CMD bit set.
+ *
+ * Returns 0 on success or -EINVAL for a missing attribute or a command mask
+ * with bits outside the accepted range.
+ */
+static int
+ieee802154_llsec_parse_key(struct genl_info *info,
+ struct ieee802154_llsec_key *key)
+{
+ u8 frames;
+ u32 commands[256 / 32]; /* 256-bit command mask held as eight u32 words */
+
+ memset(key, 0, sizeof(*key));
+
+ if (!info->attrs[IEEE802154_ATTR_LLSEC_KEY_USAGE_FRAME_TYPES] ||
+ !info->attrs[IEEE802154_ATTR_LLSEC_KEY_BYTES])
+ return -EINVAL;
+
+ frames = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_KEY_USAGE_FRAME_TYPES]);
+ if ((frames & BIT(IEEE802154_FC_TYPE_MAC_CMD)) &&
+ !info->attrs[IEEE802154_ATTR_LLSEC_KEY_USAGE_COMMANDS])
+ return -EINVAL;
+
+ if (info->attrs[IEEE802154_ATTR_LLSEC_KEY_USAGE_COMMANDS]) {
+ nla_memcpy(commands,
+ info->attrs[IEEE802154_ATTR_LLSEC_KEY_USAGE_COMMANDS],
+ 256 / 8);
+
+ if (commands[0] || commands[1] || commands[2] || commands[3] || /* only word 7 may be non-zero ... */
+ commands[4] || commands[5] || commands[6] ||
+ commands[7] >= BIT(IEEE802154_CMD_GTS_REQ + 1)) /* ... and only up to the GTS_REQ bit */
+ return -EINVAL;
+
+ key->cmd_frame_ids = commands[7];
+ }
+
+ key->frame_types = frames;
+
+ nla_memcpy(key->key, info->attrs[IEEE802154_ATTR_LLSEC_KEY_BYTES],
+ IEEE802154_LLSEC_KEY_SIZE);
+
+ return 0;
+}
+/*
+ * Parse a key and its identifier from @info and pass both to
+ * ops->llsec->add_key(). Returns -EINVAL when either cannot be parsed.
+ */
+static int llsec_add_key(struct net_device *dev, struct genl_info *info)
+{
+ struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev);
+ struct ieee802154_llsec_key key;
+ struct ieee802154_llsec_key_id id;
+
+ if (ieee802154_llsec_parse_key(info, &key) ||
+ ieee802154_llsec_parse_key_id(info, &id))
+ return -EINVAL;
+
+ return ops->llsec->add_key(dev, &id, &key);
+}
+/*
+ * Genetlink handler for adding an llsec key. The request must carry both
+ * NLM_F_CREATE and NLM_F_EXCL, otherwise -EINVAL is returned.
+ */
+int ieee802154_llsec_add_key(struct sk_buff *skb, struct genl_info *info)
+{
+ if ((info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL)) !=
+ (NLM_F_CREATE | NLM_F_EXCL))
+ return -EINVAL;
+
+ return ieee802154_nl_llsec_change(skb, info, llsec_add_key);
+}
+/*
+ * Parse a key identifier from @info and pass it to ops->llsec->del_key().
+ * Returns -EINVAL when the identifier cannot be parsed.
+ */
+static int llsec_remove_key(struct net_device *dev, struct genl_info *info)
+{
+ struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev);
+ struct ieee802154_llsec_key_id id;
+
+ if (ieee802154_llsec_parse_key_id(info, &id))
+ return -EINVAL;
+
+ return ops->llsec->del_key(dev, &id);
+}
+/* Genetlink handler for deleting an llsec key; see llsec_remove_key(). */
+int ieee802154_llsec_del_key(struct sk_buff *skb, struct genl_info *info)
+{
+ return ieee802154_nl_llsec_change(skb, info, llsec_remove_key);
+}
+/*
+ * Append one IEEE802154_LLSEC_LIST_KEY message describing @key on @dev to
+ * @msg.
+ *
+ * Returns 0 on success or -EMSGSIZE when the header or an attribute cannot be
+ * added; a partially built message is cancelled first. @portid is accepted
+ * but not used by this body.
+ */
+static int
+ieee802154_nl_fill_key(struct sk_buff *msg, u32 portid, u32 seq,
+ const struct ieee802154_llsec_key_entry *key,
+ const struct net_device *dev)
+{
+ void *hdr;
+ u32 commands[256 / 32]; /* 256-bit command mask held as eight u32 words */
+
+ hdr = genlmsg_put(msg, 0, seq, &nl802154_family, NLM_F_MULTI,
+ IEEE802154_LLSEC_LIST_KEY);
+ if (!hdr)
+ goto out;
+
+ if (nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name) ||
+ nla_put_u32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex) ||
+ ieee802154_llsec_fill_key_id(msg, &key->id) ||
+ nla_put_u8(msg, IEEE802154_ATTR_LLSEC_KEY_USAGE_FRAME_TYPES,
+ key->key->frame_types))
+ goto nla_put_failure;
+
+ if (key->key->frame_types & BIT(IEEE802154_FC_TYPE_MAC_CMD)) { /* the command mask is only sent for keys usable on MAC command frames */
+ memset(commands, 0, sizeof(commands));
+ commands[7] = key->key->cmd_frame_ids; /* same word ieee802154_llsec_parse_key() reads it from */
+ if (nla_put(msg, IEEE802154_ATTR_LLSEC_KEY_USAGE_COMMANDS,
+ sizeof(commands), commands))
+ goto nla_put_failure;
+ }
+
+ if (nla_put(msg, IEEE802154_ATTR_LLSEC_KEY_BYTES,
+ IEEE802154_LLSEC_KEY_SIZE, key->key->key))
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+out:
+ return -EMSGSIZE;
+}
+/*
+ * Dump step for the key table: emit the entries of data->table->keys from
+ * offset data->s_idx onwards, advancing data->s_idx for each entry written.
+ * Returns 0 when the list is exhausted or -EMSGSIZE when an entry did not fit.
+ */
+static int llsec_iter_keys(struct llsec_dump_data *data)
+{
+ struct ieee802154_llsec_key_entry *pos;
+ int rc = 0, idx = 0;
+
+ list_for_each_entry(pos, &data->table->keys, list) {
+ if (idx++ < data->s_idx) /* skip entries before the start offset */
+ continue;
+
+ if (ieee802154_nl_fill_key(data->skb, data->portid,
+ data->nlmsg_seq, pos, data->dev)) {
+ rc = -EMSGSIZE;
+ break;
+ }
+
+ data->s_idx++;
+ }
+
+ return rc;
+}
+/* Netlink dump callback for llsec keys; runs llsec_iter_keys() per device. */
+int ieee802154_llsec_dump_keys(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ return ieee802154_llsec_dump_table(skb, cb, llsec_iter_keys);
+}
+
+
+/*
+ * Build the llsec device description @dev from the netlink attributes in
+ * @info.
+ *
+ * @dev is zeroed first. FRAME_COUNTER, HW_ADDR, DEV_OVERRIDE and DEV_KEY_MODE
+ * are mandatory; PAN_ID and SHORT_ADDR must be given together or not at all.
+ *
+ * Returns 0 on success or -EINVAL for a missing attribute or a key mode at or
+ * above __IEEE802154_LLSEC_DEVKEY_MAX.
+ */
+static int
+llsec_parse_dev(struct genl_info *info,
+ struct ieee802154_llsec_device *dev)
+{
+ memset(dev, 0, sizeof(*dev));
+
+ if (!info->attrs[IEEE802154_ATTR_LLSEC_FRAME_COUNTER] ||
+ !info->attrs[IEEE802154_ATTR_HW_ADDR] ||
+ !info->attrs[IEEE802154_ATTR_LLSEC_DEV_OVERRIDE] ||
+ !info->attrs[IEEE802154_ATTR_LLSEC_DEV_KEY_MODE] ||
+ (!!info->attrs[IEEE802154_ATTR_PAN_ID] != /* both or neither */
+ !!info->attrs[IEEE802154_ATTR_SHORT_ADDR]))
+ return -EINVAL;
+
+ if (info->attrs[IEEE802154_ATTR_PAN_ID]) {
+ dev->pan_id = nla_get_shortaddr(info->attrs[IEEE802154_ATTR_PAN_ID]);
+ dev->short_addr = nla_get_shortaddr(info->attrs[IEEE802154_ATTR_SHORT_ADDR]);
+ } else {
+ dev->short_addr = cpu_to_le16(IEEE802154_ADDR_UNDEF); /* no short address supplied; pan_id stays zero from the memset */
+ }
+
+ dev->hwaddr = nla_get_hwaddr(info->attrs[IEEE802154_ATTR_HW_ADDR]);
+ dev->frame_counter = nla_get_u32(info->attrs[IEEE802154_ATTR_LLSEC_FRAME_COUNTER]);
+ dev->seclevel_exempt = !!nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_DEV_OVERRIDE]); /* any non-zero value becomes 1 */
+ dev->key_mode = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_DEV_KEY_MODE]);
+
+ if (dev->key_mode >= __IEEE802154_LLSEC_DEVKEY_MAX)
+ return -EINVAL;
+
+ return 0;
+}
+/*
+ * Parse a device description from @info and pass it to ops->llsec->add_dev().
+ * Returns -EINVAL when the description cannot be parsed.
+ */
+static int llsec_add_dev(struct net_device *dev, struct genl_info *info)
+{
+ struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev);
+ struct ieee802154_llsec_device desc;
+
+ if (llsec_parse_dev(info, &desc))
+ return -EINVAL;
+
+ return ops->llsec->add_dev(dev, &desc);
+}
+/*
+ * Genetlink handler for adding an llsec device. The request must carry both
+ * NLM_F_CREATE and NLM_F_EXCL, otherwise -EINVAL is returned.
+ */
+int ieee802154_llsec_add_dev(struct sk_buff *skb, struct genl_info *info)
+{
+ if ((info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL)) !=
+ (NLM_F_CREATE | NLM_F_EXCL))
+ return -EINVAL;
+
+ return ieee802154_nl_llsec_change(skb, info, llsec_add_dev);
+}
+/*
+ * Read the HW_ADDR attribute from @info and pass it to ops->llsec->del_dev().
+ * Returns -EINVAL when the attribute is missing.
+ */
+static int llsec_del_dev(struct net_device *dev, struct genl_info *info)
+{
+ struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev);
+ __le64 devaddr;
+
+ if (!info->attrs[IEEE802154_ATTR_HW_ADDR])
+ return -EINVAL;
+
+ devaddr = nla_get_hwaddr(info->attrs[IEEE802154_ATTR_HW_ADDR]);
+
+ return ops->llsec->del_dev(dev, devaddr);
+}
+/* Genetlink handler for deleting an llsec device; see llsec_del_dev(). */
+int ieee802154_llsec_del_dev(struct sk_buff *skb, struct genl_info *info)
+{
+ return ieee802154_nl_llsec_change(skb, info, llsec_del_dev);
+}
+/*
+ * Append one IEEE802154_LLSEC_LIST_DEV message describing llsec device @desc
+ * on @dev to @msg.
+ *
+ * Returns 0 on success or -EMSGSIZE when the header or an attribute cannot be
+ * added; a partially built message is cancelled first. @portid is accepted
+ * but not used by this body.
+ */
+static int
+ieee802154_nl_fill_dev(struct sk_buff *msg, u32 portid, u32 seq,
+ const struct ieee802154_llsec_device *desc,
+ const struct net_device *dev)
+{
+ void *hdr;
+
+ hdr = genlmsg_put(msg, 0, seq, &nl802154_family, NLM_F_MULTI,
+ IEEE802154_LLSEC_LIST_DEV);
+ if (!hdr)
+ goto out;
+
+ if (nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name) ||
+ nla_put_u32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex) ||
+ nla_put_shortaddr(msg, IEEE802154_ATTR_PAN_ID, desc->pan_id) ||
+ nla_put_shortaddr(msg, IEEE802154_ATTR_SHORT_ADDR,
+ desc->short_addr) ||
+ nla_put_hwaddr(msg, IEEE802154_ATTR_HW_ADDR, desc->hwaddr) ||
+ nla_put_u32(msg, IEEE802154_ATTR_LLSEC_FRAME_COUNTER,
+ desc->frame_counter) ||
+ nla_put_u8(msg, IEEE802154_ATTR_LLSEC_DEV_OVERRIDE,
+ desc->seclevel_exempt) ||
+ nla_put_u8(msg, IEEE802154_ATTR_LLSEC_DEV_KEY_MODE, desc->key_mode))
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+out:
+ return -EMSGSIZE;
+}
+/*
+ * Dump step for the device table: emit the entries of data->table->devices
+ * from offset data->s_idx onwards, advancing data->s_idx for each entry
+ * written. Returns 0 when the list is exhausted or -EMSGSIZE when an entry
+ * did not fit.
+ */
+static int llsec_iter_devs(struct llsec_dump_data *data)
+{
+ struct ieee802154_llsec_device *pos;
+ int rc = 0, idx = 0;
+
+ list_for_each_entry(pos, &data->table->devices, list) {
+ if (idx++ < data->s_idx) /* skip entries before the start offset */
+ continue;
+
+ if (ieee802154_nl_fill_dev(data->skb, data->portid,
+ data->nlmsg_seq, pos, data->dev)) {
+ rc = -EMSGSIZE;
+ break;
+ }
+
+ data->s_idx++;
+ }
+
+ return rc;
+}
+/* Netlink dump callback for llsec devices; runs llsec_iter_devs() per device. */
+int ieee802154_llsec_dump_devs(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ return ieee802154_llsec_dump_table(skb, cb, llsec_iter_devs);
+}
+
+
+/*
+ * Parse a device address, frame counter and key identifier from @info and
+ * pass them to ops->llsec->add_devkey(). Returns -EINVAL when FRAME_COUNTER
+ * or HW_ADDR is missing or the key identifier cannot be parsed.
+ */
+static int llsec_add_devkey(struct net_device *dev, struct genl_info *info)
+{
+ struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev);
+ struct ieee802154_llsec_device_key key;
+ __le64 devaddr;
+
+ if (!info->attrs[IEEE802154_ATTR_LLSEC_FRAME_COUNTER] ||
+ !info->attrs[IEEE802154_ATTR_HW_ADDR] ||
+ ieee802154_llsec_parse_key_id(info, &key.key_id))
+ return -EINVAL;
+
+ devaddr = nla_get_hwaddr(info->attrs[IEEE802154_ATTR_HW_ADDR]);
+ key.frame_counter = nla_get_u32(info->attrs[IEEE802154_ATTR_LLSEC_FRAME_COUNTER]);
+
+ return ops->llsec->add_devkey(dev, devaddr, &key);
+}
+/*
+ * Genetlink handler for adding an llsec device key. The request must carry
+ * both NLM_F_CREATE and NLM_F_EXCL, otherwise -EINVAL is returned.
+ */
+int ieee802154_llsec_add_devkey(struct sk_buff *skb, struct genl_info *info)
+{
+ if ((info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL)) !=
+ (NLM_F_CREATE | NLM_F_EXCL))
+ return -EINVAL;
+
+ return ieee802154_nl_llsec_change(skb, info, llsec_add_devkey);
+}
+/*
+ * Parse a device address and key identifier from @info and pass them to
+ * ops->llsec->del_devkey(). Returns -EINVAL when HW_ADDR is missing or the
+ * key identifier cannot be parsed.
+ */
+static int llsec_del_devkey(struct net_device *dev, struct genl_info *info)
+{
+ struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev);
+ struct ieee802154_llsec_device_key key; /* only key.key_id is filled in here */
+ __le64 devaddr;
+
+ if (!info->attrs[IEEE802154_ATTR_HW_ADDR] ||
+ ieee802154_llsec_parse_key_id(info, &key.key_id))
+ return -EINVAL;
+
+ devaddr = nla_get_hwaddr(info->attrs[IEEE802154_ATTR_HW_ADDR]);
+
+ return ops->llsec->del_devkey(dev, devaddr, &key);
+}
+/* Genetlink handler for deleting an llsec device key; see llsec_del_devkey(). */
+int ieee802154_llsec_del_devkey(struct sk_buff *skb, struct genl_info *info)
+{
+ return ieee802154_nl_llsec_change(skb, info, llsec_del_devkey);
+}
+/*
+ * Append one IEEE802154_LLSEC_LIST_DEVKEY message describing @devkey of the
+ * llsec device with address @devaddr on @dev to @msg.
+ *
+ * Returns 0 on success or -EMSGSIZE when the header or an attribute cannot be
+ * added; a partially built message is cancelled first. @portid is accepted
+ * but not used by this body.
+ */
+static int
+ieee802154_nl_fill_devkey(struct sk_buff *msg, u32 portid, u32 seq,
+ __le64 devaddr,
+ const struct ieee802154_llsec_device_key *devkey,
+ const struct net_device *dev)
+{
+ void *hdr;
+
+ hdr = genlmsg_put(msg, 0, seq, &nl802154_family, NLM_F_MULTI,
+ IEEE802154_LLSEC_LIST_DEVKEY);
+ if (!hdr)
+ goto out;
+
+ if (nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name) ||
+ nla_put_u32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex) ||
+ nla_put_hwaddr(msg, IEEE802154_ATTR_HW_ADDR, devaddr) ||
+ nla_put_u32(msg, IEEE802154_ATTR_LLSEC_FRAME_COUNTER,
+ devkey->frame_counter) ||
+ ieee802154_llsec_fill_key_id(msg, &devkey->key_id))
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+out:
+ return -EMSGSIZE;
+}
+
+/*
+ * Dump step for the per-device key lists: for every entry of
+ * data->table->devices from offset data->s_idx onwards, emit the entries of
+ * its key list.
+ *
+ * data->s_idx2 is the key offset inside the device at data->s_idx, so it is
+ * cleared whenever a device has been emitted completely. Both offsets are
+ * advanced as entries are written.
+ *
+ * Returns 0 when everything was emitted or -EMSGSIZE when an entry did not
+ * fit.
+ */
+static int llsec_iter_devkeys(struct llsec_dump_data *data)
+{
+ struct ieee802154_llsec_device *dpos;
+ struct ieee802154_llsec_device_key *kpos;
+ int idx = 0, idx2;
+
+ list_for_each_entry(dpos, &data->table->devices, list) {
+ if (idx++ < data->s_idx)
+ continue;
+
+ idx2 = 0;
+
+ list_for_each_entry(kpos, &dpos->keys, list) {
+ if (idx2++ < data->s_idx2)
+ continue;
+
+ if (ieee802154_nl_fill_devkey(data->skb, data->portid,
+ data->nlmsg_seq,
+ dpos->hwaddr, kpos,
+ data->dev))
+ return -EMSGSIZE;
+
+ data->s_idx2++;
+ }
+
+ /* The key offset belongs to the device just finished; if it
+ * were carried over, that many keys of every following
+ * device would be skipped.
+ */
+ data->s_idx2 = 0;
+ data->s_idx++;
+ }
+
+ return 0;
+}
+/* Netlink dump callback for llsec device keys; runs llsec_iter_devkeys() per device. */
+int ieee802154_llsec_dump_devkeys(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ return ieee802154_llsec_dump_table(skb, cb, llsec_iter_devkeys);
+}
+
+
+/*
+ * Build the security level entry @sl from the netlink attributes in @info.
+ *
+ * @sl is zeroed first. LLSEC_FRAME_TYPE, LLSEC_SECLEVELS and
+ * LLSEC_DEV_OVERRIDE are mandatory; LLSEC_CMD_FRAME_ID is required, and only
+ * read, when the frame type is IEEE802154_FC_TYPE_MAC_CMD.
+ *
+ * Returns 0 on success or -EINVAL when a required attribute is missing.
+ */
+static int
+llsec_parse_seclevel(struct genl_info *info,
+ struct ieee802154_llsec_seclevel *sl)
+{
+ memset(sl, 0, sizeof(*sl));
+
+ if (!info->attrs[IEEE802154_ATTR_LLSEC_FRAME_TYPE] ||
+ !info->attrs[IEEE802154_ATTR_LLSEC_SECLEVELS] ||
+ !info->attrs[IEEE802154_ATTR_LLSEC_DEV_OVERRIDE])
+ return -EINVAL;
+
+ sl->frame_type = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_FRAME_TYPE]);
+ if (sl->frame_type == IEEE802154_FC_TYPE_MAC_CMD) {
+ if (!info->attrs[IEEE802154_ATTR_LLSEC_CMD_FRAME_ID])
+ return -EINVAL;
+
+ sl->cmd_frame_id = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_CMD_FRAME_ID]);
+ }
+
+ sl->sec_levels = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_SECLEVELS]);
+ sl->device_override = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_DEV_OVERRIDE]);
+
+ return 0;
+}
+/*
+ * Parse a security level entry from @info and pass it to
+ * ops->llsec->add_seclevel(). Returns -EINVAL when it cannot be parsed.
+ */
+static int llsec_add_seclevel(struct net_device *dev, struct genl_info *info)
+{
+ struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev);
+ struct ieee802154_llsec_seclevel sl;
+
+ if (llsec_parse_seclevel(info, &sl))
+ return -EINVAL;
+
+ return ops->llsec->add_seclevel(dev, &sl);
+}
+/*
+ * Genetlink handler for adding an llsec security level. The request must
+ * carry both NLM_F_CREATE and NLM_F_EXCL, otherwise -EINVAL is returned.
+ */
+int ieee802154_llsec_add_seclevel(struct sk_buff *skb, struct genl_info *info)
+{
+ if ((info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL)) !=
+ (NLM_F_CREATE | NLM_F_EXCL))
+ return -EINVAL;
+
+ return ieee802154_nl_llsec_change(skb, info, llsec_add_seclevel);
+}
+/*
+ * Parse a security level entry from @info and pass it to
+ * ops->llsec->del_seclevel(). Returns -EINVAL when it cannot be parsed.
+ */
+static int llsec_del_seclevel(struct net_device *dev, struct genl_info *info)
+{
+ struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev);
+ struct ieee802154_llsec_seclevel sl;
+
+ if (llsec_parse_seclevel(info, &sl))
+ return -EINVAL;
+
+ return ops->llsec->del_seclevel(dev, &sl);
+}
+/* Genetlink handler for deleting an llsec security level; see llsec_del_seclevel(). */
+int ieee802154_llsec_del_seclevel(struct sk_buff *skb, struct genl_info *info)
+{
+ return ieee802154_nl_llsec_change(skb, info, llsec_del_seclevel);
+}
+/*
+ * Append one IEEE802154_LLSEC_LIST_SECLEVEL message describing @sl on @dev to
+ * @msg.
+ *
+ * Returns 0 on success or -EMSGSIZE when the header or an attribute cannot be
+ * added; a partially built message is cancelled first. @portid is accepted
+ * but not used by this body.
+ */
+static int
+ieee802154_nl_fill_seclevel(struct sk_buff *msg, u32 portid, u32 seq,
+ const struct ieee802154_llsec_seclevel *sl,
+ const struct net_device *dev)
+{
+ void *hdr;
+
+ hdr = genlmsg_put(msg, 0, seq, &nl802154_family, NLM_F_MULTI,
+ IEEE802154_LLSEC_LIST_SECLEVEL);
+ if (!hdr)
+ goto out;
+
+ if (nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name) ||
+ nla_put_u32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex) ||
+ nla_put_u8(msg, IEEE802154_ATTR_LLSEC_FRAME_TYPE, sl->frame_type) ||
+ nla_put_u8(msg, IEEE802154_ATTR_LLSEC_SECLEVELS, sl->sec_levels) ||
+ nla_put_u8(msg, IEEE802154_ATTR_LLSEC_DEV_OVERRIDE,
+ sl->device_override))
+ goto nla_put_failure;
+
+ if (sl->frame_type == IEEE802154_FC_TYPE_MAC_CMD && /* the command id is only sent for MAC command frames */
+ nla_put_u8(msg, IEEE802154_ATTR_LLSEC_CMD_FRAME_ID,
+ sl->cmd_frame_id))
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+out:
+ return -EMSGSIZE;
+}
+/*
+ * Dump step for the security level table: emit the entries of
+ * data->table->security_levels from offset data->s_idx onwards, advancing
+ * data->s_idx for each entry written. Returns 0 when the list is exhausted or
+ * -EMSGSIZE when an entry did not fit.
+ */
+static int llsec_iter_seclevels(struct llsec_dump_data *data)
+{
+ struct ieee802154_llsec_seclevel *pos;
+ int rc = 0, idx = 0;
+
+ list_for_each_entry(pos, &data->table->security_levels, list) {
+ if (idx++ < data->s_idx) /* skip entries before the start offset */
+ continue;
+
+ if (ieee802154_nl_fill_seclevel(data->skb, data->portid,
+ data->nlmsg_seq, pos,
+ data->dev)) {
+ rc = -EMSGSIZE;
+ break;
+ }
+
+ data->s_idx++;
+ }
+
+ return rc;
+}
+/* Netlink dump callback for llsec security levels; runs llsec_iter_seclevels() per device. */
+int ieee802154_llsec_dump_seclevels(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ return ieee802154_llsec_dump_table(skb, cb, llsec_iter_seclevels);
+}
diff --git a/net/ieee802154/nl_policy.c b/net/ieee802154/nl_policy.c
index 6adda4d46f9..3a703ab8834 100644
--- a/net/ieee802154/nl_policy.c
+++ b/net/ieee802154/nl_policy.c
@@ -52,5 +52,31 @@ const struct nla_policy ieee802154_policy[IEEE802154_ATTR_MAX + 1] = {
[IEEE802154_ATTR_DURATION] = { .type = NLA_U8, },
[IEEE802154_ATTR_ED_LIST] = { .len = 27 },
[IEEE802154_ATTR_CHANNEL_PAGE_LIST] = { .len = 32 * 4, },
+
+ [IEEE802154_ATTR_TXPOWER] = { .type = NLA_S8, },
+ [IEEE802154_ATTR_LBT_ENABLED] = { .type = NLA_U8, },
+ [IEEE802154_ATTR_CCA_MODE] = { .type = NLA_U8, },
+ [IEEE802154_ATTR_CCA_ED_LEVEL] = { .type = NLA_S32, },
+ [IEEE802154_ATTR_CSMA_RETRIES] = { .type = NLA_U8, },
+ [IEEE802154_ATTR_CSMA_MIN_BE] = { .type = NLA_U8, },
+ [IEEE802154_ATTR_CSMA_MAX_BE] = { .type = NLA_U8, },
+
+ [IEEE802154_ATTR_FRAME_RETRIES] = { .type = NLA_S8, },
+
+ [IEEE802154_ATTR_LLSEC_ENABLED] = { .type = NLA_U8, },
+ [IEEE802154_ATTR_LLSEC_SECLEVEL] = { .type = NLA_U8, },
+ [IEEE802154_ATTR_LLSEC_KEY_MODE] = { .type = NLA_U8, },
+ [IEEE802154_ATTR_LLSEC_KEY_SOURCE_SHORT] = { .type = NLA_U32, },
+ [IEEE802154_ATTR_LLSEC_KEY_SOURCE_EXTENDED] = { .type = NLA_HW_ADDR, },
+ [IEEE802154_ATTR_LLSEC_KEY_ID] = { .type = NLA_U8, },
+ [IEEE802154_ATTR_LLSEC_FRAME_COUNTER] = { .type = NLA_U32 },
+ [IEEE802154_ATTR_LLSEC_KEY_BYTES] = { .len = 16, },
+ [IEEE802154_ATTR_LLSEC_KEY_USAGE_FRAME_TYPES] = { .type = NLA_U8, },
+ [IEEE802154_ATTR_LLSEC_KEY_USAGE_COMMANDS] = { .len = 258 / 8 },
+ [IEEE802154_ATTR_LLSEC_FRAME_TYPE] = { .type = NLA_U8, },
+ [IEEE802154_ATTR_LLSEC_CMD_FRAME_ID] = { .type = NLA_U8, },
+ [IEEE802154_ATTR_LLSEC_SECLEVELS] = { .type = NLA_U8, },
+ [IEEE802154_ATTR_LLSEC_DEV_OVERRIDE] = { .type = NLA_U8, },
+ [IEEE802154_ATTR_LLSEC_DEV_KEY_MODE] = { .type = NLA_U8, },
};
diff --git a/net/ieee802154/raw.c b/net/ieee802154/raw.c
index 41f538b8e59..74d54fae33d 100644
--- a/net/ieee802154/raw.c
+++ b/net/ieee802154/raw.c
@@ -28,6 +28,7 @@
#include <linux/slab.h>
#include <net/sock.h>
#include <net/af_ieee802154.h>
+#include <net/ieee802154_netdev.h>
#include "af802154.h"
@@ -55,21 +56,24 @@ static void raw_close(struct sock *sk, long timeout)
sk_common_release(sk);
}
-static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int len)
+static int raw_bind(struct sock *sk, struct sockaddr *_uaddr, int len)
{
- struct sockaddr_ieee802154 *addr = (struct sockaddr_ieee802154 *)uaddr;
+ struct ieee802154_addr addr;
+ struct sockaddr_ieee802154 *uaddr = (struct sockaddr_ieee802154 *)_uaddr;
int err = 0;
struct net_device *dev = NULL;
- if (len < sizeof(*addr))
+ if (len < sizeof(*uaddr))
return -EINVAL;
- if (addr->family != AF_IEEE802154)
+ uaddr = (struct sockaddr_ieee802154 *)_uaddr;
+ if (uaddr->family != AF_IEEE802154)
return -EINVAL;
lock_sock(sk);
- dev = ieee802154_get_dev(sock_net(sk), &addr->addr);
+ ieee802154_addr_from_sa(&addr, &uaddr->addr);
+ dev = ieee802154_get_dev(sock_net(sk), &addr);
if (!dev) {
err = -ENODEV;
goto out;
@@ -209,6 +213,10 @@ out:
static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
+ skb = skb_share_check(skb, GFP_ATOMIC);
+ if (!skb)
+ return NET_RX_DROP;
+
if (sock_queue_rcv_skb(sk, skb) < 0) {
kfree_skb(skb);
return NET_RX_DROP;
diff --git a/net/ieee802154/reassembly.c b/net/ieee802154/reassembly.c
new file mode 100644
index 00000000000..6f1428c4870
--- /dev/null
+++ b/net/ieee802154/reassembly.c
@@ -0,0 +1,585 @@
+/* 6LoWPAN fragment reassembly
+ *
+ *
+ * Authors:
+ * Alexander Aring <aar@pengutronix.de>
+ *
+ * Based on: net/ipv6/reassembly.c
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#define pr_fmt(fmt) "6LoWPAN: " fmt
+
+#include <linux/net.h>
+#include <linux/list.h>
+#include <linux/netdevice.h>
+#include <linux/random.h>
+#include <linux/jhash.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+
+#include <net/ieee802154_netdev.h>
+#include <net/6lowpan.h>
+#include <net/ipv6.h>
+#include <net/inet_frag.h>
+
+#include "reassembly.h"
+
+/* Per-fragment metadata parsed out of the 6LoWPAN fragment header by
+ * lowpan_get_frag_info() and kept in the skb control buffer (see lowpan_cb()).
+ */
+struct lowpan_frag_info {
+ __be16 d_tag; /* datagram tag, stored exactly as read from the packet */
+ u16 d_size; /* datagram size: 3 low bits of byte 0 plus all of byte 1 */
+ u8 d_offset; /* fragment offset; shifted left by 3 to get bytes, 0 for FRAG1 */
+};
+
+/* View the control buffer of @skb as 6LoWPAN fragment metadata. */
+static struct lowpan_frag_info *lowpan_cb(struct sk_buff *skb)
+{
+	void *cb = skb->cb;
+
+	return cb;
+}
+
+static struct inet_frags lowpan_frags;
+
+static int lowpan_frag_reasm(struct lowpan_frag_queue *fq,
+ struct sk_buff *prev, struct net_device *dev);
+
+/* Hash the reassembly key (tag, datagram size, source, destination) into a
+ * bucket index below INETFRAGS_HASHSZ, seeded with lowpan_frags.rnd.
+ */
+static unsigned int lowpan_hash_frag(__be16 tag, u16 d_size,
+				     const struct ieee802154_addr *saddr,
+				     const struct ieee802154_addr *daddr)
+{
+	u32 src_word, dst_word, id_word;
+
+	/* The seed is filled in lazily, the first time a hash is needed. */
+	net_get_random_once(&lowpan_frags.rnd, sizeof(lowpan_frags.rnd));
+
+	src_word = ieee802154_addr_hash(saddr);
+	dst_word = ieee802154_addr_hash(daddr);
+	id_word = (__force u32)(tag + (d_size << 16));
+
+	return jhash_3words(src_word, dst_word, id_word, lowpan_frags.rnd) &
+	       (INETFRAGS_HASHSZ - 1);
+}
+
+/* inet_frags hash callback: hash an existing queue by its stored key. */
+static unsigned int lowpan_hashfn(struct inet_frag_queue *q)
+{
+	struct lowpan_frag_queue *fq =
+		container_of(q, struct lowpan_frag_queue, q);
+
+	return lowpan_hash_frag(fq->tag, fq->d_size, &fq->saddr, &fq->daddr);
+}
+
+/* inet_frags match callback: does queue @q carry the key described by @a
+ * (a struct lowpan_create_arg)? Tag, size, source and destination must all
+ * agree.
+ */
+static bool lowpan_frag_match(struct inet_frag_queue *q, void *a)
+{
+	struct lowpan_create_arg *key = a;
+	struct lowpan_frag_queue *fq =
+		container_of(q, struct lowpan_frag_queue, q);
+
+	if (fq->tag != key->tag || fq->d_size != key->d_size)
+		return false;
+
+	if (!ieee802154_addr_equal(&fq->saddr, key->src))
+		return false;
+
+	return ieee802154_addr_equal(&fq->daddr, key->dst);
+}
+
+/* inet_frags constructor: copy the lookup key from @a (a struct
+ * lowpan_create_arg) into the freshly created queue @q.
+ */
+static void lowpan_frag_init(struct inet_frag_queue *q, void *a)
+{
+	struct lowpan_create_arg *key = a;
+	struct lowpan_frag_queue *fq =
+		container_of(q, struct lowpan_frag_queue, q);
+
+	/* Addresses are copied by value; the queue outlives the caller's key. */
+	fq->saddr = *key->src;
+	fq->daddr = *key->dst;
+	fq->d_size = key->d_size;
+	fq->tag = key->tag;
+}
+
+/* Expiry handler installed as lowpan_frags.frag_expire.
+ *
+ * @data is the address of the struct inet_frag_queue embedded in a
+ * struct lowpan_frag_queue. A queue that has not completed reassembly is
+ * killed; in every case the reference passed in with @data is dropped.
+ */
+static void lowpan_frag_expire(unsigned long data)
+{
+	struct lowpan_frag_queue *fq;
+
+	/* Queues of this family are lowpan_frag_queue objects, so recover
+	 * that type rather than the IPv6 struct frag_queue.
+	 */
+	fq = container_of((struct inet_frag_queue *)data,
+			  struct lowpan_frag_queue, q);
+
+	spin_lock(&fq->q.lock);
+
+	/* Already reassembled: there is nothing left to tear down. */
+	if (fq->q.last_in & INET_FRAG_COMPLETE)
+		goto out;
+
+	inet_frag_kill(&fq->q, &lowpan_frags);
+out:
+	spin_unlock(&fq->q.lock);
+	inet_frag_put(&fq->q, &lowpan_frags);
+}
+
+/* Look up the reassembly queue for a fragment through inet_frag_find(),
+ * keyed by (tag, datagram size, source, destination).
+ *
+ * Returns the queue, or NULL when inet_frag_find() yields NULL or an error
+ * pointer (after giving inet_frag_maybe_warn_overflow() a chance to log).
+ */
+static inline struct lowpan_frag_queue *
+fq_find(struct net *net, const struct lowpan_frag_info *frag_info,
+ const struct ieee802154_addr *src,
+ const struct ieee802154_addr *dst)
+{
+ struct inet_frag_queue *q;
+ struct lowpan_create_arg arg;
+ unsigned int hash;
+ struct netns_ieee802154_lowpan *ieee802154_lowpan =
+ net_ieee802154_lowpan(net);
+
+ arg.tag = frag_info->d_tag;
+ arg.d_size = frag_info->d_size;
+ arg.src = src;
+ arg.dst = dst;
+
+ /* NOTE(review): there is no read_unlock() in this function; presumably
+ * inet_frag_find() releases lowpan_frags.lock before it returns -
+ * confirm against the inet_frag implementation.
+ */
+ read_lock(&lowpan_frags.lock);
+ hash = lowpan_hash_frag(frag_info->d_tag, frag_info->d_size, src, dst);
+
+ q = inet_frag_find(&ieee802154_lowpan->frags,
+ &lowpan_frags, &arg, hash);
+ if (IS_ERR_OR_NULL(q)) {
+ inet_frag_maybe_warn_overflow(q, pr_fmt());
+ return NULL;
+ }
+ return container_of(q, struct lowpan_frag_queue, q);
+}
+
+/* Insert one fragment into its reassembly queue and, once the first and the
+ * last fragment have been seen and the accumulated byte count equals the
+ * queue length, hand the queue to lowpan_frag_reasm().
+ *
+ * Called from lowpan_frag_rcv() with fq->q.lock held. Returns the result of
+ * lowpan_frag_reasm() when reassembly is attempted and -1 otherwise. On the
+ * err path the skb is freed here; on the other -1 path it stays queued.
+ */
+static int lowpan_frag_queue(struct lowpan_frag_queue *fq,
+ struct sk_buff *skb, const u8 frag_type)
+{
+ struct sk_buff *prev, *next;
+ struct net_device *dev;
+ int end, offset;
+
+ if (fq->q.last_in & INET_FRAG_COMPLETE)
+ goto err;
+
+ /* d_offset is shifted left by 3 to obtain a byte offset; end is the
+ * datagram size announced in the fragment header.
+ */
+ offset = lowpan_cb(skb)->d_offset << 3;
+ end = lowpan_cb(skb)->d_size;
+
+ /* Is this the final fragment? */
+ if (offset + skb->len == end) {
+ /* If we already have some bits beyond end
+ * or have different end, the segment is corrupted.
+ */
+ if (end < fq->q.len ||
+ ((fq->q.last_in & INET_FRAG_LAST_IN) && end != fq->q.len))
+ goto err;
+ fq->q.last_in |= INET_FRAG_LAST_IN;
+ fq->q.len = end;
+ } else {
+ if (end > fq->q.len) {
+ /* Some bits beyond end -> corruption. */
+ if (fq->q.last_in & INET_FRAG_LAST_IN)
+ goto err;
+ fq->q.len = end;
+ }
+ }
+
+ /* Find out which fragments are in front and at the back of us
+ * in the chain of fragments so far. We must know where to put
+ * this fragment, right?
+ */
+ /* Fast path: an empty list, or an offset beyond the current tail,
+ * means the fragment is simply appended.
+ */
+ prev = fq->q.fragments_tail;
+ if (!prev || lowpan_cb(prev)->d_offset < lowpan_cb(skb)->d_offset) {
+ next = NULL;
+ goto found;
+ }
+ /* Otherwise walk from the head to the first entry whose offset is
+ * not smaller than ours.
+ */
+ prev = NULL;
+ for (next = fq->q.fragments; next != NULL; next = next->next) {
+ if (lowpan_cb(next)->d_offset >= lowpan_cb(skb)->d_offset)
+ break; /* bingo! */
+ prev = next;
+ }
+
+found:
+ /* Insert this fragment in the chain of fragments. */
+ skb->next = next;
+ if (!next)
+ fq->q.fragments_tail = skb;
+ if (prev)
+ prev->next = skb;
+ else
+ fq->q.fragments = skb;
+
+ /* Remember the device for lowpan_frag_reasm() and detach it from the
+ * queued skb.
+ */
+ dev = skb->dev;
+ if (dev)
+ skb->dev = NULL;
+
+ fq->q.stamp = skb->tstamp;
+ if (frag_type == LOWPAN_DISPATCH_FRAG1) {
+ /* Calculate uncomp. 6lowpan header to estimate full size */
+ fq->q.meat += lowpan_uncompress_size(skb, NULL);
+ fq->q.last_in |= INET_FRAG_FIRST_IN;
+ } else {
+ fq->q.meat += skb->len;
+ }
+ add_frag_mem_limit(&fq->q, skb->truesize);
+
+ if (fq->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
+ fq->q.meat == fq->q.len) {
+ int res;
+ unsigned long orefdst = skb->_skb_refdst;
+
+ /* Clear the dst reference for the duration of the reassembly
+ * and put it back afterwards.
+ */
+ skb->_skb_refdst = 0UL;
+ res = lowpan_frag_reasm(fq, prev, dev);
+ skb->_skb_refdst = orefdst;
+ return res;
+ }
+
+ inet_frag_lru_move(&fq->q);
+ return -1;
+err:
+ kfree_skb(skb);
+ return -1;
+}
+
+/* Build the complete datagram from the fragments queued on fq.
+ *
+ * Returns 1 on success, with all fragments merged into the head skb (the
+ * skb that was just received is turned into the head when it is not the
+ * first entry), and -1 when an allocation fails.
+ *
+ * It is called with locked fq, and caller must check that
+ * queue is eligible for reassembly i.e. it is not COMPLETE,
+ * the last and the first frames arrived and all the bits are here.
+ */
+static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *prev,
+ struct net_device *dev)
+{
+ struct sk_buff *fp, *head = fq->q.fragments;
+ int sum_truesize;
+
+ inet_frag_kill(&fq->q, &lowpan_frags);
+
+ /* Make the one we just received the head. */
+ if (prev) {
+ /* Put a clone (fp) in the list position of the received skb,
+ * then morph the received skb into the old head so that it
+ * becomes the first element of the list.
+ */
+ head = prev->next;
+ fp = skb_clone(head, GFP_ATOMIC);
+
+ if (!fp)
+ goto out_oom;
+
+ fp->next = head->next;
+ if (!fp->next)
+ fq->q.fragments_tail = fp;
+ prev->next = fp;
+
+ skb_morph(head, fq->q.fragments);
+ head->next = fq->q.fragments->next;
+
+ consume_skb(fq->q.fragments);
+ fq->q.fragments = head;
+ }
+
+ /* Head of list must not be cloned. */
+ if (skb_unclone(head, GFP_ATOMIC))
+ goto out_oom;
+
+ /* If the first fragment is fragmented itself, we split
+ * it to two chunks: the first with data and paged part
+ * and the second, holding only fragments.
+ */
+ if (skb_has_frag_list(head)) {
+ struct sk_buff *clone;
+ int i, plen = 0;
+
+ clone = alloc_skb(0, GFP_ATOMIC);
+ if (!clone)
+ goto out_oom;
+ clone->next = head->next;
+ head->next = clone;
+ skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
+ skb_frag_list_init(head);
+ for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
+ plen += skb_frag_size(&skb_shinfo(head)->frags[i]);
+ clone->len = head->data_len - plen;
+ clone->data_len = clone->len;
+ head->data_len -= clone->len;
+ head->len -= clone->len;
+ add_frag_mem_limit(&fq->q, clone->truesize);
+ }
+
+ /* NOTE(review): head has already been dereferenced above, so this
+ * check cannot catch a NULL head - confirm whether it is still wanted.
+ */
+ WARN_ON(head == NULL);
+
+ /* Fold every remaining fragment into head: coalesce when possible,
+ * otherwise account it as part of head's frag_list.
+ */
+ sum_truesize = head->truesize;
+ for (fp = head->next; fp;) {
+ bool headstolen;
+ int delta;
+ struct sk_buff *next = fp->next;
+
+ sum_truesize += fp->truesize;
+ if (skb_try_coalesce(head, fp, &headstolen, &delta)) {
+ kfree_skb_partial(fp, headstolen);
+ } else {
+ if (!skb_shinfo(head)->frag_list)
+ skb_shinfo(head)->frag_list = fp;
+ head->data_len += fp->len;
+ head->len += fp->len;
+ head->truesize += fp->truesize;
+ }
+ fp = next;
+ }
+ sub_frag_mem_limit(&fq->q, sum_truesize);
+
+ head->next = NULL;
+ head->dev = dev;
+ head->tstamp = fq->q.stamp;
+
+ /* The queue no longer owns any skb. */
+ fq->q.fragments = NULL;
+ fq->q.fragments_tail = NULL;
+
+ return 1;
+out_oom:
+ /* NOTE(review): the queue was already killed above and the fragments
+ * are left on it; presumably they are released when the queue is
+ * destroyed - confirm.
+ */
+ net_dbg_ratelimited("lowpan_frag_reasm: no memory for reassembly\n");
+ return -1;
+}
+
+/* Pull the 6LoWPAN fragment header off @skb and fill @frag_info.
+ *
+ * The first two bytes yield the datagram size (3 low bits of byte 0, all of
+ * byte 1), the next two the datagram tag. FRAGN headers carry one more byte
+ * with the offset; for any other type the offset is 0 and the network
+ * header is reset to the current data position.
+ *
+ * Returns 0 on success or -EIO if any fetch from the skb failed.
+ */
+static int lowpan_get_frag_info(struct sk_buff *skb, const u8 frag_type,
+				struct lowpan_frag_info *frag_info)
+{
+	u8 hi = 0, lo = 0;
+	bool failed = lowpan_fetch_skb(skb, &hi, 1);
+
+	/* Every field is fetched even after a failure; errors accumulate. */
+	failed |= lowpan_fetch_skb(skb, &lo, 1);
+	frag_info->d_size = ((hi & 7) << 8) | lo;
+	failed |= lowpan_fetch_skb(skb, &frag_info->d_tag, 2);
+
+	if (frag_type != LOWPAN_DISPATCH_FRAGN) {
+		skb_reset_network_header(skb);
+		frag_info->d_offset = 0;
+	} else {
+		failed |= lowpan_fetch_skb(skb, &frag_info->d_offset, 1);
+	}
+
+	return unlikely(failed) ? -EIO : 0;
+}
+
+/* Receive one 6LoWPAN fragment.
+ *
+ * Parses the fragment header, rejects datagrams larger than the per-netns
+ * max_dsize, runs the evictor, then queues the fragment on its reassembly
+ * queue. Returns what lowpan_frag_queue() returns; when the header is bad,
+ * the datagram is too large or no queue is available, the skb is freed and
+ * -1 is returned.
+ */
+int lowpan_frag_rcv(struct sk_buff *skb, const u8 frag_type)
+{
+	struct net *net = dev_net(skb->dev);
+	struct netns_ieee802154_lowpan *ns = net_ieee802154_lowpan(net);
+	struct lowpan_frag_info *info = lowpan_cb(skb);
+	/* Copy the addresses out before the header is parsed: the parser
+	 * writes into skb->cb through lowpan_cb() (presumably the same
+	 * storage mac_cb() reads from - confirm).
+	 */
+	struct ieee802154_addr source = mac_cb(skb)->source;
+	struct ieee802154_addr dest = mac_cb(skb)->dest;
+	struct lowpan_frag_queue *fq;
+	int ret;
+
+	if (lowpan_get_frag_info(skb, frag_type, info) < 0)
+		goto drop;
+
+	if (info->d_size > ns->max_dsize)
+		goto drop;
+
+	inet_frag_evictor(&ns->frags, &lowpan_frags, false);
+
+	fq = fq_find(net, info, &source, &dest);
+	if (fq == NULL)
+		goto drop;
+
+	spin_lock(&fq->q.lock);
+	ret = lowpan_frag_queue(fq, skb, frag_type);
+	spin_unlock(&fq->q.lock);
+
+	inet_frag_put(&fq->q, &lowpan_frags);
+	return ret;
+
+drop:
+	kfree_skb(skb);
+	return -1;
+}
+EXPORT_SYMBOL(lowpan_frag_rcv);
+
+#ifdef CONFIG_SYSCTL
+/* Per-namespace sysctl template. The .data pointers refer to init_net;
+ * lowpan_frags_ns_sysctl_register() duplicates the table for other
+ * namespaces and repoints entries [0]..[3] by index, so the order of the
+ * entries below must stay in step with that function.
+ */
+static struct ctl_table lowpan_frags_ns_ctl_table[] = {
+ { /* [0] */
+ .procname = "6lowpanfrag_high_thresh",
+ .data = &init_net.ieee802154_lowpan.frags.high_thresh,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ { /* [1] */
+ .procname = "6lowpanfrag_low_thresh",
+ .data = &init_net.ieee802154_lowpan.frags.low_thresh,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ { /* [2] */
+ .procname = "6lowpanfrag_time",
+ .data = &init_net.ieee802154_lowpan.frags.timeout,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ { /* [3] */
+ .procname = "6lowpanfrag_max_datagram_size",
+ .data = &init_net.ieee802154_lowpan.max_dsize,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ { } /* terminator */
+};
+
+/* Global (not per-namespace) sysctl table, registered against init_net by
+ * lowpan_frags_sysctl_register().
+ */
+static struct ctl_table lowpan_frags_ctl_table[] = {
+ {
+ .procname = "6lowpanfrag_secret_interval",
+ .data = &lowpan_frags.secret_interval,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ { } /* terminator */
+};
+
+/* Register the per-namespace fragment sysctls for @net.
+ *
+ * init_net uses the static template directly. Any other namespace gets a
+ * private copy whose .data pointers are redirected to that namespace, and
+ * the copy is cut off at its first entry when the namespace is not owned by
+ * init_user_ns. Returns 0 on success or -ENOMEM.
+ */
+static int __net_init lowpan_frags_ns_sysctl_register(struct net *net)
+{
+	struct netns_ieee802154_lowpan *ns = net_ieee802154_lowpan(net);
+	struct ctl_table *table = lowpan_frags_ns_ctl_table;
+	bool private_copy = !net_eq(net, &init_net);
+	struct ctl_table_header *hdr;
+
+	if (private_copy) {
+		table = kmemdup(lowpan_frags_ns_ctl_table,
+				sizeof(lowpan_frags_ns_ctl_table), GFP_KERNEL);
+		if (table == NULL)
+			return -ENOMEM;
+
+		table[0].data = &ns->frags.high_thresh;
+		table[1].data = &ns->frags.low_thresh;
+		table[2].data = &ns->frags.timeout;
+		table[3].data = &ns->max_dsize;
+
+		/* Don't export sysctls to unprivileged users */
+		if (net->user_ns != &init_user_ns)
+			table[0].procname = NULL;
+	}
+
+	hdr = register_net_sysctl(net, "net/ieee802154/6lowpan", table);
+	if (hdr == NULL) {
+		/* Only the duplicated table is ours to free. */
+		if (private_copy)
+			kfree(table);
+		return -ENOMEM;
+	}
+
+	ns->sysctl.frags_hdr = hdr;
+	return 0;
+}
+
+/* Undo lowpan_frags_ns_sysctl_register(): unregister the header stored for
+ * @net and free the table when it was a private copy (any namespace other
+ * than init_net).
+ */
+static void __net_exit lowpan_frags_ns_sysctl_unregister(struct net *net)
+{
+	struct ctl_table_header *hdr =
+		net_ieee802154_lowpan(net)->sysctl.frags_hdr;
+	/* Fetch the table pointer first; it is read through the header. */
+	struct ctl_table *table = hdr->ctl_table_arg;
+
+	unregister_net_sysctl_table(hdr);
+
+	if (net_eq(net, &init_net))
+		return;
+
+	kfree(table);
+}
+
+/* Header of the global sysctl table, kept for the unregister path. */
+static struct ctl_table_header *lowpan_ctl_header;
+
+/* Register the global fragment sysctls under init_net.
+ * Returns 0 on success or -ENOMEM when registration fails.
+ */
+static int lowpan_frags_sysctl_register(void)
+{
+	lowpan_ctl_header = register_net_sysctl(&init_net,
+						"net/ieee802154/6lowpan",
+						lowpan_frags_ctl_table);
+	if (lowpan_ctl_header == NULL)
+		return -ENOMEM;
+
+	return 0;
+}
+
+/* Drop the global sysctl table registered by lowpan_frags_sysctl_register(). */
+static void lowpan_frags_sysctl_unregister(void)
+{
+	struct ctl_table_header *hdr = lowpan_ctl_header;
+
+	unregister_net_sysctl_table(hdr);
+}
+#else
+/* CONFIG_SYSCTL is off: the four sysctl hooks become no-ops, with the
+ * register variants reporting success so initialisation carries on.
+ */
+static inline int lowpan_frags_ns_sysctl_register(struct net *net)
+{
+ return 0;
+}
+
+static inline void lowpan_frags_ns_sysctl_unregister(struct net *net)
+{
+}
+
+static inline int lowpan_frags_sysctl_register(void)
+{
+ return 0;
+}
+
+static inline void lowpan_frags_sysctl_unregister(void)
+{
+}
+#endif
+
+/* Per-namespace init: set the fragment limits to the IPv6 defaults, allow
+ * datagrams of up to 0xFFFF bytes, initialise the namespace's frag state
+ * and register its sysctls. Returns the sysctl registration result.
+ */
+static int __net_init lowpan_frags_init_net(struct net *net)
+{
+	struct netns_ieee802154_lowpan *ns = net_ieee802154_lowpan(net);
+
+	ns->max_dsize = 0xFFFF;
+	ns->frags.timeout = IPV6_FRAG_TIMEOUT;
+	ns->frags.low_thresh = IPV6_FRAG_LOW_THRESH;
+	ns->frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
+
+	inet_frags_init_net(&ns->frags);
+
+	return lowpan_frags_ns_sysctl_register(net);
+}
+
+/* Per-namespace teardown: remove the sysctls first, then release the
+ * namespace's fragment state.
+ */
+static void __net_exit lowpan_frags_exit_net(struct net *net)
+{
+	struct netns_ieee802154_lowpan *ns = net_ieee802154_lowpan(net);
+
+	lowpan_frags_ns_sysctl_unregister(net);
+	inet_frags_exit_net(&ns->frags, &lowpan_frags);
+}
+
+/* Per-namespace hooks, registered by lowpan_net_frag_init(). */
+static struct pernet_operations lowpan_frags_ops = {
+ .init = lowpan_frags_init_net,
+ .exit = lowpan_frags_exit_net,
+};
+
+/* Module-level setup of 6LoWPAN fragment reassembly.
+ *
+ * Registers the global sysctls and the per-namespace operations, then fills
+ * in the lowpan_frags callbacks and initialises the family. Returns 0 on
+ * success; on failure returns the error of the step that failed, with the
+ * global sysctls unregistered again if the pernet registration was the one
+ * that failed.
+ */
+int __init lowpan_net_frag_init(void)
+{
+	int ret;
+
+	ret = lowpan_frags_sysctl_register();
+	if (ret)
+		return ret;
+
+	ret = register_pernet_subsys(&lowpan_frags_ops);
+	if (ret)
+		goto err_pernet;
+
+	lowpan_frags.hashfn = lowpan_hashfn;
+	lowpan_frags.constructor = lowpan_frag_init;
+	lowpan_frags.destructor = NULL;
+	lowpan_frags.skb_free = NULL;
+	/* Queues of this family are lowpan_frag_queue objects (that is what
+	 * the constructor, match and hash callbacks operate on), so size
+	 * allocations by that type rather than the IPv6 struct frag_queue.
+	 */
+	lowpan_frags.qsize = sizeof(struct lowpan_frag_queue);
+	lowpan_frags.match = lowpan_frag_match;
+	lowpan_frags.frag_expire = lowpan_frag_expire;
+	lowpan_frags.secret_interval = 10 * 60 * HZ;
+	inet_frags_init(&lowpan_frags);
+
+	return ret;
+err_pernet:
+	lowpan_frags_sysctl_unregister();
+	return ret;
+}
+
+/* Module-level teardown: finalise the fragment family, then drop the global
+ * sysctls and the per-namespace operations set up by lowpan_net_frag_init().
+ */
+void lowpan_net_frag_exit(void)
+{
+ inet_frags_fini(&lowpan_frags);
+ lowpan_frags_sysctl_unregister();
+ unregister_pernet_subsys(&lowpan_frags_ops);
+}
diff --git a/net/ieee802154/reassembly.h b/net/ieee802154/reassembly.h
new file mode 100644
index 00000000000..74e4a7c9819
--- /dev/null
+++ b/net/ieee802154/reassembly.h
@@ -0,0 +1,41 @@
+#ifndef __IEEE802154_6LOWPAN_REASSEMBLY_H__
+#define __IEEE802154_6LOWPAN_REASSEMBLY_H__
+
+#include <net/inet_frag.h>
+
+/* Lookup key handed to the inet_frags match and constructor callbacks.
+ * The addresses are borrowed pointers; the constructor copies them by value.
+ */
+struct lowpan_create_arg {
+ __be16 tag; /* datagram tag from the fragment header */
+ u16 d_size; /* datagram size from the fragment header */
+ const struct ieee802154_addr *src; /* source address of the fragment */
+ const struct ieee802154_addr *dst; /* destination address of the fragment */
+};
+
+/* 6LoWPAN reassembly queue: the generic inet_frag_queue followed by the key
+ * (tag, datagram size, source, destination) that identifies the datagram.
+ */
+struct lowpan_frag_queue {
+ struct inet_frag_queue q; /* generic state; container_of() target */
+
+ __be16 tag;
+ u16 d_size;
+ struct ieee802154_addr saddr;
+ struct ieee802154_addr daddr;
+};
+
+/* Reduce an IEEE 802.15.4 address to 32 bits: extended addresses fold their
+ * upper and lower halves with XOR, short addresses are used as they are,
+ * and any other addressing mode hashes to 0.
+ */
+static inline u32 ieee802154_addr_hash(const struct ieee802154_addr *a)
+{
+	if (a->mode == IEEE802154_ADDR_LONG) {
+		u64 ext = (__force u64)a->extended_addr;
+
+		return (ext >> 32) ^ (ext & 0xffffffff);
+	}
+
+	if (a->mode == IEEE802154_ADDR_SHORT)
+		return (__force u32)(a->short_addr);
+
+	return 0;
+}
+
+int lowpan_frag_rcv(struct sk_buff *skb, const u8 frag_type);
+void lowpan_net_frag_exit(void);
+int lowpan_net_frag_init(void);
+
+#endif /* __IEEE802154_6LOWPAN_REASSEMBLY_H__ */
diff --git a/net/ieee802154/wpan-class.c b/net/ieee802154/wpan-class.c
index 4dd37615a74..8d6f6704da8 100644
--- a/net/ieee802154/wpan-class.c
+++ b/net/ieee802154/wpan-class.c
@@ -44,9 +44,7 @@ static DEVICE_ATTR_RO(name);
MASTER_SHOW(current_channel, "%d");
MASTER_SHOW(current_page, "%d");
-MASTER_SHOW_COMPLEX(transmit_power, "%d +- %d dB",
- ((signed char) (phy->transmit_power << 2)) >> 2,
- (phy->transmit_power >> 6) ? (phy->transmit_power >> 6) * 3 : 1);
+MASTER_SHOW(transmit_power, "%d +- 1 dB");
MASTER_SHOW(cca_mode, "%d");
static ssize_t channels_supported_show(struct device *dev,
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index f8c49ce5b28..f032688d20d 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -55,4 +55,4 @@ obj-$(CONFIG_MEMCG_KMEM) += tcp_memcontrol.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
- xfrm4_output.o
+ xfrm4_output.o xfrm4_protocol.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 19ab78aca54..d156b3c5f36 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -254,7 +254,6 @@ static int inet_create(struct net *net, struct socket *sock, int protocol,
struct inet_sock *inet;
struct proto *answer_prot;
unsigned char answer_flags;
- char answer_no_check;
int try_loading_module = 0;
int err;
@@ -312,7 +311,6 @@ lookup_protocol:
sock->ops = answer->ops;
answer_prot = answer->prot;
- answer_no_check = answer->no_check;
answer_flags = answer->flags;
rcu_read_unlock();
@@ -324,7 +322,6 @@ lookup_protocol:
goto out;
err = 0;
- sk->sk_no_check = answer_no_check;
if (INET_PROTOSW_REUSE & answer_flags)
sk->sk_reuse = SK_CAN_REUSE;
@@ -1002,7 +999,6 @@ static struct inet_protosw inetsw_array[] =
.protocol = IPPROTO_TCP,
.prot = &tcp_prot,
.ops = &inet_stream_ops,
- .no_check = 0,
.flags = INET_PROTOSW_PERMANENT |
INET_PROTOSW_ICSK,
},
@@ -1012,7 +1008,6 @@ static struct inet_protosw inetsw_array[] =
.protocol = IPPROTO_UDP,
.prot = &udp_prot,
.ops = &inet_dgram_ops,
- .no_check = UDP_CSUM_DEFAULT,
.flags = INET_PROTOSW_PERMANENT,
},
@@ -1021,7 +1016,6 @@ static struct inet_protosw inetsw_array[] =
.protocol = IPPROTO_ICMP,
.prot = &ping_prot,
.ops = &inet_dgram_ops,
- .no_check = UDP_CSUM_DEFAULT,
.flags = INET_PROTOSW_REUSE,
},
@@ -1030,7 +1024,6 @@ static struct inet_protosw inetsw_array[] =
.protocol = IPPROTO_IP, /* wild card */
.prot = &raw_prot,
.ops = &inet_sockraw_ops,
- .no_check = UDP_CSUM_DEFAULT,
.flags = INET_PROTOSW_REUSE,
}
};
@@ -1261,10 +1254,12 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
SKB_GSO_DODGY |
SKB_GSO_TCP_ECN |
SKB_GSO_GRE |
+ SKB_GSO_GRE_CSUM |
SKB_GSO_IPIP |
SKB_GSO_SIT |
SKB_GSO_TCPV6 |
SKB_GSO_UDP_TUNNEL |
+ SKB_GSO_UDP_TUNNEL_CSUM |
SKB_GSO_MPLS |
0)))
goto out;
@@ -1434,6 +1429,9 @@ static int inet_gro_complete(struct sk_buff *skb, int nhoff)
int proto = iph->protocol;
int err = -ENOSYS;
+ if (skb->encapsulation)
+ skb_set_inner_network_header(skb, nhoff);
+
csum_replace2(&iph->check, iph->tot_len, newlen);
iph->tot_len = newlen;
@@ -1476,22 +1474,20 @@ int inet_ctl_sock_create(struct sock **sk, unsigned short family,
}
EXPORT_SYMBOL_GPL(inet_ctl_sock_create);
-unsigned long snmp_fold_field(void __percpu *mib[], int offt)
+unsigned long snmp_fold_field(void __percpu *mib, int offt)
{
unsigned long res = 0;
- int i, j;
+ int i;
- for_each_possible_cpu(i) {
- for (j = 0; j < SNMP_ARRAY_SZ; j++)
- res += *(((unsigned long *) per_cpu_ptr(mib[j], i)) + offt);
- }
+ for_each_possible_cpu(i)
+ res += *(((unsigned long *) per_cpu_ptr(mib, i)) + offt);
return res;
}
EXPORT_SYMBOL_GPL(snmp_fold_field);
#if BITS_PER_LONG==32
-u64 snmp_fold_field64(void __percpu *mib[], int offt, size_t syncp_offset)
+u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_offset)
{
u64 res = 0;
int cpu;
@@ -1502,12 +1498,12 @@ u64 snmp_fold_field64(void __percpu *mib[], int offt, size_t syncp_offset)
u64 v;
unsigned int start;
- bhptr = per_cpu_ptr(mib[0], cpu);
+ bhptr = per_cpu_ptr(mib, cpu);
syncp = (struct u64_stats_sync *)(bhptr + syncp_offset);
do {
- start = u64_stats_fetch_begin_bh(syncp);
+ start = u64_stats_fetch_begin_irq(syncp);
v = *(((u64 *) bhptr) + offt);
- } while (u64_stats_fetch_retry_bh(syncp, start));
+ } while (u64_stats_fetch_retry_irq(syncp, start));
res += v;
}
@@ -1516,25 +1512,6 @@ u64 snmp_fold_field64(void __percpu *mib[], int offt, size_t syncp_offset)
EXPORT_SYMBOL_GPL(snmp_fold_field64);
#endif
-int snmp_mib_init(void __percpu *ptr[2], size_t mibsize, size_t align)
-{
- BUG_ON(ptr == NULL);
- ptr[0] = __alloc_percpu(mibsize, align);
- if (!ptr[0])
- return -ENOMEM;
-
-#if SNMP_ARRAY_SZ == 2
- ptr[1] = __alloc_percpu(mibsize, align);
- if (!ptr[1]) {
- free_percpu(ptr[0]);
- ptr[0] = NULL;
- return -ENOMEM;
- }
-#endif
- return 0;
-}
-EXPORT_SYMBOL_GPL(snmp_mib_init);
-
#ifdef CONFIG_IP_MULTICAST
static const struct net_protocol igmp_protocol = {
.handler = igmp_rcv,
@@ -1570,40 +1547,30 @@ static __net_init int ipv4_mib_init_net(struct net *net)
{
int i;
- if (snmp_mib_init((void __percpu **)net->mib.tcp_statistics,
- sizeof(struct tcp_mib),
- __alignof__(struct tcp_mib)) < 0)
+ net->mib.tcp_statistics = alloc_percpu(struct tcp_mib);
+ if (!net->mib.tcp_statistics)
goto err_tcp_mib;
- if (snmp_mib_init((void __percpu **)net->mib.ip_statistics,
- sizeof(struct ipstats_mib),
- __alignof__(struct ipstats_mib)) < 0)
+ net->mib.ip_statistics = alloc_percpu(struct ipstats_mib);
+ if (!net->mib.ip_statistics)
goto err_ip_mib;
for_each_possible_cpu(i) {
struct ipstats_mib *af_inet_stats;
- af_inet_stats = per_cpu_ptr(net->mib.ip_statistics[0], i);
- u64_stats_init(&af_inet_stats->syncp);
-#if SNMP_ARRAY_SZ == 2
- af_inet_stats = per_cpu_ptr(net->mib.ip_statistics[1], i);
+ af_inet_stats = per_cpu_ptr(net->mib.ip_statistics, i);
u64_stats_init(&af_inet_stats->syncp);
-#endif
}
- if (snmp_mib_init((void __percpu **)net->mib.net_statistics,
- sizeof(struct linux_mib),
- __alignof__(struct linux_mib)) < 0)
+ net->mib.net_statistics = alloc_percpu(struct linux_mib);
+ if (!net->mib.net_statistics)
goto err_net_mib;
- if (snmp_mib_init((void __percpu **)net->mib.udp_statistics,
- sizeof(struct udp_mib),
- __alignof__(struct udp_mib)) < 0)
+ net->mib.udp_statistics = alloc_percpu(struct udp_mib);
+ if (!net->mib.udp_statistics)
goto err_udp_mib;
- if (snmp_mib_init((void __percpu **)net->mib.udplite_statistics,
- sizeof(struct udp_mib),
- __alignof__(struct udp_mib)) < 0)
+ net->mib.udplite_statistics = alloc_percpu(struct udp_mib);
+ if (!net->mib.udplite_statistics)
goto err_udplite_mib;
- if (snmp_mib_init((void __percpu **)net->mib.icmp_statistics,
- sizeof(struct icmp_mib),
- __alignof__(struct icmp_mib)) < 0)
+ net->mib.icmp_statistics = alloc_percpu(struct icmp_mib);
+ if (!net->mib.icmp_statistics)
goto err_icmp_mib;
net->mib.icmpmsg_statistics = kzalloc(sizeof(struct icmpmsg_mib),
GFP_KERNEL);
@@ -1614,17 +1581,17 @@ static __net_init int ipv4_mib_init_net(struct net *net)
return 0;
err_icmpmsg_mib:
- snmp_mib_free((void __percpu **)net->mib.icmp_statistics);
+ free_percpu(net->mib.icmp_statistics);
err_icmp_mib:
- snmp_mib_free((void __percpu **)net->mib.udplite_statistics);
+ free_percpu(net->mib.udplite_statistics);
err_udplite_mib:
- snmp_mib_free((void __percpu **)net->mib.udp_statistics);
+ free_percpu(net->mib.udp_statistics);
err_udp_mib:
- snmp_mib_free((void __percpu **)net->mib.net_statistics);
+ free_percpu(net->mib.net_statistics);
err_net_mib:
- snmp_mib_free((void __percpu **)net->mib.ip_statistics);
+ free_percpu(net->mib.ip_statistics);
err_ip_mib:
- snmp_mib_free((void __percpu **)net->mib.tcp_statistics);
+ free_percpu(net->mib.tcp_statistics);
err_tcp_mib:
return -ENOMEM;
}
@@ -1632,12 +1599,12 @@ err_tcp_mib:
static __net_exit void ipv4_mib_exit_net(struct net *net)
{
kfree(net->mib.icmpmsg_statistics);
- snmp_mib_free((void __percpu **)net->mib.icmp_statistics);
- snmp_mib_free((void __percpu **)net->mib.udplite_statistics);
- snmp_mib_free((void __percpu **)net->mib.udp_statistics);
- snmp_mib_free((void __percpu **)net->mib.net_statistics);
- snmp_mib_free((void __percpu **)net->mib.ip_statistics);
- snmp_mib_free((void __percpu **)net->mib.tcp_statistics);
+ free_percpu(net->mib.icmp_statistics);
+ free_percpu(net->mib.udplite_statistics);
+ free_percpu(net->mib.udp_statistics);
+ free_percpu(net->mib.net_statistics);
+ free_percpu(net->mib.ip_statistics);
+ free_percpu(net->mib.tcp_statistics);
}
static __net_initdata struct pernet_operations ipv4_mib_ops = {
@@ -1650,6 +1617,39 @@ static int __init init_ipv4_mibs(void)
return register_pernet_subsys(&ipv4_mib_ops);
}
+/* Per-namespace defaults for the IPv4 local port range and the ping socket
+ * group range. Always returns 0.
+ */
+static __net_init int inet_init_net(struct net *net)
+{
+	seqlock_init(&net->ipv4.ip_local_ports.lock);
+	seqlock_init(&net->ipv4.ping_group_range.lock);
+
+	/* Default local port range. */
+	net->ipv4.ip_local_ports.range[0] = 32768;
+	net->ipv4.ip_local_ports.range[1] = 61000;
+
+	/* Lower bound above the upper bound: nobody may create ping sockets
+	 * until boot scripts set a distro-specific group range.
+	 */
+	net->ipv4.ping_group_range.range[0] = make_kgid(&init_user_ns, 1);
+	net->ipv4.ping_group_range.range[1] = make_kgid(&init_user_ns, 0);
+
+	return 0;
+}
+
+/* Nothing to undo: inet_init_net() only writes fields inside struct net. */
+static __net_exit void inet_exit_net(struct net *net)
+{
+}
+
+/* Per-namespace hooks that apply the af_inet defaults above. */
+static __net_initdata struct pernet_operations af_inet_ops = {
+ .init = inet_init_net,
+ .exit = inet_exit_net,
+};
+
+/* Register af_inet_ops; returns the result of register_pernet_subsys(). */
+static int __init init_inet_pernet_ops(void)
+{
+ return register_pernet_subsys(&af_inet_ops);
+}
+
static int ipv4_proc_init(void);
/*
@@ -1703,13 +1703,9 @@ static int __init inet_init(void)
BUILD_BUG_ON(sizeof(struct inet_skb_parm) > FIELD_SIZEOF(struct sk_buff, cb));
- sysctl_local_reserved_ports = kzalloc(65536 / 8, GFP_KERNEL);
- if (!sysctl_local_reserved_ports)
- goto out;
-
rc = proto_register(&tcp_prot, 1);
if (rc)
- goto out_free_reserved_ports;
+ goto out;
rc = proto_register(&udp_prot, 1);
if (rc)
@@ -1794,6 +1790,9 @@ static int __init inet_init(void)
if (ip_mr_init())
pr_crit("%s: Cannot init ipv4 mroute\n", __func__);
#endif
+
+ if (init_inet_pernet_ops())
+ pr_crit("%s: Cannot init ipv4 inet pernet ops\n", __func__);
/*
* Initialise per-cpu ipv4 mibs
*/
@@ -1816,8 +1815,6 @@ out_unregister_udp_proto:
proto_unregister(&udp_prot);
out_unregister_tcp_proto:
proto_unregister(&tcp_prot);
-out_free_reserved_ports:
- kfree(sysctl_local_reserved_ports);
goto out;
}
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 717902669d2..a2afa89513a 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -155,6 +155,10 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb)
struct iphdr *iph, *top_iph;
struct ip_auth_hdr *ah;
struct ah_data *ahp;
+ int seqhi_len = 0;
+ __be32 *seqhi;
+ int sglists = 0;
+ struct scatterlist *seqhisg;
ahp = x->data;
ahash = ahp->ahash;
@@ -167,14 +171,19 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb)
ah = ip_auth_hdr(skb);
ihl = ip_hdrlen(skb);
+ if (x->props.flags & XFRM_STATE_ESN) {
+ sglists = 1;
+ seqhi_len = sizeof(*seqhi);
+ }
err = -ENOMEM;
- iph = ah_alloc_tmp(ahash, nfrags, ihl);
+ iph = ah_alloc_tmp(ahash, nfrags + sglists, ihl + seqhi_len);
if (!iph)
goto out;
-
- icv = ah_tmp_icv(ahash, iph, ihl);
+ seqhi = (__be32 *)((char *)iph + ihl);
+ icv = ah_tmp_icv(ahash, seqhi, seqhi_len);
req = ah_tmp_req(ahash, icv);
sg = ah_req_sg(ahash, req);
+ seqhisg = sg + nfrags;
memset(ah->auth_data, 0, ahp->icv_trunc_len);
@@ -210,10 +219,15 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb)
ah->spi = x->id.spi;
ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
- sg_init_table(sg, nfrags);
- skb_to_sgvec(skb, sg, 0, skb->len);
+ sg_init_table(sg, nfrags + sglists);
+ skb_to_sgvec_nomark(skb, sg, 0, skb->len);
- ahash_request_set_crypt(req, sg, icv, skb->len);
+ if (x->props.flags & XFRM_STATE_ESN) {
+ /* Attach seqhi sg right after packet payload */
+ *seqhi = htonl(XFRM_SKB_CB(skb)->seq.output.hi);
+ sg_set_buf(seqhisg, seqhi, seqhi_len);
+ }
+ ahash_request_set_crypt(req, sg, icv, skb->len + seqhi_len);
ahash_request_set_callback(req, 0, ah_output_done, skb);
AH_SKB_CB(skb)->tmp = iph;
@@ -295,6 +309,10 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
struct ip_auth_hdr *ah;
struct ah_data *ahp;
int err = -ENOMEM;
+ int seqhi_len = 0;
+ __be32 *seqhi;
+ int sglists = 0;
+ struct scatterlist *seqhisg;
if (!pskb_may_pull(skb, sizeof(*ah)))
goto out;
@@ -335,14 +353,22 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
iph = ip_hdr(skb);
ihl = ip_hdrlen(skb);
- work_iph = ah_alloc_tmp(ahash, nfrags, ihl + ahp->icv_trunc_len);
+ if (x->props.flags & XFRM_STATE_ESN) {
+ sglists = 1;
+ seqhi_len = sizeof(*seqhi);
+ }
+
+ work_iph = ah_alloc_tmp(ahash, nfrags + sglists, ihl +
+ ahp->icv_trunc_len + seqhi_len);
if (!work_iph)
goto out;
- auth_data = ah_tmp_auth(work_iph, ihl);
+ seqhi = (__be32 *)((char *)work_iph + ihl);
+ auth_data = ah_tmp_auth(seqhi, seqhi_len);
icv = ah_tmp_icv(ahash, auth_data, ahp->icv_trunc_len);
req = ah_tmp_req(ahash, icv);
sg = ah_req_sg(ahash, req);
+ seqhisg = sg + nfrags;
memcpy(work_iph, iph, ihl);
memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len);
@@ -361,10 +387,15 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
skb_push(skb, ihl);
- sg_init_table(sg, nfrags);
- skb_to_sgvec(skb, sg, 0, skb->len);
+ sg_init_table(sg, nfrags + sglists);
+ skb_to_sgvec_nomark(skb, sg, 0, skb->len);
- ahash_request_set_crypt(req, sg, icv, skb->len);
+ if (x->props.flags & XFRM_STATE_ESN) {
+ /* Attach seqhi sg right after packet payload */
+ *seqhi = XFRM_SKB_CB(skb)->seq.input.hi;
+ sg_set_buf(seqhisg, seqhi, seqhi_len);
+ }
+ ahash_request_set_crypt(req, sg, icv, skb->len + seqhi_len);
ahash_request_set_callback(req, 0, ah_input_done, skb);
AH_SKB_CB(skb)->tmp = work_iph;
@@ -397,7 +428,7 @@ out:
return err;
}
-static void ah4_err(struct sk_buff *skb, u32 info)
+static int ah4_err(struct sk_buff *skb, u32 info)
{
struct net *net = dev_net(skb->dev);
const struct iphdr *iph = (const struct iphdr *)skb->data;
@@ -407,23 +438,25 @@ static void ah4_err(struct sk_buff *skb, u32 info)
switch (icmp_hdr(skb)->type) {
case ICMP_DEST_UNREACH:
if (icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
- return;
+ return 0;
case ICMP_REDIRECT:
break;
default:
- return;
+ return 0;
}
x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
ah->spi, IPPROTO_AH, AF_INET);
if (!x)
- return;
+ return 0;
if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH)
ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_AH, 0);
else
ipv4_redirect(skb, net, 0, 0, IPPROTO_AH, 0);
xfrm_state_put(x);
+
+ return 0;
}
static int ah_init_state(struct xfrm_state *x)
@@ -505,6 +538,10 @@ static void ah_destroy(struct xfrm_state *x)
kfree(ahp);
}
+/* Receive callback wired into ah4_protocol as .cb_handler; it does nothing
+ * with the skb or the error and always returns 0.
+ */
+static int ah4_rcv_cb(struct sk_buff *skb, int err)
+{
+ return 0;
+}
static const struct xfrm_type ah_type =
{
@@ -518,11 +555,12 @@ static const struct xfrm_type ah_type =
.output = ah_output
};
-static const struct net_protocol ah4_protocol = {
+static struct xfrm4_protocol ah4_protocol = {
.handler = xfrm4_rcv,
+ .input_handler = xfrm_input,
+ .cb_handler = ah4_rcv_cb,
.err_handler = ah4_err,
- .no_policy = 1,
- .netns_ok = 1,
+ .priority = 0,
};
static int __init ah4_init(void)
@@ -531,7 +569,7 @@ static int __init ah4_init(void)
pr_info("%s: can't add xfrm type\n", __func__);
return -EAGAIN;
}
- if (inet_add_protocol(&ah4_protocol, IPPROTO_AH) < 0) {
+ if (xfrm4_protocol_register(&ah4_protocol, IPPROTO_AH) < 0) {
pr_info("%s: can't add protocol\n", __func__);
xfrm_unregister_type(&ah_type, AF_INET);
return -EAGAIN;
@@ -541,7 +579,7 @@ static int __init ah4_init(void)
static void __exit ah4_fini(void)
{
- if (inet_del_protocol(&ah4_protocol, IPPROTO_AH) < 0)
+ if (xfrm4_protocol_deregister(&ah4_protocol, IPPROTO_AH) < 0)
pr_info("%s: can't remove protocol\n", __func__);
if (xfrm_unregister_type(&ah_type, AF_INET) < 0)
pr_info("%s: can't remove xfrm type\n", __func__);
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index 8b5134c582f..a3095fdefbe 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -86,18 +86,26 @@ out:
}
EXPORT_SYMBOL(ip4_datagram_connect);
+/* Because UDP xmit path can manipulate sk_dst_cache without holding
+ * socket lock, we need to use sk_dst_set() here,
+ * even if we own the socket lock.
+ */
void ip4_datagram_release_cb(struct sock *sk)
{
const struct inet_sock *inet = inet_sk(sk);
const struct ip_options_rcu *inet_opt;
__be32 daddr = inet->inet_daddr;
+ struct dst_entry *dst;
struct flowi4 fl4;
struct rtable *rt;
- if (! __sk_dst_get(sk) || __sk_dst_check(sk, 0))
- return;
-
rcu_read_lock();
+
+ dst = __sk_dst_get(sk);
+ if (!dst || !dst->obsolete || dst->ops->check(dst, 0)) {
+ rcu_read_unlock();
+ return;
+ }
inet_opt = rcu_dereference(inet->inet_opt);
if (inet_opt && inet_opt->opt.srr)
daddr = inet_opt->opt.faddr;
@@ -105,8 +113,10 @@ void ip4_datagram_release_cb(struct sock *sk)
inet->inet_saddr, inet->inet_dport,
inet->inet_sport, sk->sk_protocol,
RT_CONN_FLAGS(sk), sk->sk_bound_dev_if);
- if (!IS_ERR(rt))
- __sk_dst_set(sk, &rt->dst);
+
+ dst = !IS_ERR(rt) ? &rt->dst : NULL;
+ sk_dst_set(sk, dst);
+
rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(ip4_datagram_release_cb);
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index bdbf68bb2e2..e9449376b58 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -106,7 +106,6 @@ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
#define IN4_ADDR_HSIZE (1U << IN4_ADDR_HSIZE_SHIFT)
static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
-static DEFINE_SPINLOCK(inet_addr_hash_lock);
static u32 inet_addr_hash(struct net *net, __be32 addr)
{
@@ -119,16 +118,14 @@ static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
{
u32 hash = inet_addr_hash(net, ifa->ifa_local);
- spin_lock(&inet_addr_hash_lock);
+ ASSERT_RTNL();
hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
- spin_unlock(&inet_addr_hash_lock);
}
static void inet_hash_remove(struct in_ifaddr *ifa)
{
- spin_lock(&inet_addr_hash_lock);
+ ASSERT_RTNL();
hlist_del_init_rcu(&ifa->hash);
- spin_unlock(&inet_addr_hash_lock);
}
/**
@@ -830,7 +827,7 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
ifa_existing = find_matching_ifa(ifa);
if (!ifa_existing) {
/* It would be best to check for !NLM_F_CREATE here but
- * userspace alreay relies on not having to provide this.
+ * userspace already relies on not having to provide this.
*/
set_ifa_lifetime(ifa, valid_lft, prefered_lft);
return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 7785b28061a..360b565918c 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -473,7 +473,7 @@ static u32 esp4_get_mtu(struct xfrm_state *x, int mtu)
net_adj) & ~(blksize - 1)) + net_adj - 2;
}
-static void esp4_err(struct sk_buff *skb, u32 info)
+static int esp4_err(struct sk_buff *skb, u32 info)
{
struct net *net = dev_net(skb->dev);
const struct iphdr *iph = (const struct iphdr *)skb->data;
@@ -483,23 +483,25 @@ static void esp4_err(struct sk_buff *skb, u32 info)
switch (icmp_hdr(skb)->type) {
case ICMP_DEST_UNREACH:
if (icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
- return;
+ return 0;
case ICMP_REDIRECT:
break;
default:
- return;
+ return 0;
}
x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
esph->spi, IPPROTO_ESP, AF_INET);
if (!x)
- return;
+ return 0;
if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH)
ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_ESP, 0);
else
ipv4_redirect(skb, net, 0, 0, IPPROTO_ESP, 0);
xfrm_state_put(x);
+
+ return 0;
}
static void esp_destroy(struct xfrm_state *x)
@@ -672,6 +674,11 @@ error:
return err;
}
+static int esp4_rcv_cb(struct sk_buff *skb, int err)
+{
+ return 0;
+}
+
static const struct xfrm_type esp_type =
{
.description = "ESP4",
@@ -685,11 +692,12 @@ static const struct xfrm_type esp_type =
.output = esp_output
};
-static const struct net_protocol esp4_protocol = {
+static struct xfrm4_protocol esp4_protocol = {
.handler = xfrm4_rcv,
+ .input_handler = xfrm_input,
+ .cb_handler = esp4_rcv_cb,
.err_handler = esp4_err,
- .no_policy = 1,
- .netns_ok = 1,
+ .priority = 0,
};
static int __init esp4_init(void)
@@ -698,7 +706,7 @@ static int __init esp4_init(void)
pr_info("%s: can't add xfrm type\n", __func__);
return -EAGAIN;
}
- if (inet_add_protocol(&esp4_protocol, IPPROTO_ESP) < 0) {
+ if (xfrm4_protocol_register(&esp4_protocol, IPPROTO_ESP) < 0) {
pr_info("%s: can't add protocol\n", __func__);
xfrm_unregister_type(&esp_type, AF_INET);
return -EAGAIN;
@@ -708,7 +716,7 @@ static int __init esp4_init(void)
static void __exit esp4_fini(void)
{
- if (inet_del_protocol(&esp4_protocol, IPPROTO_ESP) < 0)
+ if (xfrm4_protocol_deregister(&esp4_protocol, IPPROTO_ESP) < 0)
pr_info("%s: can't remove protocol\n", __func__);
if (xfrm_unregister_type(&esp_type, AF_INET) < 0)
pr_info("%s: can't remove xfrm type\n", __func__);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index c7539e22868..255aa9946fe 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -250,7 +250,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
bool dev_match;
fl4.flowi4_oif = 0;
- fl4.flowi4_iif = oif;
+ fl4.flowi4_iif = oif ? : LOOPBACK_IFINDEX;
fl4.daddr = src;
fl4.saddr = dst;
fl4.flowi4_tos = tos;
@@ -659,7 +659,7 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
- return ip_rt_dump(skb, cb);
+ return skb->len;
s_h = cb->args[0];
s_e = cb->args[1];
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index b53f0bf84dc..b10cd43a472 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -631,6 +631,7 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
.daddr = nh->nh_gw,
.flowi4_scope = cfg->fc_scope + 1,
.flowi4_oif = nh->nh_oif,
+ .flowi4_iif = LOOPBACK_IFINDEX,
};
/* It is not necessary, but requires a bit of thinking */
@@ -820,13 +821,13 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
if (fi == NULL)
goto failure;
+ fib_info_cnt++;
if (cfg->fc_mx) {
fi->fib_metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
if (!fi->fib_metrics)
goto failure;
} else
fi->fib_metrics = (u32 *) dst_default_metrics;
- fib_info_cnt++;
fi->fib_net = hold_net(net);
fi->fib_protocol = cfg->fc_protocol;
diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c
index 250be7421ab..0485bf7f8f0 100644
--- a/net/ipv4/gre_demux.c
+++ b/net/ipv4/gre_demux.c
@@ -68,6 +68,7 @@ void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
skb_push(skb, hdr_len);
+ skb_reset_transport_header(skb);
greh = (struct gre_base_hdr *)skb->data;
greh->flags = tnl_flags_to_gre_flags(tpi->flags);
greh->protocol = tpi->proto;
@@ -84,7 +85,8 @@ void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
ptr--;
}
if (tpi->flags&TUNNEL_CSUM &&
- !(skb_shinfo(skb)->gso_type & SKB_GSO_GRE)) {
+ !(skb_shinfo(skb)->gso_type &
+ (SKB_GSO_GRE|SKB_GSO_GRE_CSUM))) {
*ptr = 0;
*(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0,
skb->len, 0));
@@ -93,28 +95,6 @@ void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
}
EXPORT_SYMBOL_GPL(gre_build_header);
-static __sum16 check_checksum(struct sk_buff *skb)
-{
- __sum16 csum = 0;
-
- switch (skb->ip_summed) {
- case CHECKSUM_COMPLETE:
- csum = csum_fold(skb->csum);
-
- if (!csum)
- break;
- /* Fall through. */
-
- case CHECKSUM_NONE:
- skb->csum = 0;
- csum = __skb_checksum_complete(skb);
- skb->ip_summed = CHECKSUM_COMPLETE;
- break;
- }
-
- return csum;
-}
-
static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
bool *csum_err)
{
@@ -141,7 +121,7 @@ static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
options = (__be32 *)(greh + 1);
if (greh->flags & GRE_CSUM) {
- if (check_checksum(skb)) {
+ if (skb_checksum_simple_validate(skb)) {
*csum_err = true;
return -EINVAL;
}
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index f1d32280cb5..f0bdd47bbbc 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -42,6 +42,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
SKB_GSO_DODGY |
SKB_GSO_TCP_ECN |
SKB_GSO_GRE |
+ SKB_GSO_GRE_CSUM |
SKB_GSO_IPIP)))
goto out;
@@ -55,6 +56,8 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
goto out;
csum = !!(greh->flags & GRE_CSUM);
+ if (csum)
+ skb->encap_hdr_csum = 1;
if (unlikely(!pskb_may_pull(skb, ghl)))
goto out;
@@ -94,10 +97,13 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
}
}
- greh = (struct gre_base_hdr *)(skb->data);
+ skb_reset_transport_header(skb);
+
+ greh = (struct gre_base_hdr *)
+ skb_transport_header(skb);
pcsum = (__be32 *)(greh + 1);
*pcsum = 0;
- *(__sum16 *)pcsum = csum_fold(skb_checksum(skb, 0, skb->len, 0));
+ *(__sum16 *)pcsum = gso_make_checksum(skb, 0);
}
__skb_push(skb, tnl_hlen - ghl);
@@ -125,10 +131,12 @@ static __sum16 gro_skb_checksum(struct sk_buff *skb)
csum_partial(skb->data, skb_gro_offset(skb), 0));
sum = csum_fold(NAPI_GRO_CB(skb)->csum);
if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE)) {
- if (unlikely(!sum))
+ if (unlikely(!sum) && !skb->csum_complete_sw)
netdev_rx_csum_fault(skb->dev);
- } else
+ } else {
skb->ip_summed = CHECKSUM_COMPLETE;
+ skb->csum_complete_sw = 1;
+ }
return sum;
}
@@ -255,6 +263,9 @@ static int gre_gro_complete(struct sk_buff *skb, int nhoff)
int err = -ENOENT;
__be16 type;
+ skb->encapsulation = 1;
+ skb_shinfo(skb)->gso_type = SKB_GSO_GRE;
+
type = greh->protocol;
if (greh->flags & GRE_KEY)
grehlen += GRE_HEADER_SECTION;
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 0134663fdbc..42b7bcf8045 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -337,6 +337,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
struct sock *sk;
struct inet_sock *inet;
__be32 daddr, saddr;
+ u32 mark = IP4_REPLY_MARK(net, skb->mark);
if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb))
return;
@@ -349,6 +350,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
icmp_param->data.icmph.checksum = 0;
inet->tos = ip_hdr(skb)->tos;
+ sk->sk_mark = mark;
daddr = ipc.addr = ip_hdr(skb)->saddr;
saddr = fib_compute_spec_dst(skb);
ipc.opt = NULL;
@@ -364,6 +366,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
memset(&fl4, 0, sizeof(fl4));
fl4.daddr = daddr;
fl4.saddr = saddr;
+ fl4.flowi4_mark = mark;
fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos);
fl4.flowi4_proto = IPPROTO_ICMP;
security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
@@ -382,7 +385,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
struct flowi4 *fl4,
struct sk_buff *skb_in,
const struct iphdr *iph,
- __be32 saddr, u8 tos,
+ __be32 saddr, u8 tos, u32 mark,
int type, int code,
struct icmp_bxm *param)
{
@@ -394,6 +397,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
fl4->daddr = (param->replyopts.opt.opt.srr ?
param->replyopts.opt.opt.faddr : iph->saddr);
fl4->saddr = saddr;
+ fl4->flowi4_mark = mark;
fl4->flowi4_tos = RT_TOS(tos);
fl4->flowi4_proto = IPPROTO_ICMP;
fl4->fl4_icmp_type = type;
@@ -491,6 +495,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
struct flowi4 fl4;
__be32 saddr;
u8 tos;
+ u32 mark;
struct net *net;
struct sock *sk;
@@ -592,6 +597,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
tos = icmp_pointers[type].error ? ((iph->tos & IPTOS_TOS_MASK) |
IPTOS_PREC_INTERNETCONTROL) :
iph->tos;
+ mark = IP4_REPLY_MARK(net, skb_in->mark);
if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb_in))
goto out_unlock;
@@ -608,13 +614,14 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
icmp_param->skb = skb_in;
icmp_param->offset = skb_network_offset(skb_in);
inet_sk(sk)->tos = tos;
+ sk->sk_mark = mark;
ipc.addr = iph->saddr;
ipc.opt = &icmp_param->replyopts.opt;
ipc.tx_flags = 0;
ipc.ttl = 0;
ipc.tos = -1;
- rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos,
+ rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, mark,
type, code, icmp_param);
if (IS_ERR(rt))
goto out_unlock;
@@ -732,8 +739,6 @@ static void icmp_unreach(struct sk_buff *skb)
/* fall through */
case 0:
info = ntohs(icmph->un.frag.mtu);
- if (!info)
- goto out;
}
break;
case ICMP_SR_FAILED:
@@ -908,16 +913,8 @@ int icmp_rcv(struct sk_buff *skb)
ICMP_INC_STATS_BH(net, ICMP_MIB_INMSGS);
- switch (skb->ip_summed) {
- case CHECKSUM_COMPLETE:
- if (!csum_fold(skb->csum))
- break;
- /* fall through */
- case CHECKSUM_NONE:
- skb->csum = 0;
- if (__skb_checksum_complete(skb))
- goto csum_error;
- }
+ if (skb_checksum_simple_validate(skb))
+ goto csum_error;
if (!pskb_pull(skb, sizeof(*icmph)))
goto error;
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 97e4d1655d2..db710b059ba 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -369,7 +369,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
pip->saddr = fl4.saddr;
pip->protocol = IPPROTO_IGMP;
pip->tot_len = 0; /* filled in later */
- ip_select_ident(skb, &rt->dst, NULL);
+ ip_select_ident(skb, NULL);
((u8 *)&pip[1])[0] = IPOPT_RA;
((u8 *)&pip[1])[1] = 4;
((u8 *)&pip[1])[2] = 0;
@@ -714,7 +714,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
iph->daddr = dst;
iph->saddr = fl4.saddr;
iph->protocol = IPPROTO_IGMP;
- ip_select_ident(skb, &rt->dst, NULL);
+ ip_select_ident(skb, NULL);
((u8 *)&iph[1])[0] = IPOPT_RA;
((u8 *)&iph[1])[1] = 4;
((u8 *)&iph[1])[2] = 0;
@@ -988,16 +988,8 @@ int igmp_rcv(struct sk_buff *skb)
if (!pskb_may_pull(skb, sizeof(struct igmphdr)))
goto drop;
- switch (skb->ip_summed) {
- case CHECKSUM_COMPLETE:
- if (!csum_fold(skb->csum))
- break;
- /* fall through */
- case CHECKSUM_NONE:
- skb->csum = 0;
- if (__skb_checksum_complete(skb))
- goto drop;
- }
+ if (skb_checksum_simple_validate(skb))
+ goto drop;
ih = igmp_hdr(skb);
switch (ih->type) {
@@ -1952,6 +1944,10 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
rtnl_lock();
in_dev = ip_mc_find_dev(net, imr);
+ if (!in_dev) {
+ ret = -ENODEV;
+ goto out;
+ }
ifindex = imr->imr_ifindex;
for (imlp = &inet->mc_list;
(iml = rtnl_dereference(*imlp)) != NULL;
@@ -1969,16 +1965,14 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
*imlp = iml->next_rcu;
- if (in_dev)
- ip_mc_dec_group(in_dev, group);
+ ip_mc_dec_group(in_dev, group);
rtnl_unlock();
/* decrease mem now to avoid the memleak warning */
atomic_sub(sizeof(*iml), &sk->sk_omem_alloc);
kfree_rcu(iml, rcu);
return 0;
}
- if (!in_dev)
- ret = -ENODEV;
+out:
rtnl_unlock();
return ret;
}
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 0d1e2cb877e..14d02ea905b 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -29,19 +29,16 @@ const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n";
EXPORT_SYMBOL(inet_csk_timer_bug_msg);
#endif
-unsigned long *sysctl_local_reserved_ports;
-EXPORT_SYMBOL(sysctl_local_reserved_ports);
-
void inet_get_local_port_range(struct net *net, int *low, int *high)
{
unsigned int seq;
do {
- seq = read_seqbegin(&net->ipv4.sysctl_local_ports.lock);
+ seq = read_seqbegin(&net->ipv4.ip_local_ports.lock);
- *low = net->ipv4.sysctl_local_ports.range[0];
- *high = net->ipv4.sysctl_local_ports.range[1];
- } while (read_seqretry(&net->ipv4.sysctl_local_ports.lock, seq));
+ *low = net->ipv4.ip_local_ports.range[0];
+ *high = net->ipv4.ip_local_ports.range[1];
+ } while (read_seqretry(&net->ipv4.ip_local_ports.lock, seq));
}
EXPORT_SYMBOL(inet_get_local_port_range);
@@ -113,7 +110,7 @@ again:
smallest_size = -1;
do {
- if (inet_is_reserved_local_port(rover))
+ if (inet_is_local_reserved_port(net, rover))
goto next_nolock;
head = &hashinfo->bhash[inet_bhashfn(net, rover,
hashinfo->bhash_size)];
@@ -408,7 +405,7 @@ struct dst_entry *inet_csk_route_req(struct sock *sk,
struct net *net = sock_net(sk);
int flags = inet_sk_flowi_flags(sk);
- flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
+ flowi4_init_output(fl4, sk->sk_bound_dev_if, ireq->ir_mark,
RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
sk->sk_protocol,
flags,
@@ -445,7 +442,7 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk,
rcu_read_lock();
opt = rcu_dereference(newinet->inet_opt);
- flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
+ flowi4_init_output(fl4, sk->sk_bound_dev_if, inet_rsk(req)->ir_mark,
RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
sk->sk_protocol, inet_sk_flowi_flags(sk),
(opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr,
@@ -680,6 +677,8 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
inet_sk(newsk)->inet_sport = htons(inet_rsk(req)->ir_num);
newsk->sk_write_space = sk_stream_write_space;
+ newsk->sk_mark = inet_rsk(req)->ir_mark;
+
newicsk->icsk_retransmits = 0;
newicsk->icsk_backoff = 0;
newicsk->icsk_probes_out = 0;
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 8b9cf279450..43116e8c8e1 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -274,7 +274,7 @@ struct sock *__inet_lookup_established(struct net *net,
const __be32 daddr, const u16 hnum,
const int dif)
{
- INET_ADDR_COOKIE(acookie, saddr, daddr)
+ INET_ADDR_COOKIE(acookie, saddr, daddr);
const __portpair ports = INET_COMBINED_PORTS(sport, hnum);
struct sock *sk;
const struct hlist_nulls_node *node;
@@ -327,7 +327,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
__be32 daddr = inet->inet_rcv_saddr;
__be32 saddr = inet->inet_daddr;
int dif = sk->sk_bound_dev_if;
- INET_ADDR_COOKIE(acookie, saddr, daddr)
+ INET_ADDR_COOKIE(acookie, saddr, daddr);
const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport);
struct net *net = sock_net(sk);
unsigned int hash = inet_ehashfn(net, daddr, lport,
@@ -500,7 +500,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
local_bh_disable();
for (i = 1; i <= remaining; i++) {
port = low + (i + offset) % remaining;
- if (inet_is_reserved_local_port(port))
+ if (inet_is_local_reserved_port(net, port))
continue;
head = &hinfo->bhash[inet_bhashfn(net, port,
hinfo->bhash_size)];
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 48f42446511..bd5f5928167 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -26,20 +26,7 @@
* Theory of operations.
* We keep one entry for each peer IP address. The nodes contains long-living
* information about the peer which doesn't depend on routes.
- * At this moment this information consists only of ID field for the next
- * outgoing IP packet. This field is incremented with each packet as encoded
- * in inet_getid() function (include/net/inetpeer.h).
- * At the moment of writing this notes identifier of IP packets is generated
- * to be unpredictable using this code only for packets subjected
- * (actually or potentially) to defragmentation. I.e. DF packets less than
- * PMTU in size when local fragmentation is disabled use a constant ID and do
- * not use this code (see ip_select_ident() in include/net/ip.h).
*
- * Route cache entries hold references to our nodes.
- * New cache entries get references via lookup by destination IP address in
- * the avl tree. The reference is grabbed only when it's needed i.e. only
- * when we try to output IP packet which needs an unpredictable ID (see
- * __ip_select_ident() in net/ipv4/route.c).
* Nodes are removed only when reference counter goes to 0.
* When it's happened the node may be removed when a sufficient amount of
* time has been passed since its last use. The less-recently-used entry can
@@ -62,7 +49,6 @@
* refcnt: atomically against modifications on other CPU;
* usually under some other lock to prevent node disappearing
* daddr: unchangeable
- * ip_id_count: atomic value (no lock needed)
*/
static struct kmem_cache *peer_cachep __read_mostly;
@@ -120,7 +106,7 @@ int inet_peer_maxttl __read_mostly = 10 * 60 * HZ; /* usual time to live: 10 min
static void inetpeer_gc_worker(struct work_struct *work)
{
struct inet_peer *p, *n, *c;
- LIST_HEAD(list);
+ struct list_head list;
spin_lock_bh(&gc_lock);
list_replace_init(&gc_list, &list);
@@ -497,10 +483,6 @@ relookup:
p->daddr = *daddr;
atomic_set(&p->refcnt, 1);
atomic_set(&p->rid, 0);
- atomic_set(&p->ip_id_count,
- (daddr->family == AF_INET) ?
- secure_ip_id(daddr->addr.a4) :
- secure_ipv6_id(daddr->addr.a6));
p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
p->rate_tokens = 0;
/* 60*HZ is arbitrary, but chosen enough high so that the first
@@ -522,7 +504,7 @@ EXPORT_SYMBOL_GPL(inet_getpeer);
void inet_putpeer(struct inet_peer *p)
{
p->dtime = (__u32)jiffies;
- smp_mb__before_atomic_dec();
+ smp_mb__before_atomic();
atomic_dec(&p->refcnt);
}
EXPORT_SYMBOL_GPL(inet_putpeer);
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index f3869c186d9..3a83ce5efa8 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -42,12 +42,12 @@
static bool ip_may_fragment(const struct sk_buff *skb)
{
return unlikely((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0) ||
- !skb->local_df;
+ skb->ignore_df;
}
static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
{
- if (skb->len <= mtu || skb->local_df)
+ if (skb->len <= mtu)
return false;
if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
@@ -56,53 +56,6 @@ static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
return true;
}
-static bool ip_gso_exceeds_dst_mtu(const struct sk_buff *skb)
-{
- unsigned int mtu;
-
- if (skb->local_df || !skb_is_gso(skb))
- return false;
-
- mtu = ip_dst_mtu_maybe_forward(skb_dst(skb), true);
-
- /* if seglen > mtu, do software segmentation for IP fragmentation on
- * output. DF bit cannot be set since ip_forward would have sent
- * icmp error.
- */
- return skb_gso_network_seglen(skb) > mtu;
-}
-
-/* called if GSO skb needs to be fragmented on forward */
-static int ip_forward_finish_gso(struct sk_buff *skb)
-{
- struct dst_entry *dst = skb_dst(skb);
- netdev_features_t features;
- struct sk_buff *segs;
- int ret = 0;
-
- features = netif_skb_dev_features(skb, dst->dev);
- segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
- if (IS_ERR(segs)) {
- kfree_skb(skb);
- return -ENOMEM;
- }
-
- consume_skb(skb);
-
- do {
- struct sk_buff *nskb = segs->next;
- int err;
-
- segs->next = NULL;
- err = dst_output(segs);
-
- if (err && ret == 0)
- ret = err;
- segs = nskb;
- } while (segs);
-
- return ret;
-}
static int ip_forward_finish(struct sk_buff *skb)
{
@@ -114,9 +67,6 @@ static int ip_forward_finish(struct sk_buff *skb)
if (unlikely(opt->optlen))
ip_forward_options(skb);
- if (ip_gso_exceeds_dst_mtu(skb))
- return ip_forward_finish_gso(skb);
-
return dst_output(skb);
}
@@ -127,6 +77,10 @@ int ip_forward(struct sk_buff *skb)
struct rtable *rt; /* Route we use */
struct ip_options *opt = &(IPCB(skb)->opt);
+ /* that should never happen */
+ if (skb->pkt_type != PACKET_HOST)
+ goto drop;
+
if (skb_warn_if_lro(skb))
goto drop;
@@ -136,9 +90,6 @@ int ip_forward(struct sk_buff *skb)
if (IPCB(skb)->opt.router_alert && ip_call_ra_chain(skb))
return NET_RX_SUCCESS;
- if (skb->pkt_type != PACKET_HOST)
- goto drop;
-
skb_forward_csum(skb);
/*
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index c10a3ce5cbf..ed32313e307 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -232,8 +232,9 @@ static void ip_expire(unsigned long arg)
* "Fragment Reassembly Timeout" message, per RFC792.
*/
if (qp->user == IP_DEFRAG_AF_PACKET ||
- (qp->user == IP_DEFRAG_CONNTRACK_IN &&
- skb_rtable(head)->rt_type != RTN_LOCAL))
+ ((qp->user >= IP_DEFRAG_CONNTRACK_IN) &&
+ (qp->user <= __IP_DEFRAG_CONNTRACK_IN_END) &&
+ (skb_rtable(head)->rt_type != RTN_LOCAL)))
goto out_rcu_unlock;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index ec4f762efda..9b842544aea 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -410,7 +410,7 @@ static int ipgre_open(struct net_device *dev)
struct flowi4 fl4;
struct rtable *rt;
- rt = ip_route_output_gre(dev_net(dev), &fl4,
+ rt = ip_route_output_gre(t->net, &fl4,
t->parms.iph.daddr,
t->parms.iph.saddr,
t->parms.o_key,
@@ -434,7 +434,7 @@ static int ipgre_close(struct net_device *dev)
if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
struct in_device *in_dev;
- in_dev = inetdev_by_index(dev_net(dev), t->mlink);
+ in_dev = inetdev_by_index(t->net, t->mlink);
if (in_dev)
ip_mc_dec_group(in_dev, t->parms.iph.daddr);
}
@@ -463,6 +463,7 @@ static const struct net_device_ops ipgre_netdev_ops = {
static void ipgre_tunnel_setup(struct net_device *dev)
{
dev->netdev_ops = &ipgre_netdev_ops;
+ dev->type = ARPHRD_IPGRE;
ip_tunnel_setup(dev, ipgre_net_id);
}
@@ -477,7 +478,7 @@ static void __gre_tunnel_init(struct net_device *dev)
dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
- dev->features |= NETIF_F_NETNS_LOCAL | GRE_FEATURES;
+ dev->features |= GRE_FEATURES;
dev->hw_features |= GRE_FEATURES;
if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
@@ -501,7 +502,6 @@ static int ipgre_tunnel_init(struct net_device *dev)
memcpy(dev->dev_addr, &iph->saddr, 4);
memcpy(dev->broadcast, &iph->daddr, 4);
- dev->type = ARPHRD_IPGRE;
dev->flags = IFF_NOARP;
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
dev->addr_len = 4;
@@ -649,6 +649,7 @@ static void ipgre_tap_setup(struct net_device *dev)
{
ether_setup(dev);
dev->netdev_ops = &gre_tap_netdev_ops;
+ dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
ip_tunnel_setup(dev, gre_tap_net_id);
}
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index f4ab72e19af..ad382499bac 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -288,6 +288,10 @@ int ip_options_compile(struct net *net,
optptr++;
continue;
}
+ if (unlikely(l < 2)) {
+ pp_ptr = optptr;
+ goto error;
+ }
optlen = optptr[1];
if (optlen < 2 || optlen > l) {
pp_ptr = optptr;
@@ -364,7 +368,7 @@ int ip_options_compile(struct net *net,
}
if (optptr[2] <= optlen) {
unsigned char *timeptr = NULL;
- if (optptr[2]+3 > optptr[1]) {
+ if (optptr[2]+3 > optlen) {
pp_ptr = optptr + 2;
goto error;
}
@@ -376,7 +380,7 @@ int ip_options_compile(struct net *net,
optptr[2] += 4;
break;
case IPOPT_TS_TSANDADDR:
- if (optptr[2]+7 > optptr[1]) {
+ if (optptr[2]+7 > optlen) {
pp_ptr = optptr + 2;
goto error;
}
@@ -390,7 +394,7 @@ int ip_options_compile(struct net *net,
optptr[2] += 8;
break;
case IPOPT_TS_PRESPEC:
- if (optptr[2]+7 > optptr[1]) {
+ if (optptr[2]+7 > optlen) {
pp_ptr = optptr + 2;
goto error;
}
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 73c6b63bba7..8d3b6b0e985 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -101,17 +101,17 @@ int __ip_local_out(struct sk_buff *skb)
skb_dst(skb)->dev, dst_output);
}
-int ip_local_out(struct sk_buff *skb)
+int ip_local_out_sk(struct sock *sk, struct sk_buff *skb)
{
int err;
err = __ip_local_out(skb);
if (likely(err == 1))
- err = dst_output(skb);
+ err = dst_output_sk(sk, skb);
return err;
}
-EXPORT_SYMBOL_GPL(ip_local_out);
+EXPORT_SYMBOL_GPL(ip_local_out_sk);
static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst)
{
@@ -148,7 +148,7 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
iph->daddr = (opt && opt->opt.srr ? opt->opt.faddr : daddr);
iph->saddr = saddr;
iph->protocol = sk->sk_protocol;
- ip_select_ident(skb, &rt->dst, sk);
+ ip_select_ident(skb, sk);
if (opt && opt->opt.optlen) {
iph->ihl += opt->opt.optlen>>2;
@@ -211,6 +211,48 @@ static inline int ip_finish_output2(struct sk_buff *skb)
return -EINVAL;
}
+static int ip_finish_output_gso(struct sk_buff *skb)
+{
+ netdev_features_t features;
+ struct sk_buff *segs;
+ int ret = 0;
+
+ /* common case: locally created skb or seglen is <= mtu */
+ if (((IPCB(skb)->flags & IPSKB_FORWARDED) == 0) ||
+ skb_gso_network_seglen(skb) <= ip_skb_dst_mtu(skb))
+ return ip_finish_output2(skb);
+
+ /* Slowpath - GSO segment length is exceeding the dst MTU.
+ *
+ * This can happen in two cases:
+ * 1) TCP GRO packet, DF bit not set
+ * 2) skb arrived via virtio-net, we thus get TSO/GSO skbs directly
+ * from host network stack.
+ */
+ features = netif_skb_features(skb);
+ segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
+ if (IS_ERR(segs)) {
+ kfree_skb(skb);
+ return -ENOMEM;
+ }
+
+ consume_skb(skb);
+
+ do {
+ struct sk_buff *nskb = segs->next;
+ int err;
+
+ segs->next = NULL;
+ err = ip_fragment(segs, ip_finish_output2);
+
+ if (err && ret == 0)
+ ret = err;
+ segs = nskb;
+ } while (segs);
+
+ return ret;
+}
+
static int ip_finish_output(struct sk_buff *skb)
{
#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
@@ -220,15 +262,17 @@ static int ip_finish_output(struct sk_buff *skb)
return dst_output(skb);
}
#endif
- if (skb->len > ip_skb_dst_mtu(skb) && !skb_is_gso(skb))
+ if (skb_is_gso(skb))
+ return ip_finish_output_gso(skb);
+
+ if (skb->len > ip_skb_dst_mtu(skb))
return ip_fragment(skb, ip_finish_output2);
- else
- return ip_finish_output2(skb);
+
+ return ip_finish_output2(skb);
}
-int ip_mc_output(struct sk_buff *skb)
+int ip_mc_output(struct sock *sk, struct sk_buff *skb)
{
- struct sock *sk = skb->sk;
struct rtable *rt = skb_rtable(skb);
struct net_device *dev = rt->dst.dev;
@@ -287,7 +331,7 @@ int ip_mc_output(struct sk_buff *skb)
!(IPCB(skb)->flags & IPSKB_REROUTED));
}
-int ip_output(struct sk_buff *skb)
+int ip_output(struct sock *sk, struct sk_buff *skb)
{
struct net_device *dev = skb_dst(skb)->dev;
@@ -315,9 +359,9 @@ static void ip_copy_addrs(struct iphdr *iph, const struct flowi4 *fl4)
sizeof(fl4->saddr) + sizeof(fl4->daddr));
}
-int ip_queue_xmit(struct sk_buff *skb, struct flowi *fl)
+/* Note: skb->sk can be different from sk, in case of tunnels */
+int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl)
{
- struct sock *sk = skb->sk;
struct inet_sock *inet = inet_sk(sk);
struct ip_options_rcu *inet_opt;
struct flowi4 *fl4;
@@ -371,7 +415,7 @@ packet_routed:
skb_reset_network_header(skb);
iph = ip_hdr(skb);
*((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff));
- if (ip_dont_fragment(sk, &rt->dst) && !skb->local_df)
+ if (ip_dont_fragment(sk, &rt->dst) && !skb->ignore_df)
iph->frag_off = htons(IP_DF);
else
iph->frag_off = 0;
@@ -386,9 +430,9 @@ packet_routed:
ip_options_build(skb, &inet_opt->opt, inet->inet_daddr, rt, 0);
}
- ip_select_ident_more(skb, &rt->dst, sk,
- (skb_shinfo(skb)->gso_segs ?: 1) - 1);
+ ip_select_ident_segs(skb, sk, skb_shinfo(skb)->gso_segs ?: 1);
+ /* TODO : should we use skb->sk here instead of sk ? */
skb->priority = sk->sk_priority;
skb->mark = sk->sk_mark;
@@ -446,7 +490,6 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
__be16 not_last_frag;
struct rtable *rt = skb_rtable(skb);
int err = 0;
- bool forwarding = IPCB(skb)->flags & IPSKB_FORWARDED;
dev = rt->dst.dev;
@@ -456,8 +499,8 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
iph = ip_hdr(skb);
- mtu = ip_dst_mtu_maybe_forward(&rt->dst, forwarding);
- if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->local_df) ||
+ mtu = ip_skb_dst_mtu(skb);
+ if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) ||
(IPCB(skb)->frag_max_size &&
IPCB(skb)->frag_max_size > mtu))) {
IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
@@ -822,8 +865,7 @@ static int __ip_append_data(struct sock *sk,
fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
- maxnonfragsize = (inet->pmtudisc >= IP_PMTUDISC_DO) ?
- mtu : 0xFFFF;
+ maxnonfragsize = ip_sk_ignore_df(sk) ? 0xFFFF : mtu;
if (cork->length + length > maxnonfragsize - fragheaderlen) {
ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport,
@@ -1146,8 +1188,7 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page,
fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
- maxnonfragsize = (inet->pmtudisc >= IP_PMTUDISC_DO) ?
- mtu : 0xFFFF;
+ maxnonfragsize = ip_sk_ignore_df(sk) ? 0xFFFF : mtu;
if (cork->length + size > maxnonfragsize - fragheaderlen) {
ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport,
@@ -1308,11 +1349,10 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
* to fragment the frame generated here. No matter, what transforms
* how transforms change size of the packet, it will come out.
*/
- if (inet->pmtudisc < IP_PMTUDISC_DO)
- skb->local_df = 1;
+ skb->ignore_df = ip_sk_ignore_df(sk);
/* DF bit is set when we want to see DF on outgoing frames.
- * If local_df is set too, we still allow to fragment this frame
+ * If ignore_df is set too, we still allow to fragment this frame
* locally. */
if (inet->pmtudisc == IP_PMTUDISC_DO ||
inet->pmtudisc == IP_PMTUDISC_PROBE ||
@@ -1338,7 +1378,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
iph->ttl = ttl;
iph->protocol = sk->sk_protocol;
ip_copy_addrs(iph, fl4);
- ip_select_ident(skb, &rt->dst, sk);
+ ip_select_ident(skb, sk);
if (opt) {
iph->ihl += opt->optlen>>2;
@@ -1505,7 +1545,8 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr,
daddr = replyopts.opt.opt.faddr;
}
- flowi4_init_output(&fl4, arg->bound_dev_if, 0,
+ flowi4_init_output(&fl4, arg->bound_dev_if,
+ IP4_REPLY_MARK(net, skb->mark),
RT_TOS(arg->tos),
RT_SCOPE_UNIVERSE, ip_hdr(skb)->protocol,
ip_reply_arg_flowi_flags(arg),
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 580dd96666e..64741b93863 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -186,7 +186,8 @@ void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb)
}
EXPORT_SYMBOL(ip_cmsg_recv);
-int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc)
+int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc,
+ bool allow_ipv6)
{
int err, val;
struct cmsghdr *cmsg;
@@ -194,6 +195,22 @@ int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc)
for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) {
if (!CMSG_OK(msg, cmsg))
return -EINVAL;
+#if defined(CONFIG_IPV6)
+ if (allow_ipv6 &&
+ cmsg->cmsg_level == SOL_IPV6 &&
+ cmsg->cmsg_type == IPV6_PKTINFO) {
+ struct in6_pktinfo *src_info;
+
+ if (cmsg->cmsg_len < CMSG_LEN(sizeof(*src_info)))
+ return -EINVAL;
+ src_info = (struct in6_pktinfo *)CMSG_DATA(cmsg);
+ if (!ipv6_addr_v4mapped(&src_info->ipi6_addr))
+ return -EINVAL;
+ ipc->oif = src_info->ipi6_ifindex;
+ ipc->addr = src_info->ipi6_addr.s6_addr32[3];
+ continue;
+ }
+#endif
if (cmsg->cmsg_level != SOL_IP)
continue;
switch (cmsg->cmsg_type) {
@@ -626,7 +643,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
inet->nodefrag = val ? 1 : 0;
break;
case IP_MTU_DISCOVER:
- if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_INTERFACE)
+ if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_OMIT)
goto e_inval;
inet->pmtudisc = val;
break;
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index a82a22d8f77..6f9de61dce5 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -73,12 +73,7 @@ static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
{
struct dst_entry *old_dst;
- if (dst) {
- if (dst->flags & DST_NOCACHE)
- dst = NULL;
- else
- dst_clone(dst);
- }
+ dst_clone(dst);
old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
dst_release(old_dst);
}
@@ -108,13 +103,14 @@ static struct rtable *tunnel_rtable_get(struct ip_tunnel *t, u32 cookie)
rcu_read_lock();
dst = rcu_dereference(this_cpu_ptr(t->dst_cache)->dst);
+ if (dst && !atomic_inc_not_zero(&dst->__refcnt))
+ dst = NULL;
if (dst) {
if (dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
- rcu_read_unlock();
tunnel_dst_reset(t);
- return NULL;
+ dst_release(dst);
+ dst = NULL;
}
- dst_hold(dst);
}
rcu_read_unlock();
return (struct rtable *)dst;
@@ -173,6 +169,7 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
hlist_for_each_entry_rcu(t, head, hash_node) {
if (remote != t->parms.iph.daddr ||
+ t->parms.iph.saddr != 0 ||
!(t->dev->flags & IFF_UP))
continue;
@@ -189,10 +186,11 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
head = &itn->tunnels[hash];
hlist_for_each_entry_rcu(t, head, hash_node) {
- if ((local != t->parms.iph.saddr &&
- (local != t->parms.iph.daddr ||
- !ipv4_is_multicast(local))) ||
- !(t->dev->flags & IFF_UP))
+ if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
+ (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
+ continue;
+
+ if (!(t->dev->flags & IFF_UP))
continue;
if (!ip_tunnel_key_match(&t->parms, flags, key))
@@ -209,6 +207,8 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
hlist_for_each_entry_rcu(t, head, hash_node) {
if (t->parms.i_key != key ||
+ t->parms.iph.saddr != 0 ||
+ t->parms.iph.daddr != 0 ||
!(t->dev->flags & IFF_UP))
continue;
@@ -235,13 +235,17 @@ static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
{
unsigned int h;
__be32 remote;
+ __be32 i_key = parms->i_key;
if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
remote = parms->iph.daddr;
else
remote = 0;
- h = ip_tunnel_hash(parms->i_key, remote);
+ if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
+ i_key = 0;
+
+ h = ip_tunnel_hash(i_key, remote);
return &itn->tunnels[h];
}
@@ -264,6 +268,7 @@ static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
__be32 remote = parms->iph.daddr;
__be32 local = parms->iph.saddr;
__be32 key = parms->i_key;
+ __be16 flags = parms->i_flags;
int link = parms->link;
struct ip_tunnel *t = NULL;
struct hlist_head *head = ip_bucket(itn, parms);
@@ -271,9 +276,9 @@ static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
hlist_for_each_entry_rcu(t, head, hash_node) {
if (local == t->parms.iph.saddr &&
remote == t->parms.iph.daddr &&
- key == t->parms.i_key &&
link == t->parms.link &&
- type == t->dev->type)
+ type == t->dev->type &&
+ ip_tunnel_key_match(&t->parms, flags, key))
break;
}
return t;
@@ -391,14 +396,13 @@ static struct ip_tunnel *ip_tunnel_create(struct net *net,
struct ip_tunnel_net *itn,
struct ip_tunnel_parm *parms)
{
- struct ip_tunnel *nt, *fbt;
+ struct ip_tunnel *nt;
struct net_device *dev;
BUG_ON(!itn->fb_tunnel_dev);
- fbt = netdev_priv(itn->fb_tunnel_dev);
dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
if (IS_ERR(dev))
- return NULL;
+ return ERR_CAST(dev);
dev->mtu = ip_tunnel_bind_dev(dev);
@@ -438,6 +442,8 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
tunnel->i_seqno = ntohl(tpi->seq) + 1;
}
+ skb_reset_network_header(skb);
+
err = IP_ECN_decapsulate(iph, skb);
if (unlikely(err)) {
if (log_ecn_error)
@@ -534,9 +540,10 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
unsigned int max_headroom; /* The extra header space needed */
__be32 dst;
int err;
- bool connected = true;
+ bool connected;
inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
+ connected = (tunnel->parms.iph.daddr != 0);
dst = tnl_params->daddr;
if (dst == 0) {
@@ -661,12 +668,13 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
dev->needed_headroom = max_headroom;
if (skb_cow_head(skb, dev->needed_headroom)) {
+ ip_rt_put(rt);
dev->stats.tx_dropped++;
kfree_skb(skb);
return;
}
- err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, protocol,
+ err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr, protocol,
tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
@@ -718,19 +726,18 @@ static void ip_tunnel_update(struct ip_tunnel_net *itn,
int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
{
int err = 0;
- struct ip_tunnel *t;
- struct net *net = dev_net(dev);
- struct ip_tunnel *tunnel = netdev_priv(dev);
- struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
+ struct ip_tunnel *t = netdev_priv(dev);
+ struct net *net = t->net;
+ struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);
BUG_ON(!itn->fb_tunnel_dev);
switch (cmd) {
case SIOCGETTUNNEL:
- t = NULL;
- if (dev == itn->fb_tunnel_dev)
+ if (dev == itn->fb_tunnel_dev) {
t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
- if (t == NULL)
- t = netdev_priv(dev);
+ if (t == NULL)
+ t = netdev_priv(dev);
+ }
memcpy(p, &t->parms, sizeof(*p));
break;
@@ -741,16 +748,20 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
goto done;
if (p->iph.ttl)
p->iph.frag_off |= htons(IP_DF);
- if (!(p->i_flags&TUNNEL_KEY))
- p->i_key = 0;
- if (!(p->o_flags&TUNNEL_KEY))
- p->o_key = 0;
+ if (!(p->i_flags & VTI_ISVTI)) {
+ if (!(p->i_flags & TUNNEL_KEY))
+ p->i_key = 0;
+ if (!(p->o_flags & TUNNEL_KEY))
+ p->o_key = 0;
+ }
t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
- if (!t && (cmd == SIOCADDTUNNEL))
+ if (!t && (cmd == SIOCADDTUNNEL)) {
t = ip_tunnel_create(net, itn, p);
-
+ err = PTR_ERR_OR_ZERO(t);
+ break;
+ }
if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
if (t != NULL) {
if (t->dev != dev) {
@@ -777,8 +788,9 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
if (t) {
err = 0;
ip_tunnel_update(itn, t, dev, p, true);
- } else
- err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
+ } else {
+ err = -ENOENT;
+ }
break;
case SIOCDELTUNNEL:
@@ -872,6 +884,7 @@ int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
*/
if (!IS_ERR(itn->fb_tunnel_dev)) {
itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
+ itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
}
rtnl_unlock();
@@ -993,19 +1006,13 @@ int ip_tunnel_init(struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
struct iphdr *iph = &tunnel->parms.iph;
- int i, err;
+ int err;
dev->destructor = ip_tunnel_dev_free;
- dev->tstats = alloc_percpu(struct pcpu_sw_netstats);
+ dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
if (!dev->tstats)
return -ENOMEM;
- for_each_possible_cpu(i) {
- struct pcpu_sw_netstats *ipt_stats;
- ipt_stats = per_cpu_ptr(dev->tstats, i);
- u64_stats_init(&ipt_stats->syncp);
- }
-
tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
if (!tunnel->dst_cache) {
free_percpu(dev->tstats);
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index 8d69626f220..f4c987bb7e9 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -46,7 +46,7 @@
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
-int iptunnel_xmit(struct rtable *rt, struct sk_buff *skb,
+int iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
__be32 src, __be32 dst, __u8 proto,
__u8 tos, __u8 ttl, __be16 df, bool xnet)
{
@@ -74,9 +74,9 @@ int iptunnel_xmit(struct rtable *rt, struct sk_buff *skb,
iph->daddr = dst;
iph->saddr = src;
iph->ttl = ttl;
- __ip_select_ident(iph, &rt->dst, (skb_shinfo(skb)->gso_segs ?: 1) - 1);
+ __ip_select_ident(iph, skb_shinfo(skb)->gso_segs ?: 1);
- err = ip_local_out(skb);
+ err = ip_local_out_sk(sk, skb);
if (unlikely(net_xmit_eval(err)))
pkt_len = 0;
return pkt_len;
@@ -135,6 +135,14 @@ struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb,
return skb;
}
+ /* If packet is not gso and we are resolving any partial checksum,
+ * clear encapsulation flag. This allows setting CHECKSUM_PARTIAL
+ * on the outer header without confusing devices that implement
+ * NETIF_F_IP_CSUM with encapsulation.
+ */
+ if (csum_help)
+ skb->encapsulation = 0;
+
if (skb->ip_summed == CHECKSUM_PARTIAL && csum_help) {
err = skb_checksum_help(skb);
if (unlikely(err))
@@ -162,12 +170,12 @@ struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev,
unsigned int start;
do {
- start = u64_stats_fetch_begin_bh(&tstats->syncp);
+ start = u64_stats_fetch_begin_irq(&tstats->syncp);
rx_packets = tstats->rx_packets;
tx_packets = tstats->tx_packets;
rx_bytes = tstats->rx_bytes;
tx_bytes = tstats->tx_bytes;
- } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
+ } while (u64_stats_fetch_retry_irq(&tstats->syncp, start));
tot->rx_packets += rx_packets;
tot->tx_packets += tx_packets;
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 48eafae5176..b8960f3527f 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -34,6 +34,7 @@
#include <linux/init.h>
#include <linux/netfilter_ipv4.h>
#include <linux/if_ether.h>
+#include <linux/icmpv6.h>
#include <net/sock.h>
#include <net/ip.h>
@@ -49,8 +50,8 @@ static struct rtnl_link_ops vti_link_ops __read_mostly;
static int vti_net_id __read_mostly;
static int vti_tunnel_init(struct net_device *dev);
-/* We dont digest the packet therefore let the packet pass */
-static int vti_rcv(struct sk_buff *skb)
+static int vti_input(struct sk_buff *skb, int nexthdr, __be32 spi,
+ int encap_type)
{
struct ip_tunnel *tunnel;
const struct iphdr *iph = ip_hdr(skb);
@@ -60,79 +61,120 @@ static int vti_rcv(struct sk_buff *skb)
tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
iph->saddr, iph->daddr, 0);
if (tunnel != NULL) {
- struct pcpu_sw_netstats *tstats;
- u32 oldmark = skb->mark;
- int ret;
-
-
- /* temporarily mark the skb with the tunnel o_key, to
- * only match policies with this mark.
- */
- skb->mark = be32_to_cpu(tunnel->parms.o_key);
- ret = xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb);
- skb->mark = oldmark;
- if (!ret)
- return -1;
-
- tstats = this_cpu_ptr(tunnel->dev->tstats);
- u64_stats_update_begin(&tstats->syncp);
- tstats->rx_packets++;
- tstats->rx_bytes += skb->len;
- u64_stats_update_end(&tstats->syncp);
-
- secpath_reset(skb);
- skb->dev = tunnel->dev;
+ if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
+ goto drop;
+
+ XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = tunnel;
+ skb->mark = be32_to_cpu(tunnel->parms.i_key);
+
+ return xfrm_input(skb, nexthdr, spi, encap_type);
+ }
+
+ return -EINVAL;
+drop:
+ kfree_skb(skb);
+ return 0;
+}
+
+static int vti_rcv(struct sk_buff *skb)
+{
+ XFRM_SPI_SKB_CB(skb)->family = AF_INET;
+ XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
+
+ return vti_input(skb, ip_hdr(skb)->protocol, 0, 0);
+}
+
+static int vti_rcv_cb(struct sk_buff *skb, int err)
+{
+ unsigned short family;
+ struct net_device *dev;
+ struct pcpu_sw_netstats *tstats;
+ struct xfrm_state *x;
+ struct ip_tunnel *tunnel = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4;
+
+ if (!tunnel)
return 1;
+
+ dev = tunnel->dev;
+
+ if (err) {
+ dev->stats.rx_errors++;
+ dev->stats.rx_dropped++;
+
+ return 0;
}
- return -1;
+ x = xfrm_input_state(skb);
+ family = x->inner_mode->afinfo->family;
+
+ if (!xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family))
+ return -EPERM;
+
+ skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(skb->dev)));
+ skb->dev = dev;
+
+ tstats = this_cpu_ptr(dev->tstats);
+
+ u64_stats_update_begin(&tstats->syncp);
+ tstats->rx_packets++;
+ tstats->rx_bytes += skb->len;
+ u64_stats_update_end(&tstats->syncp);
+
+ return 0;
}
-/* This function assumes it is being called from dev_queue_xmit()
- * and that skb is filled properly by that function.
- */
+static bool vti_state_check(const struct xfrm_state *x, __be32 dst, __be32 src)
+{
+ xfrm_address_t *daddr = (xfrm_address_t *)&dst;
+ xfrm_address_t *saddr = (xfrm_address_t *)&src;
-static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
+ /* if there is no transform then this tunnel is not functional.
+ * Or if the xfrm is not mode tunnel.
+ */
+ if (!x || x->props.mode != XFRM_MODE_TUNNEL ||
+ x->props.family != AF_INET)
+ return false;
+
+ if (!dst)
+ return xfrm_addr_equal(saddr, &x->props.saddr, AF_INET);
+
+ if (!xfrm_state_addr_check(x, daddr, saddr, AF_INET))
+ return false;
+
+ return true;
+}
+
+static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev,
+ struct flowi *fl)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
- struct iphdr *tiph = &tunnel->parms.iph;
- u8 tos;
- struct rtable *rt; /* Route to the other host */
+ struct ip_tunnel_parm *parms = &tunnel->parms;
+ struct dst_entry *dst = skb_dst(skb);
struct net_device *tdev; /* Device to other host */
- struct iphdr *old_iph = ip_hdr(skb);
- __be32 dst = tiph->daddr;
- struct flowi4 fl4;
int err;
- if (skb->protocol != htons(ETH_P_IP))
- goto tx_error;
-
- tos = old_iph->tos;
+ if (!dst) {
+ dev->stats.tx_carrier_errors++;
+ goto tx_error_icmp;
+ }
- memset(&fl4, 0, sizeof(fl4));
- flowi4_init_output(&fl4, tunnel->parms.link,
- be32_to_cpu(tunnel->parms.o_key), RT_TOS(tos),
- RT_SCOPE_UNIVERSE,
- IPPROTO_IPIP, 0,
- dst, tiph->saddr, 0, 0);
- rt = ip_route_output_key(dev_net(dev), &fl4);
- if (IS_ERR(rt)) {
+ dst_hold(dst);
+ dst = xfrm_lookup(tunnel->net, dst, fl, NULL, 0);
+ if (IS_ERR(dst)) {
dev->stats.tx_carrier_errors++;
goto tx_error_icmp;
}
- /* if there is no transform then this tunnel is not functional.
- * Or if the xfrm is not mode tunnel.
- */
- if (!rt->dst.xfrm ||
- rt->dst.xfrm->props.mode != XFRM_MODE_TUNNEL) {
+
+ if (!vti_state_check(dst->xfrm, parms->iph.daddr, parms->iph.saddr)) {
dev->stats.tx_carrier_errors++;
- ip_rt_put(rt);
+ dst_release(dst);
goto tx_error_icmp;
}
- tdev = rt->dst.dev;
+
+ tdev = dst->dev;
if (tdev == dev) {
- ip_rt_put(rt);
+ dst_release(dst);
dev->stats.collisions++;
goto tx_error;
}
@@ -146,10 +188,8 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
tunnel->err_count = 0;
}
- memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
- skb_dst_drop(skb);
- skb_dst_set(skb, &rt->dst);
- nf_reset(skb);
+ skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(dev)));
+ skb_dst_set(skb, dst);
skb->dev = skb_dst(skb)->dev;
err = dst_output(skb);
@@ -166,6 +206,98 @@ tx_error:
return NETDEV_TX_OK;
}
+/* This function assumes it is being called from dev_queue_xmit()
+ * and that skb is filled properly by that function.
+ */
+static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+ struct flowi fl;
+
+ memset(&fl, 0, sizeof(fl));
+
+ skb->mark = be32_to_cpu(tunnel->parms.o_key);
+
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ xfrm_decode_session(skb, &fl, AF_INET);
+ memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
+ break;
+ case htons(ETH_P_IPV6):
+ xfrm_decode_session(skb, &fl, AF_INET6);
+ memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
+ break;
+ default:
+ dev->stats.tx_errors++;
+ dev_kfree_skb(skb);
+ return NETDEV_TX_OK;
+ }
+
+ return vti_xmit(skb, dev, &fl);
+}
+
+static int vti4_err(struct sk_buff *skb, u32 info)
+{
+ __be32 spi;
+ __u32 mark;
+ struct xfrm_state *x;
+ struct ip_tunnel *tunnel;
+ struct ip_esp_hdr *esph;
+ struct ip_auth_hdr *ah ;
+ struct ip_comp_hdr *ipch;
+ struct net *net = dev_net(skb->dev);
+ const struct iphdr *iph = (const struct iphdr *)skb->data;
+ int protocol = iph->protocol;
+ struct ip_tunnel_net *itn = net_generic(net, vti_net_id);
+
+ tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
+ iph->daddr, iph->saddr, 0);
+ if (!tunnel)
+ return -1;
+
+ mark = be32_to_cpu(tunnel->parms.o_key);
+
+ switch (protocol) {
+ case IPPROTO_ESP:
+ esph = (struct ip_esp_hdr *)(skb->data+(iph->ihl<<2));
+ spi = esph->spi;
+ break;
+ case IPPROTO_AH:
+ ah = (struct ip_auth_hdr *)(skb->data+(iph->ihl<<2));
+ spi = ah->spi;
+ break;
+ case IPPROTO_COMP:
+ ipch = (struct ip_comp_hdr *)(skb->data+(iph->ihl<<2));
+ spi = htonl(ntohs(ipch->cpi));
+ break;
+ default:
+ return 0;
+ }
+
+ switch (icmp_hdr(skb)->type) {
+ case ICMP_DEST_UNREACH:
+ if (icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
+ return 0;
+ case ICMP_REDIRECT:
+ break;
+ default:
+ return 0;
+ }
+
+ x = xfrm_state_lookup(net, mark, (const xfrm_address_t *)&iph->daddr,
+ spi, protocol, AF_INET);
+ if (!x)
+ return 0;
+
+ if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH)
+ ipv4_update_pmtu(skb, net, info, 0, 0, protocol, 0);
+ else
+ ipv4_redirect(skb, net, 0, 0, protocol, 0);
+ xfrm_state_put(x);
+
+ return 0;
+}
+
static int
vti_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
@@ -181,12 +313,19 @@ vti_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
return -EINVAL;
}
+ if (!(p.i_flags & GRE_KEY))
+ p.i_key = 0;
+ if (!(p.o_flags & GRE_KEY))
+ p.o_key = 0;
+
+ p.i_flags = VTI_ISVTI;
+
err = ip_tunnel_ioctl(dev, &p, cmd);
if (err)
return err;
if (cmd != SIOCDELTUNNEL) {
- p.i_flags |= GRE_KEY | VTI_ISVTI;
+ p.i_flags |= GRE_KEY;
p.o_flags |= GRE_KEY;
}
@@ -207,6 +346,7 @@ static const struct net_device_ops vti_netdev_ops = {
static void vti_tunnel_setup(struct net_device *dev)
{
dev->netdev_ops = &vti_netdev_ops;
+ dev->type = ARPHRD_TUNNEL;
ip_tunnel_setup(dev, vti_net_id);
}
@@ -218,13 +358,11 @@ static int vti_tunnel_init(struct net_device *dev)
memcpy(dev->dev_addr, &iph->saddr, 4);
memcpy(dev->broadcast, &iph->daddr, 4);
- dev->type = ARPHRD_TUNNEL;
dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr);
dev->mtu = ETH_DATA_LEN;
dev->flags = IFF_NOARP;
dev->iflink = 0;
dev->addr_len = 4;
- dev->features |= NETIF_F_NETNS_LOCAL;
dev->features |= NETIF_F_LLTX;
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
@@ -241,9 +379,28 @@ static void __net_init vti_fb_tunnel_init(struct net_device *dev)
iph->ihl = 5;
}
-static struct xfrm_tunnel_notifier vti_handler __read_mostly = {
+static struct xfrm4_protocol vti_esp4_protocol __read_mostly = {
.handler = vti_rcv,
- .priority = 1,
+ .input_handler = vti_input,
+ .cb_handler = vti_rcv_cb,
+ .err_handler = vti4_err,
+ .priority = 100,
+};
+
+static struct xfrm4_protocol vti_ah4_protocol __read_mostly = {
+ .handler = vti_rcv,
+ .input_handler = vti_input,
+ .cb_handler = vti_rcv_cb,
+ .err_handler = vti4_err,
+ .priority = 100,
+};
+
+static struct xfrm4_protocol vti_ipcomp4_protocol __read_mostly = {
+ .handler = vti_rcv,
+ .input_handler = vti_input,
+ .cb_handler = vti_rcv_cb,
+ .err_handler = vti4_err,
+ .priority = 100,
};
static int __net_init vti_init_net(struct net *net)
@@ -287,6 +444,8 @@ static void vti_netlink_parms(struct nlattr *data[],
if (!data)
return;
+ parms->i_flags = VTI_ISVTI;
+
if (data[IFLA_VTI_LINK])
parms->link = nla_get_u32(data[IFLA_VTI_LINK]);
@@ -382,10 +541,31 @@ static int __init vti_init(void)
err = register_pernet_device(&vti_net_ops);
if (err < 0)
return err;
- err = xfrm4_mode_tunnel_input_register(&vti_handler);
+ err = xfrm4_protocol_register(&vti_esp4_protocol, IPPROTO_ESP);
if (err < 0) {
unregister_pernet_device(&vti_net_ops);
pr_info("vti init: can't register tunnel\n");
+
+ return err;
+ }
+
+ err = xfrm4_protocol_register(&vti_ah4_protocol, IPPROTO_AH);
+ if (err < 0) {
+ xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP);
+ unregister_pernet_device(&vti_net_ops);
+ pr_info("vti init: can't register tunnel\n");
+
+ return err;
+ }
+
+ err = xfrm4_protocol_register(&vti_ipcomp4_protocol, IPPROTO_COMP);
+ if (err < 0) {
+ xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH);
+ xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP);
+ unregister_pernet_device(&vti_net_ops);
+ pr_info("vti init: can't register tunnel\n");
+
+ return err;
}
err = rtnl_link_register(&vti_link_ops);
@@ -395,7 +575,9 @@ static int __init vti_init(void)
return err;
rtnl_link_failed:
- xfrm4_mode_tunnel_input_deregister(&vti_handler);
+ xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP);
+ xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH);
+ xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP);
unregister_pernet_device(&vti_net_ops);
return err;
}
@@ -403,8 +585,13 @@ rtnl_link_failed:
static void __exit vti_fini(void)
{
rtnl_link_unregister(&vti_link_ops);
- if (xfrm4_mode_tunnel_input_deregister(&vti_handler))
+ if (xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP))
pr_info("vti close: can't deregister tunnel\n");
+ if (xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH))
+ pr_info("vti close: can't deregister tunnel\n");
+ if (xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP))
+ pr_info("vti close: can't deregister tunnel\n");
+
unregister_pernet_device(&vti_net_ops);
}
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index 826be4cb482..c0855d50a3f 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -23,7 +23,7 @@
#include <net/protocol.h>
#include <net/sock.h>
-static void ipcomp4_err(struct sk_buff *skb, u32 info)
+static int ipcomp4_err(struct sk_buff *skb, u32 info)
{
struct net *net = dev_net(skb->dev);
__be32 spi;
@@ -34,24 +34,26 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info)
switch (icmp_hdr(skb)->type) {
case ICMP_DEST_UNREACH:
if (icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
- return;
+ return 0;
case ICMP_REDIRECT:
break;
default:
- return;
+ return 0;
}
spi = htonl(ntohs(ipch->cpi));
x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
spi, IPPROTO_COMP, AF_INET);
if (!x)
- return;
+ return 0;
if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH)
ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_COMP, 0);
else
ipv4_redirect(skb, net, 0, 0, IPPROTO_COMP, 0);
xfrm_state_put(x);
+
+ return 0;
}
/* We always hold one tunnel user reference to indicate a tunnel */
@@ -147,6 +149,11 @@ out:
return err;
}
+static int ipcomp4_rcv_cb(struct sk_buff *skb, int err)
+{
+ return 0;
+}
+
static const struct xfrm_type ipcomp_type = {
.description = "IPCOMP4",
.owner = THIS_MODULE,
@@ -157,11 +164,12 @@ static const struct xfrm_type ipcomp_type = {
.output = ipcomp_output
};
-static const struct net_protocol ipcomp4_protocol = {
+static struct xfrm4_protocol ipcomp4_protocol = {
.handler = xfrm4_rcv,
+ .input_handler = xfrm_input,
+ .cb_handler = ipcomp4_rcv_cb,
.err_handler = ipcomp4_err,
- .no_policy = 1,
- .netns_ok = 1,
+ .priority = 0,
};
static int __init ipcomp4_init(void)
@@ -170,7 +178,7 @@ static int __init ipcomp4_init(void)
pr_info("%s: can't add xfrm type\n", __func__);
return -EAGAIN;
}
- if (inet_add_protocol(&ipcomp4_protocol, IPPROTO_COMP) < 0) {
+ if (xfrm4_protocol_register(&ipcomp4_protocol, IPPROTO_COMP) < 0) {
pr_info("%s: can't add protocol\n", __func__);
xfrm_unregister_type(&ipcomp_type, AF_INET);
return -EAGAIN;
@@ -180,7 +188,7 @@ static int __init ipcomp4_init(void)
static void __exit ipcomp4_fini(void)
{
- if (inet_del_protocol(&ipcomp4_protocol, IPPROTO_COMP) < 0)
+ if (xfrm4_protocol_deregister(&ipcomp4_protocol, IPPROTO_COMP) < 0)
pr_info("%s: can't remove protocol\n", __func__);
if (xfrm_unregister_type(&ipcomp_type, AF_INET) < 0)
pr_info("%s: can't remove xfrm type\n", __func__);
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 812b1835146..62eaa005e14 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -149,13 +149,13 @@ static int ipip_err(struct sk_buff *skb, u32 info)
if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
ipv4_update_pmtu(skb, dev_net(skb->dev), info,
- t->dev->ifindex, 0, IPPROTO_IPIP, 0);
+ t->parms.link, 0, IPPROTO_IPIP, 0);
err = 0;
goto out;
}
if (type == ICMP_REDIRECT) {
- ipv4_redirect(skb, dev_net(skb->dev), t->dev->ifindex, 0,
+ ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
IPPROTO_IPIP, 0);
err = 0;
goto out;
@@ -486,4 +486,5 @@ static void __exit ipip_fini(void)
module_init(ipip_init);
module_exit(ipip_fini);
MODULE_LICENSE("GPL");
+MODULE_ALIAS_RTNL_LINK("ipip");
MODULE_ALIAS_NETDEV("tunl0");
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 28863570dd6..65bcaa78904 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -455,7 +455,7 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
struct mr_table *mrt;
struct flowi4 fl4 = {
.flowi4_oif = dev->ifindex,
- .flowi4_iif = skb->skb_iif,
+ .flowi4_iif = skb->skb_iif ? : LOOPBACK_IFINDEX,
.flowi4_mark = skb->mark,
};
int err;
@@ -484,7 +484,7 @@ static void reg_vif_setup(struct net_device *dev)
dev->type = ARPHRD_PIMREG;
dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8;
dev->flags = IFF_NOARP;
- dev->netdev_ops = &reg_vif_netdev_ops,
+ dev->netdev_ops = &reg_vif_netdev_ops;
dev->destructor = free_netdev;
dev->features |= NETIF_F_NETNS_LOCAL;
}
@@ -1663,7 +1663,7 @@ static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
iph->protocol = IPPROTO_IPIP;
iph->ihl = 5;
iph->tot_len = htons(skb->len);
- ip_select_ident(skb, skb_dst(skb), NULL);
+ ip_select_ident(skb, NULL);
ip_send_check(iph);
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index c3e0adea9c2..7ebd6e37875 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -61,7 +61,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned int addr_type)
skb_dst_set(skb, NULL);
dst = xfrm_lookup(net, dst, flowi4_to_flowi(&fl4), skb->sk, 0);
if (IS_ERR(dst))
- return PTR_ERR(dst);;
+ return PTR_ERR(dst);
skb_dst_set(skb, dst);
}
#endif
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 59da7cde072..f95b6f93814 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -1044,8 +1044,10 @@ static int __do_replace(struct net *net, const char *name,
xt_free_table_info(oldinfo);
if (copy_to_user(counters_ptr, counters,
- sizeof(struct xt_counters) * num_counters) != 0)
- ret = -EFAULT;
+ sizeof(struct xt_counters) * num_counters) != 0) {
+ /* Silent error, can't fail, new table is already in place */
+ net_warn_ratelimited("arptables: counters copy to user failed while replacing table\n");
+ }
vfree(counters);
xt_table_unlock(t);
return ret;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 718dfbd30cb..99e810f8467 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1231,8 +1231,10 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
xt_free_table_info(oldinfo);
if (copy_to_user(counters_ptr, counters,
- sizeof(struct xt_counters) * num_counters) != 0)
- ret = -EFAULT;
+ sizeof(struct xt_counters) * num_counters) != 0) {
+ /* Silent error, can't fail, new table is already in place */
+ net_warn_ratelimited("iptables: counters copy to user failed while replacing table\n");
+ }
vfree(counters);
xt_table_unlock(t);
return ret;
diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c
index c49dcd0284a..4bfaedf9b34 100644
--- a/net/ipv4/netfilter/ipt_rpfilter.c
+++ b/net/ipv4/netfilter/ipt_rpfilter.c
@@ -89,11 +89,8 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
if (ipv4_is_multicast(iph->daddr)) {
if (ipv4_is_zeronet(iph->saddr))
return ipv4_is_local_multicast(iph->daddr) ^ invert;
- flow.flowi4_iif = 0;
- } else {
- flow.flowi4_iif = LOOPBACK_IFINDEX;
}
-
+ flow.flowi4_iif = LOOPBACK_IFINDEX;
flow.daddr = iph->saddr;
flow.saddr = rpfilter_get_saddr(iph->daddr);
flow.flowi4_oif = 0;
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
index ee2886126e3..f1787c04a4d 100644
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -91,17 +91,9 @@ nf_nat_ipv4_fn(const struct nf_hook_ops *ops,
if (nf_ct_is_untracked(ct))
return NF_ACCEPT;
- nat = nfct_nat(ct);
- if (!nat) {
- /* NAT module was loaded late. */
- if (nf_ct_is_confirmed(ct))
- return NF_ACCEPT;
- nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
- if (nat == NULL) {
- pr_debug("failed to add NAT extension\n");
- return NF_ACCEPT;
- }
- }
+ nat = nf_ct_nat_ext_add(ct);
+ if (nat == NULL)
+ return NF_ACCEPT;
switch (ctinfo) {
case IP_CT_RELATED:
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index 12e13bd82b5..b8f6381c7d0 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -22,7 +22,6 @@
#endif
#include <net/netfilter/nf_conntrack_zones.h>
-/* Returns new sk_buff, or NULL */
static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user)
{
int err;
@@ -33,8 +32,10 @@ static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user)
err = ip_defrag(skb, user);
local_bh_enable();
- if (!err)
+ if (!err) {
ip_send_check(ip_hdr(skb));
+ skb->ignore_df = 1;
+ }
return err;
}
diff --git a/net/ipv4/netfilter/nft_chain_nat_ipv4.c b/net/ipv4/netfilter/nft_chain_nat_ipv4.c
index b5b256d45e6..3964157d826 100644
--- a/net/ipv4/netfilter/nft_chain_nat_ipv4.c
+++ b/net/ipv4/netfilter/nft_chain_nat_ipv4.c
@@ -48,15 +48,9 @@ static unsigned int nf_nat_fn(const struct nf_hook_ops *ops,
NF_CT_ASSERT(!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)));
- nat = nfct_nat(ct);
- if (nat == NULL) {
- /* Conntrack module was loaded late, can't add extension. */
- if (nf_ct_is_confirmed(ct))
- return NF_ACCEPT;
- nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
- if (nat == NULL)
- return NF_ACCEPT;
- }
+ nat = nf_ct_nat_ext_add(ct);
+ if (nat == NULL)
+ return NF_ACCEPT;
switch (ctinfo) {
case IP_CT_RELATED:
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 2d11c094296..044a0ddf6a7 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -236,15 +236,15 @@ exit:
static void inet_get_ping_group_range_net(struct net *net, kgid_t *low,
kgid_t *high)
{
- kgid_t *data = net->ipv4.sysctl_ping_group_range;
+ kgid_t *data = net->ipv4.ping_group_range.range;
unsigned int seq;
do {
- seq = read_seqbegin(&net->ipv4.sysctl_local_ports.lock);
+ seq = read_seqbegin(&net->ipv4.ping_group_range.lock);
*low = data[0];
*high = data[1];
- } while (read_seqretry(&net->ipv4.sysctl_local_ports.lock, seq));
+ } while (read_seqretry(&net->ipv4.ping_group_range.lock, seq));
}
@@ -252,26 +252,33 @@ int ping_init_sock(struct sock *sk)
{
struct net *net = sock_net(sk);
kgid_t group = current_egid();
- struct group_info *group_info = get_current_groups();
- int i, j, count = group_info->ngroups;
+ struct group_info *group_info;
+ int i, j, count;
kgid_t low, high;
+ int ret = 0;
inet_get_ping_group_range_net(net, &low, &high);
if (gid_lte(low, group) && gid_lte(group, high))
return 0;
+ group_info = get_current_groups();
+ count = group_info->ngroups;
for (i = 0; i < group_info->nblocks; i++) {
int cp_count = min_t(int, NGROUPS_PER_BLOCK, count);
for (j = 0; j < cp_count; j++) {
kgid_t gid = group_info->blocks[i][j];
if (gid_lte(low, gid) && gid_lte(gid, high))
- return 0;
+ goto out_release_group;
}
count -= cp_count;
}
- return -EACCES;
+ ret = -EACCES;
+
+out_release_group:
+ put_group_info(group_info);
+ return ret;
}
EXPORT_SYMBOL_GPL(ping_init_sock);
@@ -727,7 +734,7 @@ static int ping_v4_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m
sock_tx_timestamp(sk, &ipc.tx_flags);
if (msg->msg_controllen) {
- err = ip_cmsg_send(sock_net(sk), msg, &ipc);
+ err = ip_cmsg_send(sock_net(sk), msg, &ipc, false);
if (err)
return err;
if (ipc.opt)
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index a6c8a80ec9d..ae0af9386f7 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -273,6 +273,7 @@ static const struct snmp_mib snmp4_net_list[] = {
SNMP_MIB_ITEM("TCPChallengeACK", LINUX_MIB_TCPCHALLENGEACK),
SNMP_MIB_ITEM("TCPSYNChallenge", LINUX_MIB_TCPSYNCHALLENGE),
SNMP_MIB_ITEM("TCPFastOpenActive", LINUX_MIB_TCPFASTOPENACTIVE),
+ SNMP_MIB_ITEM("TCPFastOpenActiveFail", LINUX_MIB_TCPFASTOPENACTIVEFAIL),
SNMP_MIB_ITEM("TCPFastOpenPassive", LINUX_MIB_TCPFASTOPENPASSIVE),
SNMP_MIB_ITEM("TCPFastOpenPassiveFail", LINUX_MIB_TCPFASTOPENPASSIVEFAIL),
SNMP_MIB_ITEM("TCPFastOpenListenOverflow", LINUX_MIB_TCPFASTOPENLISTENOVERFLOW),
@@ -280,6 +281,11 @@ static const struct snmp_mib snmp4_net_list[] = {
SNMP_MIB_ITEM("TCPSpuriousRtxHostQueues", LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES),
SNMP_MIB_ITEM("BusyPollRxPackets", LINUX_MIB_BUSYPOLLRXPACKETS),
SNMP_MIB_ITEM("TCPAutoCorking", LINUX_MIB_TCPAUTOCORKING),
+ SNMP_MIB_ITEM("TCPFromZeroWindowAdv", LINUX_MIB_TCPFROMZEROWINDOWADV),
+ SNMP_MIB_ITEM("TCPToZeroWindowAdv", LINUX_MIB_TCPTOZEROWINDOWADV),
+ SNMP_MIB_ITEM("TCPWantZeroWindowAdv", LINUX_MIB_TCPWANTZEROWINDOWADV),
+ SNMP_MIB_ITEM("TCPSynRetrans", LINUX_MIB_TCPSYNRETRANS),
+ SNMP_MIB_ITEM("TCPOrigDataSent", LINUX_MIB_TCPORIGDATASENT),
SNMP_MIB_SENTINEL
};
@@ -339,15 +345,15 @@ static void icmp_put(struct seq_file *seq)
for (i = 0; icmpmibmap[i].name != NULL; i++)
seq_printf(seq, " Out%s", icmpmibmap[i].name);
seq_printf(seq, "\nIcmp: %lu %lu %lu",
- snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_INMSGS),
- snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_INERRORS),
- snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_CSUMERRORS));
+ snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_INMSGS),
+ snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_INERRORS),
+ snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_CSUMERRORS));
for (i = 0; icmpmibmap[i].name != NULL; i++)
seq_printf(seq, " %lu",
atomic_long_read(ptr + icmpmibmap[i].index));
seq_printf(seq, " %lu %lu",
- snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_OUTMSGS),
- snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_OUTERRORS));
+ snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_OUTMSGS),
+ snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_OUTERRORS));
for (i = 0; icmpmibmap[i].name != NULL; i++)
seq_printf(seq, " %lu",
atomic_long_read(ptr + (icmpmibmap[i].index | 0x100)));
@@ -373,7 +379,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
BUILD_BUG_ON(offsetof(struct ipstats_mib, mibs) != 0);
for (i = 0; snmp4_ipstats_list[i].name != NULL; i++)
seq_printf(seq, " %llu",
- snmp_fold_field64((void __percpu **)net->mib.ip_statistics,
+ snmp_fold_field64(net->mib.ip_statistics,
snmp4_ipstats_list[i].entry,
offsetof(struct ipstats_mib, syncp)));
@@ -389,11 +395,11 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
/* MaxConn field is signed, RFC 2012 */
if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN)
seq_printf(seq, " %ld",
- snmp_fold_field((void __percpu **)net->mib.tcp_statistics,
+ snmp_fold_field(net->mib.tcp_statistics,
snmp4_tcp_list[i].entry));
else
seq_printf(seq, " %lu",
- snmp_fold_field((void __percpu **)net->mib.tcp_statistics,
+ snmp_fold_field(net->mib.tcp_statistics,
snmp4_tcp_list[i].entry));
}
@@ -404,7 +410,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
seq_puts(seq, "\nUdp:");
for (i = 0; snmp4_udp_list[i].name != NULL; i++)
seq_printf(seq, " %lu",
- snmp_fold_field((void __percpu **)net->mib.udp_statistics,
+ snmp_fold_field(net->mib.udp_statistics,
snmp4_udp_list[i].entry));
/* the UDP and UDP-Lite MIBs are the same */
@@ -415,7 +421,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
seq_puts(seq, "\nUdpLite:");
for (i = 0; snmp4_udp_list[i].name != NULL; i++)
seq_printf(seq, " %lu",
- snmp_fold_field((void __percpu **)net->mib.udplite_statistics,
+ snmp_fold_field(net->mib.udplite_statistics,
snmp4_udp_list[i].entry));
seq_putc(seq, '\n');
@@ -452,7 +458,7 @@ static int netstat_seq_show(struct seq_file *seq, void *v)
seq_puts(seq, "\nTcpExt:");
for (i = 0; snmp4_net_list[i].name != NULL; i++)
seq_printf(seq, " %lu",
- snmp_fold_field((void __percpu **)net->mib.net_statistics,
+ snmp_fold_field(net->mib.net_statistics,
snmp4_net_list[i].entry));
seq_puts(seq, "\nIpExt:");
@@ -462,7 +468,7 @@ static int netstat_seq_show(struct seq_file *seq, void *v)
seq_puts(seq, "\nIpExt:");
for (i = 0; snmp4_ipextstats_list[i].name != NULL; i++)
seq_printf(seq, " %llu",
- snmp_fold_field64((void __percpu **)net->mib.ip_statistics,
+ snmp_fold_field64(net->mib.ip_statistics,
snmp4_ipextstats_list[i].entry,
offsetof(struct ipstats_mib, syncp)));
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index c04518f4850..2c65160565e 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -389,7 +389,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
iph->check = 0;
iph->tot_len = htons(length);
if (!iph->id)
- ip_select_ident(skb, &rt->dst, NULL);
+ ip_select_ident(skb, NULL);
iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
}
@@ -524,7 +524,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
ipc.oif = sk->sk_bound_dev_if;
if (msg->msg_controllen) {
- err = ip_cmsg_send(sock_net(sk), msg, &ipc);
+ err = ip_cmsg_send(sock_net(sk), msg, &ipc, false);
if (err)
goto out;
if (ipc.opt)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 4c011ec69ed..190199851c9 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -89,6 +89,7 @@
#include <linux/rcupdate.h>
#include <linux/times.h>
#include <linux/slab.h>
+#include <linux/jhash.h>
#include <net/dst.h>
#include <net/net_namespace.h>
#include <net/protocol.h>
@@ -139,11 +140,6 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb);
static void ipv4_dst_destroy(struct dst_entry *dst);
-static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
- int how)
-{
-}
-
static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old)
{
WARN_ON(1);
@@ -162,7 +158,6 @@ static struct dst_ops ipv4_dst_ops = {
.mtu = ipv4_mtu,
.cow_metrics = ipv4_cow_metrics,
.destroy = ipv4_dst_destroy,
- .ifdown = ipv4_dst_ifdown,
.negative_advice = ipv4_negative_advice,
.link_failure = ipv4_link_failure,
.update_pmtu = ip_rt_update_pmtu,
@@ -194,7 +189,7 @@ const __u8 ip_tos2prio[16] = {
EXPORT_SYMBOL(ip_tos2prio);
static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
-#define RT_CACHE_STAT_INC(field) __this_cpu_inc(rt_cache_stat.field)
+#define RT_CACHE_STAT_INC(field) raw_cpu_inc(rt_cache_stat.field)
#ifdef CONFIG_PROC_FS
static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
@@ -462,39 +457,45 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
return neigh_create(&arp_tbl, pkey, dev);
}
-/*
- * Peer allocation may fail only in serious out-of-memory conditions. However
- * we still can generate some output.
- * Random ID selection looks a bit dangerous because we have no chances to
- * select ID being unique in a reasonable period of time.
- * But broken packet identifier may be better than no packet at all.
+#define IP_IDENTS_SZ 2048u
+struct ip_ident_bucket {
+ atomic_t id;
+ u32 stamp32;
+};
+
+static struct ip_ident_bucket *ip_idents __read_mostly;
+
+/* In order to protect privacy, we add a perturbation to identifiers
+ * if one generator is seldom used. This makes hard for an attacker
+ * to infer how many packets were sent between two points in time.
*/
-static void ip_select_fb_ident(struct iphdr *iph)
+u32 ip_idents_reserve(u32 hash, int segs)
{
- static DEFINE_SPINLOCK(ip_fb_id_lock);
- static u32 ip_fallback_id;
- u32 salt;
+ struct ip_ident_bucket *bucket = ip_idents + hash % IP_IDENTS_SZ;
+ u32 old = ACCESS_ONCE(bucket->stamp32);
+ u32 now = (u32)jiffies;
+ u32 delta = 0;
+
+ if (old != now && cmpxchg(&bucket->stamp32, old, now) == old)
+ delta = prandom_u32_max(now - old);
- spin_lock_bh(&ip_fb_id_lock);
- salt = secure_ip_id((__force __be32)ip_fallback_id ^ iph->daddr);
- iph->id = htons(salt & 0xFFFF);
- ip_fallback_id = salt;
- spin_unlock_bh(&ip_fb_id_lock);
+ return atomic_add_return(segs + delta, &bucket->id) - segs;
}
+EXPORT_SYMBOL(ip_idents_reserve);
-void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
+void __ip_select_ident(struct iphdr *iph, int segs)
{
- struct net *net = dev_net(dst->dev);
- struct inet_peer *peer;
+ static u32 ip_idents_hashrnd __read_mostly;
+ u32 hash, id;
- peer = inet_getpeer_v4(net->ipv4.peers, iph->daddr, 1);
- if (peer) {
- iph->id = htons(inet_getid(peer, more));
- inet_putpeer(peer);
- return;
- }
+ net_get_random_once(&ip_idents_hashrnd, sizeof(ip_idents_hashrnd));
- ip_select_fb_ident(iph);
+ hash = jhash_3words((__force u32)iph->daddr,
+ (__force u32)iph->saddr,
+ iph->protocol,
+ ip_idents_hashrnd);
+ id = ip_idents_reserve(hash, segs);
+ iph->id = htons(id);
}
EXPORT_SYMBOL(__ip_select_ident);
@@ -697,7 +698,6 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
out_unlock:
spin_unlock_bh(&fnhe_lock);
- return;
}
static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flowi4 *fl4,
@@ -1000,6 +1000,9 @@ void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
struct flowi4 fl4;
struct rtable *rt;
+ if (!mark)
+ mark = IP4_REPLY_MARK(net, skb->mark);
+
__build_flow_key(&fl4, NULL, iph, oif,
RT_TOS(iph->tos), protocol, mark, flow_flags);
rt = __ip_route_output_key(net, &fl4);
@@ -1017,6 +1020,10 @@ static void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
struct rtable *rt;
__build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
+
+ if (!fl4.flowi4_mark)
+ fl4.flowi4_mark = IP4_REPLY_MARK(sock_net(sk), skb->mark);
+
rt = __ip_route_output_key(sock_net(sk), &fl4);
if (!IS_ERR(rt)) {
__ip_rt_update_pmtu(rt, &fl4, mtu);
@@ -1029,7 +1036,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
const struct iphdr *iph = (const struct iphdr *) skb->data;
struct flowi4 fl4;
struct rtable *rt;
- struct dst_entry *dst;
+ struct dst_entry *odst = NULL;
bool new = false;
bh_lock_sock(sk);
@@ -1037,16 +1044,17 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
if (!ip_sk_accept_pmtu(sk))
goto out;
- rt = (struct rtable *) __sk_dst_get(sk);
+ odst = sk_dst_get(sk);
- if (sock_owned_by_user(sk) || !rt) {
+ if (sock_owned_by_user(sk) || !odst) {
__ipv4_sk_update_pmtu(skb, sk, mtu);
goto out;
}
__build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
- if (!__sk_dst_check(sk, 0)) {
+ rt = (struct rtable *)odst;
+ if (odst->obsolete && odst->ops->check(odst, 0) == NULL) {
rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
if (IS_ERR(rt))
goto out;
@@ -1056,8 +1064,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
__ip_rt_update_pmtu((struct rtable *) rt->dst.path, &fl4, mtu);
- dst = dst_check(&rt->dst, 0);
- if (!dst) {
+ if (!dst_check(&rt->dst, 0)) {
if (new)
dst_release(&rt->dst);
@@ -1069,10 +1076,11 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
}
if (new)
- __sk_dst_set(sk, &rt->dst);
+ sk_dst_set(sk, &rt->dst);
out:
bh_unlock_sock(sk);
+ dst_release(odst);
}
EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu);
@@ -1136,7 +1144,7 @@ static void ipv4_link_failure(struct sk_buff *skb)
dst_set_expires(&rt->dst, 0);
}
-static int ip_rt_bug(struct sk_buff *skb)
+static int ip_rt_bug(struct sock *sk, struct sk_buff *skb)
{
pr_debug("%s: %pI4 -> %pI4, %s\n",
__func__, &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr,
@@ -1526,7 +1534,7 @@ static int __mkroute_input(struct sk_buff *skb,
struct in_device *out_dev;
unsigned int flags = 0;
bool do_cache;
- u32 itag;
+ u32 itag = 0;
/* get a working reference to the output device */
out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res));
@@ -1707,8 +1715,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
if (res.type == RTN_LOCAL) {
err = fib_validate_source(skb, saddr, daddr, tos,
- LOOPBACK_IFINDEX,
- dev, in_dev, &itag);
+ 0, dev, in_dev, &itag);
if (err < 0)
goto martian_source_keep_err;
goto local_input;
@@ -2225,7 +2232,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
new->__use = 1;
new->input = dst_discard;
- new->output = dst_discard;
+ new->output = dst_discard_sk;
new->dev = ort->dst.dev;
if (new->dev)
@@ -2364,7 +2371,7 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
}
} else
#endif
- if (nla_put_u32(skb, RTA_IIF, rt->rt_iif))
+ if (nla_put_u32(skb, RTA_IIF, skb->dev->ifindex))
goto nla_put_failure;
}
@@ -2475,11 +2482,6 @@ errout_free:
goto errout;
}
-int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb)
-{
- return skb->len;
-}
-
void ip_rt_multicast_event(struct in_device *in_dev)
{
rt_cache_flush(dev_net(in_dev->dev));
@@ -2717,6 +2719,12 @@ int __init ip_rt_init(void)
{
int rc = 0;
+ ip_idents = kmalloc(IP_IDENTS_SZ * sizeof(*ip_idents), GFP_KERNEL);
+ if (!ip_idents)
+ panic("IP: failed to allocate ip_idents\n");
+
+ prandom_bytes(ip_idents, IP_IDENTS_SZ * sizeof(*ip_idents));
+
#ifdef CONFIG_IP_ROUTE_CLASSID
ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct));
if (!ip_rt_acct)
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index f2ed13c2125..c86624b36a6 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -303,6 +303,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
ireq->ir_rmt_port = th->source;
ireq->ir_loc_addr = ip_hdr(skb)->daddr;
ireq->ir_rmt_addr = ip_hdr(skb)->saddr;
+ ireq->ir_mark = inet_request_mark(sk, skb);
ireq->ecn_ok = ecn_ok;
ireq->snd_wscale = tcp_opt.snd_wscale;
ireq->sack_ok = tcp_opt.sack_ok;
@@ -339,7 +340,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
* hasn't changed since we received the original syn, but I see
* no easy way to do this.
*/
- flowi4_init_output(&fl4, sk->sk_bound_dev_if, sk->sk_mark,
+ flowi4_init_output(&fl4, sk->sk_bound_dev_if, ireq->ir_mark,
RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP,
inet_sk_flowi_flags(sk),
(opt && opt->srr) ? opt->faddr : ireq->ir_rmt_addr,
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 44eba052b43..79a007c5255 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -45,10 +45,10 @@ static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX };
/* Update system visible IP port range */
static void set_local_port_range(struct net *net, int range[2])
{
- write_seqlock(&net->ipv4.sysctl_local_ports.lock);
- net->ipv4.sysctl_local_ports.range[0] = range[0];
- net->ipv4.sysctl_local_ports.range[1] = range[1];
- write_sequnlock(&net->ipv4.sysctl_local_ports.lock);
+ write_seqlock(&net->ipv4.ip_local_ports.lock);
+ net->ipv4.ip_local_ports.range[0] = range[0];
+ net->ipv4.ip_local_ports.range[1] = range[1];
+ write_sequnlock(&net->ipv4.ip_local_ports.lock);
}
/* Validate changes from /proc interface. */
@@ -57,7 +57,7 @@ static int ipv4_local_port_range(struct ctl_table *table, int write,
size_t *lenp, loff_t *ppos)
{
struct net *net =
- container_of(table->data, struct net, ipv4.sysctl_local_ports.range);
+ container_of(table->data, struct net, ipv4.ip_local_ports.range);
int ret;
int range[2];
struct ctl_table tmp = {
@@ -87,14 +87,14 @@ static void inet_get_ping_group_range_table(struct ctl_table *table, kgid_t *low
{
kgid_t *data = table->data;
struct net *net =
- container_of(table->data, struct net, ipv4.sysctl_ping_group_range);
+ container_of(table->data, struct net, ipv4.ping_group_range.range);
unsigned int seq;
do {
- seq = read_seqbegin(&net->ipv4.sysctl_local_ports.lock);
+ seq = read_seqbegin(&net->ipv4.ip_local_ports.lock);
*low = data[0];
*high = data[1];
- } while (read_seqretry(&net->ipv4.sysctl_local_ports.lock, seq));
+ } while (read_seqretry(&net->ipv4.ip_local_ports.lock, seq));
}
/* Update system visible IP port range */
@@ -102,11 +102,11 @@ static void set_ping_group_range(struct ctl_table *table, kgid_t low, kgid_t hig
{
kgid_t *data = table->data;
struct net *net =
- container_of(table->data, struct net, ipv4.sysctl_ping_group_range);
- write_seqlock(&net->ipv4.sysctl_local_ports.lock);
+ container_of(table->data, struct net, ipv4.ping_group_range.range);
+ write_seqlock(&net->ipv4.ip_local_ports.lock);
data[0] = low;
data[1] = high;
- write_sequnlock(&net->ipv4.sysctl_local_ports.lock);
+ write_sequnlock(&net->ipv4.ip_local_ports.lock);
}
/* Validate changes from /proc interface. */
@@ -437,13 +437,6 @@ static struct ctl_table ipv4_table[] = {
.proc_handler = proc_dointvec
},
{
- .procname = "ip_local_reserved_ports",
- .data = NULL, /* initialized in sysctl_ipv4_init */
- .maxlen = 65536,
- .mode = 0644,
- .proc_handler = proc_do_large_bitmap,
- },
- {
.procname = "igmp_max_memberships",
.data = &sysctl_igmp_max_memberships,
.maxlen = sizeof(int),
@@ -805,7 +798,7 @@ static struct ctl_table ipv4_net_table[] = {
},
{
.procname = "ping_group_range",
- .data = &init_net.ipv4.sysctl_ping_group_range,
+ .data = &init_net.ipv4.ping_group_range.range,
.maxlen = sizeof(gid_t)*2,
.mode = 0644,
.proc_handler = ipv4_ping_group_range,
@@ -819,12 +812,19 @@ static struct ctl_table ipv4_net_table[] = {
},
{
.procname = "ip_local_port_range",
- .maxlen = sizeof(init_net.ipv4.sysctl_local_ports.range),
- .data = &init_net.ipv4.sysctl_local_ports.range,
+ .maxlen = sizeof(init_net.ipv4.ip_local_ports.range),
+ .data = &init_net.ipv4.ip_local_ports.range,
.mode = 0644,
.proc_handler = ipv4_local_port_range,
},
{
+ .procname = "ip_local_reserved_ports",
+ .data = &init_net.ipv4.sysctl_local_reserved_ports,
+ .maxlen = 65536,
+ .mode = 0644,
+ .proc_handler = proc_do_large_bitmap,
+ },
+ {
.procname = "ip_no_pmtu_disc",
.data = &init_net.ipv4.sysctl_ip_no_pmtu_disc,
.maxlen = sizeof(int),
@@ -838,6 +838,20 @@ static struct ctl_table ipv4_net_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
+ {
+ .procname = "fwmark_reflect",
+ .data = &init_net.ipv4.sysctl_fwmark_reflect,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "tcp_fwmark_accept",
+ .data = &init_net.ipv4.sysctl_tcp_fwmark_accept,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
{ }
};
@@ -858,26 +872,18 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
table[i].data += (void *)net - (void *)&init_net;
}
- /*
- * Sane defaults - nobody may create ping sockets.
- * Boot scripts should set this to distro-specific group.
- */
- net->ipv4.sysctl_ping_group_range[0] = make_kgid(&init_user_ns, 1);
- net->ipv4.sysctl_ping_group_range[1] = make_kgid(&init_user_ns, 0);
-
- /*
- * Set defaults for local port range
- */
- seqlock_init(&net->ipv4.sysctl_local_ports.lock);
- net->ipv4.sysctl_local_ports.range[0] = 32768;
- net->ipv4.sysctl_local_ports.range[1] = 61000;
-
net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table);
if (net->ipv4.ipv4_hdr == NULL)
goto err_reg;
+ net->ipv4.sysctl_local_reserved_ports = kzalloc(65536 / 8, GFP_KERNEL);
+ if (!net->ipv4.sysctl_local_reserved_ports)
+ goto err_ports;
+
return 0;
+err_ports:
+ unregister_net_sysctl_table(net->ipv4.ipv4_hdr);
err_reg:
if (!net_eq(net, &init_net))
kfree(table);
@@ -889,6 +895,7 @@ static __net_exit void ipv4_sysctl_exit_net(struct net *net)
{
struct ctl_table *table;
+ kfree(net->ipv4.sysctl_local_reserved_ports);
table = net->ipv4.ipv4_hdr->ctl_table_arg;
unregister_net_sysctl_table(net->ipv4.ipv4_hdr);
kfree(table);
@@ -902,16 +909,6 @@ static __net_initdata struct pernet_operations ipv4_sysctl_ops = {
static __init int sysctl_ipv4_init(void)
{
struct ctl_table_header *hdr;
- struct ctl_table *i;
-
- for (i = ipv4_table; i->procname; i++) {
- if (strcmp(i->procname, "ip_local_reserved_ports") == 0) {
- i->data = sysctl_local_reserved_ports;
- break;
- }
- }
- if (!i->procname)
- return -EINVAL;
hdr = register_net_sysctl(&init_net, "net/ipv4", ipv4_table);
if (hdr == NULL)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 97c8f5620c4..9d2118e5fbc 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -387,7 +387,7 @@ void tcp_init_sock(struct sock *sk)
INIT_LIST_HEAD(&tp->tsq_node);
icsk->icsk_rto = TCP_TIMEOUT_INIT;
- tp->mdev = TCP_TIMEOUT_INIT;
+ tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
/* So many TCP implementations out there (incorrectly) count the
* initial SYN frame in their delayed-ACK and congestion control
@@ -1108,7 +1108,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
if (unlikely(tp->repair)) {
if (tp->repair_queue == TCP_RECV_QUEUE) {
copied = tcp_send_rcvq(sk, msg, size);
- goto out;
+ goto out_nopush;
}
err = -EINVAL;
@@ -1282,6 +1282,7 @@ wait_for_memory:
out:
if (copied)
tcp_push(sk, flags, mss_now, tp->nonagle, size_goal);
+out_nopush:
release_sock(sk);
return copied + copied_syn;
@@ -2341,7 +2342,7 @@ int tcp_disconnect(struct sock *sk, int flags)
sk->sk_shutdown = 0;
sock_reset_flag(sk, SOCK_DONE);
- tp->srtt = 0;
+ tp->srtt_us = 0;
if ((tp->write_seq += tp->max_window + 2) == 0)
tp->write_seq = 1;
icsk->icsk_backoff = 0;
@@ -2785,8 +2786,8 @@ void tcp_get_info(const struct sock *sk, struct tcp_info *info)
info->tcpi_pmtu = icsk->icsk_pmtu_cookie;
info->tcpi_rcv_ssthresh = tp->rcv_ssthresh;
- info->tcpi_rtt = jiffies_to_usecs(tp->srtt)>>3;
- info->tcpi_rttvar = jiffies_to_usecs(tp->mdev)>>2;
+ info->tcpi_rtt = tp->srtt_us >> 3;
+ info->tcpi_rttvar = tp->mdev_us >> 2;
info->tcpi_snd_ssthresh = tp->snd_ssthresh;
info->tcpi_snd_cwnd = tp->snd_cwnd;
info->tcpi_advmss = tp->advmss;
@@ -2796,6 +2797,11 @@ void tcp_get_info(const struct sock *sk, struct tcp_info *info)
info->tcpi_rcv_space = tp->rcvq_space.space;
info->tcpi_total_retrans = tp->total_retrans;
+
+ info->tcpi_pacing_rate = sk->sk_pacing_rate != ~0U ?
+ sk->sk_pacing_rate : ~0ULL;
+ info->tcpi_max_pacing_rate = sk->sk_max_pacing_rate != ~0U ?
+ sk->sk_max_pacing_rate : ~0ULL;
}
EXPORT_SYMBOL_GPL(tcp_get_info);
@@ -2911,6 +2917,14 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
case TCP_USER_TIMEOUT:
val = jiffies_to_msecs(icsk->icsk_user_timeout);
break;
+
+ case TCP_FASTOPEN:
+ if (icsk->icsk_accept_queue.fastopenq != NULL)
+ val = icsk->icsk_accept_queue.fastopenq->max_qlen;
+ else
+ val = 0;
+ break;
+
case TCP_TIMESTAMP:
val = tcp_time_stamp + tp->tsoffset;
break;
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index 821846fb0a7..d5de69bc04f 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -140,13 +140,12 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
ca->cnt = 1;
}
-static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked,
- u32 in_flight)
+static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bictcp *ca = inet_csk_ca(sk);
- if (!tcp_is_cwnd_limited(sk, in_flight))
+ if (!tcp_is_cwnd_limited(sk))
return;
if (tp->snd_cwnd <= tp->snd_ssthresh)
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 2388275adb9..7b09d8b49fa 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -276,26 +276,6 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
return err;
}
-/* RFC2861 Check whether we are limited by application or congestion window
- * This is the inverse of cwnd check in tcp_tso_should_defer
- */
-bool tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight)
-{
- const struct tcp_sock *tp = tcp_sk(sk);
- u32 left;
-
- if (in_flight >= tp->snd_cwnd)
- return true;
-
- left = tp->snd_cwnd - in_flight;
- if (sk_can_gso(sk) &&
- left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd &&
- left < tp->xmit_size_goal_segs)
- return true;
- return left <= tcp_max_tso_deferred_mss(tp);
-}
-EXPORT_SYMBOL_GPL(tcp_is_cwnd_limited);
-
/* Slow start is used when congestion window is no greater than the slow start
* threshold. We base on RFC2581 and also handle stretch ACKs properly.
* We do not implement RFC3465 Appropriate Byte Counting (ABC) per se but
@@ -337,11 +317,11 @@ EXPORT_SYMBOL_GPL(tcp_cong_avoid_ai);
/* This is Jacobson's slow start and congestion avoidance.
* SIGCOMM '88, p. 328.
*/
-void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight)
+void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked)
{
struct tcp_sock *tp = tcp_sk(sk);
- if (!tcp_is_cwnd_limited(sk, in_flight))
+ if (!tcp_is_cwnd_limited(sk))
return;
/* In "safe" area, increase. */
@@ -361,21 +341,12 @@ u32 tcp_reno_ssthresh(struct sock *sk)
}
EXPORT_SYMBOL_GPL(tcp_reno_ssthresh);
-/* Lower bound on congestion window with halving. */
-u32 tcp_reno_min_cwnd(const struct sock *sk)
-{
- const struct tcp_sock *tp = tcp_sk(sk);
- return tp->snd_ssthresh/2;
-}
-EXPORT_SYMBOL_GPL(tcp_reno_min_cwnd);
-
struct tcp_congestion_ops tcp_reno = {
.flags = TCP_CONG_NON_RESTRICTED,
.name = "reno",
.owner = THIS_MODULE,
.ssthresh = tcp_reno_ssthresh,
.cong_avoid = tcp_reno_cong_avoid,
- .min_cwnd = tcp_reno_min_cwnd,
};
/* Initial congestion control used (until SYN)
@@ -387,6 +358,5 @@ struct tcp_congestion_ops tcp_init_congestion_ops = {
.owner = THIS_MODULE,
.ssthresh = tcp_reno_ssthresh,
.cong_avoid = tcp_reno_cong_avoid,
- .min_cwnd = tcp_reno_min_cwnd,
};
EXPORT_SYMBOL_GPL(tcp_init_congestion_ops);
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 828e4c3ffba..a9bd8a4828a 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -304,13 +304,12 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
ca->cnt = 1;
}
-static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked,
- u32 in_flight)
+static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bictcp *ca = inet_csk_ca(sk);
- if (!tcp_is_cwnd_limited(sk, in_flight))
+ if (!tcp_is_cwnd_limited(sk))
return;
if (tp->snd_cwnd <= tp->snd_ssthresh) {
@@ -409,7 +408,7 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us)
ratio -= ca->delayed_ack >> ACK_RATIO_SHIFT;
ratio += cnt;
- ca->delayed_ack = min(ratio, ACK_RATIO_LIMIT);
+ ca->delayed_ack = clamp(ratio, 1U, ACK_RATIO_LIMIT);
}
/* Some calls are for duplicates without timetamps */
@@ -476,10 +475,6 @@ static int __init cubictcp_register(void)
/* divide by bic_scale and by constant Srtt (100ms) */
do_div(cube_factor, bic_scale * 10);
- /* hystart needs ms clock resolution */
- if (hystart && HZ < 1000)
- cubictcp.flags |= TCP_CONG_RTT_STAMP;
-
return tcp_register_congestion_control(&cubictcp);
}
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index f195d9316e5..9771563ab56 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -72,25 +72,224 @@ error: kfree(ctx);
return err;
}
-/* Computes the fastopen cookie for the IP path.
- * The path is a 128 bits long (pad with zeros for IPv4).
- *
- * The caller must check foc->len to determine if a valid cookie
- * has been generated successfully.
-*/
-void tcp_fastopen_cookie_gen(__be32 src, __be32 dst,
- struct tcp_fastopen_cookie *foc)
+static bool __tcp_fastopen_cookie_gen(const void *path,
+ struct tcp_fastopen_cookie *foc)
{
- __be32 path[4] = { src, dst, 0, 0 };
struct tcp_fastopen_context *ctx;
+ bool ok = false;
tcp_fastopen_init_key_once(true);
rcu_read_lock();
ctx = rcu_dereference(tcp_fastopen_ctx);
if (ctx) {
- crypto_cipher_encrypt_one(ctx->tfm, foc->val, (__u8 *)path);
+ crypto_cipher_encrypt_one(ctx->tfm, foc->val, path);
foc->len = TCP_FASTOPEN_COOKIE_SIZE;
+ ok = true;
}
rcu_read_unlock();
+ return ok;
+}
+
+/* Generate the fastopen cookie by doing aes128 encryption on both
+ * the source and destination addresses. Pad 0s for IPv4 or IPv4-mapped-IPv6
+ * addresses. For the longer IPv6 addresses use CBC-MAC.
+ *
+ * XXX (TFO) - refactor when TCP_FASTOPEN_COOKIE_SIZE != AES_BLOCK_SIZE.
+ */
+static bool tcp_fastopen_cookie_gen(struct request_sock *req,
+ struct sk_buff *syn,
+ struct tcp_fastopen_cookie *foc)
+{
+ if (req->rsk_ops->family == AF_INET) {
+ const struct iphdr *iph = ip_hdr(syn);
+
+ __be32 path[4] = { iph->saddr, iph->daddr, 0, 0 };
+ return __tcp_fastopen_cookie_gen(path, foc);
+ }
+
+#if IS_ENABLED(CONFIG_IPV6)
+ if (req->rsk_ops->family == AF_INET6) {
+ const struct ipv6hdr *ip6h = ipv6_hdr(syn);
+ struct tcp_fastopen_cookie tmp;
+
+ if (__tcp_fastopen_cookie_gen(&ip6h->saddr, &tmp)) {
+ struct in6_addr *buf = (struct in6_addr *) tmp.val;
+ int i = 4;
+
+ for (i = 0; i < 4; i++)
+ buf->s6_addr32[i] ^= ip6h->daddr.s6_addr32[i];
+ return __tcp_fastopen_cookie_gen(buf, foc);
+ }
+ }
+#endif
+ return false;
+}
+
+static bool tcp_fastopen_create_child(struct sock *sk,
+ struct sk_buff *skb,
+ struct dst_entry *dst,
+ struct request_sock *req)
+{
+ struct tcp_sock *tp;
+ struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
+ struct sock *child;
+
+ req->num_retrans = 0;
+ req->num_timeout = 0;
+ req->sk = NULL;
+
+ child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
+ if (child == NULL)
+ return false;
+
+ spin_lock(&queue->fastopenq->lock);
+ queue->fastopenq->qlen++;
+ spin_unlock(&queue->fastopenq->lock);
+
+ /* Initialize the child socket. Have to fix some values to take
+ * into account the child is a Fast Open socket and is created
+ * only out of the bits carried in the SYN packet.
+ */
+ tp = tcp_sk(child);
+
+ tp->fastopen_rsk = req;
+ /* Do a hold on the listener sk so that if the listener is being
+ * closed, the child that has been accepted can live on and still
+ * access listen_lock.
+ */
+ sock_hold(sk);
+ tcp_rsk(req)->listener = sk;
+
+ /* RFC1323: The window in SYN & SYN/ACK segments is never
+ * scaled. So correct it appropriately.
+ */
+ tp->snd_wnd = ntohs(tcp_hdr(skb)->window);
+
+ /* Activate the retrans timer so that SYNACK can be retransmitted.
+ * The request socket is not added to the SYN table of the parent
+ * because it's been added to the accept queue directly.
+ */
+ inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS,
+ TCP_TIMEOUT_INIT, TCP_RTO_MAX);
+
+ /* Add the child socket directly into the accept queue */
+ inet_csk_reqsk_queue_add(sk, req, child);
+
+ /* Now finish processing the fastopen child socket. */
+ inet_csk(child)->icsk_af_ops->rebuild_header(child);
+ tcp_init_congestion_control(child);
+ tcp_mtup_init(child);
+ tcp_init_metrics(child);
+ tcp_init_buffer_space(child);
+
+ /* Queue the data carried in the SYN packet. We need to first
+ * bump skb's refcnt because the caller will attempt to free it.
+ *
+ * XXX (TFO) - we honor a zero-payload TFO request for now,
+ * (any reason not to?) but no need to queue the skb since
+ * there is no data. How about SYN+FIN?
+ */
+ if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1) {
+ skb = skb_get(skb);
+ skb_dst_drop(skb);
+ __skb_pull(skb, tcp_hdr(skb)->doff * 4);
+ skb_set_owner_r(skb, child);
+ __skb_queue_tail(&child->sk_receive_queue, skb);
+ tp->syn_data_acked = 1;
+ }
+ tcp_rsk(req)->rcv_nxt = tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+ sk->sk_data_ready(sk);
+ bh_unlock_sock(child);
+ sock_put(child);
+ WARN_ON(req->sk == NULL);
+ return true;
+}
+EXPORT_SYMBOL(tcp_fastopen_create_child);
+
+static bool tcp_fastopen_queue_check(struct sock *sk)
+{
+ struct fastopen_queue *fastopenq;
+
+ /* Make sure the listener has enabled fastopen, and we don't
+ * exceed the max # of pending TFO requests allowed before trying
+ * to validate the cookie in order to avoid burning CPU cycles
+ * unnecessarily.
+ *
+ * XXX (TFO) - The implication of checking the max_qlen before
+ * processing a cookie request is that clients can't differentiate
+ * between qlen overflow causing Fast Open to be disabled
+ * temporarily vs a server not supporting Fast Open at all.
+ */
+ fastopenq = inet_csk(sk)->icsk_accept_queue.fastopenq;
+ if (fastopenq == NULL || fastopenq->max_qlen == 0)
+ return false;
+
+ if (fastopenq->qlen >= fastopenq->max_qlen) {
+ struct request_sock *req1;
+ spin_lock(&fastopenq->lock);
+ req1 = fastopenq->rskq_rst_head;
+ if ((req1 == NULL) || time_after(req1->expires, jiffies)) {
+ spin_unlock(&fastopenq->lock);
+ NET_INC_STATS_BH(sock_net(sk),
+ LINUX_MIB_TCPFASTOPENLISTENOVERFLOW);
+ return false;
+ }
+ fastopenq->rskq_rst_head = req1->dl_next;
+ fastopenq->qlen--;
+ spin_unlock(&fastopenq->lock);
+ reqsk_free(req1);
+ }
+ return true;
+}
+
+/* Returns true if we should perform Fast Open on the SYN. The cookie (foc)
+ * may be updated and returned to the client in the SYN-ACK later. E.g., Fast Open
+ * cookie request (foc->len == 0).
+ */
+bool tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
+ struct request_sock *req,
+ struct tcp_fastopen_cookie *foc,
+ struct dst_entry *dst)
+{
+ struct tcp_fastopen_cookie valid_foc = { .len = -1 };
+ bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1;
+
+ if (!((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) &&
+ (syn_data || foc->len >= 0) &&
+ tcp_fastopen_queue_check(sk))) {
+ foc->len = -1;
+ return false;
+ }
+
+ if (syn_data && (sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD))
+ goto fastopen;
+
+ if (tcp_fastopen_cookie_gen(req, skb, &valid_foc) &&
+ foc->len == TCP_FASTOPEN_COOKIE_SIZE &&
+ foc->len == valid_foc.len &&
+ !memcmp(foc->val, valid_foc.val, foc->len)) {
+ /* Cookie is valid. Create a (full) child socket to accept
+ * the data in SYN before returning a SYN-ACK to ack the
+ * data. If we fail to create the socket, fall back and
+ * ack the ISN only but include the same cookie.
+ *
+ * Note: Data-less SYN with valid cookie is allowed to send
+ * data in SYN_RECV state.
+ */
+fastopen:
+ if (tcp_fastopen_create_child(sk, skb, dst, req)) {
+ foc->len = -1;
+ NET_INC_STATS_BH(sock_net(sk),
+ LINUX_MIB_TCPFASTOPENPASSIVE);
+ return true;
+ }
+ }
+
+ NET_INC_STATS_BH(sock_net(sk), foc->len ?
+ LINUX_MIB_TCPFASTOPENPASSIVEFAIL :
+ LINUX_MIB_TCPFASTOPENCOOKIEREQD);
+ *foc = valid_foc;
+ return false;
}
+EXPORT_SYMBOL(tcp_try_fastopen);
diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c
index 8ed9305dfdf..1c4908280d9 100644
--- a/net/ipv4/tcp_highspeed.c
+++ b/net/ipv4/tcp_highspeed.c
@@ -109,12 +109,12 @@ static void hstcp_init(struct sock *sk)
tp->snd_cwnd_clamp = min_t(u32, tp->snd_cwnd_clamp, 0xffffffff/128);
}
-static void hstcp_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight)
+static void hstcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
{
struct tcp_sock *tp = tcp_sk(sk);
struct hstcp *ca = inet_csk_ca(sk);
- if (!tcp_is_cwnd_limited(sk, in_flight))
+ if (!tcp_is_cwnd_limited(sk))
return;
if (tp->snd_cwnd <= tp->snd_ssthresh)
@@ -162,7 +162,6 @@ static struct tcp_congestion_ops tcp_highspeed __read_mostly = {
.init = hstcp_init,
.ssthresh = hstcp_ssthresh,
.cong_avoid = hstcp_cong_avoid,
- .min_cwnd = tcp_reno_min_cwnd,
.owner = THIS_MODULE,
.name = "highspeed"
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c
index 4a194acfd92..031361311a8 100644
--- a/net/ipv4/tcp_htcp.c
+++ b/net/ipv4/tcp_htcp.c
@@ -227,12 +227,12 @@ static u32 htcp_recalc_ssthresh(struct sock *sk)
return max((tp->snd_cwnd * ca->beta) >> 7, 2U);
}
-static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight)
+static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
{
struct tcp_sock *tp = tcp_sk(sk);
struct htcp *ca = inet_csk_ca(sk);
- if (!tcp_is_cwnd_limited(sk, in_flight))
+ if (!tcp_is_cwnd_limited(sk))
return;
if (tp->snd_cwnd <= tp->snd_ssthresh)
diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c
index 478fe82611b..d8f8f05a495 100644
--- a/net/ipv4/tcp_hybla.c
+++ b/net/ipv4/tcp_hybla.c
@@ -21,7 +21,7 @@ struct hybla {
u32 rho2; /* Rho * Rho, integer part */
u32 rho_3ls; /* Rho parameter, <<3 */
u32 rho2_7ls; /* Rho^2, <<7 */
- u32 minrtt; /* Minimum smoothed round trip time value seen */
+ u32 minrtt_us; /* Minimum smoothed round trip time value seen */
};
/* Hybla reference round trip time (default= 1/40 sec = 25 ms), in ms */
@@ -35,7 +35,9 @@ static inline void hybla_recalc_param (struct sock *sk)
{
struct hybla *ca = inet_csk_ca(sk);
- ca->rho_3ls = max_t(u32, tcp_sk(sk)->srtt / msecs_to_jiffies(rtt0), 8);
+ ca->rho_3ls = max_t(u32,
+ tcp_sk(sk)->srtt_us / (rtt0 * USEC_PER_MSEC),
+ 8U);
ca->rho = ca->rho_3ls >> 3;
ca->rho2_7ls = (ca->rho_3ls * ca->rho_3ls) << 1;
ca->rho2 = ca->rho2_7ls >> 7;
@@ -59,7 +61,7 @@ static void hybla_init(struct sock *sk)
hybla_recalc_param(sk);
/* set minimum rtt as this is the 1st ever seen */
- ca->minrtt = tp->srtt;
+ ca->minrtt_us = tp->srtt_us;
tp->snd_cwnd = ca->rho;
}
@@ -85,8 +87,7 @@ static inline u32 hybla_fraction(u32 odds)
* o Give cwnd a new value based on the model proposed
* o remember increments <1
*/
-static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 acked,
- u32 in_flight)
+static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 acked)
{
struct tcp_sock *tp = tcp_sk(sk);
struct hybla *ca = inet_csk_ca(sk);
@@ -94,16 +95,16 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 acked,
int is_slowstart = 0;
/* Recalculate rho only if this srtt is the lowest */
- if (tp->srtt < ca->minrtt){
+ if (tp->srtt_us < ca->minrtt_us) {
hybla_recalc_param(sk);
- ca->minrtt = tp->srtt;
+ ca->minrtt_us = tp->srtt_us;
}
- if (!tcp_is_cwnd_limited(sk, in_flight))
+ if (!tcp_is_cwnd_limited(sk))
return;
if (!ca->hybla_en) {
- tcp_reno_cong_avoid(sk, ack, acked, in_flight);
+ tcp_reno_cong_avoid(sk, ack, acked);
return;
}
@@ -166,7 +167,6 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 acked,
static struct tcp_congestion_ops tcp_hybla __read_mostly = {
.init = hybla_init,
.ssthresh = tcp_reno_ssthresh,
- .min_cwnd = tcp_reno_min_cwnd,
.cong_avoid = hybla_cong_avoid,
.set_state = hybla_state,
diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c
index e498a62b8f9..5999b3972e6 100644
--- a/net/ipv4/tcp_illinois.c
+++ b/net/ipv4/tcp_illinois.c
@@ -255,8 +255,7 @@ static void tcp_illinois_state(struct sock *sk, u8 new_state)
/*
* Increase window in response to successful acknowledgment.
*/
-static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 acked,
- u32 in_flight)
+static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 acked)
{
struct tcp_sock *tp = tcp_sk(sk);
struct illinois *ca = inet_csk_ca(sk);
@@ -265,7 +264,7 @@ static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 acked,
update_params(sk);
/* RFC2861 only increase cwnd if fully utilized */
- if (!tcp_is_cwnd_limited(sk, in_flight))
+ if (!tcp_is_cwnd_limited(sk))
return;
/* In slow start */
@@ -325,10 +324,8 @@ static void tcp_illinois_info(struct sock *sk, u32 ext,
}
static struct tcp_congestion_ops tcp_illinois __read_mostly = {
- .flags = TCP_CONG_RTT_STAMP,
.init = tcp_illinois_init,
.ssthresh = tcp_illinois_ssthresh,
- .min_cwnd = tcp_reno_min_cwnd,
.cong_avoid = tcp_illinois_cong_avoid,
.set_state = tcp_illinois_state,
.get_info = tcp_illinois_info,
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index eeaac399420..40639c288dc 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -667,11 +667,11 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
* To save cycles in the RFC 1323 implementation it was better to break
* it up into three procedures. -- erics
*/
-static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
+static void tcp_rtt_estimator(struct sock *sk, long mrtt_us)
{
struct tcp_sock *tp = tcp_sk(sk);
- long m = mrtt; /* RTT */
- u32 srtt = tp->srtt;
+ long m = mrtt_us; /* RTT */
+ u32 srtt = tp->srtt_us;
/* The following amusing code comes from Jacobson's
* article in SIGCOMM '88. Note that rtt and mdev
@@ -694,7 +694,7 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
srtt += m; /* rtt = 7/8 rtt + 1/8 new */
if (m < 0) {
m = -m; /* m is now abs(error) */
- m -= (tp->mdev >> 2); /* similar update on mdev */
+ m -= (tp->mdev_us >> 2); /* similar update on mdev */
/* This is similar to one of Eifel findings.
* Eifel blocks mdev updates when rtt decreases.
* This solution is a bit different: we use finer gain
@@ -706,28 +706,29 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
if (m > 0)
m >>= 3;
} else {
- m -= (tp->mdev >> 2); /* similar update on mdev */
+ m -= (tp->mdev_us >> 2); /* similar update on mdev */
}
- tp->mdev += m; /* mdev = 3/4 mdev + 1/4 new */
- if (tp->mdev > tp->mdev_max) {
- tp->mdev_max = tp->mdev;
- if (tp->mdev_max > tp->rttvar)
- tp->rttvar = tp->mdev_max;
+ tp->mdev_us += m; /* mdev = 3/4 mdev + 1/4 new */
+ if (tp->mdev_us > tp->mdev_max_us) {
+ tp->mdev_max_us = tp->mdev_us;
+ if (tp->mdev_max_us > tp->rttvar_us)
+ tp->rttvar_us = tp->mdev_max_us;
}
if (after(tp->snd_una, tp->rtt_seq)) {
- if (tp->mdev_max < tp->rttvar)
- tp->rttvar -= (tp->rttvar - tp->mdev_max) >> 2;
+ if (tp->mdev_max_us < tp->rttvar_us)
+ tp->rttvar_us -= (tp->rttvar_us - tp->mdev_max_us) >> 2;
tp->rtt_seq = tp->snd_nxt;
- tp->mdev_max = tcp_rto_min(sk);
+ tp->mdev_max_us = tcp_rto_min_us(sk);
}
} else {
/* no previous measure. */
srtt = m << 3; /* take the measured time to be rtt */
- tp->mdev = m << 1; /* make sure rto = 3*rtt */
- tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk));
+ tp->mdev_us = m << 1; /* make sure rto = 3*rtt */
+ tp->rttvar_us = max(tp->mdev_us, tcp_rto_min_us(sk));
+ tp->mdev_max_us = tp->rttvar_us;
tp->rtt_seq = tp->snd_nxt;
}
- tp->srtt = max(1U, srtt);
+ tp->srtt_us = max(1U, srtt);
}
/* Set the sk_pacing_rate to allow proper sizing of TSO packets.
@@ -742,20 +743,12 @@ static void tcp_update_pacing_rate(struct sock *sk)
u64 rate;
/* set sk_pacing_rate to 200 % of current rate (mss * cwnd / srtt) */
- rate = (u64)tp->mss_cache * 2 * (HZ << 3);
+ rate = (u64)tp->mss_cache * 2 * (USEC_PER_SEC << 3);
rate *= max(tp->snd_cwnd, tp->packets_out);
- /* Correction for small srtt and scheduling constraints.
- * For small rtt, consider noise is too high, and use
- * the minimal value (srtt = 1 -> 125 us for HZ=1000)
- *
- * We probably need usec resolution in the future.
- * Note: This also takes care of possible srtt=0 case,
- * when tcp_rtt_estimator() was not yet called.
- */
- if (tp->srtt > 8 + 2)
- do_div(rate, tp->srtt);
+ if (likely(tp->srtt_us))
+ do_div(rate, tp->srtt_us);
/* ACCESS_ONCE() is needed because sch_fq fetches sk_pacing_rate
* without any lock. We want to make sure compiler wont store
@@ -1113,7 +1106,7 @@ static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
}
/* D-SACK for already forgotten data... Do dumb counting. */
- if (dup_sack && tp->undo_marker && tp->undo_retrans &&
+ if (dup_sack && tp->undo_marker && tp->undo_retrans > 0 &&
!after(end_seq_0, prior_snd_una) &&
after(end_seq_0, tp->undo_marker))
tp->undo_retrans--;
@@ -1122,10 +1115,10 @@ static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
}
struct tcp_sacktag_state {
- int reord;
- int fack_count;
- int flag;
- s32 rtt; /* RTT measured by SACKing never-retransmitted data */
+ int reord;
+ int fack_count;
+ long rtt_us; /* RTT measured by SACKing never-retransmitted data */
+ int flag;
};
/* Check if skb is fully within the SACK block. In presence of GSO skbs,
@@ -1169,12 +1162,12 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
unsigned int new_len = (pkt_len / mss) * mss;
if (!in_sack && new_len < pkt_len) {
new_len += mss;
- if (new_len > skb->len)
+ if (new_len >= skb->len)
return 0;
}
pkt_len = new_len;
}
- err = tcp_fragment(sk, skb, pkt_len, mss);
+ err = tcp_fragment(sk, skb, pkt_len, mss, GFP_ATOMIC);
if (err < 0)
return err;
}
@@ -1186,14 +1179,15 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
static u8 tcp_sacktag_one(struct sock *sk,
struct tcp_sacktag_state *state, u8 sacked,
u32 start_seq, u32 end_seq,
- int dup_sack, int pcount, u32 xmit_time)
+ int dup_sack, int pcount,
+ const struct skb_mstamp *xmit_time)
{
struct tcp_sock *tp = tcp_sk(sk);
int fack_count = state->fack_count;
/* Account D-SACK for retransmitted packet. */
if (dup_sack && (sacked & TCPCB_RETRANS)) {
- if (tp->undo_marker && tp->undo_retrans &&
+ if (tp->undo_marker && tp->undo_retrans > 0 &&
after(end_seq, tp->undo_marker))
tp->undo_retrans--;
if (sacked & TCPCB_SACKED_ACKED)
@@ -1227,8 +1221,13 @@ static u8 tcp_sacktag_one(struct sock *sk,
if (!after(end_seq, tp->high_seq))
state->flag |= FLAG_ORIG_SACK_ACKED;
/* Pick the earliest sequence sacked for RTT */
- if (state->rtt < 0)
- state->rtt = tcp_time_stamp - xmit_time;
+ if (state->rtt_us < 0) {
+ struct skb_mstamp now;
+
+ skb_mstamp_get(&now);
+ state->rtt_us = skb_mstamp_us_delta(&now,
+ xmit_time);
+ }
}
if (sacked & TCPCB_LOST) {
@@ -1287,7 +1286,7 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
*/
tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked,
start_seq, end_seq, dup_sack, pcount,
- TCP_SKB_CB(skb)->when);
+ &skb->skb_mstamp);
if (skb == tp->lost_skb_hint)
tp->lost_cnt_hint += pcount;
@@ -1565,7 +1564,7 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
TCP_SKB_CB(skb)->end_seq,
dup_sack,
tcp_skb_pcount(skb),
- TCP_SKB_CB(skb)->when);
+ &skb->skb_mstamp);
if (!before(TCP_SKB_CB(skb)->seq,
tcp_highest_sack_seq(tp)))
@@ -1622,7 +1621,7 @@ static int tcp_sack_cache_ok(const struct tcp_sock *tp, const struct tcp_sack_bl
static int
tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
- u32 prior_snd_una, s32 *sack_rtt)
+ u32 prior_snd_una, long *sack_rtt_us)
{
struct tcp_sock *tp = tcp_sk(sk);
const unsigned char *ptr = (skb_transport_header(ack_skb) +
@@ -1640,7 +1639,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
state.flag = 0;
state.reord = tp->packets_out;
- state.rtt = -1;
+ state.rtt_us = -1L;
if (!tp->sacked_out) {
if (WARN_ON(tp->fackets_out))
@@ -1824,7 +1823,7 @@ out:
WARN_ON((int)tp->retrans_out < 0);
WARN_ON((int)tcp_packets_in_flight(tp) < 0);
#endif
- *sack_rtt = state.rtt;
+ *sack_rtt_us = state.rtt_us;
return state.flag;
}
@@ -1894,7 +1893,7 @@ static void tcp_clear_retrans_partial(struct tcp_sock *tp)
tp->lost_out = 0;
tp->undo_marker = 0;
- tp->undo_retrans = 0;
+ tp->undo_retrans = -1;
}
void tcp_clear_retrans(struct tcp_sock *tp)
@@ -2035,10 +2034,12 @@ static bool tcp_pause_early_retransmit(struct sock *sk, int flag)
* available, or RTO is scheduled to fire first.
*/
if (sysctl_tcp_early_retrans < 2 || sysctl_tcp_early_retrans > 3 ||
- (flag & FLAG_ECE) || !tp->srtt)
+ (flag & FLAG_ECE) || !tp->srtt_us)
return false;
- delay = max_t(unsigned long, (tp->srtt >> 5), msecs_to_jiffies(2));
+ delay = max(usecs_to_jiffies(tp->srtt_us >> 5),
+ msecs_to_jiffies(2));
+
if (!time_after(inet_csk(sk)->icsk_timeout, (jiffies + delay)))
return false;
@@ -2240,7 +2241,8 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
break;
mss = skb_shinfo(skb)->gso_size;
- err = tcp_fragment(sk, skb, (packets - oldcnt) * mss, mss);
+ err = tcp_fragment(sk, skb, (packets - oldcnt) * mss,
+ mss, GFP_ATOMIC);
if (err < 0)
break;
cnt = packets;
@@ -2663,7 +2665,7 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
tp->prior_ssthresh = 0;
tp->undo_marker = tp->snd_una;
- tp->undo_retrans = tp->retrans_out;
+ tp->undo_retrans = tp->retrans_out ? : -1;
if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
if (!ece_ack)
@@ -2683,13 +2685,12 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack)
bool recovered = !before(tp->snd_una, tp->high_seq);
if (tp->frto) { /* F-RTO RFC5682 sec 3.1 (sack enhanced version). */
- if (flag & FLAG_ORIG_SACK_ACKED) {
- /* Step 3.b. A timeout is spurious if not all data are
- * lost, i.e., never-retransmitted data are (s)acked.
- */
- tcp_try_undo_loss(sk, true);
+ /* Step 3.b. A timeout is spurious if not all data are
+ * lost, i.e., never-retransmitted data are (s)acked.
+ */
+ if (tcp_try_undo_loss(sk, flag & FLAG_ORIG_SACK_ACKED))
return;
- }
+
if (after(tp->snd_nxt, tp->high_seq) &&
(flag & FLAG_DATA_SACKED || is_dupack)) {
tp->frto = 0; /* Loss was real: 2nd part of step 3.a */
@@ -2885,7 +2886,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
}
static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
- s32 seq_rtt, s32 sack_rtt)
+ long seq_rtt_us, long sack_rtt_us)
{
const struct tcp_sock *tp = tcp_sk(sk);
@@ -2895,10 +2896,10 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
* is acked (RFC6298).
*/
if (flag & FLAG_RETRANS_DATA_ACKED)
- seq_rtt = -1;
+ seq_rtt_us = -1L;
- if (seq_rtt < 0)
- seq_rtt = sack_rtt;
+ if (seq_rtt_us < 0)
+ seq_rtt_us = sack_rtt_us;
/* RTTM Rule: A TSecr value received in a segment is used to
* update the averaged RTT measurement only if the segment
@@ -2906,14 +2907,14 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
* left edge of the send window.
* See draft-ietf-tcplw-high-performance-00, section 3.3.
*/
- if (seq_rtt < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
+ if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
flag & FLAG_ACKED)
- seq_rtt = tcp_time_stamp - tp->rx_opt.rcv_tsecr;
+ seq_rtt_us = jiffies_to_usecs(tcp_time_stamp - tp->rx_opt.rcv_tsecr);
- if (seq_rtt < 0)
+ if (seq_rtt_us < 0)
return false;
- tcp_rtt_estimator(sk, seq_rtt);
+ tcp_rtt_estimator(sk, seq_rtt_us);
tcp_set_rto(sk);
/* RFC6298: only reset backoff on valid RTT measurement. */
@@ -2925,22 +2926,23 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
static void tcp_synack_rtt_meas(struct sock *sk, const u32 synack_stamp)
{
struct tcp_sock *tp = tcp_sk(sk);
- s32 seq_rtt = -1;
+ long seq_rtt_us = -1L;
if (synack_stamp && !tp->total_retrans)
- seq_rtt = tcp_time_stamp - synack_stamp;
+ seq_rtt_us = jiffies_to_usecs(tcp_time_stamp - synack_stamp);
/* If the ACK acks both the SYNACK and the (Fast Open'd) data packets
* sent in SYN_RECV, SYNACK RTT is the smooth RTT computed in tcp_ack()
*/
- if (!tp->srtt)
- tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, seq_rtt, -1);
+ if (!tp->srtt_us)
+ tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, seq_rtt_us, -1L);
}
-static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight)
+static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
- icsk->icsk_ca_ops->cong_avoid(sk, ack, acked, in_flight);
+
+ icsk->icsk_ca_ops->cong_avoid(sk, ack, acked);
tcp_sk(sk)->snd_cwnd_stamp = tcp_time_stamp;
}
@@ -3023,26 +3025,27 @@ static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
* arrived at the other end.
*/
static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
- u32 prior_snd_una, s32 sack_rtt)
+ u32 prior_snd_una, long sack_rtt_us)
{
- struct tcp_sock *tp = tcp_sk(sk);
const struct inet_connection_sock *icsk = inet_csk(sk);
- struct sk_buff *skb;
- u32 now = tcp_time_stamp;
+ struct skb_mstamp first_ackt, last_ackt, now;
+ struct tcp_sock *tp = tcp_sk(sk);
+ u32 prior_sacked = tp->sacked_out;
+ u32 reord = tp->packets_out;
bool fully_acked = true;
- int flag = 0;
+ long ca_seq_rtt_us = -1L;
+ long seq_rtt_us = -1L;
+ struct sk_buff *skb;
u32 pkts_acked = 0;
- u32 reord = tp->packets_out;
- u32 prior_sacked = tp->sacked_out;
- s32 seq_rtt = -1;
- s32 ca_seq_rtt = -1;
- ktime_t last_ackt = net_invalid_timestamp();
bool rtt_update;
+ int flag = 0;
+
+ first_ackt.v64 = 0;
while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) {
struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
- u32 acked_pcount;
u8 sacked = scb->sacked;
+ u32 acked_pcount;
/* Determine how many packets and what bytes were acked, tso and else */
if (after(scb->end_seq, tp->snd_una)) {
@@ -3064,11 +3067,11 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
tp->retrans_out -= acked_pcount;
flag |= FLAG_RETRANS_DATA_ACKED;
} else {
- ca_seq_rtt = now - scb->when;
- last_ackt = skb->tstamp;
- if (seq_rtt < 0) {
- seq_rtt = ca_seq_rtt;
- }
+ last_ackt = skb->skb_mstamp;
+ WARN_ON_ONCE(last_ackt.v64 == 0);
+ if (!first_ackt.v64)
+ first_ackt = last_ackt;
+
if (!(sacked & TCPCB_SACKED_ACKED))
reord = min(pkts_acked, reord);
if (!after(scb->end_seq, tp->high_seq))
@@ -3114,7 +3117,13 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
flag |= FLAG_SACK_RENEGING;
- rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt, sack_rtt);
+ skb_mstamp_get(&now);
+ if (first_ackt.v64) {
+ seq_rtt_us = skb_mstamp_us_delta(&now, &first_ackt);
+ ca_seq_rtt_us = skb_mstamp_us_delta(&now, &last_ackt);
+ }
+
+ rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt_us, sack_rtt_us);
if (flag & FLAG_ACKED) {
const struct tcp_congestion_ops *ca_ops
@@ -3142,25 +3151,11 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
tp->fackets_out -= min(pkts_acked, tp->fackets_out);
- if (ca_ops->pkts_acked) {
- s32 rtt_us = -1;
-
- /* Is the ACK triggering packet unambiguous? */
- if (!(flag & FLAG_RETRANS_DATA_ACKED)) {
- /* High resolution needed and available? */
- if (ca_ops->flags & TCP_CONG_RTT_STAMP &&
- !ktime_equal(last_ackt,
- net_invalid_timestamp()))
- rtt_us = ktime_us_delta(ktime_get_real(),
- last_ackt);
- else if (ca_seq_rtt >= 0)
- rtt_us = jiffies_to_usecs(ca_seq_rtt);
- }
+ if (ca_ops->pkts_acked)
+ ca_ops->pkts_acked(sk, pkts_acked, ca_seq_rtt_us);
- ca_ops->pkts_acked(sk, pkts_acked, rtt_us);
- }
- } else if (skb && rtt_update && sack_rtt >= 0 &&
- sack_rtt > (s32)(now - TCP_SKB_CB(skb)->when)) {
+ } else if (skb && rtt_update && sack_rtt_us >= 0 &&
+ sack_rtt_us > skb_mstamp_us_delta(&now, &skb->skb_mstamp)) {
/* Do not re-arm RTO if the sack RTT is measured from data sent
* after when the head was last (re)transmitted. Otherwise the
* timeout may continue to extend in loss recovery.
@@ -3370,12 +3365,11 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
u32 ack_seq = TCP_SKB_CB(skb)->seq;
u32 ack = TCP_SKB_CB(skb)->ack_seq;
bool is_dupack = false;
- u32 prior_in_flight, prior_cwnd = tp->snd_cwnd, prior_rtt = tp->srtt;
u32 prior_fackets;
int prior_packets = tp->packets_out;
const int prior_unsacked = tp->packets_out - tp->sacked_out;
int acked = 0; /* Number of packets newly acked */
- s32 sack_rtt = -1;
+ long sack_rtt_us = -1L;
/* If the ack is older than previous acks
* then we can probably ignore it.
@@ -3403,7 +3397,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
flag |= FLAG_SND_UNA_ADVANCED;
prior_fackets = tp->fackets_out;
- prior_in_flight = tcp_packets_in_flight(tp);
/* ts_recent update must be made after we are sure that the packet
* is in window.
@@ -3433,7 +3426,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
if (TCP_SKB_CB(skb)->sacked)
flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
- &sack_rtt);
+ &sack_rtt_us);
if (TCP_ECN_rcv_ecn_echo(tp, tcp_hdr(skb)))
flag |= FLAG_ECE;
@@ -3452,12 +3445,13 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
/* See if we can take anything off of the retransmit queue. */
acked = tp->packets_out;
- flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, sack_rtt);
+ flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una,
+ sack_rtt_us);
acked -= tp->packets_out;
/* Advance cwnd if state allows */
if (tcp_may_raise_cwnd(sk, flag))
- tcp_cong_avoid(sk, ack, acked, prior_in_flight);
+ tcp_cong_avoid(sk, ack, acked);
if (tcp_ack_is_dubious(sk, flag)) {
is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
@@ -3475,8 +3469,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
if (icsk->icsk_pending == ICSK_TIME_RETRANS)
tcp_schedule_loss_probe(sk);
- if (tp->srtt != prior_rtt || tp->snd_cwnd != prior_cwnd)
- tcp_update_pacing_rate(sk);
+ tcp_update_pacing_rate(sk);
return 1;
no_queue:
@@ -3505,7 +3498,7 @@ old_ack:
*/
if (TCP_SKB_CB(skb)->sacked) {
flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
- &sack_rtt);
+ &sack_rtt_us);
tcp_fastretrans_alert(sk, acked, prior_unsacked,
is_dupack, flag);
}
@@ -4419,7 +4412,7 @@ queue_and_out:
if (eaten > 0)
kfree_skb_partial(skb, fragstolen);
if (!sock_flag(sk, SOCK_DEAD))
- sk->sk_data_ready(sk, 0);
+ sk->sk_data_ready(sk);
return;
}
@@ -4709,28 +4702,6 @@ static int tcp_prune_queue(struct sock *sk)
return -1;
}
-/* RFC2861, slow part. Adjust cwnd, after it was not full during one rto.
- * As additional protections, we do not touch cwnd in retransmission phases,
- * and if application hit its sndbuf limit recently.
- */
-void tcp_cwnd_application_limited(struct sock *sk)
-{
- struct tcp_sock *tp = tcp_sk(sk);
-
- if (inet_csk(sk)->icsk_ca_state == TCP_CA_Open &&
- sk->sk_socket && !test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
- /* Limited by application or receiver window. */
- u32 init_win = tcp_init_cwnd(tp, __sk_dst_get(sk));
- u32 win_used = max(tp->snd_cwnd_used, init_win);
- if (win_used < tp->snd_cwnd) {
- tp->snd_ssthresh = tcp_current_ssthresh(sk);
- tp->snd_cwnd = (tp->snd_cwnd + win_used) >> 1;
- }
- tp->snd_cwnd_used = 0;
- }
- tp->snd_cwnd_stamp = tcp_time_stamp;
-}
-
static bool tcp_should_expand_sndbuf(const struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
@@ -4920,7 +4891,7 @@ static void tcp_urg(struct sock *sk, struct sk_buff *skb, const struct tcphdr *t
BUG();
tp->urg_data = TCP_URG_VALID | tmp;
if (!sock_flag(sk, SOCK_DEAD))
- sk->sk_data_ready(sk, 0);
+ sk->sk_data_ready(sk);
}
}
}
@@ -5006,11 +4977,11 @@ static bool tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb,
(tcp_flag_word(tcp_hdr(skb)) & TCP_FLAG_PSH) ||
(atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1))) {
tp->ucopy.wakeup = 1;
- sk->sk_data_ready(sk, 0);
+ sk->sk_data_ready(sk);
}
} else if (chunk > 0) {
tp->ucopy.wakeup = 1;
- sk->sk_data_ready(sk, 0);
+ sk->sk_data_ready(sk);
}
out:
return copied_early;
@@ -5281,7 +5252,7 @@ no_ack:
#endif
if (eaten)
kfree_skb_partial(skb, fragstolen);
- sk->sk_data_ready(sk, 0);
+ sk->sk_data_ready(sk);
return;
}
}
@@ -5401,9 +5372,12 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
break;
}
tcp_rearm_rto(sk);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVEFAIL);
return true;
}
tp->syn_data_acked = tp->syn_data;
+ if (tp->syn_data_acked)
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVE);
return false;
}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 1e4eac779f5..77cccda1ad0 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -336,8 +336,8 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
const int code = icmp_hdr(icmp_skb)->code;
struct sock *sk;
struct sk_buff *skb;
- struct request_sock *req;
- __u32 seq;
+ struct request_sock *fastopen;
+ __u32 seq, snd_una;
__u32 remaining;
int err;
struct net *net = dev_net(icmp_skb->dev);
@@ -378,12 +378,12 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
icsk = inet_csk(sk);
tp = tcp_sk(sk);
- req = tp->fastopen_rsk;
seq = ntohl(th->seq);
+ /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
+ fastopen = tp->fastopen_rsk;
+ snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
if (sk->sk_state != TCP_LISTEN &&
- !between(seq, tp->snd_una, tp->snd_nxt) &&
- (req == NULL || seq != tcp_rsk(req)->snt_isn)) {
- /* For a Fast Open socket, allow seq to be snt_isn. */
+ !between(seq, snd_una, tp->snd_nxt)) {
NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
goto out;
}
@@ -426,16 +426,14 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
break;
if (seq != tp->snd_una || !icsk->icsk_retransmits ||
- !icsk->icsk_backoff)
+ !icsk->icsk_backoff || fastopen)
break;
- /* XXX (TFO) - revisit the following logic for TFO */
-
if (sock_owned_by_user(sk))
break;
icsk->icsk_backoff--;
- inet_csk(sk)->icsk_rto = (tp->srtt ? __tcp_set_rto(tp) :
+ inet_csk(sk)->icsk_rto = (tp->srtt_us ? __tcp_set_rto(tp) :
TCP_TIMEOUT_INIT) << icsk->icsk_backoff;
tcp_bound_rto(sk);
@@ -462,14 +460,6 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
goto out;
}
- /* XXX (TFO) - if it's a TFO socket and has been accepted, rather
- * than following the TCP_SYN_RECV case and closing the socket,
- * we ignore the ICMP error and keep trying like a fully established
- * socket. Is this the right thing to do?
- */
- if (req && req->sk == NULL)
- goto out;
-
switch (sk->sk_state) {
struct request_sock *req, **prev;
case TCP_LISTEN:
@@ -502,10 +492,13 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
goto out;
case TCP_SYN_SENT:
- case TCP_SYN_RECV: /* Cannot happen.
- It can f.e. if SYNs crossed,
- or Fast Open.
- */
+ case TCP_SYN_RECV:
+ /* Only in fast or simultaneous open. If a fast open socket is
+ * already accepted it is treated as a connected one below.
+ */
+ if (fastopen && fastopen->sk == NULL)
+ break;
+
if (!sock_owned_by_user(sk)) {
sk->sk_err = err;
@@ -822,7 +815,8 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
*/
static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
struct request_sock *req,
- u16 queue_mapping)
+ u16 queue_mapping,
+ struct tcp_fastopen_cookie *foc)
{
const struct inet_request_sock *ireq = inet_rsk(req);
struct flowi4 fl4;
@@ -833,7 +827,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
return -1;
- skb = tcp_make_synack(sk, dst, req, NULL);
+ skb = tcp_make_synack(sk, dst, req, foc);
if (skb) {
__tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
@@ -852,10 +846,12 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req)
{
- int res = tcp_v4_send_synack(sk, NULL, req, 0);
+ int res = tcp_v4_send_synack(sk, NULL, req, 0, NULL);
- if (!res)
+ if (!res) {
TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
+ }
return res;
}
@@ -878,8 +874,6 @@ bool tcp_syn_flood_action(struct sock *sk,
bool want_cookie = false;
struct listen_sock *lopt;
-
-
#ifdef CONFIG_SYN_COOKIES
if (sysctl_tcp_syncookies) {
msg = "Sending cookies";
@@ -1260,187 +1254,6 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
};
#endif
-static bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb,
- struct request_sock *req,
- struct tcp_fastopen_cookie *foc,
- struct tcp_fastopen_cookie *valid_foc)
-{
- bool skip_cookie = false;
- struct fastopen_queue *fastopenq;
-
- if (likely(!fastopen_cookie_present(foc))) {
- /* See include/net/tcp.h for the meaning of these knobs */
- if ((sysctl_tcp_fastopen & TFO_SERVER_ALWAYS) ||
- ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD) &&
- (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1)))
- skip_cookie = true; /* no cookie to validate */
- else
- return false;
- }
- fastopenq = inet_csk(sk)->icsk_accept_queue.fastopenq;
- /* A FO option is present; bump the counter. */
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVE);
-
- /* Make sure the listener has enabled fastopen, and we don't
- * exceed the max # of pending TFO requests allowed before trying
- * to validating the cookie in order to avoid burning CPU cycles
- * unnecessarily.
- *
- * XXX (TFO) - The implication of checking the max_qlen before
- * processing a cookie request is that clients can't differentiate
- * between qlen overflow causing Fast Open to be disabled
- * temporarily vs a server not supporting Fast Open at all.
- */
- if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) == 0 ||
- fastopenq == NULL || fastopenq->max_qlen == 0)
- return false;
-
- if (fastopenq->qlen >= fastopenq->max_qlen) {
- struct request_sock *req1;
- spin_lock(&fastopenq->lock);
- req1 = fastopenq->rskq_rst_head;
- if ((req1 == NULL) || time_after(req1->expires, jiffies)) {
- spin_unlock(&fastopenq->lock);
- NET_INC_STATS_BH(sock_net(sk),
- LINUX_MIB_TCPFASTOPENLISTENOVERFLOW);
- /* Avoid bumping LINUX_MIB_TCPFASTOPENPASSIVEFAIL*/
- foc->len = -1;
- return false;
- }
- fastopenq->rskq_rst_head = req1->dl_next;
- fastopenq->qlen--;
- spin_unlock(&fastopenq->lock);
- reqsk_free(req1);
- }
- if (skip_cookie) {
- tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
- return true;
- }
-
- if (foc->len == TCP_FASTOPEN_COOKIE_SIZE) {
- if ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_CHKED) == 0) {
- tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr,
- ip_hdr(skb)->daddr, valid_foc);
- if ((valid_foc->len != TCP_FASTOPEN_COOKIE_SIZE) ||
- memcmp(&foc->val[0], &valid_foc->val[0],
- TCP_FASTOPEN_COOKIE_SIZE) != 0)
- return false;
- valid_foc->len = -1;
- }
- /* Acknowledge the data received from the peer. */
- tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
- return true;
- } else if (foc->len == 0) { /* Client requesting a cookie */
- tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr,
- ip_hdr(skb)->daddr, valid_foc);
- NET_INC_STATS_BH(sock_net(sk),
- LINUX_MIB_TCPFASTOPENCOOKIEREQD);
- } else {
- /* Client sent a cookie with wrong size. Treat it
- * the same as invalid and return a valid one.
- */
- tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr,
- ip_hdr(skb)->daddr, valid_foc);
- }
- return false;
-}
-
-static int tcp_v4_conn_req_fastopen(struct sock *sk,
- struct sk_buff *skb,
- struct sk_buff *skb_synack,
- struct request_sock *req)
-{
- struct tcp_sock *tp = tcp_sk(sk);
- struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
- const struct inet_request_sock *ireq = inet_rsk(req);
- struct sock *child;
- int err;
-
- req->num_retrans = 0;
- req->num_timeout = 0;
- req->sk = NULL;
-
- child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
- if (child == NULL) {
- NET_INC_STATS_BH(sock_net(sk),
- LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
- kfree_skb(skb_synack);
- return -1;
- }
- err = ip_build_and_send_pkt(skb_synack, sk, ireq->ir_loc_addr,
- ireq->ir_rmt_addr, ireq->opt);
- err = net_xmit_eval(err);
- if (!err)
- tcp_rsk(req)->snt_synack = tcp_time_stamp;
- /* XXX (TFO) - is it ok to ignore error and continue? */
-
- spin_lock(&queue->fastopenq->lock);
- queue->fastopenq->qlen++;
- spin_unlock(&queue->fastopenq->lock);
-
- /* Initialize the child socket. Have to fix some values to take
- * into account the child is a Fast Open socket and is created
- * only out of the bits carried in the SYN packet.
- */
- tp = tcp_sk(child);
-
- tp->fastopen_rsk = req;
- /* Do a hold on the listner sk so that if the listener is being
- * closed, the child that has been accepted can live on and still
- * access listen_lock.
- */
- sock_hold(sk);
- tcp_rsk(req)->listener = sk;
-
- /* RFC1323: The window in SYN & SYN/ACK segments is never
- * scaled. So correct it appropriately.
- */
- tp->snd_wnd = ntohs(tcp_hdr(skb)->window);
-
- /* Activate the retrans timer so that SYNACK can be retransmitted.
- * The request socket is not added to the SYN table of the parent
- * because it's been added to the accept queue directly.
- */
- inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS,
- TCP_TIMEOUT_INIT, TCP_RTO_MAX);
-
- /* Add the child socket directly into the accept queue */
- inet_csk_reqsk_queue_add(sk, req, child);
-
- /* Now finish processing the fastopen child socket. */
- inet_csk(child)->icsk_af_ops->rebuild_header(child);
- tcp_init_congestion_control(child);
- tcp_mtup_init(child);
- tcp_init_metrics(child);
- tcp_init_buffer_space(child);
-
- /* Queue the data carried in the SYN packet. We need to first
- * bump skb's refcnt because the caller will attempt to free it.
- *
- * XXX (TFO) - we honor a zero-payload TFO request for now.
- * (Any reason not to?)
- */
- if (TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq + 1) {
- /* Don't queue the skb if there is no payload in SYN.
- * XXX (TFO) - How about SYN+FIN?
- */
- tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
- } else {
- skb = skb_get(skb);
- skb_dst_drop(skb);
- __skb_pull(skb, tcp_hdr(skb)->doff * 4);
- skb_set_owner_r(skb, child);
- __skb_queue_tail(&child->sk_receive_queue, skb);
- tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
- tp->syn_data_acked = 1;
- }
- sk->sk_data_ready(sk, 0);
- bh_unlock_sock(child);
- sock_put(child);
- WARN_ON(req->sk == NULL);
- return 0;
-}
-
int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
struct tcp_options_received tmp_opt;
@@ -1451,12 +1264,10 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
__be32 saddr = ip_hdr(skb)->saddr;
__be32 daddr = ip_hdr(skb)->daddr;
__u32 isn = TCP_SKB_CB(skb)->when;
- bool want_cookie = false;
+ bool want_cookie = false, fastopen;
struct flowi4 fl4;
struct tcp_fastopen_cookie foc = { .len = -1 };
- struct tcp_fastopen_cookie valid_foc = { .len = -1 };
- struct sk_buff *skb_synack;
- int do_fastopen;
+ int err;
/* Never answer to SYNs send to broadcast or multicast */
if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
@@ -1507,6 +1318,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
ireq->ir_rmt_addr = saddr;
ireq->no_srccheck = inet_sk(sk)->transparent;
ireq->opt = tcp_v4_save_options(skb);
+ ireq->ir_mark = inet_request_mark(sk, skb);
if (security_inet_conn_request(sk, skb, req))
goto drop_and_free;
@@ -1555,52 +1367,24 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
isn = tcp_v4_init_sequence(skb);
}
- tcp_rsk(req)->snt_isn = isn;
-
- if (dst == NULL) {
- dst = inet_csk_route_req(sk, &fl4, req);
- if (dst == NULL)
- goto drop_and_free;
- }
- do_fastopen = tcp_fastopen_check(sk, skb, req, &foc, &valid_foc);
-
- /* We don't call tcp_v4_send_synack() directly because we need
- * to make sure a child socket can be created successfully before
- * sending back synack!
- *
- * XXX (TFO) - Ideally one would simply call tcp_v4_send_synack()
- * (or better yet, call tcp_send_synack() in the child context
- * directly, but will have to fix bunch of other code first)
- * after syn_recv_sock() except one will need to first fix the
- * latter to remove its dependency on the current implementation
- * of tcp_v4_send_synack()->tcp_select_initial_window().
- */
- skb_synack = tcp_make_synack(sk, dst, req,
- fastopen_cookie_present(&valid_foc) ? &valid_foc : NULL);
-
- if (skb_synack) {
- __tcp_v4_send_check(skb_synack, ireq->ir_loc_addr, ireq->ir_rmt_addr);
- skb_set_queue_mapping(skb_synack, skb_get_queue_mapping(skb));
- } else
+ if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
goto drop_and_free;
- if (likely(!do_fastopen)) {
- int err;
- err = ip_build_and_send_pkt(skb_synack, sk, ireq->ir_loc_addr,
- ireq->ir_rmt_addr, ireq->opt);
- err = net_xmit_eval(err);
+ tcp_rsk(req)->snt_isn = isn;
+ tcp_rsk(req)->snt_synack = tcp_time_stamp;
+ tcp_openreq_init_rwin(req, sk, dst);
+ fastopen = !want_cookie &&
+ tcp_try_fastopen(sk, skb, req, &foc, dst);
+ err = tcp_v4_send_synack(sk, dst, req,
+ skb_get_queue_mapping(skb), &foc);
+ if (!fastopen) {
if (err || want_cookie)
goto drop_and_free;
tcp_rsk(req)->snt_synack = tcp_time_stamp;
tcp_rsk(req)->listener = NULL;
- /* Add the request_sock to the SYN table */
inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
- if (fastopen_cookie_present(&foc) && foc.len != 0)
- NET_INC_STATS_BH(sock_net(sk),
- LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
- } else if (tcp_v4_conn_req_fastopen(sk, skb, skb_synack, req))
- goto drop_and_free;
+ }
return 0;
@@ -1744,28 +1528,6 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
return sk;
}
-static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
-{
- const struct iphdr *iph = ip_hdr(skb);
-
- if (skb->ip_summed == CHECKSUM_COMPLETE) {
- if (!tcp_v4_check(skb->len, iph->saddr,
- iph->daddr, skb->csum)) {
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- return 0;
- }
- }
-
- skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
- skb->len, IPPROTO_TCP, 0);
-
- if (skb->len <= 76) {
- return __skb_checksum_complete(skb);
- }
- return 0;
-}
-
-
/* The socket must have it's spinlock held when we get
* here.
*
@@ -1960,7 +1722,8 @@ int tcp_v4_rcv(struct sk_buff *skb)
* Packet length and doff are validated by header prediction,
* provided case of th->doff==0 is eliminated.
* So, we defer the checks. */
- if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
+
+ if (skb_checksum_init(skb, IPPROTO_TCP, inet_compute_pseudo))
goto csum_error;
th = tcp_hdr(skb);
diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c
index 991d62a2f9b..1e70fa8fa79 100644
--- a/net/ipv4/tcp_lp.c
+++ b/net/ipv4/tcp_lp.c
@@ -115,13 +115,12 @@ static void tcp_lp_init(struct sock *sk)
* Will only call newReno CA when away from inference.
* From TCP-LP's paper, this will be handled in additive increasement.
*/
-static void tcp_lp_cong_avoid(struct sock *sk, u32 ack, u32 acked,
- u32 in_flight)
+static void tcp_lp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
{
struct lp *lp = inet_csk_ca(sk);
if (!(lp->flag & LP_WITHIN_INF))
- tcp_reno_cong_avoid(sk, ack, acked, in_flight);
+ tcp_reno_cong_avoid(sk, ack, acked);
}
/**
@@ -315,11 +314,9 @@ static void tcp_lp_pkts_acked(struct sock *sk, u32 num_acked, s32 rtt_us)
}
static struct tcp_congestion_ops tcp_lp __read_mostly = {
- .flags = TCP_CONG_RTT_STAMP,
.init = tcp_lp_init,
.ssthresh = tcp_reno_ssthresh,
.cong_avoid = tcp_lp_cong_avoid,
- .min_cwnd = tcp_reno_min_cwnd,
.pkts_acked = tcp_lp_pkts_acked,
.owner = THIS_MODULE,
diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c
index f7e522c558b..f7a2ec3ac58 100644
--- a/net/ipv4/tcp_memcontrol.c
+++ b/net/ipv4/tcp_memcontrol.c
@@ -102,17 +102,19 @@ static int tcp_update_limit(struct mem_cgroup *memcg, u64 val)
return 0;
}
-static int tcp_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft,
- const char *buffer)
+static ssize_t tcp_cgroup_write(struct kernfs_open_file *of,
+ char *buf, size_t nbytes, loff_t off)
{
- struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+ struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
unsigned long long val;
int ret = 0;
- switch (cft->private) {
+ buf = strstrip(buf);
+
+ switch (of_cft(of)->private) {
case RES_LIMIT:
/* see memcontrol.c */
- ret = res_counter_memparse_write_strategy(buffer, &val);
+ ret = res_counter_memparse_write_strategy(buf, &val);
if (ret)
break;
ret = tcp_update_limit(memcg, val);
@@ -121,7 +123,7 @@ static int tcp_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft,
ret = -EINVAL;
break;
}
- return ret;
+ return ret ?: nbytes;
}
static u64 tcp_read_stat(struct mem_cgroup *memcg, int type, u64 default_val)
@@ -168,17 +170,18 @@ static u64 tcp_cgroup_read(struct cgroup_subsys_state *css, struct cftype *cft)
return val;
}
-static int tcp_cgroup_reset(struct cgroup_subsys_state *css, unsigned int event)
+static ssize_t tcp_cgroup_reset(struct kernfs_open_file *of,
+ char *buf, size_t nbytes, loff_t off)
{
struct mem_cgroup *memcg;
struct cg_proto *cg_proto;
- memcg = mem_cgroup_from_css(css);
+ memcg = mem_cgroup_from_css(of_css(of));
cg_proto = tcp_prot.proto_cgroup(memcg);
if (!cg_proto)
- return 0;
+ return nbytes;
- switch (event) {
+ switch (of_cft(of)->private) {
case RES_MAX_USAGE:
res_counter_reset_max(&cg_proto->memory_allocated);
break;
@@ -187,13 +190,13 @@ static int tcp_cgroup_reset(struct cgroup_subsys_state *css, unsigned int event)
break;
}
- return 0;
+ return nbytes;
}
static struct cftype tcp_files[] = {
{
.name = "kmem.tcp.limit_in_bytes",
- .write_string = tcp_cgroup_write,
+ .write = tcp_cgroup_write,
.read_u64 = tcp_cgroup_read,
.private = RES_LIMIT,
},
@@ -205,13 +208,13 @@ static struct cftype tcp_files[] = {
{
.name = "kmem.tcp.failcnt",
.private = RES_FAILCNT,
- .trigger = tcp_cgroup_reset,
+ .write = tcp_cgroup_reset,
.read_u64 = tcp_cgroup_read,
},
{
.name = "kmem.tcp.max_usage_in_bytes",
.private = RES_MAX_USAGE,
- .trigger = tcp_cgroup_reset,
+ .write = tcp_cgroup_reset,
.read_u64 = tcp_cgroup_read,
},
{ } /* terminate */
@@ -219,7 +222,7 @@ static struct cftype tcp_files[] = {
static int __init tcp_memcontrol_init(void)
{
- WARN_ON(cgroup_add_cftypes(&mem_cgroup_subsys, tcp_files));
+ WARN_ON(cgroup_add_cftypes(&memory_cgrp_subsys, tcp_files));
return 0;
}
__initcall(tcp_memcontrol_init);
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index d547075d830..4fe04180598 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -33,6 +33,11 @@ struct tcp_fastopen_metrics {
struct tcp_fastopen_cookie cookie;
};
+/* TCP_METRIC_MAX includes 2 extra fields for userspace compatibility
+ * Kernel only stores RTT and RTTVAR in usec resolution
+ */
+#define TCP_METRIC_MAX_KERNEL (TCP_METRIC_MAX - 2)
+
struct tcp_metrics_block {
struct tcp_metrics_block __rcu *tcpm_next;
struct inetpeer_addr tcpm_saddr;
@@ -41,7 +46,7 @@ struct tcp_metrics_block {
u32 tcpm_ts;
u32 tcpm_ts_stamp;
u32 tcpm_lock;
- u32 tcpm_vals[TCP_METRIC_MAX + 1];
+ u32 tcpm_vals[TCP_METRIC_MAX_KERNEL + 1];
struct tcp_fastopen_metrics tcpm_fastopen;
struct rcu_head rcu_head;
@@ -59,12 +64,6 @@ static u32 tcp_metric_get(struct tcp_metrics_block *tm,
return tm->tcpm_vals[idx];
}
-static u32 tcp_metric_get_jiffies(struct tcp_metrics_block *tm,
- enum tcp_metric_index idx)
-{
- return msecs_to_jiffies(tm->tcpm_vals[idx]);
-}
-
static void tcp_metric_set(struct tcp_metrics_block *tm,
enum tcp_metric_index idx,
u32 val)
@@ -72,13 +71,6 @@ static void tcp_metric_set(struct tcp_metrics_block *tm,
tm->tcpm_vals[idx] = val;
}
-static void tcp_metric_set_msecs(struct tcp_metrics_block *tm,
- enum tcp_metric_index idx,
- u32 val)
-{
- tm->tcpm_vals[idx] = jiffies_to_msecs(val);
-}
-
static bool addr_same(const struct inetpeer_addr *a,
const struct inetpeer_addr *b)
{
@@ -101,9 +93,11 @@ struct tcpm_hash_bucket {
static DEFINE_SPINLOCK(tcp_metrics_lock);
-static void tcpm_suck_dst(struct tcp_metrics_block *tm, struct dst_entry *dst,
+static void tcpm_suck_dst(struct tcp_metrics_block *tm,
+ const struct dst_entry *dst,
bool fastopen_clear)
{
+ u32 msval;
u32 val;
tm->tcpm_stamp = jiffies;
@@ -121,8 +115,11 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm, struct dst_entry *dst,
val |= 1 << TCP_METRIC_REORDERING;
tm->tcpm_lock = val;
- tm->tcpm_vals[TCP_METRIC_RTT] = dst_metric_raw(dst, RTAX_RTT);
- tm->tcpm_vals[TCP_METRIC_RTTVAR] = dst_metric_raw(dst, RTAX_RTTVAR);
+ msval = dst_metric_raw(dst, RTAX_RTT);
+ tm->tcpm_vals[TCP_METRIC_RTT] = msval * USEC_PER_MSEC;
+
+ msval = dst_metric_raw(dst, RTAX_RTTVAR);
+ tm->tcpm_vals[TCP_METRIC_RTTVAR] = msval * USEC_PER_MSEC;
tm->tcpm_vals[TCP_METRIC_SSTHRESH] = dst_metric_raw(dst, RTAX_SSTHRESH);
tm->tcpm_vals[TCP_METRIC_CWND] = dst_metric_raw(dst, RTAX_CWND);
tm->tcpm_vals[TCP_METRIC_REORDERING] = dst_metric_raw(dst, RTAX_REORDERING);
@@ -384,7 +381,7 @@ void tcp_update_metrics(struct sock *sk)
dst_confirm(dst);
rcu_read_lock();
- if (icsk->icsk_backoff || !tp->srtt) {
+ if (icsk->icsk_backoff || !tp->srtt_us) {
/* This session failed to estimate rtt. Why?
* Probably, no packets returned in time. Reset our
* results.
@@ -399,8 +396,8 @@ void tcp_update_metrics(struct sock *sk)
if (!tm)
goto out_unlock;
- rtt = tcp_metric_get_jiffies(tm, TCP_METRIC_RTT);
- m = rtt - tp->srtt;
+ rtt = tcp_metric_get(tm, TCP_METRIC_RTT);
+ m = rtt - tp->srtt_us;
/* If newly calculated rtt larger than stored one, store new
* one. Otherwise, use EWMA. Remember, rtt overestimation is
@@ -408,10 +405,10 @@ void tcp_update_metrics(struct sock *sk)
*/
if (!tcp_metric_locked(tm, TCP_METRIC_RTT)) {
if (m <= 0)
- rtt = tp->srtt;
+ rtt = tp->srtt_us;
else
rtt -= (m >> 3);
- tcp_metric_set_msecs(tm, TCP_METRIC_RTT, rtt);
+ tcp_metric_set(tm, TCP_METRIC_RTT, rtt);
}
if (!tcp_metric_locked(tm, TCP_METRIC_RTTVAR)) {
@@ -422,16 +419,16 @@ void tcp_update_metrics(struct sock *sk)
/* Scale deviation to rttvar fixed point */
m >>= 1;
- if (m < tp->mdev)
- m = tp->mdev;
+ if (m < tp->mdev_us)
+ m = tp->mdev_us;
- var = tcp_metric_get_jiffies(tm, TCP_METRIC_RTTVAR);
+ var = tcp_metric_get(tm, TCP_METRIC_RTTVAR);
if (m >= var)
var = m;
else
var -= (var - m) >> 2;
- tcp_metric_set_msecs(tm, TCP_METRIC_RTTVAR, var);
+ tcp_metric_set(tm, TCP_METRIC_RTTVAR, var);
}
if (tcp_in_initial_slowstart(tp)) {
@@ -528,7 +525,7 @@ void tcp_init_metrics(struct sock *sk)
tp->reordering = val;
}
- crtt = tcp_metric_get_jiffies(tm, TCP_METRIC_RTT);
+ crtt = tcp_metric_get(tm, TCP_METRIC_RTT);
rcu_read_unlock();
reset:
/* The initial RTT measurement from the SYN/SYN-ACK is not ideal
@@ -551,18 +548,20 @@ reset:
* to low value, and then abruptly stops to do it and starts to delay
* ACKs, wait for troubles.
*/
- if (crtt > tp->srtt) {
+ if (crtt > tp->srtt_us) {
/* Set RTO like tcp_rtt_estimator(), but from cached RTT. */
- crtt >>= 3;
+ crtt /= 8 * USEC_PER_MSEC;
inet_csk(sk)->icsk_rto = crtt + max(2 * crtt, tcp_rto_min(sk));
- } else if (tp->srtt == 0) {
+ } else if (tp->srtt_us == 0) {
/* RFC6298: 5.7 We've failed to get a valid RTT sample from
* 3WHS. This is most likely due to retransmission,
* including spurious one. Reset the RTO back to 3secs
* from the more aggressive 1sec to avoid more spurious
* retransmission.
*/
- tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_FALLBACK;
+ tp->rttvar_us = jiffies_to_usecs(TCP_TIMEOUT_FALLBACK);
+ tp->mdev_us = tp->mdev_max_us = tp->rttvar_us;
+
inet_csk(sk)->icsk_rto = TCP_TIMEOUT_FALLBACK;
}
/* Cut cwnd down to 1 per RFC5681 if SYN or SYN-ACK has been
@@ -809,10 +808,26 @@ static int tcp_metrics_fill_info(struct sk_buff *msg,
nest = nla_nest_start(msg, TCP_METRICS_ATTR_VALS);
if (!nest)
goto nla_put_failure;
- for (i = 0; i < TCP_METRIC_MAX + 1; i++) {
- if (!tm->tcpm_vals[i])
+ for (i = 0; i < TCP_METRIC_MAX_KERNEL + 1; i++) {
+ u32 val = tm->tcpm_vals[i];
+
+ if (!val)
continue;
- if (nla_put_u32(msg, i + 1, tm->tcpm_vals[i]) < 0)
+ if (i == TCP_METRIC_RTT) {
+ if (nla_put_u32(msg, TCP_METRIC_RTT_US + 1,
+ val) < 0)
+ goto nla_put_failure;
+ n++;
+ val = max(val / 1000, 1U);
+ }
+ if (i == TCP_METRIC_RTTVAR) {
+ if (nla_put_u32(msg, TCP_METRIC_RTTVAR_US + 1,
+ val) < 0)
+ goto nla_put_failure;
+ n++;
+ val = max(val / 1000, 1U);
+ }
+ if (nla_put_u32(msg, i + 1, val) < 0)
goto nla_put_failure;
n++;
}
@@ -1144,10 +1159,7 @@ static void __net_exit tcp_net_metrics_exit(struct net *net)
tm = next;
}
}
- if (is_vmalloc_addr(net->ipv4.tcp_metrics_hash))
- vfree(net->ipv4.tcp_metrics_hash);
- else
- kfree(net->ipv4.tcp_metrics_hash);
+ kvfree(net->ipv4.tcp_metrics_hash);
}
static __net_initdata struct pernet_operations tcp_net_metrics_ops = {
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 7a436c517e4..e68e0d4af6c 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -362,6 +362,37 @@ void tcp_twsk_destructor(struct sock *sk)
}
EXPORT_SYMBOL_GPL(tcp_twsk_destructor);
+void tcp_openreq_init_rwin(struct request_sock *req,
+ struct sock *sk, struct dst_entry *dst)
+{
+ struct inet_request_sock *ireq = inet_rsk(req);
+ struct tcp_sock *tp = tcp_sk(sk);
+ __u8 rcv_wscale;
+ int mss = dst_metric_advmss(dst);
+
+ if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss)
+ mss = tp->rx_opt.user_mss;
+
+ /* Set this up on the first call only */
+ req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW);
+
+ /* limit the window selection if the user enforces a smaller rx buffer */
+ if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
+ (req->window_clamp > tcp_full_space(sk) || req->window_clamp == 0))
+ req->window_clamp = tcp_full_space(sk);
+
+ /* tcp_full_space because it is guaranteed to be the first packet */
+ tcp_select_initial_window(tcp_full_space(sk),
+ mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
+ &req->rcv_wnd,
+ &req->window_clamp,
+ ireq->wscale_ok,
+ &rcv_wscale,
+ dst_metric(dst, RTAX_INITRWND));
+ ireq->rcv_wscale = rcv_wscale;
+}
+EXPORT_SYMBOL(tcp_openreq_init_rwin);
+
static inline void TCP_ECN_openreq_child(struct tcp_sock *tp,
struct request_sock *req)
{
@@ -398,8 +429,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
tcp_init_wl(newtp, treq->rcv_isn);
- newtp->srtt = 0;
- newtp->mdev = TCP_TIMEOUT_INIT;
+ newtp->srtt_us = 0;
+ newtp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
newicsk->icsk_rto = TCP_TIMEOUT_INIT;
newtp->packets_out = 0;
@@ -745,7 +776,7 @@ int tcp_child_process(struct sock *parent, struct sock *child,
skb->len);
/* Wakeup parent, send SIGIO */
if (state == TCP_SYN_RECV && child->sk_state != state)
- parent->sk_data_ready(parent, 0);
+ parent->sk_data_ready(parent);
} else {
/* Alas, it is possible again, because we do lookup
* in main socket hash table and lock on listening
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index b92b81718ca..55046ecd083 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -57,10 +57,12 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
SKB_GSO_TCP_ECN |
SKB_GSO_TCPV6 |
SKB_GSO_GRE |
+ SKB_GSO_GRE_CSUM |
SKB_GSO_IPIP |
SKB_GSO_SIT |
SKB_GSO_MPLS |
SKB_GSO_UDP_TUNNEL |
+ SKB_GSO_UDP_TUNNEL_CSUM |
0) ||
!(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))))
goto out;
@@ -97,9 +99,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
th->check = newcheck;
if (skb->ip_summed != CHECKSUM_PARTIAL)
- th->check =
- csum_fold(csum_partial(skb_transport_header(skb),
- thlen, skb->csum));
+ th->check = gso_make_checksum(skb, ~th->check);
seq += mss;
if (copy_destructor) {
@@ -133,8 +133,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
th->check = ~csum_fold((__force __wsum)((__force u32)th->check +
(__force u32)delta));
if (skb->ip_summed != CHECKSUM_PARTIAL)
- th->check = csum_fold(csum_partial(skb_transport_header(skb),
- thlen, skb->csum));
+ th->check = gso_make_checksum(skb, ~th->check);
out:
return segs;
}
@@ -310,7 +309,7 @@ static int tcp4_gro_complete(struct sk_buff *skb, int thoff)
th->check = ~tcp_v4_check(skb->len - thoff, iph->saddr,
iph->daddr, 0);
- skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
+ skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4;
return tcp_gro_complete(skb);
}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 17a11e65e57..179b51e6bda 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -86,6 +86,9 @@ static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb)
icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
tcp_rearm_rto(sk);
}
+
+ NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT,
+ tcp_skb_pcount(skb));
}
/* SND.NXT, if window was not shrunk.
@@ -269,6 +272,7 @@ EXPORT_SYMBOL(tcp_select_initial_window);
static u16 tcp_select_window(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
+ u32 old_win = tp->rcv_wnd;
u32 cur_win = tcp_receive_window(tp);
u32 new_win = __tcp_select_window(sk);
@@ -281,6 +285,9 @@ static u16 tcp_select_window(struct sock *sk)
*
* Relax Will Robinson.
*/
+ if (new_win == 0)
+ NET_INC_STATS(sock_net(sk),
+ LINUX_MIB_TCPWANTZEROWINDOWADV);
new_win = ALIGN(cur_win, 1 << tp->rx_opt.rcv_wscale);
}
tp->rcv_wnd = new_win;
@@ -298,8 +305,14 @@ static u16 tcp_select_window(struct sock *sk)
new_win >>= tp->rx_opt.rcv_wscale;
/* If we advertise zero window, disable fast path. */
- if (new_win == 0)
+ if (new_win == 0) {
tp->pred_flags = 0;
+ if (old_win)
+ NET_INC_STATS(sock_net(sk),
+ LINUX_MIB_TCPTOZEROWINDOWADV);
+ } else if (old_win == 0) {
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFROMZEROWINDOWADV);
+ }
return new_win;
}
@@ -614,7 +627,7 @@ static unsigned int tcp_synack_options(struct sock *sk,
if (unlikely(!ireq->tstamp_ok))
remaining -= TCPOLEN_SACKPERM_ALIGNED;
}
- if (foc != NULL) {
+ if (foc != NULL && foc->len >= 0) {
u32 need = TCPOLEN_EXP_FASTOPEN_BASE + foc->len;
need = (need + 3) & ~3U; /* Align to 32 bits */
if (remaining >= need) {
@@ -865,18 +878,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
BUG_ON(!skb || !tcp_skb_pcount(skb));
if (clone_it) {
- const struct sk_buff *fclone = skb + 1;
-
- /* If congestion control is doing timestamping, we must
- * take such a timestamp before we potentially clone/copy.
- */
- if (icsk->icsk_ca_ops->flags & TCP_CONG_RTT_STAMP)
- __net_timestamp(skb);
-
- if (unlikely(skb->fclone == SKB_FCLONE_ORIG &&
- fclone->fclone == SKB_FCLONE_CLONE))
- NET_INC_STATS(sock_net(sk),
- LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES);
+ skb_mstamp_get(&skb->skb_mstamp);
if (unlikely(skb_cloned(skb)))
skb = pskb_copy(skb, gfp_mask);
@@ -884,6 +886,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
skb = skb_clone(skb, gfp_mask);
if (unlikely(!skb))
return -ENOBUFS;
+ /* Our usage of tstamp should remain private */
+ skb->tstamp.tv64 = 0;
}
inet = inet_sk(sk);
@@ -970,7 +974,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS,
tcp_skb_pcount(skb));
- err = icsk->icsk_af_ops->queue_xmit(skb, &inet->cork.fl);
+ err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl);
if (likely(err <= 0))
return err;
@@ -1070,7 +1074,7 @@ static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int de
* Remember, these are still headerless SKBs at this point.
*/
int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
- unsigned int mss_now)
+ unsigned int mss_now, gfp_t gfp)
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *buff;
@@ -1085,11 +1089,11 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
if (nsize < 0)
nsize = 0;
- if (skb_unclone(skb, GFP_ATOMIC))
+ if (skb_unclone(skb, gfp))
return -ENOMEM;
/* Get a new skb... force flag on. */
- buff = sk_stream_alloc_skb(sk, nsize, GFP_ATOMIC);
+ buff = sk_stream_alloc_skb(sk, nsize, gfp);
if (buff == NULL)
return -ENOMEM; /* We'll just try again later. */
@@ -1376,12 +1380,43 @@ unsigned int tcp_current_mss(struct sock *sk)
return mss_now;
}
-/* Congestion window validation. (RFC2861) */
-static void tcp_cwnd_validate(struct sock *sk)
+/* RFC2861, slow part. Adjust cwnd, after it was not full during one rto.
+ * As additional protections, we do not touch cwnd in retransmission phases,
+ * and if application hit its sndbuf limit recently.
+ */
+static void tcp_cwnd_application_limited(struct sock *sk)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ if (inet_csk(sk)->icsk_ca_state == TCP_CA_Open &&
+ sk->sk_socket && !test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
+ /* Limited by application or receiver window. */
+ u32 init_win = tcp_init_cwnd(tp, __sk_dst_get(sk));
+ u32 win_used = max(tp->snd_cwnd_used, init_win);
+ if (win_used < tp->snd_cwnd) {
+ tp->snd_ssthresh = tcp_current_ssthresh(sk);
+ tp->snd_cwnd = (tp->snd_cwnd + win_used) >> 1;
+ }
+ tp->snd_cwnd_used = 0;
+ }
+ tp->snd_cwnd_stamp = tcp_time_stamp;
+}
+
+static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
{
struct tcp_sock *tp = tcp_sk(sk);
- if (tp->packets_out >= tp->snd_cwnd) {
+ /* Track the maximum number of outstanding packets in each
+ * window, and remember whether we were cwnd-limited then.
+ */
+ if (!before(tp->snd_una, tp->max_packets_seq) ||
+ tp->packets_out > tp->max_packets_out) {
+ tp->max_packets_out = tp->packets_out;
+ tp->max_packets_seq = tp->snd_nxt;
+ tp->is_cwnd_limited = is_cwnd_limited;
+ }
+
+ if (tcp_is_cwnd_limited(sk)) {
/* Network is feed fully. */
tp->snd_cwnd_used = 0;
tp->snd_cwnd_stamp = tcp_time_stamp;
@@ -1426,7 +1461,7 @@ static void tcp_minshall_update(struct tcp_sock *tp, unsigned int mss_now,
* With Minshall's modification: all sent small packets are ACKed.
*/
static bool tcp_nagle_check(bool partial, const struct tcp_sock *tp,
- unsigned int mss_now, int nonagle)
+ int nonagle)
{
return partial &&
((nonagle & TCP_NAGLE_CORK) ||
@@ -1458,7 +1493,7 @@ static unsigned int tcp_mss_split_point(const struct sock *sk,
* to include this last segment in this skb.
* Otherwise, we'll split the skb at last MSS boundary
*/
- if (tcp_nagle_check(partial != 0, tp, mss_now, nonagle))
+ if (tcp_nagle_check(partial != 0, tp, nonagle))
return needed - partial;
return needed;
@@ -1521,7 +1556,7 @@ static inline bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buf
if (tcp_urg_mode(tp) || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN))
return true;
- if (!tcp_nagle_check(skb->len < cur_mss, tp, cur_mss, nonagle))
+ if (!tcp_nagle_check(skb->len < cur_mss, tp, nonagle))
return true;
return false;
@@ -1590,7 +1625,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
/* All of a TSO frame must be composed of paged data. */
if (skb->len != skb->data_len)
- return tcp_fragment(sk, skb, len, mss_now);
+ return tcp_fragment(sk, skb, len, mss_now, gfp);
buff = sk_stream_alloc_skb(sk, 0, gfp);
if (unlikely(buff == NULL))
@@ -1633,7 +1668,8 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
*
* This algorithm is from John Heffner.
*/
-static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
+static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
+ bool *is_cwnd_limited)
{
struct tcp_sock *tp = tcp_sk(sk);
const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -1697,6 +1733,9 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
if (!tp->tso_deferred)
tp->tso_deferred = 1 | (jiffies << 1);
+ if (cong_win < send_win && cong_win < skb->len)
+ *is_cwnd_limited = true;
+
return true;
send_now:
@@ -1857,6 +1896,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
unsigned int tso_segs, sent_pkts;
int cwnd_quota;
int result;
+ bool is_cwnd_limited = false;
sent_pkts = 0;
@@ -1881,6 +1921,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
cwnd_quota = tcp_cwnd_test(tp, skb);
if (!cwnd_quota) {
+ is_cwnd_limited = true;
if (push_one == 2)
/* Force out a loss probe pkt. */
cwnd_quota = 1;
@@ -1897,7 +1938,8 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
nonagle : TCP_NAGLE_PUSH))))
break;
} else {
- if (!push_one && tcp_tso_should_defer(sk, skb))
+ if (!push_one &&
+ tcp_tso_should_defer(sk, skb, &is_cwnd_limited))
break;
}
@@ -1919,10 +1961,8 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
/* It is possible TX completion already happened
* before we set TSQ_THROTTLED, so we must
* test again the condition.
- * We abuse smp_mb__after_clear_bit() because
- * there is no smp_mb__after_set_bit() yet
*/
- smp_mb__after_clear_bit();
+ smp_mb__after_atomic();
if (atomic_read(&sk->sk_wmem_alloc) > limit)
break;
}
@@ -1964,7 +2004,7 @@ repair:
/* Send one loss probe per tail loss episode. */
if (push_one != 2)
tcp_schedule_loss_probe(sk);
- tcp_cwnd_validate(sk);
+ tcp_cwnd_validate(sk, is_cwnd_limited);
return false;
}
return (push_one == 2) || (!tp->packets_out && tcp_send_head(sk));
@@ -1975,7 +2015,7 @@ bool tcp_schedule_loss_probe(struct sock *sk)
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
u32 timeout, tlp_time_stamp, rto_time_stamp;
- u32 rtt = tp->srtt >> 3;
+ u32 rtt = usecs_to_jiffies(tp->srtt_us >> 3);
if (WARN_ON(icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS))
return false;
@@ -1997,7 +2037,7 @@ bool tcp_schedule_loss_probe(struct sock *sk)
/* Schedule a loss probe in 2*RTT for SACK capable connections
* in Open state, that are either limited by cwnd or application.
*/
- if (sysctl_tcp_early_retrans < 3 || !tp->srtt || !tp->packets_out ||
+ if (sysctl_tcp_early_retrans < 3 || !tp->srtt_us || !tp->packets_out ||
!tcp_is_sack(tp) || inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
return false;
@@ -2028,6 +2068,25 @@ bool tcp_schedule_loss_probe(struct sock *sk)
return true;
}
+/* Thanks to skb fast clones, we can detect if a prior transmit of
+ * a packet is still in a qdisc or driver queue.
+ * In this case, there is very little point doing a retransmit !
+ * Note: This is called from BH context only.
+ */
+static bool skb_still_in_host_queue(const struct sock *sk,
+ const struct sk_buff *skb)
+{
+ const struct sk_buff *fclone = skb + 1;
+
+ if (unlikely(skb->fclone == SKB_FCLONE_ORIG &&
+ fclone->fclone == SKB_FCLONE_CLONE)) {
+ NET_INC_STATS_BH(sock_net(sk),
+ LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES);
+ return true;
+ }
+ return false;
+}
+
/* When probe timeout (PTO) fires, send a new segment if one exists, else
* retransmit the last segment.
*/
@@ -2053,12 +2112,16 @@ void tcp_send_loss_probe(struct sock *sk)
if (WARN_ON(!skb))
goto rearm_timer;
+ if (skb_still_in_host_queue(sk, skb))
+ goto rearm_timer;
+
pcount = tcp_skb_pcount(skb);
if (WARN_ON(!pcount))
goto rearm_timer;
if ((pcount > 1) && (skb->len > (pcount - 1) * mss)) {
- if (unlikely(tcp_fragment(sk, skb, (pcount - 1) * mss, mss)))
+ if (unlikely(tcp_fragment(sk, skb, (pcount - 1) * mss, mss,
+ GFP_ATOMIC)))
goto rearm_timer;
skb = tcp_write_queue_tail(sk);
}
@@ -2066,9 +2129,7 @@ void tcp_send_loss_probe(struct sock *sk)
if (WARN_ON(!skb || !tcp_skb_pcount(skb)))
goto rearm_timer;
- /* Probe with zero data doesn't trigger fast recovery. */
- if (skb->len > 0)
- err = __tcp_retransmit_skb(sk, skb);
+ err = __tcp_retransmit_skb(sk, skb);
/* Record snd_nxt for loss detection. */
if (likely(!err))
@@ -2082,7 +2143,6 @@ rearm_timer:
if (likely(!err))
NET_INC_STATS_BH(sock_net(sk),
LINUX_MIB_TCPLOSSPROBES);
- return;
}
/* Push out any pending frames which were held back due to
@@ -2180,7 +2240,8 @@ u32 __tcp_select_window(struct sock *sk)
*/
int mss = icsk->icsk_ack.rcv_mss;
int free_space = tcp_space(sk);
- int full_space = min_t(int, tp->window_clamp, tcp_full_space(sk));
+ int allowed_space = tcp_full_space(sk);
+ int full_space = min_t(int, tp->window_clamp, allowed_space);
int window;
if (mss > full_space)
@@ -2193,7 +2254,19 @@ u32 __tcp_select_window(struct sock *sk)
tp->rcv_ssthresh = min(tp->rcv_ssthresh,
4U * tp->advmss);
- if (free_space < mss)
+ /* free_space might become our new window, make sure we don't
+ * increase it due to wscale.
+ */
+ free_space = round_down(free_space, 1 << tp->rx_opt.rcv_wscale);
+
+ /* if free space is less than mss estimate, or is below 1/16th
+ * of the maximum allowed, try to move to zero-window, else
+ * tcp_clamp_window() will grow rcv buf up to tcp_rmem[2], and
+ * new incoming data is dropped due to memory limits.
+ * With large window, mss test triggers way too late in order
+ * to announce zero window in time before rmem limit kicks in.
+ */
+ if (free_space < (allowed_space >> 4) || free_space < mss)
return 0;
}
@@ -2362,6 +2435,9 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
min(sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf))
return -EAGAIN;
+ if (skb_still_in_host_queue(sk, skb))
+ return -EBUSY;
+
if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) {
if (before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
BUG();
@@ -2384,7 +2460,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
return -EAGAIN;
if (skb->len > cur_mss) {
- if (tcp_fragment(sk, skb, cur_mss, cur_mss))
+ if (tcp_fragment(sk, skb, cur_mss, cur_mss, GFP_ATOMIC))
return -ENOMEM; /* We'll try again later. */
} else {
int oldpcount = tcp_skb_pcount(skb);
@@ -2418,8 +2494,14 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
}
- if (likely(!err))
+ if (likely(!err)) {
TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS;
+ /* Update global TCP statistics. */
+ TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS);
+ if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
+ tp->total_retrans++;
+ }
return err;
}
@@ -2429,11 +2511,6 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
int err = __tcp_retransmit_skb(sk, skb);
if (err == 0) {
- /* Update global TCP statistics. */
- TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS);
-
- tp->total_retrans++;
-
#if FASTRETRANS_DEBUG > 0
if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
net_dbg_ratelimited("retrans_out leaked\n");
@@ -2448,15 +2525,17 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
if (!tp->retrans_stamp)
tp->retrans_stamp = TCP_SKB_CB(skb)->when;
- tp->undo_retrans += tcp_skb_pcount(skb);
-
/* snd_nxt is stored to detect loss of retransmitted segment,
* see tcp_input.c tcp_sacktag_write_queue().
*/
TCP_SKB_CB(skb)->ack_seq = tp->snd_nxt;
- } else {
+ } else if (err != -EBUSY) {
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL);
}
+
+ if (tp->undo_retrans < 0)
+ tp->undo_retrans = 0;
+ tp->undo_retrans += tcp_skb_pcount(skb);
return err;
}
@@ -2717,7 +2796,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
int tcp_header_size;
int mss;
- skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15, 1, GFP_ATOMIC);
+ skb = sock_wmalloc(sk, MAX_TCP_HEADER, 1, GFP_ATOMIC);
if (unlikely(!skb)) {
dst_release(dst);
return NULL;
@@ -2732,27 +2811,6 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss)
mss = tp->rx_opt.user_mss;
- if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */
- __u8 rcv_wscale;
- /* Set this up on the first call only */
- req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW);
-
- /* limit the window selection if the user enforce a smaller rx buffer */
- if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
- (req->window_clamp > tcp_full_space(sk) || req->window_clamp == 0))
- req->window_clamp = tcp_full_space(sk);
-
- /* tcp_full_space because it is guaranteed to be the first packet */
- tcp_select_initial_window(tcp_full_space(sk),
- mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
- &req->rcv_wnd,
- &req->window_clamp,
- ireq->wscale_ok,
- &rcv_wscale,
- dst_metric(dst, RTAX_INITRWND));
- ireq->rcv_wscale = rcv_wscale;
- }
-
memset(&opts, 0, sizeof(opts));
#ifdef CONFIG_SYN_COOKIES
if (unlikely(req->cookie_ts))
@@ -2787,7 +2845,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
th->window = htons(min(req->rcv_wnd, 65535U));
tcp_options_write((__be32 *)(th + 1), tp, &opts);
th->doff = (tcp_header_size >> 2);
- TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS, tcp_skb_pcount(skb));
+ TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_OUTSEGS);
#ifdef CONFIG_TCP_MD5SIG
/* Okay, we have all we need - do the md5 hash if needed */
@@ -2959,9 +3017,15 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
tcp_connect_queue_skb(sk, data);
fo->copied = data->len;
+ /* syn_data is about to be sent, we need to take current time stamps
+ * for the packets that are in write queue : SYN packet and DATA
+ */
+ skb_mstamp_get(&syn->skb_mstamp);
+ data->skb_mstamp = syn->skb_mstamp;
+
if (tcp_transmit_skb(sk, syn_data, 0, sk->sk_allocation) == 0) {
tp->syn_data = (fo->copied > 0);
- NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVE);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT);
goto done;
}
syn_data = NULL;
@@ -3049,8 +3113,9 @@ void tcp_send_delayed_ack(struct sock *sk)
* Do not use inet_csk(sk)->icsk_rto here, use results of rtt measurements
* directly.
*/
- if (tp->srtt) {
- int rtt = max(tp->srtt >> 3, TCP_DELACK_MIN);
+ if (tp->srtt_us) {
+ int rtt = max_t(int, usecs_to_jiffies(tp->srtt_us >> 3),
+ TCP_DELACK_MIN);
if (rtt < max_ato)
max_ato = rtt;
@@ -3178,7 +3243,7 @@ int tcp_write_wakeup(struct sock *sk)
skb->len > mss) {
seg_size = min(seg_size, mss);
TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
- if (tcp_fragment(sk, skb, seg_size, mss))
+ if (tcp_fragment(sk, skb, seg_size, mss, GFP_ATOMIC))
return -1;
} else if (!tcp_skb_pcount(skb))
tcp_set_skb_tso_segs(sk, skb, mss);
diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c
index 1f2d37613c9..3b66610d415 100644
--- a/net/ipv4/tcp_probe.c
+++ b/net/ipv4/tcp_probe.c
@@ -154,7 +154,7 @@ static void jtcp_rcv_established(struct sock *sk, struct sk_buff *skb,
p->snd_wnd = tp->snd_wnd;
p->rcv_wnd = tp->rcv_wnd;
p->ssthresh = tcp_current_ssthresh(sk);
- p->srtt = tp->srtt >> 3;
+ p->srtt = tp->srtt_us >> 3;
tcp_probe.head = (tcp_probe.head + 1) & (bufsize - 1);
}
diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c
index 19ea6c2951f..8250949b885 100644
--- a/net/ipv4/tcp_scalable.c
+++ b/net/ipv4/tcp_scalable.c
@@ -15,12 +15,11 @@
#define TCP_SCALABLE_AI_CNT 50U
#define TCP_SCALABLE_MD_SCALE 3
-static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked,
- u32 in_flight)
+static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked)
{
struct tcp_sock *tp = tcp_sk(sk);
- if (!tcp_is_cwnd_limited(sk, in_flight))
+ if (!tcp_is_cwnd_limited(sk))
return;
if (tp->snd_cwnd <= tp->snd_ssthresh)
@@ -39,7 +38,6 @@ static u32 tcp_scalable_ssthresh(struct sock *sk)
static struct tcp_congestion_ops tcp_scalable __read_mostly = {
.ssthresh = tcp_scalable_ssthresh,
.cong_avoid = tcp_scalable_cong_avoid,
- .min_cwnd = tcp_reno_min_cwnd,
.owner = THIS_MODULE,
.name = "scalable",
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 64f0354c84c..286227abed1 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -165,6 +165,9 @@ static int tcp_write_timeout(struct sock *sk)
dst_negative_advice(sk);
if (tp->syn_fastopen || tp->syn_data)
tcp_fastopen_cache_set(sk, 0, NULL, true);
+ if (tp->syn_data)
+ NET_INC_STATS_BH(sock_net(sk),
+ LINUX_MIB_TCPFASTOPENACTIVEFAIL);
}
retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries;
syn_set = true;
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index 06cae62bf20..9a5e05f27f4 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -163,14 +163,13 @@ static inline u32 tcp_vegas_ssthresh(struct tcp_sock *tp)
return min(tp->snd_ssthresh, tp->snd_cwnd-1);
}
-static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked,
- u32 in_flight)
+static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
{
struct tcp_sock *tp = tcp_sk(sk);
struct vegas *vegas = inet_csk_ca(sk);
if (!vegas->doing_vegas_now) {
- tcp_reno_cong_avoid(sk, ack, acked, in_flight);
+ tcp_reno_cong_avoid(sk, ack, acked);
return;
}
@@ -195,7 +194,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked,
/* We don't have enough RTT samples to do the Vegas
* calculation, so we'll behave like Reno.
*/
- tcp_reno_cong_avoid(sk, ack, acked, in_flight);
+ tcp_reno_cong_avoid(sk, ack, acked);
} else {
u32 rtt, diff;
u64 target_cwnd;
@@ -306,11 +305,9 @@ void tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb)
EXPORT_SYMBOL_GPL(tcp_vegas_get_info);
static struct tcp_congestion_ops tcp_vegas __read_mostly = {
- .flags = TCP_CONG_RTT_STAMP,
.init = tcp_vegas_init,
.ssthresh = tcp_reno_ssthresh,
.cong_avoid = tcp_vegas_cong_avoid,
- .min_cwnd = tcp_reno_min_cwnd,
.pkts_acked = tcp_vegas_pkts_acked,
.set_state = tcp_vegas_state,
.cwnd_event = tcp_vegas_cwnd_event,
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c
index 326475a9486..27b9825753d 100644
--- a/net/ipv4/tcp_veno.c
+++ b/net/ipv4/tcp_veno.c
@@ -114,19 +114,18 @@ static void tcp_veno_cwnd_event(struct sock *sk, enum tcp_ca_event event)
tcp_veno_init(sk);
}
-static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked,
- u32 in_flight)
+static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked)
{
struct tcp_sock *tp = tcp_sk(sk);
struct veno *veno = inet_csk_ca(sk);
if (!veno->doing_veno_now) {
- tcp_reno_cong_avoid(sk, ack, acked, in_flight);
+ tcp_reno_cong_avoid(sk, ack, acked);
return;
}
/* limited by applications */
- if (!tcp_is_cwnd_limited(sk, in_flight))
+ if (!tcp_is_cwnd_limited(sk))
return;
/* We do the Veno calculations only if we got enough rtt samples */
@@ -134,7 +133,7 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked,
/* We don't have enough rtt samples to do the Veno
* calculation, so we'll behave like Reno.
*/
- tcp_reno_cong_avoid(sk, ack, acked, in_flight);
+ tcp_reno_cong_avoid(sk, ack, acked);
} else {
u64 target_cwnd;
u32 rtt;
@@ -203,7 +202,6 @@ static u32 tcp_veno_ssthresh(struct sock *sk)
}
static struct tcp_congestion_ops tcp_veno __read_mostly = {
- .flags = TCP_CONG_RTT_STAMP,
.init = tcp_veno_init,
.ssthresh = tcp_veno_ssthresh,
.cong_avoid = tcp_veno_cong_avoid,
diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c
index 76a1e23259e..b94a04ae2ed 100644
--- a/net/ipv4/tcp_westwood.c
+++ b/net/ipv4/tcp_westwood.c
@@ -276,7 +276,6 @@ static struct tcp_congestion_ops tcp_westwood __read_mostly = {
.init = tcp_westwood_init,
.ssthresh = tcp_reno_ssthresh,
.cong_avoid = tcp_reno_cong_avoid,
- .min_cwnd = tcp_westwood_bw_rttmin,
.cwnd_event = tcp_westwood_event,
.get_info = tcp_westwood_info,
.pkts_acked = tcp_westwood_pkts_acked,
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
index 1a8d271f994..599b79b8eac 100644
--- a/net/ipv4/tcp_yeah.c
+++ b/net/ipv4/tcp_yeah.c
@@ -69,13 +69,12 @@ static void tcp_yeah_pkts_acked(struct sock *sk, u32 pkts_acked, s32 rtt_us)
tcp_vegas_pkts_acked(sk, pkts_acked, rtt_us);
}
-static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked,
- u32 in_flight)
+static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked)
{
struct tcp_sock *tp = tcp_sk(sk);
struct yeah *yeah = inet_csk_ca(sk);
- if (!tcp_is_cwnd_limited(sk, in_flight))
+ if (!tcp_is_cwnd_limited(sk))
return;
if (tp->snd_cwnd <= tp->snd_ssthresh)
@@ -227,11 +226,9 @@ static u32 tcp_yeah_ssthresh(struct sock *sk) {
}
static struct tcp_congestion_ops tcp_yeah __read_mostly = {
- .flags = TCP_CONG_RTT_STAMP,
.init = tcp_yeah_init,
.ssthresh = tcp_yeah_ssthresh,
.cong_avoid = tcp_yeah_cong_avoid,
- .min_cwnd = tcp_reno_min_cwnd,
.set_state = tcp_vegas_state,
.cwnd_event = tcp_vegas_cwnd_event,
.get_info = tcp_vegas_get_info,
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 77bd16fa9f3..7d5a8661df7 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -246,7 +246,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
do {
if (low <= snum && snum <= high &&
!test_bit(snum >> udptable->log, bitmap) &&
- !inet_is_reserved_local_port(snum))
+ !inet_is_local_reserved_port(net, snum))
goto found;
snum += rand;
} while (snum != first);
@@ -727,13 +727,12 @@ EXPORT_SYMBOL(udp_flush_pending_frames);
void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst)
{
struct udphdr *uh = udp_hdr(skb);
- struct sk_buff *frags = skb_shinfo(skb)->frag_list;
int offset = skb_transport_offset(skb);
int len = skb->len - offset;
int hlen = len;
__wsum csum = 0;
- if (!frags) {
+ if (!skb_has_frag_list(skb)) {
/*
* Only one fragment on the socket.
*/
@@ -742,15 +741,17 @@ void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst)
uh->check = ~csum_tcpudp_magic(src, dst, len,
IPPROTO_UDP, 0);
} else {
+ struct sk_buff *frags;
+
/*
* HW-checksum won't work as there are two or more
* fragments on the socket so that all csums of sk_buffs
* should be together
*/
- do {
+ skb_walk_frags(skb, frags) {
csum = csum_add(csum, frags->csum);
hlen -= frags->len;
- } while ((frags = frags->next));
+ }
csum = skb_checksum(skb, offset, hlen, csum);
skb->ip_summed = CHECKSUM_NONE;
@@ -762,6 +763,43 @@ void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst)
}
EXPORT_SYMBOL_GPL(udp4_hwcsum);
+/* Function to set UDP checksum for an IPv4 UDP packet. This is intended
+ * for the simple case like when setting the checksum for a UDP tunnel.
+ */
+void udp_set_csum(bool nocheck, struct sk_buff *skb,
+ __be32 saddr, __be32 daddr, int len)
+{
+ struct udphdr *uh = udp_hdr(skb);
+
+ if (nocheck)
+ uh->check = 0;
+ else if (skb_is_gso(skb))
+ uh->check = ~udp_v4_check(len, saddr, daddr, 0);
+ else if (skb_dst(skb) && skb_dst(skb)->dev &&
+ (skb_dst(skb)->dev->features & NETIF_F_V4_CSUM)) {
+
+ BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
+
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ skb->csum_start = skb_transport_header(skb) - skb->head;
+ skb->csum_offset = offsetof(struct udphdr, check);
+ uh->check = ~udp_v4_check(len, saddr, daddr, 0);
+ } else {
+ __wsum csum;
+
+ BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
+
+ uh->check = 0;
+ csum = skb_checksum(skb, 0, len, 0);
+ uh->check = udp_v4_check(len, saddr, daddr, csum);
+ if (uh->check == 0)
+ uh->check = CSUM_MANGLED_0;
+
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ }
+}
+EXPORT_SYMBOL(udp_set_csum);
+
static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4)
{
struct sock *sk = skb->sk;
@@ -785,7 +823,7 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4)
if (is_udplite) /* UDP-Lite */
csum = udplite_csum(skb);
- else if (sk->sk_no_check == UDP_CSUM_NOXMIT) { /* UDP csum disabled */
+ else if (sk->sk_no_check_tx) { /* UDP csum disabled */
skb->ip_summed = CHECKSUM_NONE;
goto send;
@@ -931,7 +969,8 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
sock_tx_timestamp(sk, &ipc.tx_flags);
if (msg->msg_controllen) {
- err = ip_cmsg_send(sock_net(sk), msg, &ipc);
+ err = ip_cmsg_send(sock_net(sk), msg, &ipc,
+ sk->sk_family == AF_INET6);
if (err)
return err;
if (ipc.opt)
@@ -1494,6 +1533,10 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
if (skb->len > sizeof(struct udphdr) && encap_rcv != NULL) {
int ret;
+ /* Verify checksum before giving to encap */
+ if (udp_lib_checksum_complete(skb))
+ goto csum_error;
+
ret = encap_rcv(sk, skb);
if (ret <= 0) {
UDP_INC_STATS_BH(sock_net(sk),
@@ -1545,8 +1588,11 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
goto csum_error;
- if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf))
+ if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf)) {
+ UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS,
+ is_udplite);
goto drop;
+ }
rc = 0;
@@ -1671,7 +1717,6 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh,
int proto)
{
- const struct iphdr *iph;
int err;
UDP_SKB_CB(skb)->partial_cov = 0;
@@ -1683,22 +1728,8 @@ static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh,
return err;
}
- iph = ip_hdr(skb);
- if (uh->check == 0) {
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- } else if (skb->ip_summed == CHECKSUM_COMPLETE) {
- if (!csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len,
- proto, skb->csum))
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- }
- if (!skb_csum_unnecessary(skb))
- skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
- skb->len, proto, 0);
- /* Probably, we should checksum udp header (it should be in cache
- * in any case) and data in tiny packets (< rx copybreak).
- */
-
- return 0;
+ return skb_checksum_init_zero_check(skb, proto, uh->check,
+ inet_compute_pseudo);
}
/*
@@ -1833,6 +1864,10 @@ static struct sock *__udp4_lib_mcast_demux_lookup(struct net *net,
unsigned int count, slot = udp_hashfn(net, hnum, udp_table.mask);
struct udp_hslot *hslot = &udp_table.hash[slot];
+ /* Do not bother scanning a too big list */
+ if (hslot->count > 10)
+ return NULL;
+
rcu_read_lock();
begin:
count = 0;
@@ -1885,7 +1920,7 @@ static struct sock *__udp4_lib_demux_lookup(struct net *net,
unsigned int hash2 = udp4_portaddr_hash(net, loc_addr, hnum);
unsigned int slot2 = hash2 & udp_table.mask;
struct udp_hslot *hslot2 = &udp_table.hash2[slot2];
- INET_ADDR_COOKIE(acookie, rmt_addr, loc_addr)
+ INET_ADDR_COOKIE(acookie, rmt_addr, loc_addr);
const __portpair ports = INET_COMBINED_PORTS(rmt_port, hnum);
rcu_read_lock();
@@ -1978,7 +2013,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
int (*push_pending_frames)(struct sock *))
{
struct udp_sock *up = udp_sk(sk);
- int val;
+ int val, valbool;
int err = 0;
int is_udplite = IS_UDPLITE(sk);
@@ -1988,6 +2023,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
if (get_user(val, (int __user *)optval))
return -EFAULT;
+ valbool = val ? 1 : 0;
+
switch (optname) {
case UDP_CORK:
if (val != 0) {
@@ -2017,6 +2054,14 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
}
break;
+ case UDP_NO_CHECK6_TX:
+ up->no_check6_tx = valbool;
+ break;
+
+ case UDP_NO_CHECK6_RX:
+ up->no_check6_rx = valbool;
+ break;
+
/*
* UDP-Lite's partial checksum coverage (RFC 3828).
*/
@@ -2099,6 +2144,14 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname,
val = up->encap_type;
break;
+ case UDP_NO_CHECK6_TX:
+ val = up->no_check6_tx;
+ break;
+
+ case UDP_NO_CHECK6_RX:
+ val = up->no_check6_rx;
+ break;
+
/* The following two cannot be changed on UDP sockets, the return is
* always 0 (which corresponds to the full checksum coverage of UDP). */
case UDPLITE_SEND_CSCOV:
@@ -2483,7 +2536,11 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb);
__be16 protocol = skb->protocol;
netdev_features_t enc_features;
- int outer_hlen;
+ int udp_offset, outer_hlen;
+ unsigned int oldlen;
+ bool need_csum;
+
+ oldlen = (u16)~skb->len;
if (unlikely(!pskb_may_pull(skb, tnl_hlen)))
goto out;
@@ -2495,6 +2552,10 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
skb->mac_len = skb_inner_network_offset(skb);
skb->protocol = htons(ETH_P_TEB);
+ need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM);
+ if (need_csum)
+ skb->encap_hdr_csum = 1;
+
/* segment inner packet. */
enc_features = skb->dev->hw_enc_features & netif_skb_features(skb);
segs = skb_mac_gso_segment(skb, enc_features);
@@ -2505,10 +2566,11 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
}
outer_hlen = skb_tnl_header_len(skb);
+ udp_offset = outer_hlen - tnl_hlen;
skb = segs;
do {
struct udphdr *uh;
- int udp_offset = outer_hlen - tnl_hlen;
+ int len;
skb_reset_inner_headers(skb);
skb->encapsulation = 1;
@@ -2519,31 +2581,20 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
skb_reset_mac_header(skb);
skb_set_network_header(skb, mac_len);
skb_set_transport_header(skb, udp_offset);
+ len = skb->len - udp_offset;
uh = udp_hdr(skb);
- uh->len = htons(skb->len - udp_offset);
-
- /* csum segment if tunnel sets skb with csum. */
- if (protocol == htons(ETH_P_IP) && unlikely(uh->check)) {
- struct iphdr *iph = ip_hdr(skb);
+ uh->len = htons(len);
- uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
- skb->len - udp_offset,
- IPPROTO_UDP, 0);
- uh->check = csum_fold(skb_checksum(skb, udp_offset,
- skb->len - udp_offset, 0));
- if (uh->check == 0)
- uh->check = CSUM_MANGLED_0;
+ if (need_csum) {
+ __be32 delta = htonl(oldlen + len);
- } else if (protocol == htons(ETH_P_IPV6)) {
- struct ipv6hdr *ipv6h = ipv6_hdr(skb);
- u32 len = skb->len - udp_offset;
+ uh->check = ~csum_fold((__force __wsum)
+ ((__force u32)uh->check +
+ (__force u32)delta));
+ uh->check = gso_make_checksum(skb, ~uh->check);
- uh->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
- len, IPPROTO_UDP, 0);
- uh->check = csum_fold(skb_checksum(skb, udp_offset, len, 0));
if (uh->check == 0)
uh->check = CSUM_MANGLED_0;
- skb->ip_summed = CHECKSUM_NONE;
}
skb->protocol = protocol;
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 88b4023ecfc..546d2d439dd 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -56,7 +56,8 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
__wsum csum;
if (skb->encapsulation &&
- skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL) {
+ (skb_shinfo(skb)->gso_type &
+ (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))) {
segs = skb_udp_tunnel_segment(skb, features);
goto out;
}
@@ -71,8 +72,10 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY |
SKB_GSO_UDP_TUNNEL |
+ SKB_GSO_UDP_TUNNEL_CSUM |
SKB_GSO_IPIP |
- SKB_GSO_GRE | SKB_GSO_MPLS) ||
+ SKB_GSO_GRE | SKB_GSO_GRE_CSUM |
+ SKB_GSO_MPLS) ||
!(type & (SKB_GSO_UDP))))
goto out;
@@ -197,6 +200,7 @@ unflush:
}
skb_gro_pull(skb, sizeof(struct udphdr)); /* pull encapsulating udp header */
+ skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr));
pp = uo_priv->offload->callbacks.gro_receive(head, skb);
out_unlock:
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index 2c46acd4cc3..3b3efbda48e 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -70,7 +70,6 @@ static struct inet_protosw udplite4_protosw = {
.protocol = IPPROTO_UDPLITE,
.prot = &udplite_prot,
.ops = &inet_dgram_ops,
- .no_check = 0, /* must checksum (RFC 3828) */
.flags = INET_PROTOSW_PERMANENT,
};
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index 1f12c8b4586..aac6197b7a7 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -37,15 +37,6 @@ drop:
return NET_RX_DROP;
}
-int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi,
- int encap_type)
-{
- XFRM_SPI_SKB_CB(skb)->family = AF_INET;
- XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
- return xfrm_input(skb, nexthdr, spi, encap_type);
-}
-EXPORT_SYMBOL(xfrm4_rcv_encap);
-
int xfrm4_transport_finish(struct sk_buff *skb, int async)
{
struct iphdr *iph = ip_hdr(skb);
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index 31b18152528..91771a7c802 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -15,65 +15,6 @@
#include <net/ip.h>
#include <net/xfrm.h>
-/* Informational hook. The decap is still done here. */
-static struct xfrm_tunnel_notifier __rcu *rcv_notify_handlers __read_mostly;
-static DEFINE_MUTEX(xfrm4_mode_tunnel_input_mutex);
-
-int xfrm4_mode_tunnel_input_register(struct xfrm_tunnel_notifier *handler)
-{
- struct xfrm_tunnel_notifier __rcu **pprev;
- struct xfrm_tunnel_notifier *t;
- int ret = -EEXIST;
- int priority = handler->priority;
-
- mutex_lock(&xfrm4_mode_tunnel_input_mutex);
-
- for (pprev = &rcv_notify_handlers;
- (t = rcu_dereference_protected(*pprev,
- lockdep_is_held(&xfrm4_mode_tunnel_input_mutex))) != NULL;
- pprev = &t->next) {
- if (t->priority > priority)
- break;
- if (t->priority == priority)
- goto err;
-
- }
-
- handler->next = *pprev;
- rcu_assign_pointer(*pprev, handler);
-
- ret = 0;
-
-err:
- mutex_unlock(&xfrm4_mode_tunnel_input_mutex);
- return ret;
-}
-EXPORT_SYMBOL_GPL(xfrm4_mode_tunnel_input_register);
-
-int xfrm4_mode_tunnel_input_deregister(struct xfrm_tunnel_notifier *handler)
-{
- struct xfrm_tunnel_notifier __rcu **pprev;
- struct xfrm_tunnel_notifier *t;
- int ret = -ENOENT;
-
- mutex_lock(&xfrm4_mode_tunnel_input_mutex);
- for (pprev = &rcv_notify_handlers;
- (t = rcu_dereference_protected(*pprev,
- lockdep_is_held(&xfrm4_mode_tunnel_input_mutex))) != NULL;
- pprev = &t->next) {
- if (t == handler) {
- *pprev = handler->next;
- ret = 0;
- break;
- }
- }
- mutex_unlock(&xfrm4_mode_tunnel_input_mutex);
- synchronize_net();
-
- return ret;
-}
-EXPORT_SYMBOL_GPL(xfrm4_mode_tunnel_input_deregister);
-
static inline void ipip_ecn_decapsulate(struct sk_buff *skb)
{
struct iphdr *inner_iph = ipip_hdr(skb);
@@ -117,24 +58,18 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ?
0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF));
- ip_select_ident(skb, dst->child, NULL);
top_iph->ttl = ip4_dst_hoplimit(dst->child);
top_iph->saddr = x->props.saddr.a4;
top_iph->daddr = x->id.daddr.a4;
+ ip_select_ident(skb, NULL);
return 0;
}
-#define for_each_input_rcu(head, handler) \
- for (handler = rcu_dereference(head); \
- handler != NULL; \
- handler = rcu_dereference(handler->next))
-
static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
{
- struct xfrm_tunnel_notifier *handler;
int err = -EINVAL;
if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPIP)
@@ -143,9 +78,6 @@ static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
if (!pskb_may_pull(skb, sizeof(struct iphdr)))
goto out;
- for_each_input_rcu(rcv_notify_handlers, handler)
- handler->handler(skb);
-
err = skb_unclone(skb, GFP_ATOMIC);
if (err)
goto out;
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index baa0f63731f..d5f6bd9a210 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -25,7 +25,7 @@ static int xfrm4_tunnel_check_size(struct sk_buff *skb)
if (IPCB(skb)->flags & IPSKB_XFRM_TUNNEL_SIZE)
goto out;
- if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) || skb->local_df)
+ if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) || skb->ignore_df)
goto out;
mtu = dst_mtu(skb_dst(skb));
@@ -62,10 +62,7 @@ int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
if (err)
return err;
- memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
- IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED;
-
- skb->protocol = htons(ETH_P_IP);
+ IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE;
return x->outer_mode->output2(x, skb);
}
@@ -73,27 +70,34 @@ EXPORT_SYMBOL(xfrm4_prepare_output);
int xfrm4_output_finish(struct sk_buff *skb)
{
-#ifdef CONFIG_NETFILTER
- if (!skb_dst(skb)->xfrm) {
- IPCB(skb)->flags |= IPSKB_REROUTED;
- return dst_output(skb);
- }
+ memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
+ skb->protocol = htons(ETH_P_IP);
+#ifdef CONFIG_NETFILTER
IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED;
#endif
- skb->protocol = htons(ETH_P_IP);
return xfrm_output(skb);
}
-int xfrm4_output(struct sk_buff *skb)
+static int __xfrm4_output(struct sk_buff *skb)
{
- struct dst_entry *dst = skb_dst(skb);
- struct xfrm_state *x = dst->xfrm;
+ struct xfrm_state *x = skb_dst(skb)->xfrm;
+
+#ifdef CONFIG_NETFILTER
+ if (!x) {
+ IPCB(skb)->flags |= IPSKB_REROUTED;
+ return dst_output(skb);
+ }
+#endif
+ return x->outer_mode->afinfo->output_finish(skb);
+}
+
+int xfrm4_output(struct sock *sk, struct sk_buff *skb)
+{
return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, skb,
- NULL, dst->dev,
- x->outer_mode->afinfo->output_finish,
+ NULL, skb_dst(skb)->dev, __xfrm4_output,
!(IPCB(skb)->flags & IPSKB_REROUTED));
}
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index e1a63930a96..6156f68a1e9 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -325,6 +325,7 @@ void __init xfrm4_init(void)
xfrm4_state_init();
xfrm4_policy_init();
+ xfrm4_protocol_init();
#ifdef CONFIG_SYSCTL
register_pernet_subsys(&xfrm4_net_ops);
#endif
diff --git a/net/ipv4/xfrm4_protocol.c b/net/ipv4/xfrm4_protocol.c
new file mode 100644
index 00000000000..a2ce0101eaa
--- /dev/null
+++ b/net/ipv4/xfrm4_protocol.c
@@ -0,0 +1,301 @@
+/* xfrm4_protocol.c - Generic xfrm protocol multiplexer.
+ *
+ * Copyright (C) 2013 secunet Security Networks AG
+ *
+ * Author:
+ * Steffen Klassert <steffen.klassert@secunet.com>
+ *
+ * Based on:
+ * net/ipv4/tunnel4.c
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/init.h>
+#include <linux/mutex.h>
+#include <linux/skbuff.h>
+#include <net/icmp.h>
+#include <net/ip.h>
+#include <net/protocol.h>
+#include <net/xfrm.h>
+
+/*
+ * Heads of the per-protocol handler chains (ESP, AH, IPcomp). The receive
+ * and error paths below walk them with rcu_dereference(); the register and
+ * deregister functions modify them while holding xfrm4_protocol_mutex.
+ */
+static struct xfrm4_protocol __rcu *esp4_handlers __read_mostly;
+static struct xfrm4_protocol __rcu *ah4_handlers __read_mostly;
+static struct xfrm4_protocol __rcu *ipcomp4_handlers __read_mostly;
+static DEFINE_MUTEX(xfrm4_protocol_mutex);
+
+/*
+ * Map an IP protocol number to the head of its handler chain.
+ * Returns NULL for anything other than ESP, AH or IPcomp.
+ */
+static inline struct xfrm4_protocol __rcu **proto_handlers(u8 protocol)
+{
+	if (protocol == IPPROTO_ESP)
+		return &esp4_handlers;
+	if (protocol == IPPROTO_AH)
+		return &ah4_handlers;
+	if (protocol == IPPROTO_COMP)
+		return &ipcomp4_handlers;
+
+	return NULL;
+}
+
+/*
+ * Walk a handler chain starting at @head, using @handler as the cursor.
+ * Every hop is fetched with rcu_dereference().
+ *
+ * The final line must not end in a line continuation, otherwise the macro
+ * definition silently absorbs whatever line follows it.
+ */
+#define for_each_protocol_rcu(head, handler)		\
+	for (handler = rcu_dereference(head);		\
+	     handler != NULL;				\
+	     handler = rcu_dereference(handler->next))
+
+/*
+ * Run the cb_handler of each handler registered for @protocol, in chain
+ * order. The first result that is <= 0 ends the walk and is returned.
+ * Returns 0 when @protocol has no chain or every callback returned > 0.
+ */
+int xfrm4_rcv_cb(struct sk_buff *skb, u8 protocol, int err)
+{
+	struct xfrm4_protocol __rcu **chain = proto_handlers(protocol);
+	struct xfrm4_protocol *cur;
+	int rc;
+
+	if (chain == NULL)
+		return 0;
+
+	for_each_protocol_rcu(*chain, cur) {
+		rc = cur->cb_handler(skb, err);
+		if (rc <= 0)
+			return rc;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(xfrm4_rcv_cb);
+
+/*
+ * Dispatch an skb to the input_handler of each handler registered for
+ * @nexthdr. The first result other than -EINVAL is returned. If there is
+ * no chain for @nexthdr, or every handler returned -EINVAL, an ICMP
+ * port-unreachable is sent, the skb is freed and 0 is returned.
+ */
+int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi,
+		    int encap_type)
+{
+	struct xfrm4_protocol __rcu **chain = proto_handlers(nexthdr);
+	struct xfrm4_protocol *cur;
+	int rc;
+
+	/* Prime the skb control block before any handler sees the packet. */
+	XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL;
+	XFRM_SPI_SKB_CB(skb)->family = AF_INET;
+	XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
+
+	if (chain != NULL) {
+		for_each_protocol_rcu(*chain, cur) {
+			rc = cur->input_handler(skb, nexthdr, spi, encap_type);
+			if (rc != -EINVAL)
+				return rc;
+		}
+	}
+
+	/* Nobody took the packet: report it and drop it. */
+	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
+	kfree_skb(skb);
+
+	return 0;
+}
+EXPORT_SYMBOL(xfrm4_rcv_encap);
+
+/*
+ * Receive entry point for ESP (installed as esp4_protocol.handler).
+ * Offers the skb to each registered ESP handler and returns the first
+ * result other than -EINVAL. Otherwise sends an ICMP port-unreachable,
+ * frees the skb and returns 0.
+ */
+static int xfrm4_esp_rcv(struct sk_buff *skb)
+{
+	struct xfrm4_protocol *cur;
+	int rc;
+
+	XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL;
+
+	for_each_protocol_rcu(esp4_handlers, cur) {
+		rc = cur->handler(skb);
+		if (rc != -EINVAL)
+			return rc;
+	}
+
+	/* Nobody took the packet: report it and drop it. */
+	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
+	kfree_skb(skb);
+
+	return 0;
+}
+
+/*
+ * Error entry point for ESP (installed as esp4_protocol.err_handler).
+ * Calls each registered err_handler in chain order and stops at the first
+ * one that returns 0.
+ */
+static void xfrm4_esp_err(struct sk_buff *skb, u32 info)
+{
+	struct xfrm4_protocol *cur;
+
+	for_each_protocol_rcu(esp4_handlers, cur) {
+		if (cur->err_handler(skb, info) == 0)
+			return;
+	}
+}
+
+/*
+ * Receive entry point for AH (installed as ah4_protocol.handler).
+ * Offers the skb to each registered AH handler in chain order and returns
+ * the first result other than -EINVAL. If every handler returns -EINVAL, or
+ * the chain is empty, an ICMP port-unreachable is sent, the skb is freed
+ * and 0 is returned.
+ */
+static int xfrm4_ah_rcv(struct sk_buff *skb)
+{
+	int ret;
+	struct xfrm4_protocol *handler;
+
+	/* Clear the tunnel pointer in the skb control block before dispatch. */
+	XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL;
+
+	for_each_protocol_rcu(ah4_handlers, handler) {
+		ret = handler->handler(skb);
+		if (ret != -EINVAL)
+			return ret;
+	}
+
+	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
+
+	kfree_skb(skb);
+	return 0;
+}
+
+/*
+ * Error entry point for AH (installed as ah4_protocol.err_handler).
+ * Calls each registered err_handler in chain order and stops at the first
+ * one that returns 0.
+ */
+static void xfrm4_ah_err(struct sk_buff *skb, u32 info)
+{
+	struct xfrm4_protocol *cur;
+
+	for_each_protocol_rcu(ah4_handlers, cur) {
+		if (cur->err_handler(skb, info) == 0)
+			return;
+	}
+}
+
+/*
+ * Receive entry point for IPcomp (installed as ipcomp4_protocol.handler).
+ * Offers the skb to each registered IPcomp handler and returns the first
+ * result other than -EINVAL. Otherwise sends an ICMP port-unreachable,
+ * frees the skb and returns 0.
+ */
+static int xfrm4_ipcomp_rcv(struct sk_buff *skb)
+{
+	struct xfrm4_protocol *cur;
+	int rc;
+
+	XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL;
+
+	for_each_protocol_rcu(ipcomp4_handlers, cur) {
+		rc = cur->handler(skb);
+		if (rc != -EINVAL)
+			return rc;
+	}
+
+	/* Nobody took the packet: report it and drop it. */
+	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
+	kfree_skb(skb);
+
+	return 0;
+}
+
+/*
+ * Error entry point for IPcomp (installed as ipcomp4_protocol.err_handler).
+ * Calls each registered err_handler in chain order and stops at the first
+ * one that returns 0.
+ */
+static void xfrm4_ipcomp_err(struct sk_buff *skb, u32 info)
+{
+	struct xfrm4_protocol *cur;
+
+	for_each_protocol_rcu(ipcomp4_handlers, cur) {
+		if (cur->err_handler(skb, info) == 0)
+			return;
+	}
+}
+
+/*
+ * net_protocol entries handed to inet_add_protocol()/inet_del_protocol() by
+ * the register/deregister functions below. Each one routes receive and
+ * error events into the matching multiplexer functions defined above.
+ */
+static const struct net_protocol esp4_protocol = {
+ .handler = xfrm4_esp_rcv,
+ .err_handler = xfrm4_esp_err,
+ .no_policy = 1,
+ .netns_ok = 1,
+};
+
+static const struct net_protocol ah4_protocol = {
+ .handler = xfrm4_ah_rcv,
+ .err_handler = xfrm4_ah_err,
+ .no_policy = 1,
+ .netns_ok = 1,
+};
+
+static const struct net_protocol ipcomp4_protocol = {
+ .handler = xfrm4_ipcomp_rcv,
+ .err_handler = xfrm4_ipcomp_err,
+ .no_policy = 1,
+ .netns_ok = 1,
+};
+
+/* AF_INET input afinfo; its callback is xfrm4_rcv_cb. Registered in xfrm4_protocol_init(). */
+static struct xfrm_input_afinfo xfrm4_input_afinfo = {
+ .family = AF_INET,
+ .owner = THIS_MODULE,
+ .callback = xfrm4_rcv_cb,
+};
+
+/*
+ * Map an IP protocol number to the net_protocol entry defined in this file.
+ * Returns NULL for anything other than ESP, AH or IPcomp.
+ */
+static inline const struct net_protocol *netproto(unsigned char protocol)
+{
+	if (protocol == IPPROTO_ESP)
+		return &esp4_protocol;
+	if (protocol == IPPROTO_AH)
+		return &ah4_protocol;
+	if (protocol == IPPROTO_COMP)
+		return &ipcomp4_protocol;
+
+	return NULL;
+}
+
+/*
+ * Insert @handler into the chain for @protocol.
+ *
+ * The chain is kept in descending priority order: the new entry is linked
+ * in front of the first existing entry whose priority is lower.
+ *
+ * Returns 0 on success, -EINVAL if @protocol is not ESP/AH/IPcomp, -EEXIST
+ * if an entry with the same priority is already present, and -EAGAIN if the
+ * chain was empty beforehand and inet_add_protocol() failed.
+ */
+int xfrm4_protocol_register(struct xfrm4_protocol *handler,
+ unsigned char protocol)
+{
+ struct xfrm4_protocol __rcu **pprev;
+ struct xfrm4_protocol *t;
+ bool add_netproto = false;
+ int ret = -EEXIST;
+ int priority = handler->priority;
+
+ if (!proto_handlers(protocol) || !netproto(protocol))
+ return -EINVAL;
+
+ mutex_lock(&xfrm4_protocol_mutex);
+
+ /* First handler for this protocol: the net_protocol hook must be added too. */
+ if (!rcu_dereference_protected(*proto_handlers(protocol),
+ lockdep_is_held(&xfrm4_protocol_mutex)))
+ add_netproto = true;
+
+ /* Find the insertion point; an equal priority is rejected with -EEXIST. */
+ for (pprev = proto_handlers(protocol);
+ (t = rcu_dereference_protected(*pprev,
+ lockdep_is_held(&xfrm4_protocol_mutex))) != NULL;
+ pprev = &t->next) {
+ if (t->priority < priority)
+ break;
+ if (t->priority == priority)
+ goto err;
+ }
+
+ /* Set ->next before publishing so readers never see a half-linked entry. */
+ handler->next = *pprev;
+ rcu_assign_pointer(*pprev, handler);
+
+ ret = 0;
+
+err:
+ mutex_unlock(&xfrm4_protocol_mutex);
+
+ /*
+ * NOTE(review): if inet_add_protocol() fails here, -EAGAIN is returned
+ * but @handler is still linked into the chain. Confirm callers do not
+ * free or reuse the handler on this error path.
+ */
+ if (add_netproto) {
+ if (inet_add_protocol(netproto(protocol), protocol)) {
+ pr_err("%s: can't add protocol\n", __func__);
+ ret = -EAGAIN;
+ }
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL(xfrm4_protocol_register);
+
+/*
+ * Remove @handler from the chain for @protocol.
+ *
+ * Returns 0 on success, -EINVAL if @protocol is not ESP/AH/IPcomp, -ENOENT
+ * if @handler is not on the chain, and -EAGAIN if the chain is empty after
+ * the walk and inet_del_protocol() failed. synchronize_net() is called
+ * before returning on every path that took the mutex.
+ */
+int xfrm4_protocol_deregister(struct xfrm4_protocol *handler,
+ unsigned char protocol)
+{
+ struct xfrm4_protocol __rcu **pprev;
+ struct xfrm4_protocol *t;
+ int ret = -ENOENT;
+
+ if (!proto_handlers(protocol) || !netproto(protocol))
+ return -EINVAL;
+
+ mutex_lock(&xfrm4_protocol_mutex);
+
+ /* Locate @handler and unlink it by pointing its predecessor past it. */
+ for (pprev = proto_handlers(protocol);
+ (t = rcu_dereference_protected(*pprev,
+ lockdep_is_held(&xfrm4_protocol_mutex))) != NULL;
+ pprev = &t->next) {
+ if (t == handler) {
+ *pprev = handler->next;
+ ret = 0;
+ break;
+ }
+ }
+
+ /*
+ * Chain is now empty: drop the net_protocol hook as well.
+ * NOTE(review): this also runs when @handler was not found and the chain
+ * was already empty, so -ENOENT can turn into -EAGAIN. Confirm intended.
+ */
+ if (!rcu_dereference_protected(*proto_handlers(protocol),
+ lockdep_is_held(&xfrm4_protocol_mutex))) {
+ if (inet_del_protocol(netproto(protocol), protocol) < 0) {
+ pr_err("%s: can't remove protocol\n", __func__);
+ ret = -EAGAIN;
+ }
+ }
+
+ mutex_unlock(&xfrm4_protocol_mutex);
+
+ synchronize_net();
+
+ return ret;
+}
+EXPORT_SYMBOL(xfrm4_protocol_deregister);
+
+/*
+ * Boot-time setup: register the AF_INET input afinfo (whose callback is
+ * xfrm4_rcv_cb) with the xfrm input layer.
+ *
+ * Deliberately not exported: the function is annotated __init, and
+ * exporting a symbol that lives in an init section is a section mismatch.
+ */
+void __init xfrm4_protocol_init(void)
+{
+	xfrm_input_register_afinfo(&xfrm4_input_afinfo);
+}
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 17bb830872d..2fe68364bb2 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -16,7 +16,7 @@ ipv6-$(CONFIG_SYSCTL) = sysctl_net_ipv6.o
ipv6-$(CONFIG_IPV6_MROUTE) += ip6mr.o
ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \
- xfrm6_output.o
+ xfrm6_output.o xfrm6_protocol.o
ipv6-$(CONFIG_NETFILTER) += netfilter.o
ipv6-$(CONFIG_IPV6_MULTIPLE_TABLES) += fib6_rules.o
ipv6-$(CONFIG_PROC_FS) += proc.o
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 6c7fa0853fc..5667b3003af 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -275,19 +275,14 @@ static int snmp6_alloc_dev(struct inet6_dev *idev)
{
int i;
- if (snmp_mib_init((void __percpu **)idev->stats.ipv6,
- sizeof(struct ipstats_mib),
- __alignof__(struct ipstats_mib)) < 0)
+ idev->stats.ipv6 = alloc_percpu(struct ipstats_mib);
+ if (!idev->stats.ipv6)
goto err_ip;
for_each_possible_cpu(i) {
struct ipstats_mib *addrconf_stats;
- addrconf_stats = per_cpu_ptr(idev->stats.ipv6[0], i);
+ addrconf_stats = per_cpu_ptr(idev->stats.ipv6, i);
u64_stats_init(&addrconf_stats->syncp);
-#if SNMP_ARRAY_SZ == 2
- addrconf_stats = per_cpu_ptr(idev->stats.ipv6[1], i);
- u64_stats_init(&addrconf_stats->syncp);
-#endif
}
@@ -305,7 +300,7 @@ static int snmp6_alloc_dev(struct inet6_dev *idev)
err_icmpmsg:
kfree(idev->stats.icmpv6dev);
err_icmp:
- snmp_mib_free((void __percpu **)idev->stats.ipv6);
+ free_percpu(idev->stats.ipv6);
err_ip:
return -ENOMEM;
}
@@ -2504,8 +2499,8 @@ static int inet6_addr_add(struct net *net, int ifindex,
return PTR_ERR(ifp);
}
-static int inet6_addr_del(struct net *net, int ifindex, const struct in6_addr *pfx,
- unsigned int plen)
+static int inet6_addr_del(struct net *net, int ifindex, u32 ifa_flags,
+ const struct in6_addr *pfx, unsigned int plen)
{
struct inet6_ifaddr *ifp;
struct inet6_dev *idev;
@@ -2528,7 +2523,12 @@ static int inet6_addr_del(struct net *net, int ifindex, const struct in6_addr *p
in6_ifa_hold(ifp);
read_unlock_bh(&idev->lock);
+ if (!(ifp->flags & IFA_F_TEMPORARY) &&
+ (ifa_flags & IFA_F_MANAGETEMPADDR))
+ manage_tempaddrs(idev, ifp, 0, 0, false,
+ jiffies);
ipv6_del_addr(ifp);
+ addrconf_verify_rtnl();
return 0;
}
}
@@ -2568,7 +2568,7 @@ int addrconf_del_ifaddr(struct net *net, void __user *arg)
return -EFAULT;
rtnl_lock();
- err = inet6_addr_del(net, ireq.ifr6_ifindex, &ireq.ifr6_addr,
+ err = inet6_addr_del(net, ireq.ifr6_ifindex, 0, &ireq.ifr6_addr,
ireq.ifr6_prefixlen);
rtnl_unlock();
return err;
@@ -2813,18 +2813,6 @@ static void addrconf_gre_config(struct net_device *dev)
}
#endif
-static inline int
-ipv6_inherit_linklocal(struct inet6_dev *idev, struct net_device *link_dev)
-{
- struct in6_addr lladdr;
-
- if (!ipv6_get_lladdr(link_dev, &lladdr, IFA_F_TENTATIVE)) {
- addrconf_add_linklocal(idev, &lladdr);
- return 0;
- }
- return -1;
-}
-
static int addrconf_notify(struct notifier_block *this, unsigned long event,
void *ptr)
{
@@ -3743,6 +3731,7 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
struct ifaddrmsg *ifm;
struct nlattr *tb[IFA_MAX+1];
struct in6_addr *pfx, *peer_pfx;
+ u32 ifa_flags;
int err;
err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
@@ -3754,7 +3743,13 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
if (pfx == NULL)
return -EINVAL;
- return inet6_addr_del(net, ifm->ifa_index, pfx, ifm->ifa_prefixlen);
+ ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) : ifm->ifa_flags;
+
+ /* We ignore other flags so far. */
+ ifa_flags &= IFA_F_MANAGETEMPADDR;
+
+ return inet6_addr_del(net, ifm->ifa_index, ifa_flags, pfx,
+ ifm->ifa_prefixlen);
}
static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags,
@@ -4363,7 +4358,7 @@ static inline void __snmp6_fill_statsdev(u64 *stats, atomic_long_t *mib,
memset(&stats[items], 0, pad);
}
-static inline void __snmp6_fill_stats64(u64 *stats, void __percpu **mib,
+static inline void __snmp6_fill_stats64(u64 *stats, void __percpu *mib,
int items, int bytes, size_t syncpoff)
{
int i;
@@ -4383,7 +4378,7 @@ static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype,
{
switch (attrtype) {
case IFLA_INET6_STATS:
- __snmp6_fill_stats64(stats, (void __percpu **)idev->stats.ipv6,
+ __snmp6_fill_stats64(stats, idev->stats.ipv6,
IPSTATS_MIB_MAX, bytes, offsetof(struct ipstats_mib, syncp));
break;
case IFLA_INET6_ICMP6STATS:
diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c
index 4c11cbcf830..e6960457f62 100644
--- a/net/ipv6/addrconf_core.c
+++ b/net/ipv6/addrconf_core.c
@@ -123,7 +123,7 @@ static void snmp6_free_dev(struct inet6_dev *idev)
{
kfree(idev->stats.icmpv6msgdev);
kfree(idev->stats.icmpv6dev);
- snmp_mib_free((void __percpu **)idev->stats.ipv6);
+ free_percpu(idev->stats.ipv6);
}
/* Nobody refers to this device, we may destroy it. */
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index b30ad3741b4..731e1e1722d 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -6,7 +6,7 @@
*/
/*
* Author:
- * YOSHIFUJI Hideaki @ USAGI/WIDE Project <yoshfuji@linux-ipv6.org>
+ * YOSHIFUJI Hideaki @ USAGI/WIDE Project <yoshfuji@linux-ipv6.org>
*/
#include <linux/kernel.h>
@@ -22,14 +22,13 @@
#if 0
#define ADDRLABEL(x...) printk(x)
#else
-#define ADDRLABEL(x...) do { ; } while(0)
+#define ADDRLABEL(x...) do { ; } while (0)
#endif
/*
* Policy Table
*/
-struct ip6addrlbl_entry
-{
+struct ip6addrlbl_entry {
#ifdef CONFIG_NET_NS
struct net *lbl_net;
#endif
@@ -88,39 +87,39 @@ static const __net_initconst struct ip6addrlbl_init_table
{ /* ::/0 */
.prefix = &in6addr_any,
.label = 1,
- },{ /* fc00::/7 */
- .prefix = &(struct in6_addr){{{ 0xfc }}},
+ }, { /* fc00::/7 */
+ .prefix = &(struct in6_addr){ { { 0xfc } } } ,
.prefixlen = 7,
.label = 5,
- },{ /* fec0::/10 */
- .prefix = &(struct in6_addr){{{ 0xfe, 0xc0 }}},
+ }, { /* fec0::/10 */
+ .prefix = &(struct in6_addr){ { { 0xfe, 0xc0 } } },
.prefixlen = 10,
.label = 11,
- },{ /* 2002::/16 */
- .prefix = &(struct in6_addr){{{ 0x20, 0x02 }}},
+ }, { /* 2002::/16 */
+ .prefix = &(struct in6_addr){ { { 0x20, 0x02 } } },
.prefixlen = 16,
.label = 2,
- },{ /* 3ffe::/16 */
- .prefix = &(struct in6_addr){{{ 0x3f, 0xfe }}},
+ }, { /* 3ffe::/16 */
+ .prefix = &(struct in6_addr){ { { 0x3f, 0xfe } } },
.prefixlen = 16,
.label = 12,
- },{ /* 2001::/32 */
- .prefix = &(struct in6_addr){{{ 0x20, 0x01 }}},
+ }, { /* 2001::/32 */
+ .prefix = &(struct in6_addr){ { { 0x20, 0x01 } } },
.prefixlen = 32,
.label = 6,
- },{ /* 2001:10::/28 */
- .prefix = &(struct in6_addr){{{ 0x20, 0x01, 0x00, 0x10 }}},
+ }, { /* 2001:10::/28 */
+ .prefix = &(struct in6_addr){ { { 0x20, 0x01, 0x00, 0x10 } } },
.prefixlen = 28,
.label = 7,
- },{ /* ::ffff:0:0 */
- .prefix = &(struct in6_addr){{{ [10] = 0xff, [11] = 0xff }}},
+ }, { /* ::ffff:0:0 */
+ .prefix = &(struct in6_addr){ { { [10] = 0xff, [11] = 0xff } } },
.prefixlen = 96,
.label = 4,
- },{ /* ::/96 */
+ }, { /* ::/96 */
.prefix = &in6addr_any,
.prefixlen = 96,
.label = 3,
- },{ /* ::1/128 */
+ }, { /* ::1/128 */
.prefix = &in6addr_loopback,
.prefixlen = 128,
.label = 0,
@@ -441,7 +440,7 @@ static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh)
if (label == IPV6_ADDR_LABEL_DEFAULT)
return -EINVAL;
- switch(nlh->nlmsg_type) {
+ switch (nlh->nlmsg_type) {
case RTM_NEWADDRLABEL:
if (ifal->ifal_index &&
!__dev_get_by_index(net, ifal->ifal_index))
@@ -505,12 +504,13 @@ static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb)
hlist_for_each_entry_rcu(p, &ip6addrlbl_table.head, list) {
if (idx >= s_idx &&
net_eq(ip6addrlbl_net(p), net)) {
- if ((err = ip6addrlbl_fill(skb, p,
- ip6addrlbl_table.seq,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- RTM_NEWADDRLABEL,
- NLM_F_MULTI)) <= 0)
+ err = ip6addrlbl_fill(skb, p,
+ ip6addrlbl_table.seq,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ RTM_NEWADDRLABEL,
+ NLM_F_MULTI);
+ if (err <= 0)
break;
}
idx++;
@@ -527,7 +527,7 @@ static inline int ip6addrlbl_msgsize(void)
+ nla_total_size(4); /* IFAL_LABEL */
}
-static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh)
+static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(in_skb->sk);
struct ifaddrlblmsg *ifal;
@@ -568,7 +568,8 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh)
goto out;
}
- if (!(skb = nlmsg_new(ip6addrlbl_msgsize(), GFP_KERNEL))) {
+ skb = nlmsg_new(ip6addrlbl_msgsize(), GFP_KERNEL);
+ if (!skb) {
ip6addrlbl_put(p);
return -ENOBUFS;
}
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index d935889f100..7cb4392690d 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -106,7 +106,6 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol,
struct inet_protosw *answer;
struct proto *answer_prot;
unsigned char answer_flags;
- char answer_no_check;
int try_loading_module = 0;
int err;
@@ -162,7 +161,6 @@ lookup_protocol:
sock->ops = answer->ops;
answer_prot = answer->prot;
- answer_no_check = answer->no_check;
answer_flags = answer->flags;
rcu_read_unlock();
@@ -176,7 +174,6 @@ lookup_protocol:
sock_init_data(sock, sk);
err = 0;
- sk->sk_no_check = answer_no_check;
if (INET_PROTOSW_REUSE & answer_flags)
sk->sk_reuse = SK_CAN_REUSE;
@@ -715,33 +712,25 @@ static int __net_init ipv6_init_mibs(struct net *net)
{
int i;
- if (snmp_mib_init((void __percpu **)net->mib.udp_stats_in6,
- sizeof(struct udp_mib),
- __alignof__(struct udp_mib)) < 0)
+ net->mib.udp_stats_in6 = alloc_percpu(struct udp_mib);
+ if (!net->mib.udp_stats_in6)
return -ENOMEM;
- if (snmp_mib_init((void __percpu **)net->mib.udplite_stats_in6,
- sizeof(struct udp_mib),
- __alignof__(struct udp_mib)) < 0)
+ net->mib.udplite_stats_in6 = alloc_percpu(struct udp_mib);
+ if (!net->mib.udplite_stats_in6)
goto err_udplite_mib;
- if (snmp_mib_init((void __percpu **)net->mib.ipv6_statistics,
- sizeof(struct ipstats_mib),
- __alignof__(struct ipstats_mib)) < 0)
+ net->mib.ipv6_statistics = alloc_percpu(struct ipstats_mib);
+ if (!net->mib.ipv6_statistics)
goto err_ip_mib;
for_each_possible_cpu(i) {
struct ipstats_mib *af_inet6_stats;
- af_inet6_stats = per_cpu_ptr(net->mib.ipv6_statistics[0], i);
+ af_inet6_stats = per_cpu_ptr(net->mib.ipv6_statistics, i);
u64_stats_init(&af_inet6_stats->syncp);
-#if SNMP_ARRAY_SZ == 2
- af_inet6_stats = per_cpu_ptr(net->mib.ipv6_statistics[1], i);
- u64_stats_init(&af_inet6_stats->syncp);
-#endif
}
- if (snmp_mib_init((void __percpu **)net->mib.icmpv6_statistics,
- sizeof(struct icmpv6_mib),
- __alignof__(struct icmpv6_mib)) < 0)
+ net->mib.icmpv6_statistics = alloc_percpu(struct icmpv6_mib);
+ if (!net->mib.icmpv6_statistics)
goto err_icmp_mib;
net->mib.icmpv6msg_statistics = kzalloc(sizeof(struct icmpv6msg_mib),
GFP_KERNEL);
@@ -750,22 +739,22 @@ static int __net_init ipv6_init_mibs(struct net *net)
return 0;
err_icmpmsg_mib:
- snmp_mib_free((void __percpu **)net->mib.icmpv6_statistics);
+ free_percpu(net->mib.icmpv6_statistics);
err_icmp_mib:
- snmp_mib_free((void __percpu **)net->mib.ipv6_statistics);
+ free_percpu(net->mib.ipv6_statistics);
err_ip_mib:
- snmp_mib_free((void __percpu **)net->mib.udplite_stats_in6);
+ free_percpu(net->mib.udplite_stats_in6);
err_udplite_mib:
- snmp_mib_free((void __percpu **)net->mib.udp_stats_in6);
+ free_percpu(net->mib.udp_stats_in6);
return -ENOMEM;
}
static void ipv6_cleanup_mibs(struct net *net)
{
- snmp_mib_free((void __percpu **)net->mib.udp_stats_in6);
- snmp_mib_free((void __percpu **)net->mib.udplite_stats_in6);
- snmp_mib_free((void __percpu **)net->mib.ipv6_statistics);
- snmp_mib_free((void __percpu **)net->mib.icmpv6_statistics);
+ free_percpu(net->mib.udp_stats_in6);
+ free_percpu(net->mib.udplite_stats_in6);
+ free_percpu(net->mib.ipv6_statistics);
+ free_percpu(net->mib.icmpv6_statistics);
kfree(net->mib.icmpv6msg_statistics);
}
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 81e496a2e00..72a4930bdc0 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -346,6 +346,10 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
struct ip_auth_hdr *ah;
struct ah_data *ahp;
struct tmp_ext *iph_ext;
+ int seqhi_len = 0;
+ __be32 *seqhi;
+ int sglists = 0;
+ struct scatterlist *seqhisg;
ahp = x->data;
ahash = ahp->ahash;
@@ -359,15 +363,22 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
if (extlen)
extlen += sizeof(*iph_ext);
+ if (x->props.flags & XFRM_STATE_ESN) {
+ sglists = 1;
+ seqhi_len = sizeof(*seqhi);
+ }
err = -ENOMEM;
- iph_base = ah_alloc_tmp(ahash, nfrags, IPV6HDR_BASELEN + extlen);
+ iph_base = ah_alloc_tmp(ahash, nfrags + sglists, IPV6HDR_BASELEN +
+ extlen + seqhi_len);
if (!iph_base)
goto out;
iph_ext = ah_tmp_ext(iph_base);
- icv = ah_tmp_icv(ahash, iph_ext, extlen);
+ seqhi = (__be32 *)((char *)iph_ext + extlen);
+ icv = ah_tmp_icv(ahash, seqhi, seqhi_len);
req = ah_tmp_req(ahash, icv);
sg = ah_req_sg(ahash, req);
+ seqhisg = sg + nfrags;
ah = ip_auth_hdr(skb);
memset(ah->auth_data, 0, ahp->icv_trunc_len);
@@ -411,10 +422,15 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
ah->spi = x->id.spi;
ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
- sg_init_table(sg, nfrags);
- skb_to_sgvec(skb, sg, 0, skb->len);
+ sg_init_table(sg, nfrags + sglists);
+ skb_to_sgvec_nomark(skb, sg, 0, skb->len);
- ahash_request_set_crypt(req, sg, icv, skb->len);
+ if (x->props.flags & XFRM_STATE_ESN) {
+ /* Attach seqhi sg right after packet payload */
+ *seqhi = htonl(XFRM_SKB_CB(skb)->seq.output.hi);
+ sg_set_buf(seqhisg, seqhi, seqhi_len);
+ }
+ ahash_request_set_crypt(req, sg, icv, skb->len + seqhi_len);
ahash_request_set_callback(req, 0, ah6_output_done, skb);
AH_SKB_CB(skb)->tmp = iph_base;
@@ -514,6 +530,10 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
int nexthdr;
int nfrags;
int err = -ENOMEM;
+ int seqhi_len = 0;
+ __be32 *seqhi;
+ int sglists = 0;
+ struct scatterlist *seqhisg;
if (!pskb_may_pull(skb, sizeof(struct ip_auth_hdr)))
goto out;
@@ -550,14 +570,22 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
skb_push(skb, hdr_len);
- work_iph = ah_alloc_tmp(ahash, nfrags, hdr_len + ahp->icv_trunc_len);
+ if (x->props.flags & XFRM_STATE_ESN) {
+ sglists = 1;
+ seqhi_len = sizeof(*seqhi);
+ }
+
+ work_iph = ah_alloc_tmp(ahash, nfrags + sglists, hdr_len +
+ ahp->icv_trunc_len + seqhi_len);
if (!work_iph)
goto out;
- auth_data = ah_tmp_auth(work_iph, hdr_len);
- icv = ah_tmp_icv(ahash, auth_data, ahp->icv_trunc_len);
+ auth_data = ah_tmp_auth((u8 *)work_iph, hdr_len);
+ seqhi = (__be32 *)(auth_data + ahp->icv_trunc_len);
+ icv = ah_tmp_icv(ahash, seqhi, seqhi_len);
req = ah_tmp_req(ahash, icv);
sg = ah_req_sg(ahash, req);
+ seqhisg = sg + nfrags;
memcpy(work_iph, ip6h, hdr_len);
memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len);
@@ -572,10 +600,16 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
ip6h->flow_lbl[2] = 0;
ip6h->hop_limit = 0;
- sg_init_table(sg, nfrags);
- skb_to_sgvec(skb, sg, 0, skb->len);
+ sg_init_table(sg, nfrags + sglists);
+ skb_to_sgvec_nomark(skb, sg, 0, skb->len);
+
+ if (x->props.flags & XFRM_STATE_ESN) {
+ /* Attach seqhi sg right after packet payload */
+ *seqhi = XFRM_SKB_CB(skb)->seq.input.hi;
+ sg_set_buf(seqhisg, seqhi, seqhi_len);
+ }
- ahash_request_set_crypt(req, sg, icv, skb->len);
+ ahash_request_set_crypt(req, sg, icv, skb->len + seqhi_len);
ahash_request_set_callback(req, 0, ah6_input_done, skb);
AH_SKB_CB(skb)->tmp = work_iph;
@@ -609,8 +643,8 @@ out:
return err;
}
-static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
- u8 type, u8 code, int offset, __be32 info)
+static int ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ u8 type, u8 code, int offset, __be32 info)
{
struct net *net = dev_net(skb->dev);
struct ipv6hdr *iph = (struct ipv6hdr*)skb->data;
@@ -619,17 +653,19 @@ static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (type != ICMPV6_PKT_TOOBIG &&
type != NDISC_REDIRECT)
- return;
+ return 0;
x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET6);
if (!x)
- return;
+ return 0;
if (type == NDISC_REDIRECT)
ip6_redirect(skb, net, skb->dev->ifindex, 0);
else
ip6_update_pmtu(skb, net, info, 0, 0);
xfrm_state_put(x);
+
+ return 0;
}
static int ah6_init_state(struct xfrm_state *x)
@@ -714,6 +750,11 @@ static void ah6_destroy(struct xfrm_state *x)
kfree(ahp);
}
+/* cb_handler for ah6_protocol: ignores both arguments and always returns 0. */
+static int ah6_rcv_cb(struct sk_buff *skb, int err)
+{
+ return 0;
+}
+
static const struct xfrm_type ah6_type =
{
.description = "AH6",
@@ -727,10 +768,11 @@ static const struct xfrm_type ah6_type =
.hdr_offset = xfrm6_find_1stfragopt,
};
-static const struct inet6_protocol ah6_protocol = {
+static struct xfrm6_protocol ah6_protocol = {
.handler = xfrm6_rcv,
+ .cb_handler = ah6_rcv_cb,
.err_handler = ah6_err,
- .flags = INET6_PROTO_NOPOLICY,
+ .priority = 0,
};
static int __init ah6_init(void)
@@ -740,7 +782,7 @@ static int __init ah6_init(void)
return -EAGAIN;
}
- if (inet6_add_protocol(&ah6_protocol, IPPROTO_AH) < 0) {
+ if (xfrm6_protocol_register(&ah6_protocol, IPPROTO_AH) < 0) {
pr_info("%s: can't add protocol\n", __func__);
xfrm_unregister_type(&ah6_type, AF_INET6);
return -EAGAIN;
@@ -751,7 +793,7 @@ static int __init ah6_init(void)
static void __exit ah6_fini(void)
{
- if (inet6_del_protocol(&ah6_protocol, IPPROTO_AH) < 0)
+ if (xfrm6_protocol_deregister(&ah6_protocol, IPPROTO_AH) < 0)
pr_info("%s: can't remove protocol\n", __func__);
if (xfrm_unregister_type(&ah6_type, AF_INET6) < 0)
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 6eef8a7e35f..d15da137714 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -421,8 +421,8 @@ static u32 esp6_get_mtu(struct xfrm_state *x, int mtu)
net_adj) & ~(blksize - 1)) + net_adj - 2;
}
-static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
- u8 type, u8 code, int offset, __be32 info)
+static int esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ u8 type, u8 code, int offset, __be32 info)
{
struct net *net = dev_net(skb->dev);
const struct ipv6hdr *iph = (const struct ipv6hdr *)skb->data;
@@ -431,18 +431,20 @@ static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (type != ICMPV6_PKT_TOOBIG &&
type != NDISC_REDIRECT)
- return;
+ return 0;
x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
esph->spi, IPPROTO_ESP, AF_INET6);
if (!x)
- return;
+ return 0;
if (type == NDISC_REDIRECT)
ip6_redirect(skb, net, skb->dev->ifindex, 0);
else
ip6_update_pmtu(skb, net, info, 0, 0);
xfrm_state_put(x);
+
+ return 0;
}
static void esp6_destroy(struct xfrm_state *x)
@@ -614,6 +616,11 @@ error:
return err;
}
+/* cb_handler for esp6_protocol: ignores both arguments and always returns 0. */
+static int esp6_rcv_cb(struct sk_buff *skb, int err)
+{
+ return 0;
+}
+
static const struct xfrm_type esp6_type =
{
.description = "ESP6",
@@ -628,10 +635,11 @@ static const struct xfrm_type esp6_type =
.hdr_offset = xfrm6_find_1stfragopt,
};
-static const struct inet6_protocol esp6_protocol = {
- .handler = xfrm6_rcv,
+static struct xfrm6_protocol esp6_protocol = {
+ .handler = xfrm6_rcv,
+ .cb_handler = esp6_rcv_cb,
.err_handler = esp6_err,
- .flags = INET6_PROTO_NOPOLICY,
+ .priority = 0,
};
static int __init esp6_init(void)
@@ -640,7 +648,7 @@ static int __init esp6_init(void)
pr_info("%s: can't add xfrm type\n", __func__);
return -EAGAIN;
}
- if (inet6_add_protocol(&esp6_protocol, IPPROTO_ESP) < 0) {
+ if (xfrm6_protocol_register(&esp6_protocol, IPPROTO_ESP) < 0) {
pr_info("%s: can't add protocol\n", __func__);
xfrm_unregister_type(&esp6_type, AF_INET6);
return -EAGAIN;
@@ -651,7 +659,7 @@ static int __init esp6_init(void)
static void __exit esp6_fini(void)
{
- if (inet6_del_protocol(&esp6_protocol, IPPROTO_ESP) < 0)
+ if (xfrm6_protocol_deregister(&esp6_protocol, IPPROTO_ESP) < 0)
pr_info("%s: can't remove protocol\n", __func__);
if (xfrm_unregister_type(&esp6_type, AF_INET6) < 0)
pr_info("%s: can't remove xfrm type\n", __func__);
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index f2610e15766..f6c84a6eb23 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -400,6 +400,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
int len;
int hlimit;
int err = 0;
+ u32 mark = IP6_REPLY_MARK(net, skb->mark);
if ((u8 *)hdr < skb->head ||
(skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
@@ -466,6 +467,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
fl6.daddr = hdr->saddr;
if (saddr)
fl6.saddr = *saddr;
+ fl6.flowi6_mark = mark;
fl6.flowi6_oif = iif;
fl6.fl6_icmp_type = type;
fl6.fl6_icmp_code = code;
@@ -474,6 +476,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
sk = icmpv6_xmit_lock(net);
if (sk == NULL)
return;
+ sk->sk_mark = mark;
np = inet6_sk(sk);
if (!icmpv6_xrlim_allow(sk, type, &fl6))
@@ -493,12 +496,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
if (IS_ERR(dst))
goto out;
- if (ipv6_addr_is_multicast(&fl6.daddr))
- hlimit = np->mcast_hops;
- else
- hlimit = np->hop_limit;
- if (hlimit < 0)
- hlimit = ip6_dst_hoplimit(dst);
+ hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
msg.skb = skb;
msg.offset = skb_network_offset(skb);
@@ -520,7 +518,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
np->tclass, NULL, &fl6, (struct rt6_info *)dst,
MSG_DONTWAIT, np->dontfrag);
if (err) {
- ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS);
+ ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
ip6_flush_pending_frames(sk);
} else {
err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
@@ -556,6 +554,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
int err = 0;
int hlimit;
u8 tclass;
+ u32 mark = IP6_REPLY_MARK(net, skb->mark);
saddr = &ipv6_hdr(skb)->daddr;
@@ -574,11 +573,13 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
fl6.saddr = *saddr;
fl6.flowi6_oif = skb->dev->ifindex;
fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
+ fl6.flowi6_mark = mark;
security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
sk = icmpv6_xmit_lock(net);
if (sk == NULL)
return;
+ sk->sk_mark = mark;
np = inet6_sk(sk);
if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
@@ -593,12 +594,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
if (IS_ERR(dst))
goto out;
- if (ipv6_addr_is_multicast(&fl6.daddr))
- hlimit = np->mcast_hops;
- else
- hlimit = np->hop_limit;
- if (hlimit < 0)
- hlimit = ip6_dst_hoplimit(dst);
+ hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
idev = __in6_dev_get(skb->dev);
@@ -702,22 +698,11 @@ static int icmpv6_rcv(struct sk_buff *skb)
saddr = &ipv6_hdr(skb)->saddr;
daddr = &ipv6_hdr(skb)->daddr;
- /* Perform checksum. */
- switch (skb->ip_summed) {
- case CHECKSUM_COMPLETE:
- if (!csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6,
- skb->csum))
- break;
- /* fall through */
- case CHECKSUM_NONE:
- skb->csum = ~csum_unfold(csum_ipv6_magic(saddr, daddr, skb->len,
- IPPROTO_ICMPV6, 0));
- if (__skb_checksum_complete(skb)) {
- LIMIT_NETDEBUG(KERN_DEBUG
- "ICMPv6 checksum failed [%pI6c > %pI6c]\n",
- saddr, daddr);
- goto csum_error;
- }
+ if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) {
+ LIMIT_NETDEBUG(KERN_DEBUG
+ "ICMPv6 checksum failed [%pI6c > %pI6c]\n",
+ saddr, daddr);
+ goto csum_error;
}
if (!pskb_pull(skb, sizeof(*hdr)))
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index c9138189415..a245e5ddffb 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -81,7 +81,7 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk,
final_p = fl6_update_dst(fl6, np->opt, &final);
fl6->saddr = ireq->ir_v6_loc_addr;
fl6->flowi6_oif = ireq->ir_iif;
- fl6->flowi6_mark = sk->sk_mark;
+ fl6->flowi6_mark = ireq->ir_mark;
fl6->fl6_dport = ireq->ir_rmt_port;
fl6->fl6_sport = htons(ireq->ir_num);
security_req_classify_flow(req, flowi6_to_flowi(fl6));
@@ -224,9 +224,8 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk,
return dst;
}
-int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused)
+int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl_unused)
{
- struct sock *sk = skb->sk;
struct ipv6_pinfo *np = inet6_sk(sk);
struct flowi6 fl6;
struct dst_entry *dst;
diff --git a/net/ipv6/ip6_checksum.c b/net/ipv6/ip6_checksum.c
index 72d198b8e4d..9a4d7322fb2 100644
--- a/net/ipv6/ip6_checksum.c
+++ b/net/ipv6/ip6_checksum.c
@@ -75,23 +75,50 @@ int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto)
return err;
}
- if (uh->check == 0) {
- /* RFC 2460 section 8.1 says that we SHOULD log
- this error. Well, it is reasonable.
- */
- LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0\n");
- return 1;
- }
- if (skb->ip_summed == CHECKSUM_COMPLETE &&
- !csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
- skb->len, proto, skb->csum))
- skb->ip_summed = CHECKSUM_UNNECESSARY;
+ /* To support RFC 6936 (allow zero checksum in UDP/IPV6 for tunnels)
+ * we accept a checksum of zero here. When we find the socket
+ * for the UDP packet we'll check if that socket allows zero checksum
+ * for IPv6 (set by socket option).
+ */
+ return skb_checksum_init_zero_check(skb, proto, uh->check,
+ ip6_compute_pseudo);
+}
+EXPORT_SYMBOL(udp6_csum_init);
+
+/* Function to set UDP checksum for an IPv6 UDP packet. This is intended
+ * for the simple case like when setting the checksum for a UDP tunnel.
+ */
+void udp6_set_csum(bool nocheck, struct sk_buff *skb,
+ const struct in6_addr *saddr,
+ const struct in6_addr *daddr, int len)
+{
+ struct udphdr *uh = udp_hdr(skb);
+
+ if (nocheck)
+ uh->check = 0;
+ else if (skb_is_gso(skb))
+ uh->check = ~udp_v6_check(len, saddr, daddr, 0);
+ else if (skb_dst(skb) && skb_dst(skb)->dev &&
+ (skb_dst(skb)->dev->features & NETIF_F_IPV6_CSUM)) {
- if (!skb_csum_unnecessary(skb))
- skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
- &ipv6_hdr(skb)->daddr,
- skb->len, proto, 0));
+ BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
- return 0;
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ skb->csum_start = skb_transport_header(skb) - skb->head;
+ skb->csum_offset = offsetof(struct udphdr, check);
+ uh->check = ~udp_v6_check(len, saddr, daddr, 0);
+ } else {
+ __wsum csum;
+
+ BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
+
+ uh->check = 0;
+ csum = skb_checksum(skb, 0, len, 0);
+ uh->check = udp_v6_check(len, saddr, daddr, csum);
+ if (uh->check == 0)
+ uh->check = CSUM_MANGLED_0;
+
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ }
}
-EXPORT_SYMBOL(udp6_csum_init);
+EXPORT_SYMBOL(udp6_set_csum);
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 075602fc6b6..cb4459bd1d2 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -9,14 +9,12 @@
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
- */
-
-/*
- * Changes:
- * Yuji SEKIYA @USAGI: Support default route on router node;
- * remove ip6_null_entry from the top of
- * routing table.
- * Ville Nuorvala: Fixed routing subtrees.
+ *
+ * Changes:
+ * Yuji SEKIYA @USAGI: Support default route on router node;
+ * remove ip6_null_entry from the top of
+ * routing table.
+ * Ville Nuorvala: Fixed routing subtrees.
*/
#define pr_fmt(fmt) "IPv6: " fmt
@@ -46,10 +44,9 @@
#define RT6_TRACE(x...) do { ; } while (0)
#endif
-static struct kmem_cache * fib6_node_kmem __read_mostly;
+static struct kmem_cache *fib6_node_kmem __read_mostly;
-enum fib_walk_state_t
-{
+enum fib_walk_state_t {
#ifdef CONFIG_IPV6_SUBTREES
FWS_S,
#endif
@@ -59,8 +56,7 @@ enum fib_walk_state_t
FWS_U
};
-struct fib6_cleaner_t
-{
+struct fib6_cleaner_t {
struct fib6_walker_t w;
struct net *net;
int (*func)(struct rt6_info *, void *arg);
@@ -75,8 +71,7 @@ static DEFINE_RWLOCK(fib6_walker_lock);
#define FWS_INIT FWS_L
#endif
-static void fib6_prune_clones(struct net *net, struct fib6_node *fn,
- struct rt6_info *rt);
+static void fib6_prune_clones(struct net *net, struct fib6_node *fn);
static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn);
static struct fib6_node *fib6_repair_tree(struct net *net, struct fib6_node *fn);
static int fib6_walk(struct fib6_walker_t *w);
@@ -138,7 +133,7 @@ static __inline__ __be32 addr_bit_set(const void *token, int fn_bit)
const __be32 *addr = token;
/*
* Here,
- * 1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f)
+ * 1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f)
* is optimized version of
* htonl(1 << ((~fn_bit)&0x1F))
* See include/asm-generic/bitops/le.h.
@@ -147,7 +142,7 @@ static __inline__ __be32 addr_bit_set(const void *token, int fn_bit)
addr[fn_bit >> 5];
}
-static __inline__ struct fib6_node * node_alloc(void)
+static __inline__ struct fib6_node *node_alloc(void)
{
struct fib6_node *fn;
@@ -156,7 +151,7 @@ static __inline__ struct fib6_node * node_alloc(void)
return fn;
}
-static __inline__ void node_free(struct fib6_node * fn)
+static __inline__ void node_free(struct fib6_node *fn)
{
kmem_cache_free(fib6_node_kmem, fn);
}
@@ -292,7 +287,7 @@ static int fib6_dump_node(struct fib6_walker_t *w)
static void fib6_dump_end(struct netlink_callback *cb)
{
- struct fib6_walker_t *w = (void*)cb->args[2];
+ struct fib6_walker_t *w = (void *)cb->args[2];
if (w) {
if (cb->args[4]) {
@@ -302,7 +297,7 @@ static void fib6_dump_end(struct netlink_callback *cb)
cb->args[2] = 0;
kfree(w);
}
- cb->done = (void*)cb->args[3];
+ cb->done = (void *)cb->args[3];
cb->args[1] = 3;
}
@@ -485,7 +480,7 @@ static struct fib6_node *fib6_add_1(struct fib6_node *root,
fn->fn_sernum = sernum;
dir = addr_bit_set(addr, fn->fn_bit);
pn = fn;
- fn = dir ? fn->right: fn->left;
+ fn = dir ? fn->right : fn->left;
} while (fn);
if (!allow_create) {
@@ -638,12 +633,41 @@ static inline bool rt6_qualify_for_ecmp(struct rt6_info *rt)
RTF_GATEWAY;
}
+/*
+ * fib6_commit_metrics - copy RTAX_* metrics from netlink attributes to @dst
+ * @dst: route entry whose metrics array is filled in
+ * @mx: start of the metrics attribute stream
+ * @mx_len: length of the attribute stream in bytes
+ *
+ * Returns 0 on success, -EINVAL if any attribute type is above RTAX_MAX,
+ * or -ENOMEM if no writable metrics array could be obtained. Every
+ * attribute type is checked before anything is allocated or written, so
+ * a rejected request leaves @dst untouched.
+ */
+static int fib6_commit_metrics(struct dst_entry *dst,
+ struct nlattr *mx, int mx_len)
+{
+ struct nlattr *nla;
+ int remaining;
+ u32 *mp;
+
+ /* Validate first so -EINVAL never leaves half-applied metrics. */
+ nla_for_each_attr(nla, mx, mx_len, remaining) {
+ if (nla_type(nla) > RTAX_MAX)
+ return -EINVAL;
+ }
+
+ if (dst->flags & DST_HOST) {
+ mp = dst_metrics_write_ptr(dst);
+ /* NOTE(review): presumably NULL when the writable copy of the
+ * metrics cannot be set up; confirm against include/net/dst.h.
+ */
+ if (!mp)
+ return -ENOMEM;
+ } else {
+ /* NOTE(review): GFP_KERNEL may sleep; confirm that no caller
+ * reaches this point while holding a spinlock.
+ */
+ mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
+ if (!mp)
+ return -ENOMEM;
+ dst_init_metrics(dst, mp, 0);
+ }
+
+ nla_for_each_attr(nla, mx, mx_len, remaining) {
+ int type = nla_type(nla);
+
+ /* Type 0 carries no metric and is skipped. */
+ if (type)
+ mp[type - 1] = nla_get_u32(nla);
+ }
+ return 0;
+}
+
/*
* Insert routing information in a node.
*/
static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
- struct nl_info *info)
+ struct nl_info *info, struct nlattr *mx, int mx_len)
{
struct rt6_info *iter = NULL;
struct rt6_info **ins;
@@ -653,6 +677,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
(info->nlh->nlmsg_flags & NLM_F_CREATE));
int found = 0;
bool rt_can_ecmp = rt6_qualify_for_ecmp(rt);
+ int err;
ins = &fn->leaf;
@@ -751,6 +776,11 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
pr_warn("NLM_F_CREATE should be set when creating new route\n");
add:
+ if (mx) {
+ err = fib6_commit_metrics(&rt->dst, mx, mx_len);
+ if (err)
+ return err;
+ }
rt->dst.rt6_next = iter;
*ins = rt;
rt->rt6i_node = fn;
@@ -770,6 +800,11 @@ add:
pr_warn("NLM_F_REPLACE set, but no existing node found!\n");
return -ENOENT;
}
+ if (mx) {
+ err = fib6_commit_metrics(&rt->dst, mx, mx_len);
+ if (err)
+ return err;
+ }
*ins = rt;
rt->rt6i_node = fn;
rt->dst.rt6_next = iter->dst.rt6_next;
@@ -806,7 +841,8 @@ void fib6_force_start_gc(struct net *net)
* with source addr info in sub-trees
*/
-int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info)
+int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info,
+ struct nlattr *mx, int mx_len)
{
struct fib6_node *fn, *pn = NULL;
int err = -ENOMEM;
@@ -900,11 +936,11 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info)
}
#endif
- err = fib6_add_rt2node(fn, rt, info);
+ err = fib6_add_rt2node(fn, rt, info, mx, mx_len);
if (!err) {
fib6_start_gc(info->nl_net, rt);
if (!(rt->rt6i_flags & RTF_CACHE))
- fib6_prune_clones(info->nl_net, pn, rt);
+ fib6_prune_clones(info->nl_net, pn);
}
out:
@@ -955,8 +991,8 @@ struct lookup_args {
const struct in6_addr *addr; /* search key */
};
-static struct fib6_node * fib6_lookup_1(struct fib6_node *root,
- struct lookup_args *args)
+static struct fib6_node *fib6_lookup_1(struct fib6_node *root,
+ struct lookup_args *args)
{
struct fib6_node *fn;
__be32 dir;
@@ -1018,8 +1054,8 @@ backtrack:
return NULL;
}
-struct fib6_node * fib6_lookup(struct fib6_node *root, const struct in6_addr *daddr,
- const struct in6_addr *saddr)
+struct fib6_node *fib6_lookup(struct fib6_node *root, const struct in6_addr *daddr,
+ const struct in6_addr *saddr)
{
struct fib6_node *fn;
struct lookup_args args[] = {
@@ -1051,9 +1087,9 @@ struct fib6_node * fib6_lookup(struct fib6_node *root, const struct in6_addr *da
*/
-static struct fib6_node * fib6_locate_1(struct fib6_node *root,
- const struct in6_addr *addr,
- int plen, int offset)
+static struct fib6_node *fib6_locate_1(struct fib6_node *root,
+ const struct in6_addr *addr,
+ int plen, int offset)
{
struct fib6_node *fn;
@@ -1081,9 +1117,9 @@ static struct fib6_node * fib6_locate_1(struct fib6_node *root,
return NULL;
}
-struct fib6_node * fib6_locate(struct fib6_node *root,
- const struct in6_addr *daddr, int dst_len,
- const struct in6_addr *saddr, int src_len)
+struct fib6_node *fib6_locate(struct fib6_node *root,
+ const struct in6_addr *daddr, int dst_len,
+ const struct in6_addr *saddr, int src_len)
{
struct fib6_node *fn;
@@ -1151,8 +1187,10 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
children = 0;
child = NULL;
- if (fn->right) child = fn->right, children |= 1;
- if (fn->left) child = fn->left, children |= 2;
+ if (fn->right)
+ child = fn->right, children |= 1;
+ if (fn->left)
+ child = fn->left, children |= 2;
if (children == 3 || FIB6_SUBTREE(fn)
#ifdef CONFIG_IPV6_SUBTREES
@@ -1180,8 +1218,10 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
} else {
WARN_ON(fn->fn_flags & RTN_ROOT);
#endif
- if (pn->right == fn) pn->right = child;
- else if (pn->left == fn) pn->left = child;
+ if (pn->right == fn)
+ pn->right = child;
+ else if (pn->left == fn)
+ pn->left = child;
#if RT6_DEBUG >= 2
else
WARN_ON(1);
@@ -1213,10 +1253,10 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
w->node = child;
if (children&2) {
RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state);
- w->state = w->state>=FWS_R ? FWS_U : FWS_INIT;
+ w->state = w->state >= FWS_R ? FWS_U : FWS_INIT;
} else {
RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state);
- w->state = w->state>=FWS_C ? FWS_U : FWS_INIT;
+ w->state = w->state >= FWS_C ? FWS_U : FWS_INIT;
}
}
}
@@ -1314,7 +1354,7 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info)
struct rt6_info **rtp;
#if RT6_DEBUG >= 2
- if (rt->dst.obsolete>0) {
+ if (rt->dst.obsolete > 0) {
WARN_ON(fn != NULL);
return -ENOENT;
}
@@ -1334,7 +1374,7 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info)
pn = pn->parent;
}
#endif
- fib6_prune_clones(info->nl_net, pn, rt);
+ fib6_prune_clones(info->nl_net, pn);
}
/*
@@ -1418,7 +1458,7 @@ static int fib6_walk_continue(struct fib6_walker_t *w)
if (w->skip) {
w->skip--;
- continue;
+ goto skip;
}
err = w->func(w);
@@ -1428,6 +1468,7 @@ static int fib6_walk_continue(struct fib6_walker_t *w)
w->count++;
continue;
}
+skip:
w->state = FWS_U;
case FWS_U:
if (fn == w->root)
@@ -1559,10 +1600,9 @@ static int fib6_prune_clone(struct rt6_info *rt, void *arg)
return 0;
}
-static void fib6_prune_clones(struct net *net, struct fib6_node *fn,
- struct rt6_info *rt)
+static void fib6_prune_clones(struct net *net, struct fib6_node *fn)
{
- fib6_clean_tree(net, fn, fib6_prune_clone, 1, rt);
+ fib6_clean_tree(net, fn, fib6_prune_clone, 1, NULL);
}
/*
@@ -1707,7 +1747,7 @@ out_rt6_stats:
kfree(net->ipv6.rt6_stats);
out_timer:
return -ENOMEM;
- }
+}
static void fib6_net_exit(struct net *net)
{
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index dfa41bb4e0d..4052694c6f2 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -15,9 +15,7 @@
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
-#include <linux/if_arp.h>
#include <linux/in6.h>
-#include <linux/route.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
@@ -28,12 +26,7 @@
#include <net/sock.h>
#include <net/ipv6.h>
-#include <net/ndisc.h>
-#include <net/protocol.h>
-#include <net/ip6_route.h>
-#include <net/addrconf.h>
#include <net/rawv6.h>
-#include <net/icmp.h>
#include <net/transp_v6.h>
#include <asm/uaccess.h>
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index f3ffb43f59c..3873181ed85 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -72,6 +72,7 @@ struct ip6gre_net {
};
static struct rtnl_link_ops ip6gre_link_ops __read_mostly;
+static struct rtnl_link_ops ip6gre_tap_ops __read_mostly;
static int ip6gre_tunnel_init(struct net_device *dev);
static void ip6gre_tunnel_setup(struct net_device *dev);
static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t);
@@ -353,10 +354,10 @@ failed_free:
static void ip6gre_tunnel_uninit(struct net_device *dev)
{
- struct net *net = dev_net(dev);
- struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
+ struct ip6_tnl *t = netdev_priv(dev);
+ struct ip6gre_net *ign = net_generic(t->net, ip6gre_net_id);
- ip6gre_tunnel_unlink(ign, netdev_priv(dev));
+ ip6gre_tunnel_unlink(ign, t);
dev_put(dev);
}
@@ -467,17 +468,7 @@ static int ip6gre_rcv(struct sk_buff *skb)
goto drop;
if (flags&GRE_CSUM) {
- switch (skb->ip_summed) {
- case CHECKSUM_COMPLETE:
- csum = csum_fold(skb->csum);
- if (!csum)
- break;
- /* fall through */
- case CHECKSUM_NONE:
- skb->csum = 0;
- csum = __skb_checksum_complete(skb);
- skb->ip_summed = CHECKSUM_COMPLETE;
- }
+ csum = skb_checksum_simple_validate(skb);
offset += 4;
}
if (flags&GRE_KEY) {
@@ -611,8 +602,8 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
int encap_limit,
__u32 *pmtu)
{
- struct net *net = dev_net(dev);
struct ip6_tnl *tunnel = netdev_priv(dev);
+ struct net *net = tunnel->net;
struct net_device *tdev; /* Device to other host */
struct ipv6hdr *ipv6h; /* Our new IP header */
unsigned int max_headroom = 0; /* The extra header space needed */
@@ -979,7 +970,7 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
int strict = (ipv6_addr_type(&p->raddr) &
(IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL));
- struct rt6_info *rt = rt6_lookup(dev_net(dev),
+ struct rt6_info *rt = rt6_lookup(t->net,
&p->raddr, &p->laddr,
p->link, strict);
@@ -1063,13 +1054,12 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev,
int err = 0;
struct ip6_tnl_parm2 p;
struct __ip6_tnl_parm p1;
- struct ip6_tnl *t;
- struct net *net = dev_net(dev);
+ struct ip6_tnl *t = netdev_priv(dev);
+ struct net *net = t->net;
struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
switch (cmd) {
case SIOCGETTUNNEL:
- t = NULL;
if (dev == ign->fb_tunnel_dev) {
if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
err = -EFAULT;
@@ -1077,9 +1067,9 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev,
}
ip6gre_tnl_parm_from_user(&p1, &p);
t = ip6gre_tunnel_locate(net, &p1, 0);
+ if (t == NULL)
+ t = netdev_priv(dev);
}
- if (t == NULL)
- t = netdev_priv(dev);
memset(&p, 0, sizeof(p));
ip6gre_tnl_parm_to_user(&p, &t->parms);
if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
@@ -1242,7 +1232,6 @@ static void ip6gre_tunnel_setup(struct net_device *dev)
dev->flags |= IFF_NOARP;
dev->iflink = 0;
dev->addr_len = sizeof(struct in6_addr);
- dev->features |= NETIF_F_NETNS_LOCAL;
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
}
@@ -1297,11 +1286,17 @@ static struct inet6_protocol ip6gre_protocol __read_mostly = {
.flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};
-static void ip6gre_destroy_tunnels(struct ip6gre_net *ign,
- struct list_head *head)
+static void ip6gre_destroy_tunnels(struct net *net, struct list_head *head)
{
+ struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
+ struct net_device *dev, *aux;
int prio;
+ for_each_netdev_safe(net, dev, aux)
+ if (dev->rtnl_link_ops == &ip6gre_link_ops ||
+ dev->rtnl_link_ops == &ip6gre_tap_ops)
+ unregister_netdevice_queue(dev, head);
+
for (prio = 0; prio < 4; prio++) {
int h;
for (h = 0; h < HASH_SIZE; h++) {
@@ -1310,7 +1305,12 @@ static void ip6gre_destroy_tunnels(struct ip6gre_net *ign,
t = rtnl_dereference(ign->tunnels[prio][h]);
while (t != NULL) {
- unregister_netdevice_queue(t->dev, head);
+ /* If dev is in the same netns, it has already
+ * been added to the list by the previous loop.
+ */
+ if (!net_eq(dev_net(t->dev), net))
+ unregister_netdevice_queue(t->dev,
+ head);
t = rtnl_dereference(t->next);
}
}
@@ -1329,6 +1329,11 @@ static int __net_init ip6gre_init_net(struct net *net)
goto err_alloc_dev;
}
dev_net_set(ign->fb_tunnel_dev, net);
+ /* FB netdevice is special: we have one, and only one per netns.
+ * Allowing to move it to another netns is clearly unsafe.
+ */
+ ign->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
+
ip6gre_fb_tunnel_init(ign->fb_tunnel_dev);
ign->fb_tunnel_dev->rtnl_link_ops = &ip6gre_link_ops;
@@ -1349,12 +1354,10 @@ err_alloc_dev:
static void __net_exit ip6gre_exit_net(struct net *net)
{
- struct ip6gre_net *ign;
LIST_HEAD(list);
- ign = net_generic(net, ip6gre_net_id);
rtnl_lock();
- ip6gre_destroy_tunnels(ign, &list);
+ ip6gre_destroy_tunnels(net, &list);
unregister_netdevice_many(&list);
rtnl_unlock();
}
@@ -1454,7 +1457,6 @@ static void ip6gre_netlink_parms(struct nlattr *data[],
static int ip6gre_tap_init(struct net_device *dev)
{
struct ip6_tnl *tunnel;
- int i;
tunnel = netdev_priv(dev);
@@ -1464,16 +1466,10 @@ static int ip6gre_tap_init(struct net_device *dev)
ip6gre_tnl_link_config(tunnel, 1);
- dev->tstats = alloc_percpu(struct pcpu_sw_netstats);
+ dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
if (!dev->tstats)
return -ENOMEM;
- for_each_possible_cpu(i) {
- struct pcpu_sw_netstats *ip6gre_tap_stats;
- ip6gre_tap_stats = per_cpu_ptr(dev->tstats, i);
- u64_stats_init(&ip6gre_tap_stats->syncp);
- }
-
return 0;
}
@@ -1538,15 +1534,14 @@ out:
static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[],
struct nlattr *data[])
{
- struct ip6_tnl *t, *nt;
- struct net *net = dev_net(dev);
+ struct ip6_tnl *t, *nt = netdev_priv(dev);
+ struct net *net = nt->net;
struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
struct __ip6_tnl_parm p;
if (dev == ign->fb_tunnel_dev)
return -EINVAL;
- nt = netdev_priv(dev);
ip6gre_netlink_parms(data, &p);
t = ip6gre_tunnel_locate(net, &p, 0);
@@ -1566,6 +1561,15 @@ static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[],
return 0;
}
+/* rtnl_link_ops dellink handler: queue @dev on @head for unregistration,
+ * unless @dev is this netns' fb_tunnel_dev, which is left in place.
+ */
+static void ip6gre_dellink(struct net_device *dev, struct list_head *head)
+{
+ struct ip6gre_net *ign = net_generic(dev_net(dev), ip6gre_net_id);
+
+ if (dev == ign->fb_tunnel_dev)
+ return;
+
+ unregister_netdevice_queue(dev, head);
+}
+
static size_t ip6gre_get_size(const struct net_device *dev)
{
return
@@ -1643,6 +1647,7 @@ static struct rtnl_link_ops ip6gre_link_ops __read_mostly = {
.validate = ip6gre_tunnel_validate,
.newlink = ip6gre_newlink,
.changelink = ip6gre_changelink,
+ .dellink = ip6gre_dellink,
.get_size = ip6gre_get_size,
.fill_info = ip6gre_fill_info,
};
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index 59f95affceb..65eda2a8af4 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -97,9 +97,11 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
SKB_GSO_DODGY |
SKB_GSO_TCP_ECN |
SKB_GSO_GRE |
+ SKB_GSO_GRE_CSUM |
SKB_GSO_IPIP |
SKB_GSO_SIT |
SKB_GSO_UDP_TUNNEL |
+ SKB_GSO_UDP_TUNNEL_CSUM |
SKB_GSO_MPLS |
SKB_GSO_TCPV6 |
0)))
@@ -196,7 +198,6 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head,
unsigned int off;
u16 flush = 1;
int proto;
- __wsum csum;
off = skb_gro_offset(skb);
hlen = off + sizeof(*iph);
@@ -264,13 +265,10 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head,
NAPI_GRO_CB(skb)->flush |= flush;
- csum = skb->csum;
- skb_postpull_rcsum(skb, iph, skb_network_header_len(skb));
+ skb_gro_postpull_rcsum(skb, iph, nlen);
pp = ops->callbacks.gro_receive(head, skb);
- skb->csum = csum;
-
out_unlock:
rcu_read_unlock();
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 64d6073731d..45702b8cd14 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -132,7 +132,7 @@ static int ip6_finish_output(struct sk_buff *skb)
return ip6_finish_output2(skb);
}
-int ip6_output(struct sk_buff *skb)
+int ip6_output(struct sock *sk, struct sk_buff *skb)
{
struct net_device *dev = skb_dst(skb)->dev;
struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
@@ -219,7 +219,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
skb->mark = sk->sk_mark;
mtu = dst_mtu(dst);
- if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
+ if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
IPSTATS_MIB_OUT, skb->len);
return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
@@ -344,12 +344,16 @@ static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
{
- if (skb->len <= mtu || skb->local_df)
+ if (skb->len <= mtu)
return false;
+ /* ipv6 conntrack defrag sets max_frag_size + ignore_df */
if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
return true;
+ if (skb->ignore_df)
+ return false;
+
if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
return false;
@@ -367,6 +371,9 @@ int ip6_forward(struct sk_buff *skb)
if (net->ipv6.devconf_all->forwarding == 0)
goto error;
+ if (skb->pkt_type != PACKET_HOST)
+ goto drop;
+
if (skb_warn_if_lro(skb))
goto drop;
@@ -376,9 +383,6 @@ int ip6_forward(struct sk_buff *skb)
goto drop;
}
- if (skb->pkt_type != PACKET_HOST)
- goto drop;
-
skb_forward_csum(skb);
/*
@@ -533,6 +537,20 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
skb_copy_secmark(to, from);
}
+/* Fill in the Identification field of fragment header @fhdr.
+ *
+ * The route's destination and source addresses are hashed with a random
+ * seed that is initialised once, and one identifier is reserved for that
+ * hash via ip_idents_reserve(). The result is stored in network byte order.
+ */
+static void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
+{
+ static u32 ip6_idents_hashrnd __read_mostly;
+ u32 bucket;
+
+ net_get_random_once(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd));
+
+ bucket = __ipv6_addr_jhash(&rt->rt6i_dst.addr, ip6_idents_hashrnd);
+ bucket = __ipv6_addr_jhash(&rt->rt6i_src.addr, bucket);
+
+ fhdr->identification = htonl(ip_idents_reserve(bucket, 1));
+}
+
int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
struct sk_buff *frag;
@@ -555,7 +573,7 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
/* We must not fragment if the socket is set to force MTU discovery
* or if the skb is not generated by a local socket.
*/
- if (unlikely(!skb->local_df && skb->len > mtu) ||
+ if (unlikely(!skb->ignore_df && skb->len > mtu) ||
(IP6CB(skb)->frag_max_size &&
IP6CB(skb)->frag_max_size > mtu)) {
if (skb->sk && dst_allfrag(skb_dst(skb)))
@@ -1225,13 +1243,15 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
unsigned int maxnonfragsize, headersize;
headersize = sizeof(struct ipv6hdr) +
- (opt ? opt->tot_len : 0) +
+ (opt ? opt->opt_flen + opt->opt_nflen : 0) +
(dst_allfrag(&rt->dst) ?
sizeof(struct frag_hdr) : 0) +
rt->rt6i_nfheader_len;
- maxnonfragsize = (np->pmtudisc >= IPV6_PMTUDISC_DO) ?
- mtu : sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
+ if (ip6_sk_ignore_df(sk))
+ maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
+ else
+ maxnonfragsize = mtu;
/* dontfrag active */
if ((cork->length + length > mtu - headersize) && dontfrag &&
@@ -1538,8 +1558,7 @@ int ip6_push_pending_frames(struct sock *sk)
}
/* Allow local fragmentation. */
- if (np->pmtudisc < IPV6_PMTUDISC_DO)
- skb->local_df = 1;
+ skb->ignore_df = ip6_sk_ignore_df(sk);
*final_dst = fl6->daddr;
__skb_pull(skb, skb_network_header_len(skb));
@@ -1566,8 +1585,8 @@ int ip6_push_pending_frames(struct sock *sk)
if (proto == IPPROTO_ICMPV6) {
struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
- ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
- ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
+ ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
+ ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
}
err = ip6_local_out(skb);
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 5db8d310f9c..afa08245836 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -61,6 +61,7 @@
MODULE_AUTHOR("Ville Nuorvala");
MODULE_DESCRIPTION("IPv6 tunneling device");
MODULE_LICENSE("GPL");
+MODULE_ALIAS_RTNL_LINK("ip6tnl");
MODULE_ALIAS_NETDEV("ip6tnl0");
#ifdef IP6_TNL_DEBUG
@@ -108,12 +109,12 @@ static struct net_device_stats *ip6_get_stats(struct net_device *dev)
per_cpu_ptr(dev->tstats, i);
do {
- start = u64_stats_fetch_begin_bh(&tstats->syncp);
+ start = u64_stats_fetch_begin_irq(&tstats->syncp);
tmp.rx_packets = tstats->rx_packets;
tmp.rx_bytes = tstats->rx_bytes;
tmp.tx_packets = tstats->tx_packets;
tmp.tx_bytes = tstats->tx_bytes;
- } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
+ } while (u64_stats_fetch_retry_irq(&tstats->syncp, start));
sum.rx_packets += tmp.rx_packets;
sum.rx_bytes += tmp.rx_bytes;
@@ -1340,8 +1341,8 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
int err = 0;
struct ip6_tnl_parm p;
struct __ip6_tnl_parm p1;
- struct ip6_tnl *t = NULL;
- struct net *net = dev_net(dev);
+ struct ip6_tnl *t = netdev_priv(dev);
+ struct net *net = t->net;
struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
switch (cmd) {
@@ -1353,11 +1354,11 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
}
ip6_tnl_parm_from_user(&p1, &p);
t = ip6_tnl_locate(net, &p1, 0);
+ if (t == NULL)
+ t = netdev_priv(dev);
} else {
memset(&p, 0, sizeof(p));
}
- if (t == NULL)
- t = netdev_priv(dev);
ip6_tnl_parm_to_user(&p, &t->parms);
if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof (p))) {
err = -EFAULT;
@@ -1502,19 +1503,12 @@ static inline int
ip6_tnl_dev_init_gen(struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
- int i;
t->dev = dev;
t->net = dev_net(dev);
- dev->tstats = alloc_percpu(struct pcpu_sw_netstats);
+ dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
if (!dev->tstats)
return -ENOMEM;
-
- for_each_possible_cpu(i) {
- struct pcpu_sw_netstats *ip6_tnl_stats;
- ip6_tnl_stats = per_cpu_ptr(dev->tstats, i);
- u64_stats_init(&ip6_tnl_stats->syncp);
- }
return 0;
}
@@ -1564,7 +1558,7 @@ static int ip6_tnl_validate(struct nlattr *tb[], struct nlattr *data[])
{
u8 proto;
- if (!data)
+ if (!data || !data[IFLA_IPTUN_PROTO])
return 0;
proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 2d19272b8ce..9aaa6bb229e 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -278,7 +278,6 @@ static void vti6_dev_uninit(struct net_device *dev)
RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL);
else
vti6_tnl_unlink(ip6n, t);
- ip6_tnl_dst_reset(t);
dev_put(dev);
}
@@ -288,11 +287,8 @@ static int vti6_rcv(struct sk_buff *skb)
const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
rcu_read_lock();
-
if ((t = vti6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr,
&ipv6h->daddr)) != NULL) {
- struct pcpu_sw_netstats *tstats;
-
if (t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) {
rcu_read_unlock();
goto discard;
@@ -309,27 +305,58 @@ static int vti6_rcv(struct sk_buff *skb)
goto discard;
}
- tstats = this_cpu_ptr(t->dev->tstats);
- u64_stats_update_begin(&tstats->syncp);
- tstats->rx_packets++;
- tstats->rx_bytes += skb->len;
- u64_stats_update_end(&tstats->syncp);
-
- skb->mark = 0;
- secpath_reset(skb);
- skb->dev = t->dev;
+ XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = t;
+ skb->mark = be32_to_cpu(t->parms.i_key);
rcu_read_unlock();
- return 0;
+
+ return xfrm6_rcv(skb);
}
rcu_read_unlock();
- return 1;
-
+ return -EINVAL;
discard:
kfree_skb(skb);
return 0;
}
+/* Receive callback run for packets that vti6_rcv() tagged with a tunnel.
+ *
+ * Returns 1 when no tunnel is recorded in the skb control block, 0 after
+ * accounting either an error (@err != 0) or a successful receive, and
+ * -EPERM when the inbound xfrm policy check rejects the packet.
+ */
+static int vti6_rcv_cb(struct sk_buff *skb, int err)
+{
+ struct ip6_tnl *t = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6;
+ struct pcpu_sw_netstats *tstats;
+ struct net_device *dev;
+ unsigned short family;
+
+ if (!t)
+ return 1;
+
+ dev = t->dev;
+
+ if (err) {
+ /* Failed input is counted as both an error and a drop. */
+ dev->stats.rx_errors++;
+ dev->stats.rx_dropped++;
+ return 0;
+ }
+
+ /* The policy check uses the family of the state's inner mode. */
+ family = xfrm_input_state(skb)->inner_mode->afinfo->family;
+ if (!xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family))
+ return -EPERM;
+
+ /* The scrub flag is set only when the tunnel's netns differs from
+ * the netns of the device the packet arrived on.
+ */
+ skb_scrub_packet(skb, !net_eq(t->net, dev_net(skb->dev)));
+ skb->dev = dev;
+
+ tstats = this_cpu_ptr(dev->tstats);
+ u64_stats_update_begin(&tstats->syncp);
+ tstats->rx_packets++;
+ tstats->rx_bytes += skb->len;
+ u64_stats_update_end(&tstats->syncp);
+
+ return 0;
+}
+
/**
* vti6_addr_conflict - compare packet addresses to tunnel's own
* @t: the outgoing tunnel device
@@ -349,44 +376,56 @@ vti6_addr_conflict(const struct ip6_tnl *t, const struct ipv6hdr *hdr)
return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr);
}
+/* Check whether xfrm state @x is usable for a tunnel with destination
+ * address @dst and source address @src.
+ *
+ * A missing state, a state that is not in tunnel mode, or one that is not
+ * AF_INET6 is rejected. If @dst is the unspecified address, only the
+ * source address is compared; otherwise both addresses must match.
+ */
+static bool vti6_state_check(const struct xfrm_state *x,
+ const struct in6_addr *dst,
+ const struct in6_addr *src)
+{
+ xfrm_address_t *daddr = (xfrm_address_t *)dst;
+ xfrm_address_t *saddr = (xfrm_address_t *)src;
+
+ /* Without a transform the tunnel is not functional. */
+ if (!x)
+ return false;
+ if (x->props.mode != XFRM_MODE_TUNNEL)
+ return false;
+ if (x->props.family != AF_INET6)
+ return false;
+
+ if (ipv6_addr_any(dst))
+ return xfrm_addr_equal(saddr, &x->props.saddr, AF_INET6);
+
+ return xfrm_state_addr_check(x, daddr, saddr, AF_INET6);
+}
+
/**
* vti6_xmit - send a packet
* @skb: the outgoing socket buffer
* @dev: the outgoing tunnel device
+ * @fl: the flow information for the xfrm_lookup
**/
-static int vti6_xmit(struct sk_buff *skb, struct net_device *dev)
+static int
+vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
{
- struct net *net = dev_net(dev);
struct ip6_tnl *t = netdev_priv(dev);
struct net_device_stats *stats = &t->dev->stats;
- struct dst_entry *dst = NULL, *ndst = NULL;
- struct flowi6 fl6;
- struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+ struct dst_entry *dst = skb_dst(skb);
struct net_device *tdev;
int err = -1;
- if ((t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) ||
- !ip6_tnl_xmit_ctl(t) || vti6_addr_conflict(t, ipv6h))
- return err;
-
- dst = ip6_tnl_dst_check(t);
- if (!dst) {
- memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
-
- ndst = ip6_route_output(net, NULL, &fl6);
+ if (!dst)
+ goto tx_err_link_failure;
- if (ndst->error)
- goto tx_err_link_failure;
- ndst = xfrm_lookup(net, ndst, flowi6_to_flowi(&fl6), NULL, 0);
- if (IS_ERR(ndst)) {
- err = PTR_ERR(ndst);
- ndst = NULL;
- goto tx_err_link_failure;
- }
- dst = ndst;
+ dst_hold(dst);
+ dst = xfrm_lookup(t->net, dst, fl, NULL, 0);
+ if (IS_ERR(dst)) {
+ err = PTR_ERR(dst);
+ dst = NULL;
+ goto tx_err_link_failure;
}
- if (!dst->xfrm || dst->xfrm->props.mode != XFRM_MODE_TUNNEL)
+ if (!vti6_state_check(dst->xfrm, &t->parms.raddr, &t->parms.laddr))
goto tx_err_link_failure;
tdev = dst->dev;
@@ -398,14 +437,21 @@ static int vti6_xmit(struct sk_buff *skb, struct net_device *dev)
goto tx_err_dst_release;
}
+ skb_scrub_packet(skb, !net_eq(t->net, dev_net(dev)));
+ skb_dst_set(skb, dst);
+ skb->dev = skb_dst(skb)->dev;
- skb_dst_drop(skb);
- skb_dst_set_noref(skb, dst);
+ err = dst_output(skb);
+ if (net_xmit_eval(err) == 0) {
+ struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats);
- ip6tunnel_xmit(skb, dev);
- if (ndst) {
- dev->mtu = dst_mtu(ndst);
- ip6_tnl_dst_store(t, ndst);
+ u64_stats_update_begin(&tstats->syncp);
+ tstats->tx_bytes += skb->len;
+ tstats->tx_packets++;
+ u64_stats_update_end(&tstats->syncp);
+ } else {
+ stats->tx_errors++;
+ stats->tx_aborted_errors++;
}
return 0;
@@ -413,7 +459,7 @@ tx_err_link_failure:
stats->tx_carrier_errors++;
dst_link_failure(skb);
tx_err_dst_release:
- dst_release(ndst);
+ dst_release(dst);
return err;
}
@@ -422,16 +468,33 @@ vti6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
struct net_device_stats *stats = &t->dev->stats;
+ struct ipv6hdr *ipv6h;
+ struct flowi fl;
int ret;
+ memset(&fl, 0, sizeof(fl));
+ skb->mark = be32_to_cpu(t->parms.o_key);
+
switch (skb->protocol) {
case htons(ETH_P_IPV6):
- ret = vti6_xmit(skb, dev);
+ ipv6h = ipv6_hdr(skb);
+
+ if ((t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) ||
+ !ip6_tnl_xmit_ctl(t) || vti6_addr_conflict(t, ipv6h))
+ goto tx_err;
+
+ xfrm_decode_session(skb, &fl, AF_INET6);
+ memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
+ break;
+ case htons(ETH_P_IP):
+ xfrm_decode_session(skb, &fl, AF_INET);
+ memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
break;
default:
goto tx_err;
}
+ ret = vti6_xmit(skb, dev, &fl);
if (ret < 0)
goto tx_err;
@@ -444,24 +507,69 @@ tx_err:
return NETDEV_TX_OK;
}
+static int vti6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ u8 type, u8 code, int offset, __be32 info)
+{
+ __be32 spi;
+ __u32 mark;
+ struct xfrm_state *x;
+ struct ip6_tnl *t;
+ struct ip_esp_hdr *esph;
+ struct ip_auth_hdr *ah;
+ struct ip_comp_hdr *ipch;
+ struct net *net = dev_net(skb->dev);
+ const struct ipv6hdr *iph = (const struct ipv6hdr *)skb->data;
+ int protocol = iph->nexthdr;
+
+ t = vti6_tnl_lookup(dev_net(skb->dev), &iph->daddr, &iph->saddr);
+ if (!t)
+ return -1;
+
+ mark = be32_to_cpu(t->parms.o_key);
+
+ switch (protocol) {
+ case IPPROTO_ESP:
+ esph = (struct ip_esp_hdr *)(skb->data + offset);
+ spi = esph->spi;
+ break;
+ case IPPROTO_AH:
+ ah = (struct ip_auth_hdr *)(skb->data + offset);
+ spi = ah->spi;
+ break;
+ case IPPROTO_COMP:
+ ipch = (struct ip_comp_hdr *)(skb->data + offset);
+ spi = htonl(ntohs(ipch->cpi));
+ break;
+ default:
+ return 0;
+ }
+
+ if (type != ICMPV6_PKT_TOOBIG &&
+ type != NDISC_REDIRECT)
+ return 0;
+
+ x = xfrm_state_lookup(net, mark, (const xfrm_address_t *)&iph->daddr,
+ spi, protocol, AF_INET6);
+ if (!x)
+ return 0;
+
+ if (type == NDISC_REDIRECT)
+ ip6_redirect(skb, net, skb->dev->ifindex, 0);
+ else
+ ip6_update_pmtu(skb, net, info, 0, 0);
+ xfrm_state_put(x);
+
+ return 0;
+}
+
static void vti6_link_config(struct ip6_tnl *t)
{
- struct dst_entry *dst;
struct net_device *dev = t->dev;
struct __ip6_tnl_parm *p = &t->parms;
- struct flowi6 *fl6 = &t->fl.u.ip6;
memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr));
- /* Set up flowi template */
- fl6->saddr = p->laddr;
- fl6->daddr = p->raddr;
- fl6->flowi6_oif = p->link;
- fl6->flowi6_mark = be32_to_cpu(p->i_key);
- fl6->flowi6_proto = p->proto;
- fl6->flowlabel = 0;
-
p->flags &= ~(IP6_TNL_F_CAP_XMIT | IP6_TNL_F_CAP_RCV |
IP6_TNL_F_CAP_PER_PACKET);
p->flags |= ip6_tnl_get_cap(t, &p->laddr, &p->raddr);
@@ -472,28 +580,6 @@ static void vti6_link_config(struct ip6_tnl *t)
dev->flags &= ~IFF_POINTOPOINT;
dev->iflink = p->link;
-
- if (p->flags & IP6_TNL_F_CAP_XMIT) {
-
- dst = ip6_route_output(dev_net(dev), NULL, fl6);
- if (dst->error)
- return;
-
- dst = xfrm_lookup(dev_net(dev), dst, flowi6_to_flowi(fl6),
- NULL, 0);
- if (IS_ERR(dst))
- return;
-
- if (dst->dev) {
- dev->hard_header_len = dst->dev->hard_header_len;
-
- dev->mtu = dst_mtu(dst);
-
- if (dev->mtu < IPV6_MIN_MTU)
- dev->mtu = IPV6_MIN_MTU;
- }
- dst_release(dst);
- }
}
/**
@@ -709,18 +795,14 @@ static const struct net_device_ops vti6_netdev_ops = {
**/
static void vti6_dev_setup(struct net_device *dev)
{
- struct ip6_tnl *t;
-
dev->netdev_ops = &vti6_netdev_ops;
dev->destructor = vti6_dev_free;
dev->type = ARPHRD_TUNNEL6;
dev->hard_header_len = LL_MAX_HEADER + sizeof(struct ipv6hdr);
dev->mtu = ETH_DATA_LEN;
- t = netdev_priv(dev);
dev->flags |= IFF_NOARP;
dev->addr_len = sizeof(struct in6_addr);
- dev->features |= NETIF_F_NETNS_LOCAL;
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
}
@@ -731,18 +813,12 @@ static void vti6_dev_setup(struct net_device *dev)
static inline int vti6_dev_init_gen(struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
- int i;
t->dev = dev;
t->net = dev_net(dev);
- dev->tstats = alloc_percpu(struct pcpu_sw_netstats);
+ dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
if (!dev->tstats)
return -ENOMEM;
- for_each_possible_cpu(i) {
- struct pcpu_sw_netstats *stats;
- stats = per_cpu_ptr(dev->tstats, i);
- u64_stats_init(&stats->syncp);
- }
return 0;
}
@@ -914,11 +990,6 @@ static struct rtnl_link_ops vti6_link_ops __read_mostly = {
.fill_info = vti6_fill_info,
};
-static struct xfrm_tunnel_notifier vti6_handler __read_mostly = {
- .handler = vti6_rcv,
- .priority = 1,
-};
-
static void __net_exit vti6_destroy_tunnels(struct vti6_net *ip6n)
{
int h;
@@ -990,6 +1061,27 @@ static struct pernet_operations vti6_net_ops = {
.size = sizeof(struct vti6_net),
};
+static struct xfrm6_protocol vti_esp6_protocol __read_mostly = {
+ .handler = vti6_rcv,
+ .cb_handler = vti6_rcv_cb,
+ .err_handler = vti6_err,
+ .priority = 100,
+};
+
+static struct xfrm6_protocol vti_ah6_protocol __read_mostly = {
+ .handler = vti6_rcv,
+ .cb_handler = vti6_rcv_cb,
+ .err_handler = vti6_err,
+ .priority = 100,
+};
+
+static struct xfrm6_protocol vti_ipcomp6_protocol __read_mostly = {
+ .handler = vti6_rcv,
+ .cb_handler = vti6_rcv_cb,
+ .err_handler = vti6_err,
+ .priority = 100,
+};
+
/**
* vti6_tunnel_init - register protocol and reserve needed resources
*
@@ -1003,11 +1095,30 @@ static int __init vti6_tunnel_init(void)
if (err < 0)
goto out_pernet;
- err = xfrm6_mode_tunnel_input_register(&vti6_handler);
+ err = xfrm6_protocol_register(&vti_esp6_protocol, IPPROTO_ESP);
if (err < 0) {
- pr_err("%s: can't register vti6\n", __func__);
+ pr_err("%s: can't register vti6 protocol\n", __func__);
+
+ goto out;
+ }
+
+ err = xfrm6_protocol_register(&vti_ah6_protocol, IPPROTO_AH);
+ if (err < 0) {
+ xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP);
+ pr_err("%s: can't register vti6 protocol\n", __func__);
+
goto out;
}
+
+ err = xfrm6_protocol_register(&vti_ipcomp6_protocol, IPPROTO_COMP);
+ if (err < 0) {
+ xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH);
+ xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP);
+ pr_err("%s: can't register vti6 protocol\n", __func__);
+
+ goto out;
+ }
+
err = rtnl_link_register(&vti6_link_ops);
if (err < 0)
goto rtnl_link_failed;
@@ -1015,7 +1126,9 @@ static int __init vti6_tunnel_init(void)
return 0;
rtnl_link_failed:
- xfrm6_mode_tunnel_input_deregister(&vti6_handler);
+ xfrm6_protocol_deregister(&vti_ipcomp6_protocol, IPPROTO_COMP);
+ xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH);
+ xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP);
out:
unregister_pernet_device(&vti6_net_ops);
out_pernet:
@@ -1028,8 +1141,12 @@ out_pernet:
static void __exit vti6_tunnel_cleanup(void)
{
rtnl_link_unregister(&vti6_link_ops);
- if (xfrm6_mode_tunnel_input_deregister(&vti6_handler))
- pr_info("%s: can't deregister vti6\n", __func__);
+ if (xfrm6_protocol_deregister(&vti_ipcomp6_protocol, IPPROTO_COMP))
+ pr_info("%s: can't deregister protocol\n", __func__);
+ if (xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH))
+ pr_info("%s: can't deregister protocol\n", __func__);
+ if (xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP))
+ pr_info("%s: can't deregister protocol\n", __func__);
unregister_pernet_device(&vti6_net_ops);
}
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 8737400af0a..8250474ab7d 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -700,7 +700,7 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
struct mr6_table *mrt;
struct flowi6 fl6 = {
.flowi6_oif = dev->ifindex,
- .flowi6_iif = skb->skb_iif,
+ .flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX,
.flowi6_mark = skb->mark,
};
int err;
@@ -1633,7 +1633,7 @@ struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
{
struct mr6_table *mrt;
struct flowi6 fl6 = {
- .flowi6_iif = skb->skb_iif,
+ .flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX,
.flowi6_oif = skb->dev->ifindex,
.flowi6_mark = skb->mark,
};
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index da9becb42e8..d1c793cffcb 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -53,7 +53,7 @@
#include <linux/icmpv6.h>
#include <linux/mutex.h>
-static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+static int ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
struct net *net = dev_net(skb->dev);
@@ -65,19 +65,21 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (type != ICMPV6_PKT_TOOBIG &&
type != NDISC_REDIRECT)
- return;
+ return 0;
spi = htonl(ntohs(ipcomph->cpi));
x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
spi, IPPROTO_COMP, AF_INET6);
if (!x)
- return;
+ return 0;
if (type == NDISC_REDIRECT)
ip6_redirect(skb, net, skb->dev->ifindex, 0);
else
ip6_update_pmtu(skb, net, info, 0, 0);
xfrm_state_put(x);
+
+ return 0;
}
static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x)
@@ -174,6 +176,11 @@ out:
return err;
}
+static int ipcomp6_rcv_cb(struct sk_buff *skb, int err)
+{
+ return 0;
+}
+
static const struct xfrm_type ipcomp6_type =
{
.description = "IPCOMP6",
@@ -186,11 +193,12 @@ static const struct xfrm_type ipcomp6_type =
.hdr_offset = xfrm6_find_1stfragopt,
};
-static const struct inet6_protocol ipcomp6_protocol =
+static struct xfrm6_protocol ipcomp6_protocol =
{
.handler = xfrm6_rcv,
+ .cb_handler = ipcomp6_rcv_cb,
.err_handler = ipcomp6_err,
- .flags = INET6_PROTO_NOPOLICY,
+ .priority = 0,
};
static int __init ipcomp6_init(void)
@@ -199,7 +207,7 @@ static int __init ipcomp6_init(void)
pr_info("%s: can't add xfrm type\n", __func__);
return -EAGAIN;
}
- if (inet6_add_protocol(&ipcomp6_protocol, IPPROTO_COMP) < 0) {
+ if (xfrm6_protocol_register(&ipcomp6_protocol, IPPROTO_COMP) < 0) {
pr_info("%s: can't add protocol\n", __func__);
xfrm_unregister_type(&ipcomp6_type, AF_INET6);
return -EAGAIN;
@@ -209,7 +217,7 @@ static int __init ipcomp6_init(void)
static void __exit ipcomp6_fini(void)
{
- if (inet6_del_protocol(&ipcomp6_protocol, IPPROTO_COMP) < 0)
+ if (xfrm6_protocol_deregister(&ipcomp6_protocol, IPPROTO_COMP) < 0)
pr_info("%s: can't remove protocol\n", __func__);
if (xfrm_unregister_type(&ipcomp6_type, AF_INET6) < 0)
pr_info("%s: can't remove xfrm type\n", __func__);
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 0a00f449de5..edb58aff4ae 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -722,7 +722,7 @@ done:
case IPV6_MTU_DISCOVER:
if (optlen < sizeof(int))
goto e_inval;
- if (val < IPV6_PMTUDISC_DONT || val > IPV6_PMTUDISC_INTERFACE)
+ if (val < IPV6_PMTUDISC_DONT || val > IPV6_PMTUDISC_OMIT)
goto e_inval;
np->pmtudisc = val;
retv = 0;
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index e1e47350784..617f0958e16 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1301,8 +1301,17 @@ int igmp6_event_query(struct sk_buff *skb)
len = ntohs(ipv6_hdr(skb)->payload_len) + sizeof(struct ipv6hdr);
len -= skb_network_header_len(skb);
- /* Drop queries with not link local source */
- if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL))
+ /* RFC3810 6.2
+ * Upon reception of an MLD message that contains a Query, the node
+ * checks if the source address of the message is a valid link-local
+ * address, if the Hop Limit is set to 1, and if the Router Alert
+ * option is present in the Hop-By-Hop Options header of the IPv6
+ * packet. If any of these checks fails, the packet is dropped.
+ */
+ if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL) ||
+ ipv6_hdr(skb)->hop_limit != 1 ||
+ !(IP6CB(skb)->flags & IP6SKB_ROUTERALERT) ||
+ IP6CB(skb)->ra != htons(IPV6_OPT_ROUTERALERT_MLD))
return -EINVAL;
idev = __in6_dev_get(skb->dev);
@@ -1620,11 +1629,12 @@ static void mld_sendpack(struct sk_buff *skb)
dst_output);
out:
if (!err) {
- ICMP6MSGOUT_INC_STATS_BH(net, idev, ICMPV6_MLD2_REPORT);
- ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
- IP6_UPD_PO_STATS_BH(net, idev, IPSTATS_MIB_OUTMCAST, payload_len);
- } else
- IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_OUTDISCARDS);
+ ICMP6MSGOUT_INC_STATS(net, idev, ICMPV6_MLD2_REPORT);
+ ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
+ IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, payload_len);
+ } else {
+ IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
+ }
rcu_read_unlock();
return;
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 09a22f4f36c..ca8d4ea48a5 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -851,7 +851,7 @@ out:
static void ndisc_recv_na(struct sk_buff *skb)
{
struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
- const struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
+ struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
const struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
u8 *lladdr = NULL;
u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) +
@@ -944,10 +944,7 @@ static void ndisc_recv_na(struct sk_buff *skb)
/*
* Change: router to host
*/
- struct rt6_info *rt;
- rt = rt6_get_dflt_router(saddr, dev);
- if (rt)
- ip6_del_rt(rt);
+ rt6_clean_tohost(dev_net(dev), saddr);
}
out:
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 95f3f1da0d7..d38e6a8d8b9 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -30,13 +30,15 @@ int ip6_route_me_harder(struct sk_buff *skb)
.daddr = iph->daddr,
.saddr = iph->saddr,
};
+ int err;
dst = ip6_route_output(net, skb->sk, &fl6);
- if (dst->error) {
+ err = dst->error;
+ if (err) {
IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
LIMIT_NETDEBUG(KERN_DEBUG "ip6_route_me_harder: No more route.\n");
dst_release(dst);
- return dst->error;
+ return err;
}
/* Drop old route. */
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 710238f58aa..e080fbbbc0e 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1241,8 +1241,10 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
xt_free_table_info(oldinfo);
if (copy_to_user(counters_ptr, counters,
- sizeof(struct xt_counters) * num_counters) != 0)
- ret = -EFAULT;
+ sizeof(struct xt_counters) * num_counters) != 0) {
+ /* Silent error, can't fail, new table is already in place */
+ net_warn_ratelimited("ip6tables: counters copy to user failed while replacing table\n");
+ }
vfree(counters);
xt_table_unlock(t);
return ret;
diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c
index e0983f3648a..790e0c6b19e 100644
--- a/net/ipv6/netfilter/ip6t_rpfilter.c
+++ b/net/ipv6/netfilter/ip6t_rpfilter.c
@@ -33,6 +33,7 @@ static bool rpfilter_lookup_reverse6(const struct sk_buff *skb,
struct ipv6hdr *iph = ipv6_hdr(skb);
bool ret = false;
struct flowi6 fl6 = {
+ .flowi6_iif = LOOPBACK_IFINDEX,
.flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
.flowi6_proto = iph->nexthdr,
.daddr = iph->saddr,
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index 84c7f33d0cf..387d8b8fc18 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -90,17 +90,9 @@ nf_nat_ipv6_fn(const struct nf_hook_ops *ops,
if (nf_ct_is_untracked(ct))
return NF_ACCEPT;
- nat = nfct_nat(ct);
- if (!nat) {
- /* NAT module was loaded late. */
- if (nf_ct_is_confirmed(ct))
- return NF_ACCEPT;
- nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
- if (nat == NULL) {
- pr_debug("failed to add NAT extension\n");
- return NF_ACCEPT;
- }
- }
+ nat = nf_ct_nat_ext_add(ct);
+ if (nat == NULL)
+ return NF_ACCEPT;
switch (ctinfo) {
case IP_CT_RELATED:
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 767ab8da821..0d5279fd852 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -451,7 +451,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
}
sub_frag_mem_limit(&fq->q, head->truesize);
- head->local_df = 1;
+ head->ignore_df = 1;
head->next = NULL;
head->dev = dev;
head->tstamp = fq->q.stamp;
diff --git a/net/ipv6/netfilter/nft_chain_nat_ipv6.c b/net/ipv6/netfilter/nft_chain_nat_ipv6.c
index 9c3297a768f..d189fcb437f 100644
--- a/net/ipv6/netfilter/nft_chain_nat_ipv6.c
+++ b/net/ipv6/netfilter/nft_chain_nat_ipv6.c
@@ -47,15 +47,9 @@ static unsigned int nf_nat_ipv6_fn(const struct nf_hook_ops *ops,
if (ct == NULL || nf_ct_is_untracked(ct))
return NF_ACCEPT;
- nat = nfct_nat(ct);
- if (nat == NULL) {
- /* Conntrack module was loaded late, can't add extension. */
- if (nf_ct_is_confirmed(ct))
- return NF_ACCEPT;
- nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
- if (nat == NULL)
- return NF_ACCEPT;
- }
+ nat = nf_ct_nat_ext_add(ct);
+ if (nat == NULL)
+ return NF_ACCEPT;
switch (ctinfo) {
case IP_CT_RELATED:
diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
index 827f795209c..5ec867e4a8b 100644
--- a/net/ipv6/output_core.c
+++ b/net/ipv6/output_core.c
@@ -6,35 +6,7 @@
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/addrconf.h>
-
-void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
-{
- static atomic_t ipv6_fragmentation_id;
- int old, new;
-
-#if IS_ENABLED(CONFIG_IPV6)
- if (rt && !(rt->dst.flags & DST_NOPEER)) {
- struct inet_peer *peer;
- struct net *net;
-
- net = dev_net(rt->dst.dev);
- peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
- if (peer) {
- fhdr->identification = htonl(inet_getid(peer, 0));
- inet_putpeer(peer);
- return;
- }
- }
-#endif
- do {
- old = atomic_read(&ipv6_fragmentation_id);
- new = old + 1;
- if (!new)
- new = 1;
- } while (atomic_cmpxchg(&ipv6_fragmentation_id, old, new) != old);
- fhdr->identification = htonl(new);
-}
-EXPORT_SYMBOL(ipv6_select_ident);
+#include <net/secure_seq.h>
int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
{
@@ -106,6 +78,7 @@ int __ip6_local_out(struct sk_buff *skb)
if (len > IPV6_MAXPLEN)
len = 0;
ipv6_hdr(skb)->payload_len = htons(len);
+ IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);
return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
skb_dst(skb)->dev, dst_output);
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index 587bbdcb22b..5b7a1ed2aba 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -51,7 +51,6 @@ static struct inet_protosw pingv6_protosw = {
.protocol = IPPROTO_ICMPV6,
.prot = &pingv6_prot,
.ops = &inet6_dgram_ops,
- .no_check = UDP_CSUM_DEFAULT,
.flags = INET_PROTOSW_REUSE,
};
@@ -168,12 +167,7 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
pfh.wcheck = 0;
pfh.family = AF_INET6;
- if (ipv6_addr_is_multicast(&fl6.daddr))
- hlimit = np->mcast_hops;
- else
- hlimit = np->hop_limit;
- if (hlimit < 0)
- hlimit = ip6_dst_hoplimit(dst);
+ hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
lock_sock(sk);
err = ip6_append_data(sk, ping_getfrag, &pfh, len,
@@ -182,8 +176,8 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
MSG_DONTWAIT, np->dontfrag);
if (err) {
- ICMP6_INC_STATS_BH(sock_net(sk), rt->rt6i_idev,
- ICMP6_MIB_OUTERRORS);
+ ICMP6_INC_STATS(sock_net(sk), rt->rt6i_idev,
+ ICMP6_MIB_OUTERRORS);
ip6_flush_pending_frames(sk);
} else {
err = icmpv6_push_pending_frames(sk, &fl6,
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 091d066a57b..3317440ea34 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -186,7 +186,7 @@ static void snmp6_seq_show_icmpv6msg(struct seq_file *seq, atomic_long_t *smib)
/* can be called either with percpu mib (pcpumib != NULL),
* or shared one (smib != NULL)
*/
-static void snmp6_seq_show_item(struct seq_file *seq, void __percpu **pcpumib,
+static void snmp6_seq_show_item(struct seq_file *seq, void __percpu *pcpumib,
atomic_long_t *smib,
const struct snmp_mib *itemlist)
{
@@ -201,7 +201,7 @@ static void snmp6_seq_show_item(struct seq_file *seq, void __percpu **pcpumib,
}
}
-static void snmp6_seq_show_item64(struct seq_file *seq, void __percpu **mib,
+static void snmp6_seq_show_item64(struct seq_file *seq, void __percpu *mib,
const struct snmp_mib *itemlist, size_t syncpoff)
{
int i;
@@ -215,14 +215,14 @@ static int snmp6_seq_show(struct seq_file *seq, void *v)
{
struct net *net = (struct net *)seq->private;
- snmp6_seq_show_item64(seq, (void __percpu **)net->mib.ipv6_statistics,
+ snmp6_seq_show_item64(seq, net->mib.ipv6_statistics,
snmp6_ipstats_list, offsetof(struct ipstats_mib, syncp));
- snmp6_seq_show_item(seq, (void __percpu **)net->mib.icmpv6_statistics,
+ snmp6_seq_show_item(seq, net->mib.icmpv6_statistics,
NULL, snmp6_icmp6_list);
snmp6_seq_show_icmpv6msg(seq, net->mib.icmpv6msg_statistics->mibs);
- snmp6_seq_show_item(seq, (void __percpu **)net->mib.udp_stats_in6,
+ snmp6_seq_show_item(seq, net->mib.udp_stats_in6,
NULL, snmp6_udp6_list);
- snmp6_seq_show_item(seq, (void __percpu **)net->mib.udplite_stats_in6,
+ snmp6_seq_show_item(seq, net->mib.udplite_stats_in6,
NULL, snmp6_udplite6_list);
return 0;
}
@@ -245,7 +245,7 @@ static int snmp6_dev_seq_show(struct seq_file *seq, void *v)
struct inet6_dev *idev = (struct inet6_dev *)seq->private;
seq_printf(seq, "%-32s\t%u\n", "ifIndex", idev->dev->ifindex);
- snmp6_seq_show_item64(seq, (void __percpu **)idev->stats.ipv6,
+ snmp6_seq_show_item64(seq, idev->stats.ipv6,
snmp6_ipstats_list, offsetof(struct ipstats_mib, syncp));
snmp6_seq_show_item(seq, NULL, idev->stats.icmpv6dev->mibs,
snmp6_icmp6_list);
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 1f29996e368..b2dc60b0c76 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -873,14 +873,8 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
err = PTR_ERR(dst);
goto out;
}
- if (hlimit < 0) {
- if (ipv6_addr_is_multicast(&fl6.daddr))
- hlimit = np->mcast_hops;
- else
- hlimit = np->hop_limit;
- if (hlimit < 0)
- hlimit = ip6_dst_hoplimit(dst);
- }
+ if (hlimit < 0)
+ hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
if (tclass < 0)
tclass = np->tclass;
@@ -1328,7 +1322,6 @@ static struct inet_protosw rawv6_protosw = {
.protocol = IPPROTO_IP, /* wild card */
.prot = &rawv6_prot,
.ops = &inet6_sockraw_ops,
- .no_check = UDP_CSUM_DEFAULT,
.flags = INET_PROTOSW_REUSE,
};
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index fba54a407bb..f23fbd28a50 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -84,9 +84,9 @@ static void ip6_dst_ifdown(struct dst_entry *,
static int ip6_dst_gc(struct dst_ops *ops);
static int ip6_pkt_discard(struct sk_buff *skb);
-static int ip6_pkt_discard_out(struct sk_buff *skb);
+static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb);
static int ip6_pkt_prohibit(struct sk_buff *skb);
-static int ip6_pkt_prohibit_out(struct sk_buff *skb);
+static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb);
static void ip6_link_failure(struct sk_buff *skb);
static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb, u32 mtu);
@@ -149,7 +149,8 @@ static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
unsigned long prev, new;
p = peer->metrics;
- if (inet_metrics_new(peer))
+ if (inet_metrics_new(peer) ||
+ (old & DST_METRICS_FORCE_OVERWRITE))
memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
new = (unsigned long) p;
@@ -289,7 +290,7 @@ static const struct rt6_info ip6_blk_hole_entry_template = {
.obsolete = DST_OBSOLETE_FORCE_CHK,
.error = -EINVAL,
.input = dst_discard,
- .output = dst_discard,
+ .output = dst_discard_sk,
},
.rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
.rt6i_protocol = RTPROT_KERNEL,
@@ -373,12 +374,6 @@ static bool rt6_check_expired(const struct rt6_info *rt)
return false;
}
-static bool rt6_need_strict(const struct in6_addr *daddr)
-{
- return ipv6_addr_type(daddr) &
- (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
-}
-
/* Multipath route selection:
* Hash based function using packet header and flowlabel.
* Adapted from fib_info_hashfn()
@@ -857,14 +852,15 @@ EXPORT_SYMBOL(rt6_lookup);
be destroyed.
*/
-static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
+static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
+ struct nlattr *mx, int mx_len)
{
int err;
struct fib6_table *table;
table = rt->rt6i_table;
write_lock_bh(&table->tb6_lock);
- err = fib6_add(&table->tb6_root, rt, info);
+ err = fib6_add(&table->tb6_root, rt, info, mx, mx_len);
write_unlock_bh(&table->tb6_lock);
return err;
@@ -875,7 +871,7 @@ int ip6_ins_rt(struct rt6_info *rt)
struct nl_info info = {
.nl_net = dev_net(rt->dst.dev),
};
- return __ip6_ins_rt(rt, &info);
+ return __ip6_ins_rt(rt, &info, NULL, 0);
}
static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
@@ -1062,7 +1058,7 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
new->__use = 1;
new->input = dst_discard;
- new->output = dst_discard;
+ new->output = dst_discard_sk;
if (dst_metrics_read_only(&ort->dst))
new->_metrics = ort->dst._metrics;
@@ -1180,7 +1176,7 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_oif = oif;
- fl6.flowi6_mark = mark;
+ fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
fl6.daddr = iph->daddr;
fl6.saddr = iph->saddr;
fl6.flowlabel = ip6_flowinfo(iph);
@@ -1277,6 +1273,7 @@ void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
struct flowi6 fl6;
memset(&fl6, 0, sizeof(fl6));
+ fl6.flowi6_iif = LOOPBACK_IFINDEX;
fl6.flowi6_oif = oif;
fl6.flowi6_mark = mark;
fl6.daddr = iph->daddr;
@@ -1298,6 +1295,7 @@ void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
struct flowi6 fl6;
memset(&fl6, 0, sizeof(fl6));
+ fl6.flowi6_iif = LOOPBACK_IFINDEX;
fl6.flowi6_oif = oif;
fl6.flowi6_mark = mark;
fl6.daddr = msg->dest;
@@ -1342,7 +1340,7 @@ static unsigned int ip6_mtu(const struct dst_entry *dst)
unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
if (mtu)
- return mtu;
+ goto out;
mtu = IPV6_MIN_MTU;
@@ -1352,7 +1350,8 @@ static unsigned int ip6_mtu(const struct dst_entry *dst)
mtu = idev->cnf.mtu6;
rcu_read_unlock();
- return mtu;
+out:
+ return min_t(unsigned int, mtu, IP6_MAX_MTU);
}
static struct dst_entry *icmp6_dst_gc_list;
@@ -1456,7 +1455,7 @@ static int ip6_dst_gc(struct dst_ops *ops)
goto out;
net->ipv6.ip6_rt_gc_expire++;
- fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, entries > rt_max_size);
+ fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
entries = dst_entries_get_slow(ops);
if (entries < ops->gc_thresh)
net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
@@ -1543,17 +1542,11 @@ int ip6_route_add(struct fib6_config *cfg)
ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
rt->rt6i_dst.plen = cfg->fc_dst_len;
- if (rt->rt6i_dst.plen == 128)
- rt->dst.flags |= DST_HOST;
-
- if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
- u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
- if (!metrics) {
- err = -ENOMEM;
- goto out;
- }
- dst_init_metrics(&rt->dst, metrics, 0);
+ if (rt->rt6i_dst.plen == 128) {
+ rt->dst.flags |= DST_HOST;
+ dst_metrics_set_force_overwrite(&rt->dst);
}
+
#ifdef CONFIG_IPV6_SUBTREES
ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
rt->rt6i_src.plen = cfg->fc_src_len;
@@ -1586,7 +1579,7 @@ int ip6_route_add(struct fib6_config *cfg)
switch (cfg->fc_type) {
case RTN_BLACKHOLE:
rt->dst.error = -EINVAL;
- rt->dst.output = dst_discard;
+ rt->dst.output = dst_discard_sk;
rt->dst.input = dst_discard;
break;
case RTN_PROHIBIT:
@@ -1672,31 +1665,13 @@ int ip6_route_add(struct fib6_config *cfg)
rt->rt6i_flags = cfg->fc_flags;
install_route:
- if (cfg->fc_mx) {
- struct nlattr *nla;
- int remaining;
-
- nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
- int type = nla_type(nla);
-
- if (type) {
- if (type > RTAX_MAX) {
- err = -EINVAL;
- goto out;
- }
-
- dst_metric_set(&rt->dst, type, nla_get_u32(nla));
- }
- }
- }
-
rt->dst.dev = dev;
rt->rt6i_idev = idev;
rt->rt6i_table = table;
cfg->fc_nlinfo.nl_net = dev_net(dev);
- return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
+ return __ip6_ins_rt(rt, &cfg->fc_nlinfo, cfg->fc_mx, cfg->fc_mx_len);
out:
if (dev)
@@ -2156,7 +2131,7 @@ static int ip6_pkt_discard(struct sk_buff *skb)
return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
}
-static int ip6_pkt_discard_out(struct sk_buff *skb)
+static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb)
{
skb->dev = skb_dst(skb)->dev;
return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
@@ -2167,7 +2142,7 @@ static int ip6_pkt_prohibit(struct sk_buff *skb)
return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
}
-static int ip6_pkt_prohibit_out(struct sk_buff *skb)
+static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb)
{
skb->dev = skb_dst(skb)->dev;
return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
@@ -2259,6 +2234,27 @@ void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
fib6_clean_all(net, fib6_remove_prefsrc, &adni);
}
+#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
+#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
+
+/* Remove routers and update dst entries when gateway turn into host. */
+static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
+{
+ struct in6_addr *gateway = (struct in6_addr *)arg;
+
+ if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
+ ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
+ ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
+ return -1;
+ }
+ return 0;
+}
+
+void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
+{
+ fib6_clean_all(net, fib6_clean_tohost, gateway);
+}
+
struct arg_dev_net {
struct net_device *dev;
struct net *net;
@@ -2734,6 +2730,9 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh)
if (tb[RTA_OIF])
oif = nla_get_u32(tb[RTA_OIF]);
+ if (tb[RTA_MARK])
+ fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
+
if (iif) {
struct net_device *dev;
int flags = 0;
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index b4d74c86586..4f408176dc6 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -560,12 +560,12 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
ipv4_update_pmtu(skb, dev_net(skb->dev), info,
- t->dev->ifindex, 0, IPPROTO_IPV6, 0);
+ t->parms.link, 0, IPPROTO_IPV6, 0);
err = 0;
goto out;
}
if (type == ICMP_REDIRECT) {
- ipv4_redirect(skb, dev_net(skb->dev), t->dev->ifindex, 0,
+ ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
IPPROTO_IPV6, 0);
err = 0;
goto out;
@@ -974,8 +974,9 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
goto out;
}
- err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, IPPROTO_IPV6, tos,
- ttl, df, !net_eq(tunnel->net, dev_net(dev)));
+ err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr,
+ IPPROTO_IPV6, tos, ttl, df,
+ !net_eq(tunnel->net, dev_net(dev)));
iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
return NETDEV_TX_OK;
@@ -1126,8 +1127,8 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
int err = 0;
struct ip_tunnel_parm p;
struct ip_tunnel_prl prl;
- struct ip_tunnel *t;
- struct net *net = dev_net(dev);
+ struct ip_tunnel *t = netdev_priv(dev);
+ struct net *net = t->net;
struct sit_net *sitn = net_generic(net, sit_net_id);
#ifdef CONFIG_IPV6_SIT_6RD
struct ip_tunnel_6rd ip6rd;
@@ -1138,16 +1139,15 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
#ifdef CONFIG_IPV6_SIT_6RD
case SIOCGET6RD:
#endif
- t = NULL;
if (dev == sitn->fb_tunnel_dev) {
if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
err = -EFAULT;
break;
}
t = ipip6_tunnel_locate(net, &p, 0);
+ if (t == NULL)
+ t = netdev_priv(dev);
}
- if (t == NULL)
- t = netdev_priv(dev);
err = -EFAULT;
if (cmd == SIOCGETTUNNEL) {
@@ -1243,9 +1243,6 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
err = -EINVAL;
if (dev == sitn->fb_tunnel_dev)
goto done;
- err = -ENOENT;
- if (!(t = netdev_priv(dev)))
- goto done;
err = ipip6_tunnel_get_prl(t, ifr->ifr_ifru.ifru_data);
break;
@@ -1261,9 +1258,6 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
err = -EFAULT;
if (copy_from_user(&prl, ifr->ifr_ifru.ifru_data, sizeof(prl)))
goto done;
- err = -ENOENT;
- if (!(t = netdev_priv(dev)))
- goto done;
switch (cmd) {
case SIOCDELPRL:
@@ -1291,8 +1285,6 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
sizeof(ip6rd)))
goto done;
- t = netdev_priv(dev);
-
if (cmd != SIOCDEL6RD) {
err = ipip6_tunnel_update_6rd(t, &ip6rd);
if (err < 0)
@@ -1363,7 +1355,6 @@ static void ipip6_tunnel_setup(struct net_device *dev)
static int ipip6_tunnel_init(struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
- int i;
tunnel->dev = dev;
tunnel->net = dev_net(dev);
@@ -1372,16 +1363,10 @@ static int ipip6_tunnel_init(struct net_device *dev)
memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
ipip6_tunnel_bind_dev(dev);
- dev->tstats = alloc_percpu(struct pcpu_sw_netstats);
+ dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
if (!dev->tstats)
return -ENOMEM;
- for_each_possible_cpu(i) {
- struct pcpu_sw_netstats *ipip6_tunnel_stats;
- ipip6_tunnel_stats = per_cpu_ptr(dev->tstats, i);
- u64_stats_init(&ipip6_tunnel_stats->syncp);
- }
-
tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
if (!tunnel->dst_cache) {
free_percpu(dev->tstats);
@@ -1397,7 +1382,6 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev)
struct iphdr *iph = &tunnel->parms.iph;
struct net *net = dev_net(dev);
struct sit_net *sitn = net_generic(net, sit_net_id);
- int i;
tunnel->dev = dev;
tunnel->net = dev_net(dev);
@@ -1408,16 +1392,10 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev)
iph->ihl = 5;
iph->ttl = 64;
- dev->tstats = alloc_percpu(struct pcpu_sw_netstats);
+ dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
if (!dev->tstats)
return -ENOMEM;
- for_each_possible_cpu(i) {
- struct pcpu_sw_netstats *ipip6_fb_stats;
- ipip6_fb_stats = per_cpu_ptr(dev->tstats, i);
- u64_stats_init(&ipip6_fb_stats->syncp);
- }
-
tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
if (!tunnel->dst_cache) {
free_percpu(dev->tstats);
@@ -1850,4 +1828,5 @@ xfrm_tunnel_failed:
module_init(sit_init);
module_exit(sit_cleanup);
MODULE_LICENSE("GPL");
+MODULE_ALIAS_RTNL_LINK("sit");
MODULE_ALIAS_NETDEV("sit0");
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index bb53a5e73c1..a822b880689 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -216,6 +216,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
ireq->ir_iif = inet6_iif(skb);
+ ireq->ir_mark = inet_request_mark(sk, skb);
+
req->expires = 0UL;
req->num_retrans = 0;
ireq->ecn_ok = ecn_ok;
@@ -242,7 +244,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
final_p = fl6_update_dst(&fl6, np->opt, &final);
fl6.saddr = ireq->ir_v6_loc_addr;
fl6.flowi6_oif = sk->sk_bound_dev_if;
- fl6.flowi6_mark = sk->sk_mark;
+ fl6.flowi6_mark = ireq->ir_mark;
fl6.fl6_dport = ireq->ir_rmt_port;
fl6.fl6_sport = inet_sk(sk)->inet_sport;
security_req_classify_flow(req, flowi6_to_flowi(&fl6));
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 7f405a16882..058f3eca2e5 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -38,6 +38,13 @@ static struct ctl_table ipv6_table_template[] = {
.mode = 0644,
.proc_handler = proc_dointvec
},
+ {
+ .procname = "fwmark_reflect",
+ .data = &init_net.ipv6.sysctl.fwmark_reflect,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
{ }
};
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 889079b2ea8..229239ad96b 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -39,7 +39,7 @@
#include <linux/ipsec.h>
#include <linux/times.h>
#include <linux/slab.h>
-
+#include <linux/uaccess.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>
@@ -65,8 +65,6 @@
#include <net/tcp_memcontrol.h>
#include <net/busy_poll.h>
-#include <asm/uaccess.h>
-
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
@@ -342,7 +340,8 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
struct sock *sk;
int err;
struct tcp_sock *tp;
- __u32 seq;
+ struct request_sock *fastopen;
+ __u32 seq, snd_una;
struct net *net = dev_net(skb->dev);
sk = inet6_lookup(net, &tcp_hashinfo, &hdr->daddr,
@@ -373,8 +372,11 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
tp = tcp_sk(sk);
seq = ntohl(th->seq);
+ /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
+ fastopen = tp->fastopen_rsk;
+ snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
if (sk->sk_state != TCP_LISTEN &&
- !between(seq, tp->snd_una, tp->snd_nxt)) {
+ !between(seq, snd_una, tp->snd_nxt)) {
NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
goto out;
}
@@ -438,8 +440,13 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
goto out;
case TCP_SYN_SENT:
- case TCP_SYN_RECV: /* Cannot happen.
- It can, it SYNs are crossed. --ANK */
+ case TCP_SYN_RECV:
+ /* Only in fast or simultaneous open. If a fast open socket is
+ * already accepted it is treated as a connected one below.
+ */
+ if (fastopen && fastopen->sk == NULL)
+ break;
+
if (!sock_owned_by_user(sk)) {
sk->sk_err = err;
sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
@@ -465,7 +472,8 @@ out:
static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
struct flowi6 *fl6,
struct request_sock *req,
- u16 queue_mapping)
+ u16 queue_mapping,
+ struct tcp_fastopen_cookie *foc)
{
struct inet_request_sock *ireq = inet_rsk(req);
struct ipv6_pinfo *np = inet6_sk(sk);
@@ -476,7 +484,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
if (!dst && (dst = inet6_csk_route_req(sk, fl6, req)) == NULL)
goto done;
- skb = tcp_make_synack(sk, dst, req, NULL);
+ skb = tcp_make_synack(sk, dst, req, foc);
if (skb) {
__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
@@ -500,9 +508,11 @@ static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req)
struct flowi6 fl6;
int res;
- res = tcp_v6_send_synack(sk, NULL, &fl6, req, 0);
- if (!res)
+ res = tcp_v6_send_synack(sk, NULL, &fl6, req, 0, NULL);
+ if (!res) {
TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
+ }
return res;
}
@@ -530,8 +540,8 @@ static struct tcp_md5sig_key *tcp_v6_reqsk_md5_lookup(struct sock *sk,
return tcp_v6_md5_do_lookup(sk, &inet_rsk(req)->ir_v6_rmt_addr);
}
-static int tcp_v6_parse_md5_keys (struct sock *sk, char __user *optval,
- int optlen)
+static int tcp_v6_parse_md5_keys(struct sock *sk, char __user *optval,
+ int optlen)
{
struct tcp_md5sig cmd;
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
@@ -715,7 +725,7 @@ struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
.send_ack = tcp_v6_reqsk_send_ack,
.destructor = tcp_v6_reqsk_destructor,
.send_reset = tcp_v6_send_reset,
- .syn_ack_timeout = tcp_syn_ack_timeout,
+ .syn_ack_timeout = tcp_syn_ack_timeout,
};
#ifdef CONFIG_TCP_MD5SIG
@@ -726,7 +736,7 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
#endif
static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
- u32 tsval, u32 tsecr,
+ u32 tsval, u32 tsecr, int oif,
struct tcp_md5sig_key *key, int rst, u8 tclass,
u32 label)
{
@@ -798,8 +808,11 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
fl6.flowi6_proto = IPPROTO_TCP;
- if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL)
+ if (rt6_need_strict(&fl6.daddr) && !oif)
fl6.flowi6_oif = inet6_iif(skb);
+ else
+ fl6.flowi6_oif = oif;
+ fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark);
fl6.fl6_dport = t1->dest;
fl6.fl6_sport = t1->source;
security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
@@ -833,6 +846,7 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
int genhash;
struct sock *sk1 = NULL;
#endif
+ int oif;
if (th->rst)
return;
@@ -876,7 +890,8 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
(th->doff << 2);
- tcp_v6_send_response(skb, seq, ack_seq, 0, 0, 0, key, 1, 0, 0);
+ oif = sk ? sk->sk_bound_dev_if : 0;
+ tcp_v6_send_response(skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, 0);
#ifdef CONFIG_TCP_MD5SIG
release_sk1:
@@ -888,11 +903,11 @@ release_sk1:
}
static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
- u32 win, u32 tsval, u32 tsecr,
+ u32 win, u32 tsval, u32 tsecr, int oif,
struct tcp_md5sig_key *key, u8 tclass,
u32 label)
{
- tcp_v6_send_response(skb, seq, ack, win, tsval, tsecr, key, 0, tclass,
+ tcp_v6_send_response(skb, seq, ack, win, tsval, tsecr, oif, key, 0, tclass,
label);
}
@@ -904,7 +919,7 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
tcp_time_stamp + tcptw->tw_ts_offset,
- tcptw->tw_ts_recent, tcp_twsk_md5_key(tcptw),
+ tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
tw->tw_tclass, (tw->tw_flowlabel << 12));
inet_twsk_put(tw);
@@ -913,8 +928,13 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
struct request_sock *req)
{
- tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1,
- req->rcv_wnd, tcp_time_stamp, req->ts_recent,
+ /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
+ * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
+ */
+ tcp_v6_send_ack(skb, (sk->sk_state == TCP_LISTEN) ?
+ tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
+ tcp_rsk(req)->rcv_nxt,
+ req->rcv_wnd, tcp_time_stamp, req->ts_recent, sk->sk_bound_dev_if,
tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr),
0, 0);
}
@@ -965,8 +985,10 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
struct tcp_sock *tp = tcp_sk(sk);
__u32 isn = TCP_SKB_CB(skb)->when;
struct dst_entry *dst = NULL;
+ struct tcp_fastopen_cookie foc = { .len = -1 };
+ bool want_cookie = false, fastopen;
struct flowi6 fl6;
- bool want_cookie = false;
+ int err;
if (skb->protocol == htons(ETH_P_IP))
return tcp_v4_conn_request(sk, skb);
@@ -997,7 +1019,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
tcp_clear_options(&tmp_opt);
tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
tmp_opt.user_mss = tp->rx_opt.user_mss;
- tcp_parse_options(skb, &tmp_opt, 0, NULL);
+ tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc);
if (want_cookie && !tmp_opt.saw_tstamp)
tcp_clear_options(&tmp_opt);
@@ -1012,6 +1034,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
TCP_ECN_create_request(req, skb, sock_net(sk));
ireq->ir_iif = sk->sk_bound_dev_if;
+ ireq->ir_mark = inet_request_mark(sk, skb);
/* So that link locals have meaning */
if (!sk->sk_bound_dev_if &&
@@ -1070,19 +1093,27 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
isn = tcp_v6_init_sequence(skb);
}
have_isn:
- tcp_rsk(req)->snt_isn = isn;
if (security_inet_conn_request(sk, skb, req))
goto drop_and_release;
- if (tcp_v6_send_synack(sk, dst, &fl6, req,
- skb_get_queue_mapping(skb)) ||
- want_cookie)
+ if (!dst && (dst = inet6_csk_route_req(sk, &fl6, req)) == NULL)
goto drop_and_free;
+ tcp_rsk(req)->snt_isn = isn;
tcp_rsk(req)->snt_synack = tcp_time_stamp;
- tcp_rsk(req)->listener = NULL;
- inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
+ tcp_openreq_init_rwin(req, sk, dst);
+ fastopen = !want_cookie &&
+ tcp_try_fastopen(sk, skb, req, &foc, dst);
+ err = tcp_v6_send_synack(sk, dst, &fl6, req,
+ skb_get_queue_mapping(skb), &foc);
+ if (!fastopen) {
+ if (err || want_cookie)
+ goto drop_and_free;
+
+ tcp_rsk(req)->listener = NULL;
+ inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
+ }
return 0;
drop_and_release:
@@ -1259,7 +1290,8 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
#ifdef CONFIG_TCP_MD5SIG
/* Copy over the MD5 key from the original socket */
- if ((key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr)) != NULL) {
+ key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr);
+ if (key != NULL) {
/* We're using one, so create a matching key
* on the newsk structure. If we fail to get
* memory, then we end up not copying the key
@@ -1289,26 +1321,6 @@ out:
return NULL;
}
-static __sum16 tcp_v6_checksum_init(struct sk_buff *skb)
-{
- if (skb->ip_summed == CHECKSUM_COMPLETE) {
- if (!tcp_v6_check(skb->len, &ipv6_hdr(skb)->saddr,
- &ipv6_hdr(skb)->daddr, skb->csum)) {
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- return 0;
- }
- }
-
- skb->csum = ~csum_unfold(tcp_v6_check(skb->len,
- &ipv6_hdr(skb)->saddr,
- &ipv6_hdr(skb)->daddr, 0));
-
- if (skb->len <= 76) {
- return __skb_checksum_complete(skb);
- }
- return 0;
-}
-
/* The socket must have it's spinlock held when we get
* here.
*
@@ -1335,7 +1347,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
return tcp_v4_do_rcv(sk, skb);
#ifdef CONFIG_TCP_MD5SIG
- if (tcp_v6_inbound_md5_hash (sk, skb))
+ if (tcp_v6_inbound_md5_hash(sk, skb))
goto discard;
#endif
@@ -1482,7 +1494,7 @@ static int tcp_v6_rcv(struct sk_buff *skb)
if (!pskb_may_pull(skb, th->doff*4))
goto discard_it;
- if (!skb_csum_unnecessary(skb) && tcp_v6_checksum_init(skb))
+ if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
goto csum_error;
th = tcp_hdr(skb);
@@ -1602,7 +1614,8 @@ do_time_wait:
break;
case TCP_TW_RST:
goto no_tcp_socket;
- case TCP_TW_SUCCESS:;
+ case TCP_TW_SUCCESS:
+ ;
}
goto discard_it;
}
@@ -1647,7 +1660,7 @@ static void tcp_v6_early_demux(struct sk_buff *skb)
static struct timewait_sock_ops tcp6_timewait_sock_ops = {
.twsk_obj_size = sizeof(struct tcp6_timewait_sock),
.twsk_unique = tcp_twsk_unique,
- .twsk_destructor= tcp_twsk_destructor,
+ .twsk_destructor = tcp_twsk_destructor,
};
static const struct inet_connection_sock_af_ops ipv6_specific = {
@@ -1681,7 +1694,6 @@ static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
/*
* TCP over IPv4 via INET6 API
*/
-
static const struct inet_connection_sock_af_ops ipv6_mapped = {
.queue_xmit = ip_queue_xmit,
.send_check = tcp_v4_send_check,
@@ -1775,6 +1787,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
const struct inet_sock *inet = inet_sk(sp);
const struct tcp_sock *tp = tcp_sk(sp);
const struct inet_connection_sock *icsk = inet_csk(sp);
+ struct fastopen_queue *fastopenq = icsk->icsk_accept_queue.fastopenq;
dest = &sp->sk_v6_daddr;
src = &sp->sk_v6_rcv_saddr;
@@ -1817,7 +1830,9 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
jiffies_to_clock_t(icsk->icsk_ack.ato),
(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
tp->snd_cwnd,
- tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh
+ sp->sk_state == TCP_LISTEN ?
+ (fastopenq ? fastopenq->max_qlen : 0) :
+ (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
);
}
@@ -1977,7 +1992,6 @@ static struct inet_protosw tcpv6_protosw = {
.protocol = IPPROTO_TCP,
.prot = &tcpv6_prot,
.ops = &inet6_stream_ops,
- .no_check = 0,
.flags = INET_PROTOSW_PERMANENT |
INET_PROTOSW_ICSK,
};
diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c
index 0d78132ff18..01b0ff9a0c2 100644
--- a/net/ipv6/tcpv6_offload.c
+++ b/net/ipv6/tcpv6_offload.c
@@ -42,7 +42,7 @@ static struct sk_buff **tcp6_gro_receive(struct sk_buff **head,
if (NAPI_GRO_CB(skb)->flush)
goto skip_csum;
- wsum = skb->csum;
+ wsum = NAPI_GRO_CB(skb)->csum;
switch (skb->ip_summed) {
case CHECKSUM_NONE:
@@ -73,7 +73,7 @@ static int tcp6_gro_complete(struct sk_buff *skb, int thoff)
th->check = ~tcp_v6_check(skb->len - thoff, &iph->saddr,
&iph->daddr, 0);
- skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
+ skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV6;
return tcp_gro_complete(skb);
}
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 1e586d92260..7092ff78fd8 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -634,6 +634,10 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
if (skb->len > sizeof(struct udphdr) && encap_rcv != NULL) {
int ret;
+ /* Verify checksum before giving to encap */
+ if (udp_lib_checksum_complete(skb))
+ goto csum_error;
+
ret = encap_rcv(sk, skb);
if (ret <= 0) {
UDP_INC_STATS_BH(sock_net(sk),
@@ -670,8 +674,11 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
goto csum_error;
}
- if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf))
+ if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf)) {
+ UDP6_INC_STATS_BH(sock_net(sk),
+ UDP_MIB_RCVBUFERRORS, is_udplite);
goto drop;
+ }
skb_dst_drop(skb);
@@ -686,6 +693,7 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
bh_unlock_sock(sk);
return rc;
+
csum_error:
UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);
drop:
@@ -701,17 +709,16 @@ static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk,
int dif)
{
struct hlist_nulls_node *node;
- struct sock *s = sk;
unsigned short num = ntohs(loc_port);
- sk_nulls_for_each_from(s, node) {
- struct inet_sock *inet = inet_sk(s);
+ sk_nulls_for_each_from(sk, node) {
+ struct inet_sock *inet = inet_sk(sk);
- if (!net_eq(sock_net(s), net))
+ if (!net_eq(sock_net(sk), net))
continue;
- if (udp_sk(s)->udp_port_hash == num &&
- s->sk_family == PF_INET6) {
+ if (udp_sk(sk)->udp_port_hash == num &&
+ sk->sk_family == PF_INET6) {
if (inet->inet_dport) {
if (inet->inet_dport != rmt_port)
continue;
@@ -720,16 +727,16 @@ static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk,
!ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr))
continue;
- if (s->sk_bound_dev_if && s->sk_bound_dev_if != dif)
+ if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)
continue;
if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr))
continue;
}
- if (!inet6_mc_check(s, loc_addr, rmt_addr))
+ if (!inet6_mc_check(sk, loc_addr, rmt_addr))
continue;
- return s;
+ return sk;
}
}
return NULL;
@@ -760,6 +767,17 @@ static void flush_stack(struct sock **stack, unsigned int count,
if (unlikely(skb1))
kfree_skb(skb1);
}
+
+static void udp6_csum_zero_error(struct sk_buff *skb)
+{
+ /* RFC 2460 section 8.1 says that we SHOULD log
+ * this error. Well, it is reasonable.
+ */
+ LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0 for [%pI6c]:%u->[%pI6c]:%u\n",
+ &ipv6_hdr(skb)->saddr, ntohs(udp_hdr(skb)->source),
+ &ipv6_hdr(skb)->daddr, ntohs(udp_hdr(skb)->dest));
+}
+
/*
* Note: called only from the BH handler context,
* so we don't need to lock the hashes.
@@ -779,7 +797,12 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
dif = inet6_iif(skb);
sk = udp_v6_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif);
while (sk) {
- stack[count++] = sk;
+ /* If zero checksum and no_check is not on for
+ * the socket then skip it.
+ */
+ if (uh->check || udp_sk(sk)->no_check6_rx)
+ stack[count++] = sk;
+
sk = udp_v6_mcast_next(net, sk_nulls_next(sk), uh->dest, daddr,
uh->source, saddr, dif);
if (unlikely(count == ARRAY_SIZE(stack))) {
@@ -867,6 +890,12 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
if (sk != NULL) {
int ret;
+ if (!uh->check && !udp_sk(sk)->no_check6_rx) {
+ sock_put(sk);
+ udp6_csum_zero_error(skb);
+ goto csum_error;
+ }
+
ret = udpv6_queue_rcv_skb(sk, skb);
sock_put(sk);
@@ -879,6 +908,11 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
return 0;
}
+ if (!uh->check) {
+ udp6_csum_zero_error(skb);
+ goto csum_error;
+ }
+
if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
goto discard;
@@ -1006,7 +1040,10 @@ static int udp_v6_push_pending_frames(struct sock *sk)
if (is_udplite)
csum = udplite_csum_outgoing(sk, skb);
- else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
+ else if (up->no_check6_tx) { /* UDP csum disabled */
+ skb->ip_summed = CHECKSUM_NONE;
+ goto send;
+ } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
udp6_hwcsum_outgoing(sk, skb, &fl6->saddr, &fl6->daddr,
up->len);
goto send;
@@ -1232,14 +1269,8 @@ do_udp_sendmsg:
goto out;
}
- if (hlimit < 0) {
- if (ipv6_addr_is_multicast(&fl6.daddr))
- hlimit = np->mcast_hops;
- else
- hlimit = np->hop_limit;
- if (hlimit < 0)
- hlimit = ip6_dst_hoplimit(dst);
- }
+ if (hlimit < 0)
+ hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
if (tclass < 0)
tclass = np->tclass;
@@ -1479,7 +1510,6 @@ static struct inet_protosw udpv6_protosw = {
.protocol = IPPROTO_UDP,
.prot = &udpv6_prot,
.ops = &inet6_dgram_ops,
- .no_check = UDP_CSUM_DEFAULT,
.flags = INET_PROTOSW_PERMANENT,
};
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index b261ee8b83f..0ae3d98f83e 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -63,7 +63,9 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
if (unlikely(type & ~(SKB_GSO_UDP |
SKB_GSO_DODGY |
SKB_GSO_UDP_TUNNEL |
+ SKB_GSO_UDP_TUNNEL_CSUM |
SKB_GSO_GRE |
+ SKB_GSO_GRE_CSUM |
SKB_GSO_IPIP |
SKB_GSO_SIT |
SKB_GSO_MPLS) ||
@@ -76,7 +78,8 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
goto out;
}
- if (skb->encapsulation && skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL)
+ if (skb->encapsulation && skb_shinfo(skb)->gso_type &
+ (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))
segs = skb_udp_tunnel_segment(skb, features);
else {
/* Do software UFO. Complete and fill in the UDP checksum as HW cannot
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index dfcc4be4689..9cf097e206e 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -64,7 +64,6 @@ static struct inet_protosw udplite6_protosw = {
.protocol = IPPROTO_UDPLITE,
.prot = &udplitev6_prot,
.ops = &inet6_dgram_ops,
- .no_check = 0,
.flags = INET_PROTOSW_PERMANENT,
};
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index cb04f7a16b5..901ef6f8add 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -18,65 +18,6 @@
#include <net/ipv6.h>
#include <net/xfrm.h>
-/* Informational hook. The decap is still done here. */
-static struct xfrm_tunnel_notifier __rcu *rcv_notify_handlers __read_mostly;
-static DEFINE_MUTEX(xfrm6_mode_tunnel_input_mutex);
-
-int xfrm6_mode_tunnel_input_register(struct xfrm_tunnel_notifier *handler)
-{
- struct xfrm_tunnel_notifier __rcu **pprev;
- struct xfrm_tunnel_notifier *t;
- int ret = -EEXIST;
- int priority = handler->priority;
-
- mutex_lock(&xfrm6_mode_tunnel_input_mutex);
-
- for (pprev = &rcv_notify_handlers;
- (t = rcu_dereference_protected(*pprev,
- lockdep_is_held(&xfrm6_mode_tunnel_input_mutex))) != NULL;
- pprev = &t->next) {
- if (t->priority > priority)
- break;
- if (t->priority == priority)
- goto err;
-
- }
-
- handler->next = *pprev;
- rcu_assign_pointer(*pprev, handler);
-
- ret = 0;
-
-err:
- mutex_unlock(&xfrm6_mode_tunnel_input_mutex);
- return ret;
-}
-EXPORT_SYMBOL_GPL(xfrm6_mode_tunnel_input_register);
-
-int xfrm6_mode_tunnel_input_deregister(struct xfrm_tunnel_notifier *handler)
-{
- struct xfrm_tunnel_notifier __rcu **pprev;
- struct xfrm_tunnel_notifier *t;
- int ret = -ENOENT;
-
- mutex_lock(&xfrm6_mode_tunnel_input_mutex);
- for (pprev = &rcv_notify_handlers;
- (t = rcu_dereference_protected(*pprev,
- lockdep_is_held(&xfrm6_mode_tunnel_input_mutex))) != NULL;
- pprev = &t->next) {
- if (t == handler) {
- *pprev = handler->next;
- ret = 0;
- break;
- }
- }
- mutex_unlock(&xfrm6_mode_tunnel_input_mutex);
- synchronize_net();
-
- return ret;
-}
-EXPORT_SYMBOL_GPL(xfrm6_mode_tunnel_input_deregister);
-
static inline void ipip6_ecn_decapsulate(struct sk_buff *skb)
{
const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
@@ -130,7 +71,6 @@ static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
{
- struct xfrm_tunnel_notifier *handler;
int err = -EINVAL;
if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPV6)
@@ -138,9 +78,6 @@ static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
goto out;
- for_each_input_rcu(rcv_notify_handlers, handler)
- handler->handler(skb);
-
err = skb_unclone(skb, GFP_ATOMIC);
if (err)
goto out;
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 6cd625e3770..433672d07d0 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -78,7 +78,7 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb)
if (mtu < IPV6_MIN_MTU)
mtu = IPV6_MIN_MTU;
- if (!skb->local_df && skb->len > mtu) {
+ if (!skb->ignore_df && skb->len > mtu) {
skb->dev = dst->dev;
if (xfrm6_local_dontfrag(skb))
@@ -114,13 +114,7 @@ int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
if (err)
return err;
- memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
-#ifdef CONFIG_NETFILTER
- IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED;
-#endif
-
- skb->protocol = htons(ETH_P_IPV6);
- skb->local_df = 1;
+ skb->ignore_df = 1;
return x->outer_mode->output2(x, skb);
}
@@ -128,11 +122,13 @@ EXPORT_SYMBOL(xfrm6_prepare_output);
int xfrm6_output_finish(struct sk_buff *skb)
{
+ memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
+ skb->protocol = htons(ETH_P_IPV6);
+
#ifdef CONFIG_NETFILTER
IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED;
#endif
- skb->protocol = htons(ETH_P_IPV6);
return xfrm_output(skb);
}
@@ -142,6 +138,13 @@ static int __xfrm6_output(struct sk_buff *skb)
struct xfrm_state *x = dst->xfrm;
int mtu;
+#ifdef CONFIG_NETFILTER
+ if (!x) {
+ IP6CB(skb)->flags |= IP6SKB_REROUTED;
+ return dst_output(skb);
+ }
+#endif
+
if (skb->protocol == htons(ETH_P_IPV6))
mtu = ip6_skb_dst_mtu(skb);
else
@@ -150,7 +153,7 @@ static int __xfrm6_output(struct sk_buff *skb)
if (skb->len > mtu && xfrm6_local_dontfrag(skb)) {
xfrm6_local_rxpmtu(skb, mtu);
return -EMSGSIZE;
- } else if (!skb->local_df && skb->len > mtu && skb->sk) {
+ } else if (!skb->ignore_df && skb->len > mtu && skb->sk) {
xfrm_local_error(skb, mtu);
return -EMSGSIZE;
}
@@ -163,8 +166,9 @@ static int __xfrm6_output(struct sk_buff *skb)
return x->outer_mode->afinfo->output_finish(skb);
}
-int xfrm6_output(struct sk_buff *skb)
+int xfrm6_output(struct sock *sk, struct sk_buff *skb)
{
- return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL,
- skb_dst(skb)->dev, __xfrm6_output);
+ return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb,
+ NULL, skb_dst(skb)->dev, __xfrm6_output,
+ !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 5f8e128c512..2a0bbda2c76 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -389,11 +389,17 @@ int __init xfrm6_init(void)
if (ret)
goto out_policy;
+ ret = xfrm6_protocol_init();
+ if (ret)
+ goto out_state;
+
#ifdef CONFIG_SYSCTL
register_pernet_subsys(&xfrm6_net_ops);
#endif
out:
return ret;
+out_state:
+ xfrm6_state_fini();
out_policy:
xfrm6_policy_fini();
goto out;
@@ -404,6 +410,7 @@ void xfrm6_fini(void)
#ifdef CONFIG_SYSCTL
unregister_pernet_subsys(&xfrm6_net_ops);
#endif
+ xfrm6_protocol_fini();
xfrm6_policy_fini();
xfrm6_state_fini();
dst_entries_destroy(&xfrm6_dst_ops);
diff --git a/net/ipv6/xfrm6_protocol.c b/net/ipv6/xfrm6_protocol.c
new file mode 100644
index 00000000000..54d13f8dbba
--- /dev/null
+++ b/net/ipv6/xfrm6_protocol.c
@@ -0,0 +1,279 @@
+/* xfrm6_protocol.c - Generic xfrm protocol multiplexer for ipv6.
+ *
+ * Copyright (C) 2013 secunet Security Networks AG
+ *
+ * Author:
+ * Steffen Klassert <steffen.klassert@secunet.com>
+ *
+ * Based on:
+ * net/ipv4/xfrm4_protocol.c
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/init.h>
+#include <linux/mutex.h>
+#include <linux/skbuff.h>
+#include <linux/icmpv6.h>
+#include <net/ipv6.h>
+#include <net/protocol.h>
+#include <net/xfrm.h>
+
+/* Held by the register/deregister paths while a handler list is edited. */
+static DEFINE_MUTEX(xfrm6_protocol_mutex);
+/* Heads of the per-protocol handler lists, looked up via proto_handlers(). */
+static struct xfrm6_protocol __rcu *ipcomp6_handlers __read_mostly;
+static struct xfrm6_protocol __rcu *ah6_handlers __read_mostly;
+static struct xfrm6_protocol __rcu *esp6_handlers __read_mostly;
+
+/*
+ * Map an IP protocol number to the head of its handler list.
+ * Returns NULL for anything other than IPPROTO_ESP, IPPROTO_AH or
+ * IPPROTO_COMP.
+ */
+static inline struct xfrm6_protocol __rcu **proto_handlers(u8 protocol)
+{
+	if (protocol == IPPROTO_ESP)
+		return &esp6_handlers;
+	if (protocol == IPPROTO_AH)
+		return &ah6_handlers;
+	if (protocol == IPPROTO_COMP)
+		return &ipcomp6_handlers;
+
+	return NULL;
+}
+
+/*
+ * Walk the handler list that starts at @head, binding each entry to
+ * @handler in turn.  The head and every ->next link are read with
+ * rcu_dereference().  The definition deliberately ends without a
+ * line-continuation so the following source line is not spliced into it.
+ */
+#define for_each_protocol_rcu(head, handler)			\
+	for ((handler) = rcu_dereference(head);			\
+	     (handler) != NULL;					\
+	     (handler) = rcu_dereference((handler)->next))
+
+/*
+ * Invoke the cb_handler of each handler registered for @protocol, passing
+ * along @skb and @err.  The walk stops at the first callback whose result
+ * is <= 0 and that result is returned.  0 is returned when @protocol has no
+ * handler list or when no callback ended the walk.
+ */
+int xfrm6_rcv_cb(struct sk_buff *skb, u8 protocol, int err)
+{
+	struct xfrm6_protocol __rcu **head = proto_handlers(protocol);
+	struct xfrm6_protocol *handler;
+	int ret;
+
+	if (!head)
+		return 0;
+
+	for_each_protocol_rcu(*head, handler) {
+		ret = handler->cb_handler(skb, err);
+		if (ret <= 0)
+			return ret;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(xfrm6_rcv_cb);
+
+/*
+ * Receive entry point installed in esp6_protocol.  Clears the ip6 tunnel
+ * pointer in the skb control block, then offers the packet to each ESP
+ * handler in list order; the first result other than -EINVAL is returned.
+ * When every handler answers -EINVAL (or the list is empty) an
+ * ICMPV6_DEST_UNREACH / ICMPV6_PORT_UNREACH message is sent, the skb is
+ * freed and 0 is returned.
+ */
+static int xfrm6_esp_rcv(struct sk_buff *skb)
+{
+	struct xfrm6_protocol *handler;
+	int ret;
+
+	XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL;
+
+	for_each_protocol_rcu(esp6_handlers, handler) {
+		ret = handler->handler(skb);
+		if (ret != -EINVAL)
+			return ret;
+	}
+
+	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
+	kfree_skb(skb);
+
+	return 0;
+}
+
+/*
+ * Error entry point installed in esp6_protocol.  Passes the error to each
+ * ESP handler's err_handler in list order and stops after the first one
+ * that returns 0.
+ */
+static void xfrm6_esp_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+			  u8 type, u8 code, int offset, __be32 info)
+{
+	struct xfrm6_protocol *handler;
+
+	for_each_protocol_rcu(esp6_handlers, handler) {
+		if (handler->err_handler(skb, opt, type, code,
+					 offset, info) == 0)
+			return;
+	}
+}
+
+/*
+ * Receive entry point installed in ah6_protocol.  Resets the ip6 tunnel
+ * pointer in the skb control block and tries the AH handlers one after
+ * another; any result except -EINVAL ends the walk and is returned.  If no
+ * handler takes the packet, ICMPV6_DEST_UNREACH / ICMPV6_PORT_UNREACH is
+ * sent, the skb is freed and 0 is returned.
+ */
+static int xfrm6_ah_rcv(struct sk_buff *skb)
+{
+	struct xfrm6_protocol *handler;
+	int ret;
+
+	XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL;
+
+	for_each_protocol_rcu(ah6_handlers, handler) {
+		ret = handler->handler(skb);
+		if (ret != -EINVAL)
+			return ret;
+	}
+
+	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
+	kfree_skb(skb);
+
+	return 0;
+}
+
+/*
+ * Error entry point installed in ah6_protocol.  Hands the error to each AH
+ * handler's err_handler in list order until one of them returns 0.
+ */
+static void xfrm6_ah_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+			 u8 type, u8 code, int offset, __be32 info)
+{
+	struct xfrm6_protocol *handler;
+
+	for_each_protocol_rcu(ah6_handlers, handler) {
+		if (handler->err_handler(skb, opt, type, code,
+					 offset, info) == 0)
+			return;
+	}
+}
+
+/*
+ * Receive entry point installed in ipcomp6_protocol.  Resets the ip6
+ * tunnel pointer in the skb control block and walks the IPcomp handlers;
+ * the first handler result that is not -EINVAL is returned.  Otherwise
+ * ICMPV6_DEST_UNREACH / ICMPV6_PORT_UNREACH is sent, the skb is freed and
+ * 0 is returned.
+ */
+static int xfrm6_ipcomp_rcv(struct sk_buff *skb)
+{
+	struct xfrm6_protocol *handler;
+	int ret;
+
+	XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL;
+
+	for_each_protocol_rcu(ipcomp6_handlers, handler) {
+		ret = handler->handler(skb);
+		if (ret != -EINVAL)
+			return ret;
+	}
+
+	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
+	kfree_skb(skb);
+
+	return 0;
+}
+
+/*
+ * Error entry point installed in ipcomp6_protocol.  Offers the error to
+ * each IPcomp handler's err_handler in list order and stops at the first
+ * one that returns 0.
+ */
+static void xfrm6_ipcomp_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+			     u8 type, u8 code, int offset, __be32 info)
+{
+	struct xfrm6_protocol *handler;
+
+	for_each_protocol_rcu(ipcomp6_handlers, handler) {
+		if (handler->err_handler(skb, opt, type, code,
+					 offset, info) == 0)
+			return;
+	}
+}
+
+/* inet6 protocol entry for ESP; netproto() returns it for IPPROTO_ESP. */
+static const struct inet6_protocol esp6_protocol = {
+	.flags		= INET6_PROTO_NOPOLICY,
+	.err_handler	= xfrm6_esp_err,
+	.handler	= xfrm6_esp_rcv,
+};
+
+/* inet6 protocol entry for AH; netproto() returns it for IPPROTO_AH. */
+static const struct inet6_protocol ah6_protocol = {
+	.flags		= INET6_PROTO_NOPOLICY,
+	.err_handler	= xfrm6_ah_err,
+	.handler	= xfrm6_ah_rcv,
+};
+
+/* inet6 protocol entry for IPcomp; netproto() returns it for IPPROTO_COMP. */
+static const struct inet6_protocol ipcomp6_protocol = {
+	.flags		= INET6_PROTO_NOPOLICY,
+	.err_handler	= xfrm6_ipcomp_err,
+	.handler	= xfrm6_ipcomp_rcv,
+};
+
+/*
+ * AF_INET6 input afinfo whose callback is xfrm6_rcv_cb(); it is passed to
+ * xfrm_input_register_afinfo() / xfrm_input_unregister_afinfo() by the
+ * init and fini functions of this file.
+ */
+static struct xfrm_input_afinfo xfrm6_input_afinfo = {
+	.callback	= xfrm6_rcv_cb,
+	.owner		= THIS_MODULE,
+	.family		= AF_INET6,
+};
+
+/*
+ * Map an IP protocol number to the inet6_protocol entry defined in this
+ * file for it.  Returns NULL for anything other than IPPROTO_ESP,
+ * IPPROTO_AH or IPPROTO_COMP.
+ */
+static inline const struct inet6_protocol *netproto(unsigned char protocol)
+{
+	if (protocol == IPPROTO_ESP)
+		return &esp6_protocol;
+	if (protocol == IPPROTO_AH)
+		return &ah6_protocol;
+	if (protocol == IPPROTO_COMP)
+		return &ipcomp6_protocol;
+
+	return NULL;
+}
+
+/*
+ * Add @handler to the handler list of @protocol.
+ *
+ * The list is kept in descending ->priority order: @handler is linked in
+ * front of the first entry whose priority is lower than its own.  When the
+ * list was empty beforehand, the matching inet6_protocol entry is also
+ * installed with inet6_add_protocol().
+ *
+ * Returns 0 on success, -EINVAL if @protocol is not ESP, AH or IPcomp,
+ * -EEXIST if an entry with the same priority is already registered, and
+ * -EAGAIN if inet6_add_protocol() fails.  In the -EAGAIN case @handler is
+ * unlinked again before returning, so a caller that treats the call as
+ * failed may release the handler without leaving it on the list.
+ */
+int xfrm6_protocol_register(struct xfrm6_protocol *handler,
+			    unsigned char protocol)
+{
+	struct xfrm6_protocol __rcu **head = proto_handlers(protocol);
+	struct xfrm6_protocol __rcu **pprev;
+	struct xfrm6_protocol *t;
+	bool add_netproto = false;
+	bool unlinked = false;
+	int ret = -EEXIST;
+	int priority = handler->priority;
+
+	if (!head || !netproto(protocol))
+		return -EINVAL;
+
+	mutex_lock(&xfrm6_protocol_mutex);
+
+	/* First handler for this protocol: the inet6 hook must be added too. */
+	if (!rcu_dereference_protected(*head,
+				       lockdep_is_held(&xfrm6_protocol_mutex)))
+		add_netproto = true;
+
+	for (pprev = head;
+	     (t = rcu_dereference_protected(*pprev,
+			lockdep_is_held(&xfrm6_protocol_mutex))) != NULL;
+	     pprev = &t->next) {
+		if (t->priority < priority)
+			break;
+		if (t->priority == priority)
+			goto out;
+	}
+
+	handler->next = *pprev;
+	rcu_assign_pointer(*pprev, handler);
+
+	ret = 0;
+
+	/*
+	 * Install the inet6 hook while still holding the mutex so that a
+	 * concurrent deregister cannot interleave with it.
+	 */
+	if (add_netproto &&
+	    inet6_add_protocol(netproto(protocol), protocol)) {
+		pr_err("%s: can't add protocol\n", __func__);
+		/* Roll back: nothing else changed the list under the mutex. */
+		*pprev = handler->next;
+		unlinked = true;
+		ret = -EAGAIN;
+	}
+
+out:
+	mutex_unlock(&xfrm6_protocol_mutex);
+
+	/* Let readers that may already have seen @handler finish. */
+	if (unlinked)
+		synchronize_net();
+
+	return ret;
+}
+EXPORT_SYMBOL(xfrm6_protocol_register);
+
+/*
+ * Remove @handler from the handler list of @protocol.
+ *
+ * After the search, if the list is empty, the matching inet6_protocol
+ * entry is removed with inet6_del_protocol().  synchronize_net() is always
+ * called before returning.
+ *
+ * Returns 0 if @handler was found and unlinked, -ENOENT if it was not on
+ * the list, -EINVAL if @protocol is not ESP, AH or IPcomp, and -EAGAIN if
+ * inet6_del_protocol() reports a failure.
+ */
+int xfrm6_protocol_deregister(struct xfrm6_protocol *handler,
+			      unsigned char protocol)
+{
+	struct xfrm6_protocol __rcu **head = proto_handlers(protocol);
+	struct xfrm6_protocol __rcu **pprev;
+	struct xfrm6_protocol *t;
+	int ret = -ENOENT;
+
+	if (!head || !netproto(protocol))
+		return -EINVAL;
+
+	mutex_lock(&xfrm6_protocol_mutex);
+
+	pprev = head;
+	while ((t = rcu_dereference_protected(*pprev,
+			lockdep_is_held(&xfrm6_protocol_mutex))) != NULL) {
+		if (t == handler) {
+			*pprev = handler->next;
+			ret = 0;
+			break;
+		}
+		pprev = &t->next;
+	}
+
+	/* Nothing left on the list: drop the inet6 hook as well. */
+	if (!rcu_dereference_protected(*head,
+				       lockdep_is_held(&xfrm6_protocol_mutex)) &&
+	    inet6_del_protocol(netproto(protocol), protocol) < 0) {
+		pr_err("%s: can't remove protocol\n", __func__);
+		ret = -EAGAIN;
+	}
+
+	mutex_unlock(&xfrm6_protocol_mutex);
+
+	synchronize_net();
+
+	return ret;
+}
+EXPORT_SYMBOL(xfrm6_protocol_deregister);
+
+/*
+ * Pass xfrm6_input_afinfo to xfrm_input_register_afinfo() and hand its
+ * result back to the caller.
+ */
+int __init xfrm6_protocol_init(void)
+{
+	int err = xfrm_input_register_afinfo(&xfrm6_input_afinfo);
+
+	return err;
+}
+
+/* Counterpart of xfrm6_protocol_init(): unregister xfrm6_input_afinfo. */
+void xfrm6_protocol_fini(void)
+{
+	xfrm_input_unregister_afinfo(&xfrm6_input_afinfo);
+}
diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
index 00b2a6d1c00..91729b807c7 100644
--- a/net/ipx/af_ipx.c
+++ b/net/ipx/af_ipx.c
@@ -1353,7 +1353,7 @@ static int ipx_create(struct net *net, struct socket *sock, int protocol,
sk_refcnt_debug_inc(sk);
sock_init_data(sock, sk);
- sk->sk_no_check = 1; /* Checksum off by default */
+ sk->sk_no_check_tx = 1; /* Checksum off by default */
sock->ops = &ipx_dgram_ops;
rc = 0;
out:
@@ -1368,6 +1368,7 @@ static int ipx_release(struct socket *sock)
goto out;
lock_sock(sk);
+ sk->sk_shutdown = SHUTDOWN_MASK;
if (!sock_flag(sk, SOCK_DEAD))
sk->sk_state_change(sk);
@@ -1791,8 +1792,11 @@ static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock,
skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
flags & MSG_DONTWAIT, &rc);
- if (!skb)
+ if (!skb) {
+ if (rc == -EAGAIN && (sk->sk_shutdown & RCV_SHUTDOWN))
+ rc = 0;
goto out;
+ }
ipx = ipx_hdr(skb);
copied = ntohs(ipx->ipx_pktsize) - sizeof(struct ipxhdr);
@@ -1922,6 +1926,26 @@ static int ipx_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long
}
#endif
+/*
+ * shutdown() handler for IPX sockets.  Rejects a @mode outside
+ * SHUT_RD..SHUT_RDWR with -EINVAL; otherwise ORs the corresponding bits
+ * into sk->sk_shutdown under the socket lock, calls the socket's
+ * sk_state_change callback and returns 0.
+ */
+static int ipx_shutdown(struct socket *sock, int mode)
+{
+	struct sock *sk = sock->sk;
+	int how;
+
+	if (!(mode >= SHUT_RD && mode <= SHUT_RDWR))
+		return -EINVAL;
+
+	/* Adding one converts the SHUT_* value into the sk_shutdown bits:
+	 *   SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
+	 *   SHUT_WR   (1) -> SEND_SHUTDOWN (2)
+	 *   SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
+	 */
+	how = mode + 1;
+
+	lock_sock(sk);
+	sk->sk_shutdown |= how;
+	release_sock(sk);
+
+	sk->sk_state_change(sk);
+
+	return 0;
+}
/*
* Socket family declarations
@@ -1948,7 +1972,7 @@ static const struct proto_ops ipx_dgram_ops = {
.compat_ioctl = ipx_compat_ioctl,
#endif
.listen = sock_no_listen,
- .shutdown = sock_no_shutdown, /* FIXME: support shutdown */
+ .shutdown = ipx_shutdown,
.setsockopt = ipx_setsockopt,
.getsockopt = ipx_getsockopt,
.sendmsg = ipx_sendmsg,
diff --git a/net/ipx/ipx_route.c b/net/ipx/ipx_route.c
index c1f03185c5e..67e7ad3d46b 100644
--- a/net/ipx/ipx_route.c
+++ b/net/ipx/ipx_route.c
@@ -236,7 +236,8 @@ int ipxrtr_route_packet(struct sock *sk, struct sockaddr_ipx *usipx,
}
/* Apply checksum. Not allowed on 802.3 links. */
- if (sk->sk_no_check || intrfc->if_dlink_type == htons(IPX_FRAME_8023))
+ if (sk->sk_no_check_tx ||
+ intrfc->if_dlink_type == htons(IPX_FRAME_8023))
ipx->ipx_checksum = htons(0xFFFF);
else
ipx->ipx_checksum = ipx_cksum(ipx, len + sizeof(struct ipxhdr));
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index c4b7218058b..7a95fa4a3de 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -682,6 +682,18 @@ struct sock *iucv_accept_dequeue(struct sock *parent, struct socket *newsock)
return NULL;
}
+/*
+ * Pick an automatic source name for @iucv: format successive values of
+ * iucv_sk_list.autobind_name as eight hex digits until
+ * __iucv_get_sock_by_name() finds no socket using the result, then copy
+ * those eight characters into iucv->src_name.
+ */
+static void __iucv_auto_name(struct iucv_sock *iucv)
+{
+	char name[12];
+
+	do {
+		sprintf(name, "%08x",
+			atomic_inc_return(&iucv_sk_list.autobind_name));
+	} while (__iucv_get_sock_by_name(name));
+
+	memcpy(iucv->src_name, name, 8);
+}
+
/* Bind an unbound socket */
static int iucv_sock_bind(struct socket *sock, struct sockaddr *addr,
int addr_len)
@@ -724,8 +736,12 @@ static int iucv_sock_bind(struct socket *sock, struct sockaddr *addr,
rcu_read_lock();
for_each_netdev_rcu(&init_net, dev) {
if (!memcmp(dev->perm_addr, uid, 8)) {
- memcpy(iucv->src_name, sa->siucv_name, 8);
memcpy(iucv->src_user_id, sa->siucv_user_id, 8);
+ /* Check for uninitialized siucv_name */
+ if (strncmp(sa->siucv_name, " ", 8) == 0)
+ __iucv_auto_name(iucv);
+ else
+ memcpy(iucv->src_name, sa->siucv_name, 8);
sk->sk_bound_dev_if = dev->ifindex;
iucv->hs_dev = dev;
dev_hold(dev);
@@ -763,7 +779,6 @@ done:
static int iucv_sock_autobind(struct sock *sk)
{
struct iucv_sock *iucv = iucv_sk(sk);
- char name[12];
int err = 0;
if (unlikely(!pr_iucv))
@@ -772,17 +787,9 @@ static int iucv_sock_autobind(struct sock *sk)
memcpy(iucv->src_user_id, iucv_userid, 8);
write_lock_bh(&iucv_sk_list.lock);
-
- sprintf(name, "%08x", atomic_inc_return(&iucv_sk_list.autobind_name));
- while (__iucv_get_sock_by_name(name)) {
- sprintf(name, "%08x",
- atomic_inc_return(&iucv_sk_list.autobind_name));
- }
-
+ __iucv_auto_name(iucv);
write_unlock_bh(&iucv_sk_list.lock);
- memcpy(&iucv->src_name, name, 8);
-
if (!iucv->msglimit)
iucv->msglimit = IUCV_QUEUELEN_DEFAULT;
@@ -1382,6 +1389,7 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
if (sk->sk_type == SOCK_STREAM) {
if (copied < rlen) {
IUCV_SKB_CB(skb)->offset = offset + copied;
+ skb_queue_head(&sk->sk_receive_queue, skb);
goto done;
}
}
@@ -1756,7 +1764,7 @@ static int iucv_callback_connreq(struct iucv_path *path,
/* Wake up accept */
nsk->sk_state = IUCV_CONNECTED;
- sk->sk_data_ready(sk, 1);
+ sk->sk_data_ready(sk);
err = 0;
fail:
bh_unlock_sock(sk);
@@ -1829,7 +1837,7 @@ static void iucv_callback_txdone(struct iucv_path *path,
spin_lock_irqsave(&list->lock, flags);
while (list_skb != (struct sk_buff *)list) {
- if (msg->tag != IUCV_SKB_CB(list_skb)->tag) {
+ if (msg->tag == IUCV_SKB_CB(list_skb)->tag) {
this = list_skb;
break;
}
@@ -1935,11 +1943,10 @@ static int afiucv_hs_callback_syn(struct sock *sk, struct sk_buff *skb)
sk_acceptq_is_full(sk) ||
!nsk) {
/* error on server socket - connection refused */
- if (nsk)
- sk_free(nsk);
afiucv_swap_src_dest(skb);
trans_hdr->flags = AF_IUCV_FLAG_SYN | AF_IUCV_FLAG_FIN;
err = dev_queue_xmit(skb);
+ iucv_sock_kill(nsk);
bh_unlock_sock(sk);
goto out;
}
@@ -1967,7 +1974,7 @@ static int afiucv_hs_callback_syn(struct sock *sk, struct sk_buff *skb)
if (!err) {
iucv_accept_enqueue(sk, nsk);
nsk->sk_state = IUCV_CONNECTED;
- sk->sk_data_ready(sk, 1);
+ sk->sk_data_ready(sk);
} else
iucv_sock_kill(nsk);
bh_unlock_sock(sk);
diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index cd5b8ec9be0..da787930df0 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -621,6 +621,42 @@ static void iucv_disable(void)
put_online_cpus();
}
+/*
+ * Free the three per-cpu buffers of @cpu (interrupt data and both
+ * parameter blocks) and reset their slots to NULL.
+ */
+static void free_iucv_data(int cpu)
+{
+	kfree(iucv_irq_data[cpu]);
+	kfree(iucv_param[cpu]);
+	kfree(iucv_param_irq[cpu]);
+
+	iucv_irq_data[cpu] = NULL;
+	iucv_param[cpu] = NULL;
+	iucv_param_irq[cpu] = NULL;
+}
+
+/*
+ * Allocate the three per-cpu buffers of @cpu on that cpu's node.
+ * Returns 0 on success.  On the first failed allocation everything is
+ * released through free_iucv_data() and -ENOMEM is returned.
+ */
+static int alloc_iucv_data(int cpu)
+{
+	/* Note: GFP_DMA used to get memory below 2G */
+	const gfp_t gfp = GFP_KERNEL | GFP_DMA;
+	const int node = cpu_to_node(cpu);
+
+	iucv_irq_data[cpu] = kmalloc_node(sizeof(struct iucv_irq_data),
+					  gfp, node);
+	if (!iucv_irq_data[cpu])
+		goto fail;
+
+	/* Allocate parameter blocks. */
+	iucv_param[cpu] = kmalloc_node(sizeof(union iucv_param), gfp, node);
+	if (!iucv_param[cpu])
+		goto fail;
+
+	iucv_param_irq[cpu] = kmalloc_node(sizeof(union iucv_param),
+					   gfp, node);
+	if (!iucv_param_irq[cpu])
+		goto fail;
+
+	return 0;
+
+fail:
+	free_iucv_data(cpu);
+	return -ENOMEM;
+}
+
static int iucv_cpu_notify(struct notifier_block *self,
unsigned long action, void *hcpu)
{
@@ -630,38 +666,14 @@ static int iucv_cpu_notify(struct notifier_block *self,
switch (action) {
case CPU_UP_PREPARE:
case CPU_UP_PREPARE_FROZEN:
- iucv_irq_data[cpu] = kmalloc_node(sizeof(struct iucv_irq_data),
- GFP_KERNEL|GFP_DMA, cpu_to_node(cpu));
- if (!iucv_irq_data[cpu])
- return notifier_from_errno(-ENOMEM);
-
- iucv_param[cpu] = kmalloc_node(sizeof(union iucv_param),
- GFP_KERNEL|GFP_DMA, cpu_to_node(cpu));
- if (!iucv_param[cpu]) {
- kfree(iucv_irq_data[cpu]);
- iucv_irq_data[cpu] = NULL;
+ if (alloc_iucv_data(cpu))
return notifier_from_errno(-ENOMEM);
- }
- iucv_param_irq[cpu] = kmalloc_node(sizeof(union iucv_param),
- GFP_KERNEL|GFP_DMA, cpu_to_node(cpu));
- if (!iucv_param_irq[cpu]) {
- kfree(iucv_param[cpu]);
- iucv_param[cpu] = NULL;
- kfree(iucv_irq_data[cpu]);
- iucv_irq_data[cpu] = NULL;
- return notifier_from_errno(-ENOMEM);
- }
break;
case CPU_UP_CANCELED:
case CPU_UP_CANCELED_FROZEN:
case CPU_DEAD:
case CPU_DEAD_FROZEN:
- kfree(iucv_param_irq[cpu]);
- iucv_param_irq[cpu] = NULL;
- kfree(iucv_param[cpu]);
- iucv_param[cpu] = NULL;
- kfree(iucv_irq_data[cpu]);
- iucv_irq_data[cpu] = NULL;
+ free_iucv_data(cpu);
break;
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
@@ -2016,7 +2028,7 @@ static int __init iucv_init(void)
rc = iucv_query_maxconn();
if (rc)
goto out_ctl;
- rc = register_external_interrupt(0x4000, iucv_external_interrupt);
+ rc = register_external_irq(EXT_IRQ_IUCV, iucv_external_interrupt);
if (rc)
goto out_ctl;
iucv_root = root_device_register("iucv");
@@ -2025,33 +2037,20 @@ static int __init iucv_init(void)
goto out_int;
}
- for_each_online_cpu(cpu) {
- /* Note: GFP_DMA used to get memory below 2G */
- iucv_irq_data[cpu] = kmalloc_node(sizeof(struct iucv_irq_data),
- GFP_KERNEL|GFP_DMA, cpu_to_node(cpu));
- if (!iucv_irq_data[cpu]) {
- rc = -ENOMEM;
- goto out_free;
- }
+ cpu_notifier_register_begin();
- /* Allocate parameter blocks. */
- iucv_param[cpu] = kmalloc_node(sizeof(union iucv_param),
- GFP_KERNEL|GFP_DMA, cpu_to_node(cpu));
- if (!iucv_param[cpu]) {
- rc = -ENOMEM;
- goto out_free;
- }
- iucv_param_irq[cpu] = kmalloc_node(sizeof(union iucv_param),
- GFP_KERNEL|GFP_DMA, cpu_to_node(cpu));
- if (!iucv_param_irq[cpu]) {
+ for_each_online_cpu(cpu) {
+ if (alloc_iucv_data(cpu)) {
rc = -ENOMEM;
goto out_free;
}
-
}
- rc = register_hotcpu_notifier(&iucv_cpu_notifier);
+ rc = __register_hotcpu_notifier(&iucv_cpu_notifier);
if (rc)
goto out_free;
+
+ cpu_notifier_register_done();
+
rc = register_reboot_notifier(&iucv_reboot_notifier);
if (rc)
goto out_cpu;
@@ -2069,19 +2068,17 @@ static int __init iucv_init(void)
out_reboot:
unregister_reboot_notifier(&iucv_reboot_notifier);
out_cpu:
- unregister_hotcpu_notifier(&iucv_cpu_notifier);
+ cpu_notifier_register_begin();
+ __unregister_hotcpu_notifier(&iucv_cpu_notifier);
out_free:
- for_each_possible_cpu(cpu) {
- kfree(iucv_param_irq[cpu]);
- iucv_param_irq[cpu] = NULL;
- kfree(iucv_param[cpu]);
- iucv_param[cpu] = NULL;
- kfree(iucv_irq_data[cpu]);
- iucv_irq_data[cpu] = NULL;
- }
+ for_each_possible_cpu(cpu)
+ free_iucv_data(cpu);
+
+ cpu_notifier_register_done();
+
root_device_unregister(iucv_root);
out_int:
- unregister_external_interrupt(0x4000, iucv_external_interrupt);
+ unregister_external_irq(EXT_IRQ_IUCV, iucv_external_interrupt);
out_ctl:
ctl_clear_bit(0, 1);
out:
@@ -2105,18 +2102,14 @@ static void __exit iucv_exit(void)
kfree(p);
spin_unlock_irq(&iucv_queue_lock);
unregister_reboot_notifier(&iucv_reboot_notifier);
- unregister_hotcpu_notifier(&iucv_cpu_notifier);
- for_each_possible_cpu(cpu) {
- kfree(iucv_param_irq[cpu]);
- iucv_param_irq[cpu] = NULL;
- kfree(iucv_param[cpu]);
- iucv_param[cpu] = NULL;
- kfree(iucv_irq_data[cpu]);
- iucv_irq_data[cpu] = NULL;
- }
+ cpu_notifier_register_begin();
+ __unregister_hotcpu_notifier(&iucv_cpu_notifier);
+ for_each_possible_cpu(cpu)
+ free_iucv_data(cpu);
+ cpu_notifier_register_done();
root_device_unregister(iucv_root);
bus_unregister(&iucv_bus);
- unregister_external_interrupt(0x4000, iucv_external_interrupt);
+ unregister_external_irq(EXT_IRQ_IUCV, iucv_external_interrupt);
}
subsys_initcall(iucv_init);
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 79326978517..ba2a2f95911 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -205,7 +205,7 @@ static int pfkey_broadcast_one(struct sk_buff *skb, struct sk_buff **skb2,
if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) {
skb_set_owner_r(*skb2, sk);
skb_queue_tail(&sk->sk_receive_queue, *skb2);
- sk->sk_data_ready(sk, (*skb2)->len);
+ sk->sk_data_ready(sk);
*skb2 = NULL;
err = 0;
}
@@ -365,6 +365,7 @@ static const u8 sadb_ext_min_len[] = {
[SADB_X_EXT_NAT_T_OA] = (u8) sizeof(struct sadb_address),
[SADB_X_EXT_SEC_CTX] = (u8) sizeof(struct sadb_x_sec_ctx),
[SADB_X_EXT_KMADDRESS] = (u8) sizeof(struct sadb_x_kmaddress),
+ [SADB_X_EXT_FILTER] = (u8) sizeof(struct sadb_x_filter),
};
/* Verify sadb_address_{len,prefixlen} against sa_family. */
@@ -1475,9 +1476,7 @@ static int pfkey_add(struct sock *sk, struct sk_buff *skb, const struct sadb_msg
else
err = xfrm_state_update(x);
- xfrm_audit_state_add(x, err ? 0 : 1,
- audit_get_loginuid(current),
- audit_get_sessionid(current), 0);
+ xfrm_audit_state_add(x, err ? 0 : 1, true);
if (err < 0) {
x->km.state = XFRM_STATE_DEAD;
@@ -1531,9 +1530,7 @@ static int pfkey_delete(struct sock *sk, struct sk_buff *skb, const struct sadb_
c.event = XFRM_MSG_DELSA;
km_state_notify(x, &c);
out:
- xfrm_audit_state_delete(x, err ? 0 : 1,
- audit_get_loginuid(current),
- audit_get_sessionid(current), 0);
+ xfrm_audit_state_delete(x, err ? 0 : 1, true);
xfrm_state_put(x);
return err;
@@ -1725,17 +1722,13 @@ static int pfkey_flush(struct sock *sk, struct sk_buff *skb, const struct sadb_m
struct net *net = sock_net(sk);
unsigned int proto;
struct km_event c;
- struct xfrm_audit audit_info;
int err, err2;
proto = pfkey_satype2proto(hdr->sadb_msg_satype);
if (proto == 0)
return -EINVAL;
- audit_info.loginuid = audit_get_loginuid(current);
- audit_info.sessionid = audit_get_sessionid(current);
- audit_info.secid = 0;
- err = xfrm_state_flush(net, proto, &audit_info);
+ err = xfrm_state_flush(net, proto, true);
err2 = unicast_flush_resp(sk, hdr);
if (err || err2) {
if (err == -ESRCH) /* empty table - go quietly */
@@ -1799,6 +1792,7 @@ static void pfkey_dump_sa_done(struct pfkey_sock *pfk)
static int pfkey_dump(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs)
{
u8 proto;
+ struct xfrm_address_filter *filter = NULL;
struct pfkey_sock *pfk = pfkey_sk(sk);
if (pfk->dump.dump != NULL)
@@ -1808,11 +1802,27 @@ static int pfkey_dump(struct sock *sk, struct sk_buff *skb, const struct sadb_ms
if (proto == 0)
return -EINVAL;
+ if (ext_hdrs[SADB_X_EXT_FILTER - 1]) {
+ struct sadb_x_filter *xfilter = ext_hdrs[SADB_X_EXT_FILTER - 1];
+
+ filter = kmalloc(sizeof(*filter), GFP_KERNEL);
+ if (filter == NULL)
+ return -ENOMEM;
+
+ memcpy(&filter->saddr, &xfilter->sadb_x_filter_saddr,
+ sizeof(xfrm_address_t));
+ memcpy(&filter->daddr, &xfilter->sadb_x_filter_daddr,
+ sizeof(xfrm_address_t));
+ filter->family = xfilter->sadb_x_filter_family;
+ filter->splen = xfilter->sadb_x_filter_splen;
+ filter->dplen = xfilter->sadb_x_filter_dplen;
+ }
+
pfk->dump.msg_version = hdr->sadb_msg_version;
pfk->dump.msg_portid = hdr->sadb_msg_pid;
pfk->dump.dump = pfkey_dump_sa;
pfk->dump.done = pfkey_dump_sa_done;
- xfrm_state_walk_init(&pfk->dump.u.state, proto);
+ xfrm_state_walk_init(&pfk->dump.u.state, proto, filter);
return pfkey_do_dump(pfk);
}
@@ -2270,9 +2280,7 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, const struct sadb_
err = xfrm_policy_insert(pol->sadb_x_policy_dir-1, xp,
hdr->sadb_msg_type != SADB_X_SPDUPDATE);
- xfrm_audit_policy_add(xp, err ? 0 : 1,
- audit_get_loginuid(current),
- audit_get_sessionid(current), 0);
+ xfrm_audit_policy_add(xp, err ? 0 : 1, true);
if (err)
goto out;
@@ -2354,9 +2362,7 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sa
if (xp == NULL)
return -ENOENT;
- xfrm_audit_policy_delete(xp, err ? 0 : 1,
- audit_get_loginuid(current),
- audit_get_sessionid(current), 0);
+ xfrm_audit_policy_delete(xp, err ? 0 : 1, true);
if (err)
goto out;
@@ -2535,7 +2541,7 @@ static int pfkey_migrate(struct sock *sk, struct sk_buff *skb,
sel.sport_mask = htons(0xffff);
/* set destination address info of selector */
- sa = ext_hdrs[SADB_EXT_ADDRESS_DST - 1],
+ sa = ext_hdrs[SADB_EXT_ADDRESS_DST - 1];
pfkey_sadb_addr2xfrm_addr(sa, &sel.daddr);
sel.prefixlen_d = sa->sadb_address_prefixlen;
sel.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto);
@@ -2604,9 +2610,7 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, const struct sadb_
return -ENOENT;
if (delete) {
- xfrm_audit_policy_delete(xp, err ? 0 : 1,
- audit_get_loginuid(current),
- audit_get_sessionid(current), 0);
+ xfrm_audit_policy_delete(xp, err ? 0 : 1, true);
if (err)
goto out;
@@ -2715,13 +2719,9 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, const struct sad
{
struct net *net = sock_net(sk);
struct km_event c;
- struct xfrm_audit audit_info;
int err, err2;
- audit_info.loginuid = audit_get_loginuid(current);
- audit_info.sessionid = audit_get_sessionid(current);
- audit_info.secid = 0;
- err = xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, &audit_info);
+ err = xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, true);
err2 = unicast_flush_resp(sk, hdr);
if (err || err2) {
if (err == -ESRCH) /* empty table - old silent behavior */
@@ -3060,6 +3060,24 @@ static u32 get_acqseq(void)
return res;
}
+/*
+ * Report whether any socket in the pfkey table of the event's network
+ * namespace has its ->registered field set.  The table is walked under
+ * rcu_read_lock().
+ */
+static bool pfkey_is_alive(const struct km_event *c)
+{
+	struct netns_pfkey *net_pfkey = net_generic(c->net, pfkey_net_id);
+	bool found = false;
+	struct sock *sk;
+
+	rcu_read_lock();
+	sk_for_each_rcu(sk, &net_pfkey->table) {
+		if (!pfkey_sk(sk)->registered)
+			continue;
+		found = true;
+		break;
+	}
+	rcu_read_unlock();
+
+	return found;
+}
+
static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *xp)
{
struct sk_buff *skb;
@@ -3785,6 +3803,7 @@ static struct xfrm_mgr pfkeyv2_mgr =
.new_mapping = pfkey_send_new_mapping,
.notify_policy = pfkey_send_policy_notify,
.migrate = pfkey_send_migrate,
+ .is_alive = pfkey_is_alive,
};
static int __net_init pfkey_net_init(struct net *net)
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index c83827e7c32..bea25904320 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -495,52 +495,6 @@ out:
spin_unlock_bh(&session->reorder_q.lock);
}
-static inline int l2tp_verify_udp_checksum(struct sock *sk,
- struct sk_buff *skb)
-{
- struct udphdr *uh = udp_hdr(skb);
- u16 ulen = ntohs(uh->len);
- __wsum psum;
-
- if (sk->sk_no_check || skb_csum_unnecessary(skb))
- return 0;
-
-#if IS_ENABLED(CONFIG_IPV6)
- if (sk->sk_family == PF_INET6 && !l2tp_tunnel(sk)->v4mapped) {
- if (!uh->check) {
- LIMIT_NETDEBUG(KERN_INFO "L2TP: IPv6: checksum is 0\n");
- return 1;
- }
- if ((skb->ip_summed == CHECKSUM_COMPLETE) &&
- !csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
- &ipv6_hdr(skb)->daddr, ulen,
- IPPROTO_UDP, skb->csum)) {
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- return 0;
- }
- skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
- &ipv6_hdr(skb)->daddr,
- skb->len, IPPROTO_UDP,
- 0));
- } else
-#endif
- {
- struct inet_sock *inet;
- if (!uh->check)
- return 0;
- inet = inet_sk(sk);
- psum = csum_tcpudp_nofold(inet->inet_saddr, inet->inet_daddr,
- ulen, IPPROTO_UDP, 0);
-
- if ((skb->ip_summed == CHECKSUM_COMPLETE) &&
- !csum_fold(csum_add(psum, skb->csum)))
- return 0;
- skb->csum = psum;
- }
-
- return __skb_checksum_complete(skb);
-}
-
static int l2tp_seq_check_rx_window(struct l2tp_session *session, u32 nr)
{
u32 nws;
@@ -895,8 +849,7 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb,
u16 version;
int length;
- if (tunnel->sock && l2tp_verify_udp_checksum(tunnel->sock, skb))
- goto discard_bad_csum;
+ /* UDP has verified checksum */
/* UDP always verifies the packet length. */
__skb_pull(skb, sizeof(struct udphdr));
@@ -979,14 +932,6 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb,
return 0;
-discard_bad_csum:
- LIMIT_NETDEBUG("%s: UDP: bad checksum\n", tunnel->name);
- UDP_INC_STATS_USER(tunnel->l2tp_net, UDP_MIB_INERRORS, 0);
- atomic_long_inc(&tunnel->stats.rx_errors);
- kfree_skb(skb);
-
- return 0;
-
error:
/* Put UDP header back */
__skb_push(skb, sizeof(struct udphdr));
@@ -1128,13 +1073,13 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb,
}
/* Queue the packet to IP for output */
- skb->local_df = 1;
+ skb->ignore_df = 1;
#if IS_ENABLED(CONFIG_IPV6)
- if (skb->sk->sk_family == PF_INET6 && !tunnel->v4mapped)
- error = inet6_csk_xmit(skb, NULL);
+ if (tunnel->sock->sk_family == PF_INET6 && !tunnel->v4mapped)
+ error = inet6_csk_xmit(tunnel->sock, skb, NULL);
else
#endif
- error = ip_queue_xmit(skb, fl);
+ error = ip_queue_xmit(tunnel->sock, skb, fl);
/* Update stats */
if (error >= 0) {
@@ -1150,48 +1095,6 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb,
return 0;
}
-/* Automatically called when the skb is freed.
- */
-static void l2tp_sock_wfree(struct sk_buff *skb)
-{
- sock_put(skb->sk);
-}
-
-/* For data skbs that we transmit, we associate with the tunnel socket
- * but don't do accounting.
- */
-static inline void l2tp_skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
-{
- sock_hold(sk);
- skb->sk = sk;
- skb->destructor = l2tp_sock_wfree;
-}
-
-#if IS_ENABLED(CONFIG_IPV6)
-static void l2tp_xmit_ipv6_csum(struct sock *sk, struct sk_buff *skb,
- int udp_len)
-{
- struct ipv6_pinfo *np = inet6_sk(sk);
- struct udphdr *uh = udp_hdr(skb);
-
- if (!skb_dst(skb) || !skb_dst(skb)->dev ||
- !(skb_dst(skb)->dev->features & NETIF_F_IPV6_CSUM)) {
- __wsum csum = skb_checksum(skb, 0, udp_len, 0);
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- uh->check = csum_ipv6_magic(&np->saddr, &sk->sk_v6_daddr, udp_len,
- IPPROTO_UDP, csum);
- if (uh->check == 0)
- uh->check = CSUM_MANGLED_0;
- } else {
- skb->ip_summed = CHECKSUM_PARTIAL;
- skb->csum_start = skb_transport_header(skb) - skb->head;
- skb->csum_offset = offsetof(struct udphdr, check);
- uh->check = ~csum_ipv6_magic(&np->saddr, &sk->sk_v6_daddr,
- udp_len, IPPROTO_UDP, 0);
- }
-}
-#endif
-
/* If caller requires the skb to have a ppp header, the header must be
* inserted in the skb data before calling this function.
*/
@@ -1203,7 +1106,6 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len
struct flowi *fl;
struct udphdr *uh;
struct inet_sock *inet;
- __wsum csum;
int headroom;
int uhlen = (tunnel->encap == L2TP_ENCAPTYPE_UDP) ? sizeof(struct udphdr) : 0;
int udp_len;
@@ -1220,7 +1122,6 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len
return NET_XMIT_DROP;
}
- skb_orphan(skb);
/* Setup L2TP header */
session->build_header(session, __skb_push(skb, hdr_len));
@@ -1253,41 +1154,23 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len
uh->dest = inet->inet_dport;
udp_len = uhlen + hdr_len + data_len;
uh->len = htons(udp_len);
- uh->check = 0;
/* Calculate UDP checksum if configured to do so */
#if IS_ENABLED(CONFIG_IPV6)
if (sk->sk_family == PF_INET6 && !tunnel->v4mapped)
- l2tp_xmit_ipv6_csum(sk, skb, udp_len);
+ udp6_set_csum(udp_get_no_check6_tx(sk),
+ skb, &inet6_sk(sk)->saddr,
+ &sk->sk_v6_daddr, udp_len);
else
#endif
- if (sk->sk_no_check == UDP_CSUM_NOXMIT)
- skb->ip_summed = CHECKSUM_NONE;
- else if ((skb_dst(skb) && skb_dst(skb)->dev) &&
- (!(skb_dst(skb)->dev->features & NETIF_F_V4_CSUM))) {
- skb->ip_summed = CHECKSUM_COMPLETE;
- csum = skb_checksum(skb, 0, udp_len, 0);
- uh->check = csum_tcpudp_magic(inet->inet_saddr,
- inet->inet_daddr,
- udp_len, IPPROTO_UDP, csum);
- if (uh->check == 0)
- uh->check = CSUM_MANGLED_0;
- } else {
- skb->ip_summed = CHECKSUM_PARTIAL;
- skb->csum_start = skb_transport_header(skb) - skb->head;
- skb->csum_offset = offsetof(struct udphdr, check);
- uh->check = ~csum_tcpudp_magic(inet->inet_saddr,
- inet->inet_daddr,
- udp_len, IPPROTO_UDP, 0);
- }
+ udp_set_csum(sk->sk_no_check_tx, skb, inet->inet_saddr,
+ inet->inet_daddr, udp_len);
break;
case L2TP_ENCAPTYPE_IP:
break;
}
- l2tp_skb_set_owner_w(skb, sk);
-
l2tp_xmit_core(session, skb, fl, data_len);
out_unlock:
bh_unlock_sock(sk);
@@ -1510,6 +1393,11 @@ static int l2tp_tunnel_sock_create(struct net *net,
sizeof(udp6_addr), 0);
if (err < 0)
goto out;
+
+ if (cfg->udp6_zero_tx_checksums)
+ udp_set_no_check6_tx(sock->sk, true);
+ if (cfg->udp6_zero_rx_checksums)
+ udp_set_no_check6_rx(sock->sk, true);
} else
#endif
{
@@ -1538,7 +1426,7 @@ static int l2tp_tunnel_sock_create(struct net *net,
}
if (!cfg->use_udp_checksums)
- sock->sk->sk_no_check = UDP_CSUM_NOXMIT;
+ sock->sk->sk_no_check_tx = 1;
break;
@@ -1808,8 +1696,6 @@ void l2tp_session_free(struct l2tp_session *session)
}
kfree(session);
-
- return;
}
EXPORT_SYMBOL_GPL(l2tp_session_free);
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index 3f93ccd6ba9..68aa9ffd4ae 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -162,7 +162,9 @@ struct l2tp_tunnel_cfg {
#endif
u16 local_udp_port;
u16 peer_udp_port;
- unsigned int use_udp_checksums:1;
+ unsigned int use_udp_checksums:1,
+ udp6_zero_tx_checksums:1,
+ udp6_zero_rx_checksums:1;
};
struct l2tp_tunnel {
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 0b44d855269..369a9822488 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -487,7 +487,7 @@ static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m
xmit:
/* Queue the packet to IP for output */
- rc = ip_queue_xmit(skb, &inet->cork.fl);
+ rc = ip_queue_xmit(sk, skb, &inet->cork.fl);
rcu_read_unlock();
error:
@@ -606,7 +606,6 @@ static struct inet_protosw l2tp_ip_protosw = {
.protocol = IPPROTO_L2TP,
.prot = &l2tp_ip_prot,
.ops = &l2tp_ip_ops,
- .no_check = 0,
};
static struct net_protocol l2tp_ip_protocol __read_mostly = {
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 7704ea9502f..f3f98a156ce 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -605,14 +605,8 @@ static int l2tp_ip6_sendmsg(struct kiocb *iocb, struct sock *sk,
goto out;
}
- if (hlimit < 0) {
- if (ipv6_addr_is_multicast(&fl6.daddr))
- hlimit = np->mcast_hops;
- else
- hlimit = np->hop_limit;
- if (hlimit < 0)
- hlimit = ip6_dst_hoplimit(dst);
- }
+ if (hlimit < 0)
+ hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
if (tclass < 0)
tclass = np->tclass;
@@ -761,7 +755,6 @@ static struct inet_protosw l2tp_ip6_protosw = {
.protocol = IPPROTO_L2TP,
.prot = &l2tp_ip6_prot,
.ops = &l2tp_ip6_ops,
- .no_check = 0,
};
static struct inet6_protocol l2tp_ip6_protocol __read_mostly = {
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index bd7387adea9..0ac907adb2f 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -161,6 +161,13 @@ static int l2tp_nl_cmd_tunnel_create(struct sk_buff *skb, struct genl_info *info
cfg.peer_udp_port = nla_get_u16(info->attrs[L2TP_ATTR_UDP_DPORT]);
if (info->attrs[L2TP_ATTR_UDP_CSUM])
cfg.use_udp_checksums = nla_get_flag(info->attrs[L2TP_ATTR_UDP_CSUM]);
+
+#if IS_ENABLED(CONFIG_IPV6)
+ if (info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_TX])
+ cfg.udp6_zero_tx_checksums = nla_get_flag(info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_TX]);
+ if (info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_RX])
+ cfg.udp6_zero_rx_checksums = nla_get_flag(info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_RX]);
+#endif
}
if (info->attrs[L2TP_ATTR_DEBUG])
@@ -297,8 +304,7 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla
case L2TP_ENCAPTYPE_UDP:
if (nla_put_u16(skb, L2TP_ATTR_UDP_SPORT, ntohs(inet->inet_sport)) ||
nla_put_u16(skb, L2TP_ATTR_UDP_DPORT, ntohs(inet->inet_dport)) ||
- nla_put_u8(skb, L2TP_ATTR_UDP_CSUM,
- (sk->sk_no_check != UDP_CSUM_NOXMIT)))
+ nla_put_u8(skb, L2TP_ATTR_UDP_CSUM, !sk->sk_no_check_tx))
goto nla_put_failure;
/* NOBREAK */
case L2TP_ENCAPTYPE_IP:
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 5990919356a..13752d96275 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -456,13 +456,11 @@ static void pppol2tp_session_close(struct l2tp_session *session)
BUG_ON(session->magic != L2TP_SESSION_MAGIC);
-
if (sock) {
inet_shutdown(sock, 2);
/* Don't let the session go away before our socket does */
l2tp_session_inc_refcount(session);
}
- return;
}
/* Really kill the session socket. (Called from sock_put() if
@@ -476,7 +474,6 @@ static void pppol2tp_session_destruct(struct sock *sk)
BUG_ON(session->magic != L2TP_SESSION_MAGIC);
l2tp_session_dec_refcount(session);
}
- return;
}
/* Called when the PPPoX socket (session) is closed.
@@ -756,9 +753,9 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
session->deref = pppol2tp_session_sock_put;
/* If PMTU discovery was enabled, use the MTU that was discovered */
- dst = sk_dst_get(sk);
+ dst = sk_dst_get(tunnel->sock);
if (dst != NULL) {
- u32 pmtu = dst_mtu(__sk_dst_get(sk));
+ u32 pmtu = dst_mtu(__sk_dst_get(tunnel->sock));
if (pmtu != 0)
session->mtu = session->mru = pmtu -
PPPOL2TP_HEADER_OVERHEAD;
@@ -1368,7 +1365,7 @@ static int pppol2tp_setsockopt(struct socket *sock, int level, int optname,
int err;
if (level != SOL_PPPOL2TP)
- return udp_prot.setsockopt(sk, level, optname, optval, optlen);
+ return -EINVAL;
if (optlen < sizeof(int))
return -EINVAL;
@@ -1494,7 +1491,7 @@ static int pppol2tp_getsockopt(struct socket *sock, int level, int optname,
struct pppol2tp_session *ps;
if (level != SOL_PPPOL2TP)
- return udp_prot.getsockopt(sk, level, optname, optval, optlen);
+ return -EINVAL;
if (get_user(len, optlen))
return -EFAULT;
diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile
index 9d7d840aac6..1e46ffa6916 100644
--- a/net/mac80211/Makefile
+++ b/net/mac80211/Makefile
@@ -25,7 +25,8 @@ mac80211-y := \
wme.o \
event.o \
chan.o \
- trace.o mlme.o
+ trace.o mlme.o \
+ tdls.o
mac80211-$(CONFIG_MAC80211_LEDS) += led.o
mac80211-$(CONFIG_MAC80211_DEBUGFS) += \
diff --git a/net/mac80211/aes_ccm.c b/net/mac80211/aes_ccm.c
index 7c7df475a40..ec24378caaa 100644
--- a/net/mac80211/aes_ccm.c
+++ b/net/mac80211/aes_ccm.c
@@ -23,12 +23,13 @@ void ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
u8 *data, size_t data_len, u8 *mic)
{
struct scatterlist assoc, pt, ct[2];
- struct {
- struct aead_request req;
- u8 priv[crypto_aead_reqsize(tfm)];
- } aead_req;
- memset(&aead_req, 0, sizeof(aead_req));
+ char aead_req_data[sizeof(struct aead_request) +
+ crypto_aead_reqsize(tfm)]
+ __aligned(__alignof__(struct aead_request));
+ struct aead_request *aead_req = (void *) aead_req_data;
+
+ memset(aead_req, 0, sizeof(aead_req_data));
sg_init_one(&pt, data, data_len);
sg_init_one(&assoc, &aad[2], be16_to_cpup((__be16 *)aad));
@@ -36,23 +37,23 @@ void ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
sg_set_buf(&ct[0], data, data_len);
sg_set_buf(&ct[1], mic, IEEE80211_CCMP_MIC_LEN);
- aead_request_set_tfm(&aead_req.req, tfm);
- aead_request_set_assoc(&aead_req.req, &assoc, assoc.length);
- aead_request_set_crypt(&aead_req.req, &pt, ct, data_len, b_0);
+ aead_request_set_tfm(aead_req, tfm);
+ aead_request_set_assoc(aead_req, &assoc, assoc.length);
+ aead_request_set_crypt(aead_req, &pt, ct, data_len, b_0);
- crypto_aead_encrypt(&aead_req.req);
+ crypto_aead_encrypt(aead_req);
}
int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
u8 *data, size_t data_len, u8 *mic)
{
struct scatterlist assoc, pt, ct[2];
- struct {
- struct aead_request req;
- u8 priv[crypto_aead_reqsize(tfm)];
- } aead_req;
+ char aead_req_data[sizeof(struct aead_request) +
+ crypto_aead_reqsize(tfm)]
+ __aligned(__alignof__(struct aead_request));
+ struct aead_request *aead_req = (void *) aead_req_data;
- memset(&aead_req, 0, sizeof(aead_req));
+ memset(aead_req, 0, sizeof(aead_req_data));
sg_init_one(&pt, data, data_len);
sg_init_one(&assoc, &aad[2], be16_to_cpup((__be16 *)aad));
@@ -60,12 +61,12 @@ int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
sg_set_buf(&ct[0], data, data_len);
sg_set_buf(&ct[1], mic, IEEE80211_CCMP_MIC_LEN);
- aead_request_set_tfm(&aead_req.req, tfm);
- aead_request_set_assoc(&aead_req.req, &assoc, assoc.length);
- aead_request_set_crypt(&aead_req.req, ct, &pt,
+ aead_request_set_tfm(aead_req, tfm);
+ aead_request_set_assoc(aead_req, &assoc, assoc.length);
+ aead_request_set_crypt(aead_req, ct, &pt,
data_len + IEEE80211_CCMP_MIC_LEN, b_0);
- return crypto_aead_decrypt(&aead_req.req);
+ return crypto_aead_decrypt(aead_req);
}
struct crypto_aead *ieee80211_aes_key_setup_encrypt(const u8 key[])
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index 13b7683de5a..ce9633a3cfb 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -107,7 +107,7 @@ static void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata,
mgmt->u.action.u.addba_req.start_seq_num =
cpu_to_le16(start_seq_num << 4);
- ieee80211_tx_skb_tid(sdata, skb, tid);
+ ieee80211_tx_skb(sdata, skb);
}
void ieee80211_send_bar(struct ieee80211_vif *vif, u8 *ra, u16 tid, u16 ssn)
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 453e974287d..592f4b152ba 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -109,6 +109,15 @@ static int ieee80211_change_iface(struct wiphy *wiphy,
static int ieee80211_start_p2p_device(struct wiphy *wiphy,
struct wireless_dev *wdev)
{
+ struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev);
+ int ret;
+
+ mutex_lock(&sdata->local->chanctx_mtx);
+ ret = ieee80211_check_combinations(sdata, NULL, 0, 0);
+ mutex_unlock(&sdata->local->chanctx_mtx);
+ if (ret < 0)
+ return ret;
+
return ieee80211_do_open(wdev, true);
}
@@ -451,11 +460,11 @@ void sta_set_rate_info_rx(struct sta_info *sta, struct rate_info *rinfo)
rinfo->flags |= RATE_INFO_FLAGS_40_MHZ_WIDTH;
if (sta->last_rx_rate_flag & RX_FLAG_SHORT_GI)
rinfo->flags |= RATE_INFO_FLAGS_SHORT_GI;
- if (sta->last_rx_rate_flag & RX_FLAG_80MHZ)
+ if (sta->last_rx_rate_vht_flag & RX_VHT_FLAG_80MHZ)
rinfo->flags |= RATE_INFO_FLAGS_80_MHZ_WIDTH;
- if (sta->last_rx_rate_flag & RX_FLAG_80P80MHZ)
+ if (sta->last_rx_rate_vht_flag & RX_VHT_FLAG_80P80MHZ)
rinfo->flags |= RATE_INFO_FLAGS_80P80_MHZ_WIDTH;
- if (sta->last_rx_rate_flag & RX_FLAG_160MHZ)
+ if (sta->last_rx_rate_vht_flag & RX_VHT_FLAG_160MHZ)
rinfo->flags |= RATE_INFO_FLAGS_160_MHZ_WIDTH;
}
@@ -463,10 +472,15 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
{
struct ieee80211_sub_if_data *sdata = sta->sdata;
struct ieee80211_local *local = sdata->local;
+ struct rate_control_ref *ref = NULL;
struct timespec uptime;
u64 packets = 0;
+ u32 thr = 0;
int i, ac;
+ if (test_sta_flag(sta, WLAN_STA_RATE_CONTROL))
+ ref = local->rate_ctrl;
+
sinfo->generation = sdata->local->sta_generation;
sinfo->filled = STATION_INFO_INACTIVE_TIME |
@@ -578,6 +592,17 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_ASSOCIATED);
if (test_sta_flag(sta, WLAN_STA_TDLS_PEER))
sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_TDLS_PEER);
+
+ /* check if the driver has a SW RC implementation */
+ if (ref && ref->ops->get_expected_throughput)
+ thr = ref->ops->get_expected_throughput(sta->rate_ctrl_priv);
+ else
+ thr = drv_get_expected_throughput(local, &sta->sta);
+
+ if (thr != 0) {
+ sinfo->filled |= STATION_INFO_EXPECTED_THROUGHPUT;
+ sinfo->expected_throughput = thr;
+ }
}
static const char ieee80211_gstrings_sta_stats[][ETH_GSTRING_LEN] = {
@@ -768,7 +793,7 @@ static void ieee80211_get_et_strings(struct wiphy *wiphy,
}
static int ieee80211_dump_station(struct wiphy *wiphy, struct net_device *dev,
- int idx, u8 *mac, struct station_info *sinfo)
+ int idx, u8 *mac, struct station_info *sinfo)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
struct ieee80211_local *local = sdata->local;
@@ -798,7 +823,7 @@ static int ieee80211_dump_survey(struct wiphy *wiphy, struct net_device *dev,
}
static int ieee80211_get_station(struct wiphy *wiphy, struct net_device *dev,
- u8 *mac, struct station_info *sinfo)
+ const u8 *mac, struct station_info *sinfo)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
struct ieee80211_local *local = sdata->local;
@@ -970,15 +995,15 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
/* TODO: make hostapd tell us what it wants */
sdata->smps_mode = IEEE80211_SMPS_OFF;
sdata->needed_rx_chains = sdata->local->rx_chains;
- sdata->radar_required = params->radar_required;
mutex_lock(&local->mtx);
err = ieee80211_vif_use_channel(sdata, &params->chandef,
IEEE80211_CHANCTX_SHARED);
+ if (!err)
+ ieee80211_vif_copy_chanctx_to_vlans(sdata, false);
mutex_unlock(&local->mtx);
if (err)
return err;
- ieee80211_vif_copy_chanctx_to_vlans(sdata, false);
/*
* Apply control port protocol, this allows us to
@@ -1056,6 +1081,7 @@ static int ieee80211_change_beacon(struct wiphy *wiphy, struct net_device *dev,
int err;
sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ sdata_assert_lock(sdata);
/* don't allow changing the beacon while CSA is in place - offset
* of channel switch counter may change
@@ -1074,6 +1100,31 @@ static int ieee80211_change_beacon(struct wiphy *wiphy, struct net_device *dev,
return 0;
}
+bool ieee80211_csa_needs_block_tx(struct ieee80211_local *local)
+{
+ struct ieee80211_sub_if_data *sdata;
+
+ lockdep_assert_held(&local->mtx);
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(sdata, &local->interfaces, list) {
+ if (!ieee80211_sdata_running(sdata))
+ continue;
+
+ if (!sdata->vif.csa_active)
+ continue;
+
+ if (!sdata->csa_block_tx)
+ continue;
+
+ rcu_read_unlock();
+ return true;
+ }
+ rcu_read_unlock();
+
+ return false;
+}
+
static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
@@ -1083,13 +1134,22 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev)
struct probe_resp *old_probe_resp;
struct cfg80211_chan_def chandef;
+ sdata_assert_lock(sdata);
+
old_beacon = sdata_dereference(sdata->u.ap.beacon, sdata);
if (!old_beacon)
return -ENOENT;
old_probe_resp = sdata_dereference(sdata->u.ap.probe_resp, sdata);
/* abort any running channel switch */
+ mutex_lock(&local->mtx);
sdata->vif.csa_active = false;
+ if (!ieee80211_csa_needs_block_tx(local))
+ ieee80211_wake_queues_by_reason(&local->hw,
+ IEEE80211_MAX_QUEUE_MAP,
+ IEEE80211_QUEUE_STOP_REASON_CSA);
+ mutex_unlock(&local->mtx);
+
kfree(sdata->u.ap.next_beacon);
sdata->u.ap.next_beacon = NULL;
@@ -1128,8 +1188,8 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev)
local->total_ps_buffered -= skb_queue_len(&sdata->u.ap.ps.bc_buf);
skb_queue_purge(&sdata->u.ap.ps.bc_buf);
- ieee80211_vif_copy_chanctx_to_vlans(sdata, true);
mutex_lock(&local->mtx);
+ ieee80211_vif_copy_chanctx_to_vlans(sdata, true);
ieee80211_vif_release_channel(sdata);
mutex_unlock(&local->mtx);
@@ -1343,6 +1403,15 @@ static int sta_apply_parameters(struct ieee80211_local *local,
ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband,
params->vht_capa, sta);
+ if (params->opmode_notif_used) {
+ /* returned value is only needed for rc update, but the
+ * rc isn't initialized here yet, so ignore it
+ */
+ __ieee80211_vht_handle_opmode(sdata, sta,
+ params->opmode_notif,
+ band, false);
+ }
+
if (ieee80211_vif_is_mesh(&sdata->vif)) {
#ifdef CONFIG_MAC80211_MESH
u32 changed = 0;
@@ -1404,7 +1473,8 @@ static int sta_apply_parameters(struct ieee80211_local *local,
}
static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev,
- u8 *mac, struct station_parameters *params)
+ const u8 *mac,
+ struct station_parameters *params)
{
struct ieee80211_local *local = wiphy_priv(wiphy);
struct sta_info *sta;
@@ -1438,6 +1508,8 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev,
if (!(params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER))) {
sta_info_pre_move_state(sta, IEEE80211_STA_AUTH);
sta_info_pre_move_state(sta, IEEE80211_STA_ASSOC);
+ } else {
+ sta->sta.tdls = true;
}
err = sta_apply_parameters(local, sta, params);
@@ -1471,7 +1543,7 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev,
}
static int ieee80211_del_station(struct wiphy *wiphy, struct net_device *dev,
- u8 *mac)
+ const u8 *mac)
{
struct ieee80211_sub_if_data *sdata;
@@ -1485,7 +1557,7 @@ static int ieee80211_del_station(struct wiphy *wiphy, struct net_device *dev,
}
static int ieee80211_change_station(struct wiphy *wiphy,
- struct net_device *dev, u8 *mac,
+ struct net_device *dev, const u8 *mac,
struct station_parameters *params)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
@@ -1554,7 +1626,7 @@ static int ieee80211_change_station(struct wiphy *wiphy,
if (sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN &&
sta->sdata->u.vlan.sta) {
- rcu_assign_pointer(sta->sdata->u.vlan.sta, NULL);
+ RCU_INIT_POINTER(sta->sdata->u.vlan.sta, NULL);
prev_4addr = true;
}
@@ -1610,7 +1682,7 @@ out_err:
#ifdef CONFIG_MAC80211_MESH
static int ieee80211_add_mpath(struct wiphy *wiphy, struct net_device *dev,
- u8 *dst, u8 *next_hop)
+ const u8 *dst, const u8 *next_hop)
{
struct ieee80211_sub_if_data *sdata;
struct mesh_path *mpath;
@@ -1638,7 +1710,7 @@ static int ieee80211_add_mpath(struct wiphy *wiphy, struct net_device *dev,
}
static int ieee80211_del_mpath(struct wiphy *wiphy, struct net_device *dev,
- u8 *dst)
+ const u8 *dst)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
@@ -1649,9 +1721,8 @@ static int ieee80211_del_mpath(struct wiphy *wiphy, struct net_device *dev,
return 0;
}
-static int ieee80211_change_mpath(struct wiphy *wiphy,
- struct net_device *dev,
- u8 *dst, u8 *next_hop)
+static int ieee80211_change_mpath(struct wiphy *wiphy, struct net_device *dev,
+ const u8 *dst, const u8 *next_hop)
{
struct ieee80211_sub_if_data *sdata;
struct mesh_path *mpath;
@@ -1743,8 +1814,8 @@ static int ieee80211_get_mpath(struct wiphy *wiphy, struct net_device *dev,
}
static int ieee80211_dump_mpath(struct wiphy *wiphy, struct net_device *dev,
- int idx, u8 *dst, u8 *next_hop,
- struct mpath_info *pinfo)
+ int idx, u8 *dst, u8 *next_hop,
+ struct mpath_info *pinfo)
{
struct ieee80211_sub_if_data *sdata;
struct mesh_path *mpath;
@@ -2630,6 +2701,18 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local,
if (!roc)
return -ENOMEM;
+ /*
+ * If the duration is zero, then the driver
+ * wouldn't actually do anything. Set it to
+ * 10 for now.
+ *
+ * TODO: cancel the off-channel operation
+ * when we get the SKB's TX status and
+ * the wait time was zero before.
+ */
+ if (!duration)
+ duration = 10;
+
roc->chan = channel;
roc->duration = duration;
roc->req_duration = duration;
@@ -2671,18 +2754,6 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local,
/* otherwise actually kick it off here (for error handling) */
- /*
- * If the duration is zero, then the driver
- * wouldn't actually do anything. Set it to
- * 10 for now.
- *
- * TODO: cancel the off-channel operation
- * when we get the SKB's TX status and
- * the wait time was zero before.
- */
- if (!duration)
- duration = 10;
-
ret = drv_remain_on_channel(local, sdata, channel, duration, type);
if (ret) {
kfree(roc);
@@ -2902,11 +2973,11 @@ static int ieee80211_cancel_remain_on_channel(struct wiphy *wiphy,
static int ieee80211_start_radar_detection(struct wiphy *wiphy,
struct net_device *dev,
- struct cfg80211_chan_def *chandef)
+ struct cfg80211_chan_def *chandef,
+ u32 cac_time_ms)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
struct ieee80211_local *local = sdata->local;
- unsigned long timeout;
int err;
mutex_lock(&local->mtx);
@@ -2918,16 +2989,15 @@ static int ieee80211_start_radar_detection(struct wiphy *wiphy,
/* whatever, but channel contexts should not complain about that one */
sdata->smps_mode = IEEE80211_SMPS_OFF;
sdata->needed_rx_chains = local->rx_chains;
- sdata->radar_required = true;
err = ieee80211_vif_use_channel(sdata, chandef,
IEEE80211_CHANCTX_SHARED);
if (err)
goto out_unlock;
- timeout = msecs_to_jiffies(IEEE80211_DFS_MIN_CAC_TIME_MS);
ieee80211_queue_delayed_work(&sdata->local->hw,
- &sdata->dfs_cac_timer_work, timeout);
+ &sdata->dfs_cac_timer_work,
+ msecs_to_jiffies(cac_time_ms));
out_unlock:
mutex_unlock(&local->mtx);
@@ -2990,136 +3060,179 @@ cfg80211_beacon_dup(struct cfg80211_beacon_data *beacon)
return new_beacon;
}
-void ieee80211_csa_finalize_work(struct work_struct *work)
+void ieee80211_csa_finish(struct ieee80211_vif *vif)
{
- struct ieee80211_sub_if_data *sdata =
- container_of(work, struct ieee80211_sub_if_data,
- csa_finalize_work);
- struct ieee80211_local *local = sdata->local;
- int err, changed = 0;
+ struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
- sdata_lock(sdata);
- /* AP might have been stopped while waiting for the lock. */
- if (!sdata->vif.csa_active)
- goto unlock;
-
- if (!ieee80211_sdata_running(sdata))
- goto unlock;
-
- sdata->radar_required = sdata->csa_radar_required;
- mutex_lock(&local->mtx);
- err = ieee80211_vif_change_channel(sdata, &changed);
- mutex_unlock(&local->mtx);
- if (WARN_ON(err < 0))
- goto unlock;
-
- if (!local->use_chanctx) {
- local->_oper_chandef = sdata->csa_chandef;
- ieee80211_hw_config(local, 0);
- }
+ ieee80211_queue_work(&sdata->local->hw,
+ &sdata->csa_finalize_work);
+}
+EXPORT_SYMBOL(ieee80211_csa_finish);
- ieee80211_bss_info_change_notify(sdata, changed);
+static int ieee80211_set_after_csa_beacon(struct ieee80211_sub_if_data *sdata,
+ u32 *changed)
+{
+ int err;
- sdata->vif.csa_active = false;
switch (sdata->vif.type) {
case NL80211_IFTYPE_AP:
err = ieee80211_assign_beacon(sdata, sdata->u.ap.next_beacon);
- if (err < 0)
- goto unlock;
-
- changed |= err;
kfree(sdata->u.ap.next_beacon);
sdata->u.ap.next_beacon = NULL;
- ieee80211_bss_info_change_notify(sdata, err);
+ if (err < 0)
+ return err;
+ *changed |= err;
break;
case NL80211_IFTYPE_ADHOC:
- ieee80211_ibss_finish_csa(sdata);
+ err = ieee80211_ibss_finish_csa(sdata);
+ if (err < 0)
+ return err;
+ *changed |= err;
break;
#ifdef CONFIG_MAC80211_MESH
case NL80211_IFTYPE_MESH_POINT:
err = ieee80211_mesh_finish_csa(sdata);
if (err < 0)
- goto unlock;
+ return err;
+ *changed |= err;
break;
#endif
default:
WARN_ON(1);
- goto unlock;
+ return -EINVAL;
}
- ieee80211_wake_queues_by_reason(&sdata->local->hw,
+ return 0;
+}
+
+static int __ieee80211_csa_finalize(struct ieee80211_sub_if_data *sdata)
+{
+ struct ieee80211_local *local = sdata->local;
+ u32 changed = 0;
+ int err;
+
+ sdata_assert_lock(sdata);
+ lockdep_assert_held(&local->mtx);
+
+ sdata->radar_required = sdata->csa_radar_required;
+ err = ieee80211_vif_change_channel(sdata, &changed);
+ if (err < 0)
+ return err;
+
+ if (!local->use_chanctx) {
+ local->_oper_chandef = sdata->csa_chandef;
+ ieee80211_hw_config(local, 0);
+ }
+
+ sdata->vif.csa_active = false;
+
+ err = ieee80211_set_after_csa_beacon(sdata, &changed);
+ if (err)
+ return err;
+
+ ieee80211_bss_info_change_notify(sdata, changed);
+ cfg80211_ch_switch_notify(sdata->dev, &sdata->csa_chandef);
+
+ if (!ieee80211_csa_needs_block_tx(local))
+ ieee80211_wake_queues_by_reason(&local->hw,
IEEE80211_MAX_QUEUE_MAP,
IEEE80211_QUEUE_STOP_REASON_CSA);
- cfg80211_ch_switch_notify(sdata->dev, &sdata->csa_chandef);
+ return 0;
+}
-unlock:
- sdata_unlock(sdata);
+static void ieee80211_csa_finalize(struct ieee80211_sub_if_data *sdata)
+{
+ if (__ieee80211_csa_finalize(sdata)) {
+ sdata_info(sdata, "failed to finalize CSA, disconnecting\n");
+ cfg80211_stop_iface(sdata->local->hw.wiphy, &sdata->wdev,
+ GFP_KERNEL);
+ }
}
-int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
- struct cfg80211_csa_settings *params)
+void ieee80211_csa_finalize_work(struct work_struct *work)
{
- struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ struct ieee80211_sub_if_data *sdata =
+ container_of(work, struct ieee80211_sub_if_data,
+ csa_finalize_work);
struct ieee80211_local *local = sdata->local;
- struct ieee80211_chanctx_conf *chanctx_conf;
- struct ieee80211_chanctx *chanctx;
- struct ieee80211_if_mesh __maybe_unused *ifmsh;
- int err, num_chanctx;
-
- lockdep_assert_held(&sdata->wdev.mtx);
- if (!list_empty(&local->roc_list) || local->scanning)
- return -EBUSY;
-
- if (sdata->wdev.cac_started)
- return -EBUSY;
+ sdata_lock(sdata);
+ mutex_lock(&local->mtx);
- if (cfg80211_chandef_identical(&params->chandef,
- &sdata->vif.bss_conf.chandef))
- return -EINVAL;
+ /* AP might have been stopped while waiting for the lock. */
+ if (!sdata->vif.csa_active)
+ goto unlock;
- rcu_read_lock();
- chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
- if (!chanctx_conf) {
- rcu_read_unlock();
- return -EBUSY;
- }
+ if (!ieee80211_sdata_running(sdata))
+ goto unlock;
- /* don't handle for multi-VIF cases */
- chanctx = container_of(chanctx_conf, struct ieee80211_chanctx, conf);
- if (chanctx->refcount > 1) {
- rcu_read_unlock();
- return -EBUSY;
- }
- num_chanctx = 0;
- list_for_each_entry_rcu(chanctx, &local->chanctx_list, list)
- num_chanctx++;
- rcu_read_unlock();
+ ieee80211_csa_finalize(sdata);
- if (num_chanctx > 1)
- return -EBUSY;
+unlock:
+ mutex_unlock(&local->mtx);
+ sdata_unlock(sdata);
+}
- /* don't allow another channel switch if one is already active. */
- if (sdata->vif.csa_active)
- return -EBUSY;
+static int ieee80211_set_csa_beacon(struct ieee80211_sub_if_data *sdata,
+ struct cfg80211_csa_settings *params,
+ u32 *changed)
+{
+ int err;
switch (sdata->vif.type) {
case NL80211_IFTYPE_AP:
- sdata->csa_counter_offset_beacon =
- params->counter_offset_beacon;
- sdata->csa_counter_offset_presp = params->counter_offset_presp;
sdata->u.ap.next_beacon =
cfg80211_beacon_dup(&params->beacon_after);
if (!sdata->u.ap.next_beacon)
return -ENOMEM;
+ /*
+ * With a count of 0, we don't have to wait for any
+ * TBTT before switching, so complete the CSA
+ * immediately. In theory, with a count == 1 we
+ * should delay the switch until just before the next
+ * TBTT, but that would complicate things so we switch
+ * immediately too. If we would delay the switch
+ * until the next TBTT, we would have to set the probe
+ * response here.
+ *
+ * TODO: A channel switch with count <= 1 without
+ * sending a CSA action frame is kind of useless,
+ * because the clients won't know we're changing
+ * channels. The action frame must be implemented
+ * either here or in the userspace.
+ */
+ if (params->count <= 1)
+ break;
+
+ if ((params->n_counter_offsets_beacon >
+ IEEE80211_MAX_CSA_COUNTERS_NUM) ||
+ (params->n_counter_offsets_presp >
+ IEEE80211_MAX_CSA_COUNTERS_NUM))
+ return -EINVAL;
+
+ /* make sure we don't have garbage in other counters */
+ memset(sdata->csa_counter_offset_beacon, 0,
+ sizeof(sdata->csa_counter_offset_beacon));
+ memset(sdata->csa_counter_offset_presp, 0,
+ sizeof(sdata->csa_counter_offset_presp));
+
+ memcpy(sdata->csa_counter_offset_beacon,
+ params->counter_offsets_beacon,
+ params->n_counter_offsets_beacon * sizeof(u16));
+ memcpy(sdata->csa_counter_offset_presp,
+ params->counter_offsets_presp,
+ params->n_counter_offsets_presp * sizeof(u16));
+
err = ieee80211_assign_beacon(sdata, &params->beacon_csa);
if (err < 0) {
kfree(sdata->u.ap.next_beacon);
return err;
}
+ *changed |= err;
+
break;
case NL80211_IFTYPE_ADHOC:
if (!sdata->vif.bss_conf.ibss_joined)
@@ -3147,16 +3260,20 @@ int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
params->chandef.chan->band)
return -EINVAL;
- err = ieee80211_ibss_csa_beacon(sdata, params);
- if (err < 0)
- return err;
+ /* see comments in the NL80211_IFTYPE_AP block */
+ if (params->count > 1) {
+ err = ieee80211_ibss_csa_beacon(sdata, params);
+ if (err < 0)
+ return err;
+ *changed |= err;
+ }
+
+ ieee80211_send_action_csa(sdata, params);
+
break;
#ifdef CONFIG_MAC80211_MESH
- case NL80211_IFTYPE_MESH_POINT:
- ifmsh = &sdata->u.mesh;
-
- if (!ifmsh->mesh_id)
- return -EINVAL;
+ case NL80211_IFTYPE_MESH_POINT: {
+ struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
if (params->chandef.width != sdata->vif.bss_conf.chandef.width)
return -EINVAL;
@@ -3166,39 +3283,126 @@ int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
params->chandef.chan->band)
return -EINVAL;
- ifmsh->chsw_init = true;
- if (!ifmsh->pre_value)
- ifmsh->pre_value = 1;
- else
- ifmsh->pre_value++;
+ if (ifmsh->csa_role == IEEE80211_MESH_CSA_ROLE_NONE) {
+ ifmsh->csa_role = IEEE80211_MESH_CSA_ROLE_INIT;
+ if (!ifmsh->pre_value)
+ ifmsh->pre_value = 1;
+ else
+ ifmsh->pre_value++;
+ }
- err = ieee80211_mesh_csa_beacon(sdata, params, true);
- if (err < 0) {
- ifmsh->chsw_init = false;
- return err;
+ /* see comments in the NL80211_IFTYPE_AP block */
+ if (params->count > 1) {
+ err = ieee80211_mesh_csa_beacon(sdata, params);
+ if (err < 0) {
+ ifmsh->csa_role = IEEE80211_MESH_CSA_ROLE_NONE;
+ return err;
+ }
+ *changed |= err;
}
+
+ if (ifmsh->csa_role == IEEE80211_MESH_CSA_ROLE_INIT)
+ ieee80211_send_action_csa(sdata, params);
+
break;
+ }
#endif
default:
return -EOPNOTSUPP;
}
- sdata->csa_radar_required = params->radar_required;
+ return 0;
+}
- if (params->block_tx)
- ieee80211_stop_queues_by_reason(&local->hw,
- IEEE80211_MAX_QUEUE_MAP,
- IEEE80211_QUEUE_STOP_REASON_CSA);
+static int
+__ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
+ struct cfg80211_csa_settings *params)
+{
+ struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ struct ieee80211_local *local = sdata->local;
+ struct ieee80211_chanctx_conf *conf;
+ struct ieee80211_chanctx *chanctx;
+ int err, num_chanctx, changed = 0;
+
+ sdata_assert_lock(sdata);
+ lockdep_assert_held(&local->mtx);
+
+ if (!list_empty(&local->roc_list) || local->scanning)
+ return -EBUSY;
+
+ if (sdata->wdev.cac_started)
+ return -EBUSY;
+ if (cfg80211_chandef_identical(&params->chandef,
+ &sdata->vif.bss_conf.chandef))
+ return -EINVAL;
+
+ mutex_lock(&local->chanctx_mtx);
+ conf = rcu_dereference_protected(sdata->vif.chanctx_conf,
+ lockdep_is_held(&local->chanctx_mtx));
+ if (!conf) {
+ mutex_unlock(&local->chanctx_mtx);
+ return -EBUSY;
+ }
+
+ /* don't handle for multi-VIF cases */
+ chanctx = container_of(conf, struct ieee80211_chanctx, conf);
+ if (ieee80211_chanctx_refcount(local, chanctx) > 1) {
+ mutex_unlock(&local->chanctx_mtx);
+ return -EBUSY;
+ }
+ num_chanctx = 0;
+ list_for_each_entry_rcu(chanctx, &local->chanctx_list, list)
+ num_chanctx++;
+ mutex_unlock(&local->chanctx_mtx);
+
+ if (num_chanctx > 1)
+ return -EBUSY;
+
+ /* don't allow another channel switch if one is already active. */
+ if (sdata->vif.csa_active)
+ return -EBUSY;
+
+ err = ieee80211_set_csa_beacon(sdata, params, &changed);
+ if (err)
+ return err;
+
+ sdata->csa_radar_required = params->radar_required;
sdata->csa_chandef = params->chandef;
+ sdata->csa_block_tx = params->block_tx;
+ sdata->csa_current_counter = params->count;
sdata->vif.csa_active = true;
- ieee80211_bss_info_change_notify(sdata, err);
- drv_channel_switch_beacon(sdata, &params->chandef);
+ if (sdata->csa_block_tx)
+ ieee80211_stop_queues_by_reason(&local->hw,
+ IEEE80211_MAX_QUEUE_MAP,
+ IEEE80211_QUEUE_STOP_REASON_CSA);
+
+ if (changed) {
+ ieee80211_bss_info_change_notify(sdata, changed);
+ drv_channel_switch_beacon(sdata, &params->chandef);
+ } else {
+ /* if the beacon didn't change, we can finalize immediately */
+ ieee80211_csa_finalize(sdata);
+ }
return 0;
}
+int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
+ struct cfg80211_csa_settings *params)
+{
+ struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ struct ieee80211_local *local = sdata->local;
+ int err;
+
+ mutex_lock(&local->mtx);
+ err = __ieee80211_channel_switch(wiphy, dev, params);
+ mutex_unlock(&local->mtx);
+
+ return err;
+}
+
static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
struct cfg80211_mgmt_tx_params *params,
u64 *cookie)
@@ -3211,6 +3415,7 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
bool need_offchan = false;
u32 flags;
int ret;
+ u8 *data;
if (params->dont_wait_for_ack)
flags = IEEE80211_TX_CTL_NO_ACK;
@@ -3304,7 +3509,20 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
}
skb_reserve(skb, local->hw.extra_tx_headroom);
- memcpy(skb_put(skb, params->len), params->buf, params->len);
+ data = skb_put(skb, params->len);
+ memcpy(data, params->buf, params->len);
+
+ /* Update CSA counters */
+ if (sdata->vif.csa_active &&
+ (sdata->vif.type == NL80211_IFTYPE_AP ||
+ sdata->vif.type == NL80211_IFTYPE_ADHOC) &&
+ params->n_csa_offsets) {
+ int i;
+ u8 c = sdata->csa_current_counter;
+
+ for (i = 0; i < params->n_csa_offsets; i++)
+ data[params->csa_offsets[i]] = c;
+ }
IEEE80211_SKB_CB(skb)->flags = flags;
@@ -3413,320 +3631,6 @@ static int ieee80211_set_rekey_data(struct wiphy *wiphy,
return 0;
}
-static void ieee80211_tdls_add_ext_capab(struct sk_buff *skb)
-{
- u8 *pos = (void *)skb_put(skb, 7);
-
- *pos++ = WLAN_EID_EXT_CAPABILITY;
- *pos++ = 5; /* len */
- *pos++ = 0x0;
- *pos++ = 0x0;
- *pos++ = 0x0;
- *pos++ = 0x0;
- *pos++ = WLAN_EXT_CAPA5_TDLS_ENABLED;
-}
-
-static u16 ieee80211_get_tdls_sta_capab(struct ieee80211_sub_if_data *sdata)
-{
- struct ieee80211_local *local = sdata->local;
- u16 capab;
-
- capab = 0;
- if (ieee80211_get_sdata_band(sdata) != IEEE80211_BAND_2GHZ)
- return capab;
-
- if (!(local->hw.flags & IEEE80211_HW_2GHZ_SHORT_SLOT_INCAPABLE))
- capab |= WLAN_CAPABILITY_SHORT_SLOT_TIME;
- if (!(local->hw.flags & IEEE80211_HW_2GHZ_SHORT_PREAMBLE_INCAPABLE))
- capab |= WLAN_CAPABILITY_SHORT_PREAMBLE;
-
- return capab;
-}
-
-static void ieee80211_tdls_add_link_ie(struct sk_buff *skb, u8 *src_addr,
- u8 *peer, u8 *bssid)
-{
- struct ieee80211_tdls_lnkie *lnkid;
-
- lnkid = (void *)skb_put(skb, sizeof(struct ieee80211_tdls_lnkie));
-
- lnkid->ie_type = WLAN_EID_LINK_ID;
- lnkid->ie_len = sizeof(struct ieee80211_tdls_lnkie) - 2;
-
- memcpy(lnkid->bssid, bssid, ETH_ALEN);
- memcpy(lnkid->init_sta, src_addr, ETH_ALEN);
- memcpy(lnkid->resp_sta, peer, ETH_ALEN);
-}
-
-static int
-ieee80211_prep_tdls_encap_data(struct wiphy *wiphy, struct net_device *dev,
- u8 *peer, u8 action_code, u8 dialog_token,
- u16 status_code, struct sk_buff *skb)
-{
- struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
- enum ieee80211_band band = ieee80211_get_sdata_band(sdata);
- struct ieee80211_tdls_data *tf;
-
- tf = (void *)skb_put(skb, offsetof(struct ieee80211_tdls_data, u));
-
- memcpy(tf->da, peer, ETH_ALEN);
- memcpy(tf->sa, sdata->vif.addr, ETH_ALEN);
- tf->ether_type = cpu_to_be16(ETH_P_TDLS);
- tf->payload_type = WLAN_TDLS_SNAP_RFTYPE;
-
- switch (action_code) {
- case WLAN_TDLS_SETUP_REQUEST:
- tf->category = WLAN_CATEGORY_TDLS;
- tf->action_code = WLAN_TDLS_SETUP_REQUEST;
-
- skb_put(skb, sizeof(tf->u.setup_req));
- tf->u.setup_req.dialog_token = dialog_token;
- tf->u.setup_req.capability =
- cpu_to_le16(ieee80211_get_tdls_sta_capab(sdata));
-
- ieee80211_add_srates_ie(sdata, skb, false, band);
- ieee80211_add_ext_srates_ie(sdata, skb, false, band);
- ieee80211_tdls_add_ext_capab(skb);
- break;
- case WLAN_TDLS_SETUP_RESPONSE:
- tf->category = WLAN_CATEGORY_TDLS;
- tf->action_code = WLAN_TDLS_SETUP_RESPONSE;
-
- skb_put(skb, sizeof(tf->u.setup_resp));
- tf->u.setup_resp.status_code = cpu_to_le16(status_code);
- tf->u.setup_resp.dialog_token = dialog_token;
- tf->u.setup_resp.capability =
- cpu_to_le16(ieee80211_get_tdls_sta_capab(sdata));
-
- ieee80211_add_srates_ie(sdata, skb, false, band);
- ieee80211_add_ext_srates_ie(sdata, skb, false, band);
- ieee80211_tdls_add_ext_capab(skb);
- break;
- case WLAN_TDLS_SETUP_CONFIRM:
- tf->category = WLAN_CATEGORY_TDLS;
- tf->action_code = WLAN_TDLS_SETUP_CONFIRM;
-
- skb_put(skb, sizeof(tf->u.setup_cfm));
- tf->u.setup_cfm.status_code = cpu_to_le16(status_code);
- tf->u.setup_cfm.dialog_token = dialog_token;
- break;
- case WLAN_TDLS_TEARDOWN:
- tf->category = WLAN_CATEGORY_TDLS;
- tf->action_code = WLAN_TDLS_TEARDOWN;
-
- skb_put(skb, sizeof(tf->u.teardown));
- tf->u.teardown.reason_code = cpu_to_le16(status_code);
- break;
- case WLAN_TDLS_DISCOVERY_REQUEST:
- tf->category = WLAN_CATEGORY_TDLS;
- tf->action_code = WLAN_TDLS_DISCOVERY_REQUEST;
-
- skb_put(skb, sizeof(tf->u.discover_req));
- tf->u.discover_req.dialog_token = dialog_token;
- break;
- default:
- return -EINVAL;
- }
-
- return 0;
-}
-
-static int
-ieee80211_prep_tdls_direct(struct wiphy *wiphy, struct net_device *dev,
- u8 *peer, u8 action_code, u8 dialog_token,
- u16 status_code, struct sk_buff *skb)
-{
- struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
- enum ieee80211_band band = ieee80211_get_sdata_band(sdata);
- struct ieee80211_mgmt *mgmt;
-
- mgmt = (void *)skb_put(skb, 24);
- memset(mgmt, 0, 24);
- memcpy(mgmt->da, peer, ETH_ALEN);
- memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN);
- memcpy(mgmt->bssid, sdata->u.mgd.bssid, ETH_ALEN);
-
- mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
- IEEE80211_STYPE_ACTION);
-
- switch (action_code) {
- case WLAN_PUB_ACTION_TDLS_DISCOVER_RES:
- skb_put(skb, 1 + sizeof(mgmt->u.action.u.tdls_discover_resp));
- mgmt->u.action.category = WLAN_CATEGORY_PUBLIC;
- mgmt->u.action.u.tdls_discover_resp.action_code =
- WLAN_PUB_ACTION_TDLS_DISCOVER_RES;
- mgmt->u.action.u.tdls_discover_resp.dialog_token =
- dialog_token;
- mgmt->u.action.u.tdls_discover_resp.capability =
- cpu_to_le16(ieee80211_get_tdls_sta_capab(sdata));
-
- ieee80211_add_srates_ie(sdata, skb, false, band);
- ieee80211_add_ext_srates_ie(sdata, skb, false, band);
- ieee80211_tdls_add_ext_capab(skb);
- break;
- default:
- return -EINVAL;
- }
-
- return 0;
-}
-
-static int ieee80211_tdls_mgmt(struct wiphy *wiphy, struct net_device *dev,
- u8 *peer, u8 action_code, u8 dialog_token,
- u16 status_code, const u8 *extra_ies,
- size_t extra_ies_len)
-{
- struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
- struct ieee80211_local *local = sdata->local;
- struct sk_buff *skb = NULL;
- bool send_direct;
- int ret;
-
- if (!(wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS))
- return -ENOTSUPP;
-
- /* make sure we are in managed mode, and associated */
- if (sdata->vif.type != NL80211_IFTYPE_STATION ||
- !sdata->u.mgd.associated)
- return -EINVAL;
-
- tdls_dbg(sdata, "TDLS mgmt action %d peer %pM\n",
- action_code, peer);
-
- skb = dev_alloc_skb(local->hw.extra_tx_headroom +
- max(sizeof(struct ieee80211_mgmt),
- sizeof(struct ieee80211_tdls_data)) +
- 50 + /* supported rates */
- 7 + /* ext capab */
- extra_ies_len +
- sizeof(struct ieee80211_tdls_lnkie));
- if (!skb)
- return -ENOMEM;
-
- skb_reserve(skb, local->hw.extra_tx_headroom);
-
- switch (action_code) {
- case WLAN_TDLS_SETUP_REQUEST:
- case WLAN_TDLS_SETUP_RESPONSE:
- case WLAN_TDLS_SETUP_CONFIRM:
- case WLAN_TDLS_TEARDOWN:
- case WLAN_TDLS_DISCOVERY_REQUEST:
- ret = ieee80211_prep_tdls_encap_data(wiphy, dev, peer,
- action_code, dialog_token,
- status_code, skb);
- send_direct = false;
- break;
- case WLAN_PUB_ACTION_TDLS_DISCOVER_RES:
- ret = ieee80211_prep_tdls_direct(wiphy, dev, peer, action_code,
- dialog_token, status_code,
- skb);
- send_direct = true;
- break;
- default:
- ret = -ENOTSUPP;
- break;
- }
-
- if (ret < 0)
- goto fail;
-
- if (extra_ies_len)
- memcpy(skb_put(skb, extra_ies_len), extra_ies, extra_ies_len);
-
- /* the TDLS link IE is always added last */
- switch (action_code) {
- case WLAN_TDLS_SETUP_REQUEST:
- case WLAN_TDLS_SETUP_CONFIRM:
- case WLAN_TDLS_TEARDOWN:
- case WLAN_TDLS_DISCOVERY_REQUEST:
- /* we are the initiator */
- ieee80211_tdls_add_link_ie(skb, sdata->vif.addr, peer,
- sdata->u.mgd.bssid);
- break;
- case WLAN_TDLS_SETUP_RESPONSE:
- case WLAN_PUB_ACTION_TDLS_DISCOVER_RES:
- /* we are the responder */
- ieee80211_tdls_add_link_ie(skb, peer, sdata->vif.addr,
- sdata->u.mgd.bssid);
- break;
- default:
- ret = -ENOTSUPP;
- goto fail;
- }
-
- if (send_direct) {
- ieee80211_tx_skb(sdata, skb);
- return 0;
- }
-
- /*
- * According to 802.11z: Setup req/resp are sent in AC_BK, otherwise
- * we should default to AC_VI.
- */
- switch (action_code) {
- case WLAN_TDLS_SETUP_REQUEST:
- case WLAN_TDLS_SETUP_RESPONSE:
- skb_set_queue_mapping(skb, IEEE80211_AC_BK);
- skb->priority = 2;
- break;
- default:
- skb_set_queue_mapping(skb, IEEE80211_AC_VI);
- skb->priority = 5;
- break;
- }
-
- /* disable bottom halves when entering the Tx path */
- local_bh_disable();
- ret = ieee80211_subif_start_xmit(skb, dev);
- local_bh_enable();
-
- return ret;
-
-fail:
- dev_kfree_skb(skb);
- return ret;
-}
-
-static int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev,
- u8 *peer, enum nl80211_tdls_operation oper)
-{
- struct sta_info *sta;
- struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-
- if (!(wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS))
- return -ENOTSUPP;
-
- if (sdata->vif.type != NL80211_IFTYPE_STATION)
- return -EINVAL;
-
- tdls_dbg(sdata, "TDLS oper %d peer %pM\n", oper, peer);
-
- switch (oper) {
- case NL80211_TDLS_ENABLE_LINK:
- rcu_read_lock();
- sta = sta_info_get(sdata, peer);
- if (!sta) {
- rcu_read_unlock();
- return -ENOLINK;
- }
-
- set_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH);
- rcu_read_unlock();
- break;
- case NL80211_TDLS_DISABLE_LINK:
- return sta_info_destroy_addr(sdata, peer);
- case NL80211_TDLS_TEARDOWN:
- case NL80211_TDLS_SETUP:
- case NL80211_TDLS_DISCOVERY_REQ:
- /* We don't support in-driver setup/teardown/discovery */
- return -ENOTSUPP;
- default:
- return -ENOTSUPP;
- }
-
- return 0;
-}
-
static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev,
const u8 *peer, u64 *cookie)
{
@@ -3865,7 +3769,22 @@ static int ieee80211_set_qos_map(struct wiphy *wiphy,
return 0;
}
-struct cfg80211_ops mac80211_config_ops = {
+static int ieee80211_set_ap_chanwidth(struct wiphy *wiphy,
+ struct net_device *dev,
+ struct cfg80211_chan_def *chandef)
+{
+ struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ int ret;
+ u32 changed = 0;
+
+ ret = ieee80211_vif_change_bandwidth(sdata, chandef, &changed);
+ if (ret == 0)
+ ieee80211_bss_info_change_notify(sdata, changed);
+
+ return ret;
+}
+
+const struct cfg80211_ops mac80211_config_ops = {
.add_virtual_intf = ieee80211_add_iface,
.del_virtual_intf = ieee80211_del_iface,
.change_virtual_intf = ieee80211_change_iface,
@@ -3945,4 +3864,5 @@ struct cfg80211_ops mac80211_config_ops = {
.start_radar_detection = ieee80211_start_radar_detection,
.channel_switch = ieee80211_channel_switch,
.set_qos_map = ieee80211_set_qos_map,
+ .set_ap_chanwidth = ieee80211_set_ap_chanwidth,
};
diff --git a/net/mac80211/cfg.h b/net/mac80211/cfg.h
index 7d7879f5b00..2d51f62dc76 100644
--- a/net/mac80211/cfg.h
+++ b/net/mac80211/cfg.h
@@ -4,6 +4,6 @@
#ifndef __CFG_H
#define __CFG_H
-extern struct cfg80211_ops mac80211_config_ops;
+extern const struct cfg80211_ops mac80211_config_ops;
#endif /* __CFG_H */
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index 0c1ecfdf9a1..a310e33972d 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -9,6 +9,170 @@
#include "ieee80211_i.h"
#include "driver-ops.h"
+static int ieee80211_chanctx_num_assigned(struct ieee80211_local *local,
+ struct ieee80211_chanctx *ctx)
+{
+ struct ieee80211_sub_if_data *sdata;
+ int num = 0;
+
+ lockdep_assert_held(&local->chanctx_mtx);
+
+ list_for_each_entry(sdata, &ctx->assigned_vifs, assigned_chanctx_list)
+ num++;
+
+ return num;
+}
+
+static int ieee80211_chanctx_num_reserved(struct ieee80211_local *local,
+ struct ieee80211_chanctx *ctx)
+{
+ struct ieee80211_sub_if_data *sdata;
+ int num = 0;
+
+ lockdep_assert_held(&local->chanctx_mtx);
+
+ list_for_each_entry(sdata, &ctx->reserved_vifs, reserved_chanctx_list)
+ num++;
+
+ return num;
+}
+
+int ieee80211_chanctx_refcount(struct ieee80211_local *local,
+ struct ieee80211_chanctx *ctx)
+{
+ return ieee80211_chanctx_num_assigned(local, ctx) +
+ ieee80211_chanctx_num_reserved(local, ctx);
+}
+
+static int ieee80211_num_chanctx(struct ieee80211_local *local)
+{
+ struct ieee80211_chanctx *ctx;
+ int num = 0;
+
+ lockdep_assert_held(&local->chanctx_mtx);
+
+ list_for_each_entry(ctx, &local->chanctx_list, list)
+ num++;
+
+ return num;
+}
+
+static bool ieee80211_can_create_new_chanctx(struct ieee80211_local *local)
+{
+ lockdep_assert_held(&local->chanctx_mtx);
+ return ieee80211_num_chanctx(local) < ieee80211_max_num_channels(local);
+}
+
+static const struct cfg80211_chan_def *
+ieee80211_chanctx_reserved_chandef(struct ieee80211_local *local,
+ struct ieee80211_chanctx *ctx,
+ const struct cfg80211_chan_def *compat)
+{
+ struct ieee80211_sub_if_data *sdata;
+
+ lockdep_assert_held(&local->chanctx_mtx);
+
+ list_for_each_entry(sdata, &ctx->reserved_vifs,
+ reserved_chanctx_list) {
+ if (!compat)
+ compat = &sdata->reserved_chandef;
+
+ compat = cfg80211_chandef_compatible(&sdata->reserved_chandef,
+ compat);
+ if (!compat)
+ break;
+ }
+
+ return compat;
+}
+
+static const struct cfg80211_chan_def *
+ieee80211_chanctx_non_reserved_chandef(struct ieee80211_local *local,
+ struct ieee80211_chanctx *ctx,
+ const struct cfg80211_chan_def *compat)
+{
+ struct ieee80211_sub_if_data *sdata;
+
+ lockdep_assert_held(&local->chanctx_mtx);
+
+ list_for_each_entry(sdata, &ctx->assigned_vifs,
+ assigned_chanctx_list) {
+ if (sdata->reserved_chanctx != NULL)
+ continue;
+
+ if (!compat)
+ compat = &sdata->vif.bss_conf.chandef;
+
+ compat = cfg80211_chandef_compatible(
+ &sdata->vif.bss_conf.chandef, compat);
+ if (!compat)
+ break;
+ }
+
+ return compat;
+}
+
+static const struct cfg80211_chan_def *
+ieee80211_chanctx_combined_chandef(struct ieee80211_local *local,
+ struct ieee80211_chanctx *ctx,
+ const struct cfg80211_chan_def *compat)
+{
+ lockdep_assert_held(&local->chanctx_mtx);
+
+ compat = ieee80211_chanctx_reserved_chandef(local, ctx, compat);
+ if (!compat)
+ return NULL;
+
+ compat = ieee80211_chanctx_non_reserved_chandef(local, ctx, compat);
+ if (!compat)
+ return NULL;
+
+ return compat;
+}
+
+static bool
+ieee80211_chanctx_can_reserve_chandef(struct ieee80211_local *local,
+ struct ieee80211_chanctx *ctx,
+ const struct cfg80211_chan_def *def)
+{
+ lockdep_assert_held(&local->chanctx_mtx);
+
+ if (ieee80211_chanctx_combined_chandef(local, ctx, def))
+ return true;
+
+ if (!list_empty(&ctx->reserved_vifs) &&
+ ieee80211_chanctx_reserved_chandef(local, ctx, def))
+ return true;
+
+ return false;
+}
+
+static struct ieee80211_chanctx *
+ieee80211_find_reservation_chanctx(struct ieee80211_local *local,
+ const struct cfg80211_chan_def *chandef,
+ enum ieee80211_chanctx_mode mode)
+{
+ struct ieee80211_chanctx *ctx;
+
+ lockdep_assert_held(&local->chanctx_mtx);
+
+ if (mode == IEEE80211_CHANCTX_EXCLUSIVE)
+ return NULL;
+
+ list_for_each_entry(ctx, &local->chanctx_list, list) {
+ if (ctx->mode == IEEE80211_CHANCTX_EXCLUSIVE)
+ continue;
+
+ if (!ieee80211_chanctx_can_reserve_chandef(local, ctx,
+ chandef))
+ continue;
+
+ return ctx;
+ }
+
+ return NULL;
+}
+
static enum nl80211_chan_width ieee80211_get_sta_bw(struct ieee80211_sta *sta)
{
switch (sta->bandwidth) {
@@ -190,6 +354,11 @@ ieee80211_find_chanctx(struct ieee80211_local *local,
if (!compat)
continue;
+ compat = ieee80211_chanctx_reserved_chandef(local, ctx,
+ compat);
+ if (!compat)
+ continue;
+
ieee80211_change_chanctx(local, ctx, compat);
return ctx;
@@ -202,6 +371,8 @@ static bool ieee80211_is_radar_required(struct ieee80211_local *local)
{
struct ieee80211_sub_if_data *sdata;
+ lockdep_assert_held(&local->mtx);
+
rcu_read_lock();
list_for_each_entry_rcu(sdata, &local->interfaces, list) {
if (sdata->radar_required) {
@@ -215,62 +386,91 @@ static bool ieee80211_is_radar_required(struct ieee80211_local *local)
}
static struct ieee80211_chanctx *
-ieee80211_new_chanctx(struct ieee80211_local *local,
- const struct cfg80211_chan_def *chandef,
- enum ieee80211_chanctx_mode mode)
+ieee80211_alloc_chanctx(struct ieee80211_local *local,
+ const struct cfg80211_chan_def *chandef,
+ enum ieee80211_chanctx_mode mode)
{
struct ieee80211_chanctx *ctx;
- u32 changed;
- int err;
lockdep_assert_held(&local->chanctx_mtx);
ctx = kzalloc(sizeof(*ctx) + local->hw.chanctx_data_size, GFP_KERNEL);
if (!ctx)
- return ERR_PTR(-ENOMEM);
+ return NULL;
+ INIT_LIST_HEAD(&ctx->assigned_vifs);
+ INIT_LIST_HEAD(&ctx->reserved_vifs);
ctx->conf.def = *chandef;
ctx->conf.rx_chains_static = 1;
ctx->conf.rx_chains_dynamic = 1;
ctx->mode = mode;
ctx->conf.radar_enabled = ieee80211_is_radar_required(local);
ieee80211_recalc_chanctx_min_def(local, ctx);
+
+ return ctx;
+}
+
+static int ieee80211_add_chanctx(struct ieee80211_local *local,
+ struct ieee80211_chanctx *ctx)
+{
+ u32 changed;
+ int err;
+
+ lockdep_assert_held(&local->mtx);
+ lockdep_assert_held(&local->chanctx_mtx);
+
if (!local->use_chanctx)
local->hw.conf.radar_enabled = ctx->conf.radar_enabled;
- /* we hold the mutex to prevent idle from changing */
- lockdep_assert_held(&local->mtx);
/* turn idle off *before* setting channel -- some drivers need that */
changed = ieee80211_idle_off(local);
if (changed)
ieee80211_hw_config(local, changed);
if (!local->use_chanctx) {
- local->_oper_chandef = *chandef;
- ieee80211_hw_config(local, 0);
+ local->_oper_chandef = ctx->conf.def;
+ ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL);
} else {
err = drv_add_chanctx(local, ctx);
if (err) {
- kfree(ctx);
ieee80211_recalc_idle(local);
- return ERR_PTR(err);
+ return err;
}
}
- /* and keep the mutex held until the new chanctx is on the list */
- list_add_rcu(&ctx->list, &local->chanctx_list);
+ return 0;
+}
+static struct ieee80211_chanctx *
+ieee80211_new_chanctx(struct ieee80211_local *local,
+ const struct cfg80211_chan_def *chandef,
+ enum ieee80211_chanctx_mode mode)
+{
+ struct ieee80211_chanctx *ctx;
+ int err;
+
+ lockdep_assert_held(&local->mtx);
+ lockdep_assert_held(&local->chanctx_mtx);
+
+ ctx = ieee80211_alloc_chanctx(local, chandef, mode);
+ if (!ctx)
+ return ERR_PTR(-ENOMEM);
+
+ err = ieee80211_add_chanctx(local, ctx);
+ if (err) {
+ kfree(ctx);
+ return ERR_PTR(err);
+ }
+
+ list_add_rcu(&ctx->list, &local->chanctx_list);
return ctx;
}
-static void ieee80211_free_chanctx(struct ieee80211_local *local,
- struct ieee80211_chanctx *ctx)
+static void ieee80211_del_chanctx(struct ieee80211_local *local,
+ struct ieee80211_chanctx *ctx)
{
- bool check_single_channel = false;
lockdep_assert_held(&local->chanctx_mtx);
- WARN_ON_ONCE(ctx->refcount != 0);
-
if (!local->use_chanctx) {
struct cfg80211_chan_def *chandef = &local->_oper_chandef;
chandef->width = NL80211_CHAN_WIDTH_20_NOHT;
@@ -280,48 +480,29 @@ static void ieee80211_free_chanctx(struct ieee80211_local *local,
/* NOTE: Disabling radar is only valid here for
* single channel context. To be sure, check it ...
*/
- if (local->hw.conf.radar_enabled)
- check_single_channel = true;
+ WARN_ON(local->hw.conf.radar_enabled &&
+ !list_empty(&local->chanctx_list));
+
local->hw.conf.radar_enabled = false;
- ieee80211_hw_config(local, 0);
+ ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL);
} else {
drv_remove_chanctx(local, ctx);
}
- list_del_rcu(&ctx->list);
- kfree_rcu(ctx, rcu_head);
-
- /* throw a warning if this wasn't the only channel context. */
- WARN_ON(check_single_channel && !list_empty(&local->chanctx_list));
-
ieee80211_recalc_idle(local);
}
-static int ieee80211_assign_vif_chanctx(struct ieee80211_sub_if_data *sdata,
- struct ieee80211_chanctx *ctx)
+static void ieee80211_free_chanctx(struct ieee80211_local *local,
+ struct ieee80211_chanctx *ctx)
{
- struct ieee80211_local *local = sdata->local;
- int ret;
-
lockdep_assert_held(&local->chanctx_mtx);
- ret = drv_assign_vif_chanctx(local, sdata, ctx);
- if (ret)
- return ret;
+ WARN_ON_ONCE(ieee80211_chanctx_refcount(local, ctx) != 0);
- rcu_assign_pointer(sdata->vif.chanctx_conf, &ctx->conf);
- ctx->refcount++;
-
- ieee80211_recalc_txpower(sdata);
- ieee80211_recalc_chanctx_min_def(local, ctx);
- sdata->vif.bss_conf.idle = false;
-
- if (sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE &&
- sdata->vif.type != NL80211_IFTYPE_MONITOR)
- ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_IDLE);
-
- return 0;
+ list_del_rcu(&ctx->list);
+ ieee80211_del_chanctx(local, ctx);
+ kfree_rcu(ctx, rcu_head);
}
static void ieee80211_recalc_chanctx_chantype(struct ieee80211_local *local,
@@ -382,30 +563,58 @@ static void ieee80211_recalc_radar_chanctx(struct ieee80211_local *local,
drv_change_chanctx(local, chanctx, IEEE80211_CHANCTX_CHANGE_RADAR);
}
-static void ieee80211_unassign_vif_chanctx(struct ieee80211_sub_if_data *sdata,
- struct ieee80211_chanctx *ctx)
+static int ieee80211_assign_vif_chanctx(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_chanctx *new_ctx)
{
struct ieee80211_local *local = sdata->local;
+ struct ieee80211_chanctx_conf *conf;
+ struct ieee80211_chanctx *curr_ctx = NULL;
+ int ret = 0;
- lockdep_assert_held(&local->chanctx_mtx);
+ conf = rcu_dereference_protected(sdata->vif.chanctx_conf,
+ lockdep_is_held(&local->chanctx_mtx));
- ctx->refcount--;
- rcu_assign_pointer(sdata->vif.chanctx_conf, NULL);
+ if (conf) {
+ curr_ctx = container_of(conf, struct ieee80211_chanctx, conf);
- sdata->vif.bss_conf.idle = true;
+ drv_unassign_vif_chanctx(local, sdata, curr_ctx);
+ conf = NULL;
+ list_del(&sdata->assigned_chanctx_list);
+ }
- if (sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE &&
- sdata->vif.type != NL80211_IFTYPE_MONITOR)
- ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_IDLE);
+ if (new_ctx) {
+ ret = drv_assign_vif_chanctx(local, sdata, new_ctx);
+ if (ret)
+ goto out;
- drv_unassign_vif_chanctx(local, sdata, ctx);
+ conf = &new_ctx->conf;
+ list_add(&sdata->assigned_chanctx_list,
+ &new_ctx->assigned_vifs);
+ }
+
+out:
+ rcu_assign_pointer(sdata->vif.chanctx_conf, conf);
- if (ctx->refcount > 0) {
- ieee80211_recalc_chanctx_chantype(sdata->local, ctx);
- ieee80211_recalc_smps_chanctx(local, ctx);
- ieee80211_recalc_radar_chanctx(local, ctx);
- ieee80211_recalc_chanctx_min_def(local, ctx);
+ sdata->vif.bss_conf.idle = !conf;
+
+ if (curr_ctx && ieee80211_chanctx_num_assigned(local, curr_ctx) > 0) {
+ ieee80211_recalc_chanctx_chantype(local, curr_ctx);
+ ieee80211_recalc_smps_chanctx(local, curr_ctx);
+ ieee80211_recalc_radar_chanctx(local, curr_ctx);
+ ieee80211_recalc_chanctx_min_def(local, curr_ctx);
}
+
+ if (new_ctx && ieee80211_chanctx_num_assigned(local, new_ctx) > 0) {
+ ieee80211_recalc_txpower(sdata);
+ ieee80211_recalc_chanctx_min_def(local, new_ctx);
+ }
+
+ if (sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE &&
+ sdata->vif.type != NL80211_IFTYPE_MONITOR)
+ ieee80211_bss_info_change_notify(sdata,
+ BSS_CHANGED_IDLE);
+
+ return ret;
}
static void __ieee80211_vif_release_channel(struct ieee80211_sub_if_data *sdata)
@@ -423,8 +632,11 @@ static void __ieee80211_vif_release_channel(struct ieee80211_sub_if_data *sdata)
ctx = container_of(conf, struct ieee80211_chanctx, conf);
- ieee80211_unassign_vif_chanctx(sdata, ctx);
- if (ctx->refcount == 0)
+ if (sdata->reserved_chanctx)
+ ieee80211_vif_unreserve_chanctx(sdata);
+
+ ieee80211_assign_vif_chanctx(sdata, NULL);
+ if (ieee80211_chanctx_refcount(local, ctx) == 0)
ieee80211_free_chanctx(local, ctx);
}
@@ -490,6 +702,13 @@ void ieee80211_recalc_smps_chanctx(struct ieee80211_local *local,
rx_chains_static = max(rx_chains_static, needed_static);
rx_chains_dynamic = max(rx_chains_dynamic, needed_dynamic);
}
+
+ /* Disable SMPS for the monitor interface */
+ sdata = rcu_dereference(local->monitor_sdata);
+ if (sdata &&
+ rcu_access_pointer(sdata->vif.chanctx_conf) == &chanctx->conf)
+ rx_chains_dynamic = rx_chains_static = local->rx_chains;
+
rcu_read_unlock();
if (!local->use_chanctx) {
@@ -517,6 +736,7 @@ int ieee80211_vif_use_channel(struct ieee80211_sub_if_data *sdata,
{
struct ieee80211_local *local = sdata->local;
struct ieee80211_chanctx *ctx;
+ u8 radar_detect_width = 0;
int ret;
lockdep_assert_held(&local->mtx);
@@ -524,6 +744,22 @@ int ieee80211_vif_use_channel(struct ieee80211_sub_if_data *sdata,
WARN_ON(sdata->dev && netif_carrier_ok(sdata->dev));
mutex_lock(&local->chanctx_mtx);
+
+ ret = cfg80211_chandef_dfs_required(local->hw.wiphy,
+ chandef,
+ sdata->wdev.iftype);
+ if (ret < 0)
+ goto out;
+ if (ret > 0)
+ radar_detect_width = BIT(chandef->width);
+
+ sdata->radar_required = ret;
+
+ ret = ieee80211_check_combinations(sdata, chandef, mode,
+ radar_detect_width);
+ if (ret < 0)
+ goto out;
+
__ieee80211_vif_release_channel(sdata);
ctx = ieee80211_find_chanctx(local, chandef, mode);
@@ -539,7 +775,7 @@ int ieee80211_vif_use_channel(struct ieee80211_sub_if_data *sdata,
ret = ieee80211_assign_vif_chanctx(sdata, ctx);
if (ret) {
/* if assign fails refcount stays the same */
- if (ctx->refcount == 0)
+ if (ieee80211_chanctx_refcount(local, ctx) == 0)
ieee80211_free_chanctx(local, ctx);
goto out;
}
@@ -551,15 +787,47 @@ int ieee80211_vif_use_channel(struct ieee80211_sub_if_data *sdata,
return ret;
}
+static int __ieee80211_vif_change_channel(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_chanctx *ctx,
+ u32 *changed)
+{
+ struct ieee80211_local *local = sdata->local;
+ const struct cfg80211_chan_def *chandef = &sdata->csa_chandef;
+ u32 chanctx_changed = 0;
+
+ if (!cfg80211_chandef_usable(sdata->local->hw.wiphy, chandef,
+ IEEE80211_CHAN_DISABLED))
+ return -EINVAL;
+
+ if (ieee80211_chanctx_refcount(local, ctx) != 1)
+ return -EINVAL;
+
+ if (sdata->vif.bss_conf.chandef.width != chandef->width) {
+ chanctx_changed = IEEE80211_CHANCTX_CHANGE_WIDTH;
+ *changed |= BSS_CHANGED_BANDWIDTH;
+ }
+
+ sdata->vif.bss_conf.chandef = *chandef;
+ ctx->conf.def = *chandef;
+
+ chanctx_changed |= IEEE80211_CHANCTX_CHANGE_CHANNEL;
+ drv_change_chanctx(local, ctx, chanctx_changed);
+
+ ieee80211_recalc_chanctx_chantype(local, ctx);
+ ieee80211_recalc_smps_chanctx(local, ctx);
+ ieee80211_recalc_radar_chanctx(local, ctx);
+ ieee80211_recalc_chanctx_min_def(local, ctx);
+
+ return 0;
+}
+
int ieee80211_vif_change_channel(struct ieee80211_sub_if_data *sdata,
u32 *changed)
{
struct ieee80211_local *local = sdata->local;
struct ieee80211_chanctx_conf *conf;
struct ieee80211_chanctx *ctx;
- const struct cfg80211_chan_def *chandef = &sdata->csa_chandef;
int ret;
- u32 chanctx_changed = 0;
lockdep_assert_held(&local->mtx);
@@ -567,11 +835,94 @@ int ieee80211_vif_change_channel(struct ieee80211_sub_if_data *sdata,
if (WARN_ON(!sdata->vif.csa_active))
return -EINVAL;
- if (!cfg80211_chandef_usable(sdata->local->hw.wiphy, chandef,
- IEEE80211_CHAN_DISABLED))
+ mutex_lock(&local->chanctx_mtx);
+ conf = rcu_dereference_protected(sdata->vif.chanctx_conf,
+ lockdep_is_held(&local->chanctx_mtx));
+ if (!conf) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ctx = container_of(conf, struct ieee80211_chanctx, conf);
+
+ ret = __ieee80211_vif_change_channel(sdata, ctx, changed);
+ out:
+ mutex_unlock(&local->chanctx_mtx);
+ return ret;
+}
+
+static void
+__ieee80211_vif_copy_chanctx_to_vlans(struct ieee80211_sub_if_data *sdata,
+ bool clear)
+{
+ struct ieee80211_local *local __maybe_unused = sdata->local;
+ struct ieee80211_sub_if_data *vlan;
+ struct ieee80211_chanctx_conf *conf;
+
+ if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_AP))
+ return;
+
+ lockdep_assert_held(&local->mtx);
+
+ /* Check that conf exists, even when clearing this function
+ * must be called with the AP's channel context still there
+ * as it would otherwise cause VLANs to have an invalid
+ * channel context pointer for a while, possibly pointing
+ * to a channel context that has already been freed.
+ */
+ conf = rcu_dereference_protected(sdata->vif.chanctx_conf,
+ lockdep_is_held(&local->chanctx_mtx));
+ WARN_ON(!conf);
+
+ if (clear)
+ conf = NULL;
+
+ list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list)
+ rcu_assign_pointer(vlan->vif.chanctx_conf, conf);
+}
+
+void ieee80211_vif_copy_chanctx_to_vlans(struct ieee80211_sub_if_data *sdata,
+ bool clear)
+{
+ struct ieee80211_local *local = sdata->local;
+
+ mutex_lock(&local->chanctx_mtx);
+
+ __ieee80211_vif_copy_chanctx_to_vlans(sdata, clear);
+
+ mutex_unlock(&local->chanctx_mtx);
+}
+
+int ieee80211_vif_unreserve_chanctx(struct ieee80211_sub_if_data *sdata)
+{
+ struct ieee80211_chanctx *ctx = sdata->reserved_chanctx;
+
+ lockdep_assert_held(&sdata->local->chanctx_mtx);
+
+ if (WARN_ON(!ctx))
return -EINVAL;
+ list_del(&sdata->reserved_chanctx_list);
+ sdata->reserved_chanctx = NULL;
+
+ if (ieee80211_chanctx_refcount(sdata->local, ctx) == 0)
+ ieee80211_free_chanctx(sdata->local, ctx);
+
+ return 0;
+}
+
+int ieee80211_vif_reserve_chanctx(struct ieee80211_sub_if_data *sdata,
+ const struct cfg80211_chan_def *chandef,
+ enum ieee80211_chanctx_mode mode,
+ bool radar_required)
+{
+ struct ieee80211_local *local = sdata->local;
+ struct ieee80211_chanctx_conf *conf;
+ struct ieee80211_chanctx *new_ctx, *curr_ctx;
+ int ret = 0;
+
mutex_lock(&local->chanctx_mtx);
+
conf = rcu_dereference_protected(sdata->vif.chanctx_conf,
lockdep_is_held(&local->chanctx_mtx));
if (!conf) {
@@ -579,30 +930,108 @@ int ieee80211_vif_change_channel(struct ieee80211_sub_if_data *sdata,
goto out;
}
- ctx = container_of(conf, struct ieee80211_chanctx, conf);
- if (ctx->refcount != 1) {
+ curr_ctx = container_of(conf, struct ieee80211_chanctx, conf);
+
+ new_ctx = ieee80211_find_reservation_chanctx(local, chandef, mode);
+ if (!new_ctx) {
+ if (ieee80211_chanctx_refcount(local, curr_ctx) == 1 &&
+ (local->hw.flags & IEEE80211_HW_CHANGE_RUNNING_CHANCTX)) {
+ /* if we're the only users of the chanctx and
+ * the driver supports changing a running
+ * context, reserve our current context
+ */
+ new_ctx = curr_ctx;
+ } else if (ieee80211_can_create_new_chanctx(local)) {
+ /* create a new context and reserve it */
+ new_ctx = ieee80211_new_chanctx(local, chandef, mode);
+ if (IS_ERR(new_ctx)) {
+ ret = PTR_ERR(new_ctx);
+ goto out;
+ }
+ } else {
+ ret = -EBUSY;
+ goto out;
+ }
+ }
+
+ list_add(&sdata->reserved_chanctx_list, &new_ctx->reserved_vifs);
+ sdata->reserved_chanctx = new_ctx;
+ sdata->reserved_chandef = *chandef;
+ sdata->reserved_radar_required = radar_required;
+out:
+ mutex_unlock(&local->chanctx_mtx);
+ return ret;
+}
+
+int ieee80211_vif_use_reserved_context(struct ieee80211_sub_if_data *sdata,
+ u32 *changed)
+{
+ struct ieee80211_local *local = sdata->local;
+ struct ieee80211_chanctx *ctx;
+ struct ieee80211_chanctx *old_ctx;
+ struct ieee80211_chanctx_conf *conf;
+ int ret;
+ u32 tmp_changed = *changed;
+
+ /* TODO: need to recheck if the chandef is usable etc.? */
+
+ lockdep_assert_held(&local->mtx);
+
+ mutex_lock(&local->chanctx_mtx);
+
+ ctx = sdata->reserved_chanctx;
+ if (WARN_ON(!ctx)) {
ret = -EINVAL;
goto out;
}
- if (sdata->vif.bss_conf.chandef.width != chandef->width) {
- chanctx_changed = IEEE80211_CHANCTX_CHANGE_WIDTH;
- *changed |= BSS_CHANGED_BANDWIDTH;
+ conf = rcu_dereference_protected(sdata->vif.chanctx_conf,
+ lockdep_is_held(&local->chanctx_mtx));
+ if (!conf) {
+ ret = -EINVAL;
+ goto out;
}
- sdata->vif.bss_conf.chandef = *chandef;
- ctx->conf.def = *chandef;
+ old_ctx = container_of(conf, struct ieee80211_chanctx, conf);
- chanctx_changed |= IEEE80211_CHANCTX_CHANGE_CHANNEL;
- drv_change_chanctx(local, ctx, chanctx_changed);
+ if (sdata->vif.bss_conf.chandef.width != sdata->reserved_chandef.width)
+ tmp_changed |= BSS_CHANGED_BANDWIDTH;
+
+ sdata->vif.bss_conf.chandef = sdata->reserved_chandef;
+
+ /* unref our reservation */
+ sdata->reserved_chanctx = NULL;
+ sdata->radar_required = sdata->reserved_radar_required;
+ list_del(&sdata->reserved_chanctx_list);
+
+ if (old_ctx == ctx) {
+ /* This is our own context, just change it */
+ ret = __ieee80211_vif_change_channel(sdata, old_ctx,
+ &tmp_changed);
+ if (ret)
+ goto out;
+ } else {
+ ret = ieee80211_assign_vif_chanctx(sdata, ctx);
+ if (ieee80211_chanctx_refcount(local, old_ctx) == 0)
+ ieee80211_free_chanctx(local, old_ctx);
+ if (ret) {
+ /* if assign fails refcount stays the same */
+ if (ieee80211_chanctx_refcount(local, ctx) == 0)
+ ieee80211_free_chanctx(local, ctx);
+ goto out;
+ }
+
+ if (sdata->vif.type == NL80211_IFTYPE_AP)
+ __ieee80211_vif_copy_chanctx_to_vlans(sdata, false);
+ }
+
+ *changed = tmp_changed;
ieee80211_recalc_chanctx_chantype(local, ctx);
ieee80211_recalc_smps_chanctx(local, ctx);
ieee80211_recalc_radar_chanctx(local, ctx);
ieee80211_recalc_chanctx_min_def(local, ctx);
-
- ret = 0;
- out:
+out:
mutex_unlock(&local->chanctx_mtx);
return ret;
}
@@ -686,40 +1115,6 @@ void ieee80211_vif_vlan_copy_chanctx(struct ieee80211_sub_if_data *sdata)
mutex_unlock(&local->chanctx_mtx);
}
-void ieee80211_vif_copy_chanctx_to_vlans(struct ieee80211_sub_if_data *sdata,
- bool clear)
-{
- struct ieee80211_local *local = sdata->local;
- struct ieee80211_sub_if_data *vlan;
- struct ieee80211_chanctx_conf *conf;
-
- ASSERT_RTNL();
-
- if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_AP))
- return;
-
- mutex_lock(&local->chanctx_mtx);
-
- /*
- * Check that conf exists, even when clearing this function
- * must be called with the AP's channel context still there
- * as it would otherwise cause VLANs to have an invalid
- * channel context pointer for a while, possibly pointing
- * to a channel context that has already been freed.
- */
- conf = rcu_dereference_protected(sdata->vif.chanctx_conf,
- lockdep_is_held(&local->chanctx_mtx));
- WARN_ON(!conf);
-
- if (clear)
- conf = NULL;
-
- list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list)
- rcu_assign_pointer(vlan->vif.chanctx_conf, conf);
-
- mutex_unlock(&local->chanctx_mtx);
-}
-
void ieee80211_iter_chan_contexts_atomic(
struct ieee80211_hw *hw,
void (*iter)(struct ieee80211_hw *hw,
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index fa16e54980a..0e963bc1cea 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -128,7 +128,7 @@ static ssize_t sta_tx_latency_stat_write(struct file *file,
if (!strcmp(buf, TX_LATENCY_DISABLED)) {
if (!tx_latency)
goto unlock;
- rcu_assign_pointer(local->tx_latency, NULL);
+ RCU_INIT_POINTER(local->tx_latency, NULL);
synchronize_rcu();
kfree(tx_latency);
goto unlock;
diff --git a/net/mac80211/debugfs.h b/net/mac80211/debugfs.h
index 214ed4ecd73..60c35afee29 100644
--- a/net/mac80211/debugfs.h
+++ b/net/mac80211/debugfs.h
@@ -1,6 +1,8 @@
#ifndef __MAC80211_DEBUGFS_H
#define __MAC80211_DEBUGFS_H
+#include "ieee80211_i.h"
+
#ifdef CONFIG_MAC80211_DEBUGFS
void debugfs_hw_add(struct ieee80211_local *local);
int __printf(4, 5) mac80211_format_buffer(char __user *userbuf, size_t count,
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index ebf80f3abd8..e205ebabfa5 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -34,8 +34,7 @@ static ssize_t ieee80211_if_read(
ssize_t ret = -EINVAL;
read_lock(&dev_base_lock);
- if (sdata->dev->reg_state == NETREG_REGISTERED)
- ret = (*format)(sdata, buf, sizeof(buf));
+ ret = (*format)(sdata, buf, sizeof(buf));
read_unlock(&dev_base_lock);
if (ret >= 0)
@@ -62,8 +61,7 @@ static ssize_t ieee80211_if_write(
ret = -ENODEV;
rtnl_lock();
- if (sdata->dev->reg_state == NETREG_REGISTERED)
- ret = (*write)(sdata, buf, count);
+ ret = (*write)(sdata, buf, count);
rtnl_unlock();
return ret;
@@ -358,6 +356,18 @@ static ssize_t ieee80211_if_parse_tkip_mic_test(
}
IEEE80211_IF_FILE_W(tkip_mic_test);
+static ssize_t ieee80211_if_parse_beacon_loss(
+ struct ieee80211_sub_if_data *sdata, const char *buf, int buflen)
+{
+ if (!ieee80211_sdata_running(sdata) || !sdata->vif.bss_conf.assoc)
+ return -ENOTCONN;
+
+ ieee80211_beacon_loss(&sdata->vif);
+
+ return buflen;
+}
+IEEE80211_IF_FILE_W(beacon_loss);
+
static ssize_t ieee80211_if_fmt_uapsd_queues(
const struct ieee80211_sub_if_data *sdata, char *buf, int buflen)
{
@@ -569,6 +579,7 @@ static void add_sta_files(struct ieee80211_sub_if_data *sdata)
DEBUGFS_ADD(beacon_timeout);
DEBUGFS_ADD_MODE(smps, 0600);
DEBUGFS_ADD_MODE(tkip_mic_test, 0200);
+ DEBUGFS_ADD_MODE(beacon_loss, 0200);
DEBUGFS_ADD_MODE(uapsd_queues, 0600);
DEBUGFS_ADD_MODE(uapsd_max_sp_len, 0600);
}
diff --git a/net/mac80211/debugfs_netdev.h b/net/mac80211/debugfs_netdev.h
index 79025e79f4d..9f5501a9a79 100644
--- a/net/mac80211/debugfs_netdev.h
+++ b/net/mac80211/debugfs_netdev.h
@@ -3,6 +3,8 @@
#ifndef __IEEE80211_DEBUGFS_NETDEV_H
#define __IEEE80211_DEBUGFS_NETDEV_H
+#include "ieee80211_i.h"
+
#ifdef CONFIG_MAC80211_DEBUGFS
void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata);
void ieee80211_debugfs_remove_netdev(struct ieee80211_sub_if_data *sdata);
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index 80194b557a0..2ecb4deddb5 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -195,7 +195,7 @@ static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf,
static ssize_t sta_agg_status_write(struct file *file, const char __user *userbuf,
size_t count, loff_t *ppos)
{
- char _buf[12], *buf = _buf;
+ char _buf[12] = {}, *buf = _buf;
struct sta_info *sta = file->private_data;
bool start, tx;
unsigned long tid;
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index ef8b385eff0..bd782dcffcc 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -5,11 +5,11 @@
#include "ieee80211_i.h"
#include "trace.h"
-static inline void check_sdata_in_driver(struct ieee80211_sub_if_data *sdata)
+static inline bool check_sdata_in_driver(struct ieee80211_sub_if_data *sdata)
{
- WARN(!(sdata->flags & IEEE80211_SDATA_IN_DRIVER),
- "%s: Failed check-sdata-in-driver check, flags: 0x%x\n",
- sdata->dev ? sdata->dev->name : sdata->name, sdata->flags);
+ return !WARN(!(sdata->flags & IEEE80211_SDATA_IN_DRIVER),
+ "%s: Failed check-sdata-in-driver check, flags: 0x%x\n",
+ sdata->dev ? sdata->dev->name : sdata->name, sdata->flags);
}
static inline struct ieee80211_sub_if_data *
@@ -168,7 +168,8 @@ static inline int drv_change_interface(struct ieee80211_local *local,
might_sleep();
- check_sdata_in_driver(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return -EIO;
trace_drv_change_interface(local, sdata, type, p2p);
ret = local->ops->change_interface(&local->hw, &sdata->vif, type, p2p);
@@ -181,7 +182,8 @@ static inline void drv_remove_interface(struct ieee80211_local *local,
{
might_sleep();
- check_sdata_in_driver(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return;
trace_drv_remove_interface(local, sdata);
local->ops->remove_interface(&local->hw, &sdata->vif);
@@ -219,7 +221,8 @@ static inline void drv_bss_info_changed(struct ieee80211_local *local,
sdata->vif.type == NL80211_IFTYPE_MONITOR))
return;
- check_sdata_in_driver(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return;
trace_drv_bss_info_changed(local, sdata, info, changed);
if (local->ops->bss_info_changed)
@@ -278,7 +281,8 @@ static inline int drv_set_key(struct ieee80211_local *local,
might_sleep();
sdata = get_bss_sdata(sdata);
- check_sdata_in_driver(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return -EIO;
trace_drv_set_key(local, cmd, sdata, sta, key);
ret = local->ops->set_key(&local->hw, cmd, &sdata->vif, sta, key);
@@ -298,7 +302,8 @@ static inline void drv_update_tkip_key(struct ieee80211_local *local,
ista = &sta->sta;
sdata = get_bss_sdata(sdata);
- check_sdata_in_driver(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return;
trace_drv_update_tkip_key(local, sdata, conf, ista, iv32);
if (local->ops->update_tkip_key)
@@ -315,7 +320,8 @@ static inline int drv_hw_scan(struct ieee80211_local *local,
might_sleep();
- check_sdata_in_driver(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return -EIO;
trace_drv_hw_scan(local, sdata);
ret = local->ops->hw_scan(&local->hw, &sdata->vif, req);
@@ -328,7 +334,8 @@ static inline void drv_cancel_hw_scan(struct ieee80211_local *local,
{
might_sleep();
- check_sdata_in_driver(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return;
trace_drv_cancel_hw_scan(local, sdata);
local->ops->cancel_hw_scan(&local->hw, &sdata->vif);
@@ -345,7 +352,8 @@ drv_sched_scan_start(struct ieee80211_local *local,
might_sleep();
- check_sdata_in_driver(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return -EIO;
trace_drv_sched_scan_start(local, sdata);
ret = local->ops->sched_scan_start(&local->hw, &sdata->vif,
@@ -354,16 +362,21 @@ drv_sched_scan_start(struct ieee80211_local *local,
return ret;
}
-static inline void drv_sched_scan_stop(struct ieee80211_local *local,
- struct ieee80211_sub_if_data *sdata)
+static inline int drv_sched_scan_stop(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata)
{
+ int ret;
+
might_sleep();
- check_sdata_in_driver(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return -EIO;
trace_drv_sched_scan_stop(local, sdata);
- local->ops->sched_scan_stop(&local->hw, &sdata->vif);
- trace_drv_return_void(local);
+ ret = local->ops->sched_scan_stop(&local->hw, &sdata->vif);
+ trace_drv_return_int(local, ret);
+
+ return ret;
}
static inline void drv_sw_scan_start(struct ieee80211_local *local)
@@ -458,7 +471,8 @@ static inline void drv_sta_notify(struct ieee80211_local *local,
struct ieee80211_sta *sta)
{
sdata = get_bss_sdata(sdata);
- check_sdata_in_driver(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return;
trace_drv_sta_notify(local, sdata, cmd, sta);
if (local->ops->sta_notify)
@@ -475,7 +489,8 @@ static inline int drv_sta_add(struct ieee80211_local *local,
might_sleep();
sdata = get_bss_sdata(sdata);
- check_sdata_in_driver(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return -EIO;
trace_drv_sta_add(local, sdata, sta);
if (local->ops->sta_add)
@@ -493,7 +508,8 @@ static inline void drv_sta_remove(struct ieee80211_local *local,
might_sleep();
sdata = get_bss_sdata(sdata);
- check_sdata_in_driver(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return;
trace_drv_sta_remove(local, sdata, sta);
if (local->ops->sta_remove)
@@ -511,7 +527,8 @@ static inline void drv_sta_add_debugfs(struct ieee80211_local *local,
might_sleep();
sdata = get_bss_sdata(sdata);
- check_sdata_in_driver(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return;
if (local->ops->sta_add_debugfs)
local->ops->sta_add_debugfs(&local->hw, &sdata->vif,
@@ -541,7 +558,8 @@ static inline void drv_sta_pre_rcu_remove(struct ieee80211_local *local,
might_sleep();
sdata = get_bss_sdata(sdata);
- check_sdata_in_driver(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return;
trace_drv_sta_pre_rcu_remove(local, sdata, &sta->sta);
if (local->ops->sta_pre_rcu_remove)
@@ -562,7 +580,8 @@ int drv_sta_state(struct ieee80211_local *local,
might_sleep();
sdata = get_bss_sdata(sdata);
- check_sdata_in_driver(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return -EIO;
trace_drv_sta_state(local, sdata, &sta->sta, old_state, new_state);
if (local->ops->sta_state) {
@@ -586,7 +605,8 @@ static inline void drv_sta_rc_update(struct ieee80211_local *local,
struct ieee80211_sta *sta, u32 changed)
{
sdata = get_bss_sdata(sdata);
- check_sdata_in_driver(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return;
WARN_ON(changed & IEEE80211_RC_SUPP_RATES_CHANGED &&
(sdata->vif.type != NL80211_IFTYPE_ADHOC &&
@@ -608,7 +628,8 @@ static inline int drv_conf_tx(struct ieee80211_local *local,
might_sleep();
- check_sdata_in_driver(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return -EIO;
trace_drv_conf_tx(local, sdata, ac, params);
if (local->ops->conf_tx)
@@ -625,7 +646,8 @@ static inline u64 drv_get_tsf(struct ieee80211_local *local,
might_sleep();
- check_sdata_in_driver(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return ret;
trace_drv_get_tsf(local, sdata);
if (local->ops->get_tsf)
@@ -640,7 +662,8 @@ static inline void drv_set_tsf(struct ieee80211_local *local,
{
might_sleep();
- check_sdata_in_driver(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return;
trace_drv_set_tsf(local, sdata, tsf);
if (local->ops->set_tsf)
@@ -653,7 +676,8 @@ static inline void drv_reset_tsf(struct ieee80211_local *local,
{
might_sleep();
- check_sdata_in_driver(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return;
trace_drv_reset_tsf(local, sdata);
if (local->ops->reset_tsf)
@@ -685,7 +709,8 @@ static inline int drv_ampdu_action(struct ieee80211_local *local,
might_sleep();
sdata = get_bss_sdata(sdata);
- check_sdata_in_driver(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return -EIO;
trace_drv_ampdu_action(local, sdata, action, sta, tid, ssn, buf_size);
@@ -722,13 +747,19 @@ static inline void drv_rfkill_poll(struct ieee80211_local *local)
}
static inline void drv_flush(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
u32 queues, bool drop)
{
+ struct ieee80211_vif *vif = sdata ? &sdata->vif : NULL;
+
might_sleep();
+ if (sdata && !check_sdata_in_driver(sdata))
+ return;
+
trace_drv_flush(local, queues, drop);
if (local->ops->flush)
- local->ops->flush(&local->hw, queues, drop);
+ local->ops->flush(&local->hw, vif, queues, drop);
trace_drv_return_void(local);
}
@@ -844,7 +875,8 @@ static inline int drv_set_bitrate_mask(struct ieee80211_local *local,
might_sleep();
- check_sdata_in_driver(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return -EIO;
trace_drv_set_bitrate_mask(local, sdata, mask);
if (local->ops->set_bitrate_mask)
@@ -859,7 +891,8 @@ static inline void drv_set_rekey_data(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata,
struct cfg80211_gtk_rekey_data *data)
{
- check_sdata_in_driver(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return;
trace_drv_set_rekey_data(local, sdata, data);
if (local->ops->set_rekey_data)
@@ -927,7 +960,8 @@ static inline void drv_mgd_prepare_tx(struct ieee80211_local *local,
{
might_sleep();
- check_sdata_in_driver(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return;
WARN_ON_ONCE(sdata->vif.type != NL80211_IFTYPE_STATION);
trace_drv_mgd_prepare_tx(local, sdata);
@@ -954,6 +988,9 @@ static inline int drv_add_chanctx(struct ieee80211_local *local,
static inline void drv_remove_chanctx(struct ieee80211_local *local,
struct ieee80211_chanctx *ctx)
{
+ if (WARN_ON(!ctx->driver_present))
+ return;
+
trace_drv_remove_chanctx(local, ctx);
if (local->ops->remove_chanctx)
local->ops->remove_chanctx(&local->hw, &ctx->conf);
@@ -979,7 +1016,8 @@ static inline int drv_assign_vif_chanctx(struct ieee80211_local *local,
{
int ret = 0;
- check_sdata_in_driver(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return -EIO;
trace_drv_assign_vif_chanctx(local, sdata, ctx);
if (local->ops->assign_vif_chanctx) {
@@ -997,7 +1035,8 @@ static inline void drv_unassign_vif_chanctx(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata,
struct ieee80211_chanctx *ctx)
{
- check_sdata_in_driver(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return;
trace_drv_unassign_vif_chanctx(local, sdata, ctx);
if (local->ops->unassign_vif_chanctx) {
@@ -1009,12 +1048,66 @@ static inline void drv_unassign_vif_chanctx(struct ieee80211_local *local,
trace_drv_return_void(local);
}
+static inline int
+drv_switch_vif_chanctx(struct ieee80211_local *local,
+ struct ieee80211_vif_chanctx_switch *vifs,
+ int n_vifs,
+ enum ieee80211_chanctx_switch_mode mode)
+{
+ int ret = 0;
+ int i;
+
+ if (!local->ops->switch_vif_chanctx)
+ return -EOPNOTSUPP;
+
+ for (i = 0; i < n_vifs; i++) {
+ struct ieee80211_chanctx *new_ctx =
+ container_of(vifs[i].new_ctx,
+ struct ieee80211_chanctx,
+ conf);
+ struct ieee80211_chanctx *old_ctx =
+ container_of(vifs[i].old_ctx,
+ struct ieee80211_chanctx,
+ conf);
+
+ WARN_ON_ONCE(!old_ctx->driver_present);
+ WARN_ON_ONCE((mode == CHANCTX_SWMODE_SWAP_CONTEXTS &&
+ new_ctx->driver_present) ||
+ (mode == CHANCTX_SWMODE_REASSIGN_VIF &&
+ !new_ctx->driver_present));
+ }
+
+ trace_drv_switch_vif_chanctx(local, vifs, n_vifs, mode);
+ ret = local->ops->switch_vif_chanctx(&local->hw,
+ vifs, n_vifs, mode);
+ trace_drv_return_int(local, ret);
+
+ if (!ret && mode == CHANCTX_SWMODE_SWAP_CONTEXTS) {
+ for (i = 0; i < n_vifs; i++) {
+ struct ieee80211_chanctx *new_ctx =
+ container_of(vifs[i].new_ctx,
+ struct ieee80211_chanctx,
+ conf);
+ struct ieee80211_chanctx *old_ctx =
+ container_of(vifs[i].old_ctx,
+ struct ieee80211_chanctx,
+ conf);
+
+ new_ctx->driver_present = true;
+ old_ctx->driver_present = false;
+ }
+ }
+
+ return ret;
+}
+
static inline int drv_start_ap(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata)
{
int ret = 0;
- check_sdata_in_driver(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return -EIO;
trace_drv_start_ap(local, sdata, &sdata->vif.bss_conf);
if (local->ops->start_ap)
@@ -1026,7 +1119,8 @@ static inline int drv_start_ap(struct ieee80211_local *local,
static inline void drv_stop_ap(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata)
{
- check_sdata_in_driver(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return;
trace_drv_stop_ap(local, sdata);
if (local->ops->stop_ap)
@@ -1049,7 +1143,8 @@ drv_set_default_unicast_key(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata,
int key_idx)
{
- check_sdata_in_driver(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return;
WARN_ON_ONCE(key_idx < -1 || key_idx > 3);
@@ -1091,7 +1186,8 @@ static inline int drv_join_ibss(struct ieee80211_local *local,
int ret = 0;
might_sleep();
- check_sdata_in_driver(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return -EIO;
trace_drv_join_ibss(local, sdata, &sdata->vif.bss_conf);
if (local->ops->join_ibss)
@@ -1104,7 +1200,8 @@ static inline void drv_leave_ibss(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata)
{
might_sleep();
- check_sdata_in_driver(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return;
trace_drv_leave_ibss(local, sdata);
if (local->ops->leave_ibss)
@@ -1112,4 +1209,17 @@ static inline void drv_leave_ibss(struct ieee80211_local *local,
trace_drv_return_void(local);
}
+static inline u32 drv_get_expected_throughput(struct ieee80211_local *local,
+ struct ieee80211_sta *sta)
+{
+ u32 ret = 0;
+
+ trace_drv_get_expected_throughput(sta);
+ if (local->ops->get_expected_throughput)
+ ret = local->ops->get_expected_throughput(sta);
+ trace_drv_return_u32(local, ret);
+
+ return ret;
+}
+
#endif /* __MAC80211_DRIVER_OPS */
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index 70dd013de83..15702ff64a4 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -31,6 +31,18 @@ static void __check_htcap_disable(struct ieee80211_ht_cap *ht_capa,
}
}
+static void __check_htcap_enable(struct ieee80211_ht_cap *ht_capa,
+ struct ieee80211_ht_cap *ht_capa_mask,
+ struct ieee80211_sta_ht_cap *ht_cap,
+ u16 flag)
+{
+ __le16 le_flag = cpu_to_le16(flag);
+
+ if ((ht_capa_mask->cap_info & le_flag) &&
+ (ht_capa->cap_info & le_flag))
+ ht_cap->cap |= flag;
+}
+
void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata,
struct ieee80211_sta_ht_cap *ht_cap)
{
@@ -59,7 +71,7 @@ void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata,
smask = (u8 *)(&ht_capa_mask->mcs.rx_mask);
/* NOTE: If you add more over-rides here, update register_hw
- * ht_capa_mod_msk logic in main.c as well.
+ * ht_capa_mod_mask logic in main.c as well.
* And, if this method can ever change ht_cap.ht_supported, fix
* the check in ieee80211_add_ht_ie.
*/
@@ -86,6 +98,14 @@ void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata,
__check_htcap_disable(ht_capa, ht_capa_mask, ht_cap,
IEEE80211_HT_CAP_MAX_AMSDU);
+ /* Allow user to disable LDPC */
+ __check_htcap_disable(ht_capa, ht_capa_mask, ht_cap,
+ IEEE80211_HT_CAP_LDPC_CODING);
+
+ /* Allow user to enable 40 MHz intolerant bit. */
+ __check_htcap_enable(ht_capa, ht_capa_mask, ht_cap,
+ IEEE80211_HT_CAP_40MHZ_INTOLERANT);
+
/* Allow user to decrease AMPDU factor */
if (ht_capa_mask->ampdu_params_info &
IEEE80211_HT_AMPDU_PARM_FACTOR) {
@@ -375,7 +395,7 @@ void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata,
mgmt->u.action.u.delba.params = cpu_to_le16(params);
mgmt->u.action.u.delba.reason_code = cpu_to_le16(reason_code);
- ieee80211_tx_skb_tid(sdata, skb, tid);
+ ieee80211_tx_skb(sdata, skb);
}
void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata,
@@ -482,8 +502,6 @@ void ieee80211_request_smps(struct ieee80211_vif *vif,
return;
if (vif->type == NL80211_IFTYPE_STATION) {
- if (WARN_ON(smps_mode == IEEE80211_SMPS_OFF))
- smps_mode = IEEE80211_SMPS_AUTOMATIC;
if (sdata->u.mgd.driver_smps_mode == smps_mode)
return;
sdata->u.mgd.driver_smps_mode = smps_mode;
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index 2796a198728..18ee0a256b1 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -143,7 +143,7 @@ ieee80211_ibss_build_presp(struct ieee80211_sub_if_data *sdata,
*pos++ = csa_settings->block_tx ? 1 : 0;
*pos++ = ieee80211_frequency_to_channel(
csa_settings->chandef.chan->center_freq);
- sdata->csa_counter_offset_beacon = (pos - presp->head);
+ sdata->csa_counter_offset_beacon[0] = (pos - presp->head);
*pos++ = csa_settings->count;
}
@@ -220,7 +220,6 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
{
struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
struct ieee80211_local *local = sdata->local;
- struct ieee80211_supported_band *sband;
struct ieee80211_mgmt *mgmt;
struct cfg80211_bss *bss;
u32 bss_change;
@@ -229,7 +228,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
struct beacon_data *presp;
enum nl80211_bss_scan_width scan_width;
bool have_higher_than_11mbit;
- bool radar_required = false;
+ bool radar_required;
int err;
sdata_assert_lock(sdata);
@@ -254,7 +253,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
presp = rcu_dereference_protected(ifibss->presp,
lockdep_is_held(&sdata->wdev.mtx));
- rcu_assign_pointer(ifibss->presp, NULL);
+ RCU_INIT_POINTER(ifibss->presp, NULL);
if (presp)
kfree_rcu(presp, rcu_head);
@@ -263,7 +262,8 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
/* make a copy of the chandef, it could be modified below. */
chandef = *req_chandef;
chan = chandef.chan;
- if (!cfg80211_reg_can_beacon(local->hw.wiphy, &chandef)) {
+ if (!cfg80211_reg_can_beacon(local->hw.wiphy, &chandef,
+ NL80211_IFTYPE_ADHOC)) {
if (chandef.width == NL80211_CHAN_WIDTH_5 ||
chandef.width == NL80211_CHAN_WIDTH_10 ||
chandef.width == NL80211_CHAN_WIDTH_20_NOHT ||
@@ -275,7 +275,8 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
chandef.width = NL80211_CHAN_WIDTH_20;
chandef.center_freq1 = chan->center_freq;
/* check again for downgraded chandef */
- if (!cfg80211_reg_can_beacon(local->hw.wiphy, &chandef)) {
+ if (!cfg80211_reg_can_beacon(local->hw.wiphy, &chandef,
+ NL80211_IFTYPE_ADHOC)) {
sdata_info(sdata,
"Failed to join IBSS, beacons forbidden\n");
return;
@@ -283,18 +284,21 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
}
err = cfg80211_chandef_dfs_required(sdata->local->hw.wiphy,
- &chandef);
- if (err > 0) {
- if (!ifibss->userspace_handles_dfs) {
- sdata_info(sdata,
- "Failed to join IBSS, DFS channel without control program\n");
- return;
- }
- radar_required = true;
+ &chandef, NL80211_IFTYPE_ADHOC);
+ if (err < 0) {
+ sdata_info(sdata,
+ "Failed to join IBSS, invalid chandef\n");
+ return;
}
+ if (err > 0 && !ifibss->userspace_handles_dfs) {
+ sdata_info(sdata,
+ "Failed to join IBSS, DFS channel without control program\n");
+ return;
+ }
+
+ radar_required = err;
mutex_lock(&local->mtx);
- ieee80211_vif_release_channel(sdata);
if (ieee80211_vif_use_channel(sdata, &chandef,
ifibss->fixed_channel ?
IEEE80211_CHANCTX_SHARED :
@@ -303,12 +307,11 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
mutex_unlock(&local->mtx);
return;
}
+ sdata->radar_required = radar_required;
mutex_unlock(&local->mtx);
memcpy(ifibss->bssid, bssid, ETH_ALEN);
- sband = local->hw.wiphy->bands[chan->band];
-
presp = ieee80211_ibss_build_presp(sdata, beacon_int, basic_rates,
capability, tsf, &chandef,
&have_higher_than_11mbit, NULL);
@@ -318,7 +321,6 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
rcu_assign_pointer(ifibss->presp, presp);
mgmt = (void *)presp->head;
- sdata->radar_required = radar_required;
sdata->vif.bss_conf.enable_beacon = true;
sdata->vif.bss_conf.beacon_int = beacon_int;
sdata->vif.bss_conf.basic_rates = basic_rates;
@@ -386,7 +388,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
presp->head_len, 0, GFP_KERNEL);
cfg80211_put_bss(local->hw.wiphy, bss);
netif_carrier_on(sdata->dev);
- cfg80211_ibss_joined(sdata->dev, ifibss->bssid, GFP_KERNEL);
+ cfg80211_ibss_joined(sdata->dev, ifibss->bssid, chan, GFP_KERNEL);
}
static void ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
@@ -521,12 +523,6 @@ int ieee80211_ibss_csa_beacon(struct ieee80211_sub_if_data *sdata,
if (old_presp)
kfree_rcu(old_presp, rcu_head);
- /* it might not send the beacon for a while. send an action frame
- * immediately to announce the channel switch.
- */
- if (csa_settings)
- ieee80211_send_action_csa(sdata, csa_settings);
-
return BSS_CHANGED_BEACON;
out:
return ret;
@@ -536,7 +532,7 @@ int ieee80211_ibss_finish_csa(struct ieee80211_sub_if_data *sdata)
{
struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
struct cfg80211_bss *cbss;
- int err;
+ int err, changed = 0;
u16 capability;
sdata_assert_lock(sdata);
@@ -568,10 +564,9 @@ int ieee80211_ibss_finish_csa(struct ieee80211_sub_if_data *sdata)
if (err < 0)
return err;
- if (err)
- ieee80211_bss_info_change_notify(sdata, err);
+ changed |= err;
- return 0;
+ return changed;
}
void ieee80211_ibss_stop(struct ieee80211_sub_if_data *sdata)
@@ -781,7 +776,8 @@ static void ieee80211_ibss_csa_mark_radar(struct ieee80211_sub_if_data *sdata)
* unavailable.
*/
err = cfg80211_chandef_dfs_required(sdata->local->hw.wiphy,
- &ifibss->chandef);
+ &ifibss->chandef,
+ NL80211_IFTYPE_ADHOC);
if (err > 0)
cfg80211_radar_event(sdata->local->hw.wiphy, &ifibss->chandef,
GFP_ATOMIC);
@@ -799,6 +795,8 @@ ieee80211_ibss_process_chanswitch(struct ieee80211_sub_if_data *sdata,
int err;
u32 sta_flags;
+ sdata_assert_lock(sdata);
+
sta_flags = IEEE80211_STA_DISABLE_VHT;
switch (ifibss->chandef.width) {
case NL80211_CHAN_WIDTH_5:
@@ -865,7 +863,8 @@ ieee80211_ibss_process_chanswitch(struct ieee80211_sub_if_data *sdata,
goto disconnect;
}
- if (!cfg80211_reg_can_beacon(sdata->local->hw.wiphy, &params.chandef)) {
+ if (!cfg80211_reg_can_beacon(sdata->local->hw.wiphy, &params.chandef,
+ NL80211_IFTYPE_ADHOC)) {
sdata_info(sdata,
"IBSS %pM switches to unsupported channel (%d MHz, width:%d, CF1/2: %d/%d MHz), disconnecting\n",
ifibss->bssid,
@@ -877,17 +876,17 @@ ieee80211_ibss_process_chanswitch(struct ieee80211_sub_if_data *sdata,
}
err = cfg80211_chandef_dfs_required(sdata->local->hw.wiphy,
- &params.chandef);
+ &params.chandef,
+ NL80211_IFTYPE_ADHOC);
if (err < 0)
goto disconnect;
- if (err) {
+ if (err > 0 && !ifibss->userspace_handles_dfs) {
/* IBSS-DFS only allowed with a control program */
- if (!ifibss->userspace_handles_dfs)
- goto disconnect;
-
- params.radar_required = true;
+ goto disconnect;
}
+ params.radar_required = err;
+
if (cfg80211_chandef_identical(&params.chandef,
&sdata->vif.bss_conf.chandef)) {
ibss_dbg(sdata,
@@ -995,7 +994,6 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
struct ieee802_11_elems *elems)
{
struct ieee80211_local *local = sdata->local;
- int freq;
struct cfg80211_bss *cbss;
struct ieee80211_bss *bss;
struct sta_info *sta;
@@ -1007,15 +1005,8 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
struct ieee80211_supported_band *sband = local->hw.wiphy->bands[band];
bool rates_updated = false;
- if (elems->ds_params)
- freq = ieee80211_channel_to_frequency(elems->ds_params[0],
- band);
- else
- freq = rx_status->freq;
-
- channel = ieee80211_get_channel(local->hw.wiphy, freq);
-
- if (!channel || channel->flags & IEEE80211_CHAN_DISABLED)
+ channel = ieee80211_get_channel(local->hw.wiphy, rx_status->freq);
+ if (!channel)
return;
if (sdata->vif.type == NL80211_IFTYPE_ADHOC &&
@@ -1468,6 +1459,11 @@ static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata,
memcpy(((struct ieee80211_mgmt *) skb->data)->da, mgmt->sa, ETH_ALEN);
ibss_dbg(sdata, "Sending ProbeResp to %pM\n", mgmt->sa);
IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT;
+
+ /* avoid excessive retries for probe request to wildcard SSIDs */
+ if (pos[1] == 0)
+ IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_NO_ACK;
+
ieee80211_tx_skb(sdata, skb);
}
@@ -1643,7 +1639,33 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata,
u32 changed = 0;
u32 rate_flags;
struct ieee80211_supported_band *sband;
+ enum ieee80211_chanctx_mode chanmode;
+ struct ieee80211_local *local = sdata->local;
+ int radar_detect_width = 0;
int i;
+ int ret;
+
+ ret = cfg80211_chandef_dfs_required(local->hw.wiphy,
+ &params->chandef,
+ sdata->wdev.iftype);
+ if (ret < 0)
+ return ret;
+
+ if (ret > 0) {
+ if (!params->userspace_handles_dfs)
+ return -EINVAL;
+ radar_detect_width = BIT(params->chandef.width);
+ }
+
+ chanmode = (params->channel_fixed && !ret) ?
+ IEEE80211_CHANCTX_SHARED : IEEE80211_CHANCTX_EXCLUSIVE;
+
+ mutex_lock(&local->chanctx_mtx);
+ ret = ieee80211_check_combinations(sdata, &params->chandef, chanmode,
+ radar_detect_width);
+ mutex_unlock(&local->chanctx_mtx);
+ if (ret < 0)
+ return ret;
if (params->bssid) {
memcpy(sdata->u.ibss.bssid, params->bssid, ETH_ALEN);
@@ -1655,10 +1677,11 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata,
sdata->u.ibss.control_port = params->control_port;
sdata->u.ibss.userspace_handles_dfs = params->userspace_handles_dfs;
sdata->u.ibss.basic_rates = params->basic_rates;
+ sdata->u.ibss.last_scan_completed = jiffies;
/* fix basic_rates if channel does not support these rates */
rate_flags = ieee80211_chandef_rate_flags(&params->chandef);
- sband = sdata->local->hw.wiphy->bands[params->chandef.chan->band];
+ sband = local->hw.wiphy->bands[params->chandef.chan->band];
for (i = 0; i < sband->n_bitrates; i++) {
if ((rate_flags & sband->bitrates[i].flags) != rate_flags)
sdata->u.ibss.basic_rates &= ~BIT(i);
@@ -1707,9 +1730,9 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata,
ieee80211_bss_info_change_notify(sdata, changed);
sdata->smps_mode = IEEE80211_SMPS_OFF;
- sdata->needed_rx_chains = sdata->local->rx_chains;
+ sdata->needed_rx_chains = local->rx_chains;
- ieee80211_queue_work(&sdata->local->hw, &sdata->work);
+ ieee80211_queue_work(&local->hw, &sdata->work);
return 0;
}
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 5e44e3179e0..ac9836e0aab 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -260,7 +260,7 @@ struct ieee80211_if_ap {
/* to be used after channel switch. */
struct cfg80211_beacon_data *next_beacon;
- struct list_head vlans;
+ struct list_head vlans; /* write-protected with RTNL and local->mtx */
struct ps_data ps;
atomic_t num_mcast_sta; /* number of stations receiving multicast */
@@ -276,7 +276,7 @@ struct ieee80211_if_wds {
};
struct ieee80211_if_vlan {
- struct list_head list;
+ struct list_head list; /* write-protected with RTNL and local->mtx */
/* used for all tx if the VLAN is configured to 4-addr mode */
struct sta_info __rcu *sta;
@@ -317,6 +317,7 @@ struct ieee80211_roc_work {
bool started, abort, hw_begun, notified;
bool to_be_freed;
+ bool on_channel;
unsigned long hw_start_time;
@@ -616,7 +617,11 @@ struct ieee80211_if_mesh {
struct ps_data ps;
/* Channel Switching Support */
struct mesh_csa_settings __rcu *csa;
- bool chsw_init;
+ enum {
+ IEEE80211_MESH_CSA_ROLE_NONE,
+ IEEE80211_MESH_CSA_ROLE_INIT,
+ IEEE80211_MESH_CSA_ROLE_REPEATER,
+ } csa_role;
u8 chsw_ttl;
u16 pre_value;
@@ -687,8 +692,10 @@ struct ieee80211_chanctx {
struct list_head list;
struct rcu_head rcu_head;
+ struct list_head assigned_vifs;
+ struct list_head reserved_vifs;
+
enum ieee80211_chanctx_mode mode;
- int refcount;
bool driver_present;
struct ieee80211_chanctx_conf conf;
@@ -747,11 +754,21 @@ struct ieee80211_sub_if_data {
struct mac80211_qos_map __rcu *qos_map;
struct work_struct csa_finalize_work;
- int csa_counter_offset_beacon;
- int csa_counter_offset_presp;
+ u16 csa_counter_offset_beacon[IEEE80211_MAX_CSA_COUNTERS_NUM];
+ u16 csa_counter_offset_presp[IEEE80211_MAX_CSA_COUNTERS_NUM];
bool csa_radar_required;
+ bool csa_block_tx; /* write-protected by sdata_lock and local->mtx */
struct cfg80211_chan_def csa_chandef;
+ struct list_head assigned_chanctx_list; /* protected by chanctx_mtx */
+ struct list_head reserved_chanctx_list; /* protected by chanctx_mtx */
+
+ /* context reservation -- protected with chanctx_mtx */
+ struct ieee80211_chanctx *reserved_chanctx;
+ struct cfg80211_chan_def reserved_chandef;
+ bool reserved_radar_required;
+ u8 csa_current_counter;
+
/* used to reconfigure hardware SM PS */
struct work_struct recalc_smps;
@@ -1238,6 +1255,8 @@ struct ieee80211_local {
struct ieee80211_sub_if_data __rcu *p2p_sdata;
+ struct napi_struct *napi;
+
/* virtual monitor interface */
struct ieee80211_sub_if_data __rcu *monitor_sdata;
struct cfg80211_chan_def monitor_chandef;
@@ -1385,6 +1404,7 @@ void ieee80211_sta_reset_conn_monitor(struct ieee80211_sub_if_data *sdata);
void ieee80211_mgd_stop(struct ieee80211_sub_if_data *sdata);
void ieee80211_mgd_conn_tx_status(struct ieee80211_sub_if_data *sdata,
__le16 fc, bool acked);
+void ieee80211_mgd_quiesce(struct ieee80211_sub_if_data *sdata);
void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata);
/* IBSS code */
@@ -1408,8 +1428,7 @@ void ieee80211_mesh_work(struct ieee80211_sub_if_data *sdata);
void ieee80211_mesh_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb);
int ieee80211_mesh_csa_beacon(struct ieee80211_sub_if_data *sdata,
- struct cfg80211_csa_settings *csa_settings,
- bool csa_action);
+ struct cfg80211_csa_settings *csa_settings);
int ieee80211_mesh_finish_csa(struct ieee80211_sub_if_data *sdata);
/* scan/BSS handling */
@@ -1442,6 +1461,7 @@ __ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata,
int ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata,
struct cfg80211_sched_scan_request *req);
int ieee80211_request_sched_scan_stop(struct ieee80211_sub_if_data *sdata);
+void ieee80211_sched_scan_end(struct ieee80211_local *local);
void ieee80211_sched_scan_stopped_work(struct work_struct *work);
/* off-channel helpers */
@@ -1456,6 +1476,7 @@ void ieee80211_sw_roc_work(struct work_struct *work);
void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc);
/* channel switch handling */
+bool ieee80211_csa_needs_block_tx(struct ieee80211_local *local);
void ieee80211_csa_finalize_work(struct work_struct *work);
int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
struct cfg80211_csa_settings *params);
@@ -1553,6 +1574,9 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
struct sta_info *sta);
enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta);
void ieee80211_sta_set_rx_nss(struct sta_info *sta);
+u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
+ struct sta_info *sta, u8 opmode,
+ enum ieee80211_band band, bool nss_only);
void ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
struct sta_info *sta, u8 opmode,
enum ieee80211_band band, bool nss_only);
@@ -1605,7 +1629,7 @@ static inline int __ieee80211_resume(struct ieee80211_hw *hw)
}
/* utility functions/constants */
-extern void *mac80211_wiphy_privid; /* for wiphy privid */
+extern const void *const mac80211_wiphy_privid; /* for wiphy privid */
u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len,
enum nl80211_iftype type);
int ieee80211_frame_duration(enum ieee80211_band band, size_t len,
@@ -1762,6 +1786,16 @@ ieee80211_vif_use_channel(struct ieee80211_sub_if_data *sdata,
const struct cfg80211_chan_def *chandef,
enum ieee80211_chanctx_mode mode);
int __must_check
+ieee80211_vif_reserve_chanctx(struct ieee80211_sub_if_data *sdata,
+ const struct cfg80211_chan_def *chandef,
+ enum ieee80211_chanctx_mode mode,
+ bool radar_required);
+int __must_check
+ieee80211_vif_use_reserved_context(struct ieee80211_sub_if_data *sdata,
+ u32 *changed);
+int ieee80211_vif_unreserve_chanctx(struct ieee80211_sub_if_data *sdata);
+
+int __must_check
ieee80211_vif_change_bandwidth(struct ieee80211_sub_if_data *sdata,
const struct cfg80211_chan_def *chandef,
u32 *changed);
@@ -1773,6 +1807,8 @@ void ieee80211_vif_release_channel(struct ieee80211_sub_if_data *sdata);
void ieee80211_vif_vlan_copy_chanctx(struct ieee80211_sub_if_data *sdata);
void ieee80211_vif_copy_chanctx_to_vlans(struct ieee80211_sub_if_data *sdata,
bool clear);
+int ieee80211_chanctx_refcount(struct ieee80211_local *local,
+ struct ieee80211_chanctx *ctx);
void ieee80211_recalc_smps_chanctx(struct ieee80211_local *local,
struct ieee80211_chanctx *chanctx);
@@ -1796,6 +1832,20 @@ int ieee80211_cs_headroom(struct ieee80211_local *local,
enum nl80211_iftype iftype);
void ieee80211_recalc_dtim(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata);
+int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata,
+ const struct cfg80211_chan_def *chandef,
+ enum ieee80211_chanctx_mode chanmode,
+ u8 radar_detect);
+int ieee80211_max_num_channels(struct ieee80211_local *local);
+
+/* TDLS */
+int ieee80211_tdls_mgmt(struct wiphy *wiphy, struct net_device *dev,
+ const u8 *peer, u8 action_code, u8 dialog_token,
+ u16 status_code, u32 peer_capability,
+ const u8 *extra_ies, size_t extra_ies_len);
+int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev,
+ const u8 *peer, enum nl80211_tdls_operation oper);
+
#ifdef CONFIG_MAC80211_NOINLINE
#define debug_noinline noinline
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index ce1c4437061..388b863e821 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -101,9 +101,8 @@ static u32 __ieee80211_idle_on(struct ieee80211_local *local)
static u32 __ieee80211_recalc_idle(struct ieee80211_local *local,
bool force_active)
{
- bool working = false, scanning, active;
+ bool working, scanning, active;
unsigned int led_trig_start = 0, led_trig_stop = 0;
- struct ieee80211_roc_work *roc;
lockdep_assert_held(&local->mtx);
@@ -111,12 +110,8 @@ static u32 __ieee80211_recalc_idle(struct ieee80211_local *local,
!list_empty(&local->chanctx_list) ||
local->monitors;
- if (!local->ops->remain_on_channel) {
- list_for_each_entry(roc, &local->roc_list, list) {
- working = true;
- break;
- }
- }
+ working = !local->ops->remain_on_channel &&
+ !list_empty(&local->roc_list);
scanning = test_bit(SCAN_SW_SCANNING, &local->scanning) ||
test_bit(SCAN_ONCHANNEL_SCANNING, &local->scanning);
@@ -255,6 +250,7 @@ static int ieee80211_check_concurrent_iface(struct ieee80211_sub_if_data *sdata,
{
struct ieee80211_local *local = sdata->local;
struct ieee80211_sub_if_data *nsdata;
+ int ret;
ASSERT_RTNL();
@@ -305,7 +301,10 @@ static int ieee80211_check_concurrent_iface(struct ieee80211_sub_if_data *sdata,
}
}
- return 0;
+ mutex_lock(&local->chanctx_mtx);
+ ret = ieee80211_check_combinations(sdata, NULL, 0, 0);
+ mutex_unlock(&local->chanctx_mtx);
+ return ret;
}
static int ieee80211_check_queues(struct ieee80211_sub_if_data *sdata,
@@ -400,6 +399,7 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local)
sdata->vif.type = NL80211_IFTYPE_MONITOR;
snprintf(sdata->name, IFNAMSIZ, "%s-monitor",
wiphy_name(local->hw.wiphy));
+ sdata->wdev.iftype = NL80211_IFTYPE_MONITOR;
sdata->encrypt_headroom = IEEE80211_ENCRYPT_HEADROOM;
@@ -428,7 +428,7 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local)
mutex_unlock(&local->mtx);
if (ret) {
mutex_lock(&local->iflist_mtx);
- rcu_assign_pointer(local->monitor_sdata, NULL);
+ RCU_INIT_POINTER(local->monitor_sdata, NULL);
mutex_unlock(&local->iflist_mtx);
synchronize_net();
drv_remove_interface(local, sdata);
@@ -457,7 +457,7 @@ void ieee80211_del_virtual_monitor(struct ieee80211_local *local)
return;
}
- rcu_assign_pointer(local->monitor_sdata, NULL);
+ RCU_INIT_POINTER(local->monitor_sdata, NULL);
mutex_unlock(&local->iflist_mtx);
synchronize_net();
@@ -497,7 +497,9 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
if (!sdata->bss)
return -ENOLINK;
+ mutex_lock(&local->mtx);
list_add(&sdata->u.vlan.list, &sdata->bss->vlans);
+ mutex_unlock(&local->mtx);
master = container_of(sdata->bss,
struct ieee80211_sub_if_data, u.ap);
@@ -727,8 +729,11 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
drv_stop(local);
err_del_bss:
sdata->bss = NULL;
- if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
+ if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) {
+ mutex_lock(&local->mtx);
list_del(&sdata->u.vlan.list);
+ mutex_unlock(&local->mtx);
+ }
/* might already be clear but that doesn't matter */
clear_bit(SDATA_STATE_RUNNING, &sdata->state);
return res;
@@ -833,7 +838,16 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
cancel_work_sync(&local->dynamic_ps_enable_work);
cancel_work_sync(&sdata->recalc_smps);
+ sdata_lock(sdata);
+ mutex_lock(&local->mtx);
sdata->vif.csa_active = false;
+ if (!ieee80211_csa_needs_block_tx(local))
+ ieee80211_wake_queues_by_reason(&local->hw,
+ IEEE80211_MAX_QUEUE_MAP,
+ IEEE80211_QUEUE_STOP_REASON_CSA);
+ mutex_unlock(&local->mtx);
+ sdata_unlock(sdata);
+
cancel_work_sync(&sdata->csa_finalize_work);
cancel_delayed_work_sync(&sdata->dfs_cac_timer_work);
@@ -878,8 +892,10 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
switch (sdata->vif.type) {
case NL80211_IFTYPE_AP_VLAN:
+ mutex_lock(&local->mtx);
list_del(&sdata->u.vlan.list);
- rcu_assign_pointer(sdata->vif.chanctx_conf, NULL);
+ mutex_unlock(&local->mtx);
+ RCU_INIT_POINTER(sdata->vif.chanctx_conf, NULL);
/* no need to tell driver */
break;
case NL80211_IFTYPE_MONITOR:
@@ -898,7 +914,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
break;
case NL80211_IFTYPE_P2P_DEVICE:
/* relies on synchronize_rcu() below */
- rcu_assign_pointer(local->p2p_sdata, NULL);
+ RCU_INIT_POINTER(local->p2p_sdata, NULL);
/* fall through */
default:
cancel_work_sync(&sdata->work);
@@ -1270,6 +1286,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
sdata->control_port_protocol = cpu_to_be16(ETH_P_PAE);
sdata->control_port_no_encrypt = false;
sdata->encrypt_headroom = IEEE80211_ENCRYPT_HEADROOM;
+ sdata->vif.bss_conf.idle = true;
sdata->noack_map = 0;
@@ -1283,6 +1300,8 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
INIT_WORK(&sdata->work, ieee80211_iface_work);
INIT_WORK(&sdata->recalc_smps, ieee80211_recalc_smps_work);
INIT_WORK(&sdata->csa_finalize_work, ieee80211_csa_finalize_work);
+ INIT_LIST_HEAD(&sdata->assigned_chanctx_list);
+ INIT_LIST_HEAD(&sdata->reserved_chanctx_list);
switch (type) {
case NL80211_IFTYPE_P2P_GO:
@@ -1761,7 +1780,6 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local)
}
mutex_unlock(&local->iflist_mtx);
unregister_netdevice_many(&unreg_list);
- list_del(&unreg_list);
list_for_each_entry_safe(sdata, tmp, &wdev_list, list) {
list_del(&sdata->list);
@@ -1777,20 +1795,19 @@ static int netdev_notify(struct notifier_block *nb,
struct ieee80211_sub_if_data *sdata;
if (state != NETDEV_CHANGENAME)
- return 0;
+ return NOTIFY_DONE;
if (!dev->ieee80211_ptr || !dev->ieee80211_ptr->wiphy)
- return 0;
+ return NOTIFY_DONE;
if (dev->ieee80211_ptr->wiphy->privid != mac80211_wiphy_privid)
- return 0;
+ return NOTIFY_DONE;
sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-
memcpy(sdata->name, dev->name, IFNAMSIZ);
-
ieee80211_debugfs_rename_netdev(sdata);
- return 0;
+
+ return NOTIFY_OK;
}
static struct notifier_block mac80211_netdev_notifier = {
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index 6ff65a1ebaa..16d97f044a2 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -325,7 +325,8 @@ ieee80211_key_alloc(u32 cipher, int idx, size_t key_len,
struct ieee80211_key *key;
int i, j, err;
- BUG_ON(idx < 0 || idx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS);
+ if (WARN_ON(idx < 0 || idx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS))
+ return ERR_PTR(-EINVAL);
key = kzalloc(sizeof(struct ieee80211_key) + key_len, GFP_KERNEL);
if (!key)
@@ -481,8 +482,8 @@ int ieee80211_key_link(struct ieee80211_key *key,
int idx, ret;
bool pairwise;
- BUG_ON(!sdata);
- BUG_ON(!key);
+ if (WARN_ON(!sdata || !key))
+ return -EINVAL;
pairwise = key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE;
idx = key->conf.keyidx;
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index d767cfb9b45..d17c26d6e36 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -148,6 +148,8 @@ static u32 ieee80211_hw_conf_chan(struct ieee80211_local *local)
list_for_each_entry_rcu(sdata, &local->interfaces, list) {
if (!rcu_access_pointer(sdata->vif.chanctx_conf))
continue;
+ if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
+ continue;
power = min(power, sdata->vif.bss_conf.txpower);
}
rcu_read_unlock();
@@ -199,7 +201,7 @@ void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata,
{
struct ieee80211_local *local = sdata->local;
- if (!changed)
+ if (!changed || sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
return;
drv_bss_info_changed(local, sdata, &sdata->vif.bss_conf, changed);
@@ -338,7 +340,7 @@ static int ieee80211_ifa_changed(struct notifier_block *nb,
sdata_unlock(sdata);
- return NOTIFY_DONE;
+ return NOTIFY_OK;
}
#endif
@@ -369,7 +371,7 @@ static int ieee80211_ifa6_changed(struct notifier_block *nb,
drv_ipv6_addr_change(local, sdata, idev);
- return NOTIFY_DONE;
+ return NOTIFY_OK;
}
#endif
@@ -444,7 +446,9 @@ static const struct ieee80211_ht_cap mac80211_ht_capa_mod_mask = {
.cap_info = cpu_to_le16(IEEE80211_HT_CAP_SUP_WIDTH_20_40 |
IEEE80211_HT_CAP_MAX_AMSDU |
IEEE80211_HT_CAP_SGI_20 |
- IEEE80211_HT_CAP_SGI_40),
+ IEEE80211_HT_CAP_SGI_40 |
+ IEEE80211_HT_CAP_LDPC_CODING |
+ IEEE80211_HT_CAP_40MHZ_INTOLERANT),
.mcs = {
.rx_mask = { 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, },
@@ -893,10 +897,15 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
/* mac80211 supports control port protocol changing */
local->hw.wiphy->flags |= WIPHY_FLAG_CONTROL_PORT_PROTOCOL;
- if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM)
+ if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM) {
local->hw.wiphy->signal_type = CFG80211_SIGNAL_TYPE_MBM;
- else if (local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC)
+ } else if (local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC) {
local->hw.wiphy->signal_type = CFG80211_SIGNAL_TYPE_UNSPEC;
+ if (hw->max_signal <= 0) {
+ result = -EINVAL;
+ goto fail_wiphy_register;
+ }
+ }
WARN((local->hw.flags & IEEE80211_HW_SUPPORTS_UAPSD)
&& (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK),
@@ -947,6 +956,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
if (local->hw.wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS)
local->hw.wiphy->flags |= WIPHY_FLAG_TDLS_EXTERNAL_SETUP;
+ local->hw.wiphy->max_num_csa_counters = IEEE80211_MAX_CSA_COUNTERS_NUM;
+
result = wiphy_register(local->hw.wiphy);
if (result < 0)
goto fail_wiphy_register;
@@ -1071,6 +1082,18 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
}
EXPORT_SYMBOL(ieee80211_register_hw);
+void ieee80211_napi_add(struct ieee80211_hw *hw, struct napi_struct *napi,
+ struct net_device *napi_dev,
+ int (*poll)(struct napi_struct *, int),
+ int weight)
+{
+ struct ieee80211_local *local = hw_to_local(hw);
+
+ netif_napi_add(napi_dev, napi, poll, weight);
+ local->napi = napi;
+}
+EXPORT_SYMBOL_GPL(ieee80211_napi_add);
+
void ieee80211_unregister_hw(struct ieee80211_hw *hw)
{
struct ieee80211_local *local = hw_to_local(hw);
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 5b919cab1de..6495a3f0428 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -366,20 +366,15 @@ int mesh_add_rsn_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb)
return 0;
/* find RSN IE */
- data = ifmsh->ie;
- while (data < ifmsh->ie + ifmsh->ie_len) {
- if (*data == WLAN_EID_RSN) {
- len = data[1] + 2;
- break;
- }
- data++;
- }
+ data = cfg80211_find_ie(WLAN_EID_RSN, ifmsh->ie, ifmsh->ie_len);
+ if (!data)
+ return 0;
- if (len) {
- if (skb_tailroom(skb) < len)
- return -ENOMEM;
- memcpy(skb_put(skb, len), data, len);
- }
+ len = data[1] + 2;
+
+ if (skb_tailroom(skb) < len)
+ return -ENOMEM;
+ memcpy(skb_put(skb, len), data, len);
return 0;
}
@@ -684,11 +679,11 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh)
*pos++ = 0x0;
*pos++ = ieee80211_frequency_to_channel(
csa->settings.chandef.chan->center_freq);
- sdata->csa_counter_offset_beacon = hdr_len + 6;
+ sdata->csa_counter_offset_beacon[0] = hdr_len + 6;
*pos++ = csa->settings.count;
*pos++ = WLAN_EID_CHAN_SWITCH_PARAM;
*pos++ = 6;
- if (ifmsh->chsw_init) {
+ if (ifmsh->csa_role == IEEE80211_MESH_CSA_ROLE_INIT) {
*pos++ = ifmsh->mshcfg.dot11MeshTTL;
*pos |= WLAN_EID_CHAN_SWITCH_PARAM_INITIATOR;
} else {
@@ -829,7 +824,7 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata)
ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED);
bcn = rcu_dereference_protected(ifmsh->beacon,
lockdep_is_held(&sdata->wdev.mtx));
- rcu_assign_pointer(ifmsh->beacon, NULL);
+ RCU_INIT_POINTER(ifmsh->beacon, NULL);
kfree_rcu(bcn, rcu_head);
/* flush STAs and mpaths on this iface */
@@ -859,18 +854,12 @@ ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata,
{
struct cfg80211_csa_settings params;
struct ieee80211_csa_ie csa_ie;
- struct ieee80211_chanctx_conf *chanctx_conf;
- struct ieee80211_chanctx *chanctx;
struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
enum ieee80211_band band = ieee80211_get_sdata_band(sdata);
- int err, num_chanctx;
+ int err;
u32 sta_flags;
- if (sdata->vif.csa_active)
- return true;
-
- if (!ifmsh->mesh_id)
- return false;
+ sdata_assert_lock(sdata);
sta_flags = IEEE80211_STA_DISABLE_VHT;
switch (sdata->vif.bss_conf.chandef.width) {
@@ -896,10 +885,6 @@ ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata,
params.chandef = csa_ie.chandef;
params.count = csa_ie.count;
- if (sdata->vif.bss_conf.chandef.chan->band !=
- params.chandef.chan->band)
- return false;
-
if (!cfg80211_chandef_usable(sdata->local->hw.wiphy, &params.chandef,
IEEE80211_CHAN_DISABLED)) {
sdata_info(sdata,
@@ -913,33 +898,22 @@ ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata,
}
err = cfg80211_chandef_dfs_required(sdata->local->hw.wiphy,
- &params.chandef);
+ &params.chandef,
+ NL80211_IFTYPE_MESH_POINT);
if (err < 0)
return false;
- if (err) {
- params.radar_required = true;
+ if (err > 0)
/* TODO: DFS not (yet) supported */
return false;
- }
- rcu_read_lock();
- chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
- if (!chanctx_conf)
- goto failed_chswitch;
+ params.radar_required = err;
- /* don't handle for multi-VIF cases */
- chanctx = container_of(chanctx_conf, struct ieee80211_chanctx, conf);
- if (chanctx->refcount > 1)
- goto failed_chswitch;
-
- num_chanctx = 0;
- list_for_each_entry_rcu(chanctx, &sdata->local->chanctx_list, list)
- num_chanctx++;
-
- if (num_chanctx > 1)
- goto failed_chswitch;
-
- rcu_read_unlock();
+ if (cfg80211_chandef_identical(&params.chandef,
+ &sdata->vif.bss_conf.chandef)) {
+ mcsa_dbg(sdata,
+ "received csa with an identical chandef, ignoring\n");
+ return true;
+ }
mcsa_dbg(sdata,
"received channel switch announcement to go to channel %d MHz\n",
@@ -953,30 +927,16 @@ ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata,
ifmsh->pre_value = csa_ie.pre_value;
}
- if (ifmsh->chsw_ttl < ifmsh->mshcfg.dot11MeshTTL) {
- if (ieee80211_mesh_csa_beacon(sdata, &params, false) < 0)
- return false;
- } else {
+ if (ifmsh->chsw_ttl >= ifmsh->mshcfg.dot11MeshTTL)
return false;
- }
-
- sdata->csa_radar_required = params.radar_required;
- if (params.block_tx)
- ieee80211_stop_queues_by_reason(&sdata->local->hw,
- IEEE80211_MAX_QUEUE_MAP,
- IEEE80211_QUEUE_STOP_REASON_CSA);
+ ifmsh->csa_role = IEEE80211_MESH_CSA_ROLE_REPEATER;
- sdata->csa_chandef = params.chandef;
- sdata->vif.csa_active = true;
-
- ieee80211_bss_info_change_notify(sdata, err);
- drv_channel_switch_beacon(sdata, &params.chandef);
+ if (ieee80211_channel_switch(sdata->local->hw.wiphy, sdata->dev,
+ &params) < 0)
+ return false;
return true;
-failed_chswitch:
- rcu_read_unlock();
- return false;
}
static void
@@ -1086,7 +1046,8 @@ static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata,
ifmsh->sync_ops->rx_bcn_presp(sdata,
stype, mgmt, &elems, rx_status);
- if (!ifmsh->chsw_init)
+ if (ifmsh->csa_role != IEEE80211_MESH_CSA_ROLE_INIT &&
+ !sdata->vif.csa_active)
ieee80211_mesh_process_chnswitch(sdata, &elems, true);
}
@@ -1095,29 +1056,30 @@ int ieee80211_mesh_finish_csa(struct ieee80211_sub_if_data *sdata)
struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
struct mesh_csa_settings *tmp_csa_settings;
int ret = 0;
+ int changed = 0;
/* Reset the TTL value and Initiator flag */
- ifmsh->chsw_init = false;
+ ifmsh->csa_role = IEEE80211_MESH_CSA_ROLE_NONE;
ifmsh->chsw_ttl = 0;
/* Remove the CSA and MCSP elements from the beacon */
tmp_csa_settings = rcu_dereference(ifmsh->csa);
- rcu_assign_pointer(ifmsh->csa, NULL);
- kfree_rcu(tmp_csa_settings, rcu_head);
+ RCU_INIT_POINTER(ifmsh->csa, NULL);
+ if (tmp_csa_settings)
+ kfree_rcu(tmp_csa_settings, rcu_head);
ret = ieee80211_mesh_rebuild_beacon(sdata);
if (ret)
return -EINVAL;
- ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON);
+ changed |= BSS_CHANGED_BEACON;
mcsa_dbg(sdata, "complete switching to center freq %d MHz",
sdata->vif.bss_conf.chandef.chan->center_freq);
- return 0;
+ return changed;
}
int ieee80211_mesh_csa_beacon(struct ieee80211_sub_if_data *sdata,
- struct cfg80211_csa_settings *csa_settings,
- bool csa_action)
+ struct cfg80211_csa_settings *csa_settings)
{
struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
struct mesh_csa_settings *tmp_csa_settings;
@@ -1136,17 +1098,12 @@ int ieee80211_mesh_csa_beacon(struct ieee80211_sub_if_data *sdata,
ret = ieee80211_mesh_rebuild_beacon(sdata);
if (ret) {
tmp_csa_settings = rcu_dereference(ifmsh->csa);
- rcu_assign_pointer(ifmsh->csa, NULL);
+ RCU_INIT_POINTER(ifmsh->csa, NULL);
kfree_rcu(tmp_csa_settings, rcu_head);
return ret;
}
- ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON);
-
- if (csa_action)
- ieee80211_send_action_csa(sdata, csa_settings);
-
- return 0;
+ return BSS_CHANGED_BEACON;
}
static int mesh_fwd_csa_frame(struct ieee80211_sub_if_data *sdata,
@@ -1210,7 +1167,8 @@ static void mesh_rx_csa_frame(struct ieee80211_sub_if_data *sdata,
ifmsh->pre_value = pre_value;
- if (!ieee80211_mesh_process_chnswitch(sdata, &elems, false)) {
+ if (!sdata->vif.csa_active &&
+ !ieee80211_mesh_process_chnswitch(sdata, &elems, false)) {
mcsa_dbg(sdata, "Failed to process CSA action frame");
return;
}
@@ -1257,7 +1215,7 @@ void ieee80211_mesh_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
sdata_lock(sdata);
/* mesh already went down */
- if (!sdata->wdev.mesh_id_len)
+ if (!sdata->u.mesh.mesh_id_len)
goto out;
rx_status = IEEE80211_SKB_RXCB(skb);
@@ -1310,7 +1268,7 @@ void ieee80211_mesh_work(struct ieee80211_sub_if_data *sdata)
sdata_lock(sdata);
/* mesh already went down */
- if (!sdata->wdev.mesh_id_len)
+ if (!sdata->u.mesh.mesh_id_len)
goto out;
if (ifmsh->preq_queue_len &&
@@ -1365,7 +1323,7 @@ void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata)
mesh_rmc_init(sdata);
ifmsh->last_preq = jiffies;
ifmsh->next_perr = jiffies;
- ifmsh->chsw_init = false;
+ ifmsh->csa_role = IEEE80211_MESH_CSA_ROLE_NONE;
/* Allocate all mesh structures when creating the first mesh interface. */
if (!mesh_allocated)
ieee80211s_init();
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index f9514685d45..94758b9c9ed 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -37,7 +37,7 @@ static inline u32 u32_field_get(const u8 *preq_elem, int offset, bool ae)
return get_unaligned_le32(preq_elem + offset);
}
-static inline u32 u16_field_get(const u8 *preq_elem, int offset, bool ae)
+static inline u16 u16_field_get(const u8 *preq_elem, int offset, bool ae)
{
if (ae)
offset += 6;
@@ -544,9 +544,10 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata,
if (time_after(jiffies, ifmsh->last_sn_update +
net_traversal_jiffies(sdata)) ||
time_before(jiffies, ifmsh->last_sn_update)) {
- target_sn = ++ifmsh->sn;
+ ++ifmsh->sn;
ifmsh->last_sn_update = jiffies;
}
+ target_sn = ifmsh->sn;
} else if (is_broadcast_ether_addr(target_addr) &&
(target_flags & IEEE80211_PREQ_TO_FLAG)) {
rcu_read_lock();
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index 7d050ed6fe5..cf032a8db9d 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -287,8 +287,10 @@ static void mesh_path_move_to_queue(struct mesh_path *gate_mpath,
struct sk_buff_head failq;
unsigned long flags;
- BUG_ON(gate_mpath == from_mpath);
- BUG_ON(!gate_mpath->next_hop);
+ if (WARN_ON(gate_mpath == from_mpath))
+ return;
+ if (WARN_ON(!gate_mpath->next_hop))
+ return;
__skb_queue_head_init(&failq);
diff --git a/net/mac80211/mesh_sync.c b/net/mac80211/mesh_sync.c
index 2bc5dc25d5a..09625d6205c 100644
--- a/net/mac80211/mesh_sync.c
+++ b/net/mac80211/mesh_sync.c
@@ -171,7 +171,7 @@ static void mesh_sync_offset_adjust_tbtt(struct ieee80211_sub_if_data *sdata,
u8 cap;
WARN_ON(ifmsh->mesh_sp_id != IEEE80211_SYNC_METHOD_NEIGHBOR_OFFSET);
- BUG_ON(!rcu_read_lock_held());
+ WARN_ON(!rcu_read_lock_held());
cap = beacon->meshconf->meshconf_cap;
spin_lock_bh(&ifmsh->sync_offset_lock);
diff --git a/net/mac80211/michael.h b/net/mac80211/michael.h
index 3b848dad958..0e4886f881f 100644
--- a/net/mac80211/michael.h
+++ b/net/mac80211/michael.h
@@ -11,6 +11,7 @@
#define MICHAEL_H
#include <linux/types.h>
+#include <linux/ieee80211.h>
#define MICHAEL_MIC_LEN 8
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 245dce969b3..3345401be1b 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -131,13 +131,13 @@ void ieee80211_sta_reset_conn_monitor(struct ieee80211_sub_if_data *sdata)
if (unlikely(!sdata->u.mgd.associated))
return;
+ ifmgd->probe_send_count = 0;
+
if (sdata->local->hw.flags & IEEE80211_HW_CONNECTION_MONITOR)
return;
mod_timer(&sdata->u.mgd.conn_mon_timer,
round_jiffies_up(jiffies + IEEE80211_CONNECTION_IDLE_TIME));
-
- ifmgd->probe_send_count = 0;
}
static int ecw2cw(int ecw)
@@ -531,6 +531,7 @@ static void ieee80211_add_vht_ie(struct ieee80211_sub_if_data *sdata,
u8 *pos;
u32 cap;
struct ieee80211_sta_vht_cap vht_cap;
+ u32 mask, ap_bf_sts, our_bf_sts;
BUILD_BUG_ON(sizeof(vht_cap) != sizeof(sband->vht_cap));
@@ -558,6 +559,16 @@ static void ieee80211_add_vht_ie(struct ieee80211_sub_if_data *sdata,
cpu_to_le32(IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE)))
cap &= ~IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE;
+ mask = IEEE80211_VHT_CAP_BEAMFORMEE_STS_MASK;
+
+ ap_bf_sts = le32_to_cpu(ap_vht_cap->vht_cap_info) & mask;
+ our_bf_sts = cap & mask;
+
+ if (ap_bf_sts < our_bf_sts) {
+ cap &= ~mask;
+ cap |= ap_bf_sts;
+ }
+
/* reserve and fill IE */
pos = skb_put(skb, sizeof(struct ieee80211_vht_cap) + 2);
ieee80211_ie_build_vht_cap(pos, &vht_cap, cap);
@@ -768,6 +779,34 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
ieee80211_add_ht_ie(sdata, skb, assoc_data->ap_ht_param,
sband, chan, sdata->smps_mode);
+ /* if present, add any custom IEs that go before VHT */
+ if (assoc_data->ie_len) {
+ static const u8 before_vht[] = {
+ WLAN_EID_SSID,
+ WLAN_EID_SUPP_RATES,
+ WLAN_EID_EXT_SUPP_RATES,
+ WLAN_EID_PWR_CAPABILITY,
+ WLAN_EID_SUPPORTED_CHANNELS,
+ WLAN_EID_RSN,
+ WLAN_EID_QOS_CAPA,
+ WLAN_EID_RRM_ENABLED_CAPABILITIES,
+ WLAN_EID_MOBILITY_DOMAIN,
+ WLAN_EID_SUPPORTED_REGULATORY_CLASSES,
+ WLAN_EID_HT_CAPABILITY,
+ WLAN_EID_BSS_COEX_2040,
+ WLAN_EID_EXT_CAPABILITY,
+ WLAN_EID_QOS_TRAFFIC_CAPA,
+ WLAN_EID_TIM_BCAST_REQ,
+ WLAN_EID_INTERWORKING,
+ };
+ noffset = ieee80211_ie_split(assoc_data->ie, assoc_data->ie_len,
+ before_vht, ARRAY_SIZE(before_vht),
+ offset);
+ pos = skb_put(skb, noffset - offset);
+ memcpy(pos, assoc_data->ie + offset, noffset - offset);
+ offset = noffset;
+ }
+
if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT))
ieee80211_add_vht_ie(sdata, skb, sband,
&assoc_data->ap_vht_cap);
@@ -936,16 +975,23 @@ static void ieee80211_chswitch_work(struct work_struct *work)
/* XXX: shouldn't really modify cfg80211-owned data! */
ifmgd->associated->channel = sdata->csa_chandef.chan;
+ ieee80211_bss_info_change_notify(sdata, changed);
+
+ mutex_lock(&local->mtx);
+ sdata->vif.csa_active = false;
/* XXX: wait for a beacon first? */
- ieee80211_wake_queues_by_reason(&local->hw,
+ if (!ieee80211_csa_needs_block_tx(local))
+ ieee80211_wake_queues_by_reason(&local->hw,
IEEE80211_MAX_QUEUE_MAP,
IEEE80211_QUEUE_STOP_REASON_CSA);
+ mutex_unlock(&local->mtx);
- ieee80211_bss_info_change_notify(sdata, changed);
-
- out:
- sdata->vif.csa_active = false;
ifmgd->flags &= ~IEEE80211_STA_CSA_RECEIVED;
+
+ ieee80211_sta_reset_beacon_monitor(sdata);
+ ieee80211_sta_reset_conn_monitor(sdata);
+
+out:
sdata_unlock(sdata);
}
@@ -1024,7 +1070,6 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
}
ifmgd->flags |= IEEE80211_STA_CSA_RECEIVED;
- sdata->vif.csa_active = true;
mutex_lock(&local->chanctx_mtx);
if (local->use_chanctx) {
@@ -1051,7 +1096,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
}
chanctx = container_of(rcu_access_pointer(sdata->vif.chanctx_conf),
struct ieee80211_chanctx, conf);
- if (chanctx->refcount > 1) {
+ if (ieee80211_chanctx_refcount(local, chanctx) > 1) {
sdata_info(sdata,
"channel switch with multiple interfaces on the same channel, disconnecting\n");
ieee80211_queue_work(&local->hw,
@@ -1063,10 +1108,15 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
sdata->csa_chandef = csa_ie.chandef;
- if (csa_ie.mode)
+ mutex_lock(&local->mtx);
+ sdata->vif.csa_active = true;
+ sdata->csa_block_tx = csa_ie.mode;
+
+ if (sdata->csa_block_tx)
ieee80211_stop_queues_by_reason(&local->hw,
- IEEE80211_MAX_QUEUE_MAP,
- IEEE80211_QUEUE_STOP_REASON_CSA);
+ IEEE80211_MAX_QUEUE_MAP,
+ IEEE80211_QUEUE_STOP_REASON_CSA);
+ mutex_unlock(&local->mtx);
if (local->ops->channel_switch) {
/* use driver's channel switch callback */
@@ -1778,6 +1828,12 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
ifmgd->flags = 0;
mutex_lock(&local->mtx);
ieee80211_vif_release_channel(sdata);
+
+ sdata->vif.csa_active = false;
+ if (!ieee80211_csa_needs_block_tx(local))
+ ieee80211_wake_queues_by_reason(&local->hw,
+ IEEE80211_MAX_QUEUE_MAP,
+ IEEE80211_QUEUE_STOP_REASON_CSA);
mutex_unlock(&local->mtx);
sdata->encrypt_headroom = IEEE80211_ENCRYPT_HEADROOM;
@@ -2006,6 +2062,7 @@ EXPORT_SYMBOL(ieee80211_ap_probereq_get);
static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata)
{
+ struct ieee80211_local *local = sdata->local;
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN];
@@ -2019,10 +2076,14 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata)
WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY,
true, frame_buf);
ifmgd->flags &= ~IEEE80211_STA_CSA_RECEIVED;
+
+ mutex_lock(&local->mtx);
sdata->vif.csa_active = false;
- ieee80211_wake_queues_by_reason(&sdata->local->hw,
+ if (!ieee80211_csa_needs_block_tx(local))
+ ieee80211_wake_queues_by_reason(&local->hw,
IEEE80211_MAX_QUEUE_MAP,
IEEE80211_QUEUE_STOP_REASON_CSA);
+ mutex_unlock(&local->mtx);
cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf,
IEEE80211_DEAUTH_FRAME_LEN);
@@ -2233,6 +2294,62 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
/* ignore frame -- wait for timeout */
}
+#define case_WLAN(type) \
+ case WLAN_REASON_##type: return #type
+
+static const char *ieee80211_get_reason_code_string(u16 reason_code)
+{
+ switch (reason_code) {
+ case_WLAN(UNSPECIFIED);
+ case_WLAN(PREV_AUTH_NOT_VALID);
+ case_WLAN(DEAUTH_LEAVING);
+ case_WLAN(DISASSOC_DUE_TO_INACTIVITY);
+ case_WLAN(DISASSOC_AP_BUSY);
+ case_WLAN(CLASS2_FRAME_FROM_NONAUTH_STA);
+ case_WLAN(CLASS3_FRAME_FROM_NONASSOC_STA);
+ case_WLAN(DISASSOC_STA_HAS_LEFT);
+ case_WLAN(STA_REQ_ASSOC_WITHOUT_AUTH);
+ case_WLAN(DISASSOC_BAD_POWER);
+ case_WLAN(DISASSOC_BAD_SUPP_CHAN);
+ case_WLAN(INVALID_IE);
+ case_WLAN(MIC_FAILURE);
+ case_WLAN(4WAY_HANDSHAKE_TIMEOUT);
+ case_WLAN(GROUP_KEY_HANDSHAKE_TIMEOUT);
+ case_WLAN(IE_DIFFERENT);
+ case_WLAN(INVALID_GROUP_CIPHER);
+ case_WLAN(INVALID_PAIRWISE_CIPHER);
+ case_WLAN(INVALID_AKMP);
+ case_WLAN(UNSUPP_RSN_VERSION);
+ case_WLAN(INVALID_RSN_IE_CAP);
+ case_WLAN(IEEE8021X_FAILED);
+ case_WLAN(CIPHER_SUITE_REJECTED);
+ case_WLAN(DISASSOC_UNSPECIFIED_QOS);
+ case_WLAN(DISASSOC_QAP_NO_BANDWIDTH);
+ case_WLAN(DISASSOC_LOW_ACK);
+ case_WLAN(DISASSOC_QAP_EXCEED_TXOP);
+ case_WLAN(QSTA_LEAVE_QBSS);
+ case_WLAN(QSTA_NOT_USE);
+ case_WLAN(QSTA_REQUIRE_SETUP);
+ case_WLAN(QSTA_TIMEOUT);
+ case_WLAN(QSTA_CIPHER_NOT_SUPP);
+ case_WLAN(MESH_PEER_CANCELED);
+ case_WLAN(MESH_MAX_PEERS);
+ case_WLAN(MESH_CONFIG);
+ case_WLAN(MESH_CLOSE);
+ case_WLAN(MESH_MAX_RETRIES);
+ case_WLAN(MESH_CONFIRM_TIMEOUT);
+ case_WLAN(MESH_INVALID_GTK);
+ case_WLAN(MESH_INCONSISTENT_PARAM);
+ case_WLAN(MESH_INVALID_SECURITY);
+ case_WLAN(MESH_PATH_ERROR);
+ case_WLAN(MESH_PATH_NOFORWARD);
+ case_WLAN(MESH_PATH_DEST_UNREACHABLE);
+ case_WLAN(MAC_EXISTS_IN_MBSS);
+ case_WLAN(MESH_CHAN_REGULATORY);
+ case_WLAN(MESH_CHAN);
+ default: return "<unknown>";
+ }
+}
static void ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata,
struct ieee80211_mgmt *mgmt, size_t len)
@@ -2254,8 +2371,8 @@ static void ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata,
reason_code = le16_to_cpu(mgmt->u.deauth.reason_code);
- sdata_info(sdata, "deauthenticated from %pM (Reason: %u)\n",
- bssid, reason_code);
+ sdata_info(sdata, "deauthenticated from %pM (Reason: %u=%s)\n",
+ bssid, reason_code, ieee80211_get_reason_code_string(reason_code));
ieee80211_set_disassoc(sdata, 0, 0, false, NULL);
@@ -2688,28 +2805,20 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
struct ieee802_11_elems *elems)
{
struct ieee80211_local *local = sdata->local;
- int freq;
struct ieee80211_bss *bss;
struct ieee80211_channel *channel;
sdata_assert_lock(sdata);
- if (elems->ds_params)
- freq = ieee80211_channel_to_frequency(elems->ds_params[0],
- rx_status->band);
- else
- freq = rx_status->freq;
-
- channel = ieee80211_get_channel(local->hw.wiphy, freq);
-
- if (!channel || channel->flags & IEEE80211_CHAN_DISABLED)
+ channel = ieee80211_get_channel(local->hw.wiphy, rx_status->freq);
+ if (!channel)
return;
bss = ieee80211_bss_info_update(local, rx_status, mgmt, len, elems,
channel);
if (bss) {
- ieee80211_rx_bss_put(local, bss);
sdata->vif.bss_conf.beacon_rate = bss->beacon_rate;
+ ieee80211_rx_bss_put(local, bss);
}
}
@@ -3459,6 +3568,9 @@ static void ieee80211_sta_bcn_mon_timer(unsigned long data)
if (local->quiescing)
return;
+ if (sdata->vif.csa_active)
+ return;
+
sdata->u.mgd.connection_loss = false;
ieee80211_queue_work(&sdata->local->hw,
&sdata->u.mgd.beacon_connection_loss_work);
@@ -3474,6 +3586,9 @@ static void ieee80211_sta_conn_mon_timer(unsigned long data)
if (local->quiescing)
return;
+ if (sdata->vif.csa_active)
+ return;
+
ieee80211_queue_work(&local->hw, &ifmgd->monitor_work);
}
@@ -3504,6 +3619,38 @@ static void ieee80211_restart_sta_timer(struct ieee80211_sub_if_data *sdata)
}
#ifdef CONFIG_PM
+void ieee80211_mgd_quiesce(struct ieee80211_sub_if_data *sdata)
+{
+ struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
+ u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN];
+
+ sdata_lock(sdata);
+
+ if (ifmgd->auth_data || ifmgd->assoc_data) {
+ const u8 *bssid = ifmgd->auth_data ?
+ ifmgd->auth_data->bss->bssid :
+ ifmgd->assoc_data->bss->bssid;
+
+ /*
+ * If we are trying to authenticate / associate while suspending,
+ * cfg80211 won't know and won't actually abort those attempts,
+ * thus we need to do that ourselves.
+ */
+ ieee80211_send_deauth_disassoc(sdata, bssid,
+ IEEE80211_STYPE_DEAUTH,
+ WLAN_REASON_DEAUTH_LEAVING,
+ false, frame_buf);
+ if (ifmgd->assoc_data)
+ ieee80211_destroy_assoc_data(sdata, false);
+ if (ifmgd->auth_data)
+ ieee80211_destroy_auth_data(sdata, false);
+ cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf,
+ IEEE80211_DEAUTH_FRAME_LEN);
+ }
+
+ sdata_unlock(sdata);
+}
+
void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata)
{
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
@@ -3588,7 +3735,7 @@ int ieee80211_max_network_latency(struct notifier_block *nb,
ieee80211_recalc_ps(local, latency_usec);
mutex_unlock(&local->iflist_mtx);
- return 0;
+ return NOTIFY_OK;
}
static u8 ieee80211_ht_vht_rx_chains(struct ieee80211_sub_if_data *sdata,
@@ -4322,37 +4469,41 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN];
bool tx = !req->local_state_change;
- bool report_frame = false;
- sdata_info(sdata,
- "deauthenticating from %pM by local choice (reason=%d)\n",
- req->bssid, req->reason_code);
+ if (ifmgd->auth_data &&
+ ether_addr_equal(ifmgd->auth_data->bss->bssid, req->bssid)) {
+ sdata_info(sdata,
+ "aborting authentication with %pM by local choice (Reason: %u=%s)\n",
+ req->bssid, req->reason_code,
+ ieee80211_get_reason_code_string(req->reason_code));
- if (ifmgd->auth_data) {
drv_mgd_prepare_tx(sdata->local, sdata);
ieee80211_send_deauth_disassoc(sdata, req->bssid,
IEEE80211_STYPE_DEAUTH,
req->reason_code, tx,
frame_buf);
ieee80211_destroy_auth_data(sdata, false);
+ cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf,
+ IEEE80211_DEAUTH_FRAME_LEN);
- report_frame = true;
- goto out;
+ return 0;
}
if (ifmgd->associated &&
ether_addr_equal(ifmgd->associated->bssid, req->bssid)) {
+ sdata_info(sdata,
+ "deauthenticating from %pM by local choice (Reason: %u=%s)\n",
+ req->bssid, req->reason_code,
+ ieee80211_get_reason_code_string(req->reason_code));
+
ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH,
req->reason_code, tx, frame_buf);
- report_frame = true;
- }
-
- out:
- if (report_frame)
cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf,
IEEE80211_DEAUTH_FRAME_LEN);
+ return 0;
+ }
- return 0;
+ return -ENOTCONN;
}
int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata,
@@ -4372,8 +4523,8 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata,
return -ENOLINK;
sdata_info(sdata,
- "disassociating from %pM by local choice (reason=%d)\n",
- req->bss->bssid, req->reason_code);
+ "disassociating from %pM by local choice (Reason: %u=%s)\n",
+ req->bss->bssid, req->reason_code, ieee80211_get_reason_code_string(req->reason_code));
memcpy(bssid, req->bss->bssid, ETH_ALEN);
ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DISASSOC,
diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c
index 0c2a29484c0..7a17decd27f 100644
--- a/net/mac80211/offchannel.c
+++ b/net/mac80211/offchannel.c
@@ -333,7 +333,7 @@ void ieee80211_sw_roc_work(struct work_struct *work)
container_of(work, struct ieee80211_roc_work, work.work);
struct ieee80211_sub_if_data *sdata = roc->sdata;
struct ieee80211_local *local = sdata->local;
- bool started;
+ bool started, on_channel;
mutex_lock(&local->mtx);
@@ -354,13 +354,26 @@ void ieee80211_sw_roc_work(struct work_struct *work)
if (!roc->started) {
struct ieee80211_roc_work *dep;
- /* start this ROC */
+ WARN_ON(local->use_chanctx);
+
+ /* If actually operating on the desired channel (with at least
+ * 20 MHz channel width) don't stop all the operations but still
+ * treat it as though the ROC operation started properly, so
+ * other ROC operations won't interfere with this one.
+ */
+ roc->on_channel = roc->chan == local->_oper_chandef.chan &&
+ local->_oper_chandef.width != NL80211_CHAN_WIDTH_5 &&
+ local->_oper_chandef.width != NL80211_CHAN_WIDTH_10;
- /* switch channel etc */
+ /* start this ROC */
ieee80211_recalc_idle(local);
- local->tmp_channel = roc->chan;
- ieee80211_hw_config(local, 0);
+ if (!roc->on_channel) {
+ ieee80211_offchannel_stop_vifs(local);
+
+ local->tmp_channel = roc->chan;
+ ieee80211_hw_config(local, 0);
+ }
/* tell userspace or send frame */
ieee80211_handle_roc_started(roc);
@@ -379,9 +392,10 @@ void ieee80211_sw_roc_work(struct work_struct *work)
finish:
list_del(&roc->list);
started = roc->started;
+ on_channel = roc->on_channel;
ieee80211_roc_notify_destroy(roc, !roc->abort);
- if (started) {
+ if (started && !on_channel) {
ieee80211_flush_queues(local, NULL);
local->tmp_channel = NULL;
diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c
index af64fb8e8ad..d478b880a0a 100644
--- a/net/mac80211/pm.c
+++ b/net/mac80211/pm.c
@@ -100,10 +100,18 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan)
/* remove all interfaces that were created in the driver */
list_for_each_entry(sdata, &local->interfaces, list) {
- if (!ieee80211_sdata_running(sdata) ||
- sdata->vif.type == NL80211_IFTYPE_AP_VLAN ||
- sdata->vif.type == NL80211_IFTYPE_MONITOR)
+ if (!ieee80211_sdata_running(sdata))
continue;
+ switch (sdata->vif.type) {
+ case NL80211_IFTYPE_AP_VLAN:
+ case NL80211_IFTYPE_MONITOR:
+ continue;
+ case NL80211_IFTYPE_STATION:
+ ieee80211_mgd_quiesce(sdata);
+ break;
+ default:
+ break;
+ }
drv_remove_interface(local, sdata);
}
diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c
index 22b223f13c9..8fdadfd94ba 100644
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -10,15 +10,15 @@
#include <linux/kernel.h>
#include <linux/rtnetlink.h>
-#include <linux/slab.h>
#include <linux/module.h>
+#include <linux/slab.h>
#include "rate.h"
#include "ieee80211_i.h"
#include "debugfs.h"
struct rate_control_alg {
struct list_head list;
- struct rate_control_ops *ops;
+ const struct rate_control_ops *ops;
};
static LIST_HEAD(rate_ctrl_algs);
@@ -29,7 +29,7 @@ module_param(ieee80211_default_rc_algo, charp, 0644);
MODULE_PARM_DESC(ieee80211_default_rc_algo,
"Default rate control algorithm for mac80211 to use");
-int ieee80211_rate_control_register(struct rate_control_ops *ops)
+int ieee80211_rate_control_register(const struct rate_control_ops *ops)
{
struct rate_control_alg *alg;
@@ -60,7 +60,7 @@ int ieee80211_rate_control_register(struct rate_control_ops *ops)
}
EXPORT_SYMBOL(ieee80211_rate_control_register);
-void ieee80211_rate_control_unregister(struct rate_control_ops *ops)
+void ieee80211_rate_control_unregister(const struct rate_control_ops *ops)
{
struct rate_control_alg *alg;
@@ -76,32 +76,31 @@ void ieee80211_rate_control_unregister(struct rate_control_ops *ops)
}
EXPORT_SYMBOL(ieee80211_rate_control_unregister);
-static struct rate_control_ops *
+static const struct rate_control_ops *
ieee80211_try_rate_control_ops_get(const char *name)
{
struct rate_control_alg *alg;
- struct rate_control_ops *ops = NULL;
+ const struct rate_control_ops *ops = NULL;
if (!name)
return NULL;
mutex_lock(&rate_ctrl_mutex);
list_for_each_entry(alg, &rate_ctrl_algs, list) {
- if (!strcmp(alg->ops->name, name))
- if (try_module_get(alg->ops->module)) {
- ops = alg->ops;
- break;
- }
+ if (!strcmp(alg->ops->name, name)) {
+ ops = alg->ops;
+ break;
+ }
}
mutex_unlock(&rate_ctrl_mutex);
return ops;
}
/* Get the rate control algorithm. */
-static struct rate_control_ops *
+static const struct rate_control_ops *
ieee80211_rate_control_ops_get(const char *name)
{
- struct rate_control_ops *ops;
+ const struct rate_control_ops *ops;
const char *alg_name;
kparam_block_sysfs_write(ieee80211_default_rc_algo);
@@ -111,10 +110,6 @@ ieee80211_rate_control_ops_get(const char *name)
alg_name = name;
ops = ieee80211_try_rate_control_ops_get(alg_name);
- if (!ops) {
- request_module("rc80211_%s", alg_name);
- ops = ieee80211_try_rate_control_ops_get(alg_name);
- }
if (!ops && name)
/* try default if specific alg requested but not found */
ops = ieee80211_try_rate_control_ops_get(ieee80211_default_rc_algo);
@@ -127,11 +122,6 @@ ieee80211_rate_control_ops_get(const char *name)
return ops;
}
-static void ieee80211_rate_control_ops_put(struct rate_control_ops *ops)
-{
- module_put(ops->module);
-}
-
#ifdef CONFIG_MAC80211_DEBUGFS
static ssize_t rcname_read(struct file *file, char __user *userbuf,
size_t count, loff_t *ppos)
@@ -158,11 +148,11 @@ static struct rate_control_ref *rate_control_alloc(const char *name,
ref = kmalloc(sizeof(struct rate_control_ref), GFP_KERNEL);
if (!ref)
- goto fail_ref;
+ return NULL;
ref->local = local;
ref->ops = ieee80211_rate_control_ops_get(name);
if (!ref->ops)
- goto fail_ops;
+ goto free;
#ifdef CONFIG_MAC80211_DEBUGFS
debugfsdir = debugfs_create_dir("rc", local->hw.wiphy->debugfsdir);
@@ -172,14 +162,11 @@ static struct rate_control_ref *rate_control_alloc(const char *name,
ref->priv = ref->ops->alloc(&local->hw, debugfsdir);
if (!ref->priv)
- goto fail_priv;
+ goto free;
return ref;
-fail_priv:
- ieee80211_rate_control_ops_put(ref->ops);
-fail_ops:
+free:
kfree(ref);
-fail_ref:
return NULL;
}
@@ -192,7 +179,6 @@ static void rate_control_free(struct rate_control_ref *ctrl_ref)
ctrl_ref->local->debugfs.rcdir = NULL;
#endif
- ieee80211_rate_control_ops_put(ctrl_ref->ops);
kfree(ctrl_ref);
}
diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h
index b95e16c0708..9aa2a1190a8 100644
--- a/net/mac80211/rate.h
+++ b/net/mac80211/rate.h
@@ -21,7 +21,7 @@
struct rate_control_ref {
struct ieee80211_local *local;
- struct rate_control_ops *ops;
+ const struct rate_control_ops *ops;
void *priv;
};
diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c
index f3d88b0c054..1c1469c36dc 100644
--- a/net/mac80211/rc80211_minstrel.c
+++ b/net/mac80211/rc80211_minstrel.c
@@ -657,7 +657,18 @@ minstrel_free(void *priv)
kfree(priv);
}
-struct rate_control_ops mac80211_minstrel = {
+static u32 minstrel_get_expected_throughput(void *priv_sta)
+{
+ struct minstrel_sta_info *mi = priv_sta;
+ int idx = mi->max_tp_rate[0];
+
+ /* convert pkt per sec to kbps (1200 is the average pkt size used for
+ * computing cur_tp)
+ */
+ return MINSTREL_TRUNC(mi->r[idx].cur_tp) * 1200 * 8 / 1024;
+}
+
+const struct rate_control_ops mac80211_minstrel = {
.name = "minstrel",
.tx_status = minstrel_tx_status,
.get_rate = minstrel_get_rate,
@@ -670,6 +681,7 @@ struct rate_control_ops mac80211_minstrel = {
.add_sta_debugfs = minstrel_add_sta_debugfs,
.remove_sta_debugfs = minstrel_remove_sta_debugfs,
#endif
+ .get_expected_throughput = minstrel_get_expected_throughput,
};
int __init
diff --git a/net/mac80211/rc80211_minstrel.h b/net/mac80211/rc80211_minstrel.h
index f4301f4b2e4..046d1bd598a 100644
--- a/net/mac80211/rc80211_minstrel.h
+++ b/net/mac80211/rc80211_minstrel.h
@@ -123,7 +123,7 @@ struct minstrel_debugfs_info {
char buf[];
};
-extern struct rate_control_ops mac80211_minstrel;
+extern const struct rate_control_ops mac80211_minstrel;
void minstrel_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir);
void minstrel_remove_sta_debugfs(void *priv, void *priv_sta);
diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index c1b5b73c5b9..85c1e74b771 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -22,7 +22,7 @@
#define MCS_NBITS (AVG_PKT_SIZE << 3)
/* Number of symbols for a packet with (bps) bits per symbol */
-#define MCS_NSYMS(bps) ((MCS_NBITS + (bps) - 1) / (bps))
+#define MCS_NSYMS(bps) DIV_ROUND_UP(MCS_NBITS, (bps))
/* Transmission time (nanoseconds) for a packet containing (syms) symbols */
#define MCS_SYMBOL_TIME(sgi, syms) \
@@ -124,7 +124,7 @@ const struct mcs_group minstrel_mcs_groups[] = {
#define MINSTREL_CCK_GROUP (ARRAY_SIZE(minstrel_mcs_groups) - 1)
-static u8 sample_table[SAMPLE_COLUMNS][MCS_GROUP_RATES];
+static u8 sample_table[SAMPLE_COLUMNS][MCS_GROUP_RATES] __read_mostly;
static void
minstrel_ht_update_rates(struct minstrel_priv *mp, struct minstrel_ht_sta *mi);
@@ -226,8 +226,9 @@ minstrel_ht_calc_tp(struct minstrel_ht_sta *mi, int group, int rate)
nsecs = 1000 * mi->overhead / MINSTREL_TRUNC(mi->avg_ampdu_len);
nsecs += minstrel_mcs_groups[group].duration[rate];
- tp = 1000000 * ((prob * 1000) / nsecs);
+ /* prob is scaled - see MINSTREL_FRAC above */
+ tp = 1000000 * ((prob * 1000) / nsecs);
mr->cur_tp = MINSTREL_TRUNC(tp);
}
@@ -1031,7 +1032,23 @@ minstrel_ht_free(void *priv)
mac80211_minstrel.free(priv);
}
-static struct rate_control_ops mac80211_minstrel_ht = {
+static u32 minstrel_ht_get_expected_throughput(void *priv_sta)
+{
+ struct minstrel_ht_sta_priv *msp = priv_sta;
+ struct minstrel_ht_sta *mi = &msp->ht;
+ int i, j;
+
+ if (!msp->is_ht)
+ return mac80211_minstrel.get_expected_throughput(priv_sta);
+
+ i = mi->max_tp_rate / MCS_GROUP_RATES; /* index into mi->groups[] */
+ j = mi->max_tp_rate % MCS_GROUP_RATES; /* index into that group's rates[] */
+
+ /* convert cur_tp from pkt per second to kbps */
+ return mi->groups[i].rates[j].cur_tp * AVG_PKT_SIZE * 8 / 1024;
+}
+
+static const struct rate_control_ops mac80211_minstrel_ht = {
.name = "minstrel_ht",
.tx_status = minstrel_ht_tx_status,
.get_rate = minstrel_ht_get_rate,
@@ -1045,11 +1062,11 @@ static struct rate_control_ops mac80211_minstrel_ht = {
.add_sta_debugfs = minstrel_ht_add_sta_debugfs,
.remove_sta_debugfs = minstrel_ht_remove_sta_debugfs,
#endif
+ .get_expected_throughput = minstrel_ht_get_expected_throughput,
};
-static void
-init_sample_table(void)
+static void __init init_sample_table(void)
{
int col, i, new_idx;
u8 rnd[MCS_GROUP_RATES];
diff --git a/net/mac80211/rc80211_pid_algo.c b/net/mac80211/rc80211_pid_algo.c
index 958fad07b54..d0da2a70fe6 100644
--- a/net/mac80211/rc80211_pid_algo.c
+++ b/net/mac80211/rc80211_pid_algo.c
@@ -452,7 +452,7 @@ static void rate_control_pid_free_sta(void *priv, struct ieee80211_sta *sta,
kfree(priv_sta);
}
-static struct rate_control_ops mac80211_rcpid = {
+static const struct rate_control_ops mac80211_rcpid = {
.name = "pid",
.tx_status = rate_control_pid_tx_status,
.get_rate = rate_control_pid_get_rate,
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 3e57f96c966..394e201cde6 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -40,8 +40,6 @@
static struct sk_buff *remove_monitor_info(struct ieee80211_local *local,
struct sk_buff *skb)
{
- struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
-
if (local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS) {
if (likely(skb->len > FCS_LEN))
__pskb_trim(skb, skb->len - FCS_LEN);
@@ -53,31 +51,28 @@ static struct sk_buff *remove_monitor_info(struct ieee80211_local *local,
}
}
- if (status->vendor_radiotap_len)
- __pskb_pull(skb, status->vendor_radiotap_len);
-
return skb;
}
-static inline int should_drop_frame(struct sk_buff *skb, int present_fcs_len)
+static inline bool should_drop_frame(struct sk_buff *skb, int present_fcs_len)
{
struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
- struct ieee80211_hdr *hdr;
-
- hdr = (void *)(skb->data + status->vendor_radiotap_len);
+ struct ieee80211_hdr *hdr = (void *)skb->data;
if (status->flag & (RX_FLAG_FAILED_FCS_CRC |
RX_FLAG_FAILED_PLCP_CRC |
RX_FLAG_AMPDU_IS_ZEROLEN))
- return 1;
- if (unlikely(skb->len < 16 + present_fcs_len +
- status->vendor_radiotap_len))
- return 1;
+ return true;
+
+ if (unlikely(skb->len < 16 + present_fcs_len))
+ return true;
+
if (ieee80211_is_ctl(hdr->frame_control) &&
!ieee80211_is_pspoll(hdr->frame_control) &&
!ieee80211_is_back_req(hdr->frame_control))
- return 1;
- return 0;
+ return true;
+
+ return false;
}
static int
@@ -90,8 +85,6 @@ ieee80211_rx_radiotap_space(struct ieee80211_local *local,
len = sizeof(struct ieee80211_radiotap_header) + 8;
/* allocate extra bitmaps */
- if (status->vendor_radiotap_len)
- len += 4;
if (status->chains)
len += 4 * hweight8(status->chains);
@@ -127,18 +120,6 @@ ieee80211_rx_radiotap_space(struct ieee80211_local *local,
len += 2 * hweight8(status->chains);
}
- if (status->vendor_radiotap_len) {
- if (WARN_ON_ONCE(status->vendor_radiotap_align == 0))
- status->vendor_radiotap_align = 1;
- /* align standard part of vendor namespace */
- len = ALIGN(len, 2);
- /* allocate standard part of vendor namespace */
- len += 6;
- /* align vendor-defined part */
- len = ALIGN(len, status->vendor_radiotap_align);
- /* vendor-defined part is already in skb */
- }
-
return len;
}
@@ -172,7 +153,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
it_present = &rthdr->it_present;
/* radiotap header, set always present flags */
- rthdr->it_len = cpu_to_le16(rtap_len + status->vendor_radiotap_len);
+ rthdr->it_len = cpu_to_le16(rtap_len);
it_present_val = BIT(IEEE80211_RADIOTAP_FLAGS) |
BIT(IEEE80211_RADIOTAP_CHANNEL) |
BIT(IEEE80211_RADIOTAP_RX_FLAGS);
@@ -190,14 +171,6 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
BIT(IEEE80211_RADIOTAP_DBM_ANTSIGNAL);
}
- if (status->vendor_radiotap_len) {
- it_present_val |= BIT(IEEE80211_RADIOTAP_VENDOR_NAMESPACE) |
- BIT(IEEE80211_RADIOTAP_EXT);
- put_unaligned_le32(it_present_val, it_present);
- it_present++;
- it_present_val = status->vendor_radiotap_bitmap;
- }
-
put_unaligned_le32(it_present_val, it_present);
pos = (void *)(it_present + 1);
@@ -307,6 +280,8 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
*pos |= IEEE80211_RADIOTAP_MCS_BW_40;
if (status->flag & RX_FLAG_HT_GF)
*pos |= IEEE80211_RADIOTAP_MCS_FMT_GF;
+ if (status->flag & RX_FLAG_LDPC)
+ *pos |= IEEE80211_RADIOTAP_MCS_FEC_LDPC;
stbc = (status->flag & RX_FLAG_STBC_MASK) >> RX_FLAG_STBC_SHIFT;
*pos |= stbc << IEEE80211_RADIOTAP_MCS_STBC_SHIFT;
pos++;
@@ -349,20 +324,25 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_VHT);
/* known field - how to handle 80+80? */
- if (status->flag & RX_FLAG_80P80MHZ)
+ if (status->vht_flag & RX_VHT_FLAG_80P80MHZ)
known &= ~IEEE80211_RADIOTAP_VHT_KNOWN_BANDWIDTH;
put_unaligned_le16(known, pos);
pos += 2;
/* flags */
if (status->flag & RX_FLAG_SHORT_GI)
*pos |= IEEE80211_RADIOTAP_VHT_FLAG_SGI;
+ /* in VHT, STBC is binary */
+ if (status->flag & RX_FLAG_STBC_MASK)
+ *pos |= IEEE80211_RADIOTAP_VHT_FLAG_STBC;
+ if (status->vht_flag & RX_VHT_FLAG_BF)
+ *pos |= IEEE80211_RADIOTAP_VHT_FLAG_BEAMFORMED;
pos++;
/* bandwidth */
- if (status->flag & RX_FLAG_80MHZ)
+ if (status->vht_flag & RX_VHT_FLAG_80MHZ)
*pos++ = 4;
- else if (status->flag & RX_FLAG_80P80MHZ)
+ else if (status->vht_flag & RX_VHT_FLAG_80P80MHZ)
*pos++ = 0; /* marked not known above */
- else if (status->flag & RX_FLAG_160MHZ)
+ else if (status->vht_flag & RX_VHT_FLAG_160MHZ)
*pos++ = 11;
else if (status->flag & RX_FLAG_40MHZ)
*pos++ = 1;
@@ -372,6 +352,8 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
*pos = (status->rate_idx << 4) | status->vht_nss;
pos += 4;
/* coding field */
+ if (status->flag & RX_FLAG_LDPC)
+ *pos |= IEEE80211_RADIOTAP_CODING_LDPC_USER0;
pos++;
/* group ID */
pos++;
@@ -383,21 +365,6 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
*pos++ = status->chain_signal[chain];
*pos++ = chain;
}
-
- if (status->vendor_radiotap_len) {
- /* ensure 2 byte alignment for the vendor field as required */
- if ((pos - (u8 *)rthdr) & 1)
- *pos++ = 0;
- *pos++ = status->vendor_radiotap_oui[0];
- *pos++ = status->vendor_radiotap_oui[1];
- *pos++ = status->vendor_radiotap_oui[2];
- *pos++ = status->vendor_radiotap_subns;
- put_unaligned_le16(status->vendor_radiotap_len, pos);
- pos += 2;
- /* align the actual payload as requested */
- while ((pos - (u8 *)rthdr) & (status->vendor_radiotap_align - 1))
- *pos++ = 0;
- }
}
/*
@@ -428,8 +395,8 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
if (local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS)
present_fcs_len = FCS_LEN;
- /* ensure hdr->frame_control and vendor radiotap data are in skb head */
- if (!pskb_may_pull(origskb, 2 + status->vendor_radiotap_len)) {
+ /* ensure hdr->frame_control is in skb head */
+ if (!pskb_may_pull(origskb, 2)) {
dev_kfree_skb(origskb);
return NULL;
}
@@ -599,10 +566,10 @@ static int ieee80211_is_unicast_robust_mgmt_frame(struct sk_buff *skb)
{
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
- if (skb->len < 24 || is_multicast_ether_addr(hdr->addr1))
+ if (is_multicast_ether_addr(hdr->addr1))
return 0;
- return ieee80211_is_robust_mgmt_frame(hdr);
+ return ieee80211_is_robust_mgmt_frame(skb);
}
@@ -610,10 +577,10 @@ static int ieee80211_is_multicast_robust_mgmt_frame(struct sk_buff *skb)
{
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
- if (skb->len < 24 || !is_multicast_ether_addr(hdr->addr1))
+ if (!is_multicast_ether_addr(hdr->addr1))
return 0;
- return ieee80211_is_robust_mgmt_frame(hdr);
+ return ieee80211_is_robust_mgmt_frame(skb);
}
@@ -626,7 +593,7 @@ static int ieee80211_get_mmie_keyidx(struct sk_buff *skb)
if (skb->len < 24 + sizeof(*mmie) || !is_multicast_ether_addr(hdr->da))
return -1;
- if (!ieee80211_is_robust_mgmt_frame((struct ieee80211_hdr *) hdr))
+ if (!ieee80211_is_robust_mgmt_frame(skb))
return -1; /* not a robust management frame */
mmie = (struct ieee80211_mmie *)
@@ -1265,9 +1232,11 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
if (ether_addr_equal(bssid, rx->sdata->u.ibss.bssid) &&
test_sta_flag(sta, WLAN_STA_AUTHORIZED)) {
sta->last_rx = jiffies;
- if (ieee80211_is_data(hdr->frame_control)) {
+ if (ieee80211_is_data(hdr->frame_control) &&
+ !is_multicast_ether_addr(hdr->addr1)) {
sta->last_rx_rate_idx = status->rate_idx;
sta->last_rx_rate_flag = status->flag;
+ sta->last_rx_rate_vht_flag = status->vht_flag;
sta->last_rx_rate_vht_nss = status->vht_nss;
}
}
@@ -1280,6 +1249,7 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
if (ieee80211_is_data(hdr->frame_control)) {
sta->last_rx_rate_idx = status->rate_idx;
sta->last_rx_rate_flag = status->flag;
+ sta->last_rx_rate_vht_flag = status->vht_flag;
sta->last_rx_rate_vht_nss = status->vht_nss;
}
}
@@ -1318,18 +1288,15 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
!ieee80211_has_morefrags(hdr->frame_control) &&
!(status->rx_flags & IEEE80211_RX_DEFERRED_RELEASE) &&
(rx->sdata->vif.type == NL80211_IFTYPE_AP ||
- rx->sdata->vif.type == NL80211_IFTYPE_AP_VLAN)) {
+ rx->sdata->vif.type == NL80211_IFTYPE_AP_VLAN) &&
+ /* PM bit is only checked in frames where it isn't reserved,
+ * in AP mode it's reserved in non-bufferable management frames
+ * (cf. IEEE 802.11-2012 8.2.4.1.7 Power Management field)
+ */
+ (!ieee80211_is_mgmt(hdr->frame_control) ||
+ ieee80211_is_bufferable_mmpdu(hdr->frame_control))) {
if (test_sta_flag(sta, WLAN_STA_PS_STA)) {
- /*
- * Ignore doze->wake transitions that are
- * indicated by non-data frames, the standard
- * is unclear here, but for example going to
- * PS mode and then scanning would cause a
- * doze->wake transition for the probe request,
- * and that is clearly undesirable.
- */
- if (ieee80211_is_data(hdr->frame_control) &&
- !ieee80211_has_pm(hdr->frame_control))
+ if (!ieee80211_has_pm(hdr->frame_control))
sta_ps_end(sta);
} else {
if (ieee80211_has_pm(hdr->frame_control))
@@ -1852,8 +1819,7 @@ static int ieee80211_drop_unencrypted_mgmt(struct ieee80211_rx_data *rx)
* having configured keys.
*/
if (unlikely(ieee80211_is_action(fc) && !rx->key &&
- ieee80211_is_robust_mgmt_frame(
- (struct ieee80211_hdr *) rx->skb->data)))
+ ieee80211_is_robust_mgmt_frame(rx->skb)))
return -EACCES;
}
@@ -2000,7 +1966,10 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx)
/* deliver to local stack */
skb->protocol = eth_type_trans(skb, dev);
memset(skb->cb, 0, sizeof(skb->cb));
- netif_receive_skb(skb);
+ if (rx->local->napi)
+ napi_gro_receive(rx->local->napi, skb);
+ else
+ netif_receive_skb(skb);
}
if (xmit_skb) {
@@ -3223,7 +3192,7 @@ static bool ieee80211_prepare_and_rx_handle(struct ieee80211_rx_data *rx,
}
/*
- * This is the actual Rx frames handler. as it blongs to Rx path it must
+ * This is the actual Rx frames handler. as it belongs to Rx path it must
* be called with rcu_read_lock protection.
*/
static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 88c81616f8f..f40661eb75b 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -309,7 +309,7 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
if (local->scan_req != local->int_scan_req)
cfg80211_scan_done(local->scan_req, aborted);
local->scan_req = NULL;
- rcu_assign_pointer(local->scan_sdata, NULL);
+ RCU_INIT_POINTER(local->scan_sdata, NULL);
local->scanning = 0;
local->scan_chandef.chan = NULL;
@@ -472,9 +472,7 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata,
if (local->ops->hw_scan) {
u8 *ies;
- local->hw_scan_ies_bufsize = 2 + IEEE80211_MAX_SSID_LEN +
- local->scan_ies_len +
- req->ie_len;
+ local->hw_scan_ies_bufsize = local->scan_ies_len + req->ie_len;
local->hw_scan_req = kmalloc(
sizeof(*local->hw_scan_req) +
req->n_channels * sizeof(req->channels[0]) +
@@ -561,7 +559,7 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata,
ieee80211_recalc_idle(local);
local->scan_req = NULL;
- rcu_assign_pointer(local->scan_sdata, NULL);
+ RCU_INIT_POINTER(local->scan_sdata, NULL);
}
return rc;
@@ -775,7 +773,7 @@ void ieee80211_scan_work(struct work_struct *work)
int rc;
local->scan_req = NULL;
- rcu_assign_pointer(local->scan_sdata, NULL);
+ RCU_INIT_POINTER(local->scan_sdata, NULL);
rc = __ieee80211_start_scan(sdata, req);
if (rc) {
@@ -979,8 +977,7 @@ int __ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata,
struct cfg80211_chan_def chandef;
int ret, i, iebufsz;
- iebufsz = 2 + IEEE80211_MAX_SSID_LEN +
- local->scan_ies_len + req->ie_len;
+ iebufsz = local->scan_ies_len + req->ie_len;
lockdep_assert_held(&local->mtx);
@@ -1017,7 +1014,7 @@ out_free:
if (ret) {
/* Clean in case of failure after HW restart or upon resume. */
- rcu_assign_pointer(local->sched_scan_sdata, NULL);
+ RCU_INIT_POINTER(local->sched_scan_sdata, NULL);
local->sched_scan_req = NULL;
}
@@ -1058,9 +1055,11 @@ int ieee80211_request_sched_scan_stop(struct ieee80211_sub_if_data *sdata)
/* We don't want to restart sched scan anymore. */
local->sched_scan_req = NULL;
- if (rcu_access_pointer(local->sched_scan_sdata))
- drv_sched_scan_stop(local, sdata);
-
+ if (rcu_access_pointer(local->sched_scan_sdata)) {
+ ret = drv_sched_scan_stop(local, sdata);
+ if (!ret)
+ rcu_assign_pointer(local->sched_scan_sdata, NULL);
+ }
out:
mutex_unlock(&local->mtx);
@@ -1077,12 +1076,8 @@ void ieee80211_sched_scan_results(struct ieee80211_hw *hw)
}
EXPORT_SYMBOL(ieee80211_sched_scan_results);
-void ieee80211_sched_scan_stopped_work(struct work_struct *work)
+void ieee80211_sched_scan_end(struct ieee80211_local *local)
{
- struct ieee80211_local *local =
- container_of(work, struct ieee80211_local,
- sched_scan_stopped_work);
-
mutex_lock(&local->mtx);
if (!rcu_access_pointer(local->sched_scan_sdata)) {
@@ -1090,7 +1085,7 @@ void ieee80211_sched_scan_stopped_work(struct work_struct *work)
return;
}
- rcu_assign_pointer(local->sched_scan_sdata, NULL);
+ RCU_INIT_POINTER(local->sched_scan_sdata, NULL);
/* If sched scan was aborted by the driver. */
local->sched_scan_req = NULL;
@@ -1100,6 +1095,15 @@ void ieee80211_sched_scan_stopped_work(struct work_struct *work)
cfg80211_sched_scan_stopped(local->hw.wiphy);
}
+void ieee80211_sched_scan_stopped_work(struct work_struct *work)
+{
+ struct ieee80211_local *local =
+ container_of(work, struct ieee80211_local,
+ sched_scan_stopped_work);
+
+ ieee80211_sched_scan_end(local);
+}
+
void ieee80211_sched_scan_stopped(struct ieee80211_hw *hw)
{
struct ieee80211_local *local = hw_to_local(hw);
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 137a192e64b..a9b46d8ea22 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -240,6 +240,7 @@ void sta_info_free(struct ieee80211_local *local, struct sta_info *sta)
sta_dbg(sta->sdata, "Destroyed STA %pM\n", sta->sta.addr);
+ kfree(rcu_dereference_raw(sta->sta.rates));
kfree(sta);
}
@@ -552,7 +553,7 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU)
int sta_info_insert_rcu(struct sta_info *sta) __acquires(RCU)
{
struct ieee80211_local *local = sta->local;
- int err = 0;
+ int err;
might_sleep();
@@ -570,7 +571,6 @@ int sta_info_insert_rcu(struct sta_info *sta) __acquires(RCU)
return 0;
out_free:
- BUG_ON(!err);
sta_info_free(local, sta);
return err;
}
@@ -1148,7 +1148,8 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta)
atomic_dec(&ps->num_sta_ps);
/* This station just woke up and isn't aware of our SMPS state */
- if (!ieee80211_smps_is_restrictive(sta->known_smps_mode,
+ if (!ieee80211_vif_is_mesh(&sdata->vif) &&
+ !ieee80211_smps_is_restrictive(sta->known_smps_mode,
sdata->smps_mode) &&
sta->known_smps_mode != sdata->bss->req_smps &&
sta_info_tx_streams(sta) != 1) {
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index d3a6d8208f2..4acc5fc402f 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -261,6 +261,7 @@ struct ieee80211_tx_latency_stat {
* "the" transmit rate
* @last_rx_rate_idx: rx status rate index of the last data packet
* @last_rx_rate_flag: rx status flag of the last data packet
+ * @last_rx_rate_vht_flag: rx status vht flag of the last data packet
* @last_rx_rate_vht_nss: rx status nss of last data packet
* @lock: used for locking all fields that require locking, see comments
* in the header file.
@@ -396,6 +397,7 @@ struct sta_info {
struct ieee80211_tx_rate last_tx_rate;
int last_rx_rate_idx;
u32 last_rx_rate_flag;
+ u32 last_rx_rate_vht_flag;
u8 last_rx_rate_vht_nss;
u16 tid_seq[IEEE80211_QOS_CTL_TID_MASK + 1];
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index 1ee85c40243..ba29ebc8614 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -314,10 +314,9 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local,
!is_multicast_ether_addr(hdr->addr1))
txflags |= IEEE80211_RADIOTAP_F_TX_FAIL;
- if ((info->status.rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS) ||
- (info->status.rates[0].flags & IEEE80211_TX_RC_USE_CTS_PROTECT))
+ if (info->status.rates[0].flags & IEEE80211_TX_RC_USE_CTS_PROTECT)
txflags |= IEEE80211_RADIOTAP_F_TX_CTS;
- else if (info->status.rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS)
+ if (info->status.rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS)
txflags |= IEEE80211_RADIOTAP_F_TX_RTS;
put_unaligned_le16(txflags, pos);
@@ -479,7 +478,7 @@ static void ieee80211_tx_latency_end_msrmnt(struct ieee80211_local *local,
u32 msrmnt;
u16 tid;
u8 *qc;
- int i, bin_range_count, bin_count;
+ int i, bin_range_count;
u32 *bin_ranges;
__le16 fc;
struct ieee80211_tx_latency_stat *tx_lat;
@@ -522,7 +521,6 @@ static void ieee80211_tx_latency_end_msrmnt(struct ieee80211_local *local,
/* count how many Tx frames transmitted with the appropriate latency */
bin_range_count = tx_latency->n_ranges;
bin_ranges = tx_latency->ranges;
- bin_count = tx_lat->bin_count;
for (i = 0; i < bin_range_count; i++) {
if (msrmnt <= bin_ranges[i]) {
@@ -543,6 +541,23 @@ static void ieee80211_tx_latency_end_msrmnt(struct ieee80211_local *local,
*/
#define STA_LOST_PKT_THRESHOLD 50
+static void ieee80211_lost_packet(struct sta_info *sta, struct sk_buff *skb)
+{
+ struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+
+ /* This packet was aggregated but doesn't carry status info */
+ if ((info->flags & IEEE80211_TX_CTL_AMPDU) &&
+ !(info->flags & IEEE80211_TX_STAT_AMPDU))
+ return;
+
+ if (++sta->lost_packets < STA_LOST_PKT_THRESHOLD)
+ return;
+
+ cfg80211_cqm_pktloss_notify(sta->sdata->dev, sta->sta.addr,
+ sta->lost_packets, GFP_ATOMIC);
+ sta->lost_packets = 0;
+}
+
void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
{
struct sk_buff *skb2;
@@ -619,6 +634,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
sta, true, acked);
if ((local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL) &&
+ (ieee80211_is_data(hdr->frame_control)) &&
(rates_idx != -1))
sta->last_tx_rate = info->status.rates[rates_idx];
@@ -681,12 +697,8 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
if (info->flags & IEEE80211_TX_STAT_ACK) {
if (sta->lost_packets)
sta->lost_packets = 0;
- } else if (++sta->lost_packets >= STA_LOST_PKT_THRESHOLD) {
- cfg80211_cqm_pktloss_notify(sta->sdata->dev,
- sta->sta.addr,
- sta->lost_packets,
- GFP_ATOMIC);
- sta->lost_packets = 0;
+ } else {
+ ieee80211_lost_packet(sta, skb);
}
}
diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c
new file mode 100644
index 00000000000..652813b2d3d
--- /dev/null
+++ b/net/mac80211/tdls.c
@@ -0,0 +1,325 @@
+/*
+ * mac80211 TDLS handling code
+ *
+ * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
+ * Copyright 2014, Intel Corporation
+ *
+ * This file is GPLv2 as found in COPYING.
+ */
+
+#include <linux/ieee80211.h>
+#include "ieee80211_i.h"
+
+static void ieee80211_tdls_add_ext_capab(struct sk_buff *skb)
+{
+ u8 *pos = (void *)skb_put(skb, 7);
+
+ *pos++ = WLAN_EID_EXT_CAPABILITY;
+ *pos++ = 5; /* len */
+ *pos++ = 0x0;
+ *pos++ = 0x0;
+ *pos++ = 0x0;
+ *pos++ = 0x0;
+ *pos++ = WLAN_EXT_CAPA5_TDLS_ENABLED;
+}
+
+static u16 ieee80211_get_tdls_sta_capab(struct ieee80211_sub_if_data *sdata)
+{
+ struct ieee80211_local *local = sdata->local;
+ u16 capab;
+
+ capab = 0;
+ if (ieee80211_get_sdata_band(sdata) != IEEE80211_BAND_2GHZ)
+ return capab;
+
+ if (!(local->hw.flags & IEEE80211_HW_2GHZ_SHORT_SLOT_INCAPABLE))
+ capab |= WLAN_CAPABILITY_SHORT_SLOT_TIME;
+ if (!(local->hw.flags & IEEE80211_HW_2GHZ_SHORT_PREAMBLE_INCAPABLE))
+ capab |= WLAN_CAPABILITY_SHORT_PREAMBLE;
+
+ return capab;
+}
+
+static void ieee80211_tdls_add_link_ie(struct sk_buff *skb, const u8 *src_addr,
+ const u8 *peer, const u8 *bssid)
+{
+ struct ieee80211_tdls_lnkie *lnkid;
+
+ lnkid = (void *)skb_put(skb, sizeof(struct ieee80211_tdls_lnkie));
+
+ lnkid->ie_type = WLAN_EID_LINK_ID;
+ lnkid->ie_len = sizeof(struct ieee80211_tdls_lnkie) - 2;
+
+ memcpy(lnkid->bssid, bssid, ETH_ALEN);
+ memcpy(lnkid->init_sta, src_addr, ETH_ALEN);
+ memcpy(lnkid->resp_sta, peer, ETH_ALEN);
+}
+
+static int
+ieee80211_prep_tdls_encap_data(struct wiphy *wiphy, struct net_device *dev,
+ const u8 *peer, u8 action_code, u8 dialog_token,
+ u16 status_code, struct sk_buff *skb)
+{
+ struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ enum ieee80211_band band = ieee80211_get_sdata_band(sdata);
+ struct ieee80211_tdls_data *tf;
+
+ tf = (void *)skb_put(skb, offsetof(struct ieee80211_tdls_data, u));
+
+ memcpy(tf->da, peer, ETH_ALEN);
+ memcpy(tf->sa, sdata->vif.addr, ETH_ALEN);
+ tf->ether_type = cpu_to_be16(ETH_P_TDLS);
+ tf->payload_type = WLAN_TDLS_SNAP_RFTYPE;
+
+ switch (action_code) {
+ case WLAN_TDLS_SETUP_REQUEST:
+ tf->category = WLAN_CATEGORY_TDLS;
+ tf->action_code = WLAN_TDLS_SETUP_REQUEST;
+
+ skb_put(skb, sizeof(tf->u.setup_req));
+ tf->u.setup_req.dialog_token = dialog_token;
+ tf->u.setup_req.capability =
+ cpu_to_le16(ieee80211_get_tdls_sta_capab(sdata));
+
+ ieee80211_add_srates_ie(sdata, skb, false, band);
+ ieee80211_add_ext_srates_ie(sdata, skb, false, band);
+ ieee80211_tdls_add_ext_capab(skb);
+ break;
+ case WLAN_TDLS_SETUP_RESPONSE:
+ tf->category = WLAN_CATEGORY_TDLS;
+ tf->action_code = WLAN_TDLS_SETUP_RESPONSE;
+
+ skb_put(skb, sizeof(tf->u.setup_resp));
+ tf->u.setup_resp.status_code = cpu_to_le16(status_code);
+ tf->u.setup_resp.dialog_token = dialog_token;
+ tf->u.setup_resp.capability =
+ cpu_to_le16(ieee80211_get_tdls_sta_capab(sdata));
+
+ ieee80211_add_srates_ie(sdata, skb, false, band);
+ ieee80211_add_ext_srates_ie(sdata, skb, false, band);
+ ieee80211_tdls_add_ext_capab(skb);
+ break;
+ case WLAN_TDLS_SETUP_CONFIRM:
+ tf->category = WLAN_CATEGORY_TDLS;
+ tf->action_code = WLAN_TDLS_SETUP_CONFIRM;
+
+ skb_put(skb, sizeof(tf->u.setup_cfm));
+ tf->u.setup_cfm.status_code = cpu_to_le16(status_code);
+ tf->u.setup_cfm.dialog_token = dialog_token;
+ break;
+ case WLAN_TDLS_TEARDOWN:
+ tf->category = WLAN_CATEGORY_TDLS;
+ tf->action_code = WLAN_TDLS_TEARDOWN;
+
+ skb_put(skb, sizeof(tf->u.teardown));
+ tf->u.teardown.reason_code = cpu_to_le16(status_code);
+ break;
+ case WLAN_TDLS_DISCOVERY_REQUEST:
+ tf->category = WLAN_CATEGORY_TDLS;
+ tf->action_code = WLAN_TDLS_DISCOVERY_REQUEST;
+
+ skb_put(skb, sizeof(tf->u.discover_req));
+ tf->u.discover_req.dialog_token = dialog_token;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int
+ieee80211_prep_tdls_direct(struct wiphy *wiphy, struct net_device *dev,
+ const u8 *peer, u8 action_code, u8 dialog_token,
+ u16 status_code, struct sk_buff *skb)
+{
+ struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ enum ieee80211_band band = ieee80211_get_sdata_band(sdata);
+ struct ieee80211_mgmt *mgmt;
+
+ mgmt = (void *)skb_put(skb, 24);
+ memset(mgmt, 0, 24);
+ memcpy(mgmt->da, peer, ETH_ALEN);
+ memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN);
+ memcpy(mgmt->bssid, sdata->u.mgd.bssid, ETH_ALEN);
+
+ mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
+ IEEE80211_STYPE_ACTION);
+
+ switch (action_code) {
+ case WLAN_PUB_ACTION_TDLS_DISCOVER_RES:
+ skb_put(skb, 1 + sizeof(mgmt->u.action.u.tdls_discover_resp));
+ mgmt->u.action.category = WLAN_CATEGORY_PUBLIC;
+ mgmt->u.action.u.tdls_discover_resp.action_code =
+ WLAN_PUB_ACTION_TDLS_DISCOVER_RES;
+ mgmt->u.action.u.tdls_discover_resp.dialog_token =
+ dialog_token;
+ mgmt->u.action.u.tdls_discover_resp.capability =
+ cpu_to_le16(ieee80211_get_tdls_sta_capab(sdata));
+
+ ieee80211_add_srates_ie(sdata, skb, false, band);
+ ieee80211_add_ext_srates_ie(sdata, skb, false, band);
+ ieee80211_tdls_add_ext_capab(skb);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int ieee80211_tdls_mgmt(struct wiphy *wiphy, struct net_device *dev,
+ const u8 *peer, u8 action_code, u8 dialog_token,
+ u16 status_code, u32 peer_capability,
+ const u8 *extra_ies, size_t extra_ies_len)
+{
+ struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ struct ieee80211_local *local = sdata->local;
+ struct sk_buff *skb = NULL;
+ bool send_direct;
+ int ret;
+
+ if (!(wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS))
+ return -ENOTSUPP;
+
+ /* make sure we are in managed mode, and associated */
+ if (sdata->vif.type != NL80211_IFTYPE_STATION ||
+ !sdata->u.mgd.associated)
+ return -EINVAL;
+
+ tdls_dbg(sdata, "TDLS mgmt action %d peer %pM\n",
+ action_code, peer);
+
+ skb = dev_alloc_skb(local->hw.extra_tx_headroom +
+ max(sizeof(struct ieee80211_mgmt),
+ sizeof(struct ieee80211_tdls_data)) +
+ 50 + /* supported rates */
+ 7 + /* ext capab */
+ extra_ies_len +
+ sizeof(struct ieee80211_tdls_lnkie));
+ if (!skb)
+ return -ENOMEM;
+
+ skb_reserve(skb, local->hw.extra_tx_headroom);
+
+ switch (action_code) {
+ case WLAN_TDLS_SETUP_REQUEST:
+ case WLAN_TDLS_SETUP_RESPONSE:
+ case WLAN_TDLS_SETUP_CONFIRM:
+ case WLAN_TDLS_TEARDOWN:
+ case WLAN_TDLS_DISCOVERY_REQUEST:
+ ret = ieee80211_prep_tdls_encap_data(wiphy, dev, peer,
+ action_code, dialog_token,
+ status_code, skb);
+ send_direct = false;
+ break;
+ case WLAN_PUB_ACTION_TDLS_DISCOVER_RES:
+ ret = ieee80211_prep_tdls_direct(wiphy, dev, peer, action_code,
+ dialog_token, status_code,
+ skb);
+ send_direct = true;
+ break;
+ default:
+ ret = -ENOTSUPP;
+ break;
+ }
+
+ if (ret < 0)
+ goto fail;
+
+ if (extra_ies_len)
+ memcpy(skb_put(skb, extra_ies_len), extra_ies, extra_ies_len);
+
+ /* the TDLS link IE is always added last */
+ switch (action_code) {
+ case WLAN_TDLS_SETUP_REQUEST:
+ case WLAN_TDLS_SETUP_CONFIRM:
+ case WLAN_TDLS_TEARDOWN:
+ case WLAN_TDLS_DISCOVERY_REQUEST:
+ /* we are the initiator */
+ ieee80211_tdls_add_link_ie(skb, sdata->vif.addr, peer,
+ sdata->u.mgd.bssid);
+ break;
+ case WLAN_TDLS_SETUP_RESPONSE:
+ case WLAN_PUB_ACTION_TDLS_DISCOVER_RES:
+ /* we are the responder */
+ ieee80211_tdls_add_link_ie(skb, peer, sdata->vif.addr,
+ sdata->u.mgd.bssid);
+ break;
+ default:
+ ret = -ENOTSUPP;
+ goto fail;
+ }
+
+ if (send_direct) {
+ ieee80211_tx_skb(sdata, skb);
+ return 0;
+ }
+
+ /*
+ * According to 802.11z: Setup req/resp are sent in AC_BK, otherwise
+ * we should default to AC_VI.
+ */
+ switch (action_code) {
+ case WLAN_TDLS_SETUP_REQUEST:
+ case WLAN_TDLS_SETUP_RESPONSE:
+ skb_set_queue_mapping(skb, IEEE80211_AC_BK);
+ skb->priority = 2;
+ break;
+ default:
+ skb_set_queue_mapping(skb, IEEE80211_AC_VI);
+ skb->priority = 5;
+ break;
+ }
+
+ /* disable bottom halves when entering the Tx path */
+ local_bh_disable();
+ ret = ieee80211_subif_start_xmit(skb, dev);
+ local_bh_enable();
+
+ return ret;
+
+fail:
+ dev_kfree_skb(skb);
+ return ret;
+}
+
+int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev,
+ const u8 *peer, enum nl80211_tdls_operation oper)
+{
+ struct sta_info *sta;
+ struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+
+ if (!(wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS))
+ return -ENOTSUPP;
+
+ if (sdata->vif.type != NL80211_IFTYPE_STATION)
+ return -EINVAL;
+
+ tdls_dbg(sdata, "TDLS oper %d peer %pM\n", oper, peer);
+
+ switch (oper) {
+ case NL80211_TDLS_ENABLE_LINK:
+ rcu_read_lock();
+ sta = sta_info_get(sdata, peer);
+ if (!sta) {
+ rcu_read_unlock();
+ return -ENOLINK;
+ }
+
+ set_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH);
+ rcu_read_unlock();
+ break;
+ case NL80211_TDLS_DISABLE_LINK:
+ return sta_info_destroy_addr(sdata, peer);
+ case NL80211_TDLS_TEARDOWN:
+ case NL80211_TDLS_SETUP:
+ case NL80211_TDLS_DISCOVERY_REQ:
+ /* We don't support in-driver setup/teardown/discovery */
+ return -ENOTSUPP;
+ default:
+ return -ENOTSUPP;
+ }
+
+ return 0;
+}
diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
index a0b0aea7652..cfe1a0688b5 100644
--- a/net/mac80211/trace.h
+++ b/net/mac80211/trace.h
@@ -21,10 +21,10 @@
#define VIF_ENTRY __field(enum nl80211_iftype, vif_type) __field(void *, sdata) \
__field(bool, p2p) \
- __string(vif_name, sdata->dev ? sdata->dev->name : "<nodev>")
+ __string(vif_name, sdata->name)
#define VIF_ASSIGN __entry->vif_type = sdata->vif.type; __entry->sdata = sdata; \
__entry->p2p = sdata->vif.p2p; \
- __assign_str(vif_name, sdata->dev ? sdata->dev->name : sdata->name)
+ __assign_str(vif_name, sdata->name)
#define VIF_PR_FMT " vif:%s(%d%s)"
#define VIF_PR_ARG __get_str(vif_name), __entry->vif_type, __entry->p2p ? "/p2p" : ""
@@ -184,6 +184,20 @@ TRACE_EVENT(drv_return_bool,
"true" : "false")
);
+TRACE_EVENT(drv_return_u32,
+ TP_PROTO(struct ieee80211_local *local, u32 ret),
+ TP_ARGS(local, ret),
+ TP_STRUCT__entry(
+ LOCAL_ENTRY
+ __field(u32, ret)
+ ),
+ TP_fast_assign(
+ LOCAL_ASSIGN;
+ __entry->ret = ret;
+ ),
+ TP_printk(LOCAL_PR_FMT " - %u", LOCAL_PR_ARG, __entry->ret)
+);
+
TRACE_EVENT(drv_return_u64,
TP_PROTO(struct ieee80211_local *local, u64 ret),
TP_ARGS(local, ret),
@@ -1375,6 +1389,91 @@ TRACE_EVENT(drv_change_chanctx,
)
);
+#if !defined(__TRACE_VIF_ENTRY)
+#define __TRACE_VIF_ENTRY
+struct trace_vif_entry {
+ enum nl80211_iftype vif_type;
+ bool p2p;
+ char vif_name[IFNAMSIZ];
+} __packed;
+
+struct trace_chandef_entry {
+ u32 control_freq;
+ u32 chan_width;
+ u32 center_freq1;
+ u32 center_freq2;
+} __packed;
+
+struct trace_switch_entry {
+ struct trace_vif_entry vif;
+ struct trace_chandef_entry old_chandef;
+ struct trace_chandef_entry new_chandef;
+} __packed;
+
+#define SWITCH_ENTRY_ASSIGN(to, from) local_vifs[i].to = vifs[i].from
+#endif
+
+TRACE_EVENT(drv_switch_vif_chanctx,
+ TP_PROTO(struct ieee80211_local *local,
+ struct ieee80211_vif_chanctx_switch *vifs,
+ int n_vifs, enum ieee80211_chanctx_switch_mode mode),
+ TP_ARGS(local, vifs, n_vifs, mode),
+
+ TP_STRUCT__entry(
+ LOCAL_ENTRY
+ __field(int, n_vifs)
+ __field(u32, mode)
+ __dynamic_array(u8, vifs,
+ sizeof(struct trace_switch_entry) * n_vifs)
+ ),
+
+ TP_fast_assign(
+ LOCAL_ASSIGN;
+ __entry->n_vifs = n_vifs;
+ __entry->mode = mode;
+ {
+ struct trace_switch_entry *local_vifs =
+ __get_dynamic_array(vifs);
+ int i;
+
+ for (i = 0; i < n_vifs; i++) {
+ struct ieee80211_sub_if_data *sdata;
+
+ sdata = container_of(vifs[i].vif,
+ struct ieee80211_sub_if_data,
+ vif);
+
+ SWITCH_ENTRY_ASSIGN(vif.vif_type, vif->type);
+ SWITCH_ENTRY_ASSIGN(vif.p2p, vif->p2p);
+ strncpy(local_vifs[i].vif.vif_name,
+ sdata->name,
+ sizeof(local_vifs[i].vif.vif_name));
+ SWITCH_ENTRY_ASSIGN(old_chandef.control_freq,
+ old_ctx->def.chan->center_freq);
+ SWITCH_ENTRY_ASSIGN(old_chandef.chan_width,
+ old_ctx->def.width);
+ SWITCH_ENTRY_ASSIGN(old_chandef.center_freq1,
+ old_ctx->def.center_freq1);
+ SWITCH_ENTRY_ASSIGN(old_chandef.center_freq2,
+ old_ctx->def.center_freq2);
+ SWITCH_ENTRY_ASSIGN(new_chandef.control_freq,
+ new_ctx->def.chan->center_freq);
+ SWITCH_ENTRY_ASSIGN(new_chandef.chan_width,
+ new_ctx->def.width);
+ SWITCH_ENTRY_ASSIGN(new_chandef.center_freq1,
+ new_ctx->def.center_freq1);
+ SWITCH_ENTRY_ASSIGN(new_chandef.center_freq2,
+ new_ctx->def.center_freq2);
+ }
+ }
+ ),
+
+ TP_printk(
+ LOCAL_PR_FMT " n_vifs:%d mode:%d",
+ LOCAL_PR_ARG, __entry->n_vifs, __entry->mode
+ )
+);
+
DECLARE_EVENT_CLASS(local_sdata_chanctx,
TP_PROTO(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata,
@@ -1499,6 +1598,24 @@ DEFINE_EVENT(local_sdata_evt, drv_leave_ibss,
TP_ARGS(local, sdata)
);
+TRACE_EVENT(drv_get_expected_throughput,
+ TP_PROTO(struct ieee80211_sta *sta),
+
+ TP_ARGS(sta),
+
+ TP_STRUCT__entry(
+ STA_ENTRY
+ ),
+
+ TP_fast_assign(
+ STA_ASSIGN;
+ ),
+
+ TP_printk(
+ STA_PR_FMT, STA_PR_ARG
+ )
+);
+
/*
* Tracing for API calls that drivers call.
*/
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 4080c615636..1a252c606ad 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -414,6 +414,9 @@ ieee80211_tx_h_multicast_ps_buf(struct ieee80211_tx_data *tx)
if (ieee80211_has_order(hdr->frame_control))
return TX_CONTINUE;
+ if (ieee80211_is_probe_req(hdr->frame_control))
+ return TX_CONTINUE;
+
if (tx->local->hw.flags & IEEE80211_HW_QUEUE_CONTROL)
info->hw_queue = tx->sdata->vif.cab_queue;
@@ -452,8 +455,7 @@ static int ieee80211_use_mfp(__le16 fc, struct sta_info *sta,
if (sta == NULL || !test_sta_flag(sta, WLAN_STA_MFP))
return 0;
- if (!ieee80211_is_robust_mgmt_frame((struct ieee80211_hdr *)
- skb->data))
+ if (!ieee80211_is_robust_mgmt_frame(skb))
return 0;
return 1;
@@ -464,6 +466,7 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx)
{
struct sta_info *sta = tx->sta;
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
+ struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data;
struct ieee80211_local *local = tx->local;
if (unlikely(!sta))
@@ -474,6 +477,12 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx)
!(info->flags & IEEE80211_TX_CTL_NO_PS_BUFFER))) {
int ac = skb_get_queue_mapping(tx->skb);
+ if (ieee80211_is_mgmt(hdr->frame_control) &&
+ !ieee80211_is_bufferable_mmpdu(hdr->frame_control)) {
+ info->flags |= IEEE80211_TX_CTL_NO_PS_BUFFER;
+ return TX_CONTINUE;
+ }
+
ps_dbg(sta->sdata, "STA %pM aid %d: PS buffer for AC %d\n",
sta->sta.addr, sta->sta.aid, ac);
if (tx->local->total_ps_buffered >= TOTAL_MAX_TX_BUFFER)
@@ -532,22 +541,9 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx)
static ieee80211_tx_result debug_noinline
ieee80211_tx_h_ps_buf(struct ieee80211_tx_data *tx)
{
- struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
- struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data;
-
if (unlikely(tx->flags & IEEE80211_TX_PS_BUFFERED))
return TX_CONTINUE;
- /* only deauth, disassoc and action are bufferable MMPDUs */
- if (ieee80211_is_mgmt(hdr->frame_control) &&
- !ieee80211_is_deauth(hdr->frame_control) &&
- !ieee80211_is_disassoc(hdr->frame_control) &&
- !ieee80211_is_action(hdr->frame_control)) {
- if (tx->flags & IEEE80211_TX_UNICAST)
- info->flags |= IEEE80211_TX_CTL_NO_PS_BUFFER;
- return TX_CONTINUE;
- }
-
if (tx->flags & IEEE80211_TX_UNICAST)
return ieee80211_tx_h_unicast_ps_buf(tx);
else
@@ -582,7 +578,7 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
tx->key = key;
else if (ieee80211_is_mgmt(hdr->frame_control) &&
is_multicast_ether_addr(hdr->addr1) &&
- ieee80211_is_robust_mgmt_frame(hdr) &&
+ ieee80211_is_robust_mgmt_frame(tx->skb) &&
(key = rcu_dereference(tx->sdata->default_mgmt_key)))
tx->key = key;
else if (is_multicast_ether_addr(hdr->addr1) &&
@@ -597,12 +593,12 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
tx->key = NULL;
else if (tx->skb->protocol == tx->sdata->control_port_protocol)
tx->key = NULL;
- else if (ieee80211_is_robust_mgmt_frame(hdr) &&
+ else if (ieee80211_is_robust_mgmt_frame(tx->skb) &&
!(ieee80211_is_action(hdr->frame_control) &&
tx->sta && test_sta_flag(tx->sta, WLAN_STA_MFP)))
tx->key = NULL;
else if (ieee80211_is_mgmt(hdr->frame_control) &&
- !ieee80211_is_robust_mgmt_frame(hdr))
+ !ieee80211_is_robust_mgmt_frame(tx->skb))
tx->key = NULL;
else {
I802_DEBUG_INC(tx->local->tx_handlers_drop_unencrypted);
@@ -2332,7 +2328,8 @@ void ieee80211_tx_pending(unsigned long data)
/* functions for drivers to get certain frames */
static void __ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata,
- struct ps_data *ps, struct sk_buff *skb)
+ struct ps_data *ps, struct sk_buff *skb,
+ bool is_template)
{
u8 *pos, *tim;
int aid0 = 0;
@@ -2345,11 +2342,12 @@ static void __ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata,
* checking byte-for-byte */
have_bits = !bitmap_empty((unsigned long *)ps->tim,
IEEE80211_MAX_AID+1);
-
- if (ps->dtim_count == 0)
- ps->dtim_count = sdata->vif.bss_conf.dtim_period - 1;
- else
- ps->dtim_count--;
+ if (!is_template) {
+ if (ps->dtim_count == 0)
+ ps->dtim_count = sdata->vif.bss_conf.dtim_period - 1;
+ else
+ ps->dtim_count--;
+ }
tim = pos = (u8 *) skb_put(skb, 6);
*pos++ = WLAN_EID_TIM;
@@ -2395,7 +2393,8 @@ static void __ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata,
}
static int ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata,
- struct ps_data *ps, struct sk_buff *skb)
+ struct ps_data *ps, struct sk_buff *skb,
+ bool is_template)
{
struct ieee80211_local *local = sdata->local;
@@ -2407,33 +2406,24 @@ static int ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata,
* of the tim bitmap in mac80211 and the driver.
*/
if (local->tim_in_locked_section) {
- __ieee80211_beacon_add_tim(sdata, ps, skb);
+ __ieee80211_beacon_add_tim(sdata, ps, skb, is_template);
} else {
spin_lock_bh(&local->tim_lock);
- __ieee80211_beacon_add_tim(sdata, ps, skb);
+ __ieee80211_beacon_add_tim(sdata, ps, skb, is_template);
spin_unlock_bh(&local->tim_lock);
}
return 0;
}
-void ieee80211_csa_finish(struct ieee80211_vif *vif)
-{
- struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
-
- ieee80211_queue_work(&sdata->local->hw,
- &sdata->csa_finalize_work);
-}
-EXPORT_SYMBOL(ieee80211_csa_finish);
-
-static void ieee80211_update_csa(struct ieee80211_sub_if_data *sdata,
- struct beacon_data *beacon)
+static void ieee80211_set_csa(struct ieee80211_sub_if_data *sdata,
+ struct beacon_data *beacon)
{
struct probe_resp *resp;
- int counter_offset_beacon = sdata->csa_counter_offset_beacon;
- int counter_offset_presp = sdata->csa_counter_offset_presp;
u8 *beacon_data;
size_t beacon_data_len;
+ int i;
+ u8 count = sdata->csa_current_counter;
switch (sdata->vif.type) {
case NL80211_IFTYPE_AP:
@@ -2451,36 +2441,57 @@ static void ieee80211_update_csa(struct ieee80211_sub_if_data *sdata,
default:
return;
}
- if (WARN_ON(counter_offset_beacon >= beacon_data_len))
- return;
- /* warn if the driver did not check for/react to csa completeness */
- if (WARN_ON(beacon_data[counter_offset_beacon] == 0))
- return;
+ for (i = 0; i < IEEE80211_MAX_CSA_COUNTERS_NUM; ++i) {
+ u16 counter_offset_beacon =
+ sdata->csa_counter_offset_beacon[i];
+ u16 counter_offset_presp = sdata->csa_counter_offset_presp[i];
+
+ if (counter_offset_beacon) {
+ if (WARN_ON(counter_offset_beacon >= beacon_data_len))
+ return;
- beacon_data[counter_offset_beacon]--;
+ beacon_data[counter_offset_beacon] = count;
+ }
- if (sdata->vif.type == NL80211_IFTYPE_AP && counter_offset_presp) {
- rcu_read_lock();
- resp = rcu_dereference(sdata->u.ap.probe_resp);
+ if (sdata->vif.type == NL80211_IFTYPE_AP &&
+ counter_offset_presp) {
+ rcu_read_lock();
+ resp = rcu_dereference(sdata->u.ap.probe_resp);
- /* if nl80211 accepted the offset, this should not happen. */
- if (WARN_ON(!resp)) {
+ /* If nl80211 accepted the offset, this should
+ * not happen.
+ */
+ if (WARN_ON(!resp)) {
+ rcu_read_unlock();
+ return;
+ }
+ resp->data[counter_offset_presp] = count;
rcu_read_unlock();
- return;
}
- resp->data[counter_offset_presp]--;
- rcu_read_unlock();
}
}
+u8 ieee80211_csa_update_counter(struct ieee80211_vif *vif)
+{
+ struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
+
+ sdata->csa_current_counter--;
+
+ /* the counter should never reach 0 */
+ WARN_ON(!sdata->csa_current_counter);
+
+ return sdata->csa_current_counter;
+}
+EXPORT_SYMBOL(ieee80211_csa_update_counter);
+
bool ieee80211_csa_is_complete(struct ieee80211_vif *vif)
{
struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
struct beacon_data *beacon = NULL;
u8 *beacon_data;
size_t beacon_data_len;
- int counter_beacon = sdata->csa_counter_offset_beacon;
+ int counter_beacon = sdata->csa_counter_offset_beacon[0];
int ret = false;
if (!ieee80211_sdata_running(sdata))
@@ -2521,7 +2532,7 @@ bool ieee80211_csa_is_complete(struct ieee80211_vif *vif)
if (WARN_ON(counter_beacon > beacon_data_len))
goto out;
- if (beacon_data[counter_beacon] == 0)
+ if (beacon_data[counter_beacon] == 1)
ret = true;
out:
rcu_read_unlock();
@@ -2530,9 +2541,11 @@ bool ieee80211_csa_is_complete(struct ieee80211_vif *vif)
}
EXPORT_SYMBOL(ieee80211_csa_is_complete);
-struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
- struct ieee80211_vif *vif,
- u16 *tim_offset, u16 *tim_length)
+static struct sk_buff *
+__ieee80211_beacon_get(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif,
+ struct ieee80211_mutable_offsets *offs,
+ bool is_template)
{
struct ieee80211_local *local = hw_to_local(hw);
struct sk_buff *skb = NULL;
@@ -2541,6 +2554,7 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
enum ieee80211_band band;
struct ieee80211_tx_rate_control txrc;
struct ieee80211_chanctx_conf *chanctx_conf;
+ int csa_off_base = 0;
rcu_read_lock();
@@ -2550,18 +2564,20 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
if (!ieee80211_sdata_running(sdata) || !chanctx_conf)
goto out;
- if (tim_offset)
- *tim_offset = 0;
- if (tim_length)
- *tim_length = 0;
+ if (offs)
+ memset(offs, 0, sizeof(*offs));
if (sdata->vif.type == NL80211_IFTYPE_AP) {
struct ieee80211_if_ap *ap = &sdata->u.ap;
struct beacon_data *beacon = rcu_dereference(ap->beacon);
if (beacon) {
- if (sdata->vif.csa_active)
- ieee80211_update_csa(sdata, beacon);
+ if (sdata->vif.csa_active) {
+ if (!is_template)
+ ieee80211_csa_update_counter(vif);
+
+ ieee80211_set_csa(sdata, beacon);
+ }
/*
* headroom, head length,
@@ -2578,12 +2594,16 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
memcpy(skb_put(skb, beacon->head_len), beacon->head,
beacon->head_len);
- ieee80211_beacon_add_tim(sdata, &ap->ps, skb);
+ ieee80211_beacon_add_tim(sdata, &ap->ps, skb,
+ is_template);
- if (tim_offset)
- *tim_offset = beacon->head_len;
- if (tim_length)
- *tim_length = skb->len - beacon->head_len;
+ if (offs) {
+ offs->tim_offset = beacon->head_len;
+ offs->tim_length = skb->len - beacon->head_len;
+
+ /* for AP the csa offsets are from tail */
+ csa_off_base = skb->len;
+ }
if (beacon->tail)
memcpy(skb_put(skb, beacon->tail_len),
@@ -2598,9 +2618,12 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
if (!presp)
goto out;
- if (sdata->vif.csa_active)
- ieee80211_update_csa(sdata, presp);
+ if (sdata->vif.csa_active) {
+ if (!is_template)
+ ieee80211_csa_update_counter(vif);
+ ieee80211_set_csa(sdata, presp);
+ }
skb = dev_alloc_skb(local->tx_headroom + presp->head_len +
local->hw.extra_beacon_tailroom);
@@ -2620,8 +2643,17 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
if (!bcn)
goto out;
- if (sdata->vif.csa_active)
- ieee80211_update_csa(sdata, bcn);
+ if (sdata->vif.csa_active) {
+ if (!is_template)
+ /* TODO: For mesh csa_counter is in TU, so
+ * decrementing it by one isn't correct, but
+ * for now we leave it consistent with overall
+ * mac80211's behavior.
+ */
+ ieee80211_csa_update_counter(vif);
+
+ ieee80211_set_csa(sdata, bcn);
+ }
if (ifmsh->sync_ops)
ifmsh->sync_ops->adjust_tbtt(sdata, bcn);
@@ -2635,13 +2667,33 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
goto out;
skb_reserve(skb, local->tx_headroom);
memcpy(skb_put(skb, bcn->head_len), bcn->head, bcn->head_len);
- ieee80211_beacon_add_tim(sdata, &ifmsh->ps, skb);
+ ieee80211_beacon_add_tim(sdata, &ifmsh->ps, skb, is_template);
+
+ if (offs) {
+ offs->tim_offset = bcn->head_len;
+ offs->tim_length = skb->len - bcn->head_len;
+ }
+
memcpy(skb_put(skb, bcn->tail_len), bcn->tail, bcn->tail_len);
} else {
WARN_ON(1);
goto out;
}
+ /* CSA offsets */
+ if (offs) {
+ int i;
+
+ for (i = 0; i < IEEE80211_MAX_CSA_COUNTERS_NUM; i++) {
+ u16 csa_off = sdata->csa_counter_offset_beacon[i];
+
+ if (!csa_off)
+ continue;
+
+ offs->csa_counter_offs[i] = csa_off_base + csa_off;
+ }
+ }
+
band = chanctx_conf->def.chan->band;
info = IEEE80211_SKB_CB(skb);
@@ -2672,6 +2724,32 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
out:
rcu_read_unlock();
return skb;
+
+}
+
+struct sk_buff *
+ieee80211_beacon_get_template(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif,
+ struct ieee80211_mutable_offsets *offs)
+{
+ return __ieee80211_beacon_get(hw, vif, offs, true);
+}
+EXPORT_SYMBOL(ieee80211_beacon_get_template);
+
+struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif,
+ u16 *tim_offset, u16 *tim_length)
+{
+ struct ieee80211_mutable_offsets offs = {};
+ struct sk_buff *bcn = __ieee80211_beacon_get(hw, vif, &offs, false);
+
+ if (tim_offset)
+ *tim_offset = offs.tim_offset;
+
+ if (tim_length)
+ *tim_length = offs.tim_length;
+
+ return bcn;
}
EXPORT_SYMBOL(ieee80211_beacon_get_tim);
@@ -2909,7 +2987,7 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw,
cpu_to_le16(IEEE80211_FCTL_MOREDATA);
}
- if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
+ if (sdata->vif.type == NL80211_IFTYPE_AP)
sdata = IEEE80211_DEV_TO_SUB_IF(skb->dev);
if (!ieee80211_tx_prepare(sdata, &tx, skb))
break;
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index b8700d417a9..a6cda52ed92 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -34,7 +34,7 @@
#include "wep.h"
/* privid for wiphys to determine whether they belong to us or not */
-void *mac80211_wiphy_privid = &mac80211_wiphy_privid;
+const void *const mac80211_wiphy_privid = &mac80211_wiphy_privid;
struct ieee80211_hw *wiphy_to_ieee80211_hw(struct wiphy *wiphy)
{
@@ -554,7 +554,7 @@ void ieee80211_flush_queues(struct ieee80211_local *local,
ieee80211_stop_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP,
IEEE80211_QUEUE_STOP_REASON_FLUSH);
- drv_flush(local, queues, false);
+ drv_flush(local, sdata, queues, false);
ieee80211_wake_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP,
IEEE80211_QUEUE_STOP_REASON_FLUSH);
@@ -1096,11 +1096,12 @@ void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
int err;
/* 24 + 6 = header + auth_algo + auth_transaction + status_code */
- skb = dev_alloc_skb(local->hw.extra_tx_headroom + 24 + 6 + extra_len);
+ skb = dev_alloc_skb(local->hw.extra_tx_headroom + IEEE80211_WEP_IV_LEN +
+ 24 + 6 + extra_len + IEEE80211_WEP_ICV_LEN);
if (!skb)
return;
- skb_reserve(skb, local->hw.extra_tx_headroom);
+ skb_reserve(skb, local->hw.extra_tx_headroom + IEEE80211_WEP_IV_LEN);
mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24 + 6);
memset(mgmt, 0, 24 + 6);
@@ -1277,13 +1278,32 @@ int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer,
* that calculates local->scan_ies_len.
*/
- /* add any remaining custom IEs */
+ /* insert custom IEs that go before VHT */
if (ie && ie_len) {
- noffset = ie_len;
+ static const u8 before_vht[] = {
+ WLAN_EID_SSID,
+ WLAN_EID_SUPP_RATES,
+ WLAN_EID_REQUEST,
+ WLAN_EID_EXT_SUPP_RATES,
+ WLAN_EID_DS_PARAMS,
+ WLAN_EID_SUPPORTED_REGULATORY_CLASSES,
+ WLAN_EID_HT_CAPABILITY,
+ WLAN_EID_BSS_COEX_2040,
+ WLAN_EID_EXT_CAPABILITY,
+ WLAN_EID_SSID_LIST,
+ WLAN_EID_CHANNEL_USAGE,
+ WLAN_EID_INTERWORKING,
+ /* mesh ID can't happen here */
+ /* 60 GHz can't happen here right now */
+ };
+ noffset = ieee80211_ie_split(ie, ie_len,
+ before_vht, ARRAY_SIZE(before_vht),
+ offset);
if (end - pos < noffset - offset)
goto out_err;
memcpy(pos, ie + offset, noffset - offset);
pos += noffset - offset;
+ offset = noffset;
}
if (sband->vht_cap.vht_supported) {
@@ -1293,6 +1313,15 @@ int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer,
sband->vht_cap.cap);
}
+ /* add any remaining custom IEs */
+ if (ie && ie_len) {
+ noffset = ie_len;
+ if (end - pos < noffset - offset)
+ goto out_err;
+ memcpy(pos, ie + offset, noffset - offset);
+ pos += noffset - offset;
+ }
+
return pos - buffer;
out_err:
WARN_ONCE(1, "not enough space for preq IEs\n");
@@ -1370,7 +1399,6 @@ u32 ieee80211_sta_get_rates(struct ieee80211_sub_if_data *sdata,
enum ieee80211_band band, u32 *basic_rates)
{
struct ieee80211_supported_band *sband;
- struct ieee80211_rate *bitrates;
size_t num_rates;
u32 supp_rates, rate_flags;
int i, j, shift;
@@ -1382,7 +1410,6 @@ u32 ieee80211_sta_get_rates(struct ieee80211_sub_if_data *sdata,
if (WARN_ON(!sband))
return 1;
- bitrates = sband->bitrates;
num_rates = sband->n_bitrates;
supp_rates = 0;
for (i = 0; i < elems->supp_rates_len +
@@ -1431,6 +1458,44 @@ void ieee80211_stop_device(struct ieee80211_local *local)
drv_stop(local);
}
+static void ieee80211_handle_reconfig_failure(struct ieee80211_local *local)
+{
+ struct ieee80211_sub_if_data *sdata;
+ struct ieee80211_chanctx *ctx;
+
+ /*
+ * We get here if during resume the device can't be restarted properly.
+ * We might also get here if this happens during HW reset, which is a
+ * slightly different situation and we need to drop all connections in
+ * the latter case.
+ *
+ * Ask cfg80211 to turn off all interfaces, this will result in more
+ * warnings but at least we'll then get into a clean stopped state.
+ */
+
+ local->resuming = false;
+ local->suspended = false;
+ local->started = false;
+
+ /* scheduled scan clearly can't be running any more, but tell
+ * cfg80211 and clear local state
+ */
+ ieee80211_sched_scan_end(local);
+
+ list_for_each_entry(sdata, &local->interfaces, list)
+ sdata->flags &= ~IEEE80211_SDATA_IN_DRIVER;
+
+ /* Mark channel contexts as not being in the driver any more to avoid
+ * removing them from the driver during the shutdown process...
+ */
+ mutex_lock(&local->chanctx_mtx);
+ list_for_each_entry(ctx, &local->chanctx_list, list)
+ ctx->driver_present = false;
+ mutex_unlock(&local->chanctx_mtx);
+
+ cfg80211_shutdown_all_interfaces(local->hw.wiphy);
+}
+
static void ieee80211_assign_chanctx(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata)
{
@@ -1494,9 +1559,11 @@ int ieee80211_reconfig(struct ieee80211_local *local)
*/
res = drv_start(local);
if (res) {
- WARN(local->suspended, "Hardware became unavailable "
- "upon resume. This could be a software issue "
- "prior to suspend or a hardware issue.\n");
+ if (local->suspended)
+ WARN(1, "Hardware became unavailable upon resume. This could be a software issue prior to suspend or a hardware issue.\n");
+ else
+ WARN(1, "Hardware became unavailable during restart.\n");
+ ieee80211_handle_reconfig_failure(local);
return res;
}
@@ -1520,7 +1587,7 @@ int ieee80211_reconfig(struct ieee80211_local *local)
WARN_ON(local->resuming);
res = drv_add_interface(local, sdata);
if (WARN_ON(res)) {
- rcu_assign_pointer(local->monitor_sdata, NULL);
+ RCU_INIT_POINTER(local->monitor_sdata, NULL);
synchronize_net();
kfree(sdata);
}
@@ -1539,17 +1606,17 @@ int ieee80211_reconfig(struct ieee80211_local *local)
list_for_each_entry(ctx, &local->chanctx_list, list)
WARN_ON(drv_add_chanctx(local, ctx));
mutex_unlock(&local->chanctx_mtx);
- }
- list_for_each_entry(sdata, &local->interfaces, list) {
- if (!ieee80211_sdata_running(sdata))
- continue;
- ieee80211_assign_chanctx(local, sdata);
- }
+ list_for_each_entry(sdata, &local->interfaces, list) {
+ if (!ieee80211_sdata_running(sdata))
+ continue;
+ ieee80211_assign_chanctx(local, sdata);
+ }
- sdata = rtnl_dereference(local->monitor_sdata);
- if (sdata && ieee80211_sdata_running(sdata))
- ieee80211_assign_chanctx(local, sdata);
+ sdata = rtnl_dereference(local->monitor_sdata);
+ if (sdata && ieee80211_sdata_running(sdata))
+ ieee80211_assign_chanctx(local, sdata);
+ }
/* add STAs back */
mutex_lock(&local->sta_mtx);
@@ -1645,13 +1712,10 @@ int ieee80211_reconfig(struct ieee80211_local *local)
}
break;
case NL80211_IFTYPE_WDS:
- break;
case NL80211_IFTYPE_AP_VLAN:
case NL80211_IFTYPE_MONITOR:
- /* ignore virtual */
- break;
case NL80211_IFTYPE_P2P_DEVICE:
- changed = BSS_CHANGED_IDLE;
+ /* nothing to do */
break;
case NL80211_IFTYPE_UNSPECIFIED:
case NUM_NL80211_IFTYPES:
@@ -1754,7 +1818,7 @@ int ieee80211_reconfig(struct ieee80211_local *local)
mutex_unlock(&local->mtx);
if (sched_scan_stopped)
- cfg80211_sched_scan_stopped(local->hw.wiphy);
+ cfg80211_sched_scan_stopped_rtnl(local->hw.wiphy);
/*
* If this is for hw restart things are still running.
@@ -2268,11 +2332,11 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local,
ri.nss = status->vht_nss;
if (status->flag & RX_FLAG_40MHZ)
ri.flags |= RATE_INFO_FLAGS_40_MHZ_WIDTH;
- if (status->flag & RX_FLAG_80MHZ)
+ if (status->vht_flag & RX_VHT_FLAG_80MHZ)
ri.flags |= RATE_INFO_FLAGS_80_MHZ_WIDTH;
- if (status->flag & RX_FLAG_80P80MHZ)
+ if (status->vht_flag & RX_VHT_FLAG_80P80MHZ)
ri.flags |= RATE_INFO_FLAGS_80P80_MHZ_WIDTH;
- if (status->flag & RX_FLAG_160MHZ)
+ if (status->vht_flag & RX_VHT_FLAG_160MHZ)
ri.flags |= RATE_INFO_FLAGS_160_MHZ_WIDTH;
if (status->flag & RX_FLAG_SHORT_GI)
ri.flags |= RATE_INFO_FLAGS_SHORT_GI;
@@ -2771,3 +2835,121 @@ void ieee80211_recalc_dtim(struct ieee80211_local *local,
ps->dtim_count = dtim_count;
}
+
+int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata,
+ const struct cfg80211_chan_def *chandef,
+ enum ieee80211_chanctx_mode chanmode,
+ u8 radar_detect)
+{
+ struct ieee80211_local *local = sdata->local;
+ struct ieee80211_sub_if_data *sdata_iter;
+ enum nl80211_iftype iftype = sdata->wdev.iftype;
+ int num[NUM_NL80211_IFTYPES];
+ struct ieee80211_chanctx *ctx;
+ int num_different_channels = 0;
+ int total = 1;
+
+ lockdep_assert_held(&local->chanctx_mtx);
+
+ if (WARN_ON(hweight32(radar_detect) > 1))
+ return -EINVAL;
+
+ if (WARN_ON(chandef && chanmode == IEEE80211_CHANCTX_SHARED &&
+ !chandef->chan))
+ return -EINVAL;
+
+ if (chandef)
+ num_different_channels = 1;
+
+ if (WARN_ON(iftype >= NUM_NL80211_IFTYPES))
+ return -EINVAL;
+
+ /* Always allow software iftypes */
+ if (local->hw.wiphy->software_iftypes & BIT(iftype)) {
+ if (radar_detect)
+ return -EINVAL;
+ return 0;
+ }
+
+ memset(num, 0, sizeof(num));
+
+ if (iftype != NL80211_IFTYPE_UNSPECIFIED)
+ num[iftype] = 1;
+
+ list_for_each_entry(ctx, &local->chanctx_list, list) {
+ if (ctx->conf.radar_enabled)
+ radar_detect |= BIT(ctx->conf.def.width);
+ if (ctx->mode == IEEE80211_CHANCTX_EXCLUSIVE) {
+ num_different_channels++;
+ continue;
+ }
+ if (chandef && chanmode == IEEE80211_CHANCTX_SHARED &&
+ cfg80211_chandef_compatible(chandef,
+ &ctx->conf.def))
+ continue;
+ num_different_channels++;
+ }
+
+ list_for_each_entry_rcu(sdata_iter, &local->interfaces, list) {
+ struct wireless_dev *wdev_iter;
+
+ wdev_iter = &sdata_iter->wdev;
+
+ if (sdata_iter == sdata ||
+ rcu_access_pointer(sdata_iter->vif.chanctx_conf) == NULL ||
+ local->hw.wiphy->software_iftypes & BIT(wdev_iter->iftype))
+ continue;
+
+ num[wdev_iter->iftype]++;
+ total++;
+ }
+
+ if (total == 1 && !radar_detect)
+ return 0;
+
+ return cfg80211_check_combinations(local->hw.wiphy,
+ num_different_channels,
+ radar_detect, num);
+}
+
+static void
+ieee80211_iter_max_chans(const struct ieee80211_iface_combination *c,
+ void *data)
+{
+ u32 *max_num_different_channels = data;
+
+ *max_num_different_channels = max(*max_num_different_channels,
+ c->num_different_channels);
+}
+
+int ieee80211_max_num_channels(struct ieee80211_local *local)
+{
+ struct ieee80211_sub_if_data *sdata;
+ int num[NUM_NL80211_IFTYPES] = {};
+ struct ieee80211_chanctx *ctx;
+ int num_different_channels = 0;
+ u8 radar_detect = 0;
+ u32 max_num_different_channels = 1;
+ int err;
+
+ lockdep_assert_held(&local->chanctx_mtx);
+
+ list_for_each_entry(ctx, &local->chanctx_list, list) {
+ num_different_channels++;
+
+ if (ctx->conf.radar_enabled)
+ radar_detect |= BIT(ctx->conf.def.width);
+ }
+
+ list_for_each_entry_rcu(sdata, &local->interfaces, list)
+ num[sdata->wdev.iftype]++;
+
+ err = cfg80211_iter_combinations(local->hw.wiphy,
+ num_different_channels, radar_detect,
+ num, ieee80211_iter_max_chans,
+ &max_num_different_channels);
+ if (err < 0)
+ return err;
+
+ return max_num_different_channels;
+}
diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c
index d75f35c6e1a..9265adfdabf 100644
--- a/net/mac80211/vht.c
+++ b/net/mac80211/vht.c
@@ -129,9 +129,12 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
if (!vht_cap_ie || !sband->vht_cap.vht_supported)
return;
- /* A VHT STA must support 40 MHz */
- if (!(sta->sta.ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40))
- return;
+ /*
+ * A VHT STA must support 40 MHz, but if we verify that here
+ * then we break a few things - some APs (e.g. Netgear R6300v2
+ * and others based on the BCM4360 chipset) will unset this
+ * capability bit when operating in 20 MHz.
+ */
vht_cap->vht_supported = true;
@@ -349,9 +352,9 @@ void ieee80211_sta_set_rx_nss(struct sta_info *sta)
sta->sta.rx_nss = max_t(u8, 1, ht_rx_nss);
}
-void ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
- struct sta_info *sta, u8 opmode,
- enum ieee80211_band band, bool nss_only)
+u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
+ struct sta_info *sta, u8 opmode,
+ enum ieee80211_band band, bool nss_only)
{
struct ieee80211_local *local = sdata->local;
struct ieee80211_supported_band *sband;
@@ -363,7 +366,7 @@ void ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
/* ignore - no support for BF yet */
if (opmode & IEEE80211_OPMODE_NOTIF_RX_NSS_TYPE_BF)
- return;
+ return 0;
nss = opmode & IEEE80211_OPMODE_NOTIF_RX_NSS_MASK;
nss >>= IEEE80211_OPMODE_NOTIF_RX_NSS_SHIFT;
@@ -375,7 +378,7 @@ void ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
}
if (nss_only)
- goto change;
+ return changed;
switch (opmode & IEEE80211_OPMODE_NOTIF_CHANWIDTH_MASK) {
case IEEE80211_OPMODE_NOTIF_CHANWIDTH_20MHZ:
@@ -398,7 +401,19 @@ void ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
changed |= IEEE80211_RC_BW_CHANGED;
}
- change:
- if (changed)
+ return changed;
+}
+
+void ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
+ struct sta_info *sta, u8 opmode,
+ enum ieee80211_band band, bool nss_only)
+{
+ struct ieee80211_local *local = sdata->local;
+ struct ieee80211_supported_band *sband = local->hw.wiphy->bands[band];
+
+ u32 changed = __ieee80211_vht_handle_opmode(sdata, sta, opmode,
+ band, nss_only);
+
+ if (changed > 0)
rate_control_rate_update(local, sband, sta, changed);
}
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index 21448d629b1..9b3dcc20114 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -301,8 +301,7 @@ ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx)
}
-static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *b_0, u8 *aad,
- int encrypted)
+static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *b_0, u8 *aad)
{
__le16 mask_fc;
int a4_included, mgmt;
@@ -407,7 +406,10 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb)
if (info->control.hw_key &&
!(info->control.hw_key->flags & IEEE80211_KEY_FLAG_GENERATE_IV) &&
- !(info->control.hw_key->flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE)) {
+ !(info->control.hw_key->flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE) &&
+ !((info->control.hw_key->flags &
+ IEEE80211_KEY_FLAG_GENERATE_IV_MGMT) &&
+ ieee80211_is_mgmt(hdr->frame_control))) {
/*
* hwaccel has no need for preallocated room for CCMP
* header or MIC fields
@@ -456,7 +458,7 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb)
return 0;
pos += IEEE80211_CCMP_HDR_LEN;
- ccmp_special_blocks(skb, pn, b_0, aad, 0);
+ ccmp_special_blocks(skb, pn, b_0, aad);
ieee80211_aes_ccm_encrypt(key->u.ccmp.tfm, b_0, aad, pos, len,
skb_put(skb, IEEE80211_CCMP_MIC_LEN));
@@ -495,7 +497,7 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx)
hdrlen = ieee80211_hdrlen(hdr->frame_control);
if (!ieee80211_is_data(hdr->frame_control) &&
- !ieee80211_is_robust_mgmt_frame(hdr))
+ !ieee80211_is_robust_mgmt_frame(skb))
return RX_CONTINUE;
data_len = skb->len - hdrlen - IEEE80211_CCMP_HDR_LEN -
@@ -524,7 +526,7 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx)
u8 aad[2 * AES_BLOCK_SIZE];
u8 b_0[AES_BLOCK_SIZE];
/* hardware didn't decrypt/verify MIC */
- ccmp_special_blocks(skb, pn, b_0, aad, 1);
+ ccmp_special_blocks(skb, pn, b_0, aad);
if (ieee80211_aes_ccm_decrypt(
key->u.ccmp.tfm, b_0, aad,
diff --git a/net/mac802154/Kconfig b/net/mac802154/Kconfig
index b33dd76d430..1818a99b308 100644
--- a/net/mac802154/Kconfig
+++ b/net/mac802154/Kconfig
@@ -2,6 +2,10 @@ config MAC802154
tristate "Generic IEEE 802.15.4 Soft Networking Stack (mac802154)"
depends on IEEE802154
select CRC_CCITT
+ select CRYPTO_AUTHENC
+ select CRYPTO_CCM
+ select CRYPTO_CTR
+ select CRYPTO_AES
---help---
This option enables the hardware independent IEEE 802.15.4
networking stack for SoftMAC devices (the ones implementing
diff --git a/net/mac802154/Makefile b/net/mac802154/Makefile
index 57cf5d1a2e4..9723d6f3f3e 100644
--- a/net/mac802154/Makefile
+++ b/net/mac802154/Makefile
@@ -1,2 +1,5 @@
obj-$(CONFIG_MAC802154) += mac802154.o
-mac802154-objs := ieee802154_dev.o rx.o tx.o mac_cmd.o mib.o monitor.o wpan.o
+mac802154-objs := ieee802154_dev.o rx.o tx.o mac_cmd.o mib.o \
+ monitor.o wpan.o llsec.o
+
+ccflags-y += -D__CHECK_ENDIAN__
diff --git a/net/mac802154/ieee802154_dev.c b/net/mac802154/ieee802154_dev.c
index 52ae6646a41..2cf66d885e6 100644
--- a/net/mac802154/ieee802154_dev.c
+++ b/net/mac802154/ieee802154_dev.c
@@ -27,6 +27,7 @@
#include <net/netlink.h>
#include <linux/nl802154.h>
#include <net/mac802154.h>
+#include <net/ieee802154_netdev.h>
#include <net/route.h>
#include <net/wpan-phy.h>
@@ -35,9 +36,28 @@
int mac802154_slave_open(struct net_device *dev)
{
struct mac802154_sub_if_data *priv = netdev_priv(dev);
+ struct mac802154_sub_if_data *subif;
struct mac802154_priv *ipriv = priv->hw;
int res = 0;
+ ASSERT_RTNL();
+
+ if (priv->type == IEEE802154_DEV_WPAN) {
+ mutex_lock(&priv->hw->slaves_mtx);
+ list_for_each_entry(subif, &priv->hw->slaves, list) {
+ if (subif != priv && subif->type == priv->type &&
+ subif->running) {
+ mutex_unlock(&priv->hw->slaves_mtx);
+ return -EBUSY;
+ }
+ }
+ mutex_unlock(&priv->hw->slaves_mtx);
+ }
+
+ mutex_lock(&priv->hw->slaves_mtx);
+ priv->running = true;
+ mutex_unlock(&priv->hw->slaves_mtx);
+
if (ipriv->open_count++ == 0) {
res = ipriv->ops->start(&ipriv->hw);
WARN_ON(res);
@@ -46,7 +66,9 @@ int mac802154_slave_open(struct net_device *dev)
}
if (ipriv->ops->ieee_addr) {
- res = ipriv->ops->ieee_addr(&ipriv->hw, dev->dev_addr);
+ __le64 addr = ieee802154_devaddr_from_raw(dev->dev_addr);
+
+ res = ipriv->ops->ieee_addr(&ipriv->hw, addr);
WARN_ON(res);
if (res)
goto err;
@@ -66,8 +88,14 @@ int mac802154_slave_close(struct net_device *dev)
struct mac802154_sub_if_data *priv = netdev_priv(dev);
struct mac802154_priv *ipriv = priv->hw;
+ ASSERT_RTNL();
+
netif_stop_queue(dev);
+ mutex_lock(&priv->hw->slaves_mtx);
+ priv->running = false;
+ mutex_unlock(&priv->hw->slaves_mtx);
+
if (!--ipriv->open_count)
ipriv->ops->stop(&ipriv->hw);
@@ -165,6 +193,49 @@ err:
return ERR_PTR(err);
}
+static int mac802154_set_txpower(struct wpan_phy *phy, int db)
+{
+ struct mac802154_priv *priv = wpan_phy_priv(phy);
+
+ return priv->ops->set_txpower(&priv->hw, db);
+}
+
+static int mac802154_set_lbt(struct wpan_phy *phy, bool on)
+{
+ struct mac802154_priv *priv = wpan_phy_priv(phy);
+
+ return priv->ops->set_lbt(&priv->hw, on);
+}
+
+static int mac802154_set_cca_mode(struct wpan_phy *phy, u8 mode)
+{
+ struct mac802154_priv *priv = wpan_phy_priv(phy);
+
+ return priv->ops->set_cca_mode(&priv->hw, mode);
+}
+
+static int mac802154_set_cca_ed_level(struct wpan_phy *phy, s32 level)
+{
+ struct mac802154_priv *priv = wpan_phy_priv(phy);
+
+ return priv->ops->set_cca_ed_level(&priv->hw, level);
+}
+
+static int mac802154_set_csma_params(struct wpan_phy *phy, u8 min_be,
+ u8 max_be, u8 retries)
+{
+ struct mac802154_priv *priv = wpan_phy_priv(phy);
+
+ return priv->ops->set_csma_params(&priv->hw, min_be, max_be, retries);
+}
+
+static int mac802154_set_frame_retries(struct wpan_phy *phy, s8 retries)
+{
+ struct mac802154_priv *priv = wpan_phy_priv(phy);
+
+ return priv->ops->set_frame_retries(&priv->hw, retries);
+}
+
struct ieee802154_dev *
ieee802154_alloc_device(size_t priv_data_len, struct ieee802154_ops *ops)
{
@@ -242,6 +313,18 @@ int ieee802154_register_device(struct ieee802154_dev *dev)
priv->phy->add_iface = mac802154_add_iface;
priv->phy->del_iface = mac802154_del_iface;
+ if (priv->ops->set_txpower)
+ priv->phy->set_txpower = mac802154_set_txpower;
+ if (priv->ops->set_lbt)
+ priv->phy->set_lbt = mac802154_set_lbt;
+ if (priv->ops->set_cca_mode)
+ priv->phy->set_cca_mode = mac802154_set_cca_mode;
+ if (priv->ops->set_cca_ed_level)
+ priv->phy->set_cca_ed_level = mac802154_set_cca_ed_level;
+ if (priv->ops->set_csma_params)
+ priv->phy->set_csma_params = mac802154_set_csma_params;
+ if (priv->ops->set_frame_retries)
+ priv->phy->set_frame_retries = mac802154_set_frame_retries;
rc = wpan_phy_register(priv->phy);
if (rc < 0)
diff --git a/net/mac802154/llsec.c b/net/mac802154/llsec.c
new file mode 100644
index 00000000000..1456f73b02b
--- /dev/null
+++ b/net/mac802154/llsec.c
@@ -0,0 +1,1070 @@
+/*
+ * Copyright (C) 2014 Fraunhofer ITWM
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Written by:
+ * Phoebe Buckheister <phoebe.buckheister@itwm.fraunhofer.de>
+ */
+
+#include <linux/err.h>
+#include <linux/bug.h>
+#include <linux/completion.h>
+#include <net/ieee802154.h>
+#include <crypto/algapi.h>
+
+#include "mac802154.h"
+#include "llsec.h"
+
+static void llsec_key_put(struct mac802154_llsec_key *key);
+static bool llsec_key_id_equal(const struct ieee802154_llsec_key_id *a,
+ const struct ieee802154_llsec_key_id *b);
+
+static void llsec_dev_free(struct mac802154_llsec_device *dev);
+
+void mac802154_llsec_init(struct mac802154_llsec *sec)
+{
+ memset(sec, 0, sizeof(*sec));
+
+ memset(&sec->params.default_key_source, 0xFF, IEEE802154_ADDR_LEN);
+
+ INIT_LIST_HEAD(&sec->table.security_levels);
+ INIT_LIST_HEAD(&sec->table.devices);
+ INIT_LIST_HEAD(&sec->table.keys);
+ hash_init(sec->devices_short);
+ hash_init(sec->devices_hw);
+ rwlock_init(&sec->lock);
+}
+
+void mac802154_llsec_destroy(struct mac802154_llsec *sec)
+{
+ struct ieee802154_llsec_seclevel *sl, *sn;
+ struct ieee802154_llsec_device *dev, *dn;
+ struct ieee802154_llsec_key_entry *key, *kn;
+
+ list_for_each_entry_safe(sl, sn, &sec->table.security_levels, list) {
+ struct mac802154_llsec_seclevel *msl;
+
+ msl = container_of(sl, struct mac802154_llsec_seclevel, level);
+ list_del(&sl->list);
+ kfree(msl);
+ }
+
+ list_for_each_entry_safe(dev, dn, &sec->table.devices, list) {
+ struct mac802154_llsec_device *mdev;
+
+ mdev = container_of(dev, struct mac802154_llsec_device, dev);
+ list_del(&dev->list);
+ llsec_dev_free(mdev);
+ }
+
+ list_for_each_entry_safe(key, kn, &sec->table.keys, list) {
+ struct mac802154_llsec_key *mkey;
+
+ mkey = container_of(key->key, struct mac802154_llsec_key, key);
+ list_del(&key->list);
+ llsec_key_put(mkey);
+ kfree(key);
+ }
+}
+
+
+
+int mac802154_llsec_get_params(struct mac802154_llsec *sec,
+ struct ieee802154_llsec_params *params)
+{
+ read_lock_bh(&sec->lock);
+ *params = sec->params;
+ read_unlock_bh(&sec->lock);
+
+ return 0;
+}
+
+int mac802154_llsec_set_params(struct mac802154_llsec *sec,
+ const struct ieee802154_llsec_params *params,
+ int changed)
+{
+ write_lock_bh(&sec->lock);
+
+ if (changed & IEEE802154_LLSEC_PARAM_ENABLED)
+ sec->params.enabled = params->enabled;
+ if (changed & IEEE802154_LLSEC_PARAM_FRAME_COUNTER)
+ sec->params.frame_counter = params->frame_counter;
+ if (changed & IEEE802154_LLSEC_PARAM_OUT_LEVEL)
+ sec->params.out_level = params->out_level;
+ if (changed & IEEE802154_LLSEC_PARAM_OUT_KEY)
+ sec->params.out_key = params->out_key;
+ if (changed & IEEE802154_LLSEC_PARAM_KEY_SOURCE)
+ sec->params.default_key_source = params->default_key_source;
+ if (changed & IEEE802154_LLSEC_PARAM_PAN_ID)
+ sec->params.pan_id = params->pan_id;
+ if (changed & IEEE802154_LLSEC_PARAM_HWADDR)
+ sec->params.hwaddr = params->hwaddr;
+ if (changed & IEEE802154_LLSEC_PARAM_COORD_HWADDR)
+ sec->params.coord_hwaddr = params->coord_hwaddr;
+ if (changed & IEEE802154_LLSEC_PARAM_COORD_SHORTADDR)
+ sec->params.coord_shortaddr = params->coord_shortaddr;
+
+ write_unlock_bh(&sec->lock);
+
+ return 0;
+}
+
+
+
+static struct mac802154_llsec_key*
+llsec_key_alloc(const struct ieee802154_llsec_key *template)
+{
+ const int authsizes[3] = { 4, 8, 16 };
+ struct mac802154_llsec_key *key;
+ int i;
+
+ key = kzalloc(sizeof(*key), GFP_KERNEL);
+ if (!key)
+ return NULL;
+
+ kref_init(&key->ref);
+ key->key = *template;
+
+ BUILD_BUG_ON(ARRAY_SIZE(authsizes) != ARRAY_SIZE(key->tfm));
+
+ for (i = 0; i < ARRAY_SIZE(key->tfm); i++) {
+ key->tfm[i] = crypto_alloc_aead("ccm(aes)", 0,
+ CRYPTO_ALG_ASYNC);
+ if (!key->tfm[i])
+ goto err_tfm;
+ if (crypto_aead_setkey(key->tfm[i], template->key,
+ IEEE802154_LLSEC_KEY_SIZE))
+ goto err_tfm;
+ if (crypto_aead_setauthsize(key->tfm[i], authsizes[i]))
+ goto err_tfm;
+ }
+
+ key->tfm0 = crypto_alloc_blkcipher("ctr(aes)", 0, CRYPTO_ALG_ASYNC);
+ if (!key->tfm0)
+ goto err_tfm;
+
+ if (crypto_blkcipher_setkey(key->tfm0, template->key,
+ IEEE802154_LLSEC_KEY_SIZE))
+ goto err_tfm0;
+
+ return key;
+
+err_tfm0:
+ crypto_free_blkcipher(key->tfm0);
+err_tfm:
+ for (i = 0; i < ARRAY_SIZE(key->tfm); i++)
+ if (key->tfm[i])
+ crypto_free_aead(key->tfm[i]);
+
+ kfree(key);
+ return NULL;
+}
+
+static void llsec_key_release(struct kref *ref)
+{
+ struct mac802154_llsec_key *key;
+ int i;
+
+ key = container_of(ref, struct mac802154_llsec_key, ref);
+
+ for (i = 0; i < ARRAY_SIZE(key->tfm); i++)
+ crypto_free_aead(key->tfm[i]);
+
+ crypto_free_blkcipher(key->tfm0);
+ kfree(key);
+}
+
+static struct mac802154_llsec_key*
+llsec_key_get(struct mac802154_llsec_key *key)
+{
+ kref_get(&key->ref);
+ return key;
+}
+
+static void llsec_key_put(struct mac802154_llsec_key *key)
+{
+ kref_put(&key->ref, llsec_key_release);
+}
+
+static bool llsec_key_id_equal(const struct ieee802154_llsec_key_id *a,
+ const struct ieee802154_llsec_key_id *b)
+{
+ if (a->mode != b->mode)
+ return false;
+
+ if (a->mode == IEEE802154_SCF_KEY_IMPLICIT)
+ return ieee802154_addr_equal(&a->device_addr, &b->device_addr);
+
+ if (a->id != b->id)
+ return false;
+
+ switch (a->mode) {
+ case IEEE802154_SCF_KEY_INDEX:
+ return true;
+ case IEEE802154_SCF_KEY_SHORT_INDEX:
+ return a->short_source == b->short_source;
+ case IEEE802154_SCF_KEY_HW_INDEX:
+ return a->extended_source == b->extended_source;
+ }
+
+ return false;
+}
+
+int mac802154_llsec_key_add(struct mac802154_llsec *sec,
+ const struct ieee802154_llsec_key_id *id,
+ const struct ieee802154_llsec_key *key)
+{
+ struct mac802154_llsec_key *mkey = NULL;
+ struct ieee802154_llsec_key_entry *pos, *new;
+
+ if (!(key->frame_types & (1 << IEEE802154_FC_TYPE_MAC_CMD)) &&
+ key->cmd_frame_ids)
+ return -EINVAL;
+
+ list_for_each_entry(pos, &sec->table.keys, list) {
+ if (llsec_key_id_equal(&pos->id, id))
+ return -EEXIST;
+
+ if (memcmp(pos->key->key, key->key,
+ IEEE802154_LLSEC_KEY_SIZE))
+ continue;
+
+ mkey = container_of(pos->key, struct mac802154_llsec_key, key);
+
+ /* Don't allow multiple instances of the same AES key to have
+ * different allowed frame types/command frame ids, as this is
+ * not possible in the 802.15.4 PIB.
+ */
+ if (pos->key->frame_types != key->frame_types ||
+ pos->key->cmd_frame_ids != key->cmd_frame_ids)
+ return -EEXIST;
+
+ break;
+ }
+
+ new = kzalloc(sizeof(*new), GFP_KERNEL);
+ if (!new)
+ return -ENOMEM;
+
+ if (!mkey)
+ mkey = llsec_key_alloc(key);
+ else
+ mkey = llsec_key_get(mkey);
+
+ if (!mkey)
+ goto fail;
+
+ new->id = *id;
+ new->key = &mkey->key;
+
+ list_add_rcu(&new->list, &sec->table.keys);
+
+ return 0;
+
+fail:
+ kfree(new);
+ return -ENOMEM;
+}
+
+int mac802154_llsec_key_del(struct mac802154_llsec *sec,
+ const struct ieee802154_llsec_key_id *key)
+{
+ struct ieee802154_llsec_key_entry *pos;
+
+ list_for_each_entry(pos, &sec->table.keys, list) {
+ struct mac802154_llsec_key *mkey;
+
+ mkey = container_of(pos->key, struct mac802154_llsec_key, key);
+
+ if (llsec_key_id_equal(&pos->id, key)) {
+ list_del_rcu(&pos->list);
+ llsec_key_put(mkey);
+ return 0;
+ }
+ }
+
+ return -ENOENT;
+}
+
+
+
+static bool llsec_dev_use_shortaddr(__le16 short_addr)
+{
+ return short_addr != cpu_to_le16(IEEE802154_ADDR_UNDEF) &&
+ short_addr != cpu_to_le16(0xffff);
+}
+
+static u32 llsec_dev_hash_short(__le16 short_addr, __le16 pan_id)
+{
+ return ((__force u16) short_addr) << 16 | (__force u16) pan_id;
+}
+
+static u64 llsec_dev_hash_long(__le64 hwaddr)
+{
+ return (__force u64) hwaddr;
+}
+
+static struct mac802154_llsec_device*
+llsec_dev_find_short(struct mac802154_llsec *sec, __le16 short_addr,
+ __le16 pan_id)
+{
+ struct mac802154_llsec_device *dev;
+ u32 key = llsec_dev_hash_short(short_addr, pan_id);
+
+ hash_for_each_possible_rcu(sec->devices_short, dev, bucket_s, key) {
+ if (dev->dev.short_addr == short_addr &&
+ dev->dev.pan_id == pan_id)
+ return dev;
+ }
+
+ return NULL;
+}
+
+static struct mac802154_llsec_device*
+llsec_dev_find_long(struct mac802154_llsec *sec, __le64 hwaddr)
+{
+ struct mac802154_llsec_device *dev;
+ u64 key = llsec_dev_hash_long(hwaddr);
+
+ hash_for_each_possible_rcu(sec->devices_hw, dev, bucket_hw, key) {
+ if (dev->dev.hwaddr == hwaddr)
+ return dev;
+ }
+
+ return NULL;
+}
+
+static void llsec_dev_free(struct mac802154_llsec_device *dev)
+{
+ struct ieee802154_llsec_device_key *pos, *pn;
+ struct mac802154_llsec_device_key *devkey;
+
+ list_for_each_entry_safe(pos, pn, &dev->dev.keys, list) {
+ devkey = container_of(pos, struct mac802154_llsec_device_key,
+ devkey);
+
+ list_del(&pos->list);
+ kfree(devkey);
+ }
+
+ kfree(dev);
+}
+
+int mac802154_llsec_dev_add(struct mac802154_llsec *sec,
+ const struct ieee802154_llsec_device *dev)
+{
+ struct mac802154_llsec_device *entry;
+ u32 skey = llsec_dev_hash_short(dev->short_addr, dev->pan_id);
+ u64 hwkey = llsec_dev_hash_long(dev->hwaddr);
+
+ BUILD_BUG_ON(sizeof(hwkey) != IEEE802154_ADDR_LEN);
+
+ if ((llsec_dev_use_shortaddr(dev->short_addr) &&
+ llsec_dev_find_short(sec, dev->short_addr, dev->pan_id)) ||
+ llsec_dev_find_long(sec, dev->hwaddr))
+ return -EEXIST;
+
+ entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry)
+ return -ENOMEM;
+
+ entry->dev = *dev;
+ spin_lock_init(&entry->lock);
+ INIT_LIST_HEAD(&entry->dev.keys);
+
+ if (llsec_dev_use_shortaddr(dev->short_addr))
+ hash_add_rcu(sec->devices_short, &entry->bucket_s, skey);
+ else
+ INIT_HLIST_NODE(&entry->bucket_s);
+
+ hash_add_rcu(sec->devices_hw, &entry->bucket_hw, hwkey);
+ list_add_tail_rcu(&entry->dev.list, &sec->table.devices);
+
+ return 0;
+}
+
+static void llsec_dev_free_rcu(struct rcu_head *rcu)
+{
+ llsec_dev_free(container_of(rcu, struct mac802154_llsec_device, rcu));
+}
+
+int mac802154_llsec_dev_del(struct mac802154_llsec *sec, __le64 device_addr)
+{
+ struct mac802154_llsec_device *pos;
+
+ pos = llsec_dev_find_long(sec, device_addr);
+ if (!pos)
+ return -ENOENT;
+
+ hash_del_rcu(&pos->bucket_s);
+ hash_del_rcu(&pos->bucket_hw);
+ call_rcu(&pos->rcu, llsec_dev_free_rcu);
+
+ return 0;
+}
+
+
+
+static struct mac802154_llsec_device_key*
+llsec_devkey_find(struct mac802154_llsec_device *dev,
+ const struct ieee802154_llsec_key_id *key)
+{
+ struct ieee802154_llsec_device_key *devkey;
+
+ list_for_each_entry_rcu(devkey, &dev->dev.keys, list) {
+ if (!llsec_key_id_equal(key, &devkey->key_id))
+ continue;
+
+ return container_of(devkey, struct mac802154_llsec_device_key,
+ devkey);
+ }
+
+ return NULL;
+}
+
+int mac802154_llsec_devkey_add(struct mac802154_llsec *sec,
+ __le64 dev_addr,
+ const struct ieee802154_llsec_device_key *key)
+{
+ struct mac802154_llsec_device *dev;
+ struct mac802154_llsec_device_key *devkey;
+
+ dev = llsec_dev_find_long(sec, dev_addr);
+
+ if (!dev)
+ return -ENOENT;
+
+ if (llsec_devkey_find(dev, &key->key_id))
+ return -EEXIST;
+
+ devkey = kmalloc(sizeof(*devkey), GFP_KERNEL);
+ if (!devkey)
+ return -ENOMEM;
+
+ devkey->devkey = *key;
+ list_add_tail_rcu(&devkey->devkey.list, &dev->dev.keys);
+ return 0;
+}
+
+int mac802154_llsec_devkey_del(struct mac802154_llsec *sec,
+ __le64 dev_addr,
+ const struct ieee802154_llsec_device_key *key)
+{
+ struct mac802154_llsec_device *dev;
+ struct mac802154_llsec_device_key *devkey;
+
+ dev = llsec_dev_find_long(sec, dev_addr);
+
+ if (!dev)
+ return -ENOENT;
+
+ devkey = llsec_devkey_find(dev, &key->key_id);
+ if (!devkey)
+ return -ENOENT;
+
+ list_del_rcu(&devkey->devkey.list);
+ kfree_rcu(devkey, rcu);
+ return 0;
+}
+
+
+
+static struct mac802154_llsec_seclevel*
+llsec_find_seclevel(const struct mac802154_llsec *sec,
+ const struct ieee802154_llsec_seclevel *sl)
+{
+ struct ieee802154_llsec_seclevel *pos;
+
+ list_for_each_entry(pos, &sec->table.security_levels, list) {
+ if (pos->frame_type != sl->frame_type ||
+ (pos->frame_type == IEEE802154_FC_TYPE_MAC_CMD &&
+ pos->cmd_frame_id != sl->cmd_frame_id) ||
+ pos->device_override != sl->device_override ||
+ pos->sec_levels != sl->sec_levels)
+ continue;
+
+ return container_of(pos, struct mac802154_llsec_seclevel,
+ level);
+ }
+
+ return NULL;
+}
+
+int mac802154_llsec_seclevel_add(struct mac802154_llsec *sec,
+ const struct ieee802154_llsec_seclevel *sl)
+{
+ struct mac802154_llsec_seclevel *entry;
+
+ if (llsec_find_seclevel(sec, sl))
+ return -EEXIST;
+
+ entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry)
+ return -ENOMEM;
+
+ entry->level = *sl;
+
+ list_add_tail_rcu(&entry->level.list, &sec->table.security_levels);
+
+ return 0;
+}
+
+int mac802154_llsec_seclevel_del(struct mac802154_llsec *sec,
+ const struct ieee802154_llsec_seclevel *sl)
+{
+ struct mac802154_llsec_seclevel *pos;
+
+ pos = llsec_find_seclevel(sec, sl);
+ if (!pos)
+ return -ENOENT;
+
+ list_del_rcu(&pos->level.list);
+ kfree_rcu(pos, rcu);
+
+ return 0;
+}
+
+
+
+/*
+ * Fill @addr with the coordinator address taken from the llsec params.
+ *
+ * addr->pan_id is always overwritten. A broadcast coordinator short
+ * address yields -EINVAL; an undefined one selects the coordinator's
+ * extended address; any other value is used as a short address.
+ *
+ * Returns 0 on success, -EINVAL otherwise.
+ */
+static int llsec_recover_addr(struct mac802154_llsec *sec,
+			      struct ieee802154_addr *addr)
+{
+	__le16 coord_short = sec->params.coord_shortaddr;
+
+	addr->pan_id = sec->params.pan_id;
+
+	if (coord_short == cpu_to_le16(IEEE802154_ADDR_BROADCAST))
+		return -EINVAL;
+
+	if (coord_short == cpu_to_le16(IEEE802154_ADDR_UNDEF)) {
+		addr->extended_addr = sec->params.coord_hwaddr;
+		addr->mode = IEEE802154_ADDR_LONG;
+		return 0;
+	}
+
+	addr->short_addr = sec->params.coord_shortaddr;
+	addr->mode = IEEE802154_ADDR_SHORT;
+
+	return 0;
+}
+
+/*
+ * Find the key to use for a frame.
+ *
+ * sec:    security state whose key table (sec->table.keys) is searched
+ * hdr:    parsed frame header; supplies the frame type and the key id
+ *         mode, key id and key source of the security header
+ * addr:   peer address used for implicit key lookup; only a local copy
+ *         is modified
+ * key_id: optional; when non-NULL it receives a copy of the matching
+ *         table entry's id
+ *
+ * Returns the result of llsec_key_get() on the matching key, or NULL
+ * when nothing matches or an implicit lookup has no usable address.
+ * NOTE(review): llsec_key_get() is defined outside this view; it
+ * presumably takes a reference that callers drop with llsec_key_put().
+ *
+ * The table is walked with the _rcu list iterator; the encrypt and
+ * decrypt entry points call this inside rcu_read_lock().
+ */
+static struct mac802154_llsec_key*
+llsec_lookup_key(struct mac802154_llsec *sec,
+ const struct ieee802154_hdr *hdr,
+ const struct ieee802154_addr *addr,
+ struct ieee802154_llsec_key_id *key_id)
+{
+ struct ieee802154_addr devaddr = *addr;
+ u8 key_id_mode = hdr->sec.key_id_mode;
+ struct ieee802154_llsec_key_entry *key_entry;
+ struct mac802154_llsec_key *key;
+
+ /* implicit key but no peer address in the frame: beacons use the
+ * coordinator's extended address, everything else the coordinator
+ * address rebuilt by llsec_recover_addr()
+ */
+ if (key_id_mode == IEEE802154_SCF_KEY_IMPLICIT &&
+ devaddr.mode == IEEE802154_ADDR_NONE) {
+ if (hdr->fc.type == IEEE802154_FC_TYPE_BEACON) {
+ devaddr.extended_addr = sec->params.coord_hwaddr;
+ devaddr.mode = IEEE802154_ADDR_LONG;
+ } else if (llsec_recover_addr(sec, &devaddr) < 0) {
+ return NULL;
+ }
+ }
+
+ list_for_each_entry_rcu(key_entry, &sec->table.keys, list) {
+ const struct ieee802154_llsec_key_id *id = &key_entry->id;
+
+ /* key is not enabled for this frame type */
+ if (!(key_entry->key->frame_types & BIT(hdr->fc.type)))
+ continue;
+
+ if (id->mode != key_id_mode)
+ continue;
+
+ if (key_id_mode == IEEE802154_SCF_KEY_IMPLICIT) {
+ /* implicit ids are matched by device address alone */
+ if (ieee802154_addr_equal(&devaddr, &id->device_addr))
+ goto found;
+ } else {
+ /* explicit ids: key id must match, plus the key
+ * source for the short/hw indexed modes
+ */
+ if (id->id != hdr->sec.key_id)
+ continue;
+
+ if ((key_id_mode == IEEE802154_SCF_KEY_INDEX) ||
+ (key_id_mode == IEEE802154_SCF_KEY_SHORT_INDEX &&
+ id->short_source == hdr->sec.short_src) ||
+ (key_id_mode == IEEE802154_SCF_KEY_HW_INDEX &&
+ id->extended_source == hdr->sec.extended_src))
+ goto found;
+ }
+ }
+
+ return NULL;
+
+found:
+ key = container_of(key_entry->key, struct mac802154_llsec_key, key);
+ if (key_id)
+ *key_id = key_entry->id;
+ return llsec_key_get(key);
+}
+
+
+/*
+ * Build the 16 byte IV handed to the cipher.
+ *
+ * Layout written below:
+ *   iv[0]       1
+ *   iv[1..8]    @addr, byte-swapped
+ *   iv[9..12]   frame counter from @sec, byte-swapped
+ *   iv[13]      security level from @sec
+ *   iv[14..15]  0x00, 0x01
+ */
+static void llsec_geniv(u8 iv[16], __le64 addr,
+ const struct ieee802154_sechdr *sec)
+{
+ __be64 addr_bytes = (__force __be64) swab64((__force u64) addr);
+ __be32 frame_counter = (__force __be32) swab32((__force u32) sec->frame_counter);
+
+ iv[0] = 1; /* L' = L - 1 = 1 */
+ memcpy(iv + 1, &addr_bytes, sizeof(addr_bytes));
+ memcpy(iv + 9, &frame_counter, sizeof(frame_counter));
+ iv[13] = sec->level;
+ iv[14] = 0;
+ iv[15] = 1;
+}
+
+/*
+ * Encrypt the frame payload in place for the encryption-only security
+ * level (no authentication tag).
+ *
+ * skb: frame with the MAC header already pushed; skb_mac_header() and
+ *      skb->mac_len must describe that header
+ * sec: security state supplying the local extended address for the IV
+ * hdr: parsed header whose security header feeds the IV
+ * key: key whose tfm0 block cipher does the work
+ *
+ * Only the bytes behind the MAC header are transformed. That is the
+ * region llsec_do_decrypt_unauth() decrypts on reception, and the header
+ * has to stay readable for the receiver to parse it. The caller pushes
+ * the header and resets the mac header right before calling in, so
+ * skb->data itself still points at the header here.
+ *
+ * Returns the result of crypto_blkcipher_encrypt_iv().
+ */
+static int
+llsec_do_encrypt_unauth(struct sk_buff *skb, const struct mac802154_llsec *sec,
+			const struct ieee802154_hdr *hdr,
+			struct mac802154_llsec_key *key)
+{
+	u8 iv[16];
+	unsigned char *data;
+	int datalen;
+	struct scatterlist src;
+	struct blkcipher_desc req = {
+		.tfm = key->tfm0,
+		.info = iv,
+		.flags = 0,
+	};
+
+	llsec_geniv(iv, sec->params.hwaddr, &hdr->sec);
+
+	/* payload starts right after the MAC header */
+	data = skb_mac_header(skb) + skb->mac_len;
+	datalen = skb_tail_pointer(skb) - data;
+
+	sg_init_one(&src, data, datalen);
+	return crypto_blkcipher_encrypt_iv(&req, &src, &src, datalen);
+}
+
+/*
+ * Pick the AEAD transform of @key whose configured authsize equals
+ * @authlen. Hitting the end of the array without a match is treated as
+ * a fatal programming error (BUG()).
+ */
+static struct crypto_aead*
+llsec_tfm_by_len(struct mac802154_llsec_key *key, int authlen)
+{
+	int slot = 0;
+
+	while (slot < ARRAY_SIZE(key->tfm)) {
+		struct crypto_aead *candidate = key->tfm[slot];
+
+		if (crypto_aead_authsize(candidate) == authlen)
+			return candidate;
+
+		slot++;
+	}
+
+	BUG();
+}
+
+/*
+ * Protect an outgoing frame with an authenticated security level.
+ *
+ * skb: frame with the MAC header pushed; skb_mac_header()/skb->mac_len
+ *      describe the header
+ * sec: security state supplying the local extended address for the IV
+ * hdr: parsed header; its security level decides whether the payload is
+ *      encrypted or only authenticated
+ * key: key holding one AEAD transform per tag length
+ *
+ * The MAC header is always associated data. When the level has the ENC
+ * bit set the payload is encrypted in place; otherwise the payload is
+ * added to the associated data and nothing is encrypted. The tag is
+ * appended to the skb with skb_put().
+ *
+ * Returns -ENOMEM if no request can be allocated, otherwise the result
+ * of crypto_aead_encrypt().
+ */
+static int
+llsec_do_encrypt_auth(struct sk_buff *skb, const struct mac802154_llsec *sec,
+		      const struct ieee802154_hdr *hdr,
+		      struct mac802154_llsec_key *key)
+{
+	u8 iv[16];
+	unsigned char *data;
+	int authlen, assoclen, datalen, rc;
+	struct scatterlist src, assoc[2], dst[2];
+	struct aead_request *req;
+
+	authlen = ieee802154_sechdr_authtag_len(&hdr->sec);
+	llsec_geniv(iv, sec->params.hwaddr, &hdr->sec);
+
+	req = aead_request_alloc(llsec_tfm_by_len(key, authlen), GFP_ATOMIC);
+	if (!req)
+		return -ENOMEM;
+
+	sg_init_table(assoc, 2);
+	sg_set_buf(&assoc[0], skb_mac_header(skb), skb->mac_len);
+	assoclen = skb->mac_len;
+
+	data = skb_mac_header(skb) + skb->mac_len;
+	datalen = skb_tail_pointer(skb) - data;
+
+	if (hdr->sec.level & IEEE802154_SCF_SECLEVEL_ENC) {
+		sg_set_buf(&assoc[1], data, 0);
+	} else {
+		/* MIC only: payload is authenticated, not encrypted */
+		sg_set_buf(&assoc[1], data, datalen);
+		assoclen += datalen;
+		datalen = 0;
+	}
+
+	sg_init_one(&src, data, datalen);
+
+	/* destination = payload in place, followed by the new tag bytes */
+	sg_init_table(dst, 2);
+	sg_set_buf(&dst[0], data, datalen);
+	sg_set_buf(&dst[1], skb_put(skb, authlen), authlen);
+
+	aead_request_set_callback(req, 0, NULL, NULL);
+	aead_request_set_assoc(req, assoc, assoclen);
+	aead_request_set_crypt(req, &src, dst, datalen, iv);
+
+	rc = crypto_aead_encrypt(req);
+
+	/* release through the crypto API, the counterpart of
+	 * aead_request_alloc(), instead of a bare kfree()
+	 */
+	aead_request_free(req);
+
+	return rc;
+}
+
+/*
+ * Dispatch on the security level: the encryption-only level goes to the
+ * plain block cipher path, every other level to the AEAD path.
+ */
+static int llsec_do_encrypt(struct sk_buff *skb,
+			    const struct mac802154_llsec *sec,
+			    const struct ieee802154_hdr *hdr,
+			    struct mac802154_llsec_key *key)
+{
+	if (hdr->sec.level != IEEE802154_SCF_SECLEVEL_ENC)
+		return llsec_do_encrypt_auth(skb, sec, hdr, key);
+
+	return llsec_do_encrypt_unauth(skb, sec, hdr, key);
+}
+
+/*
+ * Apply link-layer security to an outgoing frame.
+ *
+ * The MAC header is pulled off the skb and parsed. Frames whose header
+ * cannot be parsed, or that are not data frames, are rejected with
+ * -EINVAL. Frames without the security-enabled bit, or with security
+ * level 0, get their header bytes pushed back and are returned
+ * unchanged (0). Otherwise the key is looked up by destination address,
+ * the outgoing frame counter is consumed, the header is serialised again
+ * with that counter and the frame is handed to llsec_do_encrypt().
+ *
+ * Returns 0 on success or a negative errno: -EINVAL (also when llsec is
+ * disabled in the params), -EMSGSIZE (secured frame would exceed
+ * IEEE802154_MTU), -ENOKEY, -EOVERFLOW (frame counter exhausted) or
+ * whatever llsec_do_encrypt() reports.
+ *
+ * NOTE(review): on the error returns taken after ieee802154_hdr_pull()
+ * the header is not pushed back; presumably the caller discards the skb
+ * on error - confirm.
+ */
+int mac802154_llsec_encrypt(struct mac802154_llsec *sec, struct sk_buff *skb)
+{
+ struct ieee802154_hdr hdr;
+ int rc, authlen, hlen;
+ struct mac802154_llsec_key *key;
+ u32 frame_ctr;
+
+ hlen = ieee802154_hdr_pull(skb, &hdr);
+
+ if (hlen < 0 || hdr.fc.type != IEEE802154_FC_TYPE_DATA)
+ return -EINVAL;
+
+ /* nothing to protect: restore the header bytes and pass through */
+ if (!hdr.fc.security_enabled || hdr.sec.level == 0) {
+ skb_push(skb, hlen);
+ return 0;
+ }
+
+ authlen = ieee802154_sechdr_authtag_len(&hdr.sec);
+
+ /* header + payload + tag + MFR must still fit into one frame */
+ if (skb->len + hlen + authlen + IEEE802154_MFR_SIZE > IEEE802154_MTU)
+ return -EMSGSIZE;
+
+ rcu_read_lock();
+
+ read_lock_bh(&sec->lock);
+
+ if (!sec->params.enabled) {
+ rc = -EINVAL;
+ goto fail_read;
+ }
+
+ key = llsec_lookup_key(sec, &hdr, &hdr.dest, NULL);
+ if (!key) {
+ rc = -ENOKEY;
+ goto fail_read;
+ }
+
+ read_unlock_bh(&sec->lock);
+
+ /* the frame counter lives in params, so updating it needs the
+ * write side of sec->lock
+ */
+ write_lock_bh(&sec->lock);
+
+ frame_ctr = be32_to_cpu(sec->params.frame_counter);
+ hdr.sec.frame_counter = cpu_to_le32(frame_ctr);
+ if (frame_ctr == 0xFFFFFFFF) {
+ write_unlock_bh(&sec->lock);
+ llsec_key_put(key);
+ rc = -EOVERFLOW;
+ goto fail;
+ }
+
+ sec->params.frame_counter = cpu_to_be32(frame_ctr + 1);
+
+ write_unlock_bh(&sec->lock);
+
+ rcu_read_unlock();
+
+ /* re-serialise the header (now carrying the frame counter) and
+ * record where it starts; the llsec_do_encrypt_auth() path locates
+ * the payload through skb_mac_header() and skb->mac_len
+ */
+ skb->mac_len = ieee802154_hdr_push(skb, &hdr);
+ skb_reset_mac_header(skb);
+
+ rc = llsec_do_encrypt(skb, sec, &hdr, key);
+ llsec_key_put(key);
+
+ return rc;
+
+fail_read:
+ read_unlock_bh(&sec->lock);
+fail:
+ rcu_read_unlock();
+ return rc;
+}
+
+
+
+/*
+ * Look up the device table entry for the peer described by @addr.
+ *
+ * If @addr carries no address (IEEE802154_ADDR_NONE) the coordinator
+ * address from llsec_recover_addr() is used instead. Short addresses
+ * are resolved through the devices_short hash and must match on both
+ * pan_id and short_addr; every other address mode is resolved through
+ * the devices_hw hash by extended address.
+ *
+ * Returns the matching device or NULL. The hash walks use the _rcu
+ * iterators; mac802154_llsec_decrypt() calls this inside
+ * rcu_read_lock().
+ */
+static struct mac802154_llsec_device*
+llsec_lookup_dev(struct mac802154_llsec *sec,
+ const struct ieee802154_addr *addr)
+{
+ struct ieee802154_addr devaddr = *addr;
+ struct mac802154_llsec_device *dev = NULL;
+
+ if (devaddr.mode == IEEE802154_ADDR_NONE &&
+ llsec_recover_addr(sec, &devaddr) < 0)
+ return NULL;
+
+ if (devaddr.mode == IEEE802154_ADDR_SHORT) {
+ u32 key = llsec_dev_hash_short(devaddr.short_addr,
+ devaddr.pan_id);
+
+ hash_for_each_possible_rcu(sec->devices_short, dev,
+ bucket_s, key) {
+ if (dev->dev.pan_id == devaddr.pan_id &&
+ dev->dev.short_addr == devaddr.short_addr)
+ return dev;
+ }
+ } else {
+ u64 key = llsec_dev_hash_long(devaddr.extended_addr);
+
+ hash_for_each_possible_rcu(sec->devices_hw, dev,
+ bucket_hw, key) {
+ if (dev->dev.hwaddr == devaddr.extended_addr)
+ return dev;
+ }
+ }
+
+ return NULL;
+}
+
+/*
+ * Copy the first security level entry matching @frame_type into
+ * @rlevel. @cmd_frame_id is only compared when @frame_type is
+ * IEEE802154_FC_TYPE_MAC_CMD.
+ *
+ * Returns 0 when an entry was copied, -EINVAL when none matches.
+ */
+static int
+llsec_lookup_seclevel(const struct mac802154_llsec *sec,
+		      u8 frame_type, u8 cmd_frame_id,
+		      struct ieee802154_llsec_seclevel *rlevel)
+{
+	struct ieee802154_llsec_seclevel *cur;
+
+	list_for_each_entry_rcu(cur, &sec->table.security_levels, list) {
+		if (cur->frame_type != frame_type)
+			continue;
+
+		if (frame_type == IEEE802154_FC_TYPE_MAC_CMD &&
+		    cur->cmd_frame_id != cmd_frame_id)
+			continue;
+
+		*rlevel = *cur;
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * Decrypt the payload of a frame received with the encryption-only
+ * security level.
+ *
+ * The IV is derived from @dev_addr (the sender's extended address as
+ * passed in by the caller) and the frame's security header. Everything
+ * between the end of the MAC header and the skb tail is decrypted in
+ * place with key->tfm0.
+ *
+ * Returns the result of crypto_blkcipher_decrypt_iv().
+ */
+static int
+llsec_do_decrypt_unauth(struct sk_buff *skb, const struct mac802154_llsec *sec,
+ const struct ieee802154_hdr *hdr,
+ struct mac802154_llsec_key *key, __le64 dev_addr)
+{
+ u8 iv[16];
+ unsigned char *data;
+ int datalen;
+ struct scatterlist src;
+ struct blkcipher_desc req = {
+ .tfm = key->tfm0,
+ .info = iv,
+ .flags = 0,
+ };
+
+ llsec_geniv(iv, dev_addr, &hdr->sec);
+ /* payload = bytes after the MAC header */
+ data = skb_mac_header(skb) + skb->mac_len;
+ datalen = skb_tail_pointer(skb) - data;
+
+ sg_init_one(&src, data, datalen);
+
+ return crypto_blkcipher_decrypt_iv(&req, &src, &src, datalen);
+}
+
+/*
+ * Verify (and, if the level has the ENC bit, decrypt) a frame received
+ * with an authenticated security level.
+ *
+ * skb:      received frame; skb_mac_header()/skb->mac_len describe the
+ *           MAC header
+ * sec:      security state (unused here, kept for symmetry with the
+ *           encrypt path)
+ * hdr:      parsed header supplying security level and IV material
+ * key:      key holding one AEAD transform per tag length
+ * dev_addr: sender's extended address, used for the IV
+ *
+ * The MAC header is always associated data. With ENC set, payload and
+ * tag are passed to the AEAD as ciphertext; without it the payload is
+ * associated data and only the trailing tag is passed as ciphertext.
+ * After the crypto call the tag is trimmed off the skb.
+ *
+ * Returns -EINVAL if the frame is too short to contain the tag,
+ * -ENOMEM if no request can be allocated, otherwise the result of
+ * crypto_aead_decrypt().
+ */
+static int
+llsec_do_decrypt_auth(struct sk_buff *skb, const struct mac802154_llsec *sec,
+		      const struct ieee802154_hdr *hdr,
+		      struct mac802154_llsec_key *key, __le64 dev_addr)
+{
+	u8 iv[16];
+	unsigned char *data;
+	int authlen, datalen, assoclen, rc;
+	struct scatterlist src, assoc[2];
+	struct aead_request *req;
+
+	authlen = ieee802154_sechdr_authtag_len(&hdr->sec);
+
+	data = skb_mac_header(skb) + skb->mac_len;
+	datalen = skb_tail_pointer(skb) - data;
+
+	/* a received frame may be shorter than its claimed tag; without
+	 * this check "datalen - authlen" below goes negative and ends up
+	 * as a scatterlist length and pointer offset
+	 */
+	if (datalen < authlen)
+		return -EINVAL;
+
+	llsec_geniv(iv, dev_addr, &hdr->sec);
+
+	req = aead_request_alloc(llsec_tfm_by_len(key, authlen), GFP_ATOMIC);
+	if (!req)
+		return -ENOMEM;
+
+	sg_init_table(assoc, 2);
+	sg_set_buf(&assoc[0], skb_mac_header(skb), skb->mac_len);
+	assoclen = skb->mac_len;
+
+	if (hdr->sec.level & IEEE802154_SCF_SECLEVEL_ENC) {
+		sg_set_buf(&assoc[1], data, 0);
+	} else {
+		/* MIC only: payload is associated data, tag is the input */
+		sg_set_buf(&assoc[1], data, datalen - authlen);
+		assoclen += datalen - authlen;
+		data += datalen - authlen;
+		datalen = authlen;
+	}
+
+	sg_init_one(&src, data, datalen);
+
+	aead_request_set_callback(req, 0, NULL, NULL);
+	aead_request_set_assoc(req, assoc, assoclen);
+	aead_request_set_crypt(req, &src, &src, datalen, iv);
+
+	rc = crypto_aead_decrypt(req);
+
+	/* release through the crypto API, the counterpart of
+	 * aead_request_alloc(), instead of a bare kfree()
+	 */
+	aead_request_free(req);
+	skb_trim(skb, skb->len - authlen);
+
+	return rc;
+}
+
+/*
+ * Dispatch on the security level: the encryption-only level goes to the
+ * plain block cipher path, every other level to the AEAD path.
+ */
+static int
+llsec_do_decrypt(struct sk_buff *skb, const struct mac802154_llsec *sec,
+		 const struct ieee802154_hdr *hdr,
+		 struct mac802154_llsec_key *key, __le64 dev_addr)
+{
+	if (hdr->sec.level != IEEE802154_SCF_SECLEVEL_ENC)
+		return llsec_do_decrypt_auth(skb, sec, hdr, key, dev_addr);
+
+	return llsec_do_decrypt_unauth(skb, sec, hdr, key, dev_addr);
+}
+
+/*
+ * Make sure @dev has a device-key record for @in_key, adding a zeroed
+ * one when it is missing.
+ *
+ * The record is allocated before dev->lock is taken and the lookup is
+ * repeated under the lock, so a record added concurrently wins and the
+ * spare allocation is dropped.
+ *
+ * Returns 0 on success or -ENOMEM.
+ */
+static int
+llsec_update_devkey_record(struct mac802154_llsec_device *dev,
+			   const struct ieee802154_llsec_key_id *in_key)
+{
+	struct mac802154_llsec_device_key *fresh;
+
+	/* already recorded: nothing to do */
+	if (llsec_devkey_find(dev, in_key))
+		return 0;
+
+	fresh = kzalloc(sizeof(*fresh), GFP_ATOMIC);
+	if (!fresh)
+		return -ENOMEM;
+
+	fresh->devkey.key_id = *in_key;
+
+	spin_lock_bh(&dev->lock);
+
+	if (llsec_devkey_find(dev, in_key))
+		kfree(fresh);
+	else
+		list_add_rcu(&fresh->devkey.list, &dev->dev.keys);
+
+	spin_unlock_bh(&dev->lock);
+
+	return 0;
+}
+
+/*
+ * Check an incoming frame counter against the stored one and advance
+ * the stored value.
+ *
+ * In IEEE802154_LLSEC_DEVKEY_RESTRICT mode the device must already have
+ * a record for @in_key (-ENOENT otherwise) and that record's counter is
+ * used. In IEEE802154_LLSEC_DEVKEY_RECORD mode a record is created if
+ * missing, but devkey stays NULL here, so the device-wide counter is
+ * the one compared and updated. In any other mode the device-wide
+ * counter is used as well.
+ *
+ * Returns 0 and stores frame_counter + 1 on success, -EINVAL if
+ * @frame_counter is lower than the stored value, or an error from the
+ * lookup/record step.
+ */
+static int
+llsec_update_devkey_info(struct mac802154_llsec_device *dev,
+ const struct ieee802154_llsec_key_id *in_key,
+ u32 frame_counter)
+{
+ struct mac802154_llsec_device_key *devkey = NULL;
+
+ if (dev->dev.key_mode == IEEE802154_LLSEC_DEVKEY_RESTRICT) {
+ devkey = llsec_devkey_find(dev, in_key);
+ if (!devkey)
+ return -ENOENT;
+ }
+
+ if (dev->dev.key_mode == IEEE802154_LLSEC_DEVKEY_RECORD) {
+ int rc = llsec_update_devkey_record(dev, in_key);
+
+ if (rc < 0)
+ return rc;
+ }
+
+ /* compare and update as one step under dev->lock */
+ spin_lock_bh(&dev->lock);
+
+ if ((!devkey && frame_counter < dev->dev.frame_counter) ||
+ (devkey && frame_counter < devkey->devkey.frame_counter)) {
+ spin_unlock_bh(&dev->lock);
+ return -EINVAL;
+ }
+
+ if (devkey)
+ devkey->devkey.frame_counter = frame_counter + 1;
+ else
+ dev->dev.frame_counter = frame_counter + 1;
+
+ spin_unlock_bh(&dev->lock);
+
+ return 0;
+}
+
+/*
+ * Run incoming link-layer security processing on a received frame.
+ *
+ * The header is only peeked, not pulled. Frames without the
+ * security-enabled bit are accepted unchanged (0). Secured frames are
+ * rejected with -EINVAL when the frame version is 0 or llsec is
+ * disabled. Then, inside an RCU read section, the key (-ENOKEY), the
+ * sending device (-EINVAL) and the security level entry for the frame
+ * type (-EINVAL) are looked up, the frame counter is checked and
+ * recorded, and finally the frame is handed to llsec_do_decrypt().
+ *
+ * Returns 0 on success or a negative errno.
+ */
+int mac802154_llsec_decrypt(struct mac802154_llsec *sec, struct sk_buff *skb)
+{
+ struct ieee802154_hdr hdr;
+ struct mac802154_llsec_key *key;
+ struct ieee802154_llsec_key_id key_id;
+ struct mac802154_llsec_device *dev;
+ struct ieee802154_llsec_seclevel seclevel;
+ int err;
+ __le64 dev_addr;
+ u32 frame_ctr;
+
+ if (ieee802154_hdr_peek(skb, &hdr) < 0)
+ return -EINVAL;
+ if (!hdr.fc.security_enabled)
+ return 0;
+ if (hdr.fc.version == 0)
+ return -EINVAL;
+
+ /* params are protected by sec->lock, everything else by RCU */
+ read_lock_bh(&sec->lock);
+ if (!sec->params.enabled) {
+ read_unlock_bh(&sec->lock);
+ return -EINVAL;
+ }
+ read_unlock_bh(&sec->lock);
+
+ rcu_read_lock();
+
+ key = llsec_lookup_key(sec, &hdr, &hdr.source, &key_id);
+ if (!key) {
+ err = -ENOKEY;
+ goto fail;
+ }
+
+ dev = llsec_lookup_dev(sec, &hdr.source);
+ if (!dev) {
+ err = -EINVAL;
+ goto fail_dev;
+ }
+
+ /* NOTE(review): cmd_frame_id is always passed as 0, so MAC command
+ * frames only match entries with cmd_frame_id == 0 - confirm
+ */
+ if (llsec_lookup_seclevel(sec, hdr.fc.type, 0, &seclevel) < 0) {
+ err = -EINVAL;
+ goto fail_dev;
+ }
+
+ /* NOTE(review): as written the frame is rejected only when its
+ * level is not in sec_levels AND the level is 0 AND device_override
+ * is set AND the device is not exempt; a level that is not allowed
+ * but non-zero passes this check - confirm this is intended
+ */
+ if (!(seclevel.sec_levels & BIT(hdr.sec.level)) &&
+ (hdr.sec.level == 0 && seclevel.device_override &&
+ !dev->dev.seclevel_exempt)) {
+ err = -EINVAL;
+ goto fail_dev;
+ }
+
+ frame_ctr = le32_to_cpu(hdr.sec.frame_counter);
+
+ if (frame_ctr == 0xffffffff) {
+ err = -EOVERFLOW;
+ goto fail_dev;
+ }
+
+ /* NOTE(review): the stored frame counter is advanced here, before
+ * the frame has been decrypted/verified below - confirm
+ */
+ err = llsec_update_devkey_info(dev, &key_id, frame_ctr);
+ if (err)
+ goto fail_dev;
+
+ /* copy what is needed from dev before leaving the RCU section */
+ dev_addr = dev->dev.hwaddr;
+
+ rcu_read_unlock();
+
+ err = llsec_do_decrypt(skb, sec, &hdr, key, dev_addr);
+ llsec_key_put(key);
+ return err;
+
+fail_dev:
+ llsec_key_put(key);
+fail:
+ rcu_read_unlock();
+ return err;
+}
diff --git a/net/mac802154/llsec.h b/net/mac802154/llsec.h
new file mode 100644
index 00000000000..950578e1d7b
--- /dev/null
+++ b/net/mac802154/llsec.h
@@ -0,0 +1,108 @@
+/*
+ * Copyright (C) 2014 Fraunhofer ITWM
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Written by:
+ * Phoebe Buckheister <phoebe.buckheister@itwm.fraunhofer.de>
+ */
+
+#ifndef MAC802154_LLSEC_H
+#define MAC802154_LLSEC_H
+
+#include <linux/slab.h>
+#include <linux/hashtable.h>
+#include <linux/crypto.h>
+#include <linux/kref.h>
+#include <linux/spinlock.h>
+#include <net/af_ieee802154.h>
+#include <net/ieee802154_netdev.h>
+
+/*
+ * mac802154 view of an llsec key: the generic ieee802154_llsec_key plus
+ * the crypto transforms used by the encrypt/decrypt helpers in llsec.c.
+ */
+struct mac802154_llsec_key {
+ struct ieee802154_llsec_key key;
+
+ /* one tfm for each authsize (4/8/16) */
+ struct crypto_aead *tfm[3];
+ /* block cipher used for the encryption-only security level */
+ struct crypto_blkcipher *tfm0;
+
+ /* NOTE(review): presumably driven by llsec_key_get()/llsec_key_put(),
+ * which are defined outside this view - confirm
+ */
+ struct kref ref;
+};
+
+/*
+ * Per-device key record: the generic ieee802154_llsec_device_key plus
+ * the rcu_head that kfree_rcu() uses when the record is deleted.
+ */
+struct mac802154_llsec_device_key {
+ struct ieee802154_llsec_device_key devkey;
+
+ struct rcu_head rcu;
+};
+
+/*
+ * Device table entry: the generic ieee802154_llsec_device plus the
+ * bookkeeping needed to find it by either address form.
+ */
+struct mac802154_llsec_device {
+ struct ieee802154_llsec_device dev;
+
+ /* linkage in mac802154_llsec.devices_short (short address lookup) */
+ struct hlist_node bucket_s;
+ /* linkage in mac802154_llsec.devices_hw (extended address lookup) */
+ struct hlist_node bucket_hw;
+
+ /* protects dev.frame_counter and the elements of dev.keys */
+ spinlock_t lock;
+
+ /* NOTE(review): presumably for deferred freeing of the device; the
+ * code that uses it is outside this view - confirm
+ */
+ struct rcu_head rcu;
+};
+
+/*
+ * Security level table entry: the generic ieee802154_llsec_seclevel
+ * plus the rcu_head that kfree_rcu() uses in
+ * mac802154_llsec_seclevel_del().
+ */
+struct mac802154_llsec_seclevel {
+ struct ieee802154_llsec_seclevel level;
+
+ struct rcu_head rcu;
+};
+
+/*
+ * Complete link-layer security state of one interface: parameters, the
+ * key/device/security-level tables and two hashes for device lookup.
+ */
+struct mac802154_llsec {
+ struct ieee802154_llsec_params params;
+ struct ieee802154_llsec_table table;
+
+ /* devices hashed by llsec_dev_hash_short(short_addr, pan_id) */
+ DECLARE_HASHTABLE(devices_short, 6);
+ /* devices hashed by llsec_dev_hash_long(extended address) */
+ DECLARE_HASHTABLE(devices_hw, 6);
+
+ /* protects params, all other fields are fine with RCU */
+ rwlock_t lock;
+};
+
+void mac802154_llsec_init(struct mac802154_llsec *sec);
+void mac802154_llsec_destroy(struct mac802154_llsec *sec);
+
+/*
+ * Parameter and table accessors. The mac802154_* wrappers in mib.c call
+ * these with the interface's sec_mtx held.
+ */
+int mac802154_llsec_get_params(struct mac802154_llsec *sec,
+ struct ieee802154_llsec_params *params);
+int mac802154_llsec_set_params(struct mac802154_llsec *sec,
+ const struct ieee802154_llsec_params *params,
+ int changed);
+
+int mac802154_llsec_key_add(struct mac802154_llsec *sec,
+ const struct ieee802154_llsec_key_id *id,
+ const struct ieee802154_llsec_key *key);
+int mac802154_llsec_key_del(struct mac802154_llsec *sec,
+ const struct ieee802154_llsec_key_id *key);
+
+int mac802154_llsec_dev_add(struct mac802154_llsec *sec,
+ const struct ieee802154_llsec_device *dev);
+int mac802154_llsec_dev_del(struct mac802154_llsec *sec,
+ __le64 device_addr);
+
+int mac802154_llsec_devkey_add(struct mac802154_llsec *sec,
+ __le64 dev_addr,
+ const struct ieee802154_llsec_device_key *key);
+int mac802154_llsec_devkey_del(struct mac802154_llsec *sec,
+ __le64 dev_addr,
+ const struct ieee802154_llsec_device_key *key);
+
+/*
+ * seclevel_add returns -EEXIST for an identical entry or -ENOMEM;
+ * seclevel_del returns -ENOENT when no identical entry exists.
+ */
+int mac802154_llsec_seclevel_add(struct mac802154_llsec *sec,
+ const struct ieee802154_llsec_seclevel *sl);
+int mac802154_llsec_seclevel_del(struct mac802154_llsec *sec,
+ const struct ieee802154_llsec_seclevel *sl);
+
+/*
+ * Frame processing entry points. Both return 0 for frames that do not
+ * have security enabled and a negative errno on failure.
+ */
+int mac802154_llsec_encrypt(struct mac802154_llsec *sec, struct sk_buff *skb);
+int mac802154_llsec_decrypt(struct mac802154_llsec *sec, struct sk_buff *skb);
+
+#endif /* MAC802154_LLSEC_H */
diff --git a/net/mac802154/mac802154.h b/net/mac802154/mac802154.h
index d48422e2711..762a6f849c6 100644
--- a/net/mac802154/mac802154.h
+++ b/net/mac802154/mac802154.h
@@ -23,6 +23,12 @@
#ifndef MAC802154_H
#define MAC802154_H
+#include <linux/mutex.h>
+#include <net/mac802154.h>
+#include <net/ieee802154_netdev.h>
+
+#include "llsec.h"
+
/* mac802154 device private data */
struct mac802154_priv {
struct ieee802154_dev hw;
@@ -71,19 +77,30 @@ struct mac802154_sub_if_data {
struct net_device *dev;
int type;
+ bool running;
spinlock_t mib_lock;
__le16 pan_id;
__le16 short_addr;
+ __le64 extended_addr;
u8 chan;
u8 page;
+ struct ieee802154_mac_params mac_params;
+
/* MAC BSN field */
u8 bsn;
/* MAC DSN field */
u8 dsn;
+
+ /* protects sec from concurrent access by netlink. access by
+ * encrypt/decrypt/header_create safe without additional protection.
+ */
+ struct mutex sec_mtx;
+
+ struct mac802154_llsec sec;
};
#define mac802154_to_priv(_hw) container_of(_hw, struct mac802154_priv, hw)
@@ -106,12 +123,50 @@ netdev_tx_t mac802154_tx(struct mac802154_priv *priv, struct sk_buff *skb,
u8 page, u8 chan);
/* MIB callbacks */
-void mac802154_dev_set_short_addr(struct net_device *dev, u16 val);
-u16 mac802154_dev_get_short_addr(const struct net_device *dev);
+void mac802154_dev_set_short_addr(struct net_device *dev, __le16 val);
+__le16 mac802154_dev_get_short_addr(const struct net_device *dev);
void mac802154_dev_set_ieee_addr(struct net_device *dev);
-u16 mac802154_dev_get_pan_id(const struct net_device *dev);
-void mac802154_dev_set_pan_id(struct net_device *dev, u16 val);
+__le16 mac802154_dev_get_pan_id(const struct net_device *dev);
+void mac802154_dev_set_pan_id(struct net_device *dev, __le16 val);
void mac802154_dev_set_page_channel(struct net_device *dev, u8 page, u8 chan);
u8 mac802154_dev_get_dsn(const struct net_device *dev);
+/* MAC parameter accessors, implemented in wpan.c */
+int mac802154_set_mac_params(struct net_device *dev,
+ const struct ieee802154_mac_params *params);
+void mac802154_get_mac_params(struct net_device *dev,
+ struct ieee802154_mac_params *params);
+
+/*
+ * llsec callbacks, implemented in mib.c. Each one takes the interface's
+ * sec_mtx around the matching mac802154_llsec_* call.
+ */
+int mac802154_get_params(struct net_device *dev,
+ struct ieee802154_llsec_params *params);
+int mac802154_set_params(struct net_device *dev,
+ const struct ieee802154_llsec_params *params,
+ int changed);
+
+int mac802154_add_key(struct net_device *dev,
+ const struct ieee802154_llsec_key_id *id,
+ const struct ieee802154_llsec_key *key);
+int mac802154_del_key(struct net_device *dev,
+ const struct ieee802154_llsec_key_id *id);
+
+int mac802154_add_dev(struct net_device *dev,
+ const struct ieee802154_llsec_device *llsec_dev);
+int mac802154_del_dev(struct net_device *dev, __le64 dev_addr);
+
+int mac802154_add_devkey(struct net_device *dev,
+ __le64 device_addr,
+ const struct ieee802154_llsec_device_key *key);
+int mac802154_del_devkey(struct net_device *dev,
+ __le64 device_addr,
+ const struct ieee802154_llsec_device_key *key);
+
+int mac802154_add_seclevel(struct net_device *dev,
+ const struct ieee802154_llsec_seclevel *sl);
+int mac802154_del_seclevel(struct net_device *dev,
+ const struct ieee802154_llsec_seclevel *sl);
+
+/*
+ * lock_table/unlock_table take and release sec_mtx; get_table hands out
+ * a pointer to the interface's llsec table without taking any lock.
+ */
+void mac802154_lock_table(struct net_device *dev);
+void mac802154_get_table(struct net_device *dev,
+ struct ieee802154_llsec_table **t);
+void mac802154_unlock_table(struct net_device *dev);
+
#endif /* MAC802154_H */
diff --git a/net/mac802154/mac_cmd.c b/net/mac802154/mac_cmd.c
index a99910d4d52..bf809131eef 100644
--- a/net/mac802154/mac_cmd.c
+++ b/net/mac802154/mac_cmd.c
@@ -40,19 +40,41 @@ static int mac802154_mlme_start_req(struct net_device *dev,
u8 pan_coord, u8 blx,
u8 coord_realign)
{
- BUG_ON(addr->addr_type != IEEE802154_ADDR_SHORT);
+ struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev);
+ int rc = 0;
+
+ BUG_ON(addr->mode != IEEE802154_ADDR_SHORT);
mac802154_dev_set_pan_id(dev, addr->pan_id);
mac802154_dev_set_short_addr(dev, addr->short_addr);
mac802154_dev_set_ieee_addr(dev);
mac802154_dev_set_page_channel(dev, page, channel);
+ if (ops->llsec) {
+ struct ieee802154_llsec_params params;
+ int changed = 0;
+
+ params.coord_shortaddr = addr->short_addr;
+ changed |= IEEE802154_LLSEC_PARAM_COORD_SHORTADDR;
+
+ params.pan_id = addr->pan_id;
+ changed |= IEEE802154_LLSEC_PARAM_PAN_ID;
+
+ params.hwaddr = ieee802154_devaddr_from_raw(dev->dev_addr);
+ changed |= IEEE802154_LLSEC_PARAM_HWADDR;
+
+ params.coord_hwaddr = params.hwaddr;
+ changed |= IEEE802154_LLSEC_PARAM_COORD_HWADDR;
+
+ rc = ops->llsec->set_params(dev, &params, changed);
+ }
+
/* FIXME: add validation for unused parameters to be sane
* for SoftMAC
*/
ieee802154_nl_start_confirm(dev, IEEE802154_SUCCESS);
- return 0;
+ return rc;
}
static struct wpan_phy *mac802154_get_phy(const struct net_device *dev)
@@ -64,6 +86,22 @@ static struct wpan_phy *mac802154_get_phy(const struct net_device *dev)
return to_phy(get_device(&priv->hw->phy->dev));
}
+/*
+ * llsec operations published through mac802154_mlme_wpan.llsec; every
+ * callback is the correspondingly named mac802154_* function.
+ */
+static struct ieee802154_llsec_ops mac802154_llsec_ops = {
+ .get_params = mac802154_get_params,
+ .set_params = mac802154_set_params,
+ .add_key = mac802154_add_key,
+ .del_key = mac802154_del_key,
+ .add_dev = mac802154_add_dev,
+ .del_dev = mac802154_del_dev,
+ .add_devkey = mac802154_add_devkey,
+ .del_devkey = mac802154_del_devkey,
+ .add_seclevel = mac802154_add_seclevel,
+ .del_seclevel = mac802154_del_seclevel,
+ .lock_table = mac802154_lock_table,
+ .get_table = mac802154_get_table,
+ .unlock_table = mac802154_unlock_table,
+};
+
struct ieee802154_reduced_mlme_ops mac802154_mlme_reduced = {
.get_phy = mac802154_get_phy,
};
@@ -74,4 +112,9 @@ struct ieee802154_mlme_ops mac802154_mlme_wpan = {
.get_pan_id = mac802154_dev_get_pan_id,
.get_short_addr = mac802154_dev_get_short_addr,
.get_dsn = mac802154_dev_get_dsn,
+
+ .llsec = &mac802154_llsec_ops,
+
+ .set_mac_params = mac802154_set_mac_params,
+ .get_mac_params = mac802154_get_mac_params,
};
diff --git a/net/mac802154/mib.c b/net/mac802154/mib.c
index 8ded97cf1c3..15aa2f2b03a 100644
--- a/net/mac802154/mib.c
+++ b/net/mac802154/mib.c
@@ -24,6 +24,7 @@
#include <linux/if_arp.h>
#include <net/mac802154.h>
+#include <net/ieee802154_netdev.h>
#include <net/wpan-phy.h>
#include "mac802154.h"
@@ -62,8 +63,6 @@ static void hw_addr_notify(struct work_struct *work)
pr_debug("failed changed mask %lx\n", nw->changed);
kfree(nw);
-
- return;
}
static void set_hw_addr_filt(struct net_device *dev, unsigned long changed)
@@ -79,11 +78,9 @@ static void set_hw_addr_filt(struct net_device *dev, unsigned long changed)
work->dev = dev;
work->changed = changed;
queue_work(priv->hw->dev_workqueue, &work->work);
-
- return;
}
-void mac802154_dev_set_short_addr(struct net_device *dev, u16 val)
+void mac802154_dev_set_short_addr(struct net_device *dev, __le16 val)
{
struct mac802154_sub_if_data *priv = netdev_priv(dev);
@@ -100,10 +97,10 @@ void mac802154_dev_set_short_addr(struct net_device *dev, u16 val)
}
}
-u16 mac802154_dev_get_short_addr(const struct net_device *dev)
+__le16 mac802154_dev_get_short_addr(const struct net_device *dev)
{
struct mac802154_sub_if_data *priv = netdev_priv(dev);
- u16 ret;
+ __le16 ret;
BUG_ON(dev->type != ARPHRD_IEEE802154);
@@ -119,19 +116,19 @@ void mac802154_dev_set_ieee_addr(struct net_device *dev)
struct mac802154_sub_if_data *priv = netdev_priv(dev);
struct mac802154_priv *mac = priv->hw;
+ priv->extended_addr = ieee802154_devaddr_from_raw(dev->dev_addr);
+
if (mac->ops->set_hw_addr_filt &&
- memcmp(mac->hw.hw_filt.ieee_addr,
- dev->dev_addr, IEEE802154_ADDR_LEN)) {
- memcpy(mac->hw.hw_filt.ieee_addr,
- dev->dev_addr, IEEE802154_ADDR_LEN);
+ mac->hw.hw_filt.ieee_addr != priv->extended_addr) {
+ mac->hw.hw_filt.ieee_addr = priv->extended_addr;
set_hw_addr_filt(dev, IEEE802515_AFILT_IEEEADDR_CHANGED);
}
}
-u16 mac802154_dev_get_pan_id(const struct net_device *dev)
+__le16 mac802154_dev_get_pan_id(const struct net_device *dev)
{
struct mac802154_sub_if_data *priv = netdev_priv(dev);
- u16 ret;
+ __le16 ret;
BUG_ON(dev->type != ARPHRD_IEEE802154);
@@ -142,7 +139,7 @@ u16 mac802154_dev_get_pan_id(const struct net_device *dev)
return ret;
}
-void mac802154_dev_set_pan_id(struct net_device *dev, u16 val)
+void mac802154_dev_set_pan_id(struct net_device *dev, __le16 val)
{
struct mac802154_sub_if_data *priv = netdev_priv(dev);
@@ -216,3 +213,190 @@ void mac802154_dev_set_page_channel(struct net_device *dev, u8 page, u8 chan)
} else
mutex_unlock(&priv->hw->phy->pib_lock);
}
+
+
+/*
+ * Read the interface's llsec parameters into @params, serialised
+ * against other table users by sec_mtx. Returns the result of
+ * mac802154_llsec_get_params().
+ */
+int mac802154_get_params(struct net_device *dev,
+			 struct ieee802154_llsec_params *params)
+{
+	struct mac802154_sub_if_data *sdata;
+	int rc;
+
+	BUG_ON(dev->type != ARPHRD_IEEE802154);
+
+	sdata = netdev_priv(dev);
+
+	mutex_lock(&sdata->sec_mtx);
+	rc = mac802154_llsec_get_params(&sdata->sec, params);
+	mutex_unlock(&sdata->sec_mtx);
+
+	return rc;
+}
+
+/*
+ * Update the llsec parameters selected by @changed from @params, with
+ * sec_mtx held. Returns the result of mac802154_llsec_set_params().
+ */
+int mac802154_set_params(struct net_device *dev,
+			 const struct ieee802154_llsec_params *params,
+			 int changed)
+{
+	struct mac802154_sub_if_data *sdata;
+	int rc;
+
+	BUG_ON(dev->type != ARPHRD_IEEE802154);
+
+	sdata = netdev_priv(dev);
+
+	mutex_lock(&sdata->sec_mtx);
+	rc = mac802154_llsec_set_params(&sdata->sec, params, changed);
+	mutex_unlock(&sdata->sec_mtx);
+
+	return rc;
+}
+
+
+/*
+ * Add @key under identifier @id to the interface's key table, with
+ * sec_mtx held. Returns the result of mac802154_llsec_key_add().
+ */
+int mac802154_add_key(struct net_device *dev,
+		      const struct ieee802154_llsec_key_id *id,
+		      const struct ieee802154_llsec_key *key)
+{
+	struct mac802154_sub_if_data *sdata;
+	int rc;
+
+	BUG_ON(dev->type != ARPHRD_IEEE802154);
+
+	sdata = netdev_priv(dev);
+
+	mutex_lock(&sdata->sec_mtx);
+	rc = mac802154_llsec_key_add(&sdata->sec, id, key);
+	mutex_unlock(&sdata->sec_mtx);
+
+	return rc;
+}
+
+/*
+ * Remove the key identified by @id from the interface's key table,
+ * with sec_mtx held. Returns the result of mac802154_llsec_key_del().
+ */
+int mac802154_del_key(struct net_device *dev,
+		      const struct ieee802154_llsec_key_id *id)
+{
+	struct mac802154_sub_if_data *sdata;
+	int rc;
+
+	BUG_ON(dev->type != ARPHRD_IEEE802154);
+
+	sdata = netdev_priv(dev);
+
+	mutex_lock(&sdata->sec_mtx);
+	rc = mac802154_llsec_key_del(&sdata->sec, id);
+	mutex_unlock(&sdata->sec_mtx);
+
+	return rc;
+}
+
+
+/*
+ * Add @llsec_dev to the interface's device table, with sec_mtx held.
+ * Returns the result of mac802154_llsec_dev_add().
+ */
+int mac802154_add_dev(struct net_device *dev,
+		      const struct ieee802154_llsec_device *llsec_dev)
+{
+	struct mac802154_sub_if_data *sdata;
+	int rc;
+
+	BUG_ON(dev->type != ARPHRD_IEEE802154);
+
+	sdata = netdev_priv(dev);
+
+	mutex_lock(&sdata->sec_mtx);
+	rc = mac802154_llsec_dev_add(&sdata->sec, llsec_dev);
+	mutex_unlock(&sdata->sec_mtx);
+
+	return rc;
+}
+
+/*
+ * Remove the device with extended address @dev_addr from the
+ * interface's device table, with sec_mtx held. Returns the result of
+ * mac802154_llsec_dev_del().
+ */
+int mac802154_del_dev(struct net_device *dev, __le64 dev_addr)
+{
+	struct mac802154_sub_if_data *sdata;
+	int rc;
+
+	BUG_ON(dev->type != ARPHRD_IEEE802154);
+
+	sdata = netdev_priv(dev);
+
+	mutex_lock(&sdata->sec_mtx);
+	rc = mac802154_llsec_dev_del(&sdata->sec, dev_addr);
+	mutex_unlock(&sdata->sec_mtx);
+
+	return rc;
+}
+
+
+/*
+ * Add device-key record @key to the device with extended address
+ * @device_addr, with sec_mtx held. Returns the result of
+ * mac802154_llsec_devkey_add().
+ */
+int mac802154_add_devkey(struct net_device *dev,
+			 __le64 device_addr,
+			 const struct ieee802154_llsec_device_key *key)
+{
+	struct mac802154_sub_if_data *sdata;
+	int rc;
+
+	BUG_ON(dev->type != ARPHRD_IEEE802154);
+
+	sdata = netdev_priv(dev);
+
+	mutex_lock(&sdata->sec_mtx);
+	rc = mac802154_llsec_devkey_add(&sdata->sec, device_addr, key);
+	mutex_unlock(&sdata->sec_mtx);
+
+	return rc;
+}
+
+/*
+ * Remove device-key record @key from the device with extended address
+ * @device_addr, with sec_mtx held. Returns the result of
+ * mac802154_llsec_devkey_del().
+ */
+int mac802154_del_devkey(struct net_device *dev,
+			 __le64 device_addr,
+			 const struct ieee802154_llsec_device_key *key)
+{
+	struct mac802154_sub_if_data *sdata;
+	int rc;
+
+	BUG_ON(dev->type != ARPHRD_IEEE802154);
+
+	sdata = netdev_priv(dev);
+
+	mutex_lock(&sdata->sec_mtx);
+	rc = mac802154_llsec_devkey_del(&sdata->sec, device_addr, key);
+	mutex_unlock(&sdata->sec_mtx);
+
+	return rc;
+}
+
+
+/*
+ * Add security level entry @sl to the interface's table, with sec_mtx
+ * held. Returns the result of mac802154_llsec_seclevel_add().
+ */
+int mac802154_add_seclevel(struct net_device *dev,
+			   const struct ieee802154_llsec_seclevel *sl)
+{
+	struct mac802154_sub_if_data *sdata;
+	int rc;
+
+	BUG_ON(dev->type != ARPHRD_IEEE802154);
+
+	sdata = netdev_priv(dev);
+
+	mutex_lock(&sdata->sec_mtx);
+	rc = mac802154_llsec_seclevel_add(&sdata->sec, sl);
+	mutex_unlock(&sdata->sec_mtx);
+
+	return rc;
+}
+
+/*
+ * Remove security level entry @sl from the interface's table, with
+ * sec_mtx held. Returns the result of mac802154_llsec_seclevel_del().
+ */
+int mac802154_del_seclevel(struct net_device *dev,
+			   const struct ieee802154_llsec_seclevel *sl)
+{
+	struct mac802154_sub_if_data *sdata;
+	int rc;
+
+	BUG_ON(dev->type != ARPHRD_IEEE802154);
+
+	sdata = netdev_priv(dev);
+
+	mutex_lock(&sdata->sec_mtx);
+	rc = mac802154_llsec_seclevel_del(&sdata->sec, sl);
+	mutex_unlock(&sdata->sec_mtx);
+
+	return rc;
+}
+
+
+/*
+ * Take the interface's sec_mtx; must be paired with
+ * mac802154_unlock_table().
+ */
+void mac802154_lock_table(struct net_device *dev)
+{
+	struct mac802154_sub_if_data *sdata;
+
+	BUG_ON(dev->type != ARPHRD_IEEE802154);
+
+	sdata = netdev_priv(dev);
+	mutex_lock(&sdata->sec_mtx);
+}
+
+/*
+ * Store a pointer to the interface's llsec table in *@t. No lock is
+ * taken here.
+ */
+void mac802154_get_table(struct net_device *dev,
+			 struct ieee802154_llsec_table **t)
+{
+	struct mac802154_sub_if_data *sdata;
+
+	BUG_ON(dev->type != ARPHRD_IEEE802154);
+
+	sdata = netdev_priv(dev);
+	*t = &sdata->sec.table;
+}
+
+/*
+ * Release the interface's sec_mtx taken by mac802154_lock_table().
+ */
+void mac802154_unlock_table(struct net_device *dev)
+{
+	struct mac802154_sub_if_data *sdata;
+
+	BUG_ON(dev->type != ARPHRD_IEEE802154);
+
+	sdata = netdev_priv(dev);
+	mutex_unlock(&sdata->sec_mtx);
+}
diff --git a/net/mac802154/monitor.c b/net/mac802154/monitor.c
index 434a26f76a8..a68230e2b25 100644
--- a/net/mac802154/monitor.c
+++ b/net/mac802154/monitor.c
@@ -70,7 +70,8 @@ void mac802154_monitors_rx(struct mac802154_priv *priv, struct sk_buff *skb)
rcu_read_lock();
list_for_each_entry_rcu(sdata, &priv->slaves, list) {
- if (sdata->type != IEEE802154_DEV_MONITOR)
+ if (sdata->type != IEEE802154_DEV_MONITOR ||
+ !netif_running(sdata->dev))
continue;
skb2 = skb_clone(skb, GFP_ATOMIC);
diff --git a/net/mac802154/rx.c b/net/mac802154/rx.c
index 38548ec2098..7f820a108a9 100644
--- a/net/mac802154/rx.c
+++ b/net/mac802154/rx.c
@@ -59,28 +59,28 @@ mac802154_subif_rx(struct ieee802154_dev *hw, struct sk_buff *skb, u8 lqi)
skb->protocol = htons(ETH_P_IEEE802154);
skb_reset_mac_header(skb);
- BUILD_BUG_ON(sizeof(struct ieee802154_mac_cb) > sizeof(skb->cb));
-
if (!(priv->hw.flags & IEEE802154_HW_OMIT_CKSUM)) {
u16 crc;
if (skb->len < 2) {
pr_debug("got invalid frame\n");
- goto out;
+ goto fail;
}
crc = crc_ccitt(0, skb->data, skb->len);
if (crc) {
pr_debug("CRC mismatch\n");
- goto out;
+ goto fail;
}
skb_trim(skb, skb->len - 2); /* CRC */
}
mac802154_monitors_rx(priv, skb);
mac802154_wpans_rx(priv, skb);
-out:
- dev_kfree_skb(skb);
+
return;
+
+fail:
+ kfree_skb(skb);
}
static void mac802154_rx_worker(struct work_struct *work)
diff --git a/net/mac802154/wpan.c b/net/mac802154/wpan.c
index 372d8a222b9..3c3069fd697 100644
--- a/net/mac802154/wpan.c
+++ b/net/mac802154/wpan.c
@@ -35,33 +35,26 @@
#include "mac802154.h"
-static inline int mac802154_fetch_skb_u8(struct sk_buff *skb, u8 *val)
+static int mac802154_wpan_update_llsec(struct net_device *dev)
{
- if (unlikely(!pskb_may_pull(skb, 1)))
- return -EINVAL;
+ struct mac802154_sub_if_data *priv = netdev_priv(dev);
+ struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev);
+ int rc = 0;
- *val = skb->data[0];
- skb_pull(skb, 1);
+ if (ops->llsec) {
+ struct ieee802154_llsec_params params;
+ int changed = 0;
- return 0;
-}
+ params.pan_id = priv->pan_id;
+ changed |= IEEE802154_LLSEC_PARAM_PAN_ID;
-static inline int mac802154_fetch_skb_u16(struct sk_buff *skb, u16 *val)
-{
- if (unlikely(!pskb_may_pull(skb, 2)))
- return -EINVAL;
-
- *val = skb->data[0] | (skb->data[1] << 8);
- skb_pull(skb, 2);
+ params.hwaddr = priv->extended_addr;
+ changed |= IEEE802154_LLSEC_PARAM_HWADDR;
- return 0;
-}
+ rc = ops->llsec->set_params(dev, &params, changed);
+ }
-static inline void mac802154_haddr_copy_swap(u8 *dest, const u8 *src)
-{
- int i;
- for (i = 0; i < IEEE802154_ADDR_LEN; i++)
- dest[IEEE802154_ADDR_LEN - i - 1] = src[i];
+ return rc;
}
static int
@@ -76,19 +69,25 @@ mac802154_wpan_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
switch (cmd) {
case SIOCGIFADDR:
- if (priv->pan_id == IEEE802154_PANID_BROADCAST ||
- priv->short_addr == IEEE802154_ADDR_BROADCAST) {
+ {
+ u16 pan_id, short_addr;
+
+ pan_id = le16_to_cpu(priv->pan_id);
+ short_addr = le16_to_cpu(priv->short_addr);
+ if (pan_id == IEEE802154_PANID_BROADCAST ||
+ short_addr == IEEE802154_ADDR_BROADCAST) {
err = -EADDRNOTAVAIL;
break;
}
sa->family = AF_IEEE802154;
sa->addr.addr_type = IEEE802154_ADDR_SHORT;
- sa->addr.pan_id = priv->pan_id;
- sa->addr.short_addr = priv->short_addr;
+ sa->addr.pan_id = pan_id;
+ sa->addr.short_addr = short_addr;
err = 0;
break;
+ }
case SIOCSIFADDR:
dev_warn(&dev->dev,
"Using DEBUGing ioctl SIOCSIFADDR isn't recommened!\n");
@@ -101,10 +100,10 @@ mac802154_wpan_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
break;
}
- priv->pan_id = sa->addr.pan_id;
- priv->short_addr = sa->addr.short_addr;
+ priv->pan_id = cpu_to_le16(sa->addr.pan_id);
+ priv->short_addr = cpu_to_le16(sa->addr.short_addr);
- err = 0;
+ err = mac802154_wpan_update_llsec(dev);
break;
}
@@ -122,193 +121,194 @@ static int mac802154_wpan_mac_addr(struct net_device *dev, void *p)
/* FIXME: validate addr */
memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
mac802154_dev_set_ieee_addr(dev);
- return 0;
+ return mac802154_wpan_update_llsec(dev);
}
-static int mac802154_header_create(struct sk_buff *skb,
- struct net_device *dev,
- unsigned short type,
- const void *_daddr,
- const void *_saddr,
- unsigned len)
+int mac802154_set_mac_params(struct net_device *dev,
+ const struct ieee802154_mac_params *params)
{
- const struct ieee802154_addr *saddr = _saddr;
- const struct ieee802154_addr *daddr = _daddr;
- struct ieee802154_addr dev_addr;
struct mac802154_sub_if_data *priv = netdev_priv(dev);
- int pos = 2;
- u8 head[MAC802154_FRAME_HARD_HEADER_LEN];
- u16 fc;
- if (!daddr)
- return -EINVAL;
+ mutex_lock(&priv->hw->slaves_mtx);
+ priv->mac_params = *params;
+ mutex_unlock(&priv->hw->slaves_mtx);
- head[pos++] = mac_cb(skb)->seq; /* DSN/BSN */
- fc = mac_cb_type(skb);
- if (mac_cb_is_ackreq(skb))
- fc |= IEEE802154_FC_ACK_REQ;
+ return 0;
+}
- if (!saddr) {
- spin_lock_bh(&priv->mib_lock);
+void mac802154_get_mac_params(struct net_device *dev,
+ struct ieee802154_mac_params *params)
+{
+ struct mac802154_sub_if_data *priv = netdev_priv(dev);
- if (priv->short_addr == IEEE802154_ADDR_BROADCAST ||
- priv->short_addr == IEEE802154_ADDR_UNDEF ||
- priv->pan_id == IEEE802154_PANID_BROADCAST) {
- dev_addr.addr_type = IEEE802154_ADDR_LONG;
- memcpy(dev_addr.hwaddr, dev->dev_addr,
- IEEE802154_ADDR_LEN);
- } else {
- dev_addr.addr_type = IEEE802154_ADDR_SHORT;
- dev_addr.short_addr = priv->short_addr;
- }
+ mutex_lock(&priv->hw->slaves_mtx);
+ *params = priv->mac_params;
+ mutex_unlock(&priv->hw->slaves_mtx);
+}
- dev_addr.pan_id = priv->pan_id;
- saddr = &dev_addr;
+static int mac802154_wpan_open(struct net_device *dev)
+{
+ int rc;
+ struct mac802154_sub_if_data *priv = netdev_priv(dev);
+ struct wpan_phy *phy = priv->hw->phy;
- spin_unlock_bh(&priv->mib_lock);
- }
+ rc = mac802154_slave_open(dev);
+ if (rc < 0)
+ return rc;
- if (daddr->addr_type != IEEE802154_ADDR_NONE) {
- fc |= (daddr->addr_type << IEEE802154_FC_DAMODE_SHIFT);
+ mutex_lock(&phy->pib_lock);
- head[pos++] = daddr->pan_id & 0xff;
- head[pos++] = daddr->pan_id >> 8;
+ if (phy->set_txpower) {
+ rc = phy->set_txpower(phy, priv->mac_params.transmit_power);
+ if (rc < 0)
+ goto out;
+ }
- if (daddr->addr_type == IEEE802154_ADDR_SHORT) {
- head[pos++] = daddr->short_addr & 0xff;
- head[pos++] = daddr->short_addr >> 8;
- } else {
- mac802154_haddr_copy_swap(head + pos, daddr->hwaddr);
- pos += IEEE802154_ADDR_LEN;
- }
+ if (phy->set_lbt) {
+ rc = phy->set_lbt(phy, priv->mac_params.lbt);
+ if (rc < 0)
+ goto out;
}
- if (saddr->addr_type != IEEE802154_ADDR_NONE) {
- fc |= (saddr->addr_type << IEEE802154_FC_SAMODE_SHIFT);
+ if (phy->set_cca_mode) {
+ rc = phy->set_cca_mode(phy, priv->mac_params.cca_mode);
+ if (rc < 0)
+ goto out;
+ }
- if ((saddr->pan_id == daddr->pan_id) &&
- (saddr->pan_id != IEEE802154_PANID_BROADCAST)) {
- /* PANID compression/intra PAN */
- fc |= IEEE802154_FC_INTRA_PAN;
- } else {
- head[pos++] = saddr->pan_id & 0xff;
- head[pos++] = saddr->pan_id >> 8;
- }
+ if (phy->set_cca_ed_level) {
+ rc = phy->set_cca_ed_level(phy, priv->mac_params.cca_ed_level);
+ if (rc < 0)
+ goto out;
+ }
- if (saddr->addr_type == IEEE802154_ADDR_SHORT) {
- head[pos++] = saddr->short_addr & 0xff;
- head[pos++] = saddr->short_addr >> 8;
- } else {
- mac802154_haddr_copy_swap(head + pos, saddr->hwaddr);
- pos += IEEE802154_ADDR_LEN;
- }
+ if (phy->set_csma_params) {
+ rc = phy->set_csma_params(phy, priv->mac_params.min_be,
+ priv->mac_params.max_be,
+ priv->mac_params.csma_retries);
+ if (rc < 0)
+ goto out;
}
- head[0] = fc;
- head[1] = fc >> 8;
+ if (phy->set_frame_retries) {
+ rc = phy->set_frame_retries(phy,
+ priv->mac_params.frame_retries);
+ if (rc < 0)
+ goto out;
+ }
- memcpy(skb_push(skb, pos), head, pos);
- skb_reset_mac_header(skb);
- skb->mac_len = pos;
+ mutex_unlock(&phy->pib_lock);
+ return 0;
- return pos;
+out:
+ mutex_unlock(&phy->pib_lock);
+ return rc;
}
-static int
-mac802154_header_parse(const struct sk_buff *skb, unsigned char *haddr)
+static int mac802154_set_header_security(struct mac802154_sub_if_data *priv,
+ struct ieee802154_hdr *hdr,
+ const struct ieee802154_mac_cb *cb)
{
- const u8 *hdr = skb_mac_header(skb);
- const u8 *tail = skb_tail_pointer(skb);
- struct ieee802154_addr *addr = (struct ieee802154_addr *)haddr;
- u16 fc;
- int da_type;
+ struct ieee802154_llsec_params params;
+ u8 level;
- if (hdr + 3 > tail)
- goto malformed;
+ mac802154_llsec_get_params(&priv->sec, &params);
- fc = hdr[0] | (hdr[1] << 8);
+ if (!params.enabled && cb->secen_override && cb->secen)
+ return -EINVAL;
+ if (!params.enabled ||
+ (cb->secen_override && !cb->secen) ||
+ !params.out_level)
+ return 0;
+ if (cb->seclevel_override && !cb->seclevel)
+ return -EINVAL;
- hdr += 3;
+ level = cb->seclevel_override ? cb->seclevel : params.out_level;
- da_type = IEEE802154_FC_DAMODE(fc);
- addr->addr_type = IEEE802154_FC_SAMODE(fc);
+ hdr->fc.security_enabled = 1;
+ hdr->sec.level = level;
+ hdr->sec.key_id_mode = params.out_key.mode;
+ if (params.out_key.mode == IEEE802154_SCF_KEY_SHORT_INDEX)
+ hdr->sec.short_src = params.out_key.short_source;
+ else if (params.out_key.mode == IEEE802154_SCF_KEY_HW_INDEX)
+ hdr->sec.extended_src = params.out_key.extended_source;
+ hdr->sec.key_id = params.out_key.id;
- switch (da_type) {
- case IEEE802154_ADDR_NONE:
- if (fc & IEEE802154_FC_INTRA_PAN)
- goto malformed;
- break;
- case IEEE802154_ADDR_LONG:
- if (fc & IEEE802154_FC_INTRA_PAN) {
- if (hdr + 2 > tail)
- goto malformed;
- addr->pan_id = hdr[0] | (hdr[1] << 8);
- hdr += 2;
- }
+ return 0;
+}
- if (hdr + IEEE802154_ADDR_LEN > tail)
- goto malformed;
+static int mac802154_header_create(struct sk_buff *skb,
+ struct net_device *dev,
+ unsigned short type,
+ const void *daddr,
+ const void *saddr,
+ unsigned len)
+{
+ struct ieee802154_hdr hdr;
+ struct mac802154_sub_if_data *priv = netdev_priv(dev);
+ struct ieee802154_mac_cb *cb = mac_cb(skb);
+ int hlen;
- hdr += IEEE802154_ADDR_LEN;
- break;
- case IEEE802154_ADDR_SHORT:
- if (fc & IEEE802154_FC_INTRA_PAN) {
- if (hdr + 2 > tail)
- goto malformed;
- addr->pan_id = hdr[0] | (hdr[1] << 8);
- hdr += 2;
- }
+ if (!daddr)
+ return -EINVAL;
- if (hdr + 2 > tail)
- goto malformed;
+ memset(&hdr.fc, 0, sizeof(hdr.fc));
+ hdr.fc.type = cb->type;
+ hdr.fc.security_enabled = cb->secen;
+ hdr.fc.ack_request = cb->ackreq;
+ hdr.seq = ieee802154_mlme_ops(dev)->get_dsn(dev);
- hdr += 2;
- break;
- default:
- goto malformed;
+ if (mac802154_set_header_security(priv, &hdr, cb) < 0)
+ return -EINVAL;
- }
+ if (!saddr) {
+ spin_lock_bh(&priv->mib_lock);
- switch (addr->addr_type) {
- case IEEE802154_ADDR_NONE:
- break;
- case IEEE802154_ADDR_LONG:
- if (!(fc & IEEE802154_FC_INTRA_PAN)) {
- if (hdr + 2 > tail)
- goto malformed;
- addr->pan_id = hdr[0] | (hdr[1] << 8);
- hdr += 2;
+ if (priv->short_addr == cpu_to_le16(IEEE802154_ADDR_BROADCAST) ||
+ priv->short_addr == cpu_to_le16(IEEE802154_ADDR_UNDEF) ||
+ priv->pan_id == cpu_to_le16(IEEE802154_PANID_BROADCAST)) {
+ hdr.source.mode = IEEE802154_ADDR_LONG;
+ hdr.source.extended_addr = priv->extended_addr;
+ } else {
+ hdr.source.mode = IEEE802154_ADDR_SHORT;
+ hdr.source.short_addr = priv->short_addr;
}
- if (hdr + IEEE802154_ADDR_LEN > tail)
- goto malformed;
+ hdr.source.pan_id = priv->pan_id;
- mac802154_haddr_copy_swap(addr->hwaddr, hdr);
- hdr += IEEE802154_ADDR_LEN;
- break;
- case IEEE802154_ADDR_SHORT:
- if (!(fc & IEEE802154_FC_INTRA_PAN)) {
- if (hdr + 2 > tail)
- goto malformed;
- addr->pan_id = hdr[0] | (hdr[1] << 8);
- hdr += 2;
- }
+ spin_unlock_bh(&priv->mib_lock);
+ } else {
+ hdr.source = *(const struct ieee802154_addr *)saddr;
+ }
- if (hdr + 2 > tail)
- goto malformed;
+ hdr.dest = *(const struct ieee802154_addr *)daddr;
- addr->short_addr = hdr[0] | (hdr[1] << 8);
- hdr += 2;
- break;
- default:
- goto malformed;
- }
+ hlen = ieee802154_hdr_push(skb, &hdr);
+ if (hlen < 0)
+ return -EINVAL;
- return sizeof(struct ieee802154_addr);
+ skb_reset_mac_header(skb);
+ skb->mac_len = hlen;
-malformed:
- pr_debug("malformed packet\n");
- return 0;
+ if (len > ieee802154_max_payload(&hdr))
+ return -EMSGSIZE;
+
+ return hlen;
+}
+
+static int
+mac802154_header_parse(const struct sk_buff *skb, unsigned char *haddr)
+{
+ struct ieee802154_hdr hdr;
+ struct ieee802154_addr *addr = (struct ieee802154_addr *)haddr;
+
+ if (ieee802154_hdr_peek_addrs(skb, &hdr) < 0) {
+ pr_debug("malformed packet\n");
+ return 0;
+ }
+
+ *addr = hdr.source;
+ return sizeof(*addr);
}
static netdev_tx_t
@@ -316,6 +316,7 @@ mac802154_wpan_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct mac802154_sub_if_data *priv;
u8 chan, page;
+ int rc;
priv = netdev_priv(dev);
@@ -331,6 +332,13 @@ mac802154_wpan_xmit(struct sk_buff *skb, struct net_device *dev)
return NETDEV_TX_OK;
}
+ rc = mac802154_llsec_encrypt(&priv->sec, skb);
+ if (rc) {
+ pr_warn("encryption failed: %i\n", rc);
+ kfree_skb(skb);
+ return NETDEV_TX_OK;
+ }
+
skb->skb_iif = dev->ifindex;
dev->stats.tx_packets++;
dev->stats.tx_bytes += skb->len;
@@ -344,13 +352,22 @@ static struct header_ops mac802154_header_ops = {
};
static const struct net_device_ops mac802154_wpan_ops = {
- .ndo_open = mac802154_slave_open,
+ .ndo_open = mac802154_wpan_open,
.ndo_stop = mac802154_slave_close,
.ndo_start_xmit = mac802154_wpan_xmit,
.ndo_do_ioctl = mac802154_wpan_ioctl,
.ndo_set_mac_address = mac802154_wpan_mac_addr,
};
+static void mac802154_wpan_free(struct net_device *dev)
+{
+ struct mac802154_sub_if_data *priv = netdev_priv(dev);
+
+ mac802154_llsec_destroy(&priv->sec);
+
+ free_netdev(dev);
+}
+
void mac802154_wpan_setup(struct net_device *dev)
{
struct mac802154_sub_if_data *priv;
@@ -360,14 +377,14 @@ void mac802154_wpan_setup(struct net_device *dev)
dev->hard_header_len = MAC802154_FRAME_HARD_HEADER_LEN;
dev->header_ops = &mac802154_header_ops;
- dev->needed_tailroom = 2; /* FCS */
+ dev->needed_tailroom = 2 + 16; /* FCS + MIC */
dev->mtu = IEEE802154_MTU;
dev->tx_queue_len = 300;
dev->type = ARPHRD_IEEE802154;
dev->flags = IFF_NOARP | IFF_BROADCAST;
dev->watchdog_timeo = 0;
- dev->destructor = free_netdev;
+ dev->destructor = mac802154_wpan_free;
dev->netdev_ops = &mac802154_wpan_ops;
dev->ml_priv = &mac802154_mlme_wpan;
@@ -378,12 +395,21 @@ void mac802154_wpan_setup(struct net_device *dev)
priv->page = 0;
spin_lock_init(&priv->mib_lock);
+ mutex_init(&priv->sec_mtx);
get_random_bytes(&priv->bsn, 1);
get_random_bytes(&priv->dsn, 1);
- priv->pan_id = IEEE802154_PANID_BROADCAST;
- priv->short_addr = IEEE802154_ADDR_BROADCAST;
+ /* defaults per 802.15.4-2011 */
+ priv->mac_params.min_be = 3;
+ priv->mac_params.max_be = 5;
+ priv->mac_params.csma_retries = 4;
+ priv->mac_params.frame_retries = -1; /* for compatibility, actual default is 3 */
+
+ priv->pan_id = cpu_to_le16(IEEE802154_PANID_BROADCAST);
+ priv->short_addr = cpu_to_le16(IEEE802154_ADDR_BROADCAST);
+
+ mac802154_llsec_init(&priv->sec);
}
static int mac802154_process_data(struct net_device *dev, struct sk_buff *skb)
@@ -392,15 +418,22 @@ static int mac802154_process_data(struct net_device *dev, struct sk_buff *skb)
}
static int
-mac802154_subif_frame(struct mac802154_sub_if_data *sdata, struct sk_buff *skb)
+mac802154_subif_frame(struct mac802154_sub_if_data *sdata, struct sk_buff *skb,
+ const struct ieee802154_hdr *hdr)
{
+ __le16 span, sshort;
+ int rc;
+
pr_debug("getting packet via slave interface %s\n", sdata->dev->name);
spin_lock_bh(&sdata->mib_lock);
- switch (mac_cb(skb)->da.addr_type) {
+ span = sdata->pan_id;
+ sshort = sdata->short_addr;
+
+ switch (mac_cb(skb)->dest.mode) {
case IEEE802154_ADDR_NONE:
- if (mac_cb(skb)->sa.addr_type != IEEE802154_ADDR_NONE)
+ if (mac_cb(skb)->dest.mode != IEEE802154_ADDR_NONE)
/* FIXME: check if we are PAN coordinator */
skb->pkt_type = PACKET_OTHERHOST;
else
@@ -408,23 +441,22 @@ mac802154_subif_frame(struct mac802154_sub_if_data *sdata, struct sk_buff *skb)
skb->pkt_type = PACKET_HOST;
break;
case IEEE802154_ADDR_LONG:
- if (mac_cb(skb)->da.pan_id != sdata->pan_id &&
- mac_cb(skb)->da.pan_id != IEEE802154_PANID_BROADCAST)
+ if (mac_cb(skb)->dest.pan_id != span &&
+ mac_cb(skb)->dest.pan_id != cpu_to_le16(IEEE802154_PANID_BROADCAST))
skb->pkt_type = PACKET_OTHERHOST;
- else if (!memcmp(mac_cb(skb)->da.hwaddr, sdata->dev->dev_addr,
- IEEE802154_ADDR_LEN))
+ else if (mac_cb(skb)->dest.extended_addr == sdata->extended_addr)
skb->pkt_type = PACKET_HOST;
else
skb->pkt_type = PACKET_OTHERHOST;
break;
case IEEE802154_ADDR_SHORT:
- if (mac_cb(skb)->da.pan_id != sdata->pan_id &&
- mac_cb(skb)->da.pan_id != IEEE802154_PANID_BROADCAST)
+ if (mac_cb(skb)->dest.pan_id != span &&
+ mac_cb(skb)->dest.pan_id != cpu_to_le16(IEEE802154_PANID_BROADCAST))
skb->pkt_type = PACKET_OTHERHOST;
- else if (mac_cb(skb)->da.short_addr == sdata->short_addr)
+ else if (mac_cb(skb)->dest.short_addr == sshort)
skb->pkt_type = PACKET_HOST;
- else if (mac_cb(skb)->da.short_addr ==
- IEEE802154_ADDR_BROADCAST)
+ else if (mac_cb(skb)->dest.short_addr ==
+ cpu_to_le16(IEEE802154_ADDR_BROADCAST))
skb->pkt_type = PACKET_BROADCAST;
else
skb->pkt_type = PACKET_OTHERHOST;
@@ -437,111 +469,108 @@ mac802154_subif_frame(struct mac802154_sub_if_data *sdata, struct sk_buff *skb)
skb->dev = sdata->dev;
+ rc = mac802154_llsec_decrypt(&sdata->sec, skb);
+ if (rc) {
+ pr_debug("decryption failed: %i\n", rc);
+ kfree_skb(skb);
+ return NET_RX_DROP;
+ }
+
sdata->dev->stats.rx_packets++;
sdata->dev->stats.rx_bytes += skb->len;
- switch (mac_cb_type(skb)) {
+ switch (mac_cb(skb)->type) {
case IEEE802154_FC_TYPE_DATA:
return mac802154_process_data(sdata->dev, skb);
default:
pr_warn("ieee802154: bad frame received (type = %d)\n",
- mac_cb_type(skb));
+ mac_cb(skb)->type);
kfree_skb(skb);
return NET_RX_DROP;
}
}
-static int mac802154_parse_frame_start(struct sk_buff *skb)
+static void mac802154_print_addr(const char *name,
+ const struct ieee802154_addr *addr)
{
- u8 *head = skb->data;
- u16 fc;
-
- if (mac802154_fetch_skb_u16(skb, &fc) ||
- mac802154_fetch_skb_u8(skb, &(mac_cb(skb)->seq)))
- goto err;
-
- pr_debug("fc: %04x dsn: %02x\n", fc, head[2]);
-
- mac_cb(skb)->flags = IEEE802154_FC_TYPE(fc);
- mac_cb(skb)->sa.addr_type = IEEE802154_FC_SAMODE(fc);
- mac_cb(skb)->da.addr_type = IEEE802154_FC_DAMODE(fc);
-
- if (fc & IEEE802154_FC_INTRA_PAN)
- mac_cb(skb)->flags |= MAC_CB_FLAG_INTRAPAN;
+ if (addr->mode == IEEE802154_ADDR_NONE)
+ pr_debug("%s not present\n", name);
- if (mac_cb(skb)->da.addr_type != IEEE802154_ADDR_NONE) {
- if (mac802154_fetch_skb_u16(skb, &(mac_cb(skb)->da.pan_id)))
- goto err;
+ pr_debug("%s PAN ID: %04x\n", name, le16_to_cpu(addr->pan_id));
+ if (addr->mode == IEEE802154_ADDR_SHORT) {
+ pr_debug("%s is short: %04x\n", name,
+ le16_to_cpu(addr->short_addr));
+ } else {
+ u64 hw = swab64((__force u64) addr->extended_addr);
- /* source PAN id compression */
- if (mac_cb_is_intrapan(skb))
- mac_cb(skb)->sa.pan_id = mac_cb(skb)->da.pan_id;
+ pr_debug("%s is hardware: %8phC\n", name, &hw);
+ }
+}
- pr_debug("dest PAN addr: %04x\n", mac_cb(skb)->da.pan_id);
+static int mac802154_parse_frame_start(struct sk_buff *skb,
+ struct ieee802154_hdr *hdr)
+{
+ int hlen;
+ struct ieee802154_mac_cb *cb = mac_cb_init(skb);
- if (mac_cb(skb)->da.addr_type == IEEE802154_ADDR_SHORT) {
- u16 *da = &(mac_cb(skb)->da.short_addr);
+ hlen = ieee802154_hdr_pull(skb, hdr);
+ if (hlen < 0)
+ return -EINVAL;
- if (mac802154_fetch_skb_u16(skb, da))
- goto err;
+ skb->mac_len = hlen;
- pr_debug("destination address is short: %04x\n",
- mac_cb(skb)->da.short_addr);
- } else {
- if (!pskb_may_pull(skb, IEEE802154_ADDR_LEN))
- goto err;
+ pr_debug("fc: %04x dsn: %02x\n", le16_to_cpup((__le16 *)&hdr->fc),
+ hdr->seq);
- mac802154_haddr_copy_swap(mac_cb(skb)->da.hwaddr,
- skb->data);
- skb_pull(skb, IEEE802154_ADDR_LEN);
+ cb->type = hdr->fc.type;
+ cb->ackreq = hdr->fc.ack_request;
+ cb->secen = hdr->fc.security_enabled;
- pr_debug("destination address is hardware\n");
- }
- }
+ mac802154_print_addr("destination", &hdr->dest);
+ mac802154_print_addr("source", &hdr->source);
- if (mac_cb(skb)->sa.addr_type != IEEE802154_ADDR_NONE) {
- /* non PAN-compression, fetch source address id */
- if (!(mac_cb_is_intrapan(skb))) {
- u16 *sa_pan = &(mac_cb(skb)->sa.pan_id);
-
- if (mac802154_fetch_skb_u16(skb, sa_pan))
- goto err;
- }
+ cb->source = hdr->source;
+ cb->dest = hdr->dest;
- pr_debug("source PAN addr: %04x\n", mac_cb(skb)->da.pan_id);
+ if (hdr->fc.security_enabled) {
+ u64 key;
- if (mac_cb(skb)->sa.addr_type == IEEE802154_ADDR_SHORT) {
- u16 *sa = &(mac_cb(skb)->sa.short_addr);
+ pr_debug("seclevel %i\n", hdr->sec.level);
- if (mac802154_fetch_skb_u16(skb, sa))
- goto err;
+ switch (hdr->sec.key_id_mode) {
+ case IEEE802154_SCF_KEY_IMPLICIT:
+ pr_debug("implicit key\n");
+ break;
- pr_debug("source address is short: %04x\n",
- mac_cb(skb)->sa.short_addr);
- } else {
- if (!pskb_may_pull(skb, IEEE802154_ADDR_LEN))
- goto err;
+ case IEEE802154_SCF_KEY_INDEX:
+ pr_debug("key %02x\n", hdr->sec.key_id);
+ break;
- mac802154_haddr_copy_swap(mac_cb(skb)->sa.hwaddr,
- skb->data);
- skb_pull(skb, IEEE802154_ADDR_LEN);
+ case IEEE802154_SCF_KEY_SHORT_INDEX:
+ pr_debug("key %04x:%04x %02x\n",
+ le32_to_cpu(hdr->sec.short_src) >> 16,
+ le32_to_cpu(hdr->sec.short_src) & 0xffff,
+ hdr->sec.key_id);
+ break;
- pr_debug("source address is hardware\n");
+ case IEEE802154_SCF_KEY_HW_INDEX:
+ key = swab64((__force u64) hdr->sec.extended_src);
+ pr_debug("key source %8phC %02x\n", &key,
+ hdr->sec.key_id);
+ break;
}
}
return 0;
-err:
- return -EINVAL;
}
void mac802154_wpans_rx(struct mac802154_priv *priv, struct sk_buff *skb)
{
int ret;
- struct sk_buff *sskb;
struct mac802154_sub_if_data *sdata;
+ struct ieee802154_hdr hdr;
- ret = mac802154_parse_frame_start(skb);
+ ret = mac802154_parse_frame_start(skb, &hdr);
if (ret) {
pr_debug("got invalid frame\n");
return;
@@ -549,12 +578,16 @@ void mac802154_wpans_rx(struct mac802154_priv *priv, struct sk_buff *skb)
rcu_read_lock();
list_for_each_entry_rcu(sdata, &priv->slaves, list) {
- if (sdata->type != IEEE802154_DEV_WPAN)
+ if (sdata->type != IEEE802154_DEV_WPAN ||
+ !netif_running(sdata->dev))
continue;
- sskb = skb_clone(skb, GFP_ATOMIC);
- if (sskb)
- mac802154_subif_frame(sdata, sskb);
+ mac802154_subif_frame(sdata, skb, &hdr);
+ skb = NULL;
+ break;
}
rcu_read_unlock();
+
+ if (skb)
+ kfree_skb(skb);
}
diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c
index 851cd880b0c..6b38d083e1c 100644
--- a/net/mpls/mpls_gso.c
+++ b/net/mpls/mpls_gso.c
@@ -33,6 +33,7 @@ static struct sk_buff *mpls_gso_segment(struct sk_buff *skb,
SKB_GSO_DODGY |
SKB_GSO_TCP_ECN |
SKB_GSO_GRE |
+ SKB_GSO_GRE_CSUM |
SKB_GSO_IPIP |
SKB_GSO_MPLS)))
goto out;
diff --git a/net/netfilter/ipset/Kconfig b/net/netfilter/ipset/Kconfig
index 44cd4f58adf..2f7f5c32c6f 100644
--- a/net/netfilter/ipset/Kconfig
+++ b/net/netfilter/ipset/Kconfig
@@ -61,6 +61,15 @@ config IP_SET_HASH_IP
To compile it as a module, choose M here. If unsure, say N.
+config IP_SET_HASH_IPMARK
+ tristate "hash:ip,mark set support"
+ depends on IP_SET
+ help
+ This option adds the hash:ip,mark set type support, by which one
+ can store IPv4/IPv6 address and mark pairs.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
config IP_SET_HASH_IPPORT
tristate "hash:ip,port set support"
depends on IP_SET
diff --git a/net/netfilter/ipset/Makefile b/net/netfilter/ipset/Makefile
index 44b2d38476f..231f10196cb 100644
--- a/net/netfilter/ipset/Makefile
+++ b/net/netfilter/ipset/Makefile
@@ -14,6 +14,7 @@ obj-$(CONFIG_IP_SET_BITMAP_PORT) += ip_set_bitmap_port.o
# hash types
obj-$(CONFIG_IP_SET_HASH_IP) += ip_set_hash_ip.o
+obj-$(CONFIG_IP_SET_HASH_IPMARK) += ip_set_hash_ipmark.o
obj-$(CONFIG_IP_SET_HASH_IPPORT) += ip_set_hash_ipport.o
obj-$(CONFIG_IP_SET_HASH_IPPORTIP) += ip_set_hash_ipportip.o
obj-$(CONFIG_IP_SET_HASH_IPPORTNET) += ip_set_hash_ipportnet.o
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index de770ec39e5..ec8114fae50 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -54,10 +54,10 @@ MODULE_DESCRIPTION("core IP set support");
MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET);
/* When the nfnl mutex is held: */
-#define nfnl_dereference(p) \
+#define ip_set_dereference(p) \
rcu_dereference_protected(p, 1)
-#define nfnl_set(inst, id) \
- nfnl_dereference((inst)->ip_set_list)[id]
+#define ip_set(inst, id) \
+ ip_set_dereference((inst)->ip_set_list)[id]
/*
* The set types are implemented in modules and registered set types
@@ -271,10 +271,7 @@ ip_set_free(void *members)
{
pr_debug("%p: free with %s\n", members,
is_vmalloc_addr(members) ? "vfree" : "kfree");
- if (is_vmalloc_addr(members))
- vfree(members);
- else
- kfree(members);
+ kvfree(members);
}
EXPORT_SYMBOL_GPL(ip_set_free);
@@ -368,6 +365,8 @@ ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], size_t len)
if (tb[IPSET_ATTR_CADT_FLAGS])
cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+ if (cadt_flags & IPSET_FLAG_WITH_FORCEADD)
+ set->flags |= IPSET_CREATE_FLAG_FORCEADD;
for (id = 0; id < IPSET_EXT_ID_MAX; id++) {
if (!add_extension(id, cadt_flags, tb))
continue;
@@ -510,7 +509,7 @@ ip_set_add(ip_set_id_t index, const struct sk_buff *skb,
if (opt->dim < set->type->dimension ||
!(opt->family == set->family || set->family == NFPROTO_UNSPEC))
- return 0;
+ return -IPSET_ERR_TYPE_MISMATCH;
write_lock_bh(&set->lock);
ret = set->variant->kadt(set, skb, par, IPSET_ADD, opt);
@@ -533,7 +532,7 @@ ip_set_del(ip_set_id_t index, const struct sk_buff *skb,
if (opt->dim < set->type->dimension ||
!(opt->family == set->family || set->family == NFPROTO_UNSPEC))
- return 0;
+ return -IPSET_ERR_TYPE_MISMATCH;
write_lock_bh(&set->lock);
ret = set->variant->kadt(set, skb, par, IPSET_DEL, opt);
@@ -640,7 +639,7 @@ ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index)
return IPSET_INVALID_ID;
nfnl_lock(NFNL_SUBSYS_IPSET);
- set = nfnl_set(inst, index);
+ set = ip_set(inst, index);
if (set)
__ip_set_get(set);
else
@@ -666,7 +665,7 @@ ip_set_nfnl_put(struct net *net, ip_set_id_t index)
nfnl_lock(NFNL_SUBSYS_IPSET);
if (!inst->is_deleted) { /* already deleted from ip_set_net_exit() */
- set = nfnl_set(inst, index);
+ set = ip_set(inst, index);
if (set != NULL)
__ip_set_put(set);
}
@@ -734,7 +733,7 @@ find_set_and_id(struct ip_set_net *inst, const char *name, ip_set_id_t *id)
*id = IPSET_INVALID_ID;
for (i = 0; i < inst->ip_set_max; i++) {
- set = nfnl_set(inst, i);
+ set = ip_set(inst, i);
if (set != NULL && STREQ(set->name, name)) {
*id = i;
break;
@@ -760,7 +759,7 @@ find_free_id(struct ip_set_net *inst, const char *name, ip_set_id_t *index,
*index = IPSET_INVALID_ID;
for (i = 0; i < inst->ip_set_max; i++) {
- s = nfnl_set(inst, i);
+ s = ip_set(inst, i);
if (s == NULL) {
if (*index == IPSET_INVALID_ID)
*index = i;
@@ -883,7 +882,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
if (!list)
goto cleanup;
/* nfnl mutex is held, both lists are valid */
- tmp = nfnl_dereference(inst->ip_set_list);
+ tmp = ip_set_dereference(inst->ip_set_list);
memcpy(list, tmp, sizeof(struct ip_set *) * inst->ip_set_max);
rcu_assign_pointer(inst->ip_set_list, list);
/* Make sure all current packets have passed through */
@@ -900,7 +899,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
* Finally! Add our shiny new set to the list, and be done.
*/
pr_debug("create: '%s' created with index %u!\n", set->name, index);
- nfnl_set(inst, index) = set;
+ ip_set(inst, index) = set;
return ret;
@@ -925,10 +924,10 @@ ip_set_setname_policy[IPSET_ATTR_CMD_MAX + 1] = {
static void
ip_set_destroy_set(struct ip_set_net *inst, ip_set_id_t index)
{
- struct ip_set *set = nfnl_set(inst, index);
+ struct ip_set *set = ip_set(inst, index);
pr_debug("set: %s\n", set->name);
- nfnl_set(inst, index) = NULL;
+ ip_set(inst, index) = NULL;
/* Must call it without holding any lock */
set->variant->destroy(set);
@@ -962,7 +961,7 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb,
read_lock_bh(&ip_set_ref_lock);
if (!attr[IPSET_ATTR_SETNAME]) {
for (i = 0; i < inst->ip_set_max; i++) {
- s = nfnl_set(inst, i);
+ s = ip_set(inst, i);
if (s != NULL && s->ref) {
ret = -IPSET_ERR_BUSY;
goto out;
@@ -970,7 +969,7 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb,
}
read_unlock_bh(&ip_set_ref_lock);
for (i = 0; i < inst->ip_set_max; i++) {
- s = nfnl_set(inst, i);
+ s = ip_set(inst, i);
if (s != NULL)
ip_set_destroy_set(inst, i);
}
@@ -1020,7 +1019,7 @@ ip_set_flush(struct sock *ctnl, struct sk_buff *skb,
if (!attr[IPSET_ATTR_SETNAME]) {
for (i = 0; i < inst->ip_set_max; i++) {
- s = nfnl_set(inst, i);
+ s = ip_set(inst, i);
if (s != NULL)
ip_set_flush_set(s);
}
@@ -1074,7 +1073,7 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb,
name2 = nla_data(attr[IPSET_ATTR_SETNAME2]);
for (i = 0; i < inst->ip_set_max; i++) {
- s = nfnl_set(inst, i);
+ s = ip_set(inst, i);
if (s != NULL && STREQ(s->name, name2)) {
ret = -IPSET_ERR_EXIST_SETNAME2;
goto out;
@@ -1134,8 +1133,8 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb,
write_lock_bh(&ip_set_ref_lock);
swap(from->ref, to->ref);
- nfnl_set(inst, from_id) = to;
- nfnl_set(inst, to_id) = from;
+ ip_set(inst, from_id) = to;
+ ip_set(inst, to_id) = from;
write_unlock_bh(&ip_set_ref_lock);
return 0;
@@ -1157,7 +1156,7 @@ ip_set_dump_done(struct netlink_callback *cb)
struct ip_set_net *inst = (struct ip_set_net *)cb->args[IPSET_CB_NET];
if (cb->args[IPSET_CB_ARG0]) {
pr_debug("release set %s\n",
- nfnl_set(inst, cb->args[IPSET_CB_INDEX])->name);
+ ip_set(inst, cb->args[IPSET_CB_INDEX])->name);
__ip_set_put_byindex(inst,
(ip_set_id_t) cb->args[IPSET_CB_INDEX]);
}
@@ -1254,7 +1253,7 @@ dump_last:
dump_type, dump_flags, cb->args[IPSET_CB_INDEX]);
for (; cb->args[IPSET_CB_INDEX] < max; cb->args[IPSET_CB_INDEX]++) {
index = (ip_set_id_t) cb->args[IPSET_CB_INDEX];
- set = nfnl_set(inst, index);
+ set = ip_set(inst, index);
if (set == NULL) {
if (dump_type == DUMP_ONE) {
ret = -ENOENT;
@@ -1332,7 +1331,7 @@ next_set:
release_refcount:
/* If there was an error or set is done, release set */
if (ret || !cb->args[IPSET_CB_ARG0]) {
- pr_debug("release set %s\n", nfnl_set(inst, index)->name);
+ pr_debug("release set %s\n", ip_set(inst, index)->name);
__ip_set_put_byindex(inst, index);
cb->args[IPSET_CB_ARG0] = 0;
}
@@ -1887,7 +1886,7 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len)
find_set_and_id(inst, req_get->set.name, &id);
req_get->set.index = id;
if (id != IPSET_INVALID_ID)
- req_get->family = nfnl_set(inst, id)->family;
+ req_get->family = ip_set(inst, id)->family;
nfnl_unlock(NFNL_SUBSYS_IPSET);
goto copy;
}
@@ -1901,7 +1900,7 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len)
goto done;
}
nfnl_lock(NFNL_SUBSYS_IPSET);
- set = nfnl_set(inst, req_get->set.index);
+ set = ip_set(inst, req_get->set.index);
strncpy(req_get->set.name, set ? set->name : "",
IPSET_MAXNAMELEN);
nfnl_unlock(NFNL_SUBSYS_IPSET);
@@ -1945,7 +1944,6 @@ ip_set_net_init(struct net *net)
return -ENOMEM;
inst->is_deleted = 0;
rcu_assign_pointer(inst->ip_set_list, list);
- pr_notice("ip_set: protocol %u\n", IPSET_PROTOCOL);
return 0;
}
@@ -1960,7 +1958,7 @@ ip_set_net_exit(struct net *net)
inst->is_deleted = 1; /* flag for ip_set_nfnl_put */
for (i = 0; i < inst->ip_set_max; i++) {
- set = nfnl_set(inst, i);
+ set = ip_set(inst, i);
if (set != NULL)
ip_set_destroy_set(inst, i);
}
@@ -1996,6 +1994,7 @@ ip_set_init(void)
nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
return ret;
}
+ pr_info("ip_set: protocol %u\n", IPSET_PROTOCOL);
return 0;
}
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index be6932ad3a8..61c7fb05280 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -263,6 +263,9 @@ struct htype {
u32 maxelem; /* max elements in the hash */
u32 elements; /* current element (vs timeout) */
u32 initval; /* random jhash init value */
+#ifdef IP_SET_HASH_WITH_MARKMASK
+ u32 markmask; /* markmask value for mark mask to store */
+#endif
struct timer_list gc; /* garbage collection when timeout enabled */
struct mtype_elem next; /* temporary storage for uadd */
#ifdef IP_SET_HASH_WITH_MULTI
@@ -454,6 +457,9 @@ mtype_same_set(const struct ip_set *a, const struct ip_set *b)
#ifdef IP_SET_HASH_WITH_NETMASK
x->netmask == y->netmask &&
#endif
+#ifdef IP_SET_HASH_WITH_MARKMASK
+ x->markmask == y->markmask &&
+#endif
a->extensions == b->extensions;
}
@@ -627,6 +633,18 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
bool flag_exist = flags & IPSET_FLAG_EXIST;
u32 key, multi = 0;
+ if (h->elements >= h->maxelem && SET_WITH_FORCEADD(set)) {
+ rcu_read_lock_bh();
+ t = rcu_dereference_bh(h->table);
+ key = HKEY(value, h->initval, t->htable_bits);
+ n = hbucket(t,key);
+ if (n->pos) {
+ /* Choosing the first entry in the array to replace */
+ j = 0;
+ goto reuse_slot;
+ }
+ rcu_read_unlock_bh();
+ }
if (SET_WITH_TIMEOUT(set) && h->elements >= h->maxelem)
/* FIXME: when set is full, we slow down here */
mtype_expire(set, h, NLEN(set->family), set->dsize);
@@ -908,6 +926,10 @@ mtype_head(struct ip_set *set, struct sk_buff *skb)
nla_put_u8(skb, IPSET_ATTR_NETMASK, h->netmask))
goto nla_put_failure;
#endif
+#ifdef IP_SET_HASH_WITH_MARKMASK
+ if (nla_put_u32(skb, IPSET_ATTR_MARKMASK, h->markmask))
+ goto nla_put_failure;
+#endif
if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) ||
nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)))
goto nla_put_failure;
@@ -1016,6 +1038,9 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
struct nlattr *tb[], u32 flags)
{
u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM;
+#ifdef IP_SET_HASH_WITH_MARKMASK
+ u32 markmask;
+#endif
u8 hbits;
#ifdef IP_SET_HASH_WITH_NETMASK
u8 netmask;
@@ -1026,6 +1051,10 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6))
return -IPSET_ERR_INVALID_FAMILY;
+
+#ifdef IP_SET_HASH_WITH_MARKMASK
+ markmask = 0xffffffff;
+#endif
#ifdef IP_SET_HASH_WITH_NETMASK
netmask = set->family == NFPROTO_IPV4 ? 32 : 128;
pr_debug("Create set %s with family %s\n",
@@ -1034,6 +1063,9 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) ||
+#ifdef IP_SET_HASH_WITH_MARKMASK
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_MARKMASK) ||
+#endif
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
return -IPSET_ERR_PROTOCOL;
@@ -1057,6 +1089,14 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
return -IPSET_ERR_INVALID_NETMASK;
}
#endif
+#ifdef IP_SET_HASH_WITH_MARKMASK
+ if (tb[IPSET_ATTR_MARKMASK]) {
+ markmask = ntohl(nla_get_u32(tb[IPSET_ATTR_MARKMASK]));
+
+ if ((markmask > 4294967295u) || markmask == 0)
+ return -IPSET_ERR_INVALID_MARKMASK;
+ }
+#endif
hsize = sizeof(*h);
#ifdef IP_SET_HASH_WITH_NETS
@@ -1071,6 +1111,9 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
#ifdef IP_SET_HASH_WITH_NETMASK
h->netmask = netmask;
#endif
+#ifdef IP_SET_HASH_WITH_MARKMASK
+ h->markmask = markmask;
+#endif
get_random_bytes(&h->initval, sizeof(h->initval));
set->timeout = IPSET_NO_TIMEOUT;
diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c
index e65fc2423d5..dd40607f878 100644
--- a/net/netfilter/ipset/ip_set_hash_ip.c
+++ b/net/netfilter/ipset/ip_set_hash_ip.c
@@ -25,7 +25,8 @@
#define IPSET_TYPE_REV_MIN 0
/* 1 Counters support */
-#define IPSET_TYPE_REV_MAX 2 /* Comments support */
+/* 2 Comments support */
+#define IPSET_TYPE_REV_MAX 3 /* Forceadd support */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c
new file mode 100644
index 00000000000..4eff0a29725
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_hash_ipmark.c
@@ -0,0 +1,321 @@
+/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ * Copyright (C) 2013 Smoothwall Ltd. <vytas.dauksa@smoothwall.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* Kernel module implementing an IP set type: the hash:ip,mark type */
+
+#include <linux/jhash.h>
+#include <linux/module.h>
+#include <linux/ip.h>
+#include <linux/skbuff.h>
+#include <linux/errno.h>
+#include <linux/random.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/netlink.h>
+#include <net/tcp.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter/ipset/pfxlen.h>
+#include <linux/netfilter/ipset/ip_set.h>
+#include <linux/netfilter/ipset/ip_set_hash.h>
+
+#define IPSET_TYPE_REV_MIN 0 /* lowest revision advertised by hash_ipmark_type */
+#define IPSET_TYPE_REV_MAX 1 /* Forceadd support */
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Vytas Dauksa <vytas.dauksa@smoothwall.net>");
+IP_SET_MODULE_DESC("hash:ip,mark", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
+MODULE_ALIAS("ip_set_hash:ip,mark");
+
+/* Type specific function prefix (e.g. the create callback is hash_ipmark_create) */
+#define HTYPE hash_ipmark
+#define IP_SET_HASH_WITH_MARKMASK /* sets of this type carry a per-set mark mask (h->markmask) */
+
+/* IPv4 variant */
+
+/* Member elements: one IPv4 address paired with a packet mark */
+struct hash_ipmark4_elem {
+ __be32 ip; /* IPv4 address */
+ __u32 mark; /* mark; passed through htonl()/ntohl() when exchanged over netlink */
+};
+
+/* Common functions */
+
+/* Tell whether two IPv4 elements denote the same set member.
+ *
+ * Two elements match only when both the address and the mark are equal.
+ * @multi is not used by this type.
+ */
+static inline bool
+hash_ipmark4_data_equal(const struct hash_ipmark4_elem *ip1,
+			const struct hash_ipmark4_elem *ip2,
+			u32 *multi)
+{
+	if (ip1->ip != ip2->ip)
+		return false;
+
+	return ip1->mark == ip2->mark;
+}
+
+/* Dump one IPv4 element into a netlink message.
+ *
+ * Emits IPSET_ATTR_IP and IPSET_ATTR_MARK (the mark is converted with
+ * htonl() before it is written).
+ *
+ * Returns false on success and true when an attribute could not be added.
+ * The function is declared bool, so it returns false/true rather than 0/1,
+ * matching hash_netnet4_data_list().
+ */
+static bool
+hash_ipmark4_data_list(struct sk_buff *skb,
+		       const struct hash_ipmark4_elem *data)
+{
+	if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip) ||
+	    nla_put_net32(skb, IPSET_ATTR_MARK, htonl(data->mark)))
+		goto nla_put_failure;
+	return false;
+
+nla_put_failure:
+	return true;
+}
+/* Record the address of @d in @next.
+ *
+ * Only the ip field is copied; the mark is left untouched.
+ * hash_ipmark4_uadt() reads h->next.ip to pick the address at which a
+ * retried range operation resumes.
+ * NOTE(review): presumably called by the generic code in ip_set_hash_gen.h
+ * with &h->next -- confirm there.
+ */
+static inline void
+hash_ipmark4_data_next(struct hash_ipmark4_elem *next,
+ const struct hash_ipmark4_elem *d)
+{
+ next->ip = d->ip;
+}
+/* Parameters for the IPv4 inclusion of the generic hash template.
+ * NOTE(review): how each macro is consumed is defined in ip_set_hash_gen.h;
+ * verify there.
+ */
+#define MTYPE hash_ipmark4 /* prefix shared by the hash_ipmark4_* helpers above */
+#define PF 4
+#define HOST_MASK 32 /* NOTE(review): presumably the IPv4 address width in bits */
+#define HKEY_DATALEN sizeof(struct hash_ipmark4_elem)
+#include "ip_set_hash_gen.h"
+
+/* Kernel-side add/del/test for the IPv4 variant: build the element from a
+ * packet and hand it to the operation selected by @adt.
+ *
+ * The mark is skb->mark masked with the set's markmask.  The address is
+ * filled in by ip4addrptr(), which receives the IPSET_DIM_ONE_SRC bit of
+ * opt->flags.  @par is not used here.
+ *
+ * Returns whatever the selected add/del/test function returns.
+ */
+static int
+hash_ipmark4_kadt(struct ip_set *set, const struct sk_buff *skb,
+		  const struct xt_action_param *par,
+		  enum ipset_adt adt, struct ip_set_adt_opt *opt)
+{
+	const struct hash_ipmark *map = set->data;
+	ipset_adtfn op = set->variant->adt[adt];
+	struct hash_ipmark4_elem elem = { };
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
+
+	elem.mark = skb->mark & map->markmask;
+	ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &elem.ip);
+
+	return op(set, &elem, &ext, &opt->ext, opt->cmdflags);
+}
+
+/* Userspace add/del/test for the IPv4 variant, driven by netlink attributes.
+ *
+ * @set:     the set being operated on
+ * @tb:      parsed attributes; IPSET_ATTR_IP and IPSET_ATTR_MARK are
+ *           mandatory, IPSET_ATTR_IP_TO or IPSET_ATTR_CIDR optionally turn
+ *           the request into a range operation
+ * @adt:     which operation (add, del or test) to run
+ * @lineno:  updated from IPSET_ATTR_LINENO when that attribute is present
+ * @flags:   command flags, forwarded to the operation and ip_set_eexist()
+ * @retried: when true, a range operation resumes at h->next.ip instead of
+ *           the start of the range
+ *
+ * The mark is masked with the set's markmask before use.
+ *
+ * Returns 0 on success, -IPSET_ERR_PROTOCOL or -IPSET_ERR_INVALID_CIDR for
+ * malformed input, or the error reported by a helper or by the operation
+ * itself.  Results accepted by ip_set_eexist() are reported as success.
+ */
+static int
+hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
+		  enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+{
+	const struct hash_ipmark *h = set->data;
+	ipset_adtfn adtfn = set->variant->adt[adt];
+	struct hash_ipmark4_elem e = { };
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+	u32 ip, ip_to = 0;
+	int ret;
+
+	if (unlikely(!tb[IPSET_ATTR_IP] ||
+		     !ip_set_attr_netorder(tb, IPSET_ATTR_MARK) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+		return -IPSET_ERR_PROTOCOL;
+
+	if (tb[IPSET_ATTR_LINENO])
+		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+	/* Check each helper on its own: combining them with || would reduce
+	 * any failure to the value 1 and lose the real error code.
+	 */
+	ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &e.ip);
+	if (ret)
+		return ret;
+
+	ret = ip_set_get_extensions(set, tb, &ext);
+	if (ret)
+		return ret;
+
+	e.mark = ntohl(nla_get_u32(tb[IPSET_ATTR_MARK]));
+	e.mark &= h->markmask;
+
+	/* Single element: tests, and requests without a range attribute */
+	if (adt == IPSET_TEST ||
+	    !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR])) {
+		ret = adtfn(set, &e, &ext, &ext, flags);
+		return ip_set_eexist(ret, flags) ? 0 : ret;
+	}
+
+	ip_to = ip = ntohl(e.ip);
+	if (tb[IPSET_ATTR_IP_TO]) {
+		ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to);
+		if (ret)
+			return ret;
+		/* Accept the range bounds in either order */
+		if (ip > ip_to)
+			swap(ip, ip_to);
+	} else if (tb[IPSET_ATTR_CIDR]) {
+		u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+
+		if (!cidr || cidr > 32)
+			return -IPSET_ERR_INVALID_CIDR;
+		ip_set_mask_from_to(ip, ip_to, cidr);
+	}
+
+	if (retried)
+		ip = ntohl(h->next.ip);
+	for (; !before(ip_to, ip); ip++) {
+		e.ip = htonl(ip);
+		ret = adtfn(set, &e, &ext, &ext, flags);
+
+		if (ret && !ip_set_eexist(ret, flags))
+			return ret;
+		else
+			ret = 0;
+	}
+	return ret;
+}
+
+/* IPv6 variant */
+/* Member element: one IPv6 address paired with a packet mark */
+struct hash_ipmark6_elem {
+ union nf_inet_addr ip; /* IPv6 address; the helpers below use ip.in6 */
+ __u32 mark; /* mark; passed through htonl()/ntohl() when exchanged over netlink */
+};
+
+/* Common functions */
+
+/* Tell whether two IPv6 elements denote the same set member.
+ *
+ * Two elements match only when both the address and the mark are equal.
+ * @multi is not used by this type.
+ */
+static inline bool
+hash_ipmark6_data_equal(const struct hash_ipmark6_elem *ip1,
+			const struct hash_ipmark6_elem *ip2,
+			u32 *multi)
+{
+	if (!ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6))
+		return false;
+
+	return ip1->mark == ip2->mark;
+}
+
+/* Dump one IPv6 element into a netlink message.
+ *
+ * Emits IPSET_ATTR_IP and IPSET_ATTR_MARK (the mark is converted with
+ * htonl() before it is written).
+ *
+ * Returns false on success and true when an attribute could not be added.
+ * The function is declared bool, so it returns false/true rather than 0/1,
+ * matching hash_netnet6_data_list().
+ */
+static bool
+hash_ipmark6_data_list(struct sk_buff *skb,
+		       const struct hash_ipmark6_elem *data)
+{
+	if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &data->ip.in6) ||
+	    nla_put_net32(skb, IPSET_ATTR_MARK, htonl(data->mark)))
+		goto nla_put_failure;
+	return false;
+
+nla_put_failure:
+	return true;
+}
+/* No-op: the IPv6 variant records no "next" element.
+ *
+ * hash_ipmark6_uadt() rejects IPSET_ATTR_IP_TO and IPSET_ATTR_CIDR, so it
+ * never walks an address range that would have to be resumed.
+ * NOTE(review): @next is typed as the IPv4 element -- presumably because the
+ * set structure's shared next field is declared with the IPv4 element type;
+ * confirm against ip_set_hash_gen.h before changing the signature.
+ */
+static inline void
+hash_ipmark6_data_next(struct hash_ipmark4_elem *next,
+ const struct hash_ipmark6_elem *d)
+{
+}
+/* Drop the IPv4 template parameters and set up the IPv6 ones before the
+ * generic hash template is included a second time.
+ */
+#undef MTYPE
+#undef PF
+#undef HOST_MASK
+#undef HKEY_DATALEN
+
+#define MTYPE hash_ipmark6 /* prefix shared by the hash_ipmark6_* helpers above */
+#define PF 6
+#define HOST_MASK 128 /* NOTE(review): presumably the IPv6 address width in bits */
+#define HKEY_DATALEN sizeof(struct hash_ipmark6_elem)
+#define IP_SET_EMIT_CREATE /* NOTE(review): presumably makes this inclusion emit hash_ipmark_create() -- confirm in ip_set_hash_gen.h */
+#include "ip_set_hash_gen.h"
+
+
+/* Kernel-side add/del/test for the IPv6 variant: build the element from a
+ * packet and hand it to the operation selected by @adt.
+ *
+ * The mark is skb->mark masked with the set's markmask.  The address is
+ * filled in by ip6addrptr(), which receives the IPSET_DIM_ONE_SRC bit of
+ * opt->flags.  @par is not used here.
+ *
+ * Returns whatever the selected add/del/test function returns.
+ */
+static int
+hash_ipmark6_kadt(struct ip_set *set, const struct sk_buff *skb,
+		  const struct xt_action_param *par,
+		  enum ipset_adt adt, struct ip_set_adt_opt *opt)
+{
+	const struct hash_ipmark *map = set->data;
+	ipset_adtfn op = set->variant->adt[adt];
+	struct hash_ipmark6_elem elem = { };
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
+
+	elem.mark = skb->mark & map->markmask;
+	ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &elem.ip.in6);
+
+	return op(set, &elem, &ext, &opt->ext, opt->cmdflags);
+}
+
+/* Userspace add/del/test for the IPv6 variant, driven by netlink attributes.
+ *
+ * @set:     the set being operated on
+ * @tb:      parsed attributes; IPSET_ATTR_IP and IPSET_ATTR_MARK are
+ *           mandatory, IPSET_ATTR_IP_TO and IPSET_ATTR_CIDR are rejected
+ * @adt:     which operation (add, del or test) to run
+ * @lineno:  updated from IPSET_ATTR_LINENO when that attribute is present
+ * @flags:   command flags, forwarded to the operation and ip_set_eexist()
+ * @retried: not used by this variant
+ *
+ * The mark is masked with the set's markmask before use.
+ *
+ * Returns 0 on success, -IPSET_ERR_PROTOCOL for malformed input, or the
+ * error reported by a helper or by the operation itself.  Results accepted
+ * by ip_set_eexist() are reported as success.
+ */
+static int
+hash_ipmark6_uadt(struct ip_set *set, struct nlattr *tb[],
+		  enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+{
+	const struct hash_ipmark *h = set->data;
+	ipset_adtfn adtfn = set->variant->adt[adt];
+	struct hash_ipmark6_elem e = { };
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+	int ret;
+
+	if (unlikely(!tb[IPSET_ATTR_IP] ||
+		     !ip_set_attr_netorder(tb, IPSET_ATTR_MARK) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+		     tb[IPSET_ATTR_IP_TO] ||
+		     tb[IPSET_ATTR_CIDR]))
+		return -IPSET_ERR_PROTOCOL;
+
+	if (tb[IPSET_ATTR_LINENO])
+		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+	/* Check each helper on its own: combining them with || would reduce
+	 * any failure to the value 1 and lose the real error code.
+	 */
+	ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip);
+	if (ret)
+		return ret;
+
+	ret = ip_set_get_extensions(set, tb, &ext);
+	if (ret)
+		return ret;
+
+	e.mark = ntohl(nla_get_u32(tb[IPSET_ATTR_MARK]));
+	e.mark &= h->markmask;
+
+	/* Test, add and delete all reduce to one call with the same result
+	 * mapping, so no separate IPSET_TEST branch is needed.
+	 */
+	ret = adtfn(set, &e, &ext, &ext, flags);
+	return ip_set_eexist(ret, flags) ? 0 : ret;
+}
+/* Type descriptor for "hash:ip,mark", handed to ip_set_type_register() by
+ * hash_ipmark_init().  The two policy tables list the netlink attributes
+ * accepted when a set is created and when elements are added, deleted or
+ * tested.
+ */
+static struct ip_set_type hash_ipmark_type __read_mostly = {
+ .name = "hash:ip,mark",
+ .protocol = IPSET_PROTOCOL,
+ .features = IPSET_TYPE_IP | IPSET_TYPE_MARK,
+ .dimension = IPSET_DIM_TWO,
+ .family = NFPROTO_UNSPEC,
+ .revision_min = IPSET_TYPE_REV_MIN,
+ .revision_max = IPSET_TYPE_REV_MAX,
+ .create = hash_ipmark_create,
+ .create_policy = {
+ [IPSET_ATTR_MARKMASK] = { .type = NLA_U32 },
+ [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 },
+ [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 },
+ [IPSET_ATTR_PROBES] = { .type = NLA_U8 },
+ [IPSET_ATTR_RESIZE] = { .type = NLA_U8 },
+ [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
+ [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
+ },
+ .adt_policy = {
+ [IPSET_ATTR_IP] = { .type = NLA_NESTED },
+ [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED }, /* rejected by hash_ipmark6_uadt() */
+ [IPSET_ATTR_MARK] = { .type = NLA_U32 },
+ [IPSET_ATTR_CIDR] = { .type = NLA_U8 }, /* rejected by hash_ipmark6_uadt() */
+ [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
+ [IPSET_ATTR_LINENO] = { .type = NLA_U32 },
+ [IPSET_ATTR_BYTES] = { .type = NLA_U64 },
+ [IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
+ [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ },
+ .me = THIS_MODULE,
+};
+/* Module init: register the hash:ip,mark set type.
+ * Returns the result of ip_set_type_register().
+ */
+static int __init
+hash_ipmark_init(void)
+{
+ return ip_set_type_register(&hash_ipmark_type);
+}
+/* Module exit: unregister the hash:ip,mark set type. */
+static void __exit
+hash_ipmark_fini(void)
+{
+ ip_set_type_unregister(&hash_ipmark_type);
+}
+/* Declare the init and exit handlers defined above for this module. */
+module_init(hash_ipmark_init);
+module_exit(hash_ipmark_fini);
diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c
index 525a595dd1f..7597b82a8b0 100644
--- a/net/netfilter/ipset/ip_set_hash_ipport.c
+++ b/net/netfilter/ipset/ip_set_hash_ipport.c
@@ -27,7 +27,8 @@
#define IPSET_TYPE_REV_MIN 0
/* 1 SCTP and UDPLITE support added */
/* 2 Counters support added */
-#define IPSET_TYPE_REV_MAX 3 /* Comments support added */
+/* 3 Comments support added */
+#define IPSET_TYPE_REV_MAX 4 /* Forceadd support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c
index f5636631466..672655ffd57 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportip.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportip.c
@@ -27,7 +27,8 @@
#define IPSET_TYPE_REV_MIN 0
/* 1 SCTP and UDPLITE support added */
/* 2 Counters support added */
-#define IPSET_TYPE_REV_MAX 3 /* Comments support added */
+/* 3 Comments support added */
+#define IPSET_TYPE_REV_MAX 4 /* Forceadd support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index 5d87fe8a41f..7308d84f927 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -29,7 +29,8 @@
/* 2 Range as input support for IPv4 added */
/* 3 nomatch flag support added */
/* 4 Counters support added */
-#define IPSET_TYPE_REV_MAX 5 /* Comments support added */
+/* 5 Comments support added */
+#define IPSET_TYPE_REV_MAX 6 /* Forceadd support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
index 8295cf4f9fd..4c7d495783a 100644
--- a/net/netfilter/ipset/ip_set_hash_net.c
+++ b/net/netfilter/ipset/ip_set_hash_net.c
@@ -26,7 +26,8 @@
/* 1 Range as input support for IPv4 added */
/* 2 nomatch flag support added */
/* 3 Counters support added */
-#define IPSET_TYPE_REV_MAX 4 /* Comments support added */
+/* 4 Comments support added */
+#define IPSET_TYPE_REV_MAX 5 /* Forceadd support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index b827a0f1f35..db2606805b3 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -27,7 +27,8 @@
/* 1 nomatch flag support added */
/* 2 /0 support added */
/* 3 Counters support added */
-#define IPSET_TYPE_REV_MAX 4 /* Comments support added */
+/* 4 Comments support added */
+#define IPSET_TYPE_REV_MAX 5 /* Forceadd support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c
index 6226803fc49..3e99987e4bf 100644
--- a/net/netfilter/ipset/ip_set_hash_netnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netnet.c
@@ -24,7 +24,7 @@
#include <linux/netfilter/ipset/ip_set_hash.h>
#define IPSET_TYPE_REV_MIN 0
-#define IPSET_TYPE_REV_MAX 0
+#define IPSET_TYPE_REV_MAX 1 /* Forceadd support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Oliver Smith <oliver@8.c.9.b.0.7.4.0.1.0.0.2.ip6.arpa>");
@@ -112,10 +112,10 @@ hash_netnet4_data_list(struct sk_buff *skb,
(flags &&
nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
goto nla_put_failure;
- return 0;
+ return false;
nla_put_failure:
- return 1;
+ return true;
}
static inline void
@@ -334,10 +334,10 @@ hash_netnet6_data_list(struct sk_buff *skb,
(flags &&
nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
goto nla_put_failure;
- return 0;
+ return false;
nla_put_failure:
- return 1;
+ return true;
}
static inline void
diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
index 7097fb0141b..1c645fbd09c 100644
--- a/net/netfilter/ipset/ip_set_hash_netport.c
+++ b/net/netfilter/ipset/ip_set_hash_netport.c
@@ -28,7 +28,8 @@
/* 2 Range as input support for IPv4 added */
/* 3 nomatch flag support added */
/* 4 Counters support added */
-#define IPSET_TYPE_REV_MAX 5 /* Comments support added */
+/* 5 Comments support added */
+#define IPSET_TYPE_REV_MAX 6 /* Forceadd support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c
index 703d1192a6a..c0d2ba73f8b 100644
--- a/net/netfilter/ipset/ip_set_hash_netportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netportnet.c
@@ -25,7 +25,8 @@
#include <linux/netfilter/ipset/ip_set_hash.h>
#define IPSET_TYPE_REV_MIN 0
-#define IPSET_TYPE_REV_MAX 0 /* Comments support added */
+/* 0 Comments support added */
+#define IPSET_TYPE_REV_MAX 1 /* Forceadd support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Oliver Smith <oliver@8.c.9.b.0.7.4.0.1.0.0.2.ip6.arpa>");
diff --git a/net/netfilter/ipset/pfxlen.c b/net/netfilter/ipset/pfxlen.c
index 4f29fa97044..04d15fdc99e 100644
--- a/net/netfilter/ipset/pfxlen.c
+++ b/net/netfilter/ipset/pfxlen.c
@@ -7,8 +7,8 @@
#define E(a, b, c, d) \
{.ip6 = { \
- __constant_htonl(a), __constant_htonl(b), \
- __constant_htonl(c), __constant_htonl(d), \
+ htonl(a), htonl(b), \
+ htonl(c), htonl(d), \
} }
/*
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index a8eb0a89326..610e19c0e13 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -797,7 +797,6 @@ static void ip_vs_conn_expire(unsigned long data)
ip_vs_control_del(cp);
if (cp->flags & IP_VS_CONN_F_NFCT) {
- ip_vs_conn_drop_conntrack(cp);
/* Do not access conntracks during subsys cleanup
* because nf_conntrack_find_get can not be used after
* conntrack cleanup for the net.
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 4f26ee46b51..e6836755c45 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -97,7 +97,7 @@ const char *ip_vs_proto_name(unsigned int proto)
return "ICMPv6";
#endif
default:
- sprintf(buf, "IP_%d", proto);
+ sprintf(buf, "IP_%u", proto);
return buf;
}
}
@@ -1392,15 +1392,19 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
if (ipip) {
__be32 info = ic->un.gateway;
+ __u8 type = ic->type;
+ __u8 code = ic->code;
/* Update the MTU */
if (ic->type == ICMP_DEST_UNREACH &&
ic->code == ICMP_FRAG_NEEDED) {
struct ip_vs_dest *dest = cp->dest;
u32 mtu = ntohs(ic->un.frag.mtu);
+ __be16 frag_off = cih->frag_off;
/* Strip outer IP and ICMP, go to IPIP header */
- __skb_pull(skb, ihl + sizeof(_icmph));
+ if (pskb_pull(skb, ihl + sizeof(_icmph)) == NULL)
+ goto ignore_ipip;
offset2 -= ihl + sizeof(_icmph);
skb_reset_network_header(skb);
IP_VS_DBG(12, "ICMP for IPIP %pI4->%pI4: mtu=%u\n",
@@ -1408,7 +1412,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
ipv4_update_pmtu(skb, dev_net(skb->dev),
mtu, 0, 0, 0, 0);
/* Client uses PMTUD? */
- if (!(cih->frag_off & htons(IP_DF)))
+ if (!(frag_off & htons(IP_DF)))
goto ignore_ipip;
/* Prefer the resulting PMTU */
if (dest) {
@@ -1427,12 +1431,13 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
/* Strip outer IP, ICMP and IPIP, go to IP header of
* original request.
*/
- __skb_pull(skb, offset2);
+ if (pskb_pull(skb, offset2) == NULL)
+ goto ignore_ipip;
skb_reset_network_header(skb);
IP_VS_DBG(12, "Sending ICMP for %pI4->%pI4: t=%u, c=%u, i=%u\n",
&ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr,
- ic->type, ic->code, ntohl(info));
- icmp_send(skb, ic->type, ic->code, info);
+ type, code, ntohl(info));
+ icmp_send(skb, type, code, info);
/* ICMP can be shorter but anyways, account it */
ip_vs_out_stats(cp, skb);
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 35be035ee0c..581a6584ed0 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -2177,10 +2177,10 @@ static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
__u64 inbytes, outbytes;
do {
- start = u64_stats_fetch_begin_bh(&u->syncp);
+ start = u64_stats_fetch_begin_irq(&u->syncp);
inbytes = u->ustats.inbytes;
outbytes = u->ustats.outbytes;
- } while (u64_stats_fetch_retry_bh(&u->syncp, start));
+ } while (u64_stats_fetch_retry_irq(&u->syncp, start));
seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n",
i, u->ustats.conns, u->ustats.inpkts,
@@ -3580,7 +3580,7 @@ out:
}
-static const struct genl_ops ip_vs_genl_ops[] __read_mostly = {
+static const struct genl_ops ip_vs_genl_ops[] = {
{
.cmd = IPVS_CMD_NEW_SERVICE,
.flags = GENL_ADMIN_PERM,
@@ -3778,6 +3778,7 @@ static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net)
cancel_delayed_work_sync(&ipvs->defense_work);
cancel_work_sync(&ipvs->defense_work.work);
unregister_net_sysctl_table(ipvs->sysctl_hdr);
+ ip_vs_stop_estimator(net, &ipvs->tot_stats);
}
#else
@@ -3840,7 +3841,6 @@ void __net_exit ip_vs_control_net_cleanup(struct net *net)
struct netns_ipvs *ipvs = net_ipvs(net);
ip_vs_trash_cleanup(net);
- ip_vs_stop_estimator(net, &ipvs->tot_stats);
ip_vs_control_net_cleanup_sysctl(net);
remove_proc_entry("ip_vs_stats_percpu", net->proc_net);
remove_proc_entry("ip_vs_stats", net->proc_net);
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index ca056a331e6..547ff33c1ef 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -238,7 +238,7 @@ static void ip_vs_lblc_flush(struct ip_vs_service *svc)
spin_lock_bh(&svc->sched_lock);
tbl->dead = 1;
- for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) {
+ for (i = 0; i < IP_VS_LBLC_TAB_SIZE; i++) {
hlist_for_each_entry_safe(en, next, &tbl->bucket[i], list) {
ip_vs_lblc_del(en);
atomic_dec(&tbl->entries);
@@ -265,7 +265,7 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
unsigned long now = jiffies;
int i, j;
- for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) {
+ for (i = 0, j = tbl->rover; i < IP_VS_LBLC_TAB_SIZE; i++) {
j = (j + 1) & IP_VS_LBLC_TAB_MASK;
spin_lock(&svc->sched_lock);
@@ -321,7 +321,7 @@ static void ip_vs_lblc_check_expire(unsigned long data)
if (goal > tbl->max_size/2)
goal = tbl->max_size/2;
- for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) {
+ for (i = 0, j = tbl->rover; i < IP_VS_LBLC_TAB_SIZE; i++) {
j = (j + 1) & IP_VS_LBLC_TAB_MASK;
spin_lock(&svc->sched_lock);
@@ -340,7 +340,7 @@ static void ip_vs_lblc_check_expire(unsigned long data)
tbl->rover = j;
out:
- mod_timer(&tbl->periodic_timer, jiffies+CHECK_EXPIRE_INTERVAL);
+ mod_timer(&tbl->periodic_timer, jiffies + CHECK_EXPIRE_INTERVAL);
}
@@ -363,7 +363,7 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc)
/*
* Initialize the hash buckets
*/
- for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) {
+ for (i = 0; i < IP_VS_LBLC_TAB_SIZE; i++) {
INIT_HLIST_HEAD(&tbl->bucket[i]);
}
tbl->max_size = IP_VS_LBLC_TAB_SIZE*16;
@@ -536,8 +536,7 @@ out:
/*
* IPVS LBLC Scheduler structure
*/
-static struct ip_vs_scheduler ip_vs_lblc_scheduler =
-{
+static struct ip_vs_scheduler ip_vs_lblc_scheduler = {
.name = "lblc",
.refcnt = ATOMIC_INIT(0),
.module = THIS_MODULE,
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index c47444e4cf8..73ba1cc7a88 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -562,7 +562,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
ip_send_check(iph);
/* Another hack: avoid icmp_send in ip_fragment */
- skb->local_df = 1;
+ skb->ignore_df = 1;
ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0);
rcu_read_unlock();
@@ -590,7 +590,7 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
goto tx_error;
/* Another hack: avoid icmp_send in ip_fragment */
- skb->local_df = 1;
+ skb->ignore_df = 1;
ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0);
rcu_read_unlock();
@@ -684,7 +684,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
MTU problem. */
/* Another hack: avoid icmp_send in ip_fragment */
- skb->local_df = 1;
+ skb->ignore_df = 1;
rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local);
rcu_read_unlock();
@@ -774,7 +774,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
MTU problem. */
/* Another hack: avoid icmp_send in ip_fragment */
- skb->local_df = 1;
+ skb->ignore_df = 1;
rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local);
rcu_read_unlock();
@@ -883,10 +883,10 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
iph->daddr = cp->daddr.ip;
iph->saddr = saddr;
iph->ttl = old_iph->ttl;
- ip_select_ident(skb, &rt->dst, NULL);
+ ip_select_ident(skb, NULL);
/* Another hack: avoid icmp_send in ip_fragment */
- skb->local_df = 1;
+ skb->ignore_df = 1;
ret = ip_vs_tunnel_xmit_prepare(skb, cp);
if (ret == NF_ACCEPT)
@@ -974,7 +974,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
iph->hop_limit = old_iph->hop_limit;
/* Another hack: avoid icmp_send in ip_fragment */
- skb->local_df = 1;
+ skb->ignore_df = 1;
ret = ip_vs_tunnel_xmit_prepare(skb, cp);
if (ret == NF_ACCEPT)
@@ -1023,7 +1023,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
ip_send_check(ip_hdr(skb));
/* Another hack: avoid icmp_send in ip_fragment */
- skb->local_df = 1;
+ skb->ignore_df = 1;
ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0);
rcu_read_unlock();
@@ -1060,7 +1060,7 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
}
/* Another hack: avoid icmp_send in ip_fragment */
- skb->local_df = 1;
+ skb->ignore_df = 1;
ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0);
rcu_read_unlock();
@@ -1157,7 +1157,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
ip_vs_nat_icmp(skb, pp, cp, 0);
/* Another hack: avoid icmp_send in ip_fragment */
- skb->local_df = 1;
+ skb->ignore_df = 1;
rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local);
rcu_read_unlock();
@@ -1249,7 +1249,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
ip_vs_nat_icmp_v6(skb, pp, cp, 0);
/* Another hack: avoid icmp_send in ip_fragment */
- skb->local_df = 1;
+ skb->ignore_df = 1;
rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local);
rcu_read_unlock();
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 356bef519fe..1f4f954c4b4 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -60,8 +60,59 @@ int (*nfnetlink_parse_nat_setup_hook)(struct nf_conn *ct,
const struct nlattr *attr) __read_mostly;
EXPORT_SYMBOL_GPL(nfnetlink_parse_nat_setup_hook);
-DEFINE_SPINLOCK(nf_conntrack_lock);
-EXPORT_SYMBOL_GPL(nf_conntrack_lock);
+__cacheline_aligned_in_smp spinlock_t nf_conntrack_locks[CONNTRACK_LOCKS];
+EXPORT_SYMBOL_GPL(nf_conntrack_locks);
+
+__cacheline_aligned_in_smp DEFINE_SPINLOCK(nf_conntrack_expect_lock);
+EXPORT_SYMBOL_GPL(nf_conntrack_expect_lock);
+/* Release the bucket locks selected by hashes @h1 and @h2.
+ *
+ * Both hashes are reduced modulo CONNTRACK_LOCKS.  When they select the
+ * same lock it is released only once, mirroring nf_conntrack_double_lock().
+ */
+static void nf_conntrack_double_unlock(unsigned int h1, unsigned int h2)
+{
+ h1 %= CONNTRACK_LOCKS;
+ h2 %= CONNTRACK_LOCKS;
+ spin_unlock(&nf_conntrack_locks[h1]);
+ if (h1 != h2)
+ spin_unlock(&nf_conntrack_locks[h2]);
+}
+
+/* Take the bucket locks selected by hashes @h1 and @h2.
+ *
+ * Both hashes are reduced modulo CONNTRACK_LOCKS and the lock with the
+ * lower index is always taken first; when both select the same lock it is
+ * taken only once.
+ *
+ * return true if we need to recompute hashes (in case hash table was
+ * resized): net->ct.generation changed since @sequence was sampled, and the
+ * locks have already been dropped again.  Returns false with the lock(s)
+ * held otherwise.
+ */
+static bool nf_conntrack_double_lock(struct net *net, unsigned int h1,
+ unsigned int h2, unsigned int sequence)
+{
+ h1 %= CONNTRACK_LOCKS;
+ h2 %= CONNTRACK_LOCKS;
+ if (h1 <= h2) {
+ spin_lock(&nf_conntrack_locks[h1]);
+ if (h1 != h2)
+ spin_lock_nested(&nf_conntrack_locks[h2],
+ SINGLE_DEPTH_NESTING);
+ } else {
+ spin_lock(&nf_conntrack_locks[h2]);
+ spin_lock_nested(&nf_conntrack_locks[h1],
+ SINGLE_DEPTH_NESTING);
+ }
+ if (read_seqcount_retry(&net->ct.generation, sequence)) {
+ nf_conntrack_double_unlock(h1, h2);
+ return true;
+ }
+ return false;
+}
+/* Take every lock in nf_conntrack_locks[], in ascending index order.
+ *
+ * NOTE(review): the lockdep subclass passed to spin_lock_nested() grows with
+ * the index -- confirm that CONNTRACK_LOCKS stays within the range of
+ * subclasses lockdep supports.
+ */
+static void nf_conntrack_all_lock(void)
+{
+ int i;
+
+ for (i = 0; i < CONNTRACK_LOCKS; i++)
+ spin_lock_nested(&nf_conntrack_locks[i], i);
+}
+/* Release every lock in nf_conntrack_locks[]; counterpart of
+ * nf_conntrack_all_lock().
+ */
+static void nf_conntrack_all_unlock(void)
+{
+ int i;
+
+ for (i = 0; i < CONNTRACK_LOCKS; i++)
+ spin_unlock(&nf_conntrack_locks[i]);
+}
unsigned int nf_conntrack_htable_size __read_mostly;
EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);
@@ -192,6 +243,50 @@ clean_from_lists(struct nf_conn *ct)
nf_ct_remove_expectations(ct);
}
+/* Link @ct onto the dying list of the current CPU.
+ *
+ * The CPU is stored in ct->cpu, which is what
+ * nf_ct_del_from_dying_or_unconfirmed_list() later uses to find the list
+ * again.  The node of the original-direction tuple serves as the list link.
+ *
+ * must be called with local_bh_disable
+ */
+static void nf_ct_add_to_dying_list(struct nf_conn *ct)
+{
+ struct ct_pcpu *pcpu;
+
+ /* add this conntrack to the (per cpu) dying list */
+ ct->cpu = smp_processor_id();
+ pcpu = per_cpu_ptr(nf_ct_net(ct)->ct.pcpu_lists, ct->cpu);
+
+ spin_lock(&pcpu->lock);
+ hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
+ &pcpu->dying);
+ spin_unlock(&pcpu->lock);
+}
+
+/* Link @ct onto the unconfirmed list of the current CPU.
+ *
+ * The CPU is stored in ct->cpu, which is what
+ * nf_ct_del_from_dying_or_unconfirmed_list() later uses to find the list
+ * again.  The node of the original-direction tuple serves as the list link.
+ *
+ * must be called with local_bh_disable
+ */
+static void nf_ct_add_to_unconfirmed_list(struct nf_conn *ct)
+{
+ struct ct_pcpu *pcpu;
+
+ /* add this conntrack to the (per cpu) unconfirmed list */
+ ct->cpu = smp_processor_id();
+ pcpu = per_cpu_ptr(nf_ct_net(ct)->ct.pcpu_lists, ct->cpu);
+
+ spin_lock(&pcpu->lock);
+ hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
+ &pcpu->unconfirmed);
+ spin_unlock(&pcpu->lock);
+}
+
+/* Unlink @ct from whichever per-cpu list (dying or unconfirmed) it is on.
+ *
+ * The list is located through ct->cpu, i.e. the CPU recorded when @ct was
+ * added, which need not be the CPU running this function.  Triggers BUG()
+ * if @ct is not linked on any list.
+ *
+ * must be called with local_bh_disable
+ */
+static void nf_ct_del_from_dying_or_unconfirmed_list(struct nf_conn *ct)
+{
+ struct ct_pcpu *pcpu;
+
+ /* We overload first tuple to link into unconfirmed or dying list.*/
+ pcpu = per_cpu_ptr(nf_ct_net(ct)->ct.pcpu_lists, ct->cpu);
+
+ spin_lock(&pcpu->lock);
+ BUG_ON(hlist_nulls_unhashed(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode));
+ hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
+ spin_unlock(&pcpu->lock);
+}
+
static void
destroy_conntrack(struct nf_conntrack *nfct)
{
@@ -203,9 +298,6 @@ destroy_conntrack(struct nf_conntrack *nfct)
NF_CT_ASSERT(atomic_read(&nfct->use) == 0);
NF_CT_ASSERT(!timer_pending(&ct->timeout));
- /* To make sure we don't get any weird locking issues here:
- * destroy_conntrack() MUST NOT be called with a write lock
- * to nf_conntrack_lock!!! -HW */
rcu_read_lock();
l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
if (l4proto && l4proto->destroy)
@@ -213,19 +305,18 @@ destroy_conntrack(struct nf_conntrack *nfct)
rcu_read_unlock();
- spin_lock_bh(&nf_conntrack_lock);
+ local_bh_disable();
/* Expectations will have been removed in clean_from_lists,
* except TFTP can create an expectation on the first packet,
* before connection is in the list, so we need to clean here,
- * too. */
+ * too.
+ */
nf_ct_remove_expectations(ct);
- /* We overload first tuple to link into unconfirmed or dying list.*/
- BUG_ON(hlist_nulls_unhashed(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode));
- hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
+ nf_ct_del_from_dying_or_unconfirmed_list(ct);
NF_CT_STAT_INC(net, delete);
- spin_unlock_bh(&nf_conntrack_lock);
+ local_bh_enable();
if (ct->master)
nf_ct_put(ct->master);
@@ -237,17 +328,28 @@ destroy_conntrack(struct nf_conntrack *nfct)
static void nf_ct_delete_from_lists(struct nf_conn *ct)
{
struct net *net = nf_ct_net(ct);
+ unsigned int hash, reply_hash;
+ u16 zone = nf_ct_zone(ct);
+ unsigned int sequence;
nf_ct_helper_destroy(ct);
- spin_lock_bh(&nf_conntrack_lock);
- /* Inside lock so preempt is disabled on module removal path.
- * Otherwise we can get spurious warnings. */
- NF_CT_STAT_INC(net, delete_list);
+
+ local_bh_disable();
+ do {
+ sequence = read_seqcount_begin(&net->ct.generation);
+ hash = hash_conntrack(net, zone,
+ &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+ reply_hash = hash_conntrack(net, zone,
+ &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+ } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
+
clean_from_lists(ct);
- /* add this conntrack to the dying list */
- hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
- &net->ct.dying);
- spin_unlock_bh(&nf_conntrack_lock);
+ nf_conntrack_double_unlock(hash, reply_hash);
+
+ nf_ct_add_to_dying_list(ct);
+
+ NF_CT_STAT_INC(net, delete_list);
+ local_bh_enable();
}
static void death_by_event(unsigned long ul_conntrack)
@@ -331,8 +433,6 @@ nf_ct_key_equal(struct nf_conntrack_tuple_hash *h,
* Warning :
* - Caller must take a reference on returned object
* and recheck nf_ct_tuple_equal(tuple, &h->tuple)
- * OR
- * - Caller must lock nf_conntrack_lock before calling this function
*/
static struct nf_conntrack_tuple_hash *
____nf_conntrack_find(struct net *net, u16 zone,
@@ -408,32 +508,36 @@ EXPORT_SYMBOL_GPL(nf_conntrack_find_get);
static void __nf_conntrack_hash_insert(struct nf_conn *ct,
unsigned int hash,
- unsigned int repl_hash)
+ unsigned int reply_hash)
{
struct net *net = nf_ct_net(ct);
hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
&net->ct.hash[hash]);
hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode,
- &net->ct.hash[repl_hash]);
+ &net->ct.hash[reply_hash]);
}
int
nf_conntrack_hash_check_insert(struct nf_conn *ct)
{
struct net *net = nf_ct_net(ct);
- unsigned int hash, repl_hash;
+ unsigned int hash, reply_hash;
struct nf_conntrack_tuple_hash *h;
struct hlist_nulls_node *n;
u16 zone;
+ unsigned int sequence;
zone = nf_ct_zone(ct);
- hash = hash_conntrack(net, zone,
- &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
- repl_hash = hash_conntrack(net, zone,
- &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
- spin_lock_bh(&nf_conntrack_lock);
+ local_bh_disable();
+ do {
+ sequence = read_seqcount_begin(&net->ct.generation);
+ hash = hash_conntrack(net, zone,
+ &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+ reply_hash = hash_conntrack(net, zone,
+ &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+ } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
/* See if there's one in the list already, including reverse */
hlist_nulls_for_each_entry(h, n, &net->ct.hash[hash], hnnode)
@@ -441,7 +545,7 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
&h->tuple) &&
zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)))
goto out;
- hlist_nulls_for_each_entry(h, n, &net->ct.hash[repl_hash], hnnode)
+ hlist_nulls_for_each_entry(h, n, &net->ct.hash[reply_hash], hnnode)
if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
&h->tuple) &&
zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)))
@@ -451,15 +555,16 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
smp_wmb();
/* The caller holds a reference to this object */
atomic_set(&ct->ct_general.use, 2);
- __nf_conntrack_hash_insert(ct, hash, repl_hash);
+ __nf_conntrack_hash_insert(ct, hash, reply_hash);
+ nf_conntrack_double_unlock(hash, reply_hash);
NF_CT_STAT_INC(net, insert);
- spin_unlock_bh(&nf_conntrack_lock);
-
+ local_bh_enable();
return 0;
out:
+ nf_conntrack_double_unlock(hash, reply_hash);
NF_CT_STAT_INC(net, insert_failed);
- spin_unlock_bh(&nf_conntrack_lock);
+ local_bh_enable();
return -EEXIST;
}
EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert);
@@ -467,15 +572,22 @@ EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert);
/* deletion from this larval template list happens via nf_ct_put() */
void nf_conntrack_tmpl_insert(struct net *net, struct nf_conn *tmpl)
{
+ struct ct_pcpu *pcpu;
+
__set_bit(IPS_TEMPLATE_BIT, &tmpl->status);
__set_bit(IPS_CONFIRMED_BIT, &tmpl->status);
nf_conntrack_get(&tmpl->ct_general);
- spin_lock_bh(&nf_conntrack_lock);
+ /* add this conntrack to the (per cpu) tmpl list */
+ local_bh_disable();
+ tmpl->cpu = smp_processor_id();
+ pcpu = per_cpu_ptr(nf_ct_net(tmpl)->ct.pcpu_lists, tmpl->cpu);
+
+ spin_lock(&pcpu->lock);
/* Overload tuple linked list to put us in template list. */
hlist_nulls_add_head_rcu(&tmpl->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
- &net->ct.tmpl);
- spin_unlock_bh(&nf_conntrack_lock);
+ &pcpu->tmpl);
+ spin_unlock_bh(&pcpu->lock);
}
EXPORT_SYMBOL_GPL(nf_conntrack_tmpl_insert);
@@ -483,7 +595,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_tmpl_insert);
int
__nf_conntrack_confirm(struct sk_buff *skb)
{
- unsigned int hash, repl_hash;
+ unsigned int hash, reply_hash;
struct nf_conntrack_tuple_hash *h;
struct nf_conn *ct;
struct nf_conn_help *help;
@@ -492,6 +604,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
enum ip_conntrack_info ctinfo;
struct net *net;
u16 zone;
+ unsigned int sequence;
ct = nf_ct_get(skb, &ctinfo);
net = nf_ct_net(ct);
@@ -504,31 +617,37 @@ __nf_conntrack_confirm(struct sk_buff *skb)
return NF_ACCEPT;
zone = nf_ct_zone(ct);
- /* reuse the hash saved before */
- hash = *(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev;
- hash = hash_bucket(hash, net);
- repl_hash = hash_conntrack(net, zone,
- &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+ local_bh_disable();
+
+ do {
+ sequence = read_seqcount_begin(&net->ct.generation);
+ /* reuse the hash saved before */
+ hash = *(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev;
+ hash = hash_bucket(hash, net);
+ reply_hash = hash_conntrack(net, zone,
+ &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+
+ } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
/* We're not in hash table, and we refuse to set up related
- connections for unconfirmed conns. But packet copies and
- REJECT will give spurious warnings here. */
+ * connections for unconfirmed conns. But packet copies and
+ * REJECT will give spurious warnings here.
+ */
/* NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 1); */
/* No external references means no one else could have
- confirmed us. */
+ * confirmed us.
+ */
NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
pr_debug("Confirming conntrack %p\n", ct);
-
- spin_lock_bh(&nf_conntrack_lock);
-
/* We have to check the DYING flag inside the lock to prevent
a race against nf_ct_get_next_corpse() possibly called from
user context, else we insert an already 'dead' hash, blocking
further use of that particular connection -JM */
if (unlikely(nf_ct_is_dying(ct))) {
- spin_unlock_bh(&nf_conntrack_lock);
+ nf_conntrack_double_unlock(hash, reply_hash);
+ local_bh_enable();
return NF_ACCEPT;
}
@@ -540,14 +659,13 @@ __nf_conntrack_confirm(struct sk_buff *skb)
&h->tuple) &&
zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)))
goto out;
- hlist_nulls_for_each_entry(h, n, &net->ct.hash[repl_hash], hnnode)
+ hlist_nulls_for_each_entry(h, n, &net->ct.hash[reply_hash], hnnode)
if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
&h->tuple) &&
zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)))
goto out;
- /* Remove from unconfirmed list */
- hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
+ nf_ct_del_from_dying_or_unconfirmed_list(ct);
/* Timer relative to confirmation time, not original
setting time, otherwise we'd get timer wrap in
@@ -570,9 +688,10 @@ __nf_conntrack_confirm(struct sk_buff *skb)
* guarantee that no other CPU can find the conntrack before the above
* stores are visible.
*/
- __nf_conntrack_hash_insert(ct, hash, repl_hash);
+ __nf_conntrack_hash_insert(ct, hash, reply_hash);
+ nf_conntrack_double_unlock(hash, reply_hash);
NF_CT_STAT_INC(net, insert);
- spin_unlock_bh(&nf_conntrack_lock);
+ local_bh_enable();
help = nfct_help(ct);
if (help && help->helper)
@@ -583,8 +702,9 @@ __nf_conntrack_confirm(struct sk_buff *skb)
return NF_ACCEPT;
out:
+ nf_conntrack_double_unlock(hash, reply_hash);
NF_CT_STAT_INC(net, insert_failed);
- spin_unlock_bh(&nf_conntrack_lock);
+ local_bh_enable();
return NF_DROP;
}
EXPORT_SYMBOL_GPL(__nf_conntrack_confirm);
@@ -627,39 +747,48 @@ EXPORT_SYMBOL_GPL(nf_conntrack_tuple_taken);
/* There's a small race here where we may free a just-assured
connection. Too bad: we're in trouble anyway. */
-static noinline int early_drop(struct net *net, unsigned int hash)
+static noinline int early_drop(struct net *net, unsigned int _hash)
{
/* Use oldest entry, which is roughly LRU */
struct nf_conntrack_tuple_hash *h;
struct nf_conn *ct = NULL, *tmp;
struct hlist_nulls_node *n;
- unsigned int i, cnt = 0;
+ unsigned int i = 0, cnt = 0;
int dropped = 0;
+ unsigned int hash, sequence;
+ spinlock_t *lockp;
- rcu_read_lock();
- for (i = 0; i < net->ct.htable_size; i++) {
+ local_bh_disable();
+restart:
+ sequence = read_seqcount_begin(&net->ct.generation);
+ hash = hash_bucket(_hash, net);
+ for (; i < net->ct.htable_size; i++) {
+ lockp = &nf_conntrack_locks[hash % CONNTRACK_LOCKS];
+ spin_lock(lockp);
+ if (read_seqcount_retry(&net->ct.generation, sequence)) {
+ spin_unlock(lockp);
+ goto restart;
+ }
hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash],
hnnode) {
tmp = nf_ct_tuplehash_to_ctrack(h);
- if (!test_bit(IPS_ASSURED_BIT, &tmp->status))
+ if (!test_bit(IPS_ASSURED_BIT, &tmp->status) &&
+ !nf_ct_is_dying(tmp) &&
+ atomic_inc_not_zero(&tmp->ct_general.use)) {
ct = tmp;
+ break;
+ }
cnt++;
}
- if (ct != NULL) {
- if (likely(!nf_ct_is_dying(ct) &&
- atomic_inc_not_zero(&ct->ct_general.use)))
- break;
- else
- ct = NULL;
- }
+ hash = (hash + 1) % net->ct.htable_size;
+ spin_unlock(lockp);
- if (cnt >= NF_CT_EVICTION_RANGE)
+ if (ct || cnt >= NF_CT_EVICTION_RANGE)
break;
- hash = (hash + 1) % net->ct.htable_size;
}
- rcu_read_unlock();
+ local_bh_enable();
if (!ct)
return dropped;
@@ -708,7 +837,7 @@ __nf_conntrack_alloc(struct net *net, u16 zone,
if (nf_conntrack_max &&
unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) {
- if (!early_drop(net, hash_bucket(hash, net))) {
+ if (!early_drop(net, hash)) {
atomic_dec(&net->ct.count);
net_warn_ratelimited("nf_conntrack: table full, dropping packet\n");
return ERR_PTR(-ENOMEM);
@@ -785,7 +914,7 @@ void nf_conntrack_free(struct nf_conn *ct)
nf_ct_ext_destroy(ct);
nf_ct_ext_free(ct);
kmem_cache_free(net->ct.nf_conntrack_cachep, ct);
- smp_mb__before_atomic_dec();
+ smp_mb__before_atomic();
atomic_dec(&net->ct.count);
}
EXPORT_SYMBOL_GPL(nf_conntrack_free);
@@ -805,7 +934,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
struct nf_conn_help *help;
struct nf_conntrack_tuple repl_tuple;
struct nf_conntrack_ecache *ecache;
- struct nf_conntrack_expect *exp;
+ struct nf_conntrack_expect *exp = NULL;
u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE;
struct nf_conn_timeout *timeout_ext;
unsigned int *timeouts;
@@ -849,42 +978,44 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
ecache ? ecache->expmask : 0,
GFP_ATOMIC);
- spin_lock_bh(&nf_conntrack_lock);
- exp = nf_ct_find_expectation(net, zone, tuple);
- if (exp) {
- pr_debug("conntrack: expectation arrives ct=%p exp=%p\n",
- ct, exp);
- /* Welcome, Mr. Bond. We've been expecting you... */
- __set_bit(IPS_EXPECTED_BIT, &ct->status);
- ct->master = exp->master;
- if (exp->helper) {
- help = nf_ct_helper_ext_add(ct, exp->helper,
- GFP_ATOMIC);
- if (help)
- rcu_assign_pointer(help->helper, exp->helper);
- }
+ local_bh_disable();
+ if (net->ct.expect_count) {
+ spin_lock(&nf_conntrack_expect_lock);
+ exp = nf_ct_find_expectation(net, zone, tuple);
+ if (exp) {
+ pr_debug("conntrack: expectation arrives ct=%p exp=%p\n",
+ ct, exp);
+ /* Welcome, Mr. Bond. We've been expecting you... */
+ __set_bit(IPS_EXPECTED_BIT, &ct->status);
+ /* exp->master safe, refcnt bumped in nf_ct_find_expectation */
+ ct->master = exp->master;
+ if (exp->helper) {
+ help = nf_ct_helper_ext_add(ct, exp->helper,
+ GFP_ATOMIC);
+ if (help)
+ rcu_assign_pointer(help->helper, exp->helper);
+ }
#ifdef CONFIG_NF_CONNTRACK_MARK
- ct->mark = exp->master->mark;
+ ct->mark = exp->master->mark;
#endif
#ifdef CONFIG_NF_CONNTRACK_SECMARK
- ct->secmark = exp->master->secmark;
+ ct->secmark = exp->master->secmark;
#endif
- nf_conntrack_get(&ct->master->ct_general);
- NF_CT_STAT_INC(net, expect_new);
- } else {
+ NF_CT_STAT_INC(net, expect_new);
+ }
+ spin_unlock(&nf_conntrack_expect_lock);
+ }
+ if (!exp) {
__nf_ct_try_assign_helper(ct, tmpl, GFP_ATOMIC);
NF_CT_STAT_INC(net, new);
}
/* Now it is inserted into the unconfirmed list, bump refcount */
nf_conntrack_get(&ct->ct_general);
+ nf_ct_add_to_unconfirmed_list(ct);
- /* Overload tuple linked list to put us in unconfirmed list. */
- hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
- &net->ct.unconfirmed);
-
- spin_unlock_bh(&nf_conntrack_lock);
+ local_bh_enable();
if (exp) {
if (exp->expectfn)
@@ -1254,27 +1385,42 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data),
struct nf_conntrack_tuple_hash *h;
struct nf_conn *ct;
struct hlist_nulls_node *n;
+ int cpu;
+ spinlock_t *lockp;
- spin_lock_bh(&nf_conntrack_lock);
for (; *bucket < net->ct.htable_size; (*bucket)++) {
- hlist_nulls_for_each_entry(h, n, &net->ct.hash[*bucket], hnnode) {
- if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
- continue;
+ lockp = &nf_conntrack_locks[*bucket % CONNTRACK_LOCKS];
+ local_bh_disable();
+ spin_lock(lockp);
+ if (*bucket < net->ct.htable_size) {
+ hlist_nulls_for_each_entry(h, n, &net->ct.hash[*bucket], hnnode) {
+ if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
+ continue;
+ ct = nf_ct_tuplehash_to_ctrack(h);
+ if (iter(ct, data))
+ goto found;
+ }
+ }
+ spin_unlock(lockp);
+ local_bh_enable();
+ }
+
+ for_each_possible_cpu(cpu) {
+ struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
+
+ spin_lock_bh(&pcpu->lock);
+ hlist_nulls_for_each_entry(h, n, &pcpu->unconfirmed, hnnode) {
ct = nf_ct_tuplehash_to_ctrack(h);
if (iter(ct, data))
- goto found;
+ set_bit(IPS_DYING_BIT, &ct->status);
}
+ spin_unlock_bh(&pcpu->lock);
}
- hlist_nulls_for_each_entry(h, n, &net->ct.unconfirmed, hnnode) {
- ct = nf_ct_tuplehash_to_ctrack(h);
- if (iter(ct, data))
- set_bit(IPS_DYING_BIT, &ct->status);
- }
- spin_unlock_bh(&nf_conntrack_lock);
return NULL;
found:
atomic_inc(&ct->ct_general.use);
- spin_unlock_bh(&nf_conntrack_lock);
+ spin_unlock(lockp);
+ local_bh_enable();
return ct;
}
@@ -1323,14 +1469,19 @@ static void nf_ct_release_dying_list(struct net *net)
struct nf_conntrack_tuple_hash *h;
struct nf_conn *ct;
struct hlist_nulls_node *n;
+ int cpu;
- spin_lock_bh(&nf_conntrack_lock);
- hlist_nulls_for_each_entry(h, n, &net->ct.dying, hnnode) {
- ct = nf_ct_tuplehash_to_ctrack(h);
- /* never fails to remove them, no listeners at this point */
- nf_ct_kill(ct);
+ for_each_possible_cpu(cpu) {
+ struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
+
+ spin_lock_bh(&pcpu->lock);
+ hlist_nulls_for_each_entry(h, n, &pcpu->dying, hnnode) {
+ ct = nf_ct_tuplehash_to_ctrack(h);
+ /* never fails to remove them, no listeners at this point */
+ nf_ct_kill(ct);
+ }
+ spin_unlock_bh(&pcpu->lock);
}
- spin_unlock_bh(&nf_conntrack_lock);
}
static int untrack_refs(void)
@@ -1417,6 +1568,7 @@ i_see_dead_people:
kmem_cache_destroy(net->ct.nf_conntrack_cachep);
kfree(net->ct.slabname);
free_percpu(net->ct.stat);
+ free_percpu(net->ct.pcpu_lists);
}
}
@@ -1469,12 +1621,16 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
if (!hash)
return -ENOMEM;
+ local_bh_disable();
+ nf_conntrack_all_lock();
+ write_seqcount_begin(&init_net.ct.generation);
+
/* Lookups in the old hash might happen in parallel, which means we
* might get false negatives during connection lookup. New connections
* created because of a false negative won't make it into the hash
- * though since that required taking the lock.
+ * though since that required taking the locks.
*/
- spin_lock_bh(&nf_conntrack_lock);
+
for (i = 0; i < init_net.ct.htable_size; i++) {
while (!hlist_nulls_empty(&init_net.ct.hash[i])) {
h = hlist_nulls_entry(init_net.ct.hash[i].first,
@@ -1491,7 +1647,10 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
init_net.ct.htable_size = nf_conntrack_htable_size = hashsize;
init_net.ct.hash = hash;
- spin_unlock_bh(&nf_conntrack_lock);
+
+ write_seqcount_end(&init_net.ct.generation);
+ nf_conntrack_all_unlock();
+ local_bh_enable();
nf_ct_free_hashtable(old_hash, old_size);
return 0;
@@ -1513,7 +1672,10 @@ EXPORT_SYMBOL_GPL(nf_ct_untracked_status_or);
int nf_conntrack_init_start(void)
{
int max_factor = 8;
- int ret, cpu;
+ int i, ret, cpu;
+
+ for (i = 0; i < CONNTRACK_LOCKS; i++)
+ spin_lock_init(&nf_conntrack_locks[i]);
/* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB
* machine has 512 buckets. >= 1GB machines have 16384 buckets. */
@@ -1629,37 +1791,44 @@ void nf_conntrack_init_end(void)
int nf_conntrack_init_net(struct net *net)
{
- int ret;
+ int ret = -ENOMEM;
+ int cpu;
atomic_set(&net->ct.count, 0);
- INIT_HLIST_NULLS_HEAD(&net->ct.unconfirmed, UNCONFIRMED_NULLS_VAL);
- INIT_HLIST_NULLS_HEAD(&net->ct.dying, DYING_NULLS_VAL);
- INIT_HLIST_NULLS_HEAD(&net->ct.tmpl, TEMPLATE_NULLS_VAL);
- net->ct.stat = alloc_percpu(struct ip_conntrack_stat);
- if (!net->ct.stat) {
- ret = -ENOMEM;
+ seqcount_init(&net->ct.generation);
+
+ net->ct.pcpu_lists = alloc_percpu(struct ct_pcpu);
+ if (!net->ct.pcpu_lists)
goto err_stat;
+
+ for_each_possible_cpu(cpu) {
+ struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
+
+ spin_lock_init(&pcpu->lock);
+ INIT_HLIST_NULLS_HEAD(&pcpu->unconfirmed, UNCONFIRMED_NULLS_VAL);
+ INIT_HLIST_NULLS_HEAD(&pcpu->dying, DYING_NULLS_VAL);
+ INIT_HLIST_NULLS_HEAD(&pcpu->tmpl, TEMPLATE_NULLS_VAL);
}
+ net->ct.stat = alloc_percpu(struct ip_conntrack_stat);
+ if (!net->ct.stat)
+ goto err_pcpu_lists;
+
net->ct.slabname = kasprintf(GFP_KERNEL, "nf_conntrack_%p", net);
- if (!net->ct.slabname) {
- ret = -ENOMEM;
+ if (!net->ct.slabname)
goto err_slabname;
- }
net->ct.nf_conntrack_cachep = kmem_cache_create(net->ct.slabname,
sizeof(struct nf_conn), 0,
SLAB_DESTROY_BY_RCU, NULL);
if (!net->ct.nf_conntrack_cachep) {
printk(KERN_ERR "Unable to create nf_conn slab cache\n");
- ret = -ENOMEM;
goto err_cache;
}
net->ct.htable_size = nf_conntrack_htable_size;
net->ct.hash = nf_ct_alloc_hashtable(&net->ct.htable_size, 1);
if (!net->ct.hash) {
- ret = -ENOMEM;
printk(KERN_ERR "Unable to create nf_conntrack_hash\n");
goto err_hash;
}
@@ -1701,6 +1870,8 @@ err_cache:
kfree(net->ct.slabname);
err_slabname:
free_percpu(net->ct.stat);
+err_pcpu_lists:
+ free_percpu(net->ct.pcpu_lists);
err_stat:
return ret;
}
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 4fd1ca94fd4..f87e8f68ad4 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -66,9 +66,9 @@ static void nf_ct_expectation_timed_out(unsigned long ul_expect)
{
struct nf_conntrack_expect *exp = (void *)ul_expect;
- spin_lock_bh(&nf_conntrack_lock);
+ spin_lock_bh(&nf_conntrack_expect_lock);
nf_ct_unlink_expect(exp);
- spin_unlock_bh(&nf_conntrack_lock);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
nf_ct_expect_put(exp);
}
@@ -155,6 +155,18 @@ nf_ct_find_expectation(struct net *net, u16 zone,
if (!nf_ct_is_confirmed(exp->master))
return NULL;
+ /* Avoid a race with another CPU that, for the exp->master ct,
+ * is about to invoke ->destroy(), or nf_ct_delete() via timeout
+ * or early_drop().
+ *
+ * The atomic_inc_not_zero() check tells: If that fails, we
+ * know that the ct is being destroyed. If it succeeds, we
+ * can be sure the ct cannot disappear underneath.
+ */
+ if (unlikely(nf_ct_is_dying(exp->master) ||
+ !atomic_inc_not_zero(&exp->master->ct_general.use)))
+ return NULL;
+
if (exp->flags & NF_CT_EXPECT_PERMANENT) {
atomic_inc(&exp->use);
return exp;
@@ -162,6 +174,8 @@ nf_ct_find_expectation(struct net *net, u16 zone,
nf_ct_unlink_expect(exp);
return exp;
}
+ /* Undo exp->master refcnt increase, if del_timer() failed */
+ nf_ct_put(exp->master);
return NULL;
}
@@ -177,12 +191,14 @@ void nf_ct_remove_expectations(struct nf_conn *ct)
if (!help)
return;
+ spin_lock_bh(&nf_conntrack_expect_lock);
hlist_for_each_entry_safe(exp, next, &help->expectations, lnode) {
if (del_timer(&exp->timeout)) {
nf_ct_unlink_expect(exp);
nf_ct_expect_put(exp);
}
}
+ spin_unlock_bh(&nf_conntrack_expect_lock);
}
EXPORT_SYMBOL_GPL(nf_ct_remove_expectations);
@@ -217,12 +233,12 @@ static inline int expect_matches(const struct nf_conntrack_expect *a,
/* Generally a bad idea to call this: could have matched already. */
void nf_ct_unexpect_related(struct nf_conntrack_expect *exp)
{
- spin_lock_bh(&nf_conntrack_lock);
+ spin_lock_bh(&nf_conntrack_expect_lock);
if (del_timer(&exp->timeout)) {
nf_ct_unlink_expect(exp);
nf_ct_expect_put(exp);
}
- spin_unlock_bh(&nf_conntrack_lock);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
}
EXPORT_SYMBOL_GPL(nf_ct_unexpect_related);
@@ -335,7 +351,7 @@ static int nf_ct_expect_insert(struct nf_conntrack_expect *exp)
setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
(unsigned long)exp);
helper = rcu_dereference_protected(master_help->helper,
- lockdep_is_held(&nf_conntrack_lock));
+ lockdep_is_held(&nf_conntrack_expect_lock));
if (helper) {
exp->timeout.expires = jiffies +
helper->expect_policy[exp->class].timeout * HZ;
@@ -395,7 +411,7 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
}
/* Will be over limit? */
helper = rcu_dereference_protected(master_help->helper,
- lockdep_is_held(&nf_conntrack_lock));
+ lockdep_is_held(&nf_conntrack_expect_lock));
if (helper) {
p = &helper->expect_policy[expect->class];
if (p->max_expected &&
@@ -417,12 +433,12 @@ out:
return ret;
}
-int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,
+int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,
u32 portid, int report)
{
int ret;
- spin_lock_bh(&nf_conntrack_lock);
+ spin_lock_bh(&nf_conntrack_expect_lock);
ret = __nf_ct_expect_check(expect);
if (ret <= 0)
goto out;
@@ -430,11 +446,11 @@ int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,
ret = nf_ct_expect_insert(expect);
if (ret < 0)
goto out;
- spin_unlock_bh(&nf_conntrack_lock);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
nf_ct_expect_event_report(IPEXP_NEW, expect, portid, report);
return ret;
out:
- spin_unlock_bh(&nf_conntrack_lock);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
return ret;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_related_report);
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index 70866d192ef..3a3a60b126e 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -1476,7 +1476,7 @@ static int process_rcf(struct sk_buff *skb, struct nf_conn *ct,
nf_ct_refresh(ct, skb, info->timeout * HZ);
/* Set expect timeout */
- spin_lock_bh(&nf_conntrack_lock);
+ spin_lock_bh(&nf_conntrack_expect_lock);
exp = find_expect(ct, &ct->tuplehash[dir].tuple.dst.u3,
info->sig_port[!dir]);
if (exp) {
@@ -1486,7 +1486,7 @@ static int process_rcf(struct sk_buff *skb, struct nf_conn *ct,
nf_ct_dump_tuple(&exp->tuple);
set_expect_timeout(exp, info->timeout);
}
- spin_unlock_bh(&nf_conntrack_lock);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
}
return 0;
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 974a2a4adef..5b3eae7d4c9 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -250,16 +250,14 @@ out:
}
EXPORT_SYMBOL_GPL(__nf_ct_try_assign_helper);
+/* appropriate ct lock protection must be taken by caller */
static inline int unhelp(struct nf_conntrack_tuple_hash *i,
const struct nf_conntrack_helper *me)
{
struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i);
struct nf_conn_help *help = nfct_help(ct);
- if (help && rcu_dereference_protected(
- help->helper,
- lockdep_is_held(&nf_conntrack_lock)
- ) == me) {
+ if (help && rcu_dereference_raw(help->helper) == me) {
nf_conntrack_event(IPCT_HELPER, ct);
RCU_INIT_POINTER(help->helper, NULL);
}
@@ -284,17 +282,17 @@ static LIST_HEAD(nf_ct_helper_expectfn_list);
void nf_ct_helper_expectfn_register(struct nf_ct_helper_expectfn *n)
{
- spin_lock_bh(&nf_conntrack_lock);
+ spin_lock_bh(&nf_conntrack_expect_lock);
list_add_rcu(&n->head, &nf_ct_helper_expectfn_list);
- spin_unlock_bh(&nf_conntrack_lock);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
}
EXPORT_SYMBOL_GPL(nf_ct_helper_expectfn_register);
void nf_ct_helper_expectfn_unregister(struct nf_ct_helper_expectfn *n)
{
- spin_lock_bh(&nf_conntrack_lock);
+ spin_lock_bh(&nf_conntrack_expect_lock);
list_del_rcu(&n->head);
- spin_unlock_bh(&nf_conntrack_lock);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
}
EXPORT_SYMBOL_GPL(nf_ct_helper_expectfn_unregister);
@@ -396,15 +394,17 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me,
const struct hlist_node *next;
const struct hlist_nulls_node *nn;
unsigned int i;
+ int cpu;
/* Get rid of expectations */
+ spin_lock_bh(&nf_conntrack_expect_lock);
for (i = 0; i < nf_ct_expect_hsize; i++) {
hlist_for_each_entry_safe(exp, next,
&net->ct.expect_hash[i], hnode) {
struct nf_conn_help *help = nfct_help(exp->master);
if ((rcu_dereference_protected(
help->helper,
- lockdep_is_held(&nf_conntrack_lock)
+ lockdep_is_held(&nf_conntrack_expect_lock)
) == me || exp->helper == me) &&
del_timer(&exp->timeout)) {
nf_ct_unlink_expect(exp);
@@ -412,14 +412,27 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me,
}
}
}
+ spin_unlock_bh(&nf_conntrack_expect_lock);
/* Get rid of expecteds, set helpers to NULL. */
- hlist_nulls_for_each_entry(h, nn, &net->ct.unconfirmed, hnnode)
- unhelp(h, me);
- for (i = 0; i < net->ct.htable_size; i++) {
- hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode)
+ for_each_possible_cpu(cpu) {
+ struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
+
+ spin_lock_bh(&pcpu->lock);
+ hlist_nulls_for_each_entry(h, nn, &pcpu->unconfirmed, hnnode)
unhelp(h, me);
+ spin_unlock_bh(&pcpu->lock);
+ }
+ local_bh_disable();
+ for (i = 0; i < net->ct.htable_size; i++) {
+ spin_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
+ if (i < net->ct.htable_size) {
+ hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode)
+ unhelp(h, me);
+ }
+ spin_unlock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
}
+ local_bh_enable();
}
void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
@@ -437,10 +450,8 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
synchronize_rcu();
rtnl_lock();
- spin_lock_bh(&nf_conntrack_lock);
for_each_net(net)
__nf_conntrack_helper_unregister(me, net);
- spin_unlock_bh(&nf_conntrack_lock);
rtnl_unlock();
}
EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index b9f0e037432..300ed1eec72 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -597,6 +597,9 @@ ctnetlink_nlmsg_size(const struct nf_conn *ct)
#ifdef CONFIG_NF_CONNTRACK_MARK
+ nla_total_size(sizeof(u_int32_t)) /* CTA_MARK */
#endif
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+ + nla_total_size(sizeof(u_int16_t)) /* CTA_ZONE */
+#endif
+ ctnetlink_proto_size(ct)
+ ctnetlink_label_size(ct)
;
@@ -764,14 +767,23 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
u_int8_t l3proto = nfmsg->nfgen_family;
int res;
+ spinlock_t *lockp;
+
#ifdef CONFIG_NF_CONNTRACK_MARK
const struct ctnetlink_dump_filter *filter = cb->data;
#endif
- spin_lock_bh(&nf_conntrack_lock);
last = (struct nf_conn *)cb->args[1];
+
+ local_bh_disable();
for (; cb->args[0] < net->ct.htable_size; cb->args[0]++) {
restart:
+ lockp = &nf_conntrack_locks[cb->args[0] % CONNTRACK_LOCKS];
+ spin_lock(lockp);
+ if (cb->args[0] >= net->ct.htable_size) {
+ spin_unlock(lockp);
+ goto out;
+ }
hlist_nulls_for_each_entry(h, n, &net->ct.hash[cb->args[0]],
hnnode) {
if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
@@ -803,16 +815,18 @@ restart:
if (res < 0) {
nf_conntrack_get(&ct->ct_general);
cb->args[1] = (unsigned long)ct;
+ spin_unlock(lockp);
goto out;
}
}
+ spin_unlock(lockp);
if (cb->args[1]) {
cb->args[1] = 0;
goto restart;
}
}
out:
- spin_unlock_bh(&nf_conntrack_lock);
+ local_bh_enable();
if (last)
nf_ct_put(last);
@@ -966,7 +980,6 @@ ctnetlink_parse_help(const struct nlattr *attr, char **helper_name,
return 0;
}
-#define __CTA_LABELS_MAX_LENGTH ((XT_CONNLABEL_MAXBIT + 1) / BITS_PER_BYTE)
static const struct nla_policy ct_nla_policy[CTA_MAX+1] = {
[CTA_TUPLE_ORIG] = { .type = NLA_NESTED },
[CTA_TUPLE_REPLY] = { .type = NLA_NESTED },
@@ -984,9 +997,9 @@ static const struct nla_policy ct_nla_policy[CTA_MAX+1] = {
[CTA_ZONE] = { .type = NLA_U16 },
[CTA_MARK_MASK] = { .type = NLA_U32 },
[CTA_LABELS] = { .type = NLA_BINARY,
- .len = __CTA_LABELS_MAX_LENGTH },
+ .len = NF_CT_LABELS_MAX_SIZE },
[CTA_LABELS_MASK] = { .type = NLA_BINARY,
- .len = __CTA_LABELS_MAX_LENGTH },
+ .len = NF_CT_LABELS_MAX_SIZE },
};
static int
@@ -1138,8 +1151,7 @@ static int ctnetlink_done_list(struct netlink_callback *cb)
}
static int
-ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb,
- struct hlist_nulls_head *list)
+ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb, bool dying)
{
struct nf_conn *ct, *last;
struct nf_conntrack_tuple_hash *h;
@@ -1147,41 +1159,57 @@ ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb,
struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
u_int8_t l3proto = nfmsg->nfgen_family;
int res;
+ int cpu;
+ struct hlist_nulls_head *list;
+ struct net *net = sock_net(skb->sk);
if (cb->args[2])
return 0;
- spin_lock_bh(&nf_conntrack_lock);
last = (struct nf_conn *)cb->args[1];
-restart:
- hlist_nulls_for_each_entry(h, n, list, hnnode) {
- ct = nf_ct_tuplehash_to_ctrack(h);
- if (l3proto && nf_ct_l3num(ct) != l3proto)
+
+ for (cpu = cb->args[0]; cpu < nr_cpu_ids; cpu++) {
+ struct ct_pcpu *pcpu;
+
+ if (!cpu_possible(cpu))
continue;
- if (cb->args[1]) {
- if (ct != last)
+
+ pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
+ spin_lock_bh(&pcpu->lock);
+ list = dying ? &pcpu->dying : &pcpu->unconfirmed;
+restart:
+ hlist_nulls_for_each_entry(h, n, list, hnnode) {
+ ct = nf_ct_tuplehash_to_ctrack(h);
+ if (l3proto && nf_ct_l3num(ct) != l3proto)
continue;
- cb->args[1] = 0;
+ if (cb->args[1]) {
+ if (ct != last)
+ continue;
+ cb->args[1] = 0;
+ }
+ rcu_read_lock();
+ res = ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
+ ct);
+ rcu_read_unlock();
+ if (res < 0) {
+ if (!atomic_inc_not_zero(&ct->ct_general.use))
+ continue;
+ cb->args[0] = cpu;
+ cb->args[1] = (unsigned long)ct;
+ spin_unlock_bh(&pcpu->lock);
+ goto out;
+ }
}
- rcu_read_lock();
- res = ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
- ct);
- rcu_read_unlock();
- if (res < 0) {
- nf_conntrack_get(&ct->ct_general);
- cb->args[1] = (unsigned long)ct;
- goto out;
+ if (cb->args[1]) {
+ cb->args[1] = 0;
+ goto restart;
}
+ spin_unlock_bh(&pcpu->lock);
}
- if (cb->args[1]) {
- cb->args[1] = 0;
- goto restart;
- } else
- cb->args[2] = 1;
+ cb->args[2] = 1;
out:
- spin_unlock_bh(&nf_conntrack_lock);
if (last)
nf_ct_put(last);
@@ -1191,9 +1219,7 @@ out:
static int
ctnetlink_dump_dying(struct sk_buff *skb, struct netlink_callback *cb)
{
- struct net *net = sock_net(skb->sk);
-
- return ctnetlink_dump_list(skb, cb, &net->ct.dying);
+ return ctnetlink_dump_list(skb, cb, true);
}
static int
@@ -1215,9 +1241,7 @@ ctnetlink_get_ct_dying(struct sock *ctnl, struct sk_buff *skb,
static int
ctnetlink_dump_unconfirmed(struct sk_buff *skb, struct netlink_callback *cb)
{
- struct net *net = sock_net(skb->sk);
-
- return ctnetlink_dump_list(skb, cb, &net->ct.unconfirmed);
+ return ctnetlink_dump_list(skb, cb, false);
}
static int
@@ -1315,6 +1339,9 @@ ctnetlink_setup_nat(struct nf_conn *ct, const struct nlattr * const cda[])
#ifdef CONFIG_NF_NAT_NEEDED
int ret;
+ if (!cda[CTA_NAT_DST] && !cda[CTA_NAT_SRC])
+ return 0;
+
ret = ctnetlink_parse_nat_setup(ct, NF_NAT_MANIP_DST,
cda[CTA_NAT_DST]);
if (ret < 0)
@@ -1361,14 +1388,14 @@ ctnetlink_change_helper(struct nf_conn *ct, const struct nlattr * const cda[])
nf_ct_protonum(ct));
if (helper == NULL) {
#ifdef CONFIG_MODULES
- spin_unlock_bh(&nf_conntrack_lock);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
if (request_module("nfct-helper-%s", helpname) < 0) {
- spin_lock_bh(&nf_conntrack_lock);
+ spin_lock_bh(&nf_conntrack_expect_lock);
return -EOPNOTSUPP;
}
- spin_lock_bh(&nf_conntrack_lock);
+ spin_lock_bh(&nf_conntrack_expect_lock);
helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct),
nf_ct_protonum(ct));
if (helper)
@@ -1804,9 +1831,9 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
err = -EEXIST;
ct = nf_ct_tuplehash_to_ctrack(h);
if (!(nlh->nlmsg_flags & NLM_F_EXCL)) {
- spin_lock_bh(&nf_conntrack_lock);
+ spin_lock_bh(&nf_conntrack_expect_lock);
err = ctnetlink_change_conntrack(ct, cda);
- spin_unlock_bh(&nf_conntrack_lock);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
if (err == 0) {
nf_conntrack_eventmask_report((1 << IPCT_REPLY) |
(1 << IPCT_ASSURED) |
@@ -2016,6 +2043,9 @@ ctnetlink_nfqueue_build_size(const struct nf_conn *ct)
#ifdef CONFIG_NF_CONNTRACK_MARK
+ nla_total_size(sizeof(u_int32_t)) /* CTA_MARK */
#endif
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+ + nla_total_size(sizeof(u_int16_t)) /* CTA_ZONE */
+#endif
+ ctnetlink_proto_size(ct)
;
}
@@ -2135,9 +2165,9 @@ ctnetlink_nfqueue_parse(const struct nlattr *attr, struct nf_conn *ct)
if (ret < 0)
return ret;
- spin_lock_bh(&nf_conntrack_lock);
+ spin_lock_bh(&nf_conntrack_expect_lock);
ret = ctnetlink_nfqueue_parse_ct((const struct nlattr **)cda, ct);
- spin_unlock_bh(&nf_conntrack_lock);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
return ret;
}
@@ -2692,13 +2722,13 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
}
/* after list removal, usage count == 1 */
- spin_lock_bh(&nf_conntrack_lock);
+ spin_lock_bh(&nf_conntrack_expect_lock);
if (del_timer(&exp->timeout)) {
nf_ct_unlink_expect_report(exp, NETLINK_CB(skb).portid,
nlmsg_report(nlh));
nf_ct_expect_put(exp);
}
- spin_unlock_bh(&nf_conntrack_lock);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
/* have to put what we 'get' above.
* after this line usage count == 0 */
nf_ct_expect_put(exp);
@@ -2707,7 +2737,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
struct nf_conn_help *m_help;
/* delete all expectations for this helper */
- spin_lock_bh(&nf_conntrack_lock);
+ spin_lock_bh(&nf_conntrack_expect_lock);
for (i = 0; i < nf_ct_expect_hsize; i++) {
hlist_for_each_entry_safe(exp, next,
&net->ct.expect_hash[i],
@@ -2722,10 +2752,10 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
}
}
}
- spin_unlock_bh(&nf_conntrack_lock);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
} else {
/* This basically means we have to flush everything*/
- spin_lock_bh(&nf_conntrack_lock);
+ spin_lock_bh(&nf_conntrack_expect_lock);
for (i = 0; i < nf_ct_expect_hsize; i++) {
hlist_for_each_entry_safe(exp, next,
&net->ct.expect_hash[i],
@@ -2738,7 +2768,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
}
}
}
- spin_unlock_bh(&nf_conntrack_lock);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
}
return 0;
@@ -2964,11 +2994,11 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb,
if (err < 0)
return err;
- spin_lock_bh(&nf_conntrack_lock);
+ spin_lock_bh(&nf_conntrack_expect_lock);
exp = __nf_ct_expect_find(net, zone, &tuple);
if (!exp) {
- spin_unlock_bh(&nf_conntrack_lock);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
err = -ENOENT;
if (nlh->nlmsg_flags & NLM_F_CREATE) {
err = ctnetlink_create_expect(net, zone, cda,
@@ -2982,7 +3012,7 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb,
err = -EEXIST;
if (!(nlh->nlmsg_flags & NLM_F_EXCL))
err = ctnetlink_change_expect(exp, cda);
- spin_unlock_bh(&nf_conntrack_lock);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
return err;
}
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index 7bd03decd36..825c3e3f830 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -605,32 +605,14 @@ static struct nf_conntrack_helper pptp __read_mostly = {
.expect_policy = &pptp_exp_policy,
};
-static void nf_conntrack_pptp_net_exit(struct net *net)
-{
- nf_ct_gre_keymap_flush(net);
-}
-
-static struct pernet_operations nf_conntrack_pptp_net_ops = {
- .exit = nf_conntrack_pptp_net_exit,
-};
-
static int __init nf_conntrack_pptp_init(void)
{
- int rv;
-
- rv = nf_conntrack_helper_register(&pptp);
- if (rv < 0)
- return rv;
- rv = register_pernet_subsys(&nf_conntrack_pptp_net_ops);
- if (rv < 0)
- nf_conntrack_helper_unregister(&pptp);
- return rv;
+ return nf_conntrack_helper_register(&pptp);
}
static void __exit nf_conntrack_pptp_fini(void)
{
nf_conntrack_helper_unregister(&pptp);
- unregister_pernet_subsys(&nf_conntrack_pptp_net_ops);
}
module_init(nf_conntrack_pptp_init);
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index 9d9c0dade60..d5665739e3b 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -66,7 +66,7 @@ static inline struct netns_proto_gre *gre_pernet(struct net *net)
return net_generic(net, proto_gre_net_id);
}
-void nf_ct_gre_keymap_flush(struct net *net)
+static void nf_ct_gre_keymap_flush(struct net *net)
{
struct netns_proto_gre *net_gre = gre_pernet(net);
struct nf_ct_gre_keymap *km, *tmp;
@@ -78,7 +78,6 @@ void nf_ct_gre_keymap_flush(struct net *net)
}
write_unlock_bh(&net_gre->keymap_lock);
}
-EXPORT_SYMBOL(nf_ct_gre_keymap_flush);
static inline int gre_key_cmpfn(const struct nf_ct_gre_keymap *km,
const struct nf_conntrack_tuple *t)
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 466410eaa48..4c3ba1c8d68 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -800,7 +800,7 @@ static int refresh_signalling_expectation(struct nf_conn *ct,
struct hlist_node *next;
int found = 0;
- spin_lock_bh(&nf_conntrack_lock);
+ spin_lock_bh(&nf_conntrack_expect_lock);
hlist_for_each_entry_safe(exp, next, &help->expectations, lnode) {
if (exp->class != SIP_EXPECT_SIGNALLING ||
!nf_inet_addr_cmp(&exp->tuple.dst.u3, addr) ||
@@ -815,7 +815,7 @@ static int refresh_signalling_expectation(struct nf_conn *ct,
found = 1;
break;
}
- spin_unlock_bh(&nf_conntrack_lock);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
return found;
}
@@ -825,7 +825,7 @@ static void flush_expectations(struct nf_conn *ct, bool media)
struct nf_conntrack_expect *exp;
struct hlist_node *next;
- spin_lock_bh(&nf_conntrack_lock);
+ spin_lock_bh(&nf_conntrack_expect_lock);
hlist_for_each_entry_safe(exp, next, &help->expectations, lnode) {
if ((exp->class != SIP_EXPECT_SIGNALLING) ^ media)
continue;
@@ -836,7 +836,7 @@ static void flush_expectations(struct nf_conn *ct, bool media)
if (!media)
break;
}
- spin_unlock_bh(&nf_conntrack_lock);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
}
static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff,
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 52ca952b802..a49907b1dab 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -358,6 +358,19 @@ out:
rcu_read_unlock();
}
+struct nf_conn_nat *nf_ct_nat_ext_add(struct nf_conn *ct)
+{
+ struct nf_conn_nat *nat = nfct_nat(ct);
+ if (nat)
+ return nat;
+
+ if (!nf_ct_is_confirmed(ct))
+ nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
+
+ return nat;
+}
+EXPORT_SYMBOL_GPL(nf_ct_nat_ext_add);
+
unsigned int
nf_nat_setup_info(struct nf_conn *ct,
const struct nf_nat_range *range,
@@ -368,14 +381,9 @@ nf_nat_setup_info(struct nf_conn *ct,
struct nf_conn_nat *nat;
/* nat helper or nfctnetlink also setup binding */
- nat = nfct_nat(ct);
- if (!nat) {
- nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
- if (nat == NULL) {
- pr_debug("failed to add NAT extension\n");
- return NF_ACCEPT;
- }
- }
+ nat = nf_ct_nat_ext_add(ct);
+ if (nat == NULL)
+ return NF_ACCEPT;
NF_CT_ASSERT(maniptype == NF_NAT_MANIP_SRC ||
maniptype == NF_NAT_MANIP_DST);
@@ -517,6 +525,39 @@ static int nf_nat_proto_remove(struct nf_conn *i, void *data)
return i->status & IPS_NAT_MASK ? 1 : 0;
}
+static int nf_nat_proto_clean(struct nf_conn *ct, void *data)
+{
+ struct nf_conn_nat *nat = nfct_nat(ct);
+
+ if (nf_nat_proto_remove(ct, data))
+ return 1;
+
+ if (!nat || !nat->ct)
+ return 0;
+
+ /* This netns is being destroyed, and conntrack has nat null binding.
+ * Remove it from bysource hash, as the table will be freed soon.
+ *
+ * Else, when the conntrack is destroyed, nf_nat_cleanup_conntrack()
+ * will delete entry from already-freed table.
+ */
+ if (!del_timer(&ct->timeout))
+ return 1;
+
+ spin_lock_bh(&nf_nat_lock);
+ hlist_del_rcu(&nat->bysource);
+ ct->status &= ~IPS_NAT_DONE_MASK;
+ nat->ct = NULL;
+ spin_unlock_bh(&nf_nat_lock);
+
+ add_timer(&ct->timeout);
+
+ /* don't delete conntrack. Although that would make things a lot
+ * simpler, we'd end up flushing all conntracks on nat rmmod.
+ */
+ return 0;
+}
+
static void nf_nat_l4proto_clean(u8 l3proto, u8 l4proto)
{
struct nf_nat_proto_clean clean = {
@@ -787,7 +828,7 @@ static void __net_exit nf_nat_net_exit(struct net *net)
{
struct nf_nat_proto_clean clean = {};
- nf_ct_iterate_cleanup(net, &nf_nat_proto_remove, &clean, 0, 0);
+ nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean, 0, 0);
synchronize_rcu();
nf_ct_free_hashtable(net->ct.nat_bysource, net->ct.nat_htable_size);
}
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index adce01e8bb5..8746ff9a835 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -35,7 +35,7 @@ int nft_register_afinfo(struct net *net, struct nft_af_info *afi)
{
INIT_LIST_HEAD(&afi->tables);
nfnl_lock(NFNL_SUBSYS_NFTABLES);
- list_add_tail(&afi->list, &net->nft.af_info);
+ list_add_tail_rcu(&afi->list, &net->nft.af_info);
nfnl_unlock(NFNL_SUBSYS_NFTABLES);
return 0;
}
@@ -51,7 +51,7 @@ EXPORT_SYMBOL_GPL(nft_register_afinfo);
void nft_unregister_afinfo(struct nft_af_info *afi)
{
nfnl_lock(NFNL_SUBSYS_NFTABLES);
- list_del(&afi->list);
+ list_del_rcu(&afi->list);
nfnl_unlock(NFNL_SUBSYS_NFTABLES);
}
EXPORT_SYMBOL_GPL(nft_unregister_afinfo);
@@ -88,6 +88,45 @@ nf_tables_afinfo_lookup(struct net *net, int family, bool autoload)
return ERR_PTR(-EAFNOSUPPORT);
}
+static void nft_ctx_init(struct nft_ctx *ctx,
+ const struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ struct nft_af_info *afi,
+ struct nft_table *table,
+ struct nft_chain *chain,
+ const struct nlattr * const *nla)
+{
+ ctx->net = sock_net(skb->sk);
+ ctx->afi = afi;
+ ctx->table = table;
+ ctx->chain = chain;
+ ctx->nla = nla;
+ ctx->portid = NETLINK_CB(skb).portid;
+ ctx->report = nlmsg_report(nlh);
+ ctx->seq = nlh->nlmsg_seq;
+}
+
+static struct nft_trans *nft_trans_alloc(struct nft_ctx *ctx, int msg_type,
+ u32 size)
+{
+ struct nft_trans *trans;
+
+ trans = kzalloc(sizeof(struct nft_trans) + size, GFP_KERNEL);
+ if (trans == NULL)
+ return NULL;
+
+ trans->msg_type = msg_type;
+ trans->ctx = *ctx;
+
+ return trans;
+}
+
+static void nft_trans_destroy(struct nft_trans *trans)
+{
+ list_del(&trans->list);
+ kfree(trans);
+}
+
/*
* Tables
*/
@@ -152,8 +191,8 @@ nf_tables_chain_type_lookup(const struct nft_af_info *afi,
#ifdef CONFIG_MODULES
if (autoload) {
nfnl_unlock(NFNL_SUBSYS_NFTABLES);
- request_module("nft-chain-%u-%*.s", afi->family,
- nla_len(nla)-1, (const char *)nla_data(nla));
+ request_module("nft-chain-%u-%.*s", afi->family,
+ nla_len(nla), (const char *)nla_data(nla));
nfnl_lock(NFNL_SUBSYS_NFTABLES);
type = __nf_tables_chain_type_lookup(afi->family, nla);
if (type != NULL)
@@ -197,20 +236,13 @@ nla_put_failure:
return -1;
}
-static int nf_tables_table_notify(const struct sk_buff *oskb,
- const struct nlmsghdr *nlh,
- const struct nft_table *table,
- int event, int family)
+static int nf_tables_table_notify(const struct nft_ctx *ctx, int event)
{
struct sk_buff *skb;
- u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
- u32 seq = nlh ? nlh->nlmsg_seq : 0;
- struct net *net = oskb ? sock_net(oskb->sk) : &init_net;
- bool report;
int err;
- report = nlh ? nlmsg_report(nlh) : false;
- if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
+ if (!ctx->report &&
+ !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
return 0;
err = -ENOBUFS;
@@ -218,18 +250,20 @@ static int nf_tables_table_notify(const struct sk_buff *oskb,
if (skb == NULL)
goto err;
- err = nf_tables_fill_table_info(skb, portid, seq, event, 0,
- family, table);
+ err = nf_tables_fill_table_info(skb, ctx->portid, ctx->seq, event, 0,
+ ctx->afi->family, ctx->table);
if (err < 0) {
kfree_skb(skb);
goto err;
}
- err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report,
- GFP_KERNEL);
+ err = nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES,
+ ctx->report, GFP_KERNEL);
err:
- if (err < 0)
- nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err);
+ if (err < 0) {
+ nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES,
+ err);
+ }
return err;
}
@@ -243,11 +277,14 @@ static int nf_tables_dump_tables(struct sk_buff *skb,
struct net *net = sock_net(skb->sk);
int family = nfmsg->nfgen_family;
- list_for_each_entry(afi, &net->nft.af_info, list) {
+ rcu_read_lock();
+ cb->seq = net->nft.base_seq;
+
+ list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
if (family != NFPROTO_UNSPEC && family != afi->family)
continue;
- list_for_each_entry(table, &afi->tables, list) {
+ list_for_each_entry_rcu(table, &afi->tables, list) {
if (idx < s_idx)
goto cont;
if (idx > s_idx)
@@ -260,15 +297,21 @@ static int nf_tables_dump_tables(struct sk_buff *skb,
NLM_F_MULTI,
afi->family, table) < 0)
goto done;
+
+ nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
idx++;
}
}
done:
+ rcu_read_unlock();
cb->args[0] = idx;
return skb->len;
}
+/* Internal table flags */
+#define NFT_TABLE_INACTIVE (1 << 15)
+
static int nf_tables_gettable(struct sock *nlsk, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
@@ -295,6 +338,8 @@ static int nf_tables_gettable(struct sock *nlsk, struct sk_buff *skb,
table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME]);
if (IS_ERR(table))
return PTR_ERR(table);
+ if (table->flags & NFT_TABLE_INACTIVE)
+ return -ENOENT;
skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb2)
@@ -343,7 +388,7 @@ err:
return err;
}
-static int nf_tables_table_disable(const struct nft_af_info *afi,
+static void nf_tables_table_disable(const struct nft_af_info *afi,
struct nft_table *table)
{
struct nft_chain *chain;
@@ -353,45 +398,66 @@ static int nf_tables_table_disable(const struct nft_af_info *afi,
nf_unregister_hooks(nft_base_chain(chain)->ops,
afi->nops);
}
-
- return 0;
}
-static int nf_tables_updtable(struct sock *nlsk, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const nla[],
- struct nft_af_info *afi, struct nft_table *table)
+static int nf_tables_updtable(struct nft_ctx *ctx)
{
- const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
- int family = nfmsg->nfgen_family, ret = 0;
+ struct nft_trans *trans;
+ u32 flags;
+ int ret = 0;
- if (nla[NFTA_TABLE_FLAGS]) {
- u32 flags;
+ if (!ctx->nla[NFTA_TABLE_FLAGS])
+ return 0;
- flags = ntohl(nla_get_be32(nla[NFTA_TABLE_FLAGS]));
- if (flags & ~NFT_TABLE_F_DORMANT)
- return -EINVAL;
+ flags = ntohl(nla_get_be32(ctx->nla[NFTA_TABLE_FLAGS]));
+ if (flags & ~NFT_TABLE_F_DORMANT)
+ return -EINVAL;
- if ((flags & NFT_TABLE_F_DORMANT) &&
- !(table->flags & NFT_TABLE_F_DORMANT)) {
- ret = nf_tables_table_disable(afi, table);
- if (ret >= 0)
- table->flags |= NFT_TABLE_F_DORMANT;
- } else if (!(flags & NFT_TABLE_F_DORMANT) &&
- table->flags & NFT_TABLE_F_DORMANT) {
- ret = nf_tables_table_enable(afi, table);
- if (ret >= 0)
- table->flags &= ~NFT_TABLE_F_DORMANT;
+ if (flags == ctx->table->flags)
+ return 0;
+
+ trans = nft_trans_alloc(ctx, NFT_MSG_NEWTABLE,
+ sizeof(struct nft_trans_table));
+ if (trans == NULL)
+ return -ENOMEM;
+
+ if ((flags & NFT_TABLE_F_DORMANT) &&
+ !(ctx->table->flags & NFT_TABLE_F_DORMANT)) {
+ nft_trans_table_enable(trans) = false;
+ } else if (!(flags & NFT_TABLE_F_DORMANT) &&
+ ctx->table->flags & NFT_TABLE_F_DORMANT) {
+ ret = nf_tables_table_enable(ctx->afi, ctx->table);
+ if (ret >= 0) {
+ ctx->table->flags &= ~NFT_TABLE_F_DORMANT;
+ nft_trans_table_enable(trans) = true;
}
- if (ret < 0)
- goto err;
}
+ if (ret < 0)
+ goto err;
- nf_tables_table_notify(skb, nlh, table, NFT_MSG_NEWTABLE, family);
+ nft_trans_table_update(trans) = true;
+ list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+ return 0;
err:
+ nft_trans_destroy(trans);
return ret;
}
+static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type)
+{
+ struct nft_trans *trans;
+
+ trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_table));
+ if (trans == NULL)
+ return -ENOMEM;
+
+ if (msg_type == NFT_MSG_NEWTABLE)
+ ctx->table->flags |= NFT_TABLE_INACTIVE;
+
+ list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+ return 0;
+}
+
static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
@@ -403,6 +469,8 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
struct net *net = sock_net(skb->sk);
int family = nfmsg->nfgen_family;
u32 flags = 0;
+ struct nft_ctx ctx;
+ int err;
afi = nf_tables_afinfo_lookup(net, family, true);
if (IS_ERR(afi))
@@ -417,11 +485,15 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
}
if (table != NULL) {
+ if (table->flags & NFT_TABLE_INACTIVE)
+ return -ENOENT;
if (nlh->nlmsg_flags & NLM_F_EXCL)
return -EEXIST;
if (nlh->nlmsg_flags & NLM_F_REPLACE)
return -EOPNOTSUPP;
- return nf_tables_updtable(nlsk, skb, nlh, nla, afi, table);
+
+ nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla);
+ return nf_tables_updtable(&ctx);
}
if (nla[NFTA_TABLE_FLAGS]) {
@@ -444,8 +516,14 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
INIT_LIST_HEAD(&table->sets);
table->flags = flags;
- list_add_tail(&table->list, &afi->tables);
- nf_tables_table_notify(skb, nlh, table, NFT_MSG_NEWTABLE, family);
+ nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla);
+ err = nft_trans_table_add(&ctx, NFT_MSG_NEWTABLE);
+ if (err < 0) {
+ kfree(table);
+ module_put(afi->owner);
+ return err;
+ }
+ list_add_tail_rcu(&table->list, &afi->tables);
return 0;
}
@@ -457,7 +535,8 @@ static int nf_tables_deltable(struct sock *nlsk, struct sk_buff *skb,
struct nft_af_info *afi;
struct nft_table *table;
struct net *net = sock_net(skb->sk);
- int family = nfmsg->nfgen_family;
+ int family = nfmsg->nfgen_family, err;
+ struct nft_ctx ctx;
afi = nf_tables_afinfo_lookup(net, family, false);
if (IS_ERR(afi))
@@ -466,17 +545,28 @@ static int nf_tables_deltable(struct sock *nlsk, struct sk_buff *skb,
table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME]);
if (IS_ERR(table))
return PTR_ERR(table);
-
- if (!list_empty(&table->chains) || !list_empty(&table->sets))
+ if (table->flags & NFT_TABLE_INACTIVE)
+ return -ENOENT;
+ if (table->use > 0)
return -EBUSY;
- list_del(&table->list);
- nf_tables_table_notify(skb, nlh, table, NFT_MSG_DELTABLE, family);
- kfree(table);
- module_put(afi->owner);
+ nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla);
+ err = nft_trans_table_add(&ctx, NFT_MSG_DELTABLE);
+ if (err < 0)
+ return err;
+
+ list_del_rcu(&table->list);
return 0;
}
+static void nf_tables_table_destroy(struct nft_ctx *ctx)
+{
+ BUG_ON(ctx->table->use > 0);
+
+ kfree(ctx->table);
+ module_put(ctx->afi->owner);
+}
+
int nft_register_chain_type(const struct nf_chain_type *ctype)
{
int err = 0;
@@ -541,7 +631,7 @@ static const struct nla_policy nft_chain_policy[NFTA_CHAIN_MAX + 1] = {
.len = NFT_CHAIN_MAXNAMELEN - 1 },
[NFTA_CHAIN_HOOK] = { .type = NLA_NESTED },
[NFTA_CHAIN_POLICY] = { .type = NLA_U32 },
- [NFTA_CHAIN_TYPE] = { .type = NLA_NUL_STRING },
+ [NFTA_CHAIN_TYPE] = { .type = NLA_STRING },
[NFTA_CHAIN_COUNTERS] = { .type = NLA_NESTED },
};
@@ -554,13 +644,20 @@ static int nft_dump_stats(struct sk_buff *skb, struct nft_stats __percpu *stats)
{
struct nft_stats *cpu_stats, total;
struct nlattr *nest;
+ unsigned int seq;
+ u64 pkts, bytes;
int cpu;
memset(&total, 0, sizeof(total));
for_each_possible_cpu(cpu) {
cpu_stats = per_cpu_ptr(stats, cpu);
- total.pkts += cpu_stats->pkts;
- total.bytes += cpu_stats->bytes;
+ do {
+ seq = u64_stats_fetch_begin_irq(&cpu_stats->syncp);
+ pkts = cpu_stats->pkts;
+ bytes = cpu_stats->bytes;
+ } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, seq));
+ total.pkts += pkts;
+ total.bytes += bytes;
}
nest = nla_nest_start(skb, NFTA_CHAIN_COUNTERS);
if (nest == NULL)
@@ -637,21 +734,13 @@ nla_put_failure:
return -1;
}
-static int nf_tables_chain_notify(const struct sk_buff *oskb,
- const struct nlmsghdr *nlh,
- const struct nft_table *table,
- const struct nft_chain *chain,
- int event, int family)
+static int nf_tables_chain_notify(const struct nft_ctx *ctx, int event)
{
struct sk_buff *skb;
- u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
- struct net *net = oskb ? sock_net(oskb->sk) : &init_net;
- u32 seq = nlh ? nlh->nlmsg_seq : 0;
- bool report;
int err;
- report = nlh ? nlmsg_report(nlh) : false;
- if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
+ if (!ctx->report &&
+ !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
return 0;
err = -ENOBUFS;
@@ -659,18 +748,21 @@ static int nf_tables_chain_notify(const struct sk_buff *oskb,
if (skb == NULL)
goto err;
- err = nf_tables_fill_chain_info(skb, portid, seq, event, 0, family,
- table, chain);
+ err = nf_tables_fill_chain_info(skb, ctx->portid, ctx->seq, event, 0,
+ ctx->afi->family, ctx->table,
+ ctx->chain);
if (err < 0) {
kfree_skb(skb);
goto err;
}
- err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report,
- GFP_KERNEL);
+ err = nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES,
+ ctx->report, GFP_KERNEL);
err:
- if (err < 0)
- nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err);
+ if (err < 0) {
+ nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES,
+ err);
+ }
return err;
}
@@ -685,12 +777,15 @@ static int nf_tables_dump_chains(struct sk_buff *skb,
struct net *net = sock_net(skb->sk);
int family = nfmsg->nfgen_family;
- list_for_each_entry(afi, &net->nft.af_info, list) {
+ rcu_read_lock();
+ cb->seq = net->nft.base_seq;
+
+ list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
if (family != NFPROTO_UNSPEC && family != afi->family)
continue;
- list_for_each_entry(table, &afi->tables, list) {
- list_for_each_entry(chain, &table->chains, list) {
+ list_for_each_entry_rcu(table, &afi->tables, list) {
+ list_for_each_entry_rcu(chain, &table->chains, list) {
if (idx < s_idx)
goto cont;
if (idx > s_idx)
@@ -702,17 +797,19 @@ static int nf_tables_dump_chains(struct sk_buff *skb,
NLM_F_MULTI,
afi->family, table, chain) < 0)
goto done;
+
+ nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
idx++;
}
}
}
done:
+ rcu_read_unlock();
cb->args[0] = idx;
return skb->len;
}
-
static int nf_tables_getchain(struct sock *nlsk, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
@@ -740,10 +837,14 @@ static int nf_tables_getchain(struct sock *nlsk, struct sk_buff *skb,
table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]);
if (IS_ERR(table))
return PTR_ERR(table);
+ if (table->flags & NFT_TABLE_INACTIVE)
+ return -ENOENT;
chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]);
if (IS_ERR(chain))
return PTR_ERR(chain);
+ if (chain->flags & NFT_CHAIN_INACTIVE)
+ return -ENOENT;
skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb2)
@@ -767,8 +868,7 @@ static const struct nla_policy nft_counter_policy[NFTA_COUNTER_MAX + 1] = {
[NFTA_COUNTER_BYTES] = { .type = NLA_U64 },
};
-static int
-nf_tables_counters(struct nft_base_chain *chain, const struct nlattr *attr)
+static struct nft_stats __percpu *nft_stats_alloc(const struct nlattr *attr)
{
struct nlattr *tb[NFTA_COUNTER_MAX+1];
struct nft_stats __percpu *newstats;
@@ -777,14 +877,14 @@ nf_tables_counters(struct nft_base_chain *chain, const struct nlattr *attr)
err = nla_parse_nested(tb, NFTA_COUNTER_MAX, attr, nft_counter_policy);
if (err < 0)
- return err;
+ return ERR_PTR(err);
if (!tb[NFTA_COUNTER_BYTES] || !tb[NFTA_COUNTER_PACKETS])
- return -EINVAL;
+ return ERR_PTR(-EINVAL);
- newstats = alloc_percpu(struct nft_stats);
+ newstats = netdev_alloc_pcpu_stats(struct nft_stats);
if (newstats == NULL)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
/* Restore old counters on this cpu, no problem. Per-cpu statistics
* are not exposed to userspace.
@@ -793,27 +893,58 @@ nf_tables_counters(struct nft_base_chain *chain, const struct nlattr *attr)
stats->bytes = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES]));
stats->pkts = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS]));
+ return newstats;
+}
+
+static void nft_chain_stats_replace(struct nft_base_chain *chain,
+ struct nft_stats __percpu *newstats)
+{
if (chain->stats) {
- /* nfnl_lock is held, add some nfnl function for this, later */
struct nft_stats __percpu *oldstats =
- rcu_dereference_protected(chain->stats, 1);
+ nft_dereference(chain->stats);
rcu_assign_pointer(chain->stats, newstats);
synchronize_rcu();
free_percpu(oldstats);
} else
rcu_assign_pointer(chain->stats, newstats);
+}
+
+static int nft_trans_chain_add(struct nft_ctx *ctx, int msg_type)
+{
+ struct nft_trans *trans;
+ trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_chain));
+ if (trans == NULL)
+ return -ENOMEM;
+
+ if (msg_type == NFT_MSG_NEWCHAIN)
+ ctx->chain->flags |= NFT_CHAIN_INACTIVE;
+
+ list_add_tail(&trans->list, &ctx->net->nft.commit_list);
return 0;
}
+static void nf_tables_chain_destroy(struct nft_chain *chain)
+{
+ BUG_ON(chain->use > 0);
+
+ if (chain->flags & NFT_BASE_CHAIN) {
+ module_put(nft_base_chain(chain)->type->owner);
+ free_percpu(nft_base_chain(chain)->stats);
+ kfree(nft_base_chain(chain));
+ } else {
+ kfree(chain);
+ }
+}
+
static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
const struct nlattr * uninitialized_var(name);
- const struct nft_af_info *afi;
+ struct nft_af_info *afi;
struct nft_table *table;
struct nft_chain *chain;
struct nft_base_chain *basechain = NULL;
@@ -823,8 +954,10 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
u8 policy = NF_ACCEPT;
u64 handle = 0;
unsigned int i;
+ struct nft_stats __percpu *stats;
int err;
bool create;
+ struct nft_ctx ctx;
create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
@@ -870,6 +1003,11 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
}
if (chain != NULL) {
+ struct nft_stats *stats = NULL;
+ struct nft_trans *trans;
+
+ if (chain->flags & NFT_CHAIN_INACTIVE)
+ return -ENOENT;
if (nlh->nlmsg_flags & NLM_F_EXCL)
return -EEXIST;
if (nlh->nlmsg_flags & NLM_F_REPLACE)
@@ -883,19 +1021,31 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
if (!(chain->flags & NFT_BASE_CHAIN))
return -EOPNOTSUPP;
- err = nf_tables_counters(nft_base_chain(chain),
- nla[NFTA_CHAIN_COUNTERS]);
- if (err < 0)
- return err;
+ stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]);
+ if (IS_ERR(stats))
+ return PTR_ERR(stats);
}
- if (nla[NFTA_CHAIN_POLICY])
- nft_base_chain(chain)->policy = policy;
+ nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla);
+ trans = nft_trans_alloc(&ctx, NFT_MSG_NEWCHAIN,
+ sizeof(struct nft_trans_chain));
+ if (trans == NULL)
+ return -ENOMEM;
+
+ nft_trans_chain_stats(trans) = stats;
+ nft_trans_chain_update(trans) = true;
- if (nla[NFTA_CHAIN_HANDLE] && name)
- nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN);
+ if (nla[NFTA_CHAIN_POLICY])
+ nft_trans_chain_policy(trans) = policy;
+ else
+ nft_trans_chain_policy(trans) = -1;
- goto notify;
+ if (nla[NFTA_CHAIN_HANDLE] && name) {
+ nla_strlcpy(nft_trans_chain_name(trans), name,
+ NFT_CHAIN_MAXNAMELEN);
+ }
+ list_add_tail(&trans->list, &net->nft.commit_list);
+ return 0;
}
if (table->use == UINT_MAX)
@@ -940,23 +1090,21 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
return -ENOMEM;
if (nla[NFTA_CHAIN_COUNTERS]) {
- err = nf_tables_counters(basechain,
- nla[NFTA_CHAIN_COUNTERS]);
- if (err < 0) {
+ stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]);
+ if (IS_ERR(stats)) {
module_put(type->owner);
kfree(basechain);
- return err;
+ return PTR_ERR(stats);
}
+ basechain->stats = stats;
} else {
- struct nft_stats __percpu *newstats;
-
- newstats = alloc_percpu(struct nft_stats);
- if (newstats == NULL) {
+ stats = netdev_alloc_pcpu_stats(struct nft_stats);
+ if (IS_ERR(stats)) {
module_put(type->owner);
kfree(basechain);
- return -ENOMEM;
+ return PTR_ERR(stats);
}
- rcu_assign_pointer(basechain->stats, newstats);
+ rcu_assign_pointer(basechain->stats, stats);
}
basechain->type = type;
@@ -993,31 +1141,27 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
if (!(table->flags & NFT_TABLE_F_DORMANT) &&
chain->flags & NFT_BASE_CHAIN) {
err = nf_register_hooks(nft_base_chain(chain)->ops, afi->nops);
- if (err < 0) {
- module_put(basechain->type->owner);
- free_percpu(basechain->stats);
- kfree(basechain);
- return err;
- }
+ if (err < 0)
+ goto err1;
}
- list_add_tail(&chain->list, &table->chains);
- table->use++;
-notify:
- nf_tables_chain_notify(skb, nlh, table, chain, NFT_MSG_NEWCHAIN,
- family);
- return 0;
-}
-static void nf_tables_chain_destroy(struct nft_chain *chain)
-{
- BUG_ON(chain->use > 0);
+ nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla);
+ err = nft_trans_chain_add(&ctx, NFT_MSG_NEWCHAIN);
+ if (err < 0)
+ goto err2;
- if (chain->flags & NFT_BASE_CHAIN) {
- module_put(nft_base_chain(chain)->type->owner);
- free_percpu(nft_base_chain(chain)->stats);
- kfree(nft_base_chain(chain));
- } else
- kfree(chain);
+ table->use++;
+ list_add_tail_rcu(&chain->list, &table->chains);
+ return 0;
+err2:
+ if (!(table->flags & NFT_TABLE_F_DORMANT) &&
+ chain->flags & NFT_BASE_CHAIN) {
+ nf_unregister_hooks(nft_base_chain(chain)->ops,
+ afi->nops);
+ }
+err1:
+ nf_tables_chain_destroy(chain);
+ return err;
}
static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb,
@@ -1025,11 +1169,13 @@ static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
- const struct nft_af_info *afi;
+ struct nft_af_info *afi;
struct nft_table *table;
struct nft_chain *chain;
struct net *net = sock_net(skb->sk);
int family = nfmsg->nfgen_family;
+ struct nft_ctx ctx;
+ int err;
afi = nf_tables_afinfo_lookup(net, family, false);
if (IS_ERR(afi))
@@ -1038,48 +1184,27 @@ static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb,
table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]);
if (IS_ERR(table))
return PTR_ERR(table);
+ if (table->flags & NFT_TABLE_INACTIVE)
+ return -ENOENT;
chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]);
if (IS_ERR(chain))
return PTR_ERR(chain);
-
- if (!list_empty(&chain->rules) || chain->use > 0)
+ if (chain->flags & NFT_CHAIN_INACTIVE)
+ return -ENOENT;
+ if (chain->use > 0)
return -EBUSY;
- list_del(&chain->list);
- table->use--;
-
- if (!(table->flags & NFT_TABLE_F_DORMANT) &&
- chain->flags & NFT_BASE_CHAIN)
- nf_unregister_hooks(nft_base_chain(chain)->ops, afi->nops);
-
- nf_tables_chain_notify(skb, nlh, table, chain, NFT_MSG_DELCHAIN,
- family);
-
- /* Make sure all rule references are gone before this is released */
- synchronize_rcu();
+ nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla);
+ err = nft_trans_chain_add(&ctx, NFT_MSG_DELCHAIN);
+ if (err < 0)
+ return err;
- nf_tables_chain_destroy(chain);
+ table->use--;
+ list_del_rcu(&chain->list);
return 0;
}
-static void nft_ctx_init(struct nft_ctx *ctx,
- const struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nft_af_info *afi,
- const struct nft_table *table,
- const struct nft_chain *chain,
- const struct nlattr * const *nla)
-{
- ctx->net = sock_net(skb->sk);
- ctx->skb = skb;
- ctx->nlh = nlh;
- ctx->afi = afi;
- ctx->table = table;
- ctx->chain = chain;
- ctx->nla = nla;
-}
-
/*
* Expressions
*/
@@ -1094,7 +1219,10 @@ static void nft_ctx_init(struct nft_ctx *ctx,
int nft_register_expr(struct nft_expr_type *type)
{
nfnl_lock(NFNL_SUBSYS_NFTABLES);
- list_add_tail(&type->list, &nf_tables_expressions);
+ if (type->family == NFPROTO_UNSPEC)
+ list_add_tail_rcu(&type->list, &nf_tables_expressions);
+ else
+ list_add_rcu(&type->list, &nf_tables_expressions);
nfnl_unlock(NFNL_SUBSYS_NFTABLES);
return 0;
}
@@ -1109,7 +1237,7 @@ EXPORT_SYMBOL_GPL(nft_register_expr);
void nft_unregister_expr(struct nft_expr_type *type)
{
nfnl_lock(NFNL_SUBSYS_NFTABLES);
- list_del(&type->list);
+ list_del_rcu(&type->list);
nfnl_unlock(NFNL_SUBSYS_NFTABLES);
}
EXPORT_SYMBOL_GPL(nft_unregister_expr);
@@ -1254,10 +1382,11 @@ err1:
return err;
}
-static void nf_tables_expr_destroy(struct nft_expr *expr)
+static void nf_tables_expr_destroy(const struct nft_ctx *ctx,
+ struct nft_expr *expr)
{
if (expr->ops->destroy)
- expr->ops->destroy(expr);
+ expr->ops->destroy(ctx, expr);
module_put(expr->ops->type->owner);
}
@@ -1296,6 +1425,8 @@ static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = {
[NFTA_RULE_EXPRESSIONS] = { .type = NLA_NESTED },
[NFTA_RULE_COMPAT] = { .type = NLA_NESTED },
[NFTA_RULE_POSITION] = { .type = NLA_U64 },
+ [NFTA_RULE_USERDATA] = { .type = NLA_BINARY,
+ .len = NFT_USERDATA_MAXLEN },
};
static int nf_tables_fill_rule_info(struct sk_buff *skb, u32 portid, u32 seq,
@@ -1348,6 +1479,10 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, u32 portid, u32 seq,
}
nla_nest_end(skb, list);
+ if (rule->ulen &&
+ nla_put(skb, NFTA_RULE_USERDATA, rule->ulen, nft_userdata(rule)))
+ goto nla_put_failure;
+
return nlmsg_end(skb, nlh);
nla_put_failure:
@@ -1355,22 +1490,15 @@ nla_put_failure:
return -1;
}
-static int nf_tables_rule_notify(const struct sk_buff *oskb,
- const struct nlmsghdr *nlh,
- const struct nft_table *table,
- const struct nft_chain *chain,
+static int nf_tables_rule_notify(const struct nft_ctx *ctx,
const struct nft_rule *rule,
- int event, u32 flags, int family)
+ int event)
{
struct sk_buff *skb;
- u32 portid = NETLINK_CB(oskb).portid;
- struct net *net = oskb ? sock_net(oskb->sk) : &init_net;
- u32 seq = nlh->nlmsg_seq;
- bool report;
int err;
- report = nlmsg_report(nlh);
- if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
+ if (!ctx->report &&
+ !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
return 0;
err = -ENOBUFS;
@@ -1378,18 +1506,21 @@ static int nf_tables_rule_notify(const struct sk_buff *oskb,
if (skb == NULL)
goto err;
- err = nf_tables_fill_rule_info(skb, portid, seq, event, flags,
- family, table, chain, rule);
+ err = nf_tables_fill_rule_info(skb, ctx->portid, ctx->seq, event, 0,
+ ctx->afi->family, ctx->table,
+ ctx->chain, rule);
if (err < 0) {
kfree_skb(skb);
goto err;
}
- err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report,
- GFP_KERNEL);
+ err = nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES,
+ ctx->report, GFP_KERNEL);
err:
- if (err < 0)
- nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err);
+ if (err < 0) {
+ nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES,
+ err);
+ }
return err;
}
@@ -1439,16 +1570,17 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
unsigned int idx = 0, s_idx = cb->args[0];
struct net *net = sock_net(skb->sk);
int family = nfmsg->nfgen_family;
- u8 genctr = ACCESS_ONCE(net->nft.genctr);
- u8 gencursor = ACCESS_ONCE(net->nft.gencursor);
- list_for_each_entry(afi, &net->nft.af_info, list) {
+ rcu_read_lock();
+ cb->seq = net->nft.base_seq;
+
+ list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
if (family != NFPROTO_UNSPEC && family != afi->family)
continue;
- list_for_each_entry(table, &afi->tables, list) {
- list_for_each_entry(chain, &table->chains, list) {
- list_for_each_entry(rule, &chain->rules, list) {
+ list_for_each_entry_rcu(table, &afi->tables, list) {
+ list_for_each_entry_rcu(chain, &table->chains, list) {
+ list_for_each_entry_rcu(rule, &chain->rules, list) {
if (!nft_rule_is_active(net, rule))
goto cont;
if (idx < s_idx)
@@ -1462,6 +1594,8 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
NLM_F_MULTI | NLM_F_APPEND,
afi->family, table, chain, rule) < 0)
goto done;
+
+ nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
idx++;
}
@@ -1469,9 +1603,7 @@ cont:
}
}
done:
- /* Invalidate this dump, a transition to the new generation happened */
- if (gencursor != net->nft.gencursor || genctr != net->nft.genctr)
- return -EBUSY;
+ rcu_read_unlock();
cb->args[0] = idx;
return skb->len;
@@ -1505,10 +1637,14 @@ static int nf_tables_getrule(struct sock *nlsk, struct sk_buff *skb,
table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]);
if (IS_ERR(table))
return PTR_ERR(table);
+ if (table->flags & NFT_TABLE_INACTIVE)
+ return -ENOENT;
chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]);
if (IS_ERR(chain))
return PTR_ERR(chain);
+ if (chain->flags & NFT_CHAIN_INACTIVE)
+ return -ENOENT;
rule = nf_tables_rule_lookup(chain, nla[NFTA_RULE_HANDLE]);
if (IS_ERR(rule))
@@ -1531,7 +1667,8 @@ err:
return err;
}
-static void nf_tables_rule_destroy(struct nft_rule *rule)
+static void nf_tables_rule_destroy(const struct nft_ctx *ctx,
+ struct nft_rule *rule)
{
struct nft_expr *expr;
@@ -1541,50 +1678,46 @@ static void nf_tables_rule_destroy(struct nft_rule *rule)
*/
expr = nft_expr_first(rule);
while (expr->ops && expr != nft_expr_last(rule)) {
- nf_tables_expr_destroy(expr);
+ nf_tables_expr_destroy(ctx, expr);
expr = nft_expr_next(expr);
}
kfree(rule);
}
-#define NFT_RULE_MAXEXPRS 128
-
-static struct nft_expr_info *info;
-
-static struct nft_rule_trans *
-nf_tables_trans_add(struct nft_rule *rule, const struct nft_ctx *ctx)
+static struct nft_trans *nft_trans_rule_add(struct nft_ctx *ctx, int msg_type,
+ struct nft_rule *rule)
{
- struct nft_rule_trans *rupd;
+ struct nft_trans *trans;
- rupd = kmalloc(sizeof(struct nft_rule_trans), GFP_KERNEL);
- if (rupd == NULL)
- return NULL;
+ trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_rule));
+ if (trans == NULL)
+ return NULL;
- rupd->chain = ctx->chain;
- rupd->table = ctx->table;
- rupd->rule = rule;
- rupd->family = ctx->afi->family;
- rupd->nlh = ctx->nlh;
- list_add_tail(&rupd->list, &ctx->net->nft.commit_list);
+ nft_trans_rule(trans) = rule;
+ list_add_tail(&trans->list, &ctx->net->nft.commit_list);
- return rupd;
+ return trans;
}
+#define NFT_RULE_MAXEXPRS 128
+
+static struct nft_expr_info *info;
+
static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
- const struct nft_af_info *afi;
+ struct nft_af_info *afi;
struct net *net = sock_net(skb->sk);
struct nft_table *table;
struct nft_chain *chain;
struct nft_rule *rule, *old_rule = NULL;
- struct nft_rule_trans *repl = NULL;
+ struct nft_trans *trans = NULL;
struct nft_expr *expr;
struct nft_ctx ctx;
struct nlattr *tmp;
- unsigned int size, i, n;
+ unsigned int size, i, n, ulen = 0;
int err, rem;
bool create;
u64 handle, pos_handle;
@@ -1619,6 +1752,9 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
if (!create || nlh->nlmsg_flags & NLM_F_REPLACE)
return -EINVAL;
handle = nf_tables_alloc_handle(table);
+
+ if (chain->use == UINT_MAX)
+ return -EOVERFLOW;
}
if (nla[NFTA_RULE_POSITION]) {
@@ -1650,8 +1786,11 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
}
}
+ if (nla[NFTA_RULE_USERDATA])
+ ulen = nla_len(nla[NFTA_RULE_USERDATA]);
+
err = -ENOMEM;
- rule = kzalloc(sizeof(*rule) + size, GFP_KERNEL);
+ rule = kzalloc(sizeof(*rule) + size + ulen, GFP_KERNEL);
if (rule == NULL)
goto err1;
@@ -1659,6 +1798,10 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
rule->handle = handle;
rule->dlen = size;
+ rule->ulen = ulen;
+
+ if (ulen)
+ nla_memcpy(nft_userdata(rule), nla[NFTA_RULE_USERDATA], ulen);
expr = nft_expr_first(rule);
for (i = 0; i < n; i++) {
@@ -1671,13 +1814,15 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
if (nlh->nlmsg_flags & NLM_F_REPLACE) {
if (nft_rule_is_active_next(net, old_rule)) {
- repl = nf_tables_trans_add(old_rule, &ctx);
- if (repl == NULL) {
+ trans = nft_trans_rule_add(&ctx, NFT_MSG_DELRULE,
+ old_rule);
+ if (trans == NULL) {
err = -ENOMEM;
goto err2;
}
nft_rule_disactivate_next(net, old_rule);
- list_add_tail(&rule->list, &old_rule->list);
+ chain->use--;
+ list_add_tail_rcu(&rule->list, &old_rule->list);
} else {
err = -ENOENT;
goto err2;
@@ -1694,22 +1839,23 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
list_add_rcu(&rule->list, &chain->rules);
}
- if (nf_tables_trans_add(rule, &ctx) == NULL) {
+ if (nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule) == NULL) {
err = -ENOMEM;
goto err3;
}
+ chain->use++;
return 0;
err3:
list_del_rcu(&rule->list);
- if (repl) {
- list_del_rcu(&repl->rule->list);
- list_del(&repl->list);
- nft_rule_clear(net, repl->rule);
- kfree(repl);
+ if (trans) {
+ list_del_rcu(&nft_trans_rule(trans)->list);
+ nft_rule_clear(net, nft_trans_rule(trans));
+ nft_trans_destroy(trans);
+ chain->use++;
}
err2:
- nf_tables_rule_destroy(rule);
+ nf_tables_rule_destroy(&ctx, rule);
err1:
for (i = 0; i < n; i++) {
if (info[i].ops != NULL)
@@ -1723,9 +1869,10 @@ nf_tables_delrule_one(struct nft_ctx *ctx, struct nft_rule *rule)
{
/* You cannot delete the same rule twice */
if (nft_rule_is_active_next(ctx->net, rule)) {
- if (nf_tables_trans_add(rule, ctx) == NULL)
+ if (nft_trans_rule_add(ctx, NFT_MSG_DELRULE, rule) == NULL)
return -ENOMEM;
nft_rule_disactivate_next(ctx->net, rule);
+ ctx->chain->use--;
return 0;
}
return -ENOENT;
@@ -1749,9 +1896,9 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
- const struct nft_af_info *afi;
+ struct nft_af_info *afi;
struct net *net = sock_net(skb->sk);
- const struct nft_table *table;
+ struct nft_table *table;
struct nft_chain *chain = NULL;
struct nft_rule *rule;
int family = nfmsg->nfgen_family, err = 0;
@@ -1764,6 +1911,8 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb,
table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]);
if (IS_ERR(table))
return PTR_ERR(table);
+ if (table->flags & NFT_TABLE_INACTIVE)
+ return -ENOENT;
if (nla[NFTA_RULE_CHAIN]) {
chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]);
@@ -1796,87 +1945,6 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb,
return err;
}
-static int nf_tables_commit(struct sk_buff *skb)
-{
- struct net *net = sock_net(skb->sk);
- struct nft_rule_trans *rupd, *tmp;
-
- /* Bump generation counter, invalidate any dump in progress */
- net->nft.genctr++;
-
- /* A new generation has just started */
- net->nft.gencursor = gencursor_next(net);
-
- /* Make sure all packets have left the previous generation before
- * purging old rules.
- */
- synchronize_rcu();
-
- list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) {
- /* This rule was inactive in the past and just became active.
- * Clear the next bit of the genmask since its meaning has
- * changed, now it is the future.
- */
- if (nft_rule_is_active(net, rupd->rule)) {
- nft_rule_clear(net, rupd->rule);
- nf_tables_rule_notify(skb, rupd->nlh, rupd->table,
- rupd->chain, rupd->rule,
- NFT_MSG_NEWRULE, 0,
- rupd->family);
- list_del(&rupd->list);
- kfree(rupd);
- continue;
- }
-
- /* This rule is in the past, get rid of it */
- list_del_rcu(&rupd->rule->list);
- nf_tables_rule_notify(skb, rupd->nlh, rupd->table, rupd->chain,
- rupd->rule, NFT_MSG_DELRULE, 0,
- rupd->family);
- }
-
- /* Make sure we don't see any packet traversing old rules */
- synchronize_rcu();
-
- /* Now we can safely release unused old rules */
- list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) {
- nf_tables_rule_destroy(rupd->rule);
- list_del(&rupd->list);
- kfree(rupd);
- }
-
- return 0;
-}
-
-static int nf_tables_abort(struct sk_buff *skb)
-{
- struct net *net = sock_net(skb->sk);
- struct nft_rule_trans *rupd, *tmp;
-
- list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) {
- if (!nft_rule_is_active_next(net, rupd->rule)) {
- nft_rule_clear(net, rupd->rule);
- list_del(&rupd->list);
- kfree(rupd);
- continue;
- }
-
- /* This rule is inactive, get rid of it */
- list_del_rcu(&rupd->rule->list);
- }
-
- /* Make sure we don't see any packet accessing aborted rules */
- synchronize_rcu();
-
- list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) {
- nf_tables_rule_destroy(rupd->rule);
- list_del(&rupd->list);
- kfree(rupd);
- }
-
- return 0;
-}
-
/*
* Sets
*/
@@ -1886,7 +1954,7 @@ static LIST_HEAD(nf_tables_set_ops);
int nft_register_set(struct nft_set_ops *ops)
{
nfnl_lock(NFNL_SUBSYS_NFTABLES);
- list_add_tail(&ops->list, &nf_tables_set_ops);
+ list_add_tail_rcu(&ops->list, &nf_tables_set_ops);
nfnl_unlock(NFNL_SUBSYS_NFTABLES);
return 0;
}
@@ -1895,14 +1963,23 @@ EXPORT_SYMBOL_GPL(nft_register_set);
void nft_unregister_set(struct nft_set_ops *ops)
{
nfnl_lock(NFNL_SUBSYS_NFTABLES);
- list_del(&ops->list);
+ list_del_rcu(&ops->list);
nfnl_unlock(NFNL_SUBSYS_NFTABLES);
}
EXPORT_SYMBOL_GPL(nft_unregister_set);
-static const struct nft_set_ops *nft_select_set_ops(const struct nlattr * const nla[])
+/*
+ * Select a set implementation based on the data characteristics and the
+ * given policy. The total memory use might not be known if no size is
+ * given, in that case the amount of memory per element is used.
+ */
+static const struct nft_set_ops *
+nft_select_set_ops(const struct nlattr * const nla[],
+ const struct nft_set_desc *desc,
+ enum nft_set_policies policy)
{
- const struct nft_set_ops *ops;
+ const struct nft_set_ops *ops, *bops;
+ struct nft_set_estimate est, best;
u32 features;
#ifdef CONFIG_MODULES
@@ -1920,26 +1997,64 @@ static const struct nft_set_ops *nft_select_set_ops(const struct nlattr * const
features &= NFT_SET_INTERVAL | NFT_SET_MAP;
}
- // FIXME: implement selection properly
+ bops = NULL;
+ best.size = ~0;
+ best.class = ~0;
+
list_for_each_entry(ops, &nf_tables_set_ops, list) {
if ((ops->features & features) != features)
continue;
+ if (!ops->estimate(desc, features, &est))
+ continue;
+
+ switch (policy) {
+ case NFT_SET_POL_PERFORMANCE:
+ if (est.class < best.class)
+ break;
+ if (est.class == best.class && est.size < best.size)
+ break;
+ continue;
+ case NFT_SET_POL_MEMORY:
+ if (est.size < best.size)
+ break;
+ if (est.size == best.size && est.class < best.class)
+ break;
+ continue;
+ default:
+ break;
+ }
+
if (!try_module_get(ops->owner))
continue;
- return ops;
+ if (bops != NULL)
+ module_put(bops->owner);
+
+ bops = ops;
+ best = est;
}
+ if (bops != NULL)
+ return bops;
+
return ERR_PTR(-EOPNOTSUPP);
}
static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = {
[NFTA_SET_TABLE] = { .type = NLA_STRING },
- [NFTA_SET_NAME] = { .type = NLA_STRING },
+ [NFTA_SET_NAME] = { .type = NLA_STRING,
+ .len = IFNAMSIZ - 1 },
[NFTA_SET_FLAGS] = { .type = NLA_U32 },
[NFTA_SET_KEY_TYPE] = { .type = NLA_U32 },
[NFTA_SET_KEY_LEN] = { .type = NLA_U32 },
[NFTA_SET_DATA_TYPE] = { .type = NLA_U32 },
[NFTA_SET_DATA_LEN] = { .type = NLA_U32 },
+ [NFTA_SET_POLICY] = { .type = NLA_U32 },
+ [NFTA_SET_DESC] = { .type = NLA_NESTED },
+ [NFTA_SET_ID] = { .type = NLA_U32 },
+};
+
+static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = {
+ [NFTA_SET_DESC_SIZE] = { .type = NLA_U32 },
};
static int nft_ctx_init_from_setattr(struct nft_ctx *ctx,
@@ -1949,8 +2064,8 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx,
{
struct net *net = sock_net(skb->sk);
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
- const struct nft_af_info *afi = NULL;
- const struct nft_table *table = NULL;
+ struct nft_af_info *afi = NULL;
+ struct nft_table *table = NULL;
if (nfmsg->nfgen_family != NFPROTO_UNSPEC) {
afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false);
@@ -1965,6 +2080,8 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx,
table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE]);
if (IS_ERR(table))
return PTR_ERR(table);
+ if (table->flags & NFT_TABLE_INACTIVE)
+ return -ENOENT;
}
nft_ctx_init(ctx, skb, nlh, afi, table, NULL, nla);
@@ -1986,13 +2103,27 @@ struct nft_set *nf_tables_set_lookup(const struct nft_table *table,
return ERR_PTR(-ENOENT);
}
+struct nft_set *nf_tables_set_lookup_byid(const struct net *net,
+ const struct nlattr *nla)
+{
+ struct nft_trans *trans;
+ u32 id = ntohl(nla_get_be32(nla));
+
+ list_for_each_entry(trans, &net->nft.commit_list, list) {
+ if (trans->msg_type == NFT_MSG_NEWSET &&
+ id == nft_trans_set_id(trans))
+ return nft_trans_set(trans);
+ }
+ return ERR_PTR(-ENOENT);
+}
+
static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set,
const char *name)
{
const struct nft_set *i;
const char *p;
unsigned long *inuse;
- unsigned int n = 0;
+ unsigned int n = 0, min = 0;
p = strnchr(name, IFNAMSIZ, '%');
if (p != NULL) {
@@ -2002,23 +2133,28 @@ static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set,
inuse = (unsigned long *)get_zeroed_page(GFP_KERNEL);
if (inuse == NULL)
return -ENOMEM;
-
+cont:
list_for_each_entry(i, &ctx->table->sets, list) {
int tmp;
if (!sscanf(i->name, name, &tmp))
continue;
- if (tmp < 0 || tmp >= BITS_PER_BYTE * PAGE_SIZE)
+ if (tmp < min || tmp >= min + BITS_PER_BYTE * PAGE_SIZE)
continue;
- set_bit(tmp, inuse);
+ set_bit(tmp - min, inuse);
}
n = find_first_zero_bit(inuse, BITS_PER_BYTE * PAGE_SIZE);
+ if (n >= BITS_PER_BYTE * PAGE_SIZE) {
+ min += BITS_PER_BYTE * PAGE_SIZE;
+ memset(inuse, 0, PAGE_SIZE);
+ goto cont;
+ }
free_page((unsigned long)inuse);
}
- snprintf(set->name, sizeof(set->name), name, n);
+ snprintf(set->name, sizeof(set->name), name, min + n);
list_for_each_entry(i, &ctx->table->sets, list) {
if (!strcmp(set->name, i->name))
return -ENFILE;
@@ -2031,8 +2167,9 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
{
struct nfgenmsg *nfmsg;
struct nlmsghdr *nlh;
- u32 portid = NETLINK_CB(ctx->skb).portid;
- u32 seq = ctx->nlh->nlmsg_seq;
+ struct nlattr *desc;
+ u32 portid = ctx->portid;
+ u32 seq = ctx->seq;
event |= NFNL_SUBSYS_NFTABLES << 8;
nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg),
@@ -2064,6 +2201,14 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
goto nla_put_failure;
}
+ desc = nla_nest_start(skb, NFTA_SET_DESC);
+ if (desc == NULL)
+ goto nla_put_failure;
+ if (set->size &&
+ nla_put_be32(skb, NFTA_SET_DESC_SIZE, htonl(set->size)))
+ goto nla_put_failure;
+ nla_nest_end(skb, desc);
+
return nlmsg_end(skb, nlh);
nla_put_failure:
@@ -2073,19 +2218,18 @@ nla_put_failure:
static int nf_tables_set_notify(const struct nft_ctx *ctx,
const struct nft_set *set,
- int event)
+ int event, gfp_t gfp_flags)
{
struct sk_buff *skb;
- u32 portid = NETLINK_CB(ctx->skb).portid;
- bool report;
+ u32 portid = ctx->portid;
int err;
- report = nlmsg_report(ctx->nlh);
- if (!report && !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
+ if (!ctx->report &&
+ !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
return 0;
err = -ENOBUFS;
- skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ skb = nlmsg_new(NLMSG_GOODSIZE, gfp_flags);
if (skb == NULL)
goto err;
@@ -2095,8 +2239,8 @@ static int nf_tables_set_notify(const struct nft_ctx *ctx,
goto err;
}
- err = nfnetlink_send(skb, ctx->net, portid, NFNLGRP_NFTABLES, report,
- GFP_KERNEL);
+ err = nfnetlink_send(skb, ctx->net, portid, NFNLGRP_NFTABLES,
+ ctx->report, gfp_flags);
err:
if (err < 0)
nfnetlink_set_err(ctx->net, portid, NFNLGRP_NFTABLES, err);
@@ -2112,7 +2256,10 @@ static int nf_tables_dump_sets_table(struct nft_ctx *ctx, struct sk_buff *skb,
if (cb->args[1])
return skb->len;
- list_for_each_entry(set, &ctx->table->sets, list) {
+ rcu_read_lock();
+ cb->seq = ctx->net->nft.base_seq;
+
+ list_for_each_entry_rcu(set, &ctx->table->sets, list) {
if (idx < s_idx)
goto cont;
if (nf_tables_fill_set(skb, ctx, set, NFT_MSG_NEWSET,
@@ -2120,11 +2267,13 @@ static int nf_tables_dump_sets_table(struct nft_ctx *ctx, struct sk_buff *skb,
cb->args[0] = idx;
goto done;
}
+ nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
idx++;
}
cb->args[1] = 1;
done:
+ rcu_read_unlock();
return skb->len;
}
@@ -2138,7 +2287,10 @@ static int nf_tables_dump_sets_family(struct nft_ctx *ctx, struct sk_buff *skb,
if (cb->args[1])
return skb->len;
- list_for_each_entry(table, &ctx->afi->tables, list) {
+ rcu_read_lock();
+ cb->seq = ctx->net->nft.base_seq;
+
+ list_for_each_entry_rcu(table, &ctx->afi->tables, list) {
if (cur_table) {
if (cur_table != table)
continue;
@@ -2147,7 +2299,7 @@ static int nf_tables_dump_sets_family(struct nft_ctx *ctx, struct sk_buff *skb,
}
ctx->table = table;
idx = 0;
- list_for_each_entry(set, &ctx->table->sets, list) {
+ list_for_each_entry_rcu(set, &ctx->table->sets, list) {
if (idx < s_idx)
goto cont;
if (nf_tables_fill_set(skb, ctx, set, NFT_MSG_NEWSET,
@@ -2156,12 +2308,14 @@ static int nf_tables_dump_sets_family(struct nft_ctx *ctx, struct sk_buff *skb,
cb->args[2] = (unsigned long) table;
goto done;
}
+ nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
idx++;
}
}
cb->args[1] = 1;
done:
+ rcu_read_unlock();
return skb->len;
}
@@ -2170,7 +2324,7 @@ static int nf_tables_dump_sets_all(struct nft_ctx *ctx, struct sk_buff *skb,
{
const struct nft_set *set;
unsigned int idx, s_idx = cb->args[0];
- const struct nft_af_info *afi;
+ struct nft_af_info *afi;
struct nft_table *table, *cur_table = (struct nft_table *)cb->args[2];
struct net *net = sock_net(skb->sk);
int cur_family = cb->args[3];
@@ -2178,7 +2332,10 @@ static int nf_tables_dump_sets_all(struct nft_ctx *ctx, struct sk_buff *skb,
if (cb->args[1])
return skb->len;
- list_for_each_entry(afi, &net->nft.af_info, list) {
+ rcu_read_lock();
+ cb->seq = net->nft.base_seq;
+
+ list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
if (cur_family) {
if (afi->family != cur_family)
continue;
@@ -2186,7 +2343,7 @@ static int nf_tables_dump_sets_all(struct nft_ctx *ctx, struct sk_buff *skb,
cur_family = 0;
}
- list_for_each_entry(table, &afi->tables, list) {
+ list_for_each_entry_rcu(table, &afi->tables, list) {
if (cur_table) {
if (cur_table != table)
continue;
@@ -2197,7 +2354,7 @@ static int nf_tables_dump_sets_all(struct nft_ctx *ctx, struct sk_buff *skb,
ctx->table = table;
ctx->afi = afi;
idx = 0;
- list_for_each_entry(set, &ctx->table->sets, list) {
+ list_for_each_entry_rcu(set, &ctx->table->sets, list) {
if (idx < s_idx)
goto cont;
if (nf_tables_fill_set(skb, ctx, set,
@@ -2208,6 +2365,7 @@ static int nf_tables_dump_sets_all(struct nft_ctx *ctx, struct sk_buff *skb,
cb->args[3] = afi->family;
goto done;
}
+ nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
idx++;
}
@@ -2217,6 +2375,7 @@ cont:
}
cb->args[1] = 1;
done:
+ rcu_read_unlock();
return skb->len;
}
@@ -2247,6 +2406,8 @@ static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb)
return ret;
}
+#define NFT_SET_INACTIVE (1 << 15) /* Internal set flag */
+
static int nf_tables_getset(struct sock *nlsk, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
@@ -2276,6 +2437,8 @@ static int nf_tables_getset(struct sock *nlsk, struct sk_buff *skb,
set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]);
if (IS_ERR(set))
return PTR_ERR(set);
+ if (set->flags & NFT_SET_INACTIVE)
+ return -ENOENT;
skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (skb2 == NULL)
@@ -2292,13 +2455,50 @@ err:
return err;
}
+static int nf_tables_set_desc_parse(const struct nft_ctx *ctx,
+ struct nft_set_desc *desc,
+ const struct nlattr *nla)
+{
+ struct nlattr *da[NFTA_SET_DESC_MAX + 1];
+ int err;
+
+ err = nla_parse_nested(da, NFTA_SET_DESC_MAX, nla, nft_set_desc_policy);
+ if (err < 0)
+ return err;
+
+ if (da[NFTA_SET_DESC_SIZE] != NULL)
+ desc->size = ntohl(nla_get_be32(da[NFTA_SET_DESC_SIZE]));
+
+ return 0;
+}
+
+static int nft_trans_set_add(struct nft_ctx *ctx, int msg_type,
+ struct nft_set *set)
+{
+ struct nft_trans *trans;
+
+ trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_set));
+ if (trans == NULL)
+ return -ENOMEM;
+
+ if (msg_type == NFT_MSG_NEWSET && ctx->nla[NFTA_SET_ID] != NULL) {
+ nft_trans_set_id(trans) =
+ ntohl(nla_get_be32(ctx->nla[NFTA_SET_ID]));
+ set->flags |= NFT_SET_INACTIVE;
+ }
+ nft_trans_set(trans) = set;
+ list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+
+ return 0;
+}
+
static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
const struct nft_set_ops *ops;
- const struct nft_af_info *afi;
+ struct nft_af_info *afi;
struct net *net = sock_net(skb->sk);
struct nft_table *table;
struct nft_set *set;
@@ -2306,14 +2506,18 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
char name[IFNAMSIZ];
unsigned int size;
bool create;
- u32 ktype, klen, dlen, dtype, flags;
+ u32 ktype, dtype, flags, policy;
+ struct nft_set_desc desc;
int err;
if (nla[NFTA_SET_TABLE] == NULL ||
nla[NFTA_SET_NAME] == NULL ||
- nla[NFTA_SET_KEY_LEN] == NULL)
+ nla[NFTA_SET_KEY_LEN] == NULL ||
+ nla[NFTA_SET_ID] == NULL)
return -EINVAL;
+ memset(&desc, 0, sizeof(desc));
+
ktype = NFT_DATA_VALUE;
if (nla[NFTA_SET_KEY_TYPE] != NULL) {
ktype = ntohl(nla_get_be32(nla[NFTA_SET_KEY_TYPE]));
@@ -2321,8 +2525,8 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
return -EINVAL;
}
- klen = ntohl(nla_get_be32(nla[NFTA_SET_KEY_LEN]));
- if (klen == 0 || klen > FIELD_SIZEOF(struct nft_data, data))
+ desc.klen = ntohl(nla_get_be32(nla[NFTA_SET_KEY_LEN]));
+ if (desc.klen == 0 || desc.klen > FIELD_SIZEOF(struct nft_data, data))
return -EINVAL;
flags = 0;
@@ -2334,7 +2538,6 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
}
dtype = 0;
- dlen = 0;
if (nla[NFTA_SET_DATA_TYPE] != NULL) {
if (!(flags & NFT_SET_MAP))
return -EINVAL;
@@ -2347,15 +2550,25 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
if (dtype != NFT_DATA_VERDICT) {
if (nla[NFTA_SET_DATA_LEN] == NULL)
return -EINVAL;
- dlen = ntohl(nla_get_be32(nla[NFTA_SET_DATA_LEN]));
- if (dlen == 0 ||
- dlen > FIELD_SIZEOF(struct nft_data, data))
+ desc.dlen = ntohl(nla_get_be32(nla[NFTA_SET_DATA_LEN]));
+ if (desc.dlen == 0 ||
+ desc.dlen > FIELD_SIZEOF(struct nft_data, data))
return -EINVAL;
} else
- dlen = sizeof(struct nft_data);
+ desc.dlen = sizeof(struct nft_data);
} else if (flags & NFT_SET_MAP)
return -EINVAL;
+ policy = NFT_SET_POL_PERFORMANCE;
+ if (nla[NFTA_SET_POLICY] != NULL)
+ policy = ntohl(nla_get_be32(nla[NFTA_SET_POLICY]));
+
+ if (nla[NFTA_SET_DESC] != NULL) {
+ err = nf_tables_set_desc_parse(&ctx, &desc, nla[NFTA_SET_DESC]);
+ if (err < 0)
+ return err;
+ }
+
create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, create);
@@ -2386,7 +2599,7 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
if (!(nlh->nlmsg_flags & NLM_F_CREATE))
return -ENOENT;
- ops = nft_select_set_ops(nla);
+ ops = nft_select_set_ops(nla, &desc, policy);
if (IS_ERR(ops))
return PTR_ERR(ops);
@@ -2407,17 +2620,22 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
INIT_LIST_HEAD(&set->bindings);
set->ops = ops;
set->ktype = ktype;
- set->klen = klen;
+ set->klen = desc.klen;
set->dtype = dtype;
- set->dlen = dlen;
+ set->dlen = desc.dlen;
set->flags = flags;
+ set->size = desc.size;
- err = ops->init(set, nla);
+ err = ops->init(set, &desc, nla);
if (err < 0)
goto err2;
- list_add_tail(&set->list, &table->sets);
- nf_tables_set_notify(&ctx, set, NFT_MSG_NEWSET);
+ err = nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set);
+ if (err < 0)
+ goto err2;
+
+ list_add_tail_rcu(&set->list, &table->sets);
+ table->use++;
return 0;
err2:
@@ -2427,17 +2645,20 @@ err1:
return err;
}
-static void nf_tables_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
+static void nft_set_destroy(struct nft_set *set)
{
- list_del(&set->list);
- if (!(set->flags & NFT_SET_ANONYMOUS))
- nf_tables_set_notify(ctx, set, NFT_MSG_DELSET);
-
set->ops->destroy(set);
module_put(set->ops->owner);
kfree(set);
}
+static void nf_tables_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
+{
+ list_del_rcu(&set->list);
+ nf_tables_set_notify(ctx, set, NFT_MSG_DELSET, GFP_ATOMIC);
+ nft_set_destroy(set);
+}
+
static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
@@ -2459,10 +2680,17 @@ static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb,
set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]);
if (IS_ERR(set))
return PTR_ERR(set);
+ if (set->flags & NFT_SET_INACTIVE)
+ return -ENOENT;
if (!list_empty(&set->bindings))
return -EBUSY;
- nf_tables_set_destroy(&ctx, set);
+ err = nft_trans_set_add(&ctx, NFT_MSG_DELSET, set);
+ if (err < 0)
+ return err;
+
+ list_del_rcu(&set->list);
+ ctx.table->use--;
return 0;
}
@@ -2513,16 +2741,17 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
}
bind:
binding->chain = ctx->chain;
- list_add_tail(&binding->list, &set->bindings);
+ list_add_tail_rcu(&binding->list, &set->bindings);
return 0;
}
void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
struct nft_set_binding *binding)
{
- list_del(&binding->list);
+ list_del_rcu(&binding->list);
- if (list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS)
+ if (list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS &&
+ !(set->flags & NFT_SET_INACTIVE))
nf_tables_set_destroy(ctx, set);
}
@@ -2540,16 +2769,18 @@ static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX +
[NFTA_SET_ELEM_LIST_TABLE] = { .type = NLA_STRING },
[NFTA_SET_ELEM_LIST_SET] = { .type = NLA_STRING },
[NFTA_SET_ELEM_LIST_ELEMENTS] = { .type = NLA_NESTED },
+ [NFTA_SET_ELEM_LIST_SET_ID] = { .type = NLA_U32 },
};
static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx,
const struct sk_buff *skb,
const struct nlmsghdr *nlh,
- const struct nlattr * const nla[])
+ const struct nlattr * const nla[],
+ bool trans)
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
- const struct nft_af_info *afi;
- const struct nft_table *table;
+ struct nft_af_info *afi;
+ struct nft_table *table;
struct net *net = sock_net(skb->sk);
afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false);
@@ -2559,6 +2790,8 @@ static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx,
table = nf_tables_table_lookup(afi, nla[NFTA_SET_ELEM_LIST_TABLE]);
if (IS_ERR(table))
return PTR_ERR(table);
+ if (!trans && (table->flags & NFT_TABLE_INACTIVE))
+ return -ENOENT;
nft_ctx_init(ctx, skb, nlh, afi, table, NULL, nla);
return 0;
@@ -2632,13 +2865,16 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
if (err < 0)
return err;
- err = nft_ctx_init_from_elemattr(&ctx, cb->skb, cb->nlh, (void *)nla);
+ err = nft_ctx_init_from_elemattr(&ctx, cb->skb, cb->nlh, (void *)nla,
+ false);
if (err < 0)
return err;
set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]);
if (IS_ERR(set))
return PTR_ERR(set);
+ if (set->flags & NFT_SET_INACTIVE)
+ return -ENOENT;
event = NFT_MSG_NEWSETELEM;
event |= NFNL_SUBSYS_NFTABLES << 8;
@@ -2651,7 +2887,7 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
goto nla_put_failure;
nfmsg = nlmsg_data(nlh);
- nfmsg->nfgen_family = NFPROTO_UNSPEC;
+ nfmsg->nfgen_family = ctx.afi->family;
nfmsg->version = NFNETLINK_V0;
nfmsg->res_id = 0;
@@ -2695,13 +2931,15 @@ static int nf_tables_getsetelem(struct sock *nlsk, struct sk_buff *skb,
struct nft_ctx ctx;
int err;
- err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla);
+ err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla, false);
if (err < 0)
return err;
set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]);
if (IS_ERR(set))
return PTR_ERR(set);
+ if (set->flags & NFT_SET_INACTIVE)
+ return -ENOENT;
if (nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
@@ -2712,7 +2950,98 @@ static int nf_tables_getsetelem(struct sock *nlsk, struct sk_buff *skb,
return -EOPNOTSUPP;
}
-static int nft_add_set_elem(const struct nft_ctx *ctx, struct nft_set *set,
+static int nf_tables_fill_setelem_info(struct sk_buff *skb,
+ const struct nft_ctx *ctx, u32 seq,
+ u32 portid, int event, u16 flags,
+ const struct nft_set *set,
+ const struct nft_set_elem *elem)
+{
+ struct nfgenmsg *nfmsg;
+ struct nlmsghdr *nlh;
+ struct nlattr *nest;
+ int err;
+
+ event |= NFNL_SUBSYS_NFTABLES << 8;
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg),
+ flags);
+ if (nlh == NULL)
+ goto nla_put_failure;
+
+ nfmsg = nlmsg_data(nlh);
+ nfmsg->nfgen_family = ctx->afi->family;
+ nfmsg->version = NFNETLINK_V0;
+ nfmsg->res_id = 0;
+
+ if (nla_put_string(skb, NFTA_SET_TABLE, ctx->table->name))
+ goto nla_put_failure;
+ if (nla_put_string(skb, NFTA_SET_NAME, set->name))
+ goto nla_put_failure;
+
+ nest = nla_nest_start(skb, NFTA_SET_ELEM_LIST_ELEMENTS);
+ if (nest == NULL)
+ goto nla_put_failure;
+
+ err = nf_tables_fill_setelem(skb, set, elem);
+ if (err < 0)
+ goto nla_put_failure;
+
+ nla_nest_end(skb, nest);
+
+ return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+ nlmsg_trim(skb, nlh);
+ return -1;
+}
+
+static int nf_tables_setelem_notify(const struct nft_ctx *ctx,
+ const struct nft_set *set,
+ const struct nft_set_elem *elem,
+ int event, u16 flags)
+{
+ struct net *net = ctx->net;
+ u32 portid = ctx->portid;
+ struct sk_buff *skb;
+ int err;
+
+ if (!ctx->report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
+ return 0;
+
+ err = -ENOBUFS;
+ skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (skb == NULL)
+ goto err;
+
+ err = nf_tables_fill_setelem_info(skb, ctx, 0, portid, event, flags,
+ set, elem);
+ if (err < 0) {
+ kfree_skb(skb);
+ goto err;
+ }
+
+ err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, ctx->report,
+ GFP_KERNEL);
+err:
+ if (err < 0)
+ nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err);
+ return err;
+}
+
+static struct nft_trans *nft_trans_elem_alloc(struct nft_ctx *ctx,
+ int msg_type,
+ struct nft_set *set)
+{
+ struct nft_trans *trans;
+
+ trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_elem));
+ if (trans == NULL)
+ return NULL;
+
+ nft_trans_elem_set(trans) = set;
+ return trans;
+}
+
+static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
const struct nlattr *attr)
{
struct nlattr *nla[NFTA_SET_ELEM_MAX + 1];
@@ -2720,8 +3049,12 @@ static int nft_add_set_elem(const struct nft_ctx *ctx, struct nft_set *set,
struct nft_set_elem elem;
struct nft_set_binding *binding;
enum nft_registers dreg;
+ struct nft_trans *trans;
int err;
+ if (set->size && set->nelems == set->size)
+ return -ENFILE;
+
err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr,
nft_set_elem_policy);
if (err < 0)
@@ -2774,7 +3107,7 @@ static int nft_add_set_elem(const struct nft_ctx *ctx, struct nft_set *set,
struct nft_ctx bind_ctx = {
.afi = ctx->afi,
.table = ctx->table,
- .chain = binding->chain,
+ .chain = (struct nft_chain *)binding->chain,
};
err = nft_validate_data_load(&bind_ctx, dreg,
@@ -2784,12 +3117,20 @@ static int nft_add_set_elem(const struct nft_ctx *ctx, struct nft_set *set,
}
}
+ trans = nft_trans_elem_alloc(ctx, NFT_MSG_NEWSETELEM, set);
+ if (trans == NULL)
+ goto err3;
+
err = set->ops->insert(set, &elem);
if (err < 0)
- goto err3;
+ goto err4;
+ nft_trans_elem(trans) = elem;
+ list_add_tail(&trans->list, &ctx->net->nft.commit_list);
return 0;
+err4:
+ kfree(trans);
err3:
if (nla[NFTA_SET_ELEM_DATA] != NULL)
nft_data_uninit(&elem.data, d2.type);
@@ -2803,35 +3144,46 @@ static int nf_tables_newsetelem(struct sock *nlsk, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
+ struct net *net = sock_net(skb->sk);
const struct nlattr *attr;
struct nft_set *set;
struct nft_ctx ctx;
- int rem, err;
+ int rem, err = 0;
- err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla);
+ err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla, true);
if (err < 0)
return err;
set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]);
- if (IS_ERR(set))
- return PTR_ERR(set);
+ if (IS_ERR(set)) {
+ if (nla[NFTA_SET_ELEM_LIST_SET_ID]) {
+ set = nf_tables_set_lookup_byid(net,
+ nla[NFTA_SET_ELEM_LIST_SET_ID]);
+ }
+ if (IS_ERR(set))
+ return PTR_ERR(set);
+ }
+
if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT)
return -EBUSY;
nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
err = nft_add_set_elem(&ctx, set, attr);
if (err < 0)
- return err;
+ break;
+
+ set->nelems++;
}
- return 0;
+ return err;
}
-static int nft_del_setelem(const struct nft_ctx *ctx, struct nft_set *set,
+static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
const struct nlattr *attr)
{
struct nlattr *nla[NFTA_SET_ELEM_MAX + 1];
struct nft_data_desc desc;
struct nft_set_elem elem;
+ struct nft_trans *trans;
int err;
err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr,
@@ -2855,7 +3207,12 @@ static int nft_del_setelem(const struct nft_ctx *ctx, struct nft_set *set,
if (err < 0)
goto err2;
- set->ops->remove(set, &elem);
+ trans = nft_trans_elem_alloc(ctx, NFT_MSG_DELSETELEM, set);
+ if (trans == NULL)
+ goto err2;
+
+ nft_trans_elem(trans) = elem;
+ list_add_tail(&trans->list, &ctx->net->nft.commit_list);
nft_data_uninit(&elem.key, NFT_DATA_VALUE);
if (set->flags & NFT_SET_MAP)
@@ -2874,9 +3231,9 @@ static int nf_tables_delsetelem(struct sock *nlsk, struct sk_buff *skb,
const struct nlattr *attr;
struct nft_set *set;
struct nft_ctx ctx;
- int rem, err;
+ int rem, err = 0;
- err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla);
+ err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla, false);
if (err < 0)
return err;
@@ -2889,14 +3246,16 @@ static int nf_tables_delsetelem(struct sock *nlsk, struct sk_buff *skb,
nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
err = nft_del_setelem(&ctx, set, attr);
if (err < 0)
- return err;
+ break;
+
+ set->nelems--;
}
- return 0;
+ return err;
}
static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
[NFT_MSG_NEWTABLE] = {
- .call = nf_tables_newtable,
+ .call_batch = nf_tables_newtable,
.attr_count = NFTA_TABLE_MAX,
.policy = nft_table_policy,
},
@@ -2906,12 +3265,12 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
.policy = nft_table_policy,
},
[NFT_MSG_DELTABLE] = {
- .call = nf_tables_deltable,
+ .call_batch = nf_tables_deltable,
.attr_count = NFTA_TABLE_MAX,
.policy = nft_table_policy,
},
[NFT_MSG_NEWCHAIN] = {
- .call = nf_tables_newchain,
+ .call_batch = nf_tables_newchain,
.attr_count = NFTA_CHAIN_MAX,
.policy = nft_chain_policy,
},
@@ -2921,7 +3280,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
.policy = nft_chain_policy,
},
[NFT_MSG_DELCHAIN] = {
- .call = nf_tables_delchain,
+ .call_batch = nf_tables_delchain,
.attr_count = NFTA_CHAIN_MAX,
.policy = nft_chain_policy,
},
@@ -2941,7 +3300,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
.policy = nft_rule_policy,
},
[NFT_MSG_NEWSET] = {
- .call = nf_tables_newset,
+ .call_batch = nf_tables_newset,
.attr_count = NFTA_SET_MAX,
.policy = nft_set_policy,
},
@@ -2951,12 +3310,12 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
.policy = nft_set_policy,
},
[NFT_MSG_DELSET] = {
- .call = nf_tables_delset,
+ .call_batch = nf_tables_delset,
.attr_count = NFTA_SET_MAX,
.policy = nft_set_policy,
},
[NFT_MSG_NEWSETELEM] = {
- .call = nf_tables_newsetelem,
+ .call_batch = nf_tables_newsetelem,
.attr_count = NFTA_SET_ELEM_LIST_MAX,
.policy = nft_set_elem_list_policy,
},
@@ -2966,12 +3325,282 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
.policy = nft_set_elem_list_policy,
},
[NFT_MSG_DELSETELEM] = {
- .call = nf_tables_delsetelem,
+ .call_batch = nf_tables_delsetelem,
.attr_count = NFTA_SET_ELEM_LIST_MAX,
.policy = nft_set_elem_list_policy,
},
};
+/*
+ * Apply the chain update carried by @trans to trans->ctx.chain.
+ *
+ * A non-empty transaction name replaces the chain name. For base chains
+ * (NFT_BASE_CHAIN set) the stats are replaced through
+ * nft_chain_stats_replace() and the policy is updated when the
+ * transaction policy is NF_DROP or NF_ACCEPT; any other policy value
+ * leaves the current policy untouched.
+ */
+static void nft_chain_commit_update(struct nft_trans *trans)
+{
+ struct nft_base_chain *basechain;
+
+ /* NOTE(review): unbounded strcpy - presumably the name length was
+ * validated when the transaction was created; confirm.
+ */
+ if (nft_trans_chain_name(trans)[0])
+ strcpy(trans->ctx.chain->name, nft_trans_chain_name(trans));
+
+ if (!(trans->ctx.chain->flags & NFT_BASE_CHAIN))
+ return;
+
+ basechain = nft_base_chain(trans->ctx.chain);
+ nft_chain_stats_replace(basechain, nft_trans_chain_stats(trans));
+
+ switch (nft_trans_chain_policy(trans)) {
+ case NF_DROP:
+ case NF_ACCEPT:
+ basechain->policy = nft_trans_chain_policy(trans);
+ break;
+ }
+}
+
+/* Schedule objects for release via rcu to make sure no packets are accessing
+ * removed rules.
+ *
+ * RCU callback queued by nf_tables_commit() through call_rcu(): @rt is the
+ * rcu_head embedded in a struct nft_trans. The object referenced by a
+ * DELTABLE/DELCHAIN/DELRULE/DELSET transaction is destroyed, then the
+ * transaction itself is freed. Other message types only free the
+ * transaction.
+ */
+static void nf_tables_commit_release_rcu(struct rcu_head *rt)
+{
+ struct nft_trans *trans = container_of(rt, struct nft_trans, rcu_head);
+
+ switch (trans->msg_type) {
+ case NFT_MSG_DELTABLE:
+ nf_tables_table_destroy(&trans->ctx);
+ break;
+ case NFT_MSG_DELCHAIN:
+ nf_tables_chain_destroy(trans->ctx.chain);
+ break;
+ case NFT_MSG_DELRULE:
+ nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans));
+ break;
+ case NFT_MSG_DELSET:
+ nft_set_destroy(nft_trans_set(trans));
+ break;
+ }
+ kfree(trans);
+}
+
+/*
+ * Commit every transaction queued on net->nft.commit_list.
+ *
+ * The generation is advanced (base_seq, gencursor) and an RCU grace period
+ * is awaited before the list is walked. In the first loop each entry is
+ * applied and a netlink notification is sent; entries for which
+ * nft_trans_destroy() is called are finished there (presumably that helper
+ * unlinks and frees the transaction - confirm). Entries that remain queued
+ * (DELTABLE, DELCHAIN, DELRULE, DELSET) are unlinked in the second loop and
+ * handed to nf_tables_commit_release_rcu() through call_rcu().
+ *
+ * Return values of the notify helpers are ignored. Always returns 0.
+ */
+static int nf_tables_commit(struct sk_buff *skb)
+{
+ struct net *net = sock_net(skb->sk);
+ struct nft_trans *trans, *next;
+ struct nft_set *set;
+
+ /* Bump generation counter, invalidate any dump in progress.
+ * The loop repeats the increment if the counter wraps to 0.
+ */
+ while (++net->nft.base_seq == 0);
+
+ /* A new generation has just started */
+ net->nft.gencursor = gencursor_next(net);
+
+ /* Make sure all packets have left the previous generation before
+ * purging old rules.
+ */
+ synchronize_rcu();
+
+ list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
+ switch (trans->msg_type) {
+ case NFT_MSG_NEWTABLE:
+ if (nft_trans_table_update(trans)) {
+ if (!nft_trans_table_enable(trans)) {
+ nf_tables_table_disable(trans->ctx.afi,
+ trans->ctx.table);
+ trans->ctx.table->flags |= NFT_TABLE_F_DORMANT;
+ }
+ } else {
+ trans->ctx.table->flags &= ~NFT_TABLE_INACTIVE;
+ }
+ nf_tables_table_notify(&trans->ctx, NFT_MSG_NEWTABLE);
+ nft_trans_destroy(trans);
+ break;
+ case NFT_MSG_DELTABLE:
+ nf_tables_table_notify(&trans->ctx, NFT_MSG_DELTABLE);
+ break;
+ case NFT_MSG_NEWCHAIN:
+ if (nft_trans_chain_update(trans))
+ nft_chain_commit_update(trans);
+ else
+ trans->ctx.chain->flags &= ~NFT_CHAIN_INACTIVE;
+
+ nf_tables_chain_notify(&trans->ctx, NFT_MSG_NEWCHAIN);
+ nft_trans_destroy(trans);
+ break;
+ case NFT_MSG_DELCHAIN:
+ nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN);
+ if (!(trans->ctx.table->flags & NFT_TABLE_F_DORMANT) &&
+ trans->ctx.chain->flags & NFT_BASE_CHAIN) {
+ nf_unregister_hooks(nft_base_chain(trans->ctx.chain)->ops,
+ trans->ctx.afi->nops);
+ }
+ break;
+ case NFT_MSG_NEWRULE:
+ nft_rule_clear(trans->ctx.net, nft_trans_rule(trans));
+ nf_tables_rule_notify(&trans->ctx,
+ nft_trans_rule(trans),
+ NFT_MSG_NEWRULE);
+ nft_trans_destroy(trans);
+ break;
+ case NFT_MSG_DELRULE:
+ list_del_rcu(&nft_trans_rule(trans)->list);
+ nf_tables_rule_notify(&trans->ctx,
+ nft_trans_rule(trans),
+ NFT_MSG_DELRULE);
+ break;
+ case NFT_MSG_NEWSET:
+ nft_trans_set(trans)->flags &= ~NFT_SET_INACTIVE;
+ /* This avoids hitting -EBUSY when deleting the table
+ * from the transaction.
+ */
+ if (nft_trans_set(trans)->flags & NFT_SET_ANONYMOUS &&
+ !list_empty(&nft_trans_set(trans)->bindings))
+ trans->ctx.table->use--;
+
+ nf_tables_set_notify(&trans->ctx, nft_trans_set(trans),
+ NFT_MSG_NEWSET, GFP_KERNEL);
+ nft_trans_destroy(trans);
+ break;
+ case NFT_MSG_DELSET:
+ nf_tables_set_notify(&trans->ctx, nft_trans_set(trans),
+ NFT_MSG_DELSET, GFP_KERNEL);
+ break;
+ case NFT_MSG_NEWSETELEM:
+ nf_tables_setelem_notify(&trans->ctx,
+ nft_trans_elem_set(trans),
+ &nft_trans_elem(trans),
+ NFT_MSG_NEWSETELEM, 0);
+ nft_trans_destroy(trans);
+ break;
+ case NFT_MSG_DELSETELEM:
+ nf_tables_setelem_notify(&trans->ctx,
+ nft_trans_elem_set(trans),
+ &nft_trans_elem(trans),
+ NFT_MSG_DELSETELEM, 0);
+ /* The element is looked up with ->get() and only then
+ * removed from the set; unlike the other DEL* cases the
+ * transaction is finished here, not in the second loop.
+ */
+ set = nft_trans_elem_set(trans);
+ set->ops->get(set, &nft_trans_elem(trans));
+ set->ops->remove(set, &nft_trans_elem(trans));
+ nft_trans_destroy(trans);
+ break;
+ }
+ }
+
+ /* Whatever is still queued is released from an RCU callback. */
+ list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
+ list_del(&trans->list);
+ trans->ctx.nla = NULL;
+ call_rcu(&trans->rcu_head, nf_tables_commit_release_rcu);
+ }
+
+ return 0;
+}
+
+/* Schedule objects for release via rcu to make sure no packets are accessing
+ * aborted rules.
+ *
+ * RCU callback queued by nf_tables_abort() through call_rcu(): @rt is the
+ * rcu_head embedded in a struct nft_trans. The object created by a
+ * NEWTABLE/NEWCHAIN/NEWRULE/NEWSET transaction is destroyed, then the
+ * transaction itself is freed. Other message types only free the
+ * transaction.
+ */
+static void nf_tables_abort_release_rcu(struct rcu_head *rt)
+{
+ struct nft_trans *trans = container_of(rt, struct nft_trans, rcu_head);
+
+ switch (trans->msg_type) {
+ case NFT_MSG_NEWTABLE:
+ nf_tables_table_destroy(&trans->ctx);
+ break;
+ case NFT_MSG_NEWCHAIN:
+ nf_tables_chain_destroy(trans->ctx.chain);
+ break;
+ case NFT_MSG_NEWRULE:
+ nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans));
+ break;
+ case NFT_MSG_NEWSET:
+ nft_set_destroy(nft_trans_set(trans));
+ break;
+ }
+ kfree(trans);
+}
+
+/*
+ * Undo every transaction queued on net->nft.commit_list.
+ *
+ * First loop, per message type:
+ *  - DEL* entries restore the previous state (object re-linked into its
+ *    list, use counter or nelems incremented, rule re-activated with
+ *    nft_rule_clear()) and are finished with nft_trans_destroy().
+ *  - NEWTABLE/NEWCHAIN updates of existing objects are reverted and
+ *    finished with nft_trans_destroy().
+ *  - NEWSETELEM removes the element from the set again and is finished
+ *    with nft_trans_destroy().
+ *  - NEWTABLE/NEWCHAIN/NEWRULE/NEWSET for new objects unlink the object
+ *    and stay queued.
+ * Second loop: the entries still queued are unlinked, walking the list in
+ * reverse, and handed to nf_tables_abort_release_rcu() through call_rcu().
+ *
+ * nft_trans_destroy() presumably unlinks and frees the transaction -
+ * confirm against its definition. Always returns 0.
+ */
+static int nf_tables_abort(struct sk_buff *skb)
+{
+ struct net *net = sock_net(skb->sk);
+ struct nft_trans *trans, *next;
+ struct nft_set *set;
+
+ list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
+ switch (trans->msg_type) {
+ case NFT_MSG_NEWTABLE:
+ if (nft_trans_table_update(trans)) {
+ /* The update asked to enable the table: disable it
+ * again and mark it dormant.
+ */
+ if (nft_trans_table_enable(trans)) {
+ nf_tables_table_disable(trans->ctx.afi,
+ trans->ctx.table);
+ trans->ctx.table->flags |= NFT_TABLE_F_DORMANT;
+ }
+ nft_trans_destroy(trans);
+ } else {
+ list_del_rcu(&trans->ctx.table->list);
+ }
+ break;
+ case NFT_MSG_DELTABLE:
+ list_add_tail_rcu(&trans->ctx.table->list,
+ &trans->ctx.afi->tables);
+ nft_trans_destroy(trans);
+ break;
+ case NFT_MSG_NEWCHAIN:
+ if (nft_trans_chain_update(trans)) {
+ /* Stats attached to the aborted update are freed. */
+ if (nft_trans_chain_stats(trans))
+ free_percpu(nft_trans_chain_stats(trans));
+
+ nft_trans_destroy(trans);
+ } else {
+ trans->ctx.table->use--;
+ list_del_rcu(&trans->ctx.chain->list);
+ if (!(trans->ctx.table->flags & NFT_TABLE_F_DORMANT) &&
+ trans->ctx.chain->flags & NFT_BASE_CHAIN) {
+ nf_unregister_hooks(nft_base_chain(trans->ctx.chain)->ops,
+ trans->ctx.afi->nops);
+ }
+ }
+ break;
+ case NFT_MSG_DELCHAIN:
+ trans->ctx.table->use++;
+ list_add_tail_rcu(&trans->ctx.chain->list,
+ &trans->ctx.table->chains);
+ nft_trans_destroy(trans);
+ break;
+ case NFT_MSG_NEWRULE:
+ trans->ctx.chain->use--;
+ list_del_rcu(&nft_trans_rule(trans)->list);
+ break;
+ case NFT_MSG_DELRULE:
+ trans->ctx.chain->use++;
+ nft_rule_clear(trans->ctx.net, nft_trans_rule(trans));
+ nft_trans_destroy(trans);
+ break;
+ case NFT_MSG_NEWSET:
+ trans->ctx.table->use--;
+ list_del_rcu(&nft_trans_set(trans)->list);
+ break;
+ case NFT_MSG_DELSET:
+ trans->ctx.table->use++;
+ list_add_tail_rcu(&nft_trans_set(trans)->list,
+ &trans->ctx.table->sets);
+ nft_trans_destroy(trans);
+ break;
+ case NFT_MSG_NEWSETELEM:
+ nft_trans_elem_set(trans)->nelems--;
+ set = nft_trans_elem_set(trans);
+ set->ops->get(set, &nft_trans_elem(trans));
+ set->ops->remove(set, &nft_trans_elem(trans));
+ nft_trans_destroy(trans);
+ break;
+ case NFT_MSG_DELSETELEM:
+ nft_trans_elem_set(trans)->nelems++;
+ nft_trans_destroy(trans);
+ break;
+ }
+ }
+
+ /* Remaining entries are released from an RCU callback, newest first. */
+ list_for_each_entry_safe_reverse(trans, next,
+ &net->nft.commit_list, list) {
+ list_del(&trans->list);
+ trans->ctx.nla = NULL;
+ call_rcu(&trans->rcu_head, nf_tables_abort_release_rcu);
+ }
+
+ return 0;
+}
+
static const struct nfnetlink_subsystem nf_tables_subsys = {
.name = "nf_tables",
.subsys_id = NFNL_SUBSYS_NFTABLES,
@@ -3175,9 +3804,16 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
data->verdict = ntohl(nla_get_be32(tb[NFTA_VERDICT_CODE]));
switch (data->verdict) {
- case NF_ACCEPT:
- case NF_DROP:
- case NF_QUEUE:
+ default:
+ switch (data->verdict & NF_VERDICT_MASK) {
+ case NF_ACCEPT:
+ case NF_DROP:
+ case NF_QUEUE:
+ break;
+ default:
+ return -EINVAL;
+ }
+ /* fall through */
case NFT_CONTINUE:
case NFT_BREAK:
case NFT_RETURN:
@@ -3198,8 +3834,6 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
data->chain = chain;
desc->len = sizeof(data);
break;
- default:
- return -EINVAL;
}
desc->type = NFT_DATA_VERDICT;
@@ -3354,6 +3988,7 @@ static int nf_tables_init_net(struct net *net)
{
INIT_LIST_HEAD(&net->nft.af_info);
INIT_LIST_HEAD(&net->nft.commit_list);
+ net->nft.base_seq = 1;
return 0;
}
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index 90998a6ff8b..3b90eb2b2c5 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -25,9 +25,8 @@ static void nft_cmp_fast_eval(const struct nft_expr *expr,
struct nft_data data[NFT_REG_MAX + 1])
{
const struct nft_cmp_fast_expr *priv = nft_expr_priv(expr);
- u32 mask;
+ u32 mask = nft_cmp_fast_mask(priv->len);
- mask = ~0U >> (sizeof(priv->data) * BITS_PER_BYTE - priv->len);
if ((data[priv->sreg].data[0] & mask) == priv->data)
return;
data[NFT_REG_VERDICT].verdict = NFT_BREAK;
@@ -67,20 +66,6 @@ struct nft_jumpstack {
int rulenum;
};
-static inline void
-nft_chain_stats(const struct nft_chain *this, const struct nft_pktinfo *pkt,
- struct nft_jumpstack *jumpstack, unsigned int stackptr)
-{
- struct nft_stats __percpu *stats;
- const struct nft_chain *chain = stackptr ? jumpstack[0].chain : this;
-
- rcu_read_lock_bh();
- stats = rcu_dereference(nft_base_chain(chain)->stats);
- __this_cpu_inc(stats->pkts);
- __this_cpu_add(stats->bytes, pkt->skb->len);
- rcu_read_unlock_bh();
-}
-
enum nft_trace {
NFT_TRACE_RULE,
NFT_TRACE_RETURN,
@@ -118,13 +103,14 @@ static void nft_trace_packet(const struct nft_pktinfo *pkt,
unsigned int
nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops)
{
- const struct nft_chain *chain = ops->priv;
+ const struct nft_chain *chain = ops->priv, *basechain = chain;
const struct nft_rule *rule;
const struct nft_expr *expr, *last;
struct nft_data data[NFT_REG_MAX + 1];
unsigned int stackptr = 0;
struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE];
- int rulenum = 0;
+ struct nft_stats *stats;
+ int rulenum;
/*
* Cache cursor to avoid problems in case that the cursor is updated
* while traversing the ruleset.
@@ -132,6 +118,7 @@ nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops)
unsigned int gencursor = ACCESS_ONCE(chain->net->nft.gencursor);
do_chain:
+ rulenum = 0;
rule = list_entry(&chain->rules, struct nft_rule, list);
next_rule:
data[NFT_REG_VERDICT].verdict = NFT_CONTINUE;
@@ -157,8 +144,10 @@ next_rule:
switch (data[NFT_REG_VERDICT].verdict) {
case NFT_BREAK:
data[NFT_REG_VERDICT].verdict = NFT_CONTINUE;
- /* fall through */
+ continue;
case NFT_CONTINUE:
+ if (unlikely(pkt->skb->nf_trace))
+ nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
continue;
}
break;
@@ -184,37 +173,46 @@ next_rule:
jumpstack[stackptr].rule = rule;
jumpstack[stackptr].rulenum = rulenum;
stackptr++;
- /* fall through */
+ chain = data[NFT_REG_VERDICT].chain;
+ goto do_chain;
case NFT_GOTO:
+ if (unlikely(pkt->skb->nf_trace))
+ nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
+
chain = data[NFT_REG_VERDICT].chain;
goto do_chain;
case NFT_RETURN:
if (unlikely(pkt->skb->nf_trace))
nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RETURN);
-
- /* fall through */
+ break;
case NFT_CONTINUE:
+ if (unlikely(pkt->skb->nf_trace && !(chain->flags & NFT_BASE_CHAIN)))
+ nft_trace_packet(pkt, chain, ++rulenum, NFT_TRACE_RETURN);
break;
default:
WARN_ON(1);
}
if (stackptr > 0) {
- if (unlikely(pkt->skb->nf_trace))
- nft_trace_packet(pkt, chain, ++rulenum, NFT_TRACE_RETURN);
-
stackptr--;
chain = jumpstack[stackptr].chain;
rule = jumpstack[stackptr].rule;
rulenum = jumpstack[stackptr].rulenum;
goto next_rule;
}
- nft_chain_stats(chain, pkt, jumpstack, stackptr);
if (unlikely(pkt->skb->nf_trace))
- nft_trace_packet(pkt, chain, ++rulenum, NFT_TRACE_POLICY);
+ nft_trace_packet(pkt, basechain, -1, NFT_TRACE_POLICY);
+
+ rcu_read_lock_bh();
+ stats = this_cpu_ptr(rcu_dereference(nft_base_chain(basechain)->stats));
+ u64_stats_update_begin(&stats->syncp);
+ stats->pkts++;
+ stats->bytes += pkt->skb->len;
+ u64_stats_update_end(&stats->syncp);
+ rcu_read_unlock_bh();
- return nft_base_chain(chain)->policy;
+ return nft_base_chain(basechain)->policy;
}
EXPORT_SYMBOL_GPL(nft_do_chain);
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 046aa13b4fe..c138b8fbe28 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -61,6 +61,14 @@ void nfnl_unlock(__u8 subsys_id)
}
EXPORT_SYMBOL_GPL(nfnl_unlock);
+#ifdef CONFIG_PROVE_LOCKING
+/*
+ * Return the lockdep_is_held() result for the mutex of the subsystem slot
+ * @subsys_id. Only built when CONFIG_PROVE_LOCKING is set.
+ *
+ * NOTE(review): @subsys_id indexes table[] without a range check here -
+ * presumably callers only pass valid subsystem ids; confirm.
+ */
+int lockdep_nfnl_is_held(u8 subsys_id)
+{
+ return lockdep_is_held(&table[subsys_id].mutex);
+}
+EXPORT_SYMBOL_GPL(lockdep_nfnl_is_held);
+#endif
+
int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n)
{
nfnl_lock(n->subsys_id);
@@ -248,15 +256,15 @@ replay:
#endif
{
nfnl_unlock(subsys_id);
- kfree_skb(nskb);
- return netlink_ack(skb, nlh, -EOPNOTSUPP);
+ netlink_ack(skb, nlh, -EOPNOTSUPP);
+ return kfree_skb(nskb);
}
}
if (!ss->commit || !ss->abort) {
nfnl_unlock(subsys_id);
- kfree_skb(nskb);
- return netlink_ack(skb, nlh, -EOPNOTSUPP);
+ netlink_ack(skb, nlh, -EOPNOTSUPP);
+ return kfree_skb(skb);
}
while (skb->len >= nlmsg_total_size(0)) {
@@ -360,14 +368,13 @@ done:
static void nfnetlink_rcv(struct sk_buff *skb)
{
struct nlmsghdr *nlh = nlmsg_hdr(skb);
- struct net *net = sock_net(skb->sk);
int msglen;
if (nlh->nlmsg_len < NLMSG_HDRLEN ||
skb->len < nlh->nlmsg_len)
return;
- if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) {
+ if (!netlink_net_capable(skb, CAP_NET_ADMIN)) {
netlink_ack(skb, nlh, -EPERM);
return;
}
@@ -392,19 +399,17 @@ static void nfnetlink_rcv(struct sk_buff *skb)
}
#ifdef CONFIG_MODULES
-static void nfnetlink_bind(int group)
+static int nfnetlink_bind(int group)
{
const struct nfnetlink_subsystem *ss;
int type = nfnl_group2type[group];
rcu_read_lock();
ss = nfnetlink_get_subsys(type);
- if (!ss) {
- rcu_read_unlock();
- request_module("nfnetlink-subsys-%d", type);
- return;
- }
rcu_read_unlock();
+ if (!ss)
+ request_module("nfnetlink-subsys-%d", type);
+ return 0;
}
#endif
diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
index c7b6d466a66..2baa125c2e8 100644
--- a/net/netfilter/nfnetlink_acct.c
+++ b/net/netfilter/nfnetlink_acct.c
@@ -32,18 +32,24 @@ static LIST_HEAD(nfnl_acct_list);
struct nf_acct {
atomic64_t pkts;
atomic64_t bytes;
+ unsigned long flags;
struct list_head head;
atomic_t refcnt;
char name[NFACCT_NAME_MAX];
struct rcu_head rcu_head;
+ char data[0];
};
+#define NFACCT_F_QUOTA (NFACCT_F_QUOTA_PKTS | NFACCT_F_QUOTA_BYTES)
+
static int
nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb,
const struct nlmsghdr *nlh, const struct nlattr * const tb[])
{
struct nf_acct *nfacct, *matching = NULL;
char *acct_name;
+ unsigned int size = 0;
+ u32 flags = 0;
if (!tb[NFACCT_NAME])
return -EINVAL;
@@ -68,15 +74,38 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb,
/* reset counters if you request a replacement. */
atomic64_set(&matching->pkts, 0);
atomic64_set(&matching->bytes, 0);
+ smp_mb__before_atomic();
+ /* reset overquota flag if quota is enabled. */
+ if ((matching->flags & NFACCT_F_QUOTA))
+ clear_bit(NFACCT_F_OVERQUOTA, &matching->flags);
return 0;
}
return -EBUSY;
}
- nfacct = kzalloc(sizeof(struct nf_acct), GFP_KERNEL);
+ if (tb[NFACCT_FLAGS]) {
+ flags = ntohl(nla_get_be32(tb[NFACCT_FLAGS]));
+ if (flags & ~NFACCT_F_QUOTA)
+ return -EOPNOTSUPP;
+ if ((flags & NFACCT_F_QUOTA) == NFACCT_F_QUOTA)
+ return -EINVAL;
+ if (flags & NFACCT_F_OVERQUOTA)
+ return -EINVAL;
+
+ size += sizeof(u64);
+ }
+
+ nfacct = kzalloc(sizeof(struct nf_acct) + size, GFP_KERNEL);
if (nfacct == NULL)
return -ENOMEM;
+ if (flags & NFACCT_F_QUOTA) {
+ u64 *quota = (u64 *)nfacct->data;
+
+ *quota = be64_to_cpu(nla_get_be64(tb[NFACCT_QUOTA]));
+ nfacct->flags = flags;
+ }
+
strncpy(nfacct->name, nla_data(tb[NFACCT_NAME]), NFACCT_NAME_MAX);
if (tb[NFACCT_BYTES]) {
@@ -117,6 +146,9 @@ nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
if (type == NFNL_MSG_ACCT_GET_CTRZERO) {
pkts = atomic64_xchg(&acct->pkts, 0);
bytes = atomic64_xchg(&acct->bytes, 0);
+ smp_mb__before_atomic();
+ if (acct->flags & NFACCT_F_QUOTA)
+ clear_bit(NFACCT_F_OVERQUOTA, &acct->flags);
} else {
pkts = atomic64_read(&acct->pkts);
bytes = atomic64_read(&acct->bytes);
@@ -125,7 +157,13 @@ nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
nla_put_be64(skb, NFACCT_BYTES, cpu_to_be64(bytes)) ||
nla_put_be32(skb, NFACCT_USE, htonl(atomic_read(&acct->refcnt))))
goto nla_put_failure;
+ if (acct->flags & NFACCT_F_QUOTA) {
+ u64 *quota = (u64 *)acct->data;
+ if (nla_put_be32(skb, NFACCT_FLAGS, htonl(acct->flags)) ||
+ nla_put_be64(skb, NFACCT_QUOTA, cpu_to_be64(*quota)))
+ goto nla_put_failure;
+ }
nlmsg_end(skb, nlh);
return skb->len;
@@ -270,6 +308,8 @@ static const struct nla_policy nfnl_acct_policy[NFACCT_MAX+1] = {
[NFACCT_NAME] = { .type = NLA_NUL_STRING, .len = NFACCT_NAME_MAX-1 },
[NFACCT_BYTES] = { .type = NLA_U64 },
[NFACCT_PKTS] = { .type = NLA_U64 },
+ [NFACCT_FLAGS] = { .type = NLA_U32 },
+ [NFACCT_QUOTA] = { .type = NLA_U64 },
};
static const struct nfnl_callback nfnl_acct_cb[NFNL_MSG_ACCT_MAX] = {
@@ -336,6 +376,50 @@ void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct)
}
EXPORT_SYMBOL_GPL(nfnl_acct_update);
+/*
+ * Broadcast an NFNL_MSG_ACCT_OVERQUOTA message describing @nfacct to the
+ * NFNLGRP_ACCT_QUOTA group on init_net's nfnetlink socket.
+ *
+ * Best effort: if the skb cannot be allocated or filled the function
+ * returns silently, and the netlink_broadcast() result is not checked.
+ * Allocations use GFP_ATOMIC.
+ */
+static void nfnl_overquota_report(struct nf_acct *nfacct)
+{
+ int ret;
+ struct sk_buff *skb;
+
+ skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
+ if (skb == NULL)
+ return;
+
+ ret = nfnl_acct_fill_info(skb, 0, 0, NFNL_MSG_ACCT_OVERQUOTA, 0,
+ nfacct);
+ if (ret <= 0) {
+ kfree_skb(skb);
+ return;
+ }
+ netlink_broadcast(init_net.nfnl, skb, 0, NFNLGRP_ACCT_QUOTA,
+ GFP_ATOMIC);
+}
+
+/*
+ * Compare the current counter of @nfacct against its quota.
+ *
+ * Returns NFACCT_NO_QUOTA when no NFACCT_F_QUOTA flag is set on the
+ * object. Otherwise returns the value of (now > quota), where "now" is the
+ * packet counter if NFACCT_F_QUOTA_PKTS is set and the byte counter if
+ * not, and the quota is the u64 stored in nfacct->data.
+ *
+ * An overquota report is sent when now >= quota and the
+ * NFACCT_F_OVERQUOTA bit was not already set (test_and_set_bit), so it is
+ * sent once until that bit is cleared again. Note the asymmetry: at
+ * now == quota the report is sent but the return value is still 0.
+ *
+ * @skb is not used by this function.
+ */
+int nfnl_acct_overquota(const struct sk_buff *skb, struct nf_acct *nfacct)
+{
+ u64 now;
+ u64 *quota;
+ int ret = NFACCT_UNDERQUOTA;
+
+ /* no place here if we don't have a quota */
+ if (!(nfacct->flags & NFACCT_F_QUOTA))
+ return NFACCT_NO_QUOTA;
+
+ quota = (u64 *)nfacct->data;
+ now = (nfacct->flags & NFACCT_F_QUOTA_PKTS) ?
+ atomic64_read(&nfacct->pkts) : atomic64_read(&nfacct->bytes);
+
+ ret = now > *quota;
+
+ if (now >= *quota &&
+ !test_and_set_bit(NFACCT_F_OVERQUOTA, &nfacct->flags)) {
+ nfnl_overquota_report(nfacct);
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nfnl_acct_overquota);
+
static int __init nfnl_acct_init(void)
{
int ret;
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index a155d19a225..d292c8d286e 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -28,8 +28,6 @@
#include <linux/proc_fs.h>
#include <linux/security.h>
#include <linux/list.h>
-#include <linux/jhash.h>
-#include <linux/random.h>
#include <linux/slab.h>
#include <net/sock.h>
#include <net/netfilter/nf_log.h>
@@ -75,7 +73,6 @@ struct nfulnl_instance {
};
#define INSTANCE_BUCKETS 16
-static unsigned int hash_init;
static int nfnl_log_net_id __read_mostly;
@@ -1067,11 +1064,6 @@ static int __init nfnetlink_log_init(void)
{
int status = -ENOMEM;
- /* it's not really all that important to have a random value, so
- * we can do this from the init function, even if there hasn't
- * been that much entropy yet */
- get_random_bytes(&hash_init, sizeof(hash_init));
-
netlink_register_notifier(&nfulnl_rtnl_notifier);
status = nfnetlink_subsys_register(&nfulnl_subsys);
if (status < 0) {
diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c
index 954925db414..e2b3f51c81f 100644
--- a/net/netfilter/nft_cmp.c
+++ b/net/netfilter/nft_cmp.c
@@ -128,7 +128,7 @@ static int nft_cmp_fast_init(const struct nft_ctx *ctx,
BUG_ON(err < 0);
desc.len *= BITS_PER_BYTE;
- mask = ~0U >> (sizeof(priv->data) * BITS_PER_BYTE - desc.len);
+ mask = nft_cmp_fast_mask(desc.len);
priv->data = data.data[0] & mask;
priv->len = desc.len;
return 0;
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 82cb8236f8a..1840989092e 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -192,9 +192,18 @@ err:
}
static void
-nft_target_destroy(const struct nft_expr *expr)
+nft_target_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
{
struct xt_target *target = expr->ops->data;
+ void *info = nft_expr_priv(expr);
+ struct xt_tgdtor_param par;
+
+ par.net = ctx->net;
+ par.target = target;
+ par.targinfo = info;
+ par.family = ctx->afi->family;
+ if (par.target->destroy != NULL)
+ par.target->destroy(&par);
module_put(target->me);
}
@@ -379,9 +388,18 @@ err:
}
static void
-nft_match_destroy(const struct nft_expr *expr)
+nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
{
struct xt_match *match = expr->ops->data;
+ void *info = nft_expr_priv(expr);
+ struct xt_mtdtor_param par;
+
+ par.net = ctx->net;
+ par.match = match;
+ par.matchinfo = info;
+ par.family = ctx->afi->family;
+ if (par.match->destroy != NULL)
+ par.match->destroy(&par);
module_put(match->me);
}
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 46e27540383..cc560301624 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -19,15 +19,15 @@
#include <net/netfilter/nf_conntrack_tuple.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_conntrack_labels.h>
struct nft_ct {
enum nft_ct_keys key:8;
enum ip_conntrack_dir dir:8;
- union{
+ union {
enum nft_registers dreg:8;
enum nft_registers sreg:8;
};
- uint8_t family;
};
static void nft_ct_get_eval(const struct nft_expr *expr,
@@ -97,6 +97,26 @@ static void nft_ct_get_eval(const struct nft_expr *expr,
goto err;
strncpy((char *)dest->data, helper->name, sizeof(dest->data));
return;
+#ifdef CONFIG_NF_CONNTRACK_LABELS
+ case NFT_CT_LABELS: {
+ struct nf_conn_labels *labels = nf_ct_labels_find(ct);
+ unsigned int size;
+
+ if (!labels) {
+ memset(dest->data, 0, sizeof(dest->data));
+ return;
+ }
+
+ BUILD_BUG_ON(NF_CT_LABELS_MAX_SIZE > sizeof(dest->data));
+ size = labels->words * sizeof(long);
+
+ memcpy(dest->data, labels->bits, size);
+ if (size < sizeof(dest->data))
+ memset(((char *) dest->data) + size, 0,
+ sizeof(dest->data) - size);
+ return;
+ }
+#endif
}
tuple = &ct->tuplehash[priv->dir].tuple;
@@ -195,22 +215,14 @@ static void nft_ct_l3proto_module_put(uint8_t family)
nf_ct_l3proto_module_put(family);
}
-static int nft_ct_init_validate_get(const struct nft_expr *expr,
- const struct nlattr * const tb[])
+static int nft_ct_get_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
{
struct nft_ct *priv = nft_expr_priv(expr);
+ int err;
- if (tb[NFTA_CT_DIRECTION] != NULL) {
- priv->dir = nla_get_u8(tb[NFTA_CT_DIRECTION]);
- switch (priv->dir) {
- case IP_CT_DIR_ORIGINAL:
- case IP_CT_DIR_REPLY:
- break;
- default:
- return -EINVAL;
- }
- }
-
+ priv->key = ntohl(nla_get_be32(tb[NFTA_CT_KEY]));
switch (priv->key) {
case NFT_CT_STATE:
case NFT_CT_DIRECTION:
@@ -221,6 +233,9 @@ static int nft_ct_init_validate_get(const struct nft_expr *expr,
#ifdef CONFIG_NF_CONNTRACK_SECMARK
case NFT_CT_SECMARK:
#endif
+#ifdef CONFIG_NF_CONNTRACK_LABELS
+ case NFT_CT_LABELS:
+#endif
case NFT_CT_EXPIRATION:
case NFT_CT_HELPER:
if (tb[NFTA_CT_DIRECTION] != NULL)
@@ -239,69 +254,66 @@ static int nft_ct_init_validate_get(const struct nft_expr *expr,
return -EOPNOTSUPP;
}
- return 0;
-}
-
-static int nft_ct_init_validate_set(uint32_t key)
-{
- switch (key) {
- case NFT_CT_MARK:
- break;
- default:
- return -EOPNOTSUPP;
+ if (tb[NFTA_CT_DIRECTION] != NULL) {
+ priv->dir = nla_get_u8(tb[NFTA_CT_DIRECTION]);
+ switch (priv->dir) {
+ case IP_CT_DIR_ORIGINAL:
+ case IP_CT_DIR_REPLY:
+ break;
+ default:
+ return -EINVAL;
+ }
}
+ priv->dreg = ntohl(nla_get_be32(tb[NFTA_CT_DREG]));
+ err = nft_validate_output_register(priv->dreg);
+ if (err < 0)
+ return err;
+
+ err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
+ if (err < 0)
+ return err;
+
+ err = nft_ct_l3proto_try_module_get(ctx->afi->family);
+ if (err < 0)
+ return err;
+
return 0;
}
-static int nft_ct_init(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nlattr * const tb[])
+static int nft_ct_set_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
{
struct nft_ct *priv = nft_expr_priv(expr);
int err;
priv->key = ntohl(nla_get_be32(tb[NFTA_CT_KEY]));
-
- if (tb[NFTA_CT_DREG]) {
- err = nft_ct_init_validate_get(expr, tb);
- if (err < 0)
- return err;
-
- priv->dreg = ntohl(nla_get_be32(tb[NFTA_CT_DREG]));
- err = nft_validate_output_register(priv->dreg);
- if (err < 0)
- return err;
-
- err = nft_validate_data_load(ctx, priv->dreg, NULL,
- NFT_DATA_VALUE);
- if (err < 0)
- return err;
- } else {
- err = nft_ct_init_validate_set(priv->key);
- if (err < 0)
- return err;
-
- priv->sreg = ntohl(nla_get_be32(tb[NFTA_CT_SREG]));
- err = nft_validate_input_register(priv->sreg);
- if (err < 0)
- return err;
+ switch (priv->key) {
+#ifdef CONFIG_NF_CONNTRACK_MARK
+ case NFT_CT_MARK:
+ break;
+#endif
+ default:
+ return -EOPNOTSUPP;
}
- err = nft_ct_l3proto_try_module_get(ctx->afi->family);
+ priv->sreg = ntohl(nla_get_be32(tb[NFTA_CT_SREG]));
+ err = nft_validate_input_register(priv->sreg);
if (err < 0)
return err;
- priv->family = ctx->afi->family;
+ err = nft_ct_l3proto_try_module_get(ctx->afi->family);
+ if (err < 0)
+ return err;
return 0;
}
-static void nft_ct_destroy(const struct nft_expr *expr)
+static void nft_ct_destroy(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
{
- struct nft_ct *priv = nft_expr_priv(expr);
-
- nft_ct_l3proto_module_put(priv->family);
+ nft_ct_l3proto_module_put(ctx->afi->family);
}
static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr)
@@ -350,7 +362,7 @@ static const struct nft_expr_ops nft_ct_get_ops = {
.type = &nft_ct_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_ct)),
.eval = nft_ct_get_eval,
- .init = nft_ct_init,
+ .init = nft_ct_get_init,
.destroy = nft_ct_destroy,
.dump = nft_ct_get_dump,
};
@@ -359,7 +371,7 @@ static const struct nft_expr_ops nft_ct_set_ops = {
.type = &nft_ct_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_ct)),
.eval = nft_ct_set_eval,
- .init = nft_ct_init,
+ .init = nft_ct_set_init,
.destroy = nft_ct_destroy,
.dump = nft_ct_set_dump,
};
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index 3d3f8fce10a..4080ed6a072 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -12,23 +12,36 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/list.h>
+#include <linux/log2.h>
#include <linux/jhash.h>
#include <linux/netlink.h>
+#include <linux/vmalloc.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables.h>
+#define NFT_HASH_MIN_SIZE 4UL
+
struct nft_hash {
- struct hlist_head *hash;
- unsigned int hsize;
+ struct nft_hash_table __rcu *tbl;
+};
+
+struct nft_hash_table {
+ unsigned int size;
+ struct nft_hash_elem __rcu *buckets[];
};
struct nft_hash_elem {
- struct hlist_node hnode;
- struct nft_data key;
- struct nft_data data[];
+ struct nft_hash_elem __rcu *next;
+ struct nft_data key;
+ struct nft_data data[];
};
+#define nft_hash_for_each_entry(i, head) \
+ for (i = nft_dereference(head); i != NULL; i = nft_dereference(i->next))
+#define nft_hash_for_each_entry_rcu(i, head) \
+ for (i = rcu_dereference(head); i != NULL; i = rcu_dereference(i->next))
+
static u32 nft_hash_rnd __read_mostly;
static bool nft_hash_rnd_initted __read_mostly;
@@ -38,7 +51,7 @@ static unsigned int nft_hash_data(const struct nft_data *data,
unsigned int h;
h = jhash(data->data, len, nft_hash_rnd);
- return ((u64)h * hsize) >> 32;
+ return h & (hsize - 1);
}
static bool nft_hash_lookup(const struct nft_set *set,
@@ -46,11 +59,12 @@ static bool nft_hash_lookup(const struct nft_set *set,
struct nft_data *data)
{
const struct nft_hash *priv = nft_set_priv(set);
+ const struct nft_hash_table *tbl = rcu_dereference(priv->tbl);
const struct nft_hash_elem *he;
unsigned int h;
- h = nft_hash_data(key, priv->hsize, set->klen);
- hlist_for_each_entry(he, &priv->hash[h], hnode) {
+ h = nft_hash_data(key, tbl->size, set->klen);
+ nft_hash_for_each_entry_rcu(he, tbl->buckets[h]) {
if (nft_data_cmp(&he->key, key, set->klen))
continue;
if (set->flags & NFT_SET_MAP)
@@ -60,19 +74,148 @@ static bool nft_hash_lookup(const struct nft_set *set,
return false;
}
-static void nft_hash_elem_destroy(const struct nft_set *set,
- struct nft_hash_elem *he)
+static void nft_hash_tbl_free(const struct nft_hash_table *tbl)
{
- nft_data_uninit(&he->key, NFT_DATA_VALUE);
- if (set->flags & NFT_SET_MAP)
- nft_data_uninit(he->data, set->dtype);
- kfree(he);
+ kvfree(tbl);
+}
+
+/*
+ * Bucket count for a table expected to hold nelem elements: 4/3 of nelem
+ * (a 75% target load), rounded up to a power of two and never smaller than
+ * NFT_HASH_MIN_SIZE.
+ */
+static unsigned int nft_hash_tbl_size(unsigned int nelem)
+{
+ /* nelem + nelem / 3 yields the same value as nelem * 4 / 3, but the
+ * intermediate result does not wrap once nelem exceeds UINT_MAX / 4.
+ */
+ return max(roundup_pow_of_two(nelem + nelem / 3), NFT_HASH_MIN_SIZE);
+}
+
+/*
+ * Allocate a zeroed bucket table with nbuckets bucket heads.
+ *
+ * kzalloc() is tried first; if it fails the allocation falls back to
+ * vzalloc(). Because either allocator may have been used, the table has to
+ * be released with nft_hash_tbl_free() (kvfree()), not with plain kfree().
+ *
+ * Returns the table with ->size set to nbuckets, or NULL if both
+ * allocations fail.
+ */
+static struct nft_hash_table *nft_hash_tbl_alloc(unsigned int nbuckets)
+{
+ struct nft_hash_table *tbl;
+ size_t size;
+
+ /* Header plus the flexible array of bucket head pointers */
+ size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]);
+ tbl = kzalloc(size, GFP_KERNEL | __GFP_REPEAT | __GFP_NOWARN);
+ if (tbl == NULL)
+ tbl = vzalloc(size);
+ if (tbl == NULL)
+ return NULL;
+ tbl->size = nbuckets;
+
+ return tbl;
+}
+
+/*
+ * Perform one unzip step on bucket n of the old table during expansion.
+ *
+ * The chain hanging off tbl->buckets[n] may interleave elements that hash
+ * to different buckets of the new table ntbl. This step:
+ * - takes the leading run of elements that all hash to the same new
+ * bucket h,
+ * - moves the old bucket head past that run, and
+ * - points the last element of the run at the next later element that
+ * also hashes to h (or NULL if there is none), skipping the elements
+ * in between.
+ *
+ * @set: set the table belongs to; only set->klen is used, for hashing
+ * @ntbl: new table; only its size is read
+ * @tbl: old table whose bucket n is advanced
+ * @n: index of the old bucket to process
+ *
+ * Does nothing if the old bucket is already empty.
+ */
+static void nft_hash_chain_unzip(const struct nft_set *set,
+ const struct nft_hash_table *ntbl,
+ struct nft_hash_table *tbl, unsigned int n)
+{
+ struct nft_hash_elem *he, *last, *next;
+ unsigned int h;
+
+ he = nft_dereference(tbl->buckets[n]);
+ if (he == NULL)
+ return;
+ /* New-table bucket of the first element in the old chain */
+ h = nft_hash_data(&he->key, ntbl->size, set->klen);
+
+ /* Find last element of first chain hashing to bucket h */
+ last = he;
+ nft_hash_for_each_entry(he, he->next) {
+ if (nft_hash_data(&he->key, ntbl->size, set->klen) != h)
+ break;
+ last = he;
+ }
+
+ /* Unlink first chain from the old table */
+ RCU_INIT_POINTER(tbl->buckets[n], last->next);
+
+ /* If end of chain reached, done */
+ if (he == NULL)
+ return;
+
+ /* Find first element of second chain hashing to bucket h */
+ next = NULL;
+ nft_hash_for_each_entry(he, he->next) {
+ if (nft_hash_data(&he->key, ntbl->size, set->klen) != h)
+ continue;
+ next = he;
+ break;
+ }
+
+ /* Link the two chains */
+ RCU_INIT_POINTER(last->next, next);
+}
+
+/*
+ * Double the number of buckets of the set's hash table.
+ *
+ * A new table of twice the size is allocated and each of its buckets is
+ * pointed at the first element of the corresponding old chain that hashes
+ * to it. The new table is then published and the still interleaved chains
+ * are unzipped step by step, waiting for an RCU grace period before every
+ * pass, until all old bucket heads are NULL. Finally the old table is freed.
+ *
+ * Returns 0 on success or -ENOMEM if the new table cannot be allocated, in
+ * which case the current table is left untouched.
+ */
+static int nft_hash_tbl_expand(const struct nft_set *set, struct nft_hash *priv)
+{
+ struct nft_hash_table *tbl = nft_dereference(priv->tbl), *ntbl;
+ struct nft_hash_elem *he;
+ unsigned int i, h;
+ bool complete;
+
+ ntbl = nft_hash_tbl_alloc(tbl->size * 2);
+ if (ntbl == NULL)
+ return -ENOMEM;
+
+ /* Link new table's buckets to first element in the old table
+ * hashing to the new bucket.
+ */
+ for (i = 0; i < ntbl->size; i++) {
+ /* Old bucket whose chain is searched for new bucket i */
+ h = i < tbl->size ? i : i - tbl->size;
+ nft_hash_for_each_entry(he, tbl->buckets[h]) {
+ if (nft_hash_data(&he->key, ntbl->size, set->klen) != i)
+ continue;
+ RCU_INIT_POINTER(ntbl->buckets[i], he);
+ break;
+ }
+ }
+
+ /* Publish new table */
+ rcu_assign_pointer(priv->tbl, ntbl);
+
+ /* Unzip interleaved hash chains */
+ do {
+ /* Wait for readers to use new table/unzipped chains */
+ synchronize_rcu();
+
+ complete = true;
+ for (i = 0; i < tbl->size; i++) {
+ nft_hash_chain_unzip(set, ntbl, tbl, i);
+ /* A non-empty old bucket needs another pass */
+ if (tbl->buckets[i] != NULL)
+ complete = false;
+ }
+ } while (!complete);
+
+ nft_hash_tbl_free(tbl);
+ return 0;
+}
+
+/*
+ * Halve the number of buckets of the set's hash table.
+ *
+ * New bucket i takes over the chain of old bucket i and gets the chain of
+ * old bucket i + new size appended to its tail. The new table is published,
+ * and the old one is freed after an RCU grace period.
+ *
+ * Returns 0 on success or -ENOMEM if the new table cannot be allocated, in
+ * which case the current table is left untouched.
+ */
+static int nft_hash_tbl_shrink(const struct nft_set *set, struct nft_hash *priv)
+{
+ struct nft_hash_table *tbl = nft_dereference(priv->tbl), *ntbl;
+ struct nft_hash_elem __rcu **pprev;
+ unsigned int i;
+
+ ntbl = nft_hash_tbl_alloc(tbl->size / 2);
+ if (ntbl == NULL)
+ return -ENOMEM;
+
+ for (i = 0; i < ntbl->size; i++) {
+ ntbl->buckets[i] = tbl->buckets[i];
+
+ /* Walk to the terminating NULL link of the first chain */
+ for (pprev = &ntbl->buckets[i]; *pprev != NULL;
+ pprev = &nft_dereference(*pprev)->next)
+ ;
+ /* Append the chain of the second old bucket folding into i */
+ RCU_INIT_POINTER(*pprev, tbl->buckets[i + ntbl->size]);
+ }
+
+ /* Publish new table */
+ rcu_assign_pointer(priv->tbl, ntbl);
+ synchronize_rcu();
+
+ nft_hash_tbl_free(tbl);
+ return 0;
}
static int nft_hash_insert(const struct nft_set *set,
const struct nft_set_elem *elem)
{
struct nft_hash *priv = nft_set_priv(set);
+ struct nft_hash_table *tbl = nft_dereference(priv->tbl);
struct nft_hash_elem *he;
unsigned int size, h;
@@ -91,33 +234,64 @@ static int nft_hash_insert(const struct nft_set *set,
if (set->flags & NFT_SET_MAP)
nft_data_copy(he->data, &elem->data);
- h = nft_hash_data(&he->key, priv->hsize, set->klen);
- hlist_add_head_rcu(&he->hnode, &priv->hash[h]);
+ h = nft_hash_data(&he->key, tbl->size, set->klen);
+ RCU_INIT_POINTER(he->next, tbl->buckets[h]);
+ rcu_assign_pointer(tbl->buckets[h], he);
+
+ /* Expand table when exceeding 75% load */
+ if (set->nelems + 1 > tbl->size / 4 * 3)
+ nft_hash_tbl_expand(set, priv);
+
return 0;
}
+/*
+ * Release a single hash element: uninitialise its key, uninitialise its
+ * data as well when the set is a map, then free the element itself.
+ */
+static void nft_hash_elem_destroy(const struct nft_set *set,
+ struct nft_hash_elem *he)
+{
+ nft_data_uninit(&he->key, NFT_DATA_VALUE);
+ /* Only map sets carry a data part next to the key */
+ if (set->flags & NFT_SET_MAP)
+ nft_data_uninit(he->data, set->dtype);
+ kfree(he);
+}
+
static void nft_hash_remove(const struct nft_set *set,
const struct nft_set_elem *elem)
{
- struct nft_hash_elem *he = elem->cookie;
+ struct nft_hash *priv = nft_set_priv(set);
+ struct nft_hash_table *tbl = nft_dereference(priv->tbl);
+ struct nft_hash_elem *he, __rcu **pprev;
+
+ pprev = elem->cookie;
+ he = nft_dereference((*pprev));
- hlist_del_rcu(&he->hnode);
+ RCU_INIT_POINTER(*pprev, he->next);
+ synchronize_rcu();
kfree(he);
+
+ /* Shrink table beneath 30% load */
+ if (set->nelems - 1 < tbl->size * 3 / 10 &&
+ tbl->size > NFT_HASH_MIN_SIZE)
+ nft_hash_tbl_shrink(set, priv);
}
static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem)
{
const struct nft_hash *priv = nft_set_priv(set);
+ const struct nft_hash_table *tbl = nft_dereference(priv->tbl);
+ struct nft_hash_elem __rcu * const *pprev;
struct nft_hash_elem *he;
unsigned int h;
- h = nft_hash_data(&elem->key, priv->hsize, set->klen);
- hlist_for_each_entry(he, &priv->hash[h], hnode) {
- if (nft_data_cmp(&he->key, &elem->key, set->klen))
+ h = nft_hash_data(&elem->key, tbl->size, set->klen);
+ pprev = &tbl->buckets[h];
+ nft_hash_for_each_entry(he, tbl->buckets[h]) {
+ if (nft_data_cmp(&he->key, &elem->key, set->klen)) {
+ pprev = &he->next;
continue;
+ }
- elem->cookie = he;
- elem->flags = 0;
+ elem->cookie = (void *)pprev;
+ elem->flags = 0;
if (set->flags & NFT_SET_MAP)
nft_data_copy(&elem->data, he->data);
return 0;
@@ -129,12 +303,13 @@ static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set,
struct nft_set_iter *iter)
{
const struct nft_hash *priv = nft_set_priv(set);
+ const struct nft_hash_table *tbl = nft_dereference(priv->tbl);
const struct nft_hash_elem *he;
struct nft_set_elem elem;
unsigned int i;
- for (i = 0; i < priv->hsize; i++) {
- hlist_for_each_entry(he, &priv->hash[i], hnode) {
+ for (i = 0; i < tbl->size; i++) {
+ nft_hash_for_each_entry(he, tbl->buckets[i]) {
if (iter->count < iter->skip)
goto cont;
@@ -158,50 +333,77 @@ static unsigned int nft_hash_privsize(const struct nlattr * const nla[])
}
static int nft_hash_init(const struct nft_set *set,
+ const struct nft_set_desc *desc,
const struct nlattr * const tb[])
{
struct nft_hash *priv = nft_set_priv(set);
- unsigned int cnt, i;
+ struct nft_hash_table *tbl;
+ unsigned int size;
if (unlikely(!nft_hash_rnd_initted)) {
get_random_bytes(&nft_hash_rnd, 4);
nft_hash_rnd_initted = true;
}
- /* Aim for a load factor of 0.75 */
- // FIXME: temporarily broken until we have set descriptions
- cnt = 100;
- cnt = cnt * 4 / 3;
+ size = NFT_HASH_MIN_SIZE;
+ if (desc->size)
+ size = nft_hash_tbl_size(desc->size);
- priv->hash = kcalloc(cnt, sizeof(struct hlist_head), GFP_KERNEL);
- if (priv->hash == NULL)
+ tbl = nft_hash_tbl_alloc(size);
+ if (tbl == NULL)
return -ENOMEM;
- priv->hsize = cnt;
-
- for (i = 0; i < cnt; i++)
- INIT_HLIST_HEAD(&priv->hash[i]);
-
+ RCU_INIT_POINTER(priv->tbl, tbl);
return 0;
}
+/*
+ * Tear down a hash set: destroy every element of every bucket, then release
+ * the bucket table itself.
+ */
static void nft_hash_destroy(const struct nft_set *set)
{
const struct nft_hash *priv = nft_set_priv(set);
- const struct hlist_node *next;
- struct nft_hash_elem *elem;
+ const struct nft_hash_table *tbl = nft_dereference(priv->tbl);
+ struct nft_hash_elem *he, *next;
unsigned int i;
- for (i = 0; i < priv->hsize; i++) {
- hlist_for_each_entry_safe(elem, next, &priv->hash[i], hnode) {
- hlist_del(&elem->hnode);
- nft_hash_elem_destroy(set, elem);
+ for (i = 0; i < tbl->size; i++) {
+ for (he = nft_dereference(tbl->buckets[i]); he != NULL;
+ he = next) {
+ /* Fetch the successor before he is freed */
+ next = nft_dereference(he->next);
+ nft_hash_elem_destroy(set, he);
}
}
- kfree(priv->hash);
+ /* The table may have been obtained from vzalloc() (see
+ * nft_hash_tbl_alloc()), so it must be released with kvfree() via
+ * nft_hash_tbl_free(); plain kfree() is wrong for vmalloc memory.
+ */
+ nft_hash_tbl_free(tbl);
+}
+
+/*
+ * Fill in the memory and lookup-class estimate for a hash set.
+ *
+ * @desc: set description; desc->size is the expected number of elements,
+ * 0 if unknown
+ * @features: set flags; NFT_SET_MAP adds one data slot per element
+ * @est: output, receives the size estimate and the lookup class
+ *
+ * With a known size the estimate covers the private area, the bucket
+ * pointers for that size and all elements. Without one, est->size is the
+ * cost of a single element. Always returns true.
+ */
+static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features,
+ struct nft_set_estimate *est)
+{
+ unsigned int esize;
+
+ esize = sizeof(struct nft_hash_elem);
+ if (features & NFT_SET_MAP)
+ esize += FIELD_SIZEOF(struct nft_hash_elem, data[0]);
+
+ if (desc->size) {
+ est->size = sizeof(struct nft_hash) +
+ nft_hash_tbl_size(desc->size) *
+ sizeof(struct nft_hash_elem *) +
+ desc->size * esize;
+ } else {
+ /* Resizing happens when the load drops below 30% or goes
+ * above 75%. The average of 52.5% load (approximated by 50%)
+ * is used for the size estimation of the hash buckets,
+ * meaning we calculate two buckets per element.
+ */
+ est->size = esize + 2 * sizeof(struct nft_hash_elem *);
+ }
+
+ est->class = NFT_SET_CLASS_O_1;
+
+ return true;
}
static struct nft_set_ops nft_hash_ops __read_mostly = {
.privsize = nft_hash_privsize,
+ .estimate = nft_hash_estimate,
.init = nft_hash_init,
.destroy = nft_hash_destroy,
.get = nft_hash_get,
diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c
index f169501f1ad..810385eb724 100644
--- a/net/netfilter/nft_immediate.c
+++ b/net/netfilter/nft_immediate.c
@@ -70,7 +70,8 @@ err1:
return err;
}
-static void nft_immediate_destroy(const struct nft_expr *expr)
+static void nft_immediate_destroy(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
{
const struct nft_immediate_expr *priv = nft_expr_priv(expr);
return nft_data_uninit(&priv->data, nft_dreg_to_type(priv->dreg));
diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c
index 26c5154e05f..10cfb156cdf 100644
--- a/net/netfilter/nft_log.c
+++ b/net/netfilter/nft_log.c
@@ -74,7 +74,8 @@ static int nft_log_init(const struct nft_ctx *ctx,
return 0;
}
-static void nft_log_destroy(const struct nft_expr *expr)
+static void nft_log_destroy(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
{
struct nft_log *priv = nft_expr_priv(expr);
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index bb4ef4cccb6..6404a726d17 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -56,8 +56,14 @@ static int nft_lookup_init(const struct nft_ctx *ctx,
return -EINVAL;
set = nf_tables_set_lookup(ctx->table, tb[NFTA_LOOKUP_SET]);
- if (IS_ERR(set))
- return PTR_ERR(set);
+ if (IS_ERR(set)) {
+ if (tb[NFTA_LOOKUP_SET_ID]) {
+ set = nf_tables_set_lookup_byid(ctx->net,
+ tb[NFTA_LOOKUP_SET_ID]);
+ }
+ if (IS_ERR(set))
+ return PTR_ERR(set);
+ }
priv->sreg = ntohl(nla_get_be32(tb[NFTA_LOOKUP_SREG]));
err = nft_validate_input_register(priv->sreg);
@@ -89,11 +95,12 @@ static int nft_lookup_init(const struct nft_ctx *ctx,
return 0;
}
-static void nft_lookup_destroy(const struct nft_expr *expr)
+static void nft_lookup_destroy(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
{
struct nft_lookup *priv = nft_expr_priv(expr);
- nf_tables_unbind_set(NULL, priv->set, &priv->binding);
+ nf_tables_unbind_set(ctx, priv->set, &priv->binding);
}
static int nft_lookup_dump(struct sk_buff *skb, const struct nft_expr *expr)
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 425cf39af89..852b178c6ae 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -18,18 +18,11 @@
#include <net/sock.h>
#include <net/tcp_states.h> /* for TCP_TIME_WAIT */
#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nft_meta.h>
-struct nft_meta {
- enum nft_meta_keys key:8;
- union {
- enum nft_registers dreg:8;
- enum nft_registers sreg:8;
- };
-};
-
-static void nft_meta_get_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
- const struct nft_pktinfo *pkt)
+void nft_meta_get_eval(const struct nft_expr *expr,
+ struct nft_data data[NFT_REG_MAX + 1],
+ const struct nft_pktinfo *pkt)
{
const struct nft_meta *priv = nft_expr_priv(expr);
const struct sk_buff *skb = pkt->skb;
@@ -140,10 +133,11 @@ static void nft_meta_get_eval(const struct nft_expr *expr,
err:
data[NFT_REG_VERDICT].verdict = NFT_BREAK;
}
+EXPORT_SYMBOL_GPL(nft_meta_get_eval);
-static void nft_meta_set_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
- const struct nft_pktinfo *pkt)
+void nft_meta_set_eval(const struct nft_expr *expr,
+ struct nft_data data[NFT_REG_MAX + 1],
+ const struct nft_pktinfo *pkt)
{
const struct nft_meta *meta = nft_expr_priv(expr);
struct sk_buff *skb = pkt->skb;
@@ -163,28 +157,24 @@ static void nft_meta_set_eval(const struct nft_expr *expr,
WARN_ON(1);
}
}
+EXPORT_SYMBOL_GPL(nft_meta_set_eval);
-static const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = {
+const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = {
[NFTA_META_DREG] = { .type = NLA_U32 },
[NFTA_META_KEY] = { .type = NLA_U32 },
[NFTA_META_SREG] = { .type = NLA_U32 },
};
+EXPORT_SYMBOL_GPL(nft_meta_policy);
-static int nft_meta_init_validate_set(uint32_t key)
+int nft_meta_get_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
{
- switch (key) {
- case NFT_META_MARK:
- case NFT_META_PRIORITY:
- case NFT_META_NFTRACE:
- return 0;
- default:
- return -EOPNOTSUPP;
- }
-}
+ struct nft_meta *priv = nft_expr_priv(expr);
+ int err;
-static int nft_meta_init_validate_get(uint32_t key)
-{
- switch (key) {
+ priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY]));
+ switch (priv->key) {
case NFT_META_LEN:
case NFT_META_PROTOCOL:
case NFT_META_NFPROTO:
@@ -205,39 +195,41 @@ static int nft_meta_init_validate_get(uint32_t key)
#ifdef CONFIG_NETWORK_SECMARK
case NFT_META_SECMARK:
#endif
- return 0;
+ break;
default:
return -EOPNOTSUPP;
}
+ priv->dreg = ntohl(nla_get_be32(tb[NFTA_META_DREG]));
+ err = nft_validate_output_register(priv->dreg);
+ if (err < 0)
+ return err;
+
+ err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
+ if (err < 0)
+ return err;
+
+ return 0;
}
+EXPORT_SYMBOL_GPL(nft_meta_get_init);
-static int nft_meta_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
- const struct nlattr * const tb[])
+int nft_meta_set_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
{
struct nft_meta *priv = nft_expr_priv(expr);
int err;
priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY]));
-
- if (tb[NFTA_META_DREG]) {
- err = nft_meta_init_validate_get(priv->key);
- if (err < 0)
- return err;
-
- priv->dreg = ntohl(nla_get_be32(tb[NFTA_META_DREG]));
- err = nft_validate_output_register(priv->dreg);
- if (err < 0)
- return err;
-
- return nft_validate_data_load(ctx, priv->dreg, NULL,
- NFT_DATA_VALUE);
+ switch (priv->key) {
+ case NFT_META_MARK:
+ case NFT_META_PRIORITY:
+ case NFT_META_NFTRACE:
+ break;
+ default:
+ return -EOPNOTSUPP;
}
- err = nft_meta_init_validate_set(priv->key);
- if (err < 0)
- return err;
-
priv->sreg = ntohl(nla_get_be32(tb[NFTA_META_SREG]));
err = nft_validate_input_register(priv->sreg);
if (err < 0)
@@ -245,9 +237,10 @@ static int nft_meta_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
return 0;
}
+EXPORT_SYMBOL_GPL(nft_meta_set_init);
-static int nft_meta_get_dump(struct sk_buff *skb,
- const struct nft_expr *expr)
+int nft_meta_get_dump(struct sk_buff *skb,
+ const struct nft_expr *expr)
{
const struct nft_meta *priv = nft_expr_priv(expr);
@@ -260,9 +253,10 @@ static int nft_meta_get_dump(struct sk_buff *skb,
nla_put_failure:
return -1;
}
+EXPORT_SYMBOL_GPL(nft_meta_get_dump);
-static int nft_meta_set_dump(struct sk_buff *skb,
- const struct nft_expr *expr)
+int nft_meta_set_dump(struct sk_buff *skb,
+ const struct nft_expr *expr)
{
const struct nft_meta *priv = nft_expr_priv(expr);
@@ -276,13 +270,14 @@ static int nft_meta_set_dump(struct sk_buff *skb,
nla_put_failure:
return -1;
}
+EXPORT_SYMBOL_GPL(nft_meta_set_dump);
static struct nft_expr_type nft_meta_type;
static const struct nft_expr_ops nft_meta_get_ops = {
.type = &nft_meta_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_meta)),
.eval = nft_meta_get_eval,
- .init = nft_meta_init,
+ .init = nft_meta_get_init,
.dump = nft_meta_get_dump,
};
@@ -290,7 +285,7 @@ static const struct nft_expr_ops nft_meta_set_ops = {
.type = &nft_meta_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_meta)),
.eval = nft_meta_set_eval,
- .init = nft_meta_init,
+ .init = nft_meta_set_init,
.dump = nft_meta_set_dump,
};
diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
index d3b1ffe2618..79ff58cd36d 100644
--- a/net/netfilter/nft_nat.c
+++ b/net/netfilter/nft_nat.c
@@ -31,8 +31,8 @@ struct nft_nat {
enum nft_registers sreg_addr_max:8;
enum nft_registers sreg_proto_min:8;
enum nft_registers sreg_proto_max:8;
- int family;
- enum nf_nat_manip_type type;
+ enum nf_nat_manip_type type:8;
+ u8 family;
};
static void nft_nat_eval(const struct nft_expr *expr,
@@ -88,6 +88,7 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
const struct nlattr * const tb[])
{
struct nft_nat *priv = nft_expr_priv(expr);
+ u32 family;
int err;
if (tb[NFTA_NAT_TYPE] == NULL)
@@ -107,9 +108,12 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
if (tb[NFTA_NAT_FAMILY] == NULL)
return -EINVAL;
- priv->family = ntohl(nla_get_be32(tb[NFTA_NAT_FAMILY]));
- if (priv->family != AF_INET && priv->family != AF_INET6)
- return -EINVAL;
+ family = ntohl(nla_get_be32(tb[NFTA_NAT_FAMILY]));
+ if (family != AF_INET && family != AF_INET6)
+ return -EAFNOSUPPORT;
+ if (family != ctx->afi->family)
+ return -EOPNOTSUPP;
+ priv->family = family;
if (tb[NFTA_NAT_REG_ADDR_MIN]) {
priv->sreg_addr_min = ntohl(nla_get_be32(
@@ -171,12 +175,14 @@ static int nft_nat_dump(struct sk_buff *skb, const struct nft_expr *expr)
if (nla_put_be32(skb,
NFTA_NAT_REG_ADDR_MAX, htonl(priv->sreg_addr_max)))
goto nla_put_failure;
- if (nla_put_be32(skb,
- NFTA_NAT_REG_PROTO_MIN, htonl(priv->sreg_proto_min)))
- goto nla_put_failure;
- if (nla_put_be32(skb,
- NFTA_NAT_REG_PROTO_MAX, htonl(priv->sreg_proto_max)))
- goto nla_put_failure;
+ if (priv->sreg_proto_min) {
+ if (nla_put_be32(skb, NFTA_NAT_REG_PROTO_MIN,
+ htonl(priv->sreg_proto_min)))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_NAT_REG_PROTO_MAX,
+ htonl(priv->sreg_proto_max)))
+ goto nla_put_failure;
+ }
return 0;
nla_put_failure:
@@ -202,13 +208,7 @@ static struct nft_expr_type nft_nat_type __read_mostly = {
static int __init nft_nat_module_init(void)
{
- int err;
-
- err = nft_register_expr(&nft_nat_type);
- if (err < 0)
- return err;
-
- return 0;
+ return nft_register_expr(&nft_nat_type);
}
static void __exit nft_nat_module_exit(void)
diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c
index e21d69d1350..e1836ff8819 100644
--- a/net/netfilter/nft_rbtree.c
+++ b/net/netfilter/nft_rbtree.c
@@ -18,6 +18,8 @@
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables.h>
+static DEFINE_SPINLOCK(nft_rbtree_lock);
+
struct nft_rbtree {
struct rb_root root;
};
@@ -38,6 +40,7 @@ static bool nft_rbtree_lookup(const struct nft_set *set,
const struct rb_node *parent = priv->root.rb_node;
int d;
+ spin_lock_bh(&nft_rbtree_lock);
while (parent != NULL) {
rbe = rb_entry(parent, struct nft_rbtree_elem, node);
@@ -53,6 +56,8 @@ found:
goto out;
if (set->flags & NFT_SET_MAP)
nft_data_copy(data, rbe->data);
+
+ spin_unlock_bh(&nft_rbtree_lock);
return true;
}
}
@@ -62,6 +67,7 @@ found:
goto found;
}
out:
+ spin_unlock_bh(&nft_rbtree_lock);
return false;
}
@@ -124,9 +130,12 @@ static int nft_rbtree_insert(const struct nft_set *set,
!(rbe->flags & NFT_SET_ELEM_INTERVAL_END))
nft_data_copy(rbe->data, &elem->data);
+ spin_lock_bh(&nft_rbtree_lock);
err = __nft_rbtree_insert(set, rbe);
if (err < 0)
kfree(rbe);
+
+ spin_unlock_bh(&nft_rbtree_lock);
return err;
}
@@ -136,7 +145,9 @@ static void nft_rbtree_remove(const struct nft_set *set,
struct nft_rbtree *priv = nft_set_priv(set);
struct nft_rbtree_elem *rbe = elem->cookie;
+ spin_lock_bh(&nft_rbtree_lock);
rb_erase(&rbe->node, &priv->root);
+ spin_unlock_bh(&nft_rbtree_lock);
kfree(rbe);
}
@@ -147,6 +158,7 @@ static int nft_rbtree_get(const struct nft_set *set, struct nft_set_elem *elem)
struct nft_rbtree_elem *rbe;
int d;
+ spin_lock_bh(&nft_rbtree_lock);
while (parent != NULL) {
rbe = rb_entry(parent, struct nft_rbtree_elem, node);
@@ -161,9 +173,11 @@ static int nft_rbtree_get(const struct nft_set *set, struct nft_set_elem *elem)
!(rbe->flags & NFT_SET_ELEM_INTERVAL_END))
nft_data_copy(&elem->data, rbe->data);
elem->flags = rbe->flags;
+ spin_unlock_bh(&nft_rbtree_lock);
return 0;
}
}
+ spin_unlock_bh(&nft_rbtree_lock);
return -ENOENT;
}
@@ -176,6 +190,7 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,
struct nft_set_elem elem;
struct rb_node *node;
+ spin_lock_bh(&nft_rbtree_lock);
for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) {
if (iter->count < iter->skip)
goto cont;
@@ -188,11 +203,14 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,
elem.flags = rbe->flags;
iter->err = iter->fn(ctx, set, iter, &elem);
- if (iter->err < 0)
+ if (iter->err < 0) {
+ spin_unlock_bh(&nft_rbtree_lock);
return;
+ }
cont:
iter->count++;
}
+ spin_unlock_bh(&nft_rbtree_lock);
}
static unsigned int nft_rbtree_privsize(const struct nlattr * const nla[])
@@ -201,6 +219,7 @@ static unsigned int nft_rbtree_privsize(const struct nlattr * const nla[])
}
static int nft_rbtree_init(const struct nft_set *set,
+ const struct nft_set_desc *desc,
const struct nlattr * const nla[])
{
struct nft_rbtree *priv = nft_set_priv(set);
@@ -215,15 +234,37 @@ static void nft_rbtree_destroy(const struct nft_set *set)
struct nft_rbtree_elem *rbe;
struct rb_node *node;
+ spin_lock_bh(&nft_rbtree_lock);
while ((node = priv->root.rb_node) != NULL) {
rb_erase(node, &priv->root);
rbe = rb_entry(node, struct nft_rbtree_elem, node);
nft_rbtree_elem_destroy(set, rbe);
}
+ spin_unlock_bh(&nft_rbtree_lock);
+}
+
+/*
+ * Fill in the memory and lookup-class estimate for an rbtree set.
+ *
+ * @desc: set description; desc->size is the expected number of elements,
+ * 0 if unknown
+ * @features: set flags; NFT_SET_MAP adds one data slot per node
+ * @est: output, receives the size estimate and the lookup class
+ *
+ * With a known size the estimate is the private area plus all nodes;
+ * without one, est->size is the cost of a single node. Always returns true.
+ */
+static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features,
+ struct nft_set_estimate *est)
+{
+ unsigned int nsize;
+
+ nsize = sizeof(struct nft_rbtree_elem);
+ if (features & NFT_SET_MAP)
+ nsize += FIELD_SIZEOF(struct nft_rbtree_elem, data[0]);
+
+ if (desc->size)
+ est->size = sizeof(struct nft_rbtree) + desc->size * nsize;
+ else
+ est->size = nsize;
+
+ est->class = NFT_SET_CLASS_O_LOG_N;
+
+ return true;
}
static struct nft_set_ops nft_rbtree_ops __read_mostly = {
.privsize = nft_rbtree_privsize,
+ .estimate = nft_rbtree_estimate,
.init = nft_rbtree_init,
.destroy = nft_rbtree_destroy,
.insert = nft_rbtree_insert,
diff --git a/net/netfilter/xt_AUDIT.c b/net/netfilter/xt_AUDIT.c
index 3228d7f24eb..4973cbddc44 100644
--- a/net/netfilter/xt_AUDIT.c
+++ b/net/netfilter/xt_AUDIT.c
@@ -146,11 +146,11 @@ audit_tg(struct sk_buff *skb, const struct xt_action_param *par)
if (par->family == NFPROTO_BRIDGE) {
switch (eth_hdr(skb)->h_proto) {
- case __constant_htons(ETH_P_IP):
+ case htons(ETH_P_IP):
audit_ip4(ab, skb);
break;
- case __constant_htons(ETH_P_IPV6):
+ case htons(ETH_P_IPV6):
audit_ip6(ab, skb);
break;
}
diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c
index 12d4da8e6c7..bbffdbdaf60 100644
--- a/net/netfilter/xt_bpf.c
+++ b/net/netfilter/xt_bpf.c
@@ -23,10 +23,11 @@ MODULE_ALIAS("ip6t_bpf");
static int bpf_mt_check(const struct xt_mtchk_param *par)
{
struct xt_bpf_info *info = par->matchinfo;
- struct sock_fprog program;
+ struct sock_fprog_kern program;
program.len = info->bpf_program_num_elem;
- program.filter = (struct sock_filter __user *) info->bpf_program;
+ program.filter = info->bpf_program;
+
if (sk_unattached_filter_create(&info->filter, &program)) {
pr_info("bpf: check failed: parse error\n");
return -EINVAL;
diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c
index 9a8e77e7f8d..f4e83300532 100644
--- a/net/netfilter/xt_cgroup.c
+++ b/net/netfilter/xt_cgroup.c
@@ -54,7 +54,8 @@ static struct xt_match cgroup_mt_reg __read_mostly = {
.matchsize = sizeof(struct xt_cgroup_info),
.me = THIS_MODULE,
.hooks = (1 << NF_INET_LOCAL_OUT) |
- (1 << NF_INET_POST_ROUTING),
+ (1 << NF_INET_POST_ROUTING) |
+ (1 << NF_INET_LOCAL_IN),
};
static int __init cgroup_mt_init(void)
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index c40b2695633..fbc66bb250d 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -19,6 +19,7 @@
#include <linux/jhash.h>
#include <linux/slab.h>
#include <linux/list.h>
+#include <linux/rbtree.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/skbuff.h>
@@ -31,6 +32,16 @@
#include <net/netfilter/nf_conntrack_tuple.h>
#include <net/netfilter/nf_conntrack_zones.h>
+#define CONNLIMIT_SLOTS 256U
+
+#ifdef CONFIG_LOCKDEP
+#define CONNLIMIT_LOCK_SLOTS 8U
+#else
+#define CONNLIMIT_LOCK_SLOTS 256U
+#endif
+
+#define CONNLIMIT_GC_MAX_NODES 8
+
/* we will save the tuples of all connections we care about */
struct xt_connlimit_conn {
struct hlist_node node;
@@ -38,16 +49,27 @@ struct xt_connlimit_conn {
union nf_inet_addr addr;
};
+struct xt_connlimit_rb {
+ struct rb_node node;
+ struct hlist_head hhead; /* connections/hosts in same subnet */
+ union nf_inet_addr addr; /* search key */
+};
+
+static spinlock_t xt_connlimit_locks[CONNLIMIT_LOCK_SLOTS] __cacheline_aligned_in_smp;
+
struct xt_connlimit_data {
- struct hlist_head iphash[256];
- spinlock_t lock;
+ struct rb_root climit_root4[CONNLIMIT_SLOTS];
+ struct rb_root climit_root6[CONNLIMIT_SLOTS];
};
static u_int32_t connlimit_rnd __read_mostly;
+static struct kmem_cache *connlimit_rb_cachep __read_mostly;
+static struct kmem_cache *connlimit_conn_cachep __read_mostly;
static inline unsigned int connlimit_iphash(__be32 addr)
{
- return jhash_1word((__force __u32)addr, connlimit_rnd) & 0xFF;
+ return jhash_1word((__force __u32)addr,
+ connlimit_rnd) % CONNLIMIT_SLOTS;
}
static inline unsigned int
@@ -60,7 +82,8 @@ connlimit_iphash6(const union nf_inet_addr *addr,
for (i = 0; i < ARRAY_SIZE(addr->ip6); ++i)
res.ip6[i] = addr->ip6[i] & mask->ip6[i];
- return jhash2((u32 *)res.ip6, ARRAY_SIZE(res.ip6), connlimit_rnd) & 0xFF;
+ return jhash2((u32 *)res.ip6, ARRAY_SIZE(res.ip6),
+ connlimit_rnd) % CONNLIMIT_SLOTS;
}
static inline bool already_closed(const struct nf_conn *conn)
@@ -72,13 +95,14 @@ static inline bool already_closed(const struct nf_conn *conn)
return 0;
}
-static inline unsigned int
+static int
same_source_net(const union nf_inet_addr *addr,
const union nf_inet_addr *mask,
const union nf_inet_addr *u3, u_int8_t family)
{
if (family == NFPROTO_IPV4) {
- return (addr->ip & mask->ip) == (u3->ip & mask->ip);
+ return ntohl(addr->ip & mask->ip) -
+ ntohl(u3->ip & mask->ip);
} else {
union nf_inet_addr lh, rh;
unsigned int i;
@@ -88,89 +112,205 @@ same_source_net(const union nf_inet_addr *addr,
rh.ip6[i] = u3->ip6[i] & mask->ip6[i];
}
- return memcmp(&lh.ip6, &rh.ip6, sizeof(lh.ip6)) == 0;
+ return memcmp(&lh.ip6, &rh.ip6, sizeof(lh.ip6));
}
}
-static int count_them(struct net *net,
- struct xt_connlimit_data *data,
+static bool add_hlist(struct hlist_head *head,
const struct nf_conntrack_tuple *tuple,
- const union nf_inet_addr *addr,
- const union nf_inet_addr *mask,
- u_int8_t family)
+ const union nf_inet_addr *addr)
+{
+ struct xt_connlimit_conn *conn;
+
+ conn = kmem_cache_alloc(connlimit_conn_cachep, GFP_ATOMIC);
+ if (conn == NULL)
+ return false;
+ conn->tuple = *tuple;
+ conn->addr = *addr;
+ hlist_add_head(&conn->node, head);
+ return true;
+}
+
+static unsigned int check_hlist(struct net *net,
+ struct hlist_head *head,
+ const struct nf_conntrack_tuple *tuple,
+ bool *addit)
{
const struct nf_conntrack_tuple_hash *found;
struct xt_connlimit_conn *conn;
struct hlist_node *n;
struct nf_conn *found_ct;
- struct hlist_head *hash;
- bool addit = true;
- int matches = 0;
-
- if (family == NFPROTO_IPV6)
- hash = &data->iphash[connlimit_iphash6(addr, mask)];
- else
- hash = &data->iphash[connlimit_iphash(addr->ip & mask->ip)];
+ unsigned int length = 0;
+ *addit = true;
rcu_read_lock();
/* check the saved connections */
- hlist_for_each_entry_safe(conn, n, hash, node) {
+ hlist_for_each_entry_safe(conn, n, head, node) {
found = nf_conntrack_find_get(net, NF_CT_DEFAULT_ZONE,
&conn->tuple);
- found_ct = NULL;
+ if (found == NULL) {
+ hlist_del(&conn->node);
+ kmem_cache_free(connlimit_conn_cachep, conn);
+ continue;
+ }
- if (found != NULL)
- found_ct = nf_ct_tuplehash_to_ctrack(found);
+ found_ct = nf_ct_tuplehash_to_ctrack(found);
- if (found_ct != NULL &&
- nf_ct_tuple_equal(&conn->tuple, tuple) &&
- !already_closed(found_ct))
+ if (nf_ct_tuple_equal(&conn->tuple, tuple)) {
/*
* Just to be sure we have it only once in the list.
* We should not see tuples twice unless someone hooks
* this into a table without "-p tcp --syn".
*/
- addit = false;
-
- if (found == NULL) {
- /* this one is gone */
- hlist_del(&conn->node);
- kfree(conn);
- continue;
- }
-
- if (already_closed(found_ct)) {
+ *addit = false;
+ } else if (already_closed(found_ct)) {
/*
* we do not care about connections which are
* closed already -> ditch it
*/
nf_ct_put(found_ct);
hlist_del(&conn->node);
- kfree(conn);
+ kmem_cache_free(connlimit_conn_cachep, conn);
continue;
}
- if (same_source_net(addr, mask, &conn->addr, family))
- /* same source network -> be counted! */
- ++matches;
nf_ct_put(found_ct);
+ length++;
}
rcu_read_unlock();
- if (addit) {
- /* save the new connection in our list */
- conn = kmalloc(sizeof(*conn), GFP_ATOMIC);
- if (conn == NULL)
- return -ENOMEM;
- conn->tuple = *tuple;
- conn->addr = *addr;
- hlist_add_head(&conn->node, hash);
- ++matches;
+ return length;
+}
+
+/*
+ * Erase the gc_count nodes collected in gc_nodes[] from the tree at root
+ * and return each of them to connlimit_rb_cachep, working from the last
+ * collected node back to the first.
+ *
+ * The per-node connection lists are not touched here; count_tree() only
+ * collects nodes whose list is already empty.
+ */
+static void tree_nodes_free(struct rb_root *root,
+ struct xt_connlimit_rb *gc_nodes[],
+ unsigned int gc_count)
+{
+ struct xt_connlimit_rb *rbconn;
+
+ while (gc_count) {
+ rbconn = gc_nodes[--gc_count];
+ rb_erase(&rbconn->node, root);
+ kmem_cache_free(connlimit_rb_cachep, rbconn);
+ }
+}
+
+/*
+ * Count the connections tracked for the source network addr/mask in the
+ * tree at root, recording tuple as a new connection if it is not yet known.
+ *
+ * While descending, up to CONNLIMIT_GC_MAX_NODES visited nodes whose
+ * connection list has become empty are collected and freed. If such nodes
+ * were freed and no matching node was found, the descent is restarted once
+ * with collection disabled before a new node is inserted.
+ *
+ * Called from count_them() with the slot's spinlock held.
+ *
+ * Returns the number of connections for the network (including tuple), or
+ * 0 if a required allocation failed; the caller treats 0 as "hotdrop".
+ */
+static unsigned int
+count_tree(struct net *net, struct rb_root *root,
+ const struct nf_conntrack_tuple *tuple,
+ const union nf_inet_addr *addr, const union nf_inet_addr *mask,
+ u8 family)
+{
+ struct xt_connlimit_rb *gc_nodes[CONNLIMIT_GC_MAX_NODES];
+ struct rb_node **rbnode, *parent;
+ struct xt_connlimit_rb *rbconn;
+ struct xt_connlimit_conn *conn;
+ unsigned int gc_count;
+ bool no_gc = false;
+
+ restart:
+ gc_count = 0;
+ parent = NULL;
+ rbnode = &(root->rb_node);
+ while (*rbnode) {
+ int diff;
+ bool addit;
+
+ rbconn = container_of(*rbnode, struct xt_connlimit_rb, node);
+
+ parent = *rbnode;
+ /* Here same_source_net() acts as a three-way comparison */
+ diff = same_source_net(addr, mask, &rbconn->addr, family);
+ if (diff < 0) {
+ rbnode = &((*rbnode)->rb_left);
+ } else if (diff > 0) {
+ rbnode = &((*rbnode)->rb_right);
+ } else {
+ /* same source network -> be counted! */
+ unsigned int count;
+ count = check_hlist(net, &rbconn->hhead, tuple, &addit);
+
+ /* Drop the empty nodes collected on the way down */
+ tree_nodes_free(root, gc_nodes, gc_count);
+ if (!addit)
+ return count;
+
+ if (!add_hlist(&rbconn->hhead, tuple, addr))
+ return 0; /* hotdrop */
+
+ return count + 1;
+ }
+
+ if (no_gc || gc_count >= ARRAY_SIZE(gc_nodes))
+ continue;
+
+ /* only used for GC on hhead, retval and 'addit' ignored */
+ check_hlist(net, &rbconn->hhead, tuple, &addit);
+ if (hlist_empty(&rbconn->hhead))
+ gc_nodes[gc_count++] = rbconn;
+ }
+
+ if (gc_count) {
+ no_gc = true;
+ tree_nodes_free(root, gc_nodes, gc_count);
+ /* tree_nodes_free before new allocation permits
+ * allocator to re-use newly free'd object.
+ *
+ * This is a rare event; in most cases we will find
+ * existing node to re-use. (or gc_count is 0).
+ */
+ goto restart;
+ }
+
+ /* no match, need to insert new node */
+ rbconn = kmem_cache_alloc(connlimit_rb_cachep, GFP_ATOMIC);
+ if (rbconn == NULL)
+ return 0;
+
+ conn = kmem_cache_alloc(connlimit_conn_cachep, GFP_ATOMIC);
+ if (conn == NULL) {
+ kmem_cache_free(connlimit_rb_cachep, rbconn);
+ return 0;
+ }
+
+ conn->tuple = *tuple;
+ conn->addr = *addr;
+ rbconn->addr = *addr;
+
+ INIT_HLIST_HEAD(&rbconn->hhead);
+ hlist_add_head(&conn->node, &rbconn->hhead);
+
+ /* parent/rbnode still describe the leaf slot where the descent ended */
+ rb_link_node(&rbconn->node, parent, rbnode);
+ rb_insert_color(&rbconn->node, root);
+ return 1;
+}
+
+/*
+ * Count the connections from the source network addr/mask.
+ *
+ * The masked address is hashed to pick one of the CONNLIMIT_SLOTS trees of
+ * the matching family, and the tree is searched and updated by count_tree()
+ * under the spinlock selected by hash % CONNLIMIT_LOCK_SLOTS.
+ *
+ * Returns count_tree()'s result: the connection count, or 0 when an
+ * allocation failed.
+ */
+static int count_them(struct net *net,
+ struct xt_connlimit_data *data,
+ const struct nf_conntrack_tuple *tuple,
+ const union nf_inet_addr *addr,
+ const union nf_inet_addr *mask,
+ u_int8_t family)
+{
+ struct rb_root *root;
+ int count;
+ u32 hash;
+
+ if (family == NFPROTO_IPV6) {
+ hash = connlimit_iphash6(addr, mask);
+ root = &data->climit_root6[hash];
+ } else {
+ hash = connlimit_iphash(addr->ip & mask->ip);
+ root = &data->climit_root4[hash];
}
- return matches;
+ /* Several tree slots may share one lock */
+ spin_lock_bh(&xt_connlimit_locks[hash % CONNLIMIT_LOCK_SLOTS]);
+
+ count = count_tree(net, root, tuple, addr, mask, family);
+
+ spin_unlock_bh(&xt_connlimit_locks[hash % CONNLIMIT_LOCK_SLOTS]);
+
+ return count;
}
static bool
@@ -183,7 +323,7 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
const struct nf_conntrack_tuple *tuple_ptr = &tuple;
enum ip_conntrack_info ctinfo;
const struct nf_conn *ct;
- int connections;
+ unsigned int connections;
ct = nf_ct_get(skb, &ctinfo);
if (ct != NULL)
@@ -202,12 +342,9 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
iph->daddr : iph->saddr;
}
- spin_lock_bh(&info->data->lock);
connections = count_them(net, info->data, tuple_ptr, &addr,
&info->mask, par->family);
- spin_unlock_bh(&info->data->lock);
-
- if (connections < 0)
+ if (connections == 0)
/* kmalloc failed, drop it entirely */
goto hotdrop;
@@ -247,29 +384,44 @@ static int connlimit_mt_check(const struct xt_mtchk_param *par)
return -ENOMEM;
}
- spin_lock_init(&info->data->lock);
- for (i = 0; i < ARRAY_SIZE(info->data->iphash); ++i)
- INIT_HLIST_HEAD(&info->data->iphash[i]);
+ for (i = 0; i < ARRAY_SIZE(info->data->climit_root4); ++i)
+ info->data->climit_root4[i] = RB_ROOT;
+ for (i = 0; i < ARRAY_SIZE(info->data->climit_root6); ++i)
+ info->data->climit_root6[i] = RB_ROOT;
return 0;
}
-static void connlimit_mt_destroy(const struct xt_mtdtor_param *par)
+static void destroy_tree(struct rb_root *r)
{
- const struct xt_connlimit_info *info = par->matchinfo;
struct xt_connlimit_conn *conn;
+ struct xt_connlimit_rb *rbconn;
struct hlist_node *n;
- struct hlist_head *hash = info->data->iphash;
+ struct rb_node *node;
+
+ while ((node = rb_first(r)) != NULL) {
+ rbconn = container_of(node, struct xt_connlimit_rb, node);
+
+ rb_erase(node, r);
+
+ hlist_for_each_entry_safe(conn, n, &rbconn->hhead, node)
+ kmem_cache_free(connlimit_conn_cachep, conn);
+
+ kmem_cache_free(connlimit_rb_cachep, rbconn);
+ }
+}
+
+static void connlimit_mt_destroy(const struct xt_mtdtor_param *par)
+{
+ const struct xt_connlimit_info *info = par->matchinfo;
unsigned int i;
nf_ct_l3proto_module_put(par->family);
- for (i = 0; i < ARRAY_SIZE(info->data->iphash); ++i) {
- hlist_for_each_entry_safe(conn, n, &hash[i], node) {
- hlist_del(&conn->node);
- kfree(conn);
- }
- }
+ for (i = 0; i < ARRAY_SIZE(info->data->climit_root4); ++i)
+ destroy_tree(&info->data->climit_root4[i]);
+ for (i = 0; i < ARRAY_SIZE(info->data->climit_root6); ++i)
+ destroy_tree(&info->data->climit_root6[i]);
kfree(info->data);
}
@@ -287,12 +439,40 @@ static struct xt_match connlimit_mt_reg __read_mostly = {
static int __init connlimit_mt_init(void)
{
- return xt_register_match(&connlimit_mt_reg);
+ int ret, i;
+
+ BUILD_BUG_ON(CONNLIMIT_LOCK_SLOTS > CONNLIMIT_SLOTS);
+ BUILD_BUG_ON((CONNLIMIT_SLOTS % CONNLIMIT_LOCK_SLOTS) != 0);
+
+ for (i = 0; i < CONNLIMIT_LOCK_SLOTS; ++i)
+ spin_lock_init(&xt_connlimit_locks[i]);
+
+ connlimit_conn_cachep = kmem_cache_create("xt_connlimit_conn",
+ sizeof(struct xt_connlimit_conn),
+ 0, 0, NULL);
+ if (!connlimit_conn_cachep)
+ return -ENOMEM;
+
+ connlimit_rb_cachep = kmem_cache_create("xt_connlimit_rb",
+ sizeof(struct xt_connlimit_rb),
+ 0, 0, NULL);
+ if (!connlimit_rb_cachep) {
+ kmem_cache_destroy(connlimit_conn_cachep);
+ return -ENOMEM;
+ }
+ ret = xt_register_match(&connlimit_mt_reg);
+ if (ret != 0) {
+ kmem_cache_destroy(connlimit_conn_cachep);
+ kmem_cache_destroy(connlimit_rb_cachep);
+ }
+ return ret;
}
static void __exit connlimit_mt_exit(void)
{
xt_unregister_match(&connlimit_mt_reg);
+ kmem_cache_destroy(connlimit_conn_cachep);
+ kmem_cache_destroy(connlimit_rb_cachep);
}
module_init(connlimit_mt_init);
diff --git a/net/netfilter/xt_ipcomp.c b/net/netfilter/xt_ipcomp.c
index a4c7561698c..89d53104c6b 100644
--- a/net/netfilter/xt_ipcomp.c
+++ b/net/netfilter/xt_ipcomp.c
@@ -60,7 +60,7 @@ static bool comp_mt(const struct sk_buff *skb, struct xt_action_param *par)
}
return spi_match(compinfo->spis[0], compinfo->spis[1],
- ntohl(chdr->cpi << 16),
+ ntohs(chdr->cpi),
!!(compinfo->invflags & XT_IPCOMP_INV_SPI));
}
diff --git a/net/netfilter/xt_nfacct.c b/net/netfilter/xt_nfacct.c
index b3be0ef21f1..8c646ed9c92 100644
--- a/net/netfilter/xt_nfacct.c
+++ b/net/netfilter/xt_nfacct.c
@@ -21,11 +21,14 @@ MODULE_ALIAS("ip6t_nfacct");
static bool nfacct_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
+ int overquota;
const struct xt_nfacct_match_info *info = par->targinfo;
nfnl_acct_update(skb, info->nfacct);
- return true;
+ overquota = nfnl_acct_overquota(skb, info->nfacct);
+
+ return overquota == NFACCT_UNDERQUOTA ? false : true;
}
static int
diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c
index 7174611bd67..c529161cdbf 100644
--- a/net/netfilter/xt_osf.c
+++ b/net/netfilter/xt_osf.c
@@ -422,4 +422,6 @@ module_exit(xt_osf_fini);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Evgeniy Polyakov <zbr@ioremap.net>");
MODULE_DESCRIPTION("Passive OS fingerprint matching.");
+MODULE_ALIAS("ipt_osf");
+MODULE_ALIAS("ip6t_osf");
MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_OSF);
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 1e657cf715c..a9faae89f95 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -313,10 +313,7 @@ out:
static void recent_table_free(void *addr)
{
- if (is_vmalloc_addr(addr))
- vfree(addr);
- else
- kfree(addr);
+ kvfree(addr);
}
static int recent_mt_check(const struct xt_mtchk_param *par,
diff --git a/net/netfilter/xt_repldata.h b/net/netfilter/xt_repldata.h
index 6efe4e5a81c..8fd324116e6 100644
--- a/net/netfilter/xt_repldata.h
+++ b/net/netfilter/xt_repldata.h
@@ -5,23 +5,35 @@
* they serve as the hanging-off data accessed through repl.data[].
*/
+/* tbl has the following structure equivalent, but is C99 compliant:
+ * struct {
+ * struct type##_replace repl;
+ * struct type##_standard entries[nhooks];
+ * struct type##_error term;
+ * } *tbl;
+ */
+
#define xt_alloc_initial_table(type, typ2) ({ \
unsigned int hook_mask = info->valid_hooks; \
unsigned int nhooks = hweight32(hook_mask); \
unsigned int bytes = 0, hooknum = 0, i = 0; \
struct { \
struct type##_replace repl; \
- struct type##_standard entries[nhooks]; \
- struct type##_error term; \
- } *tbl = kzalloc(sizeof(*tbl), GFP_KERNEL); \
+ struct type##_standard entries[]; \
+ } *tbl; \
+ struct type##_error *term; \
+ size_t term_offset = (offsetof(typeof(*tbl), entries[nhooks]) + \
+ __alignof__(*term) - 1) & ~(__alignof__(*term) - 1); \
+ tbl = kzalloc(term_offset + sizeof(*term), GFP_KERNEL); \
if (tbl == NULL) \
return NULL; \
+ term = (struct type##_error *)&(((char *)tbl)[term_offset]); \
strncpy(tbl->repl.name, info->name, sizeof(tbl->repl.name)); \
- tbl->term = (struct type##_error)typ2##_ERROR_INIT; \
+ *term = (struct type##_error)typ2##_ERROR_INIT; \
tbl->repl.valid_hooks = hook_mask; \
tbl->repl.num_entries = nhooks + 1; \
tbl->repl.size = nhooks * sizeof(struct type##_standard) + \
- sizeof(struct type##_error); \
+ sizeof(struct type##_error); \
for (; hook_mask != 0; hook_mask >>= 1, ++hooknum) { \
if (!(hook_mask & 1)) \
continue; \
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 04748ab649c..e6fac7e3db5 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -636,7 +636,7 @@ static unsigned int netlink_poll(struct file *file, struct socket *sock,
while (nlk->cb_running && netlink_dump_space(nlk)) {
err = netlink_dump(sk);
if (err < 0) {
- sk->sk_err = err;
+ sk->sk_err = -err;
sk->sk_error_report(sk);
break;
}
@@ -1206,7 +1206,8 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol,
struct module *module = NULL;
struct mutex *cb_mutex;
struct netlink_sock *nlk;
- void (*bind)(int group);
+ int (*bind)(int group);
+ void (*unbind)(int group);
int err = 0;
sock->state = SS_UNCONNECTED;
@@ -1232,6 +1233,7 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol,
err = -EPROTONOSUPPORT;
cb_mutex = nl_table[protocol].cb_mutex;
bind = nl_table[protocol].bind;
+ unbind = nl_table[protocol].unbind;
netlink_unlock_table();
if (err < 0)
@@ -1248,6 +1250,7 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol,
nlk = nlk_sk(sock->sk);
nlk->module = module;
nlk->netlink_bind = bind;
+ nlk->netlink_unbind = unbind;
out:
return err;
@@ -1301,6 +1304,7 @@ static int netlink_release(struct socket *sock)
kfree_rcu(old, rcu);
nl_table[sk->sk_protocol].module = NULL;
nl_table[sk->sk_protocol].bind = NULL;
+ nl_table[sk->sk_protocol].unbind = NULL;
nl_table[sk->sk_protocol].flags = 0;
nl_table[sk->sk_protocol].registered = 0;
}
@@ -1360,7 +1364,74 @@ retry:
return err;
}
-static inline int netlink_capable(const struct socket *sock, unsigned int flag)
+/**
+ * __netlink_ns_capable - General netlink message capability test
+ * @nsp: NETLINK_CB of the socket buffer holding a netlink command from userspace.
+ * @user_ns: The user namespace of the capability to use
+ * @cap: The capability to use
+ *
+ * Test to see if the opener of the socket we received the message
+ * from had the capability @cap in the user namespace @user_ns when
+ * the netlink socket was created, and if the sender of the message
+ * has that capability as well.
+ *
+ * The opener (file_ns_capable()) check is skipped when NETLINK_SKB_DST
+ * is set in @nsp->flags; the ns_capable() check is always applied.
+ */
+bool __netlink_ns_capable(const struct netlink_skb_parms *nsp,
+ struct user_namespace *user_ns, int cap)
+{
+ return ((nsp->flags & NETLINK_SKB_DST) ||
+ file_ns_capable(nsp->sk->sk_socket->file, user_ns, cap)) &&
+ ns_capable(user_ns, cap);
+}
+EXPORT_SYMBOL(__netlink_ns_capable);
+
+/**
+ * netlink_ns_capable - General netlink message capability test
+ * @skb: socket buffer holding a netlink command from userspace
+ * @user_ns: The user namespace of the capability to use
+ * @cap: The capability to use
+ *
+ * Test to see if the opener of the socket we received the message
+ * from had the capability @cap in the user namespace @user_ns when
+ * the netlink socket was created, and if the sender of the message
+ * has that capability as well.
+ *
+ * Thin wrapper: passes NETLINK_CB(@skb) to __netlink_ns_capable().
+ */
+bool netlink_ns_capable(const struct sk_buff *skb,
+ struct user_namespace *user_ns, int cap)
+{
+ return __netlink_ns_capable(&NETLINK_CB(skb), user_ns, cap);
+}
+EXPORT_SYMBOL(netlink_ns_capable);
+
+/**
+ * netlink_capable - Netlink global message capability test
+ * @skb: socket buffer holding a netlink command from userspace
+ * @cap: The capability to use
+ *
+ * Test to see if the opener of the socket we received the message
+ * from had the capability @cap in all user namespaces when the
+ * netlink socket was created, and if the sender of the message has
+ * that capability as well.
+ *
+ * Implemented as netlink_ns_capable() against &init_user_ns.
+ */
+bool netlink_capable(const struct sk_buff *skb, int cap)
+{
+ return netlink_ns_capable(skb, &init_user_ns, cap);
+}
+EXPORT_SYMBOL(netlink_capable);
+
+/**
+ * netlink_net_capable - Netlink network namespace message capability test
+ * @skb: socket buffer holding a netlink command from userspace
+ * @cap: The capability to use
+ *
+ * Test to see if the opener of the socket we received the message
+ * from had the capability @cap over the network namespace of the
+ * socket we received the message from when the netlink socket was
+ * created, and if the sender of the message has that capability as
+ * well.
+ *
+ * Implemented as netlink_ns_capable() against the user namespace
+ * recorded in sock_net(@skb->sk)->user_ns.
+ */
+bool netlink_net_capable(const struct sk_buff *skb, int cap)
+{
+ return netlink_ns_capable(skb, sock_net(skb->sk)->user_ns, cap);
+}
+EXPORT_SYMBOL(netlink_net_capable);
+
+static inline int netlink_allowed(const struct socket *sock, unsigned int flag)
{
return (nl_table[sock->sk->sk_protocol].flags & flag) ||
ns_capable(sock_net(sock->sk)->user_ns, CAP_NET_ADMIN);
@@ -1411,6 +1482,19 @@ static int netlink_realloc_groups(struct sock *sk)
return err;
}
+/*
+ * Roll back the per-group bind callbacks issued by a netlink_bind()
+ * attempt that did not complete.
+ *
+ * @group:  first group index that must NOT be undone; indices
+ *          0 .. @group - 1 are considered.
+ * @groups: bitmask of the groups the caller asked to bind.
+ * @nlk:    socket whose ->netlink_unbind() callback is invoked.
+ *
+ * Only indices whose bit is set in @groups are unbound, mirroring the
+ * test netlink_bind() applies before calling ->netlink_bind().  The
+ * bit tested must therefore be the loop index, not @group: the bit of
+ * the failing @group is always set, so testing it would unbind every
+ * lower index whether or not it had been requested.
+ *
+ * Does nothing when the protocol registered no unbind callback.
+ *
+ * NOTE(review): @groups is a single unsigned long, so @group is assumed
+ * not to exceed its width -- confirm against nlk->ngroups.  The
+ * insert/autobind failure path passes nlk->ngroups - 1 here, which
+ * leaves the highest group index out of the rollback -- confirm that
+ * this is intended.
+ */
+static void netlink_unbind(int group, long unsigned int groups,
+			   struct netlink_sock *nlk)
+{
+	int undo;
+
+	if (!nlk->netlink_unbind)
+		return;
+
+	for (undo = 0; undo < group; undo++)
+		if (test_bit(undo, &groups))
+			nlk->netlink_unbind(undo);
+}
+
static int netlink_bind(struct socket *sock, struct sockaddr *addr,
int addr_len)
{
@@ -1419,6 +1503,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
struct netlink_sock *nlk = nlk_sk(sk);
struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
int err;
+ long unsigned int groups = nladdr->nl_groups;
if (addr_len < sizeof(struct sockaddr_nl))
return -EINVAL;
@@ -1427,45 +1512,53 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
return -EINVAL;
/* Only superuser is allowed to listen multicasts */
- if (nladdr->nl_groups) {
- if (!netlink_capable(sock, NL_CFG_F_NONROOT_RECV))
+ if (groups) {
+ if (!netlink_allowed(sock, NL_CFG_F_NONROOT_RECV))
return -EPERM;
err = netlink_realloc_groups(sk);
if (err)
return err;
}
- if (nlk->portid) {
+ if (nlk->portid)
if (nladdr->nl_pid != nlk->portid)
return -EINVAL;
- } else {
+
+ if (nlk->netlink_bind && groups) {
+ int group;
+
+ for (group = 0; group < nlk->ngroups; group++) {
+ if (!test_bit(group, &groups))
+ continue;
+ err = nlk->netlink_bind(group);
+ if (!err)
+ continue;
+ netlink_unbind(group, groups, nlk);
+ return err;
+ }
+ }
+
+ if (!nlk->portid) {
err = nladdr->nl_pid ?
netlink_insert(sk, net, nladdr->nl_pid) :
netlink_autobind(sock);
- if (err)
+ if (err) {
+ netlink_unbind(nlk->ngroups - 1, groups, nlk);
return err;
+ }
}
- if (!nladdr->nl_groups && (nlk->groups == NULL || !(u32)nlk->groups[0]))
+ if (!groups && (nlk->groups == NULL || !(u32)nlk->groups[0]))
return 0;
netlink_table_grab();
netlink_update_subscriptions(sk, nlk->subscriptions +
- hweight32(nladdr->nl_groups) -
+ hweight32(groups) -
hweight32(nlk->groups[0]));
- nlk->groups[0] = (nlk->groups[0] & ~0xffffffffUL) | nladdr->nl_groups;
+ nlk->groups[0] = (nlk->groups[0] & ~0xffffffffUL) | groups;
netlink_update_listeners(sk);
netlink_table_ungrab();
- if (nlk->netlink_bind && nlk->groups[0]) {
- int i;
-
- for (i=0; i<nlk->ngroups; i++) {
- if (test_bit(i, nlk->groups))
- nlk->netlink_bind(i);
- }
- }
-
return 0;
}
@@ -1490,7 +1583,7 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr,
return -EINVAL;
if ((nladdr->nl_groups || nladdr->nl_pid) &&
- !netlink_capable(sock, NL_CFG_F_NONROOT_SEND))
+ !netlink_allowed(sock, NL_CFG_F_NONROOT_SEND))
return -EPERM;
if (!nlk->portid)
@@ -1653,7 +1746,7 @@ static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb)
else
#endif /* CONFIG_NETLINK_MMAP */
skb_queue_tail(&sk->sk_receive_queue, skb);
- sk->sk_data_ready(sk, len);
+ sk->sk_data_ready(sk);
return len;
}
@@ -2096,20 +2189,24 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
break;
case NETLINK_ADD_MEMBERSHIP:
case NETLINK_DROP_MEMBERSHIP: {
- if (!netlink_capable(sock, NL_CFG_F_NONROOT_RECV))
+ if (!netlink_allowed(sock, NL_CFG_F_NONROOT_RECV))
return -EPERM;
err = netlink_realloc_groups(sk);
if (err)
return err;
if (!val || val - 1 >= nlk->ngroups)
return -EINVAL;
+ if (optname == NETLINK_ADD_MEMBERSHIP && nlk->netlink_bind) {
+ err = nlk->netlink_bind(val);
+ if (err)
+ return err;
+ }
netlink_table_grab();
netlink_update_socket_mc(nlk, val,
optname == NETLINK_ADD_MEMBERSHIP);
netlink_table_ungrab();
-
- if (nlk->netlink_bind)
- nlk->netlink_bind(val);
+ if (optname == NETLINK_DROP_MEMBERSHIP && nlk->netlink_unbind)
+ nlk->netlink_unbind(val);
err = 0;
break;
@@ -2228,6 +2325,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
struct sk_buff *skb;
int err;
struct scm_cookie scm;
+ u32 netlink_skb_flags = 0;
if (msg->msg_flags&MSG_OOB)
return -EOPNOTSUPP;
@@ -2247,8 +2345,9 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
dst_group = ffs(addr->nl_groups);
err = -EPERM;
if ((dst_group || dst_portid) &&
- !netlink_capable(sock, NL_CFG_F_NONROOT_SEND))
+ !netlink_allowed(sock, NL_CFG_F_NONROOT_SEND))
goto out;
+ netlink_skb_flags |= NETLINK_SKB_DST;
} else {
dst_portid = nlk->dst_portid;
dst_group = nlk->dst_group;
@@ -2278,6 +2377,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
NETLINK_CB(skb).portid = nlk->portid;
NETLINK_CB(skb).dst_group = dst_group;
NETLINK_CB(skb).creds = siocb->scm->creds;
+ NETLINK_CB(skb).flags = netlink_skb_flags;
err = -EFAULT;
if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) {
@@ -2343,6 +2443,11 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
}
#endif
+ /* Record the max length of recvmsg() calls for future allocations */
+ nlk->max_recvmsg_len = max(nlk->max_recvmsg_len, len);
+ nlk->max_recvmsg_len = min_t(size_t, nlk->max_recvmsg_len,
+ 16384);
+
copied = data_skb->len;
if (len < copied) {
msg->msg_flags |= MSG_TRUNC;
@@ -2378,7 +2483,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) {
ret = netlink_dump(sk);
if (ret) {
- sk->sk_err = ret;
+ sk->sk_err = -ret;
sk->sk_error_report(sk);
}
}
@@ -2389,7 +2494,7 @@ out:
return err ? : copied;
}
-static void netlink_data_ready(struct sock *sk, int len)
+static void netlink_data_ready(struct sock *sk)
{
BUG();
}
@@ -2549,7 +2654,7 @@ __nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int fla
struct nlmsghdr *nlh;
int size = nlmsg_msg_size(len);
- nlh = (struct nlmsghdr*)skb_put(skb, NLMSG_ALIGN(size));
+ nlh = (struct nlmsghdr *)skb_put(skb, NLMSG_ALIGN(size));
nlh->nlmsg_type = type;
nlh->nlmsg_len = size;
nlh->nlmsg_flags = flags;
@@ -2587,7 +2692,27 @@ static int netlink_dump(struct sock *sk)
if (!netlink_rx_is_mmaped(sk) &&
atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
goto errout_skb;
- skb = netlink_alloc_skb(sk, alloc_size, nlk->portid, GFP_KERNEL);
+
+ /* NLMSG_GOODSIZE is small to avoid high order allocations being
+ * required, but it makes sense to _attempt_ a 16K bytes allocation
+ * to reduce number of system calls on dump operations, if user
+ * ever provided a big enough buffer.
+ */
+ if (alloc_size < nlk->max_recvmsg_len) {
+ skb = netlink_alloc_skb(sk,
+ nlk->max_recvmsg_len,
+ nlk->portid,
+ GFP_KERNEL |
+ __GFP_NOWARN |
+ __GFP_NORETRY);
+ /* available room should be exact amount to avoid MSG_TRUNC */
+ if (skb)
+ skb_reserve(skb, skb_tailroom(skb) -
+ nlk->max_recvmsg_len);
+ }
+ if (!skb)
+ skb = netlink_alloc_skb(sk, alloc_size, nlk->portid,
+ GFP_KERNEL);
if (!skb)
goto errout_skb;
netlink_skb_set_owner_r(skb, sk);
diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h
index acbd774eeb7..0b59d441f5b 100644
--- a/net/netlink/af_netlink.h
+++ b/net/netlink/af_netlink.h
@@ -31,13 +31,15 @@ struct netlink_sock {
u32 ngroups;
unsigned long *groups;
unsigned long state;
+ size_t max_recvmsg_len;
wait_queue_head_t wait;
bool cb_running;
struct netlink_callback cb;
struct mutex *cb_mutex;
struct mutex cb_def_mutex;
void (*netlink_rcv)(struct sk_buff *skb);
- void (*netlink_bind)(int group);
+ int (*netlink_bind)(int group);
+ void (*netlink_unbind)(int group);
struct module *module;
#ifdef CONFIG_NETLINK_MMAP
struct mutex pg_vec_lock;
@@ -73,7 +75,8 @@ struct netlink_table {
unsigned int groups;
struct mutex *cb_mutex;
struct module *module;
- void (*bind)(int group);
+ int (*bind)(int group);
+ void (*unbind)(int group);
bool (*compare)(struct net *net, struct sock *sock);
int registered;
};
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index b1dcdb932a8..76393f2f4b2 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -317,7 +317,7 @@ static void genl_unregister_mc_groups(struct genl_family *family)
}
}
-static int genl_validate_ops(struct genl_family *family)
+static int genl_validate_ops(const struct genl_family *family)
{
const struct genl_ops *ops = family->ops;
unsigned int n_ops = family->n_ops;
@@ -337,10 +337,6 @@ static int genl_validate_ops(struct genl_family *family)
return -EINVAL;
}
- /* family is not registered yet, so no locking needed */
- family->ops = ops;
- family->n_ops = n_ops;
-
return 0;
}
@@ -561,7 +557,7 @@ static int genl_family_rcv_msg(struct genl_family *family,
return -EOPNOTSUPP;
if ((ops->flags & GENL_ADMIN_PERM) &&
- !capable(CAP_NET_ADMIN))
+ !netlink_capable(skb, CAP_NET_ADMIN))
return -EPERM;
if ((nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) {
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index b74aa075552..ede50d197e1 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -1011,7 +1011,7 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev)
skb_queue_head(&sk->sk_receive_queue, skb);
if (!sock_flag(sk, SOCK_DEAD))
- sk->sk_data_ready(sk, skb->len);
+ sk->sk_data_ready(sk);
bh_unlock_sock(sk);
diff --git a/net/nfc/core.c b/net/nfc/core.c
index ca1e65f4b13..819b87702b7 100644
--- a/net/nfc/core.c
+++ b/net/nfc/core.c
@@ -280,9 +280,6 @@ static struct nfc_target *nfc_find_target(struct nfc_dev *dev, u32 target_idx)
{
int i;
- if (dev->n_targets == 0)
- return NULL;
-
for (i = 0; i < dev->n_targets; i++) {
if (dev->targets[i].idx == target_idx)
return &dev->targets[i];
@@ -546,9 +543,9 @@ error:
struct nfc_se *nfc_find_se(struct nfc_dev *dev, u32 se_idx)
{
- struct nfc_se *se, *n;
+ struct nfc_se *se;
- list_for_each_entry_safe(se, n, &dev->secure_elements, list)
+ list_for_each_entry(se, &dev->secure_elements, list)
if (se->idx == se_idx)
return se;
@@ -655,9 +652,6 @@ int nfc_set_remote_general_bytes(struct nfc_dev *dev, u8 *gb, u8 gb_len)
{
pr_debug("dev_name=%s gb_len=%d\n", dev_name(&dev->dev), gb_len);
- if (gb_len > NFC_MAX_GT_LEN)
- return -EINVAL;
-
return nfc_llcp_set_remote_gb(dev, gb, gb_len);
}
EXPORT_SYMBOL(nfc_set_remote_general_bytes);
diff --git a/net/nfc/digital.h b/net/nfc/digital.h
index 08b29b55ea6..71ad7eefddd 100644
--- a/net/nfc/digital.h
+++ b/net/nfc/digital.h
@@ -71,7 +71,14 @@ static inline int digital_in_send_cmd(struct nfc_digital_dev *ddev,
void digital_poll_next_tech(struct nfc_digital_dev *ddev);
int digital_in_send_sens_req(struct nfc_digital_dev *ddev, u8 rf_tech);
+int digital_in_send_sensb_req(struct nfc_digital_dev *ddev, u8 rf_tech);
int digital_in_send_sensf_req(struct nfc_digital_dev *ddev, u8 rf_tech);
+int digital_in_send_iso15693_inv_req(struct nfc_digital_dev *ddev, u8 rf_tech);
+
+int digital_in_iso_dep_pull_sod(struct nfc_digital_dev *ddev,
+ struct sk_buff *skb);
+int digital_in_iso_dep_push_sod(struct nfc_digital_dev *ddev,
+ struct sk_buff *skb);
int digital_target_found(struct nfc_digital_dev *ddev,
struct nfc_target *target, u8 protocol);
diff --git a/net/nfc/digital_core.c b/net/nfc/digital_core.c
index c129d1571ca..a6ce3c627e4 100644
--- a/net/nfc/digital_core.c
+++ b/net/nfc/digital_core.c
@@ -22,9 +22,13 @@
#define DIGITAL_PROTO_NFCA_RF_TECH \
(NFC_PROTO_JEWEL_MASK | NFC_PROTO_MIFARE_MASK | NFC_PROTO_NFC_DEP_MASK)
+#define DIGITAL_PROTO_NFCB_RF_TECH NFC_PROTO_ISO14443_B_MASK
+
#define DIGITAL_PROTO_NFCF_RF_TECH \
(NFC_PROTO_FELICA_MASK | NFC_PROTO_NFC_DEP_MASK)
+#define DIGITAL_PROTO_ISO15693_RF_TECH NFC_PROTO_ISO15693_MASK
+
struct digital_cmd {
struct list_head queue;
@@ -331,6 +335,24 @@ int digital_target_found(struct nfc_digital_dev *ddev,
}
break;
+ case NFC_PROTO_ISO15693:
+ framing = NFC_DIGITAL_FRAMING_ISO15693_T5T;
+ check_crc = digital_skb_check_crc_b;
+ add_crc = digital_skb_add_crc_b;
+ break;
+
+ case NFC_PROTO_ISO14443:
+ framing = NFC_DIGITAL_FRAMING_NFCA_T4T;
+ check_crc = digital_skb_check_crc_a;
+ add_crc = digital_skb_add_crc_a;
+ break;
+
+ case NFC_PROTO_ISO14443_B:
+ framing = NFC_DIGITAL_FRAMING_NFCB_T4T;
+ check_crc = digital_skb_check_crc_b;
+ add_crc = digital_skb_add_crc_b;
+ break;
+
default:
pr_err("Invalid protocol %d\n", protocol);
return -EINVAL;
@@ -364,6 +386,8 @@ int digital_target_found(struct nfc_digital_dev *ddev,
void digital_poll_next_tech(struct nfc_digital_dev *ddev)
{
+ u8 rand_mod;
+
digital_switch_rf(ddev, 0);
mutex_lock(&ddev->poll_lock);
@@ -373,8 +397,8 @@ void digital_poll_next_tech(struct nfc_digital_dev *ddev)
return;
}
- ddev->poll_tech_index = (ddev->poll_tech_index + 1) %
- ddev->poll_tech_count;
+ get_random_bytes(&rand_mod, sizeof(rand_mod));
+ ddev->poll_tech_index = rand_mod % ddev->poll_tech_count;
mutex_unlock(&ddev->poll_lock);
@@ -461,7 +485,11 @@ static int digital_start_poll(struct nfc_dev *nfc_dev, __u32 im_protocols,
digital_add_poll_tech(ddev, NFC_DIGITAL_RF_TECH_106A,
digital_in_send_sens_req);
- if (im_protocols & DIGITAL_PROTO_NFCF_RF_TECH) {
+ if (matching_im_protocols & DIGITAL_PROTO_NFCB_RF_TECH)
+ digital_add_poll_tech(ddev, NFC_DIGITAL_RF_TECH_106B,
+ digital_in_send_sensb_req);
+
+ if (matching_im_protocols & DIGITAL_PROTO_NFCF_RF_TECH) {
digital_add_poll_tech(ddev, NFC_DIGITAL_RF_TECH_212F,
digital_in_send_sensf_req);
@@ -469,7 +497,11 @@ static int digital_start_poll(struct nfc_dev *nfc_dev, __u32 im_protocols,
digital_in_send_sensf_req);
}
- if (tm_protocols & NFC_PROTO_NFC_DEP_MASK) {
+ if (matching_im_protocols & DIGITAL_PROTO_ISO15693_RF_TECH)
+ digital_add_poll_tech(ddev, NFC_DIGITAL_RF_TECH_ISO15693,
+ digital_in_send_iso15693_inv_req);
+
+ if (matching_tm_protocols & NFC_PROTO_NFC_DEP_MASK) {
if (ddev->ops->tg_listen_mdaa) {
digital_add_poll_tech(ddev, 0,
digital_tg_listen_mdaa);
@@ -607,20 +639,31 @@ static void digital_in_send_complete(struct nfc_digital_dev *ddev, void *arg,
if (IS_ERR(resp)) {
rc = PTR_ERR(resp);
+ resp = NULL;
goto done;
}
- if (ddev->curr_protocol == NFC_PROTO_MIFARE)
+ if (ddev->curr_protocol == NFC_PROTO_MIFARE) {
rc = digital_in_recv_mifare_res(resp);
- else
- rc = ddev->skb_check_crc(resp);
+ /* crc check is done in digital_in_recv_mifare_res() */
+ goto done;
+ }
+
+ if ((ddev->curr_protocol == NFC_PROTO_ISO14443) ||
+ (ddev->curr_protocol == NFC_PROTO_ISO14443_B)) {
+ rc = digital_in_iso_dep_pull_sod(ddev, resp);
+ if (rc)
+ goto done;
+ }
+
+ rc = ddev->skb_check_crc(resp);
+done:
if (rc) {
kfree_skb(resp);
resp = NULL;
}
-done:
data_exch->cb(data_exch->cb_context, resp, rc);
kfree(data_exch);
@@ -632,6 +675,7 @@ static int digital_in_send(struct nfc_dev *nfc_dev, struct nfc_target *target,
{
struct nfc_digital_dev *ddev = nfc_get_drvdata(nfc_dev);
struct digital_data_exch *data_exch;
+ int rc;
data_exch = kzalloc(sizeof(struct digital_data_exch), GFP_KERNEL);
if (!data_exch) {
@@ -642,13 +686,28 @@ static int digital_in_send(struct nfc_dev *nfc_dev, struct nfc_target *target,
data_exch->cb = cb;
data_exch->cb_context = cb_context;
- if (ddev->curr_protocol == NFC_PROTO_NFC_DEP)
- return digital_in_send_dep_req(ddev, target, skb, data_exch);
+ if (ddev->curr_protocol == NFC_PROTO_NFC_DEP) {
+ rc = digital_in_send_dep_req(ddev, target, skb, data_exch);
+ goto exit;
+ }
+
+ if ((ddev->curr_protocol == NFC_PROTO_ISO14443) ||
+ (ddev->curr_protocol == NFC_PROTO_ISO14443_B)) {
+ rc = digital_in_iso_dep_push_sod(ddev, skb);
+ if (rc)
+ goto exit;
+ }
ddev->skb_add_crc(skb);
- return digital_in_send_cmd(ddev, skb, 500, digital_in_send_complete,
- data_exch);
+ rc = digital_in_send_cmd(ddev, skb, 500, digital_in_send_complete,
+ data_exch);
+
+exit:
+ if (rc)
+ kfree(data_exch);
+
+ return rc;
}
static struct nfc_ops digital_nfc_ops = {
@@ -700,6 +759,12 @@ struct nfc_digital_dev *nfc_digital_allocate_device(struct nfc_digital_ops *ops,
ddev->protocols |= NFC_PROTO_FELICA_MASK;
if (supported_protocols & NFC_PROTO_NFC_DEP_MASK)
ddev->protocols |= NFC_PROTO_NFC_DEP_MASK;
+ if (supported_protocols & NFC_PROTO_ISO15693_MASK)
+ ddev->protocols |= NFC_PROTO_ISO15693_MASK;
+ if (supported_protocols & NFC_PROTO_ISO14443_MASK)
+ ddev->protocols |= NFC_PROTO_ISO14443_MASK;
+ if (supported_protocols & NFC_PROTO_ISO14443_B_MASK)
+ ddev->protocols |= NFC_PROTO_ISO14443_B_MASK;
ddev->tx_headroom = tx_headroom + DIGITAL_MAX_HEADER_LEN;
ddev->tx_tailroom = tx_tailroom + DIGITAL_CRC_LEN;
diff --git a/net/nfc/digital_dep.c b/net/nfc/digital_dep.c
index d4ed25ff723..171cb9949ab 100644
--- a/net/nfc/digital_dep.c
+++ b/net/nfc/digital_dep.c
@@ -224,9 +224,8 @@ int digital_in_send_atr_req(struct nfc_digital_dev *ddev,
ddev->skb_add_crc(skb);
- digital_in_send_cmd(ddev, skb, 500, digital_in_recv_atr_res, target);
-
- return 0;
+ return digital_in_send_cmd(ddev, skb, 500, digital_in_recv_atr_res,
+ target);
}
static int digital_in_send_rtox(struct nfc_digital_dev *ddev,
diff --git a/net/nfc/digital_technology.c b/net/nfc/digital_technology.c
index 251c8c753eb..c2c1c0189b7 100644
--- a/net/nfc/digital_technology.c
+++ b/net/nfc/digital_technology.c
@@ -30,6 +30,7 @@
#define DIGITAL_SEL_RES_NFCID1_COMPLETE(sel_res) (!((sel_res) & 0x04))
#define DIGITAL_SEL_RES_IS_T2T(sel_res) (!((sel_res) & 0x60))
+#define DIGITAL_SEL_RES_IS_T4T(sel_res) ((sel_res) & 0x20)
#define DIGITAL_SEL_RES_IS_NFC_DEP(sel_res) ((sel_res) & 0x40)
#define DIGITAL_SENS_RES_IS_T1T(sens_res) (((sens_res) & 0x0C00) == 0x0C00)
@@ -40,6 +41,24 @@
#define DIGITAL_MIFARE_READ_RES_LEN 16
#define DIGITAL_MIFARE_ACK_RES 0x0A
+#define DIGITAL_CMD_SENSB_REQ 0x05
+#define DIGITAL_SENSB_ADVANCED BIT(5)
+#define DIGITAL_SENSB_EXTENDED BIT(4)
+#define DIGITAL_SENSB_ALLB_REQ BIT(3)
+#define DIGITAL_SENSB_N(n) ((n) & 0x7)
+
+#define DIGITAL_CMD_SENSB_RES 0x50
+
+#define DIGITAL_CMD_ATTRIB_REQ 0x1D
+#define DIGITAL_ATTRIB_P1_TR0_DEFAULT (0x0 << 6)
+#define DIGITAL_ATTRIB_P1_TR1_DEFAULT (0x0 << 4)
+#define DIGITAL_ATTRIB_P1_SUPRESS_EOS BIT(3)
+#define DIGITAL_ATTRIB_P1_SUPRESS_SOS BIT(2)
+#define DIGITAL_ATTRIB_P2_LISTEN_POLL_1 (0x0 << 6)
+#define DIGITAL_ATTRIB_P2_POLL_LISTEN_1 (0x0 << 4)
+#define DIGITAL_ATTRIB_P2_MAX_FRAME_256 0x8
+#define DIGITAL_ATTRIB_P4_DID(n) ((n) & 0xf)
+
#define DIGITAL_CMD_SENSF_REQ 0x00
#define DIGITAL_CMD_SENSF_RES 0x01
@@ -51,6 +70,35 @@
#define DIGITAL_SENSF_REQ_RC_SC 1
#define DIGITAL_SENSF_REQ_RC_AP 2
+#define DIGITAL_CMD_ISO15693_INVENTORY_REQ 0x01
+
+#define DIGITAL_ISO15693_REQ_FLAG_DATA_RATE BIT(1)
+#define DIGITAL_ISO15693_REQ_FLAG_INVENTORY BIT(2)
+#define DIGITAL_ISO15693_REQ_FLAG_NB_SLOTS BIT(5)
+#define DIGITAL_ISO15693_RES_FLAG_ERROR BIT(0)
+#define DIGITAL_ISO15693_RES_IS_VALID(flags) \
+ (!((flags) & DIGITAL_ISO15693_RES_FLAG_ERROR))
+
+#define DIGITAL_ISO_DEP_I_PCB 0x02
+#define DIGITAL_ISO_DEP_PNI(pni) ((pni) & 0x01)
+
+#define DIGITAL_ISO_DEP_PCB_TYPE(pcb) ((pcb) & 0xC0)
+
+#define DIGITAL_ISO_DEP_I_BLOCK 0x00
+
+#define DIGITAL_ISO_DEP_BLOCK_HAS_DID(pcb) ((pcb) & 0x08)
+
+static const u8 digital_ats_fsc[] = {
+ 16, 24, 32, 40, 48, 64, 96, 128,
+};
+
+#define DIGITAL_ATS_FSCI(t0) ((t0) & 0x0F)
+#define DIGITAL_SENSB_FSCI(pi2) (((pi2) & 0xF0) >> 4)
+#define DIGITAL_ATS_MAX_FSC 256
+
+#define DIGITAL_RATS_BYTE1 0xE0
+#define DIGITAL_RATS_PARAM 0x80
+
struct digital_sdd_res {
u8 nfcid1[4];
u8 bcc;
@@ -63,6 +111,32 @@ struct digital_sel_req {
u8 bcc;
} __packed;
+struct digital_sensb_req {
+ u8 cmd;
+ u8 afi;
+ u8 param;
+} __packed;
+
+struct digital_sensb_res {
+ u8 cmd;
+ u8 nfcid0[4];
+ u8 app_data[4];
+ u8 proto_info[3];
+} __packed;
+
+struct digital_attrib_req {
+ u8 cmd;
+ u8 nfcid0[4];
+ u8 param1;
+ u8 param2;
+ u8 param3;
+ u8 param4;
+} __packed;
+
+struct digital_attrib_res {
+ u8 mbli_did;
+} __packed;
+
struct digital_sensf_req {
u8 cmd;
u8 sc1;
@@ -82,9 +156,127 @@ struct digital_sensf_res {
u8 rd[2];
} __packed;
+struct digital_iso15693_inv_req {
+ u8 flags;
+ u8 cmd;
+ u8 mask_len;
+ u64 mask;
+} __packed;
+
+struct digital_iso15693_inv_res {
+ u8 flags;
+ u8 dsfid;
+ u64 uid;
+} __packed;
+
static int digital_in_send_sdd_req(struct nfc_digital_dev *ddev,
struct nfc_target *target);
+int digital_in_iso_dep_pull_sod(struct nfc_digital_dev *ddev,
+ struct sk_buff *skb)
+{
+ u8 pcb;
+ u8 block_type;
+
+ if (skb->len < 1)
+ return -EIO;
+
+ pcb = *skb->data;
+ block_type = DIGITAL_ISO_DEP_PCB_TYPE(pcb);
+
+ /* No support for R-block nor S-block */
+ if (block_type != DIGITAL_ISO_DEP_I_BLOCK) {
+ pr_err("ISO_DEP R-block and S-block not supported\n");
+ return -EIO;
+ }
+
+ if (DIGITAL_ISO_DEP_BLOCK_HAS_DID(pcb)) {
+ pr_err("DID field in ISO_DEP PCB not supported\n");
+ return -EIO;
+ }
+
+ skb_pull(skb, 1);
+
+ return 0;
+}
+
+int digital_in_iso_dep_push_sod(struct nfc_digital_dev *ddev,
+ struct sk_buff *skb)
+{
+ /*
+ * Chaining not supported so skb->len + 1 PCB byte + 2 CRC bytes must
+ * not be greater than remote FSC
+ */
+ if (skb->len + 3 > ddev->target_fsc)
+ return -EIO;
+
+ skb_push(skb, 1);
+
+ *skb->data = DIGITAL_ISO_DEP_I_PCB | ddev->curr_nfc_dep_pni;
+
+ ddev->curr_nfc_dep_pni =
+ DIGITAL_ISO_DEP_PNI(ddev->curr_nfc_dep_pni + 1);
+
+ return 0;
+}
+
+static void digital_in_recv_ats(struct nfc_digital_dev *ddev, void *arg,
+ struct sk_buff *resp)
+{
+ struct nfc_target *target = arg;
+ u8 fsdi;
+ int rc;
+
+ if (IS_ERR(resp)) {
+ rc = PTR_ERR(resp);
+ resp = NULL;
+ goto exit;
+ }
+
+ if (resp->len < 2) {
+ rc = -EIO;
+ goto exit;
+ }
+
+ fsdi = DIGITAL_ATS_FSCI(resp->data[1]);
+ if (fsdi >= 8)
+ ddev->target_fsc = DIGITAL_ATS_MAX_FSC;
+ else
+ ddev->target_fsc = digital_ats_fsc[fsdi];
+
+ ddev->curr_nfc_dep_pni = 0;
+
+ rc = digital_target_found(ddev, target, NFC_PROTO_ISO14443);
+
+exit:
+ dev_kfree_skb(resp);
+ kfree(target);
+
+ if (rc)
+ digital_poll_next_tech(ddev);
+}
+
+static int digital_in_send_rats(struct nfc_digital_dev *ddev,
+ struct nfc_target *target)
+{
+ int rc;
+ struct sk_buff *skb;
+
+ skb = digital_skb_alloc(ddev, 2);
+ if (!skb)
+ return -ENOMEM;
+
+ *skb_put(skb, 1) = DIGITAL_RATS_BYTE1;
+ *skb_put(skb, 1) = DIGITAL_RATS_PARAM;
+
+ rc = digital_in_send_cmd(ddev, skb, 30, digital_in_recv_ats,
+ target);
+ if (rc)
+ kfree_skb(skb);
+
+ return rc;
+}
+
static void digital_in_recv_sel_res(struct nfc_digital_dev *ddev, void *arg,
struct sk_buff *resp)
{
@@ -122,8 +314,19 @@ static void digital_in_recv_sel_res(struct nfc_digital_dev *ddev, void *arg,
goto exit_free_skb;
}
+ target->sel_res = sel_res;
+
if (DIGITAL_SEL_RES_IS_T2T(sel_res)) {
nfc_proto = NFC_PROTO_MIFARE;
+ } else if (DIGITAL_SEL_RES_IS_T4T(sel_res)) {
+ rc = digital_in_send_rats(ddev, target);
+ if (rc)
+ goto exit;
+ /*
+ * Skip target_found and don't free the target for now. Both are
+ * done when receiving the ATS.
+ */
+ goto exit_free_skb;
} else if (DIGITAL_SEL_RES_IS_NFC_DEP(sel_res)) {
nfc_proto = NFC_PROTO_NFC_DEP;
} else {
@@ -131,8 +334,6 @@ static void digital_in_recv_sel_res(struct nfc_digital_dev *ddev, void *arg,
goto exit;
}
- target->sel_res = sel_res;
-
rc = digital_target_found(ddev, target, nfc_proto);
exit:
@@ -375,6 +576,175 @@ int digital_in_recv_mifare_res(struct sk_buff *resp)
return -EIO;
}
+static void digital_in_recv_attrib_res(struct nfc_digital_dev *ddev, void *arg,
+ struct sk_buff *resp)
+{
+ struct nfc_target *target = arg;
+ struct digital_attrib_res *attrib_res;
+ int rc;
+
+ if (IS_ERR(resp)) {
+ rc = PTR_ERR(resp);
+ resp = NULL;
+ goto exit;
+ }
+
+ if (resp->len < sizeof(*attrib_res)) {
+ PROTOCOL_ERR("12.6.2");
+ rc = -EIO;
+ goto exit;
+ }
+
+ attrib_res = (struct digital_attrib_res *)resp->data;
+
+ if (attrib_res->mbli_did & 0x0f) {
+ PROTOCOL_ERR("12.6.2.1");
+ rc = -EIO;
+ goto exit;
+ }
+
+ rc = digital_target_found(ddev, target, NFC_PROTO_ISO14443_B);
+
+exit:
+ dev_kfree_skb(resp);
+ kfree(target);
+
+ if (rc)
+ digital_poll_next_tech(ddev);
+}
+
+static int digital_in_send_attrib_req(struct nfc_digital_dev *ddev,
+ struct nfc_target *target,
+ struct digital_sensb_res *sensb_res)
+{
+ struct digital_attrib_req *attrib_req;
+ struct sk_buff *skb;
+ int rc;
+
+ skb = digital_skb_alloc(ddev, sizeof(*attrib_req));
+ if (!skb)
+ return -ENOMEM;
+
+ attrib_req = (struct digital_attrib_req *)skb_put(skb,
+ sizeof(*attrib_req));
+
+ attrib_req->cmd = DIGITAL_CMD_ATTRIB_REQ;
+ memcpy(attrib_req->nfcid0, sensb_res->nfcid0,
+ sizeof(attrib_req->nfcid0));
+ attrib_req->param1 = DIGITAL_ATTRIB_P1_TR0_DEFAULT |
+ DIGITAL_ATTRIB_P1_TR1_DEFAULT;
+ attrib_req->param2 = DIGITAL_ATTRIB_P2_LISTEN_POLL_1 |
+ DIGITAL_ATTRIB_P2_POLL_LISTEN_1 |
+ DIGITAL_ATTRIB_P2_MAX_FRAME_256;
+ attrib_req->param3 = sensb_res->proto_info[1] & 0x07;
+ attrib_req->param4 = DIGITAL_ATTRIB_P4_DID(0);
+
+ rc = digital_in_send_cmd(ddev, skb, 30, digital_in_recv_attrib_res,
+ target);
+ if (rc)
+ kfree_skb(skb);
+
+ return rc;
+}
+
+static void digital_in_recv_sensb_res(struct nfc_digital_dev *ddev, void *arg,
+ struct sk_buff *resp)
+{
+ struct nfc_target *target = NULL;
+ struct digital_sensb_res *sensb_res;
+ u8 fsci;
+ int rc;
+
+ if (IS_ERR(resp)) {
+ rc = PTR_ERR(resp);
+ resp = NULL;
+ goto exit;
+ }
+
+ if (resp->len != sizeof(*sensb_res)) {
+ PROTOCOL_ERR("5.6.2.1");
+ rc = -EIO;
+ goto exit;
+ }
+
+ sensb_res = (struct digital_sensb_res *)resp->data;
+
+ if (sensb_res->cmd != DIGITAL_CMD_SENSB_RES) {
+ PROTOCOL_ERR("5.6.2");
+ rc = -EIO;
+ goto exit;
+ }
+
+ if (!(sensb_res->proto_info[1] & BIT(0))) {
+ PROTOCOL_ERR("5.6.2.12");
+ rc = -EIO;
+ goto exit;
+ }
+
+ if (sensb_res->proto_info[1] & BIT(3)) {
+ PROTOCOL_ERR("5.6.2.16");
+ rc = -EIO;
+ goto exit;
+ }
+
+ fsci = DIGITAL_SENSB_FSCI(sensb_res->proto_info[1]);
+ if (fsci >= 8)
+ ddev->target_fsc = DIGITAL_ATS_MAX_FSC;
+ else
+ ddev->target_fsc = digital_ats_fsc[fsci];
+
+ target = kzalloc(sizeof(struct nfc_target), GFP_KERNEL);
+ if (!target) {
+ rc = -ENOMEM;
+ goto exit;
+ }
+
+ rc = digital_in_send_attrib_req(ddev, target, sensb_res);
+
+exit:
+ dev_kfree_skb(resp);
+
+ if (rc) {
+ kfree(target);
+ digital_poll_next_tech(ddev);
+ }
+}
+
+int digital_in_send_sensb_req(struct nfc_digital_dev *ddev, u8 rf_tech)
+{
+ struct digital_sensb_req *sensb_req;
+ struct sk_buff *skb;
+ int rc;
+
+ rc = digital_in_configure_hw(ddev, NFC_DIGITAL_CONFIG_RF_TECH,
+ NFC_DIGITAL_RF_TECH_106B);
+ if (rc)
+ return rc;
+
+ rc = digital_in_configure_hw(ddev, NFC_DIGITAL_CONFIG_FRAMING,
+ NFC_DIGITAL_FRAMING_NFCB);
+ if (rc)
+ return rc;
+
+ skb = digital_skb_alloc(ddev, sizeof(*sensb_req));
+ if (!skb)
+ return -ENOMEM;
+
+ sensb_req = (struct digital_sensb_req *)skb_put(skb,
+ sizeof(*sensb_req));
+
+ sensb_req->cmd = DIGITAL_CMD_SENSB_REQ;
+ sensb_req->afi = 0x00; /* All families and sub-families */
+ sensb_req->param = DIGITAL_SENSB_N(0);
+
+ rc = digital_in_send_cmd(ddev, skb, 30, digital_in_recv_sensb_res,
+ NULL);
+ if (rc)
+ kfree_skb(skb);
+
+ return rc;
+}
+
static void digital_in_recv_sensf_res(struct nfc_digital_dev *ddev, void *arg,
struct sk_buff *resp)
{
@@ -473,6 +843,93 @@ int digital_in_send_sensf_req(struct nfc_digital_dev *ddev, u8 rf_tech)
return rc;
}
+static void digital_in_recv_iso15693_inv_res(struct nfc_digital_dev *ddev,
+ void *arg, struct sk_buff *resp)
+{
+ struct digital_iso15693_inv_res *res;
+ struct nfc_target *target = NULL;
+ int rc;
+
+ if (IS_ERR(resp)) {
+ rc = PTR_ERR(resp);
+ resp = NULL;
+ goto out_free_skb;
+ }
+
+ if (resp->len != sizeof(*res)) {
+ rc = -EIO;
+ goto out_free_skb;
+ }
+
+ res = (struct digital_iso15693_inv_res *)resp->data;
+
+ if (!DIGITAL_ISO15693_RES_IS_VALID(res->flags)) {
+ PROTOCOL_ERR("ISO15693 - 10.3.1");
+ rc = -EINVAL;
+ goto out_free_skb;
+ }
+
+ target = kzalloc(sizeof(*target), GFP_KERNEL);
+ if (!target) {
+ rc = -ENOMEM;
+ goto out_free_skb;
+ }
+
+ target->is_iso15693 = 1;
+ target->iso15693_dsfid = res->dsfid;
+ memcpy(target->iso15693_uid, &res->uid, sizeof(target->iso15693_uid));
+
+ rc = digital_target_found(ddev, target, NFC_PROTO_ISO15693);
+
+ kfree(target);
+
+out_free_skb:
+ dev_kfree_skb(resp);
+
+ if (rc)
+ digital_poll_next_tech(ddev);
+}
+
+int digital_in_send_iso15693_inv_req(struct nfc_digital_dev *ddev, u8 rf_tech)
+{
+ struct digital_iso15693_inv_req *req;
+ struct sk_buff *skb;
+ int rc;
+
+ rc = digital_in_configure_hw(ddev, NFC_DIGITAL_CONFIG_RF_TECH,
+ NFC_DIGITAL_RF_TECH_ISO15693);
+ if (rc)
+ return rc;
+
+ rc = digital_in_configure_hw(ddev, NFC_DIGITAL_CONFIG_FRAMING,
+ NFC_DIGITAL_FRAMING_ISO15693_INVENTORY);
+ if (rc)
+ return rc;
+
+ skb = digital_skb_alloc(ddev, sizeof(*req));
+ if (!skb)
+ return -ENOMEM;
+
+ skb_put(skb, sizeof(*req) - sizeof(req->mask)); /* No mask */
+ req = (struct digital_iso15693_inv_req *)skb->data;
+
+ /* Single sub-carrier, high data rate, no AFI, single slot
+ * Inventory command
+ */
+ req->flags = DIGITAL_ISO15693_REQ_FLAG_DATA_RATE |
+ DIGITAL_ISO15693_REQ_FLAG_INVENTORY |
+ DIGITAL_ISO15693_REQ_FLAG_NB_SLOTS;
+ req->cmd = DIGITAL_CMD_ISO15693_INVENTORY_REQ;
+ req->mask_len = 0;
+
+ rc = digital_in_send_cmd(ddev, skb, 30,
+ digital_in_recv_iso15693_inv_res, NULL);
+ if (rc)
+ kfree_skb(skb);
+
+ return rc;
+}
+
static int digital_tg_send_sel_res(struct nfc_digital_dev *ddev)
{
struct sk_buff *skb;
@@ -634,6 +1091,18 @@ exit:
dev_kfree_skb(resp);
}
+static void digital_tg_recv_atr_or_sensf_req(struct nfc_digital_dev *ddev,
+ void *arg, struct sk_buff *resp)
+{
+ if (!IS_ERR(resp) && (resp->len >= 2) &&
+ (resp->data[1] == DIGITAL_CMD_SENSF_REQ))
+ digital_tg_recv_sensf_req(ddev, arg, resp);
+ else
+ digital_tg_recv_atr_req(ddev, arg, resp);
+
+ return;
+}
+
static int digital_tg_send_sensf_res(struct nfc_digital_dev *ddev,
struct digital_sensf_req *sensf_req)
{
@@ -644,7 +1113,7 @@ static int digital_tg_send_sensf_res(struct nfc_digital_dev *ddev,
size = sizeof(struct digital_sensf_res);
- if (sensf_req->rc != DIGITAL_SENSF_REQ_RC_NONE)
+ if (sensf_req->rc == DIGITAL_SENSF_REQ_RC_NONE)
size -= sizeof(sensf_res->rd);
skb = digital_skb_alloc(ddev, size);
@@ -679,7 +1148,7 @@ static int digital_tg_send_sensf_res(struct nfc_digital_dev *ddev,
digital_skb_add_crc_f(skb);
rc = digital_tg_send_cmd(ddev, skb, 300,
- digital_tg_recv_atr_req, NULL);
+ digital_tg_recv_atr_or_sensf_req, NULL);
if (rc)
kfree_skb(skb);
diff --git a/net/nfc/hci/command.c b/net/nfc/hci/command.c
index a9f4d2e62d8..677d24bb70f 100644
--- a/net/nfc/hci/command.c
+++ b/net/nfc/hci/command.c
@@ -26,6 +26,8 @@
#include "hci.h"
+#define MAX_FWI 4949
+
static int nfc_hci_execute_cmd_async(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd,
const u8 *param, size_t param_len,
data_exchange_cb_t cb, void *cb_context)
@@ -37,7 +39,7 @@ static int nfc_hci_execute_cmd_async(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd,
* for all commands?
*/
return nfc_hci_hcp_message_tx(hdev, pipe, NFC_HCI_HCP_COMMAND, cmd,
- param, param_len, cb, cb_context, 3000);
+ param, param_len, cb, cb_context, MAX_FWI);
}
/*
@@ -82,7 +84,7 @@ static int nfc_hci_execute_cmd(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd,
NFC_HCI_HCP_COMMAND, cmd,
param, param_len,
nfc_hci_execute_cb, &hcp_ew,
- 3000);
+ MAX_FWI);
if (hcp_ew.exec_result < 0)
return hcp_ew.exec_result;
diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c
index d45b638e77c..47403705197 100644
--- a/net/nfc/hci/core.c
+++ b/net/nfc/hci/core.c
@@ -225,7 +225,7 @@ int nfc_hci_target_discovered(struct nfc_hci_dev *hdev, u8 gate)
goto exit;
}
- targets->sens_res = be16_to_cpu(*(u16 *)atqa_skb->data);
+ targets->sens_res = be16_to_cpu(*(__be16 *)atqa_skb->data);
targets->sel_res = sak_skb->data[0];
r = nfc_hci_get_param(hdev, NFC_HCI_RF_READER_A_GATE,
@@ -380,34 +380,31 @@ static int hci_dev_session_init(struct nfc_hci_dev *hdev)
if (r < 0)
goto disconnect_all;
- if (skb->len && skb->len == strlen(hdev->init_data.session_id))
- if (memcmp(hdev->init_data.session_id, skb->data,
- skb->len) == 0) {
- /* TODO ELa: restore gate<->pipe table from
- * some TBD location.
- * note: it doesn't seem possible to get the chip
- * currently open gate/pipe table.
- * It is only possible to obtain the supported
- * gate list.
- */
+ if (skb->len && skb->len == strlen(hdev->init_data.session_id) &&
+ (memcmp(hdev->init_data.session_id, skb->data,
+ skb->len) == 0) && hdev->ops->load_session) {
+ /* Restore gate<->pipe table from some proprietary location. */
- /* goto exit
- * For now, always do a full initialization */
- }
+ r = hdev->ops->load_session(hdev);
- r = nfc_hci_disconnect_all_gates(hdev);
- if (r < 0)
- goto exit;
+ if (r < 0)
+ goto disconnect_all;
+ } else {
- r = hci_dev_connect_gates(hdev, hdev->init_data.gate_count,
- hdev->init_data.gates);
- if (r < 0)
- goto disconnect_all;
+ r = nfc_hci_disconnect_all_gates(hdev);
+ if (r < 0)
+ goto exit;
- r = nfc_hci_set_param(hdev, NFC_HCI_ADMIN_GATE,
- NFC_HCI_ADMIN_SESSION_IDENTITY,
- hdev->init_data.session_id,
- strlen(hdev->init_data.session_id));
+ r = hci_dev_connect_gates(hdev, hdev->init_data.gate_count,
+ hdev->init_data.gates);
+ if (r < 0)
+ goto disconnect_all;
+
+ r = nfc_hci_set_param(hdev, NFC_HCI_ADMIN_GATE,
+ NFC_HCI_ADMIN_SESSION_IDENTITY,
+ hdev->init_data.session_id,
+ strlen(hdev->init_data.session_id));
+ }
if (r == 0)
goto exit;
diff --git a/net/nfc/hci/llc.c b/net/nfc/hci/llc.c
index a07d2b81848..1b90c053185 100644
--- a/net/nfc/hci/llc.c
+++ b/net/nfc/hci/llc.c
@@ -20,14 +20,12 @@
#include "llc.h"
-static struct list_head llc_engines;
+static LIST_HEAD(llc_engines);
int nfc_llc_init(void)
{
int r;
- INIT_LIST_HEAD(&llc_engines);
-
r = nfc_llc_nop_register();
if (r)
goto exit;
diff --git a/net/nfc/llcp_commands.c b/net/nfc/llcp_commands.c
index bec6ed15f50..a3ad69a4c64 100644
--- a/net/nfc/llcp_commands.c
+++ b/net/nfc/llcp_commands.c
@@ -387,7 +387,7 @@ int nfc_llcp_send_symm(struct nfc_dev *dev)
__net_timestamp(skb);
- nfc_llcp_send_to_raw_sock(local, skb, NFC_LLCP_DIRECTION_TX);
+ nfc_llcp_send_to_raw_sock(local, skb, NFC_DIRECTION_TX);
return nfc_data_exchange(dev, local->target_idx, skb,
nfc_llcp_recv, local);
diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c
index 6184bd1fba3..51e78879731 100644
--- a/net/nfc/llcp_core.c
+++ b/net/nfc/llcp_core.c
@@ -27,7 +27,7 @@
static u8 llcp_magic[3] = {0x46, 0x66, 0x6d};
-static struct list_head llcp_devices;
+static LIST_HEAD(llcp_devices);
static void nfc_llcp_rx_skb(struct nfc_llcp_local *local, struct sk_buff *skb);
@@ -293,9 +293,9 @@ static void nfc_llcp_sdreq_timer(unsigned long data)
struct nfc_llcp_local *nfc_llcp_find_local(struct nfc_dev *dev)
{
- struct nfc_llcp_local *local, *n;
+ struct nfc_llcp_local *local;
- list_for_each_entry_safe(local, n, &llcp_devices, list)
+ list_for_each_entry(local, &llcp_devices, list)
if (local->dev == dev)
return local;
@@ -609,14 +609,16 @@ u8 *nfc_llcp_general_bytes(struct nfc_dev *dev, size_t *general_bytes_len)
int nfc_llcp_set_remote_gb(struct nfc_dev *dev, u8 *gb, u8 gb_len)
{
- struct nfc_llcp_local *local = nfc_llcp_find_local(dev);
+ struct nfc_llcp_local *local;
+
+ if (gb_len < 3 || gb_len > NFC_MAX_GT_LEN)
+ return -EINVAL;
+ local = nfc_llcp_find_local(dev);
if (local == NULL) {
pr_err("No LLCP device\n");
return -ENODEV;
}
- if (gb_len < 3)
- return -EINVAL;
memset(local->remote_gb, 0, NFC_MAX_GT_LEN);
memcpy(local->remote_gb, gb, gb_len);
@@ -678,16 +680,17 @@ void nfc_llcp_send_to_raw_sock(struct nfc_llcp_local *local,
continue;
if (skb_copy == NULL) {
- skb_copy = __pskb_copy(skb, NFC_LLCP_RAW_HEADER_SIZE,
- GFP_ATOMIC);
+ skb_copy = __pskb_copy_fclone(skb, NFC_RAW_HEADER_SIZE,
+ GFP_ATOMIC, true);
if (skb_copy == NULL)
continue;
- data = skb_push(skb_copy, NFC_LLCP_RAW_HEADER_SIZE);
+ data = skb_push(skb_copy, NFC_RAW_HEADER_SIZE);
data[0] = local->dev ? local->dev->idx : 0xFF;
- data[1] = direction;
+ data[1] = direction & 0x01;
+ data[1] |= (RAW_PAYLOAD_LLCP << 1);
}
nskb = skb_clone(skb_copy, GFP_ATOMIC);
@@ -745,7 +748,7 @@ static void nfc_llcp_tx_work(struct work_struct *work)
__net_timestamp(skb);
nfc_llcp_send_to_raw_sock(local, skb,
- NFC_LLCP_DIRECTION_TX);
+ NFC_DIRECTION_TX);
ret = nfc_data_exchange(local->dev, local->target_idx,
skb, nfc_llcp_recv, local);
@@ -974,7 +977,7 @@ static void nfc_llcp_recv_connect(struct nfc_llcp_local *local,
new_sk->sk_state = LLCP_CONNECTED;
/* Wake the listening processes */
- parent->sk_data_ready(parent, 0);
+ parent->sk_data_ready(parent);
/* Send CC */
nfc_llcp_send_cc(new_sock);
@@ -1474,7 +1477,7 @@ static void nfc_llcp_rx_work(struct work_struct *work)
__net_timestamp(skb);
- nfc_llcp_send_to_raw_sock(local, skb, NFC_LLCP_DIRECTION_RX);
+ nfc_llcp_send_to_raw_sock(local, skb, NFC_DIRECTION_RX);
nfc_llcp_rx_skb(local, skb);
@@ -1622,8 +1625,6 @@ void nfc_llcp_unregister_device(struct nfc_dev *dev)
int __init nfc_llcp_init(void)
{
- INIT_LIST_HEAD(&llcp_devices);
-
return nfc_llcp_sock_init();
}
diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c
index 56db888b1cd..2b400e1a869 100644
--- a/net/nfc/nci/core.c
+++ b/net/nfc/nci/core.c
@@ -74,7 +74,7 @@ static int __nci_request(struct nci_dev *ndev,
ndev->req_status = NCI_REQ_PEND;
- init_completion(&ndev->req_completion);
+ reinit_completion(&ndev->req_completion);
req(ndev, opt);
completion_rc =
wait_for_completion_interruptible_timeout(&ndev->req_completion,
@@ -709,6 +709,7 @@ struct nci_dev *nci_allocate_device(struct nci_ops *ops,
ndev->ops = ops;
ndev->tx_headroom = tx_headroom;
ndev->tx_tailroom = tx_tailroom;
+ init_completion(&ndev->req_completion);
ndev->nfc_dev = nfc_allocate_device(&nci_nfc_ops,
supported_protocols,
@@ -860,6 +861,10 @@ static int nci_send_frame(struct nci_dev *ndev, struct sk_buff *skb)
/* Get rid of skb owner, prior to sending to the driver. */
skb_orphan(skb);
+ /* Send copy to sniffer */
+ nfc_send_to_raw_sock(ndev->nfc_dev, skb,
+ RAW_PAYLOAD_NCI, NFC_DIRECTION_TX);
+
return ndev->ops->send(ndev, skb);
}
@@ -934,6 +939,11 @@ static void nci_rx_work(struct work_struct *work)
struct sk_buff *skb;
while ((skb = skb_dequeue(&ndev->rx_q))) {
+
+ /* Send copy to sniffer */
+ nfc_send_to_raw_sock(ndev->nfc_dev, skb,
+ RAW_PAYLOAD_NCI, NFC_DIRECTION_RX);
+
/* Process frame */
switch (nci_mt(skb->data)) {
case NCI_MT_RSP_PKT:
diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c
index 1e905097456..f8f6af23138 100644
--- a/net/nfc/nci/ntf.c
+++ b/net/nfc/nci/ntf.c
@@ -366,7 +366,6 @@ static int nci_extract_activation_params_nfc_dep(struct nci_dev *ndev,
struct nci_rf_intf_activated_ntf *ntf, __u8 *data)
{
struct activation_params_poll_nfc_dep *poll;
- int i;
switch (ntf->activation_rf_tech_and_mode) {
case NCI_NFC_A_PASSIVE_POLL_MODE:
@@ -374,10 +373,8 @@ static int nci_extract_activation_params_nfc_dep(struct nci_dev *ndev,
poll = &ntf->activation_params.poll_nfc_dep;
poll->atr_res_len = min_t(__u8, *data++, 63);
pr_debug("atr_res_len %d\n", poll->atr_res_len);
- if (poll->atr_res_len > 0) {
- for (i = 0; i < poll->atr_res_len; i++)
- poll->atr_res[poll->atr_res_len-1-i] = data[i];
- }
+ if (poll->atr_res_len > 0)
+ memcpy(poll->atr_res, data, poll->atr_res_len);
break;
default:
diff --git a/net/nfc/nci/spi.c b/net/nfc/nci/spi.c
index f1d426f10cc..ec250e77763 100644
--- a/net/nfc/nci/spi.c
+++ b/net/nfc/nci/spi.c
@@ -105,7 +105,7 @@ int nci_spi_send(struct nci_spi *nspi,
if (ret != 0 || nspi->acknowledge_mode == NCI_SPI_CRC_DISABLED)
goto done;
- init_completion(&nspi->req_completion);
+ reinit_completion(&nspi->req_completion);
completion_rc = wait_for_completion_interruptible_timeout(
&nspi->req_completion,
NCI_SPI_SEND_TIMEOUT);
@@ -145,6 +145,7 @@ struct nci_spi *nci_spi_allocate_spi(struct spi_device *spi,
nspi->spi = spi;
nspi->ndev = ndev;
+ init_completion(&nspi->req_completion);
return nspi;
}
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index ebbf6fb88b3..43cb1c17e26 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -94,6 +94,14 @@ static int nfc_genl_send_target(struct sk_buff *msg, struct nfc_target *target,
target->sensf_res))
goto nla_put_failure;
+ if (target->is_iso15693) {
+ if (nla_put_u8(msg, NFC_ATTR_TARGET_ISO15693_DSFID,
+ target->iso15693_dsfid) ||
+ nla_put(msg, NFC_ATTR_TARGET_ISO15693_UID,
+ sizeof(target->iso15693_uid), target->iso15693_uid))
+ goto nla_put_failure;
+ }
+
return genlmsg_end(msg, hdr);
nla_put_failure:
diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h
index 9d6e74f7e6b..88d60064890 100644
--- a/net/nfc/nfc.h
+++ b/net/nfc/nfc.h
@@ -40,6 +40,12 @@ struct nfc_rawsock {
struct work_struct tx_work;
bool tx_work_scheduled;
};
+
+struct nfc_sock_list {
+ struct hlist_head head;
+ rwlock_t lock;
+};
+
#define nfc_rawsock(sk) ((struct nfc_rawsock *) sk)
#define to_rawsock_sk(_tx_work) \
((struct sock *) container_of(_tx_work, struct nfc_rawsock, tx_work))
diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c
index c27a6e86cae..11c3544ea54 100644
--- a/net/nfc/rawsock.c
+++ b/net/nfc/rawsock.c
@@ -27,6 +27,24 @@
#include "nfc.h"
+static struct nfc_sock_list raw_sk_list = {
+ .lock = __RW_LOCK_UNLOCKED(raw_sk_list.lock)
+};
+
+static void nfc_sock_link(struct nfc_sock_list *l, struct sock *sk)
+{
+ write_lock(&l->lock);
+ sk_add_node(sk, &l->head);
+ write_unlock(&l->lock);
+}
+
+static void nfc_sock_unlink(struct nfc_sock_list *l, struct sock *sk)
+{
+ write_lock(&l->lock);
+ sk_del_node_init(sk);
+ write_unlock(&l->lock);
+}
+
static void rawsock_write_queue_purge(struct sock *sk)
{
pr_debug("sk=%p\n", sk);
@@ -57,6 +75,9 @@ static int rawsock_release(struct socket *sock)
if (!sk)
return 0;
+ if (sock->type == SOCK_RAW)
+ nfc_sock_unlink(&raw_sk_list, sk);
+
sock_orphan(sk);
sock_put(sk);
@@ -275,6 +296,26 @@ static const struct proto_ops rawsock_ops = {
.mmap = sock_no_mmap,
};
+static const struct proto_ops rawsock_raw_ops = {
+ .family = PF_NFC,
+ .owner = THIS_MODULE,
+ .release = rawsock_release,
+ .bind = sock_no_bind,
+ .connect = sock_no_connect,
+ .socketpair = sock_no_socketpair,
+ .accept = sock_no_accept,
+ .getname = sock_no_getname,
+ .poll = datagram_poll,
+ .ioctl = sock_no_ioctl,
+ .listen = sock_no_listen,
+ .shutdown = sock_no_shutdown,
+ .setsockopt = sock_no_setsockopt,
+ .getsockopt = sock_no_getsockopt,
+ .sendmsg = sock_no_sendmsg,
+ .recvmsg = rawsock_recvmsg,
+ .mmap = sock_no_mmap,
+};
+
static void rawsock_destruct(struct sock *sk)
{
pr_debug("sk=%p\n", sk);
@@ -300,10 +341,13 @@ static int rawsock_create(struct net *net, struct socket *sock,
pr_debug("sock=%p\n", sock);
- if (sock->type != SOCK_SEQPACKET)
+ if ((sock->type != SOCK_SEQPACKET) && (sock->type != SOCK_RAW))
return -ESOCKTNOSUPPORT;
- sock->ops = &rawsock_ops;
+ if (sock->type == SOCK_RAW)
+ sock->ops = &rawsock_raw_ops;
+ else
+ sock->ops = &rawsock_ops;
sk = sk_alloc(net, PF_NFC, GFP_ATOMIC, nfc_proto->proto);
if (!sk)
@@ -313,13 +357,53 @@ static int rawsock_create(struct net *net, struct socket *sock,
sk->sk_protocol = nfc_proto->id;
sk->sk_destruct = rawsock_destruct;
sock->state = SS_UNCONNECTED;
-
- INIT_WORK(&nfc_rawsock(sk)->tx_work, rawsock_tx_work);
- nfc_rawsock(sk)->tx_work_scheduled = false;
+ if (sock->type == SOCK_RAW)
+ nfc_sock_link(&raw_sk_list, sk);
+ else {
+ INIT_WORK(&nfc_rawsock(sk)->tx_work, rawsock_tx_work);
+ nfc_rawsock(sk)->tx_work_scheduled = false;
+ }
return 0;
}
+void nfc_send_to_raw_sock(struct nfc_dev *dev, struct sk_buff *skb,
+ u8 payload_type, u8 direction)
+{
+ struct sk_buff *skb_copy = NULL, *nskb;
+ struct sock *sk;
+ u8 *data;
+
+ read_lock(&raw_sk_list.lock);
+
+ sk_for_each(sk, &raw_sk_list.head) {
+ if (!skb_copy) {
+ skb_copy = __pskb_copy_fclone(skb, NFC_RAW_HEADER_SIZE,
+ GFP_ATOMIC, true);
+ if (!skb_copy)
+ continue;
+
+ data = skb_push(skb_copy, NFC_RAW_HEADER_SIZE);
+
+ data[0] = dev ? dev->idx : 0xFF;
+ data[1] = direction & 0x01;
+ data[1] |= (payload_type << 1);
+ }
+
+ nskb = skb_clone(skb_copy, GFP_ATOMIC);
+ if (!nskb)
+ continue;
+
+ if (sock_queue_rcv_skb(sk, nskb))
+ kfree_skb(nskb);
+ }
+
+ read_unlock(&raw_sk_list.lock);
+
+ kfree_skb(skb_copy);
+}
+EXPORT_SYMBOL(nfc_send_to_raw_sock);
+
static struct proto rawsock_proto = {
.name = "NFC_RAW",
.owner = THIS_MODULE,
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 2c77e7b1a91..e70d8b18e96 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -134,8 +134,8 @@ static int set_eth_addr(struct sk_buff *skb,
skb_postpull_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);
- memcpy(eth_hdr(skb)->h_source, eth_key->eth_src, ETH_ALEN);
- memcpy(eth_hdr(skb)->h_dest, eth_key->eth_dst, ETH_ALEN);
+ ether_addr_copy(eth_hdr(skb)->h_source, eth_key->eth_src);
+ ether_addr_copy(eth_hdr(skb)->h_dest, eth_key->eth_dst);
ovs_skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);
@@ -551,6 +551,8 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
case OVS_ACTION_ATTR_SAMPLE:
err = sample(dp, skb, a);
+ if (unlikely(err)) /* skb already freed. */
+ return err;
break;
}
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 270b77dfac3..9db4bf6740d 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2007-2013 Nicira, Inc.
+ * Copyright (c) 2007-2014 Nicira, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
@@ -44,11 +44,11 @@
#include <linux/netfilter_ipv4.h>
#include <linux/inetdevice.h>
#include <linux/list.h>
-#include <linux/lockdep.h>
#include <linux/openvswitch.h>
#include <linux/rculist.h>
#include <linux/dmi.h>
-#include <linux/workqueue.h>
+#include <linux/genetlink.h>
+#include <net/genetlink.h>
#include <net/genetlink.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
@@ -62,6 +62,31 @@
int ovs_net_id __read_mostly;
+static struct genl_family dp_packet_genl_family;
+static struct genl_family dp_flow_genl_family;
+static struct genl_family dp_datapath_genl_family;
+
+static struct genl_multicast_group ovs_dp_flow_multicast_group = {
+ .name = OVS_FLOW_MCGROUP
+};
+
+static struct genl_multicast_group ovs_dp_datapath_multicast_group = {
+ .name = OVS_DATAPATH_MCGROUP
+};
+
+struct genl_multicast_group ovs_dp_vport_multicast_group = {
+ .name = OVS_VPORT_MCGROUP
+};
+
+/* Check whether a reply message needs to be built.
+ * OVS userspace sets the NLM_F_ECHO flag if it needs the reply. */
+static bool ovs_must_notify(struct genl_info *info,
+ const struct genl_multicast_group *grp)
+{
+ return info->nlhdr->nlmsg_flags & NLM_F_ECHO ||
+ netlink_has_listeners(genl_info_net(info)->genl_sock, 0);
+}
+
static void ovs_notify(struct genl_family *family,
struct sk_buff *skb, struct genl_info *info)
{
@@ -173,6 +198,7 @@ static struct hlist_head *vport_hash_bucket(const struct datapath *dp,
return &dp->ports[port_no & (DP_VPORT_HASH_BUCKETS - 1)];
}
+/* Called with ovs_mutex or RCU read lock. */
struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no)
{
struct vport *vport;
@@ -250,28 +276,18 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)
OVS_CB(skb)->flow = flow;
OVS_CB(skb)->pkt_key = &key;
- ovs_flow_stats_update(OVS_CB(skb)->flow, skb);
+ ovs_flow_stats_update(OVS_CB(skb)->flow, key.tp.flags, skb);
ovs_execute_actions(dp, skb);
stats_counter = &stats->n_hit;
out:
/* Update datapath statistics. */
- u64_stats_update_begin(&stats->sync);
+ u64_stats_update_begin(&stats->syncp);
(*stats_counter)++;
stats->n_mask_hit += n_mask_hit;
- u64_stats_update_end(&stats->sync);
+ u64_stats_update_end(&stats->syncp);
}
-static struct genl_family dp_packet_genl_family = {
- .id = GENL_ID_GENERATE,
- .hdrsize = sizeof(struct ovs_header),
- .name = OVS_PACKET_FAMILY,
- .version = OVS_PACKET_VERSION,
- .maxattr = OVS_PACKET_ATTR_MAX,
- .netnsok = true,
- .parallel_ops = true,
-};
-
int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
const struct dp_upcall_info *upcall_info)
{
@@ -295,9 +311,9 @@ int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
err:
stats = this_cpu_ptr(dp->stats_percpu);
- u64_stats_update_begin(&stats->sync);
+ u64_stats_update_begin(&stats->syncp);
stats->n_lost++;
- u64_stats_update_end(&stats->sync);
+ u64_stats_update_end(&stats->syncp);
return err;
}
@@ -524,7 +540,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
packet->protocol = htons(ETH_P_802_2);
/* Build an sw_flow for sending this packet. */
- flow = ovs_flow_alloc(false);
+ flow = ovs_flow_alloc();
err = PTR_ERR(flow);
if (IS_ERR(flow))
goto err_kfree_skb;
@@ -590,6 +606,18 @@ static const struct genl_ops dp_packet_genl_ops[] = {
}
};
+static struct genl_family dp_packet_genl_family = {
+ .id = GENL_ID_GENERATE,
+ .hdrsize = sizeof(struct ovs_header),
+ .name = OVS_PACKET_FAMILY,
+ .version = OVS_PACKET_VERSION,
+ .maxattr = OVS_PACKET_ATTR_MAX,
+ .netnsok = true,
+ .parallel_ops = true,
+ .ops = dp_packet_genl_ops,
+ .n_ops = ARRAY_SIZE(dp_packet_genl_ops),
+};
+
static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats,
struct ovs_dp_megaflow_stats *mega_stats)
{
@@ -610,9 +638,9 @@ static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats,
percpu_stats = per_cpu_ptr(dp->stats_percpu, i);
do {
- start = u64_stats_fetch_begin_bh(&percpu_stats->sync);
+ start = u64_stats_fetch_begin_irq(&percpu_stats->syncp);
local_stats = *percpu_stats;
- } while (u64_stats_fetch_retry_bh(&percpu_stats->sync, start));
+ } while (u64_stats_fetch_retry_irq(&percpu_stats->syncp, start));
stats->n_hit += local_stats.n_hit;
stats->n_missed += local_stats.n_missed;
@@ -621,26 +649,6 @@ static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats,
}
}
-static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
- [OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
- [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
- [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
-};
-
-static struct genl_family dp_flow_genl_family = {
- .id = GENL_ID_GENERATE,
- .hdrsize = sizeof(struct ovs_header),
- .name = OVS_FLOW_FAMILY,
- .version = OVS_FLOW_VERSION,
- .maxattr = OVS_FLOW_ATTR_MAX,
- .netnsok = true,
- .parallel_ops = true,
-};
-
-static struct genl_multicast_group ovs_dp_flow_multicast_group = {
- .name = OVS_FLOW_MCGROUP
-};
-
static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts)
{
return NLMSG_ALIGN(sizeof(struct ovs_header))
@@ -652,8 +660,8 @@ static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts)
+ nla_total_size(acts->actions_len); /* OVS_FLOW_ATTR_ACTIONS */
}
-/* Called with ovs_mutex. */
-static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
+/* Called with ovs_mutex or RCU read lock. */
+static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
struct sk_buff *skb, u32 portid,
u32 seq, u32 flags, u8 cmd)
{
@@ -670,7 +678,7 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
if (!ovs_header)
return -EMSGSIZE;
- ovs_header->dp_ifindex = get_dpifindex(dp);
+ ovs_header->dp_ifindex = dp_ifindex;
/* Fill flow key. */
nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY);
@@ -693,6 +701,7 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
nla_nest_end(skb, nla);
ovs_flow_stats_get(flow, &stats, &used, &tcp_flags);
+
if (used &&
nla_put_u64(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used)))
goto nla_put_failure;
@@ -720,9 +729,9 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
const struct sw_flow_actions *sf_acts;
sf_acts = rcu_dereference_ovsl(flow->sf_acts);
-
err = ovs_nla_put_actions(sf_acts->actions,
sf_acts->actions_len, skb);
+
if (!err)
nla_nest_end(skb, start);
else {
@@ -743,113 +752,128 @@ error:
return err;
}
-static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow,
- struct genl_info *info)
+/* Must not be called with the RCU read lock held. */
+static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *acts,
+ struct genl_info *info,
+ bool always)
{
- size_t len;
+ struct sk_buff *skb;
- len = ovs_flow_cmd_msg_size(ovsl_dereference(flow->sf_acts));
+ if (!always && !ovs_must_notify(info, &ovs_dp_flow_multicast_group))
+ return NULL;
- return genlmsg_new_unicast(len, info, GFP_KERNEL);
+ skb = genlmsg_new_unicast(ovs_flow_cmd_msg_size(acts), info, GFP_KERNEL);
+ if (!skb)
+ return ERR_PTR(-ENOMEM);
+
+ return skb;
}
-static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow,
- struct datapath *dp,
- struct genl_info *info,
- u8 cmd)
+/* Called with ovs_mutex. */
+static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow,
+ int dp_ifindex,
+ struct genl_info *info, u8 cmd,
+ bool always)
{
struct sk_buff *skb;
int retval;
- skb = ovs_flow_cmd_alloc_info(flow, info);
- if (!skb)
- return ERR_PTR(-ENOMEM);
+ skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts), info,
+ always);
+ if (!skb || IS_ERR(skb))
+ return skb;
- retval = ovs_flow_cmd_fill_info(flow, dp, skb, info->snd_portid,
- info->snd_seq, 0, cmd);
+ retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb,
+ info->snd_portid, info->snd_seq, 0,
+ cmd);
BUG_ON(retval < 0);
return skb;
}
-static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
+static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr **a = info->attrs;
struct ovs_header *ovs_header = info->userhdr;
- struct sw_flow_key key, masked_key;
- struct sw_flow *flow = NULL;
+ struct sw_flow *flow, *new_flow;
struct sw_flow_mask mask;
struct sk_buff *reply;
struct datapath *dp;
- struct sw_flow_actions *acts = NULL;
+ struct sw_flow_actions *acts;
struct sw_flow_match match;
- bool exact_5tuple;
int error;
- /* Extract key. */
+ /* Must have key and actions. */
error = -EINVAL;
if (!a[OVS_FLOW_ATTR_KEY])
goto error;
+ if (!a[OVS_FLOW_ATTR_ACTIONS])
+ goto error;
- ovs_match_init(&match, &key, &mask);
- error = ovs_nla_get_match(&match, &exact_5tuple,
+ /* Most of the time we need to allocate a new flow, do it before
+ * locking.
+ */
+ new_flow = ovs_flow_alloc();
+ if (IS_ERR(new_flow)) {
+ error = PTR_ERR(new_flow);
+ goto error;
+ }
+
+ /* Extract key. */
+ ovs_match_init(&match, &new_flow->unmasked_key, &mask);
+ error = ovs_nla_get_match(&match,
a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]);
if (error)
- goto error;
+ goto err_kfree_flow;
+
+ ovs_flow_mask_key(&new_flow->key, &new_flow->unmasked_key, &mask);
/* Validate actions. */
- if (a[OVS_FLOW_ATTR_ACTIONS]) {
- acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_FLOW_ATTR_ACTIONS]));
- error = PTR_ERR(acts);
- if (IS_ERR(acts))
- goto error;
+ acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_FLOW_ATTR_ACTIONS]));
+ error = PTR_ERR(acts);
+ if (IS_ERR(acts))
+ goto err_kfree_flow;
- ovs_flow_mask_key(&masked_key, &key, &mask);
- error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS],
- &masked_key, 0, &acts);
- if (error) {
- OVS_NLERR("Flow actions may not be safe on all matching packets.\n");
- goto err_kfree;
- }
- } else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) {
- error = -EINVAL;
- goto error;
+ error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key,
+ 0, &acts);
+ if (error) {
+ OVS_NLERR("Flow actions may not be safe on all matching packets.\n");
+ goto err_kfree_acts;
+ }
+
+ reply = ovs_flow_cmd_alloc_info(acts, info, false);
+ if (IS_ERR(reply)) {
+ error = PTR_ERR(reply);
+ goto err_kfree_acts;
}
ovs_lock();
dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
- error = -ENODEV;
- if (!dp)
+ if (unlikely(!dp)) {
+ error = -ENODEV;
goto err_unlock_ovs;
-
+ }
/* Check if this is a duplicate flow */
- flow = ovs_flow_tbl_lookup(&dp->table, &key);
- if (!flow) {
- /* Bail out if we're not allowed to create a new flow. */
- error = -ENOENT;
- if (info->genlhdr->cmd == OVS_FLOW_CMD_SET)
- goto err_unlock_ovs;
-
- /* Allocate flow. */
- flow = ovs_flow_alloc(!exact_5tuple);
- if (IS_ERR(flow)) {
- error = PTR_ERR(flow);
- goto err_unlock_ovs;
- }
-
- flow->key = masked_key;
- flow->unmasked_key = key;
- rcu_assign_pointer(flow->sf_acts, acts);
+ flow = ovs_flow_tbl_lookup(&dp->table, &new_flow->unmasked_key);
+ if (likely(!flow)) {
+ rcu_assign_pointer(new_flow->sf_acts, acts);
/* Put flow in bucket. */
- error = ovs_flow_tbl_insert(&dp->table, flow, &mask);
- if (error) {
+ error = ovs_flow_tbl_insert(&dp->table, new_flow, &mask);
+ if (unlikely(error)) {
acts = NULL;
- goto err_flow_free;
+ goto err_unlock_ovs;
}
- reply = ovs_flow_cmd_build_info(flow, dp, info, OVS_FLOW_CMD_NEW);
+ if (unlikely(reply)) {
+ error = ovs_flow_cmd_fill_info(new_flow,
+ ovs_header->dp_ifindex,
+ reply, info->snd_portid,
+ info->snd_seq, 0,
+ OVS_FLOW_CMD_NEW);
+ BUG_ON(error < 0);
+ }
+ ovs_unlock();
} else {
- /* We found a matching flow. */
struct sw_flow_actions *old_acts;
/* Bail out if we're not allowed to modify an existing flow.
@@ -858,40 +882,153 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
* request. We also accept NLM_F_EXCL in case that bug ever
* gets fixed.
*/
- error = -EEXIST;
- if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW &&
- info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL))
+ if (unlikely(info->nlhdr->nlmsg_flags & (NLM_F_CREATE
+ | NLM_F_EXCL))) {
+ error = -EEXIST;
goto err_unlock_ovs;
-
+ }
/* The unmasked key has to be the same for flow updates. */
- if (!ovs_flow_cmp_unmasked_key(flow, &match))
- goto err_unlock_ovs;
-
+ if (unlikely(!ovs_flow_cmp_unmasked_key(flow, &match))) {
+ flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
+ if (!flow) {
+ error = -ENOENT;
+ goto err_unlock_ovs;
+ }
+ }
/* Update actions. */
old_acts = ovsl_dereference(flow->sf_acts);
rcu_assign_pointer(flow->sf_acts, acts);
+
+ if (unlikely(reply)) {
+ error = ovs_flow_cmd_fill_info(flow,
+ ovs_header->dp_ifindex,
+ reply, info->snd_portid,
+ info->snd_seq, 0,
+ OVS_FLOW_CMD_NEW);
+ BUG_ON(error < 0);
+ }
+ ovs_unlock();
+
ovs_nla_free_flow_actions(old_acts);
+ ovs_flow_free(new_flow, false);
+ }
+
+ if (reply)
+ ovs_notify(&dp_flow_genl_family, reply, info);
+ return 0;
+
+err_unlock_ovs:
+ ovs_unlock();
+ kfree_skb(reply);
+err_kfree_acts:
+ kfree(acts);
+err_kfree_flow:
+ ovs_flow_free(new_flow, false);
+error:
+ return error;
+}
+
+static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
+{
+ struct nlattr **a = info->attrs;
+ struct ovs_header *ovs_header = info->userhdr;
+ struct sw_flow_key key, masked_key;
+ struct sw_flow *flow;
+ struct sw_flow_mask mask;
+ struct sk_buff *reply = NULL;
+ struct datapath *dp;
+ struct sw_flow_actions *old_acts = NULL, *acts = NULL;
+ struct sw_flow_match match;
+ int error;
+
+ /* Extract key. */
+ error = -EINVAL;
+ if (!a[OVS_FLOW_ATTR_KEY])
+ goto error;
+
+ ovs_match_init(&match, &key, &mask);
+ error = ovs_nla_get_match(&match,
+ a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]);
+ if (error)
+ goto error;
+
+ /* Validate actions. */
+ if (a[OVS_FLOW_ATTR_ACTIONS]) {
+ acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_FLOW_ATTR_ACTIONS]));
+ error = PTR_ERR(acts);
+ if (IS_ERR(acts))
+ goto error;
+
+ ovs_flow_mask_key(&masked_key, &key, &mask);
+ error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS],
+ &masked_key, 0, &acts);
+ if (error) {
+ OVS_NLERR("Flow actions may not be safe on all matching packets.\n");
+ goto err_kfree_acts;
+ }
+ }
+
+ /* The reply can be allocated before locking if we have acts. */
+ if (acts) {
+ reply = ovs_flow_cmd_alloc_info(acts, info, false);
+ if (IS_ERR(reply)) {
+ error = PTR_ERR(reply);
+ goto err_kfree_acts;
+ }
+ }
+
+ ovs_lock();
+ dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
+ if (unlikely(!dp)) {
+ error = -ENODEV;
+ goto err_unlock_ovs;
+ }
+ /* Check that the flow exists. */
+ flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
+ if (unlikely(!flow)) {
+ error = -ENOENT;
+ goto err_unlock_ovs;
+ }
- reply = ovs_flow_cmd_build_info(flow, dp, info, OVS_FLOW_CMD_NEW);
+ /* Update actions, if present. */
+ if (likely(acts)) {
+ old_acts = ovsl_dereference(flow->sf_acts);
+ rcu_assign_pointer(flow->sf_acts, acts);
- /* Clear stats. */
- if (a[OVS_FLOW_ATTR_CLEAR])
- ovs_flow_stats_clear(flow);
+ if (unlikely(reply)) {
+ error = ovs_flow_cmd_fill_info(flow,
+ ovs_header->dp_ifindex,
+ reply, info->snd_portid,
+ info->snd_seq, 0,
+ OVS_FLOW_CMD_NEW);
+ BUG_ON(error < 0);
+ }
+ } else {
+ /* Without acts the reply could not be allocated before locking. */
+ reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex,
+ info, OVS_FLOW_CMD_NEW, false);
+ if (unlikely(IS_ERR(reply))) {
+ error = PTR_ERR(reply);
+ goto err_unlock_ovs;
+ }
}
+
+ /* Clear stats. */
+ if (a[OVS_FLOW_ATTR_CLEAR])
+ ovs_flow_stats_clear(flow);
ovs_unlock();
- if (!IS_ERR(reply))
+ if (reply)
ovs_notify(&dp_flow_genl_family, reply, info);
- else
- genl_set_err(&dp_flow_genl_family, sock_net(skb->sk), 0,
- 0, PTR_ERR(reply));
+ if (old_acts)
+ ovs_nla_free_flow_actions(old_acts);
+
return 0;
-err_flow_free:
- ovs_flow_free(flow, false);
err_unlock_ovs:
ovs_unlock();
-err_kfree:
+ kfree_skb(reply);
+err_kfree_acts:
kfree(acts);
error:
return error;
@@ -914,7 +1051,7 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
}
ovs_match_init(&match, &key, NULL);
- err = ovs_nla_get_match(&match, NULL, a[OVS_FLOW_ATTR_KEY], NULL);
+ err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL);
if (err)
return err;
@@ -925,13 +1062,14 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
goto unlock;
}
- flow = ovs_flow_tbl_lookup(&dp->table, &key);
- if (!flow || !ovs_flow_cmp_unmasked_key(flow, &match)) {
+ flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
+ if (!flow) {
err = -ENOENT;
goto unlock;
}
- reply = ovs_flow_cmd_build_info(flow, dp, info, OVS_FLOW_CMD_NEW);
+ reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info,
+ OVS_FLOW_CMD_NEW, true);
if (IS_ERR(reply)) {
err = PTR_ERR(reply);
goto unlock;
@@ -955,45 +1093,53 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
struct sw_flow_match match;
int err;
+ if (likely(a[OVS_FLOW_ATTR_KEY])) {
+ ovs_match_init(&match, &key, NULL);
+ err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL);
+ if (unlikely(err))
+ return err;
+ }
+
ovs_lock();
dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
- if (!dp) {
+ if (unlikely(!dp)) {
err = -ENODEV;
goto unlock;
}
- if (!a[OVS_FLOW_ATTR_KEY]) {
+ if (unlikely(!a[OVS_FLOW_ATTR_KEY])) {
err = ovs_flow_tbl_flush(&dp->table);
goto unlock;
}
- ovs_match_init(&match, &key, NULL);
- err = ovs_nla_get_match(&match, NULL, a[OVS_FLOW_ATTR_KEY], NULL);
- if (err)
- goto unlock;
-
- flow = ovs_flow_tbl_lookup(&dp->table, &key);
- if (!flow || !ovs_flow_cmp_unmasked_key(flow, &match)) {
+ flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
+ if (unlikely(!flow)) {
err = -ENOENT;
goto unlock;
}
- reply = ovs_flow_cmd_alloc_info(flow, info);
- if (!reply) {
- err = -ENOMEM;
- goto unlock;
- }
-
ovs_flow_tbl_remove(&dp->table, flow);
+ ovs_unlock();
- err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_portid,
- info->snd_seq, 0, OVS_FLOW_CMD_DEL);
- BUG_ON(err < 0);
+ reply = ovs_flow_cmd_alloc_info((const struct sw_flow_actions __force *) flow->sf_acts,
+ info, false);
+ if (likely(reply)) {
+ if (likely(!IS_ERR(reply))) {
+ rcu_read_lock(); /*To keep RCU checker happy. */
+ err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex,
+ reply, info->snd_portid,
+ info->snd_seq, 0,
+ OVS_FLOW_CMD_DEL);
+ rcu_read_unlock();
+ BUG_ON(err < 0);
+
+ ovs_notify(&dp_flow_genl_family, reply, info);
+ } else {
+ netlink_set_err(sock_net(skb->sk)->genl_sock, 0, 0, PTR_ERR(reply));
+ }
+ }
ovs_flow_free(flow, true);
- ovs_unlock();
-
- ovs_notify(&dp_flow_genl_family, reply, info);
return 0;
unlock:
ovs_unlock();
@@ -1024,7 +1170,7 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
if (!flow)
break;
- if (ovs_flow_cmd_fill_info(flow, dp, skb,
+ if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
OVS_FLOW_CMD_NEW) < 0)
@@ -1037,11 +1183,17 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
return skb->len;
}
-static const struct genl_ops dp_flow_genl_ops[] = {
+static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
+ [OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
+ [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
+ [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
+};
+
+static struct genl_ops dp_flow_genl_ops[] = {
{ .cmd = OVS_FLOW_CMD_NEW,
.flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
.policy = flow_policy,
- .doit = ovs_flow_cmd_new_or_set
+ .doit = ovs_flow_cmd_new
},
{ .cmd = OVS_FLOW_CMD_DEL,
.flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
@@ -1057,28 +1209,22 @@ static const struct genl_ops dp_flow_genl_ops[] = {
{ .cmd = OVS_FLOW_CMD_SET,
.flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
.policy = flow_policy,
- .doit = ovs_flow_cmd_new_or_set,
+ .doit = ovs_flow_cmd_set,
},
};
-static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
- [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
- [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
- [OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
-};
-
-static struct genl_family dp_datapath_genl_family = {
+static struct genl_family dp_flow_genl_family = {
.id = GENL_ID_GENERATE,
.hdrsize = sizeof(struct ovs_header),
- .name = OVS_DATAPATH_FAMILY,
- .version = OVS_DATAPATH_VERSION,
- .maxattr = OVS_DP_ATTR_MAX,
+ .name = OVS_FLOW_FAMILY,
+ .version = OVS_FLOW_VERSION,
+ .maxattr = OVS_FLOW_ATTR_MAX,
.netnsok = true,
.parallel_ops = true,
-};
-
-static struct genl_multicast_group ovs_dp_datapath_multicast_group = {
- .name = OVS_DATAPATH_MCGROUP
+ .ops = dp_flow_genl_ops,
+ .n_ops = ARRAY_SIZE(dp_flow_genl_ops),
+ .mcgrps = &ovs_dp_flow_multicast_group,
+ .n_mcgrps = 1,
};
static size_t ovs_dp_cmd_msg_size(void)
@@ -1093,6 +1239,7 @@ static size_t ovs_dp_cmd_msg_size(void)
return msgsize;
}
+/* Called with ovs_mutex or RCU read lock. */
static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
u32 portid, u32 seq, u32 flags, u8 cmd)
{
@@ -1108,9 +1255,7 @@ static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
ovs_header->dp_ifindex = get_dpifindex(dp);
- rcu_read_lock();
err = nla_put_string(skb, OVS_DP_ATTR_NAME, ovs_dp_name(dp));
- rcu_read_unlock();
if (err)
goto nla_put_failure;
@@ -1135,25 +1280,12 @@ error:
return -EMSGSIZE;
}
-static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp,
- struct genl_info *info, u8 cmd)
+static struct sk_buff *ovs_dp_cmd_alloc_info(struct genl_info *info)
{
- struct sk_buff *skb;
- int retval;
-
- skb = genlmsg_new_unicast(ovs_dp_cmd_msg_size(), info, GFP_KERNEL);
- if (!skb)
- return ERR_PTR(-ENOMEM);
-
- retval = ovs_dp_cmd_fill_info(dp, skb, info->snd_portid, info->snd_seq, 0, cmd);
- if (retval < 0) {
- kfree_skb(skb);
- return ERR_PTR(retval);
- }
- return skb;
+ return genlmsg_new_unicast(ovs_dp_cmd_msg_size(), info, GFP_KERNEL);
}
-/* Called with ovs_mutex. */
+/* Called with rcu_read_lock or ovs_mutex. */
static struct datapath *lookup_datapath(struct net *net,
struct ovs_header *ovs_header,
struct nlattr *a[OVS_DP_ATTR_MAX + 1])
@@ -1165,10 +1297,8 @@ static struct datapath *lookup_datapath(struct net *net,
else {
struct vport *vport;
- rcu_read_lock();
vport = ovs_vport_locate(net, nla_data(a[OVS_DP_ATTR_NAME]));
dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL;
- rcu_read_unlock();
}
return dp ? dp : ERR_PTR(-ENODEV);
}
@@ -1205,12 +1335,14 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
goto err;
- ovs_lock();
+ reply = ovs_dp_cmd_alloc_info(info);
+ if (!reply)
+ return -ENOMEM;
err = -ENOMEM;
dp = kzalloc(sizeof(*dp), GFP_KERNEL);
if (dp == NULL)
- goto err_unlock_ovs;
+ goto err_free_reply;
ovs_dp_set_net(dp, hold_net(sock_net(skb->sk)));
@@ -1219,18 +1351,12 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
if (err)
goto err_free_dp;
- dp->stats_percpu = alloc_percpu(struct dp_stats_percpu);
+ dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu);
if (!dp->stats_percpu) {
err = -ENOMEM;
goto err_destroy_table;
}
- for_each_possible_cpu(i) {
- struct dp_stats_percpu *dpath_stats;
- dpath_stats = per_cpu_ptr(dp->stats_percpu, i);
- u64_stats_init(&dpath_stats->sync);
- }
-
dp->ports = kmalloc(DP_VPORT_HASH_BUCKETS * sizeof(struct hlist_head),
GFP_KERNEL);
if (!dp->ports) {
@@ -1251,6 +1377,9 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
ovs_dp_change(dp, a);
+ /* So far only local changes have been made, now need the lock. */
+ ovs_lock();
+
vport = new_vport(&parms);
if (IS_ERR(vport)) {
err = PTR_ERR(vport);
@@ -1269,10 +1398,9 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
goto err_destroy_ports_array;
}
- reply = ovs_dp_cmd_build_info(dp, info, OVS_DP_CMD_NEW);
- err = PTR_ERR(reply);
- if (IS_ERR(reply))
- goto err_destroy_local_port;
+ err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
+ info->snd_seq, 0, OVS_DP_CMD_NEW);
+ BUG_ON(err < 0);
ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);
list_add_tail_rcu(&dp->list_node, &ovs_net->dps);
@@ -1282,9 +1410,8 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
ovs_notify(&dp_datapath_genl_family, reply, info);
return 0;
-err_destroy_local_port:
- ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
err_destroy_ports_array:
+ ovs_unlock();
kfree(dp->ports);
err_destroy_percpu:
free_percpu(dp->stats_percpu);
@@ -1293,8 +1420,8 @@ err_destroy_table:
err_free_dp:
release_net(ovs_dp_get_net(dp));
kfree(dp);
-err_unlock_ovs:
- ovs_unlock();
+err_free_reply:
+ kfree_skb(reply);
err:
return err;
}
@@ -1332,16 +1459,19 @@ static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
struct datapath *dp;
int err;
+ reply = ovs_dp_cmd_alloc_info(info);
+ if (!reply)
+ return -ENOMEM;
+
ovs_lock();
dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
err = PTR_ERR(dp);
if (IS_ERR(dp))
- goto unlock;
+ goto err_unlock_free;
- reply = ovs_dp_cmd_build_info(dp, info, OVS_DP_CMD_DEL);
- err = PTR_ERR(reply);
- if (IS_ERR(reply))
- goto unlock;
+ err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
+ info->snd_seq, 0, OVS_DP_CMD_DEL);
+ BUG_ON(err < 0);
__dp_destroy(dp);
ovs_unlock();
@@ -1349,8 +1479,10 @@ static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
ovs_notify(&dp_datapath_genl_family, reply, info);
return 0;
-unlock:
+
+err_unlock_free:
ovs_unlock();
+ kfree_skb(reply);
return err;
}
@@ -1360,29 +1492,30 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
struct datapath *dp;
int err;
+ reply = ovs_dp_cmd_alloc_info(info);
+ if (!reply)
+ return -ENOMEM;
+
ovs_lock();
dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
err = PTR_ERR(dp);
if (IS_ERR(dp))
- goto unlock;
+ goto err_unlock_free;
ovs_dp_change(dp, info->attrs);
- reply = ovs_dp_cmd_build_info(dp, info, OVS_DP_CMD_NEW);
- if (IS_ERR(reply)) {
- err = PTR_ERR(reply);
- genl_set_err(&dp_datapath_genl_family, sock_net(skb->sk), 0,
- 0, err);
- err = 0;
- goto unlock;
- }
+ err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
+ info->snd_seq, 0, OVS_DP_CMD_NEW);
+ BUG_ON(err < 0);
ovs_unlock();
ovs_notify(&dp_datapath_genl_family, reply, info);
return 0;
-unlock:
+
+err_unlock_free:
ovs_unlock();
+ kfree_skb(reply);
return err;
}
@@ -1392,24 +1525,26 @@ static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
struct datapath *dp;
int err;
- ovs_lock();
+ reply = ovs_dp_cmd_alloc_info(info);
+ if (!reply)
+ return -ENOMEM;
+
+ rcu_read_lock();
dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
if (IS_ERR(dp)) {
err = PTR_ERR(dp);
- goto unlock;
- }
-
- reply = ovs_dp_cmd_build_info(dp, info, OVS_DP_CMD_NEW);
- if (IS_ERR(reply)) {
- err = PTR_ERR(reply);
- goto unlock;
+ goto err_unlock_free;
}
+ err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
+ info->snd_seq, 0, OVS_DP_CMD_NEW);
+ BUG_ON(err < 0);
+ rcu_read_unlock();
- ovs_unlock();
return genlmsg_reply(reply, info);
-unlock:
- ovs_unlock();
+err_unlock_free:
+ rcu_read_unlock();
+ kfree_skb(reply);
return err;
}
@@ -1436,7 +1571,13 @@ static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
return skb->len;
}
-static const struct genl_ops dp_datapath_genl_ops[] = {
+static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
+ [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
+ [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
+ [OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
+};
+
+static struct genl_ops dp_datapath_genl_ops[] = {
{ .cmd = OVS_DP_CMD_NEW,
.flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
.policy = datapath_policy,
@@ -1460,27 +1601,18 @@ static const struct genl_ops dp_datapath_genl_ops[] = {
},
};
-static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
- [OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
- [OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) },
- [OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
- [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
- [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 },
- [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
-};
-
-struct genl_family dp_vport_genl_family = {
+static struct genl_family dp_datapath_genl_family = {
.id = GENL_ID_GENERATE,
.hdrsize = sizeof(struct ovs_header),
- .name = OVS_VPORT_FAMILY,
- .version = OVS_VPORT_VERSION,
- .maxattr = OVS_VPORT_ATTR_MAX,
+ .name = OVS_DATAPATH_FAMILY,
+ .version = OVS_DATAPATH_VERSION,
+ .maxattr = OVS_DP_ATTR_MAX,
.netnsok = true,
.parallel_ops = true,
-};
-
-static struct genl_multicast_group ovs_dp_vport_multicast_group = {
- .name = OVS_VPORT_MCGROUP
+ .ops = dp_datapath_genl_ops,
+ .n_ops = ARRAY_SIZE(dp_datapath_genl_ops),
+ .mcgrps = &ovs_dp_datapath_multicast_group,
+ .n_mcgrps = 1,
};
/* Called with ovs_mutex or RCU read lock. */
@@ -1522,7 +1654,12 @@ error:
return err;
}
-/* Called with ovs_mutex or RCU read lock. */
+static struct sk_buff *ovs_vport_cmd_alloc_info(void)
+{
+ return nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+}
+
+/* Called with ovs_mutex, only via ovs_dp_notify_wq(). */
struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid,
u32 seq, u8 cmd)
{
@@ -1584,33 +1721,35 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
u32 port_no;
int err;
- err = -EINVAL;
if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] ||
!a[OVS_VPORT_ATTR_UPCALL_PID])
- goto exit;
+ return -EINVAL;
+
+ port_no = a[OVS_VPORT_ATTR_PORT_NO]
+ ? nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]) : 0;
+ if (port_no >= DP_MAX_PORTS)
+ return -EFBIG;
+
+ reply = ovs_vport_cmd_alloc_info();
+ if (!reply)
+ return -ENOMEM;
ovs_lock();
dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
err = -ENODEV;
if (!dp)
- goto exit_unlock;
-
- if (a[OVS_VPORT_ATTR_PORT_NO]) {
- port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
-
- err = -EFBIG;
- if (port_no >= DP_MAX_PORTS)
- goto exit_unlock;
+ goto exit_unlock_free;
+ if (port_no) {
vport = ovs_vport_ovsl(dp, port_no);
err = -EBUSY;
if (vport)
- goto exit_unlock;
+ goto exit_unlock_free;
} else {
for (port_no = 1; ; port_no++) {
if (port_no >= DP_MAX_PORTS) {
err = -EFBIG;
- goto exit_unlock;
+ goto exit_unlock_free;
}
vport = ovs_vport_ovsl(dp, port_no);
if (!vport)
@@ -1628,22 +1767,19 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
vport = new_vport(&parms);
err = PTR_ERR(vport);
if (IS_ERR(vport))
- goto exit_unlock;
+ goto exit_unlock_free;
- err = 0;
- reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq,
- OVS_VPORT_CMD_NEW);
- if (IS_ERR(reply)) {
- err = PTR_ERR(reply);
- ovs_dp_detach_port(vport);
- goto exit_unlock;
- }
+ err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
+ info->snd_seq, 0, OVS_VPORT_CMD_NEW);
+ BUG_ON(err < 0);
+ ovs_unlock();
ovs_notify(&dp_vport_genl_family, reply, info);
+ return 0;
-exit_unlock:
+exit_unlock_free:
ovs_unlock();
-exit:
+ kfree_skb(reply);
return err;
}
@@ -1654,28 +1790,26 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
struct vport *vport;
int err;
+ reply = ovs_vport_cmd_alloc_info();
+ if (!reply)
+ return -ENOMEM;
+
ovs_lock();
vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
err = PTR_ERR(vport);
if (IS_ERR(vport))
- goto exit_unlock;
+ goto exit_unlock_free;
if (a[OVS_VPORT_ATTR_TYPE] &&
nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) {
err = -EINVAL;
- goto exit_unlock;
- }
-
- reply = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!reply) {
- err = -ENOMEM;
- goto exit_unlock;
+ goto exit_unlock_free;
}
if (a[OVS_VPORT_ATTR_OPTIONS]) {
err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]);
if (err)
- goto exit_free;
+ goto exit_unlock_free;
}
if (a[OVS_VPORT_ATTR_UPCALL_PID])
@@ -1689,10 +1823,9 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
ovs_notify(&dp_vport_genl_family, reply, info);
return 0;
-exit_free:
- kfree_skb(reply);
-exit_unlock:
+exit_unlock_free:
ovs_unlock();
+ kfree_skb(reply);
return err;
}
@@ -1703,30 +1836,33 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
struct vport *vport;
int err;
+ reply = ovs_vport_cmd_alloc_info();
+ if (!reply)
+ return -ENOMEM;
+
ovs_lock();
vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
err = PTR_ERR(vport);
if (IS_ERR(vport))
- goto exit_unlock;
+ goto exit_unlock_free;
if (vport->port_no == OVSP_LOCAL) {
err = -EINVAL;
- goto exit_unlock;
+ goto exit_unlock_free;
}
- reply = ovs_vport_cmd_build_info(vport, info->snd_portid,
- info->snd_seq, OVS_VPORT_CMD_DEL);
- err = PTR_ERR(reply);
- if (IS_ERR(reply))
- goto exit_unlock;
-
- err = 0;
+ err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
+ info->snd_seq, 0, OVS_VPORT_CMD_DEL);
+ BUG_ON(err < 0);
ovs_dp_detach_port(vport);
+ ovs_unlock();
ovs_notify(&dp_vport_genl_family, reply, info);
+ return 0;
-exit_unlock:
+exit_unlock_free:
ovs_unlock();
+ kfree_skb(reply);
return err;
}
@@ -1738,24 +1874,25 @@ static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
struct vport *vport;
int err;
+ reply = ovs_vport_cmd_alloc_info();
+ if (!reply)
+ return -ENOMEM;
+
rcu_read_lock();
vport = lookup_vport(sock_net(skb->sk), ovs_header, a);
err = PTR_ERR(vport);
if (IS_ERR(vport))
- goto exit_unlock;
-
- reply = ovs_vport_cmd_build_info(vport, info->snd_portid,
- info->snd_seq, OVS_VPORT_CMD_NEW);
- err = PTR_ERR(reply);
- if (IS_ERR(reply))
- goto exit_unlock;
-
+ goto exit_unlock_free;
+ err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
+ info->snd_seq, 0, OVS_VPORT_CMD_NEW);
+ BUG_ON(err < 0);
rcu_read_unlock();
return genlmsg_reply(reply, info);
-exit_unlock:
+exit_unlock_free:
rcu_read_unlock();
+ kfree_skb(reply);
return err;
}
@@ -1798,7 +1935,16 @@ out:
return skb->len;
}
-static const struct genl_ops dp_vport_genl_ops[] = {
+static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
+ [OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
+ [OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) },
+ [OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
+ [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
+ [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 },
+ [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
+};
+
+static struct genl_ops dp_vport_genl_ops[] = {
{ .cmd = OVS_VPORT_CMD_NEW,
.flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
.policy = vport_policy,
@@ -1822,26 +1968,25 @@ static const struct genl_ops dp_vport_genl_ops[] = {
},
};
-struct genl_family_and_ops {
- struct genl_family *family;
- const struct genl_ops *ops;
- int n_ops;
- const struct genl_multicast_group *group;
+struct genl_family dp_vport_genl_family = {
+ .id = GENL_ID_GENERATE,
+ .hdrsize = sizeof(struct ovs_header),
+ .name = OVS_VPORT_FAMILY,
+ .version = OVS_VPORT_VERSION,
+ .maxattr = OVS_VPORT_ATTR_MAX,
+ .netnsok = true,
+ .parallel_ops = true,
+ .ops = dp_vport_genl_ops,
+ .n_ops = ARRAY_SIZE(dp_vport_genl_ops),
+ .mcgrps = &ovs_dp_vport_multicast_group,
+ .n_mcgrps = 1,
};
-static const struct genl_family_and_ops dp_genl_families[] = {
- { &dp_datapath_genl_family,
- dp_datapath_genl_ops, ARRAY_SIZE(dp_datapath_genl_ops),
- &ovs_dp_datapath_multicast_group },
- { &dp_vport_genl_family,
- dp_vport_genl_ops, ARRAY_SIZE(dp_vport_genl_ops),
- &ovs_dp_vport_multicast_group },
- { &dp_flow_genl_family,
- dp_flow_genl_ops, ARRAY_SIZE(dp_flow_genl_ops),
- &ovs_dp_flow_multicast_group },
- { &dp_packet_genl_family,
- dp_packet_genl_ops, ARRAY_SIZE(dp_packet_genl_ops),
- NULL },
+static struct genl_family * const dp_genl_families[] = {
+ &dp_datapath_genl_family,
+ &dp_vport_genl_family,
+ &dp_flow_genl_family,
+ &dp_packet_genl_family,
};
static void dp_unregister_genl(int n_families)
@@ -1849,33 +1994,25 @@ static void dp_unregister_genl(int n_families)
int i;
for (i = 0; i < n_families; i++)
- genl_unregister_family(dp_genl_families[i].family);
+ genl_unregister_family(dp_genl_families[i]);
}
static int dp_register_genl(void)
{
- int n_registered;
int err;
int i;
- n_registered = 0;
for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {
- const struct genl_family_and_ops *f = &dp_genl_families[i];
- f->family->ops = f->ops;
- f->family->n_ops = f->n_ops;
- f->family->mcgrps = f->group;
- f->family->n_mcgrps = f->group ? 1 : 0;
- err = genl_register_family(f->family);
+ err = genl_register_family(dp_genl_families[i]);
if (err)
goto error;
- n_registered++;
}
return 0;
error:
- dp_unregister_genl(n_registered);
+ dp_unregister_genl(i);
return err;
}
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 6be9fbb5e9c..7ede507500d 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -55,7 +55,7 @@ struct dp_stats_percpu {
u64 n_missed;
u64 n_lost;
u64 n_mask_hit;
- struct u64_stats_sync sync;
+ struct u64_stats_sync syncp;
};
/**
@@ -194,7 +194,9 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *, u32 pid, u32 seq,
int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb);
void ovs_dp_notify_wq(struct work_struct *work);
-#define OVS_NLERR(fmt, ...) \
- pr_info_once("netlink: " fmt, ##__VA_ARGS__)
-
+#define OVS_NLERR(fmt, ...) \
+do { \
+ if (net_ratelimit()) \
+ pr_info("netlink: " fmt, ##__VA_ARGS__); \
+} while (0)
#endif /* datapath.h */
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 2998989e76d..d07ab538fc9 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -61,91 +61,113 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies)
#define TCP_FLAGS_BE16(tp) (*(__be16 *)&tcp_flag_word(tp) & htons(0x0FFF))
-void ovs_flow_stats_update(struct sw_flow *flow, struct sk_buff *skb)
+void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,
+ struct sk_buff *skb)
{
struct flow_stats *stats;
- __be16 tcp_flags = 0;
-
- if (!flow->stats.is_percpu)
- stats = flow->stats.stat;
- else
- stats = this_cpu_ptr(flow->stats.cpu_stats);
-
- if ((flow->key.eth.type == htons(ETH_P_IP) ||
- flow->key.eth.type == htons(ETH_P_IPV6)) &&
- flow->key.ip.frag != OVS_FRAG_TYPE_LATER &&
- flow->key.ip.proto == IPPROTO_TCP &&
- likely(skb->len >= skb_transport_offset(skb) + sizeof(struct tcphdr))) {
- tcp_flags = TCP_FLAGS_BE16(tcp_hdr(skb));
+ int node = numa_node_id();
+
+ stats = rcu_dereference(flow->stats[node]);
+
+ /* Check if already have node-specific stats. */
+ if (likely(stats)) {
+ spin_lock(&stats->lock);
+ /* Mark if we write on the pre-allocated stats. */
+ if (node == 0 && unlikely(flow->stats_last_writer != node))
+ flow->stats_last_writer = node;
+ } else {
+ stats = rcu_dereference(flow->stats[0]); /* Pre-allocated. */
+ spin_lock(&stats->lock);
+
+ /* If the current NUMA-node is the only writer on the
+ * pre-allocated stats keep using them.
+ */
+ if (unlikely(flow->stats_last_writer != node)) {
+ /* A previous locker may have already allocated the
+ * stats, so we need to check again. If node-specific
+ * stats were already allocated, we update the pre-
+ * allocated stats as we have already locked them.
+ */
+ if (likely(flow->stats_last_writer != NUMA_NO_NODE)
+ && likely(!rcu_dereference(flow->stats[node]))) {
+ /* Try to allocate node-specific stats. */
+ struct flow_stats *new_stats;
+
+ new_stats =
+ kmem_cache_alloc_node(flow_stats_cache,
+ GFP_THISNODE |
+ __GFP_NOMEMALLOC,
+ node);
+ if (likely(new_stats)) {
+ new_stats->used = jiffies;
+ new_stats->packet_count = 1;
+ new_stats->byte_count = skb->len;
+ new_stats->tcp_flags = tcp_flags;
+ spin_lock_init(&new_stats->lock);
+
+ rcu_assign_pointer(flow->stats[node],
+ new_stats);
+ goto unlock;
+ }
+ }
+ flow->stats_last_writer = node;
+ }
}
- spin_lock(&stats->lock);
stats->used = jiffies;
stats->packet_count++;
stats->byte_count += skb->len;
stats->tcp_flags |= tcp_flags;
+unlock:
spin_unlock(&stats->lock);
}
-static void stats_read(struct flow_stats *stats,
- struct ovs_flow_stats *ovs_stats,
- unsigned long *used, __be16 *tcp_flags)
-{
- spin_lock(&stats->lock);
- if (!*used || time_after(stats->used, *used))
- *used = stats->used;
- *tcp_flags |= stats->tcp_flags;
- ovs_stats->n_packets += stats->packet_count;
- ovs_stats->n_bytes += stats->byte_count;
- spin_unlock(&stats->lock);
-}
-
-void ovs_flow_stats_get(struct sw_flow *flow, struct ovs_flow_stats *ovs_stats,
+/* Must be called with rcu_read_lock or ovs_mutex. */
+void ovs_flow_stats_get(const struct sw_flow *flow,
+ struct ovs_flow_stats *ovs_stats,
unsigned long *used, __be16 *tcp_flags)
{
- int cpu;
+ int node;
*used = 0;
*tcp_flags = 0;
memset(ovs_stats, 0, sizeof(*ovs_stats));
- local_bh_disable();
- if (!flow->stats.is_percpu) {
- stats_read(flow->stats.stat, ovs_stats, used, tcp_flags);
- } else {
- for_each_possible_cpu(cpu) {
- struct flow_stats *stats;
+ for_each_node(node) {
+ struct flow_stats *stats = rcu_dereference_ovsl(flow->stats[node]);
- stats = per_cpu_ptr(flow->stats.cpu_stats, cpu);
- stats_read(stats, ovs_stats, used, tcp_flags);
+ if (stats) {
+ /* Local CPU may write on non-local stats, so we must
+ * block bottom-halves here.
+ */
+ spin_lock_bh(&stats->lock);
+ if (!*used || time_after(stats->used, *used))
+ *used = stats->used;
+ *tcp_flags |= stats->tcp_flags;
+ ovs_stats->n_packets += stats->packet_count;
+ ovs_stats->n_bytes += stats->byte_count;
+ spin_unlock_bh(&stats->lock);
}
}
- local_bh_enable();
-}
-
-static void stats_reset(struct flow_stats *stats)
-{
- spin_lock(&stats->lock);
- stats->used = 0;
- stats->packet_count = 0;
- stats->byte_count = 0;
- stats->tcp_flags = 0;
- spin_unlock(&stats->lock);
}
+/* Called with ovs_mutex. */
void ovs_flow_stats_clear(struct sw_flow *flow)
{
- int cpu;
-
- local_bh_disable();
- if (!flow->stats.is_percpu) {
- stats_reset(flow->stats.stat);
- } else {
- for_each_possible_cpu(cpu) {
- stats_reset(per_cpu_ptr(flow->stats.cpu_stats, cpu));
+ int node;
+
+ for_each_node(node) {
+ struct flow_stats *stats = ovsl_dereference(flow->stats[node]);
+
+ if (stats) {
+ spin_lock_bh(&stats->lock);
+ stats->used = 0;
+ stats->packet_count = 0;
+ stats->byte_count = 0;
+ stats->tcp_flags = 0;
+ spin_unlock_bh(&stats->lock);
}
}
- local_bh_enable();
}
static int check_header(struct sk_buff *skb, int len)
@@ -332,8 +354,8 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
/* The ICMPv6 type and code fields use the 16-bit transport port
* fields, so we need to store them in 16-bit network byte order.
*/
- key->ipv6.tp.src = htons(icmp->icmp6_type);
- key->ipv6.tp.dst = htons(icmp->icmp6_code);
+ key->tp.src = htons(icmp->icmp6_type);
+ key->tp.dst = htons(icmp->icmp6_code);
if (icmp->icmp6_code == 0 &&
(icmp->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION ||
@@ -372,14 +394,14 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
&& opt_len == 8) {
if (unlikely(!is_zero_ether_addr(key->ipv6.nd.sll)))
goto invalid;
- memcpy(key->ipv6.nd.sll,
- &nd->opt[offset+sizeof(*nd_opt)], ETH_ALEN);
+ ether_addr_copy(key->ipv6.nd.sll,
+ &nd->opt[offset+sizeof(*nd_opt)]);
} else if (nd_opt->nd_opt_type == ND_OPT_TARGET_LL_ADDR
&& opt_len == 8) {
if (unlikely(!is_zero_ether_addr(key->ipv6.nd.tll)))
goto invalid;
- memcpy(key->ipv6.nd.tll,
- &nd->opt[offset+sizeof(*nd_opt)], ETH_ALEN);
+ ether_addr_copy(key->ipv6.nd.tll,
+ &nd->opt[offset+sizeof(*nd_opt)]);
}
icmp_len -= opt_len;
@@ -439,8 +461,8 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
* header in the linear data area.
*/
eth = eth_hdr(skb);
- memcpy(key->eth.src, eth->h_source, ETH_ALEN);
- memcpy(key->eth.dst, eth->h_dest, ETH_ALEN);
+ ether_addr_copy(key->eth.src, eth->h_source);
+ ether_addr_copy(key->eth.dst, eth->h_dest);
__skb_pull(skb, 2 * ETH_ALEN);
/* We are going to push all headers that we pull, so no need to
@@ -495,21 +517,21 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
if (key->ip.proto == IPPROTO_TCP) {
if (tcphdr_ok(skb)) {
struct tcphdr *tcp = tcp_hdr(skb);
- key->ipv4.tp.src = tcp->source;
- key->ipv4.tp.dst = tcp->dest;
- key->ipv4.tp.flags = TCP_FLAGS_BE16(tcp);
+ key->tp.src = tcp->source;
+ key->tp.dst = tcp->dest;
+ key->tp.flags = TCP_FLAGS_BE16(tcp);
}
} else if (key->ip.proto == IPPROTO_UDP) {
if (udphdr_ok(skb)) {
struct udphdr *udp = udp_hdr(skb);
- key->ipv4.tp.src = udp->source;
- key->ipv4.tp.dst = udp->dest;
+ key->tp.src = udp->source;
+ key->tp.dst = udp->dest;
}
} else if (key->ip.proto == IPPROTO_SCTP) {
if (sctphdr_ok(skb)) {
struct sctphdr *sctp = sctp_hdr(skb);
- key->ipv4.tp.src = sctp->source;
- key->ipv4.tp.dst = sctp->dest;
+ key->tp.src = sctp->source;
+ key->tp.dst = sctp->dest;
}
} else if (key->ip.proto == IPPROTO_ICMP) {
if (icmphdr_ok(skb)) {
@@ -517,8 +539,8 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
/* The ICMP type and code fields use the 16-bit
* transport port fields, so we need to store
* them in 16-bit network byte order. */
- key->ipv4.tp.src = htons(icmp->type);
- key->ipv4.tp.dst = htons(icmp->code);
+ key->tp.src = htons(icmp->type);
+ key->tp.dst = htons(icmp->code);
}
}
@@ -538,8 +560,8 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
key->ip.proto = ntohs(arp->ar_op);
memcpy(&key->ipv4.addr.src, arp->ar_sip, sizeof(key->ipv4.addr.src));
memcpy(&key->ipv4.addr.dst, arp->ar_tip, sizeof(key->ipv4.addr.dst));
- memcpy(key->ipv4.arp.sha, arp->ar_sha, ETH_ALEN);
- memcpy(key->ipv4.arp.tha, arp->ar_tha, ETH_ALEN);
+ ether_addr_copy(key->ipv4.arp.sha, arp->ar_sha);
+ ether_addr_copy(key->ipv4.arp.tha, arp->ar_tha);
}
} else if (key->eth.type == htons(ETH_P_IPV6)) {
int nh_len; /* IPv6 Header + Extensions */
@@ -564,21 +586,21 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
if (key->ip.proto == NEXTHDR_TCP) {
if (tcphdr_ok(skb)) {
struct tcphdr *tcp = tcp_hdr(skb);
- key->ipv6.tp.src = tcp->source;
- key->ipv6.tp.dst = tcp->dest;
- key->ipv6.tp.flags = TCP_FLAGS_BE16(tcp);
+ key->tp.src = tcp->source;
+ key->tp.dst = tcp->dest;
+ key->tp.flags = TCP_FLAGS_BE16(tcp);
}
} else if (key->ip.proto == NEXTHDR_UDP) {
if (udphdr_ok(skb)) {
struct udphdr *udp = udp_hdr(skb);
- key->ipv6.tp.src = udp->source;
- key->ipv6.tp.dst = udp->dest;
+ key->tp.src = udp->source;
+ key->tp.dst = udp->dest;
}
} else if (key->ip.proto == NEXTHDR_SCTP) {
if (sctphdr_ok(skb)) {
struct sctphdr *sctp = sctp_hdr(skb);
- key->ipv6.tp.src = sctp->source;
- key->ipv6.tp.dst = sctp->dest;
+ key->tp.src = sctp->source;
+ key->tp.dst = sctp->dest;
}
} else if (key->ip.proto == NEXTHDR_ICMP) {
if (icmp6hdr_ok(skb)) {
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index 2d770e28a3a..5e5aaed3a85 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2007-2013 Nicira, Inc.
+ * Copyright (c) 2007-2014 Nicira, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
@@ -47,7 +47,7 @@ struct ovs_key_ipv4_tunnel {
__be16 tun_flags;
u8 ipv4_tos;
u8 ipv4_ttl;
-};
+} __packed __aligned(4); /* Minimize padding. */
static inline void ovs_flow_tun_key_init(struct ovs_key_ipv4_tunnel *tun_key,
const struct iphdr *iph, __be64 tun_id,
@@ -71,7 +71,7 @@ struct sw_flow_key {
u32 priority; /* Packet QoS priority. */
u32 skb_mark; /* SKB mark. */
u16 in_port; /* Input switch port (or DP_MAX_PORTS). */
- } phy;
+ } __packed phy; /* Safe when right after 'tun_key'. */
struct {
u8 src[ETH_ALEN]; /* Ethernet source address. */
u8 dst[ETH_ALEN]; /* Ethernet destination address. */
@@ -84,23 +84,21 @@ struct sw_flow_key {
u8 ttl; /* IP TTL/hop limit. */
u8 frag; /* One of OVS_FRAG_TYPE_*. */
} ip;
+ struct {
+ __be16 src; /* TCP/UDP/SCTP source port. */
+ __be16 dst; /* TCP/UDP/SCTP destination port. */
+ __be16 flags; /* TCP flags. */
+ } tp;
union {
struct {
struct {
__be32 src; /* IP source address. */
__be32 dst; /* IP destination address. */
} addr;
- union {
- struct {
- __be16 src; /* TCP/UDP/SCTP source port. */
- __be16 dst; /* TCP/UDP/SCTP destination port. */
- __be16 flags; /* TCP flags. */
- } tp;
- struct {
- u8 sha[ETH_ALEN]; /* ARP source hardware address. */
- u8 tha[ETH_ALEN]; /* ARP target hardware address. */
- } arp;
- };
+ struct {
+ u8 sha[ETH_ALEN]; /* ARP source hardware address. */
+ u8 tha[ETH_ALEN]; /* ARP target hardware address. */
+ } arp;
} ipv4;
struct {
struct {
@@ -109,11 +107,6 @@ struct sw_flow_key {
} addr;
__be32 label; /* IPv6 flow label. */
struct {
- __be16 src; /* TCP/UDP/SCTP source port. */
- __be16 dst; /* TCP/UDP/SCTP destination port. */
- __be16 flags; /* TCP flags. */
- } tp;
- struct {
struct in6_addr target; /* ND target address. */
u8 sll[ETH_ALEN]; /* ND source link layer address. */
u8 tll[ETH_ALEN]; /* ND target link layer address. */
@@ -155,24 +148,22 @@ struct flow_stats {
__be16 tcp_flags; /* Union of seen TCP flags. */
};
-struct sw_flow_stats {
- bool is_percpu;
- union {
- struct flow_stats *stat;
- struct flow_stats __percpu *cpu_stats;
- };
-};
-
struct sw_flow {
struct rcu_head rcu;
struct hlist_node hash_node[2];
u32 hash;
-
+ int stats_last_writer; /* NUMA-node id of the last writer on
+ * 'stats[0]'.
+ */
struct sw_flow_key key;
struct sw_flow_key unmasked_key;
struct sw_flow_mask *mask;
struct sw_flow_actions __rcu *sf_acts;
- struct sw_flow_stats stats;
+ struct flow_stats __rcu *stats[]; /* One for each NUMA node. First one
+ * is allocated at flow creation time,
+ * the rest are allocated on demand
+ * while holding the 'stats[0].lock'.
+ */
};
struct arp_eth_header {
@@ -189,10 +180,11 @@ struct arp_eth_header {
unsigned char ar_tip[4]; /* target IP address */
} __packed;
-void ovs_flow_stats_update(struct sw_flow *flow, struct sk_buff *skb);
-void ovs_flow_stats_get(struct sw_flow *flow, struct ovs_flow_stats *stats,
+void ovs_flow_stats_update(struct sw_flow *, __be16 tcp_flags,
+ struct sk_buff *);
+void ovs_flow_stats_get(const struct sw_flow *, struct ovs_flow_stats *,
unsigned long *used, __be16 *tcp_flags);
-void ovs_flow_stats_clear(struct sw_flow *flow);
+void ovs_flow_stats_clear(struct sw_flow *);
u64 ovs_flow_used_time(unsigned long flow_jiffies);
int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *);
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 4d000acaed0..d757848da89 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -16,6 +16,8 @@
* 02110-1301, USA
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include "flow.h"
#include "datapath.h"
#include <linux/uaccess.h>
@@ -202,11 +204,11 @@ static bool match_validate(const struct sw_flow_match *match,
if (match->mask && (match->mask->key.ip.proto == 0xff))
mask_allowed |= 1 << OVS_KEY_ATTR_ICMPV6;
- if (match->key->ipv6.tp.src ==
+ if (match->key->tp.src ==
htons(NDISC_NEIGHBOUR_SOLICITATION) ||
- match->key->ipv6.tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
+ match->key->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
key_expected |= 1 << OVS_KEY_ATTR_ND;
- if (match->mask && (match->mask->key.ipv6.tp.src == htons(0xffff)))
+ if (match->mask && (match->mask->key.tp.src == htons(0xffff)))
mask_allowed |= 1 << OVS_KEY_ATTR_ND;
}
}
@@ -216,14 +218,14 @@ static bool match_validate(const struct sw_flow_match *match,
if ((key_attrs & key_expected) != key_expected) {
/* Key attributes check failed. */
OVS_NLERR("Missing expected key attributes (key_attrs=%llx, expected=%llx).\n",
- key_attrs, key_expected);
+ (unsigned long long)key_attrs, (unsigned long long)key_expected);
return false;
}
if ((mask_attrs & mask_allowed) != mask_attrs) {
/* Mask attributes check failed. */
OVS_NLERR("Contain more than allowed mask fields (mask_attrs=%llx, mask_allowed=%llx).\n",
- mask_attrs, mask_allowed);
+ (unsigned long long)mask_attrs, (unsigned long long)mask_allowed);
return false;
}
@@ -266,20 +268,6 @@ static bool is_all_zero(const u8 *fp, size_t size)
return true;
}
-static bool is_all_set(const u8 *fp, size_t size)
-{
- int i;
-
- if (!fp)
- return false;
-
- for (i = 0; i < size; i++)
- if (fp[i] != 0xff)
- return false;
-
- return true;
-}
-
static int __parse_flow_nlattrs(const struct nlattr *attr,
const struct nlattr *a[],
u64 *attrsp, bool nz)
@@ -501,9 +489,8 @@ static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs,
return 0;
}
-static int ovs_key_from_nlattrs(struct sw_flow_match *match, bool *exact_5tuple,
- u64 attrs, const struct nlattr **a,
- bool is_mask)
+static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
+ const struct nlattr **a, bool is_mask)
{
int err;
u64 orig_attrs = attrs;
@@ -560,11 +547,6 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, bool *exact_5tuple
SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask);
}
- if (is_mask && exact_5tuple) {
- if (match->mask->key.eth.type != htons(0xffff))
- *exact_5tuple = false;
- }
-
if (attrs & (1 << OVS_KEY_ATTR_IPV4)) {
const struct ovs_key_ipv4 *ipv4_key;
@@ -587,13 +569,6 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, bool *exact_5tuple
SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
ipv4_key->ipv4_dst, is_mask);
attrs &= ~(1 << OVS_KEY_ATTR_IPV4);
-
- if (is_mask && exact_5tuple && *exact_5tuple) {
- if (ipv4_key->ipv4_proto != 0xff ||
- ipv4_key->ipv4_src != htonl(0xffffffff) ||
- ipv4_key->ipv4_dst != htonl(0xffffffff))
- *exact_5tuple = false;
- }
}
if (attrs & (1 << OVS_KEY_ATTR_IPV6)) {
@@ -625,13 +600,6 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, bool *exact_5tuple
is_mask);
attrs &= ~(1 << OVS_KEY_ATTR_IPV6);
-
- if (is_mask && exact_5tuple && *exact_5tuple) {
- if (ipv6_key->ipv6_proto != 0xff ||
- !is_all_set((u8 *)ipv6_key->ipv6_src, sizeof(match->key->ipv6.addr.src)) ||
- !is_all_set((u8 *)ipv6_key->ipv6_dst, sizeof(match->key->ipv6.addr.dst)))
- *exact_5tuple = false;
- }
}
if (attrs & (1 << OVS_KEY_ATTR_ARP)) {
@@ -662,32 +630,18 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, bool *exact_5tuple
const struct ovs_key_tcp *tcp_key;
tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]);
- if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) {
- SW_FLOW_KEY_PUT(match, ipv4.tp.src,
- tcp_key->tcp_src, is_mask);
- SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
- tcp_key->tcp_dst, is_mask);
- } else {
- SW_FLOW_KEY_PUT(match, ipv6.tp.src,
- tcp_key->tcp_src, is_mask);
- SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
- tcp_key->tcp_dst, is_mask);
- }
+ SW_FLOW_KEY_PUT(match, tp.src, tcp_key->tcp_src, is_mask);
+ SW_FLOW_KEY_PUT(match, tp.dst, tcp_key->tcp_dst, is_mask);
attrs &= ~(1 << OVS_KEY_ATTR_TCP);
-
- if (is_mask && exact_5tuple && *exact_5tuple &&
- (tcp_key->tcp_src != htons(0xffff) ||
- tcp_key->tcp_dst != htons(0xffff)))
- *exact_5tuple = false;
}
if (attrs & (1 << OVS_KEY_ATTR_TCP_FLAGS)) {
if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) {
- SW_FLOW_KEY_PUT(match, ipv4.tp.flags,
+ SW_FLOW_KEY_PUT(match, tp.flags,
nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]),
is_mask);
} else {
- SW_FLOW_KEY_PUT(match, ipv6.tp.flags,
+ SW_FLOW_KEY_PUT(match, tp.flags,
nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]),
is_mask);
}
@@ -698,40 +652,17 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, bool *exact_5tuple
const struct ovs_key_udp *udp_key;
udp_key = nla_data(a[OVS_KEY_ATTR_UDP]);
- if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) {
- SW_FLOW_KEY_PUT(match, ipv4.tp.src,
- udp_key->udp_src, is_mask);
- SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
- udp_key->udp_dst, is_mask);
- } else {
- SW_FLOW_KEY_PUT(match, ipv6.tp.src,
- udp_key->udp_src, is_mask);
- SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
- udp_key->udp_dst, is_mask);
- }
+ SW_FLOW_KEY_PUT(match, tp.src, udp_key->udp_src, is_mask);
+ SW_FLOW_KEY_PUT(match, tp.dst, udp_key->udp_dst, is_mask);
attrs &= ~(1 << OVS_KEY_ATTR_UDP);
-
- if (is_mask && exact_5tuple && *exact_5tuple &&
- (udp_key->udp_src != htons(0xffff) ||
- udp_key->udp_dst != htons(0xffff)))
- *exact_5tuple = false;
}
if (attrs & (1 << OVS_KEY_ATTR_SCTP)) {
const struct ovs_key_sctp *sctp_key;
sctp_key = nla_data(a[OVS_KEY_ATTR_SCTP]);
- if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) {
- SW_FLOW_KEY_PUT(match, ipv4.tp.src,
- sctp_key->sctp_src, is_mask);
- SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
- sctp_key->sctp_dst, is_mask);
- } else {
- SW_FLOW_KEY_PUT(match, ipv6.tp.src,
- sctp_key->sctp_src, is_mask);
- SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
- sctp_key->sctp_dst, is_mask);
- }
+ SW_FLOW_KEY_PUT(match, tp.src, sctp_key->sctp_src, is_mask);
+ SW_FLOW_KEY_PUT(match, tp.dst, sctp_key->sctp_dst, is_mask);
attrs &= ~(1 << OVS_KEY_ATTR_SCTP);
}
@@ -739,9 +670,9 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, bool *exact_5tuple
const struct ovs_key_icmp *icmp_key;
icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]);
- SW_FLOW_KEY_PUT(match, ipv4.tp.src,
+ SW_FLOW_KEY_PUT(match, tp.src,
htons(icmp_key->icmp_type), is_mask);
- SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
+ SW_FLOW_KEY_PUT(match, tp.dst,
htons(icmp_key->icmp_code), is_mask);
attrs &= ~(1 << OVS_KEY_ATTR_ICMP);
}
@@ -750,9 +681,9 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, bool *exact_5tuple
const struct ovs_key_icmpv6 *icmpv6_key;
icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]);
- SW_FLOW_KEY_PUT(match, ipv6.tp.src,
+ SW_FLOW_KEY_PUT(match, tp.src,
htons(icmpv6_key->icmpv6_type), is_mask);
- SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
+ SW_FLOW_KEY_PUT(match, tp.dst,
htons(icmpv6_key->icmpv6_code), is_mask);
attrs &= ~(1 << OVS_KEY_ATTR_ICMPV6);
}
@@ -800,7 +731,6 @@ static void sw_flow_mask_set(struct sw_flow_mask *mask,
* attribute specifies the mask field of the wildcarded flow.
*/
int ovs_nla_get_match(struct sw_flow_match *match,
- bool *exact_5tuple,
const struct nlattr *key,
const struct nlattr *mask)
{
@@ -848,13 +778,10 @@ int ovs_nla_get_match(struct sw_flow_match *match,
}
}
- err = ovs_key_from_nlattrs(match, NULL, key_attrs, a, false);
+ err = ovs_key_from_nlattrs(match, key_attrs, a, false);
if (err)
return err;
- if (exact_5tuple)
- *exact_5tuple = true;
-
if (mask) {
err = parse_flow_mask_nlattrs(mask, a, &mask_attrs);
if (err)
@@ -892,7 +819,7 @@ int ovs_nla_get_match(struct sw_flow_match *match,
}
}
- err = ovs_key_from_nlattrs(match, exact_5tuple, mask_attrs, a, true);
+ err = ovs_key_from_nlattrs(match, mask_attrs, a, true);
if (err)
return err;
} else {
@@ -982,8 +909,8 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
goto nla_put_failure;
eth_key = nla_data(nla);
- memcpy(eth_key->eth_src, output->eth.src, ETH_ALEN);
- memcpy(eth_key->eth_dst, output->eth.dst, ETH_ALEN);
+ ether_addr_copy(eth_key->eth_src, output->eth.src);
+ ether_addr_copy(eth_key->eth_dst, output->eth.dst);
if (swkey->eth.tci || swkey->eth.type == htons(ETH_P_8021Q)) {
__be16 eth_type;
@@ -1055,8 +982,8 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
arp_key->arp_sip = output->ipv4.addr.src;
arp_key->arp_tip = output->ipv4.addr.dst;
arp_key->arp_op = htons(output->ip.proto);
- memcpy(arp_key->arp_sha, output->ipv4.arp.sha, ETH_ALEN);
- memcpy(arp_key->arp_tha, output->ipv4.arp.tha, ETH_ALEN);
+ ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha);
+ ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha);
}
if ((swkey->eth.type == htons(ETH_P_IP) ||
@@ -1070,19 +997,11 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
if (!nla)
goto nla_put_failure;
tcp_key = nla_data(nla);
- if (swkey->eth.type == htons(ETH_P_IP)) {
- tcp_key->tcp_src = output->ipv4.tp.src;
- tcp_key->tcp_dst = output->ipv4.tp.dst;
- if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS,
- output->ipv4.tp.flags))
- goto nla_put_failure;
- } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
- tcp_key->tcp_src = output->ipv6.tp.src;
- tcp_key->tcp_dst = output->ipv6.tp.dst;
- if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS,
- output->ipv6.tp.flags))
- goto nla_put_failure;
- }
+ tcp_key->tcp_src = output->tp.src;
+ tcp_key->tcp_dst = output->tp.dst;
+ if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS,
+ output->tp.flags))
+ goto nla_put_failure;
} else if (swkey->ip.proto == IPPROTO_UDP) {
struct ovs_key_udp *udp_key;
@@ -1090,13 +1009,8 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
if (!nla)
goto nla_put_failure;
udp_key = nla_data(nla);
- if (swkey->eth.type == htons(ETH_P_IP)) {
- udp_key->udp_src = output->ipv4.tp.src;
- udp_key->udp_dst = output->ipv4.tp.dst;
- } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
- udp_key->udp_src = output->ipv6.tp.src;
- udp_key->udp_dst = output->ipv6.tp.dst;
- }
+ udp_key->udp_src = output->tp.src;
+ udp_key->udp_dst = output->tp.dst;
} else if (swkey->ip.proto == IPPROTO_SCTP) {
struct ovs_key_sctp *sctp_key;
@@ -1104,13 +1018,8 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
if (!nla)
goto nla_put_failure;
sctp_key = nla_data(nla);
- if (swkey->eth.type == htons(ETH_P_IP)) {
- sctp_key->sctp_src = swkey->ipv4.tp.src;
- sctp_key->sctp_dst = swkey->ipv4.tp.dst;
- } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
- sctp_key->sctp_src = swkey->ipv6.tp.src;
- sctp_key->sctp_dst = swkey->ipv6.tp.dst;
- }
+ sctp_key->sctp_src = output->tp.src;
+ sctp_key->sctp_dst = output->tp.dst;
} else if (swkey->eth.type == htons(ETH_P_IP) &&
swkey->ip.proto == IPPROTO_ICMP) {
struct ovs_key_icmp *icmp_key;
@@ -1119,8 +1028,8 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
if (!nla)
goto nla_put_failure;
icmp_key = nla_data(nla);
- icmp_key->icmp_type = ntohs(output->ipv4.tp.src);
- icmp_key->icmp_code = ntohs(output->ipv4.tp.dst);
+ icmp_key->icmp_type = ntohs(output->tp.src);
+ icmp_key->icmp_code = ntohs(output->tp.dst);
} else if (swkey->eth.type == htons(ETH_P_IPV6) &&
swkey->ip.proto == IPPROTO_ICMPV6) {
struct ovs_key_icmpv6 *icmpv6_key;
@@ -1130,8 +1039,8 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
if (!nla)
goto nla_put_failure;
icmpv6_key = nla_data(nla);
- icmpv6_key->icmpv6_type = ntohs(output->ipv6.tp.src);
- icmpv6_key->icmpv6_code = ntohs(output->ipv6.tp.dst);
+ icmpv6_key->icmpv6_type = ntohs(output->tp.src);
+ icmpv6_key->icmpv6_code = ntohs(output->tp.dst);
if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION ||
icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) {
@@ -1143,8 +1052,8 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
nd_key = nla_data(nla);
memcpy(nd_key->nd_target, &output->ipv6.nd.target,
sizeof(nd_key->nd_target));
- memcpy(nd_key->nd_sll, output->ipv6.nd.sll, ETH_ALEN);
- memcpy(nd_key->nd_tll, output->ipv6.nd.tll, ETH_ALEN);
+ ether_addr_copy(nd_key->nd_sll, output->ipv6.nd.sll);
+ ether_addr_copy(nd_key->nd_tll, output->ipv6.nd.tll);
}
}
}
@@ -1309,13 +1218,10 @@ static int validate_and_copy_sample(const struct nlattr *attr,
static int validate_tp_port(const struct sw_flow_key *flow_key)
{
- if (flow_key->eth.type == htons(ETH_P_IP)) {
- if (flow_key->ipv4.tp.src || flow_key->ipv4.tp.dst)
- return 0;
- } else if (flow_key->eth.type == htons(ETH_P_IPV6)) {
- if (flow_key->ipv6.tp.src || flow_key->ipv6.tp.dst)
- return 0;
- }
+ if ((flow_key->eth.type == htons(ETH_P_IP) ||
+ flow_key->eth.type == htons(ETH_P_IPV6)) &&
+ (flow_key->tp.src || flow_key->tp.dst))
+ return 0;
return -EINVAL;
}
diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h
index b31fbe28bc7..440151045d3 100644
--- a/net/openvswitch/flow_netlink.h
+++ b/net/openvswitch/flow_netlink.h
@@ -45,7 +45,6 @@ int ovs_nla_put_flow(const struct sw_flow_key *,
int ovs_nla_get_flow_metadata(struct sw_flow *flow,
const struct nlattr *attr);
int ovs_nla_get_match(struct sw_flow_match *match,
- bool *exact_5tuple,
const struct nlattr *,
const struct nlattr *);
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index 3c268b3d71c..cf2d853646f 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -48,6 +48,7 @@
#define REHASH_INTERVAL (10 * 60 * HZ)
static struct kmem_cache *flow_cache;
+struct kmem_cache *flow_stats_cache __read_mostly;
static u16 range_n_bytes(const struct sw_flow_key_range *range)
{
@@ -57,8 +58,10 @@ static u16 range_n_bytes(const struct sw_flow_key_range *range)
void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src,
const struct sw_flow_mask *mask)
{
- const long *m = (long *)((u8 *)&mask->key + mask->range.start);
- const long *s = (long *)((u8 *)src + mask->range.start);
+ const long *m = (const long *)((const u8 *)&mask->key +
+ mask->range.start);
+ const long *s = (const long *)((const u8 *)src +
+ mask->range.start);
long *d = (long *)((u8 *)dst + mask->range.start);
int i;
@@ -70,10 +73,11 @@ void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src,
*d++ = *s++ & *m++;
}
-struct sw_flow *ovs_flow_alloc(bool percpu_stats)
+struct sw_flow *ovs_flow_alloc(void)
{
struct sw_flow *flow;
- int cpu;
+ struct flow_stats *stats;
+ int node;
flow = kmem_cache_alloc(flow_cache, GFP_KERNEL);
if (!flow)
@@ -81,27 +85,22 @@ struct sw_flow *ovs_flow_alloc(bool percpu_stats)
flow->sf_acts = NULL;
flow->mask = NULL;
+ flow->stats_last_writer = NUMA_NO_NODE;
- flow->stats.is_percpu = percpu_stats;
+ /* Initialize the default stat node. */
+ stats = kmem_cache_alloc_node(flow_stats_cache,
+ GFP_KERNEL | __GFP_ZERO, 0);
+ if (!stats)
+ goto err;
- if (!percpu_stats) {
- flow->stats.stat = kzalloc(sizeof(*flow->stats.stat), GFP_KERNEL);
- if (!flow->stats.stat)
- goto err;
+ spin_lock_init(&stats->lock);
- spin_lock_init(&flow->stats.stat->lock);
- } else {
- flow->stats.cpu_stats = alloc_percpu(struct flow_stats);
- if (!flow->stats.cpu_stats)
- goto err;
+ RCU_INIT_POINTER(flow->stats[0], stats);
- for_each_possible_cpu(cpu) {
- struct flow_stats *cpu_stats;
+ for_each_node(node)
+ if (node != 0)
+ RCU_INIT_POINTER(flow->stats[node], NULL);
- cpu_stats = per_cpu_ptr(flow->stats.cpu_stats, cpu);
- spin_lock_init(&cpu_stats->lock);
- }
- }
return flow;
err:
kmem_cache_free(flow_cache, flow);
@@ -138,11 +137,13 @@ static struct flex_array *alloc_buckets(unsigned int n_buckets)
static void flow_free(struct sw_flow *flow)
{
- kfree((struct sf_flow_acts __force *)flow->sf_acts);
- if (flow->stats.is_percpu)
- free_percpu(flow->stats.cpu_stats);
- else
- kfree(flow->stats.stat);
+ int node;
+
+ kfree((struct sw_flow_actions __force *)flow->sf_acts);
+ for_each_node(node)
+ if (flow->stats[node])
+ kmem_cache_free(flow_stats_cache,
+ (struct flow_stats __force *)flow->stats[node]);
kmem_cache_free(flow_cache, flow);
}
@@ -158,25 +159,6 @@ void ovs_flow_free(struct sw_flow *flow, bool deferred)
if (!flow)
return;
- if (flow->mask) {
- struct sw_flow_mask *mask = flow->mask;
-
- /* ovs-lock is required to protect mask-refcount and
- * mask list.
- */
- ASSERT_OVSL();
- BUG_ON(!mask->ref_count);
- mask->ref_count--;
-
- if (!mask->ref_count) {
- list_del_rcu(&mask->list);
- if (deferred)
- kfree_rcu(mask, rcu);
- else
- kfree(mask);
- }
- }
-
if (deferred)
call_rcu(&flow->rcu, rcu_free_flow_callback);
else
@@ -375,7 +357,7 @@ int ovs_flow_tbl_flush(struct flow_table *flow_table)
static u32 flow_hash(const struct sw_flow_key *key, int key_start,
int key_end)
{
- u32 *hash_key = (u32 *)((u8 *)key + key_start);
+ const u32 *hash_key = (const u32 *)((const u8 *)key + key_start);
int hash_u32s = (key_end - key_start) >> 2;
/* Make sure number of hash bytes are multiple of u32. */
@@ -397,8 +379,8 @@ static bool cmp_key(const struct sw_flow_key *key1,
const struct sw_flow_key *key2,
int key_start, int key_end)
{
- const long *cp1 = (long *)((u8 *)key1 + key_start);
- const long *cp2 = (long *)((u8 *)key2 + key_start);
+ const long *cp1 = (const long *)((const u8 *)key1 + key_start);
+ const long *cp2 = (const long *)((const u8 *)key2 + key_start);
long diffs = 0;
int i;
@@ -474,6 +456,22 @@ struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl,
return ovs_flow_tbl_lookup_stats(tbl, key, &n_mask_hit);
}
+struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
+ struct sw_flow_match *match)
+{
+ struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
+ struct sw_flow_mask *mask;
+ struct sw_flow *flow;
+
+ /* Always called under ovs-mutex. */
+ list_for_each_entry(mask, &tbl->mask_list, list) {
+ flow = masked_flow_lookup(ti, match->key, mask);
+ if (flow && ovs_flow_cmp_unmasked_key(flow, match)) /* Found */
+ return flow;
+ }
+ return NULL;
+}
+
int ovs_flow_tbl_num_masks(const struct flow_table *table)
{
struct sw_flow_mask *mask;
@@ -490,6 +488,25 @@ static struct table_instance *table_instance_expand(struct table_instance *ti)
return table_instance_rehash(ti, ti->n_buckets * 2);
}
+/* Remove 'mask' from the mask list, if it is not needed any more. */
+static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask)
+{
+ if (mask) {
+ /* ovs-lock is required to protect mask-refcount and
+ * mask list.
+ */
+ ASSERT_OVSL();
+ BUG_ON(!mask->ref_count);
+ mask->ref_count--;
+
+ if (!mask->ref_count) {
+ list_del_rcu(&mask->list);
+ kfree_rcu(mask, rcu);
+ }
+ }
+}
+
+/* Must be called with OVS mutex held. */
void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)
{
struct table_instance *ti = ovsl_dereference(table->ti);
@@ -497,6 +514,11 @@ void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)
BUG_ON(table->count == 0);
hlist_del_rcu(&flow->hash_node[ti->node_ver]);
table->count--;
+
+ /* RCU delete the mask. 'flow->mask' is not NULLed, as it should be
+ * accessible as long as the RCU read lock is held.
+ */
+ flow_mask_remove(table, flow->mask);
}
static struct sw_flow_mask *mask_alloc(void)
@@ -513,8 +535,8 @@ static struct sw_flow_mask *mask_alloc(void)
static bool mask_equal(const struct sw_flow_mask *a,
const struct sw_flow_mask *b)
{
- u8 *a_ = (u8 *)&a->key + a->range.start;
- u8 *b_ = (u8 *)&b->key + b->range.start;
+ const u8 *a_ = (const u8 *)&a->key + a->range.start;
+ const u8 *b_ = (const u8 *)&b->key + b->range.start;
return (a->range.end == b->range.end)
&& (a->range.start == b->range.start)
@@ -559,6 +581,7 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
return 0;
}
+/* Must be called with OVS mutex held. */
int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
struct sw_flow_mask *mask)
{
@@ -597,16 +620,28 @@ int ovs_flow_init(void)
BUILD_BUG_ON(__alignof__(struct sw_flow_key) % __alignof__(long));
BUILD_BUG_ON(sizeof(struct sw_flow_key) % sizeof(long));
- flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow), 0,
- 0, NULL);
+ flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow)
+ + (num_possible_nodes()
+ * sizeof(struct flow_stats *)),
+ 0, 0, NULL);
if (flow_cache == NULL)
return -ENOMEM;
+ flow_stats_cache
+ = kmem_cache_create("sw_flow_stats", sizeof(struct flow_stats),
+ 0, SLAB_HWCACHE_ALIGN, NULL);
+ if (flow_stats_cache == NULL) {
+ kmem_cache_destroy(flow_cache);
+ flow_cache = NULL;
+ return -ENOMEM;
+ }
+
return 0;
}
/* Uninitializes the flow module. */
void ovs_flow_exit(void)
{
+ kmem_cache_destroy(flow_stats_cache);
kmem_cache_destroy(flow_cache);
}
diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h
index baaeb101924..5918bff7f3f 100644
--- a/net/openvswitch/flow_table.h
+++ b/net/openvswitch/flow_table.h
@@ -52,10 +52,12 @@ struct flow_table {
unsigned int count;
};
+extern struct kmem_cache *flow_stats_cache;
+
int ovs_flow_init(void);
void ovs_flow_exit(void);
-struct sw_flow *ovs_flow_alloc(bool percpu_stats);
+struct sw_flow *ovs_flow_alloc(void);
void ovs_flow_free(struct sw_flow *, bool deferred);
int ovs_flow_tbl_init(struct flow_table *);
@@ -74,7 +76,8 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *,
u32 *n_mask_hit);
struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *,
const struct sw_flow_key *);
-
+struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
+ struct sw_flow_match *match);
bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
struct sw_flow_match *match);
diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c
index a3d6951602d..f49148a07da 100644
--- a/net/openvswitch/vport-gre.c
+++ b/net/openvswitch/vport-gre.c
@@ -110,6 +110,22 @@ static int gre_rcv(struct sk_buff *skb,
return PACKET_RCVD;
}
+/* Called with rcu_read_lock and BH disabled. */
+static int gre_err(struct sk_buff *skb, u32 info,
+ const struct tnl_ptk_info *tpi)
+{
+ struct ovs_net *ovs_net;
+ struct vport *vport;
+
+ ovs_net = net_generic(dev_net(skb->dev), ovs_net_id);
+ vport = rcu_dereference(ovs_net->vport_net.gre_vport);
+
+ if (unlikely(!vport))
+ return PACKET_REJECT;
+ else
+ return PACKET_RCVD;
+}
+
static int gre_tnl_send(struct vport *vport, struct sk_buff *skb)
{
struct net *net = ovs_dp_get_net(vport->dp);
@@ -172,9 +188,9 @@ static int gre_tnl_send(struct vport *vport, struct sk_buff *skb)
df = OVS_CB(skb)->tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ?
htons(IP_DF) : 0;
- skb->local_df = 1;
+ skb->ignore_df = 1;
- return iptunnel_xmit(rt, skb, fl.saddr,
+ return iptunnel_xmit(skb->sk, rt, skb, fl.saddr,
OVS_CB(skb)->tun_key->ipv4_dst, IPPROTO_GRE,
OVS_CB(skb)->tun_key->ipv4_tos,
OVS_CB(skb)->tun_key->ipv4_ttl, df, false);
@@ -186,6 +202,7 @@ error:
static struct gre_cisco_protocol gre_protocol = {
.handler = gre_rcv,
+ .err_handler = gre_err,
.priority = 1,
};
@@ -256,7 +273,7 @@ static void gre_tnl_destroy(struct vport *vport)
ovs_net = net_generic(net, ovs_net_id);
- rcu_assign_pointer(ovs_net->vport_net.gre_vport, NULL);
+ RCU_INIT_POINTER(ovs_net->vport_net.gre_vport, NULL);
ovs_vport_deferred_free(vport);
gre_exit();
}
diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c
index 729c68763fe..789af9280e7 100644
--- a/net/openvswitch/vport-internal_dev.c
+++ b/net/openvswitch/vport-internal_dev.c
@@ -130,7 +130,7 @@ static void do_setup(struct net_device *netdev)
netdev->priv_flags &= ~IFF_TX_SKB_SHARING;
netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
netdev->destructor = internal_dev_destructor;
- SET_ETHTOOL_OPS(netdev, &internal_dev_ethtool_ops);
+ netdev->ethtool_ops = &internal_dev_ethtool_ops;
netdev->tx_queue_len = 0;
netdev->features = NETIF_F_LLTX | NETIF_F_SG | NETIF_F_FRAGLIST |
diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c
index e797a50ac2b..0edbd95c60e 100644
--- a/net/openvswitch/vport-vxlan.c
+++ b/net/openvswitch/vport-vxlan.c
@@ -122,7 +122,7 @@ static struct vport *vxlan_tnl_create(const struct vport_parms *parms)
vxlan_port = vxlan_vport(vport);
strncpy(vxlan_port->name, parms->name, IFNAMSIZ);
- vs = vxlan_sock_add(net, htons(dst_port), vxlan_rcv, vport, true, false);
+ vs = vxlan_sock_add(net, htons(dst_port), vxlan_rcv, vport, true, 0);
if (IS_ERR(vs)) {
ovs_vport_free(vport);
return (void *)vs;
@@ -170,7 +170,7 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb)
df = OVS_CB(skb)->tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ?
htons(IP_DF) : 0;
- skb->local_df = 1;
+ skb->ignore_df = 1;
inet_get_local_port_range(net, &port_min, &port_max);
src_port = vxlan_src_port(port_min, port_max, skb);
@@ -180,7 +180,8 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb)
OVS_CB(skb)->tun_key->ipv4_tos,
OVS_CB(skb)->tun_key->ipv4_ttl, df,
src_port, dst_port,
- htonl(be64_to_cpu(OVS_CB(skb)->tun_key->tun_id) << 8));
+ htonl(be64_to_cpu(OVS_CB(skb)->tun_key->tun_id) << 8),
+ false);
if (err < 0)
ip_rt_put(rt);
error:
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 208dd9a26dd..42c0f4a0b78 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -121,7 +121,6 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops,
{
struct vport *vport;
size_t alloc_size;
- int i;
alloc_size = sizeof(struct vport);
if (priv_size) {
@@ -139,19 +138,12 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops,
vport->ops = ops;
INIT_HLIST_NODE(&vport->dp_hash_node);
- vport->percpu_stats = alloc_percpu(struct pcpu_sw_netstats);
+ vport->percpu_stats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
if (!vport->percpu_stats) {
kfree(vport);
return ERR_PTR(-ENOMEM);
}
- for_each_possible_cpu(i) {
- struct pcpu_sw_netstats *vport_stats;
- vport_stats = per_cpu_ptr(vport->percpu_stats, i);
- u64_stats_init(&vport_stats->syncp);
- }
-
-
spin_lock_init(&vport->stats_lock);
return vport;
@@ -285,9 +277,9 @@ void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats)
percpu_stats = per_cpu_ptr(vport->percpu_stats, i);
do {
- start = u64_stats_fetch_begin_bh(&percpu_stats->syncp);
+ start = u64_stats_fetch_begin_irq(&percpu_stats->syncp);
local_stats = *percpu_stats;
- } while (u64_stats_fetch_retry_bh(&percpu_stats->syncp, start));
+ } while (u64_stats_fetch_retry_irq(&percpu_stats->syncp, start));
stats->rx_bytes += local_stats.rx_bytes;
stats->rx_packets += local_stats.rx_packets;
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
index d7e50a17396..8d721e62f38 100644
--- a/net/openvswitch/vport.h
+++ b/net/openvswitch/vport.h
@@ -172,7 +172,7 @@ void ovs_vport_deferred_free(struct vport *vport);
*/
static inline void *vport_priv(const struct vport *vport)
{
- return (u8 *)vport + ALIGN(sizeof(struct vport), VPORT_ALIGN);
+ return (u8 *)(uintptr_t)vport + ALIGN(sizeof(struct vport), VPORT_ALIGN);
}
/**
@@ -185,9 +185,9 @@ static inline void *vport_priv(const struct vport *vport)
* the result of a hash table lookup. @priv must point to the start of the
* private data area.
*/
-static inline struct vport *vport_from_priv(const void *priv)
+static inline struct vport *vport_from_priv(void *priv)
{
- return (struct vport *)(priv - ALIGN(sizeof(struct vport), VPORT_ALIGN));
+ return (struct vport *)((u8 *)priv - ALIGN(sizeof(struct vport), VPORT_ALIGN));
}
void ovs_vport_receive(struct vport *, struct sk_buff *,
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 48a6a93db29..b85c67ccb79 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -243,40 +243,41 @@ static int packet_direct_xmit(struct sk_buff *skb)
const struct net_device_ops *ops = dev->netdev_ops;
netdev_features_t features;
struct netdev_queue *txq;
+ int ret = NETDEV_TX_BUSY;
u16 queue_map;
- int ret;
if (unlikely(!netif_running(dev) ||
- !netif_carrier_ok(dev))) {
- kfree_skb(skb);
- return NET_XMIT_DROP;
- }
+ !netif_carrier_ok(dev)))
+ goto drop;
features = netif_skb_features(skb);
if (skb_needs_linearize(skb, features) &&
- __skb_linearize(skb)) {
- kfree_skb(skb);
- return NET_XMIT_DROP;
- }
+ __skb_linearize(skb))
+ goto drop;
queue_map = skb_get_queue_mapping(skb);
txq = netdev_get_tx_queue(dev, queue_map);
- __netif_tx_lock_bh(txq);
- if (unlikely(netif_xmit_frozen_or_stopped(txq))) {
- ret = NETDEV_TX_BUSY;
- kfree_skb(skb);
- goto out;
+ local_bh_disable();
+
+ HARD_TX_LOCK(dev, txq, smp_processor_id());
+ if (!netif_xmit_frozen_or_drv_stopped(txq)) {
+ ret = ops->ndo_start_xmit(skb, dev);
+ if (ret == NETDEV_TX_OK)
+ txq_trans_update(txq);
}
+ HARD_TX_UNLOCK(dev, txq);
- ret = ops->ndo_start_xmit(skb, dev);
- if (likely(dev_xmit_complete(ret)))
- txq_trans_update(txq);
- else
+ local_bh_enable();
+
+ if (!dev_xmit_complete(ret))
kfree_skb(skb);
-out:
- __netif_tx_unlock_bh(txq);
+
return ret;
+drop:
+ atomic_long_inc(&dev->tx_dropped);
+ kfree_skb(skb);
+ return NET_XMIT_DROP;
}
static struct net_device *packet_cached_dev_get(struct packet_sock *po)
@@ -1277,7 +1278,7 @@ static unsigned int fanout_demux_hash(struct packet_fanout *f,
struct sk_buff *skb,
unsigned int num)
{
- return reciprocal_scale(skb->rxhash, num);
+ return reciprocal_scale(skb_get_hash(skb), num);
}
static unsigned int fanout_demux_lb(struct packet_fanout *f,
@@ -1362,7 +1363,6 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev,
if (!skb)
return 0;
}
- skb_get_hash(skb);
idx = fanout_demux_hash(f, skb, num);
break;
case PACKET_FANOUT_LB:
@@ -1848,7 +1848,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
skb->dropcount = atomic_read(&sk->sk_drops);
__skb_queue_tail(&sk->sk_receive_queue, skb);
spin_unlock(&sk->sk_receive_queue.lock);
- sk->sk_data_ready(sk, skb->len);
+ sk->sk_data_ready(sk);
return 0;
drop_n_acct:
@@ -2054,7 +2054,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
else
prb_clear_blk_fill_status(&po->rx_ring);
- sk->sk_data_ready(sk, 0);
+ sk->sk_data_ready(sk);
drop_n_restore:
if (skb_head != skb->data && skb_shared(skb)) {
@@ -2069,7 +2069,7 @@ ring_is_full:
po->stats.stats1.tp_drops++;
spin_unlock(&sk->sk_receive_queue.lock);
- sk->sk_data_ready(sk, 0);
+ sk->sk_data_ready(sk);
kfree_skb(copy_skb);
goto drop_n_restore;
}
@@ -2257,8 +2257,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
if (unlikely(!(dev->flags & IFF_UP)))
goto out_put;
- reserve = dev->hard_header_len;
-
+ reserve = dev->hard_header_len + VLAN_HLEN;
size_max = po->tx_ring.frame_size
- (po->tp_hdrlen - sizeof(struct sockaddr_ll));
@@ -2285,8 +2284,19 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
goto out_status;
tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto,
- addr, hlen);
+ addr, hlen);
+ if (tp_len > dev->mtu + dev->hard_header_len) {
+ struct ethhdr *ehdr;
+ /* Earlier code assumed this would be a VLAN pkt,
+ * double-check this now that we have the actual
+ * packet in hand.
+ */
+ skb_reset_mac_header(skb);
+ ehdr = eth_hdr(skb);
+ if (ehdr->h_proto != htons(ETH_P_8021Q))
+ tp_len = -EMSGSIZE;
+ }
if (unlikely(tp_len < 0)) {
if (po->tp_loss) {
__packet_set_status(po, ph,
diff --git a/net/packet/diag.c b/net/packet/diag.c
index 533ce4ff108..92f2c7107ee 100644
--- a/net/packet/diag.c
+++ b/net/packet/diag.c
@@ -128,6 +128,7 @@ static int pdiag_put_fanout(struct packet_sock *po, struct sk_buff *nlskb)
static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
struct packet_diag_req *req,
+ bool may_report_filterinfo,
struct user_namespace *user_ns,
u32 portid, u32 seq, u32 flags, int sk_ino)
{
@@ -172,7 +173,8 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
goto out_nlmsg_trim;
if ((req->pdiag_show & PACKET_SHOW_FILTER) &&
- sock_diag_put_filterinfo(user_ns, sk, skb, PACKET_DIAG_FILTER))
+ sock_diag_put_filterinfo(may_report_filterinfo, sk, skb,
+ PACKET_DIAG_FILTER))
goto out_nlmsg_trim;
return nlmsg_end(skb, nlh);
@@ -188,9 +190,11 @@ static int packet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
struct packet_diag_req *req;
struct net *net;
struct sock *sk;
+ bool may_report_filterinfo;
net = sock_net(skb->sk);
req = nlmsg_data(cb->nlh);
+ may_report_filterinfo = netlink_net_capable(cb->skb, CAP_NET_ADMIN);
mutex_lock(&net->packet.sklist_lock);
sk_for_each(sk, &net->packet.sklist) {
@@ -200,6 +204,7 @@ static int packet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
goto next;
if (sk_diag_fill(sk, skb, req,
+ may_report_filterinfo,
sk_user_ns(NETLINK_CB(cb->skb).sk),
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
diff --git a/net/phonet/pep-gprs.c b/net/phonet/pep-gprs.c
index a2fba7edfd1..66dc65e7c6a 100644
--- a/net/phonet/pep-gprs.c
+++ b/net/phonet/pep-gprs.c
@@ -37,7 +37,7 @@
struct gprs_dev {
struct sock *sk;
void (*old_state_change)(struct sock *);
- void (*old_data_ready)(struct sock *, int);
+ void (*old_data_ready)(struct sock *);
void (*old_write_space)(struct sock *);
struct net_device *dev;
@@ -146,7 +146,7 @@ drop:
return err;
}
-static void gprs_data_ready(struct sock *sk, int len)
+static void gprs_data_ready(struct sock *sk)
{
struct gprs_dev *gp = sk->sk_user_data;
struct sk_buff *skb;
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index e77411735de..70a547ea517 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -462,10 +462,9 @@ out:
queue:
skb->dev = NULL;
skb_set_owner_r(skb, sk);
- err = skb->len;
skb_queue_tail(queue, skb);
if (!sock_flag(sk, SOCK_DEAD))
- sk->sk_data_ready(sk, err);
+ sk->sk_data_ready(sk);
return NET_RX_SUCCESS;
}
@@ -587,10 +586,9 @@ static int pipe_handler_do_rcv(struct sock *sk, struct sk_buff *skb)
pn->rx_credits--;
skb->dev = NULL;
skb_set_owner_r(skb, sk);
- err = skb->len;
skb_queue_tail(&sk->sk_receive_queue, skb);
if (!sock_flag(sk, SOCK_DEAD))
- sk->sk_data_ready(sk, err);
+ sk->sk_data_ready(sk);
return NET_RX_SUCCESS;
case PNS_PEP_CONNECT_RESP:
@@ -698,7 +696,7 @@ static int pep_do_rcv(struct sock *sk, struct sk_buff *skb)
skb_queue_head(&sk->sk_receive_queue, skb);
sk_acceptq_added(sk);
if (!sock_flag(sk, SOCK_DEAD))
- sk->sk_data_ready(sk, 0);
+ sk->sk_data_ready(sk);
return NET_RX_SUCCESS;
case PNS_PEP_DISCONNECT_REQ:
diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c
index dc15f430080..b64151ade6b 100644
--- a/net/phonet/pn_netlink.c
+++ b/net/phonet/pn_netlink.c
@@ -70,10 +70,10 @@ static int addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh)
int err;
u8 pnaddr;
- if (!capable(CAP_NET_ADMIN))
+ if (!netlink_capable(skb, CAP_NET_ADMIN))
return -EPERM;
- if (!capable(CAP_SYS_ADMIN))
+ if (!netlink_capable(skb, CAP_SYS_ADMIN))
return -EPERM;
ASSERT_RTNL();
@@ -233,10 +233,10 @@ static int route_doit(struct sk_buff *skb, struct nlmsghdr *nlh)
int err;
u8 dst;
- if (!capable(CAP_NET_ADMIN))
+ if (!netlink_capable(skb, CAP_NET_ADMIN))
return -EPERM;
- if (!capable(CAP_SYS_ADMIN))
+ if (!netlink_capable(skb, CAP_SYS_ADMIN))
return -EPERM;
ASSERT_RTNL();
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index b7ebe23cded..d67de453c35 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -598,7 +598,7 @@ static void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq,
{
atomic64_set(&ic->i_ack_next, seq);
if (ack_required) {
- smp_mb__before_clear_bit();
+ smp_mb__before_atomic();
set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
}
}
@@ -606,7 +606,7 @@ static void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq,
static u64 rds_ib_get_ack(struct rds_ib_connection *ic)
{
clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
- smp_mb__after_clear_bit();
+ smp_mb__after_atomic();
return atomic64_read(&ic->i_ack_next);
}
diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
index 37be6e226d1..1dde91e3dc7 100644
--- a/net/rds/ib_send.c
+++ b/net/rds/ib_send.c
@@ -298,7 +298,7 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context)
rds_ib_stats_inc(s_ib_tx_cq_event);
if (wc.wr_id == RDS_IB_ACK_WR_ID) {
- if (ic->i_ack_queued + HZ/2 < jiffies)
+ if (time_after(jiffies, ic->i_ack_queued + HZ/2))
rds_ib_stats_inc(s_ib_tx_stalled);
rds_ib_ack_send_complete(ic);
continue;
@@ -315,7 +315,7 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context)
rm = rds_ib_send_unmap_op(ic, send, wc.status);
- if (send->s_queued + HZ/2 < jiffies)
+ if (time_after(jiffies, send->s_queued + HZ/2))
rds_ib_stats_inc(s_ib_tx_stalled);
if (send->s_op) {
diff --git a/net/rds/iw.c b/net/rds/iw.c
index 7826d46baa7..589935661d6 100644
--- a/net/rds/iw.c
+++ b/net/rds/iw.c
@@ -239,7 +239,8 @@ static int rds_iw_laddr_check(__be32 addr)
ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin);
/* due to this, we will claim to support IB devices unless we
check node_type. */
- if (ret || cm_id->device->node_type != RDMA_NODE_RNIC)
+ if (ret || !cm_id->device ||
+ cm_id->device->node_type != RDMA_NODE_RNIC)
ret = -EADDRNOTAVAIL;
rdsdebug("addr %pI4 ret %d node type %d\n",
diff --git a/net/rds/iw_recv.c b/net/rds/iw_recv.c
index 45033358358..aa8bf678600 100644
--- a/net/rds/iw_recv.c
+++ b/net/rds/iw_recv.c
@@ -429,7 +429,7 @@ static void rds_iw_set_ack(struct rds_iw_connection *ic, u64 seq,
{
atomic64_set(&ic->i_ack_next, seq);
if (ack_required) {
- smp_mb__before_clear_bit();
+ smp_mb__before_atomic();
set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
}
}
@@ -437,7 +437,7 @@ static void rds_iw_set_ack(struct rds_iw_connection *ic, u64 seq,
static u64 rds_iw_get_ack(struct rds_iw_connection *ic)
{
clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
- smp_mb__after_clear_bit();
+ smp_mb__after_atomic();
return atomic64_read(&ic->i_ack_next);
}
diff --git a/net/rds/iw_send.c b/net/rds/iw_send.c
index e40c3c5db2c..9105ea03aec 100644
--- a/net/rds/iw_send.c
+++ b/net/rds/iw_send.c
@@ -232,7 +232,7 @@ void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context)
}
if (wc.wr_id == RDS_IW_ACK_WR_ID) {
- if (ic->i_ack_queued + HZ/2 < jiffies)
+ if (time_after(jiffies, ic->i_ack_queued + HZ/2))
rds_iw_stats_inc(s_iw_tx_stalled);
rds_iw_ack_send_complete(ic);
continue;
@@ -267,7 +267,7 @@ void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context)
send->s_wr.opcode = 0xdead;
send->s_wr.num_sge = 1;
- if (send->s_queued + HZ/2 < jiffies)
+ if (time_after(jiffies, send->s_queued + HZ/2))
rds_iw_stats_inc(s_iw_tx_stalled);
/* If a RDMA operation produced an error, signal this right
diff --git a/net/rds/iw_sysctl.c b/net/rds/iw_sysctl.c
index 89c91515ed0..139239d2cb2 100644
--- a/net/rds/iw_sysctl.c
+++ b/net/rds/iw_sysctl.c
@@ -111,8 +111,7 @@ static struct ctl_table rds_iw_sysctl_table[] = {
void rds_iw_sysctl_exit(void)
{
- if (rds_iw_sysctl_hdr)
- unregister_net_sysctl_table(rds_iw_sysctl_hdr);
+ unregister_net_sysctl_table(rds_iw_sysctl_hdr);
}
int rds_iw_sysctl_init(void)
diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c
index c2be901d19e..6cd9d1deafc 100644
--- a/net/rds/rdma_transport.c
+++ b/net/rds/rdma_transport.c
@@ -168,7 +168,7 @@ static int rds_rdma_listen_init(void)
return ret;
}
- sin.sin_family = AF_INET,
+ sin.sin_family = AF_INET;
sin.sin_addr.s_addr = (__force u32)htonl(INADDR_ANY);
sin.sin_port = (__force u16)htons(RDS_PORT);
diff --git a/net/rds/send.c b/net/rds/send.c
index a82fb660ec0..23718160d71 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -107,7 +107,7 @@ static int acquire_in_xmit(struct rds_connection *conn)
static void release_in_xmit(struct rds_connection *conn)
{
clear_bit(RDS_IN_XMIT, &conn->c_flags);
- smp_mb__after_clear_bit();
+ smp_mb__after_atomic();
/*
* We don't use wait_on_bit()/wake_up_bit() because our waking is in a
* hot path and finding waiters is very rare. We don't want to walk
@@ -661,7 +661,7 @@ void rds_send_drop_acked(struct rds_connection *conn, u64 ack,
/* order flag updates with spin locks */
if (!list_empty(&list))
- smp_mb__after_clear_bit();
+ smp_mb__after_atomic();
spin_unlock_irqrestore(&conn->c_lock, flags);
@@ -691,7 +691,7 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
}
/* order flag updates with the rs lock */
- smp_mb__after_clear_bit();
+ smp_mb__after_atomic();
spin_unlock_irqrestore(&rs->rs_lock, flags);
diff --git a/net/rds/sysctl.c b/net/rds/sysctl.c
index b5cb2aa08f3..c3b0cd43eb5 100644
--- a/net/rds/sysctl.c
+++ b/net/rds/sysctl.c
@@ -94,8 +94,7 @@ static struct ctl_table rds_sysctl_rds_table[] = {
void rds_sysctl_exit(void)
{
- if (rds_sysctl_reg_table)
- unregister_net_sysctl_table(rds_sysctl_reg_table);
+ unregister_net_sysctl_table(rds_sysctl_reg_table);
}
int rds_sysctl_init(void)
diff --git a/net/rds/tcp.h b/net/rds/tcp.h
index 9cf2927d002..65637491f72 100644
--- a/net/rds/tcp.h
+++ b/net/rds/tcp.h
@@ -61,12 +61,12 @@ void rds_tcp_state_change(struct sock *sk);
/* tcp_listen.c */
int rds_tcp_listen_init(void);
void rds_tcp_listen_stop(void);
-void rds_tcp_listen_data_ready(struct sock *sk, int bytes);
+void rds_tcp_listen_data_ready(struct sock *sk);
/* tcp_recv.c */
int rds_tcp_recv_init(void);
void rds_tcp_recv_exit(void);
-void rds_tcp_data_ready(struct sock *sk, int bytes);
+void rds_tcp_data_ready(struct sock *sk);
int rds_tcp_recv(struct rds_connection *conn);
void rds_tcp_inc_free(struct rds_incoming *inc);
int rds_tcp_inc_copy_to_user(struct rds_incoming *inc, struct iovec *iov,
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index 7787537e9c2..23ab4dcd1d9 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -108,9 +108,9 @@ static void rds_tcp_accept_worker(struct work_struct *work)
cond_resched();
}
-void rds_tcp_listen_data_ready(struct sock *sk, int bytes)
+void rds_tcp_listen_data_ready(struct sock *sk)
{
- void (*ready)(struct sock *sk, int bytes);
+ void (*ready)(struct sock *sk);
rdsdebug("listen data ready sk %p\n", sk);
@@ -132,7 +132,7 @@ void rds_tcp_listen_data_ready(struct sock *sk, int bytes)
out:
read_unlock(&sk->sk_callback_lock);
- ready(sk, bytes);
+ ready(sk);
}
int rds_tcp_listen_init(void)
@@ -153,7 +153,7 @@ int rds_tcp_listen_init(void)
sock->sk->sk_data_ready = rds_tcp_listen_data_ready;
write_unlock_bh(&sock->sk->sk_callback_lock);
- sin.sin_family = PF_INET,
+ sin.sin_family = PF_INET;
sin.sin_addr.s_addr = (__force u32)htonl(INADDR_ANY);
sin.sin_port = (__force u16)htons(RDS_TCP_PORT);
diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c
index 4fac4f2bb9d..9ae6e0a264e 100644
--- a/net/rds/tcp_recv.c
+++ b/net/rds/tcp_recv.c
@@ -314,13 +314,13 @@ int rds_tcp_recv(struct rds_connection *conn)
return ret;
}
-void rds_tcp_data_ready(struct sock *sk, int bytes)
+void rds_tcp_data_ready(struct sock *sk)
{
- void (*ready)(struct sock *sk, int bytes);
+ void (*ready)(struct sock *sk);
struct rds_connection *conn;
struct rds_tcp_connection *tc;
- rdsdebug("data ready sk %p bytes %d\n", sk, bytes);
+ rdsdebug("data ready sk %p\n", sk);
read_lock(&sk->sk_callback_lock);
conn = sk->sk_user_data;
@@ -337,7 +337,7 @@ void rds_tcp_data_ready(struct sock *sk, int bytes)
queue_delayed_work(rds_wq, &conn->c_recv_w, 0);
out:
read_unlock(&sk->sk_callback_lock);
- ready(sk, bytes);
+ ready(sk);
}
int rds_tcp_recv_init(void)
diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c
index 81cf5a4c5e4..53b17ca0dff 100644
--- a/net/rds/tcp_send.c
+++ b/net/rds/tcp_send.c
@@ -93,7 +93,7 @@ int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm,
rm->m_ack_seq = tc->t_last_sent_nxt +
sizeof(struct rds_header) +
be32_to_cpu(rm->m_inc.i_hdr.h_len) - 1;
- smp_mb__before_clear_bit();
+ smp_mb__before_atomic();
set_bit(RDS_MSG_HAS_ACK_SEQ, &rm->m_flags);
tc->t_last_expected_una = rm->m_ack_seq + 1;
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index ed7e0b4e7f9..b3b16c070a7 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -789,7 +789,8 @@ void rfkill_resume_polling(struct rfkill *rfkill)
if (!rfkill->ops->poll)
return;
- schedule_work(&rfkill->poll_work.work);
+ queue_delayed_work(system_power_efficient_wq,
+ &rfkill->poll_work, 0);
}
EXPORT_SYMBOL(rfkill_resume_polling);
@@ -894,7 +895,8 @@ static void rfkill_poll(struct work_struct *work)
*/
rfkill->ops->poll(rfkill, rfkill->data);
- schedule_delayed_work(&rfkill->poll_work,
+ queue_delayed_work(system_power_efficient_wq,
+ &rfkill->poll_work,
round_jiffies_relative(POLL_INTERVAL));
}
@@ -958,7 +960,8 @@ int __must_check rfkill_register(struct rfkill *rfkill)
INIT_WORK(&rfkill->sync_work, rfkill_sync_work);
if (rfkill->ops->poll)
- schedule_delayed_work(&rfkill->poll_work,
+ queue_delayed_work(system_power_efficient_wq,
+ &rfkill->poll_work,
round_jiffies_relative(POLL_INTERVAL));
if (!rfkill->persistent || rfkill_epo_lock_active) {
diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c
index bd2a5b90400..14c98e48f26 100644
--- a/net/rfkill/rfkill-gpio.c
+++ b/net/rfkill/rfkill-gpio.c
@@ -36,8 +36,6 @@ struct rfkill_gpio_data {
struct gpio_desc *shutdown_gpio;
struct rfkill *rfkill_dev;
- char *reset_name;
- char *shutdown_name;
struct clk *clk;
bool clk_enabled;
@@ -47,17 +45,14 @@ static int rfkill_gpio_set_power(void *data, bool blocked)
{
struct rfkill_gpio_data *rfkill = data;
- if (blocked) {
- gpiod_set_value(rfkill->shutdown_gpio, 0);
- gpiod_set_value(rfkill->reset_gpio, 0);
- if (!IS_ERR(rfkill->clk) && rfkill->clk_enabled)
- clk_disable(rfkill->clk);
- } else {
- if (!IS_ERR(rfkill->clk) && !rfkill->clk_enabled)
- clk_enable(rfkill->clk);
- gpiod_set_value(rfkill->reset_gpio, 1);
- gpiod_set_value(rfkill->shutdown_gpio, 1);
- }
+ if (!blocked && !IS_ERR(rfkill->clk) && !rfkill->clk_enabled)
+ clk_enable(rfkill->clk);
+
+ gpiod_set_value_cansleep(rfkill->shutdown_gpio, !blocked);
+ gpiod_set_value_cansleep(rfkill->reset_gpio, !blocked);
+
+ if (blocked && !IS_ERR(rfkill->clk) && rfkill->clk_enabled)
+ clk_disable(rfkill->clk);
rfkill->clk_enabled = blocked;
@@ -87,10 +82,8 @@ static int rfkill_gpio_probe(struct platform_device *pdev)
{
struct rfkill_gpio_platform_data *pdata = pdev->dev.platform_data;
struct rfkill_gpio_data *rfkill;
- const char *clk_name = NULL;
struct gpio_desc *gpio;
int ret;
- int len;
rfkill = devm_kzalloc(&pdev->dev, sizeof(*rfkill), GFP_KERNEL);
if (!rfkill)
@@ -101,28 +94,15 @@ static int rfkill_gpio_probe(struct platform_device *pdev)
if (ret)
return ret;
} else if (pdata) {
- clk_name = pdata->power_clk_name;
rfkill->name = pdata->name;
rfkill->type = pdata->type;
} else {
return -ENODEV;
}
- len = strlen(rfkill->name);
- rfkill->reset_name = devm_kzalloc(&pdev->dev, len + 7, GFP_KERNEL);
- if (!rfkill->reset_name)
- return -ENOMEM;
-
- rfkill->shutdown_name = devm_kzalloc(&pdev->dev, len + 10, GFP_KERNEL);
- if (!rfkill->shutdown_name)
- return -ENOMEM;
+ rfkill->clk = devm_clk_get(&pdev->dev, NULL);
- snprintf(rfkill->reset_name, len + 6 , "%s_reset", rfkill->name);
- snprintf(rfkill->shutdown_name, len + 9, "%s_shutdown", rfkill->name);
-
- rfkill->clk = devm_clk_get(&pdev->dev, clk_name);
-
- gpio = devm_gpiod_get_index(&pdev->dev, rfkill->reset_name, 0);
+ gpio = devm_gpiod_get_index(&pdev->dev, "reset", 0);
if (!IS_ERR(gpio)) {
ret = gpiod_direction_output(gpio, 0);
if (ret)
@@ -130,7 +110,7 @@ static int rfkill_gpio_probe(struct platform_device *pdev)
rfkill->reset_gpio = gpio;
}
- gpio = devm_gpiod_get_index(&pdev->dev, rfkill->shutdown_name, 1);
+ gpio = devm_gpiod_get_index(&pdev->dev, "shutdown", 1);
if (!IS_ERR(gpio)) {
ret = gpiod_direction_output(gpio, 0);
if (ret)
@@ -146,14 +126,6 @@ static int rfkill_gpio_probe(struct platform_device *pdev)
return -EINVAL;
}
- if (pdata && pdata->gpio_runtime_setup) {
- ret = pdata->gpio_runtime_setup(pdev);
- if (ret) {
- dev_err(&pdev->dev, "can't set up gpio\n");
- return ret;
- }
- }
-
rfkill->rfkill_dev = rfkill_alloc(rfkill->name, &pdev->dev,
rfkill->type, &rfkill_gpio_ops,
rfkill);
@@ -174,20 +146,23 @@ static int rfkill_gpio_probe(struct platform_device *pdev)
static int rfkill_gpio_remove(struct platform_device *pdev)
{
struct rfkill_gpio_data *rfkill = platform_get_drvdata(pdev);
- struct rfkill_gpio_platform_data *pdata = pdev->dev.platform_data;
- if (pdata && pdata->gpio_runtime_close)
- pdata->gpio_runtime_close(pdev);
rfkill_unregister(rfkill->rfkill_dev);
rfkill_destroy(rfkill->rfkill_dev);
return 0;
}
+#ifdef CONFIG_ACPI
static const struct acpi_device_id rfkill_acpi_match[] = {
+ { "BCM2E1A", RFKILL_TYPE_BLUETOOTH },
+ { "BCM2E39", RFKILL_TYPE_BLUETOOTH },
+ { "BCM2E3D", RFKILL_TYPE_BLUETOOTH },
{ "BCM4752", RFKILL_TYPE_GPS },
+ { "LNV4752", RFKILL_TYPE_GPS },
{ },
};
+#endif
static struct platform_driver rfkill_gpio_driver = {
.probe = rfkill_gpio_probe,
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index c2cca2ee6ae..8451c8cdc9d 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -1041,7 +1041,7 @@ int rose_rx_call_request(struct sk_buff *skb, struct net_device *dev, struct ros
rose_start_heartbeat(make);
if (!sock_flag(sk, SOCK_DEAD))
- sk->sk_data_ready(sk, skb->len);
+ sk->sk_data_ready(sk);
return 1;
}
diff --git a/net/rxrpc/Makefile b/net/rxrpc/Makefile
index d1c3429b69e..ec126f91276 100644
--- a/net/rxrpc/Makefile
+++ b/net/rxrpc/Makefile
@@ -20,9 +20,8 @@ af-rxrpc-y := \
ar-skbuff.o \
ar-transport.o
-ifeq ($(CONFIG_PROC_FS),y)
-af-rxrpc-y += ar-proc.o
-endif
+af-rxrpc-$(CONFIG_PROC_FS) += ar-proc.o
+af-rxrpc-$(CONFIG_SYSCTL) += sysctl.o
obj-$(CONFIG_AF_RXRPC) += af-rxrpc.o
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index e61aa6001c6..7b167048963 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -838,6 +838,12 @@ static int __init af_rxrpc_init(void)
goto error_key_type_s;
}
+ ret = rxrpc_sysctl_init();
+ if (ret < 0) {
+ printk(KERN_CRIT "RxRPC: Cannot register sysctls\n");
+ goto error_sysctls;
+ }
+
#ifdef CONFIG_PROC_FS
proc_create("rxrpc_calls", 0, init_net.proc_net, &rxrpc_call_seq_fops);
proc_create("rxrpc_conns", 0, init_net.proc_net,
@@ -845,6 +851,8 @@ static int __init af_rxrpc_init(void)
#endif
return 0;
+error_sysctls:
+ unregister_key_type(&key_type_rxrpc_s);
error_key_type_s:
unregister_key_type(&key_type_rxrpc);
error_key_type:
@@ -865,6 +873,7 @@ error_call_jar:
static void __exit af_rxrpc_exit(void)
{
_enter("");
+ rxrpc_sysctl_exit();
unregister_key_type(&key_type_rxrpc_s);
unregister_key_type(&key_type_rxrpc);
sock_unregister(PF_RXRPC);
diff --git a/net/rxrpc/ar-ack.c b/net/rxrpc/ar-ack.c
index cd97a0ce48d..c6be17a959a 100644
--- a/net/rxrpc/ar-ack.c
+++ b/net/rxrpc/ar-ack.c
@@ -19,7 +19,49 @@
#include <net/af_rxrpc.h>
#include "ar-internal.h"
-static unsigned int rxrpc_ack_defer = 1;
+/*
+ * How long to wait before scheduling ACK generation after seeing a
+ * packet with RXRPC_REQUEST_ACK set (in jiffies).
+ */
+unsigned rxrpc_requested_ack_delay = 1;
+
+/*
+ * How long to wait before scheduling an ACK with subtype DELAY (in jiffies).
+ *
+ * We use this when we've received new data packets. If those packets aren't
+ * all consumed within this time we will send a DELAY ACK if an ACK was not
+ * requested to let the sender know it doesn't need to resend.
+ */
+unsigned rxrpc_soft_ack_delay = 1 * HZ;
+
+/*
+ * How long to wait before scheduling an ACK with subtype IDLE (in jiffies).
+ *
+ * We use this when we've consumed some previously soft-ACK'd packets when
+ * further packets aren't immediately received to decide when to send an IDLE
+ * ACK let the other end know that it can free up its Tx buffer space.
+ */
+unsigned rxrpc_idle_ack_delay = 0.5 * HZ;
+
+/*
+ * Receive window size in packets. This indicates the maximum number of
+ * unconsumed received packets we're willing to retain in memory. Once this
+ * limit is hit, we should generate an EXCEEDS_WINDOW ACK and discard further
+ * packets.
+ */
+unsigned rxrpc_rx_window_size = 32;
+
+/*
+ * Maximum Rx MTU size. This indicates to the sender the size of jumbo packet
+ * made by gluing normal packets together that we're willing to handle.
+ */
+unsigned rxrpc_rx_mtu = 5692;
+
+/*
+ * The maximum number of fragments in a received jumbo packet that we tell the
+ * sender that we're willing to handle.
+ */
+unsigned rxrpc_rx_jumbo_max = 4;
static const char *rxrpc_acks(u8 reason)
{
@@ -82,24 +124,23 @@ void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
switch (ack_reason) {
case RXRPC_ACK_DELAY:
_debug("run delay timer");
- call->ack_timer.expires = jiffies + rxrpc_ack_timeout * HZ;
- add_timer(&call->ack_timer);
- return;
+ expiry = rxrpc_soft_ack_delay;
+ goto run_timer;
case RXRPC_ACK_IDLE:
if (!immediate) {
_debug("run defer timer");
- expiry = 1;
+ expiry = rxrpc_idle_ack_delay;
goto run_timer;
}
goto cancel_timer;
case RXRPC_ACK_REQUESTED:
- if (!rxrpc_ack_defer)
+ expiry = rxrpc_requested_ack_delay;
+ if (!expiry)
goto cancel_timer;
if (!immediate || serial == cpu_to_be32(1)) {
_debug("run defer timer");
- expiry = rxrpc_ack_defer;
goto run_timer;
}
@@ -1174,11 +1215,11 @@ send_ACK:
mtu = call->conn->trans->peer->if_mtu;
mtu -= call->conn->trans->peer->hdrsize;
ackinfo.maxMTU = htonl(mtu);
- ackinfo.rwind = htonl(32);
+ ackinfo.rwind = htonl(rxrpc_rx_window_size);
/* permit the peer to send us jumbo packets if it wants to */
- ackinfo.rxMTU = htonl(5692);
- ackinfo.jumbo_max = htonl(4);
+ ackinfo.rxMTU = htonl(rxrpc_rx_mtu);
+ ackinfo.jumbo_max = htonl(rxrpc_rx_jumbo_max);
hdr.serial = htonl(atomic_inc_return(&call->conn->serial));
_proto("Tx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }",
diff --git a/net/rxrpc/ar-call.c b/net/rxrpc/ar-call.c
index a3bbb360a3f..a9e05db0f5d 100644
--- a/net/rxrpc/ar-call.c
+++ b/net/rxrpc/ar-call.c
@@ -12,10 +12,22 @@
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/circ_buf.h>
+#include <linux/hashtable.h>
+#include <linux/spinlock_types.h>
#include <net/sock.h>
#include <net/af_rxrpc.h>
#include "ar-internal.h"
+/*
+ * Maximum lifetime of a call (in jiffies).
+ */
+unsigned rxrpc_max_call_lifetime = 60 * HZ;
+
+/*
+ * Time till dead call expires after last use (in jiffies).
+ */
+unsigned rxrpc_dead_call_expiry = 2 * HZ;
+
const char *const rxrpc_call_states[] = {
[RXRPC_CALL_CLIENT_SEND_REQUEST] = "ClSndReq",
[RXRPC_CALL_CLIENT_AWAIT_REPLY] = "ClAwtRpl",
@@ -38,8 +50,6 @@ const char *const rxrpc_call_states[] = {
struct kmem_cache *rxrpc_call_jar;
LIST_HEAD(rxrpc_calls);
DEFINE_RWLOCK(rxrpc_call_lock);
-static unsigned int rxrpc_call_max_lifetime = 60;
-static unsigned int rxrpc_dead_call_timeout = 2;
static void rxrpc_destroy_call(struct work_struct *work);
static void rxrpc_call_life_expired(unsigned long _call);
@@ -47,6 +57,145 @@ static void rxrpc_dead_call_expired(unsigned long _call);
static void rxrpc_ack_time_expired(unsigned long _call);
static void rxrpc_resend_time_expired(unsigned long _call);
+static DEFINE_SPINLOCK(rxrpc_call_hash_lock);
+static DEFINE_HASHTABLE(rxrpc_call_hash, 10);
+
+/*
+ * Hash function for rxrpc_call_hash: sums the call's identifying fields and peer address into a key
+ */
+static unsigned long rxrpc_call_hashfunc(
+ u8 clientflag,
+ __be32 cid,
+ __be32 call_id,
+ __be32 epoch,
+ __be16 service_id,
+ sa_family_t proto,
+ void *localptr,
+ unsigned int addr_size,
+ const u8 *peer_addr)
+{
+ const u16 *p;
+ unsigned int i;
+ unsigned long key;
+ u32 hcid = ntohl(cid);
+
+ _enter("");
+
+ key = (unsigned long)localptr;
+ /* We just want to add up the raw network-order values (they are
+ * never interpreted here), so forcing the cast should be okay.
+ */
+ key += (__force u32)epoch;
+ key += (__force u16)service_id;
+ key += (__force u32)call_id;
+ key += (hcid & RXRPC_CIDMASK) >> RXRPC_CIDSHIFT;
+ key += hcid & RXRPC_CHANNELMASK;
+ key += clientflag;
+ key += proto;
+ /* Step through the peer address in 16-bit portions for speed; addr_size >> 1 drops any odd trailing byte */
+ for (i = 0, p = (const u16 *)peer_addr; i < addr_size >> 1; i++, p++)
+ key += *p;
+ _leave(" key = 0x%lx", key);
+ return key;
+}
+
+/*
+ * Add a call to rxrpc_call_hash, keyed on its recorded IDs, local endpoint and peer address
+ */
+static void rxrpc_call_hash_add(struct rxrpc_call *call)
+{
+ unsigned long key;
+ unsigned int addr_size = 0;
+
+ _enter("");
+ switch (call->proto) {
+ case AF_INET:
+ addr_size = sizeof(call->peer_ip.ipv4_addr);
+ break;
+ case AF_INET6:
+ addr_size = sizeof(call->peer_ip.ipv6_addr);
+ break;
+ default:
+ break;
+ }
+ key = rxrpc_call_hashfunc(call->in_clientflag, call->cid,
+ call->call_id, call->epoch,
+ call->service_id, call->proto,
+ call->conn->trans->local, addr_size,
+ call->peer_ip.ipv6_addr);
+ /* Store the full key in the call; rxrpc_find_call_hash() compares it before the individual fields */
+ call->hash_key = key;
+ spin_lock(&rxrpc_call_hash_lock);
+ hash_add_rcu(rxrpc_call_hash, &call->hash_node, key);
+ spin_unlock(&rxrpc_call_hash_lock);
+ _leave("");
+}
+
+/*
+ * Unlink a call from rxrpc_call_hash while holding rxrpc_call_hash_lock
+ */
+static void rxrpc_call_hash_del(struct rxrpc_call *call)
+{
+ _enter("");
+ spin_lock(&rxrpc_call_hash_lock);
+ hash_del_rcu(&call->hash_node);
+ spin_unlock(&rxrpc_call_hash_lock);
+ _leave("");
+}
+
+/*
+ * Find the call in rxrpc_call_hash whose key and every identifying field match and
+ * return it, or NULL if it isn't there. No reference is taken on the returned call.
+ */
+struct rxrpc_call *rxrpc_find_call_hash(
+ u8 clientflag,
+ __be32 cid,
+ __be32 call_id,
+ __be32 epoch,
+ __be16 service_id,
+ void *localptr,
+ sa_family_t proto,
+ const u8 *peer_addr)
+{
+ unsigned long key;
+ unsigned int addr_size = 0;
+ struct rxrpc_call *call = NULL;
+ struct rxrpc_call *ret = NULL;
+
+ _enter("");
+ switch (proto) {
+ case AF_INET:
+ addr_size = sizeof(call->peer_ip.ipv4_addr);
+ break;
+ case AF_INET6:
+ addr_size = sizeof(call->peer_ip.ipv6_addr);
+ break;
+ default:
+ break;
+ }
+
+ key = rxrpc_call_hashfunc(clientflag, cid, call_id, epoch,
+ service_id, proto, localptr, addr_size,
+ peer_addr);
+ hash_for_each_possible_rcu(rxrpc_call_hash, call, hash_node, key) {
+ if (call->hash_key == key &&
+ call->call_id == call_id &&
+ call->cid == cid &&
+ call->in_clientflag == clientflag &&
+ call->service_id == service_id &&
+ call->proto == proto &&
+ call->local == localptr &&
+ memcmp(call->peer_ip.ipv6_addr, peer_addr,
+ addr_size) == 0 &&
+ call->epoch == epoch) {
+ ret = call;
+ break;
+ }
+ }
+ _leave(" = %p", ret);
+ return ret;
+}
+
/*
* allocate a new call
*/
@@ -91,7 +240,7 @@ static struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp)
call->rx_data_expect = 1;
call->rx_data_eaten = 0;
call->rx_first_oos = 0;
- call->ackr_win_top = call->rx_data_eaten + 1 + RXRPC_MAXACKS;
+ call->ackr_win_top = call->rx_data_eaten + 1 + rxrpc_rx_window_size;
call->creation_jif = jiffies;
return call;
}
@@ -128,11 +277,31 @@ static struct rxrpc_call *rxrpc_alloc_client_call(
return ERR_PTR(ret);
}
+ /* Record copies of information for hashtable lookup */
+ call->proto = rx->proto;
+ call->local = trans->local;
+ switch (call->proto) {
+ case AF_INET:
+ call->peer_ip.ipv4_addr =
+ trans->peer->srx.transport.sin.sin_addr.s_addr;
+ break;
+ case AF_INET6:
+ memcpy(call->peer_ip.ipv6_addr,
+ trans->peer->srx.transport.sin6.sin6_addr.in6_u.u6_addr8,
+ sizeof(call->peer_ip.ipv6_addr));
+ break;
+ }
+ call->epoch = call->conn->epoch;
+ call->service_id = call->conn->service_id;
+ call->in_clientflag = call->conn->in_clientflag;
+ /* Add the new call to the hashtable */
+ rxrpc_call_hash_add(call);
+
spin_lock(&call->conn->trans->peer->lock);
list_add(&call->error_link, &call->conn->trans->peer->error_targets);
spin_unlock(&call->conn->trans->peer->lock);
- call->lifetimer.expires = jiffies + rxrpc_call_max_lifetime * HZ;
+ call->lifetimer.expires = jiffies + rxrpc_max_call_lifetime;
add_timer(&call->lifetimer);
_leave(" = %p", call);
@@ -320,9 +489,12 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx,
parent = *p;
call = rb_entry(parent, struct rxrpc_call, conn_node);
- if (call_id < call->call_id)
+ /* The tree is sorted in order of the __be32 value without
+ * turning it into host order.
+ */
+ if ((__force u32)call_id < (__force u32)call->call_id)
p = &(*p)->rb_left;
- else if (call_id > call->call_id)
+ else if ((__force u32)call_id > (__force u32)call->call_id)
p = &(*p)->rb_right;
else
goto old_call;
@@ -347,9 +519,31 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx,
list_add_tail(&call->link, &rxrpc_calls);
write_unlock_bh(&rxrpc_call_lock);
+ /* Record copies of information for hashtable lookup */
+ call->proto = rx->proto;
+ call->local = conn->trans->local;
+ switch (call->proto) {
+ case AF_INET:
+ call->peer_ip.ipv4_addr =
+ conn->trans->peer->srx.transport.sin.sin_addr.s_addr;
+ break;
+ case AF_INET6:
+ memcpy(call->peer_ip.ipv6_addr,
+ conn->trans->peer->srx.transport.sin6.sin6_addr.in6_u.u6_addr8,
+ sizeof(call->peer_ip.ipv6_addr));
+ break;
+ default:
+ break;
+ }
+ call->epoch = conn->epoch;
+ call->service_id = conn->service_id;
+ call->in_clientflag = conn->in_clientflag;
+ /* Add the new call to the hashtable */
+ rxrpc_call_hash_add(call);
+
_net("CALL incoming %d on CONN %d", call->debug_id, call->conn->debug_id);
- call->lifetimer.expires = jiffies + rxrpc_call_max_lifetime * HZ;
+ call->lifetimer.expires = jiffies + rxrpc_max_call_lifetime;
add_timer(&call->lifetimer);
_leave(" = %p {%d} [new]", call, call->debug_id);
return call;
@@ -533,7 +727,7 @@ void rxrpc_release_call(struct rxrpc_call *call)
del_timer_sync(&call->resend_timer);
del_timer_sync(&call->ack_timer);
del_timer_sync(&call->lifetimer);
- call->deadspan.expires = jiffies + rxrpc_dead_call_timeout * HZ;
+ call->deadspan.expires = jiffies + rxrpc_dead_call_expiry;
add_timer(&call->deadspan);
_leave("");
@@ -665,6 +859,9 @@ static void rxrpc_cleanup_call(struct rxrpc_call *call)
rxrpc_put_connection(call->conn);
}
+ /* Remove the call from the hash */
+ rxrpc_call_hash_del(call);
+
if (call->acks_window) {
_debug("kill Tx window %d",
CIRC_CNT(call->acks_head, call->acks_tail,
diff --git a/net/rxrpc/ar-connection.c b/net/rxrpc/ar-connection.c
index 7bf5b5b9e8b..6631f4f1e39 100644
--- a/net/rxrpc/ar-connection.c
+++ b/net/rxrpc/ar-connection.c
@@ -18,11 +18,15 @@
#include <net/af_rxrpc.h>
#include "ar-internal.h"
+/*
+ * Time till a connection expires after last use (in seconds).
+ */
+unsigned rxrpc_connection_expiry = 10 * 60;
+
static void rxrpc_connection_reaper(struct work_struct *work);
LIST_HEAD(rxrpc_connections);
DEFINE_RWLOCK(rxrpc_connection_lock);
-static unsigned long rxrpc_connection_timeout = 10 * 60;
static DECLARE_DELAYED_WORK(rxrpc_connection_reap, rxrpc_connection_reaper);
/*
@@ -862,7 +866,7 @@ static void rxrpc_connection_reaper(struct work_struct *work)
spin_lock(&conn->trans->client_lock);
write_lock(&conn->trans->conn_lock);
- reap_time = conn->put_time + rxrpc_connection_timeout;
+ reap_time = conn->put_time + rxrpc_connection_expiry;
if (atomic_read(&conn->usage) > 0) {
;
@@ -916,7 +920,7 @@ void __exit rxrpc_destroy_all_connections(void)
{
_enter("");
- rxrpc_connection_timeout = 0;
+ rxrpc_connection_expiry = 0;
cancel_delayed_work(&rxrpc_connection_reap);
rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0);
diff --git a/net/rxrpc/ar-error.c b/net/rxrpc/ar-error.c
index a9206087b4d..db57458c824 100644
--- a/net/rxrpc/ar-error.c
+++ b/net/rxrpc/ar-error.c
@@ -83,6 +83,7 @@ void rxrpc_UDP_error_report(struct sock *sk)
if (mtu == 0) {
/* they didn't give us a size, estimate one */
+ mtu = peer->if_mtu;
if (mtu > 1500) {
mtu >>= 1;
if (mtu < 1500)
diff --git a/net/rxrpc/ar-input.c b/net/rxrpc/ar-input.c
index 529572f18d1..63b21e580de 100644
--- a/net/rxrpc/ar-input.c
+++ b/net/rxrpc/ar-input.c
@@ -25,8 +25,6 @@
#include <net/net_namespace.h>
#include "ar-internal.h"
-unsigned long rxrpc_ack_timeout = 1;
-
const char *rxrpc_pkts[] = {
"?00",
"DATA", "ACK", "BUSY", "ABORT", "ACKALL", "CHALL", "RESP", "DEBUG",
@@ -115,7 +113,7 @@ int rxrpc_queue_rcv_skb(struct rxrpc_call *call, struct sk_buff *skb,
spin_unlock_bh(&sk->sk_receive_queue.lock);
if (!sock_flag(sk, SOCK_DEAD))
- sk->sk_data_ready(sk, skb_len);
+ sk->sk_data_ready(sk);
}
skb = NULL;
} else {
@@ -349,8 +347,7 @@ void rxrpc_fast_process_packet(struct rxrpc_call *call, struct sk_buff *skb)
* it */
if (sp->hdr.flags & RXRPC_REQUEST_ACK) {
_proto("ACK Requested on %%%u", serial);
- rxrpc_propose_ACK(call, RXRPC_ACK_REQUESTED, sp->hdr.serial,
- !(sp->hdr.flags & RXRPC_MORE_PACKETS));
+ rxrpc_propose_ACK(call, RXRPC_ACK_REQUESTED, sp->hdr.serial, false);
}
switch (sp->hdr.type) {
@@ -526,36 +523,38 @@ protocol_error:
* post an incoming packet to the appropriate call/socket to deal with
* - must get rid of the sk_buff, either by freeing it or by queuing it
*/
-static void rxrpc_post_packet_to_call(struct rxrpc_connection *conn,
+static void rxrpc_post_packet_to_call(struct rxrpc_call *call,
struct sk_buff *skb)
{
struct rxrpc_skb_priv *sp;
- struct rxrpc_call *call;
- struct rb_node *p;
- __be32 call_id;
-
- _enter("%p,%p", conn, skb);
- read_lock_bh(&conn->lock);
+ _enter("%p,%p", call, skb);
sp = rxrpc_skb(skb);
- /* look at extant calls by channel number first */
- call = conn->channels[ntohl(sp->hdr.cid) & RXRPC_CHANNELMASK];
- if (!call || call->call_id != sp->hdr.callNumber)
- goto call_not_extant;
-
_debug("extant call [%d]", call->state);
- ASSERTCMP(call->conn, ==, conn);
read_lock(&call->state_lock);
switch (call->state) {
case RXRPC_CALL_LOCALLY_ABORTED:
- if (!test_and_set_bit(RXRPC_CALL_ABORT, &call->events))
+ if (!test_and_set_bit(RXRPC_CALL_ABORT, &call->events)) {
rxrpc_queue_call(call);
+ goto free_unlock;
+ }
case RXRPC_CALL_REMOTELY_ABORTED:
case RXRPC_CALL_NETWORK_ERROR:
case RXRPC_CALL_DEAD:
+ goto dead_call;
+ case RXRPC_CALL_COMPLETE:
+ case RXRPC_CALL_CLIENT_FINAL_ACK:
+ /* complete server call */
+ if (call->conn->in_clientflag)
+ goto dead_call;
+ /* resend last packet of a completed call */
+ _debug("final ack again");
+ rxrpc_get_call(call);
+ set_bit(RXRPC_CALL_ACK_FINAL, &call->events);
+ rxrpc_queue_call(call);
goto free_unlock;
default:
break;
@@ -563,7 +562,6 @@ static void rxrpc_post_packet_to_call(struct rxrpc_connection *conn,
read_unlock(&call->state_lock);
rxrpc_get_call(call);
- read_unlock_bh(&conn->lock);
if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA &&
sp->hdr.flags & RXRPC_JUMBO_PACKET)
@@ -574,78 +572,16 @@ static void rxrpc_post_packet_to_call(struct rxrpc_connection *conn,
rxrpc_put_call(call);
goto done;
-call_not_extant:
- /* search the completed calls in case what we're dealing with is
- * there */
- _debug("call not extant");
-
- call_id = sp->hdr.callNumber;
- p = conn->calls.rb_node;
- while (p) {
- call = rb_entry(p, struct rxrpc_call, conn_node);
-
- if (call_id < call->call_id)
- p = p->rb_left;
- else if (call_id > call->call_id)
- p = p->rb_right;
- else
- goto found_completed_call;
- }
-
dead_call:
- /* it's a either a really old call that we no longer remember or its a
- * new incoming call */
- read_unlock_bh(&conn->lock);
-
- if (sp->hdr.flags & RXRPC_CLIENT_INITIATED &&
- sp->hdr.seq == cpu_to_be32(1)) {
- _debug("incoming call");
- skb_queue_tail(&conn->trans->local->accept_queue, skb);
- rxrpc_queue_work(&conn->trans->local->acceptor);
- goto done;
- }
-
- _debug("dead call");
- skb->priority = RX_CALL_DEAD;
- rxrpc_reject_packet(conn->trans->local, skb);
- goto done;
-
- /* resend last packet of a completed call
- * - client calls may have been aborted or ACK'd
- * - server calls may have been aborted
- */
-found_completed_call:
- _debug("completed call");
-
- if (atomic_read(&call->usage) == 0)
- goto dead_call;
-
- /* synchronise any state changes */
- read_lock(&call->state_lock);
- ASSERTIFCMP(call->state != RXRPC_CALL_CLIENT_FINAL_ACK,
- call->state, >=, RXRPC_CALL_COMPLETE);
-
- if (call->state == RXRPC_CALL_LOCALLY_ABORTED ||
- call->state == RXRPC_CALL_REMOTELY_ABORTED ||
- call->state == RXRPC_CALL_DEAD) {
- read_unlock(&call->state_lock);
- goto dead_call;
- }
-
- if (call->conn->in_clientflag) {
- read_unlock(&call->state_lock);
- goto dead_call; /* complete server call */
+ if (sp->hdr.type != RXRPC_PACKET_TYPE_ABORT) {
+ skb->priority = RX_CALL_DEAD;
+ rxrpc_reject_packet(call->conn->trans->local, skb);
+ goto unlock;
}
-
- _debug("final ack again");
- rxrpc_get_call(call);
- set_bit(RXRPC_CALL_ACK_FINAL, &call->events);
- rxrpc_queue_call(call);
-
free_unlock:
- read_unlock(&call->state_lock);
- read_unlock_bh(&conn->lock);
rxrpc_free_skb(skb);
+unlock:
+ read_unlock(&call->state_lock);
done:
_leave("");
}
@@ -664,21 +600,46 @@ static void rxrpc_post_packet_to_conn(struct rxrpc_connection *conn,
rxrpc_queue_conn(conn);
}
+static struct rxrpc_connection *rxrpc_conn_from_local(struct rxrpc_local *local,
+ struct sk_buff *skb,
+ struct rxrpc_skb_priv *sp)
+{
+ struct rxrpc_peer *peer;
+ struct rxrpc_transport *trans;
+ struct rxrpc_connection *conn;
+
+ peer = rxrpc_find_peer(local, ip_hdr(skb)->saddr,
+ udp_hdr(skb)->source);
+ if (IS_ERR(peer))
+ goto cant_find_conn;
+
+ trans = rxrpc_find_transport(local, peer);
+ rxrpc_put_peer(peer);
+ if (!trans)
+ goto cant_find_conn;
+
+ conn = rxrpc_find_connection(trans, &sp->hdr);
+ rxrpc_put_transport(trans);
+ if (!conn)
+ goto cant_find_conn;
+
+ return conn;
+cant_find_conn:
+ return NULL;
+}
+
/*
* handle data received on the local endpoint
* - may be called in interrupt context
*/
-void rxrpc_data_ready(struct sock *sk, int count)
+void rxrpc_data_ready(struct sock *sk)
{
- struct rxrpc_connection *conn;
- struct rxrpc_transport *trans;
struct rxrpc_skb_priv *sp;
struct rxrpc_local *local;
- struct rxrpc_peer *peer;
struct sk_buff *skb;
int ret;
- _enter("%p, %d", sk, count);
+ _enter("%p", sk);
ASSERT(!irqs_disabled());
@@ -749,27 +710,34 @@ void rxrpc_data_ready(struct sock *sk, int count)
(sp->hdr.callNumber == 0 || sp->hdr.seq == 0))
goto bad_message;
- peer = rxrpc_find_peer(local, ip_hdr(skb)->saddr, udp_hdr(skb)->source);
- if (IS_ERR(peer))
- goto cant_route_call;
+ if (sp->hdr.callNumber == 0) {
+ /* This is a connection-level packet. These should be
+ * fairly rare, so the extra overhead of looking them up the
+ * old-fashioned way doesn't really hurt */
+ struct rxrpc_connection *conn;
- trans = rxrpc_find_transport(local, peer);
- rxrpc_put_peer(peer);
- if (!trans)
- goto cant_route_call;
+ conn = rxrpc_conn_from_local(local, skb, sp);
+ if (!conn)
+ goto cant_route_call;
- conn = rxrpc_find_connection(trans, &sp->hdr);
- rxrpc_put_transport(trans);
- if (!conn)
- goto cant_route_call;
-
- _debug("CONN %p {%d}", conn, conn->debug_id);
-
- if (sp->hdr.callNumber == 0)
+ _debug("CONN %p {%d}", conn, conn->debug_id);
rxrpc_post_packet_to_conn(conn, skb);
- else
- rxrpc_post_packet_to_call(conn, skb);
- rxrpc_put_connection(conn);
+ rxrpc_put_connection(conn);
+ } else {
+ struct rxrpc_call *call;
+ u8 in_clientflag = 0;
+
+ if (sp->hdr.flags & RXRPC_CLIENT_INITIATED)
+ in_clientflag = RXRPC_CLIENT_INITIATED;
+ call = rxrpc_find_call_hash(in_clientflag, sp->hdr.cid,
+ sp->hdr.callNumber, sp->hdr.epoch,
+ sp->hdr.serviceId, local, AF_INET,
+ (u8 *)&ip_hdr(skb)->saddr);
+ if (call)
+ rxrpc_post_packet_to_call(call, skb);
+ else
+ goto cant_route_call;
+ }
rxrpc_put_local(local);
return;
@@ -790,8 +758,10 @@ cant_route_call:
skb->priority = RX_CALL_DEAD;
}
- _debug("reject");
- rxrpc_reject_packet(local, skb);
+ if (sp->hdr.type != RXRPC_PACKET_TYPE_ABORT) {
+ _debug("reject type %d",sp->hdr.type);
+ rxrpc_reject_packet(local, skb);
+ }
rxrpc_put_local(local);
_leave(" [no call]");
return;
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 5f43675ee1d..ba9fd36d3f1 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -396,9 +396,20 @@ struct rxrpc_call {
#define RXRPC_ACKR_WINDOW_ASZ DIV_ROUND_UP(RXRPC_MAXACKS, BITS_PER_LONG)
unsigned long ackr_window[RXRPC_ACKR_WINDOW_ASZ + 1];
+ struct hlist_node hash_node;
+ unsigned long hash_key; /* Full hash key */
+ u8 in_clientflag; /* Copy of conn->in_clientflag for hashing */
+ struct rxrpc_local *local; /* Local endpoint. Used for hashing. */
+ sa_family_t proto; /* Frame protocol */
/* the following should all be in net order */
__be32 cid; /* connection ID + channel index */
__be32 call_id; /* call ID on connection */
+ __be32 epoch; /* epoch of this connection */
+ __be16 service_id; /* service ID */
+ union { /* Peer IP address for hashing */
+ __be32 ipv4_addr;
+ __u8 ipv6_addr[16]; /* Anticipates eventual IPv6 support */
+ } peer_ip;
};
/*
@@ -433,6 +444,13 @@ int rxrpc_reject_call(struct rxrpc_sock *);
/*
* ar-ack.c
*/
+extern unsigned rxrpc_requested_ack_delay;
+extern unsigned rxrpc_soft_ack_delay;
+extern unsigned rxrpc_idle_ack_delay;
+extern unsigned rxrpc_rx_window_size;
+extern unsigned rxrpc_rx_mtu;
+extern unsigned rxrpc_rx_jumbo_max;
+
void __rxrpc_propose_ACK(struct rxrpc_call *, u8, __be32, bool);
void rxrpc_propose_ACK(struct rxrpc_call *, u8, __be32, bool);
void rxrpc_process_call(struct work_struct *);
@@ -440,10 +458,14 @@ void rxrpc_process_call(struct work_struct *);
/*
* ar-call.c
*/
+extern unsigned rxrpc_max_call_lifetime;
+extern unsigned rxrpc_dead_call_expiry;
extern struct kmem_cache *rxrpc_call_jar;
extern struct list_head rxrpc_calls;
extern rwlock_t rxrpc_call_lock;
+struct rxrpc_call *rxrpc_find_call_hash(u8, __be32, __be32, __be32,
+ __be16, void *, sa_family_t, const u8 *);
struct rxrpc_call *rxrpc_get_client_call(struct rxrpc_sock *,
struct rxrpc_transport *,
struct rxrpc_conn_bundle *,
@@ -460,6 +482,7 @@ void __exit rxrpc_destroy_all_calls(void);
/*
* ar-connection.c
*/
+extern unsigned rxrpc_connection_expiry;
extern struct list_head rxrpc_connections;
extern rwlock_t rxrpc_connection_lock;
@@ -493,10 +516,9 @@ void rxrpc_UDP_error_handler(struct work_struct *);
/*
* ar-input.c
*/
-extern unsigned long rxrpc_ack_timeout;
extern const char *rxrpc_pkts[];
-void rxrpc_data_ready(struct sock *, int);
+void rxrpc_data_ready(struct sock *);
int rxrpc_queue_rcv_skb(struct rxrpc_call *, struct sk_buff *, bool, bool);
void rxrpc_fast_process_packet(struct rxrpc_call *, struct sk_buff *);
@@ -504,6 +526,7 @@ void rxrpc_fast_process_packet(struct rxrpc_call *, struct sk_buff *);
* ar-local.c
*/
extern rwlock_t rxrpc_local_lock;
+
struct rxrpc_local *rxrpc_lookup_local(struct sockaddr_rxrpc *);
void rxrpc_put_local(struct rxrpc_local *);
void __exit rxrpc_destroy_all_locals(void);
@@ -522,7 +545,7 @@ int rxrpc_get_server_data_key(struct rxrpc_connection *, const void *, time_t,
/*
* ar-output.c
*/
-extern int rxrpc_resend_timeout;
+extern unsigned rxrpc_resend_timeout;
int rxrpc_send_packet(struct rxrpc_transport *, struct sk_buff *);
int rxrpc_client_sendmsg(struct kiocb *, struct rxrpc_sock *,
@@ -572,6 +595,8 @@ void rxrpc_packet_destructor(struct sk_buff *);
/*
* ar-transport.c
*/
+extern unsigned rxrpc_transport_expiry;
+
struct rxrpc_transport *rxrpc_get_transport(struct rxrpc_local *,
struct rxrpc_peer *, gfp_t);
void rxrpc_put_transport(struct rxrpc_transport *);
@@ -580,6 +605,17 @@ struct rxrpc_transport *rxrpc_find_transport(struct rxrpc_local *,
struct rxrpc_peer *);
/*
+ * sysctl.c
+ */
+#ifdef CONFIG_SYSCTL
+extern int __init rxrpc_sysctl_init(void);
+extern void rxrpc_sysctl_exit(void);
+#else
+static inline int __init rxrpc_sysctl_init(void) { return 0; }
+static inline void rxrpc_sysctl_exit(void) {}
+#endif
+
+/*
* debug tracing
*/
extern unsigned int rxrpc_debug;
diff --git a/net/rxrpc/ar-key.c b/net/rxrpc/ar-key.c
index 7633a752c65..0ad080790a3 100644
--- a/net/rxrpc/ar-key.c
+++ b/net/rxrpc/ar-key.c
@@ -99,7 +99,7 @@ static int rxrpc_instantiate_xdr_rxkad(struct key *key, const __be32 *xdr,
_debug("tktlen: %x", tktlen);
if (tktlen > AFSTOKEN_RK_TIX_MAX)
return -EKEYREJECTED;
- if (8 * 4 + tktlen != toklen)
+ if (toklen < 8 * 4 + tktlen)
return -EKEYREJECTED;
plen = sizeof(*token) + sizeof(*token->kad) + tktlen;
diff --git a/net/rxrpc/ar-output.c b/net/rxrpc/ar-output.c
index d0e8f1c1898..0b4b9a79f5a 100644
--- a/net/rxrpc/ar-output.c
+++ b/net/rxrpc/ar-output.c
@@ -18,7 +18,10 @@
#include <net/af_rxrpc.h>
#include "ar-internal.h"
-int rxrpc_resend_timeout = 4;
+/*
+ * Time till packet resend (in jiffies).
+ */
+unsigned rxrpc_resend_timeout = 4 * HZ;
static int rxrpc_send_data(struct kiocb *iocb,
struct rxrpc_sock *rx,
@@ -487,7 +490,7 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb,
ntohl(sp->hdr.serial), ntohl(sp->hdr.seq));
sp->need_resend = false;
- sp->resend_at = jiffies + rxrpc_resend_timeout * HZ;
+ sp->resend_at = jiffies + rxrpc_resend_timeout;
if (!test_and_set_bit(RXRPC_CALL_RUN_RTIMER, &call->flags)) {
_debug("run timer");
call->resend_timer.expires = sp->resend_at;
@@ -666,6 +669,7 @@ static int rxrpc_send_data(struct kiocb *iocb,
/* add the packet to the send queue if it's now full */
if (sp->remain <= 0 || (segment == 0 && !more)) {
struct rxrpc_connection *conn = call->conn;
+ uint32_t seq;
size_t pad;
/* pad out if we're using security */
@@ -678,11 +682,12 @@ static int rxrpc_send_data(struct kiocb *iocb,
memset(skb_put(skb, pad), 0, pad);
}
+ seq = atomic_inc_return(&call->sequence);
+
sp->hdr.epoch = conn->epoch;
sp->hdr.cid = call->cid;
sp->hdr.callNumber = call->call_id;
- sp->hdr.seq =
- htonl(atomic_inc_return(&call->sequence));
+ sp->hdr.seq = htonl(seq);
sp->hdr.serial =
htonl(atomic_inc_return(&conn->serial));
sp->hdr.type = RXRPC_PACKET_TYPE_DATA;
@@ -697,6 +702,8 @@ static int rxrpc_send_data(struct kiocb *iocb,
else if (CIRC_SPACE(call->acks_head, call->acks_tail,
call->acks_winsz) > 1)
sp->hdr.flags |= RXRPC_MORE_PACKETS;
+ if (more && seq & 1)
+ sp->hdr.flags |= RXRPC_REQUEST_ACK;
ret = rxrpc_secure_packet(
call, skb, skb->mark,
diff --git a/net/rxrpc/ar-recvmsg.c b/net/rxrpc/ar-recvmsg.c
index 34b5490dde6..e9aaa65c077 100644
--- a/net/rxrpc/ar-recvmsg.c
+++ b/net/rxrpc/ar-recvmsg.c
@@ -180,16 +180,7 @@ int rxrpc_recvmsg(struct kiocb *iocb, struct socket *sock,
if (copy > len - copied)
copy = len - copied;
- if (skb->ip_summed == CHECKSUM_UNNECESSARY ||
- skb->ip_summed == CHECKSUM_PARTIAL) {
- ret = skb_copy_datagram_iovec(skb, offset,
- msg->msg_iov, copy);
- } else {
- ret = skb_copy_and_csum_datagram_iovec(skb, offset,
- msg->msg_iov);
- if (ret == -EINVAL)
- goto csum_copy_error;
- }
+ ret = skb_copy_datagram_iovec(skb, offset, msg->msg_iov, copy);
if (ret < 0)
goto copy_error;
@@ -348,20 +339,6 @@ copy_error:
_leave(" = %d", ret);
return ret;
-csum_copy_error:
- _debug("csum error");
- release_sock(&rx->sk);
- if (continue_call)
- rxrpc_put_call(continue_call);
- rxrpc_kill_skb(skb);
- if (!(flags & MSG_PEEK)) {
- if (skb_dequeue(&rx->sk.sk_receive_queue) != skb)
- BUG();
- }
- skb_kill_datagram(&rx->sk, skb, flags);
- rxrpc_put_call(call);
- return -EAGAIN;
-
wait_interrupted:
ret = sock_intr_errno(timeo);
wait_error:
diff --git a/net/rxrpc/ar-skbuff.c b/net/rxrpc/ar-skbuff.c
index de755e04d29..4cfab49e329 100644
--- a/net/rxrpc/ar-skbuff.c
+++ b/net/rxrpc/ar-skbuff.c
@@ -83,9 +83,14 @@ static void rxrpc_hard_ACK_data(struct rxrpc_call *call,
rxrpc_request_final_ACK(call);
} else if (atomic_dec_and_test(&call->ackr_not_idle) &&
test_and_clear_bit(RXRPC_CALL_TX_SOFT_ACK, &call->flags)) {
+ /* We previously soft-ACK'd some received packets that have now
+ * been consumed, so send a hard-ACK if no more packets are
+ * immediately forthcoming to allow the transmitter to free up
+ * its Tx bufferage.
+ */
_debug("send Rx idle ACK");
__rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, sp->hdr.serial,
- true);
+ false);
}
spin_unlock_bh(&call->lock);
diff --git a/net/rxrpc/ar-transport.c b/net/rxrpc/ar-transport.c
index 92df566930b..1976dec84f2 100644
--- a/net/rxrpc/ar-transport.c
+++ b/net/rxrpc/ar-transport.c
@@ -17,11 +17,15 @@
#include <net/af_rxrpc.h>
#include "ar-internal.h"
+/*
+ * Time after last use at which transport record is cleaned up (in seconds).
+ */
+unsigned rxrpc_transport_expiry = 3600 * 24;
+
static void rxrpc_transport_reaper(struct work_struct *work);
static LIST_HEAD(rxrpc_transports);
static DEFINE_RWLOCK(rxrpc_transport_lock);
-static unsigned long rxrpc_transport_timeout = 3600 * 24;
static DECLARE_DELAYED_WORK(rxrpc_transport_reap, rxrpc_transport_reaper);
/*
@@ -235,7 +239,7 @@ static void rxrpc_transport_reaper(struct work_struct *work)
if (likely(atomic_read(&trans->usage) > 0))
continue;
- reap_time = trans->put_time + rxrpc_transport_timeout;
+ reap_time = trans->put_time + rxrpc_transport_expiry;
if (reap_time <= now)
list_move_tail(&trans->link, &graveyard);
else if (reap_time < earliest)
@@ -271,7 +275,7 @@ void __exit rxrpc_destroy_all_transports(void)
{
_enter("");
- rxrpc_transport_timeout = 0;
+ rxrpc_transport_expiry = 0;
cancel_delayed_work(&rxrpc_transport_reap);
rxrpc_queue_delayed_work(&rxrpc_transport_reap, 0);
diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c
new file mode 100644
index 00000000000..50a98a910eb
--- /dev/null
+++ b/net/rxrpc/sysctl.c
@@ -0,0 +1,146 @@
+/* sysctls for configuring RxRPC operating parameters
+ *
+ * Copyright (C) 2014 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/sysctl.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include "ar-internal.h"
+
+static struct ctl_table_header *rxrpc_sysctl_reg_table;
+static const unsigned zero = 0;
+static const unsigned one = 1;
+static const unsigned four = 4;
+static const unsigned n_65535 = 65535;
+static const unsigned n_max_acks = RXRPC_MAXACKS;
+
+/*
+ * RxRPC operating parameters.
+ *
+ * See Documentation/networking/rxrpc.txt and the variable definitions for more
+ * information on the individual parameters.
+ */
+static struct ctl_table rxrpc_sysctl_table[] = {
+ /* Values measured in milliseconds */
+ {
+ .procname = "req_ack_delay",
+ .data = &rxrpc_requested_ack_delay,
+ .maxlen = sizeof(unsigned),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_ms_jiffies,
+ .extra1 = (void *)&zero,
+ },
+ {
+ .procname = "soft_ack_delay",
+ .data = &rxrpc_soft_ack_delay,
+ .maxlen = sizeof(unsigned),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_ms_jiffies,
+ .extra1 = (void *)&one,
+ },
+ {
+ .procname = "idle_ack_delay",
+ .data = &rxrpc_idle_ack_delay,
+ .maxlen = sizeof(unsigned),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_ms_jiffies,
+ .extra1 = (void *)&one,
+ },
+ {
+ .procname = "resend_timeout",
+ .data = &rxrpc_resend_timeout,
+ .maxlen = sizeof(unsigned),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_ms_jiffies,
+ .extra1 = (void *)&one,
+ },
+
+ /* Values measured in seconds but used in jiffies */
+ {
+ .procname = "max_call_lifetime",
+ .data = &rxrpc_max_call_lifetime,
+ .maxlen = sizeof(unsigned),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ .extra1 = (void *)&one,
+ },
+ {
+ .procname = "dead_call_expiry",
+ .data = &rxrpc_dead_call_expiry,
+ .maxlen = sizeof(unsigned),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ .extra1 = (void *)&one,
+ },
+
+ /* Values measured in seconds */
+ {
+ .procname = "connection_expiry",
+ .data = &rxrpc_connection_expiry,
+ .maxlen = sizeof(unsigned),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = (void *)&one,
+ },
+ {
+ .procname = "transport_expiry",
+ .data = &rxrpc_transport_expiry,
+ .maxlen = sizeof(unsigned),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = (void *)&one,
+ },
+
+ /* Non-time values; extra1/extra2 give the min/max accepted by proc_dointvec_minmax */
+ {
+ .procname = "rx_window_size",
+ .data = &rxrpc_rx_window_size,
+ .maxlen = sizeof(unsigned),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = (void *)&one,
+ .extra2 = (void *)&n_max_acks,
+ },
+ {
+ .procname = "rx_mtu",
+ .data = &rxrpc_rx_mtu,
+ .maxlen = sizeof(unsigned),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = (void *)&one,
+ .extra2 = (void *)&n_65535,
+ },
+ {
+ .procname = "rx_jumbo_max",
+ .data = &rxrpc_rx_jumbo_max,
+ .maxlen = sizeof(unsigned),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = (void *)&one,
+ .extra2 = (void *)&four,
+ },
+
+ { }
+};
+
+int __init rxrpc_sysctl_init(void)
+{
+ rxrpc_sysctl_reg_table = register_net_sysctl(&init_net, "net/rxrpc",
+ rxrpc_sysctl_table);
+ if (!rxrpc_sysctl_reg_table)
+ return -ENOMEM;
+ return 0;
+}
+
+void rxrpc_sysctl_exit(void)
+{
+ if (rxrpc_sysctl_reg_table)
+ unregister_net_sysctl_table(rxrpc_sysctl_reg_table);
+}
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 72bdc716634..648778aef1a 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -27,8 +27,11 @@
#include <net/act_api.h>
#include <net/netlink.h>
-void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo)
+void tcf_hash_destroy(struct tc_action *a)
{
+ struct tcf_common *p = a->priv;
+ struct tcf_hashinfo *hinfo = a->ops->hinfo;
+
spin_lock_bh(&hinfo->lock);
hlist_del(&p->tcfc_head);
spin_unlock_bh(&hinfo->lock);
@@ -42,18 +45,22 @@ void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo)
}
EXPORT_SYMBOL(tcf_hash_destroy);
-int tcf_hash_release(struct tcf_common *p, int bind,
- struct tcf_hashinfo *hinfo)
+int tcf_hash_release(struct tc_action *a, int bind)
{
+ struct tcf_common *p = a->priv;
int ret = 0;
if (p) {
if (bind)
p->tcfc_bindcnt--;
+ else if (p->tcfc_bindcnt > 0)
+ return -EPERM;
p->tcfc_refcnt--;
if (p->tcfc_bindcnt <= 0 && p->tcfc_refcnt <= 0) {
- tcf_hash_destroy(p, hinfo);
+ if (a->ops->cleanup)
+ a->ops->cleanup(a, bind);
+ tcf_hash_destroy(a);
ret = 1;
}
}
@@ -118,6 +125,7 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a)
struct tcf_common *p;
struct nlattr *nest;
int i = 0, n_i = 0;
+ int ret = -EINVAL;
nest = nla_nest_start(skb, a->order);
if (nest == NULL)
@@ -127,10 +135,13 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a)
for (i = 0; i < (hinfo->hmask + 1); i++) {
head = &hinfo->htab[tcf_hash(i, hinfo->hmask)];
hlist_for_each_entry_safe(p, n, head, tcfc_head) {
- if (ACT_P_DELETED == tcf_hash_release(p, 0, hinfo)) {
+ a->priv = p;
+ ret = tcf_hash_release(a, 0);
+ if (ret == ACT_P_DELETED) {
module_put(a->ops->owner);
n_i++;
- }
+ } else if (ret < 0)
+ goto nla_put_failure;
}
}
if (nla_put_u32(skb, TCA_FCNT, n_i))
@@ -140,7 +151,7 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a)
return n_i;
nla_put_failure:
nla_nest_cancel(skb, nest);
- return -EINVAL;
+ return ret;
}
static int tcf_generic_walker(struct sk_buff *skb, struct netlink_callback *cb,
@@ -198,7 +209,7 @@ int tcf_hash_search(struct tc_action *a, u32 index)
}
EXPORT_SYMBOL(tcf_hash_search);
-struct tcf_common *tcf_hash_check(u32 index, struct tc_action *a, int bind)
+int tcf_hash_check(u32 index, struct tc_action *a, int bind)
{
struct tcf_hashinfo *hinfo = a->ops->hinfo;
struct tcf_common *p = NULL;
@@ -207,19 +218,30 @@ struct tcf_common *tcf_hash_check(u32 index, struct tc_action *a, int bind)
p->tcfc_bindcnt++;
p->tcfc_refcnt++;
a->priv = p;
+ return 1;
}
- return p;
+ return 0;
}
EXPORT_SYMBOL(tcf_hash_check);
-struct tcf_common *tcf_hash_create(u32 index, struct nlattr *est,
- struct tc_action *a, int size, int bind)
+void tcf_hash_cleanup(struct tc_action *a, struct nlattr *est)
+{
+ struct tcf_common *pc = a->priv;
+ if (est)
+ gen_kill_estimator(&pc->tcfc_bstats,
+ &pc->tcfc_rate_est);
+ kfree_rcu(pc, tcfc_rcu);
+}
+EXPORT_SYMBOL(tcf_hash_cleanup);
+
+int tcf_hash_create(u32 index, struct nlattr *est, struct tc_action *a,
+ int size, int bind)
{
struct tcf_hashinfo *hinfo = a->ops->hinfo;
struct tcf_common *p = kzalloc(size, GFP_KERNEL);
if (unlikely(!p))
- return ERR_PTR(-ENOMEM);
+ return -ENOMEM;
p->tcfc_refcnt = 1;
if (bind)
p->tcfc_bindcnt = 1;
@@ -234,17 +256,19 @@ struct tcf_common *tcf_hash_create(u32 index, struct nlattr *est,
&p->tcfc_lock, est);
if (err) {
kfree(p);
- return ERR_PTR(err);
+ return err;
}
}
a->priv = (void *) p;
- return p;
+ return 0;
}
EXPORT_SYMBOL(tcf_hash_create);
-void tcf_hash_insert(struct tcf_common *p, struct tcf_hashinfo *hinfo)
+void tcf_hash_insert(struct tc_action *a)
{
+ struct tcf_common *p = a->priv;
+ struct tcf_hashinfo *hinfo = a->ops->hinfo;
unsigned int h = tcf_hash(p->tcfc_index, hinfo->hmask);
spin_lock_bh(&hinfo->lock);
@@ -256,12 +280,13 @@ EXPORT_SYMBOL(tcf_hash_insert);
static LIST_HEAD(act_base);
static DEFINE_RWLOCK(act_mod_lock);
-int tcf_register_action(struct tc_action_ops *act)
+int tcf_register_action(struct tc_action_ops *act, unsigned int mask)
{
struct tc_action_ops *a;
+ int err;
- /* Must supply act, dump, cleanup and init */
- if (!act->act || !act->dump || !act->cleanup || !act->init)
+ /* Must supply act, dump and init */
+ if (!act->act || !act->dump || !act->init)
return -EINVAL;
/* Supply defaults */
@@ -270,10 +295,21 @@ int tcf_register_action(struct tc_action_ops *act)
if (!act->walk)
act->walk = tcf_generic_walker;
+ act->hinfo = kmalloc(sizeof(struct tcf_hashinfo), GFP_KERNEL);
+ if (!act->hinfo)
+ return -ENOMEM;
+ err = tcf_hashinfo_init(act->hinfo, mask);
+ if (err) {
+ kfree(act->hinfo);
+ return err;
+ }
+
write_lock(&act_mod_lock);
list_for_each_entry(a, &act_base, head) {
if (act->type == a->type || (strcmp(act->kind, a->kind) == 0)) {
write_unlock(&act_mod_lock);
+ tcf_hashinfo_destroy(act->hinfo);
+ kfree(act->hinfo);
return -EEXIST;
}
}
@@ -292,6 +328,8 @@ int tcf_unregister_action(struct tc_action_ops *act)
list_for_each_entry(a, &act_base, head) {
if (a == act) {
list_del(&act->head);
+ tcf_hashinfo_destroy(act->hinfo);
+ kfree(act->hinfo);
err = 0;
break;
}
@@ -368,16 +406,21 @@ exec_done:
}
EXPORT_SYMBOL(tcf_action_exec);
-void tcf_action_destroy(struct list_head *actions, int bind)
+int tcf_action_destroy(struct list_head *actions, int bind)
{
struct tc_action *a, *tmp;
+ int ret = 0;
list_for_each_entry_safe(a, tmp, actions, list) {
- if (a->ops->cleanup(a, bind) == ACT_P_DELETED)
+ ret = tcf_hash_release(a, bind);
+ if (ret == ACT_P_DELETED)
module_put(a->ops->owner);
+ else if (ret < 0)
+ return ret;
list_del(&a->list);
kfree(a);
}
+ return ret;
}
int
@@ -642,6 +685,20 @@ act_get_notify(struct net *net, u32 portid, struct nlmsghdr *n,
return rtnl_unicast(skb, net, portid);
}
+static struct tc_action *create_a(int i)
+{
+ struct tc_action *act;
+
+ act = kzalloc(sizeof(*act), GFP_KERNEL);
+ if (act == NULL) {
+ pr_debug("create_a: failed to alloc!\n");
+ return NULL;
+ }
+ act->order = i;
+ INIT_LIST_HEAD(&act->list);
+ return act;
+}
+
static struct tc_action *
tcf_action_get_1(struct nlattr *nla, struct nlmsghdr *n, u32 portid)
{
@@ -661,11 +718,10 @@ tcf_action_get_1(struct nlattr *nla, struct nlmsghdr *n, u32 portid)
index = nla_get_u32(tb[TCA_ACT_INDEX]);
err = -ENOMEM;
- a = kzalloc(sizeof(struct tc_action), GFP_KERNEL);
+ a = create_a(0);
if (a == NULL)
goto err_out;
- INIT_LIST_HEAD(&a->list);
err = -EINVAL;
a->ops = tc_lookup_action(tb[TCA_ACT_KIND]);
if (a->ops == NULL) /* could happen in batch of actions */
@@ -695,20 +751,6 @@ static void cleanup_a(struct list_head *actions)
}
}
-static struct tc_action *create_a(int i)
-{
- struct tc_action *act;
-
- act = kzalloc(sizeof(*act), GFP_KERNEL);
- if (act == NULL) {
- pr_debug("create_a: failed to alloc!\n");
- return NULL;
- }
- act->order = i;
- INIT_LIST_HEAD(&act->list);
- return act;
-}
-
static int tca_action_flush(struct net *net, struct nlattr *nla,
struct nlmsghdr *n, u32 portid)
{
@@ -720,18 +762,12 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
struct nlattr *nest;
struct nlattr *tb[TCA_ACT_MAX + 1];
struct nlattr *kind;
- struct tc_action *a = create_a(0);
+ struct tc_action a;
int err = -ENOMEM;
- if (a == NULL) {
- pr_debug("tca_action_flush: couldnt create tc_action\n");
- return err;
- }
-
skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb) {
pr_debug("tca_action_flush: failed skb alloc\n");
- kfree(a);
return err;
}
@@ -743,8 +779,10 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
err = -EINVAL;
kind = tb[TCA_ACT_KIND];
- a->ops = tc_lookup_action(kind);
- if (a->ops == NULL) /*some idjot trying to flush unknown action */
+ memset(&a, 0, sizeof(struct tc_action));
+ INIT_LIST_HEAD(&a.list);
+ a.ops = tc_lookup_action(kind);
+ if (a.ops == NULL) /*some idjot trying to flush unknown action */
goto err_out;
nlh = nlmsg_put(skb, portid, n->nlmsg_seq, RTM_DELACTION, sizeof(*t), 0);
@@ -759,7 +797,7 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
if (nest == NULL)
goto out_module_put;
- err = a->ops->walk(skb, &dcb, RTM_DELACTION, a);
+ err = a.ops->walk(skb, &dcb, RTM_DELACTION, &a);
if (err < 0)
goto out_module_put;
if (err == 0)
@@ -769,8 +807,7 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
nlh->nlmsg_len = skb_tail_pointer(skb) - b;
nlh->nlmsg_flags |= NLM_F_ROOT;
- module_put(a->ops->owner);
- kfree(a);
+ module_put(a.ops->owner);
err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
n->nlmsg_flags & NLM_F_ECHO);
if (err > 0)
@@ -779,11 +816,10 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
return err;
out_module_put:
- module_put(a->ops->owner);
+ module_put(a.ops->owner);
err_out:
noflush_out:
kfree_skb(skb);
- kfree(a);
return err;
}
@@ -805,7 +841,11 @@ tcf_del_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
}
/* now do the delete */
- tcf_action_destroy(actions, 0);
+ ret = tcf_action_destroy(actions, 0);
+ if (ret < 0) {
+ kfree_skb(skb);
+ return ret;
+ }
ret = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
n->nlmsg_flags & NLM_F_ECHO);
@@ -908,7 +948,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n)
u32 portid = skb ? NETLINK_CB(skb).portid : 0;
int ret = 0, ovr = 0;
- if ((n->nlmsg_type != RTM_GETACTION) && !capable(CAP_NET_ADMIN))
+ if ((n->nlmsg_type != RTM_GETACTION) && !netlink_capable(skb, CAP_NET_ADMIN))
return -EPERM;
ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ACT_MAX, NULL);
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 2210187c45c..edbf40dac70 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -37,7 +37,6 @@
#include <net/tc_act/tc_csum.h>
#define CSUM_TAB_MASK 15
-static struct tcf_hashinfo csum_hash_info;
static const struct nla_policy csum_policy[TCA_CSUM_MAX + 1] = {
[TCA_CSUM_PARMS] = { .len = sizeof(struct tc_csum), },
@@ -48,7 +47,6 @@ static int tcf_csum_init(struct net *n, struct nlattr *nla, struct nlattr *est,
{
struct nlattr *tb[TCA_CSUM_MAX + 1];
struct tc_csum *parm;
- struct tcf_common *pc;
struct tcf_csum *p;
int ret = 0, err;
@@ -63,38 +61,31 @@ static int tcf_csum_init(struct net *n, struct nlattr *nla, struct nlattr *est,
return -EINVAL;
parm = nla_data(tb[TCA_CSUM_PARMS]);
- pc = tcf_hash_check(parm->index, a, bind);
- if (!pc) {
- pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind);
- if (IS_ERR(pc))
- return PTR_ERR(pc);
+ if (!tcf_hash_check(parm->index, a, bind)) {
+ ret = tcf_hash_create(parm->index, est, a, sizeof(*p), bind);
+ if (ret)
+ return ret;
ret = ACT_P_CREATED;
} else {
if (bind)/* dont override defaults */
return 0;
- tcf_hash_release(pc, bind, a->ops->hinfo);
+ tcf_hash_release(a, bind);
if (!ovr)
return -EEXIST;
}
- p = to_tcf_csum(pc);
+ p = to_tcf_csum(a);
spin_lock_bh(&p->tcf_lock);
p->tcf_action = parm->action;
p->update_flags = parm->update_flags;
spin_unlock_bh(&p->tcf_lock);
if (ret == ACT_P_CREATED)
- tcf_hash_insert(pc, a->ops->hinfo);
+ tcf_hash_insert(a);
return ret;
}
-static int tcf_csum_cleanup(struct tc_action *a, int bind)
-{
- struct tcf_csum *p = a->priv;
- return tcf_hash_release(&p->common, bind, &csum_hash_info);
-}
-
/**
* tcf_csum_skb_nextlayer - Get next layer pointer
* @skb: sk_buff to use
@@ -569,12 +560,10 @@ nla_put_failure:
static struct tc_action_ops act_csum_ops = {
.kind = "csum",
- .hinfo = &csum_hash_info,
.type = TCA_ACT_CSUM,
.owner = THIS_MODULE,
.act = tcf_csum,
.dump = tcf_csum_dump,
- .cleanup = tcf_csum_cleanup,
.init = tcf_csum_init,
};
@@ -583,11 +572,7 @@ MODULE_LICENSE("GPL");
static int __init csum_init_module(void)
{
- int err = tcf_hashinfo_init(&csum_hash_info, CSUM_TAB_MASK);
- if (err)
- return err;
-
- return tcf_register_action(&act_csum_ops);
+ return tcf_register_action(&act_csum_ops, CSUM_TAB_MASK);
}
static void __exit csum_cleanup_module(void)
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index a0eed30d581..d6bcbd9f779 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -24,7 +24,6 @@
#include <net/tc_act/tc_gact.h>
#define GACT_TAB_MASK 15
-static struct tcf_hashinfo gact_hash_info;
#ifdef CONFIG_GACT_PROB
static int gact_net_rand(struct tcf_gact *gact)
@@ -57,7 +56,6 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
struct nlattr *tb[TCA_GACT_MAX + 1];
struct tc_gact *parm;
struct tcf_gact *gact;
- struct tcf_common *pc;
int ret = 0;
int err;
#ifdef CONFIG_GACT_PROB
@@ -86,21 +84,20 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
}
#endif
- pc = tcf_hash_check(parm->index, a, bind);
- if (!pc) {
- pc = tcf_hash_create(parm->index, est, a, sizeof(*gact), bind);
- if (IS_ERR(pc))
- return PTR_ERR(pc);
+ if (!tcf_hash_check(parm->index, a, bind)) {
+ ret = tcf_hash_create(parm->index, est, a, sizeof(*gact), bind);
+ if (ret)
+ return ret;
ret = ACT_P_CREATED;
} else {
if (bind)/* dont override defaults */
return 0;
- tcf_hash_release(pc, bind, a->ops->hinfo);
+ tcf_hash_release(a, bind);
if (!ovr)
return -EEXIST;
}
- gact = to_gact(pc);
+ gact = to_gact(a);
spin_lock_bh(&gact->tcf_lock);
gact->tcf_action = parm->action;
@@ -113,19 +110,10 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
#endif
spin_unlock_bh(&gact->tcf_lock);
if (ret == ACT_P_CREATED)
- tcf_hash_insert(pc, a->ops->hinfo);
+ tcf_hash_insert(a);
return ret;
}
-static int tcf_gact_cleanup(struct tc_action *a, int bind)
-{
- struct tcf_gact *gact = a->priv;
-
- if (gact)
- return tcf_hash_release(&gact->common, bind, a->ops->hinfo);
- return 0;
-}
-
static int tcf_gact(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
@@ -191,12 +179,10 @@ nla_put_failure:
static struct tc_action_ops act_gact_ops = {
.kind = "gact",
- .hinfo = &gact_hash_info,
.type = TCA_ACT_GACT,
.owner = THIS_MODULE,
.act = tcf_gact,
.dump = tcf_gact_dump,
- .cleanup = tcf_gact_cleanup,
.init = tcf_gact_init,
};
@@ -206,21 +192,17 @@ MODULE_LICENSE("GPL");
static int __init gact_init_module(void)
{
- int err = tcf_hashinfo_init(&gact_hash_info, GACT_TAB_MASK);
- if (err)
- return err;
#ifdef CONFIG_GACT_PROB
pr_info("GACT probability on\n");
#else
pr_info("GACT probability NOT on\n");
#endif
- return tcf_register_action(&act_gact_ops);
+ return tcf_register_action(&act_gact_ops, GACT_TAB_MASK);
}
static void __exit gact_cleanup_module(void)
{
tcf_unregister_action(&act_gact_ops);
- tcf_hashinfo_destroy(&gact_hash_info);
}
module_init(gact_init_module);
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 0a6d6217402..8a64a0734ae 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -29,7 +29,6 @@
#define IPT_TAB_MASK 15
-static struct tcf_hashinfo ipt_hash_info;
static int ipt_init_target(struct xt_entry_target *t, char *table, unsigned int hook)
{
@@ -69,22 +68,12 @@ static void ipt_destroy_target(struct xt_entry_target *t)
module_put(par.target->me);
}
-static int tcf_ipt_release(struct tcf_ipt *ipt, int bind)
+static void tcf_ipt_release(struct tc_action *a, int bind)
{
- int ret = 0;
- if (ipt) {
- if (bind)
- ipt->tcf_bindcnt--;
- ipt->tcf_refcnt--;
- if (ipt->tcf_bindcnt <= 0 && ipt->tcf_refcnt <= 0) {
- ipt_destroy_target(ipt->tcfi_t);
- kfree(ipt->tcfi_tname);
- kfree(ipt->tcfi_t);
- tcf_hash_destroy(&ipt->common, &ipt_hash_info);
- ret = ACT_P_DELETED;
- }
- }
- return ret;
+ struct tcf_ipt *ipt = to_ipt(a);
+ ipt_destroy_target(ipt->tcfi_t);
+ kfree(ipt->tcfi_tname);
+ kfree(ipt->tcfi_t);
}
static const struct nla_policy ipt_policy[TCA_IPT_MAX + 1] = {
@@ -99,7 +88,6 @@ static int tcf_ipt_init(struct net *net, struct nlattr *nla, struct nlattr *est,
{
struct nlattr *tb[TCA_IPT_MAX + 1];
struct tcf_ipt *ipt;
- struct tcf_common *pc;
struct xt_entry_target *td, *t;
char *tname;
int ret = 0, err;
@@ -125,21 +113,20 @@ static int tcf_ipt_init(struct net *net, struct nlattr *nla, struct nlattr *est,
if (tb[TCA_IPT_INDEX] != NULL)
index = nla_get_u32(tb[TCA_IPT_INDEX]);
- pc = tcf_hash_check(index, a, bind);
- if (!pc) {
- pc = tcf_hash_create(index, est, a, sizeof(*ipt), bind);
- if (IS_ERR(pc))
- return PTR_ERR(pc);
+ if (!tcf_hash_check(index, a, bind) ) {
+ ret = tcf_hash_create(index, est, a, sizeof(*ipt), bind);
+ if (ret)
+ return ret;
ret = ACT_P_CREATED;
} else {
if (bind)/* dont override defaults */
return 0;
- tcf_ipt_release(to_ipt(pc), bind);
+ tcf_hash_release(a, bind);
if (!ovr)
return -EEXIST;
}
- ipt = to_ipt(pc);
+ ipt = to_ipt(a);
hook = nla_get_u32(tb[TCA_IPT_HOOK]);
@@ -170,7 +157,7 @@ static int tcf_ipt_init(struct net *net, struct nlattr *nla, struct nlattr *est,
ipt->tcfi_hook = hook;
spin_unlock_bh(&ipt->tcf_lock);
if (ret == ACT_P_CREATED)
- tcf_hash_insert(pc, a->ops->hinfo);
+ tcf_hash_insert(a);
return ret;
err3:
@@ -178,21 +165,11 @@ err3:
err2:
kfree(tname);
err1:
- if (ret == ACT_P_CREATED) {
- if (est)
- gen_kill_estimator(&pc->tcfc_bstats,
- &pc->tcfc_rate_est);
- kfree_rcu(pc, tcfc_rcu);
- }
+ if (ret == ACT_P_CREATED)
+ tcf_hash_cleanup(a, est);
return err;
}
-static int tcf_ipt_cleanup(struct tc_action *a, int bind)
-{
- struct tcf_ipt *ipt = a->priv;
- return tcf_ipt_release(ipt, bind);
-}
-
static int tcf_ipt(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
@@ -284,23 +261,21 @@ nla_put_failure:
static struct tc_action_ops act_ipt_ops = {
.kind = "ipt",
- .hinfo = &ipt_hash_info,
.type = TCA_ACT_IPT,
.owner = THIS_MODULE,
.act = tcf_ipt,
.dump = tcf_ipt_dump,
- .cleanup = tcf_ipt_cleanup,
+ .cleanup = tcf_ipt_release,
.init = tcf_ipt_init,
};
static struct tc_action_ops act_xt_ops = {
.kind = "xt",
- .hinfo = &ipt_hash_info,
.type = TCA_ACT_XT,
.owner = THIS_MODULE,
.act = tcf_ipt,
.dump = tcf_ipt_dump,
- .cleanup = tcf_ipt_cleanup,
+ .cleanup = tcf_ipt_release,
.init = tcf_ipt_init,
};
@@ -311,20 +286,16 @@ MODULE_ALIAS("act_xt");
static int __init ipt_init_module(void)
{
- int ret1, ret2, err;
- err = tcf_hashinfo_init(&ipt_hash_info, IPT_TAB_MASK);
- if (err)
- return err;
+ int ret1, ret2;
- ret1 = tcf_register_action(&act_xt_ops);
+ ret1 = tcf_register_action(&act_xt_ops, IPT_TAB_MASK);
if (ret1 < 0)
printk("Failed to load xt action\n");
- ret2 = tcf_register_action(&act_ipt_ops);
+ ret2 = tcf_register_action(&act_ipt_ops, IPT_TAB_MASK);
if (ret2 < 0)
printk("Failed to load ipt action\n");
if (ret1 < 0 && ret2 < 0) {
- tcf_hashinfo_destroy(&ipt_hash_info);
return ret1;
} else
return 0;
@@ -334,7 +305,6 @@ static void __exit ipt_cleanup_module(void)
{
tcf_unregister_action(&act_xt_ops);
tcf_unregister_action(&act_ipt_ops);
- tcf_hashinfo_destroy(&ipt_hash_info);
}
module_init(ipt_init_module);
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 0b2c6d39d39..4f912c0e225 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -31,23 +31,13 @@
#define MIRRED_TAB_MASK 7
static LIST_HEAD(mirred_list);
-static struct tcf_hashinfo mirred_hash_info;
-static int tcf_mirred_release(struct tcf_mirred *m, int bind)
+static void tcf_mirred_release(struct tc_action *a, int bind)
{
- if (m) {
- if (bind)
- m->tcf_bindcnt--;
- m->tcf_refcnt--;
- if (!m->tcf_bindcnt && m->tcf_refcnt <= 0) {
- list_del(&m->tcfm_list);
- if (m->tcfm_dev)
- dev_put(m->tcfm_dev);
- tcf_hash_destroy(&m->common, &mirred_hash_info);
- return 1;
- }
- }
- return 0;
+ struct tcf_mirred *m = to_mirred(a);
+ list_del(&m->tcfm_list);
+ if (m->tcfm_dev)
+ dev_put(m->tcfm_dev);
}
static const struct nla_policy mirred_policy[TCA_MIRRED_MAX + 1] = {
@@ -61,7 +51,6 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
struct nlattr *tb[TCA_MIRRED_MAX + 1];
struct tc_mirred *parm;
struct tcf_mirred *m;
- struct tcf_common *pc;
struct net_device *dev;
int ret, ok_push = 0;
@@ -101,21 +90,20 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
dev = NULL;
}
- pc = tcf_hash_check(parm->index, a, bind);
- if (!pc) {
+ if (!tcf_hash_check(parm->index, a, bind)) {
if (dev == NULL)
return -EINVAL;
- pc = tcf_hash_create(parm->index, est, a, sizeof(*m), bind);
- if (IS_ERR(pc))
- return PTR_ERR(pc);
+ ret = tcf_hash_create(parm->index, est, a, sizeof(*m), bind);
+ if (ret)
+ return ret;
ret = ACT_P_CREATED;
} else {
if (!ovr) {
- tcf_mirred_release(to_mirred(pc), bind);
+ tcf_hash_release(a, bind);
return -EEXIST;
}
}
- m = to_mirred(pc);
+ m = to_mirred(a);
spin_lock_bh(&m->tcf_lock);
m->tcf_action = parm->action;
@@ -131,21 +119,12 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
spin_unlock_bh(&m->tcf_lock);
if (ret == ACT_P_CREATED) {
list_add(&m->tcfm_list, &mirred_list);
- tcf_hash_insert(pc, a->ops->hinfo);
+ tcf_hash_insert(a);
}
return ret;
}
-static int tcf_mirred_cleanup(struct tc_action *a, int bind)
-{
- struct tcf_mirred *m = a->priv;
-
- if (m)
- return tcf_mirred_release(m, bind);
- return 0;
-}
-
static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
@@ -254,12 +233,11 @@ static struct notifier_block mirred_device_notifier = {
static struct tc_action_ops act_mirred_ops = {
.kind = "mirred",
- .hinfo = &mirred_hash_info,
.type = TCA_ACT_MIRRED,
.owner = THIS_MODULE,
.act = tcf_mirred,
.dump = tcf_mirred_dump,
- .cleanup = tcf_mirred_cleanup,
+ .cleanup = tcf_mirred_release,
.init = tcf_mirred_init,
};
@@ -273,19 +251,13 @@ static int __init mirred_init_module(void)
if (err)
return err;
- err = tcf_hashinfo_init(&mirred_hash_info, MIRRED_TAB_MASK);
- if (err) {
- unregister_netdevice_notifier(&mirred_device_notifier);
- return err;
- }
pr_info("Mirror/redirect action on\n");
- return tcf_register_action(&act_mirred_ops);
+ return tcf_register_action(&act_mirred_ops, MIRRED_TAB_MASK);
}
static void __exit mirred_cleanup_module(void)
{
tcf_unregister_action(&act_mirred_ops);
- tcf_hashinfo_destroy(&mirred_hash_info);
unregister_netdevice_notifier(&mirred_device_notifier);
}
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 81f0404bb33..270a030d5fd 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -31,8 +31,6 @@
#define NAT_TAB_MASK 15
-static struct tcf_hashinfo nat_hash_info;
-
static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = {
[TCA_NAT_PARMS] = { .len = sizeof(struct tc_nat) },
};
@@ -44,7 +42,6 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
struct tc_nat *parm;
int ret = 0, err;
struct tcf_nat *p;
- struct tcf_common *pc;
if (nla == NULL)
return -EINVAL;
@@ -57,20 +54,19 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
return -EINVAL;
parm = nla_data(tb[TCA_NAT_PARMS]);
- pc = tcf_hash_check(parm->index, a, bind);
- if (!pc) {
- pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind);
- if (IS_ERR(pc))
- return PTR_ERR(pc);
+ if (!tcf_hash_check(parm->index, a, bind)) {
+ ret = tcf_hash_create(parm->index, est, a, sizeof(*p), bind);
+ if (ret)
+ return ret;
ret = ACT_P_CREATED;
} else {
if (bind)
return 0;
- tcf_hash_release(pc, bind, a->ops->hinfo);
+ tcf_hash_release(a, bind);
if (!ovr)
return -EEXIST;
}
- p = to_tcf_nat(pc);
+ p = to_tcf_nat(a);
spin_lock_bh(&p->tcf_lock);
p->old_addr = parm->old_addr;
@@ -82,18 +78,11 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
spin_unlock_bh(&p->tcf_lock);
if (ret == ACT_P_CREATED)
- tcf_hash_insert(pc, a->ops->hinfo);
+ tcf_hash_insert(a);
return ret;
}
-static int tcf_nat_cleanup(struct tc_action *a, int bind)
-{
- struct tcf_nat *p = a->priv;
-
- return tcf_hash_release(&p->common, bind, &nat_hash_info);
-}
-
static int tcf_nat(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
@@ -293,12 +282,10 @@ nla_put_failure:
static struct tc_action_ops act_nat_ops = {
.kind = "nat",
- .hinfo = &nat_hash_info,
.type = TCA_ACT_NAT,
.owner = THIS_MODULE,
.act = tcf_nat,
.dump = tcf_nat_dump,
- .cleanup = tcf_nat_cleanup,
.init = tcf_nat_init,
};
@@ -307,16 +294,12 @@ MODULE_LICENSE("GPL");
static int __init nat_init_module(void)
{
- int err = tcf_hashinfo_init(&nat_hash_info, NAT_TAB_MASK);
- if (err)
- return err;
- return tcf_register_action(&act_nat_ops);
+ return tcf_register_action(&act_nat_ops, NAT_TAB_MASK);
}
static void __exit nat_cleanup_module(void)
{
tcf_unregister_action(&act_nat_ops);
- tcf_hashinfo_destroy(&nat_hash_info);
}
module_init(nat_init_module);
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index be3f0f6875b..5f9bcb2e080 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -25,8 +25,6 @@
#define PEDIT_TAB_MASK 15
-static struct tcf_hashinfo pedit_hash_info;
-
static const struct nla_policy pedit_policy[TCA_PEDIT_MAX + 1] = {
[TCA_PEDIT_PARMS] = { .len = sizeof(struct tc_pedit) },
};
@@ -39,7 +37,6 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
struct tc_pedit *parm;
int ret = 0, err;
struct tcf_pedit *p;
- struct tcf_common *pc;
struct tc_pedit_key *keys = NULL;
int ksize;
@@ -57,26 +54,22 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
if (nla_len(tb[TCA_PEDIT_PARMS]) < sizeof(*parm) + ksize)
return -EINVAL;
- pc = tcf_hash_check(parm->index, a, bind);
- if (!pc) {
+ if (!tcf_hash_check(parm->index, a, bind)) {
if (!parm->nkeys)
return -EINVAL;
- pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind);
- if (IS_ERR(pc))
- return PTR_ERR(pc);
- p = to_pedit(pc);
+ ret = tcf_hash_create(parm->index, est, a, sizeof(*p), bind);
+ if (ret)
+ return ret;
+ p = to_pedit(a);
keys = kmalloc(ksize, GFP_KERNEL);
if (keys == NULL) {
- if (est)
- gen_kill_estimator(&pc->tcfc_bstats,
- &pc->tcfc_rate_est);
- kfree_rcu(pc, tcfc_rcu);
+ tcf_hash_cleanup(a, est);
return -ENOMEM;
}
ret = ACT_P_CREATED;
} else {
- p = to_pedit(pc);
- tcf_hash_release(pc, bind, a->ops->hinfo);
+ p = to_pedit(a);
+ tcf_hash_release(a, bind);
if (bind)
return 0;
if (!ovr)
@@ -100,22 +93,15 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
memcpy(p->tcfp_keys, parm->keys, ksize);
spin_unlock_bh(&p->tcf_lock);
if (ret == ACT_P_CREATED)
- tcf_hash_insert(pc, a->ops->hinfo);
+ tcf_hash_insert(a);
return ret;
}
-static int tcf_pedit_cleanup(struct tc_action *a, int bind)
+static void tcf_pedit_cleanup(struct tc_action *a, int bind)
{
struct tcf_pedit *p = a->priv;
-
- if (p) {
- struct tc_pedit_key *keys = p->tcfp_keys;
- if (tcf_hash_release(&p->common, bind, &pedit_hash_info)) {
- kfree(keys);
- return 1;
- }
- }
- return 0;
+ struct tc_pedit_key *keys = p->tcfp_keys;
+ kfree(keys);
}
static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a,
@@ -230,7 +216,6 @@ nla_put_failure:
static struct tc_action_ops act_pedit_ops = {
.kind = "pedit",
- .hinfo = &pedit_hash_info,
.type = TCA_ACT_PEDIT,
.owner = THIS_MODULE,
.act = tcf_pedit,
@@ -245,15 +230,11 @@ MODULE_LICENSE("GPL");
static int __init pedit_init_module(void)
{
- int err = tcf_hashinfo_init(&pedit_hash_info, PEDIT_TAB_MASK);
- if (err)
- return err;
- return tcf_register_action(&act_pedit_ops);
+ return tcf_register_action(&act_pedit_ops, PEDIT_TAB_MASK);
}
static void __exit pedit_cleanup_module(void)
{
- tcf_hashinfo_destroy(&pedit_hash_info);
tcf_unregister_action(&act_pedit_ops);
}
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 1778209a332..0566e4606a4 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -41,7 +41,6 @@ struct tcf_police {
container_of(pc, struct tcf_police, common)
#define POL_TAB_MASK 15
-static struct tcf_hashinfo police_hash_info;
/* old policer structure from before tc actions */
struct tc_police_compat {
@@ -234,7 +233,7 @@ override:
police->tcfp_t_c = ktime_to_ns(ktime_get());
police->tcf_index = parm->index ? parm->index :
- tcf_hash_new_index(a->ops->hinfo);
+ tcf_hash_new_index(hinfo);
h = tcf_hash(police->tcf_index, POL_TAB_MASK);
spin_lock_bh(&hinfo->lock);
hlist_add_head(&police->tcf_head, &hinfo->htab[h]);
@@ -253,14 +252,6 @@ failure:
return err;
}
-static int tcf_act_police_cleanup(struct tc_action *a, int bind)
-{
- struct tcf_police *p = a->priv;
- if (p)
- return tcf_hash_release(&p->common, bind, &police_hash_info);
- return 0;
-}
-
static int tcf_act_police(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
@@ -357,12 +348,10 @@ MODULE_LICENSE("GPL");
static struct tc_action_ops act_police_ops = {
.kind = "police",
- .hinfo = &police_hash_info,
.type = TCA_ID_POLICE,
.owner = THIS_MODULE,
.act = tcf_act_police,
.dump = tcf_act_police_dump,
- .cleanup = tcf_act_police_cleanup,
.init = tcf_act_police_locate,
.walk = tcf_act_police_walker
};
@@ -370,19 +359,12 @@ static struct tc_action_ops act_police_ops = {
static int __init
police_init_module(void)
{
- int err = tcf_hashinfo_init(&police_hash_info, POL_TAB_MASK);
- if (err)
- return err;
- err = tcf_register_action(&act_police_ops);
- if (err)
- tcf_hashinfo_destroy(&police_hash_info);
- return err;
+ return tcf_register_action(&act_police_ops, POL_TAB_MASK);
}
static void __exit
police_cleanup_module(void)
{
- tcf_hashinfo_destroy(&police_hash_info);
tcf_unregister_action(&act_police_ops);
}
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 8ef2f1fcbfb..992c2317ce8 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -25,7 +25,6 @@
#include <net/tc_act/tc_defact.h>
#define SIMP_TAB_MASK 7
-static struct tcf_hashinfo simp_hash_info;
#define SIMP_MAX_DATA 32
static int tcf_simp(struct sk_buff *skb, const struct tc_action *a,
@@ -47,20 +46,10 @@ static int tcf_simp(struct sk_buff *skb, const struct tc_action *a,
return d->tcf_action;
}
-static int tcf_simp_release(struct tcf_defact *d, int bind)
+static void tcf_simp_release(struct tc_action *a, int bind)
{
- int ret = 0;
- if (d) {
- if (bind)
- d->tcf_bindcnt--;
- d->tcf_refcnt--;
- if (d->tcf_bindcnt <= 0 && d->tcf_refcnt <= 0) {
- kfree(d->tcfd_defdata);
- tcf_hash_destroy(&d->common, &simp_hash_info);
- ret = 1;
- }
- }
- return ret;
+ struct tcf_defact *d = to_defact(a);
+ kfree(d->tcfd_defdata);
}
static int alloc_defdata(struct tcf_defact *d, char *defdata)
@@ -94,7 +83,6 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
struct nlattr *tb[TCA_DEF_MAX + 1];
struct tc_defact *parm;
struct tcf_defact *d;
- struct tcf_common *pc;
char *defdata;
int ret = 0, err;
@@ -114,29 +102,25 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
parm = nla_data(tb[TCA_DEF_PARMS]);
defdata = nla_data(tb[TCA_DEF_DATA]);
- pc = tcf_hash_check(parm->index, a, bind);
- if (!pc) {
- pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind);
- if (IS_ERR(pc))
- return PTR_ERR(pc);
+ if (!tcf_hash_check(parm->index, a, bind)) {
+ ret = tcf_hash_create(parm->index, est, a, sizeof(*d), bind);
+ if (ret)
+ return ret;
- d = to_defact(pc);
+ d = to_defact(a);
ret = alloc_defdata(d, defdata);
if (ret < 0) {
- if (est)
- gen_kill_estimator(&pc->tcfc_bstats,
- &pc->tcfc_rate_est);
- kfree_rcu(pc, tcfc_rcu);
+ tcf_hash_cleanup(a, est);
return ret;
}
d->tcf_action = parm->action;
ret = ACT_P_CREATED;
} else {
- d = to_defact(pc);
+ d = to_defact(a);
if (bind)
return 0;
- tcf_simp_release(d, bind);
+ tcf_hash_release(a, bind);
if (!ovr)
return -EEXIST;
@@ -144,19 +128,10 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
}
if (ret == ACT_P_CREATED)
- tcf_hash_insert(pc, a->ops->hinfo);
+ tcf_hash_insert(a);
return ret;
}
-static int tcf_simp_cleanup(struct tc_action *a, int bind)
-{
- struct tcf_defact *d = a->priv;
-
- if (d)
- return tcf_simp_release(d, bind);
- return 0;
-}
-
static int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a,
int bind, int ref)
{
@@ -187,12 +162,11 @@ nla_put_failure:
static struct tc_action_ops act_simp_ops = {
.kind = "simple",
- .hinfo = &simp_hash_info,
.type = TCA_ACT_SIMP,
.owner = THIS_MODULE,
.act = tcf_simp,
.dump = tcf_simp_dump,
- .cleanup = tcf_simp_cleanup,
+ .cleanup = tcf_simp_release,
.init = tcf_simp_init,
};
@@ -202,23 +176,15 @@ MODULE_LICENSE("GPL");
static int __init simp_init_module(void)
{
- int err, ret;
- err = tcf_hashinfo_init(&simp_hash_info, SIMP_TAB_MASK);
- if (err)
- return err;
-
- ret = tcf_register_action(&act_simp_ops);
+ int ret;
+ ret = tcf_register_action(&act_simp_ops, SIMP_TAB_MASK);
if (!ret)
pr_info("Simple TC action Loaded\n");
- else
- tcf_hashinfo_destroy(&simp_hash_info);
-
return ret;
}
static void __exit simp_cleanup_module(void)
{
- tcf_hashinfo_destroy(&simp_hash_info);
tcf_unregister_action(&act_simp_ops);
}
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 98725080b5a..fcfeeaf838b 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -28,7 +28,6 @@
#include <net/tc_act/tc_skbedit.h>
#define SKBEDIT_TAB_MASK 15
-static struct tcf_hashinfo skbedit_hash_info;
static int tcf_skbedit(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
@@ -65,7 +64,6 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
struct nlattr *tb[TCA_SKBEDIT_MAX + 1];
struct tc_skbedit *parm;
struct tcf_skbedit *d;
- struct tcf_common *pc;
u32 flags = 0, *priority = NULL, *mark = NULL;
u16 *queue_mapping = NULL;
int ret = 0, err;
@@ -100,19 +98,18 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
parm = nla_data(tb[TCA_SKBEDIT_PARMS]);
- pc = tcf_hash_check(parm->index, a, bind);
- if (!pc) {
- pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind);
- if (IS_ERR(pc))
- return PTR_ERR(pc);
+ if (!tcf_hash_check(parm->index, a, bind)) {
+ ret = tcf_hash_create(parm->index, est, a, sizeof(*d), bind);
+ if (ret)
+ return ret;
- d = to_skbedit(pc);
+ d = to_skbedit(a);
ret = ACT_P_CREATED;
} else {
- d = to_skbedit(pc);
+ d = to_skbedit(a);
if (bind)
return 0;
- tcf_hash_release(pc, bind, a->ops->hinfo);
+ tcf_hash_release(a, bind);
if (!ovr)
return -EEXIST;
}
@@ -132,19 +129,10 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
spin_unlock_bh(&d->tcf_lock);
if (ret == ACT_P_CREATED)
- tcf_hash_insert(pc, a->ops->hinfo);
+ tcf_hash_insert(a);
return ret;
}
-static int tcf_skbedit_cleanup(struct tc_action *a, int bind)
-{
- struct tcf_skbedit *d = a->priv;
-
- if (d)
- return tcf_hash_release(&d->common, bind, &skbedit_hash_info);
- return 0;
-}
-
static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a,
int bind, int ref)
{
@@ -186,12 +174,10 @@ nla_put_failure:
static struct tc_action_ops act_skbedit_ops = {
.kind = "skbedit",
- .hinfo = &skbedit_hash_info,
.type = TCA_ACT_SKBEDIT,
.owner = THIS_MODULE,
.act = tcf_skbedit,
.dump = tcf_skbedit_dump,
- .cleanup = tcf_skbedit_cleanup,
.init = tcf_skbedit_init,
};
@@ -201,15 +187,11 @@ MODULE_LICENSE("GPL");
static int __init skbedit_init_module(void)
{
- int err = tcf_hashinfo_init(&skbedit_hash_info, SKBEDIT_TAB_MASK);
- if (err)
- return err;
- return tcf_register_action(&act_skbedit_ops);
+ return tcf_register_action(&act_skbedit_ops, SKBEDIT_TAB_MASK);
}
static void __exit skbedit_cleanup_module(void)
{
- tcf_hashinfo_destroy(&skbedit_hash_info);
tcf_unregister_action(&act_skbedit_ops);
}
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 29a30a14c31..45527e6b52d 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -134,7 +134,8 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n)
int err;
int tp_created = 0;
- if ((n->nlmsg_type != RTM_GETTFILTER) && !capable(CAP_NET_ADMIN))
+ if ((n->nlmsg_type != RTM_GETTFILTER) &&
+ !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
return -EPERM;
replay:
@@ -317,7 +318,8 @@ replay:
}
}
- err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh);
+ err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh,
+ n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE);
if (err == 0) {
if (tp_created) {
spin_lock_bh(root_lock);
@@ -504,7 +506,7 @@ void tcf_exts_destroy(struct tcf_proto *tp, struct tcf_exts *exts)
EXPORT_SYMBOL(tcf_exts_destroy);
int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
- struct nlattr *rate_tlv, struct tcf_exts *exts)
+ struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr)
{
#ifdef CONFIG_NET_CLS_ACT
{
@@ -513,7 +515,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
INIT_LIST_HEAD(&exts->actions);
if (exts->police && tb[exts->police]) {
act = tcf_action_init_1(net, tb[exts->police], rate_tlv,
- "police", TCA_ACT_NOREPLACE,
+ "police", ovr,
TCA_ACT_BIND);
if (IS_ERR(act))
return PTR_ERR(act);
@@ -523,7 +525,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
} else if (exts->action && tb[exts->action]) {
int err;
err = tcf_action_init(net, tb[exts->action], rate_tlv,
- NULL, TCA_ACT_NOREPLACE,
+ NULL, ovr,
TCA_ACT_BIND, &exts->actions);
if (err)
return err;
@@ -543,14 +545,12 @@ void tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst,
struct tcf_exts *src)
{
#ifdef CONFIG_NET_CLS_ACT
- if (!list_empty(&src->actions)) {
- LIST_HEAD(tmp);
- tcf_tree_lock(tp);
- list_splice_init(&dst->actions, &tmp);
- list_splice(&src->actions, &dst->actions);
- tcf_tree_unlock(tp);
- tcf_action_destroy(&tmp, TCA_ACT_UNBIND);
- }
+ LIST_HEAD(tmp);
+ tcf_tree_lock(tp);
+ list_splice_init(&dst->actions, &tmp);
+ list_splice(&src->actions, &dst->actions);
+ tcf_tree_unlock(tp);
+ tcf_action_destroy(&tmp, TCA_ACT_UNBIND);
#endif
}
EXPORT_SYMBOL(tcf_exts_change);
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index e98ca99c202..0ae1813e3e9 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -130,14 +130,14 @@ static const struct nla_policy basic_policy[TCA_BASIC_MAX + 1] = {
static int basic_set_parms(struct net *net, struct tcf_proto *tp,
struct basic_filter *f, unsigned long base,
struct nlattr **tb,
- struct nlattr *est)
+ struct nlattr *est, bool ovr)
{
int err;
struct tcf_exts e;
struct tcf_ematch_tree t;
tcf_exts_init(&e, TCA_BASIC_ACT, TCA_BASIC_POLICE);
- err = tcf_exts_validate(net, tp, tb, est, &e);
+ err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
if (err < 0)
return err;
@@ -161,7 +161,7 @@ errout:
static int basic_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base, u32 handle,
- struct nlattr **tca, unsigned long *arg)
+ struct nlattr **tca, unsigned long *arg, bool ovr)
{
int err;
struct basic_head *head = tp->root;
@@ -179,7 +179,7 @@ static int basic_change(struct net *net, struct sk_buff *in_skb,
if (f != NULL) {
if (handle && f->handle != handle)
return -EINVAL;
- return basic_set_parms(net, tp, f, base, tb, tca[TCA_RATE]);
+ return basic_set_parms(net, tp, f, base, tb, tca[TCA_RATE], ovr);
}
err = -ENOBUFS;
@@ -206,7 +206,7 @@ static int basic_change(struct net *net, struct sk_buff *in_skb,
f->handle = head->hgenerator;
}
- err = basic_set_parms(net, tp, f, base, tb, tca[TCA_RATE]);
+ err = basic_set_parms(net, tp, f, base, tb, tca[TCA_RATE], ovr);
if (err < 0)
goto errout;
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 8e3cf49118e..13f64df2c71 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -156,11 +156,11 @@ static void cls_bpf_put(struct tcf_proto *tp, unsigned long f)
static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
struct cls_bpf_prog *prog,
unsigned long base, struct nlattr **tb,
- struct nlattr *est)
+ struct nlattr *est, bool ovr)
{
struct sock_filter *bpf_ops, *bpf_old;
struct tcf_exts exts;
- struct sock_fprog tmp;
+ struct sock_fprog_kern tmp;
struct sk_filter *fp, *fp_old;
u16 bpf_size, bpf_len;
u32 classid;
@@ -170,7 +170,7 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
return -EINVAL;
tcf_exts_init(&exts, TCA_BPF_ACT, TCA_BPF_POLICE);
- ret = tcf_exts_validate(net, tp, tb, est, &exts);
+ ret = tcf_exts_validate(net, tp, tb, est, &exts, ovr);
if (ret < 0)
return ret;
@@ -191,7 +191,7 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
memcpy(bpf_ops, nla_data(tb[TCA_BPF_OPS]), bpf_size);
tmp.len = bpf_len;
- tmp.filter = (struct sock_filter __user *) bpf_ops;
+ tmp.filter = bpf_ops;
ret = sk_unattached_filter_create(&fp, &tmp);
if (ret)
@@ -242,7 +242,7 @@ static u32 cls_bpf_grab_new_handle(struct tcf_proto *tp,
static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
- unsigned long *arg)
+ unsigned long *arg, bool ovr)
{
struct cls_bpf_head *head = tp->root;
struct cls_bpf_prog *prog = (struct cls_bpf_prog *) *arg;
@@ -260,7 +260,7 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
if (handle && prog->handle != handle)
return -EINVAL;
return cls_bpf_modify_existing(net, tp, prog, base, tb,
- tca[TCA_RATE]);
+ tca[TCA_RATE], ovr);
}
prog = kzalloc(sizeof(*prog), GFP_KERNEL);
@@ -277,7 +277,7 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
goto errout;
}
- ret = cls_bpf_modify_existing(net, tp, prog, base, tb, tca[TCA_RATE]);
+ ret = cls_bpf_modify_existing(net, tp, prog, base, tb, tca[TCA_RATE], ovr);
if (ret < 0)
goto errout;
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 8e2158ab551..cacf01bd04f 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -83,7 +83,7 @@ static const struct nla_policy cgroup_policy[TCA_CGROUP_MAX + 1] = {
static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
- unsigned long *arg)
+ unsigned long *arg, bool ovr)
{
struct nlattr *tb[TCA_CGROUP_MAX + 1];
struct cls_cgroup_head *head = tp->root;
@@ -119,7 +119,7 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
return err;
tcf_exts_init(&e, TCA_CGROUP_ACT, TCA_CGROUP_POLICE);
- err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e);
+ err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr);
if (err < 0)
return err;
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 257029c5433..35be16f7c19 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -349,7 +349,7 @@ static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = {
static int flow_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
- unsigned long *arg)
+ unsigned long *arg, bool ovr)
{
struct flow_head *head = tp->root;
struct flow_filter *f;
@@ -393,7 +393,7 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
}
tcf_exts_init(&e, TCA_FLOW_ACT, TCA_FLOW_POLICE);
- err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e);
+ err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr);
if (err < 0)
return err;
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index a366537f82c..861b03ccfed 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -29,11 +29,11 @@
#include <net/act_api.h>
#include <net/pkt_cls.h>
-#define HTSIZE (PAGE_SIZE/sizeof(struct fw_filter *))
+#define HTSIZE 256
struct fw_head {
- struct fw_filter *ht[HTSIZE];
- u32 mask;
+ u32 mask;
+ struct fw_filter *ht[HTSIZE];
};
struct fw_filter {
@@ -46,30 +46,11 @@ struct fw_filter {
struct tcf_exts exts;
};
-static inline int fw_hash(u32 handle)
+static u32 fw_hash(u32 handle)
{
- if (HTSIZE == 4096)
- return ((handle >> 24) & 0xFFF) ^
- ((handle >> 12) & 0xFFF) ^
- (handle & 0xFFF);
- else if (HTSIZE == 2048)
- return ((handle >> 22) & 0x7FF) ^
- ((handle >> 11) & 0x7FF) ^
- (handle & 0x7FF);
- else if (HTSIZE == 1024)
- return ((handle >> 20) & 0x3FF) ^
- ((handle >> 10) & 0x3FF) ^
- (handle & 0x3FF);
- else if (HTSIZE == 512)
- return (handle >> 27) ^
- ((handle >> 18) & 0x1FF) ^
- ((handle >> 9) & 0x1FF) ^
- (handle & 0x1FF);
- else if (HTSIZE == 256) {
- u8 *t = (u8 *) &handle;
- return t[0] ^ t[1] ^ t[2] ^ t[3];
- } else
- return handle & (HTSIZE - 1);
+ handle ^= (handle >> 16);
+ handle ^= (handle >> 8);
+ return handle % HTSIZE;
}
static int fw_classify(struct sk_buff *skb, const struct tcf_proto *tp,
@@ -188,7 +169,7 @@ static const struct nla_policy fw_policy[TCA_FW_MAX + 1] = {
static int
fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f,
- struct nlattr **tb, struct nlattr **tca, unsigned long base)
+ struct nlattr **tb, struct nlattr **tca, unsigned long base, bool ovr)
{
struct fw_head *head = tp->root;
struct tcf_exts e;
@@ -196,7 +177,7 @@ fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f,
int err;
tcf_exts_init(&e, TCA_FW_ACT, TCA_FW_POLICE);
- err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e);
+ err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr);
if (err < 0)
return err;
@@ -237,7 +218,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle,
struct nlattr **tca,
- unsigned long *arg)
+ unsigned long *arg, bool ovr)
{
struct fw_head *head = tp->root;
struct fw_filter *f = (struct fw_filter *) *arg;
@@ -255,7 +236,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
if (f != NULL) {
if (f->id != handle && handle)
return -EINVAL;
- return fw_change_attrs(net, tp, f, tb, tca, base);
+ return fw_change_attrs(net, tp, f, tb, tca, base, ovr);
}
if (!handle)
@@ -283,7 +264,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
tcf_exts_init(&f->exts, TCA_FW_ACT, TCA_FW_POLICE);
f->id = handle;
- err = fw_change_attrs(net, tp, f, tb, tca, base);
+ err = fw_change_attrs(net, tp, f, tb, tca, base, ovr);
if (err < 0)
goto errout;
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 1ad3068f2ce..dd9fc2523c7 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -333,7 +333,8 @@ static const struct nla_policy route4_policy[TCA_ROUTE4_MAX + 1] = {
static int route4_set_parms(struct net *net, struct tcf_proto *tp,
unsigned long base, struct route4_filter *f,
u32 handle, struct route4_head *head,
- struct nlattr **tb, struct nlattr *est, int new)
+ struct nlattr **tb, struct nlattr *est, int new,
+ bool ovr)
{
int err;
u32 id = 0, to = 0, nhandle = 0x8000;
@@ -343,7 +344,7 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
struct tcf_exts e;
tcf_exts_init(&e, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE);
- err = tcf_exts_validate(net, tp, tb, est, &e);
+ err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
if (err < 0)
return err;
@@ -428,7 +429,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle,
struct nlattr **tca,
- unsigned long *arg)
+ unsigned long *arg, bool ovr)
{
struct route4_head *head = tp->root;
struct route4_filter *f, *f1, **fp;
@@ -455,7 +456,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
old_handle = f->handle;
err = route4_set_parms(net, tp, base, f, handle, head, tb,
- tca[TCA_RATE], 0);
+ tca[TCA_RATE], 0, ovr);
if (err < 0)
return err;
@@ -479,7 +480,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
tcf_exts_init(&f->exts, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE);
err = route4_set_parms(net, tp, base, f, handle, head, tb,
- tca[TCA_RATE], 1);
+ tca[TCA_RATE], 1, ovr);
if (err < 0)
goto errout;
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index 19f8e5dfa8b..1020e233a5d 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -415,7 +415,7 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle,
struct nlattr **tca,
- unsigned long *arg)
+ unsigned long *arg, bool ovr)
{
struct rsvp_head *data = tp->root;
struct rsvp_filter *f, **fp;
@@ -436,7 +436,7 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb,
return err;
tcf_exts_init(&e, TCA_RSVP_ACT, TCA_RSVP_POLICE);
- err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e);
+ err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr);
if (err < 0)
return err;
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index eed8404443d..c721cd4a469 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -188,11 +188,17 @@ static const struct nla_policy tcindex_policy[TCA_TCINDEX_MAX + 1] = {
[TCA_TCINDEX_CLASSID] = { .type = NLA_U32 },
};
+static void tcindex_filter_result_init(struct tcindex_filter_result *r)
+{
+ memset(r, 0, sizeof(*r));
+ tcf_exts_init(&r->exts, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
+}
+
static int
tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
u32 handle, struct tcindex_data *p,
struct tcindex_filter_result *r, struct nlattr **tb,
- struct nlattr *est)
+ struct nlattr *est, bool ovr)
{
int err, balloc = 0;
struct tcindex_filter_result new_filter_result, *old_r = r;
@@ -202,20 +208,16 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
struct tcf_exts e;
tcf_exts_init(&e, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
- err = tcf_exts_validate(net, tp, tb, est, &e);
+ err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
if (err < 0)
return err;
memcpy(&cp, p, sizeof(cp));
- memset(&new_filter_result, 0, sizeof(new_filter_result));
- tcf_exts_init(&new_filter_result.exts, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
+ tcindex_filter_result_init(&new_filter_result);
+ tcindex_filter_result_init(&cr);
if (old_r)
- memcpy(&cr, r, sizeof(cr));
- else {
- memset(&cr, 0, sizeof(cr));
- tcf_exts_init(&cr.exts, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
- }
+ cr.res = r->res;
if (tb[TCA_TCINDEX_HASH])
cp.hash = nla_get_u32(tb[TCA_TCINDEX_HASH]);
@@ -267,9 +269,14 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
err = -ENOMEM;
if (!cp.perfect && !cp.h) {
if (valid_perfect_hash(&cp)) {
+ int i;
+
cp.perfect = kcalloc(cp.hash, sizeof(*r), GFP_KERNEL);
if (!cp.perfect)
goto errout;
+ for (i = 0; i < cp.hash; i++)
+ tcf_exts_init(&cp.perfect[i].exts, TCA_TCINDEX_ACT,
+ TCA_TCINDEX_POLICE);
balloc = 1;
} else {
cp.h = kcalloc(cp.hash, sizeof(f), GFP_KERNEL);
@@ -295,14 +302,17 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
tcf_bind_filter(tp, &cr.res, base);
}
- tcf_exts_change(tp, &cr.exts, &e);
+ if (old_r)
+ tcf_exts_change(tp, &r->exts, &e);
+ else
+ tcf_exts_change(tp, &cr.exts, &e);
tcf_tree_lock(tp);
if (old_r && old_r != r)
- memset(old_r, 0, sizeof(*old_r));
+ tcindex_filter_result_init(old_r);
memcpy(p, &cp, sizeof(cp));
- memcpy(r, &cr, sizeof(cr));
+ r->res = cr.res;
if (r == &new_filter_result) {
struct tcindex_filter **fp;
@@ -331,7 +341,7 @@ errout:
static int
tcindex_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base, u32 handle,
- struct nlattr **tca, unsigned long *arg)
+ struct nlattr **tca, unsigned long *arg, bool ovr)
{
struct nlattr *opt = tca[TCA_OPTIONS];
struct nlattr *tb[TCA_TCINDEX_MAX + 1];
@@ -351,7 +361,7 @@ tcindex_change(struct net *net, struct sk_buff *in_skb,
return err;
return tcindex_set_parms(net, tp, base, handle, p, r, tb,
- tca[TCA_RATE]);
+ tca[TCA_RATE], ovr);
}
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 84c28daff84..70c0be8d012 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -38,6 +38,7 @@
#include <linux/errno.h>
#include <linux/rtnetlink.h>
#include <linux/skbuff.h>
+#include <linux/bitmap.h>
#include <net/netlink.h>
#include <net/act_api.h>
#include <net/pkt_cls.h>
@@ -460,17 +461,25 @@ static int u32_delete(struct tcf_proto *tp, unsigned long arg)
return 0;
}
+#define NR_U32_NODE (1<<12)
static u32 gen_new_kid(struct tc_u_hnode *ht, u32 handle)
{
struct tc_u_knode *n;
- unsigned int i = 0x7FF;
+ unsigned long i;
+ unsigned long *bitmap = kzalloc(BITS_TO_LONGS(NR_U32_NODE) * sizeof(unsigned long),
+ GFP_KERNEL);
+ if (!bitmap)
+ return handle | 0xFFF;
for (n = ht->ht[TC_U32_HASH(handle)]; n; n = n->next)
- if (i < TC_U32_NODE(n->handle))
- i = TC_U32_NODE(n->handle);
- i++;
+ set_bit(TC_U32_NODE(n->handle), bitmap);
- return handle | (i > 0xFFF ? 0xFFF : i);
+ i = find_next_zero_bit(bitmap, NR_U32_NODE, 0x800);
+ if (i >= NR_U32_NODE)
+ i = find_next_zero_bit(bitmap, NR_U32_NODE, 1);
+
+ kfree(bitmap);
+ return handle | (i >= NR_U32_NODE ? 0xFFF : i);
}
static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = {
@@ -486,13 +495,13 @@ static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = {
static int u32_set_parms(struct net *net, struct tcf_proto *tp,
unsigned long base, struct tc_u_hnode *ht,
struct tc_u_knode *n, struct nlattr **tb,
- struct nlattr *est)
+ struct nlattr *est, bool ovr)
{
int err;
struct tcf_exts e;
tcf_exts_init(&e, TCA_U32_ACT, TCA_U32_POLICE);
- err = tcf_exts_validate(net, tp, tb, est, &e);
+ err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
if (err < 0)
return err;
@@ -545,7 +554,7 @@ errout:
static int u32_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base, u32 handle,
struct nlattr **tca,
- unsigned long *arg)
+ unsigned long *arg, bool ovr)
{
struct tc_u_common *tp_c = tp->data;
struct tc_u_hnode *ht;
@@ -569,7 +578,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
return -EINVAL;
return u32_set_parms(net, tp, base, n->ht_up, n, tb,
- tca[TCA_RATE]);
+ tca[TCA_RATE], ovr);
}
if (tb[TCA_U32_DIVISOR]) {
@@ -656,7 +665,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
}
#endif
- err = u32_set_parms(net, tp, base, ht, n, tb, tca[TCA_RATE]);
+ err = u32_set_parms(net, tp, base, ht, n, tb, tca[TCA_RATE], ovr);
if (err == 0) {
struct tc_u_knode **ins;
for (ins = &ht->ht[TC_U32_HASH(handle)]; *ins; ins = &(*ins)->next)
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index a07d55e7569..58bed7599db 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -563,7 +563,7 @@ out:
}
EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
-void qdisc_warn_nonwc(char *txt, struct Qdisc *qdisc)
+void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc)
{
if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
@@ -1084,7 +1084,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n)
struct Qdisc *p = NULL;
int err;
- if ((n->nlmsg_type != RTM_GETQDISC) && !capable(CAP_NET_ADMIN))
+ if ((n->nlmsg_type != RTM_GETQDISC) &&
+ !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
return -EPERM;
err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
@@ -1151,7 +1152,7 @@ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n)
struct Qdisc *q, *p;
int err;
- if (!capable(CAP_NET_ADMIN))
+ if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
return -EPERM;
replay:
@@ -1304,6 +1305,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
struct gnet_dump d;
struct qdisc_size_table *stab;
+ cond_resched();
nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
if (!nlh)
goto out_nlmsg_trim;
@@ -1435,9 +1437,9 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
s_idx = cb->args[0];
s_q_idx = q_idx = cb->args[1];
- rcu_read_lock();
idx = 0;
- for_each_netdev_rcu(net, dev) {
+ ASSERT_RTNL();
+ for_each_netdev(net, dev) {
struct netdev_queue *dev_queue;
if (idx < s_idx)
@@ -1460,8 +1462,6 @@ cont:
}
done:
- rcu_read_unlock();
-
cb->args[0] = idx;
cb->args[1] = q_idx;
@@ -1491,7 +1491,8 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n)
u32 qid;
int err;
- if ((n->nlmsg_type != RTM_GETTCLASS) && !capable(CAP_NET_ADMIN))
+ if ((n->nlmsg_type != RTM_GETTCLASS) &&
+ !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
return -EPERM;
err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
@@ -1618,6 +1619,7 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
struct gnet_dump d;
const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
+ cond_resched();
nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
if (!nlh)
goto out_nlmsg_trim;
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 1f9c31411f1..8449b337f9e 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -623,8 +623,7 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl,
if (nla_put_u32(skb, TCA_ATM_EXCESS, 0))
goto nla_put_failure;
}
- nla_nest_end(skb, nest);
- return skb->len;
+ return nla_nest_end(skb, nest);
nla_put_failure:
nla_nest_cancel(skb, nest);
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 2f80d01d42a..ead526467cc 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1563,8 +1563,7 @@ static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
goto nla_put_failure;
if (cbq_dump_attr(skb, &q->link) < 0)
goto nla_put_failure;
- nla_nest_end(skb, nest);
- return skb->len;
+ return nla_nest_end(skb, nest);
nla_put_failure:
nla_nest_cancel(skb, nest);
@@ -1599,8 +1598,7 @@ cbq_dump_class(struct Qdisc *sch, unsigned long arg,
goto nla_put_failure;
if (cbq_dump_attr(skb, cl) < 0)
goto nla_put_failure;
- nla_nest_end(skb, nest);
- return skb->len;
+ return nla_nest_end(skb, nest);
nla_put_failure:
nla_nest_cancel(skb, nest);
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index 2aee02802c2..ed30e436128 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -391,12 +391,7 @@ static const struct nla_policy choke_policy[TCA_CHOKE_MAX + 1] = {
static void choke_free(void *addr)
{
- if (addr) {
- if (is_vmalloc_addr(addr))
- vfree(addr);
- else
- kfree(addr);
- }
+ kvfree(addr);
}
static int choke_change(struct Qdisc *sch, struct nlattr *opt)
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index 8302717ea30..7bbbfe11219 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -391,8 +391,10 @@ static struct sk_buff *drr_dequeue(struct Qdisc *sch)
while (1) {
cl = list_first_entry(&q->active, struct drr_class, alist);
skb = cl->qdisc->ops->peek(cl->qdisc);
- if (skb == NULL)
+ if (skb == NULL) {
+ qdisc_warn_nonwc(__func__, cl->qdisc);
goto out;
+ }
len = qdisc_pkt_len(skb);
if (len <= cl->deficit) {
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 21e251766eb..ba32c2b005d 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -591,10 +591,7 @@ static void *fq_alloc_node(size_t sz, int node)
static void fq_free(void *addr)
{
- if (addr && is_vmalloc_addr(addr))
- vfree(addr);
- else
- kfree(addr);
+ kvfree(addr);
}
static int fq_resize(struct Qdisc *sch, u32 log)
@@ -781,8 +778,7 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log))
goto nla_put_failure;
- nla_nest_end(skb, opts);
- return skb->len;
+ return nla_nest_end(skb, opts);
nla_put_failure:
return -1;
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index ba5bc929eac..063b726bf1f 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -365,12 +365,7 @@ static void *fq_codel_zalloc(size_t sz)
static void fq_codel_free(void *addr)
{
- if (addr) {
- if (is_vmalloc_addr(addr))
- vfree(addr);
- else
- kfree(addr);
- }
+ kvfree(addr);
}
static void fq_codel_destroy(struct Qdisc *sch)
@@ -450,8 +445,7 @@ static int fq_codel_dump(struct Qdisc *sch, struct sk_buff *skb)
q->flows_cnt))
goto nla_put_failure;
- nla_nest_end(skb, opts);
- return skb->len;
+ return nla_nest_end(skb, opts);
nla_put_failure:
return -1;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index e82e43b69c3..e1543b03e39 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -310,6 +310,7 @@ void netif_carrier_on(struct net_device *dev)
if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
if (dev->reg_state == NETREG_UNINITIALIZED)
return;
+ atomic_inc(&dev->carrier_changes);
linkwatch_fire_event(dev);
if (netif_running(dev))
__netdev_watchdog_up(dev);
@@ -328,6 +329,7 @@ void netif_carrier_off(struct net_device *dev)
if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
if (dev->reg_state == NETREG_UNINITIALIZED)
return;
+ atomic_inc(&dev->carrier_changes);
linkwatch_fire_event(dev);
}
}
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index c4075610502..ec8aeaac1dd 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1353,8 +1353,7 @@ hfsc_dump_class(struct Qdisc *sch, unsigned long arg, struct sk_buff *skb,
goto nla_put_failure;
if (hfsc_dump_curves(skb, cl) < 0)
goto nla_put_failure;
- nla_nest_end(skb, nest);
- return skb->len;
+ return nla_nest_end(skb, nest);
nla_put_failure:
nla_nest_cancel(skb, nest);
diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c
index 647680b1c62..d85b6812a7d 100644
--- a/net/sched/sch_hhf.c
+++ b/net/sched/sch_hhf.c
@@ -414,7 +414,7 @@ static int hhf_enqueue(struct sk_buff *skb, struct Qdisc *sch)
}
bucket->deficit = weight * q->quantum;
}
- if (++sch->q.qlen < sch->limit)
+ if (++sch->q.qlen <= sch->limit)
return NET_XMIT_SUCCESS;
q->drop_overlimit++;
@@ -494,12 +494,7 @@ static void *hhf_zalloc(size_t sz)
static void hhf_free(void *addr)
{
- if (addr) {
- if (is_vmalloc_addr(addr))
- vfree(addr);
- else
- kfree(addr);
- }
+ kvfree(addr);
}
static void hhf_destroy(struct Qdisc *sch)
@@ -553,11 +548,6 @@ static int hhf_change(struct Qdisc *sch, struct nlattr *opt)
if (err < 0)
return err;
- sch_tree_lock(sch);
-
- if (tb[TCA_HHF_BACKLOG_LIMIT])
- sch->limit = nla_get_u32(tb[TCA_HHF_BACKLOG_LIMIT]);
-
if (tb[TCA_HHF_QUANTUM])
new_quantum = nla_get_u32(tb[TCA_HHF_QUANTUM]);
@@ -567,6 +557,12 @@ static int hhf_change(struct Qdisc *sch, struct nlattr *opt)
non_hh_quantum = (u64)new_quantum * new_hhf_non_hh_weight;
if (non_hh_quantum > INT_MAX)
return -EINVAL;
+
+ sch_tree_lock(sch);
+
+ if (tb[TCA_HHF_BACKLOG_LIMIT])
+ sch->limit = nla_get_u32(tb[TCA_HHF_BACKLOG_LIMIT]);
+
q->quantum = new_quantum;
q->hhf_non_hh_weight = new_hhf_non_hh_weight;
@@ -691,8 +687,7 @@ static int hhf_dump(struct Qdisc *sch, struct sk_buff *skb)
nla_put_u32(skb, TCA_HHF_NON_HH_WEIGHT, q->hhf_non_hh_weight))
goto nla_put_failure;
- nla_nest_end(skb, opts);
- return skb->len;
+ return nla_nest_end(skb, opts);
nla_put_failure:
return -1;
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 722e137df24..9f949abcace 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1062,12 +1062,13 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt)
static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
{
- spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
struct htb_sched *q = qdisc_priv(sch);
struct nlattr *nest;
struct tc_htb_glob gopt;
- spin_lock_bh(root_lock);
+ /* Its safe to not acquire qdisc lock. As we hold RTNL,
+ * no change can happen on the qdisc parameters.
+ */
gopt.direct_pkts = q->direct_pkts;
gopt.version = HTB_VER;
@@ -1081,13 +1082,10 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt) ||
nla_put_u32(skb, TCA_HTB_DIRECT_QLEN, q->direct_qlen))
goto nla_put_failure;
- nla_nest_end(skb, nest);
- spin_unlock_bh(root_lock);
- return skb->len;
+ return nla_nest_end(skb, nest);
nla_put_failure:
- spin_unlock_bh(root_lock);
nla_nest_cancel(skb, nest);
return -1;
}
@@ -1096,11 +1094,12 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
struct sk_buff *skb, struct tcmsg *tcm)
{
struct htb_class *cl = (struct htb_class *)arg;
- spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
struct nlattr *nest;
struct tc_htb_opt opt;
- spin_lock_bh(root_lock);
+ /* Its safe to not acquire qdisc lock. As we hold RTNL,
+ * no change can happen on the class parameters.
+ */
tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT;
tcm->tcm_handle = cl->common.classid;
if (!cl->level && cl->un.leaf.q)
@@ -1128,12 +1127,9 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
nla_put_u64(skb, TCA_HTB_CEIL64, cl->ceil.rate_bytes_ps))
goto nla_put_failure;
- nla_nest_end(skb, nest);
- spin_unlock_bh(root_lock);
- return skb->len;
+ return nla_nest_end(skb, nest);
nla_put_failure:
- spin_unlock_bh(root_lock);
nla_nest_cancel(skb, nest);
return -1;
}
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index bce1665239b..62871c14e1f 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -100,8 +100,7 @@ static int ingress_dump(struct Qdisc *sch, struct sk_buff *skb)
nest = nla_nest_start(skb, TCA_OPTIONS);
if (nest == NULL)
goto nla_put_failure;
- nla_nest_end(skb, nest);
- return skb->len;
+ return nla_nest_end(skb, nest);
nla_put_failure:
nla_nest_cancel(skb, nest);
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index de1059af6da..111d70fddae 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -117,6 +117,11 @@ struct netem_sched_data {
LOST_IN_BURST_PERIOD,
} _4_state_model;
+ enum {
+ GOOD_STATE = 1,
+ BAD_STATE,
+ } GE_state_model;
+
/* Correlated Loss Generation models */
struct clgstate {
/* state of the Markov chain */
@@ -272,15 +277,15 @@ static bool loss_gilb_ell(struct netem_sched_data *q)
struct clgstate *clg = &q->clg;
switch (clg->state) {
- case 1:
+ case GOOD_STATE:
if (prandom_u32() < clg->a1)
- clg->state = 2;
+ clg->state = BAD_STATE;
if (prandom_u32() < clg->a4)
return true;
break;
- case 2:
+ case BAD_STATE:
if (prandom_u32() < clg->a2)
- clg->state = 1;
+ clg->state = GOOD_STATE;
if (prandom_u32() > clg->a3)
return true;
}
@@ -643,12 +648,7 @@ static void netem_reset(struct Qdisc *sch)
static void dist_free(struct disttable *d)
{
- if (d) {
- if (is_vmalloc_addr(d))
- vfree(d);
- else
- kfree(d);
- }
+ kvfree(d);
}
/*
@@ -689,9 +689,8 @@ static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
return 0;
}
-static void get_correlation(struct Qdisc *sch, const struct nlattr *attr)
+static void get_correlation(struct netem_sched_data *q, const struct nlattr *attr)
{
- struct netem_sched_data *q = qdisc_priv(sch);
const struct tc_netem_corr *c = nla_data(attr);
init_crandom(&q->delay_cor, c->delay_corr);
@@ -699,27 +698,24 @@ static void get_correlation(struct Qdisc *sch, const struct nlattr *attr)
init_crandom(&q->dup_cor, c->dup_corr);
}
-static void get_reorder(struct Qdisc *sch, const struct nlattr *attr)
+static void get_reorder(struct netem_sched_data *q, const struct nlattr *attr)
{
- struct netem_sched_data *q = qdisc_priv(sch);
const struct tc_netem_reorder *r = nla_data(attr);
q->reorder = r->probability;
init_crandom(&q->reorder_cor, r->correlation);
}
-static void get_corrupt(struct Qdisc *sch, const struct nlattr *attr)
+static void get_corrupt(struct netem_sched_data *q, const struct nlattr *attr)
{
- struct netem_sched_data *q = qdisc_priv(sch);
const struct tc_netem_corrupt *r = nla_data(attr);
q->corrupt = r->probability;
init_crandom(&q->corrupt_cor, r->correlation);
}
-static void get_rate(struct Qdisc *sch, const struct nlattr *attr)
+static void get_rate(struct netem_sched_data *q, const struct nlattr *attr)
{
- struct netem_sched_data *q = qdisc_priv(sch);
const struct tc_netem_rate *r = nla_data(attr);
q->rate = r->rate;
@@ -732,9 +728,8 @@ static void get_rate(struct Qdisc *sch, const struct nlattr *attr)
q->cell_size_reciprocal = (struct reciprocal_value) { 0 };
}
-static int get_loss_clg(struct Qdisc *sch, const struct nlattr *attr)
+static int get_loss_clg(struct netem_sched_data *q, const struct nlattr *attr)
{
- struct netem_sched_data *q = qdisc_priv(sch);
const struct nlattr *la;
int rem;
@@ -752,7 +747,7 @@ static int get_loss_clg(struct Qdisc *sch, const struct nlattr *attr)
q->loss_model = CLG_4_STATES;
- q->clg.state = 1;
+ q->clg.state = TX_IN_GAP_PERIOD;
q->clg.a1 = gi->p13;
q->clg.a2 = gi->p31;
q->clg.a3 = gi->p32;
@@ -770,7 +765,7 @@ static int get_loss_clg(struct Qdisc *sch, const struct nlattr *attr)
}
q->loss_model = CLG_GILB_ELL;
- q->clg.state = 1;
+ q->clg.state = GOOD_STATE;
q->clg.a1 = ge->p;
q->clg.a2 = ge->r;
q->clg.a3 = ge->h;
@@ -821,6 +816,8 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
struct netem_sched_data *q = qdisc_priv(sch);
struct nlattr *tb[TCA_NETEM_MAX + 1];
struct tc_netem_qopt *qopt;
+ struct clgstate old_clg;
+ int old_loss_model = CLG_RANDOM;
int ret;
if (opt == NULL)
@@ -831,6 +828,33 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
if (ret < 0)
return ret;
+ /* backup q->clg and q->loss_model */
+ old_clg = q->clg;
+ old_loss_model = q->loss_model;
+
+ if (tb[TCA_NETEM_LOSS]) {
+ ret = get_loss_clg(q, tb[TCA_NETEM_LOSS]);
+ if (ret) {
+ q->loss_model = old_loss_model;
+ return ret;
+ }
+ } else {
+ q->loss_model = CLG_RANDOM;
+ }
+
+ if (tb[TCA_NETEM_DELAY_DIST]) {
+ ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST]);
+ if (ret) {
+ /* recover clg and loss_model, in case
+ * q->clg and q->loss_model were modified
+ * in get_loss_clg()
+ */
+ q->clg = old_clg;
+ q->loss_model = old_loss_model;
+ return ret;
+ }
+ }
+
sch->limit = qopt->limit;
q->latency = qopt->latency;
@@ -848,22 +872,16 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
q->reorder = ~0;
if (tb[TCA_NETEM_CORR])
- get_correlation(sch, tb[TCA_NETEM_CORR]);
-
- if (tb[TCA_NETEM_DELAY_DIST]) {
- ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST]);
- if (ret)
- return ret;
- }
+ get_correlation(q, tb[TCA_NETEM_CORR]);
if (tb[TCA_NETEM_REORDER])
- get_reorder(sch, tb[TCA_NETEM_REORDER]);
+ get_reorder(q, tb[TCA_NETEM_REORDER]);
if (tb[TCA_NETEM_CORRUPT])
- get_corrupt(sch, tb[TCA_NETEM_CORRUPT]);
+ get_corrupt(q, tb[TCA_NETEM_CORRUPT]);
if (tb[TCA_NETEM_RATE])
- get_rate(sch, tb[TCA_NETEM_RATE]);
+ get_rate(q, tb[TCA_NETEM_RATE]);
if (tb[TCA_NETEM_RATE64])
q->rate = max_t(u64, q->rate,
@@ -872,10 +890,6 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
if (tb[TCA_NETEM_ECN])
q->ecn = nla_get_u32(tb[TCA_NETEM_ECN]);
- q->loss_model = CLG_RANDOM;
- if (tb[TCA_NETEM_LOSS])
- ret = get_loss_clg(sch, tb[TCA_NETEM_LOSS]);
-
return ret;
}
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 87317ff0b4e..1af2f73906d 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -716,12 +716,7 @@ static void *sfq_alloc(size_t sz)
static void sfq_free(void *addr)
{
- if (addr) {
- if (is_vmalloc_addr(addr))
- vfree(addr);
- else
- kfree(addr);
- }
+ kvfree(addr);
}
static void sfq_destroy(struct Qdisc *sch)
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 4f505a00689..18ff6343370 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -101,12 +101,11 @@
struct tbf_sched_data {
/* Parameters */
u32 limit; /* Maximal length of backlog: bytes */
+ u32 max_size;
s64 buffer; /* Token bucket depth/rate: MUST BE >= MTU/B */
s64 mtu;
- u32 max_size;
struct psched_ratecfg rate;
struct psched_ratecfg peak;
- bool peak_present;
/* Variables */
s64 tokens; /* Current number of B tokens */
@@ -222,6 +221,11 @@ static unsigned int tbf_drop(struct Qdisc *sch)
return len;
}
+static bool tbf_peak_present(const struct tbf_sched_data *q)
+{
+ return q->peak.rate_bytes_ps;
+}
+
static struct sk_buff *tbf_dequeue(struct Qdisc *sch)
{
struct tbf_sched_data *q = qdisc_priv(sch);
@@ -238,7 +242,7 @@ static struct sk_buff *tbf_dequeue(struct Qdisc *sch)
now = ktime_to_ns(ktime_get());
toks = min_t(s64, now - q->t_c, q->buffer);
- if (q->peak_present) {
+ if (tbf_peak_present(q)) {
ptoks = toks + q->ptokens;
if (ptoks > q->mtu)
ptoks = q->mtu;
@@ -366,6 +370,8 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
} else {
max_size = min_t(u64, max_size, psched_ns_t2l(&peak, mtu));
}
+ } else {
+ memset(&peak, 0, sizeof(peak));
}
if (max_size < psched_mtu(qdisc_dev(sch)))
@@ -410,12 +416,7 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
q->ptokens = q->mtu;
memcpy(&q->rate, &rate, sizeof(struct psched_ratecfg));
- if (qopt->peakrate.rate) {
- memcpy(&q->peak, &peak, sizeof(struct psched_ratecfg));
- q->peak_present = true;
- } else {
- q->peak_present = false;
- }
+ memcpy(&q->peak, &peak, sizeof(struct psched_ratecfg));
sch_tree_unlock(sch);
err = 0;
@@ -458,7 +459,7 @@ static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
opt.limit = q->limit;
psched_ratecfg_getrate(&opt.rate, &q->rate);
- if (q->peak_present)
+ if (tbf_peak_present(q))
psched_ratecfg_getrate(&opt.peakrate, &q->peak);
else
memset(&opt.peakrate, 0, sizeof(opt.peakrate));
@@ -469,13 +470,12 @@ static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
if (q->rate.rate_bytes_ps >= (1ULL << 32) &&
nla_put_u64(skb, TCA_TBF_RATE64, q->rate.rate_bytes_ps))
goto nla_put_failure;
- if (q->peak_present &&
+ if (tbf_peak_present(q) &&
q->peak.rate_bytes_ps >= (1ULL << 32) &&
nla_put_u64(skb, TCA_TBF_PRATE64, q->peak.rate_bytes_ps))
goto nla_put_failure;
- nla_nest_end(skb, nest);
- return skb->len;
+ return nla_nest_end(skb, nest);
nla_put_failure:
nla_nest_cancel(skb, nest);
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index ee13d28d39d..06a9ee6b2d3 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -55,6 +55,7 @@
#include <net/sctp/sm.h>
/* Forward declarations for internal functions. */
+static void sctp_select_active_and_retran_path(struct sctp_association *asoc);
static void sctp_assoc_bh_rcv(struct work_struct *work);
static void sctp_assoc_free_asconf_acks(struct sctp_association *asoc);
static void sctp_assoc_free_asconf_queue(struct sctp_association *asoc);
@@ -330,7 +331,7 @@ void sctp_association_free(struct sctp_association *asoc)
/* Only real associations count against the endpoint, so
* don't bother for if this is a temporary association.
*/
- if (!asoc->temp) {
+ if (!list_empty(&asoc->asocs)) {
list_del(&asoc->asocs);
/* Decrement the backlog value for a TCP-style listening
@@ -774,9 +775,6 @@ void sctp_assoc_control_transport(struct sctp_association *asoc,
sctp_transport_cmd_t command,
sctp_sn_error_t error)
{
- struct sctp_transport *t = NULL;
- struct sctp_transport *first;
- struct sctp_transport *second;
struct sctp_ulpevent *event;
struct sockaddr_storage addr;
int spc_state = 0;
@@ -829,13 +827,14 @@ void sctp_assoc_control_transport(struct sctp_association *asoc,
return;
}
- /* Generate and send a SCTP_PEER_ADDR_CHANGE notification to the
- * user.
+ /* Generate and send a SCTP_PEER_ADDR_CHANGE notification
+ * to the user.
*/
if (ulp_notify) {
memset(&addr, 0, sizeof(struct sockaddr_storage));
memcpy(&addr, &transport->ipaddr,
transport->af_specific->sockaddr_len);
+
event = sctp_ulpevent_make_peer_addr_change(asoc, &addr,
0, spc_state, error, GFP_ATOMIC);
if (event)
@@ -843,60 +842,7 @@ void sctp_assoc_control_transport(struct sctp_association *asoc,
}
/* Select new active and retran paths. */
-
- /* Look for the two most recently used active transports.
- *
- * This code produces the wrong ordering whenever jiffies
- * rolls over, but we still get usable transports, so we don't
- * worry about it.
- */
- first = NULL; second = NULL;
-
- list_for_each_entry(t, &asoc->peer.transport_addr_list,
- transports) {
-
- if ((t->state == SCTP_INACTIVE) ||
- (t->state == SCTP_UNCONFIRMED) ||
- (t->state == SCTP_PF))
- continue;
- if (!first || t->last_time_heard > first->last_time_heard) {
- second = first;
- first = t;
- } else if (!second ||
- t->last_time_heard > second->last_time_heard)
- second = t;
- }
-
- /* RFC 2960 6.4 Multi-Homed SCTP Endpoints
- *
- * By default, an endpoint should always transmit to the
- * primary path, unless the SCTP user explicitly specifies the
- * destination transport address (and possibly source
- * transport address) to use.
- *
- * [If the primary is active but not most recent, bump the most
- * recently used transport.]
- */
- if (((asoc->peer.primary_path->state == SCTP_ACTIVE) ||
- (asoc->peer.primary_path->state == SCTP_UNKNOWN)) &&
- first != asoc->peer.primary_path) {
- second = first;
- first = asoc->peer.primary_path;
- }
-
- if (!second)
- second = first;
- /* If we failed to find a usable transport, just camp on the
- * primary, even if it is inactive.
- */
- if (!first) {
- first = asoc->peer.primary_path;
- second = asoc->peer.primary_path;
- }
-
- /* Set the active and retran transports. */
- asoc->peer.active_path = first;
- asoc->peer.retran_path = second;
+ sctp_select_active_and_retran_path(asoc);
}
/* Hold a reference to an association. */
@@ -1090,7 +1036,7 @@ static void sctp_assoc_bh_rcv(struct work_struct *work)
}
if (chunk->transport)
- chunk->transport->last_time_heard = jiffies;
+ chunk->transport->last_time_heard = ktime_get();
/* Run through the state machine. */
error = sctp_do_sm(net, SCTP_EVENT_T_CHUNK, subtype,
@@ -1151,6 +1097,7 @@ void sctp_assoc_update(struct sctp_association *asoc,
asoc->c = new->c;
asoc->peer.rwnd = new->peer.rwnd;
asoc->peer.sack_needed = new->peer.sack_needed;
+ asoc->peer.auth_capable = new->peer.auth_capable;
asoc->peer.i = new->peer.i;
sctp_tsnmap_init(&asoc->peer.tsn_map, SCTP_TSN_MAP_INITIAL,
asoc->peer.i.initial_tsn, GFP_ATOMIC);
@@ -1278,13 +1225,41 @@ static u8 sctp_trans_score(const struct sctp_transport *trans)
return sctp_trans_state_to_prio_map[trans->state];
}
+static struct sctp_transport *sctp_trans_elect_tie(struct sctp_transport *trans1,
+ struct sctp_transport *trans2)
+{
+ if (trans1->error_count > trans2->error_count) {
+ return trans2;
+ } else if (trans1->error_count == trans2->error_count &&
+ ktime_after(trans2->last_time_heard,
+ trans1->last_time_heard)) {
+ return trans2;
+ } else {
+ return trans1;
+ }
+}
+
static struct sctp_transport *sctp_trans_elect_best(struct sctp_transport *curr,
struct sctp_transport *best)
{
+ u8 score_curr, score_best;
+
if (best == NULL)
return curr;
- return sctp_trans_score(curr) > sctp_trans_score(best) ? curr : best;
+ score_curr = sctp_trans_score(curr);
+ score_best = sctp_trans_score(best);
+
+ /* First, try a score-based selection if both transport states
+ * differ. If we're in a tie, let's try to make a more clever
+ * decision here based on error counts and last time heard.
+ */
+ if (score_curr > score_best)
+ return curr;
+ else if (score_curr == score_best)
+ return sctp_trans_elect_tie(curr, best);
+ else
+ return best;
}
void sctp_assoc_update_retran_path(struct sctp_association *asoc)
@@ -1319,13 +1294,82 @@ void sctp_assoc_update_retran_path(struct sctp_association *asoc)
break;
}
- if (trans_next != NULL)
- asoc->peer.retran_path = trans_next;
+ asoc->peer.retran_path = trans_next;
pr_debug("%s: association:%p updated new path to addr:%pISpc\n",
__func__, asoc, &asoc->peer.retran_path->ipaddr.sa);
}
+static void sctp_select_active_and_retran_path(struct sctp_association *asoc)
+{
+ struct sctp_transport *trans, *trans_pri = NULL, *trans_sec = NULL;
+ struct sctp_transport *trans_pf = NULL;
+
+ /* Look for the two most recently used active transports. */
+ list_for_each_entry(trans, &asoc->peer.transport_addr_list,
+ transports) {
+ /* Skip uninteresting transports. */
+ if (trans->state == SCTP_INACTIVE ||
+ trans->state == SCTP_UNCONFIRMED)
+ continue;
+ /* Keep track of the best PF transport from our
+ * list in case we don't find an active one.
+ */
+ if (trans->state == SCTP_PF) {
+ trans_pf = sctp_trans_elect_best(trans, trans_pf);
+ continue;
+ }
+ /* For active transports, pick the most recent ones. */
+ if (trans_pri == NULL ||
+ ktime_after(trans->last_time_heard,
+ trans_pri->last_time_heard)) {
+ trans_sec = trans_pri;
+ trans_pri = trans;
+ } else if (trans_sec == NULL ||
+ ktime_after(trans->last_time_heard,
+ trans_sec->last_time_heard)) {
+ trans_sec = trans;
+ }
+ }
+
+ /* RFC 2960 6.4 Multi-Homed SCTP Endpoints
+ *
+ * By default, an endpoint should always transmit to the primary
+ * path, unless the SCTP user explicitly specifies the
+ * destination transport address (and possibly source transport
+ * address) to use. [If the primary is active but not most recent,
+ * bump the most recently used transport.]
+ */
+ if ((asoc->peer.primary_path->state == SCTP_ACTIVE ||
+ asoc->peer.primary_path->state == SCTP_UNKNOWN) &&
+ asoc->peer.primary_path != trans_pri) {
+ trans_sec = trans_pri;
+ trans_pri = asoc->peer.primary_path;
+ }
+
+ /* We did not find anything useful for a possible retransmission
+ * path; either primary path that we found is the same as
+ * the current one, or we didn't generally find an active one.
+ */
+ if (trans_sec == NULL)
+ trans_sec = trans_pri;
+
+ /* If we failed to find a usable transport, just camp on the
+ * primary or retran, even if they are inactive, if possible
+ * pick a PF iff it's the better choice.
+ */
+ if (trans_pri == NULL) {
+ trans_pri = sctp_trans_elect_best(asoc->peer.primary_path,
+ asoc->peer.retran_path);
+ trans_pri = sctp_trans_elect_best(trans_pri, trans_pf);
+ trans_sec = asoc->peer.primary_path;
+ }
+
+ /* Set the active and retran transports. */
+ asoc->peer.active_path = trans_pri;
+ asoc->peer.retran_path = trans_sec;
+}
+
struct sctp_transport *
sctp_assoc_choose_alter_transport(struct sctp_association *asoc,
struct sctp_transport *last_sent_to)
@@ -1396,35 +1440,44 @@ static inline bool sctp_peer_needs_update(struct sctp_association *asoc)
return false;
}
-/* Update asoc's rwnd for the approximated state in the buffer,
- * and check whether SACK needs to be sent.
- */
-void sctp_assoc_rwnd_update(struct sctp_association *asoc, bool update_peer)
+/* Increase asoc's rwnd by len and send any window update SACK if needed. */
+void sctp_assoc_rwnd_increase(struct sctp_association *asoc, unsigned int len)
{
- int rx_count;
struct sctp_chunk *sack;
struct timer_list *timer;
- if (asoc->ep->rcvbuf_policy)
- rx_count = atomic_read(&asoc->rmem_alloc);
- else
- rx_count = atomic_read(&asoc->base.sk->sk_rmem_alloc);
+ if (asoc->rwnd_over) {
+ if (asoc->rwnd_over >= len) {
+ asoc->rwnd_over -= len;
+ } else {
+ asoc->rwnd += (len - asoc->rwnd_over);
+ asoc->rwnd_over = 0;
+ }
+ } else {
+ asoc->rwnd += len;
+ }
- if ((asoc->base.sk->sk_rcvbuf - rx_count) > 0)
- asoc->rwnd = (asoc->base.sk->sk_rcvbuf - rx_count) >> 1;
- else
- asoc->rwnd = 0;
+ /* If we had window pressure, start recovering it
+ * once our rwnd has reached the accumulated pressure
+ * threshold. The idea is to recover slowly, but up
+ * to the initial advertised window.
+ */
+ if (asoc->rwnd_press && asoc->rwnd >= asoc->rwnd_press) {
+ int change = min(asoc->pathmtu, asoc->rwnd_press);
+ asoc->rwnd += change;
+ asoc->rwnd_press -= change;
+ }
- pr_debug("%s: asoc:%p rwnd=%u, rx_count=%d, sk_rcvbuf=%d\n",
- __func__, asoc, asoc->rwnd, rx_count,
- asoc->base.sk->sk_rcvbuf);
+ pr_debug("%s: asoc:%p rwnd increased by %d to (%u, %u) - %u\n",
+ __func__, asoc, len, asoc->rwnd, asoc->rwnd_over,
+ asoc->a_rwnd);
/* Send a window update SACK if the rwnd has increased by at least the
* minimum of the association's PMTU and half of the receive buffer.
* The algorithm used is similar to the one described in
* Section 4.2.3.3 of RFC 1122.
*/
- if (update_peer && sctp_peer_needs_update(asoc)) {
+ if (sctp_peer_needs_update(asoc)) {
asoc->a_rwnd = asoc->rwnd;
pr_debug("%s: sending window update SACK- asoc:%p rwnd:%u "
@@ -1446,6 +1499,45 @@ void sctp_assoc_rwnd_update(struct sctp_association *asoc, bool update_peer)
}
}
+/* Decrease asoc's rwnd by len. */
+void sctp_assoc_rwnd_decrease(struct sctp_association *asoc, unsigned int len)
+{
+ int rx_count;
+ int over = 0;
+
+ if (unlikely(!asoc->rwnd || asoc->rwnd_over))
+ pr_debug("%s: association:%p has asoc->rwnd:%u, "
+ "asoc->rwnd_over:%u!\n", __func__, asoc,
+ asoc->rwnd, asoc->rwnd_over);
+
+ if (asoc->ep->rcvbuf_policy)
+ rx_count = atomic_read(&asoc->rmem_alloc);
+ else
+ rx_count = atomic_read(&asoc->base.sk->sk_rmem_alloc);
+
+ /* If we've reached or overflowed our receive buffer, announce
+ * a 0 rwnd if rwnd would still be positive. Store the
+ * potential pressure overflow so that the window can be restored
+ * back to original value.
+ */
+ if (rx_count >= asoc->base.sk->sk_rcvbuf)
+ over = 1;
+
+ if (asoc->rwnd >= len) {
+ asoc->rwnd -= len;
+ if (over) {
+ asoc->rwnd_press += asoc->rwnd;
+ asoc->rwnd = 0;
+ }
+ } else {
+ asoc->rwnd_over = len - asoc->rwnd;
+ asoc->rwnd = 0;
+ }
+
+ pr_debug("%s: asoc:%p rwnd decreased by %d to (%u, %u, %u)\n",
+ __func__, asoc, len, asoc->rwnd, asoc->rwnd_over,
+ asoc->rwnd_press);
+}
/* Build the bind address list for the association based on info from the
* local endpoint and the remote peer.
@@ -1500,7 +1592,7 @@ int sctp_assoc_lookup_laddr(struct sctp_association *asoc,
/* Set an association id for a given association */
int sctp_assoc_set_id(struct sctp_association *asoc, gfp_t gfp)
{
- bool preload = gfp & __GFP_WAIT;
+ bool preload = !!(gfp & __GFP_WAIT);
int ret;
/* If the id is already assigned, keep it. */
diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index 683c7d1b130..0e8529113dc 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -386,14 +386,13 @@ nomem:
*/
int sctp_auth_asoc_init_active_key(struct sctp_association *asoc, gfp_t gfp)
{
- struct net *net = sock_net(asoc->base.sk);
struct sctp_auth_bytes *secret;
struct sctp_shared_key *ep_key;
/* If we don't support AUTH, or peer is not capable
* we don't need to do anything.
*/
- if (!net->sctp.auth_enable || !asoc->peer.auth_capable)
+ if (!asoc->ep->auth_enable || !asoc->peer.auth_capable)
return 0;
/* If the key_id is non-zero and we couldn't find an
@@ -440,16 +439,16 @@ struct sctp_shared_key *sctp_auth_get_shkey(
*/
int sctp_auth_init_hmacs(struct sctp_endpoint *ep, gfp_t gfp)
{
- struct net *net = sock_net(ep->base.sk);
struct crypto_hash *tfm = NULL;
__u16 id;
- /* if the transforms are already allocted, we are done */
- if (!net->sctp.auth_enable) {
+ /* If AUTH extension is disabled, we are done */
+ if (!ep->auth_enable) {
ep->auth_hmacs = NULL;
return 0;
}
+ /* If the transforms are already allocated, we are done */
if (ep->auth_hmacs)
return 0;
@@ -665,12 +664,10 @@ static int __sctp_auth_cid(sctp_cid_t chunk, struct sctp_chunks_param *param)
/* Check if peer requested that this chunk is authenticated */
int sctp_auth_send_cid(sctp_cid_t chunk, const struct sctp_association *asoc)
{
- struct net *net;
if (!asoc)
return 0;
- net = sock_net(asoc->base.sk);
- if (!net->sctp.auth_enable || !asoc->peer.auth_capable)
+ if (!asoc->ep->auth_enable || !asoc->peer.auth_capable)
return 0;
return __sctp_auth_cid(chunk, asoc->peer.peer_chunks);
@@ -679,12 +676,10 @@ int sctp_auth_send_cid(sctp_cid_t chunk, const struct sctp_association *asoc)
/* Check if we requested that peer authenticate this chunk. */
int sctp_auth_recv_cid(sctp_cid_t chunk, const struct sctp_association *asoc)
{
- struct net *net;
if (!asoc)
return 0;
- net = sock_net(asoc->base.sk);
- if (!net->sctp.auth_enable)
+ if (!asoc->ep->auth_enable)
return 0;
return __sctp_auth_cid(chunk,
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 8e5fdea0521..9da76ba4d10 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -68,7 +68,8 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
if (!ep->digest)
return NULL;
- if (net->sctp.auth_enable) {
+ ep->auth_enable = net->sctp.auth_enable;
+ if (ep->auth_enable) {
/* Allocate space for HMACS and CHUNKS authentication
* variables. There are arrays that we encode directly
* into parameters to make the rest of the operations easier.
@@ -480,7 +481,7 @@ normal:
}
if (chunk->transport)
- chunk->transport->last_time_heard = jiffies;
+ chunk->transport->last_time_heard = ktime_get();
error = sctp_do_sm(net, SCTP_EVENT_T_CHUNK, subtype, state,
ep, asoc, chunk, GFP_ATOMIC);
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 2b1738ef939..1999592ba88 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -216,7 +216,7 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport)
IP6_ECN_flow_xmit(sk, fl6->flowlabel);
if (!(transport->param_flags & SPP_PMTUD_ENABLE))
- skb->local_df = 1;
+ skb->ignore_df = 1;
SCTP_INC_STATS(sock_net(sk), SCTP_MIB_OUTSCTPPACKS);
@@ -943,7 +943,6 @@ static struct inet_protosw sctpv6_seqpacket_protosw = {
.protocol = IPPROTO_SCTP,
.prot = &sctpv6_prot,
.ops = &inet6_seqpacket_ops,
- .no_check = 0,
.flags = SCTP_PROTOSW_FLAG
};
static struct inet_protosw sctpv6_stream_protosw = {
@@ -951,7 +950,6 @@ static struct inet_protosw sctpv6_stream_protosw = {
.protocol = IPPROTO_SCTP,
.prot = &sctpv6_prot,
.ops = &inet6_seqpacket_ops,
- .no_check = 0,
.flags = SCTP_PROTOSW_FLAG,
};
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 0f4d15fc262..01ab8e0723f 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -591,7 +591,7 @@ int sctp_packet_transmit(struct sctp_packet *packet)
pr_debug("***sctp_transmit_packet*** skb->len:%d\n", nskb->len);
- nskb->local_df = packet->ipfragok;
+ nskb->ignore_df = packet->ipfragok;
tp->af_specific->sctp_xmit(nskb, tp);
out:
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index 0947f1e15eb..34229ee7f37 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -78,7 +78,7 @@ static int sctp_snmp_seq_show(struct seq_file *seq, void *v)
for (i = 0; sctp_snmp_list[i].name != NULL; i++)
seq_printf(seq, "%-32s\t%ld\n", sctp_snmp_list[i].name,
- snmp_fold_field((void __percpu **)net->sctp.sctp_statistics,
+ snmp_fold_field(net->sctp.sctp_statistics,
sctp_snmp_list[i].entry));
return 0;
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 4e1d0fcb028..6789d785e69 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -491,8 +491,13 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
continue;
if ((laddr->state == SCTP_ADDR_SRC) &&
(AF_INET == laddr->a.sa.sa_family)) {
- fl4->saddr = laddr->a.v4.sin_addr.s_addr;
fl4->fl4_sport = laddr->a.v4.sin_port;
+ flowi4_update_output(fl4,
+ asoc->base.sk->sk_bound_dev_if,
+ RT_CONN_FLAGS(asoc->base.sk),
+ daddr->v4.sin_addr.s_addr,
+ laddr->a.v4.sin_addr.s_addr);
+
rt = ip_route_output_key(sock_net(sk), fl4);
if (!IS_ERR(rt)) {
dst = &rt->dst;
@@ -957,7 +962,7 @@ static inline int sctp_v4_xmit(struct sk_buff *skb,
SCTP_INC_STATS(sock_net(&inet->sk), SCTP_MIB_OUTSCTPPACKS);
- return ip_queue_xmit(skb, &transport->fl);
+ return ip_queue_xmit(&inet->sk, skb, &transport->fl);
}
static struct sctp_af sctp_af_inet;
@@ -1012,7 +1017,6 @@ static struct inet_protosw sctp_seqpacket_protosw = {
.protocol = IPPROTO_SCTP,
.prot = &sctp_prot,
.ops = &inet_seqpacket_ops,
- .no_check = 0,
.flags = SCTP_PROTOSW_FLAG
};
static struct inet_protosw sctp_stream_protosw = {
@@ -1020,7 +1024,6 @@ static struct inet_protosw sctp_stream_protosw = {
.protocol = IPPROTO_SCTP,
.prot = &sctp_prot,
.ops = &inet_seqpacket_ops,
- .no_check = 0,
.flags = SCTP_PROTOSW_FLAG
};
@@ -1100,14 +1103,15 @@ int sctp_register_pf(struct sctp_pf *pf, sa_family_t family)
static inline int init_sctp_mibs(struct net *net)
{
- return snmp_mib_init((void __percpu **)net->sctp.sctp_statistics,
- sizeof(struct sctp_mib),
- __alignof__(struct sctp_mib));
+ net->sctp.sctp_statistics = alloc_percpu(struct sctp_mib);
+ if (!net->sctp.sctp_statistics)
+ return -ENOMEM;
+ return 0;
}
static inline void cleanup_sctp_mibs(struct net *net)
{
- snmp_mib_free((void __percpu **)net->sctp.sctp_statistics);
+ free_percpu(net->sctp.sctp_statistics);
}
static void sctp_v4_pf_init(void)
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 3a1767ef320..ae0e616a7ca 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -219,6 +219,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
gfp_t gfp, int vparam_len)
{
struct net *net = sock_net(asoc->base.sk);
+ struct sctp_endpoint *ep = asoc->ep;
sctp_inithdr_t init;
union sctp_params addrs;
size_t chunksize;
@@ -278,7 +279,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
chunksize += vparam_len;
/* Account for AUTH related parameters */
- if (net->sctp.auth_enable) {
+ if (ep->auth_enable) {
/* Add random parameter length*/
chunksize += sizeof(asoc->c.auth_random);
@@ -363,7 +364,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
}
/* Add SCTP-AUTH chunks to the parameter list */
- if (net->sctp.auth_enable) {
+ if (ep->auth_enable) {
sctp_addto_chunk(retval, sizeof(asoc->c.auth_random),
asoc->c.auth_random);
if (auth_hmacs)
@@ -1781,7 +1782,7 @@ no_hmac:
else
kt = ktime_get();
- if (!asoc && ktime_compare(bear_cookie->expiration, kt) < 0) {
+ if (!asoc && ktime_before(bear_cookie->expiration, kt)) {
/*
* Section 3.3.10.3 Stale Cookie Error (3)
*
@@ -2010,7 +2011,7 @@ static void sctp_process_ext_param(struct sctp_association *asoc,
/* if the peer reports AUTH, assume that he
* supports AUTH.
*/
- if (net->sctp.auth_enable)
+ if (asoc->ep->auth_enable)
asoc->peer.auth_capable = 1;
break;
case SCTP_CID_ASCONF:
@@ -2102,6 +2103,7 @@ static sctp_ierror_t sctp_process_unk_param(const struct sctp_association *asoc,
* SCTP_IERROR_NO_ERROR - continue with the chunk
*/
static sctp_ierror_t sctp_verify_param(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
union sctp_params param,
sctp_cid_t cid,
@@ -2152,7 +2154,7 @@ static sctp_ierror_t sctp_verify_param(struct net *net,
goto fallthrough;
case SCTP_PARAM_RANDOM:
- if (!net->sctp.auth_enable)
+ if (!ep->auth_enable)
goto fallthrough;
/* SCTP-AUTH: Secion 6.1
@@ -2169,7 +2171,7 @@ static sctp_ierror_t sctp_verify_param(struct net *net,
break;
case SCTP_PARAM_CHUNKS:
- if (!net->sctp.auth_enable)
+ if (!ep->auth_enable)
goto fallthrough;
/* SCTP-AUTH: Section 3.2
@@ -2185,7 +2187,7 @@ static sctp_ierror_t sctp_verify_param(struct net *net,
break;
case SCTP_PARAM_HMAC_ALGO:
- if (!net->sctp.auth_enable)
+ if (!ep->auth_enable)
goto fallthrough;
hmacs = (struct sctp_hmac_algo_param *)param.p;
@@ -2220,10 +2222,9 @@ fallthrough:
}
/* Verify the INIT packet before we process it. */
-int sctp_verify_init(struct net *net, const struct sctp_association *asoc,
- sctp_cid_t cid,
- sctp_init_chunk_t *peer_init,
- struct sctp_chunk *chunk,
+int sctp_verify_init(struct net *net, const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc, sctp_cid_t cid,
+ sctp_init_chunk_t *peer_init, struct sctp_chunk *chunk,
struct sctp_chunk **errp)
{
union sctp_params param;
@@ -2264,8 +2265,8 @@ int sctp_verify_init(struct net *net, const struct sctp_association *asoc,
/* Verify all the variable length parameters */
sctp_walk_params(param, peer_init, init_hdr.params) {
-
- result = sctp_verify_param(net, asoc, param, cid, chunk, errp);
+ result = sctp_verify_param(net, ep, asoc, param, cid,
+ chunk, errp);
switch (result) {
case SCTP_IERROR_ABORT:
case SCTP_IERROR_NOMEM:
@@ -2497,6 +2498,7 @@ static int sctp_process_param(struct sctp_association *asoc,
struct sctp_af *af;
union sctp_addr_param *addr_param;
struct sctp_transport *t;
+ struct sctp_endpoint *ep = asoc->ep;
/* We maintain all INIT parameters in network byte order all the
* time. This allows us to not worry about whether the parameters
@@ -2636,7 +2638,7 @@ do_addr_param:
goto fall_through;
case SCTP_PARAM_RANDOM:
- if (!net->sctp.auth_enable)
+ if (!ep->auth_enable)
goto fall_through;
/* Save peer's random parameter */
@@ -2649,7 +2651,7 @@ do_addr_param:
break;
case SCTP_PARAM_HMAC_ALGO:
- if (!net->sctp.auth_enable)
+ if (!ep->auth_enable)
goto fall_through;
/* Save peer's HMAC list */
@@ -2665,7 +2667,7 @@ do_addr_param:
break;
case SCTP_PARAM_CHUNKS:
- if (!net->sctp.auth_enable)
+ if (!ep->auth_enable)
goto fall_through;
asoc->peer.peer_chunks = kmemdup(param.p,
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 5d6883ff00c..fef2acdf4a2 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -496,11 +496,10 @@ static void sctp_do_8_2_transport_strike(sctp_cmd_seq_t *commands,
/* If the transport error count is greater than the pf_retrans
* threshold, and less than pathmaxrtx, and if the current state
- * is not SCTP_UNCONFIRMED, then mark this transport as Partially
- * Failed, see SCTP Quick Failover Draft, section 5.1
+ * is SCTP_ACTIVE, then mark this transport as Partially Failed,
+ * see SCTP Quick Failover Draft, section 5.1
*/
- if ((transport->state != SCTP_PF) &&
- (transport->state != SCTP_UNCONFIRMED) &&
+ if ((transport->state == SCTP_ACTIVE) &&
(asoc->pf_retrans < transport->pathmaxrxt) &&
(transport->error_count > asoc->pf_retrans)) {
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 01e002430c8..5170a1ff95a 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -357,7 +357,7 @@ sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net,
/* Verify the INIT chunk before processing it. */
err_chunk = NULL;
- if (!sctp_verify_init(net, asoc, chunk->chunk_hdr->type,
+ if (!sctp_verify_init(net, ep, asoc, chunk->chunk_hdr->type,
(sctp_init_chunk_t *)chunk->chunk_hdr, chunk,
&err_chunk)) {
/* This chunk contains fatal error. It is to be discarded.
@@ -524,7 +524,7 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(struct net *net,
/* Verify the INIT chunk before processing it. */
err_chunk = NULL;
- if (!sctp_verify_init(net, asoc, chunk->chunk_hdr->type,
+ if (!sctp_verify_init(net, ep, asoc, chunk->chunk_hdr->type,
(sctp_init_chunk_t *)chunk->chunk_hdr, chunk,
&err_chunk)) {
@@ -1430,7 +1430,7 @@ static sctp_disposition_t sctp_sf_do_unexpected_init(
/* Verify the INIT chunk before processing it. */
err_chunk = NULL;
- if (!sctp_verify_init(net, asoc, chunk->chunk_hdr->type,
+ if (!sctp_verify_init(net, ep, asoc, chunk->chunk_hdr->type,
(sctp_init_chunk_t *)chunk->chunk_hdr, chunk,
&err_chunk)) {
/* This chunk contains fatal error. It is to be discarded.
@@ -6178,7 +6178,7 @@ static int sctp_eat_data(const struct sctp_association *asoc,
* PMTU. In cases, such as loopback, this might be a rather
* large spill over.
*/
- if ((!chunk->data_accepted) && (!asoc->rwnd ||
+ if ((!chunk->data_accepted) && (!asoc->rwnd || asoc->rwnd_over ||
(datalen > asoc->rwnd + asoc->frag_point))) {
/* If this is the next TSN, consider reneging to make
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 981aaf8b6ac..42989968940 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -71,6 +71,7 @@
#include <net/route.h>
#include <net/ipv6.h>
#include <net/inet_common.h>
+#include <net/busy_poll.h>
#include <linux/socket.h> /* for sa_family_t */
#include <linux/export.h>
@@ -2115,6 +2116,12 @@ static int sctp_recvmsg(struct kiocb *iocb, struct sock *sk,
sctp_skb_pull(skb, copied);
skb_queue_head(&sk->sk_receive_queue, skb);
+ /* When only partial message is copied to the user, increase
+ * rwnd by that amount. If all the data in the skb is read,
+ * rwnd is updated when the event is freed.
+ */
+ if (!sctp_ulpevent_is_notification(event))
+ sctp_assoc_rwnd_increase(event->asoc, copied);
goto out;
} else if ((event->msg_flags & MSG_NOTIFICATION) ||
(event->msg_flags & MSG_EOR))
@@ -3315,10 +3322,10 @@ static int sctp_setsockopt_auth_chunk(struct sock *sk,
char __user *optval,
unsigned int optlen)
{
- struct net *net = sock_net(sk);
+ struct sctp_endpoint *ep = sctp_sk(sk)->ep;
struct sctp_authchunk val;
- if (!net->sctp.auth_enable)
+ if (!ep->auth_enable)
return -EACCES;
if (optlen != sizeof(struct sctp_authchunk))
@@ -3335,7 +3342,7 @@ static int sctp_setsockopt_auth_chunk(struct sock *sk,
}
/* add this chunk id to the endpoint */
- return sctp_auth_ep_add_chunkid(sctp_sk(sk)->ep, val.sauth_chunk);
+ return sctp_auth_ep_add_chunkid(ep, val.sauth_chunk);
}
/*
@@ -3348,12 +3355,12 @@ static int sctp_setsockopt_hmac_ident(struct sock *sk,
char __user *optval,
unsigned int optlen)
{
- struct net *net = sock_net(sk);
+ struct sctp_endpoint *ep = sctp_sk(sk)->ep;
struct sctp_hmacalgo *hmacs;
u32 idents;
int err;
- if (!net->sctp.auth_enable)
+ if (!ep->auth_enable)
return -EACCES;
if (optlen < sizeof(struct sctp_hmacalgo))
@@ -3370,7 +3377,7 @@ static int sctp_setsockopt_hmac_ident(struct sock *sk,
goto out;
}
- err = sctp_auth_ep_set_hmacs(sctp_sk(sk)->ep, hmacs);
+ err = sctp_auth_ep_set_hmacs(ep, hmacs);
out:
kfree(hmacs);
return err;
@@ -3386,12 +3393,12 @@ static int sctp_setsockopt_auth_key(struct sock *sk,
char __user *optval,
unsigned int optlen)
{
- struct net *net = sock_net(sk);
+ struct sctp_endpoint *ep = sctp_sk(sk)->ep;
struct sctp_authkey *authkey;
struct sctp_association *asoc;
int ret;
- if (!net->sctp.auth_enable)
+ if (!ep->auth_enable)
return -EACCES;
if (optlen <= sizeof(struct sctp_authkey))
@@ -3412,7 +3419,7 @@ static int sctp_setsockopt_auth_key(struct sock *sk,
goto out;
}
- ret = sctp_auth_set_key(sctp_sk(sk)->ep, asoc, authkey);
+ ret = sctp_auth_set_key(ep, asoc, authkey);
out:
kzfree(authkey);
return ret;
@@ -3428,11 +3435,11 @@ static int sctp_setsockopt_active_key(struct sock *sk,
char __user *optval,
unsigned int optlen)
{
- struct net *net = sock_net(sk);
+ struct sctp_endpoint *ep = sctp_sk(sk)->ep;
struct sctp_authkeyid val;
struct sctp_association *asoc;
- if (!net->sctp.auth_enable)
+ if (!ep->auth_enable)
return -EACCES;
if (optlen != sizeof(struct sctp_authkeyid))
@@ -3444,8 +3451,7 @@ static int sctp_setsockopt_active_key(struct sock *sk,
if (!asoc && val.scact_assoc_id && sctp_style(sk, UDP))
return -EINVAL;
- return sctp_auth_set_active_key(sctp_sk(sk)->ep, asoc,
- val.scact_keynumber);
+ return sctp_auth_set_active_key(ep, asoc, val.scact_keynumber);
}
/*
@@ -3457,11 +3463,11 @@ static int sctp_setsockopt_del_key(struct sock *sk,
char __user *optval,
unsigned int optlen)
{
- struct net *net = sock_net(sk);
+ struct sctp_endpoint *ep = sctp_sk(sk)->ep;
struct sctp_authkeyid val;
struct sctp_association *asoc;
- if (!net->sctp.auth_enable)
+ if (!ep->auth_enable)
return -EACCES;
if (optlen != sizeof(struct sctp_authkeyid))
@@ -3473,8 +3479,7 @@ static int sctp_setsockopt_del_key(struct sock *sk,
if (!asoc && val.scact_assoc_id && sctp_style(sk, UDP))
return -EINVAL;
- return sctp_auth_del_key_id(sctp_sk(sk)->ep, asoc,
- val.scact_keynumber);
+ return sctp_auth_del_key_id(ep, asoc, val.scact_keynumber);
}
@@ -5381,16 +5386,16 @@ static int sctp_getsockopt_maxburst(struct sock *sk, int len,
static int sctp_getsockopt_hmac_ident(struct sock *sk, int len,
char __user *optval, int __user *optlen)
{
- struct net *net = sock_net(sk);
+ struct sctp_endpoint *ep = sctp_sk(sk)->ep;
struct sctp_hmacalgo __user *p = (void __user *)optval;
struct sctp_hmac_algo_param *hmacs;
__u16 data_len = 0;
u32 num_idents;
- if (!net->sctp.auth_enable)
+ if (!ep->auth_enable)
return -EACCES;
- hmacs = sctp_sk(sk)->ep->auth_hmacs_list;
+ hmacs = ep->auth_hmacs_list;
data_len = ntohs(hmacs->param_hdr.length) - sizeof(sctp_paramhdr_t);
if (len < sizeof(struct sctp_hmacalgo) + data_len)
@@ -5411,11 +5416,11 @@ static int sctp_getsockopt_hmac_ident(struct sock *sk, int len,
static int sctp_getsockopt_active_key(struct sock *sk, int len,
char __user *optval, int __user *optlen)
{
- struct net *net = sock_net(sk);
+ struct sctp_endpoint *ep = sctp_sk(sk)->ep;
struct sctp_authkeyid val;
struct sctp_association *asoc;
- if (!net->sctp.auth_enable)
+ if (!ep->auth_enable)
return -EACCES;
if (len < sizeof(struct sctp_authkeyid))
@@ -5430,7 +5435,7 @@ static int sctp_getsockopt_active_key(struct sock *sk, int len,
if (asoc)
val.scact_keynumber = asoc->active_key_id;
else
- val.scact_keynumber = sctp_sk(sk)->ep->active_key_id;
+ val.scact_keynumber = ep->active_key_id;
len = sizeof(struct sctp_authkeyid);
if (put_user(len, optlen))
@@ -5444,7 +5449,7 @@ static int sctp_getsockopt_active_key(struct sock *sk, int len,
static int sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len,
char __user *optval, int __user *optlen)
{
- struct net *net = sock_net(sk);
+ struct sctp_endpoint *ep = sctp_sk(sk)->ep;
struct sctp_authchunks __user *p = (void __user *)optval;
struct sctp_authchunks val;
struct sctp_association *asoc;
@@ -5452,7 +5457,7 @@ static int sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len,
u32 num_chunks = 0;
char __user *to;
- if (!net->sctp.auth_enable)
+ if (!ep->auth_enable)
return -EACCES;
if (len < sizeof(struct sctp_authchunks))
@@ -5489,7 +5494,7 @@ num:
static int sctp_getsockopt_local_auth_chunks(struct sock *sk, int len,
char __user *optval, int __user *optlen)
{
- struct net *net = sock_net(sk);
+ struct sctp_endpoint *ep = sctp_sk(sk)->ep;
struct sctp_authchunks __user *p = (void __user *)optval;
struct sctp_authchunks val;
struct sctp_association *asoc;
@@ -5497,7 +5502,7 @@ static int sctp_getsockopt_local_auth_chunks(struct sock *sk, int len,
u32 num_chunks = 0;
char __user *to;
- if (!net->sctp.auth_enable)
+ if (!ep->auth_enable)
return -EACCES;
if (len < sizeof(struct sctp_authchunks))
@@ -5514,7 +5519,7 @@ static int sctp_getsockopt_local_auth_chunks(struct sock *sk, int len,
if (asoc)
ch = (struct sctp_chunks_param *)asoc->c.auth_chunks;
else
- ch = sctp_sk(sk)->ep->auth_chunk_list;
+ ch = ep->auth_chunk_list;
if (!ch)
goto num;
@@ -5941,8 +5946,9 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr)
/* Search for an available port. */
int low, high, remaining, index;
unsigned int rover;
+ struct net *net = sock_net(sk);
- inet_get_local_port_range(sock_net(sk), &low, &high);
+ inet_get_local_port_range(net, &low, &high);
remaining = (high - low) + 1;
rover = prandom_u32() % remaining + low;
@@ -5950,7 +5956,7 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr)
rover++;
if ((rover < low) || (rover > high))
rover = low;
- if (inet_is_reserved_local_port(rover))
+ if (inet_is_local_reserved_port(net, rover))
continue;
index = sctp_phashfn(sock_net(sk), rover);
head = &sctp_port_hashtable[index];
@@ -6553,6 +6559,10 @@ static struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags,
if (sk->sk_shutdown & RCV_SHUTDOWN)
break;
+ if (sk_can_busy_loop(sk) &&
+ sk_busy_loop(sk, noblock))
+ continue;
+
/* User doesn't want to wait. */
error = -EAGAIN;
if (!timeo)
@@ -6593,6 +6603,46 @@ static void __sctp_write_space(struct sctp_association *asoc)
}
}
+static void sctp_wake_up_waiters(struct sock *sk,
+ struct sctp_association *asoc)
+{
+ struct sctp_association *tmp = asoc;
+
+ /* We do accounting for the sndbuf space per association,
+ * so we only need to wake our own association.
+ */
+ if (asoc->ep->sndbuf_policy)
+ return __sctp_write_space(asoc);
+
+ /* If association goes down and is just flushing its
+ * outq, then just normally notify others.
+ */
+ if (asoc->base.dead)
+ return sctp_write_space(sk);
+
+ /* Accounting for the sndbuf space is per socket, so we
+ * need to wake up others, try to be fair and in case of
+ * other associations, let them have a go first instead
+ * of just doing a sctp_write_space() call.
+ *
+ * Note that we reach sctp_wake_up_waiters() only when
+ * associations free up queued chunks, thus we are under
+ * lock and the list of associations on a socket is
+ * guaranteed not to change.
+ */
+ for (tmp = list_next_entry(tmp, asocs); 1;
+ tmp = list_next_entry(tmp, asocs)) {
+ /* Manually skip the head element. */
+ if (&tmp->asocs == &((sctp_sk(sk))->ep->asocs))
+ continue;
+ /* Wake up association. */
+ __sctp_write_space(tmp);
+ /* We've reached the end. */
+ if (tmp == asoc)
+ break;
+ }
+}
+
/* Do accounting for the sndbuf space.
* Decrement the used sndbuf space of the corresponding association by the
* data size which was just transmitted(freed).
@@ -6620,7 +6670,7 @@ static void sctp_wfree(struct sk_buff *skb)
sk_mem_uncharge(sk, skb->truesize);
sock_wfree(skb);
- __sctp_write_space(asoc);
+ sctp_wake_up_waiters(sk, asoc);
sctp_association_put(asoc);
}
@@ -6705,7 +6755,7 @@ do_nonblock:
goto out;
}
-void sctp_data_ready(struct sock *sk, int len)
+void sctp_data_ready(struct sock *sk)
{
struct socket_wq *wq;
@@ -6896,7 +6946,8 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk,
newsk->sk_type = sk->sk_type;
newsk->sk_bound_dev_if = sk->sk_bound_dev_if;
newsk->sk_flags = sk->sk_flags;
- newsk->sk_no_check = sk->sk_no_check;
+ newsk->sk_no_check_tx = sk->sk_no_check_tx;
+ newsk->sk_no_check_rx = sk->sk_no_check_rx;
newsk->sk_reuse = sk->sk_reuse;
newsk->sk_shutdown = sk->sk_shutdown;
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index 35c8923b555..12c7e01c267 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -34,6 +34,8 @@
* Sridhar Samudrala <sri@us.ibm.com>
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <net/sctp/structs.h>
#include <net/sctp/sctp.h>
#include <linux/sysctl.h>
@@ -46,6 +48,11 @@ static int sack_timer_min = 1;
static int sack_timer_max = 500;
static int addr_scope_max = 3; /* check sctp_scope_policy_t in include/net/sctp/constants.h for max entries */
static int rwnd_scale_max = 16;
+static int rto_alpha_min = 0;
+static int rto_beta_min = 0;
+static int rto_alpha_max = 1000;
+static int rto_beta_max = 1000;
+
static unsigned long max_autoclose_min = 0;
static unsigned long max_autoclose_max =
(MAX_SCHEDULE_TIMEOUT / HZ > UINT_MAX)
@@ -64,6 +71,12 @@ static int proc_sctp_do_rto_min(struct ctl_table *ctl, int write,
static int proc_sctp_do_rto_max(struct ctl_table *ctl, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos);
+static int proc_sctp_do_alpha_beta(struct ctl_table *ctl, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos);
+static int proc_sctp_do_auth(struct ctl_table *ctl, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos);
static struct ctl_table sctp_table[] = {
{
@@ -123,15 +136,19 @@ static struct ctl_table sctp_net_table[] = {
.procname = "rto_alpha_exp_divisor",
.data = &init_net.sctp.rto_alpha,
.maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = proc_dointvec,
+ .mode = 0644,
+ .proc_handler = proc_sctp_do_alpha_beta,
+ .extra1 = &rto_alpha_min,
+ .extra2 = &rto_alpha_max,
},
{
.procname = "rto_beta_exp_divisor",
.data = &init_net.sctp.rto_beta,
.maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = proc_dointvec,
+ .mode = 0644,
+ .proc_handler = proc_sctp_do_alpha_beta,
+ .extra1 = &rto_beta_min,
+ .extra2 = &rto_beta_max,
},
{
.procname = "max_burst",
@@ -266,7 +283,7 @@ static struct ctl_table sctp_net_table[] = {
.data = &init_net.sctp.auth_enable,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_sctp_do_auth,
},
{
.procname = "addr_scope_policy",
@@ -304,41 +321,40 @@ static int proc_sctp_do_hmac_alg(struct ctl_table *ctl, int write,
loff_t *ppos)
{
struct net *net = current->nsproxy->net_ns;
- char tmp[8];
struct ctl_table tbl;
- int ret;
- int changed = 0;
+ bool changed = false;
char *none = "none";
+ char tmp[8];
+ int ret;
memset(&tbl, 0, sizeof(struct ctl_table));
if (write) {
tbl.data = tmp;
- tbl.maxlen = 8;
+ tbl.maxlen = sizeof(tmp);
} else {
tbl.data = net->sctp.sctp_hmac_alg ? : none;
tbl.maxlen = strlen(tbl.data);
}
- ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
- if (write) {
+ ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
+ if (write && ret == 0) {
#ifdef CONFIG_CRYPTO_MD5
if (!strncmp(tmp, "md5", 3)) {
net->sctp.sctp_hmac_alg = "md5";
- changed = 1;
+ changed = true;
}
#endif
#ifdef CONFIG_CRYPTO_SHA1
if (!strncmp(tmp, "sha1", 4)) {
net->sctp.sctp_hmac_alg = "sha1";
- changed = 1;
+ changed = true;
}
#endif
if (!strncmp(tmp, "none", 4)) {
net->sctp.sctp_hmac_alg = NULL;
- changed = 1;
+ changed = true;
}
-
if (!changed)
ret = -EINVAL;
}
@@ -351,11 +367,10 @@ static int proc_sctp_do_rto_min(struct ctl_table *ctl, int write,
loff_t *ppos)
{
struct net *net = current->nsproxy->net_ns;
- int new_value;
- struct ctl_table tbl;
unsigned int min = *(unsigned int *) ctl->extra1;
unsigned int max = *(unsigned int *) ctl->extra2;
- int ret;
+ struct ctl_table tbl;
+ int ret, new_value;
memset(&tbl, 0, sizeof(struct ctl_table));
tbl.maxlen = sizeof(unsigned int);
@@ -364,12 +379,15 @@ static int proc_sctp_do_rto_min(struct ctl_table *ctl, int write,
tbl.data = &new_value;
else
tbl.data = &net->sctp.rto_min;
+
ret = proc_dointvec(&tbl, write, buffer, lenp, ppos);
- if (write) {
- if (ret || new_value > max || new_value < min)
+ if (write && ret == 0) {
+ if (new_value > max || new_value < min)
return -EINVAL;
+
net->sctp.rto_min = new_value;
}
+
return ret;
}
@@ -378,11 +396,10 @@ static int proc_sctp_do_rto_max(struct ctl_table *ctl, int write,
loff_t *ppos)
{
struct net *net = current->nsproxy->net_ns;
- int new_value;
- struct ctl_table tbl;
unsigned int min = *(unsigned int *) ctl->extra1;
unsigned int max = *(unsigned int *) ctl->extra2;
- int ret;
+ struct ctl_table tbl;
+ int ret, new_value;
memset(&tbl, 0, sizeof(struct ctl_table));
tbl.maxlen = sizeof(unsigned int);
@@ -391,31 +408,75 @@ static int proc_sctp_do_rto_max(struct ctl_table *ctl, int write,
tbl.data = &new_value;
else
tbl.data = &net->sctp.rto_max;
+
ret = proc_dointvec(&tbl, write, buffer, lenp, ppos);
- if (write) {
- if (ret || new_value > max || new_value < min)
+ if (write && ret == 0) {
+ if (new_value > max || new_value < min)
return -EINVAL;
+
net->sctp.rto_max = new_value;
}
+
return ret;
}
-int sctp_sysctl_net_register(struct net *net)
+static int proc_sctp_do_alpha_beta(struct ctl_table *ctl, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos)
{
- struct ctl_table *table = sctp_net_table;
+ pr_warn_once("Changing rto_alpha or rto_beta may lead to "
+ "suboptimal rtt/srtt estimations!\n");
+
+ return proc_dointvec_minmax(ctl, write, buffer, lenp, ppos);
+}
- if (!net_eq(net, &init_net)) {
- int i;
+static int proc_sctp_do_auth(struct ctl_table *ctl, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ struct net *net = current->nsproxy->net_ns;
+ struct ctl_table tbl;
+ int new_value, ret;
- table = kmemdup(sctp_net_table, sizeof(sctp_net_table), GFP_KERNEL);
- if (!table)
- return -ENOMEM;
+ memset(&tbl, 0, sizeof(struct ctl_table));
+ tbl.maxlen = sizeof(unsigned int);
+
+ if (write)
+ tbl.data = &new_value;
+ else
+ tbl.data = &net->sctp.auth_enable;
- for (i = 0; table[i].data; i++)
- table[i].data += (char *)(&net->sctp) - (char *)&init_net.sctp;
+ ret = proc_dointvec(&tbl, write, buffer, lenp, ppos);
+ if (write && ret == 0) {
+ struct sock *sk = net->sctp.ctl_sock;
+
+ net->sctp.auth_enable = new_value;
+ /* Update the value in the control socket */
+ lock_sock(sk);
+ sctp_sk(sk)->ep->auth_enable = new_value;
+ release_sock(sk);
}
+ return ret;
+}
+
+int sctp_sysctl_net_register(struct net *net)
+{
+ struct ctl_table *table;
+ int i;
+
+ table = kmemdup(sctp_net_table, sizeof(sctp_net_table), GFP_KERNEL);
+ if (!table)
+ return -ENOMEM;
+
+ for (i = 0; table[i].data; i++)
+ table[i].data += (char *)(&net->sctp) - (char *)&init_net.sctp;
+
net->sctp.sysctl_header = register_net_sysctl(net, "net/sctp", table);
+ if (net->sctp.sysctl_header == NULL) {
+ kfree(table);
+ return -ENOMEM;
+ }
return 0;
}
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index d0810dc5f07..7dd672fa651 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -72,7 +72,7 @@ static struct sctp_transport *sctp_transport_init(struct net *net,
*/
peer->rto = msecs_to_jiffies(net->sctp.rto_initial);
- peer->last_time_heard = jiffies;
+ peer->last_time_heard = ktime_get();
peer->last_time_ecne_reduced = jiffies;
peer->param_flags = SPP_HB_DISABLE |
@@ -652,5 +652,4 @@ void sctp_transport_immediate_rtx(struct sctp_transport *t)
if (!mod_timer(&t->T3_rtx_timer, jiffies + t->rto))
sctp_transport_hold(t);
}
- return;
}
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index 8d198ae0360..b6842fdb53d 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -366,9 +366,10 @@ fail:
* specification [SCTP] and any extensions for a list of possible
* error formats.
*/
-struct sctp_ulpevent *sctp_ulpevent_make_remote_error(
- const struct sctp_association *asoc, struct sctp_chunk *chunk,
- __u16 flags, gfp_t gfp)
+struct sctp_ulpevent *
+sctp_ulpevent_make_remote_error(const struct sctp_association *asoc,
+ struct sctp_chunk *chunk, __u16 flags,
+ gfp_t gfp)
{
struct sctp_ulpevent *event;
struct sctp_remote_error *sre;
@@ -387,8 +388,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_remote_error(
/* Copy the skb to a new skb with room for us to prepend
* notification with.
*/
- skb = skb_copy_expand(chunk->skb, sizeof(struct sctp_remote_error),
- 0, gfp);
+ skb = skb_copy_expand(chunk->skb, sizeof(*sre), 0, gfp);
/* Pull off the rest of the cause TLV from the chunk. */
skb_pull(chunk->skb, elen);
@@ -399,62 +399,21 @@ struct sctp_ulpevent *sctp_ulpevent_make_remote_error(
event = sctp_skb2event(skb);
sctp_ulpevent_init(event, MSG_NOTIFICATION, skb->truesize);
- sre = (struct sctp_remote_error *)
- skb_push(skb, sizeof(struct sctp_remote_error));
+ sre = (struct sctp_remote_error *) skb_push(skb, sizeof(*sre));
/* Trim the buffer to the right length. */
- skb_trim(skb, sizeof(struct sctp_remote_error) + elen);
+ skb_trim(skb, sizeof(*sre) + elen);
- /* Socket Extensions for SCTP
- * 5.3.1.3 SCTP_REMOTE_ERROR
- *
- * sre_type:
- * It should be SCTP_REMOTE_ERROR.
- */
+ /* RFC6458, Section 6.1.3. SCTP_REMOTE_ERROR */
+ memset(sre, 0, sizeof(*sre));
sre->sre_type = SCTP_REMOTE_ERROR;
-
- /*
- * Socket Extensions for SCTP
- * 5.3.1.3 SCTP_REMOTE_ERROR
- *
- * sre_flags: 16 bits (unsigned integer)
- * Currently unused.
- */
sre->sre_flags = 0;
-
- /* Socket Extensions for SCTP
- * 5.3.1.3 SCTP_REMOTE_ERROR
- *
- * sre_length: sizeof (__u32)
- *
- * This field is the total length of the notification data,
- * including the notification header.
- */
sre->sre_length = skb->len;
-
- /* Socket Extensions for SCTP
- * 5.3.1.3 SCTP_REMOTE_ERROR
- *
- * sre_error: 16 bits (unsigned integer)
- * This value represents one of the Operational Error causes defined in
- * the SCTP specification, in network byte order.
- */
sre->sre_error = cause;
-
- /* Socket Extensions for SCTP
- * 5.3.1.3 SCTP_REMOTE_ERROR
- *
- * sre_assoc_id: sizeof (sctp_assoc_t)
- *
- * The association id field, holds the identifier for the association.
- * All notifications for a given association have the same association
- * identifier. For TCP style socket, this field is ignored.
- */
sctp_ulpevent_set_owner(event, asoc);
sre->sre_assoc_id = sctp_assoc2id(asoc);
return event;
-
fail:
return NULL;
}
@@ -899,7 +858,9 @@ __u16 sctp_ulpevent_get_notification_type(const struct sctp_ulpevent *event)
return notification->sn_header.sn_type;
}
-/* Copy out the sndrcvinfo into a msghdr. */
+/* RFC6458, Section 5.3.2. SCTP Header Information Structure
+ * (SCTP_SNDRCV, DEPRECATED)
+ */
void sctp_ulpevent_read_sndrcvinfo(const struct sctp_ulpevent *event,
struct msghdr *msghdr)
{
@@ -908,74 +869,21 @@ void sctp_ulpevent_read_sndrcvinfo(const struct sctp_ulpevent *event,
if (sctp_ulpevent_is_notification(event))
return;
- /* Sockets API Extensions for SCTP
- * Section 5.2.2 SCTP Header Information Structure (SCTP_SNDRCV)
- *
- * sinfo_stream: 16 bits (unsigned integer)
- *
- * For recvmsg() the SCTP stack places the message's stream number in
- * this value.
- */
+ memset(&sinfo, 0, sizeof(sinfo));
sinfo.sinfo_stream = event->stream;
- /* sinfo_ssn: 16 bits (unsigned integer)
- *
- * For recvmsg() this value contains the stream sequence number that
- * the remote endpoint placed in the DATA chunk. For fragmented
- * messages this is the same number for all deliveries of the message
- * (if more than one recvmsg() is needed to read the message).
- */
sinfo.sinfo_ssn = event->ssn;
- /* sinfo_ppid: 32 bits (unsigned integer)
- *
- * In recvmsg() this value is
- * the same information that was passed by the upper layer in the peer
- * application. Please note that byte order issues are NOT accounted
- * for and this information is passed opaquely by the SCTP stack from
- * one end to the other.
- */
sinfo.sinfo_ppid = event->ppid;
- /* sinfo_flags: 16 bits (unsigned integer)
- *
- * This field may contain any of the following flags and is composed of
- * a bitwise OR of these values.
- *
- * recvmsg() flags:
- *
- * SCTP_UNORDERED - This flag is present when the message was sent
- * non-ordered.
- */
sinfo.sinfo_flags = event->flags;
- /* sinfo_tsn: 32 bit (unsigned integer)
- *
- * For the receiving side, this field holds a TSN that was
- * assigned to one of the SCTP Data Chunks.
- */
sinfo.sinfo_tsn = event->tsn;
- /* sinfo_cumtsn: 32 bit (unsigned integer)
- *
- * This field will hold the current cumulative TSN as
- * known by the underlying SCTP layer. Note this field is
- * ignored when sending and only valid for a receive
- * operation when sinfo_flags are set to SCTP_UNORDERED.
- */
sinfo.sinfo_cumtsn = event->cumtsn;
- /* sinfo_assoc_id: sizeof (sctp_assoc_t)
- *
- * The association handle field, sinfo_assoc_id, holds the identifier
- * for the association announced in the COMMUNICATION_UP notification.
- * All notifications for a given association have the same identifier.
- * Ignored for one-to-one style sockets.
- */
sinfo.sinfo_assoc_id = sctp_assoc2id(event->asoc);
-
- /* context value that is set via SCTP_CONTEXT socket option. */
+ /* Context value that is set via SCTP_CONTEXT socket option. */
sinfo.sinfo_context = event->asoc->default_rcv_context;
-
/* These fields are not used while receiving. */
sinfo.sinfo_timetolive = 0;
put_cmsg(msghdr, IPPROTO_SCTP, SCTP_SNDRCV,
- sizeof(struct sctp_sndrcvinfo), (void *)&sinfo);
+ sizeof(sinfo), &sinfo);
}
/* Do accounting for bytes received and hold a reference to the association
@@ -989,7 +897,7 @@ static void sctp_ulpevent_receive_data(struct sctp_ulpevent *event,
skb = sctp_event2skb(event);
/* Set the owner and charge rwnd for bytes received. */
sctp_ulpevent_set_owner(event, asoc);
- sctp_assoc_rwnd_update(asoc, false);
+ sctp_assoc_rwnd_decrease(asoc, skb_headlen(skb));
if (!skb->data_len)
return;
@@ -1011,7 +919,6 @@ static void sctp_ulpevent_release_data(struct sctp_ulpevent *event)
{
struct sk_buff *skb, *frag;
unsigned int len;
- struct sctp_association *asoc;
/* Current stack structures assume that the rcv buffer is
* per socket. For UDP style sockets this is not true as
@@ -1036,11 +943,8 @@ static void sctp_ulpevent_release_data(struct sctp_ulpevent *event)
}
done:
- asoc = event->asoc;
- sctp_association_hold(asoc);
+ sctp_assoc_rwnd_increase(event->asoc, len);
sctp_ulpevent_release_owner(event);
- sctp_assoc_rwnd_update(asoc, true);
- sctp_association_put(asoc);
}
static void sctp_ulpevent_release_frag_data(struct sctp_ulpevent *event)
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index 5dc94117e9d..d49dc2ed30a 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -38,6 +38,7 @@
#include <linux/types.h>
#include <linux/skbuff.h>
#include <net/sock.h>
+#include <net/busy_poll.h>
#include <net/sctp/structs.h>
#include <net/sctp/sctp.h>
#include <net/sctp/sm.h>
@@ -204,6 +205,9 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event)
if (sock_flag(sk, SOCK_DEAD) || (sk->sk_shutdown & RCV_SHUTDOWN))
goto out_free;
+ if (!sctp_ulpevent_is_notification(event))
+ sk_mark_napi_id(sk, skb);
+
/* Check if the user wishes to receive this event. */
if (!sctp_ulpevent_is_enabled(event, &sctp_sk(sk)->subscribe))
goto out_free;
@@ -259,7 +263,7 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event)
sctp_ulpq_clear_pd(ulpq);
if (queue == &sk->sk_receive_queue)
- sk->sk_data_ready(sk, 0);
+ sk->sk_data_ready(sk);
return 1;
out_free:
@@ -1135,5 +1139,5 @@ void sctp_ulpq_abort_pd(struct sctp_ulpq *ulpq, gfp_t gfp)
/* If there is data waiting, send it up the socket now. */
if (sctp_ulpq_clear_pd(ulpq) || ev)
- sk->sk_data_ready(sk, 0);
+ sk->sk_data_ready(sk);
}
diff --git a/net/socket.c b/net/socket.c
index a19ae1968d3..abf56b2a14f 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -72,6 +72,7 @@
#include <linux/if_bridge.h>
#include <linux/if_frad.h>
#include <linux/if_vlan.h>
+#include <linux/ptp_classify.h>
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/cache.h>
@@ -594,7 +595,7 @@ void sock_release(struct socket *sock)
}
if (rcu_dereference_protected(sock->wq, 1)->fasync_list)
- printk(KERN_ERR "sock_release: fasync list not empty!\n");
+ pr_err("%s: fasync list not empty!\n", __func__);
if (test_bit(SOCK_EXTERNALLY_ALLOCATED, &sock->flags))
return;
@@ -1266,8 +1267,8 @@ int __sock_create(struct net *net, int family, int type, int protocol,
static int warned;
if (!warned) {
warned = 1;
- printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n",
- current->comm);
+ pr_info("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
+ current->comm);
}
family = PF_PACKET;
}
@@ -1879,8 +1880,8 @@ out:
* Receive a datagram from a socket.
*/
-asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size,
- unsigned int flags)
+SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
+ unsigned int, flags)
{
return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
}
@@ -2600,8 +2601,7 @@ int sock_register(const struct net_proto_family *ops)
int err;
if (ops->family >= NPROTO) {
- printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family,
- NPROTO);
+ pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
return -ENOBUFS;
}
@@ -2615,7 +2615,7 @@ int sock_register(const struct net_proto_family *ops)
}
spin_unlock(&net_family_lock);
- printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
+ pr_info("NET: Registered protocol family %d\n", ops->family);
return err;
}
EXPORT_SYMBOL(sock_register);
@@ -2643,7 +2643,7 @@ void sock_unregister(int family)
synchronize_rcu();
- printk(KERN_INFO "NET: Unregistered protocol family %d\n", family);
+ pr_info("NET: Unregistered protocol family %d\n", family);
}
EXPORT_SYMBOL(sock_unregister);
@@ -2686,9 +2686,7 @@ static int __init sock_init(void)
goto out;
#endif
-#ifdef CONFIG_NETWORK_PHY_TIMESTAMPING
- skb_timestamping_init();
-#endif
+ ptp_classifier_init();
out:
return err;
diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig
index 241b54f3020..0754d0f466d 100644
--- a/net/sunrpc/Kconfig
+++ b/net/sunrpc/Kconfig
@@ -9,19 +9,6 @@ config SUNRPC_BACKCHANNEL
bool
depends on SUNRPC
-config SUNRPC_XPRT_RDMA
- tristate
- depends on SUNRPC && INFINIBAND && INFINIBAND_ADDR_TRANS
- default SUNRPC && INFINIBAND
- help
- This option allows the NFS client and server to support
- an RDMA-enabled transport.
-
- To compile RPC client RDMA transport support as a module,
- choose M here: the module will be called xprtrdma.
-
- If unsure, say N.
-
config SUNRPC_SWAP
bool
depends on SUNRPC
@@ -57,3 +44,29 @@ config SUNRPC_DEBUG
but makes troubleshooting NFS issues significantly harder.
If unsure, say Y.
+
+config SUNRPC_XPRT_RDMA_CLIENT
+ tristate "RPC over RDMA Client Support"
+ depends on SUNRPC && INFINIBAND && INFINIBAND_ADDR_TRANS
+ default SUNRPC && INFINIBAND
+ help
+ This option allows the NFS client to support an RDMA-enabled
+ transport.
+
+ To compile RPC client RDMA transport support as a module,
+ choose M here: the module will be called xprtrdma.
+
+ If unsure, say N.
+
+config SUNRPC_XPRT_RDMA_SERVER
+ tristate "RPC over RDMA Server Support"
+ depends on SUNRPC && INFINIBAND && INFINIBAND_ADDR_TRANS
+ default SUNRPC && INFINIBAND
+ help
+ This option allows the NFS server to support an RDMA-enabled
+ transport.
+
+ To compile RPC server RDMA transport support as a module,
+ choose M here: the module will be called svcrdma.
+
+ If unsure, say N.
diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile
index 8209a0411bc..e5a7a1cac8f 100644
--- a/net/sunrpc/Makefile
+++ b/net/sunrpc/Makefile
@@ -5,7 +5,8 @@
obj-$(CONFIG_SUNRPC) += sunrpc.o
obj-$(CONFIG_SUNRPC_GSS) += auth_gss/
-obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma/
+
+obj-y += xprtrdma/
sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \
auth.o auth_null.o auth_unix.o auth_generic.o \
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 5285ead196c..f7736671742 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -296,7 +296,7 @@ static void
rpcauth_unhash_cred_locked(struct rpc_cred *cred)
{
hlist_del_rcu(&cred->cr_hash);
- smp_mb__before_clear_bit();
+ smp_mb__before_atomic();
clear_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags);
}
@@ -592,6 +592,7 @@ rpcauth_lookupcred(struct rpc_auth *auth, int flags)
put_group_info(acred.group_info);
return ret;
}
+EXPORT_SYMBOL_GPL(rpcauth_lookupcred);
void
rpcauth_init_cred(struct rpc_cred *cred, const struct auth_cred *acred,
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 36e431ee1c9..b6e440baccc 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -143,7 +143,7 @@ gss_cred_set_ctx(struct rpc_cred *cred, struct gss_cl_ctx *ctx)
gss_get_ctx(ctx);
rcu_assign_pointer(gss_cred->gc_ctx, ctx);
set_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
- smp_mb__before_clear_bit();
+ smp_mb__before_atomic();
clear_bit(RPCAUTH_CRED_NEW, &cred->cr_flags);
}
diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c
index 27ce2624093..92d5ab99fbf 100644
--- a/net/sunrpc/auth_gss/gss_mech_switch.c
+++ b/net/sunrpc/auth_gss/gss_mech_switch.c
@@ -218,10 +218,8 @@ static struct gss_api_mech *_gss_mech_get_by_pseudoflavor(u32 pseudoflavor)
spin_lock(&registered_mechs_lock);
list_for_each_entry(pos, &registered_mechs, gm_list) {
- if (!mech_supports_pseudoflavor(pos, pseudoflavor)) {
- module_put(pos->gm_owner);
+ if (!mech_supports_pseudoflavor(pos, pseudoflavor))
continue;
- }
if (try_module_get(pos->gm_owner))
gm = pos;
break;
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 0f73f450774..4ce5eccec1f 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -1503,6 +1503,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
if (unwrap_integ_data(rqstp, &rqstp->rq_arg,
gc->gc_seq, rsci->mechctx))
goto garbage_args;
+ rqstp->rq_auth_slack = RPC_MAX_AUTH_SIZE;
break;
case RPC_GSS_SVC_PRIVACY:
/* placeholders for length and seq. number: */
@@ -1511,6 +1512,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
if (unwrap_priv_data(rqstp, &rqstp->rq_arg,
gc->gc_seq, rsci->mechctx))
goto garbage_args;
+ rqstp->rq_auth_slack = RPC_MAX_AUTH_SIZE * 2;
break;
default:
goto auth_err;
diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c
index e860d4f7ed2..9761a0da964 100644
--- a/net/sunrpc/backchannel_rqst.c
+++ b/net/sunrpc/backchannel_rqst.c
@@ -212,39 +212,23 @@ out:
}
EXPORT_SYMBOL_GPL(xprt_destroy_backchannel);
-/*
- * One or more rpc_rqst structure have been preallocated during the
- * backchannel setup. Buffer space for the send and private XDR buffers
- * has been preallocated as well. Use xprt_alloc_bc_request to allocate
- * to this request. Use xprt_free_bc_request to return it.
- *
- * We know that we're called in soft interrupt context, grab the spin_lock
- * since there is no need to grab the bottom half spin_lock.
- *
- * Return an available rpc_rqst, otherwise NULL if non are available.
- */
-struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt)
+static struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt, __be32 xid)
{
- struct rpc_rqst *req;
+ struct rpc_rqst *req = NULL;
dprintk("RPC: allocate a backchannel request\n");
- spin_lock(&xprt->bc_pa_lock);
- if (!list_empty(&xprt->bc_pa_list)) {
- req = list_first_entry(&xprt->bc_pa_list, struct rpc_rqst,
- rq_bc_pa_list);
- list_del(&req->rq_bc_pa_list);
- } else {
- req = NULL;
- }
- spin_unlock(&xprt->bc_pa_lock);
+ if (list_empty(&xprt->bc_pa_list))
+ goto not_found;
- if (req != NULL) {
- set_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state);
- req->rq_reply_bytes_recvd = 0;
- req->rq_bytes_sent = 0;
- memcpy(&req->rq_private_buf, &req->rq_rcv_buf,
+ req = list_first_entry(&xprt->bc_pa_list, struct rpc_rqst,
+ rq_bc_pa_list);
+ req->rq_reply_bytes_recvd = 0;
+ req->rq_bytes_sent = 0;
+ memcpy(&req->rq_private_buf, &req->rq_rcv_buf,
sizeof(req->rq_private_buf));
- }
+ req->rq_xid = xid;
+ req->rq_connect_cookie = xprt->connect_cookie;
+not_found:
dprintk("RPC: backchannel req=%p\n", req);
return req;
}
@@ -259,10 +243,11 @@ void xprt_free_bc_request(struct rpc_rqst *req)
dprintk("RPC: free backchannel req=%p\n", req);
- smp_mb__before_clear_bit();
+ req->rq_connect_cookie = xprt->connect_cookie - 1;
+ smp_mb__before_atomic();
WARN_ON_ONCE(!test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state));
clear_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state);
- smp_mb__after_clear_bit();
+ smp_mb__after_atomic();
if (!xprt_need_to_requeue(xprt)) {
/*
@@ -281,7 +266,57 @@ void xprt_free_bc_request(struct rpc_rqst *req)
* may be reused by a new callback request.
*/
spin_lock_bh(&xprt->bc_pa_lock);
- list_add(&req->rq_bc_pa_list, &xprt->bc_pa_list);
+ list_add_tail(&req->rq_bc_pa_list, &xprt->bc_pa_list);
spin_unlock_bh(&xprt->bc_pa_lock);
}
+/*
+ * One or more rpc_rqst structures have been preallocated during the
+ * backchannel setup. Buffer space for the send and private XDR buffers
+ * has been preallocated as well. Use xprt_lookup_bc_request to find or
+ * set up the request for an incoming callback. Use xprt_free_bc_request
+ * to return it.
+ *
+ * The preallocated list is searched, under bc_pa_lock, for a request that
+ * carries the transport's current connect cookie and the given xid. If no
+ * such request is on the list, xprt_alloc_bc_request is asked for one.
+ *
+ * We know that we're called in soft interrupt context, grab the spin_lock
+ * since there is no need to grab the bottom half spin_lock.
+ *
+ * Return the matching or newly set up rpc_rqst, otherwise NULL if none are
+ * available.
+ */
+struct rpc_rqst *xprt_lookup_bc_request(struct rpc_xprt *xprt, __be32 xid)
+{
+ struct rpc_rqst *req;
+
+ spin_lock(&xprt->bc_pa_lock);
+ list_for_each_entry(req, &xprt->bc_pa_list, rq_bc_pa_list) {
+ /* skip requests that were set up for a different connection */
+ if (req->rq_connect_cookie != xprt->connect_cookie)
+ continue;
+ if (req->rq_xid == xid)
+ goto found;
+ }
+ /* no request with this xid on the list: set one up */
+ req = xprt_alloc_bc_request(xprt, xid);
+found:
+ spin_unlock(&xprt->bc_pa_lock);
+ return req;
+}
+
+/*
+ * Add callback request to callback list. The callback
+ * service sleeps on the sv_cb_waitq waiting for new
+ * requests. Wake it up after enqueuing the request.
+ *
+ * The request is first unlinked from the transport's preallocated
+ * list. That list is protected by xprt->bc_pa_lock (it is the lock
+ * taken by xprt_lookup_bc_request and xprt_free_bc_request), so the
+ * unlink is done under that lock and not under the callback service's
+ * sv_cb_lock, which only protects sv_cb_list.
+ *
+ * @req: backchannel request to hand to the callback service
+ * @copied: length to record in req->rq_private_buf.len
+ */
+void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied)
+{
+ struct rpc_xprt *xprt = req->rq_xprt;
+ struct svc_serv *bc_serv = xprt->bc_serv;
+
+ spin_lock(&xprt->bc_pa_lock);
+ list_del(&req->rq_bc_pa_list);
+ spin_unlock(&xprt->bc_pa_lock);
+
+ req->rq_private_buf.len = copied;
+ set_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state);
+
+ dprintk("RPC: add callback request to list\n");
+ spin_lock(&bc_serv->sv_cb_lock);
+ list_add(&req->rq_bc_list, &bc_serv->sv_cb_list);
+ wake_up(&bc_serv->sv_cb_waitq);
+ spin_unlock(&bc_serv->sv_cb_lock);
+}
+
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index ae333c1845b..06636214113 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -374,7 +374,7 @@ void sunrpc_destroy_cache_detail(struct cache_detail *cd)
}
return;
out:
- printk(KERN_ERR "nfsd: failed to unregister %s cache\n", cd->name);
+ printk(KERN_ERR "RPC: failed to unregister %s cache\n", cd->name);
}
EXPORT_SYMBOL_GPL(sunrpc_destroy_cache_detail);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 0edada97343..2e6ab10734f 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -438,6 +438,38 @@ out_no_rpciod:
return ERR_PTR(err);
}
+/**
+ * rpc_create_xprt - create an RPC client on a transport supplied by the caller
+ * @args: rpc_clnt create argument structure
+ * @xprt: transport handed to rpc_new_client() for the new client
+ *
+ * Builds the client with rpc_new_client() and, unless
+ * RPC_CLNT_CREATE_NOPING is set in @args->flags, pings the server. If
+ * the ping fails the client is shut down again and the ping's error is
+ * returned. The soft-retry, autobind, discrtry and chatty settings of
+ * the client are then derived from @args->flags.
+ *
+ * Returns the new client, or an ERR_PTR value on failure.
+ */
+struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args,
+ struct rpc_xprt *xprt)
+{
+ struct rpc_clnt *clnt = NULL;
+
+ clnt = rpc_new_client(args, xprt, NULL);
+ if (IS_ERR(clnt))
+ return clnt;
+
+ if (!(args->flags & RPC_CLNT_CREATE_NOPING)) {
+ int err = rpc_ping(clnt);
+ if (err != 0) {
+ rpc_shutdown_client(clnt);
+ return ERR_PTR(err);
+ }
+ }
+
+ /* soft retry is the default; RPC_CLNT_CREATE_HARDRTRY turns it off */
+ clnt->cl_softrtry = 1;
+ if (args->flags & RPC_CLNT_CREATE_HARDRTRY)
+ clnt->cl_softrtry = 0;
+
+ if (args->flags & RPC_CLNT_CREATE_AUTOBIND)
+ clnt->cl_autobind = 1;
+ if (args->flags & RPC_CLNT_CREATE_DISCRTRY)
+ clnt->cl_discrtry = 1;
+ if (!(args->flags & RPC_CLNT_CREATE_QUIET))
+ clnt->cl_chatty = 1;
+
+ return clnt;
+}
+EXPORT_SYMBOL_GPL(rpc_create_xprt);
+
/**
* rpc_create - create an RPC client and transport with one call
* @args: rpc_clnt create argument structure
@@ -451,7 +483,6 @@ out_no_rpciod:
struct rpc_clnt *rpc_create(struct rpc_create_args *args)
{
struct rpc_xprt *xprt;
- struct rpc_clnt *clnt;
struct xprt_create xprtargs = {
.net = args->net,
.ident = args->protocol,
@@ -515,30 +546,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
if (args->flags & RPC_CLNT_CREATE_NONPRIVPORT)
xprt->resvport = 0;
- clnt = rpc_new_client(args, xprt, NULL);
- if (IS_ERR(clnt))
- return clnt;
-
- if (!(args->flags & RPC_CLNT_CREATE_NOPING)) {
- int err = rpc_ping(clnt);
- if (err != 0) {
- rpc_shutdown_client(clnt);
- return ERR_PTR(err);
- }
- }
-
- clnt->cl_softrtry = 1;
- if (args->flags & RPC_CLNT_CREATE_HARDRTRY)
- clnt->cl_softrtry = 0;
-
- if (args->flags & RPC_CLNT_CREATE_AUTOBIND)
- clnt->cl_autobind = 1;
- if (args->flags & RPC_CLNT_CREATE_DISCRTRY)
- clnt->cl_discrtry = 1;
- if (!(args->flags & RPC_CLNT_CREATE_QUIET))
- clnt->cl_chatty = 1;
-
- return clnt;
+ return rpc_create_xprt(args, xprt);
}
EXPORT_SYMBOL_GPL(rpc_create);
@@ -1363,6 +1371,7 @@ rpc_restart_call_prepare(struct rpc_task *task)
if (RPC_ASSASSINATED(task))
return 0;
task->tk_action = call_start;
+ task->tk_status = 0;
if (task->tk_ops->rpc_call_prepare != NULL)
task->tk_action = rpc_prepare_task;
return 1;
@@ -1379,6 +1388,7 @@ rpc_restart_call(struct rpc_task *task)
if (RPC_ASSASSINATED(task))
return 0;
task->tk_action = call_start;
+ task->tk_status = 0;
return 1;
}
EXPORT_SYMBOL_GPL(rpc_restart_call);
@@ -1728,9 +1738,7 @@ call_bind_status(struct rpc_task *task)
case -EPROTONOSUPPORT:
dprintk("RPC: %5u remote rpcbind version unavailable, retrying\n",
task->tk_pid);
- task->tk_status = 0;
- task->tk_action = call_bind;
- return;
+ goto retry_timeout;
case -ECONNREFUSED: /* connection problems */
case -ECONNRESET:
case -ECONNABORTED:
@@ -1756,6 +1764,7 @@ call_bind_status(struct rpc_task *task)
return;
retry_timeout:
+ task->tk_status = 0;
task->tk_action = call_timeout;
}
@@ -1798,21 +1807,19 @@ call_connect_status(struct rpc_task *task)
trace_rpc_connect_status(task, status);
task->tk_status = 0;
switch (status) {
- /* if soft mounted, test if we've timed out */
- case -ETIMEDOUT:
- task->tk_action = call_timeout;
- return;
case -ECONNREFUSED:
case -ECONNRESET:
case -ECONNABORTED:
case -ENETUNREACH:
case -EHOSTUNREACH:
- /* retry with existing socket, after a delay */
- rpc_delay(task, 3*HZ);
if (RPC_IS_SOFTCONN(task))
break;
+ /* retry with existing socket, after a delay */
+ rpc_delay(task, 3*HZ);
case -EAGAIN:
- task->tk_action = call_bind;
+ /* Check for timeouts before looping back to call_bind */
+ case -ETIMEDOUT:
+ task->tk_action = call_timeout;
return;
case 0:
clnt->cl_stats->netreconn++;
@@ -2007,6 +2014,10 @@ call_status(struct rpc_task *task)
case -EHOSTDOWN:
case -EHOSTUNREACH:
case -ENETUNREACH:
+ if (RPC_IS_SOFTCONN(task)) {
+ rpc_exit(task, status);
+ break;
+ }
/*
* Delay any retries for 3 seconds, then handle as if it
* were a timeout.
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index ff3cc4bf4b2..c0365c14b85 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -637,7 +637,8 @@ static void __rpc_queue_timer_fn(unsigned long ptr)
static void __rpc_atrun(struct rpc_task *task)
{
- task->tk_status = 0;
+ if (task->tk_status == -ETIMEDOUT)
+ task->tk_status = 0;
}
/*
@@ -831,7 +832,8 @@ static void rpc_async_schedule(struct work_struct *work)
* @size: requested byte size
*
* To prevent rpciod from hanging, this allocator never sleeps,
- * returning NULL if the request cannot be serviced immediately.
+ * returning NULL and suppressing warning if the request cannot be serviced
+ * immediately.
* The caller can arrange to sleep in a way that is safe for rpciod.
*
* Most requests are 'small' (under 2KiB) and can be serviced from a
@@ -844,7 +846,7 @@ static void rpc_async_schedule(struct work_struct *work)
void *rpc_malloc(struct rpc_task *task, size_t size)
{
struct rpc_buffer *buf;
- gfp_t gfp = GFP_NOWAIT;
+ gfp_t gfp = GFP_NOWAIT | __GFP_NOWARN;
if (RPC_IS_SWAPPER(task))
gfp |= __GFP_MEMALLOC;
diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c
index 0a648c502fc..2df87f78e51 100644
--- a/net/sunrpc/socklib.c
+++ b/net/sunrpc/socklib.c
@@ -173,7 +173,8 @@ int csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb)
return -1;
if (csum_fold(desc.csum))
return -1;
- if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
+ if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
+ !skb->csum_complete_sw)
netdev_rx_csum_fault(skb->dev);
return 0;
no_checksum:
diff --git a/net/sunrpc/sunrpc.h b/net/sunrpc/sunrpc.h
index 14c9f6d1c5f..f2b7cb540e6 100644
--- a/net/sunrpc/sunrpc.h
+++ b/net/sunrpc/sunrpc.h
@@ -43,6 +43,19 @@ static inline int rpc_reply_expected(struct rpc_task *task)
(task->tk_msg.rpc_proc->p_decode != NULL);
}
+/*
+ * Report whether the dst entry cached in sk->sk_dst_cache refers to a
+ * device that has NETIF_F_LOOPBACK set in its features. The cached dst
+ * is read under rcu_read_lock().
+ *
+ * Returns 1 if so, and 0 otherwise (including when there is no cached
+ * dst or the dst has no device).
+ */
+static inline int sock_is_loopback(struct sock *sk)
+{
+ struct dst_entry *dst;
+ int loopback = 0;
+ rcu_read_lock();
+ dst = rcu_dereference(sk->sk_dst_cache);
+ if (dst && dst->dev &&
+ (dst->dev->features & NETIF_F_LOOPBACK))
+ loopback = 1;
+ rcu_read_unlock();
+ return loopback;
+}
+
int svc_send_common(struct socket *sock, struct xdr_buf *xdr,
struct page *headpage, unsigned long headoffset,
struct page *tailpage, unsigned long tailoffset);
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 06c6ff0cb91..b4737fbdec1 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -597,6 +597,7 @@ static int svc_alloc_arg(struct svc_rqst *rqstp)
}
rqstp->rq_pages[i] = p;
}
+ rqstp->rq_page_end = &rqstp->rq_pages[i];
rqstp->rq_pages[i++] = NULL; /* this might be seen in nfs_read_actor */
/* Make arg->head point to first page and arg->pages point to rest */
@@ -730,6 +731,8 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
newxpt = xprt->xpt_ops->xpo_accept(xprt);
if (newxpt)
svc_add_new_temp_xprt(serv, newxpt);
+ else
+ module_put(xprt->xpt_class->xcl_owner);
} else if (xprt->xpt_ops->xpo_has_wspace(xprt)) {
/* XPT_DATA|XPT_DEFERRED case: */
dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n",
@@ -793,7 +796,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
clear_bit(XPT_OLD, &xprt->xpt_flags);
- rqstp->rq_secure = svc_port_is_privileged(svc_addr(rqstp));
+ rqstp->rq_secure = xprt->xpt_ops->xpo_secure_port(rqstp);
rqstp->rq_chandle.defer = svc_defer;
if (serv->sv_stats)
diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c
index 2af7b0cba43..79c0f3459b5 100644
--- a/net/sunrpc/svcauth.c
+++ b/net/sunrpc/svcauth.c
@@ -54,6 +54,8 @@ svc_authenticate(struct svc_rqst *rqstp, __be32 *authp)
}
spin_unlock(&authtab_lock);
+ rqstp->rq_auth_slack = 0;
+
rqstp->rq_authop = aops;
return aops->accept(rqstp, authp);
}
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index b6e59f0a947..b507cd327d9 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -60,7 +60,7 @@
static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *,
int flags);
-static void svc_udp_data_ready(struct sock *, int);
+static void svc_udp_data_ready(struct sock *);
static int svc_udp_recvfrom(struct svc_rqst *);
static int svc_udp_sendto(struct svc_rqst *);
static void svc_sock_detach(struct svc_xprt *);
@@ -400,17 +400,23 @@ static void svc_sock_setbufsize(struct socket *sock, unsigned int snd,
release_sock(sock->sk);
#endif
}
+
+/*
+ * xpo_secure_port method used by the UDP, TCP and TCP backchannel
+ * transport ops in this file: returns the result of
+ * svc_port_is_privileged() on svc_addr(rqstp).
+ */
+static int svc_sock_secure_port(struct svc_rqst *rqstp)
+{
+ return svc_port_is_privileged(svc_addr(rqstp));
+}
+
/*
* INET callback when data has been received on the socket.
*/
-static void svc_udp_data_ready(struct sock *sk, int count)
+static void svc_udp_data_ready(struct sock *sk)
{
struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;
wait_queue_head_t *wq = sk_sleep(sk);
if (svsk) {
- dprintk("svc: socket %p(inet %p), count=%d, busy=%d\n",
- svsk, sk, count,
+ dprintk("svc: socket %p(inet %p), busy=%d\n",
+ svsk, sk,
test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags));
set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
svc_xprt_enqueue(&svsk->sk_xprt);
@@ -678,6 +684,7 @@ static struct svc_xprt_ops svc_udp_ops = {
.xpo_prep_reply_hdr = svc_udp_prep_reply_hdr,
.xpo_has_wspace = svc_udp_has_wspace,
.xpo_accept = svc_udp_accept,
+ .xpo_secure_port = svc_sock_secure_port,
};
static struct svc_xprt_class svc_udp_class = {
@@ -731,7 +738,7 @@ static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv)
* A data_ready event on a listening socket means there's a connection
* pending. Do not use state_change as a substitute for it.
*/
-static void svc_tcp_listen_data_ready(struct sock *sk, int count_unused)
+static void svc_tcp_listen_data_ready(struct sock *sk)
{
struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;
wait_queue_head_t *wq;
@@ -783,7 +790,7 @@ static void svc_tcp_state_change(struct sock *sk)
wake_up_interruptible_all(wq);
}
-static void svc_tcp_data_ready(struct sock *sk, int count)
+static void svc_tcp_data_ready(struct sock *sk)
{
struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;
wait_queue_head_t *wq = sk_sleep(sk);
@@ -842,8 +849,7 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
* tell us anything. For now just warn about unpriv connections.
*/
if (!svc_port_is_privileged(sin)) {
- dprintk(KERN_WARNING
- "%s: connect from unprivileged port: %s\n",
+ dprintk("%s: connect from unprivileged port: %s\n",
serv->sv_name,
__svc_print_addr(sin, buf, sizeof(buf)));
}
@@ -867,6 +873,10 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
}
svc_xprt_set_local(&newsvsk->sk_xprt, sin, slen);
+ if (sock_is_loopback(newsock->sk))
+ set_bit(XPT_LOCAL, &newsvsk->sk_xprt.xpt_flags);
+ else
+ clear_bit(XPT_LOCAL, &newsvsk->sk_xprt.xpt_flags);
if (serv->sv_stats)
serv->sv_stats->nettcpconn++;
@@ -1112,6 +1122,7 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
rqstp->rq_xprt_ctxt = NULL;
rqstp->rq_prot = IPPROTO_TCP;
+ rqstp->rq_local = !!test_bit(XPT_LOCAL, &svsk->sk_xprt.xpt_flags);
p = (__be32 *)rqstp->rq_arg.head[0].iov_base;
calldir = p[1];
@@ -1234,6 +1245,7 @@ static struct svc_xprt_ops svc_tcp_bc_ops = {
.xpo_detach = svc_bc_tcp_sock_detach,
.xpo_free = svc_bc_sock_free,
.xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr,
+ .xpo_secure_port = svc_sock_secure_port,
};
static struct svc_xprt_class svc_tcp_bc_class = {
@@ -1272,6 +1284,7 @@ static struct svc_xprt_ops svc_tcp_ops = {
.xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr,
.xpo_has_wspace = svc_tcp_has_wspace,
.xpo_accept = svc_tcp_accept,
+ .xpo_secure_port = svc_sock_secure_port,
};
static struct svc_xprt_class svc_tcp_class = {
@@ -1397,6 +1410,22 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
return svsk;
}
+/**
+ * svc_alien_sock - check whether a socket belongs to a different net
+ * @net: network namespace to compare against
+ * @fd: file descriptor of the socket to check
+ *
+ * Returns true if @fd refers to a socket whose sock_net() differs from
+ * @net. Returns false if they match, or if @fd could not be looked up
+ * as a socket.
+ */
+bool svc_alien_sock(struct net *net, int fd)
+{
+ int err;
+ struct socket *sock = sockfd_lookup(fd, &err);
+ bool ret = false;
+
+ if (!sock)
+ goto out;
+ if (sock_net(sock->sk) != net)
+ ret = true;
+ /* drop the reference taken by sockfd_lookup() */
+ sockfd_put(sock);
+out:
+ return ret;
+}
+EXPORT_SYMBOL_GPL(svc_alien_sock);
+
/**
* svc_addsock - add a listener socket to an RPC service
* @serv: pointer to RPC service to which to add a new listener
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 1504bb11e4f..23fb4e75e24 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -462,6 +462,7 @@ void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
struct kvec *iov = buf->head;
int scratch_len = buf->buflen - buf->page_len - buf->tail[0].iov_len;
+ xdr_set_scratch_buffer(xdr, NULL, 0);
BUG_ON(scratch_len < 0);
xdr->buf = buf;
xdr->iov = iov;
@@ -482,6 +483,73 @@ void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
EXPORT_SYMBOL_GPL(xdr_init_encode);
/**
+ * xdr_commit_encode - Ensure all data is written to buffer
+ * @xdr: pointer to xdr_stream
+ *
+ * We handle encoding across page boundaries by giving the caller a
+ * temporary location to write to, then later copying the data into
+ * place; xdr_commit_encode does that copying.
+ *
+ * Normally the caller doesn't need to call this directly, as the
+ * following xdr_reserve_space will do it. But an explicit call may be
+ * required at the end of encoding, or any other time when the xdr_buf
+ * data might be read.
+ */
+void xdr_commit_encode(struct xdr_stream *xdr)
+{
+ int shift = xdr->scratch.iov_len;
+ void *page;
+
+ /* a zero scratch length means there is nothing pending to copy back */
+ if (shift == 0)
+ return;
+ page = page_address(*xdr->page_ptr);
+ /* copy the first 'shift' bytes of the page to the saved scratch location */
+ memcpy(xdr->scratch.iov_base, page, shift);
+ /* then slide the following (xdr->p - page) bytes down to the page start */
+ memmove(page, page + shift, (void *)xdr->p - page);
+ xdr->scratch.iov_len = 0;
+}
+EXPORT_SYMBOL_GPL(xdr_commit_encode);
+
+/*
+ * Called by xdr_reserve_space when the current buffer cannot hold
+ * @nbytes. Moves the stream on to the next page and returns a pointer
+ * to the start of that page for the caller to encode into. The unused
+ * space left at the end of the previous buffer is recorded in
+ * xdr->scratch so that xdr_commit_encode can later copy the data back
+ * into place.
+ *
+ * Returns NULL if @nbytes is larger than PAGE_SIZE or if the xdr_buf
+ * does not have @nbytes of space left.
+ */
+__be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr, size_t nbytes)
+{
+ /*
+ * p is per-call state and is always assigned before it is read, so
+ * it must be an automatic variable: a static here would be shared
+ * by every stream being encoded concurrently.
+ */
+ __be32 *p;
+ int space_left;
+ int frag1bytes, frag2bytes;
+
+ if (nbytes > PAGE_SIZE)
+ return NULL; /* Bigger buffers require special handling */
+ if (xdr->buf->len + nbytes > xdr->buf->buflen)
+ return NULL; /* Sorry, we're totally out of space */
+ frag1bytes = (xdr->end - xdr->p) << 2;
+ frag2bytes = nbytes - frag1bytes;
+ if (xdr->iov)
+ xdr->iov->iov_len += frag1bytes;
+ else
+ xdr->buf->page_len += frag1bytes;
+ xdr->page_ptr++;
+ xdr->iov = NULL;
+ /*
+ * If the last encode didn't end exactly on a page boundary, the
+ * next one will straddle boundaries. Encode into the next
+ * page, then copy it back later in xdr_commit_encode. We use
+ * the "scratch" iov to track any temporarily unused fragment of
+ * space at the end of the previous buffer:
+ */
+ xdr->scratch.iov_base = xdr->p;
+ xdr->scratch.iov_len = frag1bytes;
+ p = page_address(*xdr->page_ptr);
+ /*
+ * Note this is where the next encode will start after we've
+ * shifted this one back:
+ */
+ xdr->p = (void *)p + frag2bytes;
+ space_left = xdr->buf->buflen - xdr->buf->len;
+ xdr->end = (void *)p + min_t(int, space_left, PAGE_SIZE);
+ xdr->buf->page_len += frag2bytes;
+ xdr->buf->len += nbytes;
+ return p;
+}
+
+/**
* xdr_reserve_space - Reserve buffer space for sending
* @xdr: pointer to xdr_stream
* @nbytes: number of bytes to reserve
@@ -495,20 +563,122 @@ __be32 * xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes)
__be32 *p = xdr->p;
__be32 *q;
+ xdr_commit_encode(xdr);
/* align nbytes on the next 32-bit boundary */
nbytes += 3;
nbytes &= ~3;
q = p + (nbytes >> 2);
if (unlikely(q > xdr->end || q < p))
- return NULL;
+ return xdr_get_next_encode_buffer(xdr, nbytes);
xdr->p = q;
- xdr->iov->iov_len += nbytes;
+ if (xdr->iov)
+ xdr->iov->iov_len += nbytes;
+ else
+ xdr->buf->page_len += nbytes;
xdr->buf->len += nbytes;
return p;
}
EXPORT_SYMBOL_GPL(xdr_reserve_space);
/**
+ * xdr_truncate_encode - truncate an encode buffer
+ * @xdr: pointer to xdr_stream
+ * @len: new length of buffer
+ *
+ * Truncates the xdr stream, so that xdr->buf->len == len,
+ * and xdr->p points at offset len from the start of the buffer, and
+ * head, tail, and page lengths are adjusted to correspond.
+ *
+ * If this means moving xdr->p to a different buffer, we assume that
+ * the end pointer should be set to the end of the current page,
+ * except in the case of the head buffer when we assume the head
+ * buffer's current length represents the end of the available buffer.
+ *
+ * This is *not* safe to use on a buffer that already has inlined page
+ * cache pages (as in a zero-copy server read reply), except for the
+ * simple case of truncating from one position in the tail to another.
+ *
+ */
+void xdr_truncate_encode(struct xdr_stream *xdr, size_t len)
+{
+ struct xdr_buf *buf = xdr->buf;
+ struct kvec *head = buf->head;
+ struct kvec *tail = buf->tail;
+ int fraglen;
+ int new, old;
+
+ /* this function only shrinks; a larger len is a caller bug */
+ if (len > buf->len) {
+ WARN_ON_ONCE(1);
+ return;
+ }
+ xdr_commit_encode(xdr);
+
+ /* step 1: take as much as possible off the tail */
+ fraglen = min_t(int, buf->len - len, tail->iov_len);
+ tail->iov_len -= fraglen;
+ buf->len -= fraglen;
+ if (tail->iov_len && buf->len == len) {
+ xdr->p = tail->iov_base + tail->iov_len;
+ /* xdr->end, xdr->iov should be set already */
+ return;
+ }
+ WARN_ON_ONCE(fraglen);
+ /* step 2: take as much as possible of the remainder off the page data */
+ fraglen = min_t(int, buf->len - len, buf->page_len);
+ buf->page_len -= fraglen;
+ buf->len -= fraglen;
+
+ new = buf->page_base + buf->page_len;
+ old = new + fraglen;
+ /*
+ * NOTE(review): this steps page_ptr back by the number of page
+ * boundaries between the old and new page-data offsets, which
+ * presumably assumes page_ptr currently points at the page that
+ * holds offset 'old' - confirm for a stream that ended exactly on
+ * a page boundary.
+ */
+ xdr->page_ptr -= (old >> PAGE_SHIFT) - (new >> PAGE_SHIFT);
+
+ if (buf->page_len && buf->len == len) {
+ xdr->p = page_address(*xdr->page_ptr);
+ xdr->end = (void *)xdr->p + PAGE_SIZE;
+ xdr->p = (void *)xdr->p + (new % PAGE_SIZE);
+ /* xdr->iov should already be NULL */
+ return;
+ }
+ /* step 3: the new end of data lies in the head */
+ if (fraglen) {
+ xdr->end = head->iov_base + head->iov_len;
+ xdr->page_ptr--;
+ }
+ /* (otherwise assume xdr->end is already set) */
+ head->iov_len = len;
+ buf->len = len;
+ xdr->p = head->iov_base + head->iov_len;
+ xdr->iov = buf->head;
+}
+EXPORT_SYMBOL(xdr_truncate_encode);
+
+/**
+ * xdr_restrict_buflen - decrease available buffer space
+ * @xdr: pointer to xdr_stream
+ * @newbuflen: new maximum number of bytes available
+ *
+ * Adjust our idea of how much space is available in the buffer.
+ * If we've already used too much space in the buffer, returns -1.
+ * If the available space is already smaller than newbuflen, returns 0
+ * and does nothing. Otherwise, adjusts xdr->buf->buflen to newbuflen
+ * and ensures xdr->end is set at most offset newbuflen from the start
+ * of the buffer.
+ */
+int xdr_restrict_buflen(struct xdr_stream *xdr, int newbuflen)
+{
+ struct xdr_buf *buf = xdr->buf;
+ int left_in_this_buf = (void *)xdr->end - (void *)xdr->p;
+ /* bytes encoded so far plus what is still free up to xdr->end */
+ int end_offset = buf->len + left_in_this_buf;
+
+ if (newbuflen < 0 || newbuflen < buf->len)
+ return -1;
+ if (newbuflen > buf->buflen)
+ return 0;
+ /* pull xdr->end back by the amount end_offset exceeds the new limit */
+ if (newbuflen < end_offset)
+ xdr->end = (void *)xdr->end + newbuflen - end_offset;
+ buf->buflen = newbuflen;
+ return 0;
+}
+EXPORT_SYMBOL(xdr_restrict_buflen);
+
+/**
* xdr_write_pages - Insert a list of pages into an XDR buffer for sending
* @xdr: pointer to xdr_stream
* @pages: list of pages
@@ -833,8 +1003,20 @@ xdr_buf_from_iov(struct kvec *iov, struct xdr_buf *buf)
}
EXPORT_SYMBOL_GPL(xdr_buf_from_iov);
-/* Sets subbuf to the portion of buf of length len beginning base bytes
- * from the start of buf. Returns -1 if base of length are out of bounds. */
+/**
+ * xdr_buf_subsegment - set subbuf to a portion of buf
+ * @buf: an xdr buffer
+ * @subbuf: the result buffer
+ * @base: beginning of range in bytes
+ * @len: length of range in bytes
+ *
+ * sets @subbuf to an xdr buffer representing the portion of @buf of
+ * length @len starting at offset @base.
+ *
+ * @buf and @subbuf may be pointers to the same struct xdr_buf.
+ *
+ * Returns -1 if base or length are out of bounds.
+ */
int
xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf,
unsigned int base, unsigned int len)
@@ -847,9 +1029,8 @@ xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf,
len -= subbuf->head[0].iov_len;
base = 0;
} else {
- subbuf->head[0].iov_base = NULL;
- subbuf->head[0].iov_len = 0;
base -= buf->head[0].iov_len;
+ subbuf->head[0].iov_len = 0;
}
if (base < buf->page_len) {
@@ -871,9 +1052,8 @@ xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf,
len -= subbuf->tail[0].iov_len;
base = 0;
} else {
- subbuf->tail[0].iov_base = NULL;
- subbuf->tail[0].iov_len = 0;
base -= buf->tail[0].iov_len;
+ subbuf->tail[0].iov_len = 0;
}
if (base || len)
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 7d4df99f761..c3b2b3369e5 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -71,24 +71,6 @@ static void xprt_destroy(struct rpc_xprt *xprt);
static DEFINE_SPINLOCK(xprt_list_lock);
static LIST_HEAD(xprt_list);
-/*
- * The transport code maintains an estimate on the maximum number of out-
- * standing RPC requests, using a smoothed version of the congestion
- * avoidance implemented in 44BSD. This is basically the Van Jacobson
- * congestion algorithm: If a retransmit occurs, the congestion window is
- * halved; otherwise, it is incremented by 1/cwnd when
- *
- * - a reply is received and
- * - a full number of requests are outstanding and
- * - the congestion window hasn't been updated recently.
- */
-#define RPC_CWNDSHIFT (8U)
-#define RPC_CWNDSCALE (1U << RPC_CWNDSHIFT)
-#define RPC_INITCWND RPC_CWNDSCALE
-#define RPC_MAXCWND(xprt) ((xprt)->max_reqs << RPC_CWNDSHIFT)
-
-#define RPCXPRT_CONGESTED(xprt) ((xprt)->cong >= (xprt)->cwnd)
-
/**
* xprt_register_transport - register a transport implementation
* @transport: transport to register
@@ -230,9 +212,9 @@ static void xprt_clear_locked(struct rpc_xprt *xprt)
{
xprt->snd_task = NULL;
if (!test_bit(XPRT_CLOSE_WAIT, &xprt->state)) {
- smp_mb__before_clear_bit();
+ smp_mb__before_atomic();
clear_bit(XPRT_LOCKED, &xprt->state);
- smp_mb__after_clear_bit();
+ smp_mb__after_atomic();
} else
queue_work(rpciod_workqueue, &xprt->task_cleanup);
}
@@ -446,7 +428,15 @@ EXPORT_SYMBOL_GPL(xprt_release_rqst_cong);
* @task: recently completed RPC request used to adjust window
* @result: result code of completed RPC request
*
- * We use a time-smoothed congestion estimator to avoid heavy oscillation.
+ * The transport code maintains an estimate on the maximum number of out-
+ * standing RPC requests, using a smoothed version of the congestion
+ * avoidance implemented in 44BSD. This is basically the Van Jacobson
+ * congestion algorithm: If a retransmit occurs, the congestion window is
+ * halved; otherwise, it is incremented by 1/cwnd when
+ *
+ * - a reply is received and
+ * - a full number of requests are outstanding and
+ * - the congestion window hasn't been updated recently.
*/
void xprt_adjust_cwnd(struct rpc_xprt *xprt, struct rpc_task *task, int result)
{
@@ -1383,15 +1373,3 @@ void xprt_put(struct rpc_xprt *xprt)
if (atomic_dec_and_test(&xprt->count))
xprt_destroy(xprt);
}
-
-/**
- * xprt_get - return a reference to an RPC transport.
- * @xprt: pointer to the transport
- *
- */
-struct rpc_xprt *xprt_get(struct rpc_xprt *xprt)
-{
- if (atomic_inc_not_zero(&xprt->count))
- return xprt;
- return NULL;
-}
diff --git a/net/sunrpc/xprtrdma/Makefile b/net/sunrpc/xprtrdma/Makefile
index 5a8f268bdd3..da5136fd569 100644
--- a/net/sunrpc/xprtrdma/Makefile
+++ b/net/sunrpc/xprtrdma/Makefile
@@ -1,8 +1,8 @@
-obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma.o
+obj-$(CONFIG_SUNRPC_XPRT_RDMA_CLIENT) += xprtrdma.o
xprtrdma-y := transport.o rpc_rdma.o verbs.o
-obj-$(CONFIG_SUNRPC_XPRT_RDMA) += svcrdma.o
+obj-$(CONFIG_SUNRPC_XPRT_RDMA_SERVER) += svcrdma.o
svcrdma-y := svc_rdma.o svc_rdma_transport.o \
svc_rdma_marshal.o svc_rdma_sendto.o svc_rdma_recvfrom.o
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index e03725bfe2b..693966d3f33 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -78,8 +78,7 @@ static const char transfertypes[][12] = {
* elements. Segments are then coalesced when registered, if possible
* within the selected memreg mode.
*
- * Note, this routine is never called if the connection's memory
- * registration strategy is 0 (bounce buffers).
+ * Returns positive number of segments converted, or a negative errno.
*/
static int
@@ -102,10 +101,17 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
page_base = xdrbuf->page_base & ~PAGE_MASK;
p = 0;
while (len && n < nsegs) {
+ if (!ppages[p]) {
+ /* alloc the pagelist for receiving buffer */
+ ppages[p] = alloc_page(GFP_ATOMIC);
+ if (!ppages[p])
+ return -ENOMEM;
+ }
seg[n].mr_page = ppages[p];
seg[n].mr_offset = (void *)(unsigned long) page_base;
seg[n].mr_len = min_t(u32, PAGE_SIZE - page_base, len);
- BUG_ON(seg[n].mr_len > PAGE_SIZE);
+ if (seg[n].mr_len > PAGE_SIZE)
+ return -EIO;
len -= seg[n].mr_len;
++n;
++p;
@@ -114,7 +120,7 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
/* Message overflows the seg array */
if (len && n == nsegs)
- return 0;
+ return -EIO;
if (xdrbuf->tail[0].iov_len) {
/* the rpcrdma protocol allows us to omit any trailing
@@ -123,7 +129,7 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
return n;
if (n == nsegs)
/* Tail remains, but we're out of segments */
- return 0;
+ return -EIO;
seg[n].mr_page = NULL;
seg[n].mr_offset = xdrbuf->tail[0].iov_base;
seg[n].mr_len = xdrbuf->tail[0].iov_len;
@@ -164,15 +170,17 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
* Reply chunk (a counted array):
* N elements:
* 1 - N - HLOO - HLOO - ... - HLOO
+ *
+ * Returns positive RPC/RDMA header size, or negative errno.
*/
-static unsigned int
+static ssize_t
rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
struct rpcrdma_msg *headerp, enum rpcrdma_chunktype type)
{
struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
- int nsegs, nchunks = 0;
+ int n, nsegs, nchunks = 0;
unsigned int pos;
struct rpcrdma_mr_seg *seg = req->rl_segments;
struct rpcrdma_read_chunk *cur_rchunk = NULL;
@@ -198,12 +206,11 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
pos = target->head[0].iov_len;
nsegs = rpcrdma_convert_iovs(target, pos, type, seg, RPCRDMA_MAX_SEGS);
- if (nsegs == 0)
- return 0;
+ if (nsegs < 0)
+ return nsegs;
do {
- /* bind/register the memory, then build chunk from result. */
- int n = rpcrdma_register_external(seg, nsegs,
+ n = rpcrdma_register_external(seg, nsegs,
cur_wchunk != NULL, r_xprt);
if (n <= 0)
goto out;
@@ -248,10 +255,6 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
/* success. all failures return above */
req->rl_nchunks = nchunks;
- BUG_ON(nchunks == 0);
- BUG_ON((r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_FRMR)
- && (nchunks > 3));
-
/*
* finish off header. If write, marshal discrim and nchunks.
*/
@@ -278,8 +281,8 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
out:
for (pos = 0; nchunks--;)
pos += rpcrdma_deregister_external(
- &req->rl_segments[pos], r_xprt, NULL);
- return 0;
+ &req->rl_segments[pos], r_xprt);
+ return n;
}
/*
@@ -361,6 +364,8 @@ rpcrdma_inline_pullup(struct rpc_rqst *rqst, int pad)
* [1] -- the RPC header/data, marshaled by RPC and the NFS protocol.
* [2] -- optional padding.
* [3] -- if padded, header only in [1] and data here.
+ *
+ * Returns zero on success, otherwise a negative errno.
*/
int
@@ -370,7 +375,8 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
char *base;
- size_t hdrlen, rpclen, padlen;
+ size_t rpclen, padlen;
+ ssize_t hdrlen;
enum rpcrdma_chunktype rtype, wtype;
struct rpcrdma_msg *headerp;
@@ -441,14 +447,10 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
/* The following simplification is not true forever */
if (rtype != rpcrdma_noch && wtype == rpcrdma_replych)
wtype = rpcrdma_noch;
- BUG_ON(rtype != rpcrdma_noch && wtype != rpcrdma_noch);
-
- if (r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_BOUNCEBUFFERS &&
- (rtype != rpcrdma_noch || wtype != rpcrdma_noch)) {
- /* forced to "pure inline"? */
- dprintk("RPC: %s: too much data (%d/%d) for inline\n",
- __func__, rqst->rq_rcv_buf.len, rqst->rq_snd_buf.len);
- return -1;
+ if (rtype != rpcrdma_noch && wtype != rpcrdma_noch) {
+ dprintk("RPC: %s: cannot marshal multiple chunk lists\n",
+ __func__);
+ return -EIO;
}
hdrlen = 28; /*sizeof *headerp;*/
@@ -474,8 +476,11 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
headerp->rm_body.rm_padded.rm_pempty[1] = xdr_zero;
headerp->rm_body.rm_padded.rm_pempty[2] = xdr_zero;
hdrlen += 2 * sizeof(u32); /* extra words in padhdr */
- BUG_ON(wtype != rpcrdma_noch);
-
+ if (wtype != rpcrdma_noch) {
+ dprintk("RPC: %s: invalid chunk list\n",
+ __func__);
+ return -EIO;
+ }
} else {
headerp->rm_body.rm_nochunks.rm_empty[0] = xdr_zero;
headerp->rm_body.rm_nochunks.rm_empty[1] = xdr_zero;
@@ -492,8 +497,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
* on receive. Therefore, we request a reply chunk
* for non-writes wherever feasible and efficient.
*/
- if (wtype == rpcrdma_noch &&
- r_xprt->rx_ia.ri_memreg_strategy > RPCRDMA_REGISTER)
+ if (wtype == rpcrdma_noch)
wtype = rpcrdma_replych;
}
}
@@ -511,9 +515,8 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
hdrlen = rpcrdma_create_chunks(rqst,
&rqst->rq_rcv_buf, headerp, wtype);
}
-
- if (hdrlen == 0)
- return -1;
+ if (hdrlen < 0)
+ return hdrlen;
dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd padlen %zd"
" headerp 0x%p base 0x%p lkey 0x%x\n",
@@ -649,9 +652,7 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
break;
page_base = 0;
}
- rqst->rq_rcv_buf.page_len = olen - copy_len;
- } else
- rqst->rq_rcv_buf.page_len = 0;
+ }
if (copy_len && rqst->rq_rcv_buf.tail[0].iov_len) {
curlen = copy_len;
@@ -682,15 +683,11 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
rqst->rq_private_buf = rqst->rq_rcv_buf;
}
-/*
- * This function is called when an async event is posted to
- * the connection which changes the connection state. All it
- * does at this point is mark the connection up/down, the rpc
- * timers do the rest.
- */
void
-rpcrdma_conn_func(struct rpcrdma_ep *ep)
+rpcrdma_connect_worker(struct work_struct *work)
{
+ struct rpcrdma_ep *ep =
+ container_of(work, struct rpcrdma_ep, rep_connect_worker.work);
struct rpc_xprt *xprt = ep->rep_xprt;
spin_lock_bh(&xprt->transport_lock);
@@ -707,13 +704,15 @@ rpcrdma_conn_func(struct rpcrdma_ep *ep)
}
/*
- * This function is called when memory window unbind which we are waiting
- * for completes. Just use rr_func (zeroed by upcall) to signal completion.
+ * This function is called when an async event is posted to
+ * the connection which changes the connection state. All it
+ * does at this point is mark the connection up/down, the rpc
+ * timers do the rest.
*/
-static void
-rpcrdma_unbind_func(struct rpcrdma_rep *rep)
+void
+rpcrdma_conn_func(struct rpcrdma_ep *ep)
{
- wake_up(&rep->rr_unbind);
+ schedule_delayed_work(&ep->rep_connect_worker, 0);
}
/*
@@ -730,7 +729,8 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep)
struct rpc_xprt *xprt = rep->rr_xprt;
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
__be32 *iptr;
- int i, rdmalen, status;
+ int rdmalen, status;
+ unsigned long cwnd;
/* Check status. If bad, signal disconnect and return rep to pool */
if (rep->rr_len == ~0U) {
@@ -785,6 +785,7 @@ repost:
/* from here on, the reply is no longer an orphan */
req->rl_reply = rep;
+ xprt->reestablish_timeout = 0;
/* check for expected message types */
/* The order of some of these tests is important. */
@@ -859,26 +860,10 @@ badheader:
break;
}
- /* If using mw bind, start the deregister process now. */
- /* (Note: if mr_free(), cannot perform it here, in tasklet context) */
- if (req->rl_nchunks) switch (r_xprt->rx_ia.ri_memreg_strategy) {
- case RPCRDMA_MEMWINDOWS:
- for (i = 0; req->rl_nchunks-- > 1;)
- i += rpcrdma_deregister_external(
- &req->rl_segments[i], r_xprt, NULL);
- /* Optionally wait (not here) for unbinds to complete */
- rep->rr_func = rpcrdma_unbind_func;
- (void) rpcrdma_deregister_external(&req->rl_segments[i],
- r_xprt, rep);
- break;
- case RPCRDMA_MEMWINDOWS_ASYNC:
- for (i = 0; req->rl_nchunks--;)
- i += rpcrdma_deregister_external(&req->rl_segments[i],
- r_xprt, NULL);
- break;
- default:
- break;
- }
+ cwnd = xprt->cwnd;
+ xprt->cwnd = atomic_read(&r_xprt->rx_buf.rb_credits) << RPC_CWNDSHIFT;
+ if (xprt->cwnd > cwnd)
+ xprt_release_rqst_cong(rqst->rq_task);
dprintk("RPC: %s: xprt_complete_rqst(0x%p, 0x%p, %d)\n",
__func__, xprt, rqst, status);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 0ce75524ed2..8f92a61ee2d 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved.
* Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -69,7 +70,8 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
/* Set up the XDR head */
rqstp->rq_arg.head[0].iov_base = page_address(page);
- rqstp->rq_arg.head[0].iov_len = min(byte_count, ctxt->sge[0].length);
+ rqstp->rq_arg.head[0].iov_len =
+ min_t(size_t, byte_count, ctxt->sge[0].length);
rqstp->rq_arg.len = byte_count;
rqstp->rq_arg.buflen = byte_count;
@@ -85,11 +87,12 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
page = ctxt->pages[sge_no];
put_page(rqstp->rq_pages[sge_no]);
rqstp->rq_pages[sge_no] = page;
- bc -= min(bc, ctxt->sge[sge_no].length);
+ bc -= min_t(u32, bc, ctxt->sge[sge_no].length);
rqstp->rq_arg.buflen += ctxt->sge[sge_no].length;
sge_no++;
}
rqstp->rq_respages = &rqstp->rq_pages[sge_no];
+ rqstp->rq_next_page = rqstp->rq_respages + 1;
/* We should never run out of SGE because the limit is defined to
* support the max allowed RPC data length
@@ -112,289 +115,265 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
rqstp->rq_arg.tail[0].iov_len = 0;
}
-/* Encode a read-chunk-list as an array of IB SGE
- *
- * Assumptions:
- * - chunk[0]->position points to pages[0] at an offset of 0
- * - pages[] is not physically or virtually contiguous and consists of
- * PAGE_SIZE elements.
- *
- * Output:
- * - sge array pointing into pages[] array.
- * - chunk_sge array specifying sge index and count for each
- * chunk in the read list
- *
- */
-static int map_read_chunks(struct svcxprt_rdma *xprt,
- struct svc_rqst *rqstp,
- struct svc_rdma_op_ctxt *head,
- struct rpcrdma_msg *rmsgp,
- struct svc_rdma_req_map *rpl_map,
- struct svc_rdma_req_map *chl_map,
- int ch_count,
- int byte_count)
+static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count)
{
- int sge_no;
- int sge_bytes;
- int page_off;
- int page_no;
- int ch_bytes;
- int ch_no;
- struct rpcrdma_read_chunk *ch;
+ if (rdma_node_get_transport(xprt->sc_cm_id->device->node_type) ==
+ RDMA_TRANSPORT_IWARP)
+ return 1;
+ else
+ return min_t(int, sge_count, xprt->sc_max_sge);
+}
- sge_no = 0;
- page_no = 0;
- page_off = 0;
- ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
- ch_no = 0;
- ch_bytes = ntohl(ch->rc_target.rs_length);
- head->arg.head[0] = rqstp->rq_arg.head[0];
- head->arg.tail[0] = rqstp->rq_arg.tail[0];
- head->arg.pages = &head->pages[head->count];
- head->hdr_count = head->count; /* save count of hdr pages */
- head->arg.page_base = 0;
- head->arg.page_len = ch_bytes;
- head->arg.len = rqstp->rq_arg.len + ch_bytes;
- head->arg.buflen = rqstp->rq_arg.buflen + ch_bytes;
- head->count++;
- chl_map->ch[0].start = 0;
- while (byte_count) {
- rpl_map->sge[sge_no].iov_base =
- page_address(rqstp->rq_arg.pages[page_no]) + page_off;
- sge_bytes = min_t(int, PAGE_SIZE-page_off, ch_bytes);
- rpl_map->sge[sge_no].iov_len = sge_bytes;
- /*
- * Don't bump head->count here because the same page
- * may be used by multiple SGE.
- */
- head->arg.pages[page_no] = rqstp->rq_arg.pages[page_no];
- rqstp->rq_respages = &rqstp->rq_arg.pages[page_no+1];
+typedef int (*rdma_reader_fn)(struct svcxprt_rdma *xprt,
+ struct svc_rqst *rqstp,
+ struct svc_rdma_op_ctxt *head,
+ int *page_no,
+ u32 *page_offset,
+ u32 rs_handle,
+ u32 rs_length,
+ u64 rs_offset,
+ int last);
+
+/* Issue an RDMA_READ using the local lkey to map the data sink */
+static int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt,
+ struct svc_rqst *rqstp,
+ struct svc_rdma_op_ctxt *head,
+ int *page_no,
+ u32 *page_offset,
+ u32 rs_handle,
+ u32 rs_length,
+ u64 rs_offset,
+ int last)
+{
+ struct ib_send_wr read_wr;
+ int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT;
+ struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt);
+ int ret, read, pno;
+ u32 pg_off = *page_offset;
+ u32 pg_no = *page_no;
- byte_count -= sge_bytes;
- ch_bytes -= sge_bytes;
- sge_no++;
- /*
- * If all bytes for this chunk have been mapped to an
- * SGE, move to the next SGE
- */
- if (ch_bytes == 0) {
- chl_map->ch[ch_no].count =
- sge_no - chl_map->ch[ch_no].start;
- ch_no++;
- ch++;
- chl_map->ch[ch_no].start = sge_no;
- ch_bytes = ntohl(ch->rc_target.rs_length);
- /* If bytes remaining account for next chunk */
- if (byte_count) {
- head->arg.page_len += ch_bytes;
- head->arg.len += ch_bytes;
- head->arg.buflen += ch_bytes;
- }
+ ctxt->direction = DMA_FROM_DEVICE;
+ ctxt->read_hdr = head;
+ pages_needed =
+ min_t(int, pages_needed, rdma_read_max_sge(xprt, pages_needed));
+ read = min_t(int, pages_needed << PAGE_SHIFT, rs_length);
+
+ for (pno = 0; pno < pages_needed; pno++) {
+ int len = min_t(int, rs_length, PAGE_SIZE - pg_off);
+
+ head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no];
+ head->arg.page_len += len;
+ head->arg.len += len;
+ if (!pg_off)
+ head->count++;
+ rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1];
+ rqstp->rq_next_page = rqstp->rq_respages + 1;
+ ctxt->sge[pno].addr =
+ ib_dma_map_page(xprt->sc_cm_id->device,
+ head->arg.pages[pg_no], pg_off,
+ PAGE_SIZE - pg_off,
+ DMA_FROM_DEVICE);
+ ret = ib_dma_mapping_error(xprt->sc_cm_id->device,
+ ctxt->sge[pno].addr);
+ if (ret)
+ goto err;
+ atomic_inc(&xprt->sc_dma_used);
+
+ /* The lkey here is either a local dma lkey or a dma_mr lkey */
+ ctxt->sge[pno].lkey = xprt->sc_dma_lkey;
+ ctxt->sge[pno].length = len;
+ ctxt->count++;
+
+ /* adjust offset and wrap to next page if needed */
+ pg_off += len;
+ if (pg_off == PAGE_SIZE) {
+ pg_off = 0;
+ pg_no++;
}
- /*
- * If this SGE consumed all of the page, move to the
- * next page
- */
- if ((sge_bytes + page_off) == PAGE_SIZE) {
- page_no++;
- page_off = 0;
- /*
- * If there are still bytes left to map, bump
- * the page count
- */
- if (byte_count)
- head->count++;
- } else
- page_off += sge_bytes;
+ rs_length -= len;
}
- BUG_ON(byte_count != 0);
- return sge_no;
+
+ if (last && rs_length == 0)
+ set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
+ else
+ clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
+
+ memset(&read_wr, 0, sizeof(read_wr));
+ read_wr.wr_id = (unsigned long)ctxt;
+ read_wr.opcode = IB_WR_RDMA_READ;
+ ctxt->wr_op = read_wr.opcode;
+ read_wr.send_flags = IB_SEND_SIGNALED;
+ read_wr.wr.rdma.rkey = rs_handle;
+ read_wr.wr.rdma.remote_addr = rs_offset;
+ read_wr.sg_list = ctxt->sge;
+ read_wr.num_sge = pages_needed;
+
+ ret = svc_rdma_send(xprt, &read_wr);
+ if (ret) {
+ pr_err("svcrdma: Error %d posting RDMA_READ\n", ret);
+ set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
+ goto err;
+ }
+
+ /* return current location in page array */
+ *page_no = pg_no;
+ *page_offset = pg_off;
+ ret = read;
+ atomic_inc(&rdma_stat_read);
+ return ret;
+ err:
+ svc_rdma_unmap_dma(ctxt);
+ svc_rdma_put_context(ctxt, 0);
+ return ret;
}
-/* Map a read-chunk-list to an XDR and fast register the page-list.
- *
- * Assumptions:
- * - chunk[0] position points to pages[0] at an offset of 0
- * - pages[] will be made physically contiguous by creating a one-off memory
- * region using the fastreg verb.
- * - byte_count is # of bytes in read-chunk-list
- * - ch_count is # of chunks in read-chunk-list
- *
- * Output:
- * - sge array pointing into pages[] array.
- * - chunk_sge array specifying sge index and count for each
- * chunk in the read list
- */
-static int fast_reg_read_chunks(struct svcxprt_rdma *xprt,
+/* Issue an RDMA_READ using an FRMR to map the data sink */
+static int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
struct svc_rqst *rqstp,
struct svc_rdma_op_ctxt *head,
- struct rpcrdma_msg *rmsgp,
- struct svc_rdma_req_map *rpl_map,
- struct svc_rdma_req_map *chl_map,
- int ch_count,
- int byte_count)
+ int *page_no,
+ u32 *page_offset,
+ u32 rs_handle,
+ u32 rs_length,
+ u64 rs_offset,
+ int last)
{
- int page_no;
- int ch_no;
- u32 offset;
- struct rpcrdma_read_chunk *ch;
- struct svc_rdma_fastreg_mr *frmr;
- int ret = 0;
+ struct ib_send_wr read_wr;
+ struct ib_send_wr inv_wr;
+ struct ib_send_wr fastreg_wr;
+ u8 key;
+ int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT;
+ struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt);
+ struct svc_rdma_fastreg_mr *frmr = svc_rdma_get_frmr(xprt);
+ int ret, read, pno;
+ u32 pg_off = *page_offset;
+ u32 pg_no = *page_no;
- frmr = svc_rdma_get_frmr(xprt);
if (IS_ERR(frmr))
return -ENOMEM;
- head->frmr = frmr;
- head->arg.head[0] = rqstp->rq_arg.head[0];
- head->arg.tail[0] = rqstp->rq_arg.tail[0];
- head->arg.pages = &head->pages[head->count];
- head->hdr_count = head->count; /* save count of hdr pages */
- head->arg.page_base = 0;
- head->arg.page_len = byte_count;
- head->arg.len = rqstp->rq_arg.len + byte_count;
- head->arg.buflen = rqstp->rq_arg.buflen + byte_count;
+ ctxt->direction = DMA_FROM_DEVICE;
+ ctxt->frmr = frmr;
+ pages_needed = min_t(int, pages_needed, xprt->sc_frmr_pg_list_len);
+ read = min_t(int, pages_needed << PAGE_SHIFT, rs_length);
- /* Fast register the page list */
- frmr->kva = page_address(rqstp->rq_arg.pages[0]);
+ frmr->kva = page_address(rqstp->rq_arg.pages[pg_no]);
frmr->direction = DMA_FROM_DEVICE;
frmr->access_flags = (IB_ACCESS_LOCAL_WRITE|IB_ACCESS_REMOTE_WRITE);
- frmr->map_len = byte_count;
- frmr->page_list_len = PAGE_ALIGN(byte_count) >> PAGE_SHIFT;
- for (page_no = 0; page_no < frmr->page_list_len; page_no++) {
- frmr->page_list->page_list[page_no] =
+ frmr->map_len = pages_needed << PAGE_SHIFT;
+ frmr->page_list_len = pages_needed;
+
+ for (pno = 0; pno < pages_needed; pno++) {
+ int len = min_t(int, rs_length, PAGE_SIZE - pg_off);
+
+ head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no];
+ head->arg.page_len += len;
+ head->arg.len += len;
+ if (!pg_off)
+ head->count++;
+ rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1];
+ rqstp->rq_next_page = rqstp->rq_respages + 1;
+ frmr->page_list->page_list[pno] =
ib_dma_map_page(xprt->sc_cm_id->device,
- rqstp->rq_arg.pages[page_no], 0,
+ head->arg.pages[pg_no], 0,
PAGE_SIZE, DMA_FROM_DEVICE);
- if (ib_dma_mapping_error(xprt->sc_cm_id->device,
- frmr->page_list->page_list[page_no]))
- goto fatal_err;
+ ret = ib_dma_mapping_error(xprt->sc_cm_id->device,
+ frmr->page_list->page_list[pno]);
+ if (ret)
+ goto err;
atomic_inc(&xprt->sc_dma_used);
- head->arg.pages[page_no] = rqstp->rq_arg.pages[page_no];
- }
- head->count += page_no;
- /* rq_respages points one past arg pages */
- rqstp->rq_respages = &rqstp->rq_arg.pages[page_no];
-
- /* Create the reply and chunk maps */
- offset = 0;
- ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
- for (ch_no = 0; ch_no < ch_count; ch_no++) {
- int len = ntohl(ch->rc_target.rs_length);
- rpl_map->sge[ch_no].iov_base = frmr->kva + offset;
- rpl_map->sge[ch_no].iov_len = len;
- chl_map->ch[ch_no].count = 1;
- chl_map->ch[ch_no].start = ch_no;
- offset += len;
- ch++;
+ /* adjust offset and wrap to next page if needed */
+ pg_off += len;
+ if (pg_off == PAGE_SIZE) {
+ pg_off = 0;
+ pg_no++;
+ }
+ rs_length -= len;
}
- ret = svc_rdma_fastreg(xprt, frmr);
- if (ret)
- goto fatal_err;
-
- return ch_no;
-
- fatal_err:
- printk("svcrdma: error fast registering xdr for xprt %p", xprt);
- svc_rdma_put_frmr(xprt, frmr);
- return -EIO;
-}
-
-static int rdma_set_ctxt_sge(struct svcxprt_rdma *xprt,
- struct svc_rdma_op_ctxt *ctxt,
- struct svc_rdma_fastreg_mr *frmr,
- struct kvec *vec,
- u64 *sgl_offset,
- int count)
-{
- int i;
- unsigned long off;
+ if (last && rs_length == 0)
+ set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
+ else
+ clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
- ctxt->count = count;
- ctxt->direction = DMA_FROM_DEVICE;
- for (i = 0; i < count; i++) {
- ctxt->sge[i].length = 0; /* in case map fails */
- if (!frmr) {
- BUG_ON(!virt_to_page(vec[i].iov_base));
- off = (unsigned long)vec[i].iov_base & ~PAGE_MASK;
- ctxt->sge[i].addr =
- ib_dma_map_page(xprt->sc_cm_id->device,
- virt_to_page(vec[i].iov_base),
- off,
- vec[i].iov_len,
- DMA_FROM_DEVICE);
- if (ib_dma_mapping_error(xprt->sc_cm_id->device,
- ctxt->sge[i].addr))
- return -EINVAL;
- ctxt->sge[i].lkey = xprt->sc_dma_lkey;
- atomic_inc(&xprt->sc_dma_used);
- } else {
- ctxt->sge[i].addr = (unsigned long)vec[i].iov_base;
- ctxt->sge[i].lkey = frmr->mr->lkey;
- }
- ctxt->sge[i].length = vec[i].iov_len;
- *sgl_offset = *sgl_offset + vec[i].iov_len;
+ /* Bump the key */
+ key = (u8)(frmr->mr->lkey & 0x000000FF);
+ ib_update_fast_reg_key(frmr->mr, ++key);
+
+ ctxt->sge[0].addr = (unsigned long)frmr->kva + *page_offset;
+ ctxt->sge[0].lkey = frmr->mr->lkey;
+ ctxt->sge[0].length = read;
+ ctxt->count = 1;
+ ctxt->read_hdr = head;
+
+ /* Prepare FASTREG WR */
+ memset(&fastreg_wr, 0, sizeof(fastreg_wr));
+ fastreg_wr.opcode = IB_WR_FAST_REG_MR;
+ fastreg_wr.send_flags = IB_SEND_SIGNALED;
+ fastreg_wr.wr.fast_reg.iova_start = (unsigned long)frmr->kva;
+ fastreg_wr.wr.fast_reg.page_list = frmr->page_list;
+ fastreg_wr.wr.fast_reg.page_list_len = frmr->page_list_len;
+ fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
+ fastreg_wr.wr.fast_reg.length = frmr->map_len;
+ fastreg_wr.wr.fast_reg.access_flags = frmr->access_flags;
+ fastreg_wr.wr.fast_reg.rkey = frmr->mr->lkey;
+ fastreg_wr.next = &read_wr;
+
+ /* Prepare RDMA_READ */
+ memset(&read_wr, 0, sizeof(read_wr));
+ read_wr.send_flags = IB_SEND_SIGNALED;
+ read_wr.wr.rdma.rkey = rs_handle;
+ read_wr.wr.rdma.remote_addr = rs_offset;
+ read_wr.sg_list = ctxt->sge;
+ read_wr.num_sge = 1;
+ if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_READ_W_INV) {
+ read_wr.opcode = IB_WR_RDMA_READ_WITH_INV;
+ read_wr.wr_id = (unsigned long)ctxt;
+ read_wr.ex.invalidate_rkey = ctxt->frmr->mr->lkey;
+ } else {
+ read_wr.opcode = IB_WR_RDMA_READ;
+ read_wr.next = &inv_wr;
+ /* Prepare invalidate */
+ memset(&inv_wr, 0, sizeof(inv_wr));
+ inv_wr.wr_id = (unsigned long)ctxt;
+ inv_wr.opcode = IB_WR_LOCAL_INV;
+ inv_wr.send_flags = IB_SEND_SIGNALED | IB_SEND_FENCE;
+ inv_wr.ex.invalidate_rkey = frmr->mr->lkey;
+ }
+ ctxt->wr_op = read_wr.opcode;
+
+ /* Post the chain */
+ ret = svc_rdma_send(xprt, &fastreg_wr);
+ if (ret) {
+ pr_err("svcrdma: Error %d posting RDMA_READ\n", ret);
+ set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
+ goto err;
}
- return 0;
-}
-static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count)
-{
- if ((rdma_node_get_transport(xprt->sc_cm_id->device->node_type) ==
- RDMA_TRANSPORT_IWARP) &&
- sge_count > 1)
- return 1;
- else
- return min_t(int, sge_count, xprt->sc_max_sge);
+ /* return current location in page array */
+ *page_no = pg_no;
+ *page_offset = pg_off;
+ ret = read;
+ atomic_inc(&rdma_stat_read);
+ return ret;
+ err:
+ svc_rdma_unmap_dma(ctxt);
+ svc_rdma_put_context(ctxt, 0);
+ svc_rdma_put_frmr(xprt, frmr);
+ return ret;
}
-/*
- * Use RDMA_READ to read data from the advertised client buffer into the
- * XDR stream starting at rq_arg.head[0].iov_base.
- * Each chunk in the array
- * contains the following fields:
- * discrim - '1', This isn't used for data placement
- * position - The xdr stream offset (the same for every chunk)
- * handle - RMR for client memory region
- * length - data transfer length
- * offset - 64 bit tagged offset in remote memory region
- *
- * On our side, we need to read into a pagelist. The first page immediately
- * follows the RPC header.
- *
- * This function returns:
- * 0 - No error and no read-list found.
- *
- * 1 - Successful read-list processing. The data is not yet in
- * the pagelist and therefore the RPC request must be deferred. The
- * I/O completion will enqueue the transport again and
- * svc_rdma_recvfrom will complete the request.
- *
- * <0 - Error processing/posting read-list.
- *
- * NOTE: The ctxt must not be touched after the last WR has been posted
- * because the I/O completion processing may occur on another
- * processor and free / modify the context. Ne touche pas!
- */
-static int rdma_read_xdr(struct svcxprt_rdma *xprt,
- struct rpcrdma_msg *rmsgp,
- struct svc_rqst *rqstp,
- struct svc_rdma_op_ctxt *hdr_ctxt)
+static int rdma_read_chunks(struct svcxprt_rdma *xprt,
+ struct rpcrdma_msg *rmsgp,
+ struct svc_rqst *rqstp,
+ struct svc_rdma_op_ctxt *head)
{
- struct ib_send_wr read_wr;
- struct ib_send_wr inv_wr;
- int err = 0;
- int ch_no;
- int ch_count;
- int byte_count;
- int sge_count;
- u64 sgl_offset;
+ int page_no, ch_count, ret;
struct rpcrdma_read_chunk *ch;
- struct svc_rdma_op_ctxt *ctxt = NULL;
- struct svc_rdma_req_map *rpl_map;
- struct svc_rdma_req_map *chl_map;
+ u32 page_offset, byte_count;
+ u64 rs_offset;
+ rdma_reader_fn reader;
/* If no read list is present, return 0 */
ch = svc_rdma_get_read_chunk(rmsgp);
@@ -405,129 +384,55 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt,
if (ch_count > RPCSVC_MAXPAGES)
return -EINVAL;
- /* Allocate temporary reply and chunk maps */
- rpl_map = svc_rdma_get_req_map();
- chl_map = svc_rdma_get_req_map();
+ /* The request is completed when the RDMA_READs complete. The
+ * head context keeps all the pages that comprise the
+ * request.
+ */
+ head->arg.head[0] = rqstp->rq_arg.head[0];
+ head->arg.tail[0] = rqstp->rq_arg.tail[0];
+ head->arg.pages = &head->pages[head->count];
+ head->hdr_count = head->count;
+ head->arg.page_base = 0;
+ head->arg.page_len = 0;
+ head->arg.len = rqstp->rq_arg.len;
+ head->arg.buflen = rqstp->rq_arg.buflen;
- if (!xprt->sc_frmr_pg_list_len)
- sge_count = map_read_chunks(xprt, rqstp, hdr_ctxt, rmsgp,
- rpl_map, chl_map, ch_count,
- byte_count);
+ /* Use FRMR if supported */
+ if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG)
+ reader = rdma_read_chunk_frmr;
else
- sge_count = fast_reg_read_chunks(xprt, rqstp, hdr_ctxt, rmsgp,
- rpl_map, chl_map, ch_count,
- byte_count);
- if (sge_count < 0) {
- err = -EIO;
- goto out;
- }
-
- sgl_offset = 0;
- ch_no = 0;
+ reader = rdma_read_chunk_lcl;
+ page_no = 0; page_offset = 0;
for (ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
- ch->rc_discrim != 0; ch++, ch_no++) {
- u64 rs_offset;
-next_sge:
- ctxt = svc_rdma_get_context(xprt);
- ctxt->direction = DMA_FROM_DEVICE;
- ctxt->frmr = hdr_ctxt->frmr;
- ctxt->read_hdr = NULL;
- clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
- clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
+ ch->rc_discrim != 0; ch++) {
- /* Prepare READ WR */
- memset(&read_wr, 0, sizeof read_wr);
- read_wr.wr_id = (unsigned long)ctxt;
- read_wr.opcode = IB_WR_RDMA_READ;
- ctxt->wr_op = read_wr.opcode;
- read_wr.send_flags = IB_SEND_SIGNALED;
- read_wr.wr.rdma.rkey = ntohl(ch->rc_target.rs_handle);
xdr_decode_hyper((__be32 *)&ch->rc_target.rs_offset,
&rs_offset);
- read_wr.wr.rdma.remote_addr = rs_offset + sgl_offset;
- read_wr.sg_list = ctxt->sge;
- read_wr.num_sge =
- rdma_read_max_sge(xprt, chl_map->ch[ch_no].count);
- err = rdma_set_ctxt_sge(xprt, ctxt, hdr_ctxt->frmr,
- &rpl_map->sge[chl_map->ch[ch_no].start],
- &sgl_offset,
- read_wr.num_sge);
- if (err) {
- svc_rdma_unmap_dma(ctxt);
- svc_rdma_put_context(ctxt, 0);
- goto out;
- }
- if (((ch+1)->rc_discrim == 0) &&
- (read_wr.num_sge == chl_map->ch[ch_no].count)) {
- /*
- * Mark the last RDMA_READ with a bit to
- * indicate all RPC data has been fetched from
- * the client and the RPC needs to be enqueued.
- */
- set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
- if (hdr_ctxt->frmr) {
- set_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
- /*
- * Invalidate the local MR used to map the data
- * sink.
- */
- if (xprt->sc_dev_caps &
- SVCRDMA_DEVCAP_READ_W_INV) {
- read_wr.opcode =
- IB_WR_RDMA_READ_WITH_INV;
- ctxt->wr_op = read_wr.opcode;
- read_wr.ex.invalidate_rkey =
- ctxt->frmr->mr->lkey;
- } else {
- /* Prepare INVALIDATE WR */
- memset(&inv_wr, 0, sizeof inv_wr);
- inv_wr.opcode = IB_WR_LOCAL_INV;
- inv_wr.send_flags = IB_SEND_SIGNALED;
- inv_wr.ex.invalidate_rkey =
- hdr_ctxt->frmr->mr->lkey;
- read_wr.next = &inv_wr;
- }
- }
- ctxt->read_hdr = hdr_ctxt;
+ byte_count = ntohl(ch->rc_target.rs_length);
+
+ while (byte_count > 0) {
+ ret = reader(xprt, rqstp, head,
+ &page_no, &page_offset,
+ ntohl(ch->rc_target.rs_handle),
+ byte_count, rs_offset,
+ ((ch+1)->rc_discrim == 0) /* last */
+ );
+ if (ret < 0)
+ goto err;
+ byte_count -= ret;
+ rs_offset += ret;
+ head->arg.buflen += ret;
}
- /* Post the read */
- err = svc_rdma_send(xprt, &read_wr);
- if (err) {
- printk(KERN_ERR "svcrdma: Error %d posting RDMA_READ\n",
- err);
- set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
- svc_rdma_unmap_dma(ctxt);
- svc_rdma_put_context(ctxt, 0);
- goto out;
- }
- atomic_inc(&rdma_stat_read);
-
- if (read_wr.num_sge < chl_map->ch[ch_no].count) {
- chl_map->ch[ch_no].count -= read_wr.num_sge;
- chl_map->ch[ch_no].start += read_wr.num_sge;
- goto next_sge;
- }
- sgl_offset = 0;
- err = 1;
}
-
- out:
- svc_rdma_put_req_map(rpl_map);
- svc_rdma_put_req_map(chl_map);
-
+ ret = 1;
+ err:
/* Detach arg pages. svc_recv will replenish them */
- for (ch_no = 0; &rqstp->rq_pages[ch_no] < rqstp->rq_respages; ch_no++)
- rqstp->rq_pages[ch_no] = NULL;
-
- /*
- * Detach res pages. If svc_release sees any it will attempt to
- * put them.
- */
- while (rqstp->rq_next_page != rqstp->rq_respages)
- *(--rqstp->rq_next_page) = NULL;
+ for (page_no = 0;
+ &rqstp->rq_pages[page_no] < rqstp->rq_respages; page_no++)
+ rqstp->rq_pages[page_no] = NULL;
- return err;
+ return ret;
}
static int rdma_read_complete(struct svc_rqst *rqstp,
@@ -550,7 +455,7 @@ static int rdma_read_complete(struct svc_rqst *rqstp,
/* rq_respages starts after the last arg page */
rqstp->rq_respages = &rqstp->rq_arg.pages[page_no];
- rqstp->rq_next_page = &rqstp->rq_arg.pages[page_no];
+ rqstp->rq_next_page = rqstp->rq_respages + 1;
/* Rebuild rq_arg head and tail. */
rqstp->rq_arg.head[0] = head->arg.head[0];
@@ -599,13 +504,9 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
struct svc_rdma_op_ctxt,
dto_q);
list_del_init(&ctxt->dto_q);
- }
- if (ctxt) {
spin_unlock_bh(&rdma_xprt->sc_rq_dto_lock);
return rdma_read_complete(rqstp, ctxt);
- }
-
- if (!list_empty(&rdma_xprt->sc_rq_dto_q)) {
+ } else if (!list_empty(&rdma_xprt->sc_rq_dto_q)) {
ctxt = list_entry(rdma_xprt->sc_rq_dto_q.next,
struct svc_rdma_op_ctxt,
dto_q);
@@ -625,7 +526,6 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
if (test_bit(XPT_CLOSE, &xprt->xpt_flags))
goto close_out;
- BUG_ON(ret);
goto out;
}
dprintk("svcrdma: processing ctxt=%p on xprt=%p, rqstp=%p, status=%d\n",
@@ -648,12 +548,11 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
}
/* Read read-list data. */
- ret = rdma_read_xdr(rdma_xprt, rmsgp, rqstp, ctxt);
+ ret = rdma_read_chunks(rdma_xprt, rmsgp, rqstp, ctxt);
if (ret > 0) {
/* read-list posted, defer until data received from client. */
goto defer;
- }
- if (ret < 0) {
+ } else if (ret < 0) {
/* Post of read-list failed, free context. */
svc_rdma_put_context(ctxt, 1);
return 0;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index c1d124dc772..49fd21a5c21 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved.
* Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -49,152 +50,6 @@
#define RPCDBG_FACILITY RPCDBG_SVCXPRT
-/* Encode an XDR as an array of IB SGE
- *
- * Assumptions:
- * - head[0] is physically contiguous.
- * - tail[0] is physically contiguous.
- * - pages[] is not physically or virtually contiguous and consists of
- * PAGE_SIZE elements.
- *
- * Output:
- * SGE[0] reserved for RCPRDMA header
- * SGE[1] data from xdr->head[]
- * SGE[2..sge_count-2] data from xdr->pages[]
- * SGE[sge_count-1] data from xdr->tail.
- *
- * The max SGE we need is the length of the XDR / pagesize + one for
- * head + one for tail + one for RPCRDMA header. Since RPCSVC_MAXPAGES
- * reserves a page for both the request and the reply header, and this
- * array is only concerned with the reply we are assured that we have
- * on extra page for the RPCRMDA header.
- */
-static int fast_reg_xdr(struct svcxprt_rdma *xprt,
- struct xdr_buf *xdr,
- struct svc_rdma_req_map *vec)
-{
- int sge_no;
- u32 sge_bytes;
- u32 page_bytes;
- u32 page_off;
- int page_no = 0;
- u8 *frva;
- struct svc_rdma_fastreg_mr *frmr;
-
- frmr = svc_rdma_get_frmr(xprt);
- if (IS_ERR(frmr))
- return -ENOMEM;
- vec->frmr = frmr;
-
- /* Skip the RPCRDMA header */
- sge_no = 1;
-
- /* Map the head. */
- frva = (void *)((unsigned long)(xdr->head[0].iov_base) & PAGE_MASK);
- vec->sge[sge_no].iov_base = xdr->head[0].iov_base;
- vec->sge[sge_no].iov_len = xdr->head[0].iov_len;
- vec->count = 2;
- sge_no++;
-
- /* Map the XDR head */
- frmr->kva = frva;
- frmr->direction = DMA_TO_DEVICE;
- frmr->access_flags = 0;
- frmr->map_len = PAGE_SIZE;
- frmr->page_list_len = 1;
- page_off = (unsigned long)xdr->head[0].iov_base & ~PAGE_MASK;
- frmr->page_list->page_list[page_no] =
- ib_dma_map_page(xprt->sc_cm_id->device,
- virt_to_page(xdr->head[0].iov_base),
- page_off,
- PAGE_SIZE - page_off,
- DMA_TO_DEVICE);
- if (ib_dma_mapping_error(xprt->sc_cm_id->device,
- frmr->page_list->page_list[page_no]))
- goto fatal_err;
- atomic_inc(&xprt->sc_dma_used);
-
- /* Map the XDR page list */
- page_off = xdr->page_base;
- page_bytes = xdr->page_len + page_off;
- if (!page_bytes)
- goto encode_tail;
-
- /* Map the pages */
- vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off;
- vec->sge[sge_no].iov_len = page_bytes;
- sge_no++;
- while (page_bytes) {
- struct page *page;
-
- page = xdr->pages[page_no++];
- sge_bytes = min_t(u32, page_bytes, (PAGE_SIZE - page_off));
- page_bytes -= sge_bytes;
-
- frmr->page_list->page_list[page_no] =
- ib_dma_map_page(xprt->sc_cm_id->device,
- page, page_off,
- sge_bytes, DMA_TO_DEVICE);
- if (ib_dma_mapping_error(xprt->sc_cm_id->device,
- frmr->page_list->page_list[page_no]))
- goto fatal_err;
-
- atomic_inc(&xprt->sc_dma_used);
- page_off = 0; /* reset for next time through loop */
- frmr->map_len += PAGE_SIZE;
- frmr->page_list_len++;
- }
- vec->count++;
-
- encode_tail:
- /* Map tail */
- if (0 == xdr->tail[0].iov_len)
- goto done;
-
- vec->count++;
- vec->sge[sge_no].iov_len = xdr->tail[0].iov_len;
-
- if (((unsigned long)xdr->tail[0].iov_base & PAGE_MASK) ==
- ((unsigned long)xdr->head[0].iov_base & PAGE_MASK)) {
- /*
- * If head and tail use the same page, we don't need
- * to map it again.
- */
- vec->sge[sge_no].iov_base = xdr->tail[0].iov_base;
- } else {
- void *va;
-
- /* Map another page for the tail */
- page_off = (unsigned long)xdr->tail[0].iov_base & ~PAGE_MASK;
- va = (void *)((unsigned long)xdr->tail[0].iov_base & PAGE_MASK);
- vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off;
-
- frmr->page_list->page_list[page_no] =
- ib_dma_map_page(xprt->sc_cm_id->device, virt_to_page(va),
- page_off,
- PAGE_SIZE,
- DMA_TO_DEVICE);
- if (ib_dma_mapping_error(xprt->sc_cm_id->device,
- frmr->page_list->page_list[page_no]))
- goto fatal_err;
- atomic_inc(&xprt->sc_dma_used);
- frmr->map_len += PAGE_SIZE;
- frmr->page_list_len++;
- }
-
- done:
- if (svc_rdma_fastreg(xprt, frmr))
- goto fatal_err;
-
- return 0;
-
- fatal_err:
- printk("svcrdma: Error fast registering memory for xprt %p\n", xprt);
- vec->frmr = NULL;
- svc_rdma_put_frmr(xprt, frmr);
- return -EIO;
-}
-
static int map_xdr(struct svcxprt_rdma *xprt,
struct xdr_buf *xdr,
struct svc_rdma_req_map *vec)
@@ -208,9 +63,6 @@ static int map_xdr(struct svcxprt_rdma *xprt,
BUG_ON(xdr->len !=
(xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len));
- if (xprt->sc_frmr_pg_list_len)
- return fast_reg_xdr(xprt, xdr, vec);
-
/* Skip the first sge, this is for the RPCRDMA header */
sge_no = 1;
@@ -265,6 +117,7 @@ static dma_addr_t dma_map_xdr(struct svcxprt_rdma *xprt,
xdr_off -= xdr->head[0].iov_len;
if (xdr_off < xdr->page_len) {
/* This offset is in the page list */
+ xdr_off += xdr->page_base;
page = xdr->pages[xdr_off >> PAGE_SHIFT];
xdr_off &= ~PAGE_MASK;
} else {
@@ -281,8 +134,6 @@ static dma_addr_t dma_map_xdr(struct svcxprt_rdma *xprt,
}
/* Assumptions:
- * - We are using FRMR
- * - or -
* - The specified write_len can be represented in sc_max_sge * PAGE_SIZE
*/
static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
@@ -326,23 +177,16 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
sge_bytes = min_t(size_t,
bc, vec->sge[xdr_sge_no].iov_len-sge_off);
sge[sge_no].length = sge_bytes;
- if (!vec->frmr) {
- sge[sge_no].addr =
- dma_map_xdr(xprt, &rqstp->rq_res, xdr_off,
- sge_bytes, DMA_TO_DEVICE);
- xdr_off += sge_bytes;
- if (ib_dma_mapping_error(xprt->sc_cm_id->device,
- sge[sge_no].addr))
- goto err;
- atomic_inc(&xprt->sc_dma_used);
- sge[sge_no].lkey = xprt->sc_dma_lkey;
- } else {
- sge[sge_no].addr = (unsigned long)
- vec->sge[xdr_sge_no].iov_base + sge_off;
- sge[sge_no].lkey = vec->frmr->mr->lkey;
- }
+ sge[sge_no].addr =
+ dma_map_xdr(xprt, &rqstp->rq_res, xdr_off,
+ sge_bytes, DMA_TO_DEVICE);
+ xdr_off += sge_bytes;
+ if (ib_dma_mapping_error(xprt->sc_cm_id->device,
+ sge[sge_no].addr))
+ goto err;
+ atomic_inc(&xprt->sc_dma_used);
+ sge[sge_no].lkey = xprt->sc_dma_lkey;
ctxt->count++;
- ctxt->frmr = vec->frmr;
sge_off = 0;
sge_no++;
xdr_sge_no++;
@@ -368,7 +212,6 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
return 0;
err:
svc_rdma_unmap_dma(ctxt);
- svc_rdma_put_frmr(xprt, vec->frmr);
svc_rdma_put_context(ctxt, 0);
/* Fatal error, close transport */
return -EIO;
@@ -396,10 +239,7 @@ static int send_write_chunks(struct svcxprt_rdma *xprt,
res_ary = (struct rpcrdma_write_array *)
&rdma_resp->rm_body.rm_chunks[1];
- if (vec->frmr)
- max_write = vec->frmr->map_len;
- else
- max_write = xprt->sc_max_sge * PAGE_SIZE;
+ max_write = xprt->sc_max_sge * PAGE_SIZE;
/* Write chunks start at the pagelist */
for (xdr_off = rqstp->rq_res.head[0].iov_len, chunk_no = 0;
@@ -471,10 +311,7 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
res_ary = (struct rpcrdma_write_array *)
&rdma_resp->rm_body.rm_chunks[2];
- if (vec->frmr)
- max_write = vec->frmr->map_len;
- else
- max_write = xprt->sc_max_sge * PAGE_SIZE;
+ max_write = xprt->sc_max_sge * PAGE_SIZE;
/* xdr offset starts at RPC message */
nchunks = ntohl(arg_ary->wc_nchunks);
@@ -544,7 +381,6 @@ static int send_reply(struct svcxprt_rdma *rdma,
int byte_count)
{
struct ib_send_wr send_wr;
- struct ib_send_wr inv_wr;
int sge_no;
int sge_bytes;
int page_no;
@@ -558,7 +394,6 @@ static int send_reply(struct svcxprt_rdma *rdma,
"svcrdma: could not post a receive buffer, err=%d."
"Closing transport %p.\n", ret, rdma);
set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
- svc_rdma_put_frmr(rdma, vec->frmr);
svc_rdma_put_context(ctxt, 0);
return -ENOTCONN;
}
@@ -566,11 +401,6 @@ static int send_reply(struct svcxprt_rdma *rdma,
/* Prepare the context */
ctxt->pages[0] = page;
ctxt->count = 1;
- ctxt->frmr = vec->frmr;
- if (vec->frmr)
- set_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
- else
- clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
/* Prepare the SGE for the RPCRDMA Header */
ctxt->sge[0].lkey = rdma->sc_dma_lkey;
@@ -589,21 +419,15 @@ static int send_reply(struct svcxprt_rdma *rdma,
int xdr_off = 0;
sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count);
byte_count -= sge_bytes;
- if (!vec->frmr) {
- ctxt->sge[sge_no].addr =
- dma_map_xdr(rdma, &rqstp->rq_res, xdr_off,
- sge_bytes, DMA_TO_DEVICE);
- xdr_off += sge_bytes;
- if (ib_dma_mapping_error(rdma->sc_cm_id->device,
- ctxt->sge[sge_no].addr))
- goto err;
- atomic_inc(&rdma->sc_dma_used);
- ctxt->sge[sge_no].lkey = rdma->sc_dma_lkey;
- } else {
- ctxt->sge[sge_no].addr = (unsigned long)
- vec->sge[sge_no].iov_base;
- ctxt->sge[sge_no].lkey = vec->frmr->mr->lkey;
- }
+ ctxt->sge[sge_no].addr =
+ dma_map_xdr(rdma, &rqstp->rq_res, xdr_off,
+ sge_bytes, DMA_TO_DEVICE);
+ xdr_off += sge_bytes;
+ if (ib_dma_mapping_error(rdma->sc_cm_id->device,
+ ctxt->sge[sge_no].addr))
+ goto err;
+ atomic_inc(&rdma->sc_dma_used);
+ ctxt->sge[sge_no].lkey = rdma->sc_dma_lkey;
ctxt->sge[sge_no].length = sge_bytes;
}
BUG_ON(byte_count != 0);
@@ -625,6 +449,8 @@ static int send_reply(struct svcxprt_rdma *rdma,
if (page_no+1 >= sge_no)
ctxt->sge[page_no+1].length = 0;
}
+ rqstp->rq_next_page = rqstp->rq_respages + 1;
+
BUG_ON(sge_no > rdma->sc_max_sge);
memset(&send_wr, 0, sizeof send_wr);
ctxt->wr_op = IB_WR_SEND;
@@ -633,15 +459,6 @@ static int send_reply(struct svcxprt_rdma *rdma,
send_wr.num_sge = sge_no;
send_wr.opcode = IB_WR_SEND;
send_wr.send_flags = IB_SEND_SIGNALED;
- if (vec->frmr) {
- /* Prepare INVALIDATE WR */
- memset(&inv_wr, 0, sizeof inv_wr);
- inv_wr.opcode = IB_WR_LOCAL_INV;
- inv_wr.send_flags = IB_SEND_SIGNALED;
- inv_wr.ex.invalidate_rkey =
- vec->frmr->mr->lkey;
- send_wr.next = &inv_wr;
- }
ret = svc_rdma_send(rdma, &send_wr);
if (ret)
@@ -651,7 +468,6 @@ static int send_reply(struct svcxprt_rdma *rdma,
err:
svc_rdma_unmap_dma(ctxt);
- svc_rdma_put_frmr(rdma, vec->frmr);
svc_rdma_put_context(ctxt, 1);
return -EIO;
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 62e4f9bcc38..e7323fbbd34 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved.
* Copyright (c) 2005-2007 Network Appliance, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -65,6 +66,7 @@ static void dto_tasklet_func(unsigned long data);
static void svc_rdma_detach(struct svc_xprt *xprt);
static void svc_rdma_free(struct svc_xprt *xprt);
static int svc_rdma_has_wspace(struct svc_xprt *xprt);
+static int svc_rdma_secure_port(struct svc_rqst *);
static void rq_cq_reap(struct svcxprt_rdma *xprt);
static void sq_cq_reap(struct svcxprt_rdma *xprt);
@@ -82,6 +84,7 @@ static struct svc_xprt_ops svc_rdma_ops = {
.xpo_prep_reply_hdr = svc_rdma_prep_reply_hdr,
.xpo_has_wspace = svc_rdma_has_wspace,
.xpo_accept = svc_rdma_accept,
+ .xpo_secure_port = svc_rdma_secure_port,
};
struct svc_xprt_class svc_rdma_class = {
@@ -160,7 +163,6 @@ struct svc_rdma_req_map *svc_rdma_get_req_map(void)
schedule_timeout_uninterruptible(msecs_to_jiffies(500));
}
map->count = 0;
- map->frmr = NULL;
return map;
}
@@ -336,22 +338,21 @@ static void process_context(struct svcxprt_rdma *xprt,
switch (ctxt->wr_op) {
case IB_WR_SEND:
- if (test_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags))
- svc_rdma_put_frmr(xprt, ctxt->frmr);
+ BUG_ON(ctxt->frmr);
svc_rdma_put_context(ctxt, 1);
break;
case IB_WR_RDMA_WRITE:
+ BUG_ON(ctxt->frmr);
svc_rdma_put_context(ctxt, 0);
break;
case IB_WR_RDMA_READ:
case IB_WR_RDMA_READ_WITH_INV:
+ svc_rdma_put_frmr(xprt, ctxt->frmr);
if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr;
BUG_ON(!read_hdr);
- if (test_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags))
- svc_rdma_put_frmr(xprt, ctxt->frmr);
spin_lock_bh(&xprt->sc_rq_dto_lock);
set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
list_add_tail(&read_hdr->dto_q,
@@ -363,6 +364,7 @@ static void process_context(struct svcxprt_rdma *xprt,
break;
default:
+ BUG_ON(1);
printk(KERN_ERR "svcrdma: unexpected completion type, "
"opcode=%d\n",
ctxt->wr_op);
@@ -378,29 +380,42 @@ static void process_context(struct svcxprt_rdma *xprt,
static void sq_cq_reap(struct svcxprt_rdma *xprt)
{
struct svc_rdma_op_ctxt *ctxt = NULL;
- struct ib_wc wc;
+ struct ib_wc wc_a[6];
+ struct ib_wc *wc;
struct ib_cq *cq = xprt->sc_sq_cq;
int ret;
+ memset(wc_a, 0, sizeof(wc_a));
+
if (!test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags))
return;
ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP);
atomic_inc(&rdma_stat_sq_poll);
- while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) {
- if (wc.status != IB_WC_SUCCESS)
- /* Close the transport */
- set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
+ while ((ret = ib_poll_cq(cq, ARRAY_SIZE(wc_a), wc_a)) > 0) {
+ int i;
- /* Decrement used SQ WR count */
- atomic_dec(&xprt->sc_sq_count);
- wake_up(&xprt->sc_send_wait);
+ for (i = 0; i < ret; i++) {
+ wc = &wc_a[i];
+ if (wc->status != IB_WC_SUCCESS) {
+ dprintk("svcrdma: sq wc err status %d\n",
+ wc->status);
- ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id;
- if (ctxt)
- process_context(xprt, ctxt);
+ /* Close the transport */
+ set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
+ }
- svc_xprt_put(&xprt->sc_xprt);
+ /* Decrement used SQ WR count */
+ atomic_dec(&xprt->sc_sq_count);
+ wake_up(&xprt->sc_send_wait);
+
+ ctxt = (struct svc_rdma_op_ctxt *)
+ (unsigned long)wc->wr_id;
+ if (ctxt)
+ process_context(xprt, ctxt);
+
+ svc_xprt_put(&xprt->sc_xprt);
+ }
}
if (ctxt)
@@ -477,8 +492,7 @@ struct page *svc_rdma_get_page(void)
while ((page = alloc_page(GFP_KERNEL)) == NULL) {
/* If we can't get memory, wait a bit and try again */
- printk(KERN_INFO "svcrdma: out of memory...retrying in 1000 "
- "jiffies.\n");
+ printk(KERN_INFO "svcrdma: out of memory...retrying in 1s\n");
schedule_timeout_uninterruptible(msecs_to_jiffies(1000));
}
return page;
@@ -994,7 +1008,11 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
need_dma_mr = 0;
break;
case RDMA_TRANSPORT_IB:
- if (!(devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) {
+ if (!(newxprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG)) {
+ need_dma_mr = 1;
+ dma_mr_acc = IB_ACCESS_LOCAL_WRITE;
+ } else if (!(devattr.device_cap_flags &
+ IB_DEVICE_LOCAL_DMA_LKEY)) {
need_dma_mr = 1;
dma_mr_acc = IB_ACCESS_LOCAL_WRITE;
} else
@@ -1191,14 +1209,7 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt)
container_of(xprt, struct svcxprt_rdma, sc_xprt);
/*
- * If there are fewer SQ WR available than required to send a
- * simple response, return false.
- */
- if ((rdma->sc_sq_depth - atomic_read(&rdma->sc_sq_count) < 3))
- return 0;
-
- /*
- * ...or there are already waiters on the SQ,
+ * If there are already waiters on the SQ,
* return false.
*/
if (waitqueue_active(&rdma->sc_send_wait))
@@ -1208,6 +1219,11 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt)
return 1;
}
+static int svc_rdma_secure_port(struct svc_rqst *rqstp)
+{
+ return 1;
+}
+
/*
* Attempt to register the kvec representing the RPC memory with the
* device.
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 285dc088411..66f91f0d071 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -149,6 +149,11 @@ static struct ctl_table sunrpc_table[] = {
#endif
+#define RPCRDMA_BIND_TO (60U * HZ)
+#define RPCRDMA_INIT_REEST_TO (5U * HZ)
+#define RPCRDMA_MAX_REEST_TO (30U * HZ)
+#define RPCRDMA_IDLE_DISC_TO (5U * 60 * HZ)
+
static struct rpc_xprt_ops xprt_rdma_procs; /* forward reference */
static void
@@ -229,7 +234,6 @@ static void
xprt_rdma_destroy(struct rpc_xprt *xprt)
{
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
- int rc;
dprintk("RPC: %s: called\n", __func__);
@@ -238,10 +242,7 @@ xprt_rdma_destroy(struct rpc_xprt *xprt)
xprt_clear_connected(xprt);
rpcrdma_buffer_destroy(&r_xprt->rx_buf);
- rc = rpcrdma_ep_destroy(&r_xprt->rx_ep, &r_xprt->rx_ia);
- if (rc)
- dprintk("RPC: %s: rpcrdma_ep_destroy returned %i\n",
- __func__, rc);
+ rpcrdma_ep_destroy(&r_xprt->rx_ep, &r_xprt->rx_ia);
rpcrdma_ia_close(&r_xprt->rx_ia);
xprt_rdma_free_addresses(xprt);
@@ -289,9 +290,9 @@ xprt_setup_rdma(struct xprt_create *args)
/* 60 second timeout, no retries */
xprt->timeout = &xprt_rdma_default_timeout;
- xprt->bind_timeout = (60U * HZ);
- xprt->reestablish_timeout = (5U * HZ);
- xprt->idle_timeout = (5U * 60 * HZ);
+ xprt->bind_timeout = RPCRDMA_BIND_TO;
+ xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO;
+ xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO;
xprt->resvport = 0; /* privileged port not needed */
xprt->tsh_size = 0; /* RPC-RDMA handles framing */
@@ -391,7 +392,7 @@ out4:
xprt_rdma_free_addresses(xprt);
rc = -EINVAL;
out3:
- (void) rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia);
+ rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia);
out2:
rpcrdma_ia_close(&new_xprt->rx_ia);
out1:
@@ -436,10 +437,10 @@ xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task)
schedule_delayed_work(&r_xprt->rdma_connect,
xprt->reestablish_timeout);
xprt->reestablish_timeout <<= 1;
- if (xprt->reestablish_timeout > (30 * HZ))
- xprt->reestablish_timeout = (30 * HZ);
- else if (xprt->reestablish_timeout < (5 * HZ))
- xprt->reestablish_timeout = (5 * HZ);
+ if (xprt->reestablish_timeout > RPCRDMA_MAX_REEST_TO)
+ xprt->reestablish_timeout = RPCRDMA_MAX_REEST_TO;
+ else if (xprt->reestablish_timeout < RPCRDMA_INIT_REEST_TO)
+ xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO;
} else {
schedule_delayed_work(&r_xprt->rdma_connect, 0);
if (!RPC_IS_ASYNC(task))
@@ -447,23 +448,6 @@ xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task)
}
}
-static int
-xprt_rdma_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task)
-{
- struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
- int credits = atomic_read(&r_xprt->rx_buf.rb_credits);
-
- /* == RPC_CWNDSCALE @ init, but *after* setup */
- if (r_xprt->rx_buf.rb_cwndscale == 0UL) {
- r_xprt->rx_buf.rb_cwndscale = xprt->cwnd;
- dprintk("RPC: %s: cwndscale %lu\n", __func__,
- r_xprt->rx_buf.rb_cwndscale);
- BUG_ON(r_xprt->rx_buf.rb_cwndscale <= 0);
- }
- xprt->cwnd = credits * r_xprt->rx_buf.rb_cwndscale;
- return xprt_reserve_xprt_cong(xprt, task);
-}
-
/*
* The RDMA allocate/free functions need the task structure as a place
* to hide the struct rpcrdma_req, which is necessary for the actual send/recv
@@ -479,7 +463,8 @@ xprt_rdma_allocate(struct rpc_task *task, size_t size)
struct rpcrdma_req *req, *nreq;
req = rpcrdma_buffer_get(&rpcx_to_rdmax(xprt)->rx_buf);
- BUG_ON(NULL == req);
+ if (req == NULL)
+ return NULL;
if (size > req->rl_size) {
dprintk("RPC: %s: size %zd too large for buffer[%zd]: "
@@ -503,18 +488,6 @@ xprt_rdma_allocate(struct rpc_task *task, size_t size)
* If the allocation or registration fails, the RPC framework
* will (doggedly) retry.
*/
- if (rpcx_to_rdmax(xprt)->rx_ia.ri_memreg_strategy ==
- RPCRDMA_BOUNCEBUFFERS) {
- /* forced to "pure inline" */
- dprintk("RPC: %s: too much data (%zd) for inline "
- "(r/w max %d/%d)\n", __func__, size,
- rpcx_to_rdmad(xprt).inline_rsize,
- rpcx_to_rdmad(xprt).inline_wsize);
- size = req->rl_size;
- rpc_exit(task, -EIO); /* fail the operation */
- rpcx_to_rdmax(xprt)->rx_stats.failed_marshal_count++;
- goto out;
- }
if (task->tk_flags & RPC_TASK_SWAPPER)
nreq = kmalloc(sizeof *req + size, GFP_ATOMIC);
else
@@ -543,7 +516,6 @@ xprt_rdma_allocate(struct rpc_task *task, size_t size)
req = nreq;
}
dprintk("RPC: %s: size %zd, request 0x%p\n", __func__, size, req);
-out:
req->rl_connect_cookie = 0; /* our reserved value */
return req->rl_xdr_buf;
@@ -579,9 +551,7 @@ xprt_rdma_free(void *buffer)
__func__, rep, (rep && rep->rr_func) ? " (with waiter)" : "");
/*
- * Finish the deregistration. When using mw bind, this was
- * begun in rpcrdma_reply_handler(). In all other modes, we
- * do it here, in thread context. The process is considered
+ * Finish the deregistration. The process is considered
* complete when the rr_func vector becomes NULL - this
* was put in place during rpcrdma_reply_handler() - the wait
* call below will not block if the dereg is "done". If
@@ -590,12 +560,7 @@ xprt_rdma_free(void *buffer)
for (i = 0; req->rl_nchunks;) {
--req->rl_nchunks;
i += rpcrdma_deregister_external(
- &req->rl_segments[i], r_xprt, NULL);
- }
-
- if (rep && wait_event_interruptible(rep->rr_unbind, !rep->rr_func)) {
- rep->rr_func = NULL; /* abandon the callback */
- req->rl_reply = NULL;
+ &req->rl_segments[i], r_xprt);
}
if (req->rl_iov.length == 0) { /* see allocate above */
@@ -630,13 +595,12 @@ xprt_rdma_send_request(struct rpc_task *task)
struct rpc_xprt *xprt = rqst->rq_xprt;
struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+ int rc;
- /* marshal the send itself */
- if (req->rl_niovs == 0 && rpcrdma_marshal_req(rqst) != 0) {
- r_xprt->rx_stats.failed_marshal_count++;
- dprintk("RPC: %s: rpcrdma_marshal_req failed\n",
- __func__);
- return -EIO;
+ if (req->rl_niovs == 0) {
+ rc = rpcrdma_marshal_req(rqst);
+ if (rc < 0)
+ goto failed_marshal;
}
if (req->rl_reply == NULL) /* e.g. reconnection */
@@ -660,6 +624,12 @@ xprt_rdma_send_request(struct rpc_task *task)
rqst->rq_bytes_sent = 0;
return 0;
+failed_marshal:
+ r_xprt->rx_stats.failed_marshal_count++;
+ dprintk("RPC: %s: rpcrdma_marshal_req failed, status %i\n",
+ __func__, rc);
+ if (rc == -EIO)
+ return -EIO;
drop_connection:
xprt_disconnect_done(xprt);
return -ENOTCONN; /* implies disconnect */
@@ -705,7 +675,7 @@ static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
*/
static struct rpc_xprt_ops xprt_rdma_procs = {
- .reserve_xprt = xprt_rdma_reserve_xprt,
+ .reserve_xprt = xprt_reserve_xprt_cong,
.release_xprt = xprt_release_xprt_cong, /* sunrpc/xprt.c */
.alloc_slot = xprt_alloc_slot,
.release_request = xprt_release_rqst_cong, /* ditto */
@@ -733,7 +703,7 @@ static void __exit xprt_rdma_cleanup(void)
{
int rc;
- dprintk(KERN_INFO "RPCRDMA Module Removed, deregister RPC RDMA transport\n");
+ dprintk("RPCRDMA Module Removed, deregister RPC RDMA transport\n");
#ifdef RPC_DEBUG
if (sunrpc_table_header) {
unregister_sysctl_table(sunrpc_table_header);
@@ -755,14 +725,14 @@ static int __init xprt_rdma_init(void)
if (rc)
return rc;
- dprintk(KERN_INFO "RPCRDMA Module Init, register RPC RDMA transport\n");
+ dprintk("RPCRDMA Module Init, register RPC RDMA transport\n");
- dprintk(KERN_INFO "Defaults:\n");
- dprintk(KERN_INFO "\tSlots %d\n"
+ dprintk("Defaults:\n");
+ dprintk("\tSlots %d\n"
"\tMaxInlineRead %d\n\tMaxInlineWrite %d\n",
xprt_rdma_slot_table_entries,
xprt_rdma_max_inline_read, xprt_rdma_max_inline_write);
- dprintk(KERN_INFO "\tPadding %d\n\tMemreg %d\n",
+ dprintk("\tPadding %d\n\tMemreg %d\n",
xprt_rdma_inline_write_padding, xprt_rdma_memreg_strategy);
#ifdef RPC_DEBUG
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 93726560eaa..13dbd1c389f 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -48,8 +48,8 @@
*/
#include <linux/interrupt.h>
-#include <linux/pci.h> /* for Tavor hack below */
#include <linux/slab.h>
+#include <asm/bitops.h>
#include "xprt_rdma.h"
@@ -142,98 +142,139 @@ rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context)
}
}
-static inline
-void rpcrdma_event_process(struct ib_wc *wc)
+static void
+rpcrdma_sendcq_process_wc(struct ib_wc *wc)
{
- struct rpcrdma_mw *frmr;
- struct rpcrdma_rep *rep =
- (struct rpcrdma_rep *)(unsigned long) wc->wr_id;
+ struct rpcrdma_mw *frmr = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
- dprintk("RPC: %s: event rep %p status %X opcode %X length %u\n",
- __func__, rep, wc->status, wc->opcode, wc->byte_len);
+ dprintk("RPC: %s: frmr %p status %X opcode %d\n",
+ __func__, frmr, wc->status, wc->opcode);
- if (!rep) /* send or bind completion that we don't care about */
+ if (wc->wr_id == 0ULL)
return;
-
- if (IB_WC_SUCCESS != wc->status) {
- dprintk("RPC: %s: WC opcode %d status %X, connection lost\n",
- __func__, wc->opcode, wc->status);
- rep->rr_len = ~0U;
- if (wc->opcode != IB_WC_FAST_REG_MR && wc->opcode != IB_WC_LOCAL_INV)
- rpcrdma_schedule_tasklet(rep);
+ if (wc->status != IB_WC_SUCCESS)
return;
- }
- switch (wc->opcode) {
- case IB_WC_FAST_REG_MR:
- frmr = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
+ if (wc->opcode == IB_WC_FAST_REG_MR)
frmr->r.frmr.state = FRMR_IS_VALID;
- break;
- case IB_WC_LOCAL_INV:
- frmr = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
+ else if (wc->opcode == IB_WC_LOCAL_INV)
frmr->r.frmr.state = FRMR_IS_INVALID;
- break;
- case IB_WC_RECV:
- rep->rr_len = wc->byte_len;
- ib_dma_sync_single_for_cpu(
- rdmab_to_ia(rep->rr_buffer)->ri_id->device,
- rep->rr_iov.addr, rep->rr_len, DMA_FROM_DEVICE);
- /* Keep (only) the most recent credits, after check validity */
- if (rep->rr_len >= 16) {
- struct rpcrdma_msg *p =
- (struct rpcrdma_msg *) rep->rr_base;
- unsigned int credits = ntohl(p->rm_credit);
- if (credits == 0) {
- dprintk("RPC: %s: server"
- " dropped credits to 0!\n", __func__);
- /* don't deadlock */
- credits = 1;
- } else if (credits > rep->rr_buffer->rb_max_requests) {
- dprintk("RPC: %s: server"
- " over-crediting: %d (%d)\n",
- __func__, credits,
- rep->rr_buffer->rb_max_requests);
- credits = rep->rr_buffer->rb_max_requests;
- }
- atomic_set(&rep->rr_buffer->rb_credits, credits);
- }
- /* fall through */
- case IB_WC_BIND_MW:
- rpcrdma_schedule_tasklet(rep);
- break;
- default:
- dprintk("RPC: %s: unexpected WC event %X\n",
- __func__, wc->opcode);
- break;
- }
}
-static inline int
-rpcrdma_cq_poll(struct ib_cq *cq)
+static int
+rpcrdma_sendcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
{
- struct ib_wc wc;
- int rc;
+ struct ib_wc *wcs;
+ int budget, count, rc;
- for (;;) {
- rc = ib_poll_cq(cq, 1, &wc);
- if (rc < 0) {
- dprintk("RPC: %s: ib_poll_cq failed %i\n",
- __func__, rc);
+ budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE;
+ do {
+ wcs = ep->rep_send_wcs;
+
+ rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs);
+ if (rc <= 0)
return rc;
- }
- if (rc == 0)
- break;
- rpcrdma_event_process(&wc);
+ count = rc;
+ while (count-- > 0)
+ rpcrdma_sendcq_process_wc(wcs++);
+ } while (rc == RPCRDMA_POLLSIZE && --budget);
+ return 0;
+}
+
+/*
+ * Handle send, fast_reg_mr, and local_inv completions.
+ *
+ * Send events are typically suppressed and thus do not result
+ * in an upcall. Occasionally one is signaled, however. This
+ * prevents the provider's completion queue from wrapping and
+ * losing a completion.
+ */
+static void
+rpcrdma_sendcq_upcall(struct ib_cq *cq, void *cq_context)
+{
+ struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context;
+ int rc;
+
+ rc = rpcrdma_sendcq_poll(cq, ep);
+ if (rc) {
+ dprintk("RPC: %s: ib_poll_cq failed: %i\n",
+ __func__, rc);
+ return;
}
+ rc = ib_req_notify_cq(cq,
+ IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
+ if (rc == 0)
+ return;
+ if (rc < 0) {
+ dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
+ __func__, rc);
+ return;
+ }
+
+ rpcrdma_sendcq_poll(cq, ep);
+}
+
+static void
+rpcrdma_recvcq_process_wc(struct ib_wc *wc)
+{
+ struct rpcrdma_rep *rep =
+ (struct rpcrdma_rep *)(unsigned long)wc->wr_id;
+
+ dprintk("RPC: %s: rep %p status %X opcode %X length %u\n",
+ __func__, rep, wc->status, wc->opcode, wc->byte_len);
+
+ if (wc->status != IB_WC_SUCCESS) {
+ rep->rr_len = ~0U;
+ goto out_schedule;
+ }
+ if (wc->opcode != IB_WC_RECV)
+ return;
+
+ rep->rr_len = wc->byte_len;
+ ib_dma_sync_single_for_cpu(rdmab_to_ia(rep->rr_buffer)->ri_id->device,
+ rep->rr_iov.addr, rep->rr_len, DMA_FROM_DEVICE);
+
+ if (rep->rr_len >= 16) {
+ struct rpcrdma_msg *p = (struct rpcrdma_msg *)rep->rr_base;
+ unsigned int credits = ntohl(p->rm_credit);
+
+ if (credits == 0)
+ credits = 1; /* don't deadlock */
+ else if (credits > rep->rr_buffer->rb_max_requests)
+ credits = rep->rr_buffer->rb_max_requests;
+ atomic_set(&rep->rr_buffer->rb_credits, credits);
+ }
+
+out_schedule:
+ rpcrdma_schedule_tasklet(rep);
+}
+
+static int
+rpcrdma_recvcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
+{
+ struct ib_wc *wcs;
+ int budget, count, rc;
+
+ budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE;
+ do {
+ wcs = ep->rep_recv_wcs;
+
+ rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs);
+ if (rc <= 0)
+ return rc;
+
+ count = rc;
+ while (count-- > 0)
+ rpcrdma_recvcq_process_wc(wcs++);
+ } while (rc == RPCRDMA_POLLSIZE && --budget);
return 0;
}
/*
- * rpcrdma_cq_event_upcall
+ * Handle receive completions.
*
- * This upcall handles recv, send, bind and unbind events.
* It is reentrant but processes single events in order to maintain
* ordering of receives to keep server credits.
*
@@ -242,26 +283,31 @@ rpcrdma_cq_poll(struct ib_cq *cq)
* connection shutdown. That is, the structures required for
* the completion of the reply handler must remain intact until
* all memory has been reclaimed.
- *
- * Note that send events are suppressed and do not result in an upcall.
*/
static void
-rpcrdma_cq_event_upcall(struct ib_cq *cq, void *context)
+rpcrdma_recvcq_upcall(struct ib_cq *cq, void *cq_context)
{
+ struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context;
int rc;
- rc = rpcrdma_cq_poll(cq);
- if (rc)
+ rc = rpcrdma_recvcq_poll(cq, ep);
+ if (rc) {
+ dprintk("RPC: %s: ib_poll_cq failed: %i\n",
+ __func__, rc);
return;
+ }
- rc = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
- if (rc) {
- dprintk("RPC: %s: ib_req_notify_cq failed %i\n",
+ rc = ib_req_notify_cq(cq,
+ IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
+ if (rc == 0)
+ return;
+ if (rc < 0) {
+ dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
__func__, rc);
return;
}
- rpcrdma_cq_poll(cq);
+ rpcrdma_recvcq_poll(cq, ep);
}
#ifdef RPC_DEBUG
@@ -493,54 +539,32 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey;
}
- switch (memreg) {
- case RPCRDMA_MEMWINDOWS:
- case RPCRDMA_MEMWINDOWS_ASYNC:
- if (!(devattr.device_cap_flags & IB_DEVICE_MEM_WINDOW)) {
- dprintk("RPC: %s: MEMWINDOWS registration "
- "specified but not supported by adapter, "
- "using slower RPCRDMA_REGISTER\n",
- __func__);
- memreg = RPCRDMA_REGISTER;
- }
- break;
- case RPCRDMA_MTHCAFMR:
- if (!ia->ri_id->device->alloc_fmr) {
-#if RPCRDMA_PERSISTENT_REGISTRATION
- dprintk("RPC: %s: MTHCAFMR registration "
- "specified but not supported by adapter, "
- "using riskier RPCRDMA_ALLPHYSICAL\n",
- __func__);
- memreg = RPCRDMA_ALLPHYSICAL;
-#else
- dprintk("RPC: %s: MTHCAFMR registration "
- "specified but not supported by adapter, "
- "using slower RPCRDMA_REGISTER\n",
- __func__);
- memreg = RPCRDMA_REGISTER;
-#endif
- }
- break;
- case RPCRDMA_FRMR:
+ if (memreg == RPCRDMA_FRMR) {
/* Requires both frmr reg and local dma lkey */
if ((devattr.device_cap_flags &
(IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) !=
(IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) {
-#if RPCRDMA_PERSISTENT_REGISTRATION
dprintk("RPC: %s: FRMR registration "
- "specified but not supported by adapter, "
- "using riskier RPCRDMA_ALLPHYSICAL\n",
- __func__);
+ "not supported by HCA\n", __func__);
+ memreg = RPCRDMA_MTHCAFMR;
+ } else {
+ /* Mind the ia limit on FRMR page list depth */
+ ia->ri_max_frmr_depth = min_t(unsigned int,
+ RPCRDMA_MAX_DATA_SEGS,
+ devattr.max_fast_reg_page_list_len);
+ }
+ }
+ if (memreg == RPCRDMA_MTHCAFMR) {
+ if (!ia->ri_id->device->alloc_fmr) {
+ dprintk("RPC: %s: MTHCAFMR registration "
+ "not supported by HCA\n", __func__);
+#if RPCRDMA_PERSISTENT_REGISTRATION
memreg = RPCRDMA_ALLPHYSICAL;
#else
- dprintk("RPC: %s: FRMR registration "
- "specified but not supported by adapter, "
- "using slower RPCRDMA_REGISTER\n",
- __func__);
- memreg = RPCRDMA_REGISTER;
+ rc = -ENOMEM;
+ goto out2;
#endif
}
- break;
}
/*
@@ -552,8 +576,6 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
* adapter.
*/
switch (memreg) {
- case RPCRDMA_BOUNCEBUFFERS:
- case RPCRDMA_REGISTER:
case RPCRDMA_FRMR:
break;
#if RPCRDMA_PERSISTENT_REGISTRATION
@@ -563,30 +585,26 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
IB_ACCESS_REMOTE_READ;
goto register_setup;
#endif
- case RPCRDMA_MEMWINDOWS_ASYNC:
- case RPCRDMA_MEMWINDOWS:
- mem_priv = IB_ACCESS_LOCAL_WRITE |
- IB_ACCESS_MW_BIND;
- goto register_setup;
case RPCRDMA_MTHCAFMR:
if (ia->ri_have_dma_lkey)
break;
mem_priv = IB_ACCESS_LOCAL_WRITE;
+#if RPCRDMA_PERSISTENT_REGISTRATION
register_setup:
+#endif
ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv);
if (IS_ERR(ia->ri_bind_mem)) {
printk(KERN_ALERT "%s: ib_get_dma_mr for "
- "phys register failed with %lX\n\t"
- "Will continue with degraded performance\n",
+ "phys register failed with %lX\n",
__func__, PTR_ERR(ia->ri_bind_mem));
- memreg = RPCRDMA_REGISTER;
- ia->ri_bind_mem = NULL;
+ rc = -ENOMEM;
+ goto out2;
}
break;
default:
- printk(KERN_ERR "%s: invalid memory registration mode %d\n",
- __func__, memreg);
- rc = -EINVAL;
+ printk(KERN_ERR "RPC: Unsupported memory "
+ "registration mode: %d\n", memreg);
+ rc = -ENOMEM;
goto out2;
}
dprintk("RPC: %s: memory registration strategy is %d\n",
@@ -640,6 +658,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
struct rpcrdma_create_data_internal *cdata)
{
struct ib_device_attr devattr;
+ struct ib_cq *sendcq, *recvcq;
int rc, err;
rc = ib_query_device(ia->ri_id->device, &devattr);
@@ -659,32 +678,42 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
ep->rep_attr.srq = NULL;
ep->rep_attr.cap.max_send_wr = cdata->max_requests;
switch (ia->ri_memreg_strategy) {
- case RPCRDMA_FRMR:
+ case RPCRDMA_FRMR: {
+ int depth = 7;
+
/* Add room for frmr register and invalidate WRs.
* 1. FRMR reg WR for head
* 2. FRMR invalidate WR for head
- * 3. FRMR reg WR for pagelist
- * 4. FRMR invalidate WR for pagelist
+ * 3. N FRMR reg WRs for pagelist
+ * 4. N FRMR invalidate WRs for pagelist
* 5. FRMR reg WR for tail
* 6. FRMR invalidate WR for tail
* 7. The RDMA_SEND WR
*/
- ep->rep_attr.cap.max_send_wr *= 7;
+
+ /* Calculate N if the device max FRMR depth is smaller than
+ * RPCRDMA_MAX_DATA_SEGS.
+ */
+ if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) {
+ int delta = RPCRDMA_MAX_DATA_SEGS -
+ ia->ri_max_frmr_depth;
+
+ do {
+ depth += 2; /* FRMR reg + invalidate */
+ delta -= ia->ri_max_frmr_depth;
+ } while (delta > 0);
+
+ }
+ ep->rep_attr.cap.max_send_wr *= depth;
if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) {
- cdata->max_requests = devattr.max_qp_wr / 7;
+ cdata->max_requests = devattr.max_qp_wr / depth;
if (!cdata->max_requests)
return -EINVAL;
- ep->rep_attr.cap.max_send_wr = cdata->max_requests * 7;
+ ep->rep_attr.cap.max_send_wr = cdata->max_requests *
+ depth;
}
break;
- case RPCRDMA_MEMWINDOWS_ASYNC:
- case RPCRDMA_MEMWINDOWS:
- /* Add room for mw_binds+unbinds - overkill! */
- ep->rep_attr.cap.max_send_wr++;
- ep->rep_attr.cap.max_send_wr *= (2 * RPCRDMA_MAX_SEGS);
- if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr)
- return -EINVAL;
- break;
+ }
default:
break;
}
@@ -705,46 +734,51 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
ep->rep_attr.cap.max_recv_sge);
/* set trigger for requesting send completion */
- ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 /* - 1*/;
- switch (ia->ri_memreg_strategy) {
- case RPCRDMA_MEMWINDOWS_ASYNC:
- case RPCRDMA_MEMWINDOWS:
- ep->rep_cqinit -= RPCRDMA_MAX_SEGS;
- break;
- default:
- break;
- }
+ ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1;
if (ep->rep_cqinit <= 2)
ep->rep_cqinit = 0;
INIT_CQCOUNT(ep);
ep->rep_ia = ia;
init_waitqueue_head(&ep->rep_connect_wait);
+ INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker);
- /*
- * Create a single cq for receive dto and mw_bind (only ever
- * care about unbind, really). Send completions are suppressed.
- * Use single threaded tasklet upcalls to maintain ordering.
- */
- ep->rep_cq = ib_create_cq(ia->ri_id->device, rpcrdma_cq_event_upcall,
- rpcrdma_cq_async_error_upcall, NULL,
- ep->rep_attr.cap.max_recv_wr +
+ sendcq = ib_create_cq(ia->ri_id->device, rpcrdma_sendcq_upcall,
+ rpcrdma_cq_async_error_upcall, ep,
ep->rep_attr.cap.max_send_wr + 1, 0);
- if (IS_ERR(ep->rep_cq)) {
- rc = PTR_ERR(ep->rep_cq);
- dprintk("RPC: %s: ib_create_cq failed: %i\n",
+ if (IS_ERR(sendcq)) {
+ rc = PTR_ERR(sendcq);
+ dprintk("RPC: %s: failed to create send CQ: %i\n",
__func__, rc);
goto out1;
}
- rc = ib_req_notify_cq(ep->rep_cq, IB_CQ_NEXT_COMP);
+ rc = ib_req_notify_cq(sendcq, IB_CQ_NEXT_COMP);
+ if (rc) {
+ dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
+ __func__, rc);
+ goto out2;
+ }
+
+ recvcq = ib_create_cq(ia->ri_id->device, rpcrdma_recvcq_upcall,
+ rpcrdma_cq_async_error_upcall, ep,
+ ep->rep_attr.cap.max_recv_wr + 1, 0);
+ if (IS_ERR(recvcq)) {
+ rc = PTR_ERR(recvcq);
+ dprintk("RPC: %s: failed to create recv CQ: %i\n",
+ __func__, rc);
+ goto out2;
+ }
+
+ rc = ib_req_notify_cq(recvcq, IB_CQ_NEXT_COMP);
if (rc) {
dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
__func__, rc);
+ ib_destroy_cq(recvcq);
goto out2;
}
- ep->rep_attr.send_cq = ep->rep_cq;
- ep->rep_attr.recv_cq = ep->rep_cq;
+ ep->rep_attr.send_cq = sendcq;
+ ep->rep_attr.recv_cq = recvcq;
/* Initialize cma parameters */
@@ -754,9 +788,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
/* Client offers RDMA Read but does not initiate */
ep->rep_remote_cma.initiator_depth = 0;
- if (ia->ri_memreg_strategy == RPCRDMA_BOUNCEBUFFERS)
- ep->rep_remote_cma.responder_resources = 0;
- else if (devattr.max_qp_rd_atom > 32) /* arbitrary but <= 255 */
+ if (devattr.max_qp_rd_atom > 32) /* arbitrary but <= 255 */
ep->rep_remote_cma.responder_resources = 32;
else
ep->rep_remote_cma.responder_resources = devattr.max_qp_rd_atom;
@@ -768,7 +800,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
return 0;
out2:
- err = ib_destroy_cq(ep->rep_cq);
+ err = ib_destroy_cq(sendcq);
if (err)
dprintk("RPC: %s: ib_destroy_cq returned %i\n",
__func__, err);
@@ -782,11 +814,8 @@ out1:
* Disconnect and destroy endpoint. After this, the only
* valid operations on the ep are to free it (if dynamically
* allocated) or re-create it.
- *
- * The caller's error handling must be sure to not leak the endpoint
- * if this function fails.
*/
-int
+void
rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
int rc;
@@ -794,6 +823,8 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
dprintk("RPC: %s: entering, connected is %d\n",
__func__, ep->rep_connected);
+ cancel_delayed_work_sync(&ep->rep_connect_worker);
+
if (ia->ri_id->qp) {
rc = rpcrdma_ep_disconnect(ep, ia);
if (rc)
@@ -809,13 +840,17 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
ep->rep_pad_mr = NULL;
}
- rpcrdma_clean_cq(ep->rep_cq);
- rc = ib_destroy_cq(ep->rep_cq);
+ rpcrdma_clean_cq(ep->rep_attr.recv_cq);
+ rc = ib_destroy_cq(ep->rep_attr.recv_cq);
if (rc)
dprintk("RPC: %s: ib_destroy_cq returned %i\n",
__func__, rc);
- return rc;
+ rpcrdma_clean_cq(ep->rep_attr.send_cq);
+ rc = ib_destroy_cq(ep->rep_attr.send_cq);
+ if (rc)
+ dprintk("RPC: %s: ib_destroy_cq returned %i\n",
+ __func__, rc);
}
/*
@@ -831,17 +866,20 @@ rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
if (ep->rep_connected != 0) {
struct rpcrdma_xprt *xprt;
retry:
+ dprintk("RPC: %s: reconnecting...\n", __func__);
rc = rpcrdma_ep_disconnect(ep, ia);
if (rc && rc != -ENOTCONN)
dprintk("RPC: %s: rpcrdma_ep_disconnect"
" status %i\n", __func__, rc);
- rpcrdma_clean_cq(ep->rep_cq);
+
+ rpcrdma_clean_cq(ep->rep_attr.recv_cq);
+ rpcrdma_clean_cq(ep->rep_attr.send_cq);
xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
id = rpcrdma_create_id(xprt, ia,
(struct sockaddr *)&xprt->rx_data.addr);
if (IS_ERR(id)) {
- rc = PTR_ERR(id);
+ rc = -EHOSTUNREACH;
goto out;
}
/* TEMP TEMP TEMP - fail if new device:
@@ -855,35 +893,32 @@ retry:
printk("RPC: %s: can't reconnect on "
"different device!\n", __func__);
rdma_destroy_id(id);
- rc = -ENETDOWN;
+ rc = -ENETUNREACH;
goto out;
}
/* END TEMP */
+ rc = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr);
+ if (rc) {
+ dprintk("RPC: %s: rdma_create_qp failed %i\n",
+ __func__, rc);
+ rdma_destroy_id(id);
+ rc = -ENETUNREACH;
+ goto out;
+ }
rdma_destroy_qp(ia->ri_id);
rdma_destroy_id(ia->ri_id);
ia->ri_id = id;
+ } else {
+ dprintk("RPC: %s: connecting...\n", __func__);
+ rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
+ if (rc) {
+ dprintk("RPC: %s: rdma_create_qp failed %i\n",
+ __func__, rc);
+ /* do not update ep->rep_connected */
+ return -ENETUNREACH;
+ }
}
- rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
- if (rc) {
- dprintk("RPC: %s: rdma_create_qp failed %i\n",
- __func__, rc);
- goto out;
- }
-
-/* XXX Tavor device performs badly with 2K MTU! */
-if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) {
- struct pci_dev *pcid = to_pci_dev(ia->ri_id->device->dma_device);
- if (pcid->device == PCI_DEVICE_ID_MELLANOX_TAVOR &&
- (pcid->vendor == PCI_VENDOR_ID_MELLANOX ||
- pcid->vendor == PCI_VENDOR_ID_TOPSPIN)) {
- struct ib_qp_attr attr = {
- .path_mtu = IB_MTU_1024
- };
- rc = ib_modify_qp(ia->ri_id->qp, &attr, IB_QP_PATH_MTU);
- }
-}
-
ep->rep_connected = 0;
rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma);
@@ -944,7 +979,8 @@ rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
int rc;
- rpcrdma_clean_cq(ep->rep_cq);
+ rpcrdma_clean_cq(ep->rep_attr.recv_cq);
+ rpcrdma_clean_cq(ep->rep_attr.send_cq);
rc = rdma_disconnect(ia->ri_id);
if (!rc) {
/* returns without wait if not connected */
@@ -967,7 +1003,7 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
struct rpcrdma_ia *ia, struct rpcrdma_create_data_internal *cdata)
{
char *p;
- size_t len;
+ size_t len, rlen, wlen;
int i, rc;
struct rpcrdma_mw *r;
@@ -997,11 +1033,6 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS *
sizeof(struct rpcrdma_mw);
break;
- case RPCRDMA_MEMWINDOWS_ASYNC:
- case RPCRDMA_MEMWINDOWS:
- len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS *
- sizeof(struct rpcrdma_mw);
- break;
default:
break;
}
@@ -1032,32 +1063,29 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
}
p += cdata->padding;
- /*
- * Allocate the fmr's, or mw's for mw_bind chunk registration.
- * We "cycle" the mw's in order to minimize rkey reuse,
- * and also reduce unbind-to-bind collision.
- */
INIT_LIST_HEAD(&buf->rb_mws);
r = (struct rpcrdma_mw *)p;
switch (ia->ri_memreg_strategy) {
case RPCRDMA_FRMR:
for (i = buf->rb_max_requests * RPCRDMA_MAX_SEGS; i; i--) {
r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
- RPCRDMA_MAX_SEGS);
+ ia->ri_max_frmr_depth);
if (IS_ERR(r->r.frmr.fr_mr)) {
rc = PTR_ERR(r->r.frmr.fr_mr);
dprintk("RPC: %s: ib_alloc_fast_reg_mr"
" failed %i\n", __func__, rc);
goto out;
}
- r->r.frmr.fr_pgl =
- ib_alloc_fast_reg_page_list(ia->ri_id->device,
- RPCRDMA_MAX_SEGS);
+ r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list(
+ ia->ri_id->device,
+ ia->ri_max_frmr_depth);
if (IS_ERR(r->r.frmr.fr_pgl)) {
rc = PTR_ERR(r->r.frmr.fr_pgl);
dprintk("RPC: %s: "
"ib_alloc_fast_reg_page_list "
"failed %i\n", __func__, rc);
+
+ ib_dereg_mr(r->r.frmr.fr_mr);
goto out;
}
list_add(&r->mw_list, &buf->rb_mws);
@@ -1082,21 +1110,6 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
++r;
}
break;
- case RPCRDMA_MEMWINDOWS_ASYNC:
- case RPCRDMA_MEMWINDOWS:
- /* Allocate one extra request's worth, for full cycling */
- for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) {
- r->r.mw = ib_alloc_mw(ia->ri_pd, IB_MW_TYPE_1);
- if (IS_ERR(r->r.mw)) {
- rc = PTR_ERR(r->r.mw);
- dprintk("RPC: %s: ib_alloc_mw"
- " failed %i\n", __func__, rc);
- goto out;
- }
- list_add(&r->mw_list, &buf->rb_mws);
- ++r;
- }
- break;
default:
break;
}
@@ -1105,16 +1118,16 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
* Allocate/init the request/reply buffers. Doing this
* using kmalloc for now -- one for each buf.
*/
+ wlen = 1 << fls(cdata->inline_wsize + sizeof(struct rpcrdma_req));
+ rlen = 1 << fls(cdata->inline_rsize + sizeof(struct rpcrdma_rep));
+ dprintk("RPC: %s: wlen = %zu, rlen = %zu\n",
+ __func__, wlen, rlen);
+
for (i = 0; i < buf->rb_max_requests; i++) {
struct rpcrdma_req *req;
struct rpcrdma_rep *rep;
- len = cdata->inline_wsize + sizeof(struct rpcrdma_req);
- /* RPC layer requests *double* size + 1K RPC_SLACK_SPACE! */
- /* Typical ~2400b, so rounding up saves work later */
- if (len < 4096)
- len = 4096;
- req = kmalloc(len, GFP_KERNEL);
+ req = kmalloc(wlen, GFP_KERNEL);
if (req == NULL) {
dprintk("RPC: %s: request buffer %d alloc"
" failed\n", __func__, i);
@@ -1126,16 +1139,16 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
buf->rb_send_bufs[i]->rl_buffer = buf;
rc = rpcrdma_register_internal(ia, req->rl_base,
- len - offsetof(struct rpcrdma_req, rl_base),
+ wlen - offsetof(struct rpcrdma_req, rl_base),
&buf->rb_send_bufs[i]->rl_handle,
&buf->rb_send_bufs[i]->rl_iov);
if (rc)
goto out;
- buf->rb_send_bufs[i]->rl_size = len-sizeof(struct rpcrdma_req);
+ buf->rb_send_bufs[i]->rl_size = wlen -
+ sizeof(struct rpcrdma_req);
- len = cdata->inline_rsize + sizeof(struct rpcrdma_rep);
- rep = kmalloc(len, GFP_KERNEL);
+ rep = kmalloc(rlen, GFP_KERNEL);
if (rep == NULL) {
dprintk("RPC: %s: reply buffer %d alloc failed\n",
__func__, i);
@@ -1145,10 +1158,9 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
memset(rep, 0, sizeof(struct rpcrdma_rep));
buf->rb_recv_bufs[i] = rep;
buf->rb_recv_bufs[i]->rr_buffer = buf;
- init_waitqueue_head(&rep->rr_unbind);
rc = rpcrdma_register_internal(ia, rep->rr_base,
- len - offsetof(struct rpcrdma_rep, rr_base),
+ rlen - offsetof(struct rpcrdma_rep, rr_base),
&buf->rb_recv_bufs[i]->rr_handle,
&buf->rb_recv_bufs[i]->rr_iov);
if (rc)
@@ -1179,7 +1191,6 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
/* clean up in reverse order from create
* 1. recv mr memory (mr free, then kfree)
- * 1a. bind mw memory
* 2. send mr memory (mr free, then kfree)
* 3. padding (if any) [moved to rpcrdma_ep_destroy]
* 4. arrays
@@ -1194,41 +1205,6 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
kfree(buf->rb_recv_bufs[i]);
}
if (buf->rb_send_bufs && buf->rb_send_bufs[i]) {
- while (!list_empty(&buf->rb_mws)) {
- r = list_entry(buf->rb_mws.next,
- struct rpcrdma_mw, mw_list);
- list_del(&r->mw_list);
- switch (ia->ri_memreg_strategy) {
- case RPCRDMA_FRMR:
- rc = ib_dereg_mr(r->r.frmr.fr_mr);
- if (rc)
- dprintk("RPC: %s:"
- " ib_dereg_mr"
- " failed %i\n",
- __func__, rc);
- ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
- break;
- case RPCRDMA_MTHCAFMR:
- rc = ib_dealloc_fmr(r->r.fmr);
- if (rc)
- dprintk("RPC: %s:"
- " ib_dealloc_fmr"
- " failed %i\n",
- __func__, rc);
- break;
- case RPCRDMA_MEMWINDOWS_ASYNC:
- case RPCRDMA_MEMWINDOWS:
- rc = ib_dealloc_mw(r->r.mw);
- if (rc)
- dprintk("RPC: %s:"
- " ib_dealloc_mw"
- " failed %i\n",
- __func__, rc);
- break;
- default:
- break;
- }
- }
rpcrdma_deregister_internal(ia,
buf->rb_send_bufs[i]->rl_handle,
&buf->rb_send_bufs[i]->rl_iov);
@@ -1236,6 +1212,33 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
}
}
+ while (!list_empty(&buf->rb_mws)) {
+ r = list_entry(buf->rb_mws.next,
+ struct rpcrdma_mw, mw_list);
+ list_del(&r->mw_list);
+ switch (ia->ri_memreg_strategy) {
+ case RPCRDMA_FRMR:
+ rc = ib_dereg_mr(r->r.frmr.fr_mr);
+ if (rc)
+ dprintk("RPC: %s:"
+ " ib_dereg_mr"
+ " failed %i\n",
+ __func__, rc);
+ ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
+ break;
+ case RPCRDMA_MTHCAFMR:
+ rc = ib_dealloc_fmr(r->r.fmr);
+ if (rc)
+ dprintk("RPC: %s:"
+ " ib_dealloc_fmr"
+ " failed %i\n",
+ __func__, rc);
+ break;
+ default:
+ break;
+ }
+ }
+
kfree(buf->rb_pool);
}
@@ -1299,21 +1302,17 @@ rpcrdma_buffer_put(struct rpcrdma_req *req)
int i;
unsigned long flags;
- BUG_ON(req->rl_nchunks != 0);
spin_lock_irqsave(&buffers->rb_lock, flags);
buffers->rb_send_bufs[--buffers->rb_send_index] = req;
req->rl_niovs = 0;
if (req->rl_reply) {
buffers->rb_recv_bufs[--buffers->rb_recv_index] = req->rl_reply;
- init_waitqueue_head(&req->rl_reply->rr_unbind);
req->rl_reply->rr_func = NULL;
req->rl_reply = NULL;
}
switch (ia->ri_memreg_strategy) {
case RPCRDMA_FRMR:
case RPCRDMA_MTHCAFMR:
- case RPCRDMA_MEMWINDOWS_ASYNC:
- case RPCRDMA_MEMWINDOWS:
/*
* Cycle mw's back in reverse order, and "spin" them.
* This delays and scrambles reuse as much as possible.
@@ -1358,8 +1357,7 @@ rpcrdma_recv_buffer_get(struct rpcrdma_req *req)
/*
* Put reply buffers back into pool when not attached to
- * request. This happens in error conditions, and when
- * aborting unbinds. Pre-decrement counter/array index.
+ * request. This happens in error conditions.
*/
void
rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
@@ -1498,8 +1496,8 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
seg1->mr_offset -= pageoff; /* start of page */
seg1->mr_len += pageoff;
len = -pageoff;
- if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
- *nsegs = RPCRDMA_MAX_DATA_SEGS;
+ if (*nsegs > ia->ri_max_frmr_depth)
+ *nsegs = ia->ri_max_frmr_depth;
for (page_no = i = 0; i < *nsegs;) {
rpcrdma_map_one(ia, seg, writing);
pa = seg->mr_dma;
@@ -1536,10 +1534,6 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
} else
post_wr = &frmr_wr;
- /* Bump the key */
- key = (u8)(seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey & 0x000000FF);
- ib_update_fast_reg_key(seg1->mr_chunk.rl_mw->r.frmr.fr_mr, ++key);
-
/* Prepare FRMR WR */
memset(&frmr_wr, 0, sizeof frmr_wr);
frmr_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw;
@@ -1550,7 +1544,16 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
frmr_wr.wr.fast_reg.page_list_len = page_no;
frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
frmr_wr.wr.fast_reg.length = page_no << PAGE_SHIFT;
- BUG_ON(frmr_wr.wr.fast_reg.length < len);
+ if (frmr_wr.wr.fast_reg.length < len) {
+ while (seg1->mr_nsegs--)
+ rpcrdma_unmap_one(ia, seg++);
+ return -EIO;
+ }
+
+ /* Bump the key */
+ key = (u8)(seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey & 0x000000FF);
+ ib_update_fast_reg_key(seg1->mr_chunk.rl_mw->r.frmr.fr_mr, ++key);
+
frmr_wr.wr.fast_reg.access_flags = (writing ?
IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
IB_ACCESS_REMOTE_READ);
@@ -1661,135 +1664,6 @@ rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg,
return rc;
}
-static int
-rpcrdma_register_memwin_external(struct rpcrdma_mr_seg *seg,
- int *nsegs, int writing, struct rpcrdma_ia *ia,
- struct rpcrdma_xprt *r_xprt)
-{
- int mem_priv = (writing ? IB_ACCESS_REMOTE_WRITE :
- IB_ACCESS_REMOTE_READ);
- struct ib_mw_bind param;
- int rc;
-
- *nsegs = 1;
- rpcrdma_map_one(ia, seg, writing);
- param.bind_info.mr = ia->ri_bind_mem;
- param.wr_id = 0ULL; /* no send cookie */
- param.bind_info.addr = seg->mr_dma;
- param.bind_info.length = seg->mr_len;
- param.send_flags = 0;
- param.bind_info.mw_access_flags = mem_priv;
-
- DECR_CQCOUNT(&r_xprt->rx_ep);
- rc = ib_bind_mw(ia->ri_id->qp, seg->mr_chunk.rl_mw->r.mw, &param);
- if (rc) {
- dprintk("RPC: %s: failed ib_bind_mw "
- "%u@0x%llx status %i\n",
- __func__, seg->mr_len,
- (unsigned long long)seg->mr_dma, rc);
- rpcrdma_unmap_one(ia, seg);
- } else {
- seg->mr_rkey = seg->mr_chunk.rl_mw->r.mw->rkey;
- seg->mr_base = param.bind_info.addr;
- seg->mr_nsegs = 1;
- }
- return rc;
-}
-
-static int
-rpcrdma_deregister_memwin_external(struct rpcrdma_mr_seg *seg,
- struct rpcrdma_ia *ia,
- struct rpcrdma_xprt *r_xprt, void **r)
-{
- struct ib_mw_bind param;
- LIST_HEAD(l);
- int rc;
-
- BUG_ON(seg->mr_nsegs != 1);
- param.bind_info.mr = ia->ri_bind_mem;
- param.bind_info.addr = 0ULL; /* unbind */
- param.bind_info.length = 0;
- param.bind_info.mw_access_flags = 0;
- if (*r) {
- param.wr_id = (u64) (unsigned long) *r;
- param.send_flags = IB_SEND_SIGNALED;
- INIT_CQCOUNT(&r_xprt->rx_ep);
- } else {
- param.wr_id = 0ULL;
- param.send_flags = 0;
- DECR_CQCOUNT(&r_xprt->rx_ep);
- }
- rc = ib_bind_mw(ia->ri_id->qp, seg->mr_chunk.rl_mw->r.mw, &param);
- rpcrdma_unmap_one(ia, seg);
- if (rc)
- dprintk("RPC: %s: failed ib_(un)bind_mw,"
- " status %i\n", __func__, rc);
- else
- *r = NULL; /* will upcall on completion */
- return rc;
-}
-
-static int
-rpcrdma_register_default_external(struct rpcrdma_mr_seg *seg,
- int *nsegs, int writing, struct rpcrdma_ia *ia)
-{
- int mem_priv = (writing ? IB_ACCESS_REMOTE_WRITE :
- IB_ACCESS_REMOTE_READ);
- struct rpcrdma_mr_seg *seg1 = seg;
- struct ib_phys_buf ipb[RPCRDMA_MAX_DATA_SEGS];
- int len, i, rc = 0;
-
- if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
- *nsegs = RPCRDMA_MAX_DATA_SEGS;
- for (len = 0, i = 0; i < *nsegs;) {
- rpcrdma_map_one(ia, seg, writing);
- ipb[i].addr = seg->mr_dma;
- ipb[i].size = seg->mr_len;
- len += seg->mr_len;
- ++seg;
- ++i;
- /* Check for holes */
- if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
- offset_in_page((seg-1)->mr_offset+(seg-1)->mr_len))
- break;
- }
- seg1->mr_base = seg1->mr_dma;
- seg1->mr_chunk.rl_mr = ib_reg_phys_mr(ia->ri_pd,
- ipb, i, mem_priv, &seg1->mr_base);
- if (IS_ERR(seg1->mr_chunk.rl_mr)) {
- rc = PTR_ERR(seg1->mr_chunk.rl_mr);
- dprintk("RPC: %s: failed ib_reg_phys_mr "
- "%u@0x%llx (%d)... status %i\n",
- __func__, len,
- (unsigned long long)seg1->mr_dma, i, rc);
- while (i--)
- rpcrdma_unmap_one(ia, --seg);
- } else {
- seg1->mr_rkey = seg1->mr_chunk.rl_mr->rkey;
- seg1->mr_nsegs = i;
- seg1->mr_len = len;
- }
- *nsegs = i;
- return rc;
-}
-
-static int
-rpcrdma_deregister_default_external(struct rpcrdma_mr_seg *seg,
- struct rpcrdma_ia *ia)
-{
- struct rpcrdma_mr_seg *seg1 = seg;
- int rc;
-
- rc = ib_dereg_mr(seg1->mr_chunk.rl_mr);
- seg1->mr_chunk.rl_mr = NULL;
- while (seg1->mr_nsegs--)
- rpcrdma_unmap_one(ia, seg++);
- if (rc)
- dprintk("RPC: %s: failed ib_dereg_mr,"
- " status %i\n", __func__, rc);
- return rc;
-}
-
int
rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
int nsegs, int writing, struct rpcrdma_xprt *r_xprt)
@@ -1819,16 +1693,8 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia);
break;
- /* Registration using memory windows */
- case RPCRDMA_MEMWINDOWS_ASYNC:
- case RPCRDMA_MEMWINDOWS:
- rc = rpcrdma_register_memwin_external(seg, &nsegs, writing, ia, r_xprt);
- break;
-
- /* Default registration each time */
default:
- rc = rpcrdma_register_default_external(seg, &nsegs, writing, ia);
- break;
+ return -1;
}
if (rc)
return -1;
@@ -1838,7 +1704,7 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
int
rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
- struct rpcrdma_xprt *r_xprt, void *r)
+ struct rpcrdma_xprt *r_xprt)
{
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
int nsegs = seg->mr_nsegs, rc;
@@ -1847,9 +1713,7 @@ rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
#if RPCRDMA_PERSISTENT_REGISTRATION
case RPCRDMA_ALLPHYSICAL:
- BUG_ON(nsegs != 1);
rpcrdma_unmap_one(ia, seg);
- rc = 0;
break;
#endif
@@ -1861,21 +1725,9 @@ rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
rc = rpcrdma_deregister_fmr_external(seg, ia);
break;
- case RPCRDMA_MEMWINDOWS_ASYNC:
- case RPCRDMA_MEMWINDOWS:
- rc = rpcrdma_deregister_memwin_external(seg, ia, r_xprt, &r);
- break;
-
default:
- rc = rpcrdma_deregister_default_external(seg, ia);
break;
}
- if (r) {
- struct rpcrdma_rep *rep = r;
- void (*func)(struct rpcrdma_rep *) = rep->rr_func;
- rep->rr_func = NULL;
- func(rep); /* dereg done, callback now */
- }
return nsegs;
}
@@ -1950,7 +1802,6 @@ rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
ib_dma_sync_single_for_cpu(ia->ri_id->device,
rep->rr_iov.addr, rep->rr_iov.length, DMA_BIDIRECTIONAL);
- DECR_CQCOUNT(ep);
rc = ib_post_recv(ia->ri_id->qp, &recv_wr, &recv_wr_fail);
if (rc)
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index cc1445dc1d1..89e7cd47970 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -43,6 +43,7 @@
#include <linux/wait.h> /* wait_queue_head_t, etc */
#include <linux/spinlock.h> /* spinlock_t, etc */
#include <linux/atomic.h> /* atomic_t, etc */
+#include <linux/workqueue.h> /* struct work_struct */
#include <rdma/rdma_cm.h> /* RDMA connection api */
#include <rdma/ib_verbs.h> /* RDMA verbs api */
@@ -66,18 +67,21 @@ struct rpcrdma_ia {
struct completion ri_done;
int ri_async_rc;
enum rpcrdma_memreg ri_memreg_strategy;
+ unsigned int ri_max_frmr_depth;
};
/*
* RDMA Endpoint -- one per transport instance
*/
+#define RPCRDMA_WC_BUDGET (128)
+#define RPCRDMA_POLLSIZE (16)
+
struct rpcrdma_ep {
atomic_t rep_cqcount;
int rep_cqinit;
int rep_connected;
struct rpcrdma_ia *rep_ia;
- struct ib_cq *rep_cq;
struct ib_qp_init_attr rep_attr;
wait_queue_head_t rep_connect_wait;
struct ib_sge rep_pad; /* holds zeroed pad */
@@ -86,6 +90,9 @@ struct rpcrdma_ep {
struct rpc_xprt *rep_xprt; /* for rep_func */
struct rdma_conn_param rep_remote_cma;
struct sockaddr_storage rep_remote_addr;
+ struct delayed_work rep_connect_worker;
+ struct ib_wc rep_send_wcs[RPCRDMA_POLLSIZE];
+ struct ib_wc rep_recv_wcs[RPCRDMA_POLLSIZE];
};
#define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit)
@@ -124,7 +131,6 @@ struct rpcrdma_rep {
struct rpc_xprt *rr_xprt; /* needed for request/reply matching */
void (*rr_func)(struct rpcrdma_rep *);/* called by tasklet in softint */
struct list_head rr_list; /* tasklet list */
- wait_queue_head_t rr_unbind; /* optional unbind wait */
struct ib_sge rr_iov; /* for posting */
struct ib_mr *rr_handle; /* handle for mem in rr_iov */
char rr_base[MAX_RPCRDMAHDR]; /* minimal inline receive buffer */
@@ -159,7 +165,6 @@ struct rpcrdma_mr_seg { /* chunk descriptors */
struct ib_mr *rl_mr; /* if registered directly */
struct rpcrdma_mw { /* if registered from region */
union {
- struct ib_mw *mw;
struct ib_fmr *fmr;
struct {
struct ib_fast_reg_page_list *fr_pgl;
@@ -207,7 +212,6 @@ struct rpcrdma_req {
struct rpcrdma_buffer {
spinlock_t rb_lock; /* protects indexes */
atomic_t rb_credits; /* most recent server credits */
- unsigned long rb_cwndscale; /* cached framework rpc_cwndscale */
int rb_max_requests;/* client max requests */
struct list_head rb_mws; /* optional memory windows/fmrs/frmrs */
int rb_send_index;
@@ -300,7 +304,7 @@ void rpcrdma_ia_close(struct rpcrdma_ia *);
*/
int rpcrdma_ep_create(struct rpcrdma_ep *, struct rpcrdma_ia *,
struct rpcrdma_create_data_internal *);
-int rpcrdma_ep_destroy(struct rpcrdma_ep *, struct rpcrdma_ia *);
+void rpcrdma_ep_destroy(struct rpcrdma_ep *, struct rpcrdma_ia *);
int rpcrdma_ep_connect(struct rpcrdma_ep *, struct rpcrdma_ia *);
int rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *);
@@ -330,11 +334,12 @@ int rpcrdma_deregister_internal(struct rpcrdma_ia *,
int rpcrdma_register_external(struct rpcrdma_mr_seg *,
int, int, struct rpcrdma_xprt *);
int rpcrdma_deregister_external(struct rpcrdma_mr_seg *,
- struct rpcrdma_xprt *, void *);
+ struct rpcrdma_xprt *);
/*
* RPC/RDMA connection management calls - xprtrdma/rpc_rdma.c
*/
+void rpcrdma_connect_worker(struct work_struct *);
void rpcrdma_conn_func(struct rpcrdma_ep *);
void rpcrdma_reply_handler(struct rpcrdma_rep *);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 0addefca8e7..be8bbd5d65e 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -254,7 +254,7 @@ struct sock_xprt {
/*
* Saved socket callback addresses
*/
- void (*old_data_ready)(struct sock *, int);
+ void (*old_data_ready)(struct sock *);
void (*old_state_change)(struct sock *);
void (*old_write_space)(struct sock *);
void (*old_error_report)(struct sock *);
@@ -866,8 +866,6 @@ static void xs_reset_transport(struct sock_xprt *transport)
xs_restore_old_callbacks(transport, sk);
write_unlock_bh(&sk->sk_callback_lock);
- sk->sk_no_check = 0;
-
trace_rpc_socket_close(&transport->xprt, sock);
sock_release(sock);
}
@@ -893,11 +891,11 @@ static void xs_close(struct rpc_xprt *xprt)
xs_reset_transport(transport);
xprt->reestablish_timeout = 0;
- smp_mb__before_clear_bit();
+ smp_mb__before_atomic();
clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
clear_bit(XPRT_CLOSING, &xprt->state);
- smp_mb__after_clear_bit();
+ smp_mb__after_atomic();
xprt_disconnect_done(xprt);
}
@@ -909,6 +907,12 @@ static void xs_tcp_close(struct rpc_xprt *xprt)
xs_tcp_shutdown(xprt);
}
+static void xs_xprt_free(struct rpc_xprt *xprt)
+{
+ xs_free_peer_addresses(xprt);
+ xprt_free(xprt);
+}
+
/**
* xs_destroy - prepare to shutdown a transport
* @xprt: doomed transport
@@ -919,8 +923,7 @@ static void xs_destroy(struct rpc_xprt *xprt)
dprintk("RPC: xs_destroy xprt %p\n", xprt);
xs_close(xprt);
- xs_free_peer_addresses(xprt);
- xprt_free(xprt);
+ xs_xprt_free(xprt);
module_put(THIS_MODULE);
}
@@ -946,7 +949,7 @@ static int xs_local_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb)
*
* Currently this assumes we can read the whole reply in a single gulp.
*/
-static void xs_local_data_ready(struct sock *sk, int len)
+static void xs_local_data_ready(struct sock *sk)
{
struct rpc_task *task;
struct rpc_xprt *xprt;
@@ -1009,7 +1012,7 @@ static void xs_local_data_ready(struct sock *sk, int len)
* @len: how much data to read
*
*/
-static void xs_udp_data_ready(struct sock *sk, int len)
+static void xs_udp_data_ready(struct sock *sk)
{
struct rpc_task *task;
struct rpc_xprt *xprt;
@@ -1306,41 +1309,29 @@ static inline int xs_tcp_read_reply(struct rpc_xprt *xprt,
* If we're unable to obtain the rpc_rqst we schedule the closing of the
* connection and return -1.
*/
-static inline int xs_tcp_read_callback(struct rpc_xprt *xprt,
+static int xs_tcp_read_callback(struct rpc_xprt *xprt,
struct xdr_skb_reader *desc)
{
struct sock_xprt *transport =
container_of(xprt, struct sock_xprt, xprt);
struct rpc_rqst *req;
- req = xprt_alloc_bc_request(xprt);
+ /* Look up and lock the request corresponding to the given XID */
+ spin_lock(&xprt->transport_lock);
+ req = xprt_lookup_bc_request(xprt, transport->tcp_xid);
if (req == NULL) {
+ spin_unlock(&xprt->transport_lock);
printk(KERN_WARNING "Callback slot table overflowed\n");
xprt_force_disconnect(xprt);
return -1;
}
- req->rq_xid = transport->tcp_xid;
dprintk("RPC: read callback XID %08x\n", ntohl(req->rq_xid));
xs_tcp_read_common(xprt, desc, req);
- if (!(transport->tcp_flags & TCP_RCV_COPY_DATA)) {
- struct svc_serv *bc_serv = xprt->bc_serv;
-
- /*
- * Add callback request to callback list. The callback
- * service sleeps on the sv_cb_waitq waiting for new
- * requests. Wake it up after adding enqueing the
- * request.
- */
- dprintk("RPC: add callback request to list\n");
- spin_lock(&bc_serv->sv_cb_lock);
- list_add(&req->rq_bc_list, &bc_serv->sv_cb_list);
- spin_unlock(&bc_serv->sv_cb_lock);
- wake_up(&bc_serv->sv_cb_waitq);
- }
-
- req->rq_private_buf.len = transport->tcp_copied;
+ if (!(transport->tcp_flags & TCP_RCV_COPY_DATA))
+ xprt_complete_bc_request(req, transport->tcp_copied);
+ spin_unlock(&xprt->transport_lock);
return 0;
}
@@ -1444,7 +1435,7 @@ static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, uns
* @bytes: how much data to read
*
*/
-static void xs_tcp_data_ready(struct sock *sk, int bytes)
+static void xs_tcp_data_ready(struct sock *sk)
{
struct rpc_xprt *xprt;
read_descriptor_t rd_desc;
@@ -1504,12 +1495,12 @@ static void xs_tcp_cancel_linger_timeout(struct rpc_xprt *xprt)
static void xs_sock_reset_connection_flags(struct rpc_xprt *xprt)
{
- smp_mb__before_clear_bit();
+ smp_mb__before_atomic();
clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
clear_bit(XPRT_CONNECTION_CLOSE, &xprt->state);
clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
clear_bit(XPRT_CLOSING, &xprt->state);
- smp_mb__after_clear_bit();
+ smp_mb__after_atomic();
}
static void xs_sock_mark_closed(struct rpc_xprt *xprt)
@@ -1563,10 +1554,10 @@ static void xs_tcp_state_change(struct sock *sk)
xprt->connect_cookie++;
xprt->reestablish_timeout = 0;
set_bit(XPRT_CLOSING, &xprt->state);
- smp_mb__before_clear_bit();
+ smp_mb__before_atomic();
clear_bit(XPRT_CONNECTED, &xprt->state);
clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
- smp_mb__after_clear_bit();
+ smp_mb__after_atomic();
xs_tcp_schedule_linger_timeout(xprt, xs_tcp_fin_timeout);
break;
case TCP_CLOSE_WAIT:
@@ -1585,9 +1576,9 @@ static void xs_tcp_state_change(struct sock *sk)
case TCP_LAST_ACK:
set_bit(XPRT_CLOSING, &xprt->state);
xs_tcp_schedule_linger_timeout(xprt, xs_tcp_fin_timeout);
- smp_mb__before_clear_bit();
+ smp_mb__before_atomic();
clear_bit(XPRT_CONNECTED, &xprt->state);
- smp_mb__after_clear_bit();
+ smp_mb__after_atomic();
break;
case TCP_CLOSE:
xs_tcp_cancel_linger_timeout(xprt);
@@ -2053,7 +2044,6 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
sk->sk_user_data = xprt;
sk->sk_data_ready = xs_udp_data_ready;
sk->sk_write_space = xs_udp_write_space;
- sk->sk_no_check = UDP_CSUM_NORCV;
sk->sk_allocation = GFP_ATOMIC;
xprt_set_connected(xprt);
@@ -2544,6 +2534,10 @@ static void bc_close(struct rpc_xprt *xprt)
static void bc_destroy(struct rpc_xprt *xprt)
{
+ dprintk("RPC: bc_destroy xprt %p\n", xprt);
+
+ xs_xprt_free(xprt);
+ module_put(THIS_MODULE);
}
static struct rpc_xprt_ops xs_local_ops = {
@@ -2744,7 +2738,7 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args)
return xprt;
ret = ERR_PTR(-EINVAL);
out_err:
- xprt_free(xprt);
+ xs_xprt_free(xprt);
return ret;
}
@@ -2822,7 +2816,7 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
return xprt;
ret = ERR_PTR(-EINVAL);
out_err:
- xprt_free(xprt);
+ xs_xprt_free(xprt);
return ret;
}
@@ -2897,12 +2891,11 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
xprt->address_strings[RPC_DISPLAY_ADDR],
xprt->address_strings[RPC_DISPLAY_PROTO]);
-
if (try_module_get(THIS_MODULE))
return xprt;
ret = ERR_PTR(-EINVAL);
out_err:
- xprt_free(xprt);
+ xs_xprt_free(xprt);
return ret;
}
@@ -2919,15 +2912,6 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args)
struct svc_sock *bc_sock;
struct rpc_xprt *ret;
- if (args->bc_xprt->xpt_bc_xprt) {
- /*
- * This server connection already has a backchannel
- * transport; we can't create a new one, as we wouldn't
- * be able to match replies based on xid any more. So,
- * reuse the already-existing one:
- */
- return args->bc_xprt->xpt_bc_xprt;
- }
xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries,
xprt_tcp_slot_table_entries);
if (IS_ERR(xprt))
@@ -2985,13 +2969,14 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args)
*/
xprt_set_connected(xprt);
-
if (try_module_get(THIS_MODULE))
return xprt;
+
+ args->bc_xprt->xpt_bc_xprt = NULL;
xprt_put(xprt);
ret = ERR_PTR(-EINVAL);
out_err:
- xprt_free(xprt);
+ xs_xprt_free(xprt);
return ret;
}
diff --git a/net/tipc/Makefile b/net/tipc/Makefile
index b282f7130d2..a080c66d819 100644
--- a/net/tipc/Makefile
+++ b/net/tipc/Makefile
@@ -5,7 +5,7 @@
obj-$(CONFIG_TIPC) := tipc.o
tipc-y += addr.o bcast.o bearer.o config.o \
- core.o handler.o link.o discover.o msg.o \
+ core.o link.o discover.o msg.o \
name_distr.o subscr.o name_table.o net.o \
netlink.o node.o node_subscr.o port.o ref.o \
socket.o log.o eth_media.o server.o
diff --git a/net/tipc/addr.h b/net/tipc/addr.h
index 60b00ab93d7..a74acf9ee80 100644
--- a/net/tipc/addr.h
+++ b/net/tipc/addr.h
@@ -37,6 +37,8 @@
#ifndef _TIPC_ADDR_H
#define _TIPC_ADDR_H
+#include "core.h"
+
#define TIPC_ZONE_MASK 0xff000000u
#define TIPC_CLUSTER_MASK 0xfffff000u
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index bf860d9e75a..55c6c9d3e1c 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -41,9 +41,9 @@
#include "bcast.h"
#include "name_distr.h"
-#define MAX_PKT_DEFAULT_MCAST 1500 /* bcast link max packet size (fixed) */
-
-#define BCLINK_WIN_DEFAULT 20 /* bcast link window size (default) */
+#define MAX_PKT_DEFAULT_MCAST 1500 /* bcast link max packet size (fixed) */
+#define BCLINK_WIN_DEFAULT 20 /* bcast link window size (default) */
+#define BCBEARER MAX_BEARERS
/**
* struct tipc_bcbearer_pair - a pair of bearers used by broadcast link
@@ -71,7 +71,7 @@ struct tipc_bcbearer_pair {
* Note: The fields labelled "temporary" are incorporated into the bearer
* to avoid consuming potentially limited stack space through the use of
* large local variables within multicast routines. Concurrent access is
- * prevented through use of the spinlock "bc_lock".
+ * prevented through use of the spinlock "bclink_lock".
*/
struct tipc_bcbearer {
struct tipc_bearer bearer;
@@ -84,34 +84,64 @@ struct tipc_bcbearer {
/**
* struct tipc_bclink - link used for broadcast messages
+ * @lock: spinlock governing access to structure
* @link: (non-standard) broadcast link structure
* @node: (non-standard) node structure representing b'cast link's peer node
+ * @flags: represent bclink states
* @bcast_nodes: map of broadcast-capable nodes
* @retransmit_to: node that most recently requested a retransmit
*
* Handles sequence numbering, fragmentation, bundling, etc.
*/
struct tipc_bclink {
+ spinlock_t lock;
struct tipc_link link;
struct tipc_node node;
+ unsigned int flags;
struct tipc_node_map bcast_nodes;
struct tipc_node *retransmit_to;
};
-static struct tipc_bcbearer bcast_bearer;
-static struct tipc_bclink bcast_link;
-
-static struct tipc_bcbearer *bcbearer = &bcast_bearer;
-static struct tipc_bclink *bclink = &bcast_link;
-static struct tipc_link *bcl = &bcast_link.link;
-
-static DEFINE_SPINLOCK(bc_lock);
+static struct tipc_bcbearer *bcbearer;
+static struct tipc_bclink *bclink;
+static struct tipc_link *bcl;
const char tipc_bclink_name[] = "broadcast-link";
static void tipc_nmap_diff(struct tipc_node_map *nm_a,
struct tipc_node_map *nm_b,
struct tipc_node_map *nm_diff);
+static void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node);
+static void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node);
+
+static void tipc_bclink_lock(void)
+{
+ spin_lock_bh(&bclink->lock);
+}
+
+static void tipc_bclink_unlock(void)
+{
+ struct tipc_node *node = NULL;
+
+ if (likely(!bclink->flags)) {
+ spin_unlock_bh(&bclink->lock);
+ return;
+ }
+
+ if (bclink->flags & TIPC_BCLINK_RESET) {
+ bclink->flags &= ~TIPC_BCLINK_RESET;
+ node = tipc_bclink_retransmit_to();
+ }
+ spin_unlock_bh(&bclink->lock);
+
+ if (node)
+ tipc_link_reset_all(node);
+}
+
+void tipc_bclink_set_flags(unsigned int flags)
+{
+ bclink->flags |= flags;
+}
static u32 bcbuf_acks(struct sk_buff *buf)
{
@@ -130,16 +160,16 @@ static void bcbuf_decr_acks(struct sk_buff *buf)
void tipc_bclink_add_node(u32 addr)
{
- spin_lock_bh(&bc_lock);
+ tipc_bclink_lock();
tipc_nmap_add(&bclink->bcast_nodes, addr);
- spin_unlock_bh(&bc_lock);
+ tipc_bclink_unlock();
}
void tipc_bclink_remove_node(u32 addr)
{
- spin_lock_bh(&bc_lock);
+ tipc_bclink_lock();
tipc_nmap_remove(&bclink->bcast_nodes, addr);
- spin_unlock_bh(&bc_lock);
+ tipc_bclink_unlock();
}
static void bclink_set_last_sent(void)
@@ -165,7 +195,7 @@ static void bclink_update_last_sent(struct tipc_node *node, u32 seqno)
/**
* tipc_bclink_retransmit_to - get most recent node to request retransmission
*
- * Called with bc_lock locked
+ * Called with bclink_lock locked
*/
struct tipc_node *tipc_bclink_retransmit_to(void)
{
@@ -177,7 +207,7 @@ struct tipc_node *tipc_bclink_retransmit_to(void)
* @after: sequence number of last packet to *not* retransmit
* @to: sequence number of last packet to retransmit
*
- * Called with bc_lock locked
+ * Called with bclink_lock locked
*/
static void bclink_retransmit_pkt(u32 after, u32 to)
{
@@ -194,7 +224,7 @@ static void bclink_retransmit_pkt(u32 after, u32 to)
* @n_ptr: node that sent acknowledgement info
* @acked: broadcast sequence # that has been acknowledged
*
- * Node is locked, bc_lock unlocked.
+ * Node is locked, bclink_lock unlocked.
*/
void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked)
{
@@ -202,8 +232,7 @@ void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked)
struct sk_buff *next;
unsigned int released = 0;
- spin_lock_bh(&bc_lock);
-
+ tipc_bclink_lock();
/* Bail out if tx queue is empty (no clean up is required) */
crs = bcl->first_out;
if (!crs)
@@ -267,13 +296,13 @@ void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked)
if (unlikely(released && !list_empty(&bcl->waiting_ports)))
tipc_link_wakeup_ports(bcl, 0);
exit:
- spin_unlock_bh(&bc_lock);
+ tipc_bclink_unlock();
}
/**
* tipc_bclink_update_link_state - update broadcast link state
*
- * tipc_net_lock and node lock set
+ * RCU and node lock set
*/
void tipc_bclink_update_link_state(struct tipc_node *n_ptr, u32 last_sent)
{
@@ -320,10 +349,10 @@ void tipc_bclink_update_link_state(struct tipc_node *n_ptr, u32 last_sent)
? buf_seqno(n_ptr->bclink.deferred_head) - 1
: n_ptr->bclink.last_sent);
- spin_lock_bh(&bc_lock);
- tipc_bearer_send(&bcbearer->bearer, buf, NULL);
+ tipc_bclink_lock();
+ tipc_bearer_send(MAX_BEARERS, buf, NULL);
bcl->stats.sent_nacks++;
- spin_unlock_bh(&bc_lock);
+ tipc_bclink_unlock();
kfree_skb(buf);
n_ptr->bclink.oos_state++;
@@ -335,8 +364,6 @@ void tipc_bclink_update_link_state(struct tipc_node *n_ptr, u32 last_sent)
*
* Delay any upcoming NACK by this node if another node has already
* requested the first message this node is going to ask for.
- *
- * Only tipc_net_lock set.
*/
static void bclink_peek_nack(struct tipc_msg *msg)
{
@@ -356,13 +383,13 @@ static void bclink_peek_nack(struct tipc_msg *msg)
}
/*
- * tipc_bclink_send_msg - broadcast a packet to all nodes in cluster
+ * tipc_bclink_xmit - broadcast a packet to all nodes in cluster
*/
-int tipc_bclink_send_msg(struct sk_buff *buf)
+int tipc_bclink_xmit(struct sk_buff *buf)
{
int res;
- spin_lock_bh(&bc_lock);
+ tipc_bclink_lock();
if (!bclink->bcast_nodes.count) {
res = msg_data_sz(buf_msg(buf));
@@ -370,21 +397,21 @@ int tipc_bclink_send_msg(struct sk_buff *buf)
goto exit;
}
- res = tipc_link_send_buf(bcl, buf);
+ res = __tipc_link_xmit(bcl, buf);
if (likely(res >= 0)) {
bclink_set_last_sent();
bcl->stats.queue_sz_counts++;
bcl->stats.accu_queue_sz += bcl->out_queue_size;
}
exit:
- spin_unlock_bh(&bc_lock);
+ tipc_bclink_unlock();
return res;
}
/**
* bclink_accept_pkt - accept an incoming, in-sequence broadcast packet
*
- * Called with both sending node's lock and bc_lock taken.
+ * Called with both sending node's lock and bclink_lock taken.
*/
static void bclink_accept_pkt(struct tipc_node *node, u32 seqno)
{
@@ -399,19 +426,18 @@ static void bclink_accept_pkt(struct tipc_node *node, u32 seqno)
*/
if (((seqno - tipc_own_addr) % TIPC_MIN_LINK_WIN) == 0) {
- tipc_link_send_proto_msg(
- node->active_links[node->addr & 1],
- STATE_MSG, 0, 0, 0, 0, 0);
+ tipc_link_proto_xmit(node->active_links[node->addr & 1],
+ STATE_MSG, 0, 0, 0, 0, 0);
bcl->stats.sent_acks++;
}
}
/**
- * tipc_bclink_recv_pkt - receive a broadcast packet, and deliver upwards
+ * tipc_bclink_rcv - receive a broadcast packet, and deliver upwards
*
- * tipc_net_lock is read_locked, no other locks set
+ * RCU is locked, no other locks set
*/
-void tipc_bclink_recv_pkt(struct sk_buff *buf)
+void tipc_bclink_rcv(struct sk_buff *buf)
{
struct tipc_msg *msg = buf_msg(buf);
struct tipc_node *node;
@@ -440,12 +466,12 @@ void tipc_bclink_recv_pkt(struct sk_buff *buf)
if (msg_destnode(msg) == tipc_own_addr) {
tipc_bclink_acknowledge(node, msg_bcast_ack(msg));
tipc_node_unlock(node);
- spin_lock_bh(&bc_lock);
+ tipc_bclink_lock();
bcl->stats.recv_nacks++;
bclink->retransmit_to = node;
bclink_retransmit_pkt(msg_bcgap_after(msg),
msg_bcgap_to(msg));
- spin_unlock_bh(&bc_lock);
+ tipc_bclink_unlock();
} else {
tipc_node_unlock(node);
bclink_peek_nack(msg);
@@ -463,51 +489,47 @@ receive:
/* Deliver message to destination */
if (likely(msg_isdata(msg))) {
- spin_lock_bh(&bc_lock);
+ tipc_bclink_lock();
bclink_accept_pkt(node, seqno);
- spin_unlock_bh(&bc_lock);
+ tipc_bclink_unlock();
tipc_node_unlock(node);
if (likely(msg_mcast(msg)))
- tipc_port_recv_mcast(buf, NULL);
+ tipc_port_mcast_rcv(buf, NULL);
else
kfree_skb(buf);
} else if (msg_user(msg) == MSG_BUNDLER) {
- spin_lock_bh(&bc_lock);
+ tipc_bclink_lock();
bclink_accept_pkt(node, seqno);
bcl->stats.recv_bundles++;
bcl->stats.recv_bundled += msg_msgcnt(msg);
- spin_unlock_bh(&bc_lock);
+ tipc_bclink_unlock();
tipc_node_unlock(node);
- tipc_link_recv_bundle(buf);
+ tipc_link_bundle_rcv(buf);
} else if (msg_user(msg) == MSG_FRAGMENTER) {
- int ret;
- ret = tipc_link_recv_fragment(&node->bclink.reasm_head,
- &node->bclink.reasm_tail,
- &buf);
- if (ret == LINK_REASM_ERROR)
+ tipc_buf_append(&node->bclink.reasm_buf, &buf);
+ if (unlikely(!buf && !node->bclink.reasm_buf))
goto unlock;
- spin_lock_bh(&bc_lock);
+ tipc_bclink_lock();
bclink_accept_pkt(node, seqno);
bcl->stats.recv_fragments++;
- if (ret == LINK_REASM_COMPLETE) {
+ if (buf) {
bcl->stats.recv_fragmented++;
- /* Point msg to inner header */
msg = buf_msg(buf);
- spin_unlock_bh(&bc_lock);
+ tipc_bclink_unlock();
goto receive;
}
- spin_unlock_bh(&bc_lock);
+ tipc_bclink_unlock();
tipc_node_unlock(node);
} else if (msg_user(msg) == NAME_DISTRIBUTOR) {
- spin_lock_bh(&bc_lock);
+ tipc_bclink_lock();
bclink_accept_pkt(node, seqno);
- spin_unlock_bh(&bc_lock);
+ tipc_bclink_unlock();
tipc_node_unlock(node);
- tipc_named_recv(buf);
+ tipc_named_rcv(buf);
} else {
- spin_lock_bh(&bc_lock);
+ tipc_bclink_lock();
bclink_accept_pkt(node, seqno);
- spin_unlock_bh(&bc_lock);
+ tipc_bclink_unlock();
tipc_node_unlock(node);
kfree_skb(buf);
}
@@ -537,6 +559,7 @@ receive:
buf = node->bclink.deferred_head;
node->bclink.deferred_head = buf->next;
+ buf->next = NULL;
node->bclink.deferred_size--;
goto receive;
}
@@ -553,14 +576,14 @@ receive:
} else
deferred = 0;
- spin_lock_bh(&bc_lock);
+ tipc_bclink_lock();
if (deferred)
bcl->stats.deferred_recv++;
else
bcl->stats.duplicates++;
- spin_unlock_bh(&bc_lock);
+ tipc_bclink_unlock();
unlock:
tipc_node_unlock(node);
@@ -628,13 +651,13 @@ static int tipc_bcbearer_send(struct sk_buff *buf, struct tipc_bearer *unused1,
if (bp_index == 0) {
/* Use original buffer for first bearer */
- tipc_bearer_send(b, buf, &b->bcast_addr);
+ tipc_bearer_send(b->identity, buf, &b->bcast_addr);
} else {
/* Avoid concurrent buffer access */
- tbuf = pskb_copy(buf, GFP_ATOMIC);
+ tbuf = pskb_copy_for_clone(buf, GFP_ATOMIC);
if (!tbuf)
break;
- tipc_bearer_send(b, tbuf, &b->bcast_addr);
+ tipc_bearer_send(b->identity, tbuf, &b->bcast_addr);
kfree_skb(tbuf); /* Bearer keeps a clone */
}
@@ -656,22 +679,28 @@ static int tipc_bcbearer_send(struct sk_buff *buf, struct tipc_bearer *unused1,
/**
* tipc_bcbearer_sort - create sets of bearer pairs used by broadcast bearer
*/
-void tipc_bcbearer_sort(void)
+void tipc_bcbearer_sort(struct tipc_node_map *nm_ptr, u32 node, bool action)
{
struct tipc_bcbearer_pair *bp_temp = bcbearer->bpairs_temp;
struct tipc_bcbearer_pair *bp_curr;
+ struct tipc_bearer *b;
int b_index;
int pri;
- spin_lock_bh(&bc_lock);
+ tipc_bclink_lock();
+
+ if (action)
+ tipc_nmap_add(nm_ptr, node);
+ else
+ tipc_nmap_remove(nm_ptr, node);
/* Group bearers by priority (can assume max of two per priority) */
memset(bp_temp, 0, sizeof(bcbearer->bpairs_temp));
+ rcu_read_lock();
for (b_index = 0; b_index < MAX_BEARERS; b_index++) {
- struct tipc_bearer *b = &tipc_bearers[b_index];
-
- if (!b->active || !b->nodes.count)
+ b = rcu_dereference_rtnl(bearer_list[b_index]);
+ if (!b || !b->nodes.count)
continue;
if (!bp_temp[b->priority].primary)
@@ -679,6 +708,7 @@ void tipc_bcbearer_sort(void)
else
bp_temp[b->priority].secondary = b;
}
+ rcu_read_unlock();
/* Create array of bearer pairs for broadcasting */
bp_curr = bcbearer->bpairs;
@@ -704,7 +734,7 @@ void tipc_bcbearer_sort(void)
bp_curr++;
}
- spin_unlock_bh(&bc_lock);
+ tipc_bclink_unlock();
}
@@ -716,7 +746,7 @@ int tipc_bclink_stats(char *buf, const u32 buf_size)
if (!bcl)
return 0;
- spin_lock_bh(&bc_lock);
+ tipc_bclink_lock();
s = &bcl->stats;
@@ -745,7 +775,7 @@ int tipc_bclink_stats(char *buf, const u32 buf_size)
s->queue_sz_counts ?
(s->accu_queue_sz / s->queue_sz_counts) : 0);
- spin_unlock_bh(&bc_lock);
+ tipc_bclink_unlock();
return ret;
}
@@ -754,9 +784,9 @@ int tipc_bclink_reset_stats(void)
if (!bcl)
return -ENOPROTOOPT;
- spin_lock_bh(&bc_lock);
+ tipc_bclink_lock();
memset(&bcl->stats, 0, sizeof(bcl->stats));
- spin_unlock_bh(&bc_lock);
+ tipc_bclink_unlock();
return 0;
}
@@ -767,45 +797,59 @@ int tipc_bclink_set_queue_limits(u32 limit)
if ((limit < TIPC_MIN_LINK_WIN) || (limit > TIPC_MAX_LINK_WIN))
return -EINVAL;
- spin_lock_bh(&bc_lock);
+ tipc_bclink_lock();
tipc_link_set_queue_limits(bcl, limit);
- spin_unlock_bh(&bc_lock);
+ tipc_bclink_unlock();
return 0;
}
-void tipc_bclink_init(void)
+int tipc_bclink_init(void)
{
+ bcbearer = kzalloc(sizeof(*bcbearer), GFP_ATOMIC);
+ if (!bcbearer)
+ return -ENOMEM;
+
+ bclink = kzalloc(sizeof(*bclink), GFP_ATOMIC);
+ if (!bclink) {
+ kfree(bcbearer);
+ return -ENOMEM;
+ }
+
+ bcl = &bclink->link;
bcbearer->bearer.media = &bcbearer->media;
bcbearer->media.send_msg = tipc_bcbearer_send;
sprintf(bcbearer->media.name, "tipc-broadcast");
+ spin_lock_init(&bclink->lock);
INIT_LIST_HEAD(&bcl->waiting_ports);
bcl->next_out_no = 1;
spin_lock_init(&bclink->node.lock);
bcl->owner = &bclink->node;
bcl->max_pkt = MAX_PKT_DEFAULT_MCAST;
tipc_link_set_queue_limits(bcl, BCLINK_WIN_DEFAULT);
- spin_lock_init(&bcbearer->bearer.lock);
- bcl->b_ptr = &bcbearer->bearer;
+ bcl->bearer_id = MAX_BEARERS;
+ rcu_assign_pointer(bearer_list[MAX_BEARERS], &bcbearer->bearer);
bcl->state = WORKING_WORKING;
strlcpy(bcl->name, tipc_bclink_name, TIPC_MAX_LINK_NAME);
+ return 0;
}
void tipc_bclink_stop(void)
{
- spin_lock_bh(&bc_lock);
+ tipc_bclink_lock();
tipc_link_purge_queues(bcl);
- spin_unlock_bh(&bc_lock);
+ tipc_bclink_unlock();
- memset(bclink, 0, sizeof(*bclink));
- memset(bcbearer, 0, sizeof(*bcbearer));
+ RCU_INIT_POINTER(bearer_list[BCBEARER], NULL);
+ synchronize_net();
+ kfree(bcbearer);
+ kfree(bclink);
}
-
/**
* tipc_nmap_add - add a node to a node map
*/
-void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node)
+static void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node)
{
int n = tipc_node(node);
int w = n / WSIZE;
@@ -820,7 +864,7 @@ void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node)
/**
* tipc_nmap_remove - remove a node from a node map
*/
-void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node)
+static void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node)
{
int n = tipc_node(node);
int w = n / WSIZE;
diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h
index 6ee587b469f..00330c45df3 100644
--- a/net/tipc/bcast.h
+++ b/net/tipc/bcast.h
@@ -39,6 +39,7 @@
#define MAX_NODES 4096
#define WSIZE 32
+#define TIPC_BCLINK_RESET 1
/**
* struct tipc_node_map - set of node identifiers
@@ -69,9 +70,6 @@ struct tipc_node;
extern const char tipc_bclink_name[];
-void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node);
-void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node);
-
/**
* tipc_nmap_equal - test for equality of node maps
*/
@@ -84,20 +82,21 @@ static inline int tipc_nmap_equal(struct tipc_node_map *nm_a,
void tipc_port_list_add(struct tipc_port_list *pl_ptr, u32 port);
void tipc_port_list_free(struct tipc_port_list *pl_ptr);
-void tipc_bclink_init(void);
+int tipc_bclink_init(void);
void tipc_bclink_stop(void);
+void tipc_bclink_set_flags(unsigned int flags);
void tipc_bclink_add_node(u32 addr);
void tipc_bclink_remove_node(u32 addr);
struct tipc_node *tipc_bclink_retransmit_to(void);
void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked);
-int tipc_bclink_send_msg(struct sk_buff *buf);
-void tipc_bclink_recv_pkt(struct sk_buff *buf);
+int tipc_bclink_xmit(struct sk_buff *buf);
+void tipc_bclink_rcv(struct sk_buff *buf);
u32 tipc_bclink_get_last_sent(void);
u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr);
void tipc_bclink_update_link_state(struct tipc_node *n_ptr, u32 last_sent);
int tipc_bclink_stats(char *stats_buf, const u32 buf_size);
int tipc_bclink_reset_stats(void);
int tipc_bclink_set_queue_limits(u32 limit);
-void tipc_bcbearer_sort(void);
+void tipc_bcbearer_sort(struct tipc_node_map *nm_ptr, u32 node, bool action);
#endif
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 574b86193b1..264474394f9 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -49,9 +49,9 @@ static struct tipc_media * const media_info_array[] = {
NULL
};
-struct tipc_bearer tipc_bearers[MAX_BEARERS];
+struct tipc_bearer __rcu *bearer_list[MAX_BEARERS + 1];
-static void bearer_disable(struct tipc_bearer *b_ptr);
+static void bearer_disable(struct tipc_bearer *b_ptr, bool shutting_down);
/**
* tipc_media_find - locates specified media object by name
@@ -177,8 +177,9 @@ struct tipc_bearer *tipc_bearer_find(const char *name)
struct tipc_bearer *b_ptr;
u32 i;
- for (i = 0, b_ptr = tipc_bearers; i < MAX_BEARERS; i++, b_ptr++) {
- if (b_ptr->active && (!strcmp(b_ptr->name, name)))
+ for (i = 0; i < MAX_BEARERS; i++) {
+ b_ptr = rtnl_dereference(bearer_list[i]);
+ if (b_ptr && (!strcmp(b_ptr->name, name)))
return b_ptr;
}
return NULL;
@@ -197,33 +198,45 @@ struct sk_buff *tipc_bearer_get_names(void)
if (!buf)
return NULL;
- read_lock_bh(&tipc_net_lock);
for (i = 0; media_info_array[i] != NULL; i++) {
for (j = 0; j < MAX_BEARERS; j++) {
- b = &tipc_bearers[j];
- if (b->active && (b->media == media_info_array[i])) {
+ b = rtnl_dereference(bearer_list[j]);
+ if (!b)
+ continue;
+ if (b->media == media_info_array[i]) {
tipc_cfg_append_tlv(buf, TIPC_TLV_BEARER_NAME,
b->name,
strlen(b->name) + 1);
}
}
}
- read_unlock_bh(&tipc_net_lock);
return buf;
}
-void tipc_bearer_add_dest(struct tipc_bearer *b_ptr, u32 dest)
+void tipc_bearer_add_dest(u32 bearer_id, u32 dest)
{
- tipc_nmap_add(&b_ptr->nodes, dest);
- tipc_bcbearer_sort();
- tipc_disc_add_dest(b_ptr->link_req);
+ struct tipc_bearer *b_ptr;
+
+ rcu_read_lock();
+ b_ptr = rcu_dereference_rtnl(bearer_list[bearer_id]);
+ if (b_ptr) {
+ tipc_bcbearer_sort(&b_ptr->nodes, dest, true);
+ tipc_disc_add_dest(b_ptr->link_req);
+ }
+ rcu_read_unlock();
}
-void tipc_bearer_remove_dest(struct tipc_bearer *b_ptr, u32 dest)
+void tipc_bearer_remove_dest(u32 bearer_id, u32 dest)
{
- tipc_nmap_remove(&b_ptr->nodes, dest);
- tipc_bcbearer_sort();
- tipc_disc_remove_dest(b_ptr->link_req);
+ struct tipc_bearer *b_ptr;
+
+ rcu_read_lock();
+ b_ptr = rcu_dereference_rtnl(bearer_list[bearer_id]);
+ if (b_ptr) {
+ tipc_bcbearer_sort(&b_ptr->nodes, dest, false);
+ tipc_disc_remove_dest(b_ptr->link_req);
+ }
+ rcu_read_unlock();
}
/**
@@ -268,13 +281,11 @@ int tipc_enable_bearer(const char *name, u32 disc_domain, u32 priority)
return -EINVAL;
}
- write_lock_bh(&tipc_net_lock);
-
m_ptr = tipc_media_find(b_names.media_name);
if (!m_ptr) {
pr_warn("Bearer <%s> rejected, media <%s> not registered\n",
name, b_names.media_name);
- goto exit;
+ return -EINVAL;
}
if (priority == TIPC_MEDIA_LINK_PRI)
@@ -284,21 +295,22 @@ restart:
bearer_id = MAX_BEARERS;
with_this_prio = 1;
for (i = MAX_BEARERS; i-- != 0; ) {
- if (!tipc_bearers[i].active) {
+ b_ptr = rtnl_dereference(bearer_list[i]);
+ if (!b_ptr) {
bearer_id = i;
continue;
}
- if (!strcmp(name, tipc_bearers[i].name)) {
+ if (!strcmp(name, b_ptr->name)) {
pr_warn("Bearer <%s> rejected, already enabled\n",
name);
- goto exit;
+ return -EINVAL;
}
- if ((tipc_bearers[i].priority == priority) &&
+ if ((b_ptr->priority == priority) &&
(++with_this_prio > 2)) {
if (priority-- == 0) {
pr_warn("Bearer <%s> rejected, duplicate priority\n",
name);
- goto exit;
+ return -EINVAL;
}
pr_warn("Bearer <%s> priority adjustment required %u->%u\n",
name, priority + 1, priority);
@@ -308,40 +320,42 @@ restart:
if (bearer_id >= MAX_BEARERS) {
pr_warn("Bearer <%s> rejected, bearer limit reached (%u)\n",
name, MAX_BEARERS);
- goto exit;
+ return -EINVAL;
}
- b_ptr = &tipc_bearers[bearer_id];
+ b_ptr = kzalloc(sizeof(*b_ptr), GFP_ATOMIC);
+ if (!b_ptr)
+ return -ENOMEM;
+
strcpy(b_ptr->name, name);
b_ptr->media = m_ptr;
res = m_ptr->enable_media(b_ptr);
if (res) {
pr_warn("Bearer <%s> rejected, enable failure (%d)\n",
name, -res);
- goto exit;
+ return -EINVAL;
}
b_ptr->identity = bearer_id;
b_ptr->tolerance = m_ptr->tolerance;
b_ptr->window = m_ptr->window;
+ b_ptr->domain = disc_domain;
b_ptr->net_plane = bearer_id + 'A';
- b_ptr->active = 1;
b_ptr->priority = priority;
- INIT_LIST_HEAD(&b_ptr->links);
- spin_lock_init(&b_ptr->lock);
- res = tipc_disc_create(b_ptr, &b_ptr->bcast_addr, disc_domain);
+ res = tipc_disc_create(b_ptr, &b_ptr->bcast_addr);
if (res) {
- bearer_disable(b_ptr);
+ bearer_disable(b_ptr, false);
pr_warn("Bearer <%s> rejected, discovery object creation failed\n",
name);
- goto exit;
+ return -EINVAL;
}
+
+ rcu_assign_pointer(bearer_list[bearer_id], b_ptr);
+
pr_info("Enabled bearer <%s>, discovery domain %s, priority %u\n",
name,
tipc_addr_string_fill(addr_string, disc_domain), priority);
-exit:
- write_unlock_bh(&tipc_net_lock);
return res;
}
@@ -350,49 +364,35 @@ exit:
*/
static int tipc_reset_bearer(struct tipc_bearer *b_ptr)
{
- struct tipc_link *l_ptr;
- struct tipc_link *temp_l_ptr;
-
- read_lock_bh(&tipc_net_lock);
pr_info("Resetting bearer <%s>\n", b_ptr->name);
- spin_lock_bh(&b_ptr->lock);
- list_for_each_entry_safe(l_ptr, temp_l_ptr, &b_ptr->links, link_list) {
- struct tipc_node *n_ptr = l_ptr->owner;
-
- spin_lock_bh(&n_ptr->lock);
- tipc_link_reset(l_ptr);
- spin_unlock_bh(&n_ptr->lock);
- }
- spin_unlock_bh(&b_ptr->lock);
- read_unlock_bh(&tipc_net_lock);
+ tipc_link_reset_list(b_ptr->identity);
+ tipc_disc_reset(b_ptr);
return 0;
}
/**
* bearer_disable
*
- * Note: This routine assumes caller holds tipc_net_lock.
+ * Note: This routine assumes caller holds RTNL lock.
*/
-static void bearer_disable(struct tipc_bearer *b_ptr)
+static void bearer_disable(struct tipc_bearer *b_ptr, bool shutting_down)
{
- struct tipc_link *l_ptr;
- struct tipc_link *temp_l_ptr;
- struct tipc_link_req *temp_req;
+ u32 i;
pr_info("Disabling bearer <%s>\n", b_ptr->name);
- spin_lock_bh(&b_ptr->lock);
b_ptr->media->disable_media(b_ptr);
- list_for_each_entry_safe(l_ptr, temp_l_ptr, &b_ptr->links, link_list) {
- tipc_link_delete(l_ptr);
- }
- temp_req = b_ptr->link_req;
- b_ptr->link_req = NULL;
- spin_unlock_bh(&b_ptr->lock);
- if (temp_req)
- tipc_disc_delete(temp_req);
+ tipc_link_delete_list(b_ptr->identity, shutting_down);
+ if (b_ptr->link_req)
+ tipc_disc_delete(b_ptr->link_req);
- memset(b_ptr, 0, sizeof(struct tipc_bearer));
+ for (i = 0; i < MAX_BEARERS; i++) {
+ if (b_ptr == rtnl_dereference(bearer_list[i])) {
+ RCU_INIT_POINTER(bearer_list[i], NULL);
+ break;
+ }
+ }
+ kfree_rcu(b_ptr, rcu);
}
int tipc_disable_bearer(const char *name)
@@ -400,41 +400,17 @@ int tipc_disable_bearer(const char *name)
struct tipc_bearer *b_ptr;
int res;
- write_lock_bh(&tipc_net_lock);
b_ptr = tipc_bearer_find(name);
if (b_ptr == NULL) {
pr_warn("Attempt to disable unknown bearer <%s>\n", name);
res = -EINVAL;
} else {
- bearer_disable(b_ptr);
+ bearer_disable(b_ptr, false);
res = 0;
}
- write_unlock_bh(&tipc_net_lock);
return res;
}
-
-/* tipc_l2_media_addr_set - initialize Ethernet media address structure
- *
- * Media-dependent "value" field stores MAC address in first 6 bytes
- * and zeroes out the remaining bytes.
- */
-void tipc_l2_media_addr_set(const struct tipc_bearer *b,
- struct tipc_media_addr *a, char *mac)
-{
- int len = b->media->hwaddr_len;
-
- if (unlikely(sizeof(a->value) < len)) {
- WARN_ONCE(1, "Media length invalid\n");
- return;
- }
-
- memcpy(a->value, mac, len);
- memset(a->value + len, 0, sizeof(a->value) - len);
- a->media_id = b->media->type_id;
- a->broadcast = !memcmp(mac, b->bcast_addr.value, len);
-}
-
int tipc_enable_l2_media(struct tipc_bearer *b)
{
struct net_device *dev;
@@ -445,33 +421,37 @@ int tipc_enable_l2_media(struct tipc_bearer *b)
if (!dev)
return -ENODEV;
- /* Associate TIPC bearer with Ethernet bearer */
- b->media_ptr = dev;
- memset(b->bcast_addr.value, 0, sizeof(b->bcast_addr.value));
+ /* Associate TIPC bearer with L2 bearer */
+ rcu_assign_pointer(b->media_ptr, dev);
+ memset(&b->bcast_addr, 0, sizeof(b->bcast_addr));
memcpy(b->bcast_addr.value, dev->broadcast, b->media->hwaddr_len);
b->bcast_addr.media_id = b->media->type_id;
b->bcast_addr.broadcast = 1;
b->mtu = dev->mtu;
- tipc_l2_media_addr_set(b, &b->addr, (char *)dev->dev_addr);
+ b->media->raw2addr(b, &b->addr, (char *)dev->dev_addr);
rcu_assign_pointer(dev->tipc_ptr, b);
return 0;
}
-/* tipc_disable_l2_media - detach TIPC bearer from an Ethernet interface
+/* tipc_disable_l2_media - detach TIPC bearer from an L2 interface
*
- * Mark Ethernet bearer as inactive so that incoming buffers are thrown away,
+ * Mark L2 bearer as inactive so that incoming buffers are thrown away,
* then get worker thread to complete bearer cleanup. (Can't do cleanup
* here because cleanup code needs to sleep and caller holds spinlocks.)
*/
void tipc_disable_l2_media(struct tipc_bearer *b)
{
- struct net_device *dev = (struct net_device *)b->media_ptr;
+ struct net_device *dev;
+
+ dev = (struct net_device *)rtnl_dereference(b->media_ptr);
+ RCU_INIT_POINTER(b->media_ptr, NULL);
RCU_INIT_POINTER(dev->tipc_ptr, NULL);
+ synchronize_net();
dev_put(dev);
}
/**
- * tipc_l2_send_msg - send a TIPC packet out over an Ethernet interface
+ * tipc_l2_send_msg - send a TIPC packet out over an L2 interface
* @buf: the packet to be sent
* @b_ptr: the bearer through which the packet is to be sent
* @dest: peer destination address
@@ -480,8 +460,12 @@ int tipc_l2_send_msg(struct sk_buff *buf, struct tipc_bearer *b,
struct tipc_media_addr *dest)
{
struct sk_buff *clone;
+ struct net_device *dev;
int delta;
- struct net_device *dev = (struct net_device *)b->media_ptr;
+
+ dev = (struct net_device *)rcu_dereference_rtnl(b->media_ptr);
+ if (!dev)
+ return 0;
clone = skb_clone(buf, GFP_ATOMIC);
if (!clone)
@@ -509,10 +493,16 @@ int tipc_l2_send_msg(struct sk_buff *buf, struct tipc_bearer *b,
* The media send routine must not alter the buffer being passed in
* as it may be needed for later retransmission!
*/
-void tipc_bearer_send(struct tipc_bearer *b, struct sk_buff *buf,
+void tipc_bearer_send(u32 bearer_id, struct sk_buff *buf,
struct tipc_media_addr *dest)
{
- b->media->send_msg(buf, b, dest);
+ struct tipc_bearer *b_ptr;
+
+ rcu_read_lock();
+ b_ptr = rcu_dereference_rtnl(bearer_list[bearer_id]);
+ if (likely(b_ptr))
+ b_ptr->media->send_msg(buf, b_ptr, dest);
+ rcu_read_unlock();
}
/**
@@ -537,7 +527,7 @@ static int tipc_l2_rcv_msg(struct sk_buff *buf, struct net_device *dev,
}
rcu_read_lock();
- b_ptr = rcu_dereference(dev->tipc_ptr);
+ b_ptr = rcu_dereference_rtnl(dev->tipc_ptr);
if (likely(b_ptr)) {
if (likely(buf->pkt_type <= PACKET_BROADCAST)) {
buf->next = NULL;
@@ -570,12 +560,9 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt,
if (!net_eq(dev_net(dev), &init_net))
return NOTIFY_DONE;
- rcu_read_lock();
- b_ptr = rcu_dereference(dev->tipc_ptr);
- if (!b_ptr) {
- rcu_read_unlock();
+ b_ptr = rtnl_dereference(dev->tipc_ptr);
+ if (!b_ptr)
return NOTIFY_DONE;
- }
b_ptr->mtu = dev->mtu;
@@ -585,21 +572,23 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt,
break;
case NETDEV_DOWN:
case NETDEV_CHANGEMTU:
+ tipc_reset_bearer(b_ptr);
+ break;
case NETDEV_CHANGEADDR:
+ b_ptr->media->raw2addr(b_ptr, &b_ptr->addr,
+ (char *)dev->dev_addr);
tipc_reset_bearer(b_ptr);
break;
case NETDEV_UNREGISTER:
case NETDEV_CHANGENAME:
- tipc_disable_bearer(b_ptr->name);
+ bearer_disable(b_ptr, false);
break;
}
- rcu_read_unlock();
-
return NOTIFY_OK;
}
static struct packet_type tipc_packet_type __read_mostly = {
- .type = __constant_htons(ETH_P_TIPC),
+ .type = htons(ETH_P_TIPC),
.func = tipc_l2_rcv_msg,
};
@@ -627,10 +616,14 @@ void tipc_bearer_cleanup(void)
void tipc_bearer_stop(void)
{
+ struct tipc_bearer *b_ptr;
u32 i;
for (i = 0; i < MAX_BEARERS; i++) {
- if (tipc_bearers[i].active)
- bearer_disable(&tipc_bearers[i]);
+ b_ptr = rtnl_dereference(bearer_list[i]);
+ if (b_ptr) {
+ bearer_disable(b_ptr, true);
+ bearer_list[i] = NULL;
+ }
}
}
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index 4f5db9ad5bf..78fccc49de2 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -42,14 +42,12 @@
#define MAX_BEARERS 2
#define MAX_MEDIA 2
-/*
- * Identifiers associated with TIPC message header media address info
- *
- * - address info field is 20 bytes long
- * - media type identifier located at offset 3
- * - remaining bytes vary according to media type
+/* Identifiers associated with TIPC message header media address info
+ * - address info field is 32 bytes long
+ * - the field's actual content and length is defined per media
+ * - remaining unused bytes in the field are set to zero
*/
-#define TIPC_MEDIA_ADDR_SIZE 20
+#define TIPC_MEDIA_ADDR_SIZE 32
#define TIPC_MEDIA_TYPE_OFFSET 3
/*
@@ -77,9 +75,10 @@ struct tipc_bearer;
* @send_msg: routine which handles buffer transmission
* @enable_media: routine which enables a media
* @disable_media: routine which disables a media
- * @addr2str: routine which converts media address to string
- * @addr2msg: routine which converts media address to protocol message area
- * @msg2addr: routine which converts media address from protocol message area
+ * @addr2str: convert media address format to string
+ * @addr2msg: convert from media addr format to discovery msg addr format
+ * @msg2addr: convert from discovery msg addr format to media addr format
+ * @raw2addr: convert from raw addr format to media addr format
* @priority: default link (and bearer) priority
* @tolerance: default time (in ms) before declaring link failure
* @window: default window (in packets) before declaring link congestion
@@ -93,10 +92,16 @@ struct tipc_media {
struct tipc_media_addr *dest);
int (*enable_media)(struct tipc_bearer *b_ptr);
void (*disable_media)(struct tipc_bearer *b_ptr);
- int (*addr2str)(struct tipc_media_addr *a, char *str_buf, int str_size);
- int (*addr2msg)(struct tipc_media_addr *a, char *msg_area);
- int (*msg2addr)(const struct tipc_bearer *b_ptr,
- struct tipc_media_addr *a, char *msg_area);
+ int (*addr2str)(struct tipc_media_addr *addr,
+ char *strbuf,
+ int bufsz);
+ int (*addr2msg)(char *msg, struct tipc_media_addr *addr);
+ int (*msg2addr)(struct tipc_bearer *b,
+ struct tipc_media_addr *addr,
+ char *msg);
+ int (*raw2addr)(struct tipc_bearer *b,
+ struct tipc_media_addr *addr,
+ char *raw);
u32 priority;
u32 tolerance;
u32 window;
@@ -107,21 +112,19 @@ struct tipc_media {
/**
* struct tipc_bearer - Generic TIPC bearer structure
- * @dev: ptr to associated network device
- * @usr_handle: pointer to additional media-specific information about bearer
+ * @media_ptr: pointer to additional media-specific information about bearer
* @mtu: max packet size bearer can support
- * @lock: spinlock for controlling access to bearer
* @addr: media-specific address associated with bearer
* @name: bearer name (format = media:interface)
* @media: ptr to media structure associated with bearer
* @bcast_addr: media address used in broadcasting
+ * @rcu: rcu struct for tipc_bearer
* @priority: default link priority for bearer
* @window: default window size for bearer
* @tolerance: default link tolerance for bearer
+ * @domain: network domain to which links can be established
* @identity: array index of this bearer within TIPC bearer array
* @link_req: ptr to (optional) structure making periodic link setup requests
- * @links: list of non-congested links associated with bearer
- * @active: non-zero if bearer structure is represents a bearer
* @net_plane: network plane ('A' through 'H') currently associated with bearer
* @nodes: indicates which nodes in cluster can be reached through bearer
*
@@ -130,20 +133,19 @@ struct tipc_media {
* care of initializing all other fields.
*/
struct tipc_bearer {
- void *media_ptr; /* initalized by media */
+ void __rcu *media_ptr; /* initalized by media */
u32 mtu; /* initalized by media */
struct tipc_media_addr addr; /* initalized by media */
char name[TIPC_MAX_BEARER_NAME];
- spinlock_t lock;
struct tipc_media *media;
struct tipc_media_addr bcast_addr;
+ struct rcu_head rcu;
u32 priority;
u32 window;
u32 tolerance;
+ u32 domain;
u32 identity;
struct tipc_link_req *link_req;
- struct list_head links;
- int active;
char net_plane;
struct tipc_node_map nodes;
};
@@ -155,7 +157,7 @@ struct tipc_bearer_names {
struct tipc_link;
-extern struct tipc_bearer tipc_bearers[];
+extern struct tipc_bearer __rcu *bearer_list[];
/*
* TIPC routines available to supported media types
@@ -178,22 +180,20 @@ int tipc_media_set_priority(const char *name, u32 new_value);
int tipc_media_set_window(const char *name, u32 new_value);
void tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a);
struct sk_buff *tipc_media_get_names(void);
-void tipc_l2_media_addr_set(const struct tipc_bearer *b,
- struct tipc_media_addr *a, char *mac);
int tipc_enable_l2_media(struct tipc_bearer *b);
void tipc_disable_l2_media(struct tipc_bearer *b);
int tipc_l2_send_msg(struct sk_buff *buf, struct tipc_bearer *b,
struct tipc_media_addr *dest);
struct sk_buff *tipc_bearer_get_names(void);
-void tipc_bearer_add_dest(struct tipc_bearer *b_ptr, u32 dest);
-void tipc_bearer_remove_dest(struct tipc_bearer *b_ptr, u32 dest);
+void tipc_bearer_add_dest(u32 bearer_id, u32 dest);
+void tipc_bearer_remove_dest(u32 bearer_id, u32 dest);
struct tipc_bearer *tipc_bearer_find(const char *name);
struct tipc_media *tipc_media_find(const char *name);
int tipc_bearer_setup(void);
void tipc_bearer_cleanup(void);
void tipc_bearer_stop(void);
-void tipc_bearer_send(struct tipc_bearer *b, struct sk_buff *buf,
+void tipc_bearer_send(u32 bearer_id, struct sk_buff *buf,
struct tipc_media_addr *dest);
#endif /* _TIPC_BEARER_H */
diff --git a/net/tipc/config.c b/net/tipc/config.c
index e6d721692ae..2b42403ad33 100644
--- a/net/tipc/config.c
+++ b/net/tipc/config.c
@@ -42,14 +42,10 @@
#define REPLY_TRUNCATED "<truncated>\n"
-static DEFINE_MUTEX(config_mutex);
-static struct tipc_server cfgsrv;
-
static const void *req_tlv_area; /* request message TLV area */
static int req_tlv_space; /* request message TLV area size */
static int rep_headroom; /* reply message headroom to use */
-
struct sk_buff *tipc_cfg_reply_alloc(int payload_size)
{
struct sk_buff *buf;
@@ -181,20 +177,10 @@ static struct sk_buff *cfg_set_own_addr(void)
if (tipc_own_addr)
return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
" (cannot change node address once assigned)");
- tipc_net_start(addr);
- return tipc_cfg_reply_none();
-}
-
-static struct sk_buff *cfg_set_remote_mng(void)
-{
- u32 value;
-
- if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED))
- return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
+ if (!tipc_net_start(addr))
+ return tipc_cfg_reply_none();
- value = ntohl(*(__be32 *)TLV_DATA(req_tlv_area));
- tipc_remote_management = (value != 0);
- return tipc_cfg_reply_none();
+ return tipc_cfg_reply_error_string("cannot change to network mode");
}
static struct sk_buff *cfg_set_max_ports(void)
@@ -237,7 +223,7 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area
{
struct sk_buff *rep_tlv_buf;
- mutex_lock(&config_mutex);
+ rtnl_lock();
/* Save request and reply details in a well-known location */
req_tlv_area = request_area;
@@ -247,21 +233,10 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area
/* Check command authorization */
if (likely(in_own_node(orig_node))) {
/* command is permitted */
- } else if (cmd >= 0x8000) {
+ } else {
rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
" (cannot be done remotely)");
goto exit;
- } else if (!tipc_remote_management) {
- rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NO_REMOTE);
- goto exit;
- } else if (cmd >= 0x4000) {
- u32 domain = 0;
-
- if ((tipc_nametbl_translate(TIPC_ZM_SRV, 0, &domain) == 0) ||
- (domain != orig_node)) {
- rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NOT_ZONE_MSTR);
- goto exit;
- }
}
/* Call appropriate processing routine */
@@ -310,18 +285,12 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area
case TIPC_CMD_SET_NODE_ADDR:
rep_tlv_buf = cfg_set_own_addr();
break;
- case TIPC_CMD_SET_REMOTE_MNG:
- rep_tlv_buf = cfg_set_remote_mng();
- break;
case TIPC_CMD_SET_MAX_PORTS:
rep_tlv_buf = cfg_set_max_ports();
break;
case TIPC_CMD_SET_NETID:
rep_tlv_buf = cfg_set_netid();
break;
- case TIPC_CMD_GET_REMOTE_MNG:
- rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_remote_management);
- break;
case TIPC_CMD_GET_MAX_PORTS:
rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_max_ports);
break;
@@ -345,6 +314,8 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area
case TIPC_CMD_SET_MAX_PUBL:
case TIPC_CMD_GET_MAX_PUBL:
case TIPC_CMD_SET_LOG_SIZE:
+ case TIPC_CMD_SET_REMOTE_MNG:
+ case TIPC_CMD_GET_REMOTE_MNG:
case TIPC_CMD_DUMP_LOG:
rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
" (obsolete command)");
@@ -366,78 +337,6 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area
/* Return reply buffer */
exit:
- mutex_unlock(&config_mutex);
+ rtnl_unlock();
return rep_tlv_buf;
}
-
-static void cfg_conn_msg_event(int conid, struct sockaddr_tipc *addr,
- void *usr_data, void *buf, size_t len)
-{
- struct tipc_cfg_msg_hdr *req_hdr;
- struct tipc_cfg_msg_hdr *rep_hdr;
- struct sk_buff *rep_buf;
-
- /* Validate configuration message header (ignore invalid message) */
- req_hdr = (struct tipc_cfg_msg_hdr *)buf;
- if ((len < sizeof(*req_hdr)) ||
- (len != TCM_ALIGN(ntohl(req_hdr->tcm_len))) ||
- (ntohs(req_hdr->tcm_flags) != TCM_F_REQUEST)) {
- pr_warn("Invalid configuration message discarded\n");
- return;
- }
-
- /* Generate reply for request (if can't, return request) */
- rep_buf = tipc_cfg_do_cmd(addr->addr.id.node, ntohs(req_hdr->tcm_type),
- buf + sizeof(*req_hdr),
- len - sizeof(*req_hdr),
- BUF_HEADROOM + MAX_H_SIZE + sizeof(*rep_hdr));
- if (rep_buf) {
- skb_push(rep_buf, sizeof(*rep_hdr));
- rep_hdr = (struct tipc_cfg_msg_hdr *)rep_buf->data;
- memcpy(rep_hdr, req_hdr, sizeof(*rep_hdr));
- rep_hdr->tcm_len = htonl(rep_buf->len);
- rep_hdr->tcm_flags &= htons(~TCM_F_REQUEST);
- tipc_conn_sendmsg(&cfgsrv, conid, addr, rep_buf->data,
- rep_buf->len);
- kfree_skb(rep_buf);
- }
-}
-
-static struct sockaddr_tipc cfgsrv_addr __read_mostly = {
- .family = AF_TIPC,
- .addrtype = TIPC_ADDR_NAMESEQ,
- .addr.nameseq.type = TIPC_CFG_SRV,
- .addr.nameseq.lower = 0,
- .addr.nameseq.upper = 0,
- .scope = TIPC_ZONE_SCOPE
-};
-
-static struct tipc_server cfgsrv __read_mostly = {
- .saddr = &cfgsrv_addr,
- .imp = TIPC_CRITICAL_IMPORTANCE,
- .type = SOCK_RDM,
- .max_rcvbuf_size = 64 * 1024,
- .name = "cfg_server",
- .tipc_conn_recvmsg = cfg_conn_msg_event,
- .tipc_conn_new = NULL,
- .tipc_conn_shutdown = NULL
-};
-
-int tipc_cfg_init(void)
-{
- return tipc_server_start(&cfgsrv);
-}
-
-void tipc_cfg_reinit(void)
-{
- tipc_server_stop(&cfgsrv);
-
- cfgsrv_addr.addr.nameseq.lower = tipc_own_addr;
- cfgsrv_addr.addr.nameseq.upper = tipc_own_addr;
- tipc_server_start(&cfgsrv);
-}
-
-void tipc_cfg_stop(void)
-{
- tipc_server_stop(&cfgsrv);
-}
diff --git a/net/tipc/config.h b/net/tipc/config.h
index 1f252f3fa05..47b1bf18161 100644
--- a/net/tipc/config.h
+++ b/net/tipc/config.h
@@ -64,9 +64,4 @@ static inline struct sk_buff *tipc_cfg_reply_ultra_string(char *string)
struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd,
const void *req_tlv_area, int req_tlv_space,
int headroom);
-
-int tipc_cfg_init(void);
-void tipc_cfg_reinit(void);
-void tipc_cfg_stop(void);
-
#endif
diff --git a/net/tipc/core.c b/net/tipc/core.c
index 80c20647b3d..676d18015dd 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -1,7 +1,7 @@
/*
* net/tipc/core.c: TIPC module code
*
- * Copyright (c) 2003-2006, Ericsson AB
+ * Copyright (c) 2003-2006, 2013, Ericsson AB
* Copyright (c) 2005-2006, 2010-2013, Wind River Systems
* All rights reserved.
*
@@ -50,7 +50,6 @@ int tipc_random __read_mostly;
u32 tipc_own_addr __read_mostly;
int tipc_max_ports __read_mostly;
int tipc_net_id __read_mostly;
-int tipc_remote_management __read_mostly;
int sysctl_tipc_rmem[3] __read_mostly; /* min/default/max */
/**
@@ -81,11 +80,9 @@ struct sk_buff *tipc_buf_acquire(u32 size)
*/
static void tipc_core_stop(void)
{
- tipc_handler_stop();
tipc_net_stop();
tipc_bearer_cleanup();
tipc_netlink_stop();
- tipc_cfg_stop();
tipc_subscr_stop();
tipc_nametbl_stop();
tipc_ref_table_stop();
@@ -102,10 +99,6 @@ static int tipc_core_start(void)
get_random_bytes(&tipc_random, sizeof(tipc_random));
- err = tipc_handler_start();
- if (err)
- goto out_handler;
-
err = tipc_ref_table_init(tipc_max_ports, tipc_random);
if (err)
goto out_reftbl;
@@ -130,18 +123,12 @@ static int tipc_core_start(void)
if (err)
goto out_subscr;
- err = tipc_cfg_init();
- if (err)
- goto out_cfg;
-
err = tipc_bearer_setup();
if (err)
goto out_bearer;
return 0;
out_bearer:
- tipc_cfg_stop();
-out_cfg:
tipc_subscr_stop();
out_subscr:
tipc_unregister_sysctl();
@@ -154,8 +141,6 @@ out_netlink:
out_nametbl:
tipc_ref_table_stop();
out_reftbl:
- tipc_handler_stop();
-out_handler:
return err;
}
@@ -166,14 +151,14 @@ static int __init tipc_init(void)
pr_info("Activated (version " TIPC_MOD_VER ")\n");
tipc_own_addr = 0;
- tipc_remote_management = 1;
tipc_max_ports = CONFIG_TIPC_PORTS;
tipc_net_id = 4711;
- sysctl_tipc_rmem[0] = CONN_OVERLOAD_LIMIT >> 4 << TIPC_LOW_IMPORTANCE;
- sysctl_tipc_rmem[1] = CONN_OVERLOAD_LIMIT >> 4 <<
+ sysctl_tipc_rmem[0] = TIPC_CONN_OVERLOAD_LIMIT >> 4 <<
+ TIPC_LOW_IMPORTANCE;
+ sysctl_tipc_rmem[1] = TIPC_CONN_OVERLOAD_LIMIT >> 4 <<
TIPC_CRITICAL_IMPORTANCE;
- sysctl_tipc_rmem[2] = CONN_OVERLOAD_LIMIT;
+ sysctl_tipc_rmem[2] = TIPC_CONN_OVERLOAD_LIMIT;
res = tipc_core_start();
if (res)
diff --git a/net/tipc/core.h b/net/tipc/core.h
index 4dfe137587b..bb26ed1ee96 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -56,7 +56,8 @@
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
-
+#include <linux/rtnetlink.h>
+#include <linux/etherdevice.h>
#define TIPC_MOD_VER "2.0.0"
@@ -79,7 +80,6 @@ int tipc_snprintf(char *buf, int len, const char *fmt, ...);
extern u32 tipc_own_addr __read_mostly;
extern int tipc_max_ports __read_mostly;
extern int tipc_net_id __read_mostly;
-extern int tipc_remote_management __read_mostly;
extern int sysctl_tipc_rmem[3] __read_mostly;
/*
@@ -90,8 +90,6 @@ extern int tipc_random __read_mostly;
/*
* Routines available to privileged subsystems
*/
-int tipc_handler_start(void);
-void tipc_handler_stop(void);
int tipc_netlink_start(void);
void tipc_netlink_stop(void);
int tipc_socket_init(void);
@@ -110,12 +108,10 @@ void tipc_unregister_sysctl(void);
#endif
/*
- * TIPC timer and signal code
+ * TIPC timer code
*/
typedef void (*Handler) (unsigned long);
-u32 tipc_k_signal(Handler routine, unsigned long argument);
-
/**
* k_init_timer - initialize a timer
* @timer: pointer to timer structure
@@ -192,6 +188,7 @@ static inline void k_term_timer(struct timer_list *timer)
struct tipc_skb_cb {
void *handle;
bool deferred;
+ struct sk_buff *tail;
};
#define TIPC_SKB_CB(__skb) ((struct tipc_skb_cb *)&((__skb)->cb[0]))
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index 412ff41b861..aa722a42ef8 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -1,7 +1,7 @@
/*
* net/tipc/discover.c
*
- * Copyright (c) 2003-2006, Ericsson AB
+ * Copyright (c) 2003-2006, 2014, Ericsson AB
* Copyright (c) 2005-2006, 2010-2011, Wind River Systems
* All rights reserved.
*
@@ -46,7 +46,7 @@
/**
* struct tipc_link_req - information about an ongoing link setup request
- * @bearer: bearer issuing requests
+ * @bearer_id: identity of bearer issuing requests
* @dest: destination address for request messages
* @domain: network domain to which links can be established
* @num_nodes: number of nodes currently discovered (i.e. with an active link)
@@ -56,7 +56,7 @@
* @timer_intv: current interval between requests (in ms)
*/
struct tipc_link_req {
- struct tipc_bearer *bearer;
+ u32 bearer_id;
struct tipc_media_addr dest;
u32 domain;
int num_nodes;
@@ -69,25 +69,21 @@ struct tipc_link_req {
/**
* tipc_disc_init_msg - initialize a link setup message
* @type: message type (request or response)
- * @dest_domain: network domain of node(s) which should respond to message
* @b_ptr: ptr to bearer issuing message
*/
-static struct sk_buff *tipc_disc_init_msg(u32 type, u32 dest_domain,
- struct tipc_bearer *b_ptr)
+static void tipc_disc_init_msg(struct sk_buff *buf, u32 type,
+ struct tipc_bearer *b_ptr)
{
- struct sk_buff *buf = tipc_buf_acquire(INT_H_SIZE);
struct tipc_msg *msg;
-
- if (buf) {
- msg = buf_msg(buf);
- tipc_msg_init(msg, LINK_CONFIG, type, INT_H_SIZE, dest_domain);
- msg_set_non_seq(msg, 1);
- msg_set_node_sig(msg, tipc_random);
- msg_set_dest_domain(msg, dest_domain);
- msg_set_bc_netid(msg, tipc_net_id);
- b_ptr->media->addr2msg(&b_ptr->addr, msg_media_addr(msg));
- }
- return buf;
+ u32 dest_domain = b_ptr->domain;
+
+ msg = buf_msg(buf);
+ tipc_msg_init(msg, LINK_CONFIG, type, INT_H_SIZE, dest_domain);
+ msg_set_non_seq(msg, 1);
+ msg_set_node_sig(msg, tipc_random);
+ msg_set_dest_domain(msg, dest_domain);
+ msg_set_bc_netid(msg, tipc_net_id);
+ b_ptr->media->addr2msg(msg_media_addr(msg), &b_ptr->addr);
}
/**
@@ -110,146 +106,150 @@ static void disc_dupl_alert(struct tipc_bearer *b_ptr, u32 node_addr,
}
/**
- * tipc_disc_recv_msg - handle incoming link setup message (request or response)
+ * tipc_disc_rcv - handle incoming discovery message (request or response)
* @buf: buffer containing message
- * @b_ptr: bearer that message arrived on
+ * @bearer: bearer that message arrived on
*/
-void tipc_disc_recv_msg(struct sk_buff *buf, struct tipc_bearer *b_ptr)
+void tipc_disc_rcv(struct sk_buff *buf, struct tipc_bearer *bearer)
{
- struct tipc_node *n_ptr;
+ struct tipc_node *node;
struct tipc_link *link;
- struct tipc_media_addr media_addr;
+ struct tipc_media_addr maddr;
struct sk_buff *rbuf;
struct tipc_msg *msg = buf_msg(buf);
- u32 dest = msg_dest_domain(msg);
- u32 orig = msg_prevnode(msg);
+ u32 ddom = msg_dest_domain(msg);
+ u32 onode = msg_prevnode(msg);
u32 net_id = msg_bc_netid(msg);
- u32 type = msg_type(msg);
+ u32 mtyp = msg_type(msg);
u32 signature = msg_node_sig(msg);
- int addr_mismatch;
- int link_fully_up;
-
- media_addr.broadcast = 1;
- b_ptr->media->msg2addr(b_ptr, &media_addr, msg_media_addr(msg));
+ bool addr_match = false;
+ bool sign_match = false;
+ bool link_up = false;
+ bool accept_addr = false;
+ bool accept_sign = false;
+ bool respond = false;
+
+ bearer->media->msg2addr(bearer, &maddr, msg_media_addr(msg));
kfree_skb(buf);
/* Ensure message from node is valid and communication is permitted */
if (net_id != tipc_net_id)
return;
- if (media_addr.broadcast)
+ if (maddr.broadcast)
return;
- if (!tipc_addr_domain_valid(dest))
+ if (!tipc_addr_domain_valid(ddom))
return;
- if (!tipc_addr_node_valid(orig))
+ if (!tipc_addr_node_valid(onode))
return;
- if (orig == tipc_own_addr) {
- if (memcmp(&media_addr, &b_ptr->addr, sizeof(media_addr)))
- disc_dupl_alert(b_ptr, tipc_own_addr, &media_addr);
+
+ if (in_own_node(onode)) {
+ if (memcmp(&maddr, &bearer->addr, sizeof(maddr)))
+ disc_dupl_alert(bearer, tipc_own_addr, &maddr);
return;
}
- if (!tipc_in_scope(dest, tipc_own_addr))
+ if (!tipc_in_scope(ddom, tipc_own_addr))
return;
- if (!tipc_in_scope(b_ptr->link_req->domain, orig))
+ if (!tipc_in_scope(bearer->domain, onode))
return;
- /* Locate structure corresponding to requesting node */
- n_ptr = tipc_node_find(orig);
- if (!n_ptr) {
- n_ptr = tipc_node_create(orig);
- if (!n_ptr)
- return;
- }
- tipc_node_lock(n_ptr);
+ /* Locate, or if necessary, create, node: */
+ node = tipc_node_find(onode);
+ if (!node)
+ node = tipc_node_create(onode);
+ if (!node)
+ return;
- /* Prepare to validate requesting node's signature and media address */
- link = n_ptr->links[b_ptr->identity];
- addr_mismatch = (link != NULL) &&
- memcmp(&link->media_addr, &media_addr, sizeof(media_addr));
+ tipc_node_lock(node);
+ link = node->links[bearer->identity];
- /*
- * Ensure discovery message's signature is correct
- *
- * If signature is incorrect and there is no working link to the node,
- * accept the new signature but invalidate all existing links to the
- * node so they won't re-activate without a new discovery message.
- *
- * If signature is incorrect and the requested link to the node is
- * working, accept the new signature. (This is an instance of delayed
- * rediscovery, where a link endpoint was able to re-establish contact
- * with its peer endpoint on a node that rebooted before receiving a
- * discovery message from that node.)
- *
- * If signature is incorrect and there is a working link to the node
- * that is not the requested link, reject the request (must be from
- * a duplicate node).
- */
- if (signature != n_ptr->signature) {
- if (n_ptr->working_links == 0) {
- struct tipc_link *curr_link;
- int i;
-
- for (i = 0; i < MAX_BEARERS; i++) {
- curr_link = n_ptr->links[i];
- if (curr_link) {
- memset(&curr_link->media_addr, 0,
- sizeof(media_addr));
- tipc_link_reset(curr_link);
- }
- }
- addr_mismatch = (link != NULL);
- } else if (tipc_link_is_up(link) && !addr_mismatch) {
- /* delayed rediscovery */
- } else {
- disc_dupl_alert(b_ptr, orig, &media_addr);
- tipc_node_unlock(n_ptr);
- return;
- }
- n_ptr->signature = signature;
+ /* Prepare to validate requesting node's signature and media address */
+ sign_match = (signature == node->signature);
+ addr_match = link && !memcmp(&link->media_addr, &maddr, sizeof(maddr));
+ link_up = link && tipc_link_is_up(link);
+
+
+ /* These three flags give us eight permutations: */
+
+ if (sign_match && addr_match && link_up) {
+ /* All is fine. Do nothing. */
+ } else if (sign_match && addr_match && !link_up) {
+ /* Respond. The link will come up in due time */
+ respond = true;
+ } else if (sign_match && !addr_match && link_up) {
+ /* Peer has changed i/f address without rebooting.
+ * If so, the link will reset soon, and the next
+ * discovery will be accepted. So we can ignore it.
+ * It may also be a cloned or malicious peer having
+ * chosen the same node address and signature as an
+ * existing one.
+ * Ignore requests until the link goes down, if ever.
+ */
+ disc_dupl_alert(bearer, onode, &maddr);
+ } else if (sign_match && !addr_match && !link_up) {
+ /* Peer link has changed i/f address without rebooting.
+ * It may also be a cloned or malicious peer; we can't
+ * distinguish between the two.
+ * The signature is correct, so we must accept.
+ */
+ accept_addr = true;
+ respond = true;
+ } else if (!sign_match && addr_match && link_up) {
+ /* Peer node rebooted. Two possibilities:
+ * - Delayed re-discovery; this link endpoint has already
+ * reset and re-established contact with the peer, before
+ * receiving a discovery message from that node.
+ * (The peer happened to receive one from this node first).
+ * - The peer came back so fast that our side has not
+ * discovered it yet. Probing from this side will soon
+ * reset the link, since there can be no working link
+ * endpoint at the peer end, and the link will re-establish.
+ * Accept the signature, since it comes from a known peer.
+ */
+ accept_sign = true;
+ } else if (!sign_match && addr_match && !link_up) {
+ /* The peer node has rebooted.
+ * Accept signature, since it is a known peer.
+ */
+ accept_sign = true;
+ respond = true;
+ } else if (!sign_match && !addr_match && link_up) {
+ /* Peer rebooted with new address, or a new/duplicate peer.
+ * Ignore until the link goes down, if ever.
+ */
+ disc_dupl_alert(bearer, onode, &maddr);
+ } else if (!sign_match && !addr_match && !link_up) {
+ /* Peer rebooted with new address, or it is a new peer.
+ * Accept signature and address.
+ */
+ accept_sign = true;
+ accept_addr = true;
+ respond = true;
}
- /*
- * Ensure requesting node's media address is correct
- *
- * If media address doesn't match and the link is working, reject the
- * request (must be from a duplicate node).
- *
- * If media address doesn't match and the link is not working, accept
- * the new media address and reset the link to ensure it starts up
- * cleanly.
- */
- if (addr_mismatch) {
- if (tipc_link_is_up(link)) {
- disc_dupl_alert(b_ptr, orig, &media_addr);
- tipc_node_unlock(n_ptr);
- return;
- } else {
- memcpy(&link->media_addr, &media_addr,
- sizeof(media_addr));
- tipc_link_reset(link);
- }
- }
+ if (accept_sign)
+ node->signature = signature;
- /* Create a link endpoint for this bearer, if necessary */
- if (!link) {
- link = tipc_link_create(n_ptr, b_ptr, &media_addr);
- if (!link) {
- tipc_node_unlock(n_ptr);
- return;
+ if (accept_addr) {
+ if (!link)
+ link = tipc_link_create(node, bearer, &maddr);
+ if (link) {
+ memcpy(&link->media_addr, &maddr, sizeof(maddr));
+ tipc_link_reset(link);
+ } else {
+ respond = false;
}
}
- /* Accept discovery message & send response, if necessary */
- link_fully_up = link_working_working(link);
-
- if ((type == DSC_REQ_MSG) && !link_fully_up) {
- rbuf = tipc_disc_init_msg(DSC_RESP_MSG, orig, b_ptr);
+ /* Send response, if necessary */
+ if (respond && (mtyp == DSC_REQ_MSG)) {
+ rbuf = tipc_buf_acquire(INT_H_SIZE);
if (rbuf) {
- tipc_bearer_send(b_ptr, rbuf, &media_addr);
+ tipc_disc_init_msg(rbuf, DSC_RESP_MSG, bearer);
+ tipc_bearer_send(bearer->identity, rbuf, &maddr);
kfree_skb(rbuf);
}
}
-
- tipc_node_unlock(n_ptr);
+ tipc_node_unlock(node);
}
/**
@@ -318,7 +318,7 @@ static void disc_timeout(struct tipc_link_req *req)
* hold at fast polling rate if don't have any associated nodes,
* otherwise hold at slow polling rate
*/
- tipc_bearer_send(req->bearer, req->buf, &req->dest);
+ tipc_bearer_send(req->bearer_id, req->buf, &req->dest);
req->timer_intv *= 2;
@@ -342,8 +342,7 @@ exit:
*
* Returns 0 if successful, otherwise -errno.
*/
-int tipc_disc_create(struct tipc_bearer *b_ptr, struct tipc_media_addr *dest,
- u32 dest_domain)
+int tipc_disc_create(struct tipc_bearer *b_ptr, struct tipc_media_addr *dest)
{
struct tipc_link_req *req;
@@ -351,22 +350,23 @@ int tipc_disc_create(struct tipc_bearer *b_ptr, struct tipc_media_addr *dest,
if (!req)
return -ENOMEM;
- req->buf = tipc_disc_init_msg(DSC_REQ_MSG, dest_domain, b_ptr);
+ req->buf = tipc_buf_acquire(INT_H_SIZE);
if (!req->buf) {
kfree(req);
- return -ENOMSG;
+ return -ENOMEM;
}
+ tipc_disc_init_msg(req->buf, DSC_REQ_MSG, b_ptr);
memcpy(&req->dest, dest, sizeof(*dest));
- req->bearer = b_ptr;
- req->domain = dest_domain;
+ req->bearer_id = b_ptr->identity;
+ req->domain = b_ptr->domain;
req->num_nodes = 0;
req->timer_intv = TIPC_LINK_REQ_INIT;
spin_lock_init(&req->lock);
k_init_timer(&req->timer, (Handler)disc_timeout, (unsigned long)req);
k_start_timer(&req->timer, req->timer_intv);
b_ptr->link_req = req;
- tipc_bearer_send(req->bearer, req->buf, &req->dest);
+ tipc_bearer_send(req->bearer_id, req->buf, &req->dest);
return 0;
}
@@ -381,3 +381,23 @@ void tipc_disc_delete(struct tipc_link_req *req)
kfree_skb(req->buf);
kfree(req);
}
+
+/**
+ * tipc_disc_reset - reset object to send periodic link setup requests
+ * @b_ptr: ptr to bearer issuing requests
+ *         (its domain field selects the network domain for link setup)
+ */
+void tipc_disc_reset(struct tipc_bearer *b_ptr)
+{
+ struct tipc_link_req *req = b_ptr->link_req;
+
+ spin_lock_bh(&req->lock);
+ tipc_disc_init_msg(req->buf, DSC_REQ_MSG, b_ptr);
+ req->bearer_id = b_ptr->identity;
+ req->domain = b_ptr->domain;
+ req->num_nodes = 0;
+ req->timer_intv = TIPC_LINK_REQ_INIT;
+ k_start_timer(&req->timer, req->timer_intv);
+ tipc_bearer_send(req->bearer_id, req->buf, &req->dest);
+ spin_unlock_bh(&req->lock);
+}
diff --git a/net/tipc/discover.h b/net/tipc/discover.h
index 75b67c403aa..515b57392f4 100644
--- a/net/tipc/discover.h
+++ b/net/tipc/discover.h
@@ -39,11 +39,11 @@
struct tipc_link_req;
-int tipc_disc_create(struct tipc_bearer *b_ptr, struct tipc_media_addr *dest,
- u32 dest_domain);
+int tipc_disc_create(struct tipc_bearer *b_ptr, struct tipc_media_addr *dest);
void tipc_disc_delete(struct tipc_link_req *req);
+void tipc_disc_reset(struct tipc_bearer *b_ptr);
void tipc_disc_add_dest(struct tipc_link_req *req);
void tipc_disc_remove_dest(struct tipc_link_req *req);
-void tipc_disc_recv_msg(struct sk_buff *buf, struct tipc_bearer *b_ptr);
+void tipc_disc_rcv(struct sk_buff *buf, struct tipc_bearer *b_ptr);
#endif
diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c
index 67cf3f935db..5e1426f1751 100644
--- a/net/tipc/eth_media.c
+++ b/net/tipc/eth_media.c
@@ -1,7 +1,7 @@
/*
* net/tipc/eth_media.c: Ethernet bearer support for TIPC
*
- * Copyright (c) 2001-2007, 2013, Ericsson AB
+ * Copyright (c) 2001-2007, 2013-2014, Ericsson AB
* Copyright (c) 2005-2008, 2011-2013, Wind River Systems
* All rights reserved.
*
@@ -37,39 +37,52 @@
#include "core.h"
#include "bearer.h"
-#define ETH_ADDR_OFFSET 4 /* message header offset of MAC address */
+#define ETH_ADDR_OFFSET 4 /* MAC addr position inside address field */
-/* convert Ethernet address to string */
-static int tipc_eth_addr2str(struct tipc_media_addr *a, char *str_buf,
- int str_size)
+/* Convert Ethernet address (media address format) to string */
+static int tipc_eth_addr2str(struct tipc_media_addr *addr,
+ char *strbuf, int bufsz)
{
- if (str_size < 18) /* 18 = strlen("aa:bb:cc:dd:ee:ff\0") */
+ if (bufsz < 18) /* 18 = sizeof("aa:bb:cc:dd:ee:ff") */
return 1;
- sprintf(str_buf, "%pM", a->value);
+ sprintf(strbuf, "%pM", addr->value);
return 0;
}
-/* convert Ethernet address format to message header format */
-static int tipc_eth_addr2msg(struct tipc_media_addr *a, char *msg_area)
+/* Convert from media address format to discovery message addr format */
+static int tipc_eth_addr2msg(char *msg, struct tipc_media_addr *addr)
{
- memset(msg_area, 0, TIPC_MEDIA_ADDR_SIZE);
- msg_area[TIPC_MEDIA_TYPE_OFFSET] = TIPC_MEDIA_TYPE_ETH;
- memcpy(msg_area + ETH_ADDR_OFFSET, a->value, ETH_ALEN);
+ memset(msg, 0, TIPC_MEDIA_ADDR_SIZE);
+ msg[TIPC_MEDIA_TYPE_OFFSET] = TIPC_MEDIA_TYPE_ETH;
+ memcpy(msg + ETH_ADDR_OFFSET, addr->value, ETH_ALEN);
return 0;
}
-/* convert message header address format to Ethernet format */
-static int tipc_eth_msg2addr(const struct tipc_bearer *tb_ptr,
- struct tipc_media_addr *a, char *msg_area)
+/* Convert raw mac address format to media addr format */
+static int tipc_eth_raw2addr(struct tipc_bearer *b,
+ struct tipc_media_addr *addr,
+ char *msg)
{
- if (msg_area[TIPC_MEDIA_TYPE_OFFSET] != TIPC_MEDIA_TYPE_ETH)
- return 1;
+ char bcast_mac[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
- tipc_l2_media_addr_set(tb_ptr, a, msg_area + ETH_ADDR_OFFSET);
+ memset(addr, 0, sizeof(*addr));
+ ether_addr_copy(addr->value, msg);
+ addr->media_id = TIPC_MEDIA_TYPE_ETH;
+ addr->broadcast = !memcmp(addr->value, bcast_mac, ETH_ALEN);
return 0;
}
+/* Convert discovery msg addr format to Ethernet media addr format */
+static int tipc_eth_msg2addr(struct tipc_bearer *b,
+ struct tipc_media_addr *addr,
+ char *msg)
+{
+ /* Skip past preamble: */
+ msg += ETH_ADDR_OFFSET;
+ return tipc_eth_raw2addr(b, addr, msg);
+}
+
/* Ethernet media registration info */
struct tipc_media eth_media_info = {
.send_msg = tipc_l2_send_msg,
@@ -78,6 +91,7 @@ struct tipc_media eth_media_info = {
.addr2str = tipc_eth_addr2str,
.addr2msg = tipc_eth_addr2msg,
.msg2addr = tipc_eth_msg2addr,
+ .raw2addr = tipc_eth_raw2addr,
.priority = TIPC_DEF_LINK_PRI,
.tolerance = TIPC_DEF_LINK_TOL,
.window = TIPC_DEF_LINK_WIN,
@@ -85,4 +99,3 @@ struct tipc_media eth_media_info = {
.hwaddr_len = ETH_ALEN,
.name = "eth"
};
-
diff --git a/net/tipc/handler.c b/net/tipc/handler.c
deleted file mode 100644
index 1fabf160501..00000000000
--- a/net/tipc/handler.c
+++ /dev/null
@@ -1,134 +0,0 @@
-/*
- * net/tipc/handler.c: TIPC signal handling
- *
- * Copyright (c) 2000-2006, Ericsson AB
- * Copyright (c) 2005, Wind River Systems
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the names of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "core.h"
-
-struct queue_item {
- struct list_head next_signal;
- void (*handler) (unsigned long);
- unsigned long data;
-};
-
-static struct kmem_cache *tipc_queue_item_cache;
-static struct list_head signal_queue_head;
-static DEFINE_SPINLOCK(qitem_lock);
-static int handler_enabled __read_mostly;
-
-static void process_signal_queue(unsigned long dummy);
-
-static DECLARE_TASKLET_DISABLED(tipc_tasklet, process_signal_queue, 0);
-
-
-unsigned int tipc_k_signal(Handler routine, unsigned long argument)
-{
- struct queue_item *item;
-
- spin_lock_bh(&qitem_lock);
- if (!handler_enabled) {
- spin_unlock_bh(&qitem_lock);
- return -ENOPROTOOPT;
- }
-
- item = kmem_cache_alloc(tipc_queue_item_cache, GFP_ATOMIC);
- if (!item) {
- pr_err("Signal queue out of memory\n");
- spin_unlock_bh(&qitem_lock);
- return -ENOMEM;
- }
- item->handler = routine;
- item->data = argument;
- list_add_tail(&item->next_signal, &signal_queue_head);
- spin_unlock_bh(&qitem_lock);
- tasklet_schedule(&tipc_tasklet);
- return 0;
-}
-
-static void process_signal_queue(unsigned long dummy)
-{
- struct queue_item *__volatile__ item;
- struct list_head *l, *n;
-
- spin_lock_bh(&qitem_lock);
- list_for_each_safe(l, n, &signal_queue_head) {
- item = list_entry(l, struct queue_item, next_signal);
- list_del(&item->next_signal);
- spin_unlock_bh(&qitem_lock);
- item->handler(item->data);
- spin_lock_bh(&qitem_lock);
- kmem_cache_free(tipc_queue_item_cache, item);
- }
- spin_unlock_bh(&qitem_lock);
-}
-
-int tipc_handler_start(void)
-{
- tipc_queue_item_cache =
- kmem_cache_create("tipc_queue_items", sizeof(struct queue_item),
- 0, SLAB_HWCACHE_ALIGN, NULL);
- if (!tipc_queue_item_cache)
- return -ENOMEM;
-
- INIT_LIST_HEAD(&signal_queue_head);
- tasklet_enable(&tipc_tasklet);
- handler_enabled = 1;
- return 0;
-}
-
-void tipc_handler_stop(void)
-{
- struct list_head *l, *n;
- struct queue_item *item;
-
- spin_lock_bh(&qitem_lock);
- if (!handler_enabled) {
- spin_unlock_bh(&qitem_lock);
- return;
- }
- handler_enabled = 0;
- spin_unlock_bh(&qitem_lock);
-
- tasklet_kill(&tipc_tasklet);
-
- spin_lock_bh(&qitem_lock);
- list_for_each_safe(l, n, &signal_queue_head) {
- item = list_entry(l, struct queue_item, next_signal);
- list_del(&item->next_signal);
- kmem_cache_free(tipc_queue_item_cache, item);
- }
- spin_unlock_bh(&qitem_lock);
-
- kmem_cache_destroy(tipc_queue_item_cache);
-}
diff --git a/net/tipc/ib_media.c b/net/tipc/ib_media.c
index 844a77e2582..8522eef9c13 100644
--- a/net/tipc/ib_media.c
+++ b/net/tipc/ib_media.c
@@ -42,7 +42,7 @@
#include "core.h"
#include "bearer.h"
-/* convert InfiniBand address to string */
+/* Convert InfiniBand address (media address format) to string */
static int tipc_ib_addr2str(struct tipc_media_addr *a, char *str_buf,
int str_size)
{
@@ -54,23 +54,35 @@ static int tipc_ib_addr2str(struct tipc_media_addr *a, char *str_buf,
return 0;
}
-/* convert InfiniBand address format to message header format */
-static int tipc_ib_addr2msg(struct tipc_media_addr *a, char *msg_area)
+/* Convert from media address format to discovery message addr format */
+static int tipc_ib_addr2msg(char *msg, struct tipc_media_addr *addr)
{
- memset(msg_area, 0, TIPC_MEDIA_ADDR_SIZE);
- msg_area[TIPC_MEDIA_TYPE_OFFSET] = TIPC_MEDIA_TYPE_IB;
- memcpy(msg_area, a->value, INFINIBAND_ALEN);
+ memset(msg, 0, TIPC_MEDIA_ADDR_SIZE);
+ memcpy(msg, addr->value, INFINIBAND_ALEN);
return 0;
}
-/* convert message header address format to InfiniBand format */
-static int tipc_ib_msg2addr(const struct tipc_bearer *tb_ptr,
- struct tipc_media_addr *a, char *msg_area)
+/* Convert raw InfiniBand address format to media addr format */
+static int tipc_ib_raw2addr(struct tipc_bearer *b,
+ struct tipc_media_addr *addr,
+ char *msg)
{
- tipc_l2_media_addr_set(tb_ptr, a, msg_area);
+ memset(addr, 0, sizeof(*addr));
+ memcpy(addr->value, msg, INFINIBAND_ALEN);
+ addr->media_id = TIPC_MEDIA_TYPE_IB;
+ addr->broadcast = !memcmp(msg, b->bcast_addr.value,
+ INFINIBAND_ALEN);
return 0;
}
+/* Convert discovery msg addr format to InfiniBand media addr format */
+static int tipc_ib_msg2addr(struct tipc_bearer *b,
+ struct tipc_media_addr *addr,
+ char *msg)
+{
+ return tipc_ib_raw2addr(b, addr, msg);
+}
+
/* InfiniBand media registration info */
struct tipc_media ib_media_info = {
.send_msg = tipc_l2_send_msg,
@@ -79,6 +91,7 @@ struct tipc_media ib_media_info = {
.addr2str = tipc_ib_addr2str,
.addr2msg = tipc_ib_addr2msg,
.msg2addr = tipc_ib_msg2addr,
+ .raw2addr = tipc_ib_raw2addr,
.priority = TIPC_DEF_LINK_PRI,
.tolerance = TIPC_DEF_LINK_TOL,
.window = TIPC_DEF_LINK_WIN,
@@ -86,4 +99,3 @@ struct tipc_media ib_media_info = {
.hwaddr_len = INFINIBAND_ALEN,
.name = "ib"
};
-
diff --git a/net/tipc/link.c b/net/tipc/link.c
index da6018beb6e..ad2c57f5868 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -37,6 +37,7 @@
#include "core.h"
#include "link.h"
#include "port.h"
+#include "socket.h"
#include "name_distr.h"
#include "discover.h"
#include "config.h"
@@ -77,19 +78,19 @@ static const char *link_unk_evt = "Unknown link event ";
static void link_handle_out_of_seq_msg(struct tipc_link *l_ptr,
struct sk_buff *buf);
-static void link_recv_proto_msg(struct tipc_link *l_ptr, struct sk_buff *buf);
-static int tipc_link_tunnel_rcv(struct tipc_link **l_ptr,
+static void tipc_link_proto_rcv(struct tipc_link *l_ptr, struct sk_buff *buf);
+static int tipc_link_tunnel_rcv(struct tipc_node *n_ptr,
struct sk_buff **buf);
static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tolerance);
-static int link_send_sections_long(struct tipc_port *sender,
- struct iovec const *msg_sect,
- unsigned int len, u32 destnode);
+static int tipc_link_iovec_long_xmit(struct tipc_port *sender,
+ struct iovec const *msg_sect,
+ unsigned int len, u32 destnode);
static void link_state_event(struct tipc_link *l_ptr, u32 event);
static void link_reset_statistics(struct tipc_link *l_ptr);
static void link_print(struct tipc_link *l_ptr, const char *str);
-static int link_send_long_buf(struct tipc_link *l_ptr, struct sk_buff *buf);
-static void tipc_link_send_sync(struct tipc_link *l);
-static void tipc_link_recv_sync(struct tipc_node *n, struct sk_buff *buf);
+static int tipc_link_frag_xmit(struct tipc_link *l_ptr, struct sk_buff *buf);
+static void tipc_link_sync_xmit(struct tipc_link *l);
+static void tipc_link_sync_rcv(struct tipc_node *n, struct sk_buff *buf);
/*
* Simple link routines
@@ -101,9 +102,18 @@ static unsigned int align(unsigned int i)
static void link_init_max_pkt(struct tipc_link *l_ptr)
{
+ struct tipc_bearer *b_ptr;
u32 max_pkt;
- max_pkt = (l_ptr->b_ptr->mtu & ~3);
+ rcu_read_lock();
+ b_ptr = rcu_dereference_rtnl(bearer_list[l_ptr->bearer_id]);
+ if (!b_ptr) {
+ rcu_read_unlock();
+ return;
+ }
+ max_pkt = (b_ptr->mtu & ~3);
+ rcu_read_unlock();
+
if (max_pkt > MAX_MSG_SIZE)
max_pkt = MAX_MSG_SIZE;
@@ -147,11 +157,6 @@ int tipc_link_is_active(struct tipc_link *l_ptr)
/**
* link_timeout - handle expiration of link timer
* @l_ptr: pointer to link
- *
- * This routine must not grab "tipc_net_lock" to avoid a potential deadlock conflict
- * with tipc_link_delete(). (There is no risk that the node will be deleted by
- * another thread because tipc_link_delete() always cancels the link timer before
- * tipc_node_delete() is called.)
*/
static void link_timeout(struct tipc_link *l_ptr)
{
@@ -213,8 +218,8 @@ static void link_set_timer(struct tipc_link *l_ptr, u32 time)
* Returns pointer to link.
*/
struct tipc_link *tipc_link_create(struct tipc_node *n_ptr,
- struct tipc_bearer *b_ptr,
- const struct tipc_media_addr *media_addr)
+ struct tipc_bearer *b_ptr,
+ const struct tipc_media_addr *media_addr)
{
struct tipc_link *l_ptr;
struct tipc_msg *msg;
@@ -253,7 +258,7 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr,
l_ptr->owner = n_ptr;
l_ptr->checkpoint = 1;
l_ptr->peer_session = INVALID_SESSION;
- l_ptr->b_ptr = b_ptr;
+ l_ptr->bearer_id = b_ptr->identity;
link_set_supervision_props(l_ptr, b_ptr->tolerance);
l_ptr->state = RESET_UNKNOWN;
@@ -268,6 +273,7 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr,
l_ptr->priority = b_ptr->priority;
tipc_link_set_queue_limits(l_ptr, b_ptr->window);
+ l_ptr->net_plane = b_ptr->net_plane;
link_init_max_pkt(l_ptr);
l_ptr->next_out_no = 1;
@@ -279,41 +285,44 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr,
k_init_timer(&l_ptr->timer, (Handler)link_timeout,
(unsigned long)l_ptr);
- list_add_tail(&l_ptr->link_list, &b_ptr->links);
link_state_event(l_ptr, STARTING_EVT);
return l_ptr;
}
-/**
- * tipc_link_delete - delete a link
- * @l_ptr: pointer to link
- *
- * Note: 'tipc_net_lock' is write_locked, bearer is locked.
- * This routine must not grab the node lock until after link timer cancellation
- * to avoid a potential deadlock situation.
- */
-void tipc_link_delete(struct tipc_link *l_ptr)
+void tipc_link_delete_list(unsigned int bearer_id, bool shutting_down)
{
- if (!l_ptr) {
- pr_err("Attempt to delete non-existent link\n");
- return;
- }
+ struct tipc_link *l_ptr;
+ struct tipc_node *n_ptr;
- k_cancel_timer(&l_ptr->timer);
+ rcu_read_lock();
+ list_for_each_entry_rcu(n_ptr, &tipc_node_list, list) {
+ tipc_node_lock(n_ptr);
+ l_ptr = n_ptr->links[bearer_id];
+ if (l_ptr) {
+ tipc_link_reset(l_ptr);
+ if (shutting_down || !tipc_node_is_up(n_ptr)) {
+ tipc_node_detach_link(l_ptr->owner, l_ptr);
+ tipc_link_reset_fragments(l_ptr);
+ tipc_node_unlock(n_ptr);
- tipc_node_lock(l_ptr->owner);
- tipc_link_reset(l_ptr);
- tipc_node_detach_link(l_ptr->owner, l_ptr);
- tipc_link_purge_queues(l_ptr);
- list_del_init(&l_ptr->link_list);
- tipc_node_unlock(l_ptr->owner);
- k_term_timer(&l_ptr->timer);
- kfree(l_ptr);
+ /* Nobody else can access this link now: */
+ del_timer_sync(&l_ptr->timer);
+ kfree(l_ptr);
+ } else {
+ /* Detach/delete when failover is finished: */
+ l_ptr->flags |= LINK_STOPPED;
+ tipc_node_unlock(n_ptr);
+ del_timer_sync(&l_ptr->timer);
+ }
+ continue;
+ }
+ tipc_node_unlock(n_ptr);
+ }
+ rcu_read_unlock();
}
-
/**
* link_schedule_port - schedule port for deferred sending
* @l_ptr: pointer to link
@@ -330,8 +339,6 @@ static int link_schedule_port(struct tipc_link *l_ptr, u32 origport, u32 sz)
spin_lock_bh(&tipc_port_list_lock);
p_ptr = tipc_port_lock(origport);
if (p_ptr) {
- if (!p_ptr->wakeup)
- goto exit;
if (!list_empty(&p_ptr->wait_list))
goto exit;
p_ptr->congested = 1;
@@ -366,7 +373,7 @@ void tipc_link_wakeup_ports(struct tipc_link *l_ptr, int all)
list_del_init(&p_ptr->wait_list);
spin_lock_bh(p_ptr->lock);
p_ptr->congested = 0;
- p_ptr->wakeup(p_ptr);
+ tipc_port_wakeup(p_ptr);
win -= p_ptr->waiting_pkts;
spin_unlock_bh(p_ptr->lock);
}
@@ -392,9 +399,8 @@ static void link_release_outqueue(struct tipc_link *l_ptr)
*/
void tipc_link_reset_fragments(struct tipc_link *l_ptr)
{
- kfree_skb(l_ptr->reasm_head);
- l_ptr->reasm_head = NULL;
- l_ptr->reasm_tail = NULL;
+ kfree_skb(l_ptr->reasm_buf);
+ l_ptr->reasm_buf = NULL;
}
/**
@@ -430,7 +436,7 @@ void tipc_link_reset(struct tipc_link *l_ptr)
return;
tipc_node_link_down(l_ptr->owner, l_ptr);
- tipc_bearer_remove_dest(l_ptr->b_ptr, l_ptr->addr);
+ tipc_bearer_remove_dest(l_ptr->bearer_id, l_ptr->addr);
if (was_active_link && tipc_node_active_links(l_ptr->owner)) {
l_ptr->reset_checkpoint = checkpoint;
@@ -461,12 +467,27 @@ void tipc_link_reset(struct tipc_link *l_ptr)
link_reset_statistics(l_ptr);
}
+void tipc_link_reset_list(unsigned int bearer_id)
+{
+ struct tipc_link *l_ptr;
+ struct tipc_node *n_ptr;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(n_ptr, &tipc_node_list, list) {
+ tipc_node_lock(n_ptr);
+ l_ptr = n_ptr->links[bearer_id];
+ if (l_ptr)
+ tipc_link_reset(l_ptr);
+ tipc_node_unlock(n_ptr);
+ }
+ rcu_read_unlock();
+}
static void link_activate(struct tipc_link *l_ptr)
{
l_ptr->next_in_no = l_ptr->stats.recv_info = 1;
tipc_node_link_up(l_ptr->owner, l_ptr);
- tipc_bearer_add_dest(l_ptr->b_ptr, l_ptr->addr);
+ tipc_bearer_add_dest(l_ptr->bearer_id, l_ptr->addr);
}
/**
@@ -479,7 +500,10 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event)
struct tipc_link *other;
u32 cont_intv = l_ptr->continuity_interval;
- if (!l_ptr->started && (event != STARTING_EVT))
+ if (l_ptr->flags & LINK_STOPPED)
+ return;
+
+ if (!(l_ptr->flags & LINK_STARTED) && (event != STARTING_EVT))
return; /* Not yet. */
/* Check whether changeover is going on */
@@ -499,12 +523,12 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event)
if (l_ptr->next_in_no != l_ptr->checkpoint) {
l_ptr->checkpoint = l_ptr->next_in_no;
if (tipc_bclink_acks_missing(l_ptr->owner)) {
- tipc_link_send_proto_msg(l_ptr, STATE_MSG,
- 0, 0, 0, 0, 0);
+ tipc_link_proto_xmit(l_ptr, STATE_MSG,
+ 0, 0, 0, 0, 0);
l_ptr->fsm_msg_cnt++;
} else if (l_ptr->max_pkt < l_ptr->max_pkt_target) {
- tipc_link_send_proto_msg(l_ptr, STATE_MSG,
- 1, 0, 0, 0, 0);
+ tipc_link_proto_xmit(l_ptr, STATE_MSG,
+ 1, 0, 0, 0, 0);
l_ptr->fsm_msg_cnt++;
}
link_set_timer(l_ptr, cont_intv);
@@ -512,7 +536,7 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event)
}
l_ptr->state = WORKING_UNKNOWN;
l_ptr->fsm_msg_cnt = 0;
- tipc_link_send_proto_msg(l_ptr, STATE_MSG, 1, 0, 0, 0, 0);
+ tipc_link_proto_xmit(l_ptr, STATE_MSG, 1, 0, 0, 0, 0);
l_ptr->fsm_msg_cnt++;
link_set_timer(l_ptr, cont_intv / 4);
break;
@@ -522,7 +546,8 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event)
tipc_link_reset(l_ptr);
l_ptr->state = RESET_RESET;
l_ptr->fsm_msg_cnt = 0;
- tipc_link_send_proto_msg(l_ptr, ACTIVATE_MSG, 0, 0, 0, 0, 0);
+ tipc_link_proto_xmit(l_ptr, ACTIVATE_MSG,
+ 0, 0, 0, 0, 0);
l_ptr->fsm_msg_cnt++;
link_set_timer(l_ptr, cont_intv);
break;
@@ -544,7 +569,8 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event)
tipc_link_reset(l_ptr);
l_ptr->state = RESET_RESET;
l_ptr->fsm_msg_cnt = 0;
- tipc_link_send_proto_msg(l_ptr, ACTIVATE_MSG, 0, 0, 0, 0, 0);
+ tipc_link_proto_xmit(l_ptr, ACTIVATE_MSG,
+ 0, 0, 0, 0, 0);
l_ptr->fsm_msg_cnt++;
link_set_timer(l_ptr, cont_intv);
break;
@@ -554,14 +580,14 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event)
l_ptr->fsm_msg_cnt = 0;
l_ptr->checkpoint = l_ptr->next_in_no;
if (tipc_bclink_acks_missing(l_ptr->owner)) {
- tipc_link_send_proto_msg(l_ptr, STATE_MSG,
- 0, 0, 0, 0, 0);
+ tipc_link_proto_xmit(l_ptr, STATE_MSG,
+ 0, 0, 0, 0, 0);
l_ptr->fsm_msg_cnt++;
}
link_set_timer(l_ptr, cont_intv);
} else if (l_ptr->fsm_msg_cnt < l_ptr->abort_limit) {
- tipc_link_send_proto_msg(l_ptr, STATE_MSG,
- 1, 0, 0, 0, 0);
+ tipc_link_proto_xmit(l_ptr, STATE_MSG,
+ 1, 0, 0, 0, 0);
l_ptr->fsm_msg_cnt++;
link_set_timer(l_ptr, cont_intv / 4);
} else { /* Link has failed */
@@ -570,8 +596,8 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event)
tipc_link_reset(l_ptr);
l_ptr->state = RESET_UNKNOWN;
l_ptr->fsm_msg_cnt = 0;
- tipc_link_send_proto_msg(l_ptr, RESET_MSG,
- 0, 0, 0, 0, 0);
+ tipc_link_proto_xmit(l_ptr, RESET_MSG,
+ 0, 0, 0, 0, 0);
l_ptr->fsm_msg_cnt++;
link_set_timer(l_ptr, cont_intv);
}
@@ -591,24 +617,25 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event)
l_ptr->state = WORKING_WORKING;
l_ptr->fsm_msg_cnt = 0;
link_activate(l_ptr);
- tipc_link_send_proto_msg(l_ptr, STATE_MSG, 1, 0, 0, 0, 0);
+ tipc_link_proto_xmit(l_ptr, STATE_MSG, 1, 0, 0, 0, 0);
l_ptr->fsm_msg_cnt++;
if (l_ptr->owner->working_links == 1)
- tipc_link_send_sync(l_ptr);
+ tipc_link_sync_xmit(l_ptr);
link_set_timer(l_ptr, cont_intv);
break;
case RESET_MSG:
l_ptr->state = RESET_RESET;
l_ptr->fsm_msg_cnt = 0;
- tipc_link_send_proto_msg(l_ptr, ACTIVATE_MSG, 1, 0, 0, 0, 0);
+ tipc_link_proto_xmit(l_ptr, ACTIVATE_MSG,
+ 1, 0, 0, 0, 0);
l_ptr->fsm_msg_cnt++;
link_set_timer(l_ptr, cont_intv);
break;
case STARTING_EVT:
- l_ptr->started = 1;
+ l_ptr->flags |= LINK_STARTED;
/* fall through */
case TIMEOUT_EVT:
- tipc_link_send_proto_msg(l_ptr, RESET_MSG, 0, 0, 0, 0, 0);
+ tipc_link_proto_xmit(l_ptr, RESET_MSG, 0, 0, 0, 0, 0);
l_ptr->fsm_msg_cnt++;
link_set_timer(l_ptr, cont_intv);
break;
@@ -626,16 +653,17 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event)
l_ptr->state = WORKING_WORKING;
l_ptr->fsm_msg_cnt = 0;
link_activate(l_ptr);
- tipc_link_send_proto_msg(l_ptr, STATE_MSG, 1, 0, 0, 0, 0);
+ tipc_link_proto_xmit(l_ptr, STATE_MSG, 1, 0, 0, 0, 0);
l_ptr->fsm_msg_cnt++;
if (l_ptr->owner->working_links == 1)
- tipc_link_send_sync(l_ptr);
+ tipc_link_sync_xmit(l_ptr);
link_set_timer(l_ptr, cont_intv);
break;
case RESET_MSG:
break;
case TIMEOUT_EVT:
- tipc_link_send_proto_msg(l_ptr, ACTIVATE_MSG, 0, 0, 0, 0, 0);
+ tipc_link_proto_xmit(l_ptr, ACTIVATE_MSG,
+ 0, 0, 0, 0, 0);
l_ptr->fsm_msg_cnt++;
link_set_timer(l_ptr, cont_intv);
break;
@@ -721,11 +749,11 @@ static void link_add_chain_to_outqueue(struct tipc_link *l_ptr,
}
/*
- * tipc_link_send_buf() is the 'full path' for messages, called from
- * inside TIPC when the 'fast path' in tipc_send_buf
+ * __tipc_link_xmit() is the 'full path' for messages, called from
+ * inside TIPC when the 'fast path' in tipc_send_xmit
* has failed, and from link_send()
*/
-int tipc_link_send_buf(struct tipc_link *l_ptr, struct sk_buff *buf)
+int __tipc_link_xmit(struct tipc_link *l_ptr, struct sk_buff *buf)
{
struct tipc_msg *msg = buf_msg(buf);
u32 size = msg_size(msg);
@@ -753,13 +781,13 @@ int tipc_link_send_buf(struct tipc_link *l_ptr, struct sk_buff *buf)
/* Fragmentation needed ? */
if (size > max_packet)
- return link_send_long_buf(l_ptr, buf);
+ return tipc_link_frag_xmit(l_ptr, buf);
/* Packet can be queued or sent. */
if (likely(!link_congested(l_ptr))) {
link_add_to_outqueue(l_ptr, buf, msg);
- tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr);
+ tipc_bearer_send(l_ptr->bearer_id, buf, &l_ptr->media_addr);
l_ptr->unacked_window = 0;
return dsz;
}
@@ -797,42 +825,40 @@ int tipc_link_send_buf(struct tipc_link *l_ptr, struct sk_buff *buf)
}
/*
- * tipc_link_send(): same as tipc_link_send_buf(), but the link to use has
- * not been selected yet, and the the owner node is not locked
+ * tipc_link_xmit(): same as __tipc_link_xmit(), but the link to use
+ * has not been selected yet, and the owner node is not locked
* Called by TIPC internal users, e.g. the name distributor
*/
-int tipc_link_send(struct sk_buff *buf, u32 dest, u32 selector)
+int tipc_link_xmit(struct sk_buff *buf, u32 dest, u32 selector)
{
struct tipc_link *l_ptr;
struct tipc_node *n_ptr;
int res = -ELINKCONG;
- read_lock_bh(&tipc_net_lock);
n_ptr = tipc_node_find(dest);
if (n_ptr) {
tipc_node_lock(n_ptr);
l_ptr = n_ptr->active_links[selector & 1];
if (l_ptr)
- res = tipc_link_send_buf(l_ptr, buf);
+ res = __tipc_link_xmit(l_ptr, buf);
else
kfree_skb(buf);
tipc_node_unlock(n_ptr);
} else {
kfree_skb(buf);
}
- read_unlock_bh(&tipc_net_lock);
return res;
}
/*
- * tipc_link_send_sync - synchronize broadcast link endpoints.
+ * tipc_link_sync_xmit - synchronize broadcast link endpoints.
*
* Give a newly added peer node the sequence number where it should
* start receiving and acking broadcast packets.
*
* Called with node locked
*/
-static void tipc_link_send_sync(struct tipc_link *l)
+static void tipc_link_sync_xmit(struct tipc_link *l)
{
struct sk_buff *buf;
struct tipc_msg *msg;
@@ -849,14 +875,14 @@ static void tipc_link_send_sync(struct tipc_link *l)
}
/*
- * tipc_link_recv_sync - synchronize broadcast link endpoints.
+ * tipc_link_sync_rcv - synchronize broadcast link endpoints.
* Receive the sequence number where we should start receiving and
* acking broadcast packets from a newly added peer node, and open
* up for reception of such packets.
*
* Called with node locked
*/
-static void tipc_link_recv_sync(struct tipc_node *n, struct sk_buff *buf)
+static void tipc_link_sync_rcv(struct tipc_node *n, struct sk_buff *buf)
{
struct tipc_msg *msg = buf_msg(buf);
@@ -866,7 +892,7 @@ static void tipc_link_recv_sync(struct tipc_node *n, struct sk_buff *buf)
}
/*
- * tipc_link_send_names - send name table entries to new neighbor
+ * tipc_link_names_xmit - send name table entries to new neighbor
*
* Send routine for bulk delivery of name table messages when contact
* with a new neighbor occurs. No link congestion checking is performed
@@ -874,7 +900,7 @@ static void tipc_link_recv_sync(struct tipc_node *n, struct sk_buff *buf)
* small enough not to require fragmentation.
* Called without any locks held.
*/
-void tipc_link_send_names(struct list_head *message_list, u32 dest)
+void tipc_link_names_xmit(struct list_head *message_list, u32 dest)
{
struct tipc_node *n_ptr;
struct tipc_link *l_ptr;
@@ -884,7 +910,6 @@ void tipc_link_send_names(struct list_head *message_list, u32 dest)
if (list_empty(message_list))
return;
- read_lock_bh(&tipc_net_lock);
n_ptr = tipc_node_find(dest);
if (n_ptr) {
tipc_node_lock(n_ptr);
@@ -899,7 +924,6 @@ void tipc_link_send_names(struct list_head *message_list, u32 dest)
}
tipc_node_unlock(n_ptr);
}
- read_unlock_bh(&tipc_net_lock);
/* discard the messages if they couldn't be sent */
list_for_each_safe(buf, temp_buf, ((struct sk_buff *)message_list)) {
@@ -909,13 +933,13 @@ void tipc_link_send_names(struct list_head *message_list, u32 dest)
}
/*
- * link_send_buf_fast: Entry for data messages where the
+ * tipc_link_xmit_fast: Entry for data messages where the
* destination link is known and the header is complete,
* inclusive total message length. Very time critical.
* Link is locked. Returns user data length.
*/
-static int link_send_buf_fast(struct tipc_link *l_ptr, struct sk_buff *buf,
- u32 *used_max_pkt)
+static int tipc_link_xmit_fast(struct tipc_link *l_ptr, struct sk_buff *buf,
+ u32 *used_max_pkt)
{
struct tipc_msg *msg = buf_msg(buf);
int res = msg_data_sz(msg);
@@ -923,7 +947,7 @@ static int link_send_buf_fast(struct tipc_link *l_ptr, struct sk_buff *buf,
if (likely(!link_congested(l_ptr))) {
if (likely(msg_size(msg) <= l_ptr->max_pkt)) {
link_add_to_outqueue(l_ptr, buf, msg);
- tipc_bearer_send(l_ptr->b_ptr, buf,
+ tipc_bearer_send(l_ptr->bearer_id, buf,
&l_ptr->media_addr);
l_ptr->unacked_window = 0;
return res;
@@ -931,18 +955,18 @@ static int link_send_buf_fast(struct tipc_link *l_ptr, struct sk_buff *buf,
else
*used_max_pkt = l_ptr->max_pkt;
}
- return tipc_link_send_buf(l_ptr, buf); /* All other cases */
+ return __tipc_link_xmit(l_ptr, buf); /* All other cases */
}
/*
- * tipc_link_send_sections_fast: Entry for messages where the
+ * tipc_link_iovec_xmit_fast: Entry for messages where the
* destination processor is known and the header is complete,
* except for total message length.
* Returns user data length or errno.
*/
-int tipc_link_send_sections_fast(struct tipc_port *sender,
- struct iovec const *msg_sect,
- unsigned int len, u32 destaddr)
+int tipc_link_iovec_xmit_fast(struct tipc_port *sender,
+ struct iovec const *msg_sect,
+ unsigned int len, u32 destaddr)
{
struct tipc_msg *hdr = &sender->phdr;
struct tipc_link *l_ptr;
@@ -961,18 +985,16 @@ again:
if (unlikely(res < 0))
return res;
- read_lock_bh(&tipc_net_lock);
node = tipc_node_find(destaddr);
if (likely(node)) {
tipc_node_lock(node);
l_ptr = node->active_links[selector];
if (likely(l_ptr)) {
if (likely(buf)) {
- res = link_send_buf_fast(l_ptr, buf,
- &sender->max_pkt);
+ res = tipc_link_xmit_fast(l_ptr, buf,
+ &sender->max_pkt);
exit:
tipc_node_unlock(node);
- read_unlock_bh(&tipc_net_lock);
return res;
}
@@ -989,30 +1011,25 @@ exit:
*/
sender->max_pkt = l_ptr->max_pkt;
tipc_node_unlock(node);
- read_unlock_bh(&tipc_net_lock);
if ((msg_hdr_sz(hdr) + res) <= sender->max_pkt)
goto again;
- return link_send_sections_long(sender, msg_sect, len,
- destaddr);
+ return tipc_link_iovec_long_xmit(sender, msg_sect,
+ len, destaddr);
}
tipc_node_unlock(node);
}
- read_unlock_bh(&tipc_net_lock);
/* Couldn't find a link to the destination node */
- if (buf)
- return tipc_reject_msg(buf, TIPC_ERR_NO_NODE);
- if (res >= 0)
- return tipc_port_reject_sections(sender, hdr, msg_sect,
- len, TIPC_ERR_NO_NODE);
- return res;
+ kfree_skb(buf);
+ tipc_port_iovec_reject(sender, hdr, msg_sect, len, TIPC_ERR_NO_NODE);
+ return -ENETUNREACH;
}
/*
- * link_send_sections_long(): Entry for long messages where the
+ * tipc_link_iovec_long_xmit(): Entry for long messages where the
* destination node is known and the header is complete,
* inclusive total message length.
* Link and bearer congestion status have been checked to be ok,
@@ -1025,9 +1042,9 @@ exit:
*
* Returns user data length or errno.
*/
-static int link_send_sections_long(struct tipc_port *sender,
- struct iovec const *msg_sect,
- unsigned int len, u32 destaddr)
+static int tipc_link_iovec_long_xmit(struct tipc_port *sender,
+ struct iovec const *msg_sect,
+ unsigned int len, u32 destaddr)
{
struct tipc_link *l_ptr;
struct tipc_node *node;
@@ -1146,8 +1163,9 @@ error:
} else {
reject:
kfree_skb_list(buf_chain);
- return tipc_port_reject_sections(sender, hdr, msg_sect,
- len, TIPC_ERR_NO_NODE);
+ tipc_port_iovec_reject(sender, hdr, msg_sect, len,
+ TIPC_ERR_NO_NODE);
+ return -ENETUNREACH;
}
/* Append chain of fragments to send queue & send them */
@@ -1188,7 +1206,7 @@ static u32 tipc_link_push_packet(struct tipc_link *l_ptr)
if (r_q_size && buf) {
msg_set_ack(buf_msg(buf), mod(l_ptr->next_in_no - 1));
msg_set_bcast_ack(buf_msg(buf), l_ptr->owner->bclink.last_in);
- tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr);
+ tipc_bearer_send(l_ptr->bearer_id, buf, &l_ptr->media_addr);
l_ptr->retransm_queue_head = mod(++r_q_head);
l_ptr->retransm_queue_size = --r_q_size;
l_ptr->stats.retransmitted++;
@@ -1200,7 +1218,7 @@ static u32 tipc_link_push_packet(struct tipc_link *l_ptr)
if (buf) {
msg_set_ack(buf_msg(buf), mod(l_ptr->next_in_no - 1));
msg_set_bcast_ack(buf_msg(buf), l_ptr->owner->bclink.last_in);
- tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr);
+ tipc_bearer_send(l_ptr->bearer_id, buf, &l_ptr->media_addr);
l_ptr->unacked_window = 0;
kfree_skb(buf);
l_ptr->proto_msg_queue = NULL;
@@ -1217,7 +1235,8 @@ static u32 tipc_link_push_packet(struct tipc_link *l_ptr)
if (mod(next - first) < l_ptr->queue_limit[0]) {
msg_set_ack(msg, mod(l_ptr->next_in_no - 1));
msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in);
- tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr);
+ tipc_bearer_send(l_ptr->bearer_id, buf,
+ &l_ptr->media_addr);
if (msg_user(msg) == MSG_BUNDLER)
msg_set_type(msg, CLOSED_MSG);
l_ptr->next_out = buf->next;
@@ -1240,33 +1259,24 @@ void tipc_link_push_queue(struct tipc_link *l_ptr)
} while (!res);
}
-static void link_reset_all(unsigned long addr)
+void tipc_link_reset_all(struct tipc_node *node)
{
- struct tipc_node *n_ptr;
char addr_string[16];
u32 i;
- read_lock_bh(&tipc_net_lock);
- n_ptr = tipc_node_find((u32)addr);
- if (!n_ptr) {
- read_unlock_bh(&tipc_net_lock);
- return; /* node no longer exists */
- }
-
- tipc_node_lock(n_ptr);
+ tipc_node_lock(node);
pr_warn("Resetting all links to %s\n",
- tipc_addr_string_fill(addr_string, n_ptr->addr));
+ tipc_addr_string_fill(addr_string, node->addr));
for (i = 0; i < MAX_BEARERS; i++) {
- if (n_ptr->links[i]) {
- link_print(n_ptr->links[i], "Resetting link\n");
- tipc_link_reset(n_ptr->links[i]);
+ if (node->links[i]) {
+ link_print(node->links[i], "Resetting link\n");
+ tipc_link_reset(node->links[i]);
}
}
- tipc_node_unlock(n_ptr);
- read_unlock_bh(&tipc_net_lock);
+ tipc_node_unlock(node);
}
static void link_retransmit_failure(struct tipc_link *l_ptr,
@@ -1303,10 +1313,9 @@ static void link_retransmit_failure(struct tipc_link *l_ptr,
n_ptr->bclink.oos_state,
n_ptr->bclink.last_sent);
- tipc_k_signal((Handler)link_reset_all, (unsigned long)n_ptr->addr);
-
tipc_node_unlock(n_ptr);
+ tipc_bclink_set_flags(TIPC_BCLINK_RESET);
l_ptr->stale_count = 0;
}
}
@@ -1336,7 +1345,7 @@ void tipc_link_retransmit(struct tipc_link *l_ptr, struct sk_buff *buf,
msg = buf_msg(buf);
msg_set_ack(msg, mod(l_ptr->next_in_no - 1));
msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in);
- tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr);
+ tipc_bearer_send(l_ptr->bearer_id, buf, &l_ptr->media_addr);
buf = buf->next;
retransmits--;
l_ptr->stats.retransmitted++;
@@ -1424,14 +1433,13 @@ static int link_recv_buf_validate(struct sk_buff *buf)
/**
* tipc_rcv - process TIPC packets/messages arriving from off-node
* @head: pointer to message buffer chain
- * @tb_ptr: pointer to bearer message arrived on
+ * @b_ptr: pointer to the bearer that the message arrived on
*
* Invoked with no locks held. Bearer pointer must point to a valid bearer
* structure (i.e. cannot be NULL), but bearer can be inactive.
*/
void tipc_rcv(struct sk_buff *head, struct tipc_bearer *b_ptr)
{
- read_lock_bh(&tipc_net_lock);
while (head) {
struct tipc_node *n_ptr;
struct tipc_link *l_ptr;
@@ -1441,15 +1449,10 @@ void tipc_rcv(struct sk_buff *head, struct tipc_bearer *b_ptr)
u32 seq_no;
u32 ackd;
u32 released = 0;
- int type;
head = head->next;
buf->next = NULL;
- /* Ensure bearer is still enabled */
- if (unlikely(!b_ptr->active))
- goto discard;
-
/* Ensure message is well-formed */
if (unlikely(!link_recv_buf_validate(buf)))
goto discard;
@@ -1463,9 +1466,9 @@ void tipc_rcv(struct sk_buff *head, struct tipc_bearer *b_ptr)
if (unlikely(msg_non_seq(msg))) {
if (msg_user(msg) == LINK_CONFIG)
- tipc_disc_recv_msg(buf, b_ptr);
+ tipc_disc_rcv(buf, b_ptr);
else
- tipc_bclink_recv_pkt(buf);
+ tipc_bclink_rcv(buf);
continue;
}
@@ -1486,14 +1489,14 @@ void tipc_rcv(struct sk_buff *head, struct tipc_bearer *b_ptr)
goto unlock_discard;
/* Verify that communication with node is currently allowed */
- if ((n_ptr->block_setup & WAIT_PEER_DOWN) &&
- msg_user(msg) == LINK_PROTOCOL &&
- (msg_type(msg) == RESET_MSG ||
- msg_type(msg) == ACTIVATE_MSG) &&
- !msg_redundant_link(msg))
- n_ptr->block_setup &= ~WAIT_PEER_DOWN;
-
- if (n_ptr->block_setup)
+ if ((n_ptr->action_flags & TIPC_WAIT_PEER_LINKS_DOWN) &&
+ msg_user(msg) == LINK_PROTOCOL &&
+ (msg_type(msg) == RESET_MSG ||
+ msg_type(msg) == ACTIVATE_MSG) &&
+ !msg_redundant_link(msg))
+ n_ptr->action_flags &= ~TIPC_WAIT_PEER_LINKS_DOWN;
+
+ if (tipc_node_blocked(n_ptr))
goto unlock_discard;
/* Validate message sequence number info */
@@ -1508,7 +1511,6 @@ void tipc_rcv(struct sk_buff *head, struct tipc_bearer *b_ptr)
while ((crs != l_ptr->next_out) &&
less_eq(buf_seqno(crs), ackd)) {
struct sk_buff *next = crs->next;
-
kfree_skb(crs);
crs = next;
released++;
@@ -1521,18 +1523,19 @@ void tipc_rcv(struct sk_buff *head, struct tipc_bearer *b_ptr)
/* Try sending any messages link endpoint has pending */
if (unlikely(l_ptr->next_out))
tipc_link_push_queue(l_ptr);
+
if (unlikely(!list_empty(&l_ptr->waiting_ports)))
tipc_link_wakeup_ports(l_ptr, 0);
+
if (unlikely(++l_ptr->unacked_window >= TIPC_MIN_LINK_WIN)) {
l_ptr->stats.sent_acks++;
- tipc_link_send_proto_msg(l_ptr, STATE_MSG, 0, 0, 0, 0, 0);
+ tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, 0, 0, 0, 0);
}
- /* Now (finally!) process the incoming message */
-protocol_check:
+ /* Process the incoming packet */
if (unlikely(!link_working_working(l_ptr))) {
if (msg_user(msg) == LINK_PROTOCOL) {
- link_recv_proto_msg(l_ptr, buf);
+ tipc_link_proto_rcv(l_ptr, buf);
head = link_insert_deferred_queue(l_ptr, head);
tipc_node_unlock(n_ptr);
continue;
@@ -1561,72 +1564,64 @@ protocol_check:
l_ptr->next_in_no++;
if (unlikely(l_ptr->oldest_deferred_in))
head = link_insert_deferred_queue(l_ptr, head);
-deliver:
- if (likely(msg_isdata(msg))) {
- tipc_node_unlock(n_ptr);
- tipc_port_recv_msg(buf);
- continue;
+
+ /* Deliver packet/message to correct user: */
+ if (unlikely(msg_user(msg) == CHANGEOVER_PROTOCOL)) {
+ if (!tipc_link_tunnel_rcv(n_ptr, &buf)) {
+ tipc_node_unlock(n_ptr);
+ continue;
+ }
+ msg = buf_msg(buf);
+ } else if (msg_user(msg) == MSG_FRAGMENTER) {
+ l_ptr->stats.recv_fragments++;
+ if (tipc_buf_append(&l_ptr->reasm_buf, &buf)) {
+ l_ptr->stats.recv_fragmented++;
+ msg = buf_msg(buf);
+ } else {
+ if (!l_ptr->reasm_buf)
+ tipc_link_reset(l_ptr);
+ tipc_node_unlock(n_ptr);
+ continue;
+ }
}
+
switch (msg_user(msg)) {
- int ret;
+ case TIPC_LOW_IMPORTANCE:
+ case TIPC_MEDIUM_IMPORTANCE:
+ case TIPC_HIGH_IMPORTANCE:
+ case TIPC_CRITICAL_IMPORTANCE:
+ tipc_node_unlock(n_ptr);
+ tipc_sk_rcv(buf);
+ continue;
case MSG_BUNDLER:
l_ptr->stats.recv_bundles++;
l_ptr->stats.recv_bundled += msg_msgcnt(msg);
tipc_node_unlock(n_ptr);
- tipc_link_recv_bundle(buf);
+ tipc_link_bundle_rcv(buf);
continue;
case NAME_DISTRIBUTOR:
n_ptr->bclink.recv_permitted = true;
tipc_node_unlock(n_ptr);
- tipc_named_recv(buf);
- continue;
- case BCAST_PROTOCOL:
- tipc_link_recv_sync(n_ptr, buf);
- tipc_node_unlock(n_ptr);
+ tipc_named_rcv(buf);
continue;
case CONN_MANAGER:
tipc_node_unlock(n_ptr);
- tipc_port_recv_proto_msg(buf);
+ tipc_port_proto_rcv(buf);
continue;
- case MSG_FRAGMENTER:
- l_ptr->stats.recv_fragments++;
- ret = tipc_link_recv_fragment(&l_ptr->reasm_head,
- &l_ptr->reasm_tail,
- &buf);
- if (ret == LINK_REASM_COMPLETE) {
- l_ptr->stats.recv_fragmented++;
- msg = buf_msg(buf);
- goto deliver;
- }
- if (ret == LINK_REASM_ERROR)
- tipc_link_reset(l_ptr);
- tipc_node_unlock(n_ptr);
- continue;
- case CHANGEOVER_PROTOCOL:
- type = msg_type(msg);
- if (tipc_link_tunnel_rcv(&l_ptr, &buf)) {
- msg = buf_msg(buf);
- seq_no = msg_seqno(msg);
- if (type == ORIGINAL_MSG)
- goto deliver;
- goto protocol_check;
- }
+ case BCAST_PROTOCOL:
+ tipc_link_sync_rcv(n_ptr, buf);
break;
default:
kfree_skb(buf);
- buf = NULL;
break;
}
tipc_node_unlock(n_ptr);
- tipc_net_route_msg(buf);
continue;
unlock_discard:
-
tipc_node_unlock(n_ptr);
discard:
kfree_skb(buf);
}
- read_unlock_bh(&tipc_net_lock);
}
/**
@@ -1688,7 +1683,7 @@ static void link_handle_out_of_seq_msg(struct tipc_link *l_ptr,
u32 seq_no = buf_seqno(buf);
if (likely(msg_user(buf_msg(buf)) == LINK_PROTOCOL)) {
- link_recv_proto_msg(l_ptr, buf);
+ tipc_link_proto_rcv(l_ptr, buf);
return;
}
@@ -1711,7 +1706,7 @@ static void link_handle_out_of_seq_msg(struct tipc_link *l_ptr,
l_ptr->stats.deferred_recv++;
TIPC_SKB_CB(buf)->deferred = true;
if ((l_ptr->deferred_inqueue_sz % 16) == 1)
- tipc_link_send_proto_msg(l_ptr, STATE_MSG, 0, 0, 0, 0, 0);
+ tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, 0, 0, 0, 0);
} else
l_ptr->stats.duplicates++;
}
@@ -1719,9 +1714,8 @@ static void link_handle_out_of_seq_msg(struct tipc_link *l_ptr,
/*
* Send protocol message to the other endpoint.
*/
-void tipc_link_send_proto_msg(struct tipc_link *l_ptr, u32 msg_typ,
- int probe_msg, u32 gap, u32 tolerance,
- u32 priority, u32 ack_mtu)
+void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int probe_msg,
+ u32 gap, u32 tolerance, u32 priority, u32 ack_mtu)
{
struct sk_buff *buf = NULL;
struct tipc_msg *msg = l_ptr->pmsg;
@@ -1739,12 +1733,12 @@ void tipc_link_send_proto_msg(struct tipc_link *l_ptr, u32 msg_typ,
return;
/* Abort non-RESET send if communication with node is prohibited */
- if ((l_ptr->owner->block_setup) && (msg_typ != RESET_MSG))
+ if ((tipc_node_blocked(l_ptr->owner)) && (msg_typ != RESET_MSG))
return;
/* Create protocol message with "out-of-sequence" sequence number */
msg_set_type(msg, msg_typ);
- msg_set_net_plane(msg, l_ptr->b_ptr->net_plane);
+ msg_set_net_plane(msg, l_ptr->net_plane);
msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in);
msg_set_last_bcast(msg, tipc_bclink_get_last_sent());
@@ -1810,7 +1804,7 @@ void tipc_link_send_proto_msg(struct tipc_link *l_ptr, u32 msg_typ,
skb_copy_to_linear_data(buf, msg, sizeof(l_ptr->proto_msg));
buf->priority = TC_PRIO_CONTROL;
- tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr);
+ tipc_bearer_send(l_ptr->bearer_id, buf, &l_ptr->media_addr);
l_ptr->unacked_window = 0;
kfree_skb(buf);
}
@@ -1820,7 +1814,7 @@ void tipc_link_send_proto_msg(struct tipc_link *l_ptr, u32 msg_typ,
* Note that network plane id propagates through the network, and may
* change at any time. The node with lowest address rules
*/
-static void link_recv_proto_msg(struct tipc_link *l_ptr, struct sk_buff *buf)
+static void tipc_link_proto_rcv(struct tipc_link *l_ptr, struct sk_buff *buf)
{
u32 rec_gap = 0;
u32 max_pkt_info;
@@ -1832,12 +1826,9 @@ static void link_recv_proto_msg(struct tipc_link *l_ptr, struct sk_buff *buf)
if (l_ptr->exp_msg_count)
goto exit;
- /* record unnumbered packet arrival (force mismatch on next timeout) */
- l_ptr->checkpoint--;
-
- if (l_ptr->b_ptr->net_plane != msg_net_plane(msg))
+ if (l_ptr->net_plane != msg_net_plane(msg))
if (tipc_own_addr > msg_prevnode(msg))
- l_ptr->b_ptr->net_plane = msg_net_plane(msg);
+ l_ptr->net_plane = msg_net_plane(msg);
switch (msg_type(msg)) {
@@ -1854,7 +1845,7 @@ static void link_recv_proto_msg(struct tipc_link *l_ptr, struct sk_buff *buf)
* peer has lost contact -- don't allow peer's links
* to reactivate before we recognize loss & clean up
*/
- l_ptr->owner->block_setup = WAIT_NODE_DOWN;
+ l_ptr->owner->action_flags |= TIPC_WAIT_OWN_LINKS_DOWN;
}
link_state_event(l_ptr, RESET_MSG);
@@ -1910,6 +1901,10 @@ static void link_recv_proto_msg(struct tipc_link *l_ptr, struct sk_buff *buf)
tipc_link_reset(l_ptr); /* Enforce change to take effect */
break;
}
+
+ /* Record reception; force mismatch at next timeout: */
+ l_ptr->checkpoint--;
+
link_state_event(l_ptr, TRAFFIC_MSG_EVT);
l_ptr->stats.recv_states++;
if (link_reset_unknown(l_ptr))
@@ -1939,8 +1934,8 @@ static void link_recv_proto_msg(struct tipc_link *l_ptr, struct sk_buff *buf)
msg_last_bcast(msg));
if (rec_gap || (msg_probe(msg))) {
- tipc_link_send_proto_msg(l_ptr, STATE_MSG,
- 0, rec_gap, 0, 0, max_pkt_ack);
+ tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, rec_gap, 0,
+ 0, max_pkt_ack);
}
if (msg_seq_gap(msg)) {
l_ptr->stats.recv_nacks++;
@@ -1979,7 +1974,7 @@ static void tipc_link_tunnel_xmit(struct tipc_link *l_ptr,
}
skb_copy_to_linear_data(buf, tunnel_hdr, INT_H_SIZE);
skb_copy_to_linear_data_offset(buf, INT_H_SIZE, msg, length);
- tipc_link_send_buf(tunnel, buf);
+ __tipc_link_xmit(tunnel, buf);
}
@@ -2012,7 +2007,7 @@ void tipc_link_failover_send_queue(struct tipc_link *l_ptr)
if (buf) {
skb_copy_to_linear_data(buf, &tunnel_hdr, INT_H_SIZE);
msg_set_size(&tunnel_hdr, INT_H_SIZE);
- tipc_link_send_buf(tunnel, buf);
+ __tipc_link_xmit(tunnel, buf);
} else {
pr_warn("%sunable to send changeover msg\n",
link_co_err);
@@ -2046,7 +2041,7 @@ void tipc_link_failover_send_queue(struct tipc_link *l_ptr)
}
}
-/* tipc_link_dup_send_queue(): A second link has become active. Tunnel a
+/* tipc_link_dup_queue_xmit(): A second link has become active. Tunnel a
* duplicate of the first link's send queue via the new link. This way, we
* are guaranteed that currently queued packets from a socket are delivered
* before future traffic from the same socket, even if this is using the
@@ -2055,7 +2050,7 @@ void tipc_link_failover_send_queue(struct tipc_link *l_ptr)
* and sequence order is preserved per sender/receiver socket pair.
* Owner node is locked.
*/
-void tipc_link_dup_send_queue(struct tipc_link *l_ptr,
+void tipc_link_dup_queue_xmit(struct tipc_link *l_ptr,
struct tipc_link *tunnel)
{
struct sk_buff *iter;
@@ -2085,7 +2080,7 @@ void tipc_link_dup_send_queue(struct tipc_link *l_ptr,
skb_copy_to_linear_data(outbuf, &tunnel_hdr, INT_H_SIZE);
skb_copy_to_linear_data_offset(outbuf, INT_H_SIZE, iter->data,
length);
- tipc_link_send_buf(tunnel, outbuf);
+ __tipc_link_xmit(tunnel, outbuf);
if (!tipc_link_is_up(l_ptr))
return;
iter = iter->next;
@@ -2112,89 +2107,112 @@ static struct sk_buff *buf_extract(struct sk_buff *skb, u32 from_pos)
return eb;
}
-/* tipc_link_tunnel_rcv(): Receive a tunneled packet, sent
- * via other link as result of a failover (ORIGINAL_MSG) or
- * a new active link (DUPLICATE_MSG). Failover packets are
- * returned to the active link for delivery upwards.
+
+
+/* tipc_link_dup_rcv(): Receive a tunnelled DUPLICATE_MSG packet.
+ * Owner node is locked.
+ */
+static void tipc_link_dup_rcv(struct tipc_link *l_ptr,
+ struct sk_buff *t_buf)
+{
+ struct sk_buff *buf;
+
+ if (!tipc_link_is_up(l_ptr))
+ return;
+
+ buf = buf_extract(t_buf, INT_H_SIZE);
+ if (buf == NULL) {
+ pr_warn("%sfailed to extract inner dup pkt\n", link_co_err);
+ return;
+ }
+
+ /* Add buffer to deferred queue, if applicable: */
+ link_handle_out_of_seq_msg(l_ptr, buf);
+}
+
+/* tipc_link_failover_rcv(): Receive a tunnelled ORIGINAL_MSG packet
* Owner node is locked.
*/
-static int tipc_link_tunnel_rcv(struct tipc_link **l_ptr,
- struct sk_buff **buf)
+static struct sk_buff *tipc_link_failover_rcv(struct tipc_link *l_ptr,
+ struct sk_buff *t_buf)
{
- struct sk_buff *tunnel_buf = *buf;
- struct tipc_link *dest_link;
+ struct tipc_msg *t_msg = buf_msg(t_buf);
+ struct sk_buff *buf = NULL;
struct tipc_msg *msg;
- struct tipc_msg *tunnel_msg = buf_msg(tunnel_buf);
- u32 msg_typ = msg_type(tunnel_msg);
- u32 msg_count = msg_msgcnt(tunnel_msg);
- u32 bearer_id = msg_bearer_id(tunnel_msg);
- if (bearer_id >= MAX_BEARERS)
- goto exit;
- dest_link = (*l_ptr)->owner->links[bearer_id];
- if (!dest_link)
- goto exit;
- if (dest_link == *l_ptr) {
- pr_err("Unexpected changeover message on link <%s>\n",
- (*l_ptr)->name);
- goto exit;
- }
- *l_ptr = dest_link;
- msg = msg_get_wrapped(tunnel_msg);
+ if (tipc_link_is_up(l_ptr))
+ tipc_link_reset(l_ptr);
- if (msg_typ == DUPLICATE_MSG) {
- if (less(msg_seqno(msg), mod(dest_link->next_in_no)))
- goto exit;
- *buf = buf_extract(tunnel_buf, INT_H_SIZE);
- if (*buf == NULL) {
- pr_warn("%sduplicate msg dropped\n", link_co_err);
+ /* First failover packet? */
+ if (l_ptr->exp_msg_count == START_CHANGEOVER)
+ l_ptr->exp_msg_count = msg_msgcnt(t_msg);
+
+ /* Should there be an inner packet? */
+ if (l_ptr->exp_msg_count) {
+ l_ptr->exp_msg_count--;
+ buf = buf_extract(t_buf, INT_H_SIZE);
+ if (buf == NULL) {
+ pr_warn("%sno inner failover pkt\n", link_co_err);
goto exit;
}
- kfree_skb(tunnel_buf);
- return 1;
- }
+ msg = buf_msg(buf);
- /* First original message ?: */
- if (tipc_link_is_up(dest_link)) {
- pr_info("%s<%s>, changeover initiated by peer\n", link_rst_msg,
- dest_link->name);
- tipc_link_reset(dest_link);
- dest_link->exp_msg_count = msg_count;
- if (!msg_count)
- goto exit;
- } else if (dest_link->exp_msg_count == START_CHANGEOVER) {
- dest_link->exp_msg_count = msg_count;
- if (!msg_count)
+ if (less(msg_seqno(msg), l_ptr->reset_checkpoint)) {
+ kfree_skb(buf);
+ buf = NULL;
goto exit;
+ }
+ if (msg_user(msg) == MSG_FRAGMENTER) {
+ l_ptr->stats.recv_fragments++;
+ tipc_buf_append(&l_ptr->reasm_buf, &buf);
+ }
}
+exit:
+ if ((l_ptr->exp_msg_count == 0) && (l_ptr->flags & LINK_STOPPED)) {
+ tipc_node_detach_link(l_ptr->owner, l_ptr);
+ kfree(l_ptr);
+ }
+ return buf;
+}
- /* Receive original message */
- if (dest_link->exp_msg_count == 0) {
- pr_warn("%sgot too many tunnelled messages\n", link_co_err);
+/* tipc_link_tunnel_rcv(): Receive a tunnelled packet, sent
+ * via other link as result of a failover (ORIGINAL_MSG) or
+ * a new active link (DUPLICATE_MSG). Failover packets are
+ * returned to the active link for delivery upwards.
+ * Owner node is locked.
+ */
+static int tipc_link_tunnel_rcv(struct tipc_node *n_ptr,
+ struct sk_buff **buf)
+{
+ struct sk_buff *t_buf = *buf;
+ struct tipc_link *l_ptr;
+ struct tipc_msg *t_msg = buf_msg(t_buf);
+ u32 bearer_id = msg_bearer_id(t_msg);
+
+ *buf = NULL;
+
+ if (bearer_id >= MAX_BEARERS)
goto exit;
- }
- dest_link->exp_msg_count--;
- if (less(msg_seqno(msg), dest_link->reset_checkpoint)) {
+
+ l_ptr = n_ptr->links[bearer_id];
+ if (!l_ptr)
goto exit;
- } else {
- *buf = buf_extract(tunnel_buf, INT_H_SIZE);
- if (*buf != NULL) {
- kfree_skb(tunnel_buf);
- return 1;
- } else {
- pr_warn("%soriginal msg dropped\n", link_co_err);
- }
- }
+
+ if (msg_type(t_msg) == DUPLICATE_MSG)
+ tipc_link_dup_rcv(l_ptr, t_buf);
+ else if (msg_type(t_msg) == ORIGINAL_MSG)
+ *buf = tipc_link_failover_rcv(l_ptr, t_buf);
+ else
+ pr_warn("%sunknown tunnel pkt received\n", link_co_err);
exit:
- *buf = NULL;
- kfree_skb(tunnel_buf);
- return 0;
+ kfree_skb(t_buf);
+ return *buf != NULL;
}
/*
* Bundler functionality:
*/
-void tipc_link_recv_bundle(struct sk_buff *buf)
+void tipc_link_bundle_rcv(struct sk_buff *buf)
{
u32 msgcount = msg_msgcnt(buf_msg(buf));
u32 pos = INT_H_SIZE;
@@ -2217,11 +2235,11 @@ void tipc_link_recv_bundle(struct sk_buff *buf)
*/
/*
- * link_send_long_buf: Entry for buffers needing fragmentation.
+ * tipc_link_frag_xmit: Entry for buffers needing fragmentation.
* The buffer is complete, inclusive total message length.
* Returns user data length.
*/
-static int link_send_long_buf(struct tipc_link *l_ptr, struct sk_buff *buf)
+static int tipc_link_frag_xmit(struct tipc_link *l_ptr, struct sk_buff *buf)
{
struct sk_buff *buf_chain = NULL;
struct sk_buff *buf_chain_tail = (struct sk_buff *)&buf_chain;
@@ -2284,51 +2302,6 @@ static int link_send_long_buf(struct tipc_link *l_ptr, struct sk_buff *buf)
return dsz;
}
-/*
- * tipc_link_recv_fragment(): Called with node lock on. Returns
- * the reassembled buffer if message is complete.
- */
-int tipc_link_recv_fragment(struct sk_buff **head, struct sk_buff **tail,
- struct sk_buff **fbuf)
-{
- struct sk_buff *frag = *fbuf;
- struct tipc_msg *msg = buf_msg(frag);
- u32 fragid = msg_type(msg);
- bool headstolen;
- int delta;
-
- skb_pull(frag, msg_hdr_sz(msg));
- if (fragid == FIRST_FRAGMENT) {
- if (*head || skb_unclone(frag, GFP_ATOMIC))
- goto out_free;
- *head = frag;
- skb_frag_list_init(*head);
- return 0;
- } else if (*head &&
- skb_try_coalesce(*head, frag, &headstolen, &delta)) {
- kfree_skb_partial(frag, headstolen);
- } else {
- if (!*head)
- goto out_free;
- if (!skb_has_frag_list(*head))
- skb_shinfo(*head)->frag_list = frag;
- else
- (*tail)->next = frag;
- *tail = frag;
- (*head)->truesize += frag->truesize;
- }
- if (fragid == LAST_FRAGMENT) {
- *fbuf = *head;
- *tail = *head = NULL;
- return LINK_REASM_COMPLETE;
- }
- return 0;
-out_free:
- pr_warn_ratelimited("Link unable to reassemble fragmented message\n");
- kfree_skb(*fbuf);
- return LINK_REASM_ERROR;
-}
-
static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tolerance)
{
if ((tolerance < TIPC_MIN_LINK_TOL) || (tolerance > TIPC_MAX_LINK_TOL))
@@ -2359,35 +2332,39 @@ void tipc_link_set_queue_limits(struct tipc_link *l_ptr, u32 window)
l_ptr->queue_limit[MSG_FRAGMENTER] = 4000;
}
-/**
- * link_find_link - locate link by name
- * @name: ptr to link name string
- * @node: ptr to area to be filled with ptr to associated node
- *
- * Caller must hold 'tipc_net_lock' to ensure node and bearer are not deleted;
- * this also prevents link deletion.
+/* tipc_link_find_owner - locate owner node of link by link's name
+ * @link_name: pointer to link name string
+ * @bearer_id: pointer to index in 'node->links' array where the link was found.
*
- * Returns pointer to link (or 0 if invalid link name).
+ * Returns pointer to node owning the link, or 0 if no matching link is found.
*/
-static struct tipc_link *link_find_link(const char *name,
- struct tipc_node **node)
+static struct tipc_node *tipc_link_find_owner(const char *link_name,
+ unsigned int *bearer_id)
{
struct tipc_link *l_ptr;
struct tipc_node *n_ptr;
+ struct tipc_node *found_node = 0;
int i;
- list_for_each_entry(n_ptr, &tipc_node_list, list) {
+ *bearer_id = 0;
+ rcu_read_lock();
+ list_for_each_entry_rcu(n_ptr, &tipc_node_list, list) {
+ tipc_node_lock(n_ptr);
for (i = 0; i < MAX_BEARERS; i++) {
l_ptr = n_ptr->links[i];
- if (l_ptr && !strcmp(l_ptr->name, name))
- goto found;
+ if (l_ptr && !strcmp(l_ptr->name, link_name)) {
+ *bearer_id = i;
+ found_node = n_ptr;
+ break;
+ }
}
+ tipc_node_unlock(n_ptr);
+ if (found_node)
+ break;
}
- l_ptr = NULL;
- n_ptr = NULL;
-found:
- *node = n_ptr;
- return l_ptr;
+ rcu_read_unlock();
+
+ return found_node;
}
/**
@@ -2419,7 +2396,7 @@ static int link_value_is_valid(u16 cmd, u32 new_value)
* @new_value: new value of link, bearer, or media setting
* @cmd: which link, bearer, or media attribute to set (TIPC_CMD_SET_LINK_*)
*
- * Caller must hold 'tipc_net_lock' to ensure link/bearer/media is not deleted.
+ * Caller must hold RTNL lock to ensure link/bearer/media is not deleted.
*
* Returns 0 if value updated and negative value on error.
*/
@@ -2429,32 +2406,33 @@ static int link_cmd_set_value(const char *name, u32 new_value, u16 cmd)
struct tipc_link *l_ptr;
struct tipc_bearer *b_ptr;
struct tipc_media *m_ptr;
+ int bearer_id;
int res = 0;
- l_ptr = link_find_link(name, &node);
- if (l_ptr) {
- /*
- * acquire node lock for tipc_link_send_proto_msg().
- * see "TIPC locking policy" in net.c.
- */
+ node = tipc_link_find_owner(name, &bearer_id);
+ if (node) {
tipc_node_lock(node);
- switch (cmd) {
- case TIPC_CMD_SET_LINK_TOL:
- link_set_supervision_props(l_ptr, new_value);
- tipc_link_send_proto_msg(l_ptr,
- STATE_MSG, 0, 0, new_value, 0, 0);
- break;
- case TIPC_CMD_SET_LINK_PRI:
- l_ptr->priority = new_value;
- tipc_link_send_proto_msg(l_ptr,
- STATE_MSG, 0, 0, 0, new_value, 0);
- break;
- case TIPC_CMD_SET_LINK_WINDOW:
- tipc_link_set_queue_limits(l_ptr, new_value);
- break;
- default:
- res = -EINVAL;
- break;
+ l_ptr = node->links[bearer_id];
+
+ if (l_ptr) {
+ switch (cmd) {
+ case TIPC_CMD_SET_LINK_TOL:
+ link_set_supervision_props(l_ptr, new_value);
+ tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, 0,
+ new_value, 0, 0);
+ break;
+ case TIPC_CMD_SET_LINK_PRI:
+ l_ptr->priority = new_value;
+ tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, 0,
+ 0, new_value, 0);
+ break;
+ case TIPC_CMD_SET_LINK_WINDOW:
+ tipc_link_set_queue_limits(l_ptr, new_value);
+ break;
+ default:
+ res = -EINVAL;
+ break;
+ }
}
tipc_node_unlock(node);
return res;
@@ -2524,9 +2502,7 @@ struct sk_buff *tipc_link_cmd_config(const void *req_tlv_area, int req_tlv_space
" (cannot change setting on broadcast link)");
}
- read_lock_bh(&tipc_net_lock);
res = link_cmd_set_value(args->name, new_value, cmd);
- read_unlock_bh(&tipc_net_lock);
if (res)
return tipc_cfg_reply_error_string("cannot change link setting");
@@ -2549,6 +2525,7 @@ struct sk_buff *tipc_link_cmd_reset_stats(const void *req_tlv_area, int req_tlv_
char *link_name;
struct tipc_link *l_ptr;
struct tipc_node *node;
+ unsigned int bearer_id;
if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_LINK_NAME))
return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
@@ -2559,18 +2536,18 @@ struct sk_buff *tipc_link_cmd_reset_stats(const void *req_tlv_area, int req_tlv_
return tipc_cfg_reply_error_string("link not found");
return tipc_cfg_reply_none();
}
+ node = tipc_link_find_owner(link_name, &bearer_id);
+ if (!node)
+ return tipc_cfg_reply_error_string("link not found");
- read_lock_bh(&tipc_net_lock);
- l_ptr = link_find_link(link_name, &node);
+ tipc_node_lock(node);
+ l_ptr = node->links[bearer_id];
if (!l_ptr) {
- read_unlock_bh(&tipc_net_lock);
+ tipc_node_unlock(node);
return tipc_cfg_reply_error_string("link not found");
}
-
- tipc_node_lock(node);
link_reset_statistics(l_ptr);
tipc_node_unlock(node);
- read_unlock_bh(&tipc_net_lock);
return tipc_cfg_reply_none();
}
@@ -2597,18 +2574,24 @@ static int tipc_link_stats(const char *name, char *buf, const u32 buf_size)
struct tipc_node *node;
char *status;
u32 profile_total = 0;
+ unsigned int bearer_id;
int ret;
if (!strcmp(name, tipc_bclink_name))
return tipc_bclink_stats(buf, buf_size);
- read_lock_bh(&tipc_net_lock);
- l = link_find_link(name, &node);
+ node = tipc_link_find_owner(name, &bearer_id);
+ if (!node)
+ return 0;
+
+ tipc_node_lock(node);
+
+ l = node->links[bearer_id];
if (!l) {
- read_unlock_bh(&tipc_net_lock);
+ tipc_node_unlock(node);
return 0;
}
- tipc_node_lock(node);
+
s = &l->stats;
if (tipc_link_is_active(l))
@@ -2671,7 +2654,6 @@ static int tipc_link_stats(const char *name, char *buf, const u32 buf_size)
(s->accu_queue_sz / s->queue_sz_counts) : 0);
tipc_node_unlock(node);
- read_unlock_bh(&tipc_net_lock);
return ret;
}
@@ -2722,7 +2704,6 @@ u32 tipc_link_get_max_pkt(u32 dest, u32 selector)
if (dest == tipc_own_addr)
return MAX_MSG_SIZE;
- read_lock_bh(&tipc_net_lock);
n_ptr = tipc_node_find(dest);
if (n_ptr) {
tipc_node_lock(n_ptr);
@@ -2731,13 +2712,18 @@ u32 tipc_link_get_max_pkt(u32 dest, u32 selector)
res = l_ptr->max_pkt;
tipc_node_unlock(n_ptr);
}
- read_unlock_bh(&tipc_net_lock);
return res;
}
static void link_print(struct tipc_link *l_ptr, const char *str)
{
- pr_info("%s Link %x<%s>:", str, l_ptr->addr, l_ptr->b_ptr->name);
+ struct tipc_bearer *b_ptr;
+
+ rcu_read_lock();
+ b_ptr = rcu_dereference_rtnl(bearer_list[l_ptr->bearer_id]);
+ if (b_ptr)
+ pr_info("%s Link %x<%s>:", str, l_ptr->addr, b_ptr->name);
+ rcu_read_unlock();
if (link_working_unknown(l_ptr))
pr_cont(":WU\n");
diff --git a/net/tipc/link.h b/net/tipc/link.h
index 3b6aa65b608..200d518b218 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -1,7 +1,7 @@
/*
* net/tipc/link.h: Include file for TIPC link code
*
- * Copyright (c) 1995-2006, Ericsson AB
+ * Copyright (c) 1995-2006, 2013, Ericsson AB
* Copyright (c) 2004-2005, 2010-2011, Wind River Systems
* All rights reserved.
*
@@ -40,27 +40,23 @@
#include "msg.h"
#include "node.h"
-/*
- * Link reassembly status codes
- */
-#define LINK_REASM_ERROR -1
-#define LINK_REASM_COMPLETE 1
-
-/*
- * Out-of-range value for link sequence numbers
+/* Out-of-range value for link sequence numbers
*/
#define INVALID_LINK_SEQ 0x10000
-/*
- * Link states
+/* Link working states
*/
#define WORKING_WORKING 560810u
#define WORKING_UNKNOWN 560811u
#define RESET_UNKNOWN 560812u
#define RESET_RESET 560813u
-/*
- * Starting value for maximum packet size negotiation on unicast links
+/* Link endpoint execution states
+ */
+#define LINK_STARTED 0x0001
+#define LINK_STOPPED 0x0002
+
+/* Starting value for maximum packet size negotiation on unicast links
* (unless bearer MTU is less)
*/
#define MAX_PKT_DEFAULT 1500
@@ -102,12 +98,11 @@ struct tipc_stats {
* @media_addr: media address to use when sending messages over link
* @timer: link timer
* @owner: pointer to peer node
- * @link_list: adjacent links in bearer's list of links
- * @started: indicates if link has been started
+ * @flags: execution state flags for link endpoint instance
* @checkpoint: reference point for triggering link continuity checking
* @peer_session: link session # being used by peer end of link
* @peer_bearer_id: bearer id used by link's peer endpoint
- * @b_ptr: pointer to bearer used by link
+ * @bearer_id: local bearer id used by link
* @tolerance: minimum link continuity loss needed to reset link [in ms]
* @continuity_interval: link continuity testing interval [in ms]
* @abort_limit: # of unacknowledged continuity probes needed to reset link
@@ -116,6 +111,7 @@ struct tipc_stats {
* @proto_msg: template for control messages generated by link
* @pmsg: convenience pointer to "proto_msg" field
* @priority: current link priority
+ * @net_plane: current link network plane ('A' through 'H')
* @queue_limit: outbound message queue congestion thresholds (indexed by user)
* @exp_msg_count: # of tunnelled messages expected during link changeover
* @reset_checkpoint: seq # of last acknowledged message at time of link reset
@@ -139,8 +135,7 @@ struct tipc_stats {
* @next_out: ptr to first unsent outbound message in queue
* @waiting_ports: linked list of ports waiting for link congestion to abate
* @long_msg_seq_no: next identifier to use for outbound fragmented messages
- * @reasm_head: list head of partially reassembled inbound message fragments
- * @reasm_tail: last fragment received
+ * @reasm_buf: head of partially reassembled inbound message fragments
* @stats: collects statistics regarding link activity
*/
struct tipc_link {
@@ -149,14 +144,13 @@ struct tipc_link {
struct tipc_media_addr media_addr;
struct timer_list timer;
struct tipc_node *owner;
- struct list_head link_list;
/* Management and link supervision data */
- int started;
+ unsigned int flags;
u32 checkpoint;
u32 peer_session;
u32 peer_bearer_id;
- struct tipc_bearer *b_ptr;
+ u32 bearer_id;
u32 tolerance;
u32 continuity_interval;
u32 abort_limit;
@@ -168,6 +162,7 @@ struct tipc_link {
} proto_msg;
struct tipc_msg *pmsg;
u32 priority;
+ char net_plane;
u32 queue_limit[15]; /* queue_limit[0]==window limit */
/* Changeover */
@@ -203,8 +198,7 @@ struct tipc_link {
/* Fragmentation/reassembly */
u32 long_msg_seq_no;
- struct sk_buff *reasm_head;
- struct sk_buff *reasm_tail;
+ struct sk_buff *reasm_buf;
/* Statistics */
struct tipc_stats stats;
@@ -215,10 +209,9 @@ struct tipc_port;
struct tipc_link *tipc_link_create(struct tipc_node *n_ptr,
struct tipc_bearer *b_ptr,
const struct tipc_media_addr *media_addr);
-void tipc_link_delete(struct tipc_link *l_ptr);
+void tipc_link_delete_list(unsigned int bearer_id, bool shutting_down);
void tipc_link_failover_send_queue(struct tipc_link *l_ptr);
-void tipc_link_dup_send_queue(struct tipc_link *l_ptr,
- struct tipc_link *dest);
+void tipc_link_dup_queue_xmit(struct tipc_link *l_ptr, struct tipc_link *dest);
void tipc_link_reset_fragments(struct tipc_link *l_ptr);
int tipc_link_is_up(struct tipc_link *l_ptr);
int tipc_link_is_active(struct tipc_link *l_ptr);
@@ -230,24 +223,23 @@ struct sk_buff *tipc_link_cmd_show_stats(const void *req_tlv_area,
int req_tlv_space);
struct sk_buff *tipc_link_cmd_reset_stats(const void *req_tlv_area,
int req_tlv_space);
+void tipc_link_reset_all(struct tipc_node *node);
void tipc_link_reset(struct tipc_link *l_ptr);
-int tipc_link_send(struct sk_buff *buf, u32 dest, u32 selector);
-void tipc_link_send_names(struct list_head *message_list, u32 dest);
+void tipc_link_reset_list(unsigned int bearer_id);
+int tipc_link_xmit(struct sk_buff *buf, u32 dest, u32 selector);
+void tipc_link_names_xmit(struct list_head *message_list, u32 dest);
+int __tipc_link_xmit(struct tipc_link *l_ptr, struct sk_buff *buf);
int tipc_link_send_buf(struct tipc_link *l_ptr, struct sk_buff *buf);
u32 tipc_link_get_max_pkt(u32 dest, u32 selector);
-int tipc_link_send_sections_fast(struct tipc_port *sender,
- struct iovec const *msg_sect,
- unsigned int len, u32 destnode);
-void tipc_link_recv_bundle(struct sk_buff *buf);
-int tipc_link_recv_fragment(struct sk_buff **reasm_head,
- struct sk_buff **reasm_tail,
- struct sk_buff **fbuf);
-void tipc_link_send_proto_msg(struct tipc_link *l_ptr, u32 msg_typ, int prob,
- u32 gap, u32 tolerance, u32 priority,
- u32 acked_mtu);
+int tipc_link_iovec_xmit_fast(struct tipc_port *sender,
+ struct iovec const *msg_sect,
+ unsigned int len, u32 destnode);
+void tipc_link_bundle_rcv(struct sk_buff *buf);
+void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int prob,
+ u32 gap, u32 tolerance, u32 priority, u32 acked_mtu);
void tipc_link_push_queue(struct tipc_link *l_ptr);
u32 tipc_link_defer_pkt(struct sk_buff **head, struct sk_buff **tail,
- struct sk_buff *buf);
+ struct sk_buff *buf);
void tipc_link_wakeup_ports(struct tipc_link *l_ptr, int all);
void tipc_link_set_queue_limits(struct tipc_link *l_ptr, u32 window);
void tipc_link_retransmit(struct tipc_link *l_ptr,
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index e525f8ce1de..0a37a472c29 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -1,7 +1,7 @@
/*
* net/tipc/msg.c: TIPC message header routines
*
- * Copyright (c) 2000-2006, Ericsson AB
+ * Copyright (c) 2000-2006, 2014, Ericsson AB
* Copyright (c) 2005, 2010-2011, Wind River Systems
* All rights reserved.
*
@@ -99,3 +99,61 @@ int tipc_msg_build(struct tipc_msg *hdr, struct iovec const *msg_sect,
}
return dsz;
}
+
+/* tipc_buf_append(): Append a buffer to the fragment list of another buffer
+ * @*headbuf: in: NULL for first frag, otherwise value returned from prev call
+ * out: set when successful non-complete reassembly, otherwise NULL
+ * @*buf: in: the buffer to append. Always defined
+ * out: head buf after successful complete reassembly, otherwise NULL
+ * Returns 1 when reassembly complete, otherwise 0
+ */
+int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf)
+{
+ struct sk_buff *head = *headbuf;
+ struct sk_buff *frag = *buf;
+ struct sk_buff *tail;
+ struct tipc_msg *msg = buf_msg(frag);
+ u32 fragid = msg_type(msg);
+ bool headstolen;
+ int delta;
+
+ skb_pull(frag, msg_hdr_sz(msg));
+
+ if (fragid == FIRST_FRAGMENT) {
+ if (head || skb_unclone(frag, GFP_ATOMIC))
+ goto out_free;
+ head = *headbuf = frag;
+ skb_frag_list_init(head);
+ *buf = NULL;
+ return 0;
+ }
+ if (!head)
+ goto out_free;
+ tail = TIPC_SKB_CB(head)->tail;
+ if (skb_try_coalesce(head, frag, &headstolen, &delta)) {
+ kfree_skb_partial(frag, headstolen);
+ } else {
+ if (!skb_has_frag_list(head))
+ skb_shinfo(head)->frag_list = frag;
+ else
+ tail->next = frag;
+ head->truesize += frag->truesize;
+ head->data_len += frag->len;
+ head->len += frag->len;
+ TIPC_SKB_CB(head)->tail = frag;
+ }
+ if (fragid == LAST_FRAGMENT) {
+ *buf = head;
+ TIPC_SKB_CB(head)->tail = NULL;
+ *headbuf = NULL;
+ return 1;
+ }
+ *buf = NULL;
+ return 0;
+out_free:
+ pr_warn_ratelimited("Unable to build fragment list\n");
+ kfree_skb(*buf);
+ kfree_skb(*headbuf);
+ *buf = *headbuf = NULL;
+ return 0;
+}
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index 76d1269b944..503511903d1 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -1,7 +1,7 @@
/*
* net/tipc/msg.h: Include file for TIPC message header routines
*
- * Copyright (c) 2000-2007, Ericsson AB
+ * Copyright (c) 2000-2007, 2014, Ericsson AB
* Copyright (c) 2005-2008, 2010-2011, Wind River Systems
* All rights reserved.
*
@@ -711,4 +711,7 @@ void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, u32 hsize,
u32 destnode);
int tipc_msg_build(struct tipc_msg *hdr, struct iovec const *msg_sect,
unsigned int len, int max_size, struct sk_buff **buf);
+
+int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf);
+
#endif
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index e0d08055754..8ce730984aa 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -38,34 +38,6 @@
#include "link.h"
#include "name_distr.h"
-#define ITEM_SIZE sizeof(struct distr_item)
-
-/**
- * struct distr_item - publication info distributed to other nodes
- * @type: name sequence type
- * @lower: name sequence lower bound
- * @upper: name sequence upper bound
- * @ref: publishing port reference
- * @key: publication key
- *
- * ===> All fields are stored in network byte order. <===
- *
- * First 3 fields identify (name or) name sequence being published.
- * Reference field uniquely identifies port that published name sequence.
- * Key field uniquely identifies publication, in the event a port has
- * multiple publications of the same name sequence.
- *
- * Note: There is no field that identifies the publishing node because it is
- * the same for all items contained within a publication message.
- */
-struct distr_item {
- __be32 type;
- __be32 lower;
- __be32 upper;
- __be32 ref;
- __be32 key;
-};
-
/**
* struct publ_list - list of publications made by this node
* @list: circular list of publications
@@ -127,20 +99,28 @@ static struct sk_buff *named_prepare_buf(u32 type, u32 size, u32 dest)
return buf;
}
-static void named_cluster_distribute(struct sk_buff *buf)
+void named_cluster_distribute(struct sk_buff *buf)
{
struct sk_buff *buf_copy;
struct tipc_node *n_ptr;
+ struct tipc_link *l_ptr;
- list_for_each_entry(n_ptr, &tipc_node_list, list) {
- if (tipc_node_active_links(n_ptr)) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(n_ptr, &tipc_node_list, list) {
+ tipc_node_lock(n_ptr);
+ l_ptr = n_ptr->active_links[n_ptr->addr & 1];
+ if (l_ptr) {
buf_copy = skb_copy(buf, GFP_ATOMIC);
- if (!buf_copy)
+ if (!buf_copy) {
+ tipc_node_unlock(n_ptr);
break;
+ }
msg_set_destnode(buf_msg(buf_copy), n_ptr->addr);
- tipc_link_send(buf_copy, n_ptr->addr, n_ptr->addr);
+ __tipc_link_xmit(l_ptr, buf_copy);
}
+ tipc_node_unlock(n_ptr);
}
+ rcu_read_unlock();
kfree_skb(buf);
}
@@ -148,7 +128,7 @@ static void named_cluster_distribute(struct sk_buff *buf)
/**
* tipc_named_publish - tell other nodes about a new publication by this node
*/
-void tipc_named_publish(struct publication *publ)
+struct sk_buff *tipc_named_publish(struct publication *publ)
{
struct sk_buff *buf;
struct distr_item *item;
@@ -157,23 +137,23 @@ void tipc_named_publish(struct publication *publ)
publ_lists[publ->scope]->size++;
if (publ->scope == TIPC_NODE_SCOPE)
- return;
+ return NULL;
buf = named_prepare_buf(PUBLICATION, ITEM_SIZE, 0);
if (!buf) {
pr_warn("Publication distribution failure\n");
- return;
+ return NULL;
}
item = (struct distr_item *)msg_data(buf_msg(buf));
publ_to_item(item, publ);
- named_cluster_distribute(buf);
+ return buf;
}
/**
* tipc_named_withdraw - tell other nodes about a withdrawn publication by this node
*/
-void tipc_named_withdraw(struct publication *publ)
+struct sk_buff *tipc_named_withdraw(struct publication *publ)
{
struct sk_buff *buf;
struct distr_item *item;
@@ -182,17 +162,17 @@ void tipc_named_withdraw(struct publication *publ)
publ_lists[publ->scope]->size--;
if (publ->scope == TIPC_NODE_SCOPE)
- return;
+ return NULL;
buf = named_prepare_buf(WITHDRAWAL, ITEM_SIZE, 0);
if (!buf) {
pr_warn("Withdrawal distribution failure\n");
- return;
+ return NULL;
}
item = (struct distr_item *)msg_data(buf_msg(buf));
publ_to_item(item, publ);
- named_cluster_distribute(buf);
+ return buf;
}
/*
@@ -231,38 +211,16 @@ static void named_distribute(struct list_head *message_list, u32 node,
/**
* tipc_named_node_up - tell specified node about all publications by this node
*/
-void tipc_named_node_up(unsigned long nodearg)
+void tipc_named_node_up(u32 max_item_buf, u32 node)
{
- struct tipc_node *n_ptr;
- struct tipc_link *l_ptr;
- struct list_head message_list;
- u32 node = (u32)nodearg;
- u32 max_item_buf = 0;
-
- /* compute maximum amount of publication data to send per message */
- read_lock_bh(&tipc_net_lock);
- n_ptr = tipc_node_find(node);
- if (n_ptr) {
- tipc_node_lock(n_ptr);
- l_ptr = n_ptr->active_links[0];
- if (l_ptr)
- max_item_buf = ((l_ptr->max_pkt - INT_H_SIZE) /
- ITEM_SIZE) * ITEM_SIZE;
- tipc_node_unlock(n_ptr);
- }
- read_unlock_bh(&tipc_net_lock);
- if (!max_item_buf)
- return;
-
- /* create list of publication messages, then send them as a unit */
- INIT_LIST_HEAD(&message_list);
+ LIST_HEAD(message_list);
read_lock_bh(&tipc_nametbl_lock);
named_distribute(&message_list, node, &publ_cluster, max_item_buf);
named_distribute(&message_list, node, &publ_zone, max_item_buf);
read_unlock_bh(&tipc_nametbl_lock);
- tipc_link_send_names(&message_list, node);
+ tipc_link_names_xmit(&message_list, node);
}
/**
@@ -293,9 +251,9 @@ static void named_purge_publ(struct publication *publ)
}
/**
- * tipc_named_recv - process name table update message sent by another node
+ * tipc_named_rcv - process name table update message sent by another node
*/
-void tipc_named_recv(struct sk_buff *buf)
+void tipc_named_rcv(struct sk_buff *buf)
{
struct publication *publ;
struct tipc_msg *msg = buf_msg(buf);
diff --git a/net/tipc/name_distr.h b/net/tipc/name_distr.h
index 1e41bdd4f25..b2eed4ec152 100644
--- a/net/tipc/name_distr.h
+++ b/net/tipc/name_distr.h
@@ -39,10 +39,39 @@
#include "name_table.h"
-void tipc_named_publish(struct publication *publ);
-void tipc_named_withdraw(struct publication *publ);
-void tipc_named_node_up(unsigned long node);
-void tipc_named_recv(struct sk_buff *buf);
+#define ITEM_SIZE sizeof(struct distr_item)
+
+/**
+ * struct distr_item - publication info distributed to other nodes
+ * @type: name sequence type
+ * @lower: name sequence lower bound
+ * @upper: name sequence upper bound
+ * @ref: publishing port reference
+ * @key: publication key
+ *
+ * ===> All fields are stored in network byte order. <===
+ *
+ * First 3 fields identify (name or) name sequence being published.
+ * Reference field uniquely identifies port that published name sequence.
+ * Key field uniquely identifies publication, in the event a port has
+ * multiple publications of the same name sequence.
+ *
+ * Note: There is no field that identifies the publishing node because it is
+ * the same for all items contained within a publication message.
+ */
+struct distr_item {
+ __be32 type;
+ __be32 lower;
+ __be32 upper;
+ __be32 ref;
+ __be32 key;
+};
+
+struct sk_buff *tipc_named_publish(struct publication *publ);
+struct sk_buff *tipc_named_withdraw(struct publication *publ);
+void named_cluster_distribute(struct sk_buff *buf);
+void tipc_named_node_up(u32 max_item_buf, u32 node);
+void tipc_named_rcv(struct sk_buff *buf);
void tipc_named_reinit(void);
#endif
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 042e8e3cabc..9d7d37d9518 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -664,6 +664,7 @@ struct publication *tipc_nametbl_publish(u32 type, u32 lower, u32 upper,
u32 scope, u32 port_ref, u32 key)
{
struct publication *publ;
+ struct sk_buff *buf = NULL;
if (table.local_publ_count >= TIPC_MAX_PUBLICATIONS) {
pr_warn("Publication failed, local publication limit reached (%u)\n",
@@ -676,9 +677,12 @@ struct publication *tipc_nametbl_publish(u32 type, u32 lower, u32 upper,
tipc_own_addr, port_ref, key);
if (likely(publ)) {
table.local_publ_count++;
- tipc_named_publish(publ);
+ buf = tipc_named_publish(publ);
}
write_unlock_bh(&tipc_nametbl_lock);
+
+ if (buf)
+ named_cluster_distribute(buf);
return publ;
}
@@ -688,15 +692,19 @@ struct publication *tipc_nametbl_publish(u32 type, u32 lower, u32 upper,
int tipc_nametbl_withdraw(u32 type, u32 lower, u32 ref, u32 key)
{
struct publication *publ;
+ struct sk_buff *buf;
write_lock_bh(&tipc_nametbl_lock);
publ = tipc_nametbl_remove_publ(type, lower, tipc_own_addr, ref, key);
if (likely(publ)) {
table.local_publ_count--;
- tipc_named_withdraw(publ);
+ buf = tipc_named_withdraw(publ);
write_unlock_bh(&tipc_nametbl_lock);
list_del_init(&publ->pport_list);
kfree(publ);
+
+ if (buf)
+ named_cluster_distribute(buf);
return 1;
}
write_unlock_bh(&tipc_nametbl_lock);
@@ -961,6 +969,7 @@ static void tipc_purge_publications(struct name_seq *seq)
list_for_each_entry_safe(publ, safe, &info->zone_list, zone_list) {
tipc_nametbl_remove_publ(publ->type, publ->lower, publ->node,
publ->ref, publ->key);
+ kfree(publ);
}
}
@@ -982,7 +991,6 @@ void tipc_nametbl_stop(void)
hlist_for_each_entry_safe(seq, safe, seq_head, ns_list) {
tipc_purge_publications(seq);
}
- continue;
}
kfree(table.types);
table.types = NULL;
diff --git a/net/tipc/net.c b/net/tipc/net.c
index 7d305ecc09c..f64375e7f99 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -39,45 +39,41 @@
#include "name_distr.h"
#include "subscr.h"
#include "port.h"
+#include "socket.h"
#include "node.h"
#include "config.h"
/*
* The TIPC locking policy is designed to ensure a very fine locking
* granularity, permitting complete parallel access to individual
- * port and node/link instances. The code consists of three major
+ * port and node/link instances. The code consists of four major
* locking domains, each protected with their own disjunct set of locks.
*
- * 1: The routing hierarchy.
- * Comprises the structures 'zone', 'cluster', 'node', 'link'
- * and 'bearer'. The whole hierarchy is protected by a big
- * read/write lock, tipc_net_lock, to enssure that nothing is added
- * or removed while code is accessing any of these structures.
- * This layer must not be called from the two others while they
- * hold any of their own locks.
- * Neither must it itself do any upcalls to the other two before
- * it has released tipc_net_lock and other protective locks.
+ * 1: The bearer level.
+ * The RTNL lock serializes bearer configuration on the update
+ * side, and the RCU lock is applied on the read side to keep the
+ * bearer instance valid on both the message transmission and
+ * reception paths.
*
- * Within the tipc_net_lock domain there are two sub-domains;'node' and
- * 'bearer', where local write operations are permitted,
- * provided that those are protected by individual spin_locks
- * per instance. Code holding tipc_net_lock(read) and a node spin_lock
- * is permitted to poke around in both the node itself and its
- * subordinate links. I.e, it can update link counters and queues,
- * change link state, send protocol messages, and alter the
- * "active_links" array in the node; but it can _not_ remove a link
- * or a node from the overall structure.
- * Correspondingly, individual bearers may change status within a
- * tipc_net_lock(read), protected by an individual spin_lock ber bearer
- * instance, but it needs tipc_net_lock(write) to remove/add any bearers.
+ * 2: The node and link level.
+ * All node instances are saved into two lists, tipc_node_list and
+ * node_htable. The two lists are protected by node_list_lock on the
+ * write side and guarded by the RCU lock on the read side. Notably, a
+ * node instance is destroyed only when the TIPC module is removed, at
+ * which point we can be sure that no user is still accessing the node.
+ * Therefore, apart from iterating over the two lists under RCU
+ * protection, there is no need to hold the RCU lock when accessing a
+ * node instance elsewhere.
*
+ * In addition, all members in node structure including link instances
+ * are protected by node spin lock.
*
- * 2: The transport level of the protocol.
- * This consists of the structures port, (and its user level
- * representations, such as user_port and tipc_sock), reference and
- * tipc_user (port.c, reg.c, socket.c).
+ * 3: The transport level of the protocol.
+ * This consists of the structures port, (and its user level
+ * representations, such as user_port and tipc_sock), reference and
+ * tipc_user (port.c, reg.c, socket.c).
*
- * This layer has four different locks:
+ * This layer has four different locks:
* - The tipc_port spin_lock. This is protecting each port instance
* from parallel data access and removal. Since we can not place
* this lock in the port itself, it has been placed in the
@@ -96,7 +92,7 @@
* There are two such lists; 'port_list', which is used for management,
* and 'wait_list', which is used to queue ports during congestion.
*
- * 3: The name table (name_table.c, name_distr.c, subscription.c)
+ * 4: The name table (name_table.c, name_distr.c, subscription.c)
* - There is one big read/write-lock (tipc_nametbl_lock) protecting the
* overall name table structure. Nothing must be added/removed to
* this structure without holding write access to it.
@@ -108,8 +104,6 @@
* - A local spin_lock protecting the queue of subscriber events.
*/
-DEFINE_RWLOCK(tipc_net_lock);
-
static void net_route_named_msg(struct sk_buff *buf)
{
struct tipc_msg *msg = buf_msg(buf);
@@ -146,19 +140,19 @@ void tipc_net_route_msg(struct sk_buff *buf)
if (tipc_in_scope(dnode, tipc_own_addr)) {
if (msg_isdata(msg)) {
if (msg_mcast(msg))
- tipc_port_recv_mcast(buf, NULL);
+ tipc_port_mcast_rcv(buf, NULL);
else if (msg_destport(msg))
- tipc_port_recv_msg(buf);
+ tipc_sk_rcv(buf);
else
net_route_named_msg(buf);
return;
}
switch (msg_user(msg)) {
case NAME_DISTRIBUTOR:
- tipc_named_recv(buf);
+ tipc_named_rcv(buf);
break;
case CONN_MANAGER:
- tipc_port_recv_proto_msg(buf);
+ tipc_port_proto_rcv(buf);
break;
default:
kfree_skb(buf);
@@ -168,38 +162,41 @@ void tipc_net_route_msg(struct sk_buff *buf)
/* Handle message for another node */
skb_trim(buf, msg_size(msg));
- tipc_link_send(buf, dnode, msg_link_selector(msg));
+ tipc_link_xmit(buf, dnode, msg_link_selector(msg));
}
-void tipc_net_start(u32 addr)
+int tipc_net_start(u32 addr)
{
char addr_string[16];
+ int res;
- write_lock_bh(&tipc_net_lock);
tipc_own_addr = addr;
tipc_named_reinit();
tipc_port_reinit();
- tipc_bclink_init();
- write_unlock_bh(&tipc_net_lock);
+ res = tipc_bclink_init();
+ if (res)
+ return res;
- tipc_cfg_reinit();
+ tipc_nametbl_publish(TIPC_CFG_SRV, tipc_own_addr, tipc_own_addr,
+ TIPC_ZONE_SCOPE, 0, tipc_own_addr);
pr_info("Started in network mode\n");
pr_info("Own node address %s, network identity %u\n",
tipc_addr_string_fill(addr_string, tipc_own_addr), tipc_net_id);
+ return 0;
}
void tipc_net_stop(void)
{
- struct tipc_node *node, *t_node;
-
if (!tipc_own_addr)
return;
- write_lock_bh(&tipc_net_lock);
+
+ tipc_nametbl_withdraw(TIPC_CFG_SRV, tipc_own_addr, 0, tipc_own_addr);
+ rtnl_lock();
tipc_bearer_stop();
tipc_bclink_stop();
- list_for_each_entry_safe(node, t_node, &tipc_node_list, list)
- tipc_node_delete(node);
- write_unlock_bh(&tipc_net_lock);
+ tipc_node_stop();
+ rtnl_unlock();
+
pr_info("Left network mode\n");
}
diff --git a/net/tipc/net.h b/net/tipc/net.h
index 079daadb3f7..c6c2b46f7c2 100644
--- a/net/tipc/net.h
+++ b/net/tipc/net.h
@@ -37,11 +37,9 @@
#ifndef _TIPC_NET_H
#define _TIPC_NET_H
-extern rwlock_t tipc_net_lock;
-
void tipc_net_route_msg(struct sk_buff *buf);
-void tipc_net_start(u32 addr);
+int tipc_net_start(u32 addr);
void tipc_net_stop(void);
#endif
diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c
index 3aaf73de9e2..ad844d36534 100644
--- a/net/tipc/netlink.c
+++ b/net/tipc/netlink.c
@@ -47,7 +47,7 @@ static int handle_cmd(struct sk_buff *skb, struct genl_info *info)
int hdr_space = nlmsg_total_size(GENL_HDRLEN + TIPC_GENL_HDRLEN);
u16 cmd;
- if ((req_userhdr->cmd & 0xC000) && (!capable(CAP_NET_ADMIN)))
+ if ((req_userhdr->cmd & 0xC000) && (!netlink_capable(skb, CAP_NET_ADMIN)))
cmd = TIPC_CMD_NOT_NET_ADMIN;
else
cmd = req_userhdr->cmd;
diff --git a/net/tipc/node.c b/net/tipc/node.c
index efe4d41bf11..5b44c3041be 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -2,7 +2,7 @@
* net/tipc/node.c: TIPC node management routines
*
* Copyright (c) 2000-2006, 2012 Ericsson AB
- * Copyright (c) 2005-2006, 2010-2011, Wind River Systems
+ * Copyright (c) 2005-2006, 2010-2014, Wind River Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -44,13 +44,11 @@
static void node_lost_contact(struct tipc_node *n_ptr);
static void node_established_contact(struct tipc_node *n_ptr);
-static DEFINE_SPINLOCK(node_create_lock);
-
static struct hlist_head node_htable[NODE_HTABLE_SIZE];
LIST_HEAD(tipc_node_list);
static u32 tipc_num_nodes;
-
-static atomic_t tipc_num_links = ATOMIC_INIT(0);
+static u32 tipc_num_links;
+static DEFINE_SPINLOCK(node_list_lock);
/*
* A trivial power-of-two bitmask technique is used for speed, since this
@@ -73,37 +71,26 @@ struct tipc_node *tipc_node_find(u32 addr)
if (unlikely(!in_own_cluster_exact(addr)))
return NULL;
- hlist_for_each_entry(node, &node_htable[tipc_hashfn(addr)], hash) {
- if (node->addr == addr)
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(node, &node_htable[tipc_hashfn(addr)], hash) {
+ if (node->addr == addr) {
+ rcu_read_unlock();
return node;
+ }
}
+ rcu_read_unlock();
return NULL;
}
-/**
- * tipc_node_create - create neighboring node
- *
- * Currently, this routine is called by neighbor discovery code, which holds
- * net_lock for reading only. We must take node_create_lock to ensure a node
- * isn't created twice if two different bearers discover the node at the same
- * time. (It would be preferable to switch to holding net_lock in write mode,
- * but this is a non-trivial change.)
- */
struct tipc_node *tipc_node_create(u32 addr)
{
struct tipc_node *n_ptr, *temp_node;
- spin_lock_bh(&node_create_lock);
-
- n_ptr = tipc_node_find(addr);
- if (n_ptr) {
- spin_unlock_bh(&node_create_lock);
- return n_ptr;
- }
+ spin_lock_bh(&node_list_lock);
n_ptr = kzalloc(sizeof(*n_ptr), GFP_ATOMIC);
if (!n_ptr) {
- spin_unlock_bh(&node_create_lock);
+ spin_unlock_bh(&node_list_lock);
pr_warn("Node creation failed, no memory\n");
return NULL;
}
@@ -114,31 +101,41 @@ struct tipc_node *tipc_node_create(u32 addr)
INIT_LIST_HEAD(&n_ptr->list);
INIT_LIST_HEAD(&n_ptr->nsub);
- hlist_add_head(&n_ptr->hash, &node_htable[tipc_hashfn(addr)]);
+ hlist_add_head_rcu(&n_ptr->hash, &node_htable[tipc_hashfn(addr)]);
- list_for_each_entry(temp_node, &tipc_node_list, list) {
+ list_for_each_entry_rcu(temp_node, &tipc_node_list, list) {
if (n_ptr->addr < temp_node->addr)
break;
}
- list_add_tail(&n_ptr->list, &temp_node->list);
- n_ptr->block_setup = WAIT_PEER_DOWN;
+ list_add_tail_rcu(&n_ptr->list, &temp_node->list);
+ n_ptr->action_flags = TIPC_WAIT_PEER_LINKS_DOWN;
n_ptr->signature = INVALID_NODE_SIG;
tipc_num_nodes++;
- spin_unlock_bh(&node_create_lock);
+ spin_unlock_bh(&node_list_lock);
return n_ptr;
}
-void tipc_node_delete(struct tipc_node *n_ptr)
+static void tipc_node_delete(struct tipc_node *n_ptr)
{
- list_del(&n_ptr->list);
- hlist_del(&n_ptr->hash);
- kfree(n_ptr);
+ list_del_rcu(&n_ptr->list);
+ hlist_del_rcu(&n_ptr->hash);
+ kfree_rcu(n_ptr, rcu);
tipc_num_nodes--;
}
+void tipc_node_stop(void)
+{
+ struct tipc_node *node, *t_node;
+
+ spin_lock_bh(&node_list_lock);
+ list_for_each_entry_safe(node, t_node, &tipc_node_list, list)
+ tipc_node_delete(node);
+ spin_unlock_bh(&node_list_lock);
+}
+
/**
* tipc_node_link_up - handle addition of link
*
@@ -147,11 +144,13 @@ void tipc_node_delete(struct tipc_node *n_ptr)
void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr)
{
struct tipc_link **active = &n_ptr->active_links[0];
+ u32 addr = n_ptr->addr;
n_ptr->working_links++;
-
+ tipc_nametbl_publish(TIPC_LINK_STATE, addr, addr, TIPC_NODE_SCOPE,
+ l_ptr->bearer_id, addr);
pr_info("Established link <%s> on network plane %c\n",
- l_ptr->name, l_ptr->b_ptr->net_plane);
+ l_ptr->name, l_ptr->net_plane);
if (!active[0]) {
active[0] = active[1] = l_ptr;
@@ -162,7 +161,7 @@ void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr)
pr_info("New link <%s> becomes standby\n", l_ptr->name);
return;
}
- tipc_link_dup_send_queue(active[0], l_ptr);
+ tipc_link_dup_queue_xmit(active[0], l_ptr);
if (l_ptr->priority == active[0]->priority) {
active[0] = l_ptr;
return;
@@ -206,16 +205,18 @@ static void node_select_active_links(struct tipc_node *n_ptr)
void tipc_node_link_down(struct tipc_node *n_ptr, struct tipc_link *l_ptr)
{
struct tipc_link **active;
+ u32 addr = n_ptr->addr;
n_ptr->working_links--;
+ tipc_nametbl_withdraw(TIPC_LINK_STATE, addr, l_ptr->bearer_id, addr);
if (!tipc_link_is_active(l_ptr)) {
pr_info("Lost standby link <%s> on network plane %c\n",
- l_ptr->name, l_ptr->b_ptr->net_plane);
+ l_ptr->name, l_ptr->net_plane);
return;
}
pr_info("Lost link <%s> on network plane %c\n",
- l_ptr->name, l_ptr->b_ptr->net_plane);
+ l_ptr->name, l_ptr->net_plane);
active = &n_ptr->active_links[0];
if (active[0] == l_ptr)
@@ -242,40 +243,36 @@ int tipc_node_is_up(struct tipc_node *n_ptr)
void tipc_node_attach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr)
{
- n_ptr->links[l_ptr->b_ptr->identity] = l_ptr;
- atomic_inc(&tipc_num_links);
+ n_ptr->links[l_ptr->bearer_id] = l_ptr;
+ spin_lock_bh(&node_list_lock);
+ tipc_num_links++;
+ spin_unlock_bh(&node_list_lock);
n_ptr->link_cnt++;
}
void tipc_node_detach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr)
{
- n_ptr->links[l_ptr->b_ptr->identity] = NULL;
- atomic_dec(&tipc_num_links);
- n_ptr->link_cnt--;
+ int i;
+
+ for (i = 0; i < MAX_BEARERS; i++) {
+ if (l_ptr != n_ptr->links[i])
+ continue;
+ n_ptr->links[i] = NULL;
+ spin_lock_bh(&node_list_lock);
+ tipc_num_links--;
+ spin_unlock_bh(&node_list_lock);
+ n_ptr->link_cnt--;
+ }
}
static void node_established_contact(struct tipc_node *n_ptr)
{
- tipc_k_signal((Handler)tipc_named_node_up, n_ptr->addr);
+ n_ptr->action_flags |= TIPC_NOTIFY_NODE_UP;
n_ptr->bclink.oos_state = 0;
n_ptr->bclink.acked = tipc_bclink_get_last_sent();
tipc_bclink_add_node(n_ptr->addr);
}
-static void node_name_purge_complete(unsigned long node_addr)
-{
- struct tipc_node *n_ptr;
-
- read_lock_bh(&tipc_net_lock);
- n_ptr = tipc_node_find(node_addr);
- if (n_ptr) {
- tipc_node_lock(n_ptr);
- n_ptr->block_setup &= ~WAIT_NAMES_GONE;
- tipc_node_unlock(n_ptr);
- }
- read_unlock_bh(&tipc_net_lock);
-}
-
static void node_lost_contact(struct tipc_node *n_ptr)
{
char addr_string[16];
@@ -289,10 +286,9 @@ static void node_lost_contact(struct tipc_node *n_ptr)
kfree_skb_list(n_ptr->bclink.deferred_head);
n_ptr->bclink.deferred_size = 0;
- if (n_ptr->bclink.reasm_head) {
- kfree_skb(n_ptr->bclink.reasm_head);
- n_ptr->bclink.reasm_head = NULL;
- n_ptr->bclink.reasm_tail = NULL;
+ if (n_ptr->bclink.reasm_buf) {
+ kfree_skb(n_ptr->bclink.reasm_buf);
+ n_ptr->bclink.reasm_buf = NULL;
}
tipc_bclink_remove_node(n_ptr->addr);
@@ -311,12 +307,13 @@ static void node_lost_contact(struct tipc_node *n_ptr)
tipc_link_reset_fragments(l_ptr);
}
- /* Notify subscribers */
- tipc_nodesub_notify(n_ptr);
+ n_ptr->action_flags &= ~TIPC_WAIT_OWN_LINKS_DOWN;
- /* Prevent re-contact with node until cleanup is done */
- n_ptr->block_setup = WAIT_PEER_DOWN | WAIT_NAMES_GONE;
- tipc_k_signal((Handler)node_name_purge_complete, n_ptr->addr);
+ /* Notify subscribers and prevent re-contact with node until
+ * cleanup is done.
+ */
+ n_ptr->action_flags |= TIPC_WAIT_PEER_LINKS_DOWN |
+ TIPC_NOTIFY_NODE_DOWN;
}
struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space)
@@ -335,27 +332,28 @@ struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space)
return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE
" (network address)");
- read_lock_bh(&tipc_net_lock);
+ spin_lock_bh(&node_list_lock);
if (!tipc_num_nodes) {
- read_unlock_bh(&tipc_net_lock);
+ spin_unlock_bh(&node_list_lock);
return tipc_cfg_reply_none();
}
/* For now, get space for all other nodes */
payload_size = TLV_SPACE(sizeof(node_info)) * tipc_num_nodes;
if (payload_size > 32768u) {
- read_unlock_bh(&tipc_net_lock);
+ spin_unlock_bh(&node_list_lock);
return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
" (too many nodes)");
}
+ spin_unlock_bh(&node_list_lock);
+
buf = tipc_cfg_reply_alloc(payload_size);
- if (!buf) {
- read_unlock_bh(&tipc_net_lock);
+ if (!buf)
return NULL;
- }
/* Add TLVs for all nodes in scope */
- list_for_each_entry(n_ptr, &tipc_node_list, list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(n_ptr, &tipc_node_list, list) {
if (!tipc_in_scope(domain, n_ptr->addr))
continue;
node_info.addr = htonl(n_ptr->addr);
@@ -363,8 +361,7 @@ struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space)
tipc_cfg_append_tlv(buf, TIPC_TLV_NODE_INFO,
&node_info, sizeof(node_info));
}
-
- read_unlock_bh(&tipc_net_lock);
+ rcu_read_unlock();
return buf;
}
@@ -387,21 +384,19 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space)
if (!tipc_own_addr)
return tipc_cfg_reply_none();
- read_lock_bh(&tipc_net_lock);
-
+ spin_lock_bh(&node_list_lock);
/* Get space for all unicast links + broadcast link */
- payload_size = TLV_SPACE(sizeof(link_info)) *
- (atomic_read(&tipc_num_links) + 1);
+ payload_size = TLV_SPACE((sizeof(link_info)) * (tipc_num_links + 1));
if (payload_size > 32768u) {
- read_unlock_bh(&tipc_net_lock);
+ spin_unlock_bh(&node_list_lock);
return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
" (too many links)");
}
+ spin_unlock_bh(&node_list_lock);
+
buf = tipc_cfg_reply_alloc(payload_size);
- if (!buf) {
- read_unlock_bh(&tipc_net_lock);
+ if (!buf)
return NULL;
- }
/* Add TLV for broadcast link */
link_info.dest = htonl(tipc_cluster_mask(tipc_own_addr));
@@ -410,7 +405,8 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space)
tipc_cfg_append_tlv(buf, TIPC_TLV_LINK_INFO, &link_info, sizeof(link_info));
/* Add TLVs for any other links in scope */
- list_for_each_entry(n_ptr, &tipc_node_list, list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(n_ptr, &tipc_node_list, list) {
u32 i;
if (!tipc_in_scope(domain, n_ptr->addr))
@@ -427,7 +423,66 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space)
}
tipc_node_unlock(n_ptr);
}
-
- read_unlock_bh(&tipc_net_lock);
+ rcu_read_unlock();
return buf;
}
+
+/**
+ * tipc_node_get_linkname - get the name of a link
+ *
+ * @bearer_id: id of the bearer
+ * @addr: peer node address
+ * @linkname: link name output buffer
+ *
+ * Returns 0 on success
+ */
+int tipc_node_get_linkname(u32 bearer_id, u32 addr, char *linkname, size_t len)
+{
+ struct tipc_link *link;
+ struct tipc_node *node = tipc_node_find(addr);
+
+ if ((bearer_id >= MAX_BEARERS) || !node)
+ return -EINVAL;
+ tipc_node_lock(node);
+ link = node->links[bearer_id];
+ if (link) {
+ strncpy(linkname, link->name, len);
+ tipc_node_unlock(node);
+ return 0;
+ }
+ tipc_node_unlock(node);
+ return -EINVAL;
+}
+
+void tipc_node_unlock(struct tipc_node *node)
+{
+ LIST_HEAD(nsub_list);
+ struct tipc_link *link;
+ int pkt_sz = 0;
+ u32 addr = 0;
+
+ if (likely(!node->action_flags)) {
+ spin_unlock_bh(&node->lock);
+ return;
+ }
+
+ if (node->action_flags & TIPC_NOTIFY_NODE_DOWN) {
+ list_replace_init(&node->nsub, &nsub_list);
+ node->action_flags &= ~TIPC_NOTIFY_NODE_DOWN;
+ }
+ if (node->action_flags & TIPC_NOTIFY_NODE_UP) {
+ link = node->active_links[0];
+ node->action_flags &= ~TIPC_NOTIFY_NODE_UP;
+ if (link) {
+ pkt_sz = ((link->max_pkt - INT_H_SIZE) / ITEM_SIZE) *
+ ITEM_SIZE;
+ addr = node->addr;
+ }
+ }
+ spin_unlock_bh(&node->lock);
+
+ if (!list_empty(&nsub_list))
+ tipc_nodesub_notify(&nsub_list);
+ if (pkt_sz)
+ tipc_named_node_up(pkt_sz, addr);
+}
diff --git a/net/tipc/node.h b/net/tipc/node.h
index 63e2e8ead2f..9087063793f 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -2,7 +2,7 @@
* net/tipc/node.h: Include file for TIPC node management routines
*
* Copyright (c) 2000-2006, Ericsson AB
- * Copyright (c) 2005, 2010-2011, Wind River Systems
+ * Copyright (c) 2005, 2010-2014, Wind River Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -47,67 +47,80 @@
*/
#define INVALID_NODE_SIG 0x10000
-/* Flags used to block (re)establishment of contact with a neighboring node */
-#define WAIT_PEER_DOWN 0x0001 /* wait to see that peer's links are down */
-#define WAIT_NAMES_GONE 0x0002 /* wait for peer's publications to be purged */
-#define WAIT_NODE_DOWN 0x0004 /* wait until peer node is declared down */
+/* Flags used to take different actions according to flag type
+ * TIPC_WAIT_PEER_LINKS_DOWN: wait to see that peer's links are down
+ * TIPC_WAIT_OWN_LINKS_DOWN: wait until peer node is declared down
+ * TIPC_NOTIFY_NODE_DOWN: notify node is down
+ * TIPC_NOTIFY_NODE_UP: notify node is up
+ */
+enum {
+ TIPC_WAIT_PEER_LINKS_DOWN = (1 << 1),
+ TIPC_WAIT_OWN_LINKS_DOWN = (1 << 2),
+ TIPC_NOTIFY_NODE_DOWN = (1 << 3),
+ TIPC_NOTIFY_NODE_UP = (1 << 4)
+};
+
+/**
+ * struct tipc_node_bclink - TIPC node bclink structure
+ * @acked: sequence # of last outbound b'cast message acknowledged by node
+ * @last_in: sequence # of last in-sequence b'cast message received from node
+ * @last_sent: sequence # of last b'cast message sent by node
+ * @oos_state: state tracker for handling OOS b'cast messages
+ * @deferred_size: number of OOS b'cast messages in deferred queue
+ * @deferred_head: oldest OOS b'cast message received from node
+ * @deferred_tail: newest OOS b'cast message received from node
+ * @reasm_buf: broadcast reassembly queue head from node
+ * @recv_permitted: true if node is allowed to receive b'cast messages
+ */
+struct tipc_node_bclink {
+ u32 acked;
+ u32 last_in;
+ u32 last_sent;
+ u32 oos_state;
+ u32 deferred_size;
+ struct sk_buff *deferred_head;
+ struct sk_buff *deferred_tail;
+ struct sk_buff *reasm_buf;
+ bool recv_permitted;
+};
/**
* struct tipc_node - TIPC node structure
* @addr: network address of node
* @lock: spinlock governing access to structure
* @hash: links to adjacent nodes in unsorted hash chain
- * @list: links to adjacent nodes in sorted list of cluster's nodes
- * @nsub: list of "node down" subscriptions monitoring node
* @active_links: pointers to active links to node
* @links: pointers to all links to node
+ * @action_flags: bit mask of different types of node actions
+ * @bclink: broadcast-related info
+ * @list: links to adjacent nodes in sorted list of cluster's nodes
* @working_links: number of working links to node (both active and standby)
- * @block_setup: bit mask of conditions preventing link establishment to node
* @link_cnt: number of links to node
* @signature: node instance identifier
- * @bclink: broadcast-related info
- * @acked: sequence # of last outbound b'cast message acknowledged by node
- * @last_in: sequence # of last in-sequence b'cast message received from node
- * @last_sent: sequence # of last b'cast message sent by node
- * @oos_state: state tracker for handling OOS b'cast messages
- * @deferred_size: number of OOS b'cast messages in deferred queue
- * @deferred_head: oldest OOS b'cast message received from node
- * @deferred_tail: newest OOS b'cast message received from node
- * @reasm_head: broadcast reassembly queue head from node
- * @reasm_tail: last broadcast fragment received from node
- * @recv_permitted: true if node is allowed to receive b'cast messages
+ * @nsub: list of "node down" subscriptions monitoring node
+ * @rcu: rcu struct for tipc_node
*/
struct tipc_node {
u32 addr;
spinlock_t lock;
struct hlist_node hash;
- struct list_head list;
- struct list_head nsub;
struct tipc_link *active_links[2];
struct tipc_link *links[MAX_BEARERS];
+ unsigned int action_flags;
+ struct tipc_node_bclink bclink;
+ struct list_head list;
int link_cnt;
int working_links;
- int block_setup;
u32 signature;
- struct {
- u32 acked;
- u32 last_in;
- u32 last_sent;
- u32 oos_state;
- u32 deferred_size;
- struct sk_buff *deferred_head;
- struct sk_buff *deferred_tail;
- struct sk_buff *reasm_head;
- struct sk_buff *reasm_tail;
- bool recv_permitted;
- } bclink;
+ struct list_head nsub;
+ struct rcu_head rcu;
};
extern struct list_head tipc_node_list;
struct tipc_node *tipc_node_find(u32 addr);
struct tipc_node *tipc_node_create(u32 addr);
-void tipc_node_delete(struct tipc_node *n_ptr);
+void tipc_node_stop(void);
void tipc_node_attach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr);
void tipc_node_detach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr);
void tipc_node_link_down(struct tipc_node *n_ptr, struct tipc_link *l_ptr);
@@ -116,15 +129,18 @@ int tipc_node_active_links(struct tipc_node *n_ptr);
int tipc_node_is_up(struct tipc_node *n_ptr);
struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space);
struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space);
+int tipc_node_get_linkname(u32 bearer_id, u32 node, char *linkname, size_t len);
+void tipc_node_unlock(struct tipc_node *node);
-static inline void tipc_node_lock(struct tipc_node *n_ptr)
+static inline void tipc_node_lock(struct tipc_node *node)
{
- spin_lock_bh(&n_ptr->lock);
+ spin_lock_bh(&node->lock);
}
-static inline void tipc_node_unlock(struct tipc_node *n_ptr)
+static inline bool tipc_node_blocked(struct tipc_node *node)
{
- spin_unlock_bh(&n_ptr->lock);
+ return (node->action_flags & (TIPC_WAIT_PEER_LINKS_DOWN |
+ TIPC_NOTIFY_NODE_DOWN | TIPC_WAIT_OWN_LINKS_DOWN));
}
#endif
diff --git a/net/tipc/node_subscr.c b/net/tipc/node_subscr.c
index 8a7384c04ad..7c59ab1d6ec 100644
--- a/net/tipc/node_subscr.c
+++ b/net/tipc/node_subscr.c
@@ -81,14 +81,13 @@ void tipc_nodesub_unsubscribe(struct tipc_node_subscr *node_sub)
*
* Note: node is locked by caller
*/
-void tipc_nodesub_notify(struct tipc_node *node)
+void tipc_nodesub_notify(struct list_head *nsub_list)
{
- struct tipc_node_subscr *ns;
+ struct tipc_node_subscr *ns, *safe;
- list_for_each_entry(ns, &node->nsub, nodesub_list) {
+ list_for_each_entry_safe(ns, safe, nsub_list, nodesub_list) {
if (ns->handle_node_down) {
- tipc_k_signal((Handler)ns->handle_node_down,
- (unsigned long)ns->usr_handle);
+ ns->handle_node_down(ns->usr_handle);
ns->handle_node_down = NULL;
}
}
diff --git a/net/tipc/node_subscr.h b/net/tipc/node_subscr.h
index c95d20727de..d91b8cc81e3 100644
--- a/net/tipc/node_subscr.h
+++ b/net/tipc/node_subscr.h
@@ -58,6 +58,6 @@ struct tipc_node_subscr {
void tipc_nodesub_subscribe(struct tipc_node_subscr *node_sub, u32 addr,
void *usr_handle, net_ev_handler handle_down);
void tipc_nodesub_unsubscribe(struct tipc_node_subscr *node_sub);
-void tipc_nodesub_notify(struct tipc_node *node);
+void tipc_nodesub_notify(struct list_head *nsub_list);
#endif
diff --git a/net/tipc/port.c b/net/tipc/port.c
index b742b265452..5fd7acce01e 100644
--- a/net/tipc/port.c
+++ b/net/tipc/port.c
@@ -1,7 +1,7 @@
/*
* net/tipc/port.c: TIPC port code
*
- * Copyright (c) 1992-2007, Ericsson AB
+ * Copyright (c) 1992-2007, 2014, Ericsson AB
* Copyright (c) 2004-2008, 2010-2013, Wind River Systems
* All rights reserved.
*
@@ -38,6 +38,7 @@
#include "config.h"
#include "port.h"
#include "name_table.h"
+#include "socket.h"
/* Connection management: */
#define PROBING_INTERVAL 3600000 /* [ms] => 1 h */
@@ -54,17 +55,6 @@ static struct sk_buff *port_build_self_abort_msg(struct tipc_port *, u32 err);
static struct sk_buff *port_build_peer_abort_msg(struct tipc_port *, u32 err);
static void port_timeout(unsigned long ref);
-
-static u32 port_peernode(struct tipc_port *p_ptr)
-{
- return msg_destnode(&p_ptr->phdr);
-}
-
-static u32 port_peerport(struct tipc_port *p_ptr)
-{
- return msg_destport(&p_ptr->phdr);
-}
-
/**
* tipc_port_peer_msg - verify message was sent by connected port's peer
*
@@ -76,33 +66,32 @@ int tipc_port_peer_msg(struct tipc_port *p_ptr, struct tipc_msg *msg)
u32 peernode;
u32 orignode;
- if (msg_origport(msg) != port_peerport(p_ptr))
+ if (msg_origport(msg) != tipc_port_peerport(p_ptr))
return 0;
orignode = msg_orignode(msg);
- peernode = port_peernode(p_ptr);
+ peernode = tipc_port_peernode(p_ptr);
return (orignode == peernode) ||
(!orignode && (peernode == tipc_own_addr)) ||
(!peernode && (orignode == tipc_own_addr));
}
/**
- * tipc_multicast - send a multicast message to local and remote destinations
+ * tipc_port_mcast_xmit - send a multicast message to local and remote
+ * destinations
*/
-int tipc_multicast(u32 ref, struct tipc_name_seq const *seq,
- struct iovec const *msg_sect, unsigned int len)
+int tipc_port_mcast_xmit(struct tipc_port *oport,
+ struct tipc_name_seq const *seq,
+ struct iovec const *msg_sect,
+ unsigned int len)
{
struct tipc_msg *hdr;
struct sk_buff *buf;
struct sk_buff *ibuf = NULL;
struct tipc_port_list dports = {0, NULL, };
- struct tipc_port *oport = tipc_port_deref(ref);
int ext_targets;
int res;
- if (unlikely(!oport))
- return -EINVAL;
-
/* Create multicast message */
hdr = &oport->phdr;
msg_set_type(hdr, TIPC_MCAST_MSG);
@@ -131,7 +120,7 @@ int tipc_multicast(u32 ref, struct tipc_name_seq const *seq,
return -ENOMEM;
}
}
- res = tipc_bclink_send_msg(buf);
+ res = tipc_bclink_xmit(buf);
if ((res < 0) && (dports.count != 0))
kfree_skb(ibuf);
} else {
@@ -140,7 +129,7 @@ int tipc_multicast(u32 ref, struct tipc_name_seq const *seq,
if (res >= 0) {
if (ibuf)
- tipc_port_recv_mcast(ibuf, &dports);
+ tipc_port_mcast_rcv(ibuf, &dports);
} else {
tipc_port_list_free(&dports);
}
@@ -148,11 +137,11 @@ int tipc_multicast(u32 ref, struct tipc_name_seq const *seq,
}
/**
- * tipc_port_recv_mcast - deliver multicast message to all destination ports
+ * tipc_port_mcast_rcv - deliver multicast message to all destination ports
*
* If there is no port list, perform a lookup to create one
*/
-void tipc_port_recv_mcast(struct sk_buff *buf, struct tipc_port_list *dp)
+void tipc_port_mcast_rcv(struct sk_buff *buf, struct tipc_port_list *dp)
{
struct tipc_msg *msg;
struct tipc_port_list dports = {0, NULL, };
@@ -176,7 +165,7 @@ void tipc_port_recv_mcast(struct sk_buff *buf, struct tipc_port_list *dp)
msg_set_destnode(msg, tipc_own_addr);
if (dp->count == 1) {
msg_set_destport(msg, dp->ports[0]);
- tipc_port_recv_msg(buf);
+ tipc_sk_rcv(buf);
tipc_port_list_free(dp);
return;
}
@@ -191,7 +180,7 @@ void tipc_port_recv_mcast(struct sk_buff *buf, struct tipc_port_list *dp)
if ((index == 0) && (cnt != 0))
item = item->next;
msg_set_destport(buf_msg(b), item->ports[index]);
- tipc_port_recv_msg(b);
+ tipc_sk_rcv(b);
}
}
exit:
@@ -199,40 +188,32 @@ exit:
tipc_port_list_free(dp);
}
-/**
- * tipc_createport - create a generic TIPC port
+
+void tipc_port_wakeup(struct tipc_port *port)
+{
+ tipc_sock_wakeup(tipc_port_to_sock(port));
+}
+
+/* tipc_port_init - initialize TIPC port and lock it
*
- * Returns pointer to (locked) TIPC port, or NULL if unable to create it
+ * Returns obtained reference if initialization is successful, zero otherwise
*/
-struct tipc_port *tipc_createport(struct sock *sk,
- u32 (*dispatcher)(struct tipc_port *,
- struct sk_buff *),
- void (*wakeup)(struct tipc_port *),
- const u32 importance)
+u32 tipc_port_init(struct tipc_port *p_ptr,
+ const unsigned int importance)
{
- struct tipc_port *p_ptr;
struct tipc_msg *msg;
u32 ref;
- p_ptr = kzalloc(sizeof(*p_ptr), GFP_ATOMIC);
- if (!p_ptr) {
- pr_warn("Port creation failed, no memory\n");
- return NULL;
- }
ref = tipc_ref_acquire(p_ptr, &p_ptr->lock);
if (!ref) {
- pr_warn("Port creation failed, ref. table exhausted\n");
- kfree(p_ptr);
- return NULL;
+ pr_warn("Port registration failed, ref. table exhausted\n");
+ return 0;
}
- p_ptr->sk = sk;
p_ptr->max_pkt = MAX_PKT_DEFAULT;
p_ptr->ref = ref;
INIT_LIST_HEAD(&p_ptr->wait_list);
INIT_LIST_HEAD(&p_ptr->subscription.nodesub_list);
- p_ptr->dispatcher = dispatcher;
- p_ptr->wakeup = wakeup;
k_init_timer(&p_ptr->timer, (Handler)port_timeout, ref);
INIT_LIST_HEAD(&p_ptr->publications);
INIT_LIST_HEAD(&p_ptr->port_list);
@@ -248,10 +229,10 @@ struct tipc_port *tipc_createport(struct sock *sk,
msg_set_origport(msg, ref);
list_add_tail(&p_ptr->port_list, &ports);
spin_unlock_bh(&tipc_port_list_lock);
- return p_ptr;
+ return ref;
}
-int tipc_deleteport(struct tipc_port *p_ptr)
+void tipc_port_destroy(struct tipc_port *p_ptr)
{
struct sk_buff *buf = NULL;
@@ -272,67 +253,7 @@ int tipc_deleteport(struct tipc_port *p_ptr)
list_del(&p_ptr->wait_list);
spin_unlock_bh(&tipc_port_list_lock);
k_term_timer(&p_ptr->timer);
- kfree(p_ptr);
tipc_net_route_msg(buf);
- return 0;
-}
-
-static int port_unreliable(struct tipc_port *p_ptr)
-{
- return msg_src_droppable(&p_ptr->phdr);
-}
-
-int tipc_portunreliable(u32 ref, unsigned int *isunreliable)
-{
- struct tipc_port *p_ptr;
-
- p_ptr = tipc_port_lock(ref);
- if (!p_ptr)
- return -EINVAL;
- *isunreliable = port_unreliable(p_ptr);
- tipc_port_unlock(p_ptr);
- return 0;
-}
-
-int tipc_set_portunreliable(u32 ref, unsigned int isunreliable)
-{
- struct tipc_port *p_ptr;
-
- p_ptr = tipc_port_lock(ref);
- if (!p_ptr)
- return -EINVAL;
- msg_set_src_droppable(&p_ptr->phdr, (isunreliable != 0));
- tipc_port_unlock(p_ptr);
- return 0;
-}
-
-static int port_unreturnable(struct tipc_port *p_ptr)
-{
- return msg_dest_droppable(&p_ptr->phdr);
-}
-
-int tipc_portunreturnable(u32 ref, unsigned int *isunrejectable)
-{
- struct tipc_port *p_ptr;
-
- p_ptr = tipc_port_lock(ref);
- if (!p_ptr)
- return -EINVAL;
- *isunrejectable = port_unreturnable(p_ptr);
- tipc_port_unlock(p_ptr);
- return 0;
-}
-
-int tipc_set_portunreturnable(u32 ref, unsigned int isunrejectable)
-{
- struct tipc_port *p_ptr;
-
- p_ptr = tipc_port_lock(ref);
- if (!p_ptr)
- return -EINVAL;
- msg_set_dest_droppable(&p_ptr->phdr, (isunrejectable != 0));
- tipc_port_unlock(p_ptr);
- return 0;
}
/*
@@ -350,8 +271,8 @@ static struct sk_buff *port_build_proto_msg(struct tipc_port *p_ptr,
if (buf) {
msg = buf_msg(buf);
tipc_msg_init(msg, CONN_MANAGER, type, INT_H_SIZE,
- port_peernode(p_ptr));
- msg_set_destport(msg, port_peerport(p_ptr));
+ tipc_port_peernode(p_ptr));
+ msg_set_destport(msg, tipc_port_peerport(p_ptr));
msg_set_origport(msg, p_ptr->ref);
msg_set_msgcnt(msg, ack);
}
@@ -422,17 +343,17 @@ int tipc_reject_msg(struct sk_buff *buf, u32 err)
/* send returned message & dispose of rejected message */
src_node = msg_prevnode(msg);
if (in_own_node(src_node))
- tipc_port_recv_msg(rbuf);
+ tipc_sk_rcv(rbuf);
else
- tipc_link_send(rbuf, src_node, msg_link_selector(rmsg));
+ tipc_link_xmit(rbuf, src_node, msg_link_selector(rmsg));
exit:
kfree_skb(buf);
return data_sz;
}
-int tipc_port_reject_sections(struct tipc_port *p_ptr, struct tipc_msg *hdr,
- struct iovec const *msg_sect, unsigned int len,
- int err)
+int tipc_port_iovec_reject(struct tipc_port *p_ptr, struct tipc_msg *hdr,
+ struct iovec const *msg_sect, unsigned int len,
+ int err)
{
struct sk_buff *buf;
int res;
@@ -519,7 +440,7 @@ static struct sk_buff *port_build_peer_abort_msg(struct tipc_port *p_ptr, u32 er
return buf;
}
-void tipc_port_recv_proto_msg(struct sk_buff *buf)
+void tipc_port_proto_rcv(struct sk_buff *buf)
{
struct tipc_msg *msg = buf_msg(buf);
struct tipc_port *p_ptr;
@@ -547,13 +468,12 @@ void tipc_port_recv_proto_msg(struct sk_buff *buf)
/* Process protocol message sent by peer */
switch (msg_type(msg)) {
case CONN_ACK:
- wakeable = tipc_port_congested(p_ptr) && p_ptr->congested &&
- p_ptr->wakeup;
+ wakeable = tipc_port_congested(p_ptr) && p_ptr->congested;
p_ptr->acked += msg_msgcnt(msg);
if (!tipc_port_congested(p_ptr)) {
p_ptr->congested = 0;
if (wakeable)
- p_ptr->wakeup(p_ptr);
+ tipc_port_wakeup(p_ptr);
}
break;
case CONN_PROBE:
@@ -584,8 +504,8 @@ static int port_print(struct tipc_port *p_ptr, char *buf, int len, int full_id)
ret = tipc_snprintf(buf, len, "%-10u:", p_ptr->ref);
if (p_ptr->connected) {
- u32 dport = port_peerport(p_ptr);
- u32 destnode = port_peernode(p_ptr);
+ u32 dport = tipc_port_peerport(p_ptr);
+ u32 destnode = tipc_port_peernode(p_ptr);
ret += tipc_snprintf(buf + ret, len - ret,
" connected to <%u.%u.%u:%u>",
@@ -673,34 +593,6 @@ void tipc_acknowledge(u32 ref, u32 ack)
tipc_net_route_msg(buf);
}
-int tipc_portimportance(u32 ref, unsigned int *importance)
-{
- struct tipc_port *p_ptr;
-
- p_ptr = tipc_port_lock(ref);
- if (!p_ptr)
- return -EINVAL;
- *importance = (unsigned int)msg_importance(&p_ptr->phdr);
- tipc_port_unlock(p_ptr);
- return 0;
-}
-
-int tipc_set_portimportance(u32 ref, unsigned int imp)
-{
- struct tipc_port *p_ptr;
-
- if (imp > TIPC_CRITICAL_IMPORTANCE)
- return -EINVAL;
-
- p_ptr = tipc_port_lock(ref);
- if (!p_ptr)
- return -EINVAL;
- msg_set_importance(&p_ptr->phdr, (u32)imp);
- tipc_port_unlock(p_ptr);
- return 0;
-}
-
-
int tipc_publish(struct tipc_port *p_ptr, unsigned int scope,
struct tipc_name_seq const *seq)
{
@@ -760,7 +652,7 @@ int tipc_withdraw(struct tipc_port *p_ptr, unsigned int scope,
return res;
}
-int tipc_connect(u32 ref, struct tipc_portid const *peer)
+int tipc_port_connect(u32 ref, struct tipc_portid const *peer)
{
struct tipc_port *p_ptr;
int res;
@@ -768,17 +660,17 @@ int tipc_connect(u32 ref, struct tipc_portid const *peer)
p_ptr = tipc_port_lock(ref);
if (!p_ptr)
return -EINVAL;
- res = __tipc_connect(ref, p_ptr, peer);
+ res = __tipc_port_connect(ref, p_ptr, peer);
tipc_port_unlock(p_ptr);
return res;
}
/*
- * __tipc_connect - connect to a remote peer
+ * __tipc_port_connect - connect to a remote peer
*
* Port must be locked.
*/
-int __tipc_connect(u32 ref, struct tipc_port *p_ptr,
+int __tipc_port_connect(u32 ref, struct tipc_port *p_ptr,
struct tipc_portid const *peer)
{
struct tipc_msg *msg;
@@ -815,7 +707,7 @@ exit:
*
* Port must be locked.
*/
-int __tipc_disconnect(struct tipc_port *tp_ptr)
+int __tipc_port_disconnect(struct tipc_port *tp_ptr)
{
if (tp_ptr->connected) {
tp_ptr->connected = 0;
@@ -828,10 +720,10 @@ int __tipc_disconnect(struct tipc_port *tp_ptr)
}
/*
- * tipc_disconnect(): Disconnect port form peer.
+ * tipc_port_disconnect(): Disconnect port from peer.
* This is a node local operation.
*/
-int tipc_disconnect(u32 ref)
+int tipc_port_disconnect(u32 ref)
{
struct tipc_port *p_ptr;
int res;
@@ -839,15 +731,15 @@ int tipc_disconnect(u32 ref)
p_ptr = tipc_port_lock(ref);
if (!p_ptr)
return -EINVAL;
- res = __tipc_disconnect(p_ptr);
+ res = __tipc_port_disconnect(p_ptr);
tipc_port_unlock(p_ptr);
return res;
}
/*
- * tipc_shutdown(): Send a SHUTDOWN msg to peer and disconnect
+ * tipc_port_shutdown(): Send a SHUTDOWN msg to peer and disconnect
*/
-int tipc_shutdown(u32 ref)
+int tipc_port_shutdown(u32 ref)
{
struct tipc_port *p_ptr;
struct sk_buff *buf = NULL;
@@ -859,78 +751,47 @@ int tipc_shutdown(u32 ref)
buf = port_build_peer_abort_msg(p_ptr, TIPC_CONN_SHUTDOWN);
tipc_port_unlock(p_ptr);
tipc_net_route_msg(buf);
- return tipc_disconnect(ref);
-}
-
-/**
- * tipc_port_recv_msg - receive message from lower layer and deliver to port user
- */
-int tipc_port_recv_msg(struct sk_buff *buf)
-{
- struct tipc_port *p_ptr;
- struct tipc_msg *msg = buf_msg(buf);
- u32 destport = msg_destport(msg);
- u32 dsz = msg_data_sz(msg);
- u32 err;
-
- /* forward unresolved named message */
- if (unlikely(!destport)) {
- tipc_net_route_msg(buf);
- return dsz;
- }
-
- /* validate destination & pass to port, otherwise reject message */
- p_ptr = tipc_port_lock(destport);
- if (likely(p_ptr)) {
- err = p_ptr->dispatcher(p_ptr, buf);
- tipc_port_unlock(p_ptr);
- if (likely(!err))
- return dsz;
- } else {
- err = TIPC_ERR_NO_PORT;
- }
-
- return tipc_reject_msg(buf, err);
+ return tipc_port_disconnect(ref);
}
/*
- * tipc_port_recv_sections(): Concatenate and deliver sectioned
- * message for this node.
+ * tipc_port_iovec_rcv: Concatenate and deliver sectioned
+ * message for this node.
*/
-static int tipc_port_recv_sections(struct tipc_port *sender,
- struct iovec const *msg_sect,
- unsigned int len)
+static int tipc_port_iovec_rcv(struct tipc_port *sender,
+ struct iovec const *msg_sect,
+ unsigned int len)
{
struct sk_buff *buf;
int res;
res = tipc_msg_build(&sender->phdr, msg_sect, len, MAX_MSG_SIZE, &buf);
if (likely(buf))
- tipc_port_recv_msg(buf);
+ tipc_sk_rcv(buf);
return res;
}
/**
* tipc_send - send message sections on connection
*/
-int tipc_send(u32 ref, struct iovec const *msg_sect, unsigned int len)
+int tipc_send(struct tipc_port *p_ptr,
+ struct iovec const *msg_sect,
+ unsigned int len)
{
- struct tipc_port *p_ptr;
u32 destnode;
int res;
- p_ptr = tipc_port_deref(ref);
- if (!p_ptr || !p_ptr->connected)
+ if (!p_ptr->connected)
return -EINVAL;
p_ptr->congested = 1;
if (!tipc_port_congested(p_ptr)) {
- destnode = port_peernode(p_ptr);
+ destnode = tipc_port_peernode(p_ptr);
if (likely(!in_own_node(destnode)))
- res = tipc_link_send_sections_fast(p_ptr, msg_sect,
- len, destnode);
+ res = tipc_link_iovec_xmit_fast(p_ptr, msg_sect, len,
+ destnode);
else
- res = tipc_port_recv_sections(p_ptr, msg_sect, len);
+ res = tipc_port_iovec_rcv(p_ptr, msg_sect, len);
if (likely(res != -ELINKCONG)) {
p_ptr->congested = 0;
@@ -939,7 +800,7 @@ int tipc_send(u32 ref, struct iovec const *msg_sect, unsigned int len)
return res;
}
}
- if (port_unreliable(p_ptr)) {
+ if (tipc_port_unreliable(p_ptr)) {
p_ptr->congested = 0;
return len;
}
@@ -949,17 +810,18 @@ int tipc_send(u32 ref, struct iovec const *msg_sect, unsigned int len)
/**
* tipc_send2name - send message sections to port name
*/
-int tipc_send2name(u32 ref, struct tipc_name const *name, unsigned int domain,
- struct iovec const *msg_sect, unsigned int len)
+int tipc_send2name(struct tipc_port *p_ptr,
+ struct tipc_name const *name,
+ unsigned int domain,
+ struct iovec const *msg_sect,
+ unsigned int len)
{
- struct tipc_port *p_ptr;
struct tipc_msg *msg;
u32 destnode = domain;
u32 destport;
int res;
- p_ptr = tipc_port_deref(ref);
- if (!p_ptr || p_ptr->connected)
+ if (p_ptr->connected)
return -EINVAL;
msg = &p_ptr->phdr;
@@ -974,39 +836,39 @@ int tipc_send2name(u32 ref, struct tipc_name const *name, unsigned int domain,
if (likely(destport || destnode)) {
if (likely(in_own_node(destnode)))
- res = tipc_port_recv_sections(p_ptr, msg_sect, len);
+ res = tipc_port_iovec_rcv(p_ptr, msg_sect, len);
else if (tipc_own_addr)
- res = tipc_link_send_sections_fast(p_ptr, msg_sect,
- len, destnode);
+ res = tipc_link_iovec_xmit_fast(p_ptr, msg_sect, len,
+ destnode);
else
- res = tipc_port_reject_sections(p_ptr, msg, msg_sect,
- len, TIPC_ERR_NO_NODE);
+ res = tipc_port_iovec_reject(p_ptr, msg, msg_sect,
+ len, TIPC_ERR_NO_NODE);
if (likely(res != -ELINKCONG)) {
if (res > 0)
p_ptr->sent++;
return res;
}
- if (port_unreliable(p_ptr)) {
+ if (tipc_port_unreliable(p_ptr))
return len;
- }
+
return -ELINKCONG;
}
- return tipc_port_reject_sections(p_ptr, msg, msg_sect, len,
- TIPC_ERR_NO_NAME);
+ return tipc_port_iovec_reject(p_ptr, msg, msg_sect, len,
+ TIPC_ERR_NO_NAME);
}
/**
* tipc_send2port - send message sections to port identity
*/
-int tipc_send2port(u32 ref, struct tipc_portid const *dest,
- struct iovec const *msg_sect, unsigned int len)
+int tipc_send2port(struct tipc_port *p_ptr,
+ struct tipc_portid const *dest,
+ struct iovec const *msg_sect,
+ unsigned int len)
{
- struct tipc_port *p_ptr;
struct tipc_msg *msg;
int res;
- p_ptr = tipc_port_deref(ref);
- if (!p_ptr || p_ptr->connected)
+ if (p_ptr->connected)
return -EINVAL;
msg = &p_ptr->phdr;
@@ -1017,20 +879,20 @@ int tipc_send2port(u32 ref, struct tipc_portid const *dest,
msg_set_hdr_sz(msg, BASIC_H_SIZE);
if (in_own_node(dest->node))
- res = tipc_port_recv_sections(p_ptr, msg_sect, len);
+ res = tipc_port_iovec_rcv(p_ptr, msg_sect, len);
else if (tipc_own_addr)
- res = tipc_link_send_sections_fast(p_ptr, msg_sect, len,
- dest->node);
+ res = tipc_link_iovec_xmit_fast(p_ptr, msg_sect, len,
+ dest->node);
else
- res = tipc_port_reject_sections(p_ptr, msg, msg_sect, len,
+ res = tipc_port_iovec_reject(p_ptr, msg, msg_sect, len,
TIPC_ERR_NO_NODE);
if (likely(res != -ELINKCONG)) {
if (res > 0)
p_ptr->sent++;
return res;
}
- if (port_unreliable(p_ptr)) {
+ if (tipc_port_unreliable(p_ptr))
return len;
- }
+
return -ELINKCONG;
}
diff --git a/net/tipc/port.h b/net/tipc/port.h
index 34f12bd4074..cf4ca5b1d9a 100644
--- a/net/tipc/port.h
+++ b/net/tipc/port.h
@@ -1,7 +1,7 @@
/*
* net/tipc/port.h: Include file for TIPC port code
*
- * Copyright (c) 1994-2007, Ericsson AB
+ * Copyright (c) 1994-2007, 2014, Ericsson AB
* Copyright (c) 2004-2007, 2010-2013, Wind River Systems
* All rights reserved.
*
@@ -42,13 +42,13 @@
#include "msg.h"
#include "node_subscr.h"
-#define TIPC_FLOW_CONTROL_WIN 512
-#define CONN_OVERLOAD_LIMIT ((TIPC_FLOW_CONTROL_WIN * 2 + 1) * \
- SKB_TRUESIZE(TIPC_MAX_USER_MSG_SIZE))
+#define TIPC_CONNACK_INTV 256
+#define TIPC_FLOWCTRL_WIN (TIPC_CONNACK_INTV * 2)
+#define TIPC_CONN_OVERLOAD_LIMIT ((TIPC_FLOWCTRL_WIN * 2 + 1) * \
+ SKB_TRUESIZE(TIPC_MAX_USER_MSG_SIZE))
/**
* struct tipc_port - TIPC port structure
- * @sk: pointer to socket handle
* @lock: pointer to spinlock for controlling access to port
* @connected: non-zero if port is currently connected to a peer port
* @conn_type: TIPC type used when connection was established
@@ -60,8 +60,6 @@
* @ref: unique reference to port in TIPC object registry
* @phdr: preformatted message header used when sending messages
* @port_list: adjacent ports in TIPC's global list of ports
- * @dispatcher: ptr to routine which handles received messages
- * @wakeup: ptr to routine to call when port is no longer congested
* @wait_list: adjacent ports in list of ports waiting on link congestion
* @waiting_pkts:
* @sent: # of non-empty messages sent by port
@@ -74,7 +72,6 @@
* @subscription: "node down" subscription used to terminate failed connections
*/
struct tipc_port {
- struct sock *sk;
spinlock_t *lock;
int connected;
u32 conn_type;
@@ -86,8 +83,6 @@ struct tipc_port {
u32 ref;
struct tipc_msg phdr;
struct list_head port_list;
- u32 (*dispatcher)(struct tipc_port *, struct sk_buff *);
- void (*wakeup)(struct tipc_port *);
struct list_head wait_list;
u32 waiting_pkts;
u32 sent;
@@ -106,68 +101,70 @@ struct tipc_port_list;
/*
* TIPC port manipulation routines
*/
-struct tipc_port *tipc_createport(struct sock *sk,
- u32 (*dispatcher)(struct tipc_port *,
- struct sk_buff *),
- void (*wakeup)(struct tipc_port *),
- const u32 importance);
+u32 tipc_port_init(struct tipc_port *p_ptr,
+ const unsigned int importance);
int tipc_reject_msg(struct sk_buff *buf, u32 err);
void tipc_acknowledge(u32 port_ref, u32 ack);
-int tipc_deleteport(struct tipc_port *p_ptr);
-
-int tipc_portimportance(u32 portref, unsigned int *importance);
-int tipc_set_portimportance(u32 portref, unsigned int importance);
-
-int tipc_portunreliable(u32 portref, unsigned int *isunreliable);
-int tipc_set_portunreliable(u32 portref, unsigned int isunreliable);
-
-int tipc_portunreturnable(u32 portref, unsigned int *isunreturnable);
-int tipc_set_portunreturnable(u32 portref, unsigned int isunreturnable);
+void tipc_port_destroy(struct tipc_port *p_ptr);
int tipc_publish(struct tipc_port *p_ptr, unsigned int scope,
struct tipc_name_seq const *name_seq);
+
int tipc_withdraw(struct tipc_port *p_ptr, unsigned int scope,
struct tipc_name_seq const *name_seq);
-int tipc_connect(u32 portref, struct tipc_portid const *port);
+int tipc_port_connect(u32 portref, struct tipc_portid const *port);
-int tipc_disconnect(u32 portref);
+int tipc_port_disconnect(u32 portref);
-int tipc_shutdown(u32 ref);
+int tipc_port_shutdown(u32 ref);
+void tipc_port_wakeup(struct tipc_port *port);
/*
* The following routines require that the port be locked on entry
*/
-int __tipc_disconnect(struct tipc_port *tp_ptr);
-int __tipc_connect(u32 ref, struct tipc_port *p_ptr,
+int __tipc_port_disconnect(struct tipc_port *tp_ptr);
+int __tipc_port_connect(u32 ref, struct tipc_port *p_ptr,
struct tipc_portid const *peer);
int tipc_port_peer_msg(struct tipc_port *p_ptr, struct tipc_msg *msg);
/*
* TIPC messaging routines
*/
-int tipc_port_recv_msg(struct sk_buff *buf);
-int tipc_send(u32 portref, struct iovec const *msg_sect, unsigned int len);
-int tipc_send2name(u32 portref, struct tipc_name const *name, u32 domain,
- struct iovec const *msg_sect, unsigned int len);
+int tipc_send(struct tipc_port *port,
+ struct iovec const *msg_sect,
+ unsigned int len);
-int tipc_send2port(u32 portref, struct tipc_portid const *dest,
- struct iovec const *msg_sect, unsigned int len);
+int tipc_send2name(struct tipc_port *port,
+ struct tipc_name const *name,
+ u32 domain,
+ struct iovec const *msg_sect,
+ unsigned int len);
-int tipc_multicast(u32 portref, struct tipc_name_seq const *seq,
- struct iovec const *msg, unsigned int len);
+int tipc_send2port(struct tipc_port *port,
+ struct tipc_portid const *dest,
+ struct iovec const *msg_sect,
+ unsigned int len);
+
+int tipc_port_mcast_xmit(struct tipc_port *port,
+ struct tipc_name_seq const *seq,
+ struct iovec const *msg,
+ unsigned int len);
+
+int tipc_port_iovec_reject(struct tipc_port *p_ptr,
+ struct tipc_msg *hdr,
+ struct iovec const *msg_sect,
+ unsigned int len,
+ int err);
-int tipc_port_reject_sections(struct tipc_port *p_ptr, struct tipc_msg *hdr,
- struct iovec const *msg_sect, unsigned int len,
- int err);
struct sk_buff *tipc_port_get_ports(void);
-void tipc_port_recv_proto_msg(struct sk_buff *buf);
-void tipc_port_recv_mcast(struct sk_buff *buf, struct tipc_port_list *dp);
+void tipc_port_proto_rcv(struct sk_buff *buf);
+void tipc_port_mcast_rcv(struct sk_buff *buf, struct tipc_port_list *dp);
void tipc_port_reinit(void);
/**
@@ -188,14 +185,53 @@ static inline void tipc_port_unlock(struct tipc_port *p_ptr)
spin_unlock_bh(p_ptr->lock);
}
-static inline struct tipc_port *tipc_port_deref(u32 ref)
+static inline int tipc_port_congested(struct tipc_port *p_ptr)
{
- return (struct tipc_port *)tipc_ref_deref(ref);
+ return ((p_ptr->sent - p_ptr->acked) >= TIPC_FLOWCTRL_WIN);
}
-static inline int tipc_port_congested(struct tipc_port *p_ptr)
+
+static inline u32 tipc_port_peernode(struct tipc_port *p_ptr)
+{
+ return msg_destnode(&p_ptr->phdr);
+}
+
+static inline u32 tipc_port_peerport(struct tipc_port *p_ptr)
+{
+ return msg_destport(&p_ptr->phdr);
+}
+
+static inline bool tipc_port_unreliable(struct tipc_port *port)
+{
+ return msg_src_droppable(&port->phdr) != 0;
+}
+
+static inline void tipc_port_set_unreliable(struct tipc_port *port,
+ bool unreliable)
+{
+ msg_set_src_droppable(&port->phdr, unreliable ? 1 : 0);
+}
+
+static inline bool tipc_port_unreturnable(struct tipc_port *port)
+{
+ return msg_dest_droppable(&port->phdr) != 0;
+}
+
+static inline void tipc_port_set_unreturnable(struct tipc_port *port,
+ bool unreturnable)
+{
+ msg_set_dest_droppable(&port->phdr, unreturnable ? 1 : 0);
+}
+
+
+static inline int tipc_port_importance(struct tipc_port *port)
+{
+ return msg_importance(&port->phdr);
+}
+
+static inline void tipc_port_set_importance(struct tipc_port *port, int imp)
{
- return (p_ptr->sent - p_ptr->acked) >= (TIPC_FLOW_CONTROL_WIN * 2);
+ msg_set_importance(&port->phdr, (u32)imp);
}
#endif
diff --git a/net/tipc/ref.c b/net/tipc/ref.c
index de3d593e2fe..3d4ecd754ee 100644
--- a/net/tipc/ref.c
+++ b/net/tipc/ref.c
@@ -89,7 +89,7 @@ struct ref_table {
static struct ref_table tipc_ref_table;
-static DEFINE_RWLOCK(ref_table_lock);
+static DEFINE_SPINLOCK(ref_table_lock);
/**
* tipc_ref_table_init - create reference table for objects
@@ -159,7 +159,7 @@ u32 tipc_ref_acquire(void *object, spinlock_t **lock)
}
/* take a free entry, if available; otherwise initialize a new entry */
- write_lock_bh(&ref_table_lock);
+ spin_lock_bh(&ref_table_lock);
if (tipc_ref_table.first_free) {
index = tipc_ref_table.first_free;
entry = &(tipc_ref_table.entries[index]);
@@ -175,7 +175,7 @@ u32 tipc_ref_acquire(void *object, spinlock_t **lock)
} else {
ref = 0;
}
- write_unlock_bh(&ref_table_lock);
+ spin_unlock_bh(&ref_table_lock);
/*
* Grab the lock so no one else can modify this entry
@@ -216,7 +216,7 @@ void tipc_ref_discard(u32 ref)
index = ref & index_mask;
entry = &(tipc_ref_table.entries[index]);
- write_lock_bh(&ref_table_lock);
+ spin_lock_bh(&ref_table_lock);
if (!entry->object) {
pr_err("Attempt to discard ref. to non-existent obj\n");
@@ -242,7 +242,7 @@ void tipc_ref_discard(u32 ref)
tipc_ref_table.last_free = index;
exit:
- write_unlock_bh(&ref_table_lock);
+ spin_unlock_bh(&ref_table_lock);
}
/**
@@ -264,20 +264,3 @@ void *tipc_ref_lock(u32 ref)
}
return NULL;
}
-
-
-/**
- * tipc_ref_deref - return pointer referenced object (without locking it)
- */
-void *tipc_ref_deref(u32 ref)
-{
- if (likely(tipc_ref_table.entries)) {
- struct reference *entry;
-
- entry = &tipc_ref_table.entries[ref &
- tipc_ref_table.index_mask];
- if (likely(entry->ref == ref))
- return entry->object;
- }
- return NULL;
-}
diff --git a/net/tipc/ref.h b/net/tipc/ref.h
index 5bc8e7ab84d..d01aa1df63b 100644
--- a/net/tipc/ref.h
+++ b/net/tipc/ref.h
@@ -44,6 +44,5 @@ u32 tipc_ref_acquire(void *object, spinlock_t **lock);
void tipc_ref_discard(u32 ref);
void *tipc_ref_lock(u32 ref);
-void *tipc_ref_deref(u32 ref);
#endif
diff --git a/net/tipc/server.c b/net/tipc/server.c
index 646a930eefb..a538a02f869 100644
--- a/net/tipc/server.c
+++ b/net/tipc/server.c
@@ -119,7 +119,7 @@ static struct tipc_conn *tipc_conn_lookup(struct tipc_server *s, int conid)
return con;
}
-static void sock_data_ready(struct sock *sk, int unused)
+static void sock_data_ready(struct sock *sk)
{
struct tipc_conn *con;
@@ -297,7 +297,7 @@ static int tipc_accept_from_sock(struct tipc_conn *con)
newcon->usr_data = s->tipc_conn_new(newcon->conid);
/* Wake up receive process in case of 'SYN+' message */
- newsock->sk->sk_data_ready(newsock->sk, 0);
+ newsock->sk->sk_data_ready(newsock->sk);
return ret;
}
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 0ed0eaa62f2..ef0475568f9 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1,7 +1,7 @@
/*
* net/tipc/socket.c: TIPC socket API
*
- * Copyright (c) 2001-2007, 2012 Ericsson AB
+ * Copyright (c) 2001-2007, 2012-2014, Ericsson AB
* Copyright (c) 2004-2008, 2010-2013, Wind River Systems
* All rights reserved.
*
@@ -36,32 +36,20 @@
#include "core.h"
#include "port.h"
+#include "node.h"
#include <linux/export.h>
-#include <net/sock.h>
#define SS_LISTENING -1 /* socket is listening */
#define SS_READY -2 /* socket is connectionless */
#define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */
-struct tipc_sock {
- struct sock sk;
- struct tipc_port *p;
- struct tipc_portid peer_name;
- unsigned int conn_timeout;
-};
-
-#define tipc_sk(sk) ((struct tipc_sock *)(sk))
-#define tipc_sk_port(sk) (tipc_sk(sk)->p)
-
-static int backlog_rcv(struct sock *sk, struct sk_buff *skb);
-static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf);
-static void wakeupdispatch(struct tipc_port *tport);
-static void tipc_data_ready(struct sock *sk, int len);
+static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb);
+static void tipc_data_ready(struct sock *sk);
static void tipc_write_space(struct sock *sk);
-static int release(struct socket *sock);
-static int accept(struct socket *sock, struct socket *new_sock, int flags);
+static int tipc_release(struct socket *sock);
+static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags);
static const struct proto_ops packet_ops;
static const struct proto_ops stream_ops;
@@ -115,6 +103,8 @@ static struct proto tipc_proto_kern;
* - port reference
*/
+#include "socket.h"
+
/**
* advance_rx_queue - discard first buffer in socket receive queue
*
@@ -150,13 +140,15 @@ static void reject_rx_queue(struct sock *sk)
*
* Returns 0 on success, errno otherwise
*/
-static int tipc_sk_create(struct net *net, struct socket *sock, int protocol,
- int kern)
+static int tipc_sk_create(struct net *net, struct socket *sock,
+ int protocol, int kern)
{
const struct proto_ops *ops;
socket_state state;
struct sock *sk;
- struct tipc_port *tp_ptr;
+ struct tipc_sock *tsk;
+ struct tipc_port *port;
+ u32 ref;
/* Validate arguments */
if (unlikely(protocol != 0))
@@ -189,10 +181,12 @@ static int tipc_sk_create(struct net *net, struct socket *sock, int protocol,
if (sk == NULL)
return -ENOMEM;
- /* Allocate TIPC port for socket to use */
- tp_ptr = tipc_createport(sk, &dispatch, &wakeupdispatch,
- TIPC_LOW_IMPORTANCE);
- if (unlikely(!tp_ptr)) {
+ tsk = tipc_sk(sk);
+ port = &tsk->port;
+
+ ref = tipc_port_init(port, TIPC_LOW_IMPORTANCE);
+ if (!ref) {
+ pr_warn("Socket registration failed, ref. table exhausted\n");
sk_free(sk);
return -ENOMEM;
}
@@ -202,21 +196,19 @@ static int tipc_sk_create(struct net *net, struct socket *sock, int protocol,
sock->state = state;
sock_init_data(sock, sk);
- sk->sk_backlog_rcv = backlog_rcv;
+ sk->sk_backlog_rcv = tipc_backlog_rcv;
sk->sk_rcvbuf = sysctl_tipc_rmem[1];
sk->sk_data_ready = tipc_data_ready;
sk->sk_write_space = tipc_write_space;
- tipc_sk(sk)->p = tp_ptr;
- tipc_sk(sk)->conn_timeout = CONN_TIMEOUT_DEFAULT;
-
- spin_unlock_bh(tp_ptr->lock);
+ tsk->conn_timeout = CONN_TIMEOUT_DEFAULT;
+ atomic_set(&tsk->dupl_rcvcnt, 0);
+ tipc_port_unlock(port);
if (sock->state == SS_READY) {
- tipc_set_portunreturnable(tp_ptr->ref, 1);
+ tipc_port_set_unreturnable(port, true);
if (sock->type == SOCK_DGRAM)
- tipc_set_portunreliable(tp_ptr->ref, 1);
+ tipc_port_set_unreliable(port, true);
}
-
return 0;
}
@@ -254,7 +246,7 @@ int tipc_sock_create_local(int type, struct socket **res)
*/
void tipc_sock_release_local(struct socket *sock)
{
- release(sock);
+ tipc_release(sock);
sock->ops = NULL;
sock_release(sock);
}
@@ -280,7 +272,7 @@ int tipc_sock_accept_local(struct socket *sock, struct socket **newsock,
if (ret < 0)
return ret;
- ret = accept(sock, *newsock, flags);
+ ret = tipc_accept(sock, *newsock, flags);
if (ret < 0) {
sock_release(*newsock);
return ret;
@@ -290,7 +282,7 @@ int tipc_sock_accept_local(struct socket *sock, struct socket **newsock,
}
/**
- * release - destroy a TIPC socket
+ * tipc_release - destroy a TIPC socket
* @sock: socket to destroy
*
* This routine cleans up any messages that are still queued on the socket.
@@ -305,12 +297,12 @@ int tipc_sock_accept_local(struct socket *sock, struct socket **newsock,
*
* Returns 0 on success, errno otherwise
*/
-static int release(struct socket *sock)
+static int tipc_release(struct socket *sock)
{
struct sock *sk = sock->sk;
- struct tipc_port *tport;
+ struct tipc_sock *tsk;
+ struct tipc_port *port;
struct sk_buff *buf;
- int res;
/*
* Exit if socket isn't fully initialized (occurs when a failed accept()
@@ -319,7 +311,8 @@ static int release(struct socket *sock)
if (sk == NULL)
return 0;
- tport = tipc_sk_port(sk);
+ tsk = tipc_sk(sk);
+ port = &tsk->port;
lock_sock(sk);
/*
@@ -336,17 +329,16 @@ static int release(struct socket *sock)
if ((sock->state == SS_CONNECTING) ||
(sock->state == SS_CONNECTED)) {
sock->state = SS_DISCONNECTING;
- tipc_disconnect(tport->ref);
+ tipc_port_disconnect(port->ref);
}
tipc_reject_msg(buf, TIPC_ERR_NO_PORT);
}
}
- /*
- * Delete TIPC port; this ensures no more messages are queued
- * (also disconnects an active connection & sends a 'FIN-' to peer)
+ /* Destroy TIPC port; also disconnects an active connection and
+ * sends a 'FIN-' to peer.
*/
- res = tipc_deleteport(tport);
+ tipc_port_destroy(port);
/* Discard any remaining (connection-based) messages in receive queue */
__skb_queue_purge(&sk->sk_receive_queue);
@@ -358,11 +350,11 @@ static int release(struct socket *sock)
sock_put(sk);
sock->sk = NULL;
- return res;
+ return 0;
}
/**
- * bind - associate or disassocate TIPC name(s) with a socket
+ * tipc_bind - associate or disassociate TIPC name(s) with a socket
* @sock: socket structure
* @uaddr: socket address describing name(s) and desired operation
* @uaddr_len: size of socket address data structure
@@ -376,16 +368,17 @@ static int release(struct socket *sock)
* NOTE: This routine doesn't need to take the socket lock since it doesn't
* access any non-constant socket information.
*/
-static int bind(struct socket *sock, struct sockaddr *uaddr, int uaddr_len)
+static int tipc_bind(struct socket *sock, struct sockaddr *uaddr,
+ int uaddr_len)
{
struct sock *sk = sock->sk;
struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr;
- struct tipc_port *tport = tipc_sk_port(sock->sk);
+ struct tipc_sock *tsk = tipc_sk(sk);
int res = -EINVAL;
lock_sock(sk);
if (unlikely(!uaddr_len)) {
- res = tipc_withdraw(tport, 0, NULL);
+ res = tipc_withdraw(&tsk->port, 0, NULL);
goto exit;
}
@@ -413,15 +406,15 @@ static int bind(struct socket *sock, struct sockaddr *uaddr, int uaddr_len)
}
res = (addr->scope > 0) ?
- tipc_publish(tport, addr->scope, &addr->addr.nameseq) :
- tipc_withdraw(tport, -addr->scope, &addr->addr.nameseq);
+ tipc_publish(&tsk->port, addr->scope, &addr->addr.nameseq) :
+ tipc_withdraw(&tsk->port, -addr->scope, &addr->addr.nameseq);
exit:
release_sock(sk);
return res;
}
/**
- * get_name - get port ID of socket or peer socket
+ * tipc_getname - get port ID of socket or peer socket
* @sock: socket structure
* @uaddr: area for returned socket address
* @uaddr_len: area for returned length of socket address
@@ -433,21 +426,21 @@ exit:
* accesses socket information that is unchanging (or which changes in
* a completely predictable manner).
*/
-static int get_name(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+static int tipc_getname(struct socket *sock, struct sockaddr *uaddr,
+ int *uaddr_len, int peer)
{
struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr;
- struct tipc_sock *tsock = tipc_sk(sock->sk);
+ struct tipc_sock *tsk = tipc_sk(sock->sk);
memset(addr, 0, sizeof(*addr));
if (peer) {
if ((sock->state != SS_CONNECTED) &&
((peer != 2) || (sock->state != SS_DISCONNECTING)))
return -ENOTCONN;
- addr->addr.id.ref = tsock->peer_name.ref;
- addr->addr.id.node = tsock->peer_name.node;
+ addr->addr.id.ref = tipc_port_peerport(&tsk->port);
+ addr->addr.id.node = tipc_port_peernode(&tsk->port);
} else {
- addr->addr.id.ref = tsock->p->ref;
+ addr->addr.id.ref = tsk->port.ref;
addr->addr.id.node = tipc_own_addr;
}
@@ -461,7 +454,7 @@ static int get_name(struct socket *sock, struct sockaddr *uaddr,
}
/**
- * poll - read and possibly block on pollmask
+ * tipc_poll - read and possibly block on pollmask
* @file: file structure associated with the socket
* @sock: socket for which to calculate the poll bits
* @wait: ???
@@ -500,22 +493,23 @@ static int get_name(struct socket *sock, struct sockaddr *uaddr,
* imply that the operation will succeed, merely that it should be performed
* and will not block.
*/
-static unsigned int poll(struct file *file, struct socket *sock,
- poll_table *wait)
+static unsigned int tipc_poll(struct file *file, struct socket *sock,
+ poll_table *wait)
{
struct sock *sk = sock->sk;
+ struct tipc_sock *tsk = tipc_sk(sk);
u32 mask = 0;
sock_poll_wait(file, sk_sleep(sk), wait);
switch ((int)sock->state) {
case SS_UNCONNECTED:
- if (!tipc_sk_port(sk)->congested)
+ if (!tsk->port.congested)
mask |= POLLOUT;
break;
case SS_READY:
case SS_CONNECTED:
- if (!tipc_sk_port(sk)->congested)
+ if (!tsk->port.congested)
mask |= POLLOUT;
/* fall thru' */
case SS_CONNECTING:
@@ -565,7 +559,7 @@ static int dest_name_check(struct sockaddr_tipc *dest, struct msghdr *m)
static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p)
{
struct sock *sk = sock->sk;
- struct tipc_port *tport = tipc_sk_port(sk);
+ struct tipc_sock *tsk = tipc_sk(sk);
DEFINE_WAIT(wait);
int done;
@@ -581,14 +575,15 @@ static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p)
return sock_intr_errno(*timeo_p);
prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
- done = sk_wait_event(sk, timeo_p, !tport->congested);
+ done = sk_wait_event(sk, timeo_p, !tsk->port.congested);
finish_wait(sk_sleep(sk), &wait);
} while (!done);
return 0;
}
+
/**
- * send_msg - send message in connectionless manner
+ * tipc_sendmsg - send message in connectionless manner
* @iocb: if NULL, indicates that socket lock is already held
* @sock: socket structure
* @m: message to send
@@ -601,11 +596,12 @@ static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p)
*
* Returns the number of bytes sent on success, or errno otherwise
*/
-static int send_msg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *m, size_t total_len)
+static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock,
+ struct msghdr *m, size_t total_len)
{
struct sock *sk = sock->sk;
- struct tipc_port *tport = tipc_sk_port(sk);
+ struct tipc_sock *tsk = tipc_sk(sk);
+ struct tipc_port *port = &tsk->port;
DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
int needs_conn;
long timeo;
@@ -632,13 +628,13 @@ static int send_msg(struct kiocb *iocb, struct socket *sock,
res = -EISCONN;
goto exit;
}
- if (tport->published) {
+ if (tsk->port.published) {
res = -EOPNOTSUPP;
goto exit;
}
if (dest->addrtype == TIPC_ADDR_NAME) {
- tport->conn_type = dest->addr.name.name.type;
- tport->conn_instance = dest->addr.name.name.instance;
+ tsk->port.conn_type = dest->addr.name.name.type;
+ tsk->port.conn_instance = dest->addr.name.name.instance;
}
/* Abort any pending connection attempts (very unlikely) */
@@ -651,13 +647,13 @@ static int send_msg(struct kiocb *iocb, struct socket *sock,
res = dest_name_check(dest, m);
if (res)
break;
- res = tipc_send2name(tport->ref,
+ res = tipc_send2name(port,
&dest->addr.name.name,
dest->addr.name.domain,
m->msg_iov,
total_len);
} else if (dest->addrtype == TIPC_ADDR_ID) {
- res = tipc_send2port(tport->ref,
+ res = tipc_send2port(port,
&dest->addr.id,
m->msg_iov,
total_len);
@@ -669,10 +665,10 @@ static int send_msg(struct kiocb *iocb, struct socket *sock,
res = dest_name_check(dest, m);
if (res)
break;
- res = tipc_multicast(tport->ref,
- &dest->addr.nameseq,
- m->msg_iov,
- total_len);
+ res = tipc_port_mcast_xmit(port,
+ &dest->addr.nameseq,
+ m->msg_iov,
+ total_len);
}
if (likely(res != -ELINKCONG)) {
if (needs_conn && (res >= 0))
@@ -693,7 +689,8 @@ exit:
static int tipc_wait_for_sndpkt(struct socket *sock, long *timeo_p)
{
struct sock *sk = sock->sk;
- struct tipc_port *tport = tipc_sk_port(sk);
+ struct tipc_sock *tsk = tipc_sk(sk);
+ struct tipc_port *port = &tsk->port;
DEFINE_WAIT(wait);
int done;
@@ -712,14 +709,14 @@ static int tipc_wait_for_sndpkt(struct socket *sock, long *timeo_p)
prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
done = sk_wait_event(sk, timeo_p,
- (!tport->congested || !tport->connected));
+ (!port->congested || !port->connected));
finish_wait(sk_sleep(sk), &wait);
} while (!done);
return 0;
}
/**
- * send_packet - send a connection-oriented message
+ * tipc_send_packet - send a connection-oriented message
* @iocb: if NULL, indicates that socket lock is already held
* @sock: socket structure
* @m: message to send
@@ -729,18 +726,18 @@ static int tipc_wait_for_sndpkt(struct socket *sock, long *timeo_p)
*
* Returns the number of bytes sent on success, or errno otherwise
*/
-static int send_packet(struct kiocb *iocb, struct socket *sock,
- struct msghdr *m, size_t total_len)
+static int tipc_send_packet(struct kiocb *iocb, struct socket *sock,
+ struct msghdr *m, size_t total_len)
{
struct sock *sk = sock->sk;
- struct tipc_port *tport = tipc_sk_port(sk);
+ struct tipc_sock *tsk = tipc_sk(sk);
DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
int res = -EINVAL;
long timeo;
/* Handle implied connection establishment */
if (unlikely(dest))
- return send_msg(iocb, sock, m, total_len);
+ return tipc_sendmsg(iocb, sock, m, total_len);
if (total_len > TIPC_MAX_USER_MSG_SIZE)
return -EMSGSIZE;
@@ -758,7 +755,7 @@ static int send_packet(struct kiocb *iocb, struct socket *sock,
timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
do {
- res = tipc_send(tport->ref, m->msg_iov, total_len);
+ res = tipc_send(&tsk->port, m->msg_iov, total_len);
if (likely(res != -ELINKCONG))
break;
res = tipc_wait_for_sndpkt(sock, &timeo);
@@ -772,7 +769,7 @@ exit:
}
/**
- * send_stream - send stream-oriented data
+ * tipc_send_stream - send stream-oriented data
* @iocb: (unused)
* @sock: socket structure
* @m: data to send
@@ -783,11 +780,11 @@ exit:
* Returns the number of bytes sent on success (or partial success),
* or errno if no data sent
*/
-static int send_stream(struct kiocb *iocb, struct socket *sock,
- struct msghdr *m, size_t total_len)
+static int tipc_send_stream(struct kiocb *iocb, struct socket *sock,
+ struct msghdr *m, size_t total_len)
{
struct sock *sk = sock->sk;
- struct tipc_port *tport = tipc_sk_port(sk);
+ struct tipc_sock *tsk = tipc_sk(sk);
struct msghdr my_msg;
struct iovec my_iov;
struct iovec *curr_iov;
@@ -804,7 +801,7 @@ static int send_stream(struct kiocb *iocb, struct socket *sock,
/* Handle special cases where there is no connection */
if (unlikely(sock->state != SS_CONNECTED)) {
if (sock->state == SS_UNCONNECTED)
- res = send_packet(NULL, sock, m, total_len);
+ res = tipc_send_packet(NULL, sock, m, total_len);
else
res = sock->state == SS_DISCONNECTING ? -EPIPE : -ENOTCONN;
goto exit;
@@ -835,21 +832,22 @@ static int send_stream(struct kiocb *iocb, struct socket *sock,
my_msg.msg_name = NULL;
bytes_sent = 0;
- hdr_size = msg_hdr_sz(&tport->phdr);
+ hdr_size = msg_hdr_sz(&tsk->port.phdr);
while (curr_iovlen--) {
curr_start = curr_iov->iov_base;
curr_left = curr_iov->iov_len;
while (curr_left) {
- bytes_to_send = tport->max_pkt - hdr_size;
+ bytes_to_send = tsk->port.max_pkt - hdr_size;
if (bytes_to_send > TIPC_MAX_USER_MSG_SIZE)
bytes_to_send = TIPC_MAX_USER_MSG_SIZE;
if (curr_left < bytes_to_send)
bytes_to_send = curr_left;
my_iov.iov_base = curr_start;
my_iov.iov_len = bytes_to_send;
- res = send_packet(NULL, sock, &my_msg, bytes_to_send);
+ res = tipc_send_packet(NULL, sock, &my_msg,
+ bytes_to_send);
if (res < 0) {
if (bytes_sent)
res = bytes_sent;
@@ -870,27 +868,25 @@ exit:
/**
* auto_connect - complete connection setup to a remote port
- * @sock: socket structure
+ * @tsk: tipc socket structure
* @msg: peer's response message
*
* Returns 0 on success, errno otherwise
*/
-static int auto_connect(struct socket *sock, struct tipc_msg *msg)
+static int auto_connect(struct tipc_sock *tsk, struct tipc_msg *msg)
{
- struct tipc_sock *tsock = tipc_sk(sock->sk);
- struct tipc_port *p_ptr;
+ struct tipc_port *port = &tsk->port;
+ struct socket *sock = tsk->sk.sk_socket;
+ struct tipc_portid peer;
- tsock->peer_name.ref = msg_origport(msg);
- tsock->peer_name.node = msg_orignode(msg);
- p_ptr = tipc_port_deref(tsock->p->ref);
- if (!p_ptr)
- return -EINVAL;
+ peer.ref = msg_origport(msg);
+ peer.node = msg_orignode(msg);
- __tipc_connect(tsock->p->ref, p_ptr, &tsock->peer_name);
+ __tipc_port_connect(port->ref, port, &peer);
if (msg_importance(msg) > TIPC_CRITICAL_IMPORTANCE)
return -EINVAL;
- msg_set_importance(&p_ptr->phdr, (u32)msg_importance(msg));
+ msg_set_importance(&port->phdr, (u32)msg_importance(msg));
sock->state = SS_CONNECTED;
return 0;
}
@@ -989,10 +985,11 @@ static int anc_data_recv(struct msghdr *m, struct tipc_msg *msg,
return 0;
}
-static int tipc_wait_for_rcvmsg(struct socket *sock, long timeo)
+static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop)
{
struct sock *sk = sock->sk;
DEFINE_WAIT(wait);
+ long timeo = *timeop;
int err;
for (;;) {
@@ -1017,11 +1014,12 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long timeo)
break;
}
finish_wait(sk_sleep(sk), &wait);
+ *timeop = timeo;
return err;
}
/**
- * recv_msg - receive packet-oriented message
+ * tipc_recvmsg - receive packet-oriented message
* @iocb: (unused)
* @m: descriptor for message info
* @buf_len: total size of user buffer area
@@ -1032,11 +1030,12 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long timeo)
*
* Returns size of returned message data, errno otherwise
*/
-static int recv_msg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *m, size_t buf_len, int flags)
+static int tipc_recvmsg(struct kiocb *iocb, struct socket *sock,
+ struct msghdr *m, size_t buf_len, int flags)
{
struct sock *sk = sock->sk;
- struct tipc_port *tport = tipc_sk_port(sk);
+ struct tipc_sock *tsk = tipc_sk(sk);
+ struct tipc_port *port = &tsk->port;
struct sk_buff *buf;
struct tipc_msg *msg;
long timeo;
@@ -1059,7 +1058,7 @@ static int recv_msg(struct kiocb *iocb, struct socket *sock,
restart:
/* Look for a message in receive queue; wait if necessary */
- res = tipc_wait_for_rcvmsg(sock, timeo);
+ res = tipc_wait_for_rcvmsg(sock, &timeo);
if (res)
goto exit;
@@ -1079,7 +1078,7 @@ restart:
set_orig_addr(m, msg);
/* Capture ancillary data (optional) */
- res = anc_data_recv(m, msg, tport);
+ res = anc_data_recv(m, msg, port);
if (res)
goto exit;
@@ -1105,8 +1104,8 @@ restart:
/* Consume received message (optional) */
if (likely(!(flags & MSG_PEEK))) {
if ((sock->state != SS_READY) &&
- (++tport->conn_unacked >= TIPC_FLOW_CONTROL_WIN))
- tipc_acknowledge(tport->ref, tport->conn_unacked);
+ (++port->conn_unacked >= TIPC_CONNACK_INTV))
+ tipc_acknowledge(port->ref, port->conn_unacked);
advance_rx_queue(sk);
}
exit:
@@ -1115,7 +1114,7 @@ exit:
}
/**
- * recv_stream - receive stream-oriented data
+ * tipc_recv_stream - receive stream-oriented data
* @iocb: (unused)
* @m: descriptor for message info
* @buf_len: total size of user buffer area
@@ -1126,11 +1125,12 @@ exit:
*
* Returns size of returned message data, errno otherwise
*/
-static int recv_stream(struct kiocb *iocb, struct socket *sock,
- struct msghdr *m, size_t buf_len, int flags)
+static int tipc_recv_stream(struct kiocb *iocb, struct socket *sock,
+ struct msghdr *m, size_t buf_len, int flags)
{
struct sock *sk = sock->sk;
- struct tipc_port *tport = tipc_sk_port(sk);
+ struct tipc_sock *tsk = tipc_sk(sk);
+ struct tipc_port *port = &tsk->port;
struct sk_buff *buf;
struct tipc_msg *msg;
long timeo;
@@ -1156,7 +1156,7 @@ static int recv_stream(struct kiocb *iocb, struct socket *sock,
restart:
/* Look for a message in receive queue; wait if necessary */
- res = tipc_wait_for_rcvmsg(sock, timeo);
+ res = tipc_wait_for_rcvmsg(sock, &timeo);
if (res)
goto exit;
@@ -1175,7 +1175,7 @@ restart:
/* Optionally capture sender's address & ancillary data of first msg */
if (sz_copied == 0) {
set_orig_addr(m, msg);
- res = anc_data_recv(m, msg, tport);
+ res = anc_data_recv(m, msg, port);
if (res)
goto exit;
}
@@ -1213,8 +1213,8 @@ restart:
/* Consume received message (optional) */
if (likely(!(flags & MSG_PEEK))) {
- if (unlikely(++tport->conn_unacked >= TIPC_FLOW_CONTROL_WIN))
- tipc_acknowledge(tport->ref, tport->conn_unacked);
+ if (unlikely(++port->conn_unacked >= TIPC_CONNACK_INTV))
+ tipc_acknowledge(port->ref, port->conn_unacked);
advance_rx_queue(sk);
}
@@ -1252,7 +1252,7 @@ static void tipc_write_space(struct sock *sk)
* @sk: socket
* @len: the length of messages
*/
-static void tipc_data_ready(struct sock *sk, int len)
+static void tipc_data_ready(struct sock *sk)
{
struct socket_wq *wq;
@@ -1266,17 +1266,19 @@ static void tipc_data_ready(struct sock *sk, int len)
/**
* filter_connect - Handle all incoming messages for a connection-based socket
- * @tsock: TIPC socket
+ * @tsk: TIPC socket
* @msg: message
*
* Returns TIPC error status code and socket error status code
* once it encounters some errors
*/
-static u32 filter_connect(struct tipc_sock *tsock, struct sk_buff **buf)
+static u32 filter_connect(struct tipc_sock *tsk, struct sk_buff **buf)
{
- struct socket *sock = tsock->sk.sk_socket;
+ struct sock *sk = &tsk->sk;
+ struct tipc_port *port = &tsk->port;
+ struct socket *sock = sk->sk_socket;
struct tipc_msg *msg = buf_msg(*buf);
- struct sock *sk = &tsock->sk;
+
u32 retval = TIPC_ERR_NO_PORT;
int res;
@@ -1286,10 +1288,10 @@ static u32 filter_connect(struct tipc_sock *tsock, struct sk_buff **buf)
switch ((int)sock->state) {
case SS_CONNECTED:
/* Accept only connection-based messages sent by peer */
- if (msg_connected(msg) && tipc_port_peer_msg(tsock->p, msg)) {
+ if (msg_connected(msg) && tipc_port_peer_msg(port, msg)) {
if (unlikely(msg_errcode(msg))) {
sock->state = SS_DISCONNECTING;
- __tipc_disconnect(tsock->p);
+ __tipc_port_disconnect(port);
}
retval = TIPC_OK;
}
@@ -1306,7 +1308,7 @@ static u32 filter_connect(struct tipc_sock *tsock, struct sk_buff **buf)
if (unlikely(!msg_connected(msg)))
break;
- res = auto_connect(sock, msg);
+ res = auto_connect(tsk, msg);
if (res) {
sock->state = SS_DISCONNECTING;
sk->sk_err = -res;
@@ -1385,6 +1387,7 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *buf)
static u32 filter_rcv(struct sock *sk, struct sk_buff *buf)
{
struct socket *sock = sk->sk_socket;
+ struct tipc_sock *tsk = tipc_sk(sk);
struct tipc_msg *msg = buf_msg(buf);
unsigned int limit = rcvbuf_limit(sk, buf);
u32 res = TIPC_OK;
@@ -1397,7 +1400,7 @@ static u32 filter_rcv(struct sock *sk, struct sk_buff *buf)
if (msg_connected(msg))
return TIPC_ERR_NO_PORT;
} else {
- res = filter_connect(tipc_sk(sk), &buf);
+ res = filter_connect(tsk, &buf);
if (res != TIPC_OK || buf == NULL)
return res;
}
@@ -1411,12 +1414,12 @@ static u32 filter_rcv(struct sock *sk, struct sk_buff *buf)
__skb_queue_tail(&sk->sk_receive_queue, buf);
skb_set_owner_r(buf, sk);
- sk->sk_data_ready(sk, 0);
+ sk->sk_data_ready(sk);
return TIPC_OK;
}
/**
- * backlog_rcv - handle incoming message from backlog queue
+ * tipc_backlog_rcv - handle incoming message from backlog queue
* @sk: socket
* @buf: message
*
@@ -1424,61 +1427,74 @@ static u32 filter_rcv(struct sock *sk, struct sk_buff *buf)
*
* Returns 0
*/
-static int backlog_rcv(struct sock *sk, struct sk_buff *buf)
+static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *buf)
{
u32 res;
+ struct tipc_sock *tsk = tipc_sk(sk);
+ uint truesize = buf->truesize;
res = filter_rcv(sk, buf);
- if (res)
+ if (unlikely(res))
tipc_reject_msg(buf, res);
+
+ if (atomic_read(&tsk->dupl_rcvcnt) < TIPC_CONN_OVERLOAD_LIMIT)
+ atomic_add(truesize, &tsk->dupl_rcvcnt);
+
return 0;
}
/**
- * dispatch - handle incoming message
- * @tport: TIPC port that received message
- * @buf: message
- *
- * Called with port lock already taken.
- *
- * Returns TIPC error status code (TIPC_OK if message is not to be rejected)
+ * tipc_sk_rcv - handle incoming message
+ * @buf: buffer containing arriving message
+ * Consumes buffer
+ * Returns 0 if success, or errno: -EHOSTUNREACH
*/
-static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf)
+int tipc_sk_rcv(struct sk_buff *buf)
{
- struct sock *sk = tport->sk;
- u32 res;
+ struct tipc_sock *tsk;
+ struct tipc_port *port;
+ struct sock *sk;
+ u32 dport = msg_destport(buf_msg(buf));
+ int err = TIPC_OK;
+ uint limit;
- /*
- * Process message if socket is unlocked; otherwise add to backlog queue
- *
- * This code is based on sk_receive_skb(), but must be distinct from it
- * since a TIPC-specific filter/reject mechanism is utilized
- */
+ /* Forward unresolved named message */
+ if (unlikely(!dport)) {
+ tipc_net_route_msg(buf);
+ return 0;
+ }
+
+ /* Validate destination */
+ port = tipc_port_lock(dport);
+ if (unlikely(!port)) {
+ err = TIPC_ERR_NO_PORT;
+ goto exit;
+ }
+
+ tsk = tipc_port_to_sock(port);
+ sk = &tsk->sk;
+
+ /* Queue message */
bh_lock_sock(sk);
+
if (!sock_owned_by_user(sk)) {
- res = filter_rcv(sk, buf);
+ err = filter_rcv(sk, buf);
} else {
- if (sk_add_backlog(sk, buf, rcvbuf_limit(sk, buf)))
- res = TIPC_ERR_OVERLOAD;
- else
- res = TIPC_OK;
+ if (sk->sk_backlog.len == 0)
+ atomic_set(&tsk->dupl_rcvcnt, 0);
+ limit = rcvbuf_limit(sk, buf) + atomic_read(&tsk->dupl_rcvcnt);
+ if (sk_add_backlog(sk, buf, limit))
+ err = TIPC_ERR_OVERLOAD;
}
- bh_unlock_sock(sk);
- return res;
-}
-
-/**
- * wakeupdispatch - wake up port after congestion
- * @tport: port to wakeup
- *
- * Called with port lock already taken.
- */
-static void wakeupdispatch(struct tipc_port *tport)
-{
- struct sock *sk = tport->sk;
+ bh_unlock_sock(sk);
+ tipc_port_unlock(port);
- sk->sk_write_space(sk);
+ if (likely(!err))
+ return 0;
+exit:
+ tipc_reject_msg(buf, err);
+ return -EHOSTUNREACH;
}
static int tipc_wait_for_connect(struct socket *sock, long *timeo_p)
@@ -1504,7 +1520,7 @@ static int tipc_wait_for_connect(struct socket *sock, long *timeo_p)
}
/**
- * connect - establish a connection to another TIPC port
+ * tipc_connect - establish a connection to another TIPC port
* @sock: socket structure
* @dest: socket address for destination port
* @destlen: size of socket address data structure
@@ -1512,8 +1528,8 @@ static int tipc_wait_for_connect(struct socket *sock, long *timeo_p)
*
* Returns 0 on success, errno otherwise
*/
-static int connect(struct socket *sock, struct sockaddr *dest, int destlen,
- int flags)
+static int tipc_connect(struct socket *sock, struct sockaddr *dest,
+ int destlen, int flags)
{
struct sock *sk = sock->sk;
struct sockaddr_tipc *dst = (struct sockaddr_tipc *)dest;
@@ -1554,7 +1570,7 @@ static int connect(struct socket *sock, struct sockaddr *dest, int destlen,
if (!timeout)
m.msg_flags = MSG_DONTWAIT;
- res = send_msg(NULL, sock, &m, 0);
+ res = tipc_sendmsg(NULL, sock, &m, 0);
if ((res < 0) && (res != -EWOULDBLOCK))
goto exit;
@@ -1585,13 +1601,13 @@ exit:
}
/**
- * listen - allow socket to listen for incoming connections
+ * tipc_listen - allow socket to listen for incoming connections
* @sock: socket structure
* @len: (unused)
*
* Returns 0 on success, errno otherwise
*/
-static int listen(struct socket *sock, int len)
+static int tipc_listen(struct socket *sock, int len)
{
struct sock *sk = sock->sk;
int res;
@@ -1646,20 +1662,20 @@ static int tipc_wait_for_accept(struct socket *sock, long timeo)
}
/**
- * accept - wait for connection request
+ * tipc_accept - wait for connection request
* @sock: listening socket
* @newsock: new socket that is to be connected
* @flags: file-related flags associated with socket
*
* Returns 0 on success, errno otherwise
*/
-static int accept(struct socket *sock, struct socket *new_sock, int flags)
+static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags)
{
struct sock *new_sk, *sk = sock->sk;
struct sk_buff *buf;
- struct tipc_sock *new_tsock;
- struct tipc_port *new_tport;
+ struct tipc_port *new_port;
struct tipc_msg *msg;
+ struct tipc_portid peer;
u32 new_ref;
long timeo;
int res;
@@ -1670,7 +1686,6 @@ static int accept(struct socket *sock, struct socket *new_sock, int flags)
res = -EINVAL;
goto exit;
}
-
timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
res = tipc_wait_for_accept(sock, timeo);
if (res)
@@ -1683,9 +1698,8 @@ static int accept(struct socket *sock, struct socket *new_sock, int flags)
goto exit;
new_sk = new_sock->sk;
- new_tsock = tipc_sk(new_sk);
- new_tport = new_tsock->p;
- new_ref = new_tport->ref;
+ new_port = &tipc_sk(new_sk)->port;
+ new_ref = new_port->ref;
msg = buf_msg(buf);
/* we lock on new_sk; but lockdep sees the lock on sk */
@@ -1698,15 +1712,15 @@ static int accept(struct socket *sock, struct socket *new_sock, int flags)
reject_rx_queue(new_sk);
/* Connect new socket to it's peer */
- new_tsock->peer_name.ref = msg_origport(msg);
- new_tsock->peer_name.node = msg_orignode(msg);
- tipc_connect(new_ref, &new_tsock->peer_name);
+ peer.ref = msg_origport(msg);
+ peer.node = msg_orignode(msg);
+ tipc_port_connect(new_ref, &peer);
new_sock->state = SS_CONNECTED;
- tipc_set_portimportance(new_ref, msg_importance(msg));
+ tipc_port_set_importance(new_port, msg_importance(msg));
if (msg_named(msg)) {
- new_tport->conn_type = msg_nametype(msg);
- new_tport->conn_instance = msg_nameinst(msg);
+ new_port->conn_type = msg_nametype(msg);
+ new_port->conn_instance = msg_nameinst(msg);
}
/*
@@ -1717,21 +1731,20 @@ static int accept(struct socket *sock, struct socket *new_sock, int flags)
struct msghdr m = {NULL,};
advance_rx_queue(sk);
- send_packet(NULL, new_sock, &m, 0);
+ tipc_send_packet(NULL, new_sock, &m, 0);
} else {
__skb_dequeue(&sk->sk_receive_queue);
__skb_queue_head(&new_sk->sk_receive_queue, buf);
skb_set_owner_r(buf, new_sk);
}
release_sock(new_sk);
-
exit:
release_sock(sk);
return res;
}
/**
- * shutdown - shutdown socket connection
+ * tipc_shutdown - shutdown socket connection
* @sock: socket structure
* @how: direction to close (must be SHUT_RDWR)
*
@@ -1739,10 +1752,11 @@ exit:
*
* Returns 0 on success, errno otherwise
*/
-static int shutdown(struct socket *sock, int how)
+static int tipc_shutdown(struct socket *sock, int how)
{
struct sock *sk = sock->sk;
- struct tipc_port *tport = tipc_sk_port(sk);
+ struct tipc_sock *tsk = tipc_sk(sk);
+ struct tipc_port *port = &tsk->port;
struct sk_buff *buf;
int res;
@@ -1763,10 +1777,10 @@ restart:
kfree_skb(buf);
goto restart;
}
- tipc_disconnect(tport->ref);
+ tipc_port_disconnect(port->ref);
tipc_reject_msg(buf, TIPC_CONN_SHUTDOWN);
} else {
- tipc_shutdown(tport->ref);
+ tipc_port_shutdown(port->ref);
}
sock->state = SS_DISCONNECTING;
@@ -1792,7 +1806,7 @@ restart:
}
/**
- * setsockopt - set socket option
+ * tipc_setsockopt - set socket option
* @sock: socket structure
* @lvl: option level
* @opt: option identifier
@@ -1804,11 +1818,12 @@ restart:
*
* Returns 0 on success, errno otherwise
*/
-static int setsockopt(struct socket *sock, int lvl, int opt, char __user *ov,
- unsigned int ol)
+static int tipc_setsockopt(struct socket *sock, int lvl, int opt,
+ char __user *ov, unsigned int ol)
{
struct sock *sk = sock->sk;
- struct tipc_port *tport = tipc_sk_port(sk);
+ struct tipc_sock *tsk = tipc_sk(sk);
+ struct tipc_port *port = &tsk->port;
u32 value;
int res;
@@ -1826,16 +1841,16 @@ static int setsockopt(struct socket *sock, int lvl, int opt, char __user *ov,
switch (opt) {
case TIPC_IMPORTANCE:
- res = tipc_set_portimportance(tport->ref, value);
+ tipc_port_set_importance(port, value);
break;
case TIPC_SRC_DROPPABLE:
if (sock->type != SOCK_STREAM)
- res = tipc_set_portunreliable(tport->ref, value);
+ tipc_port_set_unreliable(port, value);
else
res = -ENOPROTOOPT;
break;
case TIPC_DEST_DROPPABLE:
- res = tipc_set_portunreturnable(tport->ref, value);
+ tipc_port_set_unreturnable(port, value);
break;
case TIPC_CONN_TIMEOUT:
tipc_sk(sk)->conn_timeout = value;
@@ -1851,7 +1866,7 @@ static int setsockopt(struct socket *sock, int lvl, int opt, char __user *ov,
}
/**
- * getsockopt - get socket option
+ * tipc_getsockopt - get socket option
* @sock: socket structure
* @lvl: option level
* @opt: option identifier
@@ -1863,11 +1878,12 @@ static int setsockopt(struct socket *sock, int lvl, int opt, char __user *ov,
*
* Returns 0 on success, errno otherwise
*/
-static int getsockopt(struct socket *sock, int lvl, int opt, char __user *ov,
- int __user *ol)
+static int tipc_getsockopt(struct socket *sock, int lvl, int opt,
+ char __user *ov, int __user *ol)
{
struct sock *sk = sock->sk;
- struct tipc_port *tport = tipc_sk_port(sk);
+ struct tipc_sock *tsk = tipc_sk(sk);
+ struct tipc_port *port = &tsk->port;
int len;
u32 value;
int res;
@@ -1884,13 +1900,13 @@ static int getsockopt(struct socket *sock, int lvl, int opt, char __user *ov,
switch (opt) {
case TIPC_IMPORTANCE:
- res = tipc_portimportance(tport->ref, &value);
+ value = tipc_port_importance(port);
break;
case TIPC_SRC_DROPPABLE:
- res = tipc_portunreliable(tport->ref, &value);
+ value = tipc_port_unreliable(port);
break;
case TIPC_DEST_DROPPABLE:
- res = tipc_portunreturnable(tport->ref, &value);
+ value = tipc_port_unreturnable(port);
break;
case TIPC_CONN_TIMEOUT:
value = tipc_sk(sk)->conn_timeout;
@@ -1920,25 +1936,47 @@ static int getsockopt(struct socket *sock, int lvl, int opt, char __user *ov,
return put_user(sizeof(value), ol);
}
+int tipc_ioctl(struct socket *sk, unsigned int cmd, unsigned long arg)
+{
+ struct tipc_sioc_ln_req lnr;
+ void __user *argp = (void __user *)arg;
+
+ switch (cmd) {
+ case SIOCGETLINKNAME:
+ if (copy_from_user(&lnr, argp, sizeof(lnr)))
+ return -EFAULT;
+ if (!tipc_node_get_linkname(lnr.bearer_id, lnr.peer,
+ lnr.linkname, TIPC_MAX_LINK_NAME)) {
+ if (copy_to_user(argp, &lnr, sizeof(lnr)))
+ return -EFAULT;
+ return 0;
+ }
+ return -EADDRNOTAVAIL;
+ break;
+ default:
+ return -ENOIOCTLCMD;
+ }
+}
+
/* Protocol switches for the various types of TIPC sockets */
static const struct proto_ops msg_ops = {
.owner = THIS_MODULE,
.family = AF_TIPC,
- .release = release,
- .bind = bind,
- .connect = connect,
+ .release = tipc_release,
+ .bind = tipc_bind,
+ .connect = tipc_connect,
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
- .getname = get_name,
- .poll = poll,
- .ioctl = sock_no_ioctl,
+ .getname = tipc_getname,
+ .poll = tipc_poll,
+ .ioctl = tipc_ioctl,
.listen = sock_no_listen,
- .shutdown = shutdown,
- .setsockopt = setsockopt,
- .getsockopt = getsockopt,
- .sendmsg = send_msg,
- .recvmsg = recv_msg,
+ .shutdown = tipc_shutdown,
+ .setsockopt = tipc_setsockopt,
+ .getsockopt = tipc_getsockopt,
+ .sendmsg = tipc_sendmsg,
+ .recvmsg = tipc_recvmsg,
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage
};
@@ -1946,20 +1984,20 @@ static const struct proto_ops msg_ops = {
static const struct proto_ops packet_ops = {
.owner = THIS_MODULE,
.family = AF_TIPC,
- .release = release,
- .bind = bind,
- .connect = connect,
+ .release = tipc_release,
+ .bind = tipc_bind,
+ .connect = tipc_connect,
.socketpair = sock_no_socketpair,
- .accept = accept,
- .getname = get_name,
- .poll = poll,
- .ioctl = sock_no_ioctl,
- .listen = listen,
- .shutdown = shutdown,
- .setsockopt = setsockopt,
- .getsockopt = getsockopt,
- .sendmsg = send_packet,
- .recvmsg = recv_msg,
+ .accept = tipc_accept,
+ .getname = tipc_getname,
+ .poll = tipc_poll,
+ .ioctl = tipc_ioctl,
+ .listen = tipc_listen,
+ .shutdown = tipc_shutdown,
+ .setsockopt = tipc_setsockopt,
+ .getsockopt = tipc_getsockopt,
+ .sendmsg = tipc_send_packet,
+ .recvmsg = tipc_recvmsg,
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage
};
@@ -1967,20 +2005,20 @@ static const struct proto_ops packet_ops = {
static const struct proto_ops stream_ops = {
.owner = THIS_MODULE,
.family = AF_TIPC,
- .release = release,
- .bind = bind,
- .connect = connect,
+ .release = tipc_release,
+ .bind = tipc_bind,
+ .connect = tipc_connect,
.socketpair = sock_no_socketpair,
- .accept = accept,
- .getname = get_name,
- .poll = poll,
- .ioctl = sock_no_ioctl,
- .listen = listen,
- .shutdown = shutdown,
- .setsockopt = setsockopt,
- .getsockopt = getsockopt,
- .sendmsg = send_stream,
- .recvmsg = recv_stream,
+ .accept = tipc_accept,
+ .getname = tipc_getname,
+ .poll = tipc_poll,
+ .ioctl = tipc_ioctl,
+ .listen = tipc_listen,
+ .shutdown = tipc_shutdown,
+ .setsockopt = tipc_setsockopt,
+ .getsockopt = tipc_getsockopt,
+ .sendmsg = tipc_send_stream,
+ .recvmsg = tipc_recv_stream,
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage
};
diff --git a/net/tipc/socket.h b/net/tipc/socket.h
new file mode 100644
index 00000000000..3afcd2a70b3
--- /dev/null
+++ b/net/tipc/socket.h
@@ -0,0 +1,74 @@
+/* net/tipc/socket.h: Include file for TIPC socket code
+ *
+ * Copyright (c) 2014, Ericsson AB
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _TIPC_SOCK_H
+#define _TIPC_SOCK_H
+
+#include "port.h"
+#include <net/sock.h>
+
+/**
+ * struct tipc_sock - TIPC socket structure
+ * @sk: socket - interacts with 'port' and with user via the socket API
+ * @port: port - interacts with 'sk' and with the rest of the TIPC stack
+ * @peer_name: the peer of the connection, if any
+ * @conn_timeout: the time we can wait for an unresponded setup request
+ * @dupl_rcvcnt: number of bytes counted twice, in both backlog and rcv queue
+ */
+
+struct tipc_sock {
+ struct sock sk;
+ struct tipc_port port;
+ unsigned int conn_timeout;
+ atomic_t dupl_rcvcnt;
+};
+
+static inline struct tipc_sock *tipc_sk(const struct sock *sk)
+{
+ return container_of(sk, struct tipc_sock, sk);
+}
+
+static inline struct tipc_sock *tipc_port_to_sock(const struct tipc_port *port)
+{
+ return container_of(port, struct tipc_sock, port);
+}
+
+static inline void tipc_sock_wakeup(struct tipc_sock *tsk)
+{
+ tsk->sk.sk_write_space(&tsk->sk);
+}
+
+int tipc_sk_rcv(struct sk_buff *buf);
+
+#endif
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 94404f19f9d..e9688438073 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1207,7 +1207,7 @@ restart:
sk->sk_state = TCP_ESTABLISHED;
sock_hold(newsk);
- smp_mb__after_atomic_inc(); /* sock_hold() does an atomic_inc() */
+ smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */
unix_peer(sk) = newsk;
unix_state_unlock(sk);
@@ -1217,7 +1217,7 @@ restart:
__skb_queue_tail(&other->sk_receive_queue, skb);
spin_unlock(&other->sk_receive_queue.lock);
unix_state_unlock(other);
- other->sk_data_ready(other, 0);
+ other->sk_data_ready(other);
sock_put(other);
return 0;
@@ -1492,10 +1492,14 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
if (len > sk->sk_sndbuf - 32)
goto out;
- if (len > SKB_MAX_ALLOC)
+ if (len > SKB_MAX_ALLOC) {
data_len = min_t(size_t,
len - SKB_MAX_ALLOC,
MAX_SKB_FRAGS * PAGE_SIZE);
+ data_len = PAGE_ALIGN(data_len);
+
+ BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
+ }
skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
msg->msg_flags & MSG_DONTWAIT, &err,
@@ -1600,7 +1604,7 @@ restart:
if (max_level > unix_sk(other)->recursion_level)
unix_sk(other)->recursion_level = max_level;
unix_state_unlock(other);
- other->sk_data_ready(other, len);
+ other->sk_data_ready(other);
sock_put(other);
scm_destroy(siocb->scm);
return len;
@@ -1670,6 +1674,8 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
+ data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
+
skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
msg->msg_flags & MSG_DONTWAIT, &err,
get_order(UNIX_SKB_FRAGS_SZ));
@@ -1706,7 +1712,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
if (max_level > unix_sk(other)->recursion_level)
unix_sk(other)->recursion_level = max_level;
unix_state_unlock(other);
- other->sk_data_ready(other, size);
+ other->sk_data_ready(other);
sent += size;
}
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 5adfd94c5b8..85d232bed87 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1925,9 +1925,23 @@ static struct miscdevice vsock_device = {
.fops = &vsock_device_ops,
};
-static int __vsock_core_init(void)
+int __vsock_core_init(const struct vsock_transport *t, struct module *owner)
{
- int err;
+ int err = mutex_lock_interruptible(&vsock_register_mutex);
+
+ if (err)
+ return err;
+
+ if (transport) {
+ err = -EBUSY;
+ goto err_busy;
+ }
+
+ /* Transport must be the owner of the protocol so that it can't
+ * unload while there are open sockets.
+ */
+ vsock_proto.owner = owner;
+ transport = t;
vsock_init_tables();
@@ -1951,36 +1965,19 @@ static int __vsock_core_init(void)
goto err_unregister_proto;
}
+ mutex_unlock(&vsock_register_mutex);
return 0;
err_unregister_proto:
proto_unregister(&vsock_proto);
err_misc_deregister:
misc_deregister(&vsock_device);
- return err;
-}
-
-int vsock_core_init(const struct vsock_transport *t)
-{
- int retval = mutex_lock_interruptible(&vsock_register_mutex);
- if (retval)
- return retval;
-
- if (transport) {
- retval = -EBUSY;
- goto out;
- }
-
- transport = t;
- retval = __vsock_core_init();
- if (retval)
- transport = NULL;
-
-out:
+ transport = NULL;
+err_busy:
mutex_unlock(&vsock_register_mutex);
- return retval;
+ return err;
}
-EXPORT_SYMBOL_GPL(vsock_core_init);
+EXPORT_SYMBOL_GPL(__vsock_core_init);
void vsock_core_exit(void)
{
@@ -2000,5 +1997,5 @@ EXPORT_SYMBOL_GPL(vsock_core_exit);
MODULE_AUTHOR("VMware, Inc.");
MODULE_DESCRIPTION("VMware Virtual Socket Family");
-MODULE_VERSION("1.0.0.0-k");
+MODULE_VERSION("1.0.1.0-k");
MODULE_LICENSE("GPL v2");
diff --git a/net/vmw_vsock/vmci_transport_notify.c b/net/vmw_vsock/vmci_transport_notify.c
index 9a730744e7b..9b7f207f2be 100644
--- a/net/vmw_vsock/vmci_transport_notify.c
+++ b/net/vmw_vsock/vmci_transport_notify.c
@@ -315,7 +315,7 @@ vmci_transport_handle_wrote(struct sock *sk,
struct vsock_sock *vsk = vsock_sk(sk);
PKT_FIELD(vsk, sent_waiting_read) = false;
#endif
- sk->sk_data_ready(sk, 0);
+ sk->sk_data_ready(sk);
}
static void vmci_transport_notify_pkt_socket_init(struct sock *sk)
diff --git a/net/vmw_vsock/vmci_transport_notify_qstate.c b/net/vmw_vsock/vmci_transport_notify_qstate.c
index 622bd7aa101..dc9c7929a2f 100644
--- a/net/vmw_vsock/vmci_transport_notify_qstate.c
+++ b/net/vmw_vsock/vmci_transport_notify_qstate.c
@@ -92,7 +92,7 @@ vmci_transport_handle_wrote(struct sock *sk,
bool bottom_half,
struct sockaddr_vm *dst, struct sockaddr_vm *src)
{
- sk->sk_data_ready(sk, 0);
+ sk->sk_data_ready(sk);
}
static void vsock_block_update_write_window(struct sock *sk)
@@ -290,7 +290,7 @@ vmci_transport_notify_pkt_recv_post_dequeue(
/* See the comment in
* vmci_transport_notify_pkt_send_post_enqueue().
*/
- sk->sk_data_ready(sk, 0);
+ sk->sk_data_ready(sk);
}
return err;
diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig
index 16d08b39921..405f3c4cf70 100644
--- a/net/wireless/Kconfig
+++ b/net/wireless/Kconfig
@@ -95,6 +95,43 @@ config CFG80211_CERTIFICATION_ONUS
you are a wireless researcher and are working in a controlled
and approved environment by your local regulatory agency.
+config CFG80211_REG_CELLULAR_HINTS
+ bool "cfg80211 regulatory support for cellular base station hints"
+ depends on CFG80211_CERTIFICATION_ONUS
+ ---help---
+ This option enables support for parsing regulatory hints
+ from cellular base stations. If enabled and at least one driver
+ claims support for parsing cellular base station hints the
+ regulatory core will allow and parse these regulatory hints.
+ The regulatory core will only apply these regulatory hints on
+ drivers that support this feature. You should only enable this
+ feature if you have tested and validated this feature on your
+ systems.
+
+config CFG80211_REG_RELAX_NO_IR
+ bool "cfg80211 support for NO_IR relaxation"
+ depends on CFG80211_CERTIFICATION_ONUS
+ ---help---
+ This option enables support for relaxation of the NO_IR flag for
+ situations in which certain regulatory bodies have provided clarifications
+ on how relaxation can occur. This feature has an inherent dependency on
+ userspace features which must have been properly tested and as such is
+ not enabled by default.
+
+ A relaxation feature example is allowing the operation of a P2P group
+ owner (GO) on channels marked with NO_IR if there is an additional BSS
+ interface which is associated to an AP that userspace assumes or confirms
+ to be an authorized master, i.e., with radar detection support and DFS
+ capabilities. However, note that in order to not create daisy chain
+ scenarios, this relaxation is not allowed in cases where the BSS client
+ is associated to a P2P GO; in addition, a P2P GO instantiated on
+ a channel due to this relaxation should not allow connections from
+ non-P2P clients.
+
+ The regulatory core will apply these relaxations only for drivers that
+ support this feature by declaring the appropriate channel flags and
+ capabilities in their registration flow.
+
config CFG80211_DEFAULT_PS
bool "enable powersave by default"
depends on CFG80211
diff --git a/net/wireless/ap.c b/net/wireless/ap.c
index 11ee4ed04f7..bdad1f95156 100644
--- a/net/wireless/ap.c
+++ b/net/wireless/ap.c
@@ -6,8 +6,8 @@
#include "rdev-ops.h"
-static int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev,
- struct net_device *dev)
+int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev,
+ struct net_device *dev, bool notify)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
int err;
@@ -27,22 +27,24 @@ static int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev,
err = rdev_stop_ap(rdev, dev);
if (!err) {
wdev->beacon_interval = 0;
- wdev->channel = NULL;
+ memset(&wdev->chandef, 0, sizeof(wdev->chandef));
wdev->ssid_len = 0;
rdev_set_qos_map(rdev, dev, NULL);
+ if (notify)
+ nl80211_send_ap_stopped(wdev);
}
return err;
}
int cfg80211_stop_ap(struct cfg80211_registered_device *rdev,
- struct net_device *dev)
+ struct net_device *dev, bool notify)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
int err;
wdev_lock(wdev);
- err = __cfg80211_stop_ap(rdev, dev);
+ err = __cfg80211_stop_ap(rdev, dev, notify);
wdev_unlock(wdev);
return err;
diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index 78559b5bbd1..992b34070bc 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -326,28 +326,57 @@ static int cfg80211_get_chans_dfs_required(struct wiphy *wiphy,
int cfg80211_chandef_dfs_required(struct wiphy *wiphy,
- const struct cfg80211_chan_def *chandef)
+ const struct cfg80211_chan_def *chandef,
+ enum nl80211_iftype iftype)
{
int width;
- int r;
+ int ret;
if (WARN_ON(!cfg80211_chandef_valid(chandef)))
return -EINVAL;
- width = cfg80211_chandef_get_width(chandef);
- if (width < 0)
- return -EINVAL;
+ switch (iftype) {
+ case NL80211_IFTYPE_ADHOC:
+ case NL80211_IFTYPE_AP:
+ case NL80211_IFTYPE_P2P_GO:
+ case NL80211_IFTYPE_MESH_POINT:
+ width = cfg80211_chandef_get_width(chandef);
+ if (width < 0)
+ return -EINVAL;
- r = cfg80211_get_chans_dfs_required(wiphy, chandef->center_freq1,
- width);
- if (r)
- return r;
+ ret = cfg80211_get_chans_dfs_required(wiphy,
+ chandef->center_freq1,
+ width);
+ if (ret < 0)
+ return ret;
+ else if (ret > 0)
+ return BIT(chandef->width);
- if (!chandef->center_freq2)
- return 0;
+ if (!chandef->center_freq2)
+ return 0;
- return cfg80211_get_chans_dfs_required(wiphy, chandef->center_freq2,
- width);
+ ret = cfg80211_get_chans_dfs_required(wiphy,
+ chandef->center_freq2,
+ width);
+ if (ret < 0)
+ return ret;
+ else if (ret > 0)
+ return BIT(chandef->width);
+
+ break;
+ case NL80211_IFTYPE_STATION:
+ case NL80211_IFTYPE_P2P_CLIENT:
+ case NL80211_IFTYPE_MONITOR:
+ case NL80211_IFTYPE_AP_VLAN:
+ case NL80211_IFTYPE_WDS:
+ case NL80211_IFTYPE_P2P_DEVICE:
+ break;
+ case NL80211_IFTYPE_UNSPECIFIED:
+ case NUM_NL80211_IFTYPES:
+ WARN_ON(1);
+ }
+
+ return 0;
}
EXPORT_SYMBOL(cfg80211_chandef_dfs_required);
@@ -490,6 +519,62 @@ static bool cfg80211_chandef_dfs_available(struct wiphy *wiphy,
return r;
}
+static unsigned int cfg80211_get_chans_dfs_cac_time(struct wiphy *wiphy,
+ u32 center_freq,
+ u32 bandwidth)
+{
+ struct ieee80211_channel *c;
+ u32 start_freq, end_freq, freq;
+ unsigned int dfs_cac_ms = 0;
+
+ start_freq = cfg80211_get_start_freq(center_freq, bandwidth);
+ end_freq = cfg80211_get_end_freq(center_freq, bandwidth);
+
+ for (freq = start_freq; freq <= end_freq; freq += 20) {
+ c = ieee80211_get_channel(wiphy, freq);
+ if (!c)
+ return 0;
+
+ if (c->flags & IEEE80211_CHAN_DISABLED)
+ return 0;
+
+ if (!(c->flags & IEEE80211_CHAN_RADAR))
+ continue;
+
+ if (c->dfs_cac_ms > dfs_cac_ms)
+ dfs_cac_ms = c->dfs_cac_ms;
+ }
+
+ return dfs_cac_ms;
+}
+
+unsigned int
+cfg80211_chandef_dfs_cac_time(struct wiphy *wiphy,
+ const struct cfg80211_chan_def *chandef)
+{
+ int width;
+ unsigned int t1 = 0, t2 = 0;
+
+ if (WARN_ON(!cfg80211_chandef_valid(chandef)))
+ return 0;
+
+ width = cfg80211_chandef_get_width(chandef);
+ if (width < 0)
+ return 0;
+
+ t1 = cfg80211_get_chans_dfs_cac_time(wiphy,
+ chandef->center_freq1,
+ width);
+
+ if (!chandef->center_freq2)
+ return t1;
+
+ t2 = cfg80211_get_chans_dfs_cac_time(wiphy,
+ chandef->center_freq2,
+ width);
+
+ return max(t1, t2);
+}
static bool cfg80211_secondary_chans_ok(struct wiphy *wiphy,
u32 center_freq, u32 bandwidth,
@@ -531,12 +616,14 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy,
width = 5;
break;
case NL80211_CHAN_WIDTH_10:
+ prohibited_flags |= IEEE80211_CHAN_NO_10MHZ;
width = 10;
break;
case NL80211_CHAN_WIDTH_20:
if (!ht_cap->ht_supported)
return false;
case NL80211_CHAN_WIDTH_20_NOHT:
+ prohibited_flags |= IEEE80211_CHAN_NO_20MHZ;
width = 20;
break;
case NL80211_CHAN_WIDTH_40:
@@ -605,17 +692,111 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy,
}
EXPORT_SYMBOL(cfg80211_chandef_usable);
+/*
+ * For GO only, check if the channel can be used under permissive conditions
+ * mandated by some regulatory bodies, i.e., the channel is marked with
+ * IEEE80211_CHAN_GO_CONCURRENT and there is an additional station interface
+ * associated to an AP on the same channel or on the same UNII band
+ * (assuming that the AP is an authorized master).
+ * In addition allow the GO to operate on a channel on which indoor operation is
+ * allowed, iff we are currently operating in an indoor environment.
+ */
+static bool cfg80211_go_permissive_chan(struct cfg80211_registered_device *rdev,
+ struct ieee80211_channel *chan)
+{
+ struct wireless_dev *wdev_iter;
+ struct wiphy *wiphy = wiphy_idx_to_wiphy(rdev->wiphy_idx);
+
+ ASSERT_RTNL();
+
+ if (!config_enabled(CONFIG_CFG80211_REG_RELAX_NO_IR) ||
+ !(wiphy->regulatory_flags & REGULATORY_ENABLE_RELAX_NO_IR))
+ return false;
+
+ if (regulatory_indoor_allowed() &&
+ (chan->flags & IEEE80211_CHAN_INDOOR_ONLY))
+ return true;
+
+ if (!(chan->flags & IEEE80211_CHAN_GO_CONCURRENT))
+ return false;
+
+ /*
+ * Generally, it is possible to rely on another device/driver to allow
+ * the GO concurrent relaxation, however, since the device can further
+ * enforce the relaxation (by doing similar verifications to this),
+ * and thus fail the GO instantiation, consider only the interfaces of
+ * the current registered device.
+ */
+ list_for_each_entry(wdev_iter, &rdev->wdev_list, list) {
+ struct ieee80211_channel *other_chan = NULL;
+ int r1, r2;
+
+ if (wdev_iter->iftype != NL80211_IFTYPE_STATION ||
+ !netif_running(wdev_iter->netdev))
+ continue;
+
+ wdev_lock(wdev_iter);
+ if (wdev_iter->current_bss)
+ other_chan = wdev_iter->current_bss->pub.channel;
+ wdev_unlock(wdev_iter);
+
+ if (!other_chan)
+ continue;
+
+ if (chan == other_chan)
+ return true;
+
+ if (chan->band != IEEE80211_BAND_5GHZ)
+ continue;
+
+ r1 = cfg80211_get_unii(chan->center_freq);
+ r2 = cfg80211_get_unii(other_chan->center_freq);
+
+ if (r1 != -EINVAL && r1 == r2) {
+ /*
+ * At some locations channels 149-165 are considered a
+ * bundle, but at other locations, e.g., Indonesia,
+ * channels 149-161 are considered a bundle while
+ * channel 165 is left out and considered to be in a
+ * different bundle. Thus, in case that there is a
+ * station interface connected to an AP on channel 165,
+ * it is assumed that channels 149-161 are allowed for
+ * GO operations. However, having a station interface
+ * connected to an AP on channels 149-161, does not
+ * allow GO operation on channel 165.
+ */
+ if (chan->center_freq == 5825 &&
+ other_chan->center_freq != 5825)
+ continue;
+ return true;
+ }
+ }
+
+ return false;
+}
+
bool cfg80211_reg_can_beacon(struct wiphy *wiphy,
- struct cfg80211_chan_def *chandef)
+ struct cfg80211_chan_def *chandef,
+ enum nl80211_iftype iftype)
{
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
bool res;
u32 prohibited_flags = IEEE80211_CHAN_DISABLED |
- IEEE80211_CHAN_NO_IR |
IEEE80211_CHAN_RADAR;
- trace_cfg80211_reg_can_beacon(wiphy, chandef);
+ trace_cfg80211_reg_can_beacon(wiphy, chandef, iftype);
+
+ /*
+ * Under certain conditions suggested by some regulatory bodies,
+ * a GO can operate on channels marked with IEEE80211_CHAN_NO_IR
+ * so set this flag only if such relaxations are not enabled and
+ * the conditions are not met.
+ */
+ if (iftype != NL80211_IFTYPE_P2P_GO ||
+ !cfg80211_go_permissive_chan(rdev, chandef->chan))
+ prohibited_flags |= IEEE80211_CHAN_NO_IR;
- if (cfg80211_chandef_dfs_required(wiphy, chandef) > 0 &&
+ if (cfg80211_chandef_dfs_required(wiphy, chandef, iftype) > 0 &&
cfg80211_chandef_dfs_available(wiphy, chandef)) {
/* We can skip IEEE80211_CHAN_NO_IR if chandef dfs available */
prohibited_flags = IEEE80211_CHAN_DISABLED;
@@ -642,8 +823,11 @@ int cfg80211_set_monitor_channel(struct cfg80211_registered_device *rdev,
void
cfg80211_get_chan_state(struct wireless_dev *wdev,
struct ieee80211_channel **chan,
- enum cfg80211_chan_mode *chanmode)
+ enum cfg80211_chan_mode *chanmode,
+ u8 *radar_detect)
{
+ int ret;
+
*chan = NULL;
*chanmode = CHAN_MODE_UNDEFINED;
@@ -660,6 +844,11 @@ cfg80211_get_chan_state(struct wireless_dev *wdev,
!wdev->ibss_dfs_possible)
? CHAN_MODE_SHARED
: CHAN_MODE_EXCLUSIVE;
+
+ /* consider worst-case - IBSS can try to return to the
+ * original user-specified channel as creator */
+ if (wdev->ibss_dfs_possible)
+ *radar_detect |= BIT(wdev->chandef.width);
return;
}
break;
@@ -674,33 +863,42 @@ cfg80211_get_chan_state(struct wireless_dev *wdev,
case NL80211_IFTYPE_AP:
case NL80211_IFTYPE_P2P_GO:
if (wdev->cac_started) {
- *chan = wdev->channel;
+ *chan = wdev->chandef.chan;
*chanmode = CHAN_MODE_SHARED;
+ *radar_detect |= BIT(wdev->chandef.width);
} else if (wdev->beacon_interval) {
- *chan = wdev->channel;
+ *chan = wdev->chandef.chan;
*chanmode = CHAN_MODE_SHARED;
+
+ ret = cfg80211_chandef_dfs_required(wdev->wiphy,
+ &wdev->chandef,
+ wdev->iftype);
+ WARN_ON(ret < 0);
+ if (ret > 0)
+ *radar_detect |= BIT(wdev->chandef.width);
}
return;
case NL80211_IFTYPE_MESH_POINT:
if (wdev->mesh_id_len) {
- *chan = wdev->channel;
+ *chan = wdev->chandef.chan;
*chanmode = CHAN_MODE_SHARED;
+
+ ret = cfg80211_chandef_dfs_required(wdev->wiphy,
+ &wdev->chandef,
+ wdev->iftype);
+ WARN_ON(ret < 0);
+ if (ret > 0)
+ *radar_detect |= BIT(wdev->chandef.width);
}
return;
case NL80211_IFTYPE_MONITOR:
case NL80211_IFTYPE_AP_VLAN:
case NL80211_IFTYPE_WDS:
- /* these interface types don't really have a channel */
- return;
case NL80211_IFTYPE_P2P_DEVICE:
- if (wdev->wiphy->features &
- NL80211_FEATURE_P2P_DEVICE_NEEDS_CHANNEL)
- *chanmode = CHAN_MODE_EXCLUSIVE;
+ /* these interface types don't really have a channel */
return;
case NL80211_IFTYPE_UNSPECIFIED:
case NUM_NL80211_IFTYPES:
WARN_ON(1);
}
-
- return;
}
diff --git a/net/wireless/core.c b/net/wireless/core.c
index a3bf18d1160..a1c40654dd9 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -69,7 +69,7 @@ struct cfg80211_registered_device *cfg80211_rdev_by_wiphy_idx(int wiphy_idx)
int get_wiphy_idx(struct wiphy *wiphy)
{
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
return rdev->wiphy_idx;
}
@@ -130,7 +130,7 @@ int cfg80211_dev_rename(struct cfg80211_registered_device *rdev,
newname))
pr_err("failed to rename debugfs dir to %s!\n", newname);
- nl80211_notify_dev_rename(rdev);
+ nl80211_notify_wiphy(rdev, NL80211_CMD_NEW_WIPHY);
return 0;
}
@@ -210,15 +210,12 @@ void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev,
}
}
-static int cfg80211_rfkill_set_block(void *data, bool blocked)
+void cfg80211_shutdown_all_interfaces(struct wiphy *wiphy)
{
- struct cfg80211_registered_device *rdev = data;
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
struct wireless_dev *wdev;
- if (!blocked)
- return 0;
-
- rtnl_lock();
+ ASSERT_RTNL();
list_for_each_entry(wdev, &rdev->wdev_list, list) {
if (wdev->netdev) {
@@ -234,7 +231,18 @@ static int cfg80211_rfkill_set_block(void *data, bool blocked)
break;
}
}
+}
+EXPORT_SYMBOL_GPL(cfg80211_shutdown_all_interfaces);
+
+static int cfg80211_rfkill_set_block(void *data, bool blocked)
+{
+ struct cfg80211_registered_device *rdev = data;
+
+ if (!blocked)
+ return 0;
+ rtnl_lock();
+ cfg80211_shutdown_all_interfaces(&rdev->wiphy);
rtnl_unlock();
return 0;
@@ -260,6 +268,45 @@ static void cfg80211_event_work(struct work_struct *work)
rtnl_unlock();
}
+void cfg80211_destroy_ifaces(struct cfg80211_registered_device *rdev)
+{
+ struct cfg80211_iface_destroy *item;
+
+ ASSERT_RTNL();
+
+ spin_lock_irq(&rdev->destroy_list_lock);
+ while ((item = list_first_entry_or_null(&rdev->destroy_list,
+ struct cfg80211_iface_destroy,
+ list))) {
+ struct wireless_dev *wdev, *tmp;
+ u32 nlportid = item->nlportid;
+
+ list_del(&item->list);
+ kfree(item);
+ spin_unlock_irq(&rdev->destroy_list_lock);
+
+ list_for_each_entry_safe(wdev, tmp, &rdev->wdev_list, list) {
+ if (nlportid == wdev->owner_nlportid)
+ rdev_del_virtual_intf(rdev, wdev);
+ }
+
+ spin_lock_irq(&rdev->destroy_list_lock);
+ }
+ spin_unlock_irq(&rdev->destroy_list_lock);
+}
+
+static void cfg80211_destroy_iface_wk(struct work_struct *work)
+{
+ struct cfg80211_registered_device *rdev;
+
+ rdev = container_of(work, struct cfg80211_registered_device,
+ destroy_work);
+
+ rtnl_lock();
+ cfg80211_destroy_ifaces(rdev);
+ rtnl_unlock();
+}
+
/* exported functions */
struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv)
@@ -318,6 +365,10 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv)
rdev->wiphy.dev.class = &ieee80211_class;
rdev->wiphy.dev.platform_data = rdev;
+ INIT_LIST_HEAD(&rdev->destroy_list);
+ spin_lock_init(&rdev->destroy_list_lock);
+ INIT_WORK(&rdev->destroy_work, cfg80211_destroy_iface_wk);
+
#ifdef CONFIG_CFG80211_DEFAULT_PS
rdev->wiphy.flags |= WIPHY_FLAG_PS_ON_BY_DEFAULT;
#endif
@@ -351,6 +402,8 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv)
rdev->wiphy.rts_threshold = (u32) -1;
rdev->wiphy.coverage_class = 0;
+ rdev->wiphy.max_num_csa_counters = 1;
+
return &rdev->wiphy;
}
EXPORT_SYMBOL(wiphy_new);
@@ -396,10 +449,7 @@ static int wiphy_verify_combinations(struct wiphy *wiphy)
for (j = 0; j < c->n_limits; j++) {
u16 types = c->limits[j].types;
- /*
- * interface types shouldn't overlap, this is
- * used in cfg80211_can_change_interface()
- */
+ /* interface types shouldn't overlap */
if (WARN_ON(types & all_iftypes))
return -EINVAL;
all_iftypes |= types;
@@ -435,7 +485,7 @@ static int wiphy_verify_combinations(struct wiphy *wiphy)
int wiphy_register(struct wiphy *wiphy)
{
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
int res;
enum ieee80211_band band;
struct ieee80211_supported_band *sband;
@@ -610,13 +660,15 @@ int wiphy_register(struct wiphy *wiphy)
return res;
}
+ nl80211_notify_wiphy(rdev, NL80211_CMD_NEW_WIPHY);
+
return 0;
}
EXPORT_SYMBOL(wiphy_register);
void wiphy_rfkill_start_polling(struct wiphy *wiphy)
{
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
if (!rdev->ops->rfkill_poll)
return;
@@ -627,7 +679,7 @@ EXPORT_SYMBOL(wiphy_rfkill_start_polling);
void wiphy_rfkill_stop_polling(struct wiphy *wiphy)
{
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
rfkill_pause_polling(rdev->rfkill);
}
@@ -635,7 +687,7 @@ EXPORT_SYMBOL(wiphy_rfkill_stop_polling);
void wiphy_unregister(struct wiphy *wiphy)
{
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
wait_event(rdev->dev_wait, ({
int __count;
@@ -648,9 +700,10 @@ void wiphy_unregister(struct wiphy *wiphy)
rfkill_unregister(rdev->rfkill);
rtnl_lock();
+ nl80211_notify_wiphy(rdev, NL80211_CMD_DEL_WIPHY);
rdev->wiphy.registered = false;
- BUG_ON(!list_empty(&rdev->wdev_list));
+ WARN_ON(!list_empty(&rdev->wdev_list));
/*
* First remove the hardware from everywhere, this makes
@@ -675,6 +728,7 @@ void wiphy_unregister(struct wiphy *wiphy)
cancel_work_sync(&rdev->conn_work);
flush_work(&rdev->event_work);
cancel_delayed_work_sync(&rdev->dfs_update_channels_wk);
+ flush_work(&rdev->destroy_work);
#ifdef CONFIG_PM
if (rdev->wiphy.wowlan_config && rdev->ops->set_wakeup)
@@ -707,7 +761,7 @@ EXPORT_SYMBOL(wiphy_free);
void wiphy_rfkill_set_hw_state(struct wiphy *wiphy, bool blocked)
{
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
if (rfkill_set_hw_state(rdev->rfkill, blocked))
schedule_work(&rdev->rfkill_sync);
@@ -716,7 +770,7 @@ EXPORT_SYMBOL(wiphy_rfkill_set_hw_state);
void cfg80211_unregister_wdev(struct wireless_dev *wdev)
{
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
ASSERT_RTNL();
@@ -737,7 +791,7 @@ void cfg80211_unregister_wdev(struct wireless_dev *wdev)
}
EXPORT_SYMBOL(cfg80211_unregister_wdev);
-static struct device_type wiphy_type = {
+static const struct device_type wiphy_type = {
.name = "wlan",
};
@@ -751,23 +805,23 @@ void cfg80211_update_iface_num(struct cfg80211_registered_device *rdev,
rdev->num_running_monitor_ifaces += num;
}
-void cfg80211_leave(struct cfg80211_registered_device *rdev,
- struct wireless_dev *wdev)
+void __cfg80211_leave(struct cfg80211_registered_device *rdev,
+ struct wireless_dev *wdev)
{
struct net_device *dev = wdev->netdev;
ASSERT_RTNL();
+ ASSERT_WDEV_LOCK(wdev);
switch (wdev->iftype) {
case NL80211_IFTYPE_ADHOC:
- cfg80211_leave_ibss(rdev, dev, true);
+ __cfg80211_leave_ibss(rdev, dev, true);
break;
case NL80211_IFTYPE_P2P_CLIENT:
case NL80211_IFTYPE_STATION:
if (rdev->sched_scan_req && dev == rdev->sched_scan_req->dev)
__cfg80211_stop_sched_scan(rdev, false);
- wdev_lock(wdev);
#ifdef CONFIG_CFG80211_WEXT
kfree(wdev->wext.ie);
wdev->wext.ie = NULL;
@@ -776,32 +830,60 @@ void cfg80211_leave(struct cfg80211_registered_device *rdev,
#endif
cfg80211_disconnect(rdev, dev,
WLAN_REASON_DEAUTH_LEAVING, true);
- wdev_unlock(wdev);
break;
case NL80211_IFTYPE_MESH_POINT:
- cfg80211_leave_mesh(rdev, dev);
+ __cfg80211_leave_mesh(rdev, dev);
break;
case NL80211_IFTYPE_AP:
case NL80211_IFTYPE_P2P_GO:
- cfg80211_stop_ap(rdev, dev);
+ __cfg80211_stop_ap(rdev, dev, true);
break;
default:
break;
}
}
+void cfg80211_leave(struct cfg80211_registered_device *rdev,
+ struct wireless_dev *wdev)
+{
+ wdev_lock(wdev);
+ __cfg80211_leave(rdev, wdev);
+ wdev_unlock(wdev);
+}
+
+void cfg80211_stop_iface(struct wiphy *wiphy, struct wireless_dev *wdev,
+ gfp_t gfp)
+{
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
+ struct cfg80211_event *ev;
+ unsigned long flags;
+
+ trace_cfg80211_stop_iface(wiphy, wdev);
+
+ ev = kzalloc(sizeof(*ev), gfp);
+ if (!ev)
+ return;
+
+ ev->type = EVENT_STOPPED;
+
+ spin_lock_irqsave(&wdev->event_lock, flags);
+ list_add_tail(&ev->list, &wdev->event_list);
+ spin_unlock_irqrestore(&wdev->event_lock, flags);
+ queue_work(cfg80211_wq, &rdev->event_work);
+}
+EXPORT_SYMBOL(cfg80211_stop_iface);
+
static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
unsigned long state, void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct wireless_dev *wdev = dev->ieee80211_ptr;
struct cfg80211_registered_device *rdev;
- int ret;
if (!wdev)
return NOTIFY_DONE;
- rdev = wiphy_to_dev(wdev->wiphy);
+ rdev = wiphy_to_rdev(wdev->wiphy);
WARN_ON(wdev->iftype == NL80211_IFTYPE_UNSPECIFIED);
@@ -959,13 +1041,14 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
case NETDEV_PRE_UP:
if (!(wdev->wiphy->interface_modes & BIT(wdev->iftype)))
return notifier_from_errno(-EOPNOTSUPP);
- ret = cfg80211_can_add_interface(rdev, wdev->iftype);
- if (ret)
- return notifier_from_errno(ret);
+ if (rfkill_blocked(rdev->rfkill))
+ return notifier_from_errno(-ERFKILL);
break;
+ default:
+ return NOTIFY_DONE;
}
- return NOTIFY_DONE;
+ return NOTIFY_OK;
}
static struct notifier_block cfg80211_netdev_notifier = {
diff --git a/net/wireless/core.h b/net/wireless/core.h
index f1d193b557b..7e3a3cef7df 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -80,13 +80,17 @@ struct cfg80211_registered_device {
struct cfg80211_coalesce *coalesce;
+ spinlock_t destroy_list_lock;
+ struct list_head destroy_list;
+ struct work_struct destroy_work;
+
/* must be last because of the way we do wiphy_priv(),
* and it should at least be aligned to NETDEV_ALIGN */
struct wiphy wiphy __aligned(NETDEV_ALIGN);
};
static inline
-struct cfg80211_registered_device *wiphy_to_dev(struct wiphy *wiphy)
+struct cfg80211_registered_device *wiphy_to_rdev(struct wiphy *wiphy)
{
BUG_ON(!wiphy);
return container_of(wiphy, struct cfg80211_registered_device, wiphy);
@@ -166,7 +170,6 @@ static inline void wdev_unlock(struct wireless_dev *wdev)
mutex_unlock(&wdev->mtx);
}
-#define ASSERT_RDEV_LOCK(rdev) ASSERT_RTNL()
#define ASSERT_WDEV_LOCK(wdev) lockdep_assert_held(&(wdev)->mtx)
static inline bool cfg80211_has_monitors_only(struct cfg80211_registered_device *rdev)
@@ -182,6 +185,7 @@ enum cfg80211_event_type {
EVENT_ROAMED,
EVENT_DISCONNECTED,
EVENT_IBSS_JOINED,
+ EVENT_STOPPED,
};
struct cfg80211_event {
@@ -211,6 +215,7 @@ struct cfg80211_event {
} dc;
struct {
u8 bssid[ETH_ALEN];
+ struct ieee80211_channel *channel;
} ij;
};
};
@@ -232,6 +237,13 @@ struct cfg80211_beacon_registration {
u32 nlportid;
};
+struct cfg80211_iface_destroy {
+ struct list_head list;
+ u32 nlportid;
+};
+
+void cfg80211_destroy_ifaces(struct cfg80211_registered_device *rdev);
+
/* free object */
void cfg80211_dev_free(struct cfg80211_registered_device *rdev);
@@ -240,15 +252,11 @@ int cfg80211_dev_rename(struct cfg80211_registered_device *rdev,
void ieee80211_set_bitrate_flags(struct wiphy *wiphy);
-void cfg80211_bss_expire(struct cfg80211_registered_device *dev);
-void cfg80211_bss_age(struct cfg80211_registered_device *dev,
+void cfg80211_bss_expire(struct cfg80211_registered_device *rdev);
+void cfg80211_bss_age(struct cfg80211_registered_device *rdev,
unsigned long age_secs);
/* IBSS */
-int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev,
- struct net_device *dev,
- struct cfg80211_ibss_params *params,
- struct cfg80211_cached_keys *connkeys);
int cfg80211_join_ibss(struct cfg80211_registered_device *rdev,
struct net_device *dev,
struct cfg80211_ibss_params *params,
@@ -258,7 +266,8 @@ int __cfg80211_leave_ibss(struct cfg80211_registered_device *rdev,
struct net_device *dev, bool nowext);
int cfg80211_leave_ibss(struct cfg80211_registered_device *rdev,
struct net_device *dev, bool nowext);
-void __cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid);
+void __cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid,
+ struct ieee80211_channel *channel);
int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev,
struct wireless_dev *wdev);
@@ -273,6 +282,8 @@ int cfg80211_join_mesh(struct cfg80211_registered_device *rdev,
struct net_device *dev,
struct mesh_setup *setup,
const struct mesh_config *conf);
+int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev,
+ struct net_device *dev);
int cfg80211_leave_mesh(struct cfg80211_registered_device *rdev,
struct net_device *dev);
int cfg80211_set_mesh_channel(struct cfg80211_registered_device *rdev,
@@ -280,8 +291,10 @@ int cfg80211_set_mesh_channel(struct cfg80211_registered_device *rdev,
struct cfg80211_chan_def *chandef);
/* AP */
+int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev,
+ struct net_device *dev, bool notify);
int cfg80211_stop_ap(struct cfg80211_registered_device *rdev,
- struct net_device *dev);
+ struct net_device *dev, bool notify);
/* MLME */
int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev,
@@ -400,35 +413,9 @@ void cfg80211_set_dfs_state(struct wiphy *wiphy,
void cfg80211_dfs_channels_update_work(struct work_struct *work);
-
-static inline int
-cfg80211_can_change_interface(struct cfg80211_registered_device *rdev,
- struct wireless_dev *wdev,
- enum nl80211_iftype iftype)
-{
- return cfg80211_can_use_iftype_chan(rdev, wdev, iftype, NULL,
- CHAN_MODE_UNDEFINED, 0);
-}
-
-static inline int
-cfg80211_can_add_interface(struct cfg80211_registered_device *rdev,
- enum nl80211_iftype iftype)
-{
- if (rfkill_blocked(rdev->rfkill))
- return -ERFKILL;
-
- return cfg80211_can_change_interface(rdev, NULL, iftype);
-}
-
-static inline int
-cfg80211_can_use_chan(struct cfg80211_registered_device *rdev,
- struct wireless_dev *wdev,
- struct ieee80211_channel *chan,
- enum cfg80211_chan_mode chanmode)
-{
- return cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype,
- chan, chanmode, 0);
-}
+unsigned int
+cfg80211_chandef_dfs_cac_time(struct wiphy *wiphy,
+ const struct cfg80211_chan_def *chandef);
static inline unsigned int elapsed_jiffies_msecs(unsigned long start)
{
@@ -437,13 +424,14 @@ static inline unsigned int elapsed_jiffies_msecs(unsigned long start)
if (end >= start)
return jiffies_to_msecs(end - start);
- return jiffies_to_msecs(end + (MAX_JIFFY_OFFSET - start) + 1);
+ return jiffies_to_msecs(end + (ULONG_MAX - start) + 1);
}
void
cfg80211_get_chan_state(struct wireless_dev *wdev,
struct ieee80211_channel **chan,
- enum cfg80211_chan_mode *chanmode);
+ enum cfg80211_chan_mode *chanmode,
+ u8 *radar_detect);
int cfg80211_set_monitor_channel(struct cfg80211_registered_device *rdev,
struct cfg80211_chan_def *chandef);
@@ -458,6 +446,8 @@ int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev,
void cfg80211_update_iface_num(struct cfg80211_registered_device *rdev,
enum nl80211_iftype iftype, int num);
+void __cfg80211_leave(struct cfg80211_registered_device *rdev,
+ struct wireless_dev *wdev);
void cfg80211_leave(struct cfg80211_registered_device *rdev,
struct wireless_dev *wdev);
diff --git a/net/wireless/ethtool.c b/net/wireless/ethtool.c
index e37862f1b12..d4860bfc020 100644
--- a/net/wireless/ethtool.c
+++ b/net/wireless/ethtool.c
@@ -43,7 +43,7 @@ static void cfg80211_get_ringparam(struct net_device *dev,
struct ethtool_ringparam *rp)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
memset(rp, 0, sizeof(*rp));
@@ -56,7 +56,7 @@ static int cfg80211_set_ringparam(struct net_device *dev,
struct ethtool_ringparam *rp)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
if (rp->rx_mini_pending != 0 || rp->rx_jumbo_pending != 0)
return -EINVAL;
@@ -70,7 +70,7 @@ static int cfg80211_set_ringparam(struct net_device *dev,
static int cfg80211_get_sset_count(struct net_device *dev, int sset)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
if (rdev->ops->get_et_sset_count)
return rdev_get_et_sset_count(rdev, dev, sset);
return -EOPNOTSUPP;
@@ -80,7 +80,7 @@ static void cfg80211_get_stats(struct net_device *dev,
struct ethtool_stats *stats, u64 *data)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
if (rdev->ops->get_et_stats)
rdev_get_et_stats(rdev, dev, stats, data);
}
@@ -88,7 +88,7 @@ static void cfg80211_get_stats(struct net_device *dev,
static void cfg80211_get_strings(struct net_device *dev, u32 sset, u8 *data)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
if (rdev->ops->get_et_strings)
rdev_get_et_strings(rdev, dev, sset, data);
}
diff --git a/net/wireless/genregdb.awk b/net/wireless/genregdb.awk
index 9a8217d2a90..40c37fc5b67 100644
--- a/net/wireless/genregdb.awk
+++ b/net/wireless/genregdb.awk
@@ -66,19 +66,15 @@ function parse_reg_rule()
units = $8
sub(/\)/, "", units)
sub(/,/, "", units)
+ dfs_cac = $9
if (units == "mW") {
- if (power == 100) {
- power = 20
- } else if (power == 200) {
- power = 23
- } else if (power == 500) {
- power = 27
- } else if (power == 1000) {
- power = 30
- } else {
- print "Unknown power value in database!"
- }
+ power = 10 * log(power)/log(10)
+ } else {
+ dfs_cac = $8
}
+ sub(/,/, "", dfs_cac)
+ sub(/\(/, "", dfs_cac)
+ sub(/\)/, "", dfs_cac)
flagstr = ""
for (i=8; i<=NF; i++)
flagstr = flagstr $i
@@ -105,11 +101,13 @@ function parse_reg_rule()
flags = flags "\n\t\t\tNL80211_RRF_NO_IR | "
} else if (flagarray[arg] == "NO-IR") {
flags = flags "\n\t\t\tNL80211_RRF_NO_IR | "
+ } else if (flagarray[arg] == "AUTO-BW") {
+ flags = flags "\n\t\t\tNL80211_RRF_AUTO_BW | "
}
}
flags = flags "0"
- printf "\t\tREG_RULE(%d, %d, %d, %d, %d, %s),\n", start, end, bw, gain, power, flags
+ printf "\t\tREG_RULE_EXT(%d, %d, %d, %d, %.0f, %d, %s),\n", start, end, bw, gain, power, dfs_cac, flags
rules++
}
diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c
index f911c5f9f90..8f345da3ea5 100644
--- a/net/wireless/ibss.c
+++ b/net/wireless/ibss.c
@@ -14,7 +14,8 @@
#include "rdev-ops.h"
-void __cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid)
+void __cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid,
+ struct ieee80211_channel *channel)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
struct cfg80211_bss *bss;
@@ -28,8 +29,7 @@ void __cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid)
if (!wdev->ssid_len)
return;
- bss = cfg80211_get_bss(wdev->wiphy, NULL, bssid,
- wdev->ssid, wdev->ssid_len,
+ bss = cfg80211_get_bss(wdev->wiphy, channel, bssid, NULL, 0,
WLAN_CAPABILITY_IBSS, WLAN_CAPABILITY_IBSS);
if (WARN_ON(!bss))
@@ -45,7 +45,7 @@ void __cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid)
cfg80211_upload_connect_keys(wdev);
- nl80211_send_ibss_bssid(wiphy_to_dev(wdev->wiphy), dev, bssid,
+ nl80211_send_ibss_bssid(wiphy_to_rdev(wdev->wiphy), dev, bssid,
GFP_KERNEL);
#ifdef CONFIG_CFG80211_WEXT
memset(&wrqu, 0, sizeof(wrqu));
@@ -54,21 +54,26 @@ void __cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid)
#endif
}
-void cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid, gfp_t gfp)
+void cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid,
+ struct ieee80211_channel *channel, gfp_t gfp)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
struct cfg80211_event *ev;
unsigned long flags;
- trace_cfg80211_ibss_joined(dev, bssid);
+ trace_cfg80211_ibss_joined(dev, bssid, channel);
+
+ if (WARN_ON(!channel))
+ return;
ev = kzalloc(sizeof(*ev), gfp);
if (!ev)
return;
ev->type = EVENT_IBSS_JOINED;
- memcpy(ev->cr.bssid, bssid, ETH_ALEN);
+ memcpy(ev->ij.bssid, bssid, ETH_ALEN);
+ ev->ij.channel = channel;
spin_lock_irqsave(&wdev->event_lock, flags);
list_add_tail(&ev->list, &wdev->event_list);
@@ -77,14 +82,12 @@ void cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid, gfp_t gfp)
}
EXPORT_SYMBOL(cfg80211_ibss_joined);
-int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev,
- struct net_device *dev,
- struct cfg80211_ibss_params *params,
- struct cfg80211_cached_keys *connkeys)
+static int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev,
+ struct net_device *dev,
+ struct cfg80211_ibss_params *params,
+ struct cfg80211_cached_keys *connkeys)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
- struct ieee80211_channel *check_chan;
- u8 radar_detect_width = 0;
int err;
ASSERT_WDEV_LOCK(wdev);
@@ -117,32 +120,10 @@ int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev,
wdev->ibss_fixed = params->channel_fixed;
wdev->ibss_dfs_possible = params->userspace_handles_dfs;
+ wdev->chandef = params->chandef;
#ifdef CONFIG_CFG80211_WEXT
wdev->wext.ibss.chandef = params->chandef;
#endif
- check_chan = params->chandef.chan;
- if (params->userspace_handles_dfs) {
- /* use channel NULL to check for radar even if the current
- * channel is not a radar channel - it might decide to change
- * to DFS channel later.
- */
- radar_detect_width = BIT(params->chandef.width);
- check_chan = NULL;
- }
-
- err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype,
- check_chan,
- (params->channel_fixed &&
- !radar_detect_width)
- ? CHAN_MODE_SHARED
- : CHAN_MODE_EXCLUSIVE,
- radar_detect_width);
-
- if (err) {
- wdev->connect_keys = NULL;
- return err;
- }
-
err = rdev_join_ibss(rdev, dev, params);
if (err) {
wdev->connect_keys = NULL;
@@ -175,7 +156,7 @@ int cfg80211_join_ibss(struct cfg80211_registered_device *rdev,
static void __cfg80211_clear_ibss(struct net_device *dev, bool nowext)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
int i;
ASSERT_WDEV_LOCK(wdev);
@@ -200,6 +181,7 @@ static void __cfg80211_clear_ibss(struct net_device *dev, bool nowext)
wdev->current_bss = NULL;
wdev->ssid_len = 0;
+ memset(&wdev->chandef, 0, sizeof(wdev->chandef));
#ifdef CONFIG_CFG80211_WEXT
if (!nowext)
wdev->wext.ibss.ssid_len = 0;
@@ -329,7 +311,7 @@ int cfg80211_ibss_wext_siwfreq(struct net_device *dev,
struct iw_freq *wextfreq, char *extra)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
struct ieee80211_channel *chan = NULL;
int err, freq;
@@ -340,7 +322,7 @@ int cfg80211_ibss_wext_siwfreq(struct net_device *dev,
if (!rdev->ops->join_ibss)
return -EOPNOTSUPP;
- freq = cfg80211_wext_freq(wdev->wiphy, wextfreq);
+ freq = cfg80211_wext_freq(wextfreq);
if (freq < 0)
return freq;
@@ -414,7 +396,7 @@ int cfg80211_ibss_wext_siwessid(struct net_device *dev,
struct iw_point *data, char *ssid)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
size_t len = data->length;
int err;
@@ -438,8 +420,8 @@ int cfg80211_ibss_wext_siwessid(struct net_device *dev,
if (len > 0 && ssid[len - 1] == '\0')
len--;
+ memcpy(wdev->ssid, ssid, len);
wdev->wext.ibss.ssid = wdev->ssid;
- memcpy(wdev->wext.ibss.ssid, ssid, len);
wdev->wext.ibss.ssid_len = len;
wdev_lock(wdev);
@@ -481,7 +463,7 @@ int cfg80211_ibss_wext_siwap(struct net_device *dev,
struct sockaddr *ap_addr, char *extra)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
u8 *bssid = ap_addr->sa_data;
int err;
@@ -499,6 +481,9 @@ int cfg80211_ibss_wext_siwap(struct net_device *dev,
if (is_zero_ether_addr(bssid) || is_broadcast_ether_addr(bssid))
bssid = NULL;
+ if (bssid && !is_valid_ether_addr(bssid))
+ return -EINVAL;
+
/* both automatic */
if (!bssid && !wdev->wext.ibss.bssid)
return 0;
diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c
index 885862447b6..092300b30c3 100644
--- a/net/wireless/mesh.c
+++ b/net/wireless/mesh.c
@@ -99,7 +99,6 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev,
const struct mesh_config *conf)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
- u8 radar_detect_width = 0;
int err;
BUILD_BUG_ON(IEEE80211_MAX_SSID_LEN != IEEE80211_MAX_MESH_ID_LEN);
@@ -175,27 +174,15 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev,
scan_width);
}
- if (!cfg80211_reg_can_beacon(&rdev->wiphy, &setup->chandef))
+ if (!cfg80211_reg_can_beacon(&rdev->wiphy, &setup->chandef,
+ NL80211_IFTYPE_MESH_POINT))
return -EINVAL;
- err = cfg80211_chandef_dfs_required(wdev->wiphy, &setup->chandef);
- if (err < 0)
- return err;
- if (err)
- radar_detect_width = BIT(setup->chandef.width);
-
- err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype,
- setup->chandef.chan,
- CHAN_MODE_SHARED,
- radar_detect_width);
- if (err)
- return err;
-
err = rdev_join_mesh(rdev, dev, conf, setup);
if (!err) {
memcpy(wdev->ssid, setup->mesh_id, setup->mesh_id_len);
wdev->mesh_id_len = setup->mesh_id_len;
- wdev->channel = setup->chandef.chan;
+ wdev->chandef = setup->chandef;
}
return err;
@@ -236,15 +223,10 @@ int cfg80211_set_mesh_channel(struct cfg80211_registered_device *rdev,
if (!netif_running(wdev->netdev))
return -ENETDOWN;
- err = cfg80211_can_use_chan(rdev, wdev, chandef->chan,
- CHAN_MODE_SHARED);
- if (err)
- return err;
-
err = rdev_libertas_set_mesh_channel(rdev, wdev->netdev,
chandef->chan);
if (!err)
- wdev->channel = chandef->chan;
+ wdev->chandef = *chandef;
return err;
}
@@ -256,8 +238,8 @@ int cfg80211_set_mesh_channel(struct cfg80211_registered_device *rdev,
return 0;
}
-static int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev,
- struct net_device *dev)
+int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev,
+ struct net_device *dev)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
int err;
@@ -276,7 +258,7 @@ static int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev,
err = rdev_leave_mesh(rdev, dev);
if (!err) {
wdev->mesh_id_len = 0;
- wdev->channel = NULL;
+ memset(&wdev->chandef, 0, sizeof(wdev->chandef));
rdev_set_qos_map(rdev, dev, NULL);
}
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index 52cca05044a..266766b8d80 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -23,7 +23,7 @@ void cfg80211_rx_assoc_resp(struct net_device *dev, struct cfg80211_bss *bss,
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
struct wiphy *wiphy = wdev->wiphy;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf;
u8 *ie = mgmt->u.assoc_resp.variable;
int ieoffs = offsetof(struct ieee80211_mgmt, u.assoc_resp.variable);
@@ -54,7 +54,7 @@ EXPORT_SYMBOL(cfg80211_rx_assoc_resp);
static void cfg80211_process_auth(struct wireless_dev *wdev,
const u8 *buf, size_t len)
{
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
nl80211_send_rx_auth(rdev, wdev->netdev, buf, len, GFP_KERNEL);
cfg80211_sme_rx_auth(wdev, buf, len);
@@ -63,7 +63,7 @@ static void cfg80211_process_auth(struct wireless_dev *wdev,
static void cfg80211_process_deauth(struct wireless_dev *wdev,
const u8 *buf, size_t len)
{
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf;
const u8 *bssid = mgmt->bssid;
u16 reason_code = le16_to_cpu(mgmt->u.deauth.reason_code);
@@ -82,7 +82,7 @@ static void cfg80211_process_deauth(struct wireless_dev *wdev,
static void cfg80211_process_disassoc(struct wireless_dev *wdev,
const u8 *buf, size_t len)
{
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf;
const u8 *bssid = mgmt->bssid;
u16 reason_code = le16_to_cpu(mgmt->u.disassoc.reason_code);
@@ -123,7 +123,7 @@ void cfg80211_auth_timeout(struct net_device *dev, const u8 *addr)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
struct wiphy *wiphy = wdev->wiphy;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
trace_cfg80211_send_auth_timeout(dev, addr);
@@ -136,7 +136,7 @@ void cfg80211_assoc_timeout(struct net_device *dev, struct cfg80211_bss *bss)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
struct wiphy *wiphy = wdev->wiphy;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
trace_cfg80211_send_assoc_timeout(dev, bss->bssid);
@@ -172,7 +172,7 @@ void cfg80211_michael_mic_failure(struct net_device *dev, const u8 *addr,
const u8 *tsc, gfp_t gfp)
{
struct wiphy *wiphy = dev->ieee80211_ptr->wiphy;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
#ifdef CONFIG_CFG80211_WEXT
union iwreq_data wrqu;
char *buf = kmalloc(128, gfp);
@@ -233,14 +233,8 @@ int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev,
if (!req.bss)
return -ENOENT;
- err = cfg80211_can_use_chan(rdev, wdev, req.bss->channel,
- CHAN_MODE_SHARED);
- if (err)
- goto out;
-
err = rdev_auth(rdev, dev, &req);
-out:
cfg80211_put_bss(&rdev->wiphy, req.bss);
return err;
}
@@ -306,16 +300,10 @@ int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,
if (!req->bss)
return -ENOENT;
- err = cfg80211_can_use_chan(rdev, wdev, chan, CHAN_MODE_SHARED);
- if (err)
- goto out;
-
err = rdev_assoc(rdev, dev, req);
if (!err)
cfg80211_hold_bss(bss_from_pub(req->bss));
-
-out:
- if (err)
+ else
cfg80211_put_bss(&rdev->wiphy, req->bss);
return err;
@@ -414,7 +402,7 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_portid,
int match_len)
{
struct wiphy *wiphy = wdev->wiphy;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
struct cfg80211_mgmt_registration *reg, *nreg;
int err = 0;
u16 mgmt_type;
@@ -473,7 +461,7 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_portid,
void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlportid)
{
struct wiphy *wiphy = wdev->wiphy;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
struct cfg80211_mgmt_registration *reg, *tmp;
spin_lock_bh(&wdev->mgmt_registrations_lock);
@@ -620,7 +608,7 @@ bool cfg80211_rx_mgmt(struct wireless_dev *wdev, int freq, int sig_mbm,
const u8 *buf, size_t len, u32 flags, gfp_t gfp)
{
struct wiphy *wiphy = wdev->wiphy;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
struct cfg80211_mgmt_registration *reg;
const struct ieee80211_txrx_stypes *stypes =
&wiphy->mgmt_stypes[wdev->iftype];
@@ -739,7 +727,7 @@ void cfg80211_radar_event(struct wiphy *wiphy,
struct cfg80211_chan_def *chandef,
gfp_t gfp)
{
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
unsigned long timeout;
trace_cfg80211_radar_event(wiphy, chandef);
@@ -764,7 +752,7 @@ void cfg80211_cac_event(struct net_device *netdev,
{
struct wireless_dev *wdev = netdev->ieee80211_ptr;
struct wiphy *wiphy = wdev->wiphy;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
unsigned long timeout;
trace_cfg80211_cac_event(netdev, event);
@@ -772,13 +760,13 @@ void cfg80211_cac_event(struct net_device *netdev,
if (WARN_ON(!wdev->cac_started))
return;
- if (WARN_ON(!wdev->channel))
+ if (WARN_ON(!wdev->chandef.chan))
return;
switch (event) {
case NL80211_RADAR_CAC_FINISHED:
timeout = wdev->cac_start_time +
- msecs_to_jiffies(IEEE80211_DFS_MIN_CAC_TIME_MS);
+ msecs_to_jiffies(wdev->cac_time_ms);
WARN_ON(!time_after_eq(jiffies, timeout));
cfg80211_set_dfs_state(wiphy, chandef, NL80211_DFS_AVAILABLE);
break;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 4fe2e6e2bc7..6668daf6932 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -168,8 +168,8 @@ __cfg80211_rdev_from_attrs(struct net *netns, struct nlattr **attrs)
netdev = __dev_get_by_index(netns, ifindex);
if (netdev) {
if (netdev->ieee80211_ptr)
- tmp = wiphy_to_dev(
- netdev->ieee80211_ptr->wiphy);
+ tmp = wiphy_to_rdev(
+ netdev->ieee80211_ptr->wiphy);
else
tmp = NULL;
@@ -371,8 +371,8 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
[NL80211_ATTR_CH_SWITCH_COUNT] = { .type = NLA_U32 },
[NL80211_ATTR_CH_SWITCH_BLOCK_TX] = { .type = NLA_FLAG },
[NL80211_ATTR_CSA_IES] = { .type = NLA_NESTED },
- [NL80211_ATTR_CSA_C_OFF_BEACON] = { .type = NLA_U16 },
- [NL80211_ATTR_CSA_C_OFF_PRESP] = { .type = NLA_U16 },
+ [NL80211_ATTR_CSA_C_OFF_BEACON] = { .type = NLA_BINARY },
+ [NL80211_ATTR_CSA_C_OFF_PRESP] = { .type = NLA_BINARY },
[NL80211_ATTR_STA_SUPPORTED_CHANNELS] = { .type = NLA_BINARY },
[NL80211_ATTR_STA_SUPPORTED_OPER_CLASSES] = { .type = NLA_BINARY },
[NL80211_ATTR_HANDLE_DFS] = { .type = NLA_FLAG },
@@ -382,6 +382,11 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
[NL80211_ATTR_VENDOR_DATA] = { .type = NLA_BINARY },
[NL80211_ATTR_QOS_MAP] = { .type = NLA_BINARY,
.len = IEEE80211_QOS_MAP_LEN_MAX },
+ [NL80211_ATTR_MAC_HINT] = { .len = ETH_ALEN },
+ [NL80211_ATTR_WIPHY_FREQ_HINT] = { .type = NLA_U32 },
+ [NL80211_ATTR_TDLS_PEER_CAPABILITY] = { .type = NLA_U32 },
+ [NL80211_ATTR_IFACE_SOCKET_OWNER] = { .type = NLA_FLAG },
+ [NL80211_ATTR_CSA_C_OFFSETS_TX] = { .type = NLA_BINARY },
};
/* policy for the key attributes */
@@ -481,7 +486,7 @@ static int nl80211_prepare_wdev_dump(struct sk_buff *skb,
err = PTR_ERR(*wdev);
goto out_unlock;
}
- *rdev = wiphy_to_dev((*wdev)->wiphy);
+ *rdev = wiphy_to_rdev((*wdev)->wiphy);
/* 0 is the first index - add 1 to parse only once */
cb->args[0] = (*rdev)->wiphy_idx + 1;
cb->args[1] = (*wdev)->identifier;
@@ -494,7 +499,7 @@ static int nl80211_prepare_wdev_dump(struct sk_buff *skb,
err = -ENODEV;
goto out_unlock;
}
- *rdev = wiphy_to_dev(wiphy);
+ *rdev = wiphy_to_rdev(wiphy);
*wdev = NULL;
list_for_each_entry(tmp, &(*rdev)->wdev_list, list) {
@@ -563,6 +568,13 @@ static int nl80211_msg_put_channel(struct sk_buff *msg,
struct ieee80211_channel *chan,
bool large)
{
+ /* Some channels must be completely excluded from the
+ * list to protect old user-space tools from breaking
+ */
+ if (!large && chan->flags &
+ (IEEE80211_CHAN_NO_10MHZ | IEEE80211_CHAN_NO_20MHZ))
+ return 0;
+
if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_FREQ,
chan->center_freq))
goto nla_put_failure;
@@ -590,6 +602,10 @@ static int nl80211_msg_put_channel(struct sk_buff *msg,
if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_DFS_TIME,
time))
goto nla_put_failure;
+ if (nla_put_u32(msg,
+ NL80211_FREQUENCY_ATTR_DFS_CAC_TIME,
+ chan->dfs_cac_ms))
+ goto nla_put_failure;
}
}
@@ -606,6 +622,18 @@ static int nl80211_msg_put_channel(struct sk_buff *msg,
if ((chan->flags & IEEE80211_CHAN_NO_160MHZ) &&
nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_160MHZ))
goto nla_put_failure;
+ if ((chan->flags & IEEE80211_CHAN_INDOOR_ONLY) &&
+ nla_put_flag(msg, NL80211_FREQUENCY_ATTR_INDOOR_ONLY))
+ goto nla_put_failure;
+ if ((chan->flags & IEEE80211_CHAN_GO_CONCURRENT) &&
+ nla_put_flag(msg, NL80211_FREQUENCY_ATTR_GO_CONCURRENT))
+ goto nla_put_failure;
+ if ((chan->flags & IEEE80211_CHAN_NO_20MHZ) &&
+ nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_20MHZ))
+ goto nla_put_failure;
+ if ((chan->flags & IEEE80211_CHAN_NO_10MHZ) &&
+ nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_10MHZ))
+ goto nla_put_failure;
}
if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_MAX_TX_POWER,
@@ -855,6 +883,19 @@ static int nl80211_key_allowed(struct wireless_dev *wdev)
return 0;
}
+static struct ieee80211_channel *nl80211_get_valid_chan(struct wiphy *wiphy,
+ struct nlattr *tb)
+{
+ struct ieee80211_channel *chan;
+
+ if (tb == NULL)
+ return NULL;
+ chan = ieee80211_get_channel(wiphy, nla_get_u32(tb));
+ if (!chan || chan->flags & IEEE80211_CHAN_DISABLED)
+ return NULL;
+ return chan;
+}
+
static int nl80211_put_iftypes(struct sk_buff *msg, u32 attr, u16 ifmodes)
{
struct nlattr *nl_modes = nla_nest_start(msg, attr);
@@ -930,8 +971,10 @@ static int nl80211_put_iface_combinations(struct wiphy *wiphy,
c->max_interfaces))
goto nla_put_failure;
if (large &&
- nla_put_u32(msg, NL80211_IFACE_COMB_RADAR_DETECT_WIDTHS,
- c->radar_detect_widths))
+ (nla_put_u32(msg, NL80211_IFACE_COMB_RADAR_DETECT_WIDTHS,
+ c->radar_detect_widths) ||
+ nla_put_u32(msg, NL80211_IFACE_COMB_RADAR_DETECT_REGIONS,
+ c->radar_detect_regions)))
goto nla_put_failure;
nla_nest_end(msg, nl_combi);
@@ -986,42 +1029,42 @@ static int nl80211_send_wowlan_tcp_caps(struct cfg80211_registered_device *rdev,
}
static int nl80211_send_wowlan(struct sk_buff *msg,
- struct cfg80211_registered_device *dev,
+ struct cfg80211_registered_device *rdev,
bool large)
{
struct nlattr *nl_wowlan;
- if (!dev->wiphy.wowlan)
+ if (!rdev->wiphy.wowlan)
return 0;
nl_wowlan = nla_nest_start(msg, NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED);
if (!nl_wowlan)
return -ENOBUFS;
- if (((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_ANY) &&
+ if (((rdev->wiphy.wowlan->flags & WIPHY_WOWLAN_ANY) &&
nla_put_flag(msg, NL80211_WOWLAN_TRIG_ANY)) ||
- ((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_DISCONNECT) &&
+ ((rdev->wiphy.wowlan->flags & WIPHY_WOWLAN_DISCONNECT) &&
nla_put_flag(msg, NL80211_WOWLAN_TRIG_DISCONNECT)) ||
- ((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_MAGIC_PKT) &&
+ ((rdev->wiphy.wowlan->flags & WIPHY_WOWLAN_MAGIC_PKT) &&
nla_put_flag(msg, NL80211_WOWLAN_TRIG_MAGIC_PKT)) ||
- ((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_SUPPORTS_GTK_REKEY) &&
+ ((rdev->wiphy.wowlan->flags & WIPHY_WOWLAN_SUPPORTS_GTK_REKEY) &&
nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_SUPPORTED)) ||
- ((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_GTK_REKEY_FAILURE) &&
+ ((rdev->wiphy.wowlan->flags & WIPHY_WOWLAN_GTK_REKEY_FAILURE) &&
nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_FAILURE)) ||
- ((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_EAP_IDENTITY_REQ) &&
+ ((rdev->wiphy.wowlan->flags & WIPHY_WOWLAN_EAP_IDENTITY_REQ) &&
nla_put_flag(msg, NL80211_WOWLAN_TRIG_EAP_IDENT_REQUEST)) ||
- ((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_4WAY_HANDSHAKE) &&
+ ((rdev->wiphy.wowlan->flags & WIPHY_WOWLAN_4WAY_HANDSHAKE) &&
nla_put_flag(msg, NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE)) ||
- ((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_RFKILL_RELEASE) &&
+ ((rdev->wiphy.wowlan->flags & WIPHY_WOWLAN_RFKILL_RELEASE) &&
nla_put_flag(msg, NL80211_WOWLAN_TRIG_RFKILL_RELEASE)))
return -ENOBUFS;
- if (dev->wiphy.wowlan->n_patterns) {
+ if (rdev->wiphy.wowlan->n_patterns) {
struct nl80211_pattern_support pat = {
- .max_patterns = dev->wiphy.wowlan->n_patterns,
- .min_pattern_len = dev->wiphy.wowlan->pattern_min_len,
- .max_pattern_len = dev->wiphy.wowlan->pattern_max_len,
- .max_pkt_offset = dev->wiphy.wowlan->max_pkt_offset,
+ .max_patterns = rdev->wiphy.wowlan->n_patterns,
+ .min_pattern_len = rdev->wiphy.wowlan->pattern_min_len,
+ .max_pattern_len = rdev->wiphy.wowlan->pattern_max_len,
+ .max_pkt_offset = rdev->wiphy.wowlan->max_pkt_offset,
};
if (nla_put(msg, NL80211_WOWLAN_TRIG_PKT_PATTERN,
@@ -1029,7 +1072,7 @@ static int nl80211_send_wowlan(struct sk_buff *msg,
return -ENOBUFS;
}
- if (large && nl80211_send_wowlan_tcp_caps(dev, msg))
+ if (large && nl80211_send_wowlan_tcp_caps(rdev, msg))
return -ENOBUFS;
nla_nest_end(msg, nl_wowlan);
@@ -1039,19 +1082,19 @@ static int nl80211_send_wowlan(struct sk_buff *msg,
#endif
static int nl80211_send_coalesce(struct sk_buff *msg,
- struct cfg80211_registered_device *dev)
+ struct cfg80211_registered_device *rdev)
{
struct nl80211_coalesce_rule_support rule;
- if (!dev->wiphy.coalesce)
+ if (!rdev->wiphy.coalesce)
return 0;
- rule.max_rules = dev->wiphy.coalesce->n_rules;
- rule.max_delay = dev->wiphy.coalesce->max_delay;
- rule.pat.max_patterns = dev->wiphy.coalesce->n_patterns;
- rule.pat.min_pattern_len = dev->wiphy.coalesce->pattern_min_len;
- rule.pat.max_pattern_len = dev->wiphy.coalesce->pattern_max_len;
- rule.pat.max_pkt_offset = dev->wiphy.coalesce->max_pkt_offset;
+ rule.max_rules = rdev->wiphy.coalesce->n_rules;
+ rule.max_delay = rdev->wiphy.coalesce->max_delay;
+ rule.pat.max_patterns = rdev->wiphy.coalesce->n_patterns;
+ rule.pat.min_pattern_len = rdev->wiphy.coalesce->pattern_min_len;
+ rule.pat.max_pattern_len = rdev->wiphy.coalesce->pattern_max_len;
+ rule.pat.max_pkt_offset = rdev->wiphy.coalesce->max_pkt_offset;
if (nla_put(msg, NL80211_ATTR_COALESCE_RULE, sizeof(rule), &rule))
return -ENOBUFS;
@@ -1182,7 +1225,8 @@ struct nl80211_dump_wiphy_state {
bool split;
};
-static int nl80211_send_wiphy(struct cfg80211_registered_device *dev,
+static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev,
+ enum nl80211_commands cmd,
struct sk_buff *msg, u32 portid, u32 seq,
int flags, struct nl80211_dump_wiphy_state *state)
{
@@ -1194,63 +1238,66 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev,
struct ieee80211_channel *chan;
int i;
const struct ieee80211_txrx_stypes *mgmt_stypes =
- dev->wiphy.mgmt_stypes;
+ rdev->wiphy.mgmt_stypes;
u32 features;
- hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_WIPHY);
+ hdr = nl80211hdr_put(msg, portid, seq, flags, cmd);
if (!hdr)
return -ENOBUFS;
if (WARN_ON(!state))
return -EINVAL;
- if (nla_put_u32(msg, NL80211_ATTR_WIPHY, dev->wiphy_idx) ||
+ if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
nla_put_string(msg, NL80211_ATTR_WIPHY_NAME,
- wiphy_name(&dev->wiphy)) ||
+ wiphy_name(&rdev->wiphy)) ||
nla_put_u32(msg, NL80211_ATTR_GENERATION,
cfg80211_rdev_list_generation))
goto nla_put_failure;
+ if (cmd != NL80211_CMD_NEW_WIPHY)
+ goto finish;
+
switch (state->split_start) {
case 0:
if (nla_put_u8(msg, NL80211_ATTR_WIPHY_RETRY_SHORT,
- dev->wiphy.retry_short) ||
+ rdev->wiphy.retry_short) ||
nla_put_u8(msg, NL80211_ATTR_WIPHY_RETRY_LONG,
- dev->wiphy.retry_long) ||
+ rdev->wiphy.retry_long) ||
nla_put_u32(msg, NL80211_ATTR_WIPHY_FRAG_THRESHOLD,
- dev->wiphy.frag_threshold) ||
+ rdev->wiphy.frag_threshold) ||
nla_put_u32(msg, NL80211_ATTR_WIPHY_RTS_THRESHOLD,
- dev->wiphy.rts_threshold) ||
+ rdev->wiphy.rts_threshold) ||
nla_put_u8(msg, NL80211_ATTR_WIPHY_COVERAGE_CLASS,
- dev->wiphy.coverage_class) ||
+ rdev->wiphy.coverage_class) ||
nla_put_u8(msg, NL80211_ATTR_MAX_NUM_SCAN_SSIDS,
- dev->wiphy.max_scan_ssids) ||
+ rdev->wiphy.max_scan_ssids) ||
nla_put_u8(msg, NL80211_ATTR_MAX_NUM_SCHED_SCAN_SSIDS,
- dev->wiphy.max_sched_scan_ssids) ||
+ rdev->wiphy.max_sched_scan_ssids) ||
nla_put_u16(msg, NL80211_ATTR_MAX_SCAN_IE_LEN,
- dev->wiphy.max_scan_ie_len) ||
+ rdev->wiphy.max_scan_ie_len) ||
nla_put_u16(msg, NL80211_ATTR_MAX_SCHED_SCAN_IE_LEN,
- dev->wiphy.max_sched_scan_ie_len) ||
+ rdev->wiphy.max_sched_scan_ie_len) ||
nla_put_u8(msg, NL80211_ATTR_MAX_MATCH_SETS,
- dev->wiphy.max_match_sets))
+ rdev->wiphy.max_match_sets))
goto nla_put_failure;
- if ((dev->wiphy.flags & WIPHY_FLAG_IBSS_RSN) &&
+ if ((rdev->wiphy.flags & WIPHY_FLAG_IBSS_RSN) &&
nla_put_flag(msg, NL80211_ATTR_SUPPORT_IBSS_RSN))
goto nla_put_failure;
- if ((dev->wiphy.flags & WIPHY_FLAG_MESH_AUTH) &&
+ if ((rdev->wiphy.flags & WIPHY_FLAG_MESH_AUTH) &&
nla_put_flag(msg, NL80211_ATTR_SUPPORT_MESH_AUTH))
goto nla_put_failure;
- if ((dev->wiphy.flags & WIPHY_FLAG_AP_UAPSD) &&
+ if ((rdev->wiphy.flags & WIPHY_FLAG_AP_UAPSD) &&
nla_put_flag(msg, NL80211_ATTR_SUPPORT_AP_UAPSD))
goto nla_put_failure;
- if ((dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_FW_ROAM) &&
+ if ((rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_FW_ROAM) &&
nla_put_flag(msg, NL80211_ATTR_ROAM_SUPPORT))
goto nla_put_failure;
- if ((dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) &&
+ if ((rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) &&
nla_put_flag(msg, NL80211_ATTR_TDLS_SUPPORT))
goto nla_put_failure;
- if ((dev->wiphy.flags & WIPHY_FLAG_TDLS_EXTERNAL_SETUP) &&
+ if ((rdev->wiphy.flags & WIPHY_FLAG_TDLS_EXTERNAL_SETUP) &&
nla_put_flag(msg, NL80211_ATTR_TDLS_EXTERNAL_SETUP))
goto nla_put_failure;
state->split_start++;
@@ -1258,35 +1305,35 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev,
break;
case 1:
if (nla_put(msg, NL80211_ATTR_CIPHER_SUITES,
- sizeof(u32) * dev->wiphy.n_cipher_suites,
- dev->wiphy.cipher_suites))
+ sizeof(u32) * rdev->wiphy.n_cipher_suites,
+ rdev->wiphy.cipher_suites))
goto nla_put_failure;
if (nla_put_u8(msg, NL80211_ATTR_MAX_NUM_PMKIDS,
- dev->wiphy.max_num_pmkids))
+ rdev->wiphy.max_num_pmkids))
goto nla_put_failure;
- if ((dev->wiphy.flags & WIPHY_FLAG_CONTROL_PORT_PROTOCOL) &&
+ if ((rdev->wiphy.flags & WIPHY_FLAG_CONTROL_PORT_PROTOCOL) &&
nla_put_flag(msg, NL80211_ATTR_CONTROL_PORT_ETHERTYPE))
goto nla_put_failure;
if (nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_AVAIL_TX,
- dev->wiphy.available_antennas_tx) ||
+ rdev->wiphy.available_antennas_tx) ||
nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_AVAIL_RX,
- dev->wiphy.available_antennas_rx))
+ rdev->wiphy.available_antennas_rx))
goto nla_put_failure;
- if ((dev->wiphy.flags & WIPHY_FLAG_AP_PROBE_RESP_OFFLOAD) &&
+ if ((rdev->wiphy.flags & WIPHY_FLAG_AP_PROBE_RESP_OFFLOAD) &&
nla_put_u32(msg, NL80211_ATTR_PROBE_RESP_OFFLOAD,
- dev->wiphy.probe_resp_offload))
+ rdev->wiphy.probe_resp_offload))
goto nla_put_failure;
- if ((dev->wiphy.available_antennas_tx ||
- dev->wiphy.available_antennas_rx) &&
- dev->ops->get_antenna) {
+ if ((rdev->wiphy.available_antennas_tx ||
+ rdev->wiphy.available_antennas_rx) &&
+ rdev->ops->get_antenna) {
u32 tx_ant = 0, rx_ant = 0;
int res;
- res = rdev_get_antenna(dev, &tx_ant, &rx_ant);
+ res = rdev_get_antenna(rdev, &tx_ant, &rx_ant);
if (!res) {
if (nla_put_u32(msg,
NL80211_ATTR_WIPHY_ANTENNA_TX,
@@ -1303,7 +1350,7 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev,
break;
case 2:
if (nl80211_put_iftypes(msg, NL80211_ATTR_SUPPORTED_IFTYPES,
- dev->wiphy.interface_modes))
+ rdev->wiphy.interface_modes))
goto nla_put_failure;
state->split_start++;
if (state->split)
@@ -1317,7 +1364,7 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev,
band < IEEE80211_NUM_BANDS; band++) {
struct ieee80211_supported_band *sband;
- sband = dev->wiphy.bands[band];
+ sband = rdev->wiphy.bands[band];
if (!sband)
continue;
@@ -1394,7 +1441,7 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev,
i = 0;
#define CMD(op, n) \
do { \
- if (dev->ops->op) { \
+ if (rdev->ops->op) { \
i++; \
if (nla_put_u32(msg, i, NL80211_CMD_ ## n)) \
goto nla_put_failure; \
@@ -1418,59 +1465,58 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev,
CMD(set_pmksa, SET_PMKSA);
CMD(del_pmksa, DEL_PMKSA);
CMD(flush_pmksa, FLUSH_PMKSA);
- if (dev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL)
+ if (rdev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL)
CMD(remain_on_channel, REMAIN_ON_CHANNEL);
CMD(set_bitrate_mask, SET_TX_BITRATE_MASK);
CMD(mgmt_tx, FRAME);
CMD(mgmt_tx_cancel_wait, FRAME_WAIT_CANCEL);
- if (dev->wiphy.flags & WIPHY_FLAG_NETNS_OK) {
+ if (rdev->wiphy.flags & WIPHY_FLAG_NETNS_OK) {
i++;
if (nla_put_u32(msg, i, NL80211_CMD_SET_WIPHY_NETNS))
goto nla_put_failure;
}
- if (dev->ops->set_monitor_channel || dev->ops->start_ap ||
- dev->ops->join_mesh) {
+ if (rdev->ops->set_monitor_channel || rdev->ops->start_ap ||
+ rdev->ops->join_mesh) {
i++;
if (nla_put_u32(msg, i, NL80211_CMD_SET_CHANNEL))
goto nla_put_failure;
}
CMD(set_wds_peer, SET_WDS_PEER);
- if (dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) {
+ if (rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) {
CMD(tdls_mgmt, TDLS_MGMT);
CMD(tdls_oper, TDLS_OPER);
}
- if (dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN)
+ if (rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN)
CMD(sched_scan_start, START_SCHED_SCAN);
CMD(probe_client, PROBE_CLIENT);
CMD(set_noack_map, SET_NOACK_MAP);
- if (dev->wiphy.flags & WIPHY_FLAG_REPORTS_OBSS) {
+ if (rdev->wiphy.flags & WIPHY_FLAG_REPORTS_OBSS) {
i++;
if (nla_put_u32(msg, i, NL80211_CMD_REGISTER_BEACONS))
goto nla_put_failure;
}
CMD(start_p2p_device, START_P2P_DEVICE);
CMD(set_mcast_rate, SET_MCAST_RATE);
+#ifdef CONFIG_NL80211_TESTMODE
+ CMD(testmode_cmd, TESTMODE);
+#endif
if (state->split) {
CMD(crit_proto_start, CRIT_PROTOCOL_START);
CMD(crit_proto_stop, CRIT_PROTOCOL_STOP);
- if (dev->wiphy.flags & WIPHY_FLAG_HAS_CHANNEL_SWITCH)
+ if (rdev->wiphy.flags & WIPHY_FLAG_HAS_CHANNEL_SWITCH)
CMD(channel_switch, CHANNEL_SWITCH);
+ CMD(set_qos_map, SET_QOS_MAP);
}
- CMD(set_qos_map, SET_QOS_MAP);
-
-#ifdef CONFIG_NL80211_TESTMODE
- CMD(testmode_cmd, TESTMODE);
-#endif
-
+ /* add into the if now */
#undef CMD
- if (dev->ops->connect || dev->ops->auth) {
+ if (rdev->ops->connect || rdev->ops->auth) {
i++;
if (nla_put_u32(msg, i, NL80211_CMD_CONNECT))
goto nla_put_failure;
}
- if (dev->ops->disconnect || dev->ops->deauth) {
+ if (rdev->ops->disconnect || rdev->ops->deauth) {
i++;
if (nla_put_u32(msg, i, NL80211_CMD_DISCONNECT))
goto nla_put_failure;
@@ -1481,14 +1527,14 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev,
if (state->split)
break;
case 5:
- if (dev->ops->remain_on_channel &&
- (dev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL) &&
+ if (rdev->ops->remain_on_channel &&
+ (rdev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL) &&
nla_put_u32(msg,
NL80211_ATTR_MAX_REMAIN_ON_CHANNEL_DURATION,
- dev->wiphy.max_remain_on_channel_duration))
+ rdev->wiphy.max_remain_on_channel_duration))
goto nla_put_failure;
- if ((dev->wiphy.flags & WIPHY_FLAG_OFFCHAN_TX) &&
+ if ((rdev->wiphy.flags & WIPHY_FLAG_OFFCHAN_TX) &&
nla_put_flag(msg, NL80211_ATTR_OFFCHANNEL_TX_OK))
goto nla_put_failure;
@@ -1499,7 +1545,7 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev,
break;
case 6:
#ifdef CONFIG_PM
- if (nl80211_send_wowlan(msg, dev, state->split))
+ if (nl80211_send_wowlan(msg, rdev, state->split))
goto nla_put_failure;
state->split_start++;
if (state->split)
@@ -1509,10 +1555,10 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev,
#endif
case 7:
if (nl80211_put_iftypes(msg, NL80211_ATTR_SOFTWARE_IFTYPES,
- dev->wiphy.software_iftypes))
+ rdev->wiphy.software_iftypes))
goto nla_put_failure;
- if (nl80211_put_iface_combinations(&dev->wiphy, msg,
+ if (nl80211_put_iface_combinations(&rdev->wiphy, msg,
state->split))
goto nla_put_failure;
@@ -1520,12 +1566,12 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev,
if (state->split)
break;
case 8:
- if ((dev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME) &&
+ if ((rdev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME) &&
nla_put_u32(msg, NL80211_ATTR_DEVICE_AP_SME,
- dev->wiphy.ap_sme_capa))
+ rdev->wiphy.ap_sme_capa))
goto nla_put_failure;
- features = dev->wiphy.features;
+ features = rdev->wiphy.features;
/*
* We can only add the per-channel limit information if the
* dump is split, otherwise it makes it too big. Therefore
@@ -1536,16 +1582,16 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev,
if (nla_put_u32(msg, NL80211_ATTR_FEATURE_FLAGS, features))
goto nla_put_failure;
- if (dev->wiphy.ht_capa_mod_mask &&
+ if (rdev->wiphy.ht_capa_mod_mask &&
nla_put(msg, NL80211_ATTR_HT_CAPABILITY_MASK,
- sizeof(*dev->wiphy.ht_capa_mod_mask),
- dev->wiphy.ht_capa_mod_mask))
+ sizeof(*rdev->wiphy.ht_capa_mod_mask),
+ rdev->wiphy.ht_capa_mod_mask))
goto nla_put_failure;
- if (dev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME &&
- dev->wiphy.max_acl_mac_addrs &&
+ if (rdev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME &&
+ rdev->wiphy.max_acl_mac_addrs &&
nla_put_u32(msg, NL80211_ATTR_MAC_ACL_MAX,
- dev->wiphy.max_acl_mac_addrs))
+ rdev->wiphy.max_acl_mac_addrs))
goto nla_put_failure;
/*
@@ -1561,35 +1607,41 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev,
state->split_start++;
break;
case 9:
- if (dev->wiphy.extended_capabilities &&
+ if (rdev->wiphy.extended_capabilities &&
(nla_put(msg, NL80211_ATTR_EXT_CAPA,
- dev->wiphy.extended_capabilities_len,
- dev->wiphy.extended_capabilities) ||
+ rdev->wiphy.extended_capabilities_len,
+ rdev->wiphy.extended_capabilities) ||
nla_put(msg, NL80211_ATTR_EXT_CAPA_MASK,
- dev->wiphy.extended_capabilities_len,
- dev->wiphy.extended_capabilities_mask)))
+ rdev->wiphy.extended_capabilities_len,
+ rdev->wiphy.extended_capabilities_mask)))
goto nla_put_failure;
- if (dev->wiphy.vht_capa_mod_mask &&
+ if (rdev->wiphy.vht_capa_mod_mask &&
nla_put(msg, NL80211_ATTR_VHT_CAPABILITY_MASK,
- sizeof(*dev->wiphy.vht_capa_mod_mask),
- dev->wiphy.vht_capa_mod_mask))
+ sizeof(*rdev->wiphy.vht_capa_mod_mask),
+ rdev->wiphy.vht_capa_mod_mask))
goto nla_put_failure;
state->split_start++;
break;
case 10:
- if (nl80211_send_coalesce(msg, dev))
+ if (nl80211_send_coalesce(msg, rdev))
goto nla_put_failure;
- if ((dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_5_10_MHZ) &&
+ if ((rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_5_10_MHZ) &&
(nla_put_flag(msg, NL80211_ATTR_SUPPORT_5_MHZ) ||
nla_put_flag(msg, NL80211_ATTR_SUPPORT_10_MHZ)))
goto nla_put_failure;
+
+ if (rdev->wiphy.max_ap_assoc_sta &&
+ nla_put_u32(msg, NL80211_ATTR_MAX_AP_ASSOC_STA,
+ rdev->wiphy.max_ap_assoc_sta))
+ goto nla_put_failure;
+
state->split_start++;
break;
case 11:
- if (dev->wiphy.n_vendor_commands) {
+ if (rdev->wiphy.n_vendor_commands) {
const struct nl80211_vendor_cmd_info *info;
struct nlattr *nested;
@@ -1597,15 +1649,15 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev,
if (!nested)
goto nla_put_failure;
- for (i = 0; i < dev->wiphy.n_vendor_commands; i++) {
- info = &dev->wiphy.vendor_commands[i].info;
+ for (i = 0; i < rdev->wiphy.n_vendor_commands; i++) {
+ info = &rdev->wiphy.vendor_commands[i].info;
if (nla_put(msg, i + 1, sizeof(*info), info))
goto nla_put_failure;
}
nla_nest_end(msg, nested);
}
- if (dev->wiphy.n_vendor_events) {
+ if (rdev->wiphy.n_vendor_events) {
const struct nl80211_vendor_cmd_info *info;
struct nlattr *nested;
@@ -1614,18 +1666,26 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev,
if (!nested)
goto nla_put_failure;
- for (i = 0; i < dev->wiphy.n_vendor_events; i++) {
- info = &dev->wiphy.vendor_events[i];
+ for (i = 0; i < rdev->wiphy.n_vendor_events; i++) {
+ info = &rdev->wiphy.vendor_events[i];
if (nla_put(msg, i + 1, sizeof(*info), info))
goto nla_put_failure;
}
nla_nest_end(msg, nested);
}
+ state->split_start++;
+ break;
+ case 12:
+ if (rdev->wiphy.flags & WIPHY_FLAG_HAS_CHANNEL_SWITCH &&
+ nla_put_u8(msg, NL80211_ATTR_MAX_CSA_COUNTERS,
+ rdev->wiphy.max_num_csa_counters))
+ goto nla_put_failure;
/* done */
state->split_start = 0;
break;
}
+ finish:
return genlmsg_end(msg, hdr);
nla_put_failure:
@@ -1658,7 +1718,7 @@ static int nl80211_dump_wiphy_parse(struct sk_buff *skb,
if (!netdev)
return -ENODEV;
if (netdev->ieee80211_ptr) {
- rdev = wiphy_to_dev(
+ rdev = wiphy_to_rdev(
netdev->ieee80211_ptr->wiphy);
state->filter_wiphy = rdev->wiphy_idx;
}
@@ -1671,7 +1731,7 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb)
{
int idx = 0, ret;
struct nl80211_dump_wiphy_state *state = (void *)cb->args[0];
- struct cfg80211_registered_device *dev;
+ struct cfg80211_registered_device *rdev;
rtnl_lock();
if (!state) {
@@ -1690,17 +1750,18 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb)
cb->args[0] = (long)state;
}
- list_for_each_entry(dev, &cfg80211_rdev_list, list) {
- if (!net_eq(wiphy_net(&dev->wiphy), sock_net(skb->sk)))
+ list_for_each_entry(rdev, &cfg80211_rdev_list, list) {
+ if (!net_eq(wiphy_net(&rdev->wiphy), sock_net(skb->sk)))
continue;
if (++idx <= state->start)
continue;
if (state->filter_wiphy != -1 &&
- state->filter_wiphy != dev->wiphy_idx)
+ state->filter_wiphy != rdev->wiphy_idx)
continue;
/* attempt to fit multiple wiphy data chunks into the skb */
do {
- ret = nl80211_send_wiphy(dev, skb,
+ ret = nl80211_send_wiphy(rdev, NL80211_CMD_NEW_WIPHY,
+ skb,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NLM_F_MULTI, state);
@@ -1748,14 +1809,15 @@ static int nl80211_dump_wiphy_done(struct netlink_callback *cb)
static int nl80211_get_wiphy(struct sk_buff *skb, struct genl_info *info)
{
struct sk_buff *msg;
- struct cfg80211_registered_device *dev = info->user_ptr[0];
+ struct cfg80211_registered_device *rdev = info->user_ptr[0];
struct nl80211_dump_wiphy_state state = {};
msg = nlmsg_new(4096, GFP_KERNEL);
if (!msg)
return -ENOMEM;
- if (nl80211_send_wiphy(dev, msg, info->snd_portid, info->snd_seq, 0,
+ if (nl80211_send_wiphy(rdev, NL80211_CMD_NEW_WIPHY, msg,
+ info->snd_portid, info->snd_seq, 0,
&state) < 0) {
nlmsg_free(msg);
return -ENOBUFS;
@@ -1882,18 +1944,20 @@ static int nl80211_parse_chandef(struct cfg80211_registered_device *rdev,
}
static int __nl80211_set_channel(struct cfg80211_registered_device *rdev,
- struct wireless_dev *wdev,
+ struct net_device *dev,
struct genl_info *info)
{
struct cfg80211_chan_def chandef;
int result;
enum nl80211_iftype iftype = NL80211_IFTYPE_MONITOR;
+ struct wireless_dev *wdev = NULL;
- if (wdev)
- iftype = wdev->iftype;
-
+ if (dev)
+ wdev = dev->ieee80211_ptr;
if (!nl80211_can_set_dev_channel(wdev))
return -EOPNOTSUPP;
+ if (wdev)
+ iftype = wdev->iftype;
result = nl80211_parse_chandef(rdev, info, &chandef);
if (result)
@@ -1902,14 +1966,27 @@ static int __nl80211_set_channel(struct cfg80211_registered_device *rdev,
switch (iftype) {
case NL80211_IFTYPE_AP:
case NL80211_IFTYPE_P2P_GO:
- if (wdev->beacon_interval) {
- result = -EBUSY;
- break;
- }
- if (!cfg80211_reg_can_beacon(&rdev->wiphy, &chandef)) {
+ if (!cfg80211_reg_can_beacon(&rdev->wiphy, &chandef, iftype)) {
result = -EINVAL;
break;
}
+ if (wdev->beacon_interval) {
+ if (!dev || !rdev->ops->set_ap_chanwidth ||
+ !(rdev->wiphy.features &
+ NL80211_FEATURE_AP_MODE_CHAN_WIDTH_CHANGE)) {
+ result = -EBUSY;
+ break;
+ }
+
+ /* Only allow dynamic channel width changes */
+ if (chandef.chan != wdev->preset_chandef.chan) {
+ result = -EBUSY;
+ break;
+ }
+ result = rdev_set_ap_chanwidth(rdev, dev, &chandef);
+ if (result)
+ break;
+ }
wdev->preset_chandef = chandef;
result = 0;
break;
@@ -1931,7 +2008,7 @@ static int nl80211_set_channel(struct sk_buff *skb, struct genl_info *info)
struct cfg80211_registered_device *rdev = info->user_ptr[0];
struct net_device *netdev = info->user_ptr[1];
- return __nl80211_set_channel(rdev, netdev->ieee80211_ptr, info);
+ return __nl80211_set_channel(rdev, netdev, info);
}
static int nl80211_set_wds_peer(struct sk_buff *skb, struct genl_info *info)
@@ -1987,7 +2064,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
netdev = __dev_get_by_index(genl_info_net(info), ifindex);
if (netdev && netdev->ieee80211_ptr)
- rdev = wiphy_to_dev(netdev->ieee80211_ptr->wiphy);
+ rdev = wiphy_to_rdev(netdev->ieee80211_ptr->wiphy);
else
netdev = NULL;
}
@@ -2035,10 +2112,12 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
nla_for_each_nested(nl_txq_params,
info->attrs[NL80211_ATTR_WIPHY_TXQ_PARAMS],
rem_txq_params) {
- nla_parse(tb, NL80211_TXQ_ATTR_MAX,
- nla_data(nl_txq_params),
- nla_len(nl_txq_params),
- txq_params_policy);
+ result = nla_parse(tb, NL80211_TXQ_ATTR_MAX,
+ nla_data(nl_txq_params),
+ nla_len(nl_txq_params),
+ txq_params_policy);
+ if (result)
+ return result;
result = parse_txq_params(tb, &txq_params);
if (result)
return result;
@@ -2051,9 +2130,10 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
}
if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) {
- result = __nl80211_set_channel(rdev,
- nl80211_can_set_dev_channel(wdev) ? wdev : NULL,
- info);
+ result = __nl80211_set_channel(
+ rdev,
+ nl80211_can_set_dev_channel(wdev) ? netdev : NULL,
+ info);
if (result)
return result;
}
@@ -2201,7 +2281,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
static inline u64 wdev_id(struct wireless_dev *wdev)
{
return (u64)wdev->identifier |
- ((u64)wiphy_to_dev(wdev->wiphy)->wiphy_idx << 32);
+ ((u64)wiphy_to_rdev(wdev->wiphy)->wiphy_idx << 32);
}
static int nl80211_send_chandef(struct sk_buff *msg,
@@ -2327,7 +2407,7 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback *
static int nl80211_get_interface(struct sk_buff *skb, struct genl_info *info)
{
struct sk_buff *msg;
- struct cfg80211_registered_device *dev = info->user_ptr[0];
+ struct cfg80211_registered_device *rdev = info->user_ptr[0];
struct wireless_dev *wdev = info->user_ptr[1];
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
@@ -2335,7 +2415,7 @@ static int nl80211_get_interface(struct sk_buff *skb, struct genl_info *info)
return -ENOMEM;
if (nl80211_send_iface(msg, info->snd_portid, info->snd_seq, 0,
- dev, wdev) < 0) {
+ rdev, wdev) < 0) {
nlmsg_free(msg);
return -ENOBUFS;
}
@@ -2486,6 +2566,9 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info)
enum nl80211_iftype type = NL80211_IFTYPE_UNSPECIFIED;
u32 flags;
+ /* to avoid failing a new interface creation due to pending removal */
+ cfg80211_destroy_ifaces(rdev);
+
memset(&params, 0, sizeof(params));
if (!info->attrs[NL80211_ATTR_IFNAME])
@@ -2535,6 +2618,9 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info)
return PTR_ERR(wdev);
}
+ if (info->attrs[NL80211_ATTR_IFACE_SOCKET_OWNER])
+ wdev->owner_nlportid = info->snd_portid;
+
switch (type) {
case NL80211_IFTYPE_MESH_POINT:
if (!info->attrs[NL80211_ATTR_MESH_ID])
@@ -3114,7 +3200,6 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
struct wireless_dev *wdev = dev->ieee80211_ptr;
struct cfg80211_ap_settings params;
int err;
- u8 radar_detect_width = 0;
if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP &&
dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO)
@@ -3230,24 +3315,10 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
} else if (!nl80211_get_ap_channel(rdev, &params))
return -EINVAL;
- if (!cfg80211_reg_can_beacon(&rdev->wiphy, &params.chandef))
+ if (!cfg80211_reg_can_beacon(&rdev->wiphy, &params.chandef,
+ wdev->iftype))
return -EINVAL;
- err = cfg80211_chandef_dfs_required(wdev->wiphy, &params.chandef);
- if (err < 0)
- return err;
- if (err) {
- radar_detect_width = BIT(params.chandef.width);
- params.radar_required = true;
- }
-
- err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype,
- params.chandef.chan,
- CHAN_MODE_SHARED,
- radar_detect_width);
- if (err)
- return err;
-
if (info->attrs[NL80211_ATTR_ACL_POLICY]) {
params.acl = parse_acl_data(&rdev->wiphy, info);
if (IS_ERR(params.acl))
@@ -3259,7 +3330,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
if (!err) {
wdev->preset_chandef = params.chandef;
wdev->beacon_interval = params.beacon_interval;
- wdev->channel = params.chandef.chan;
+ wdev->chandef = params.chandef;
wdev->ssid_len = params.ssid_len;
memcpy(wdev->ssid, params.ssid, wdev->ssid_len);
}
@@ -3304,7 +3375,7 @@ static int nl80211_stop_ap(struct sk_buff *skb, struct genl_info *info)
struct cfg80211_registered_device *rdev = info->user_ptr[0];
struct net_device *dev = info->user_ptr[1];
- return cfg80211_stop_ap(rdev, dev);
+ return cfg80211_stop_ap(rdev, dev, false);
}
static const struct nla_policy sta_flags_policy[NL80211_STA_FLAG_MAX + 1] = {
@@ -3585,6 +3656,10 @@ static int nl80211_send_station(struct sk_buff *msg, u32 portid, u32 seq,
nla_put_u32(msg, NL80211_STA_INFO_TX_FAILED,
sinfo->tx_failed))
goto nla_put_failure;
+ if ((sinfo->filled & STATION_INFO_EXPECTED_THROUGHPUT) &&
+ nla_put_u32(msg, NL80211_STA_INFO_EXPECTED_THROUGHPUT,
+ sinfo->expected_throughput))
+ goto nla_put_failure;
if ((sinfo->filled & STATION_INFO_BEACON_LOSS_COUNT) &&
nla_put_u32(msg, NL80211_STA_INFO_BEACON_LOSS,
sinfo->beacon_loss_count))
@@ -3647,13 +3722,13 @@ static int nl80211_dump_station(struct sk_buff *skb,
struct netlink_callback *cb)
{
struct station_info sinfo;
- struct cfg80211_registered_device *dev;
+ struct cfg80211_registered_device *rdev;
struct wireless_dev *wdev;
u8 mac_addr[ETH_ALEN];
int sta_idx = cb->args[2];
int err;
- err = nl80211_prepare_wdev_dump(skb, cb, &dev, &wdev);
+ err = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev);
if (err)
return err;
@@ -3662,14 +3737,14 @@ static int nl80211_dump_station(struct sk_buff *skb,
goto out_err;
}
- if (!dev->ops->dump_station) {
+ if (!rdev->ops->dump_station) {
err = -EOPNOTSUPP;
goto out_err;
}
while (1) {
memset(&sinfo, 0, sizeof(sinfo));
- err = rdev_dump_station(dev, wdev->netdev, sta_idx,
+ err = rdev_dump_station(rdev, wdev->netdev, sta_idx,
mac_addr, &sinfo);
if (err == -ENOENT)
break;
@@ -3679,7 +3754,7 @@ static int nl80211_dump_station(struct sk_buff *skb,
if (nl80211_send_station(skb,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
- dev, wdev->netdev, mac_addr,
+ rdev, wdev->netdev, mac_addr,
&sinfo) < 0)
goto out;
@@ -3691,7 +3766,7 @@ static int nl80211_dump_station(struct sk_buff *skb,
cb->args[2] = sta_idx;
err = skb->len;
out_err:
- nl80211_finish_wdev_dump(dev);
+ nl80211_finish_wdev_dump(rdev);
return err;
}
@@ -3902,8 +3977,8 @@ static struct net_device *get_vlan(struct genl_info *info,
return ERR_PTR(ret);
}
-static struct nla_policy
-nl80211_sta_wme_policy[NL80211_STA_WME_MAX + 1] __read_mostly = {
+static const struct nla_policy
+nl80211_sta_wme_policy[NL80211_STA_WME_MAX + 1] = {
[NL80211_STA_WME_UAPSD_QUEUES] = { .type = NLA_U8 },
[NL80211_STA_WME_MAX_SP] = { .type = NLA_U8 },
};
@@ -4352,18 +4427,18 @@ static int nl80211_dump_mpath(struct sk_buff *skb,
struct netlink_callback *cb)
{
struct mpath_info pinfo;
- struct cfg80211_registered_device *dev;
+ struct cfg80211_registered_device *rdev;
struct wireless_dev *wdev;
u8 dst[ETH_ALEN];
u8 next_hop[ETH_ALEN];
int path_idx = cb->args[2];
int err;
- err = nl80211_prepare_wdev_dump(skb, cb, &dev, &wdev);
+ err = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev);
if (err)
return err;
- if (!dev->ops->dump_mpath) {
+ if (!rdev->ops->dump_mpath) {
err = -EOPNOTSUPP;
goto out_err;
}
@@ -4374,7 +4449,7 @@ static int nl80211_dump_mpath(struct sk_buff *skb,
}
while (1) {
- err = rdev_dump_mpath(dev, wdev->netdev, path_idx, dst,
+ err = rdev_dump_mpath(rdev, wdev->netdev, path_idx, dst,
next_hop, &pinfo);
if (err == -ENOENT)
break;
@@ -4395,7 +4470,7 @@ static int nl80211_dump_mpath(struct sk_buff *skb,
cb->args[2] = path_idx;
err = skb->len;
out_err:
- nl80211_finish_wdev_dump(dev);
+ nl80211_finish_wdev_dump(rdev);
return err;
}
@@ -4590,6 +4665,7 @@ static const struct nla_policy reg_rule_policy[NL80211_REG_RULE_ATTR_MAX + 1] =
[NL80211_ATTR_FREQ_RANGE_MAX_BW] = { .type = NLA_U32 },
[NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN] = { .type = NLA_U32 },
[NL80211_ATTR_POWER_RULE_MAX_EIRP] = { .type = NLA_U32 },
+ [NL80211_ATTR_DFS_CAC_TIME] = { .type = NLA_U32 },
};
static int parse_reg_rule(struct nlattr *tb[],
@@ -4625,12 +4701,15 @@ static int parse_reg_rule(struct nlattr *tb[],
power_rule->max_antenna_gain =
nla_get_u32(tb[NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN]);
+ if (tb[NL80211_ATTR_DFS_CAC_TIME])
+ reg_rule->dfs_cac_ms =
+ nla_get_u32(tb[NL80211_ATTR_DFS_CAC_TIME]);
+
return 0;
}
static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info)
{
- int r;
char *data = NULL;
enum nl80211_user_reg_hint_type user_reg_hint_type;
@@ -4643,11 +4722,6 @@ static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info)
if (unlikely(!rcu_access_pointer(cfg80211_regdomain)))
return -EINPROGRESS;
- if (!info->attrs[NL80211_ATTR_REG_ALPHA2])
- return -EINVAL;
-
- data = nla_data(info->attrs[NL80211_ATTR_REG_ALPHA2]);
-
if (info->attrs[NL80211_ATTR_USER_REG_HINT_TYPE])
user_reg_hint_type =
nla_get_u32(info->attrs[NL80211_ATTR_USER_REG_HINT_TYPE]);
@@ -4657,14 +4731,16 @@ static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info)
switch (user_reg_hint_type) {
case NL80211_USER_REG_HINT_USER:
case NL80211_USER_REG_HINT_CELL_BASE:
- break;
+ if (!info->attrs[NL80211_ATTR_REG_ALPHA2])
+ return -EINVAL;
+
+ data = nla_data(info->attrs[NL80211_ATTR_REG_ALPHA2]);
+ return regulatory_hint_user(data, user_reg_hint_type);
+ case NL80211_USER_REG_HINT_INDOOR:
+ return regulatory_hint_indoor_user();
default:
return -EINVAL;
}
-
- r = regulatory_hint_user(data, user_reg_hint_type);
-
- return r;
}
static int nl80211_get_mesh_config(struct sk_buff *skb,
@@ -5086,6 +5162,7 @@ static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info)
const struct ieee80211_reg_rule *reg_rule;
const struct ieee80211_freq_range *freq_range;
const struct ieee80211_power_rule *power_rule;
+ unsigned int max_bandwidth_khz;
reg_rule = &regdom->reg_rules[i];
freq_range = &reg_rule->freq_range;
@@ -5095,6 +5172,11 @@ static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info)
if (!nl_reg_rule)
goto nla_put_failure_rcu;
+ max_bandwidth_khz = freq_range->max_bandwidth_khz;
+ if (!max_bandwidth_khz)
+ max_bandwidth_khz = reg_get_max_bandwidth(regdom,
+ reg_rule);
+
if (nla_put_u32(msg, NL80211_ATTR_REG_RULE_FLAGS,
reg_rule->flags) ||
nla_put_u32(msg, NL80211_ATTR_FREQ_RANGE_START,
@@ -5102,11 +5184,13 @@ static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info)
nla_put_u32(msg, NL80211_ATTR_FREQ_RANGE_END,
freq_range->end_freq_khz) ||
nla_put_u32(msg, NL80211_ATTR_FREQ_RANGE_MAX_BW,
- freq_range->max_bandwidth_khz) ||
+ max_bandwidth_khz) ||
nla_put_u32(msg, NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN,
power_rule->max_antenna_gain) ||
nla_put_u32(msg, NL80211_ATTR_POWER_RULE_MAX_EIRP,
- power_rule->max_eirp))
+ power_rule->max_eirp) ||
+ nla_put_u32(msg, NL80211_ATTR_DFS_CAC_TIME,
+ reg_rule->dfs_cac_ms))
goto nla_put_failure_rcu;
nla_nest_end(msg, nl_reg_rule);
@@ -5178,9 +5262,11 @@ static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info)
nla_for_each_nested(nl_reg_rule, info->attrs[NL80211_ATTR_REG_RULES],
rem_reg_rules) {
- nla_parse(tb, NL80211_REG_RULE_ATTR_MAX,
- nla_data(nl_reg_rule), nla_len(nl_reg_rule),
- reg_rule_policy);
+ r = nla_parse(tb, NL80211_REG_RULE_ATTR_MAX,
+ nla_data(nl_reg_rule), nla_len(nl_reg_rule),
+ reg_rule_policy);
+ if (r)
+ goto bad_reg;
r = parse_reg_rule(tb, &rd->reg_rules[rule_idx]);
if (r)
goto bad_reg;
@@ -5443,6 +5529,7 @@ static int nl80211_start_sched_scan(struct sk_buff *skb,
enum ieee80211_band band;
size_t ie_len;
struct nlattr *tb[NL80211_SCHED_SCAN_MATCH_ATTR_MAX + 1];
+ s32 default_match_rssi = NL80211_SCAN_RSSI_THOLD_OFF;
if (!(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN) ||
!rdev->ops->sched_scan_start)
@@ -5477,11 +5564,40 @@ static int nl80211_start_sched_scan(struct sk_buff *skb,
if (n_ssids > wiphy->max_sched_scan_ssids)
return -EINVAL;
- if (info->attrs[NL80211_ATTR_SCHED_SCAN_MATCH])
+ /*
+ * First, count the number of 'real' matchsets. Due to an issue with
+ * the old implementation, matchsets containing only the RSSI attribute
+ * (NL80211_SCHED_SCAN_MATCH_ATTR_RSSI) are considered as the 'default'
+ * RSSI for all matchsets, rather than their own matchset for reporting
+ * all APs with a strong RSSI. This is needed to be compatible with
+ * older userspace that treated a matchset with only the RSSI as the
+ * global RSSI for all other matchsets - if there are other matchsets.
+ */
+ if (info->attrs[NL80211_ATTR_SCHED_SCAN_MATCH]) {
nla_for_each_nested(attr,
info->attrs[NL80211_ATTR_SCHED_SCAN_MATCH],
- tmp)
- n_match_sets++;
+ tmp) {
+ struct nlattr *rssi;
+
+ err = nla_parse(tb, NL80211_SCHED_SCAN_MATCH_ATTR_MAX,
+ nla_data(attr), nla_len(attr),
+ nl80211_match_policy);
+ if (err)
+ return err;
+ /* add other standalone attributes here */
+ if (tb[NL80211_SCHED_SCAN_MATCH_ATTR_SSID]) {
+ n_match_sets++;
+ continue;
+ }
+ rssi = tb[NL80211_SCHED_SCAN_MATCH_ATTR_RSSI];
+ if (rssi)
+ default_match_rssi = nla_get_s32(rssi);
+ }
+ }
+
+ /* However, if there's no other matchset, add the RSSI one */
+ if (!n_match_sets && default_match_rssi != NL80211_SCAN_RSSI_THOLD_OFF)
+ n_match_sets = 1;
if (n_match_sets > wiphy->max_match_sets)
return -EINVAL;
@@ -5602,11 +5718,22 @@ static int nl80211_start_sched_scan(struct sk_buff *skb,
tmp) {
struct nlattr *ssid, *rssi;
- nla_parse(tb, NL80211_SCHED_SCAN_MATCH_ATTR_MAX,
- nla_data(attr), nla_len(attr),
- nl80211_match_policy);
+ err = nla_parse(tb, NL80211_SCHED_SCAN_MATCH_ATTR_MAX,
+ nla_data(attr), nla_len(attr),
+ nl80211_match_policy);
+ if (err)
+ goto out_free;
ssid = tb[NL80211_SCHED_SCAN_MATCH_ATTR_SSID];
if (ssid) {
+ if (WARN_ON(i >= n_match_sets)) {
+ /* this indicates a programming error,
+ * the loop above should have verified
+ * things properly
+ */
+ err = -EINVAL;
+ goto out_free;
+ }
+
if (nla_len(ssid) > IEEE80211_MAX_SSID_LEN) {
err = -EINVAL;
goto out_free;
@@ -5615,19 +5742,32 @@ static int nl80211_start_sched_scan(struct sk_buff *skb,
nla_data(ssid), nla_len(ssid));
request->match_sets[i].ssid.ssid_len =
nla_len(ssid);
+ /* special attribute - old implementation w/a */
+ request->match_sets[i].rssi_thold =
+ default_match_rssi;
+ rssi = tb[NL80211_SCHED_SCAN_MATCH_ATTR_RSSI];
+ if (rssi)
+ request->match_sets[i].rssi_thold =
+ nla_get_s32(rssi);
}
- rssi = tb[NL80211_SCHED_SCAN_MATCH_ATTR_RSSI];
- if (rssi)
- request->rssi_thold = nla_get_u32(rssi);
- else
- request->rssi_thold =
- NL80211_SCAN_RSSI_THOLD_OFF;
i++;
}
+
+ /* there was no other matchset, so the RSSI one is alone */
+ if (i == 0)
+ request->match_sets[0].rssi_thold = default_match_rssi;
+
+ request->min_rssi_thold = INT_MAX;
+ for (i = 0; i < n_match_sets; i++)
+ request->min_rssi_thold =
+ min(request->match_sets[i].rssi_thold,
+ request->min_rssi_thold);
+ } else {
+ request->min_rssi_thold = NL80211_SCAN_RSSI_THOLD_OFF;
}
- if (info->attrs[NL80211_ATTR_IE]) {
- request->ie_len = nla_len(info->attrs[NL80211_ATTR_IE]);
+ if (ie_len) {
+ request->ie_len = ie_len;
memcpy((void *)request->ie,
nla_data(info->attrs[NL80211_ATTR_IE]),
request->ie_len);
@@ -5682,6 +5822,7 @@ static int nl80211_start_radar_detection(struct sk_buff *skb,
struct wireless_dev *wdev = dev->ieee80211_ptr;
struct cfg80211_chan_def chandef;
enum nl80211_dfs_regions dfs_region;
+ unsigned int cac_time_ms;
int err;
dfs_region = reg_get_dfs_region(wdev->wiphy);
@@ -5698,7 +5839,8 @@ static int nl80211_start_radar_detection(struct sk_buff *skb,
if (wdev->cac_started)
return -EBUSY;
- err = cfg80211_chandef_dfs_required(wdev->wiphy, &chandef);
+ err = cfg80211_chandef_dfs_required(wdev->wiphy, &chandef,
+ wdev->iftype);
if (err < 0)
return err;
@@ -5711,17 +5853,17 @@ static int nl80211_start_radar_detection(struct sk_buff *skb,
if (!rdev->ops->start_radar_detection)
return -EOPNOTSUPP;
- err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype,
- chandef.chan, CHAN_MODE_SHARED,
- BIT(chandef.width));
- if (err)
- return err;
+ cac_time_ms = cfg80211_chandef_dfs_cac_time(&rdev->wiphy, &chandef);
+ if (WARN_ON(!cac_time_ms))
+ cac_time_ms = IEEE80211_DFS_MIN_CAC_TIME_MS;
- err = rdev->ops->start_radar_detection(&rdev->wiphy, dev, &chandef);
+ err = rdev->ops->start_radar_detection(&rdev->wiphy, dev, &chandef,
+ cac_time_ms);
if (!err) {
- wdev->channel = chandef.chan;
+ wdev->chandef = chandef;
wdev->cac_started = true;
wdev->cac_start_time = jiffies;
+ wdev->cac_time_ms = cac_time_ms;
}
return err;
}
@@ -5739,6 +5881,7 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info)
u8 radar_detect_width = 0;
int err;
bool need_new_beacon = false;
+ int len, i;
if (!rdev->ops->channel_switch ||
!(rdev->wiphy.flags & WIPHY_FLAG_HAS_CHANNEL_SWITCH))
@@ -5751,10 +5894,15 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info)
/* useless if AP is not running */
if (!wdev->beacon_interval)
- return -EINVAL;
+ return -ENOTCONN;
break;
case NL80211_IFTYPE_ADHOC:
+ if (!wdev->ssid_len)
+ return -ENOTCONN;
+ break;
case NL80211_IFTYPE_MESH_POINT:
+ if (!wdev->mesh_id_len)
+ return -ENOTCONN;
break;
default:
return -EOPNOTSUPP;
@@ -5792,26 +5940,55 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info)
if (!csa_attrs[NL80211_ATTR_CSA_C_OFF_BEACON])
return -EINVAL;
- params.counter_offset_beacon =
- nla_get_u16(csa_attrs[NL80211_ATTR_CSA_C_OFF_BEACON]);
- if (params.counter_offset_beacon >= params.beacon_csa.tail_len)
+ len = nla_len(csa_attrs[NL80211_ATTR_CSA_C_OFF_BEACON]);
+ if (!len || (len % sizeof(u16)))
return -EINVAL;
- /* sanity check - counters should be the same */
- if (params.beacon_csa.tail[params.counter_offset_beacon] !=
- params.count)
+ params.n_counter_offsets_beacon = len / sizeof(u16);
+ if (rdev->wiphy.max_num_csa_counters &&
+ (params.n_counter_offsets_beacon >
+ rdev->wiphy.max_num_csa_counters))
return -EINVAL;
+ params.counter_offsets_beacon =
+ nla_data(csa_attrs[NL80211_ATTR_CSA_C_OFF_BEACON]);
+
+ /* sanity checks - counters should fit and be the same */
+ for (i = 0; i < params.n_counter_offsets_beacon; i++) {
+ u16 offset = params.counter_offsets_beacon[i];
+
+ if (offset >= params.beacon_csa.tail_len)
+ return -EINVAL;
+
+ if (params.beacon_csa.tail[offset] != params.count)
+ return -EINVAL;
+ }
+
if (csa_attrs[NL80211_ATTR_CSA_C_OFF_PRESP]) {
- params.counter_offset_presp =
- nla_get_u16(csa_attrs[NL80211_ATTR_CSA_C_OFF_PRESP]);
- if (params.counter_offset_presp >=
- params.beacon_csa.probe_resp_len)
+ len = nla_len(csa_attrs[NL80211_ATTR_CSA_C_OFF_PRESP]);
+ if (!len || (len % sizeof(u16)))
return -EINVAL;
- if (params.beacon_csa.probe_resp[params.counter_offset_presp] !=
- params.count)
+ params.n_counter_offsets_presp = len / sizeof(u16);
+ if (rdev->wiphy.max_num_csa_counters &&
+ (params.n_counter_offsets_beacon >
+ rdev->wiphy.max_num_csa_counters))
return -EINVAL;
+
+ params.counter_offsets_presp =
+ nla_data(csa_attrs[NL80211_ATTR_CSA_C_OFF_PRESP]);
+
+ /* sanity checks - counters should fit and be the same */
+ for (i = 0; i < params.n_counter_offsets_presp; i++) {
+ u16 offset = params.counter_offsets_presp[i];
+
+ if (offset >= params.beacon_csa.probe_resp_len)
+ return -EINVAL;
+
+ if (params.beacon_csa.probe_resp[offset] !=
+ params.count)
+ return -EINVAL;
+ }
}
skip_beacons:
@@ -5819,22 +5996,25 @@ skip_beacons:
if (err)
return err;
- if (!cfg80211_reg_can_beacon(&rdev->wiphy, &params.chandef))
+ if (!cfg80211_reg_can_beacon(&rdev->wiphy, &params.chandef,
+ wdev->iftype))
return -EINVAL;
- if (dev->ieee80211_ptr->iftype == NL80211_IFTYPE_AP ||
- dev->ieee80211_ptr->iftype == NL80211_IFTYPE_P2P_GO ||
- dev->ieee80211_ptr->iftype == NL80211_IFTYPE_ADHOC) {
- err = cfg80211_chandef_dfs_required(wdev->wiphy,
- &params.chandef);
- if (err < 0) {
- return err;
- } else if (err) {
- radar_detect_width = BIT(params.chandef.width);
- params.radar_required = true;
- }
+ err = cfg80211_chandef_dfs_required(wdev->wiphy,
+ &params.chandef,
+ wdev->iftype);
+ if (err < 0)
+ return err;
+
+ if (err > 0) {
+ radar_detect_width = BIT(params.chandef.width);
+ params.radar_required = true;
}
+ /* TODO: I left this here for now. With channel switch, the
+ * verification is a bit more complicated, because we only do
+ * it later when the channel switch really happens.
+ */
err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype,
params.chandef.chan,
CHAN_MODE_SHARED,
@@ -6061,12 +6241,12 @@ static int nl80211_dump_survey(struct sk_buff *skb,
struct netlink_callback *cb)
{
struct survey_info survey;
- struct cfg80211_registered_device *dev;
+ struct cfg80211_registered_device *rdev;
struct wireless_dev *wdev;
int survey_idx = cb->args[2];
int res;
- res = nl80211_prepare_wdev_dump(skb, cb, &dev, &wdev);
+ res = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev);
if (res)
return res;
@@ -6075,7 +6255,7 @@ static int nl80211_dump_survey(struct sk_buff *skb,
goto out_err;
}
- if (!dev->ops->dump_survey) {
+ if (!rdev->ops->dump_survey) {
res = -EOPNOTSUPP;
goto out_err;
}
@@ -6083,7 +6263,7 @@ static int nl80211_dump_survey(struct sk_buff *skb,
while (1) {
struct ieee80211_channel *chan;
- res = rdev_dump_survey(dev, wdev->netdev, survey_idx, &survey);
+ res = rdev_dump_survey(rdev, wdev->netdev, survey_idx, &survey);
if (res == -ENOENT)
break;
if (res)
@@ -6095,7 +6275,7 @@ static int nl80211_dump_survey(struct sk_buff *skb,
goto out;
}
- chan = ieee80211_get_channel(&dev->wiphy,
+ chan = ieee80211_get_channel(&rdev->wiphy,
survey.channel->center_freq);
if (!chan || chan->flags & IEEE80211_CHAN_DISABLED) {
survey_idx++;
@@ -6114,7 +6294,7 @@ static int nl80211_dump_survey(struct sk_buff *skb,
cb->args[2] = survey_idx;
res = skb->len;
out_err:
- nl80211_finish_wdev_dump(dev);
+ nl80211_finish_wdev_dump(rdev);
return res;
}
@@ -6192,9 +6372,9 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info)
return -EOPNOTSUPP;
bssid = nla_data(info->attrs[NL80211_ATTR_MAC]);
- chan = ieee80211_get_channel(&rdev->wiphy,
- nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]));
- if (!chan || (chan->flags & IEEE80211_CHAN_DISABLED))
+ chan = nl80211_get_valid_chan(&rdev->wiphy,
+ info->attrs[NL80211_ATTR_WIPHY_FREQ]);
+ if (!chan)
return -EINVAL;
ssid = nla_data(info->attrs[NL80211_ATTR_SSID]);
@@ -6347,9 +6527,9 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
bssid = nla_data(info->attrs[NL80211_ATTR_MAC]);
- chan = ieee80211_get_channel(&rdev->wiphy,
- nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]));
- if (!chan || (chan->flags & IEEE80211_CHAN_DISABLED))
+ chan = nl80211_get_valid_chan(&rdev->wiphy,
+ info->attrs[NL80211_ATTR_WIPHY_FREQ]);
+ if (!chan)
return -EINVAL;
ssid = nla_data(info->attrs[NL80211_ATTR_SSID]);
@@ -6590,7 +6770,8 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info)
if (err)
return err;
- if (!cfg80211_reg_can_beacon(&rdev->wiphy, &ibss.chandef))
+ if (!cfg80211_reg_can_beacon(&rdev->wiphy, &ibss.chandef,
+ NL80211_IFTYPE_ADHOC))
return -EINVAL;
switch (ibss.chandef.width) {
@@ -6765,7 +6946,7 @@ struct sk_buff *__cfg80211_alloc_event_skb(struct wiphy *wiphy,
int vendor_event_idx,
int approxlen, gfp_t gfp)
{
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
const struct nl80211_vendor_cmd_info *info;
switch (cmd) {
@@ -6985,6 +7166,9 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
if (info->attrs[NL80211_ATTR_MAC])
connect.bssid = nla_data(info->attrs[NL80211_ATTR_MAC]);
+ else if (info->attrs[NL80211_ATTR_MAC_HINT])
+ connect.bssid_hint =
+ nla_data(info->attrs[NL80211_ATTR_MAC_HINT]);
connect.ssid = nla_data(info->attrs[NL80211_ATTR_SSID]);
connect.ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]);
@@ -7003,11 +7187,14 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
}
if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) {
- connect.channel =
- ieee80211_get_channel(wiphy,
- nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]));
- if (!connect.channel ||
- connect.channel->flags & IEEE80211_CHAN_DISABLED)
+ connect.channel = nl80211_get_valid_chan(
+ wiphy, info->attrs[NL80211_ATTR_WIPHY_FREQ]);
+ if (!connect.channel)
+ return -EINVAL;
+ } else if (info->attrs[NL80211_ATTR_WIPHY_FREQ_HINT]) {
+ connect.channel_hint = nl80211_get_valid_chan(
+ wiphy, info->attrs[NL80211_ATTR_WIPHY_FREQ_HINT]);
+ if (!connect.channel_hint)
return -EINVAL;
}
@@ -7174,6 +7361,7 @@ static int nl80211_tdls_mgmt(struct sk_buff *skb, struct genl_info *info)
struct cfg80211_registered_device *rdev = info->user_ptr[0];
struct net_device *dev = info->user_ptr[1];
u8 action_code, dialog_token;
+ u32 peer_capability = 0;
u16 status_code;
u8 *peer;
@@ -7192,9 +7380,12 @@ static int nl80211_tdls_mgmt(struct sk_buff *skb, struct genl_info *info)
action_code = nla_get_u8(info->attrs[NL80211_ATTR_TDLS_ACTION]);
status_code = nla_get_u16(info->attrs[NL80211_ATTR_STATUS_CODE]);
dialog_token = nla_get_u8(info->attrs[NL80211_ATTR_TDLS_DIALOG_TOKEN]);
+ if (info->attrs[NL80211_ATTR_TDLS_PEER_CAPABILITY])
+ peer_capability =
+ nla_get_u32(info->attrs[NL80211_ATTR_TDLS_PEER_CAPABILITY]);
return rdev_tdls_mgmt(rdev, dev, peer, action_code,
- dialog_token, status_code,
+ dialog_token, status_code, peer_capability,
nla_data(info->attrs[NL80211_ATTR_IE]),
nla_len(info->attrs[NL80211_ATTR_IE]));
}
@@ -7421,6 +7612,7 @@ static const struct nla_policy nl80211_txattr_policy[NL80211_TXRATE_MAX + 1] = {
[NL80211_TXRATE_HT] = { .type = NLA_BINARY,
.len = NL80211_MAX_SUPP_HT_RATES },
[NL80211_TXRATE_VHT] = { .len = sizeof(struct nl80211_txrate_vht)},
+ [NL80211_TXRATE_GI] = { .type = NLA_U8 },
};
static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb,
@@ -7467,16 +7659,19 @@ static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb,
* directly to the enum ieee80211_band values used in cfg80211.
*/
BUILD_BUG_ON(NL80211_MAX_SUPP_HT_RATES > IEEE80211_HT_MCS_MASK_LEN * 8);
- nla_for_each_nested(tx_rates, info->attrs[NL80211_ATTR_TX_RATES], rem)
- {
+ nla_for_each_nested(tx_rates, info->attrs[NL80211_ATTR_TX_RATES], rem) {
enum ieee80211_band band = nla_type(tx_rates);
+ int err;
+
if (band < 0 || band >= IEEE80211_NUM_BANDS)
return -EINVAL;
sband = rdev->wiphy.bands[band];
if (sband == NULL)
return -EINVAL;
- nla_parse(tb, NL80211_TXRATE_MAX, nla_data(tx_rates),
- nla_len(tx_rates), nl80211_txattr_policy);
+ err = nla_parse(tb, NL80211_TXRATE_MAX, nla_data(tx_rates),
+ nla_len(tx_rates), nl80211_txattr_policy);
+ if (err)
+ return err;
if (tb[NL80211_TXRATE_LEGACY]) {
mask.control[band].legacy = rateset_to_mask(
sband,
@@ -7501,6 +7696,12 @@ static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb,
mask.control[band].vht_mcs))
return -EINVAL;
}
+ if (tb[NL80211_TXRATE_GI]) {
+ mask.control[band].gi =
+ nla_get_u8(tb[NL80211_TXRATE_GI]);
+ if (mask.control[band].gi > NL80211_TXRATE_FORCE_LGI)
+ return -EINVAL;
+ }
if (mask.control[band].legacy == 0) {
/* don't allow empty legacy rates if HT or VHT
@@ -7633,6 +7834,27 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info)
if (!chandef.chan && params.offchan)
return -EINVAL;
+ params.buf = nla_data(info->attrs[NL80211_ATTR_FRAME]);
+ params.len = nla_len(info->attrs[NL80211_ATTR_FRAME]);
+
+ if (info->attrs[NL80211_ATTR_CSA_C_OFFSETS_TX]) {
+ int len = nla_len(info->attrs[NL80211_ATTR_CSA_C_OFFSETS_TX]);
+ int i;
+
+ if (len % sizeof(u16))
+ return -EINVAL;
+
+ params.n_csa_offsets = len / sizeof(u16);
+ params.csa_offsets =
+ nla_data(info->attrs[NL80211_ATTR_CSA_C_OFFSETS_TX]);
+
+ /* check that all the offsets fit the frame */
+ for (i = 0; i < params.n_csa_offsets; i++) {
+ if (params.csa_offsets[i] >= params.len)
+ return -EINVAL;
+ }
+ }
+
if (!params.dont_wait_for_ack) {
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg)
@@ -7646,8 +7868,6 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info)
}
}
- params.buf = nla_data(info->attrs[NL80211_ATTR_FRAME]);
- params.len = nla_len(info->attrs[NL80211_ATTR_FRAME]);
params.chan = chandef.chan;
err = cfg80211_mlme_mgmt_tx(rdev, wdev, &params, &cookie);
if (err)
@@ -7777,8 +7997,8 @@ static int nl80211_get_power_save(struct sk_buff *skb, struct genl_info *info)
return err;
}
-static struct nla_policy
-nl80211_attr_cqm_policy[NL80211_ATTR_CQM_MAX + 1] __read_mostly = {
+static const struct nla_policy
+nl80211_attr_cqm_policy[NL80211_ATTR_CQM_MAX + 1] = {
[NL80211_ATTR_CQM_RSSI_THOLD] = { .type = NLA_U32 },
[NL80211_ATTR_CQM_RSSI_HYST] = { .type = NLA_U32 },
[NL80211_ATTR_CQM_RSSI_THRESHOLD_EVENT] = { .type = NLA_U32 },
@@ -8344,6 +8564,8 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info)
nla_for_each_nested(pat, tb[NL80211_WOWLAN_TRIG_PKT_PATTERN],
rem) {
+ u8 *mask_pat;
+
nla_parse(pat_tb, MAX_NL80211_PKTPAT, nla_data(pat),
nla_len(pat), NULL);
err = -EINVAL;
@@ -8367,19 +8589,18 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info)
goto error;
new_triggers.patterns[i].pkt_offset = pkt_offset;
- new_triggers.patterns[i].mask =
- kmalloc(mask_len + pat_len, GFP_KERNEL);
- if (!new_triggers.patterns[i].mask) {
+ mask_pat = kmalloc(mask_len + pat_len, GFP_KERNEL);
+ if (!mask_pat) {
err = -ENOMEM;
goto error;
}
- new_triggers.patterns[i].pattern =
- new_triggers.patterns[i].mask + mask_len;
- memcpy(new_triggers.patterns[i].mask,
- nla_data(pat_tb[NL80211_PKTPAT_MASK]),
+ new_triggers.patterns[i].mask = mask_pat;
+ memcpy(mask_pat, nla_data(pat_tb[NL80211_PKTPAT_MASK]),
mask_len);
+ mask_pat += mask_len;
+ new_triggers.patterns[i].pattern = mask_pat;
new_triggers.patterns[i].pattern_len = pat_len;
- memcpy(new_triggers.patterns[i].pattern,
+ memcpy(mask_pat,
nla_data(pat_tb[NL80211_PKTPAT_PATTERN]),
pat_len);
i++;
@@ -8571,6 +8792,8 @@ static int nl80211_parse_coalesce_rule(struct cfg80211_registered_device *rdev,
nla_for_each_nested(pat, tb[NL80211_ATTR_COALESCE_RULE_PKT_PATTERN],
rem) {
+ u8 *mask_pat;
+
nla_parse(pat_tb, MAX_NL80211_PKTPAT, nla_data(pat),
nla_len(pat), NULL);
if (!pat_tb[NL80211_PKTPAT_MASK] ||
@@ -8592,17 +8815,19 @@ static int nl80211_parse_coalesce_rule(struct cfg80211_registered_device *rdev,
return -EINVAL;
new_rule->patterns[i].pkt_offset = pkt_offset;
- new_rule->patterns[i].mask =
- kmalloc(mask_len + pat_len, GFP_KERNEL);
- if (!new_rule->patterns[i].mask)
+ mask_pat = kmalloc(mask_len + pat_len, GFP_KERNEL);
+ if (!mask_pat)
return -ENOMEM;
- new_rule->patterns[i].pattern =
- new_rule->patterns[i].mask + mask_len;
- memcpy(new_rule->patterns[i].mask,
- nla_data(pat_tb[NL80211_PKTPAT_MASK]), mask_len);
+
+ new_rule->patterns[i].mask = mask_pat;
+ memcpy(mask_pat, nla_data(pat_tb[NL80211_PKTPAT_MASK]),
+ mask_len);
+
+ mask_pat += mask_len;
+ new_rule->patterns[i].pattern = mask_pat;
new_rule->patterns[i].pattern_len = pat_len;
- memcpy(new_rule->patterns[i].pattern,
- nla_data(pat_tb[NL80211_PKTPAT_PATTERN]), pat_len);
+ memcpy(mask_pat, nla_data(pat_tb[NL80211_PKTPAT_PATTERN]),
+ pat_len);
i++;
}
@@ -8847,9 +9072,8 @@ static int nl80211_start_p2p_device(struct sk_buff *skb, struct genl_info *info)
if (wdev->p2p_started)
return 0;
- err = cfg80211_can_add_interface(rdev, wdev->iftype);
- if (err)
- return err;
+ if (rfkill_blocked(rdev->rfkill))
+ return -ERFKILL;
err = rdev_start_p2p_device(rdev, wdev);
if (err)
@@ -9058,7 +9282,7 @@ struct sk_buff *__cfg80211_alloc_reply_skb(struct wiphy *wiphy,
enum nl80211_attrs attr,
int approxlen)
{
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
if (WARN_ON(!rdev->cur_cmd_info))
return NULL;
@@ -9182,7 +9406,7 @@ static int nl80211_pre_doit(const struct genl_ops *ops, struct sk_buff *skb,
}
dev = wdev->netdev;
- rdev = wiphy_to_dev(wdev->wiphy);
+ rdev = wiphy_to_rdev(wdev->wiphy);
if (ops->internal_flags & NL80211_FLAG_NEED_NETDEV) {
if (!dev) {
@@ -9883,16 +10107,20 @@ static const struct genl_ops nl80211_ops[] = {
/* notification functions */
-void nl80211_notify_dev_rename(struct cfg80211_registered_device *rdev)
+void nl80211_notify_wiphy(struct cfg80211_registered_device *rdev,
+ enum nl80211_commands cmd)
{
struct sk_buff *msg;
struct nl80211_dump_wiphy_state state = {};
+ WARN_ON(cmd != NL80211_CMD_NEW_WIPHY &&
+ cmd != NL80211_CMD_DEL_WIPHY);
+
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg)
return;
- if (nl80211_send_wiphy(rdev, msg, 0, 0, 0, &state) < 0) {
+ if (nl80211_send_wiphy(rdev, cmd, msg, 0, 0, 0, &state) < 0) {
nlmsg_free(msg);
return;
}
@@ -10211,7 +10439,7 @@ void cfg80211_rx_unprot_mlme_mgmt(struct net_device *dev, const u8 *buf,
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
struct wiphy *wiphy = wdev->wiphy;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
const struct ieee80211_mgmt *mgmt = (void *)buf;
u32 cmd;
@@ -10433,7 +10661,7 @@ void cfg80211_notify_new_peer_candidate(struct net_device *dev, const u8 *addr,
const u8* ie, u8 ie_len, gfp_t gfp)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
struct sk_buff *msg;
void *hdr;
@@ -10613,7 +10841,7 @@ void cfg80211_ready_on_channel(struct wireless_dev *wdev, u64 cookie,
unsigned int duration, gfp_t gfp)
{
struct wiphy *wiphy = wdev->wiphy;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
trace_cfg80211_ready_on_channel(wdev, cookie, chan, duration);
nl80211_send_remain_on_chan_event(NL80211_CMD_REMAIN_ON_CHANNEL,
@@ -10627,7 +10855,7 @@ void cfg80211_remain_on_channel_expired(struct wireless_dev *wdev, u64 cookie,
gfp_t gfp)
{
struct wiphy *wiphy = wdev->wiphy;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
trace_cfg80211_ready_on_channel_expired(wdev, cookie, chan);
nl80211_send_remain_on_chan_event(NL80211_CMD_CANCEL_REMAIN_ON_CHANNEL,
@@ -10639,7 +10867,7 @@ void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr,
struct station_info *sinfo, gfp_t gfp)
{
struct wiphy *wiphy = dev->ieee80211_ptr->wiphy;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
struct sk_buff *msg;
trace_cfg80211_new_sta(dev, mac_addr, sinfo);
@@ -10662,7 +10890,7 @@ EXPORT_SYMBOL(cfg80211_new_sta);
void cfg80211_del_sta(struct net_device *dev, const u8 *mac_addr, gfp_t gfp)
{
struct wiphy *wiphy = dev->ieee80211_ptr->wiphy;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
struct sk_buff *msg;
void *hdr;
@@ -10699,7 +10927,7 @@ void cfg80211_conn_failed(struct net_device *dev, const u8 *mac_addr,
gfp_t gfp)
{
struct wiphy *wiphy = dev->ieee80211_ptr->wiphy;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
struct sk_buff *msg;
void *hdr;
@@ -10734,7 +10962,7 @@ static bool __nl80211_unexpected_frame(struct net_device *dev, u8 cmd,
const u8 *addr, gfp_t gfp)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
struct sk_buff *msg;
void *hdr;
u32 nlportid = ACCESS_ONCE(wdev->ap_unexpected_nlportid);
@@ -10854,7 +11082,7 @@ void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie,
const u8 *buf, size_t len, bool ack, gfp_t gfp)
{
struct wiphy *wiphy = wdev->wiphy;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
struct net_device *netdev = wdev->netdev;
struct sk_buff *msg;
void *hdr;
@@ -10898,7 +11126,7 @@ void cfg80211_cqm_rssi_notify(struct net_device *dev,
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
struct wiphy *wiphy = wdev->wiphy;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
struct sk_buff *msg;
struct nlattr *pinfoattr;
void *hdr;
@@ -10990,7 +11218,7 @@ void cfg80211_gtk_rekey_notify(struct net_device *dev, const u8 *bssid,
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
struct wiphy *wiphy = wdev->wiphy;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
trace_cfg80211_gtk_rekey_notify(dev, bssid);
nl80211_gtk_rekey_notify(rdev, dev, bssid, replay_ctr, gfp);
@@ -11048,7 +11276,7 @@ void cfg80211_pmksa_candidate_notify(struct net_device *dev, int index,
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
struct wiphy *wiphy = wdev->wiphy;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
trace_cfg80211_pmksa_candidate_notify(dev, index, bssid, preauth);
nl80211_pmksa_candidate_notify(rdev, dev, index, bssid, preauth, gfp);
@@ -11095,7 +11323,7 @@ void cfg80211_ch_switch_notify(struct net_device *dev,
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
struct wiphy *wiphy = wdev->wiphy;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
ASSERT_WDEV_LOCK(wdev);
@@ -11107,7 +11335,8 @@ void cfg80211_ch_switch_notify(struct net_device *dev,
wdev->iftype != NL80211_IFTYPE_MESH_POINT))
return;
- wdev->channel = chandef->chan;
+ wdev->chandef = *chandef;
+ wdev->preset_chandef = *chandef;
nl80211_ch_switch_notify(rdev, dev, chandef, GFP_KERNEL);
}
EXPORT_SYMBOL(cfg80211_ch_switch_notify);
@@ -11118,7 +11347,7 @@ void cfg80211_cqm_txe_notify(struct net_device *dev,
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
struct wiphy *wiphy = wdev->wiphy;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
struct sk_buff *msg;
struct nlattr *pinfoattr;
void *hdr;
@@ -11218,7 +11447,7 @@ void cfg80211_cqm_pktloss_notify(struct net_device *dev,
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
struct wiphy *wiphy = wdev->wiphy;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
struct sk_buff *msg;
struct nlattr *pinfoattr;
void *hdr;
@@ -11265,7 +11494,7 @@ void cfg80211_probe_status(struct net_device *dev, const u8 *addr,
u64 cookie, bool acked, gfp_t gfp)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
struct sk_buff *msg;
void *hdr;
@@ -11305,7 +11534,7 @@ void cfg80211_report_obss_beacon(struct wiphy *wiphy,
const u8 *frame, size_t len,
int freq, int sig_dbm)
{
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
struct sk_buff *msg;
void *hdr;
struct cfg80211_beacon_registration *reg;
@@ -11352,7 +11581,7 @@ void cfg80211_report_wowlan_wakeup(struct wireless_dev *wdev,
struct cfg80211_wowlan_wakeup *wakeup,
gfp_t gfp)
{
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
struct sk_buff *msg;
void *hdr;
int size = 200;
@@ -11462,7 +11691,7 @@ void cfg80211_tdls_oper_request(struct net_device *dev, const u8 *peer,
u16 reason_code, gfp_t gfp)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
struct sk_buff *msg;
void *hdr;
@@ -11514,9 +11743,15 @@ static int nl80211_netlink_notify(struct notifier_block * nb,
rcu_read_lock();
list_for_each_entry_rcu(rdev, &cfg80211_rdev_list, list) {
- list_for_each_entry_rcu(wdev, &rdev->wdev_list, list)
+ bool schedule_destroy_work = false;
+
+ list_for_each_entry_rcu(wdev, &rdev->wdev_list, list) {
cfg80211_mlme_unregister_socket(wdev, notify->portid);
+ if (wdev->owner_nlportid == notify->portid)
+ schedule_destroy_work = true;
+ }
+
spin_lock_bh(&rdev->beacon_registrations_lock);
list_for_each_entry_safe(reg, tmp, &rdev->beacon_registrations,
list) {
@@ -11527,11 +11762,24 @@ static int nl80211_netlink_notify(struct notifier_block * nb,
}
}
spin_unlock_bh(&rdev->beacon_registrations_lock);
+
+ if (schedule_destroy_work) {
+ struct cfg80211_iface_destroy *destroy;
+
+ destroy = kzalloc(sizeof(*destroy), GFP_ATOMIC);
+ if (destroy) {
+ destroy->nlportid = notify->portid;
+ spin_lock(&rdev->destroy_list_lock);
+ list_add(&destroy->list, &rdev->destroy_list);
+ spin_unlock(&rdev->destroy_list_lock);
+ schedule_work(&rdev->destroy_work);
+ }
+ }
}
rcu_read_unlock();
- return NOTIFY_DONE;
+ return NOTIFY_OK;
}
static struct notifier_block nl80211_netlink_notifier = {
@@ -11542,7 +11790,7 @@ void cfg80211_ft_event(struct net_device *netdev,
struct cfg80211_ft_event_params *ft_event)
{
struct wiphy *wiphy = netdev->ieee80211_ptr->wiphy;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
struct sk_buff *msg;
void *hdr;
@@ -11589,7 +11837,7 @@ void cfg80211_crit_proto_stopped(struct wireless_dev *wdev, gfp_t gfp)
void *hdr;
u32 nlportid;
- rdev = wiphy_to_dev(wdev->wiphy);
+ rdev = wiphy_to_rdev(wdev->wiphy);
if (!rdev->crit_proto_nlportid)
return;
@@ -11621,6 +11869,35 @@ void cfg80211_crit_proto_stopped(struct wireless_dev *wdev, gfp_t gfp)
}
EXPORT_SYMBOL(cfg80211_crit_proto_stopped);
+void nl80211_send_ap_stopped(struct wireless_dev *wdev)
+{ /* Build an NL80211_CMD_STOP_AP message for @wdev and multicast it on NL80211_MCGRP_MLME. */
+ struct wiphy *wiphy = wdev->wiphy;
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
+ struct sk_buff *msg;
+ void *hdr;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return; /* allocation failed: nothing to free, no message is sent */
+
+ hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_STOP_AP);
+ if (!hdr)
+ goto out;
+
+ if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
+ nla_put_u32(msg, NL80211_ATTR_IFINDEX, wdev->netdev->ifindex) || /* NOTE(review): wdev->netdev is dereferenced without a NULL check - confirm callers */
+ nla_put_u64(msg, NL80211_ATTR_WDEV, wdev_id(wdev)))
+ goto out;
+
+ genlmsg_end(msg, hdr);
+
+ genlmsg_multicast_netns(&nl80211_fam, wiphy_net(wiphy), msg, 0,
+ NL80211_MCGRP_MLME, GFP_KERNEL);
+ return;
+ out: /* reached only before the multicast call: free the unsent message */
+ nlmsg_free(msg);
+}
+
/* initialisation/exit functions */
int nl80211_init(void)
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index 75799746d84..49c9a482dd1 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -5,7 +5,8 @@
int nl80211_init(void);
void nl80211_exit(void);
-void nl80211_notify_dev_rename(struct cfg80211_registered_device *rdev);
+void nl80211_notify_wiphy(struct cfg80211_registered_device *rdev,
+ enum nl80211_commands cmd);
void nl80211_send_scan_start(struct cfg80211_registered_device *rdev,
struct wireless_dev *wdev);
struct sk_buff *nl80211_build_scan_msg(struct cfg80211_registered_device *rdev,
@@ -74,6 +75,8 @@ nl80211_radar_notify(struct cfg80211_registered_device *rdev,
enum nl80211_radar_event event,
struct net_device *netdev, gfp_t gfp);
+void nl80211_send_ap_stopped(struct wireless_dev *wdev);
+
void cfg80211_rdev_free_coalesce(struct cfg80211_registered_device *rdev);
#endif /* __NET_WIRELESS_NL80211_H */
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index c8e225947ad..d95bbe34813 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -199,7 +199,7 @@ static inline int rdev_change_station(struct cfg80211_registered_device *rdev,
}
static inline int rdev_get_station(struct cfg80211_registered_device *rdev,
- struct net_device *dev, u8 *mac,
+ struct net_device *dev, const u8 *mac,
struct station_info *sinfo)
{
int ret;
@@ -769,13 +769,16 @@ static inline int rdev_set_rekey_data(struct cfg80211_registered_device *rdev,
static inline int rdev_tdls_mgmt(struct cfg80211_registered_device *rdev,
struct net_device *dev, u8 *peer,
u8 action_code, u8 dialog_token,
- u16 status_code, const u8 *buf, size_t len)
+ u16 status_code, u32 peer_capability,
+ const u8 *buf, size_t len)
{
int ret;
trace_rdev_tdls_mgmt(&rdev->wiphy, dev, peer, action_code,
- dialog_token, status_code, buf, len);
+ dialog_token, status_code, peer_capability,
+ buf, len);
ret = rdev->ops->tdls_mgmt(&rdev->wiphy, dev, peer, action_code,
- dialog_token, status_code, buf, len);
+ dialog_token, status_code, peer_capability,
+ buf, len);
trace_rdev_return_int(&rdev->wiphy, ret);
return ret;
}
@@ -947,4 +950,17 @@ static inline int rdev_set_qos_map(struct cfg80211_registered_device *rdev,
return ret;
}
+static inline int
+rdev_set_ap_chanwidth(struct cfg80211_registered_device *rdev,
+ struct net_device *dev, struct cfg80211_chan_def *chandef)
+{ /* Invoke the driver's set_ap_chanwidth op, tracing the call and its integer result. */
+ int ret;
+
+ trace_rdev_set_ap_chanwidth(&rdev->wiphy, dev, chandef);
+ ret = rdev->ops->set_ap_chanwidth(&rdev->wiphy, dev, chandef); /* op pointer is not NULL-checked here */
+ trace_rdev_return_int(&rdev->wiphy, ret);
+
+ return ret; /* the op's return value, passed through unchanged */
+}
+
#endif /* __CFG80211_RDEV_OPS */
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index f0541370e68..1afdf45db38 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -65,11 +65,26 @@
#define REG_DBG_PRINT(args...)
#endif
+/**
+ * enum reg_request_treatment - regulatory request treatment
+ *
+ * @REG_REQ_OK: continue processing the regulatory request
+ * @REG_REQ_IGNORE: ignore the regulatory request
+ * @REG_REQ_INTERSECT: the regulatory domain resulting from this request should
+ * be intersected with the current one.
+ * @REG_REQ_ALREADY_SET: the regulatory request will not change the current
+ * regulatory settings, and no further processing is required.
+ * @REG_REQ_USER_HINT_HANDLED: a non alpha2 user hint was handled and no
+ * further processing is required, i.e., no need to update last_request
+ * etc. This should be used for user hints that do not provide an alpha2
+ * but some other type of regulatory hint, i.e., indoor operation.
+ */
enum reg_request_treatment {
REG_REQ_OK,
REG_REQ_IGNORE,
REG_REQ_INTERSECT,
REG_REQ_ALREADY_SET,
+ REG_REQ_USER_HINT_HANDLED,
};
static struct regulatory_request core_request_world = {
@@ -91,10 +106,6 @@ static struct regulatory_request __rcu *last_request =
/* To trigger userspace events */
static struct platform_device *reg_pdev;
-static struct device_type reg_device_type = {
- .uevent = reg_device_uevent,
-};
-
/*
* Central wireless core regulatory domains, we only need two,
* the current one and a world regulatory domain in case we have no
@@ -110,6 +121,14 @@ const struct ieee80211_regdomain __rcu *cfg80211_regdomain;
*/
static int reg_num_devs_support_basehint;
+/*
+ * State variable indicating if the platform on which the devices
+ * are attached is operating in an indoor environment. The state variable
+ * is relevant for all registered devices.
+ * (protected by RTNL)
+ */
+static bool reg_is_indoor;
+
static const struct ieee80211_regdomain *get_cfg80211_regdom(void)
{
return rtnl_dereference(cfg80211_regdomain);
@@ -244,11 +263,15 @@ static char user_alpha2[2];
module_param(ieee80211_regdom, charp, 0444);
MODULE_PARM_DESC(ieee80211_regdom, "IEEE 802.11 regulatory domain code");
-static void reg_kfree_last_request(void)
+static void reg_free_request(struct regulatory_request *request)
{
- struct regulatory_request *lr;
+ if (request != get_last_request())
+ kfree(request);
+}
- lr = get_last_request();
+static void reg_free_last_request(void)
+{
+ struct regulatory_request *lr = get_last_request();
if (lr != &core_request_world && lr)
kfree_rcu(lr, rcu_head);
@@ -256,7 +279,13 @@ static void reg_kfree_last_request(void)
static void reg_update_last_request(struct regulatory_request *request)
{
- reg_kfree_last_request();
+ struct regulatory_request *lr;
+
+ lr = get_last_request();
+ if (lr == request)
+ return;
+
+ reg_free_last_request();
rcu_assign_pointer(last_request, request);
}
@@ -487,11 +516,16 @@ static inline void reg_regdb_query(const char *alpha2) {}
/*
* This lets us keep regulatory code which is updated on a regulatory
- * basis in userspace. Country information is filled in by
- * reg_device_uevent
+ * basis in userspace.
*/
static int call_crda(const char *alpha2)
{
+ char country[12];
+ char *env[] = { country, NULL };
+
+ snprintf(country, sizeof(country), "COUNTRY=%c%c",
+ alpha2[0], alpha2[1]);
+
if (!is_world_regdom((char *) alpha2))
pr_info("Calling CRDA for country: %c%c\n",
alpha2[0], alpha2[1]);
@@ -501,7 +535,7 @@ static int call_crda(const char *alpha2)
/* query internal regulatory database (if it exists) */
reg_regdb_query(alpha2);
- return kobject_uevent(&reg_pdev->dev.kobj, KOBJ_CHANGE);
+ return kobject_uevent_env(&reg_pdev->dev.kobj, KOBJ_CHANGE, env);
}
static enum reg_request_treatment
@@ -522,6 +556,71 @@ bool reg_is_valid_request(const char *alpha2)
return alpha2_equal(lr->alpha2, alpha2);
}
+static const struct ieee80211_regdomain *reg_get_regdomain(struct wiphy *wiphy)
+{ /* Select the regdomain for @wiphy: its own one (wiphy->regd) or the central cfg80211 one. */
+ struct regulatory_request *lr = get_last_request();
+
+ /*
+ * Follow the driver's regulatory domain, if present, unless a country
+ * IE has been processed or a user wants to help compliance further
+ */
+ if (lr->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE &&
+ lr->initiator != NL80211_REGDOM_SET_BY_USER &&
+ wiphy->regd)
+ return get_wiphy_regdom(wiphy);
+
+ return get_cfg80211_regdom(); /* otherwise use the central regulatory domain */
+}
+
+unsigned int reg_get_max_bandwidth(const struct ieee80211_regdomain *rd,
+ const struct ieee80211_reg_rule *rule)
+{ /* Return the kHz span of the run of touching/overlapping rules in @rd around @rule; 0 if @rule is not in @rd. */
+ const struct ieee80211_freq_range *freq_range = &rule->freq_range;
+ const struct ieee80211_freq_range *freq_range_tmp;
+ const struct ieee80211_reg_rule *tmp;
+ u32 start_freq, end_freq, idx, no;
+
+ for (idx = 0; idx < rd->n_reg_rules; idx++)
+ if (rule == &rd->reg_rules[idx]) /* matched by address: @rule must point into rd->reg_rules */
+ break;
+
+ if (idx == rd->n_reg_rules)
+ return 0; /* @rule was not found in @rd */
+
+ /* get start_freq: walk to lower-indexed rules until a gap is found */
+ no = idx; /* NOTE(review): only index neighbours are examined - presumably reg_rules is sorted by frequency; confirm */
+
+ while (no) {
+ tmp = &rd->reg_rules[--no];
+ freq_range_tmp = &tmp->freq_range;
+
+ if (freq_range_tmp->end_freq_khz < freq_range->start_freq_khz)
+ break; /* gap: the lower rule ends before the current range starts */
+
+ freq_range = freq_range_tmp;
+ }
+
+ start_freq = freq_range->start_freq_khz;
+
+ /* get end_freq: same walk, restarted from @rule, towards higher-indexed rules */
+ freq_range = &rule->freq_range;
+ no = idx;
+
+ while (no < rd->n_reg_rules - 1) {
+ tmp = &rd->reg_rules[++no];
+ freq_range_tmp = &tmp->freq_range;
+
+ if (freq_range_tmp->start_freq_khz > freq_range->end_freq_khz)
+ break; /* gap: the higher rule starts after the current range ends */
+
+ freq_range = freq_range_tmp;
+ }
+
+ end_freq = freq_range->end_freq_khz;
+
+ return end_freq - start_freq;
+}
+
/* Sanity check on a regulatory rule */
static bool is_valid_reg_rule(const struct ieee80211_reg_rule *rule)
{
@@ -630,7 +729,9 @@ reg_intersect_dfs_region(const enum nl80211_dfs_regions dfs_region1,
* Helper for regdom_intersect(), this does the real
* mathematical intersection fun
*/
-static int reg_rules_intersect(const struct ieee80211_reg_rule *rule1,
+static int reg_rules_intersect(const struct ieee80211_regdomain *rd1,
+ const struct ieee80211_regdomain *rd2,
+ const struct ieee80211_reg_rule *rule1,
const struct ieee80211_reg_rule *rule2,
struct ieee80211_reg_rule *intersected_rule)
{
@@ -638,7 +739,7 @@ static int reg_rules_intersect(const struct ieee80211_reg_rule *rule1,
struct ieee80211_freq_range *freq_range;
const struct ieee80211_power_rule *power_rule1, *power_rule2;
struct ieee80211_power_rule *power_rule;
- u32 freq_diff;
+ u32 freq_diff, max_bandwidth1, max_bandwidth2;
freq_range1 = &rule1->freq_range;
freq_range2 = &rule2->freq_range;
@@ -652,8 +753,32 @@ static int reg_rules_intersect(const struct ieee80211_reg_rule *rule1,
freq_range2->start_freq_khz);
freq_range->end_freq_khz = min(freq_range1->end_freq_khz,
freq_range2->end_freq_khz);
- freq_range->max_bandwidth_khz = min(freq_range1->max_bandwidth_khz,
- freq_range2->max_bandwidth_khz);
+
+ max_bandwidth1 = freq_range1->max_bandwidth_khz;
+ max_bandwidth2 = freq_range2->max_bandwidth_khz;
+
+ if (rule1->flags & NL80211_RRF_AUTO_BW)
+ max_bandwidth1 = reg_get_max_bandwidth(rd1, rule1);
+ if (rule2->flags & NL80211_RRF_AUTO_BW)
+ max_bandwidth2 = reg_get_max_bandwidth(rd2, rule2);
+
+ freq_range->max_bandwidth_khz = min(max_bandwidth1, max_bandwidth2);
+
+ intersected_rule->flags = rule1->flags | rule2->flags;
+
+ /*
+ * If NL80211_RRF_AUTO_BW is requested for both rules, keep
+ * AUTO_BW set in the intersected rule as well; the bandwidth
+ * is then calculated later in the handle_channel function.
+ * Otherwise clear the AUTO_BW flag: the maximum bandwidth
+ * has already been calculated above, so auto calculation
+ * is not required.
+ */
+ if ((rule1->flags & NL80211_RRF_AUTO_BW) &&
+ (rule2->flags & NL80211_RRF_AUTO_BW))
+ intersected_rule->flags |= NL80211_RRF_AUTO_BW;
+ else
+ intersected_rule->flags &= ~NL80211_RRF_AUTO_BW;
freq_diff = freq_range->end_freq_khz - freq_range->start_freq_khz;
if (freq_range->max_bandwidth_khz > freq_diff)
@@ -664,7 +789,8 @@ static int reg_rules_intersect(const struct ieee80211_reg_rule *rule1,
power_rule->max_antenna_gain = min(power_rule1->max_antenna_gain,
power_rule2->max_antenna_gain);
- intersected_rule->flags = rule1->flags | rule2->flags;
+ intersected_rule->dfs_cac_ms = max(rule1->dfs_cac_ms,
+ rule2->dfs_cac_ms);
if (!is_valid_reg_rule(intersected_rule))
return -EINVAL;
@@ -713,7 +839,8 @@ regdom_intersect(const struct ieee80211_regdomain *rd1,
rule1 = &rd1->reg_rules[x];
for (y = 0; y < rd2->n_reg_rules; y++) {
rule2 = &rd2->reg_rules[y];
- if (!reg_rules_intersect(rule1, rule2, &dummy_rule))
+ if (!reg_rules_intersect(rd1, rd2, rule1, rule2,
+ &dummy_rule))
num_rules++;
}
}
@@ -738,7 +865,8 @@ regdom_intersect(const struct ieee80211_regdomain *rd1,
* a memcpy()
*/
intersected_rule = &rd->reg_rules[rule_idx];
- r = reg_rules_intersect(rule1, rule2, intersected_rule);
+ r = reg_rules_intersect(rd1, rd2, rule1, rule2,
+ intersected_rule);
/*
* No need to memset here the intersected rule here as
* we're not using the stack anymore
@@ -776,6 +904,8 @@ static u32 map_regdom_flags(u32 rd_flags)
channel_flags |= IEEE80211_CHAN_RADAR;
if (rd_flags & NL80211_RRF_NO_OFDM)
channel_flags |= IEEE80211_CHAN_NO_OFDM;
+ if (rd_flags & NL80211_RRF_NO_OUTDOOR)
+ channel_flags |= IEEE80211_CHAN_INDOOR_ONLY;
return channel_flags;
}
@@ -821,18 +951,8 @@ const struct ieee80211_reg_rule *freq_reg_info(struct wiphy *wiphy,
u32 center_freq)
{
const struct ieee80211_regdomain *regd;
- struct regulatory_request *lr = get_last_request();
- /*
- * Follow the driver's regulatory domain, if present, unless a country
- * IE has been processed or a user wants to help complaince further
- */
- if (lr->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE &&
- lr->initiator != NL80211_REGDOM_SET_BY_USER &&
- wiphy->regd)
- regd = get_wiphy_regdom(wiphy);
- else
- regd = get_cfg80211_regdom();
+ regd = reg_get_regdomain(wiphy);
return freq_reg_info_regd(wiphy, center_freq, regd);
}
@@ -857,31 +977,42 @@ const char *reg_initiator_name(enum nl80211_reg_initiator initiator)
EXPORT_SYMBOL(reg_initiator_name);
#ifdef CONFIG_CFG80211_REG_DEBUG
-static void chan_reg_rule_print_dbg(struct ieee80211_channel *chan,
+static void chan_reg_rule_print_dbg(const struct ieee80211_regdomain *regd,
+ struct ieee80211_channel *chan,
const struct ieee80211_reg_rule *reg_rule)
{
const struct ieee80211_power_rule *power_rule;
const struct ieee80211_freq_range *freq_range;
- char max_antenna_gain[32];
+ char max_antenna_gain[32], bw[32];
power_rule = &reg_rule->power_rule;
freq_range = &reg_rule->freq_range;
if (!power_rule->max_antenna_gain)
- snprintf(max_antenna_gain, 32, "N/A");
+ snprintf(max_antenna_gain, sizeof(max_antenna_gain), "N/A");
else
- snprintf(max_antenna_gain, 32, "%d", power_rule->max_antenna_gain);
+ snprintf(max_antenna_gain, sizeof(max_antenna_gain), "%d",
+ power_rule->max_antenna_gain);
+
+ if (reg_rule->flags & NL80211_RRF_AUTO_BW)
+ snprintf(bw, sizeof(bw), "%d KHz, %d KHz AUTO",
+ freq_range->max_bandwidth_khz,
+ reg_get_max_bandwidth(regd, reg_rule));
+ else
+ snprintf(bw, sizeof(bw), "%d KHz",
+ freq_range->max_bandwidth_khz);
REG_DBG_PRINT("Updating information on frequency %d MHz with regulatory rule:\n",
chan->center_freq);
- REG_DBG_PRINT("%d KHz - %d KHz @ %d KHz), (%s mBi, %d mBm)\n",
+ REG_DBG_PRINT("%d KHz - %d KHz @ %s), (%s mBi, %d mBm)\n",
freq_range->start_freq_khz, freq_range->end_freq_khz,
- freq_range->max_bandwidth_khz, max_antenna_gain,
+ bw, max_antenna_gain,
power_rule->max_eirp);
}
#else
-static void chan_reg_rule_print_dbg(struct ieee80211_channel *chan,
+static void chan_reg_rule_print_dbg(const struct ieee80211_regdomain *regd,
+ struct ieee80211_channel *chan,
const struct ieee80211_reg_rule *reg_rule)
{
return;
@@ -903,6 +1034,8 @@ static void handle_channel(struct wiphy *wiphy,
const struct ieee80211_freq_range *freq_range = NULL;
struct wiphy *request_wiphy = NULL;
struct regulatory_request *lr = get_last_request();
+ const struct ieee80211_regdomain *regd;
+ u32 max_bandwidth_khz;
request_wiphy = wiphy_idx_to_wiphy(lr->wiphy_idx);
@@ -939,16 +1072,22 @@ static void handle_channel(struct wiphy *wiphy,
return;
}
- chan_reg_rule_print_dbg(chan, reg_rule);
+ regd = reg_get_regdomain(wiphy);
+ chan_reg_rule_print_dbg(regd, chan, reg_rule);
power_rule = &reg_rule->power_rule;
freq_range = &reg_rule->freq_range;
- if (freq_range->max_bandwidth_khz < MHZ_TO_KHZ(40))
+ max_bandwidth_khz = freq_range->max_bandwidth_khz;
+ /* Check if auto calculation requested */
+ if (reg_rule->flags & NL80211_RRF_AUTO_BW)
+ max_bandwidth_khz = reg_get_max_bandwidth(regd, reg_rule);
+
+ if (max_bandwidth_khz < MHZ_TO_KHZ(40))
bw_flags = IEEE80211_CHAN_NO_HT40;
- if (freq_range->max_bandwidth_khz < MHZ_TO_KHZ(80))
+ if (max_bandwidth_khz < MHZ_TO_KHZ(80))
bw_flags |= IEEE80211_CHAN_NO_80MHZ;
- if (freq_range->max_bandwidth_khz < MHZ_TO_KHZ(160))
+ if (max_bandwidth_khz < MHZ_TO_KHZ(160))
bw_flags |= IEEE80211_CHAN_NO_160MHZ;
if (lr->initiator == NL80211_REGDOM_SET_BY_DRIVER &&
@@ -965,6 +1104,13 @@ static void handle_channel(struct wiphy *wiphy,
(int) MBI_TO_DBI(power_rule->max_antenna_gain);
chan->max_reg_power = chan->max_power = chan->orig_mpwr =
(int) MBM_TO_DBM(power_rule->max_eirp);
+
+ if (chan->flags & IEEE80211_CHAN_RADAR) {
+ chan->dfs_cac_ms = IEEE80211_DFS_MIN_CAC_TIME_MS;
+ if (reg_rule->dfs_cac_ms)
+ chan->dfs_cac_ms = reg_rule->dfs_cac_ms;
+ }
+
return;
}
@@ -977,6 +1123,14 @@ static void handle_channel(struct wiphy *wiphy,
min_t(int, chan->orig_mag,
MBI_TO_DBI(power_rule->max_antenna_gain));
chan->max_reg_power = (int) MBM_TO_DBM(power_rule->max_eirp);
+
+ if (chan->flags & IEEE80211_CHAN_RADAR) {
+ if (reg_rule->dfs_cac_ms)
+ chan->dfs_cac_ms = reg_rule->dfs_cac_ms;
+ else
+ chan->dfs_cac_ms = IEEE80211_DFS_MIN_CAC_TIME_MS;
+ }
+
if (chan->orig_mpwr) {
/*
* Devices that use REGULATORY_COUNTRY_IE_FOLLOW_POWER
@@ -1012,12 +1166,19 @@ static bool reg_request_cell_base(struct regulatory_request *request)
return request->user_reg_hint_type == NL80211_USER_REG_HINT_CELL_BASE;
}
+static bool reg_request_indoor(struct regulatory_request *request)
+{
+ if (request->initiator != NL80211_REGDOM_SET_BY_USER)
+ return false;
+ return request->user_reg_hint_type == NL80211_USER_REG_HINT_INDOOR;
+}
+
bool reg_last_request_cell_base(void)
{
return reg_request_cell_base(get_last_request());
}
-#ifdef CONFIG_CFG80211_CERTIFICATION_ONUS
+#ifdef CONFIG_CFG80211_REG_CELLULAR_HINTS
/* Core specific check */
static enum reg_request_treatment
reg_ignore_cell_hint(struct regulatory_request *pending_request)
@@ -1334,6 +1495,7 @@ static void handle_channel_custom(struct wiphy *wiphy,
const struct ieee80211_reg_rule *reg_rule = NULL;
const struct ieee80211_power_rule *power_rule = NULL;
const struct ieee80211_freq_range *freq_range = NULL;
+ u32 max_bandwidth_khz;
reg_rule = freq_reg_info_regd(wiphy, MHZ_TO_KHZ(chan->center_freq),
regd);
@@ -1346,16 +1508,21 @@ static void handle_channel_custom(struct wiphy *wiphy,
return;
}
- chan_reg_rule_print_dbg(chan, reg_rule);
+ chan_reg_rule_print_dbg(regd, chan, reg_rule);
power_rule = &reg_rule->power_rule;
freq_range = &reg_rule->freq_range;
- if (freq_range->max_bandwidth_khz < MHZ_TO_KHZ(40))
+ max_bandwidth_khz = freq_range->max_bandwidth_khz;
+ /* Check if auto calculation requested */
+ if (reg_rule->flags & NL80211_RRF_AUTO_BW)
+ max_bandwidth_khz = reg_get_max_bandwidth(regd, reg_rule);
+
+ if (max_bandwidth_khz < MHZ_TO_KHZ(40))
bw_flags = IEEE80211_CHAN_NO_HT40;
- if (freq_range->max_bandwidth_khz < MHZ_TO_KHZ(80))
+ if (max_bandwidth_khz < MHZ_TO_KHZ(80))
bw_flags |= IEEE80211_CHAN_NO_80MHZ;
- if (freq_range->max_bandwidth_khz < MHZ_TO_KHZ(160))
+ if (max_bandwidth_khz < MHZ_TO_KHZ(160))
bw_flags |= IEEE80211_CHAN_NO_160MHZ;
chan->flags |= map_regdom_flags(reg_rule->flags) | bw_flags;
@@ -1448,6 +1615,11 @@ __reg_process_hint_user(struct regulatory_request *user_request)
{
struct regulatory_request *lr = get_last_request();
+ if (reg_request_indoor(user_request)) {
+ reg_is_indoor = true;
+ return REG_REQ_USER_HINT_HANDLED;
+ }
+
if (reg_request_cell_base(user_request))
return reg_ignore_cell_hint(user_request);
@@ -1495,8 +1667,9 @@ reg_process_hint_user(struct regulatory_request *user_request)
treatment = __reg_process_hint_user(user_request);
if (treatment == REG_REQ_IGNORE ||
- treatment == REG_REQ_ALREADY_SET) {
- kfree(user_request);
+ treatment == REG_REQ_ALREADY_SET ||
+ treatment == REG_REQ_USER_HINT_HANDLED) {
+ reg_free_request(user_request);
return treatment;
}
@@ -1556,14 +1729,15 @@ reg_process_hint_driver(struct wiphy *wiphy,
case REG_REQ_OK:
break;
case REG_REQ_IGNORE:
- kfree(driver_request);
+ case REG_REQ_USER_HINT_HANDLED:
+ reg_free_request(driver_request);
return treatment;
case REG_REQ_INTERSECT:
/* fall through */
case REG_REQ_ALREADY_SET:
regd = reg_copy_regd(get_cfg80211_regdom());
if (IS_ERR(regd)) {
- kfree(driver_request);
+ reg_free_request(driver_request);
return REG_REQ_IGNORE;
}
rcu_assign_pointer(wiphy->regd, regd);
@@ -1655,12 +1829,13 @@ reg_process_hint_country_ie(struct wiphy *wiphy,
case REG_REQ_OK:
break;
case REG_REQ_IGNORE:
+ case REG_REQ_USER_HINT_HANDLED:
/* fall through */
case REG_REQ_ALREADY_SET:
- kfree(country_ie_request);
+ reg_free_request(country_ie_request);
return treatment;
case REG_REQ_INTERSECT:
- kfree(country_ie_request);
+ reg_free_request(country_ie_request);
/*
* This doesn't happen yet, not sure we
* ever want to support it for this case.
@@ -1683,17 +1858,9 @@ static void reg_process_hint(struct regulatory_request *reg_request)
struct wiphy *wiphy = NULL;
enum reg_request_treatment treatment;
- if (WARN_ON(!reg_request->alpha2))
- return;
-
if (reg_request->wiphy_idx != WIPHY_IDX_INVALID)
wiphy = wiphy_idx_to_wiphy(reg_request->wiphy_idx);
- if (reg_request->initiator == NL80211_REGDOM_SET_BY_DRIVER && !wiphy) {
- kfree(reg_request);
- return;
- }
-
switch (reg_request->initiator) {
case NL80211_REGDOM_SET_BY_CORE:
reg_process_hint_core(reg_request);
@@ -1701,25 +1868,36 @@ static void reg_process_hint(struct regulatory_request *reg_request)
case NL80211_REGDOM_SET_BY_USER:
treatment = reg_process_hint_user(reg_request);
if (treatment == REG_REQ_IGNORE ||
- treatment == REG_REQ_ALREADY_SET)
+ treatment == REG_REQ_ALREADY_SET ||
+ treatment == REG_REQ_USER_HINT_HANDLED)
return;
- schedule_delayed_work(&reg_timeout, msecs_to_jiffies(3142));
+ queue_delayed_work(system_power_efficient_wq,
+ &reg_timeout, msecs_to_jiffies(3142));
return;
case NL80211_REGDOM_SET_BY_DRIVER:
+ if (!wiphy)
+ goto out_free;
treatment = reg_process_hint_driver(wiphy, reg_request);
break;
case NL80211_REGDOM_SET_BY_COUNTRY_IE:
+ if (!wiphy)
+ goto out_free;
treatment = reg_process_hint_country_ie(wiphy, reg_request);
break;
default:
WARN(1, "invalid initiator %d\n", reg_request->initiator);
- return;
+ goto out_free;
}
/* This is required so that the orig_* parameters are saved */
if (treatment == REG_REQ_ALREADY_SET && wiphy &&
wiphy->regulatory_flags & REGULATORY_STRICT_REG)
wiphy_update_regulatory(wiphy, reg_request->initiator);
+
+ return;
+
+out_free:
+ reg_free_request(reg_request);
}
/*
@@ -1735,7 +1913,7 @@ static void reg_process_pending_hints(void)
/* When last_request->processed becomes true this will be rescheduled */
if (lr && !lr->processed) {
- REG_DBG_PRINT("Pending regulatory request, waiting for it to be processed...\n");
+ reg_process_hint(lr);
return;
}
@@ -1845,6 +2023,22 @@ int regulatory_hint_user(const char *alpha2,
return 0;
}
+int regulatory_hint_indoor_user(void)
+{
+ struct regulatory_request *request;
+
+ request = kzalloc(sizeof(struct regulatory_request), GFP_KERNEL);
+ if (!request)
+ return -ENOMEM;
+
+ request->wiphy_idx = WIPHY_IDX_INVALID;
+ request->initiator = NL80211_REGDOM_SET_BY_USER;
+ request->user_reg_hint_type = NL80211_USER_REG_HINT_INDOOR;
+ queue_regulatory_request(request);
+
+ return 0;
+}
+
/* Driver hints */
int regulatory_hint(struct wiphy *wiphy, const char *alpha2)
{
@@ -2012,6 +2206,8 @@ static void restore_regulatory_settings(bool reset_user)
ASSERT_RTNL();
+ reg_is_indoor = false;
+
reset_regdomains(true, &world_regdom);
restore_alpha2(alpha2, reset_user);
@@ -2147,31 +2343,49 @@ static void print_rd_rules(const struct ieee80211_regdomain *rd)
const struct ieee80211_reg_rule *reg_rule = NULL;
const struct ieee80211_freq_range *freq_range = NULL;
const struct ieee80211_power_rule *power_rule = NULL;
+ char bw[32], cac_time[32];
- pr_info(" (start_freq - end_freq @ bandwidth), (max_antenna_gain, max_eirp)\n");
+ pr_info(" (start_freq - end_freq @ bandwidth), (max_antenna_gain, max_eirp), (dfs_cac_time)\n");
for (i = 0; i < rd->n_reg_rules; i++) {
reg_rule = &rd->reg_rules[i];
freq_range = &reg_rule->freq_range;
power_rule = &reg_rule->power_rule;
+ if (reg_rule->flags & NL80211_RRF_AUTO_BW)
+ snprintf(bw, sizeof(bw), "%d KHz, %d KHz AUTO",
+ freq_range->max_bandwidth_khz,
+ reg_get_max_bandwidth(rd, reg_rule));
+ else
+ snprintf(bw, sizeof(bw), "%d KHz",
+ freq_range->max_bandwidth_khz);
+
+ if (reg_rule->flags & NL80211_RRF_DFS)
+ scnprintf(cac_time, sizeof(cac_time), "%u s",
+ reg_rule->dfs_cac_ms/1000);
+ else
+ scnprintf(cac_time, sizeof(cac_time), "N/A");
+
+
/*
* There may not be documentation for max antenna gain
* in certain regions
*/
if (power_rule->max_antenna_gain)
- pr_info(" (%d KHz - %d KHz @ %d KHz), (%d mBi, %d mBm)\n",
+ pr_info(" (%d KHz - %d KHz @ %s), (%d mBi, %d mBm), (%s)\n",
freq_range->start_freq_khz,
freq_range->end_freq_khz,
- freq_range->max_bandwidth_khz,
+ bw,
power_rule->max_antenna_gain,
- power_rule->max_eirp);
+ power_rule->max_eirp,
+ cac_time);
else
- pr_info(" (%d KHz - %d KHz @ %d KHz), (N/A, %d mBm)\n",
+ pr_info(" (%d KHz - %d KHz @ %s), (N/A, %d mBm), (%s)\n",
freq_range->start_freq_khz,
freq_range->end_freq_khz,
- freq_range->max_bandwidth_khz,
- power_rule->max_eirp);
+ bw,
+ power_rule->max_eirp,
+ cac_time);
}
}
@@ -2244,9 +2458,6 @@ static int reg_set_rd_user(const struct ieee80211_regdomain *rd,
{
const struct ieee80211_regdomain *intersected_rd = NULL;
- if (is_world_regdom(rd->alpha2))
- return -EINVAL;
-
if (!regdom_changes(rd->alpha2))
return -EALREADY;
@@ -2294,7 +2505,8 @@ static int reg_set_rd_driver(const struct ieee80211_regdomain *rd,
request_wiphy = wiphy_idx_to_wiphy(driver_request->wiphy_idx);
if (!request_wiphy) {
- schedule_delayed_work(&reg_timeout, 0);
+ queue_delayed_work(system_power_efficient_wq,
+ &reg_timeout, 0);
return -ENODEV;
}
@@ -2354,7 +2566,8 @@ static int reg_set_rd_country_ie(const struct ieee80211_regdomain *rd,
request_wiphy = wiphy_idx_to_wiphy(country_ie_request->wiphy_idx);
if (!request_wiphy) {
- schedule_delayed_work(&reg_timeout, 0);
+ queue_delayed_work(system_power_efficient_wq,
+ &reg_timeout, 0);
return -ENODEV;
}
@@ -2433,26 +2646,6 @@ int set_regdom(const struct ieee80211_regdomain *rd)
return 0;
}
-int reg_device_uevent(struct device *dev, struct kobj_uevent_env *env)
-{
- struct regulatory_request *lr;
- u8 alpha2[2];
- bool add = false;
-
- rcu_read_lock();
- lr = get_last_request();
- if (lr && !lr->processed) {
- memcpy(alpha2, lr->alpha2, 2);
- add = true;
- }
- rcu_read_unlock();
-
- if (add)
- return add_uevent_var(env, "COUNTRY=%c%c",
- alpha2[0], alpha2[1]);
- return 0;
-}
-
void wiphy_regulatory_register(struct wiphy *wiphy)
{
struct regulatory_request *lr;
@@ -2475,7 +2668,7 @@ void wiphy_regulatory_deregister(struct wiphy *wiphy)
reg_num_devs_support_basehint--;
rcu_free_regdom(get_wiphy_regdom(wiphy));
- rcu_assign_pointer(wiphy->regd, NULL);
+ RCU_INIT_POINTER(wiphy->regd, NULL);
if (lr)
request_wiphy = wiphy_idx_to_wiphy(lr->wiphy_idx);
@@ -2495,6 +2688,40 @@ static void reg_timeout_work(struct work_struct *work)
rtnl_unlock();
}
+/*
+ * See http://www.fcc.gov/document/5-ghz-unlicensed-spectrum-unii for UNII
+ * band definitions. Returns the band index (0-4), or -EINVAL if none match.
+ */
+int cfg80211_get_unii(int freq)
+{
+ /* UNII-1 (both edges inclusive) */
+ if (freq >= 5150 && freq <= 5250)
+ return 0;
+
+ /* UNII-2A (5250 itself is UNII-1; same edge pattern for the bands below) */
+ if (freq > 5250 && freq <= 5350)
+ return 1;
+
+ /* UNII-2B */
+ if (freq > 5350 && freq <= 5470)
+ return 2;
+
+ /* UNII-2C */
+ if (freq > 5470 && freq <= 5725)
+ return 3;
+
+ /* UNII-3 */
+ if (freq > 5725 && freq <= 5825)
+ return 4;
+
+ return -EINVAL;
+}
+
+bool regulatory_indoor_allowed(void)
+{
+ return reg_is_indoor;
+}
+
int __init regulatory_init(void)
{
int err = 0;
@@ -2503,8 +2730,6 @@ int __init regulatory_init(void)
if (IS_ERR(reg_pdev))
return PTR_ERR(reg_pdev);
- reg_pdev->dev.type = &reg_device_type;
-
spin_lock_init(&reg_requests_lock);
spin_lock_init(&reg_pending_beacons_lock);
diff --git a/net/wireless/reg.h b/net/wireless/reg.h
index 02bd8f4b092..5e48031ccb9 100644
--- a/net/wireless/reg.h
+++ b/net/wireless/reg.h
@@ -25,8 +25,8 @@ enum nl80211_dfs_regions reg_get_dfs_region(struct wiphy *wiphy);
int regulatory_hint_user(const char *alpha2,
enum nl80211_user_reg_hint_type user_reg_hint_type);
+int regulatory_hint_indoor_user(void);
-int reg_device_uevent(struct device *dev, struct kobj_uevent_env *env);
void wiphy_regulatory_register(struct wiphy *wiphy);
void wiphy_regulatory_deregister(struct wiphy *wiphy);
@@ -34,6 +34,8 @@ int __init regulatory_init(void);
void regulatory_exit(void);
int set_regdom(const struct ieee80211_regdomain *rd);
+unsigned int reg_get_max_bandwidth(const struct ieee80211_regdomain *rd,
+ const struct ieee80211_reg_rule *rule);
bool reg_last_request_cell_base(void);
@@ -103,4 +105,21 @@ void regulatory_hint_country_ie(struct wiphy *wiphy,
*/
void regulatory_hint_disconnect(void);
+/**
+ * cfg80211_get_unii - get the U-NII band for the frequency
+ * @freq: the frequency for which we want to get the UNII band.
+ *
+ * Get a value specifying the U-NII band the frequency belongs to.
+ * U-NII bands are defined by the FCC in C.F.R 47 part 15.
+ *
+ * Returns -EINVAL if freq is invalid, 0 for UNII-1, 1 for UNII-2A,
+ * 2 for UNII-2B, 3 for UNII-2C and 4 for UNII-3.
+ */
+int cfg80211_get_unii(int freq);
+
+/**
+ * regulatory_indoor_allowed - is indoor operation allowed
+ */
+bool regulatory_indoor_allowed(void);
+
#endif /* __NET_WIRELESS_REG_H */
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index d1ed4aebbbb..0798c62e608 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -81,10 +81,10 @@ static void bss_free(struct cfg80211_internal_bss *bss)
kfree(bss);
}
-static inline void bss_ref_get(struct cfg80211_registered_device *dev,
+static inline void bss_ref_get(struct cfg80211_registered_device *rdev,
struct cfg80211_internal_bss *bss)
{
- lockdep_assert_held(&dev->bss_lock);
+ lockdep_assert_held(&rdev->bss_lock);
bss->refcount++;
if (bss->pub.hidden_beacon_bss) {
@@ -95,10 +95,10 @@ static inline void bss_ref_get(struct cfg80211_registered_device *dev,
}
}
-static inline void bss_ref_put(struct cfg80211_registered_device *dev,
+static inline void bss_ref_put(struct cfg80211_registered_device *rdev,
struct cfg80211_internal_bss *bss)
{
- lockdep_assert_held(&dev->bss_lock);
+ lockdep_assert_held(&rdev->bss_lock);
if (bss->pub.hidden_beacon_bss) {
struct cfg80211_internal_bss *hbss;
@@ -114,10 +114,10 @@ static inline void bss_ref_put(struct cfg80211_registered_device *dev,
bss_free(bss);
}
-static bool __cfg80211_unlink_bss(struct cfg80211_registered_device *dev,
+static bool __cfg80211_unlink_bss(struct cfg80211_registered_device *rdev,
struct cfg80211_internal_bss *bss)
{
- lockdep_assert_held(&dev->bss_lock);
+ lockdep_assert_held(&rdev->bss_lock);
if (!list_empty(&bss->hidden_list)) {
/*
@@ -134,31 +134,31 @@ static bool __cfg80211_unlink_bss(struct cfg80211_registered_device *dev,
}
list_del_init(&bss->list);
- rb_erase(&bss->rbn, &dev->bss_tree);
- bss_ref_put(dev, bss);
+ rb_erase(&bss->rbn, &rdev->bss_tree);
+ bss_ref_put(rdev, bss);
return true;
}
-static void __cfg80211_bss_expire(struct cfg80211_registered_device *dev,
+static void __cfg80211_bss_expire(struct cfg80211_registered_device *rdev,
unsigned long expire_time)
{
struct cfg80211_internal_bss *bss, *tmp;
bool expired = false;
- lockdep_assert_held(&dev->bss_lock);
+ lockdep_assert_held(&rdev->bss_lock);
- list_for_each_entry_safe(bss, tmp, &dev->bss_list, list) {
+ list_for_each_entry_safe(bss, tmp, &rdev->bss_list, list) {
if (atomic_read(&bss->hold))
continue;
if (!time_after(expire_time, bss->ts))
continue;
- if (__cfg80211_unlink_bss(dev, bss))
+ if (__cfg80211_unlink_bss(rdev, bss))
expired = true;
}
if (expired)
- dev->bss_generation++;
+ rdev->bss_generation++;
}
void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev,
@@ -238,11 +238,11 @@ void __cfg80211_scan_done(struct work_struct *wk)
void cfg80211_scan_done(struct cfg80211_scan_request *request, bool aborted)
{
trace_cfg80211_scan_done(request, aborted);
- WARN_ON(request != wiphy_to_dev(request->wiphy)->scan_req);
+ WARN_ON(request != wiphy_to_rdev(request->wiphy)->scan_req);
request->aborted = aborted;
request->notified = true;
- queue_work(cfg80211_wq, &wiphy_to_dev(request->wiphy)->scan_done_wk);
+ queue_work(cfg80211_wq, &wiphy_to_rdev(request->wiphy)->scan_done_wk);
}
EXPORT_SYMBOL(cfg80211_scan_done);
@@ -278,20 +278,28 @@ void cfg80211_sched_scan_results(struct wiphy *wiphy)
{
trace_cfg80211_sched_scan_results(wiphy);
/* ignore if we're not scanning */
- if (wiphy_to_dev(wiphy)->sched_scan_req)
+ if (wiphy_to_rdev(wiphy)->sched_scan_req)
queue_work(cfg80211_wq,
- &wiphy_to_dev(wiphy)->sched_scan_results_wk);
+ &wiphy_to_rdev(wiphy)->sched_scan_results_wk);
}
EXPORT_SYMBOL(cfg80211_sched_scan_results);
-void cfg80211_sched_scan_stopped(struct wiphy *wiphy)
+void cfg80211_sched_scan_stopped_rtnl(struct wiphy *wiphy)
{
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
+
+ ASSERT_RTNL();
trace_cfg80211_sched_scan_stopped(wiphy);
- rtnl_lock();
__cfg80211_stop_sched_scan(rdev, true);
+}
+EXPORT_SYMBOL(cfg80211_sched_scan_stopped_rtnl);
+
+void cfg80211_sched_scan_stopped(struct wiphy *wiphy)
+{
+ rtnl_lock();
+ cfg80211_sched_scan_stopped_rtnl(wiphy);
rtnl_unlock();
}
EXPORT_SYMBOL(cfg80211_sched_scan_stopped);
@@ -322,21 +330,21 @@ int __cfg80211_stop_sched_scan(struct cfg80211_registered_device *rdev,
return 0;
}
-void cfg80211_bss_age(struct cfg80211_registered_device *dev,
+void cfg80211_bss_age(struct cfg80211_registered_device *rdev,
unsigned long age_secs)
{
struct cfg80211_internal_bss *bss;
unsigned long age_jiffies = msecs_to_jiffies(age_secs * MSEC_PER_SEC);
- spin_lock_bh(&dev->bss_lock);
- list_for_each_entry(bss, &dev->bss_list, list)
+ spin_lock_bh(&rdev->bss_lock);
+ list_for_each_entry(bss, &rdev->bss_list, list)
bss->ts -= age_jiffies;
- spin_unlock_bh(&dev->bss_lock);
+ spin_unlock_bh(&rdev->bss_lock);
}
-void cfg80211_bss_expire(struct cfg80211_registered_device *dev)
+void cfg80211_bss_expire(struct cfg80211_registered_device *rdev)
{
- __cfg80211_bss_expire(dev, jiffies - IEEE80211_SCAN_RESULT_EXPIRE);
+ __cfg80211_bss_expire(rdev, jiffies - IEEE80211_SCAN_RESULT_EXPIRE);
}
const u8 *cfg80211_find_ie(u8 eid, const u8 *ies, int len)
@@ -526,32 +534,34 @@ struct cfg80211_bss *cfg80211_get_bss(struct wiphy *wiphy,
const u8 *ssid, size_t ssid_len,
u16 capa_mask, u16 capa_val)
{
- struct cfg80211_registered_device *dev = wiphy_to_dev(wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
struct cfg80211_internal_bss *bss, *res = NULL;
unsigned long now = jiffies;
trace_cfg80211_get_bss(wiphy, channel, bssid, ssid, ssid_len, capa_mask,
capa_val);
- spin_lock_bh(&dev->bss_lock);
+ spin_lock_bh(&rdev->bss_lock);
- list_for_each_entry(bss, &dev->bss_list, list) {
+ list_for_each_entry(bss, &rdev->bss_list, list) {
if ((bss->pub.capability & capa_mask) != capa_val)
continue;
if (channel && bss->pub.channel != channel)
continue;
+ if (!is_valid_ether_addr(bss->pub.bssid))
+ continue;
/* Don't get expired BSS structs */
if (time_after(now, bss->ts + IEEE80211_SCAN_RESULT_EXPIRE) &&
!atomic_read(&bss->hold))
continue;
if (is_bss(&bss->pub, bssid, ssid, ssid_len)) {
res = bss;
- bss_ref_get(dev, res);
+ bss_ref_get(rdev, res);
break;
}
}
- spin_unlock_bh(&dev->bss_lock);
+ spin_unlock_bh(&rdev->bss_lock);
if (!res)
return NULL;
trace_cfg80211_return_bss(&res->pub);
@@ -559,10 +569,10 @@ struct cfg80211_bss *cfg80211_get_bss(struct wiphy *wiphy,
}
EXPORT_SYMBOL(cfg80211_get_bss);
-static void rb_insert_bss(struct cfg80211_registered_device *dev,
+static void rb_insert_bss(struct cfg80211_registered_device *rdev,
struct cfg80211_internal_bss *bss)
{
- struct rb_node **p = &dev->bss_tree.rb_node;
+ struct rb_node **p = &rdev->bss_tree.rb_node;
struct rb_node *parent = NULL;
struct cfg80211_internal_bss *tbss;
int cmp;
@@ -585,15 +595,15 @@ static void rb_insert_bss(struct cfg80211_registered_device *dev,
}
rb_link_node(&bss->rbn, parent, p);
- rb_insert_color(&bss->rbn, &dev->bss_tree);
+ rb_insert_color(&bss->rbn, &rdev->bss_tree);
}
static struct cfg80211_internal_bss *
-rb_find_bss(struct cfg80211_registered_device *dev,
+rb_find_bss(struct cfg80211_registered_device *rdev,
struct cfg80211_internal_bss *res,
enum bss_compare_mode mode)
{
- struct rb_node *n = dev->bss_tree.rb_node;
+ struct rb_node *n = rdev->bss_tree.rb_node;
struct cfg80211_internal_bss *bss;
int r;
@@ -612,7 +622,7 @@ rb_find_bss(struct cfg80211_registered_device *dev,
return NULL;
}
-static bool cfg80211_combine_bsses(struct cfg80211_registered_device *dev,
+static bool cfg80211_combine_bsses(struct cfg80211_registered_device *rdev,
struct cfg80211_internal_bss *new)
{
const struct cfg80211_bss_ies *ies;
@@ -642,7 +652,7 @@ static bool cfg80211_combine_bsses(struct cfg80211_registered_device *dev,
/* This is the bad part ... */
- list_for_each_entry(bss, &dev->bss_list, list) {
+ list_for_each_entry(bss, &rdev->bss_list, list) {
if (!ether_addr_equal(bss->pub.bssid, new->pub.bssid))
continue;
if (bss->pub.channel != new->pub.channel)
@@ -659,9 +669,6 @@ static bool cfg80211_combine_bsses(struct cfg80211_registered_device *dev,
continue;
if (ssidlen && ie[1] != ssidlen)
continue;
- /* that would be odd ... */
- if (bss->pub.beacon_ies)
- continue;
if (WARN_ON_ONCE(bss->pub.hidden_beacon_bss))
continue;
if (WARN_ON_ONCE(!list_empty(&bss->hidden_list)))
@@ -679,8 +686,9 @@ static bool cfg80211_combine_bsses(struct cfg80211_registered_device *dev,
/* Returned bss is reference counted and must be cleaned up appropriately. */
static struct cfg80211_internal_bss *
-cfg80211_bss_update(struct cfg80211_registered_device *dev,
- struct cfg80211_internal_bss *tmp)
+cfg80211_bss_update(struct cfg80211_registered_device *rdev,
+ struct cfg80211_internal_bss *tmp,
+ bool signal_valid)
{
struct cfg80211_internal_bss *found = NULL;
@@ -689,14 +697,14 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev,
tmp->ts = jiffies;
- spin_lock_bh(&dev->bss_lock);
+ spin_lock_bh(&rdev->bss_lock);
if (WARN_ON(!rcu_access_pointer(tmp->pub.ies))) {
- spin_unlock_bh(&dev->bss_lock);
+ spin_unlock_bh(&rdev->bss_lock);
return NULL;
}
- found = rb_find_bss(dev, tmp, BSS_CMP_REGULAR);
+ found = rb_find_bss(rdev, tmp, BSS_CMP_REGULAR);
if (found) {
/* Update IEs */
@@ -765,7 +773,12 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev,
}
found->pub.beacon_interval = tmp->pub.beacon_interval;
- found->pub.signal = tmp->pub.signal;
+ /*
+ * don't update the signal if beacon was heard on
+ * adjacent channel.
+ */
+ if (signal_valid)
+ found->pub.signal = tmp->pub.signal;
found->pub.capability = tmp->pub.capability;
found->ts = tmp->ts;
} else {
@@ -778,7 +791,7 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev,
* is allocated on the stack since it's not needed in the
* more common case of an update
*/
- new = kzalloc(sizeof(*new) + dev->wiphy.bss_priv_size,
+ new = kzalloc(sizeof(*new) + rdev->wiphy.bss_priv_size,
GFP_ATOMIC);
if (!new) {
ies = (void *)rcu_dereference(tmp->pub.beacon_ies);
@@ -794,9 +807,9 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev,
INIT_LIST_HEAD(&new->hidden_list);
if (rcu_access_pointer(tmp->pub.proberesp_ies)) {
- hidden = rb_find_bss(dev, tmp, BSS_CMP_HIDE_ZLEN);
+ hidden = rb_find_bss(rdev, tmp, BSS_CMP_HIDE_ZLEN);
if (!hidden)
- hidden = rb_find_bss(dev, tmp,
+ hidden = rb_find_bss(rdev, tmp,
BSS_CMP_HIDE_NUL);
if (hidden) {
new->pub.hidden_beacon_bss = &hidden->pub;
@@ -813,24 +826,24 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev,
* expensive search for any probe responses that should
* be grouped with this beacon for updates ...
*/
- if (!cfg80211_combine_bsses(dev, new)) {
+ if (!cfg80211_combine_bsses(rdev, new)) {
kfree(new);
goto drop;
}
}
- list_add_tail(&new->list, &dev->bss_list);
- rb_insert_bss(dev, new);
+ list_add_tail(&new->list, &rdev->bss_list);
+ rb_insert_bss(rdev, new);
found = new;
}
- dev->bss_generation++;
- bss_ref_get(dev, found);
- spin_unlock_bh(&dev->bss_lock);
+ rdev->bss_generation++;
+ bss_ref_get(rdev, found);
+ spin_unlock_bh(&rdev->bss_lock);
return found;
drop:
- spin_unlock_bh(&dev->bss_lock);
+ spin_unlock_bh(&rdev->bss_lock);
return NULL;
}
@@ -869,14 +882,16 @@ cfg80211_get_bss_channel(struct wiphy *wiphy, const u8 *ie, size_t ielen,
/* Returned bss is reference counted and must be cleaned up appropriately. */
struct cfg80211_bss*
cfg80211_inform_bss_width(struct wiphy *wiphy,
- struct ieee80211_channel *channel,
+ struct ieee80211_channel *rx_channel,
enum nl80211_bss_scan_width scan_width,
const u8 *bssid, u64 tsf, u16 capability,
u16 beacon_interval, const u8 *ie, size_t ielen,
s32 signal, gfp_t gfp)
{
struct cfg80211_bss_ies *ies;
+ struct ieee80211_channel *channel;
struct cfg80211_internal_bss tmp = {}, *res;
+ bool signal_valid;
if (WARN_ON(!wiphy))
return NULL;
@@ -885,7 +900,7 @@ cfg80211_inform_bss_width(struct wiphy *wiphy,
(signal < 0 || signal > 100)))
return NULL;
- channel = cfg80211_get_bss_channel(wiphy, ie, ielen, channel);
+ channel = cfg80211_get_bss_channel(wiphy, ie, ielen, rx_channel);
if (!channel)
return NULL;
@@ -913,7 +928,9 @@ cfg80211_inform_bss_width(struct wiphy *wiphy,
rcu_assign_pointer(tmp.pub.beacon_ies, ies);
rcu_assign_pointer(tmp.pub.ies, ies);
- res = cfg80211_bss_update(wiphy_to_dev(wiphy), &tmp);
+ signal_valid = abs(rx_channel->center_freq - channel->center_freq) <=
+ wiphy->max_adj_channel_rssi_comp;
+ res = cfg80211_bss_update(wiphy_to_rdev(wiphy), &tmp, signal_valid);
if (!res)
return NULL;
@@ -929,20 +946,22 @@ EXPORT_SYMBOL(cfg80211_inform_bss_width);
/* Returned bss is reference counted and must be cleaned up appropriately. */
struct cfg80211_bss *
cfg80211_inform_bss_width_frame(struct wiphy *wiphy,
- struct ieee80211_channel *channel,
+ struct ieee80211_channel *rx_channel,
enum nl80211_bss_scan_width scan_width,
struct ieee80211_mgmt *mgmt, size_t len,
s32 signal, gfp_t gfp)
{
struct cfg80211_internal_bss tmp = {}, *res;
struct cfg80211_bss_ies *ies;
+ struct ieee80211_channel *channel;
+ bool signal_valid;
size_t ielen = len - offsetof(struct ieee80211_mgmt,
u.probe_resp.variable);
BUILD_BUG_ON(offsetof(struct ieee80211_mgmt, u.probe_resp.variable) !=
offsetof(struct ieee80211_mgmt, u.beacon.variable));
- trace_cfg80211_inform_bss_width_frame(wiphy, channel, scan_width, mgmt,
+ trace_cfg80211_inform_bss_width_frame(wiphy, rx_channel, scan_width, mgmt,
len, signal);
if (WARN_ON(!mgmt))
@@ -959,7 +978,7 @@ cfg80211_inform_bss_width_frame(struct wiphy *wiphy,
return NULL;
channel = cfg80211_get_bss_channel(wiphy, mgmt->u.beacon.variable,
- ielen, channel);
+ ielen, rx_channel);
if (!channel)
return NULL;
@@ -983,7 +1002,9 @@ cfg80211_inform_bss_width_frame(struct wiphy *wiphy,
tmp.pub.beacon_interval = le16_to_cpu(mgmt->u.probe_resp.beacon_int);
tmp.pub.capability = le16_to_cpu(mgmt->u.probe_resp.capab_info);
- res = cfg80211_bss_update(wiphy_to_dev(wiphy), &tmp);
+ signal_valid = abs(rx_channel->center_freq - channel->center_freq) <=
+ wiphy->max_adj_channel_rssi_comp;
+ res = cfg80211_bss_update(wiphy_to_rdev(wiphy), &tmp, signal_valid);
if (!res)
return NULL;
@@ -998,7 +1019,7 @@ EXPORT_SYMBOL(cfg80211_inform_bss_width_frame);
void cfg80211_ref_bss(struct wiphy *wiphy, struct cfg80211_bss *pub)
{
- struct cfg80211_registered_device *dev = wiphy_to_dev(wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
struct cfg80211_internal_bss *bss;
if (!pub)
@@ -1006,15 +1027,15 @@ void cfg80211_ref_bss(struct wiphy *wiphy, struct cfg80211_bss *pub)
bss = container_of(pub, struct cfg80211_internal_bss, pub);
- spin_lock_bh(&dev->bss_lock);
- bss_ref_get(dev, bss);
- spin_unlock_bh(&dev->bss_lock);
+ spin_lock_bh(&rdev->bss_lock);
+ bss_ref_get(rdev, bss);
+ spin_unlock_bh(&rdev->bss_lock);
}
EXPORT_SYMBOL(cfg80211_ref_bss);
void cfg80211_put_bss(struct wiphy *wiphy, struct cfg80211_bss *pub)
{
- struct cfg80211_registered_device *dev = wiphy_to_dev(wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
struct cfg80211_internal_bss *bss;
if (!pub)
@@ -1022,15 +1043,15 @@ void cfg80211_put_bss(struct wiphy *wiphy, struct cfg80211_bss *pub)
bss = container_of(pub, struct cfg80211_internal_bss, pub);
- spin_lock_bh(&dev->bss_lock);
- bss_ref_put(dev, bss);
- spin_unlock_bh(&dev->bss_lock);
+ spin_lock_bh(&rdev->bss_lock);
+ bss_ref_put(rdev, bss);
+ spin_unlock_bh(&rdev->bss_lock);
}
EXPORT_SYMBOL(cfg80211_put_bss);
void cfg80211_unlink_bss(struct wiphy *wiphy, struct cfg80211_bss *pub)
{
- struct cfg80211_registered_device *dev = wiphy_to_dev(wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
struct cfg80211_internal_bss *bss;
if (WARN_ON(!pub))
@@ -1038,12 +1059,12 @@ void cfg80211_unlink_bss(struct wiphy *wiphy, struct cfg80211_bss *pub)
bss = container_of(pub, struct cfg80211_internal_bss, pub);
- spin_lock_bh(&dev->bss_lock);
+ spin_lock_bh(&rdev->bss_lock);
if (!list_empty(&bss->list)) {
- if (__cfg80211_unlink_bss(dev, bss))
- dev->bss_generation++;
+ if (__cfg80211_unlink_bss(rdev, bss))
+ rdev->bss_generation++;
}
- spin_unlock_bh(&dev->bss_lock);
+ spin_unlock_bh(&rdev->bss_lock);
}
EXPORT_SYMBOL(cfg80211_unlink_bss);
@@ -1060,7 +1081,7 @@ cfg80211_get_dev_from_ifindex(struct net *net, int ifindex)
if (!dev)
return ERR_PTR(-ENODEV);
if (dev->ieee80211_ptr)
- rdev = wiphy_to_dev(dev->ieee80211_ptr->wiphy);
+ rdev = wiphy_to_rdev(dev->ieee80211_ptr->wiphy);
else
rdev = ERR_PTR(-ENODEV);
dev_put(dev);
@@ -1140,7 +1161,11 @@ int cfg80211_wext_siwscan(struct net_device *dev,
int k;
int wiphy_freq = wiphy->bands[band]->channels[j].center_freq;
for (k = 0; k < wreq->num_channels; k++) {
- int wext_freq = cfg80211_wext_freq(wiphy, &wreq->channel_list[k]);
+ struct iw_freq *freq =
+ &wreq->channel_list[k];
+ int wext_freq =
+ cfg80211_wext_freq(freq);
+
if (wext_freq == wiphy_freq)
goto wext_freq_found;
}
@@ -1452,7 +1477,7 @@ ieee80211_bss(struct wiphy *wiphy, struct iw_request_info *info,
}
-static int ieee80211_scan_results(struct cfg80211_registered_device *dev,
+static int ieee80211_scan_results(struct cfg80211_registered_device *rdev,
struct iw_request_info *info,
char *buf, size_t len)
{
@@ -1460,18 +1485,18 @@ static int ieee80211_scan_results(struct cfg80211_registered_device *dev,
char *end_buf = buf + len;
struct cfg80211_internal_bss *bss;
- spin_lock_bh(&dev->bss_lock);
- cfg80211_bss_expire(dev);
+ spin_lock_bh(&rdev->bss_lock);
+ cfg80211_bss_expire(rdev);
- list_for_each_entry(bss, &dev->bss_list, list) {
+ list_for_each_entry(bss, &rdev->bss_list, list) {
if (buf + len - current_ev <= IW_EV_ADDR_LEN) {
- spin_unlock_bh(&dev->bss_lock);
+ spin_unlock_bh(&rdev->bss_lock);
return -E2BIG;
}
- current_ev = ieee80211_bss(&dev->wiphy, info, bss,
+ current_ev = ieee80211_bss(&rdev->wiphy, info, bss,
current_ev, end_buf);
}
- spin_unlock_bh(&dev->bss_lock);
+ spin_unlock_bh(&rdev->bss_lock);
return current_ev - buf;
}
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index f04d4c32e96..8bbeeb30221 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -59,12 +59,11 @@ static void cfg80211_sme_free(struct wireless_dev *wdev)
static int cfg80211_conn_scan(struct wireless_dev *wdev)
{
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
struct cfg80211_scan_request *request;
int n_channels, err;
ASSERT_RTNL();
- ASSERT_RDEV_LOCK(rdev);
ASSERT_WDEV_LOCK(wdev);
if (rdev->scan_req || rdev->scan_msg)
@@ -131,7 +130,7 @@ static int cfg80211_conn_scan(struct wireless_dev *wdev)
static int cfg80211_conn_do_work(struct wireless_dev *wdev)
{
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
struct cfg80211_connect_params *params;
struct cfg80211_assoc_request req = {};
int err;
@@ -150,7 +149,8 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev)
case CFG80211_CONN_SCAN_AGAIN:
return cfg80211_conn_scan(wdev);
case CFG80211_CONN_AUTHENTICATE_NEXT:
- BUG_ON(!rdev->ops->auth);
+ if (WARN_ON(!rdev->ops->auth))
+ return -EOPNOTSUPP;
wdev->conn->state = CFG80211_CONN_AUTHENTICATING;
return cfg80211_mlme_auth(rdev, wdev->netdev,
params->channel, params->auth_type,
@@ -162,7 +162,8 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev)
case CFG80211_CONN_AUTH_FAILED:
return -ENOTCONN;
case CFG80211_CONN_ASSOCIATE_NEXT:
- BUG_ON(!rdev->ops->assoc);
+ if (WARN_ON(!rdev->ops->assoc))
+ return -EOPNOTSUPP;
wdev->conn->state = CFG80211_CONN_ASSOCIATING;
if (wdev->conn->prev_bssid_valid)
req.prev_bssid = wdev->conn->prev_bssid;
@@ -235,7 +236,6 @@ void cfg80211_conn_work(struct work_struct *work)
NULL, 0, NULL, 0,
WLAN_STATUS_UNSPECIFIED_FAILURE,
false, NULL);
- cfg80211_sme_free(wdev);
}
wdev_unlock(wdev);
}
@@ -246,7 +246,7 @@ void cfg80211_conn_work(struct work_struct *work)
/* Returned bss is reference counted and must be cleaned up appropriately. */
static struct cfg80211_bss *cfg80211_get_conn_bss(struct wireless_dev *wdev)
{
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
struct cfg80211_bss *bss;
u16 capa = WLAN_CAPABILITY_ESS;
@@ -276,7 +276,7 @@ static struct cfg80211_bss *cfg80211_get_conn_bss(struct wireless_dev *wdev)
static void __cfg80211_sme_scan_done(struct net_device *dev)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
struct cfg80211_bss *bss;
ASSERT_WDEV_LOCK(wdev);
@@ -307,7 +307,7 @@ void cfg80211_sme_scan_done(struct net_device *dev)
void cfg80211_sme_rx_auth(struct wireless_dev *wdev, const u8 *buf, size_t len)
{
struct wiphy *wiphy = wdev->wiphy;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf;
u16 status_code = le16_to_cpu(mgmt->u.auth.status_code);
@@ -353,7 +353,7 @@ void cfg80211_sme_rx_auth(struct wireless_dev *wdev, const u8 *buf, size_t len)
bool cfg80211_sme_rx_assoc_resp(struct wireless_dev *wdev, u16 status)
{
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
if (!wdev->conn)
return false;
@@ -387,7 +387,7 @@ void cfg80211_sme_deauth(struct wireless_dev *wdev)
void cfg80211_sme_auth_timeout(struct wireless_dev *wdev)
{
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
if (!wdev->conn)
return;
@@ -398,7 +398,7 @@ void cfg80211_sme_auth_timeout(struct wireless_dev *wdev)
void cfg80211_sme_disassoc(struct wireless_dev *wdev)
{
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
if (!wdev->conn)
return;
@@ -409,7 +409,7 @@ void cfg80211_sme_disassoc(struct wireless_dev *wdev)
void cfg80211_sme_assoc_timeout(struct wireless_dev *wdev)
{
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
if (!wdev->conn)
return;
@@ -422,7 +422,7 @@ static int cfg80211_sme_connect(struct wireless_dev *wdev,
struct cfg80211_connect_params *connect,
const u8 *prev_bssid)
{
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
struct cfg80211_bss *bss;
int err;
@@ -469,7 +469,7 @@ static int cfg80211_sme_connect(struct wireless_dev *wdev,
}
wdev->conn->params.ssid = wdev->ssid;
- wdev->conn->params.ssid_len = connect->ssid_len;
+ wdev->conn->params.ssid_len = wdev->ssid_len;
/* see if we have the bss already */
bss = cfg80211_get_conn_bss(wdev);
@@ -481,7 +481,6 @@ static int cfg80211_sme_connect(struct wireless_dev *wdev,
/* we're good if we have a matching bss struct */
if (bss) {
- wdev->conn->state = CFG80211_CONN_AUTHENTICATE_NEXT;
err = cfg80211_conn_do_work(wdev);
cfg80211_put_bss(wdev->wiphy, bss);
} else {
@@ -507,7 +506,7 @@ static int cfg80211_sme_connect(struct wireless_dev *wdev,
static int cfg80211_sme_disconnect(struct wireless_dev *wdev, u16 reason)
{
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
int err;
if (!wdev->conn)
@@ -595,7 +594,7 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid,
return;
}
- nl80211_send_connect_result(wiphy_to_dev(wdev->wiphy), dev,
+ nl80211_send_connect_result(wiphy_to_rdev(wdev->wiphy), dev,
bssid, req_ie, req_ie_len,
resp_ie, resp_ie_len,
status, GFP_KERNEL);
@@ -626,7 +625,7 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid,
#endif
if (!bss && (status == WLAN_STATUS_SUCCESS)) {
- WARN_ON_ONCE(!wiphy_to_dev(wdev->wiphy)->ops->connect);
+ WARN_ON_ONCE(!wiphy_to_rdev(wdev->wiphy)->ops->connect);
bss = cfg80211_get_bss(wdev->wiphy, NULL, bssid,
wdev->ssid, wdev->ssid_len,
WLAN_CAPABILITY_ESS,
@@ -649,6 +648,7 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid,
cfg80211_unhold_bss(bss_from_pub(bss));
cfg80211_put_bss(wdev->wiphy, bss);
}
+ cfg80211_sme_free(wdev);
return;
}
@@ -688,7 +688,7 @@ void cfg80211_connect_result(struct net_device *dev, const u8 *bssid,
u16 status, gfp_t gfp)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
struct cfg80211_event *ev;
unsigned long flags;
@@ -743,7 +743,8 @@ void __cfg80211_roamed(struct wireless_dev *wdev,
cfg80211_hold_bss(bss_from_pub(bss));
wdev->current_bss = bss_from_pub(bss);
- nl80211_send_roamed(wiphy_to_dev(wdev->wiphy), wdev->netdev, bss->bssid,
+ nl80211_send_roamed(wiphy_to_rdev(wdev->wiphy),
+ wdev->netdev, bss->bssid,
req_ie, req_ie_len, resp_ie, resp_ie_len,
GFP_KERNEL);
@@ -802,7 +803,7 @@ void cfg80211_roamed_bss(struct net_device *dev,
size_t resp_ie_len, gfp_t gfp)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
struct cfg80211_event *ev;
unsigned long flags;
@@ -835,7 +836,7 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie,
size_t ie_len, u16 reason, bool from_ap)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
int i;
#ifdef CONFIG_CFG80211_WEXT
union iwreq_data wrqu;
@@ -878,10 +879,10 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie,
}
void cfg80211_disconnected(struct net_device *dev, u16 reason,
- u8 *ie, size_t ie_len, gfp_t gfp)
+ const u8 *ie, size_t ie_len, gfp_t gfp)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
struct cfg80211_event *ev;
unsigned long flags;
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index fbcc23edee5..7cc887f9da1 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -1468,9 +1468,10 @@ TRACE_EVENT(rdev_sched_scan_start,
TRACE_EVENT(rdev_tdls_mgmt,
TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
u8 *peer, u8 action_code, u8 dialog_token,
- u16 status_code, const u8 *buf, size_t len),
+ u16 status_code, u32 peer_capability,
+ const u8 *buf, size_t len),
TP_ARGS(wiphy, netdev, peer, action_code, dialog_token, status_code,
- buf, len),
+ peer_capability, buf, len),
TP_STRUCT__entry(
WIPHY_ENTRY
NETDEV_ENTRY
@@ -1478,6 +1479,7 @@ TRACE_EVENT(rdev_tdls_mgmt,
__field(u8, action_code)
__field(u8, dialog_token)
__field(u16, status_code)
+ __field(u32, peer_capability)
__dynamic_array(u8, buf, len)
),
TP_fast_assign(
@@ -1487,13 +1489,15 @@ TRACE_EVENT(rdev_tdls_mgmt,
__entry->action_code = action_code;
__entry->dialog_token = dialog_token;
__entry->status_code = status_code;
+ __entry->peer_capability = peer_capability;
memcpy(__get_dynamic_array(buf), buf, len);
),
TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " MAC_PR_FMT ", action_code: %u, "
- "dialog_token: %u, status_code: %u, buf: %#.2x ",
+ "dialog_token: %u, status_code: %u, peer_capability: %u buf: %#.2x ",
WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(peer),
__entry->action_code, __entry->dialog_token,
- __entry->status_code, ((u8 *)__get_dynamic_array(buf))[0])
+ __entry->status_code, __entry->peer_capability,
+ ((u8 *)__get_dynamic_array(buf))[0])
);
TRACE_EVENT(rdev_dump_survey,
@@ -1872,29 +1876,33 @@ TRACE_EVENT(rdev_channel_switch,
WIPHY_ENTRY
NETDEV_ENTRY
CHAN_DEF_ENTRY
- __field(u16, counter_offset_beacon)
- __field(u16, counter_offset_presp)
__field(bool, radar_required)
__field(bool, block_tx)
__field(u8, count)
+ __dynamic_array(u16, bcn_ofs, params->n_counter_offsets_beacon)
+ __dynamic_array(u16, pres_ofs, params->n_counter_offsets_presp)
),
TP_fast_assign(
WIPHY_ASSIGN;
NETDEV_ASSIGN;
CHAN_DEF_ASSIGN(&params->chandef);
- __entry->counter_offset_beacon = params->counter_offset_beacon;
- __entry->counter_offset_presp = params->counter_offset_presp;
__entry->radar_required = params->radar_required;
__entry->block_tx = params->block_tx;
__entry->count = params->count;
+ memcpy(__get_dynamic_array(bcn_ofs),
+ params->counter_offsets_beacon,
+ params->n_counter_offsets_beacon * sizeof(u16));
+
+ /* probe response offsets are optional */
+ if (params->n_counter_offsets_presp)
+ memcpy(__get_dynamic_array(pres_ofs),
+ params->counter_offsets_presp,
+ params->n_counter_offsets_presp * sizeof(u16));
),
TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT
- ", block_tx: %d, count: %u, radar_required: %d"
- ", counter offsets (beacon/presp): %u/%u",
+ ", block_tx: %d, count: %u, radar_required: %d",
WIPHY_PR_ARG, NETDEV_PR_ARG, CHAN_DEF_PR_ARG,
- __entry->block_tx, __entry->count, __entry->radar_required,
- __entry->counter_offset_beacon,
- __entry->counter_offset_presp)
+ __entry->block_tx, __entry->count, __entry->radar_required)
);
TRACE_EVENT(rdev_set_qos_map,
@@ -1915,6 +1923,24 @@ TRACE_EVENT(rdev_set_qos_map,
WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->num_des)
);
+TRACE_EVENT(rdev_set_ap_chanwidth,
+ TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+ struct cfg80211_chan_def *chandef),
+ TP_ARGS(wiphy, netdev, chandef),
+ TP_STRUCT__entry(
+ WIPHY_ENTRY
+ NETDEV_ENTRY
+ CHAN_DEF_ENTRY
+ ),
+ TP_fast_assign(
+ WIPHY_ASSIGN;
+ NETDEV_ASSIGN;
+ CHAN_DEF_ASSIGN(chandef);
+ ),
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT,
+ WIPHY_PR_ARG, NETDEV_PR_ARG, CHAN_DEF_PR_ARG)
+);
+
/*************************************************************
* cfg80211 exported functions traces *
*************************************************************/
@@ -2068,7 +2094,8 @@ TRACE_EVENT(cfg80211_michael_mic_failure,
MAC_ASSIGN(addr, addr);
__entry->key_type = key_type;
__entry->key_id = key_id;
- memcpy(__entry->tsc, tsc, 6);
+ if (tsc)
+ memcpy(__entry->tsc, tsc, 6);
),
TP_printk(NETDEV_PR_FMT ", " MAC_PR_FMT ", key type: %d, key id: %d, tsc: %pm",
NETDEV_PR_ARG, MAC_PR_ARG(addr), __entry->key_type,
@@ -2189,18 +2216,21 @@ TRACE_EVENT(cfg80211_cqm_rssi_notify,
);
TRACE_EVENT(cfg80211_reg_can_beacon,
- TP_PROTO(struct wiphy *wiphy, struct cfg80211_chan_def *chandef),
- TP_ARGS(wiphy, chandef),
+ TP_PROTO(struct wiphy *wiphy, struct cfg80211_chan_def *chandef,
+ enum nl80211_iftype iftype),
+ TP_ARGS(wiphy, chandef, iftype),
TP_STRUCT__entry(
WIPHY_ENTRY
CHAN_DEF_ENTRY
+ __field(enum nl80211_iftype, iftype)
),
TP_fast_assign(
WIPHY_ASSIGN;
CHAN_DEF_ASSIGN(chandef);
+ __entry->iftype = iftype;
),
- TP_printk(WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT,
- WIPHY_PR_ARG, CHAN_DEF_PR_ARG)
+ TP_printk(WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT ", iftype=%d",
+ WIPHY_PR_ARG, CHAN_DEF_PR_ARG, __entry->iftype)
);
TRACE_EVENT(cfg80211_chandef_dfs_required,
@@ -2278,11 +2308,6 @@ DECLARE_EVENT_CLASS(cfg80211_rx_evt,
TP_printk(NETDEV_PR_FMT ", " MAC_PR_FMT, NETDEV_PR_ARG, MAC_PR_ARG(addr))
);
-DEFINE_EVENT(cfg80211_rx_evt, cfg80211_ibss_joined,
- TP_PROTO(struct net_device *netdev, const u8 *addr),
- TP_ARGS(netdev, addr)
-);
-
DEFINE_EVENT(cfg80211_rx_evt, cfg80211_rx_spurious_frame,
TP_PROTO(struct net_device *netdev, const u8 *addr),
TP_ARGS(netdev, addr)
@@ -2293,6 +2318,24 @@ DEFINE_EVENT(cfg80211_rx_evt, cfg80211_rx_unexpected_4addr_frame,
TP_ARGS(netdev, addr)
);
+TRACE_EVENT(cfg80211_ibss_joined,
+ TP_PROTO(struct net_device *netdev, const u8 *bssid,
+ struct ieee80211_channel *channel),
+ TP_ARGS(netdev, bssid, channel),
+ TP_STRUCT__entry(
+ NETDEV_ENTRY
+ MAC_ENTRY(bssid)
+ CHAN_ENTRY
+ ),
+ TP_fast_assign(
+ NETDEV_ASSIGN;
+ MAC_ASSIGN(bssid, bssid);
+ CHAN_ASSIGN(channel);
+ ),
+ TP_printk(NETDEV_PR_FMT ", bssid: " MAC_PR_FMT ", " CHAN_PR_FMT,
+ NETDEV_PR_ARG, MAC_PR_ARG(bssid), CHAN_PR_ARG)
+);
+
TRACE_EVENT(cfg80211_probe_status,
TP_PROTO(struct net_device *netdev, const u8 *addr, u64 cookie,
bool acked),
@@ -2598,6 +2641,21 @@ TRACE_EVENT(cfg80211_ft_event,
WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(target_ap))
);
+TRACE_EVENT(cfg80211_stop_iface,
+ TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev),
+ TP_ARGS(wiphy, wdev),
+ TP_STRUCT__entry(
+ WIPHY_ENTRY
+ WDEV_ENTRY
+ ),
+ TP_fast_assign(
+ WIPHY_ASSIGN;
+ WDEV_ASSIGN;
+ ),
+ TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT,
+ WIPHY_PR_ARG, WDEV_PR_ARG)
+);
+
#endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */
#undef TRACE_INCLUDE_PATH
diff --git a/net/wireless/util.c b/net/wireless/util.c
index d39c37104ae..728f1c0dc70 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -11,6 +11,7 @@
#include <net/ip.h>
#include <net/dsfield.h>
#include <linux/if_vlan.h>
+#include <linux/mpls.h>
#include "core.h"
#include "rdev-ops.h"
@@ -475,7 +476,8 @@ int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr,
EXPORT_SYMBOL(ieee80211_data_to_8023);
int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr,
- enum nl80211_iftype iftype, u8 *bssid, bool qos)
+ enum nl80211_iftype iftype,
+ const u8 *bssid, bool qos)
{
struct ieee80211_hdr hdr;
u16 hdrlen, ethertype;
@@ -717,6 +719,21 @@ unsigned int cfg80211_classify8021d(struct sk_buff *skb,
case htons(ETH_P_IPV6):
dscp = ipv6_get_dsfield(ipv6_hdr(skb)) & 0xfc;
break;
+ case htons(ETH_P_MPLS_UC):
+ case htons(ETH_P_MPLS_MC): {
+ struct mpls_label mpls_tmp, *mpls;
+
+ mpls = skb_header_pointer(skb, sizeof(struct ethhdr),
+ sizeof(*mpls), &mpls_tmp);
+ if (!mpls)
+ return 0;
+
+ return (ntohl(mpls->entry) & MPLS_LS_TC_MASK)
+ >> MPLS_LS_TC_SHIFT;
+ }
+ case htons(ETH_P_80221):
+ /* 802.21 is always network control traffic */
+ return 7;
default:
return 0;
}
@@ -754,7 +771,7 @@ EXPORT_SYMBOL(ieee80211_bss_get_ie);
void cfg80211_upload_connect_keys(struct wireless_dev *wdev)
{
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
struct net_device *dev = wdev->netdev;
int i;
@@ -820,7 +837,11 @@ void cfg80211_process_wdev_events(struct wireless_dev *wdev)
ev->dc.reason, true);
break;
case EVENT_IBSS_JOINED:
- __cfg80211_ibss_joined(wdev->netdev, ev->ij.bssid);
+ __cfg80211_ibss_joined(wdev->netdev, ev->ij.bssid,
+ ev->ij.channel);
+ break;
+ case EVENT_STOPPED:
+ __cfg80211_leave(wiphy_to_rdev(wdev->wiphy), wdev);
break;
}
wdev_unlock(wdev);
@@ -837,7 +858,6 @@ void cfg80211_process_rdev_events(struct cfg80211_registered_device *rdev)
struct wireless_dev *wdev;
ASSERT_RTNL();
- ASSERT_RDEV_LOCK(rdev);
list_for_each_entry(wdev, &rdev->wdev_list, list)
cfg80211_process_wdev_events(wdev);
@@ -850,7 +870,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
int err;
enum nl80211_iftype otype = dev->ieee80211_ptr->iftype;
- ASSERT_RDEV_LOCK(rdev);
+ ASSERT_RTNL();
/* don't support changing VLANs, you just re-create them */
if (otype == NL80211_IFTYPE_AP_VLAN)
@@ -872,11 +892,6 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
return -EBUSY;
if (ntype != otype && netif_running(dev)) {
- err = cfg80211_can_change_interface(rdev, dev->ieee80211_ptr,
- ntype);
- if (err)
- return err;
-
dev->ieee80211_ptr->use_4addr = false;
dev->ieee80211_ptr->mesh_id_up_len = 0;
wdev_lock(dev->ieee80211_ptr);
@@ -885,7 +900,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
switch (otype) {
case NL80211_IFTYPE_AP:
- cfg80211_stop_ap(rdev, dev);
+ cfg80211_stop_ap(rdev, dev, true);
break;
case NL80211_IFTYPE_ADHOC:
cfg80211_leave_ibss(rdev, dev, false);
@@ -1252,6 +1267,120 @@ int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev,
return res;
}
+int cfg80211_iter_combinations(struct wiphy *wiphy,
+ const int num_different_channels,
+ const u8 radar_detect,
+ const int iftype_num[NUM_NL80211_IFTYPES],
+ void (*iter)(const struct ieee80211_iface_combination *c,
+ void *data),
+ void *data)
+{
+ const struct ieee80211_regdomain *regdom;
+ enum nl80211_dfs_regions region = 0;
+ int i, j, iftype;
+ int num_interfaces = 0;
+ u32 used_iftypes = 0;
+
+ if (radar_detect) {
+ rcu_read_lock();
+ regdom = rcu_dereference(cfg80211_regdomain);
+ if (regdom)
+ region = regdom->dfs_region;
+ rcu_read_unlock();
+ }
+
+ for (iftype = 0; iftype < NUM_NL80211_IFTYPES; iftype++) {
+ num_interfaces += iftype_num[iftype];
+ if (iftype_num[iftype] > 0 &&
+ !(wiphy->software_iftypes & BIT(iftype)))
+ used_iftypes |= BIT(iftype);
+ }
+
+ for (i = 0; i < wiphy->n_iface_combinations; i++) {
+ const struct ieee80211_iface_combination *c;
+ struct ieee80211_iface_limit *limits;
+ u32 all_iftypes = 0;
+
+ c = &wiphy->iface_combinations[i];
+
+ if (num_interfaces > c->max_interfaces)
+ continue;
+ if (num_different_channels > c->num_different_channels)
+ continue;
+
+ limits = kmemdup(c->limits, sizeof(limits[0]) * c->n_limits,
+ GFP_KERNEL);
+ if (!limits)
+ return -ENOMEM;
+
+ for (iftype = 0; iftype < NUM_NL80211_IFTYPES; iftype++) {
+ if (wiphy->software_iftypes & BIT(iftype))
+ continue;
+ for (j = 0; j < c->n_limits; j++) {
+ all_iftypes |= limits[j].types;
+ if (!(limits[j].types & BIT(iftype)))
+ continue;
+ if (limits[j].max < iftype_num[iftype])
+ goto cont;
+ limits[j].max -= iftype_num[iftype];
+ }
+ }
+
+ if (radar_detect != (c->radar_detect_widths & radar_detect))
+ goto cont;
+
+ if (radar_detect && c->radar_detect_regions &&
+ !(c->radar_detect_regions & BIT(region)))
+ goto cont;
+
+ /* Finally check that all iftypes that we're currently
+ * using are actually part of this combination. If they
+ * aren't then we can't use this combination and have
+ * to continue to the next.
+ */
+ if ((all_iftypes & used_iftypes) != used_iftypes)
+ goto cont;
+
+ /* This combination covered all interface types and
+ * supported the requested numbers, so we're good.
+ */
+
+ (*iter)(c, data);
+ cont:
+ kfree(limits);
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(cfg80211_iter_combinations);
+
+static void
+cfg80211_iter_sum_ifcombs(const struct ieee80211_iface_combination *c,
+ void *data)
+{
+ int *num = data;
+ (*num)++;
+}
+
+int cfg80211_check_combinations(struct wiphy *wiphy,
+ const int num_different_channels,
+ const u8 radar_detect,
+ const int iftype_num[NUM_NL80211_IFTYPES])
+{
+ int err, num = 0;
+
+ err = cfg80211_iter_combinations(wiphy, num_different_channels,
+ radar_detect, iftype_num,
+ cfg80211_iter_sum_ifcombs, &num);
+ if (err)
+ return err;
+ if (num == 0)
+ return -EBUSY;
+
+ return 0;
+}
+EXPORT_SYMBOL(cfg80211_check_combinations);
+
int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev,
struct wireless_dev *wdev,
enum nl80211_iftype iftype,
@@ -1260,7 +1389,6 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev,
u8 radar_detect)
{
struct wireless_dev *wdev_iter;
- u32 used_iftypes = BIT(iftype);
int num[NUM_NL80211_IFTYPES];
struct ieee80211_channel
*used_channels[CFG80211_MAX_NUM_DIFFERENT_CHANNELS];
@@ -1268,43 +1396,14 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev,
enum cfg80211_chan_mode chmode;
int num_different_channels = 0;
int total = 1;
- bool radar_required = false;
- int i, j;
+ int i;
ASSERT_RTNL();
if (WARN_ON(hweight32(radar_detect) > 1))
return -EINVAL;
- switch (iftype) {
- case NL80211_IFTYPE_ADHOC:
- case NL80211_IFTYPE_AP:
- case NL80211_IFTYPE_AP_VLAN:
- case NL80211_IFTYPE_MESH_POINT:
- case NL80211_IFTYPE_P2P_GO:
- case NL80211_IFTYPE_WDS:
- /* if the interface could potentially choose a DFS channel,
- * then mark DFS as required.
- */
- if (!chan) {
- if (chanmode != CHAN_MODE_UNDEFINED && radar_detect)
- radar_required = true;
- break;
- }
- radar_required = !!(chan->flags & IEEE80211_CHAN_RADAR);
- break;
- case NL80211_IFTYPE_P2P_CLIENT:
- case NL80211_IFTYPE_STATION:
- case NL80211_IFTYPE_P2P_DEVICE:
- case NL80211_IFTYPE_MONITOR:
- break;
- case NUM_NL80211_IFTYPES:
- case NL80211_IFTYPE_UNSPECIFIED:
- default:
- return -EINVAL;
- }
-
- if (radar_required && !radar_detect)
+ if (WARN_ON(iftype >= NUM_NL80211_IFTYPES))
return -EINVAL;
/* Always allow software iftypes */
@@ -1319,6 +1418,11 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev,
num[iftype] = 1;
+ /* TODO: We'll probably not need this anymore, since this
+ * should only be called with CHAN_MODE_UNDEFINED. There are
+ * still a couple of pending calls where other chanmodes are
+ * used, but we should get rid of them.
+ */
switch (chanmode) {
case CHAN_MODE_UNDEFINED:
break;
@@ -1356,7 +1460,7 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev,
*/
mutex_lock_nested(&wdev_iter->mtx, 1);
__acquire(wdev_iter->mtx);
- cfg80211_get_chan_state(wdev_iter, &ch, &chmode);
+ cfg80211_get_chan_state(wdev_iter, &ch, &chmode, &radar_detect);
wdev_unlock(wdev_iter);
switch (chmode) {
@@ -1382,65 +1486,13 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev,
num[wdev_iter->iftype]++;
total++;
- used_iftypes |= BIT(wdev_iter->iftype);
}
if (total == 1 && !radar_detect)
return 0;
- for (i = 0; i < rdev->wiphy.n_iface_combinations; i++) {
- const struct ieee80211_iface_combination *c;
- struct ieee80211_iface_limit *limits;
- u32 all_iftypes = 0;
-
- c = &rdev->wiphy.iface_combinations[i];
-
- if (total > c->max_interfaces)
- continue;
- if (num_different_channels > c->num_different_channels)
- continue;
-
- limits = kmemdup(c->limits, sizeof(limits[0]) * c->n_limits,
- GFP_KERNEL);
- if (!limits)
- return -ENOMEM;
-
- for (iftype = 0; iftype < NUM_NL80211_IFTYPES; iftype++) {
- if (rdev->wiphy.software_iftypes & BIT(iftype))
- continue;
- for (j = 0; j < c->n_limits; j++) {
- all_iftypes |= limits[j].types;
- if (!(limits[j].types & BIT(iftype)))
- continue;
- if (limits[j].max < num[iftype])
- goto cont;
- limits[j].max -= num[iftype];
- }
- }
-
- if (radar_detect && !(c->radar_detect_widths & radar_detect))
- goto cont;
-
- /*
- * Finally check that all iftypes that we're currently
- * using are actually part of this combination. If they
- * aren't then we can't use this combination and have
- * to continue to the next.
- */
- if ((all_iftypes & used_iftypes) != used_iftypes)
- goto cont;
-
- /*
- * This combination covered all interface types and
- * supported the requested numbers, so we're good.
- */
- kfree(limits);
- return 0;
- cont:
- kfree(limits);
- }
-
- return -EBUSY;
+ return cfg80211_check_combinations(&rdev->wiphy, num_different_channels,
+ radar_detect, num);
}
int ieee80211_get_ratemask(struct ieee80211_supported_band *sband,
@@ -1494,6 +1546,24 @@ unsigned int ieee80211_get_num_supported_channels(struct wiphy *wiphy)
}
EXPORT_SYMBOL(ieee80211_get_num_supported_channels);
+int cfg80211_get_station(struct net_device *dev, const u8 *mac_addr,
+ struct station_info *sinfo)
+{
+ struct cfg80211_registered_device *rdev;
+ struct wireless_dev *wdev;
+
+ wdev = dev->ieee80211_ptr;
+ if (!wdev)
+ return -EOPNOTSUPP;
+
+ rdev = wiphy_to_rdev(wdev->wiphy);
+ if (!rdev->ops->get_station)
+ return -EOPNOTSUPP;
+
+ return rdev_get_station(rdev, dev, mac_addr, sinfo);
+}
+EXPORT_SYMBOL(cfg80211_get_station);
+
/* See IEEE 802.1H for LLC/SNAP encapsulation/decapsulation */
/* Ethernet-II snap header (RFC1042 for most EtherTypes) */
const unsigned char rfc1042_header[] __aligned(2) =
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index 5661a54ac7e..11120bb1416 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -73,7 +73,7 @@ int cfg80211_wext_siwmode(struct net_device *dev, struct iw_request_info *info,
struct vif_params vifparams;
enum nl80211_iftype type;
- rdev = wiphy_to_dev(wdev->wiphy);
+ rdev = wiphy_to_rdev(wdev->wiphy);
switch (*mode) {
case IW_MODE_INFRA:
@@ -253,12 +253,12 @@ EXPORT_SYMBOL_GPL(cfg80211_wext_giwrange);
/**
* cfg80211_wext_freq - get wext frequency for non-"auto"
- * @wiphy: the wiphy
+ * @dev: the net device
* @freq: the wext freq encoding
*
* Returns a frequency, or a negative error code, or 0 for auto.
*/
-int cfg80211_wext_freq(struct wiphy *wiphy, struct iw_freq *freq)
+int cfg80211_wext_freq(struct iw_freq *freq)
{
/*
* Parse frequency - return 0 for auto and
@@ -286,7 +286,7 @@ int cfg80211_wext_siwrts(struct net_device *dev,
struct iw_param *rts, char *extra)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
u32 orts = wdev->wiphy->rts_threshold;
int err;
@@ -324,7 +324,7 @@ int cfg80211_wext_siwfrag(struct net_device *dev,
struct iw_param *frag, char *extra)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
u32 ofrag = wdev->wiphy->frag_threshold;
int err;
@@ -364,7 +364,7 @@ static int cfg80211_wext_siwretry(struct net_device *dev,
struct iw_param *retry, char *extra)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
u32 changed = 0;
u8 olong = wdev->wiphy->retry_long;
u8 oshort = wdev->wiphy->retry_short;
@@ -587,7 +587,7 @@ static int cfg80211_wext_siwencode(struct net_device *dev,
struct iw_point *erq, char *keybuf)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
int idx, err;
bool remove = false;
struct key_params params;
@@ -647,7 +647,7 @@ static int cfg80211_wext_siwencodeext(struct net_device *dev,
struct iw_point *erq, char *extra)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
struct iw_encode_ext *ext = (struct iw_encode_ext *) extra;
const u8 *addr;
int idx;
@@ -775,7 +775,7 @@ static int cfg80211_wext_siwfreq(struct net_device *dev,
struct iw_freq *wextfreq, char *extra)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
struct cfg80211_chan_def chandef = {
.width = NL80211_CHAN_WIDTH_20_NOHT,
};
@@ -787,7 +787,7 @@ static int cfg80211_wext_siwfreq(struct net_device *dev,
case NL80211_IFTYPE_ADHOC:
return cfg80211_ibss_wext_siwfreq(dev, info, wextfreq, extra);
case NL80211_IFTYPE_MONITOR:
- freq = cfg80211_wext_freq(wdev->wiphy, wextfreq);
+ freq = cfg80211_wext_freq(wextfreq);
if (freq < 0)
return freq;
if (freq == 0)
@@ -798,7 +798,7 @@ static int cfg80211_wext_siwfreq(struct net_device *dev,
return -EINVAL;
return cfg80211_set_monitor_channel(rdev, &chandef);
case NL80211_IFTYPE_MESH_POINT:
- freq = cfg80211_wext_freq(wdev->wiphy, wextfreq);
+ freq = cfg80211_wext_freq(wextfreq);
if (freq < 0)
return freq;
if (freq == 0)
@@ -818,7 +818,7 @@ static int cfg80211_wext_giwfreq(struct net_device *dev,
struct iw_freq *freq, char *extra)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
struct cfg80211_chan_def chandef;
int ret;
@@ -847,7 +847,7 @@ static int cfg80211_wext_siwtxpower(struct net_device *dev,
union iwreq_data *data, char *extra)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
enum nl80211_tx_power_setting type;
int dbm = 0;
@@ -899,7 +899,7 @@ static int cfg80211_wext_giwtxpower(struct net_device *dev,
union iwreq_data *data, char *extra)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
int err, val;
if ((data->txpower.flags & IW_TXPOW_TYPE) != IW_TXPOW_DBM)
@@ -1119,7 +1119,7 @@ static int cfg80211_wext_siwpower(struct net_device *dev,
struct iw_param *wrq, char *extra)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
bool ps = wdev->ps;
int timeout = wdev->ps_timeout;
int err;
@@ -1177,7 +1177,7 @@ static int cfg80211_wds_wext_siwap(struct net_device *dev,
struct sockaddr *addr, char *extra)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
int err;
if (WARN_ON(wdev->iftype != NL80211_IFTYPE_WDS))
@@ -1221,7 +1221,7 @@ static int cfg80211_wext_siwrate(struct net_device *dev,
struct iw_param *rate, char *extra)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
struct cfg80211_bitrate_mask mask;
u32 fixed, maxrate;
struct ieee80211_supported_band *sband;
@@ -1272,7 +1272,7 @@ static int cfg80211_wext_giwrate(struct net_device *dev,
struct iw_param *rate, char *extra)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
/* we are under RTNL - globally locked - so can use a static struct */
static struct station_info sinfo;
u8 addr[ETH_ALEN];
@@ -1310,7 +1310,7 @@ static int cfg80211_wext_giwrate(struct net_device *dev,
static struct iw_statistics *cfg80211_wireless_stats(struct net_device *dev)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
/* we are under RTNL - globally locked - so can use static structs */
static struct iw_statistics wstats;
static struct station_info sinfo;
@@ -1449,7 +1449,7 @@ static int cfg80211_wext_siwpmksa(struct net_device *dev,
struct iw_point *data, char *extra)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
struct cfg80211_pmksa cfg_pmksa;
struct iw_pmksa *pmksa = (struct iw_pmksa *)extra;
diff --git a/net/wireless/wext-compat.h b/net/wireless/wext-compat.h
index 5d766b0118e..ebcacca2f73 100644
--- a/net/wireless/wext-compat.h
+++ b/net/wireless/wext-compat.h
@@ -50,7 +50,7 @@ int cfg80211_wext_siwgenie(struct net_device *dev,
struct iw_point *data, char *extra);
-int cfg80211_wext_freq(struct wiphy *wiphy, struct iw_freq *freq);
+int cfg80211_wext_freq(struct iw_freq *freq);
extern const struct iw_handler_def cfg80211_wext_handler;
diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c
index 14c9a2583ba..c7e5c8eb4f2 100644
--- a/net/wireless/wext-sme.c
+++ b/net/wireless/wext-sme.c
@@ -21,7 +21,7 @@ int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev,
const u8 *prev_bssid = NULL;
int err, i;
- ASSERT_RDEV_LOCK(rdev);
+ ASSERT_RTNL();
ASSERT_WDEV_LOCK(wdev);
if (!netif_running(wdev->netdev))
@@ -67,7 +67,7 @@ int cfg80211_mgd_wext_siwfreq(struct net_device *dev,
struct iw_freq *wextfreq, char *extra)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
struct ieee80211_channel *chan = NULL;
int err, freq;
@@ -75,7 +75,7 @@ int cfg80211_mgd_wext_siwfreq(struct net_device *dev,
if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION))
return -EINVAL;
- freq = cfg80211_wext_freq(wdev->wiphy, wextfreq);
+ freq = cfg80211_wext_freq(wextfreq);
if (freq < 0)
return freq;
@@ -169,7 +169,7 @@ int cfg80211_mgd_wext_siwessid(struct net_device *dev,
struct iw_point *data, char *ssid)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
size_t len = data->length;
int err;
@@ -260,7 +260,7 @@ int cfg80211_mgd_wext_siwap(struct net_device *dev,
struct sockaddr *ap_addr, char *extra)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
u8 *bssid = ap_addr->sa_data;
int err;
@@ -333,7 +333,7 @@ int cfg80211_wext_siwgenie(struct net_device *dev,
struct iw_point *data, char *extra)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
u8 *ie = extra;
int ie_len = data->length, err;
@@ -390,7 +390,7 @@ int cfg80211_wext_siwmlme(struct net_device *dev,
if (!wdev)
return -EOPNOTSUPP;
- rdev = wiphy_to_dev(wdev->wiphy);
+ rdev = wiphy_to_rdev(wdev->wiphy);
if (wdev->iftype != NL80211_IFTYPE_STATION)
return -EINVAL;
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 6177479c7de..5ad4418ef09 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -1064,7 +1064,7 @@ int x25_rx_call_request(struct sk_buff *skb, struct x25_neigh *nb,
x25_start_heartbeat(make);
if (!sock_flag(sk, SOCK_DEAD))
- sk->sk_data_ready(sk, skb->len);
+ sk->sk_data_ready(sk);
rc = 1;
sock_put(sk);
out:
diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c
index d1b0dc79bb6..7ac50098a37 100644
--- a/net/x25/x25_in.c
+++ b/net/x25/x25_in.c
@@ -79,7 +79,7 @@ static int x25_queue_rx_frame(struct sock *sk, struct sk_buff *skb, int more)
skb_set_owner_r(skbn, sk);
skb_queue_tail(&sk->sk_receive_queue, skbn);
if (!sock_flag(sk, SOCK_DEAD))
- sk->sk_data_ready(sk, skbn->len);
+ sk->sk_data_ready(sk);
return 0;
}
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 6c7ac016ce3..85d1d476461 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -16,6 +16,81 @@
static struct kmem_cache *secpath_cachep __read_mostly;
+static DEFINE_SPINLOCK(xfrm_input_afinfo_lock);
+static struct xfrm_input_afinfo __rcu *xfrm_input_afinfo[NPROTO];
+
+int xfrm_input_register_afinfo(struct xfrm_input_afinfo *afinfo)
+{
+ int err = 0;
+
+ if (unlikely(afinfo == NULL))
+ return -EINVAL;
+ if (unlikely(afinfo->family >= NPROTO))
+ return -EAFNOSUPPORT;
+ spin_lock_bh(&xfrm_input_afinfo_lock);
+ if (unlikely(xfrm_input_afinfo[afinfo->family] != NULL))
+ err = -ENOBUFS;
+ else
+ rcu_assign_pointer(xfrm_input_afinfo[afinfo->family], afinfo);
+ spin_unlock_bh(&xfrm_input_afinfo_lock);
+ return err;
+}
+EXPORT_SYMBOL(xfrm_input_register_afinfo);
+
+int xfrm_input_unregister_afinfo(struct xfrm_input_afinfo *afinfo)
+{
+ int err = 0;
+
+ if (unlikely(afinfo == NULL))
+ return -EINVAL;
+ if (unlikely(afinfo->family >= NPROTO))
+ return -EAFNOSUPPORT;
+ spin_lock_bh(&xfrm_input_afinfo_lock);
+ if (likely(xfrm_input_afinfo[afinfo->family] != NULL)) {
+ if (unlikely(xfrm_input_afinfo[afinfo->family] != afinfo))
+ err = -EINVAL;
+ else
+ RCU_INIT_POINTER(xfrm_input_afinfo[afinfo->family], NULL);
+ }
+ spin_unlock_bh(&xfrm_input_afinfo_lock);
+ synchronize_rcu();
+ return err;
+}
+EXPORT_SYMBOL(xfrm_input_unregister_afinfo);
+
+static struct xfrm_input_afinfo *xfrm_input_get_afinfo(unsigned int family)
+{
+ struct xfrm_input_afinfo *afinfo;
+
+ if (unlikely(family >= NPROTO))
+ return NULL;
+ rcu_read_lock();
+ afinfo = rcu_dereference(xfrm_input_afinfo[family]);
+ if (unlikely(!afinfo))
+ rcu_read_unlock();
+ return afinfo;
+}
+
+static void xfrm_input_put_afinfo(struct xfrm_input_afinfo *afinfo)
+{
+ rcu_read_unlock();
+}
+
+static int xfrm_rcv_cb(struct sk_buff *skb, unsigned int family, u8 protocol,
+ int err)
+{
+ int ret;
+ struct xfrm_input_afinfo *afinfo = xfrm_input_get_afinfo(family);
+
+ if (!afinfo)
+ return -EAFNOSUPPORT;
+
+ ret = afinfo->callback(skb, protocol, err);
+ xfrm_input_put_afinfo(afinfo);
+
+ return ret;
+}
+
void __secpath_destroy(struct sec_path *sp)
{
int i;
@@ -108,7 +183,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
int err;
__be32 seq;
__be32 seq_hi;
- struct xfrm_state *x;
+ struct xfrm_state *x = NULL;
xfrm_address_t *daddr;
struct xfrm_mode *inner_mode;
unsigned int family;
@@ -120,9 +195,14 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
async = 1;
x = xfrm_input_state(skb);
seq = XFRM_SKB_CB(skb)->seq.input.low;
+ family = x->outer_mode->afinfo->family;
goto resume;
}
+ daddr = (xfrm_address_t *)(skb_network_header(skb) +
+ XFRM_SPI_SKB_CB(skb)->daddroff);
+ family = XFRM_SPI_SKB_CB(skb)->family;
+
/* Allocate new secpath or COW existing one. */
if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) {
struct sec_path *sp;
@@ -137,10 +217,6 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
skb->sp = sp;
}
- daddr = (xfrm_address_t *)(skb_network_header(skb) +
- XFRM_SPI_SKB_CB(skb)->daddroff);
- family = XFRM_SPI_SKB_CB(skb)->family;
-
seq = 0;
if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
@@ -162,6 +238,11 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
skb->sp->xvec[skb->sp->len++] = x;
+ if (xfrm_tunnel_check(skb, x, family)) {
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR);
+ goto drop;
+ }
+
spin_lock(&x->lock);
if (unlikely(x->km.state == XFRM_STATE_ACQ)) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMACQUIREERROR);
@@ -201,7 +282,6 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
if (nexthdr == -EINPROGRESS)
return 0;
-
resume:
spin_lock(&x->lock);
if (nexthdr <= 0) {
@@ -263,6 +343,10 @@ resume:
}
} while (!err);
+ err = xfrm_rcv_cb(skb, family, x->type->proto, 0);
+ if (err)
+ goto drop;
+
nf_reset(skb);
if (decaps) {
@@ -276,6 +360,7 @@ resume:
drop_unlock:
spin_unlock(&x->lock);
drop:
+ xfrm_rcv_cb(skb, family, x && x->type ? x->type->proto : nexthdr, -1);
kfree_skb(skb);
return 0;
}
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 3bb2cdc13b4..c51e8f7b865 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -199,6 +199,7 @@ int xfrm_output(struct sk_buff *skb)
return xfrm_output2(skb);
}
+EXPORT_SYMBOL_GPL(xfrm_output);
int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb)
{
@@ -213,6 +214,7 @@ int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb)
return -EAFNOSUPPORT;
return inner_mode->afinfo->extract_output(x, skb);
}
+EXPORT_SYMBOL_GPL(xfrm_inner_extract_output);
void xfrm_local_error(struct sk_buff *skb, int mtu)
{
@@ -233,7 +235,4 @@ void xfrm_local_error(struct sk_buff *skb, int mtu)
afinfo->local_error(skb, mtu);
xfrm_state_put_afinfo(afinfo);
}
-
-EXPORT_SYMBOL_GPL(xfrm_output);
-EXPORT_SYMBOL_GPL(xfrm_inner_extract_output);
EXPORT_SYMBOL_GPL(xfrm_local_error);
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 1d5c7bf2993..0525d78ba32 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -39,8 +39,6 @@
#define XFRM_QUEUE_TMO_MAX ((unsigned)(60*HZ))
#define XFRM_MAX_QUEUE_LEN 100
-static struct dst_entry *xfrm_policy_sk_bundles;
-
static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock);
static struct xfrm_policy_afinfo __rcu *xfrm_policy_afinfo[NPROTO]
__read_mostly;
@@ -661,7 +659,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
hlist_add_head(&policy->bydst, chain);
xfrm_pol_hold(policy);
net->xfrm.policy_count[dir]++;
- atomic_inc(&flow_cache_genid);
+ atomic_inc(&net->xfrm.flow_cache_genid);
/* After previous checking, family can either be AF_INET or AF_INET6 */
if (policy->family == AF_INET)
@@ -771,7 +769,7 @@ EXPORT_SYMBOL(xfrm_policy_byid);
#ifdef CONFIG_SECURITY_NETWORK_XFRM
static inline int
-xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audit_info)
+xfrm_policy_flush_secctx_check(struct net *net, u8 type, bool task_valid)
{
int dir, err = 0;
@@ -785,10 +783,7 @@ xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audi
continue;
err = security_xfrm_policy_delete(pol->security);
if (err) {
- xfrm_audit_policy_delete(pol, 0,
- audit_info->loginuid,
- audit_info->sessionid,
- audit_info->secid);
+ xfrm_audit_policy_delete(pol, 0, task_valid);
return err;
}
}
@@ -802,9 +797,7 @@ xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audi
pol->security);
if (err) {
xfrm_audit_policy_delete(pol, 0,
- audit_info->loginuid,
- audit_info->sessionid,
- audit_info->secid);
+ task_valid);
return err;
}
}
@@ -814,19 +807,19 @@ xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audi
}
#else
static inline int
-xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audit_info)
+xfrm_policy_flush_secctx_check(struct net *net, u8 type, bool task_valid)
{
return 0;
}
#endif
-int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info)
+int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
{
int dir, err = 0, cnt = 0;
write_lock_bh(&net->xfrm.xfrm_policy_lock);
- err = xfrm_policy_flush_secctx_check(net, type, audit_info);
+ err = xfrm_policy_flush_secctx_check(net, type, task_valid);
if (err)
goto out;
@@ -843,9 +836,7 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info)
write_unlock_bh(&net->xfrm.xfrm_policy_lock);
cnt++;
- xfrm_audit_policy_delete(pol, 1, audit_info->loginuid,
- audit_info->sessionid,
- audit_info->secid);
+ xfrm_audit_policy_delete(pol, 1, task_valid);
xfrm_policy_kill(pol);
@@ -864,10 +855,7 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info)
write_unlock_bh(&net->xfrm.xfrm_policy_lock);
cnt++;
- xfrm_audit_policy_delete(pol, 1,
- audit_info->loginuid,
- audit_info->sessionid,
- audit_info->secid);
+ xfrm_audit_policy_delete(pol, 1, task_valid);
xfrm_policy_kill(pol);
write_lock_bh(&net->xfrm.xfrm_policy_lock);
@@ -1844,7 +1832,7 @@ purge_queue:
xfrm_pol_put(pol);
}
-static int xdst_queue_output(struct sk_buff *skb)
+static int xdst_queue_output(struct sock *sk, struct sk_buff *skb)
{
unsigned long sched_next;
struct dst_entry *dst = skb_dst(skb);
@@ -2110,12 +2098,7 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
}
dst_hold(&xdst->u.dst);
-
- spin_lock_bh(&net->xfrm.xfrm_policy_sk_bundle_lock);
- xdst->u.dst.next = xfrm_policy_sk_bundles;
- xfrm_policy_sk_bundles = &xdst->u.dst;
- spin_unlock_bh(&net->xfrm.xfrm_policy_sk_bundle_lock);
-
+ xdst->u.dst.flags |= DST_NOCACHE;
route = xdst->route;
}
}
@@ -2549,33 +2532,15 @@ static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
return dst;
}
-static void __xfrm_garbage_collect(struct net *net)
-{
- struct dst_entry *head, *next;
-
- spin_lock_bh(&net->xfrm.xfrm_policy_sk_bundle_lock);
- head = xfrm_policy_sk_bundles;
- xfrm_policy_sk_bundles = NULL;
- spin_unlock_bh(&net->xfrm.xfrm_policy_sk_bundle_lock);
-
- while (head) {
- next = head->next;
- dst_free(head);
- head = next;
- }
-}
-
void xfrm_garbage_collect(struct net *net)
{
- flow_cache_flush();
- __xfrm_garbage_collect(net);
+ flow_cache_flush(net);
}
EXPORT_SYMBOL(xfrm_garbage_collect);
static void xfrm_garbage_collect_deferred(struct net *net)
{
- flow_cache_flush_deferred();
- __xfrm_garbage_collect(net);
+ flow_cache_flush_deferred(net);
}
static void xfrm_init_pmtu(struct dst_entry *dst)
@@ -2810,21 +2775,19 @@ static struct notifier_block xfrm_dev_notifier = {
static int __net_init xfrm_statistics_init(struct net *net)
{
int rv;
-
- if (snmp_mib_init((void __percpu **)net->mib.xfrm_statistics,
- sizeof(struct linux_xfrm_mib),
- __alignof__(struct linux_xfrm_mib)) < 0)
+ net->mib.xfrm_statistics = alloc_percpu(struct linux_xfrm_mib);
+ if (!net->mib.xfrm_statistics)
return -ENOMEM;
rv = xfrm_proc_init(net);
if (rv < 0)
- snmp_mib_free((void __percpu **)net->mib.xfrm_statistics);
+ free_percpu(net->mib.xfrm_statistics);
return rv;
}
static void xfrm_statistics_fini(struct net *net)
{
xfrm_proc_fini(net);
- snmp_mib_free((void __percpu **)net->mib.xfrm_statistics);
+ free_percpu(net->mib.xfrm_statistics);
}
#else
static int __net_init xfrm_statistics_init(struct net *net)
@@ -2889,21 +2852,14 @@ out_byidx:
static void xfrm_policy_fini(struct net *net)
{
- struct xfrm_audit audit_info;
unsigned int sz;
int dir;
flush_work(&net->xfrm.policy_hash_work);
#ifdef CONFIG_XFRM_SUB_POLICY
- audit_info.loginuid = INVALID_UID;
- audit_info.sessionid = (unsigned int)-1;
- audit_info.secid = 0;
- xfrm_policy_flush(net, XFRM_POLICY_TYPE_SUB, &audit_info);
+ xfrm_policy_flush(net, XFRM_POLICY_TYPE_SUB, false);
#endif
- audit_info.loginuid = INVALID_UID;
- audit_info.sessionid = (unsigned int)-1;
- audit_info.secid = 0;
- xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, &audit_info);
+ xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, false);
WARN_ON(!list_empty(&net->xfrm.policy_all));
@@ -2940,15 +2896,19 @@ static int __net_init xfrm_net_init(struct net *net)
rv = xfrm_sysctl_init(net);
if (rv < 0)
goto out_sysctl;
+ rv = flow_cache_init(net);
+ if (rv < 0)
+ goto out;
/* Initialize the per-net locks here */
spin_lock_init(&net->xfrm.xfrm_state_lock);
rwlock_init(&net->xfrm.xfrm_policy_lock);
- spin_lock_init(&net->xfrm.xfrm_policy_sk_bundle_lock);
mutex_init(&net->xfrm.xfrm_cfg_mutex);
return 0;
+out:
+ xfrm_sysctl_fini(net);
out_sysctl:
xfrm_policy_fini(net);
out_policy:
@@ -2961,6 +2921,7 @@ out_statistics:
static void __net_exit xfrm_net_exit(struct net *net)
{
+ flow_cache_fini(net);
xfrm_sysctl_fini(net);
xfrm_policy_fini(net);
xfrm_state_fini(net);
@@ -3013,15 +2974,14 @@ static void xfrm_audit_common_policyinfo(struct xfrm_policy *xp,
}
}
-void xfrm_audit_policy_add(struct xfrm_policy *xp, int result,
- kuid_t auid, unsigned int sessionid, u32 secid)
+void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, bool task_valid)
{
struct audit_buffer *audit_buf;
audit_buf = xfrm_audit_start("SPD-add");
if (audit_buf == NULL)
return;
- xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf);
+ xfrm_audit_helper_usrinfo(task_valid, audit_buf);
audit_log_format(audit_buf, " res=%u", result);
xfrm_audit_common_policyinfo(xp, audit_buf);
audit_log_end(audit_buf);
@@ -3029,14 +2989,14 @@ void xfrm_audit_policy_add(struct xfrm_policy *xp, int result,
EXPORT_SYMBOL_GPL(xfrm_audit_policy_add);
void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result,
- kuid_t auid, unsigned int sessionid, u32 secid)
+ bool task_valid)
{
struct audit_buffer *audit_buf;
audit_buf = xfrm_audit_start("SPD-delete");
if (audit_buf == NULL)
return;
- xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf);
+ xfrm_audit_helper_usrinfo(task_valid, audit_buf);
audit_log_format(audit_buf, " res=%u", result);
xfrm_audit_common_policyinfo(xp, audit_buf);
audit_log_end(audit_buf);
diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c
index fc5abd0b456..9c4fbd8935f 100644
--- a/net/xfrm/xfrm_proc.c
+++ b/net/xfrm/xfrm_proc.c
@@ -54,8 +54,7 @@ static int xfrm_statistics_seq_show(struct seq_file *seq, void *v)
int i;
for (i = 0; xfrm_mib_list[i].name; i++)
seq_printf(seq, "%-24s\t%lu\n", xfrm_mib_list[i].name,
- snmp_fold_field((void __percpu **)
- net->mib.xfrm_statistics,
+ snmp_fold_field(net->mib.xfrm_statistics,
xfrm_mib_list[i].entry));
return 0;
}
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 40f1b3e92e7..0ab54134bb4 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -161,6 +161,7 @@ static DEFINE_SPINLOCK(xfrm_state_gc_lock);
int __xfrm_state_delete(struct xfrm_state *x);
int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
+bool km_is_alive(const struct km_event *c);
void km_state_expired(struct xfrm_state *x, int hard, u32 portid);
static DEFINE_SPINLOCK(xfrm_type_lock);
@@ -462,9 +463,7 @@ expired:
if (!err)
km_state_expired(x, 1, 0);
- xfrm_audit_state_delete(x, err ? 0 : 1,
- audit_get_loginuid(current),
- audit_get_sessionid(current), 0);
+ xfrm_audit_state_delete(x, err ? 0 : 1, true);
out:
spin_unlock(&x->lock);
@@ -561,7 +560,7 @@ EXPORT_SYMBOL(xfrm_state_delete);
#ifdef CONFIG_SECURITY_NETWORK_XFRM
static inline int
-xfrm_state_flush_secctx_check(struct net *net, u8 proto, struct xfrm_audit *audit_info)
+xfrm_state_flush_secctx_check(struct net *net, u8 proto, bool task_valid)
{
int i, err = 0;
@@ -571,10 +570,7 @@ xfrm_state_flush_secctx_check(struct net *net, u8 proto, struct xfrm_audit *audi
hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
if (xfrm_id_proto_match(x->id.proto, proto) &&
(err = security_xfrm_state_delete(x)) != 0) {
- xfrm_audit_state_delete(x, 0,
- audit_info->loginuid,
- audit_info->sessionid,
- audit_info->secid);
+ xfrm_audit_state_delete(x, 0, task_valid);
return err;
}
}
@@ -584,18 +580,18 @@ xfrm_state_flush_secctx_check(struct net *net, u8 proto, struct xfrm_audit *audi
}
#else
static inline int
-xfrm_state_flush_secctx_check(struct net *net, u8 proto, struct xfrm_audit *audit_info)
+xfrm_state_flush_secctx_check(struct net *net, u8 proto, bool task_valid)
{
return 0;
}
#endif
-int xfrm_state_flush(struct net *net, u8 proto, struct xfrm_audit *audit_info)
+int xfrm_state_flush(struct net *net, u8 proto, bool task_valid)
{
int i, err = 0, cnt = 0;
spin_lock_bh(&net->xfrm.xfrm_state_lock);
- err = xfrm_state_flush_secctx_check(net, proto, audit_info);
+ err = xfrm_state_flush_secctx_check(net, proto, task_valid);
if (err)
goto out;
@@ -611,9 +607,7 @@ restart:
err = xfrm_state_delete(x);
xfrm_audit_state_delete(x, err ? 0 : 1,
- audit_info->loginuid,
- audit_info->sessionid,
- audit_info->secid);
+ task_valid);
xfrm_state_put(x);
if (!err)
cnt++;
@@ -788,6 +782,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
struct xfrm_state *best = NULL;
u32 mark = pol->mark.v & pol->mark.m;
unsigned short encap_family = tmpl->encap_family;
+ struct km_event c;
to_put = NULL;
@@ -832,6 +827,17 @@ found:
error = -EEXIST;
goto out;
}
+
+ c.net = net;
+ /* If the KMs have no listeners (yet...), avoid allocating an SA
+ * for each and every packet - garbage collection might not
+ * handle the flood.
+ */
+ if (!km_is_alive(&c)) {
+ error = -ESRCH;
+ goto out;
+ }
+
x = xfrm_state_alloc(net);
if (x == NULL) {
error = -ENOMEM;
@@ -1135,10 +1141,9 @@ out:
EXPORT_SYMBOL(xfrm_state_add);
#ifdef CONFIG_XFRM_MIGRATE
-static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp)
+static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig)
{
struct net *net = xs_net(orig);
- int err = -ENOMEM;
struct xfrm_state *x = xfrm_state_alloc(net);
if (!x)
goto out;
@@ -1192,15 +1197,13 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp)
}
if (orig->replay_esn) {
- err = xfrm_replay_clone(x, orig);
- if (err)
+ if (xfrm_replay_clone(x, orig))
goto error;
}
memcpy(&x->mark, &orig->mark, sizeof(x->mark));
- err = xfrm_init_state(x);
- if (err)
+ if (xfrm_init_state(x) < 0)
goto error;
x->props.flags = orig->props.flags;
@@ -1218,8 +1221,6 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp)
error:
xfrm_state_put(x);
out:
- if (errp)
- *errp = err;
return NULL;
}
@@ -1274,9 +1275,8 @@ struct xfrm_state *xfrm_state_migrate(struct xfrm_state *x,
struct xfrm_migrate *m)
{
struct xfrm_state *xc;
- int err;
- xc = xfrm_state_clone(x, &err);
+ xc = xfrm_state_clone(x);
if (!xc)
return NULL;
@@ -1289,7 +1289,7 @@ struct xfrm_state *xfrm_state_migrate(struct xfrm_state *x,
state is to be updated as it is a part of triplet */
xfrm_state_insert(xc);
} else {
- if ((err = xfrm_state_add(xc)) < 0)
+ if (xfrm_state_add(xc) < 0)
goto error;
}
@@ -1601,6 +1601,23 @@ unlock:
}
EXPORT_SYMBOL(xfrm_alloc_spi);
+static bool __xfrm_state_filter_match(struct xfrm_state *x,
+ struct xfrm_address_filter *filter)
+{
+ if (filter) {
+ if ((filter->family == AF_INET ||
+ filter->family == AF_INET6) &&
+ x->props.family != filter->family)
+ return false;
+
+ return addr_match(&x->props.saddr, &filter->saddr,
+ filter->splen) &&
+ addr_match(&x->id.daddr, &filter->daddr,
+ filter->dplen);
+ }
+ return true;
+}
+
int xfrm_state_walk(struct net *net, struct xfrm_state_walk *walk,
int (*func)(struct xfrm_state *, int, void*),
void *data)
@@ -1623,6 +1640,8 @@ int xfrm_state_walk(struct net *net, struct xfrm_state_walk *walk,
state = container_of(x, struct xfrm_state, km);
if (!xfrm_id_proto_match(state->id.proto, walk->proto))
continue;
+ if (!__xfrm_state_filter_match(state, walk->filter))
+ continue;
err = func(state, walk->seq, data);
if (err) {
list_move_tail(&walk->all, &x->all);
@@ -1641,17 +1660,21 @@ out:
}
EXPORT_SYMBOL(xfrm_state_walk);
-void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto)
+void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto,
+ struct xfrm_address_filter *filter)
{
INIT_LIST_HEAD(&walk->all);
walk->proto = proto;
walk->state = XFRM_STATE_DEAD;
walk->seq = 0;
+ walk->filter = filter;
}
EXPORT_SYMBOL(xfrm_state_walk_init);
void xfrm_state_walk_done(struct xfrm_state_walk *walk, struct net *net)
{
+ kfree(walk->filter);
+
if (list_empty(&walk->all))
return;
@@ -1804,6 +1827,24 @@ int km_report(struct net *net, u8 proto, struct xfrm_selector *sel, xfrm_address
}
EXPORT_SYMBOL(km_report);
+bool km_is_alive(const struct km_event *c)
+{
+ struct xfrm_mgr *km;
+ bool is_alive = false;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(km, &xfrm_km_list, list) {
+ if (km->is_alive && km->is_alive(c)) {
+ is_alive = true;
+ break;
+ }
+ }
+ rcu_read_unlock();
+
+ return is_alive;
+}
+EXPORT_SYMBOL(km_is_alive);
+
int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
{
int err;
@@ -2080,14 +2121,10 @@ out_bydst:
void xfrm_state_fini(struct net *net)
{
- struct xfrm_audit audit_info;
unsigned int sz;
flush_work(&net->xfrm.state_hash_work);
- audit_info.loginuid = INVALID_UID;
- audit_info.sessionid = (unsigned int)-1;
- audit_info.secid = 0;
- xfrm_state_flush(net, IPSEC_PROTO_ANY, &audit_info);
+ xfrm_state_flush(net, IPSEC_PROTO_ANY, false);
flush_work(&net->xfrm.state_gc_work);
WARN_ON(!list_empty(&net->xfrm.state_all));
@@ -2150,30 +2187,28 @@ static void xfrm_audit_helper_pktinfo(struct sk_buff *skb, u16 family,
}
}
-void xfrm_audit_state_add(struct xfrm_state *x, int result,
- kuid_t auid, unsigned int sessionid, u32 secid)
+void xfrm_audit_state_add(struct xfrm_state *x, int result, bool task_valid)
{
struct audit_buffer *audit_buf;
audit_buf = xfrm_audit_start("SAD-add");
if (audit_buf == NULL)
return;
- xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf);
+ xfrm_audit_helper_usrinfo(task_valid, audit_buf);
xfrm_audit_helper_sainfo(x, audit_buf);
audit_log_format(audit_buf, " res=%u", result);
audit_log_end(audit_buf);
}
EXPORT_SYMBOL_GPL(xfrm_audit_state_add);
-void xfrm_audit_state_delete(struct xfrm_state *x, int result,
- kuid_t auid, unsigned int sessionid, u32 secid)
+void xfrm_audit_state_delete(struct xfrm_state *x, int result, bool task_valid)
{
struct audit_buffer *audit_buf;
audit_buf = xfrm_audit_start("SAD-delete");
if (audit_buf == NULL)
return;
- xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf);
+ xfrm_audit_helper_usrinfo(task_valid, audit_buf);
xfrm_audit_helper_sainfo(x, audit_buf);
audit_log_format(audit_buf, " res=%u", result);
audit_log_end(audit_buf);
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 2f7ddc3a59b..d4db6ebb089 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -137,7 +137,8 @@ static inline int verify_replay(struct xfrm_usersa_info *p,
if (!rt)
return 0;
- if (p->id.proto != IPPROTO_ESP)
+ /* As only ESP and AH support ESN feature. */
+ if ((p->id.proto != IPPROTO_ESP) && (p->id.proto != IPPROTO_AH))
return -EINVAL;
if (p->replay_window != 0)
@@ -176,9 +177,7 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
attrs[XFRMA_ALG_AEAD] ||
attrs[XFRMA_ALG_CRYPT] ||
attrs[XFRMA_ALG_COMP] ||
- attrs[XFRMA_TFCPAD] ||
- (ntohl(p->id.spi) >= 0x10000))
-
+ attrs[XFRMA_TFCPAD])
goto out;
break;
@@ -206,7 +205,8 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
attrs[XFRMA_ALG_AUTH] ||
attrs[XFRMA_ALG_AUTH_TRUNC] ||
attrs[XFRMA_ALG_CRYPT] ||
- attrs[XFRMA_TFCPAD])
+ attrs[XFRMA_TFCPAD] ||
+ (ntohl(p->id.spi) >= 0x10000))
goto out;
break;
@@ -596,9 +596,6 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
struct xfrm_state *x;
int err;
struct km_event c;
- kuid_t loginuid = audit_get_loginuid(current);
- unsigned int sessionid = audit_get_sessionid(current);
- u32 sid;
err = verify_newsa_info(p, attrs);
if (err)
@@ -614,8 +611,7 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
else
err = xfrm_state_update(x);
- security_task_getsecid(current, &sid);
- xfrm_audit_state_add(x, err ? 0 : 1, loginuid, sessionid, sid);
+ xfrm_audit_state_add(x, err ? 0 : 1, true);
if (err < 0) {
x->km.state = XFRM_STATE_DEAD;
@@ -675,9 +671,6 @@ static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
int err = -ESRCH;
struct km_event c;
struct xfrm_usersa_id *p = nlmsg_data(nlh);
- kuid_t loginuid = audit_get_loginuid(current);
- unsigned int sessionid = audit_get_sessionid(current);
- u32 sid;
x = xfrm_user_state_lookup(net, p, attrs, &err);
if (x == NULL)
@@ -702,8 +695,7 @@ static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
km_state_notify(x, &c);
out:
- security_task_getsecid(current, &sid);
- xfrm_audit_state_delete(x, err ? 0 : 1, loginuid, sessionid, sid);
+ xfrm_audit_state_delete(x, err ? 0 : 1, true);
xfrm_state_put(x);
return err;
}
@@ -881,6 +873,7 @@ static int xfrm_dump_sa_done(struct netlink_callback *cb)
return 0;
}
+static const struct nla_policy xfrma_policy[XFRMA_MAX+1];
static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
@@ -896,8 +889,31 @@ static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb)
info.nlmsg_flags = NLM_F_MULTI;
if (!cb->args[0]) {
+ struct nlattr *attrs[XFRMA_MAX+1];
+ struct xfrm_address_filter *filter = NULL;
+ u8 proto = 0;
+ int err;
+
cb->args[0] = 1;
- xfrm_state_walk_init(walk, 0);
+
+ err = nlmsg_parse(cb->nlh, 0, attrs, XFRMA_MAX,
+ xfrma_policy);
+ if (err < 0)
+ return err;
+
+ if (attrs[XFRMA_ADDRESS_FILTER]) {
+ filter = kmalloc(sizeof(*filter), GFP_KERNEL);
+ if (filter == NULL)
+ return -ENOMEM;
+
+ memcpy(filter, nla_data(attrs[XFRMA_ADDRESS_FILTER]),
+ sizeof(*filter));
+ }
+
+ if (attrs[XFRMA_PROTO])
+ proto = nla_get_u8(attrs[XFRMA_PROTO]);
+
+ xfrm_state_walk_init(walk, proto, filter);
}
(void) xfrm_state_walk(net, walk, dump_one_state, &info);
@@ -930,6 +946,20 @@ static struct sk_buff *xfrm_state_netlink(struct sk_buff *in_skb,
return skb;
}
+/* A wrapper for nlmsg_multicast() checking that nlsk is still available.
+ * Must be called with RCU read lock.
+ */
+static inline int xfrm_nlmsg_multicast(struct net *net, struct sk_buff *skb,
+ u32 pid, unsigned int group)
+{
+ struct sock *nlsk = rcu_dereference(net->xfrm.nlsk);
+
+ if (nlsk)
+ return nlmsg_multicast(nlsk, skb, pid, group, GFP_ATOMIC);
+ else
+ return -1;
+}
+
static inline size_t xfrm_spdinfo_msgsize(void)
{
return NLMSG_ALIGN(4)
@@ -1389,9 +1419,6 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
struct km_event c;
int err;
int excl;
- kuid_t loginuid = audit_get_loginuid(current);
- unsigned int sessionid = audit_get_sessionid(current);
- u32 sid;
err = verify_newpolicy_info(p);
if (err)
@@ -1410,8 +1437,7 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
* a type XFRM_MSG_UPDPOLICY - JHS */
excl = nlh->nlmsg_type == XFRM_MSG_NEWPOLICY;
err = xfrm_policy_insert(p->dir, xp, excl);
- security_task_getsecid(current, &sid);
- xfrm_audit_policy_add(xp, err ? 0 : 1, loginuid, sessionid, sid);
+ xfrm_audit_policy_add(xp, err ? 0 : 1, true);
if (err) {
security_xfrm_policy_free(xp->security);
@@ -1648,13 +1674,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
NETLINK_CB(skb).portid);
}
} else {
- kuid_t loginuid = audit_get_loginuid(current);
- unsigned int sessionid = audit_get_sessionid(current);
- u32 sid;
-
- security_task_getsecid(current, &sid);
- xfrm_audit_policy_delete(xp, err ? 0 : 1, loginuid, sessionid,
- sid);
+ xfrm_audit_policy_delete(xp, err ? 0 : 1, true);
if (err != 0)
goto out;
@@ -1679,13 +1699,9 @@ static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
struct net *net = sock_net(skb->sk);
struct km_event c;
struct xfrm_usersa_flush *p = nlmsg_data(nlh);
- struct xfrm_audit audit_info;
int err;
- audit_info.loginuid = audit_get_loginuid(current);
- audit_info.sessionid = audit_get_sessionid(current);
- security_task_getsecid(current, &audit_info.secid);
- err = xfrm_state_flush(net, p->proto, &audit_info);
+ err = xfrm_state_flush(net, p->proto, true);
if (err) {
if (err == -ESRCH) /* empty table */
return 0;
@@ -1869,16 +1885,12 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
struct km_event c;
u8 type = XFRM_POLICY_TYPE_MAIN;
int err;
- struct xfrm_audit audit_info;
err = copy_from_user_policy_type(&type, attrs);
if (err)
return err;
- audit_info.loginuid = audit_get_loginuid(current);
- audit_info.sessionid = audit_get_sessionid(current);
- security_task_getsecid(current, &audit_info.secid);
- err = xfrm_policy_flush(net, type, &audit_info);
+ err = xfrm_policy_flush(net, type, true);
if (err) {
if (err == -ESRCH) /* empty table */
return 0;
@@ -1944,14 +1956,8 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
err = 0;
if (up->hard) {
- kuid_t loginuid = audit_get_loginuid(current);
- unsigned int sessionid = audit_get_sessionid(current);
- u32 sid;
-
- security_task_getsecid(current, &sid);
xfrm_policy_delete(xp, p->dir);
- xfrm_audit_policy_delete(xp, 1, loginuid, sessionid, sid);
-
+ xfrm_audit_policy_delete(xp, 1, true);
} else {
// reset the timers here?
WARN(1, "Dont know what to do with soft policy expire\n");
@@ -1987,13 +1993,8 @@ static int xfrm_add_sa_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
km_state_expired(x, ue->hard, nlh->nlmsg_pid);
if (ue->hard) {
- kuid_t loginuid = audit_get_loginuid(current);
- unsigned int sessionid = audit_get_sessionid(current);
- u32 sid;
-
- security_task_getsecid(current, &sid);
__xfrm_state_delete(x);
- xfrm_audit_state_delete(x, 1, loginuid, sessionid, sid);
+ xfrm_audit_state_delete(x, 1, true);
}
err = 0;
out:
@@ -2240,7 +2241,7 @@ static int xfrm_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
if (build_migrate(skb, m, num_migrate, k, sel, dir, type) < 0)
BUG();
- return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_MIGRATE, GFP_ATOMIC);
+ return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_MIGRATE);
}
#else
static int xfrm_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
@@ -2303,6 +2304,8 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
[XFRMA_TFCPAD] = { .type = NLA_U32 },
[XFRMA_REPLAY_ESN_VAL] = { .len = sizeof(struct xfrm_replay_state_esn) },
[XFRMA_SA_EXTRA_FLAGS] = { .type = NLA_U32 },
+ [XFRMA_PROTO] = { .type = NLA_U8 },
+ [XFRMA_ADDRESS_FILTER] = { .len = sizeof(struct xfrm_address_filter) },
};
static const struct xfrm_link {
@@ -2350,7 +2353,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
link = &xfrm_dispatch[type];
/* All operations require privileges, even GET */
- if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
+ if (!netlink_net_capable(skb, CAP_NET_ADMIN))
return -EPERM;
if ((type == (XFRM_MSG_GETSA - XFRM_MSG_BASE) ||
@@ -2429,7 +2432,7 @@ static int xfrm_exp_state_notify(struct xfrm_state *x, const struct km_event *c)
return -EMSGSIZE;
}
- return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC);
+ return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_EXPIRE);
}
static int xfrm_aevent_state_notify(struct xfrm_state *x, const struct km_event *c)
@@ -2444,7 +2447,7 @@ static int xfrm_aevent_state_notify(struct xfrm_state *x, const struct km_event
if (build_aevent(skb, x, c) < 0)
BUG();
- return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_AEVENTS, GFP_ATOMIC);
+ return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_AEVENTS);
}
static int xfrm_notify_sa_flush(const struct km_event *c)
@@ -2470,7 +2473,7 @@ static int xfrm_notify_sa_flush(const struct km_event *c)
nlmsg_end(skb, nlh);
- return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_SA, GFP_ATOMIC);
+ return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_SA);
}
static inline size_t xfrm_sa_len(struct xfrm_state *x)
@@ -2557,7 +2560,7 @@ static int xfrm_notify_sa(struct xfrm_state *x, const struct km_event *c)
nlmsg_end(skb, nlh);
- return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_SA, GFP_ATOMIC);
+ return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_SA);
out_free_skb:
kfree_skb(skb);
@@ -2648,7 +2651,7 @@ static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt,
if (build_acquire(skb, x, xt, xp) < 0)
BUG();
- return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_ACQUIRE, GFP_ATOMIC);
+ return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_ACQUIRE);
}
/* User gives us xfrm_user_policy_info followed by an array of 0
@@ -2762,7 +2765,7 @@ static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, const struct
if (build_polexpire(skb, xp, dir, c) < 0)
BUG();
- return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC);
+ return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_EXPIRE);
}
static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, const struct km_event *c)
@@ -2824,7 +2827,7 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, const struct km_e
nlmsg_end(skb, nlh);
- return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC);
+ return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_POLICY);
out_free_skb:
kfree_skb(skb);
@@ -2852,7 +2855,7 @@ static int xfrm_notify_policy_flush(const struct km_event *c)
nlmsg_end(skb, nlh);
- return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC);
+ return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_POLICY);
out_free_skb:
kfree_skb(skb);
@@ -2921,7 +2924,7 @@ static int xfrm_send_report(struct net *net, u8 proto,
if (build_report(skb, proto, sel, addr) < 0)
BUG();
- return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_REPORT, GFP_ATOMIC);
+ return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_REPORT);
}
static inline size_t xfrm_mapping_msgsize(void)
@@ -2973,7 +2976,12 @@ static int xfrm_send_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr,
if (build_mapping(skb, x, ipaddr, sport) < 0)
BUG();
- return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_MAPPING, GFP_ATOMIC);
+ return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_MAPPING);
+}
+
+static bool xfrm_is_alive(const struct km_event *c)
+{
+ return (bool)xfrm_acquire_is_on(c->net);
}
static struct xfrm_mgr netlink_mgr = {
@@ -2985,6 +2993,7 @@ static struct xfrm_mgr netlink_mgr = {
.report = xfrm_send_report,
.migrate = xfrm_send_migrate,
.new_mapping = xfrm_send_mapping,
+ .is_alive = xfrm_is_alive,
};
static int __net_init xfrm_user_net_init(struct net *net)